diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6863c6e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+# Development
+.idea/
+
+# Environment
+venv/
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..582bda9
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,17 @@
+FROM python:3.12
+
+# copy the application
+WORKDIR /app
+COPY ./ ./
+
+# install the dependencies
+RUN pip3 install -r ./requirements.txt
+
+# expose the API port
+EXPOSE 8000
+
+# environment variables
+ENV MODEL_LIBRARY=/models/
+
+# run the server
+CMD ["python3", "-m", "source"]
diff --git a/LICENSE b/LICENSE.md
similarity index 100%
rename from LICENSE
rename to LICENSE.md
diff --git a/README.md b/README.md
index 61cd357..4ad7096 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,17 @@
-# ai-server
+# AI-Server
 
-A server that can serve AI models with an API and an authentication system
\ No newline at end of file
+A server that can serve AI models with an API and an authentication system
+
+# Usage
+
+## Docker
+
+Build and start the server with the provided Compose file (it expects the NVIDIA container runtime):
+`docker compose up --build`
+The API is then available on port 8000.
+
+# Environment Variables
+
+| Name          | Description                                |
+|---------------|--------------------------------------------|
+| MODEL_LIBRARY | The directory where the models are stored  |
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..f2c1132
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,17 @@
+services:
+  ai-server:
+    build:
+      context: .
+      dockerfile: ./Dockerfile
+    runtime: nvidia
+    volumes:
+      - models:/models/
+      - /root/.cache/huggingface:/root/.cache/huggingface
+    environment:
+      - MODEL_LIBRARY=/models/
+      - NVIDIA_VISIBLE_DEVICES=all
+    ports:
+      - "8000:8000"
+
+volumes:
+  models:
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..0473dc2
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+# web
+fastapi
+uvicorn
+pydantic
diff --git a/samples/models/dummy/config.json b/samples/models/dummy/config.json
new file mode 100644
index 0000000..490264e
--- /dev/null
+++ b/samples/models/dummy/config.json
@@ -0,0 +1,3 @@
+{
+  "type": "dummy"
+}
diff --git a/samples/models/python-bert-1/config.json b/samples/models/python-bert-1/config.json
new file mode 100644
index 0000000..1d4687a
--- /dev/null
+++ b/samples/models/python-bert-1/config.json
@@ -0,0 +1,11 @@
+{
+  "type": "python",
+  "file": "model.py",
+
+  "requirements": [
+    "transformers",
+    "torch",
+    "torchvision",
+    "torchaudio"
+  ]
+}
diff --git a/samples/models/python-bert-1/model.py b/samples/models/python-bert-1/model.py
new file mode 100644
index 0000000..0d992dc
--- /dev/null
+++ b/samples/models/python-bert-1/model.py
@@ -0,0 +1,28 @@
+import json
+
+import torch
+import transformers
+
+
+MODEL_NAME: str = "huawei-noah/TinyBERT_General_4L_312D"
+
+
+def load(model):
+    model.model = transformers.AutoModel.from_pretrained(MODEL_NAME)
+    model.tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)
+
+def unload(model):
+    model.model = None
+    model.tokenizer = None
+
+def infer(model, payload: dict) -> str:
+    inputs = model.tokenizer(payload["prompt"], return_tensors="pt")
+
+    with torch.no_grad():
+        outputs = model.model(**inputs)
+
+    embeddings = outputs.last_hidden_state
+
+    return json.dumps({
+        "data": embeddings.tolist()
+    })
diff --git a/samples/models/python-bert-2/config.json b/samples/models/python-bert-2/config.json
new file mode 100644
index 0000000..af6ab11
--- /dev/null
+++ b/samples/models/python-bert-2/config.json
@@ -0,0 +1,4 @@
+{
+  "type": "python",
+  "file": "model.py"
+}
diff --git a/samples/models/python-bert-2/model.py b/samples/models/python-bert-2/model.py
new file mode 100644
index 0000000..0d992dc
--- /dev/null
+++ b/samples/models/python-bert-2/model.py
@@ -0,0 +1,28 @@
+import json
+
+import torch
+import transformers
+
+
+MODEL_NAME: str = "huawei-noah/TinyBERT_General_4L_312D"
+
+
+def load(model):
+    model.model = transformers.AutoModel.from_pretrained(MODEL_NAME)
+    model.tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)
+
+def unload(model):
+    model.model = None
+    model.tokenizer = None
+
+def infer(model, payload: dict) -> str:
+    inputs = model.tokenizer(payload["prompt"], return_tensors="pt")
+
+    with torch.no_grad():
+        outputs = model.model(**inputs)
+
+    embeddings = outputs.last_hidden_state
+
+    return json.dumps({
+        "data": embeddings.tolist()
+    })
diff --git a/source/__init__.py b/source/__init__.py
new file mode 100644
index 0000000..29afff1
--- /dev/null
+++ b/source/__init__.py
@@ -0,0 +1,3 @@
+from . import api
+from . import model
+from . import manager
diff --git a/source/__main__.py b/source/__main__.py
new file mode 100644
index 0000000..e9481db
--- /dev/null
+++ b/source/__main__.py
@@ -0,0 +1,19 @@
+import os
+
+from source import manager, model, api
+
+# create a fastapi application
+application = api.Application()
+
+
+# create the model controller
+model_controller = manager.ModelManager(os.environ["MODEL_LIBRARY"])
+model_controller.register_model_type("dummy", model.DummyModel)
+model_controller.register_model_type("python", model.PythonModel)
+model_controller.reload()
+
+api.route.models.load(application, model_controller)
+
+
+# serve the application
+application.serve("0.0.0.0", 8000)
diff --git a/source/api/Application.py b/source/api/Application.py
new file mode 100644
index 0000000..2406f40
--- /dev/null
+++ b/source/api/Application.py
@@ -0,0 +1,15 @@
+import fastapi
+import uvicorn
+
+from source import meta
+
+
+class Application(fastapi.FastAPI):
+    def __init__(self):
+        super().__init__(
+            title=meta.name,
+            description=meta.description
+        )
+
+    def serve(self, host: str = "0.0.0.0", port: int = 8080):
+        uvicorn.run(self, host=host, port=port)
\ No newline at end of file
diff --git a/source/api/__init__.py b/source/api/__init__.py
new file mode 100644
index 0000000..a84f34b
--- /dev/null
+++ b/source/api/__init__.py
@@ -0,0 +1,3 @@
+from . import route
+
+from .Application import Application
diff --git a/source/api/route/__init__.py b/source/api/route/__init__.py
new file mode 100644
index 0000000..0650744
--- /dev/null
+++ b/source/api/route/__init__.py
@@ -0,0 +1 @@
+from . import models
diff --git a/source/api/route/models.py b/source/api/route/models.py
new file mode 100644
index 0000000..47fbb8f
--- /dev/null
+++ b/source/api/route/models.py
@@ -0,0 +1,74 @@
+import sys
+import traceback
+
+import fastapi
+import pydantic
+
+from source.api import Application
+from source import manager
+
+
+class InferenceRequest(pydantic.BaseModel):
+    """
+    Represent a request made when inferring a model
+    """
+
+    data: dict
+
+
+def load(application: Application, model_manager: manager.ModelManager):
+    @application.get("/models")
+    async def get_models() -> list[str]:
+        """
+        Get the list of models available
+        :return: the list of models available
+        """
+
+        # reload the model list
+        model_manager.reload()
+        # list the models found
+        return list(model_manager.models.keys())
+
+    @application.get("/models/{model_name}")
+    async def get_model(model_name: str) -> dict:
+        """
+        Get information about a specific model
+        :param model_name: the name of the model
+        :return: the information about the corresponding model
+        """
+
+        # get the corresponding model
+        model = model_manager.models.get(model_name)
+        if model is None:
+            raise fastapi.HTTPException(status_code=404, detail="Model not found")
+
+        # return the model information
+        return model.get_information()
+
+
+    @application.post("/models/{model_name}/infer")
+    async def infer_model(model_name: str, request: InferenceRequest) -> fastapi.Response:
+        """
+        Run an inference through the selected model
+        :param model_name: the name of the model
+        :param request: the data to pass to the model
+        :return: the model response
+        """
+
+        # get the corresponding model
+        model = model_manager.models.get(model_name)
+        if model is None:
+            raise fastapi.HTTPException(status_code=404, detail="Model not found")
+
+        # infer the data through the model
+        try:
+            response = model.infer(request.data)
+        except Exception:
+            print(traceback.format_exc(), file=sys.stderr)
+            raise fastapi.HTTPException(status_code=500, detail="An error occurred while running the model inference.")
+
+        # pack the model response into a fastapi response
+        return fastapi.Response(
+            content=response,
+            media_type=model.response_mimetype,
+        )
diff --git a/source/manager/ModelManager.py b/source/manager/ModelManager.py
new file mode 100644
index 0000000..f501dfb
--- /dev/null
+++ b/source/manager/ModelManager.py
@@ -0,0 +1,52 @@
+import json
+import os
+import typing
+import warnings
+from pathlib import Path
+
+from source import model
+
+
+class ModelManager:
+    def __init__(self, model_library: os.PathLike | str):
+        self.model_library: Path = Path(model_library)
+
+        # the model types
+        self.model_types: dict[str, typing.Type[model.base.BaseModel]] = {}
+        # the models
+        self.models: dict[str, model.base.BaseModel] = {}
+
+        # the currently loaded model
+        # TODO(Faraphel): load more than one model at a time? Would require a much more complex manager to handle memory issues.
+        self.current_loaded_model: typing.Optional[model.base.BaseModel] = None
+
+    def register_model_type(self, name: str, model_type: typing.Type[model.base.BaseModel]):
+        self.model_types[name] = model_type
+
+    def reload(self):
+        for model_path in self.model_library.iterdir():
+            model_name: str = model_path.name
+            model_configuration_path: Path = model_path / "config.json"
+
+            # check if the configuration file exists
+            if not model_configuration_path.exists():
+                warnings.warn(f"Model {model_name!r} is missing a config.json file.")
+                continue
+
+            # load the configuration file
+            model_configuration = json.loads(model_configuration_path.read_text())
+
+            # get the model type for this model
+            model_type_name: str = model_configuration.get("type")
+            if model_type_name is None:
+                warnings.warn(f"Field 'type' missing from the configuration file of model {model_name!r}.")
+                continue
+
+            # get the class of this model type
+            model_type = self.model_types.get(model_type_name)
+            if model_type is None:
+                warnings.warn(f"Model type {model_type_name!r} does not exist. Has it been registered?")
+                continue
+
+            # load the model
+            self.models[model_name] = model_type(self, model_configuration, model_path)
diff --git a/source/manager/__init__.py b/source/manager/__init__.py
new file mode 100644
index 0000000..ab87b8b
--- /dev/null
+++ b/source/manager/__init__.py
@@ -0,0 +1 @@
+from .ModelManager import ModelManager
diff --git a/source/meta.py b/source/meta.py
new file mode 100644
index 0000000..e7a6c2e
--- /dev/null
+++ b/source/meta.py
@@ -0,0 +1,2 @@
+name: str = "AI-Server"
+description: str = "Serve models through an API"
diff --git a/source/model/DummyModel.py b/source/model/DummyModel.py
new file mode 100644
index 0000000..e18aeb3
--- /dev/null
+++ b/source/model/DummyModel.py
@@ -0,0 +1,19 @@
+import json
+
+from source.model import base
+
+
+class DummyModel(base.BaseModel):
+    """
+    A dummy model, mainly used to test the API and the manager.
+    It simply sends back the request made to it.
+    """
+
+    def _load(self) -> None:
+        pass
+
+    def _unload(self) -> None:
+        pass
+
+    def _infer(self, payload: dict) -> str | bytes:
+        return json.dumps(payload)
diff --git a/source/model/PythonModel.py b/source/model/PythonModel.py
new file mode 100644
index 0000000..d82dfe3
--- /dev/null
+++ b/source/model/PythonModel.py
@@ -0,0 +1,46 @@
+import importlib.util
+import subprocess
+import sys
+import uuid
+from pathlib import Path
+
+from source.manager import ModelManager
+from source.model import base
+
+
+class PythonModel(base.BaseModel):
+    """
+    A model running a custom Python file.
+ """ + + def __init__(self, manager: ModelManager, configuration: dict, path: Path): + super().__init__(manager, configuration, path) + + # get the name of the file containing the model code + file = configuration.get("file") + if file is None: + raise ValueError("Field 'file' is missing from the configuration") + + # install custom requirements + requirements = configuration.get("requirements", []) + if len(requirements) > 0: + subprocess.run([sys.executable, "-m", "pip", "install", *requirements]) + + # create the module specification + module_spec = importlib.util.spec_from_file_location( + f"model-{uuid.uuid4()}", + self.path / file + ) + # get the module + self.module = importlib.util.module_from_spec(module_spec) + # load the module + module_spec.loader.exec_module(self.module) + + def _load(self) -> None: + return self.module.load(self) + + def _unload(self) -> None: + return self.module.unload(self) + + def _infer(self, payload: dict) -> str | bytes: + return self.module.infer(self, payload) diff --git a/source/model/__init__.py b/source/model/__init__.py new file mode 100644 index 0000000..e4bff02 --- /dev/null +++ b/source/model/__init__.py @@ -0,0 +1,4 @@ +from . import base + +from .DummyModel import DummyModel +from .PythonModel import PythonModel diff --git a/source/model/base/BaseModel.py b/source/model/base/BaseModel.py new file mode 100644 index 0000000..1401eeb --- /dev/null +++ b/source/model/base/BaseModel.py @@ -0,0 +1,123 @@ +import abc +import gc +from pathlib import Path + +from source.manager import ModelManager + + +class BaseModel(abc.ABC): + """ + Represent a model. + """ + + def __init__(self, manager: ModelManager, configuration: dict, path: Path): + # the environment directory of the model + self.path = path + # the model manager + self.manager = manager + # the mimetype of the model responses + self.response_mimetype: str = configuration.get("response_type", "application/json") + + self._loaded = False + + def __repr__(self): + return f"<{self.__class__.__name__}: {self.name}>" + + @property + def name(self): + """ + Get the name of the model + :return: the name of the model + """ + + return self.path.name + + def get_information(self): + """ + Get information about the model + :return: information about the model + """ + + return { + "name": self.name, + "response_mimetype": self.response_mimetype, + } + + def load(self) -> None: + """ + Load the model within the model manager + """ + + # if we are already loaded, stop + if self._loaded: + return + + # check if we are the current loaded model + if self.manager.current_loaded_model is not self: + # unload the previous model + if self.manager.current_loaded_model is not None: + self.manager.current_loaded_model.unload() + + # model specific loading + self._load() + + # declare ourselves as the currently loaded model + self.manager.current_loaded_model = self + + @abc.abstractmethod + def _load(self): + """ + Load the model + Do not call manually, use `load` instead. 
+ """ + + def unload(self) -> None: + """ + Unload the model within the model manager + """ + + # if we are not already loaded, stop + if not self._loaded: + return + + # if we were the currently loaded model of the manager, demote ourselves + if self.manager.current_loaded_model is self: + self.manager.current_loaded_model = None + + # model specific unloading part + self._unload() + + # force the garbage collector to clean the memory + gc.collect() + + # mark the model as unloaded + self._loaded = False + + @abc.abstractmethod + def _unload(self): + """ + Unload the model + Do not call manually, use `unload` instead. + :return: + """ + + def infer(self, payload: dict) -> str | bytes: + """ + Infer our payload through the model within the model manager + :param payload: the payload to give to the model + :return: the response of the model + """ + + # make sure we are loaded before an inference + self.load() + + # model specific inference part + return self._infer(payload) + + @abc.abstractmethod + def _infer(self, payload: dict) -> str | bytes: + """ + Infer our payload through the model + :param payload: the payload to give to the model + :return: the response of the model + """ diff --git a/source/model/base/__init__.py b/source/model/base/__init__.py new file mode 100644 index 0000000..4c587f0 --- /dev/null +++ b/source/model/base/__init__.py @@ -0,0 +1 @@ +from .BaseModel import BaseModel