Mirror of https://git.isriupjv.fr/ISRI/ai-server (synced 2025-04-24 01:58:12 +02:00)
base for the application: support custom Python applications and auto-install dependencies
This commit is contained in:
parent a20371e1ab
commit e2ebbf8a82

25 changed files with 494 additions and 2 deletions
5  .gitignore  vendored  Normal file
@@ -0,0 +1,5 @@
# Development
.idea/

# Environment
venv/
17  Dockerfile  Normal file
@@ -0,0 +1,17 @@
FROM python:3.12

# copy the application
WORKDIR /app
COPY ./ ./

# install the dependencies
RUN pip3 install -r ./requirements.txt

# expose the API port
EXPOSE 8000

# environment variables
ENV MODEL_DIRECTORY=/models/

# run the server
CMD ["python3", "-m", "source"]
16  README.md
@@ -1,3 +1,15 @@
-# ai-server
+# AI-Server
 
 A server that can serve AI models with an API and an authentication system
+
+# Usage
+
+## Docker
+
+
+# Environment Variables
+
+| Name            | Description                                |
+|-----------------|--------------------------------------------|
+| MODEL_DIRECTORY | the directory where the models are stored  |
17  docker-compose.yml  Normal file
@@ -0,0 +1,17 @@
services:
  ai-server:
    build:
      context: .
      dockerfile: ./Dockerfile
    runtime: nvidia
    volumes:
      - models:/models/
      - /root/.cache/huggingface:/root/.cache/huggingface
    environment:
      - MODEL_LIBRARY=/models/
      - NVIDIA_VISIBLE_DEVICES=all
    ports:
      - "8000:8000"

volumes:
  models:
4  requirements.txt  Normal file
@@ -0,0 +1,4 @@
# web
fastapi
uvicorn
pydantic
3  samples/models/dummy/config.json  Normal file
@@ -0,0 +1,3 @@
{
    "type": "dummy"
}
11  samples/models/python-bert-1/config.json  Normal file
@@ -0,0 +1,11 @@
{
    "type": "python",
    "file": "model.py",

    "requirements": [
        "transformers",
        "torch",
        "torchvision",
        "torchaudio"
    ]
}
28  samples/models/python-bert-1/model.py  Normal file
@@ -0,0 +1,28 @@
import json

import torch
import transformers


MODEL_NAME: str = "huawei-noah/TinyBERT_General_4L_312D"


def load(model):
    model.model = transformers.AutoModel.from_pretrained(MODEL_NAME)
    model.tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)


def unload(model):
    model.model = None
    model.tokenizer = None


def infer(model, payload: dict) -> str:
    inputs = model.tokenizer(payload["prompt"], return_tensors="pt")

    with torch.no_grad():
        outputs = model.model(**inputs)

    embeddings = outputs.last_hidden_state

    return json.dumps({
        "data": embeddings.tolist()
    })
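As the sample above suggests, a custom "python" model only needs a config.json pointing at a Python file that defines module-level load, unload and infer functions; any listed requirements are pip-installed automatically when the model is constructed. A minimal sketch of another such plug-in (hypothetical model directory and file name, no extra requirements assumed) could look like:

# samples/models/echo/model.py (hypothetical example)
import json


def load(model):
    # nothing to set up for this model
    pass


def unload(model):
    # nothing to release for this model
    pass


def infer(model, payload: dict) -> str:
    # simply echo the received payload back as JSON
    return json.dumps({"echo": payload})

with a matching samples/models/echo/config.json containing {"type": "python", "file": "model.py"}.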
4  samples/models/python-bert-2/config.json  Normal file
@@ -0,0 +1,4 @@
{
    "type": "python",
    "file": "model.py"
}
28  samples/models/python-bert-2/model.py  Normal file
@@ -0,0 +1,28 @@
import json

import torch
import transformers


MODEL_NAME: str = "huawei-noah/TinyBERT_General_4L_312D"


def load(model):
    model.model = transformers.AutoModel.from_pretrained(MODEL_NAME)
    model.tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)


def unload(model):
    model.model = None
    model.tokenizer = None


def infer(model, payload: dict) -> str:
    inputs = model.tokenizer(payload["prompt"], return_tensors="pt")

    with torch.no_grad():
        outputs = model.model(**inputs)

    embeddings = outputs.last_hidden_state

    return json.dumps({
        "data": embeddings.tolist()
    })
3  source/__init__.py  Normal file
@@ -0,0 +1,3 @@
from . import api
from . import model
from . import manager
19  source/__main__.py  Normal file
@@ -0,0 +1,19 @@
import os

from source import manager, model, api


# create a fastapi application
application = api.Application()


# create the model controller
model_controller = manager.ModelManager(os.environ["MODEL_LIBRARY"])
model_controller.register_model_type("dummy", model.DummyModel)
model_controller.register_model_type("python", model.PythonModel)
model_controller.reload()

api.route.models.load(application, model_controller)


# serve the application
application.serve("0.0.0.0", 8000)
15  source/api/Application.py  Normal file
@@ -0,0 +1,15 @@
import fastapi
import uvicorn

from source import meta


class Application(fastapi.FastAPI):
    def __init__(self):
        super().__init__(
            title=meta.name,
            description=meta.description
        )

    def serve(self, host: str = "0.0.0.0", port: int = 8080):
        uvicorn.run(self, host=host, port=port)
3  source/api/__init__.py  Normal file
@@ -0,0 +1,3 @@
from . import route

from .Application import Application
1  source/api/route/__init__.py  Normal file
@@ -0,0 +1 @@
from . import models
74  source/api/route/models.py  Normal file
@@ -0,0 +1,74 @@
import sys
import traceback

import fastapi
import pydantic

from source.api import Application
from source import manager


class InferenceRequest(pydantic.BaseModel):
    """
    Represent a request made when inferring a model
    """

    request: dict


def load(application: Application, model_manager: manager.ModelManager):
    @application.get("/models")
    async def get_models() -> list[str]:
        """
        Get the list of the available models
        :return: the list of the available models
        """

        # reload the model list
        model_manager.reload()
        # list the models found
        return list(model_manager.models.keys())

    @application.get("/models/{model_name}")
    async def get_model(model_name: str) -> dict:
        """
        Get information about a specific model
        :param model_name: the name of the model
        :return: the information about the corresponding model
        """

        # get the corresponding model
        model = model_manager.models.get(model_name)
        if model is None:
            raise fastapi.HTTPException(status_code=404, detail="Model not found")

        # return the model information
        return model.get_information()

    @application.post("/models/{model_name}/infer")
    async def infer_model(model_name: str, request: InferenceRequest) -> fastapi.Response:
        """
        Run an inference through the selected model
        :param model_name: the name of the model
        :param request: the data to pass to the model
        :return: the model response
        """

        # get the corresponding model
        model = model_manager.models.get(model_name)
        if model is None:
            raise fastapi.HTTPException(status_code=404, detail="Model not found")

        # infer the data through the model
        try:
            response = model.infer(request.request)
        except Exception:
            print(traceback.format_exc(), file=sys.stderr)
            raise fastapi.HTTPException(status_code=500, detail="An error occurred while running the model inference.")

        # pack the model response into a fastapi response
        return fastapi.Response(
            content=response,
            media_type=model.response_mimetype,
        )
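These routes define the whole public contract of the server in this commit: GET /models lists the model names, GET /models/{model_name} returns their information, and POST /models/{model_name}/infer expects a JSON body whose payload is wrapped in a "request" field, as declared by InferenceRequest. A minimal client sketch using only the standard library (assuming the server is reachable on localhost:8000 and the "dummy" sample model is in the model library) could be:

# hypothetical client sketch, assuming the server listens on localhost:8000
# and the "dummy" sample model is available in the model library
import json
import urllib.request

# list the available models (GET /models)
with urllib.request.urlopen("http://localhost:8000/models") as response:
    print(json.load(response))

# run an inference (POST /models/dummy/infer); the body must wrap the
# payload in a "request" field, as defined by InferenceRequest
data = json.dumps({"request": {"prompt": "hello"}}).encode()
request = urllib.request.Request(
    "http://localhost:8000/models/dummy/infer",
    data=data,
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(request) as response:
    print(response.read().decode())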
52  source/manager/ModelManager.py  Normal file
@@ -0,0 +1,52 @@
import json
import os
import typing
import warnings
from pathlib import Path

from source import model


class ModelManager:
    def __init__(self, model_library: os.PathLike | str):
        self.model_library: Path = Path(model_library)

        # the model types
        self.model_types: dict[str, typing.Type[model.base.BaseModel]] = {}
        # the models
        self.models: dict[str, model.base.BaseModel] = {}

        # the currently loaded model
        # TODO(Faraphel): load more than one model at a time? requires a much more complex manager to handle memory issues
        self.current_loaded_model: typing.Optional[model.base.BaseModel] = None

    def register_model_type(self, name: str, model_type: typing.Type[model.base.BaseModel]):
        self.model_types[name] = model_type

    def reload(self):
        for model_path in self.model_library.iterdir():
            model_name: str = model_path.name
            model_configuration_path: Path = model_path / "config.json"

            # check if the configuration file exists
            if not model_configuration_path.exists():
                warnings.warn(f"Model {model_name!r} is missing a config.json file.")
                continue

            # load the configuration file
            model_configuration = json.loads(model_configuration_path.read_text())

            # get the model type for this model
            model_type_name: str = model_configuration.get("type")
            if model_type_name is None:
                warnings.warn("Field 'type' missing from the model configuration file.")
                continue

            # get the class of this model type
            model_type = self.model_types.get(model_type_name)
            if model_type is None:
                warnings.warn(f"Model type {model_type_name!r} does not exist. Has it been registered?")
                continue

            # load the model
            self.models[model_name] = model_type(self, model_configuration, model_path)
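The manager simply scans every subdirectory of the model library for a config.json, looks up the declared type among the registered model classes, and instantiates it. It can therefore be exercised without the API; a standalone sketch (assuming it is run from the repository root so that the samples/models directory added in this commit exists) could be:

# standalone sketch, assuming it is run from the repository root
# so that the samples/models directory added in this commit exists
from source import manager, model

model_manager = manager.ModelManager("samples/models")
model_manager.register_model_type("dummy", model.DummyModel)
model_manager.register_model_type("python", model.PythonModel)
model_manager.reload()

# the dummy sample simply echoes the payload back as JSON
dummy = model_manager.models["dummy"]
print(dummy.infer({"prompt": "hello"}))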
1  source/manager/__init__.py  Normal file
@@ -0,0 +1 @@
from .ModelManager import ModelManager
2  source/meta.py  Normal file
@@ -0,0 +1,2 @@
name: str = "AI-Server"
description: str = "Serve models through an API"
19  source/model/DummyModel.py  Normal file
@@ -0,0 +1,19 @@
import json

from source.model import base


class DummyModel(base.BaseModel):
    """
    A dummy model, mainly used to test the API and the manager.
    It simply sends back the request made to it.
    """

    def _load(self) -> None:
        pass

    def _unload(self) -> None:
        pass

    def _infer(self, payload: dict) -> str | bytes:
        return json.dumps(payload)
46  source/model/PythonModel.py  Normal file
@@ -0,0 +1,46 @@
import importlib.util
import subprocess
import sys
import uuid
from pathlib import Path

from source.manager import ModelManager
from source.model import base


class PythonModel(base.BaseModel):
    """
    A model running custom Python code.
    """

    def __init__(self, manager: ModelManager, configuration: dict, path: Path):
        super().__init__(manager, configuration, path)

        # get the name of the file containing the model code
        file = configuration.get("file")
        if file is None:
            raise ValueError("Field 'file' is missing from the configuration")

        # install custom requirements
        requirements = configuration.get("requirements", [])
        if len(requirements) > 0:
            subprocess.run([sys.executable, "-m", "pip", "install", *requirements])

        # create the module specification
        module_spec = importlib.util.spec_from_file_location(
            f"model-{uuid.uuid4()}",
            self.path / file
        )
        # get the module
        self.module = importlib.util.module_from_spec(module_spec)
        # load the module
        module_spec.loader.exec_module(self.module)

    def _load(self) -> None:
        return self.module.load(self)

    def _unload(self) -> None:
        return self.module.unload(self)

    def _infer(self, payload: dict) -> str | bytes:
        return self.module.infer(self, payload)
4  source/model/__init__.py  Normal file
@@ -0,0 +1,4 @@
from . import base

from .DummyModel import DummyModel
from .PythonModel import PythonModel
123  source/model/base/BaseModel.py  Normal file
@@ -0,0 +1,123 @@
import abc
import gc
from pathlib import Path

from source.manager import ModelManager


class BaseModel(abc.ABC):
    """
    Represent a model.
    """

    def __init__(self, manager: ModelManager, configuration: dict, path: Path):
        # the environment directory of the model
        self.path = path
        # the model manager
        self.manager = manager
        # the mimetype of the model responses
        self.response_mimetype: str = configuration.get("response_type", "application/json")

        self._loaded = False

    def __repr__(self):
        return f"<{self.__class__.__name__}: {self.name}>"

    @property
    def name(self):
        """
        Get the name of the model
        :return: the name of the model
        """

        return self.path.name

    def get_information(self):
        """
        Get information about the model
        :return: information about the model
        """

        return {
            "name": self.name,
            "response_mimetype": self.response_mimetype,
        }

    def load(self) -> None:
        """
        Load the model within the model manager
        """

        # if we are already loaded, stop
        if self._loaded:
            return

        # check if we are the currently loaded model
        if self.manager.current_loaded_model is not self:
            # unload the previous model
            if self.manager.current_loaded_model is not None:
                self.manager.current_loaded_model.unload()

        # model specific loading
        self._load()

        # mark the model as loaded
        self._loaded = True

        # declare ourselves as the currently loaded model
        self.manager.current_loaded_model = self

    @abc.abstractmethod
    def _load(self):
        """
        Load the model
        Do not call manually, use `load` instead.
        """

    def unload(self) -> None:
        """
        Unload the model within the model manager
        """

        # if we are not loaded, stop
        if not self._loaded:
            return

        # if we were the currently loaded model of the manager, demote ourselves
        if self.manager.current_loaded_model is self:
            self.manager.current_loaded_model = None

        # model specific unloading part
        self._unload()

        # force the garbage collector to clean the memory
        gc.collect()

        # mark the model as unloaded
        self._loaded = False

    @abc.abstractmethod
    def _unload(self):
        """
        Unload the model
        Do not call manually, use `unload` instead.
        """

    def infer(self, payload: dict) -> str | bytes:
        """
        Infer our payload through the model within the model manager
        :param payload: the payload to give to the model
        :return: the response of the model
        """

        # make sure we are loaded before an inference
        self.load()

        # model specific inference part
        return self._infer(payload)

    @abc.abstractmethod
    def _infer(self, payload: dict) -> str | bytes:
        """
        Infer our payload through the model
        :param payload: the payload to give to the model
        :return: the response of the model
        """
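BaseModel is the extension point for new model types: a subclass only implements _load, _unload and _infer, while load/unload/infer handle the single-loaded-model bookkeeping, and the per-model config.json can set "response_type" to change the mimetype of the packed response. A hypothetical subclass sketch (EchoTextModel is not part of this commit) could look like:

# hypothetical subclass sketch: a model type returning plain text instead of JSON
from source.model import base


class EchoTextModel(base.BaseModel):
    """
    Return the "prompt" field of the payload as plain text.
    """

    def _load(self) -> None:
        pass

    def _unload(self) -> None:
        pass

    def _infer(self, payload: dict) -> str | bytes:
        return str(payload.get("prompt", ""))

Such a type would be registered with model_controller.register_model_type("echo-text", EchoTextModel), and a model directory using it could set "response_type": "text/plain" in its config.json so that response_mimetype is applied to the FastAPI response.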
1  source/model/base/__init__.py  Normal file
@@ -0,0 +1 @@
from .BaseModel import BaseModel