mirror of
https://git.isriupjv.fr/ISRI/ai-server
synced 2025-04-24 10:08:11 +02:00
Base for the application: support custom Python models and auto-install their dependencies
This commit is contained in:
parent a20371e1ab
commit e2ebbf8a82
25 changed files with 494 additions and 2 deletions
5 .gitignore vendored Normal file
@@ -0,0 +1,5 @@
# Development
.idea/

# Environment
venv/
17 Dockerfile Normal file
@@ -0,0 +1,17 @@
FROM python:3.12

# copy the application
WORKDIR /app
COPY ./ ./

# install the dependencies
RUN pip3 install -r ./requirements.txt

# expose the API port
EXPOSE 8000

# environment variables (MODEL_LIBRARY is the variable read by source/__main__.py)
ENV MODEL_LIBRARY=/models/

# run the server
CMD ["python3", "-m", "source"]
14 README.md
@@ -1,3 +1,15 @@
# AI-Server

A server that can serve AI models with an API and an authentication system

# Usage

## Docker

Build and start the server with `docker compose up --build` (see `docker-compose.yml` for the port mapping and volumes).

# Environment Variables

| Name          | Description                                |
|---------------|--------------------------------------------|
| MODEL_LIBRARY | the directory where the models are stored  |
17 docker-compose.yml Normal file
@@ -0,0 +1,17 @@
services:
  ai-server:
    build:
      context: .
      dockerfile: ./Dockerfile
    runtime: nvidia
    volumes:
      - models:/models/
      - /root/.cache/huggingface:/root/.cache/huggingface
    environment:
      - MODEL_LIBRARY=/models/
      - NVIDIA_VISIBLE_DEVICES=all
    ports:
      - "8000:8000"

volumes:
  models:
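Once the stack is up, the API can be smoke-tested from the host. A minimal sketch, assuming the default `8000:8000` mapping above and only the standard library:

```python
# List the available models through the API (assumes the compose stack is
# running and publishing port 8000 on localhost).
import json
import urllib.request

with urllib.request.urlopen("http://localhost:8000/models") as response:
    print(json.load(response))  # e.g. ['dummy', 'python-bert-1', 'python-bert-2']
```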
4 requirements.txt Normal file
@@ -0,0 +1,4 @@
# web
fastapi
uvicorn
pydantic
3 samples/models/dummy/config.json Normal file
@@ -0,0 +1,3 @@
{
    "type": "dummy"
}
11 samples/models/python-bert-1/config.json Normal file
@@ -0,0 +1,11 @@
{
    "type": "python",
    "file": "model.py",

    "requirements": [
        "transformers",
        "torch",
        "torchvision",
        "torchaudio"
    ]
}
28 samples/models/python-bert-1/model.py Normal file
@@ -0,0 +1,28 @@
import json

import torch
import transformers


MODEL_NAME: str = "huawei-noah/TinyBERT_General_4L_312D"


def load(model):
    model.model = transformers.AutoModel.from_pretrained(MODEL_NAME)
    model.tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)

def unload(model):
    model.model = None
    model.tokenizer = None

def infer(model, payload: dict) -> str:
    inputs = model.tokenizer(payload["prompt"], return_tensors="pt")

    with torch.no_grad():
        outputs = model.model(**inputs)

    embeddings = outputs.last_hidden_state

    return json.dumps({
        "data": embeddings.tolist()
    })
4 samples/models/python-bert-2/config.json Normal file
@@ -0,0 +1,4 @@
{
    "type": "python",
    "file": "model.py"
}
28 samples/models/python-bert-2/model.py Normal file
@@ -0,0 +1,28 @@
import json

import torch
import transformers


MODEL_NAME: str = "huawei-noah/TinyBERT_General_4L_312D"


def load(model):
    model.model = transformers.AutoModel.from_pretrained(MODEL_NAME)
    model.tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)

def unload(model):
    model.model = None
    model.tokenizer = None

def infer(model, payload: dict) -> str:
    inputs = model.tokenizer(payload["prompt"], return_tensors="pt")

    with torch.no_grad():
        outputs = model.model(**inputs)

    embeddings = outputs.last_hidden_state

    return json.dumps({
        "data": embeddings.tolist()
    })
3 source/__init__.py Normal file
@@ -0,0 +1,3 @@
from . import api
from . import model
from . import manager
19 source/__main__.py Normal file
@@ -0,0 +1,19 @@
import os

from source import manager, model, api

# create a fastapi application
application = api.Application()


# create the model controller
model_controller = manager.ModelManager(os.environ["MODEL_LIBRARY"])
model_controller.register_model_type("dummy", model.DummyModel)
model_controller.register_model_type("python", model.PythonModel)
model_controller.reload()

api.route.models.load(application, model_controller)


# serve the application
application.serve("0.0.0.0", 8000)
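For a local run without Docker, the entry point above only needs `MODEL_LIBRARY` to be set. A minimal sketch, assuming the repository root as the working directory with the bundled `samples/models`:

```python
# Run the server locally against the sample model library
# (equivalent to: MODEL_LIBRARY=samples/models python3 -m source).
import os
import runpy

os.environ["MODEL_LIBRARY"] = "samples/models"
runpy.run_module("source", run_name="__main__")  # serves on 0.0.0.0:8000
```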
15 source/api/Application.py Normal file
@@ -0,0 +1,15 @@
import fastapi
import uvicorn

from source import meta


class Application(fastapi.FastAPI):
    def __init__(self):
        super().__init__(
            title=meta.name,
            description=meta.description
        )

    def serve(self, host: str = "0.0.0.0", port: int = 8080):
        uvicorn.run(self, host=host, port=port)
3 source/api/__init__.py Normal file
@@ -0,0 +1,3 @@
from . import route

from .Application import Application
1 source/api/route/__init__.py Normal file
@@ -0,0 +1 @@
from . import models
74 source/api/route/models.py Normal file
@@ -0,0 +1,74 @@
import sys
import traceback

import fastapi
import pydantic

from source.api import Application
from source import manager


class InferenceRequest(pydantic.BaseModel):
    """
    Represent a request made when inferring a model
    """

    data: dict


def load(application: Application, model_manager: manager.ModelManager):
    @application.get("/models")
    async def get_models() -> list[str]:
        """
        Get the list of models available
        :return: the list of models available
        """

        # reload the model list
        model_manager.reload()
        # list the models found
        return list(model_manager.models.keys())

    @application.get("/models/{model_name}")
    async def get_model(model_name: str) -> dict:
        """
        Get information about a specific model
        :param model_name: the name of the model
        :return: the information about the corresponding model
        """

        # get the corresponding model
        model = model_manager.models.get(model_name)
        if model is None:
            raise fastapi.HTTPException(status_code=404, detail="Model not found")

        # return the model information
        return model.get_information()

    @application.post("/models/{model_name}/infer")
    async def infer_model(model_name: str, request: InferenceRequest) -> fastapi.Response:
        """
        Run an inference through the selected model
        :param model_name: the name of the model
        :param request: the data to infer to the model
        :return: the model response
        """

        # get the corresponding model
        model = model_manager.models.get(model_name)
        if model is None:
            raise fastapi.HTTPException(status_code=404, detail="Model not found")

        # infer the data through the model
        try:
            response = model.infer(request.data)
        except Exception:
            print(traceback.format_exc(), file=sys.stderr)
            raise fastapi.HTTPException(status_code=500, detail="An error occurred while inferring the model.")

        # pack the model response into a fastapi response
        return fastapi.Response(
            content=response,
            media_type=model.response_mimetype,
        )
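As a usage note for the `/infer` route above, the request body wraps the model payload in a `data` field. A minimal client sketch, assuming a local server with the sample `dummy` model (which echoes its payload back):

```python
# Call the inference endpoint of the sample "dummy" model.
import json
import urllib.request

request = urllib.request.Request(
    "http://localhost:8000/models/dummy/infer",
    data=json.dumps({"data": {"prompt": "hello"}}).encode(),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(request) as response:
    print(response.read().decode())  # {"prompt": "hello"}
```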
52 source/manager/ModelManager.py Normal file
@@ -0,0 +1,52 @@
import json
import os
import typing
import warnings
from pathlib import Path

from source import model


class ModelManager:
    def __init__(self, model_library: os.PathLike | str):
        self.model_library: Path = Path(model_library)

        # the model types
        self.model_types: dict[str, typing.Type[model.base.BaseModel]] = {}
        # the models
        self.models: dict[str, model.base.BaseModel] = {}

        # the currently loaded model
        # TODO(Faraphel): load more than one model at a time? requires a more complex manager to handle memory issues
        self.current_loaded_model: typing.Optional[model.base.BaseModel] = None

    def register_model_type(self, name: str, model_type: typing.Type[model.base.BaseModel]):
        self.model_types[name] = model_type

    def reload(self):
        for model_path in self.model_library.iterdir():
            model_name: str = model_path.name
            model_configuration_path: Path = model_path / "config.json"

            # check if the configuration file exists
            if not model_configuration_path.exists():
                warnings.warn(f"Model {model_name!r} is missing a config.json file.")
                continue

            # load the configuration file
            model_configuration = json.loads(model_configuration_path.read_text())

            # get the model type for this model
            model_type_name: str = model_configuration.get("type")
            if model_type_name is None:
                warnings.warn(f"Field 'type' missing from the configuration file of model {model_name!r}.")
                continue

            # get the class of this model type
            model_type = self.model_types.get(model_type_name)
            if model_type is None:
                warnings.warn(f"Model type {model_type_name!r} does not exist. Has it been registered?")
                continue

            # load the model
            self.models[model_name] = model_type(self, model_configuration, model_path)
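The manager can also be driven without the HTTP layer, which makes the `reload()` contract above easy to test. A minimal sketch using a throwaway library containing one dummy model:

```python
# Build a temporary model library, then load and run a model through the manager.
import json
import tempfile
from pathlib import Path

from source import manager, model

library = Path(tempfile.mkdtemp())
(library / "my-dummy").mkdir()
(library / "my-dummy" / "config.json").write_text(json.dumps({"type": "dummy"}))

model_manager = manager.ModelManager(library)
model_manager.register_model_type("dummy", model.DummyModel)
model_manager.reload()
print(model_manager.models["my-dummy"].infer({"hello": "world"}))  # {"hello": "world"}
```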
1 source/manager/__init__.py Normal file
@@ -0,0 +1 @@
from .ModelManager import ModelManager
2 source/meta.py Normal file
@@ -0,0 +1,2 @@
name: str = "AI-Server"
description: str = "Serve models through an API"
19 source/model/DummyModel.py Normal file
@@ -0,0 +1,19 @@
import json

from source.model import base


class DummyModel(base.BaseModel):
    """
    A dummy model, mainly used to test the API and the manager.
    Simply sends back the request made to it.
    """

    def _load(self) -> None:
        pass

    def _unload(self) -> None:
        pass

    def _infer(self, payload: dict) -> str | bytes:
        return json.dumps(payload)
46 source/model/PythonModel.py Normal file
@@ -0,0 +1,46 @@
import importlib.util
import subprocess
import sys
import uuid
from pathlib import Path

from source.manager import ModelManager
from source.model import base


class PythonModel(base.BaseModel):
    """
    A model running custom Python code.
    """

    def __init__(self, manager: ModelManager, configuration: dict, path: Path):
        super().__init__(manager, configuration, path)

        # get the name of the file containing the model code
        file = configuration.get("file")
        if file is None:
            raise ValueError("Field 'file' is missing from the configuration")

        # install custom requirements (fail early if the installation fails)
        requirements = configuration.get("requirements", [])
        if len(requirements) > 0:
            subprocess.run([sys.executable, "-m", "pip", "install", *requirements], check=True)

        # create the module specification
        module_spec = importlib.util.spec_from_file_location(
            f"model-{uuid.uuid4()}",
            self.path / file
        )
        # get the module
        self.module = importlib.util.module_from_spec(module_spec)
        # load the module
        module_spec.loader.exec_module(self.module)

    def _load(self) -> None:
        return self.module.load(self)

    def _unload(self) -> None:
        return self.module.unload(self)

    def _infer(self, payload: dict) -> str | bytes:
        return self.module.infer(self, payload)
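For reference, `PythonModel` only calls three module-level hooks, so the smallest valid `model.py` is tiny. A hypothetical echo model (not part of this commit):

```python
# Minimal module for a "python"-type model: PythonModel calls load(model),
# unload(model) and infer(model, payload) on it.
import json


def load(model):
    pass  # acquire resources here (weights, tokenizer, ...)


def unload(model):
    pass  # release them here


def infer(model, payload: dict) -> str:
    return json.dumps(payload)  # echo the payload back
```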
4 source/model/__init__.py Normal file
@@ -0,0 +1,4 @@
from . import base

from .DummyModel import DummyModel
from .PythonModel import PythonModel
123 source/model/base/BaseModel.py Normal file
@@ -0,0 +1,123 @@
import abc
import gc
from pathlib import Path

from source.manager import ModelManager


class BaseModel(abc.ABC):
    """
    Represent a model.
    """

    def __init__(self, manager: ModelManager, configuration: dict, path: Path):
        # the environment directory of the model
        self.path = path
        # the model manager
        self.manager = manager
        # the mimetype of the model responses
        self.response_mimetype: str = configuration.get("response_type", "application/json")

        self._loaded = False

    def __repr__(self):
        return f"<{self.__class__.__name__}: {self.name}>"

    @property
    def name(self):
        """
        Get the name of the model
        :return: the name of the model
        """

        return self.path.name

    def get_information(self):
        """
        Get information about the model
        :return: information about the model
        """

        return {
            "name": self.name,
            "response_mimetype": self.response_mimetype,
        }

    def load(self) -> None:
        """
        Load the model within the model manager
        """

        # if we are already loaded, stop
        if self._loaded:
            return

        # check if we are the current loaded model
        if self.manager.current_loaded_model is not self:
            # unload the previous model
            if self.manager.current_loaded_model is not None:
                self.manager.current_loaded_model.unload()

        # model specific loading
        self._load()

        # mark the model as loaded
        self._loaded = True

        # declare ourselves as the currently loaded model
        self.manager.current_loaded_model = self

    @abc.abstractmethod
    def _load(self):
        """
        Load the model
        Do not call manually, use `load` instead.
        """

    def unload(self) -> None:
        """
        Unload the model within the model manager
        """

        # if we are not already loaded, stop
        if not self._loaded:
            return

        # if we were the currently loaded model of the manager, demote ourselves
        if self.manager.current_loaded_model is self:
            self.manager.current_loaded_model = None

        # model specific unloading part
        self._unload()

        # force the garbage collector to clean the memory
        gc.collect()

        # mark the model as unloaded
        self._loaded = False

    @abc.abstractmethod
    def _unload(self):
        """
        Unload the model
        Do not call manually, use `unload` instead.
        """

    def infer(self, payload: dict) -> str | bytes:
        """
        Infer our payload through the model within the model manager
        :param payload: the payload to give to the model
        :return: the response of the model
        """

        # make sure we are loaded before an inference
        self.load()

        # model specific inference part
        return self._infer(payload)

    @abc.abstractmethod
    def _infer(self, payload: dict) -> str | bytes:
        """
        Infer our payload through the model
        :param payload: the payload to give to the model
        :return: the response of the model
        """
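Adding a new model type therefore comes down to subclassing `BaseModel` and registering it. A hypothetical `uppercase` type (not part of this commit), sketched under the same contract:

```python
# A toy model type: upper-cases every value of the payload.
import json

from source.model import base


class UppercaseModel(base.BaseModel):
    def _load(self) -> None:
        pass  # nothing to load

    def _unload(self) -> None:
        pass  # nothing to unload

    def _infer(self, payload: dict) -> str:
        return json.dumps({key: str(value).upper() for key, value in payload.items()})

# registered like the built-in types:
#   model_controller.register_model_type("uppercase", UppercaseModel)
```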
1 source/model/base/__init__.py Normal file
@@ -0,0 +1 @@
from .BaseModel import BaseModel