base for the application: support custom Python applications and auto-install dependencies

faraphel 2025-01-08 19:00:47 +01:00
parent a20371e1ab
commit e2ebbf8a82
25 changed files with 494 additions and 2 deletions

.gitignore

@@ -0,0 +1,5 @@
# Development
.idea/
# Environment
venv/

Dockerfile

@@ -0,0 +1,17 @@
FROM python:3.12
# copy the application
WORKDIR /app
COPY ./ ./
# install the dependencies
RUN pip3 install -r ./requirements.txt
# expose the API port
EXPOSE 8000
# environment variables
ENV MODEL_LIBRARY=/models/
# run the server
CMD ["python3", "-m", "source"]

README.md

@@ -1,3 +1,15 @@
-# ai-server
+# AI-Server
 A server that can serve AI models with an API and an authentication system
+
+# Usage
+## Docker
+
+# Environment Variables
+
+| Name          | Description                                |
+|---------------|--------------------------------------------|
+| MODEL_LIBRARY | the directory where the models are stored  |

docker-compose.yml

@@ -0,0 +1,17 @@
services:
  ai-server:
    build:
      context: .
      dockerfile: ./Dockerfile
    runtime: nvidia
    volumes:
      - models:/models/
      - /root/.cache/huggingface:/root/.cache/huggingface
    environment:
      - MODEL_LIBRARY=/models/
      - NVIDIA_VISIBLE_DEVICES=all
    ports:
      - "8000:8000"

volumes:
  models:

requirements.txt

@@ -0,0 +1,4 @@
# web
fastapi
uvicorn
pydantic

@@ -0,0 +1,3 @@
{
  "type": "dummy"
}

@@ -0,0 +1,11 @@
{
  "type": "python",
  "file": "model.py",
  "requirements": [
    "transformers",
    "torch",
    "torchvision",
    "torchaudio"
  ]
}

@@ -0,0 +1,28 @@
import json

import torch
import transformers

MODEL_NAME: str = "huawei-noah/TinyBERT_General_4L_312D"


def load(model):
    model.model = transformers.AutoModel.from_pretrained(MODEL_NAME)
    model.tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)


def unload(model):
    model.model = None
    model.tokenizer = None


def infer(model, payload: dict) -> str:
    inputs = model.tokenizer(payload["prompt"], return_tensors="pt")

    with torch.no_grad():
        outputs = model.model(**inputs)
        embeddings = outputs.last_hidden_state

    return json.dumps({
        "data": embeddings.tolist()
    })

@@ -0,0 +1,4 @@
{
  "type": "python",
  "file": "model.py"
}

@@ -0,0 +1,28 @@
import json

import torch
import transformers

MODEL_NAME: str = "huawei-noah/TinyBERT_General_4L_312D"


def load(model):
    model.model = transformers.AutoModel.from_pretrained(MODEL_NAME)
    model.tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)


def unload(model):
    model.model = None
    model.tokenizer = None


def infer(model, payload: dict) -> str:
    inputs = model.tokenizer(payload["prompt"], return_tensors="pt")

    with torch.no_grad():
        outputs = model.model(**inputs)
        embeddings = outputs.last_hidden_state

    return json.dumps({
        "data": embeddings.tolist()
    })

source/__init__.py

@@ -0,0 +1,3 @@
from . import api
from . import model
from . import manager

source/__main__.py

@@ -0,0 +1,19 @@
import os

from source import manager, model, api

# create a fastapi application
application = api.Application()

# create the model controller
model_controller = manager.ModelManager(os.environ["MODEL_LIBRARY"])
model_controller.register_model_type("dummy", model.DummyModel)
model_controller.register_model_type("python", model.PythonModel)
model_controller.reload()

# load the API routes
api.route.models.load(application, model_controller)

# serve the application
application.serve("0.0.0.0", 8000)

source/api/Application.py

@@ -0,0 +1,15 @@
import fastapi
import uvicorn

from source import meta


class Application(fastapi.FastAPI):
    def __init__(self):
        super().__init__(
            title=meta.name,
            description=meta.description
        )

    def serve(self, host: str = "0.0.0.0", port: int = 8080):
        uvicorn.run(self, host=host, port=port)

source/api/__init__.py

@@ -0,0 +1,3 @@
from . import route
from .Application import Application

source/api/route/__init__.py

@@ -0,0 +1 @@
from . import models

source/api/route/models.py

@@ -0,0 +1,74 @@
import sys
import traceback

import fastapi
import pydantic

from source.api import Application
from source import manager


class InferenceRequest(pydantic.BaseModel):
    """
    Represent a request made when inferring a model
    """

    data: dict


def load(application: Application, model_manager: manager.ModelManager):
    @application.get("/models")
    async def get_models() -> list[str]:
        """
        Get the list of models available
        :return: the list of models available
        """

        # reload the model list
        model_manager.reload()

        # list the models found
        return list(model_manager.models.keys())

    @application.get("/models/{model_name}")
    async def get_model(model_name: str) -> dict:
        """
        Get information about a specific model
        :param model_name: the name of the model
        :return: the information about the corresponding model
        """

        # get the corresponding model
        model = model_manager.models.get(model_name)
        if model is None:
            raise fastapi.HTTPException(status_code=404, detail="Model not found")

        # return the model information
        return model.get_information()

    @application.post("/models/{model_name}/infer")
    async def infer_model(model_name: str, request: InferenceRequest) -> fastapi.Response:
        """
        Run an inference through the selected model
        :param model_name: the name of the model
        :param request: the data to pass to the model
        :return: the model response
        """

        # get the corresponding model
        model = model_manager.models.get(model_name)
        if model is None:
            raise fastapi.HTTPException(status_code=404, detail="Model not found")

        # run the inference through the model
        try:
            response = model.infer(request.data)
        except Exception:
            print(traceback.format_exc(), file=sys.stderr)
            raise fastapi.HTTPException(status_code=500, detail="An error occurred while running the inference.")

        # pack the model response into a fastapi response
        return fastapi.Response(
            content=response,
            media_type=model.response_mimetype,
        )
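
A minimal client-side sketch of these routes (not part of this commit): it assumes the server from source/__main__.py is listening on localhost:8000 and that the third-party requests package is installed, which is not listed in requirements.txt.

import requests

BASE_URL = "http://localhost:8000"  # assumed bind address, see source/__main__.py

# list the available models
models = requests.get(f"{BASE_URL}/models").json()
print(models)

if models:
    # get information about the first model
    information = requests.get(f"{BASE_URL}/models/{models[0]}").json()
    print(information)

    # run an inference; the payload is wrapped in the "data" field of InferenceRequest
    response = requests.post(
        f"{BASE_URL}/models/{models[0]}/infer",
        json={"data": {"prompt": "Hello, world!"}},
    )
    print(response.content)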

source/manager/ModelManager.py

@@ -0,0 +1,52 @@
import json
import os
import typing
import warnings
from pathlib import Path

from source import model


class ModelManager:
    def __init__(self, model_library: os.PathLike | str):
        self.model_library: Path = Path(model_library)

        # the registered model types
        self.model_types: dict[str, typing.Type[model.base.BaseModel]] = {}
        # the models
        self.models: dict[str, model.base.BaseModel] = {}

        # the currently loaded model
        # TODO(Faraphel): load more than one model at a time ? would require a far more complex manager to handle memory issues
        self.current_loaded_model: typing.Optional[model.base.BaseModel] = None

    def register_model_type(self, name: str, model_type: typing.Type[model.base.BaseModel]):
        self.model_types[name] = model_type

    def reload(self):
        for model_path in self.model_library.iterdir():
            model_name: str = model_path.name
            model_configuration_path: Path = model_path / "config.json"

            # check if the configuration file exists
            if not model_configuration_path.exists():
                warnings.warn(f"Model {model_name!r} is missing a config.json file.")
                continue

            # load the configuration file
            model_configuration = json.loads(model_configuration_path.read_text())

            # get the model type name for this model
            model_type_name: str = model_configuration.get("type")
            if model_type_name is None:
                warnings.warn(f"Field 'type' is missing from the configuration of model {model_name!r}.")
                continue

            # get the class of this model type
            model_type = self.model_types.get(model_type_name)
            if model_type is None:
                warnings.warn(f"Model type {model_type_name!r} does not exist. Has it been registered?")
                continue

            # load the model
            self.models[model_name] = model_type(self, model_configuration, model_path)

source/manager/__init__.py

@@ -0,0 +1 @@
from .ModelManager import ModelManager

source/meta.py

@@ -0,0 +1,2 @@
name: str = "AI-Server"
description: str = "Serve models through an API"

source/model/DummyModel.py

@@ -0,0 +1,19 @@
import json

from source.model import base


class DummyModel(base.BaseModel):
    """
    A dummy model, mainly used to test the API and the manager.
    Simply sends back the request made to it.
    """

    def _load(self) -> None:
        pass

    def _unload(self) -> None:
        pass

    def _infer(self, payload: dict) -> str | bytes:
        return json.dumps(payload)

source/model/PythonModel.py

@@ -0,0 +1,46 @@
import importlib.util
import subprocess
import sys
import uuid
from pathlib import Path

from source.manager import ModelManager
from source.model import base


class PythonModel(base.BaseModel):
    """
    A model running custom python code.
    """

    def __init__(self, manager: ModelManager, configuration: dict, path: Path):
        super().__init__(manager, configuration, path)

        # get the name of the file containing the model code
        file = configuration.get("file")
        if file is None:
            raise ValueError("Field 'file' is missing from the configuration")

        # install the custom requirements (fail early if the installation does not succeed)
        requirements = configuration.get("requirements", [])
        if len(requirements) > 0:
            subprocess.run([sys.executable, "-m", "pip", "install", *requirements], check=True)

        # create the module specification
        module_spec = importlib.util.spec_from_file_location(
            f"model-{uuid.uuid4()}",
            self.path / file
        )
        # get the module
        self.module = importlib.util.module_from_spec(module_spec)
        # load the module
        module_spec.loader.exec_module(self.module)

    def _load(self) -> None:
        return self.module.load(self)

    def _unload(self) -> None:
        return self.module.unload(self)

    def _infer(self, payload: dict) -> str | bytes:
        return self.module.infer(self, payload)

source/model/__init__.py

@@ -0,0 +1,4 @@
from . import base
from .DummyModel import DummyModel
from .PythonModel import PythonModel

source/model/base/BaseModel.py

@@ -0,0 +1,123 @@
import abc
import gc
from pathlib import Path

from source.manager import ModelManager


class BaseModel(abc.ABC):
    """
    Represent a model.
    """

    def __init__(self, manager: ModelManager, configuration: dict, path: Path):
        # the environment directory of the model
        self.path = path
        # the model manager
        self.manager = manager
        # the mimetype of the model responses
        self.response_mimetype: str = configuration.get("response_type", "application/json")

        self._loaded = False

    def __repr__(self):
        return f"<{self.__class__.__name__}: {self.name}>"

    @property
    def name(self):
        """
        Get the name of the model
        :return: the name of the model
        """

        return self.path.name

    def get_information(self):
        """
        Get information about the model
        :return: information about the model
        """

        return {
            "name": self.name,
            "response_mimetype": self.response_mimetype,
        }

    def load(self) -> None:
        """
        Load the model within the model manager
        """

        # if we are already loaded, stop
        if self._loaded:
            return

        # check if we are the currently loaded model
        if self.manager.current_loaded_model is not self:
            # unload the previous model
            if self.manager.current_loaded_model is not None:
                self.manager.current_loaded_model.unload()

        # model specific loading
        self._load()

        # declare ourselves as the currently loaded model
        self.manager.current_loaded_model = self
        # mark the model as loaded
        self._loaded = True

    @abc.abstractmethod
    def _load(self):
        """
        Load the model
        Do not call manually, use `load` instead.
        """

    def unload(self) -> None:
        """
        Unload the model within the model manager
        """

        # if we are not loaded, stop
        if not self._loaded:
            return

        # if we were the currently loaded model of the manager, demote ourselves
        if self.manager.current_loaded_model is self:
            self.manager.current_loaded_model = None

        # model specific unloading part
        self._unload()

        # force the garbage collector to clean the memory
        gc.collect()

        # mark the model as unloaded
        self._loaded = False

    @abc.abstractmethod
    def _unload(self):
        """
        Unload the model
        Do not call manually, use `unload` instead.
        """

    def infer(self, payload: dict) -> str | bytes:
        """
        Infer our payload through the model within the model manager
        :param payload: the payload to give to the model
        :return: the response of the model
        """

        # make sure we are loaded before an inference
        self.load()

        # model specific inference part
        return self._infer(payload)

    @abc.abstractmethod
    def _infer(self, payload: dict) -> str | bytes:
        """
        Infer our payload through the model
        :param payload: the payload to give to the model
        :return: the response of the model
        """

source/model/base/__init__.py

@@ -0,0 +1 @@
from .BaseModel import BaseModel