mirror of
https://git.isriupjv.fr/ISRI/ai-server
synced 2025-04-24 10:08:11 +02:00
Base for the application: support custom Python models and auto-install their dependencies
This commit is contained in:
parent a20371e1ab
commit e2ebbf8a82
25 changed files with 494 additions and 2 deletions
5 .gitignore vendored Normal file
@@ -0,0 +1,5 @@
# Development
.idea/

# Environment
venv/
17 Dockerfile Normal file
@@ -0,0 +1,17 @@
FROM python:3.12

# copy the application
WORKDIR /app
COPY ./ ./

# install the dependencies
RUN pip3 install -r ./requirements.txt

# expose the API port
EXPOSE 8000

# environment variables (MODEL_LIBRARY is the variable read by source/__main__.py)
ENV MODEL_LIBRARY=/models/

# run the server
CMD ["python3", "-m", "source"]
14 README.md
@@ -1,3 +1,15 @@
# AI-Server

A server that can serve AI models with an API and an authentication system

# Usage

## Docker

Build and start the server with `docker compose up --build` (see `docker-compose.yml` for the port mapping and volumes).

# Environment Variables

| Name          | Description                                |
|---------------|--------------------------------------------|
| MODEL_LIBRARY | the directory where the models are stored  |
17 docker-compose.yml Normal file
@@ -0,0 +1,17 @@
services:
  ai-server:
    build:
      context: .
      dockerfile: ./Dockerfile
    runtime: nvidia
    volumes:
      - models:/models/
      - /root/.cache/huggingface:/root/.cache/huggingface
    environment:
      - MODEL_LIBRARY=/models/
      - NVIDIA_VISIBLE_DEVICES=all
    ports:
      - "8000:8000"

volumes:
  models:
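Once the stack is up, the API can be smoke-tested from the host. A minimal sketch, assuming the default `8000:8000` mapping above and only the standard library:

```python
# List the available models through the API (assumes the compose stack is
# running and publishing port 8000 on localhost).
import json
import urllib.request

with urllib.request.urlopen("http://localhost:8000/models") as response:
    print(json.load(response))  # e.g. ['dummy', 'python-bert-1', 'python-bert-2']
```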
4 requirements.txt Normal file
@@ -0,0 +1,4 @@
# web
fastapi
uvicorn
pydantic
3 samples/models/dummy/config.json Normal file
@@ -0,0 +1,3 @@
{
    "type": "dummy"
}
11 samples/models/python-bert-1/config.json Normal file
@@ -0,0 +1,11 @@
{
    "type": "python",
    "file": "model.py",

    "requirements": [
        "transformers",
        "torch",
        "torchvision",
        "torchaudio"
    ]
}
28 samples/models/python-bert-1/model.py Normal file
@@ -0,0 +1,28 @@
import json

import torch
import transformers


MODEL_NAME: str = "huawei-noah/TinyBERT_General_4L_312D"


def load(model):
    model.model = transformers.AutoModel.from_pretrained(MODEL_NAME)
    model.tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)

def unload(model):
    model.model = None
    model.tokenizer = None

def infer(model, payload: dict) -> str:
    inputs = model.tokenizer(payload["prompt"], return_tensors="pt")

    with torch.no_grad():
        outputs = model.model(**inputs)

    embeddings = outputs.last_hidden_state

    return json.dumps({
        "data": embeddings.tolist()
    })
4 samples/models/python-bert-2/config.json Normal file
@@ -0,0 +1,4 @@
{
    "type": "python",
    "file": "model.py"
}
28 samples/models/python-bert-2/model.py Normal file
@@ -0,0 +1,28 @@
import json

import torch
import transformers


MODEL_NAME: str = "huawei-noah/TinyBERT_General_4L_312D"


def load(model):
    model.model = transformers.AutoModel.from_pretrained(MODEL_NAME)
    model.tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)

def unload(model):
    model.model = None
    model.tokenizer = None

def infer(model, payload: dict) -> str:
    inputs = model.tokenizer(payload["prompt"], return_tensors="pt")

    with torch.no_grad():
        outputs = model.model(**inputs)

    embeddings = outputs.last_hidden_state

    return json.dumps({
        "data": embeddings.tolist()
    })
3 source/__init__.py Normal file
@@ -0,0 +1,3 @@
from . import api
from . import model
from . import manager
19 source/__main__.py Normal file
@@ -0,0 +1,19 @@
import os

from source import manager, model, api

# create a fastapi application
application = api.Application()


# create the model controller
model_controller = manager.ModelManager(os.environ["MODEL_LIBRARY"])
model_controller.register_model_type("dummy", model.DummyModel)
model_controller.register_model_type("python", model.PythonModel)
model_controller.reload()

api.route.models.load(application, model_controller)


# serve the application
application.serve("0.0.0.0", 8000)
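For a local run without Docker, the entry point above only needs `MODEL_LIBRARY` to be set. A minimal sketch, assuming the repository root as the working directory with the bundled `samples/models`:

```python
# Run the server locally against the sample model library
# (equivalent to: MODEL_LIBRARY=samples/models python3 -m source).
import os
import runpy

os.environ["MODEL_LIBRARY"] = "samples/models"
runpy.run_module("source", run_name="__main__")  # serves on 0.0.0.0:8000
```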
15 source/api/Application.py Normal file
@@ -0,0 +1,15 @@
import fastapi
import uvicorn

from source import meta


class Application(fastapi.FastAPI):
    def __init__(self):
        super().__init__(
            title=meta.name,
            description=meta.description
        )

    def serve(self, host: str = "0.0.0.0", port: int = 8080):
        uvicorn.run(self, host=host, port=port)
3 source/api/__init__.py Normal file
@@ -0,0 +1,3 @@
from . import route

from .Application import Application
1 source/api/route/__init__.py Normal file
@@ -0,0 +1 @@
from . import models
74 source/api/route/models.py Normal file
@@ -0,0 +1,74 @@
import sys
import traceback

import fastapi
import pydantic

from source.api import Application
from source import manager


class InferenceRequest(pydantic.BaseModel):
    """
    Represent a request made when inferring a model
    """

    data: dict


def load(application: Application, model_manager: manager.ModelManager):
    @application.get("/models")
    async def get_models() -> list[str]:
        """
        Get the list of models available
        :return: the list of models available
        """

        # reload the model list
        model_manager.reload()
        # list the models found
        return list(model_manager.models.keys())

    @application.get("/models/{model_name}")
    async def get_model(model_name: str) -> dict:
        """
        Get information about a specific model
        :param model_name: the name of the model
        :return: the information about the corresponding model
        """

        # get the corresponding model
        model = model_manager.models.get(model_name)
        if model is None:
            raise fastapi.HTTPException(status_code=404, detail="Model not found")

        # return the model information
        return model.get_information()

    @application.post("/models/{model_name}/infer")
    async def infer_model(model_name: str, request: InferenceRequest) -> fastapi.Response:
        """
        Run an inference through the selected model
        :param model_name: the name of the model
        :param request: the data to infer to the model
        :return: the model response
        """

        # get the corresponding model
        model = model_manager.models.get(model_name)
        if model is None:
            raise fastapi.HTTPException(status_code=404, detail="Model not found")

        # infer the data through the model
        try:
            response = model.infer(request.data)
        except Exception:
            print(traceback.format_exc(), file=sys.stderr)
            raise fastapi.HTTPException(status_code=500, detail="An error occurred while inferring the model.")

        # pack the model response into a fastapi response
        return fastapi.Response(
            content=response,
            media_type=model.response_mimetype,
        )
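As a usage note for the `/infer` route above, the request body wraps the model payload in a `data` field. A minimal client sketch, assuming a local server with the sample `dummy` model (which echoes its payload back):

```python
# Call the inference endpoint of the sample "dummy" model.
import json
import urllib.request

request = urllib.request.Request(
    "http://localhost:8000/models/dummy/infer",
    data=json.dumps({"data": {"prompt": "hello"}}).encode(),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(request) as response:
    print(response.read().decode())  # {"prompt": "hello"}
```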
52 source/manager/ModelManager.py Normal file
@@ -0,0 +1,52 @@
import json
import os
import typing
import warnings
from pathlib import Path

from source import model


class ModelManager:
    def __init__(self, model_library: os.PathLike | str):
        self.model_library: Path = Path(model_library)

        # the model types
        self.model_types: dict[str, typing.Type[model.base.BaseModel]] = {}
        # the models
        self.models: dict[str, model.base.BaseModel] = {}

        # the currently loaded model
        # TODO(Faraphel): load more than one model at a time? requires a more complex manager to handle memory issues
        self.current_loaded_model: typing.Optional[model.base.BaseModel] = None

    def register_model_type(self, name: str, model_type: typing.Type[model.base.BaseModel]):
        self.model_types[name] = model_type

    def reload(self):
        for model_path in self.model_library.iterdir():
            model_name: str = model_path.name
            model_configuration_path: Path = model_path / "config.json"

            # check if the configuration file exists
            if not model_configuration_path.exists():
                warnings.warn(f"Model {model_name!r} is missing a config.json file.")
                continue

            # load the configuration file
            model_configuration = json.loads(model_configuration_path.read_text())

            # get the model type for this model
            model_type_name: str = model_configuration.get("type")
            if model_type_name is None:
                warnings.warn(f"Field 'type' missing from the configuration file of model {model_name!r}.")
                continue

            # get the class of this model type
            model_type = self.model_types.get(model_type_name)
            if model_type is None:
                warnings.warn(f"Model type {model_type_name!r} does not exist. Has it been registered?")
                continue

            # load the model
            self.models[model_name] = model_type(self, model_configuration, model_path)
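The manager can also be driven without the HTTP layer, which makes the `reload()` contract above easy to test. A minimal sketch using a throwaway library containing one dummy model:

```python
# Build a temporary model library, then load and run a model through the manager.
import json
import tempfile
from pathlib import Path

from source import manager, model

library = Path(tempfile.mkdtemp())
(library / "my-dummy").mkdir()
(library / "my-dummy" / "config.json").write_text(json.dumps({"type": "dummy"}))

model_manager = manager.ModelManager(library)
model_manager.register_model_type("dummy", model.DummyModel)
model_manager.reload()
print(model_manager.models["my-dummy"].infer({"hello": "world"}))  # {"hello": "world"}
```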
1 source/manager/__init__.py Normal file
@@ -0,0 +1 @@
from .ModelManager import ModelManager
2 source/meta.py Normal file
@@ -0,0 +1,2 @@
name: str = "AI-Server"
description: str = "Serve models through an API"
19 source/model/DummyModel.py Normal file
@@ -0,0 +1,19 @@
import json

from source.model import base


class DummyModel(base.BaseModel):
    """
    A dummy model, mainly used to test the API and the manager.
    Simply sends back the request made to it.
    """

    def _load(self) -> None:
        pass

    def _unload(self) -> None:
        pass

    def _infer(self, payload: dict) -> str | bytes:
        return json.dumps(payload)
46 source/model/PythonModel.py Normal file
@@ -0,0 +1,46 @@
import importlib.util
import subprocess
import sys
import uuid
from pathlib import Path

from source.manager import ModelManager
from source.model import base


class PythonModel(base.BaseModel):
    """
    A model running custom Python code.
    """

    def __init__(self, manager: ModelManager, configuration: dict, path: Path):
        super().__init__(manager, configuration, path)

        # get the name of the file containing the model code
        file = configuration.get("file")
        if file is None:
            raise ValueError("Field 'file' is missing from the configuration")

        # install custom requirements (fail early if the installation fails)
        requirements = configuration.get("requirements", [])
        if len(requirements) > 0:
            subprocess.run([sys.executable, "-m", "pip", "install", *requirements], check=True)

        # create the module specification
        module_spec = importlib.util.spec_from_file_location(
            f"model-{uuid.uuid4()}",
            self.path / file
        )
        # get the module
        self.module = importlib.util.module_from_spec(module_spec)
        # load the module
        module_spec.loader.exec_module(self.module)

    def _load(self) -> None:
        return self.module.load(self)

    def _unload(self) -> None:
        return self.module.unload(self)

    def _infer(self, payload: dict) -> str | bytes:
        return self.module.infer(self, payload)
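For reference, `PythonModel` only calls three module-level hooks, so the smallest valid `model.py` is tiny. A hypothetical echo model (not part of this commit):

```python
# Minimal module for a "python"-type model: PythonModel calls load(model),
# unload(model) and infer(model, payload) on it.
import json


def load(model):
    pass  # acquire resources here (weights, tokenizer, ...)


def unload(model):
    pass  # release them here


def infer(model, payload: dict) -> str:
    return json.dumps(payload)  # echo the payload back
```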
4 source/model/__init__.py Normal file
@@ -0,0 +1,4 @@
from . import base

from .DummyModel import DummyModel
from .PythonModel import PythonModel
123 source/model/base/BaseModel.py Normal file
@@ -0,0 +1,123 @@
import abc
import gc
from pathlib import Path

from source.manager import ModelManager


class BaseModel(abc.ABC):
    """
    Represent a model.
    """

    def __init__(self, manager: ModelManager, configuration: dict, path: Path):
        # the environment directory of the model
        self.path = path
        # the model manager
        self.manager = manager
        # the mimetype of the model responses
        self.response_mimetype: str = configuration.get("response_type", "application/json")

        self._loaded = False

    def __repr__(self):
        return f"<{self.__class__.__name__}: {self.name}>"

    @property
    def name(self):
        """
        Get the name of the model
        :return: the name of the model
        """

        return self.path.name

    def get_information(self):
        """
        Get information about the model
        :return: information about the model
        """

        return {
            "name": self.name,
            "response_mimetype": self.response_mimetype,
        }

    def load(self) -> None:
        """
        Load the model within the model manager
        """

        # if we are already loaded, stop
        if self._loaded:
            return

        # check if we are the current loaded model
        if self.manager.current_loaded_model is not self:
            # unload the previous model
            if self.manager.current_loaded_model is not None:
                self.manager.current_loaded_model.unload()

        # model specific loading
        self._load()

        # mark the model as loaded
        self._loaded = True

        # declare ourselves as the currently loaded model
        self.manager.current_loaded_model = self

    @abc.abstractmethod
    def _load(self):
        """
        Load the model
        Do not call manually, use `load` instead.
        """

    def unload(self) -> None:
        """
        Unload the model within the model manager
        """

        # if we are not already loaded, stop
        if not self._loaded:
            return

        # if we were the currently loaded model of the manager, demote ourselves
        if self.manager.current_loaded_model is self:
            self.manager.current_loaded_model = None

        # model specific unloading part
        self._unload()

        # force the garbage collector to clean the memory
        gc.collect()

        # mark the model as unloaded
        self._loaded = False

    @abc.abstractmethod
    def _unload(self):
        """
        Unload the model
        Do not call manually, use `unload` instead.
        """

    def infer(self, payload: dict) -> str | bytes:
        """
        Infer our payload through the model within the model manager
        :param payload: the payload to give to the model
        :return: the response of the model
        """

        # make sure we are loaded before an inference
        self.load()

        # model specific inference part
        return self._infer(payload)

    @abc.abstractmethod
    def _infer(self, payload: dict) -> str | bytes:
        """
        Infer our payload through the model
        :param payload: the payload to give to the model
        :return: the response of the model
        """
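Adding a new model type therefore comes down to subclassing `BaseModel` and registering it. A hypothetical `uppercase` type (not part of this commit), sketched under the same contract:

```python
# A toy model type: upper-cases every value of the payload.
import json

from source.model import base


class UppercaseModel(base.BaseModel):
    def _load(self) -> None:
        pass  # nothing to load

    def _unload(self) -> None:
        pass  # nothing to unload

    def _infer(self, payload: dict) -> str:
        return json.dumps({key: str(value).upper() for key, value in payload.items()})

# registered like the built-in types:
#   model_controller.register_model_type("uppercase", UppercaseModel)
```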
1 source/model/base/__init__.py Normal file
@@ -0,0 +1 @@
from .BaseModel import BaseModel