Mirror of https://git.isriupjv.fr/ISRI/ai-server (synced 2025-04-24 01:58:12 +02:00)
base for the application: support custom Python applications and auto-install dependencies
This commit is contained in:
parent a20371e1ab
commit e2ebbf8a82

25 changed files with 494 additions and 2 deletions
5  .gitignore  vendored  Normal file
@@ -0,0 +1,5 @@
# Development
.idea/

# Environment
venv/
17  Dockerfile  Normal file
@@ -0,0 +1,17 @@
FROM python:3.12

# copy the application
WORKDIR /app
COPY ./ ./

# install the dependencies
RUN pip3 install -r ./requirements.txt

# expose the API port
EXPOSE 8000

# environment variables
ENV MODEL_DIRECTORY=/models/

# run the server
CMD ["python3", "-m", "source"]
16  README.md
@@ -1,3 +1,15 @@
-# ai-server
+# AI-Server
 
 A server that can serve AI models with an API and an authentication system
+
+# Usage
+
+## Docker
+
+
+# Environment Variables
+
+| Name            | Description                                |
+|-----------------|--------------------------------------------|
+| MODEL_DIRECTORY | the directory where the models are stored  |
17  docker-compose.yml  Normal file
@@ -0,0 +1,17 @@
services:
  ai-server:
    build:
      context: .
      dockerfile: ./Dockerfile
    runtime: nvidia
    volumes:
      - models:/models/
      - /root/.cache/huggingface:/root/.cache/huggingface
    environment:
      - MODEL_LIBRARY=/models/
      - NVIDIA_VISIBLE_DEVICES=all
    ports:
      - "8000:8000"

volumes:
  models:
4  requirements.txt  Normal file
@@ -0,0 +1,4 @@
# web
fastapi
uvicorn
pydantic
3  samples/models/dummy/config.json  Normal file
@@ -0,0 +1,3 @@
{
    "type": "dummy"
}
11  samples/models/python-bert-1/config.json  Normal file
@@ -0,0 +1,11 @@
{
    "type": "python",
    "file": "model.py",

    "requirements": [
        "transformers",
        "torch",
        "torchvision",
        "torchaudio"
    ]
}
28  samples/models/python-bert-1/model.py  Normal file
@@ -0,0 +1,28 @@
import json

import torch
import transformers


MODEL_NAME: str = "huawei-noah/TinyBERT_General_4L_312D"


def load(model):
    model.model = transformers.AutoModel.from_pretrained(MODEL_NAME)
    model.tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)


def unload(model):
    model.model = None
    model.tokenizer = None


def infer(model, payload: dict) -> str:
    inputs = model.tokenizer(payload["prompt"], return_tensors="pt")

    with torch.no_grad():
        outputs = model.model(**inputs)

    embeddings = outputs.last_hidden_state

    return json.dumps({
        "data": embeddings.tolist()
    })
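As the sample above suggests, a custom "python" model only needs a config.json pointing at a Python file that defines module-level load, unload and infer functions; any listed requirements are pip-installed automatically when the model is constructed. A minimal sketch of another such plug-in (hypothetical model directory and file name, no extra requirements assumed) could look like:

# samples/models/echo/model.py (hypothetical example)
import json


def load(model):
    # nothing to set up for this model
    pass


def unload(model):
    # nothing to release for this model
    pass


def infer(model, payload: dict) -> str:
    # simply echo the received payload back as JSON
    return json.dumps({"echo": payload})

with a matching samples/models/echo/config.json containing {"type": "python", "file": "model.py"}.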
4  samples/models/python-bert-2/config.json  Normal file
@@ -0,0 +1,4 @@
{
    "type": "python",
    "file": "model.py"
}
28  samples/models/python-bert-2/model.py  Normal file
@@ -0,0 +1,28 @@
import json

import torch
import transformers


MODEL_NAME: str = "huawei-noah/TinyBERT_General_4L_312D"


def load(model):
    model.model = transformers.AutoModel.from_pretrained(MODEL_NAME)
    model.tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)


def unload(model):
    model.model = None
    model.tokenizer = None


def infer(model, payload: dict) -> str:
    inputs = model.tokenizer(payload["prompt"], return_tensors="pt")

    with torch.no_grad():
        outputs = model.model(**inputs)

    embeddings = outputs.last_hidden_state

    return json.dumps({
        "data": embeddings.tolist()
    })
3  source/__init__.py  Normal file
@@ -0,0 +1,3 @@
from . import api
from . import model
from . import manager
19  source/__main__.py  Normal file
@@ -0,0 +1,19 @@
import os

from source import manager, model, api


# create a fastapi application
application = api.Application()


# create the model controller
model_controller = manager.ModelManager(os.environ["MODEL_LIBRARY"])
model_controller.register_model_type("dummy", model.DummyModel)
model_controller.register_model_type("python", model.PythonModel)
model_controller.reload()

api.route.models.load(application, model_controller)


# serve the application
application.serve("0.0.0.0", 8000)
15  source/api/Application.py  Normal file
@@ -0,0 +1,15 @@
import fastapi
import uvicorn

from source import meta


class Application(fastapi.FastAPI):
    def __init__(self):
        super().__init__(
            title=meta.name,
            description=meta.description
        )

    def serve(self, host: str = "0.0.0.0", port: int = 8080):
        uvicorn.run(self, host=host, port=port)
3  source/api/__init__.py  Normal file
@@ -0,0 +1,3 @@
from . import route

from .Application import Application
1  source/api/route/__init__.py  Normal file
@@ -0,0 +1 @@
from . import models
74  source/api/route/models.py  Normal file
@@ -0,0 +1,74 @@
import sys
import traceback

import fastapi
import pydantic

from source.api import Application
from source import manager


class InferenceRequest(pydantic.BaseModel):
    """
    Represent a request made when inferring a model
    """

    request: dict


def load(application: Application, model_manager: manager.ModelManager):
    @application.get("/models")
    async def get_models() -> list[str]:
        """
        Get the list of the available models
        :return: the list of the available models
        """

        # reload the model list
        model_manager.reload()
        # list the models found
        return list(model_manager.models.keys())

    @application.get("/models/{model_name}")
    async def get_model(model_name: str) -> dict:
        """
        Get information about a specific model
        :param model_name: the name of the model
        :return: the information about the corresponding model
        """

        # get the corresponding model
        model = model_manager.models.get(model_name)
        if model is None:
            raise fastapi.HTTPException(status_code=404, detail="Model not found")

        # return the model information
        return model.get_information()

    @application.post("/models/{model_name}/infer")
    async def infer_model(model_name: str, request: InferenceRequest) -> fastapi.Response:
        """
        Run an inference through the selected model
        :param model_name: the name of the model
        :param request: the data to pass to the model
        :return: the model response
        """

        # get the corresponding model
        model = model_manager.models.get(model_name)
        if model is None:
            raise fastapi.HTTPException(status_code=404, detail="Model not found")

        # infer the data through the model
        try:
            response = model.infer(request.request)
        except Exception:
            print(traceback.format_exc(), file=sys.stderr)
            raise fastapi.HTTPException(status_code=500, detail="An error occurred while running the model inference.")

        # pack the model response into a fastapi response
        return fastapi.Response(
            content=response,
            media_type=model.response_mimetype,
        )
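These routes define the whole public contract of the server in this commit: GET /models lists the model names, GET /models/{model_name} returns their information, and POST /models/{model_name}/infer expects a JSON body whose payload is wrapped in a "request" field, as declared by InferenceRequest. A minimal client sketch using only the standard library (assuming the server is reachable on localhost:8000 and the "dummy" sample model is in the model library) could be:

# hypothetical client sketch, assuming the server listens on localhost:8000
# and the "dummy" sample model is available in the model library
import json
import urllib.request

# list the available models (GET /models)
with urllib.request.urlopen("http://localhost:8000/models") as response:
    print(json.load(response))

# run an inference (POST /models/dummy/infer); the body must wrap the
# payload in a "request" field, as defined by InferenceRequest
data = json.dumps({"request": {"prompt": "hello"}}).encode()
request = urllib.request.Request(
    "http://localhost:8000/models/dummy/infer",
    data=data,
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(request) as response:
    print(response.read().decode())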
52  source/manager/ModelManager.py  Normal file
@@ -0,0 +1,52 @@
import json
import os
import typing
import warnings
from pathlib import Path

from source import model


class ModelManager:
    def __init__(self, model_library: os.PathLike | str):
        self.model_library: Path = Path(model_library)

        # the model types
        self.model_types: dict[str, typing.Type[model.base.BaseModel]] = {}
        # the models
        self.models: dict[str, model.base.BaseModel] = {}

        # the currently loaded model
        # TODO(Faraphel): load more than one model at a time? requires a much more complex manager to handle memory issues
        self.current_loaded_model: typing.Optional[model.base.BaseModel] = None

    def register_model_type(self, name: str, model_type: typing.Type[model.base.BaseModel]):
        self.model_types[name] = model_type

    def reload(self):
        for model_path in self.model_library.iterdir():
            model_name: str = model_path.name
            model_configuration_path: Path = model_path / "config.json"

            # check if the configuration file exists
            if not model_configuration_path.exists():
                warnings.warn(f"Model {model_name!r} is missing a config.json file.")
                continue

            # load the configuration file
            model_configuration = json.loads(model_configuration_path.read_text())

            # get the model type for this model
            model_type_name: str = model_configuration.get("type")
            if model_type_name is None:
                warnings.warn("Field 'type' missing from the model configuration file.")
                continue

            # get the class of this model type
            model_type = self.model_types.get(model_type_name)
            if model_type is None:
                warnings.warn(f"Model type {model_type_name!r} does not exist. Has it been registered?")
                continue

            # load the model
            self.models[model_name] = model_type(self, model_configuration, model_path)
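The manager simply scans every subdirectory of the model library for a config.json, looks up the declared type among the registered model classes, and instantiates it. It can therefore be exercised without the API; a standalone sketch (assuming it is run from the repository root so that the samples/models directory added in this commit exists) could be:

# standalone sketch, assuming it is run from the repository root
# so that the samples/models directory added in this commit exists
from source import manager, model

model_manager = manager.ModelManager("samples/models")
model_manager.register_model_type("dummy", model.DummyModel)
model_manager.register_model_type("python", model.PythonModel)
model_manager.reload()

# the dummy sample simply echoes the payload back as JSON
dummy = model_manager.models["dummy"]
print(dummy.infer({"prompt": "hello"}))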
1  source/manager/__init__.py  Normal file
@@ -0,0 +1 @@
from .ModelManager import ModelManager
2  source/meta.py  Normal file
@@ -0,0 +1,2 @@
name: str = "AI-Server"
description: str = "Serve models through an API"
19  source/model/DummyModel.py  Normal file
@@ -0,0 +1,19 @@
import json

from source.model import base


class DummyModel(base.BaseModel):
    """
    A dummy model, mainly used to test the API and the manager.
    It simply sends back the request made to it.
    """

    def _load(self) -> None:
        pass

    def _unload(self) -> None:
        pass

    def _infer(self, payload: dict) -> str | bytes:
        return json.dumps(payload)
46  source/model/PythonModel.py  Normal file
@@ -0,0 +1,46 @@
import importlib.util
import subprocess
import sys
import uuid
from pathlib import Path

from source.manager import ModelManager
from source.model import base


class PythonModel(base.BaseModel):
    """
    A model running custom Python code.
    """

    def __init__(self, manager: ModelManager, configuration: dict, path: Path):
        super().__init__(manager, configuration, path)

        # get the name of the file containing the model code
        file = configuration.get("file")
        if file is None:
            raise ValueError("Field 'file' is missing from the configuration")

        # install custom requirements
        requirements = configuration.get("requirements", [])
        if len(requirements) > 0:
            subprocess.run([sys.executable, "-m", "pip", "install", *requirements])

        # create the module specification
        module_spec = importlib.util.spec_from_file_location(
            f"model-{uuid.uuid4()}",
            self.path / file
        )
        # get the module
        self.module = importlib.util.module_from_spec(module_spec)
        # load the module
        module_spec.loader.exec_module(self.module)

    def _load(self) -> None:
        return self.module.load(self)

    def _unload(self) -> None:
        return self.module.unload(self)

    def _infer(self, payload: dict) -> str | bytes:
        return self.module.infer(self, payload)
4  source/model/__init__.py  Normal file
@@ -0,0 +1,4 @@
from . import base

from .DummyModel import DummyModel
from .PythonModel import PythonModel
123  source/model/base/BaseModel.py  Normal file
@@ -0,0 +1,123 @@
import abc
import gc
from pathlib import Path

from source.manager import ModelManager


class BaseModel(abc.ABC):
    """
    Represent a model.
    """

    def __init__(self, manager: ModelManager, configuration: dict, path: Path):
        # the environment directory of the model
        self.path = path
        # the model manager
        self.manager = manager
        # the mimetype of the model responses
        self.response_mimetype: str = configuration.get("response_type", "application/json")

        self._loaded = False

    def __repr__(self):
        return f"<{self.__class__.__name__}: {self.name}>"

    @property
    def name(self):
        """
        Get the name of the model
        :return: the name of the model
        """

        return self.path.name

    def get_information(self):
        """
        Get information about the model
        :return: information about the model
        """

        return {
            "name": self.name,
            "response_mimetype": self.response_mimetype,
        }

    def load(self) -> None:
        """
        Load the model within the model manager
        """

        # if we are already loaded, stop
        if self._loaded:
            return

        # check if we are the currently loaded model
        if self.manager.current_loaded_model is not self:
            # unload the previous model
            if self.manager.current_loaded_model is not None:
                self.manager.current_loaded_model.unload()

        # model specific loading
        self._load()

        # mark the model as loaded
        self._loaded = True

        # declare ourselves as the currently loaded model
        self.manager.current_loaded_model = self

    @abc.abstractmethod
    def _load(self):
        """
        Load the model
        Do not call manually, use `load` instead.
        """

    def unload(self) -> None:
        """
        Unload the model within the model manager
        """

        # if we are not loaded, stop
        if not self._loaded:
            return

        # if we were the currently loaded model of the manager, demote ourselves
        if self.manager.current_loaded_model is self:
            self.manager.current_loaded_model = None

        # model specific unloading part
        self._unload()

        # force the garbage collector to clean the memory
        gc.collect()

        # mark the model as unloaded
        self._loaded = False

    @abc.abstractmethod
    def _unload(self):
        """
        Unload the model
        Do not call manually, use `unload` instead.
        """

    def infer(self, payload: dict) -> str | bytes:
        """
        Infer our payload through the model within the model manager
        :param payload: the payload to give to the model
        :return: the response of the model
        """

        # make sure we are loaded before an inference
        self.load()

        # model specific inference part
        return self._infer(payload)

    @abc.abstractmethod
    def _infer(self, payload: dict) -> str | bytes:
        """
        Infer our payload through the model
        :param payload: the payload to give to the model
        :return: the response of the model
        """
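BaseModel is the extension point for new model types: a subclass only implements _load, _unload and _infer, while load/unload/infer handle the single-loaded-model bookkeeping, and the per-model config.json can set "response_type" to change the mimetype of the packed response. A hypothetical subclass sketch (EchoTextModel is not part of this commit) could look like:

# hypothetical subclass sketch: a model type returning plain text instead of JSON
from source.model import base


class EchoTextModel(base.BaseModel):
    """
    Return the "prompt" field of the payload as plain text.
    """

    def _load(self) -> None:
        pass

    def _unload(self) -> None:
        pass

    def _infer(self, payload: dict) -> str | bytes:
        return str(payload.get("prompt", ""))

Such a type would be registered with model_controller.register_model_type("echo-text", EchoTextModel), and a model directory using it could set "response_type": "text/plain" in its config.json so that response_mimetype is applied to the FastAPI response.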
1  source/model/base/__init__.py  Normal file
@@ -0,0 +1 @@
from .BaseModel import BaseModel