From 7bd84c8570a8e686a3e830aeeeaa0bd77001f384 Mon Sep 17 00:00:00 2001
From: faraphel
Date: Thu, 9 Jan 2025 23:12:54 +0100
Subject: [PATCH] add support for input parameters recognised by the API;
 models are now exposed through separate endpoints so that their inputs are
 easier to identify

---
 requirements.txt                         |  1 +
 samples/models/dummy/config.json         |  5 +-
 samples/models/dummy/model.py            | 12 ++++
 samples/models/python-bert-1/config.json |  4 ++
 samples/models/python-bert-1/model.py    | 13 +++--
 samples/models/python-bert-2/model.py    | 13 +++--
 source/__main__.py                       |  6 +-
 source/api/__init__.py                   |  2 -
 source/api/route/__init__.py             |  1 -
 source/api/route/models.py               | 74 ------------------------
 source/manager/ModelManager.py           | 42 +++++++++++++-
 source/model/DummyModel.py               | 19 ------
 source/model/PythonModel.py              | 28 ++++++++-
 source/model/__init__.py                 |  1 -
 source/model/base/BaseModel.py           | 15 +++--
 source/utils/__init__.py                 |  1 +
 source/utils/parameters.py               | 54 +++++++++++++++++
 17 files changed, 163 insertions(+), 128 deletions(-)
 create mode 100644 samples/models/dummy/model.py
 delete mode 100644 source/api/route/__init__.py
 delete mode 100644 source/api/route/models.py
 create mode 100644 source/utils/__init__.py
 create mode 100644 source/utils/parameters.py

diff --git a/requirements.txt b/requirements.txt
index 9d43188..26d6902 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,6 +2,7 @@
 fastapi
 uvicorn
 pydantic
+python-multipart
 
 # AI
 accelerate
diff --git a/samples/models/dummy/config.json b/samples/models/dummy/config.json
index 490264e..25be088 100644
--- a/samples/models/dummy/config.json
+++ b/samples/models/dummy/config.json
@@ -1,3 +1,6 @@
 {
-  "type": "dummy"
+  "type": "python",
+  "file": "model.py",
+
+  "inputs": {}
 }
diff --git a/samples/models/dummy/model.py b/samples/models/dummy/model.py
new file mode 100644
index 0000000..d22c850
--- /dev/null
+++ b/samples/models/dummy/model.py
@@ -0,0 +1,12 @@
+import json
+import typing
+
+
+def load(model) -> None:
+    pass
+
+def unload(model) -> None:
+    pass
+
+def infer(model) -> typing.Iterator[bytes]:
+    yield json.dumps({"hello": "world!"}).encode("utf-8")
diff --git a/samples/models/python-bert-1/config.json b/samples/models/python-bert-1/config.json
index 1d4687a..5177187 100644
--- a/samples/models/python-bert-1/config.json
+++ b/samples/models/python-bert-1/config.json
@@ -2,6 +2,10 @@
   "type": "python",
   "file": "model.py",
 
+  "inputs": {
+    "prompt": {"type": "str"}
+  },
+
   "requirements": [
     "transformers",
     "torch",
diff --git a/samples/models/python-bert-1/model.py b/samples/models/python-bert-1/model.py
index 0d992dc..4c013a6 100644
--- a/samples/models/python-bert-1/model.py
+++ b/samples/models/python-bert-1/model.py
@@ -1,4 +1,5 @@
 import json
+import typing
 
 import torch
 import transformers
@@ -7,22 +8,22 @@ import transformers
 MODEL_NAME: str = "huawei-noah/TinyBERT_General_4L_312D"
 
 
-def load(model):
+def load(model) -> None:
     model.model = transformers.AutoModel.from_pretrained(MODEL_NAME)
     model.tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)
 
-def unload(model):
+def unload(model) -> None:
     model.model = None
     model.tokenizer = None
 
-def infer(model, payload: dict) -> str:
-    inputs = model.tokenizer(payload["prompt"], return_tensors="pt")
+def infer(model, prompt: str) -> typing.Iterator[bytes]:
+    inputs = model.tokenizer(prompt, return_tensors="pt")
 
     with torch.no_grad():
         outputs = model.model(**inputs)
 
     embeddings = outputs.last_hidden_state
 
-    return json.dumps({
+    yield json.dumps({
         "data": embeddings.tolist()
-    })
+    }).encode("utf-8")
diff --git a/samples/models/python-bert-2/model.py b/samples/models/python-bert-2/model.py
index 0d992dc..4c013a6 100644
--- a/samples/models/python-bert-2/model.py
+++ b/samples/models/python-bert-2/model.py
@@ -1,4 +1,5 @@
 import json
+import typing
 
 import torch
 import transformers
@@ -7,22 +8,22 @@ import transformers
 MODEL_NAME: str = "huawei-noah/TinyBERT_General_4L_312D"
 
 
-def load(model):
+def load(model) -> None:
     model.model = transformers.AutoModel.from_pretrained(MODEL_NAME)
     model.tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)
 
-def unload(model):
+def unload(model) -> None:
     model.model = None
     model.tokenizer = None
 
-def infer(model, payload: dict) -> str:
-    inputs = model.tokenizer(payload["prompt"], return_tensors="pt")
+def infer(model, prompt: str) -> typing.Iterator[bytes]:
+    inputs = model.tokenizer(prompt, return_tensors="pt")
 
     with torch.no_grad():
         outputs = model.model(**inputs)
 
     embeddings = outputs.last_hidden_state
 
-    return json.dumps({
+    yield json.dumps({
         "data": embeddings.tolist()
-    })
+    }).encode("utf-8")
diff --git a/source/__main__.py b/source/__main__.py
index e9481db..42663f4 100644
--- a/source/__main__.py
+++ b/source/__main__.py
@@ -7,13 +7,9 @@
 application = api.Application()
 
 # create the model controller
-model_controller = manager.ModelManager(os.environ["MODEL_LIBRARY"])
-model_controller.register_model_type("dummy", model.DummyModel)
+model_controller = manager.ModelManager(application, os.environ["MODEL_LIBRARY"])
 model_controller.register_model_type("python", model.PythonModel)
 model_controller.reload()
 
-api.route.models.load(application, model_controller)
-
-
 # serve the application
 application.serve("0.0.0.0", 8000)
diff --git a/source/api/__init__.py b/source/api/__init__.py
index a84f34b..9041d95 100644
--- a/source/api/__init__.py
+++ b/source/api/__init__.py
@@ -1,3 +1 @@
-from . import route
-
 from .Application import Application
diff --git a/source/api/route/__init__.py b/source/api/route/__init__.py
deleted file mode 100644
index 0650744..0000000
--- a/source/api/route/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from . import models
diff --git a/source/api/route/models.py b/source/api/route/models.py
deleted file mode 100644
index 700bc2a..0000000
--- a/source/api/route/models.py
+++ /dev/null
@@ -1,74 +0,0 @@
-import sys
-import traceback
-
-import fastapi
-import pydantic
-
-from source.api import Application
-from source import manager
-
-
-class InferenceRequest(pydantic.BaseModel):
-    """
-    Represent a request made when inferring a model
-    """
-
-    request: dict
-
-
-def load(application: Application, model_manager: manager.ModelManager):
-    @application.get("/models")
-    async def get_models() -> list[str]:
-        """
-        Get the list of models available
-        :return: the list of models available
-        """
-
-        # reload the model list
-        model_manager.reload()
-        # list the models found
-        return list(model_manager.models.keys())
-
-    @application.get("/models/{model_name}")
-    async def get_model(model_name: str) -> dict:
-        """
-        Get information about a specific model
-        :param model_name: the name of the model
-        :return: the information about the corresponding model
-        """
-
-        # get the corresponding model
-        model = model_manager.models.get(model_name)
-        if model is None:
-            raise fastapi.HTTPException(status_code=404, detail="Model not found")
-
-        # return the model information
-        return model.get_information()
-
-
-    @application.post("/models/{model_name}/infer")
-    async def infer_model(model_name: str, request: InferenceRequest) -> fastapi.Response:
-        """
-        Run an inference through the selected model
-        :param model_name: the name of the model
-        :param request: the data to infer to the model
-        :return: the model response
-        """
-
-        # get the corresponding model
-        model = model_manager.models.get(model_name)
-        if model is None:
-            raise fastapi.HTTPException(status_code=404, detail="Model not found")
-
-        # infer the data through the model
-        try:
-            response = model.infer(request.request)
-        except Exception:
-            print(traceback.format_exc(), file=sys.stderr)
-            raise fastapi.HTTPException(status_code=500, detail="An error occurred while inferring the model.")
-
-        # pack the model response into a fastapi response
-        return fastapi.Response(
-            content=response,
-            media_type=model.response_mimetype,
-        )
diff --git a/source/manager/ModelManager.py b/source/manager/ModelManager.py
index f501dfb..2c7106c 100644
--- a/source/manager/ModelManager.py
+++ b/source/manager/ModelManager.py
@@ -4,11 +4,14 @@ import typing
 import warnings
 from pathlib import Path
 
-from source import model
+import fastapi
+
+from source import model, api
 
 
 class ModelManager:
-    def __init__(self, model_library: os.PathLike | str):
+    def __init__(self, application: api.Application, model_library: os.PathLike | str):
+        self.application: api.Application = application
         self.model_library: Path = Path(model_library)
 
         # the model types
@@ -20,10 +23,43 @@
         # TODO(Faraphel): load more than one model at a time ? require a way more complex manager to handle memory issue
         self.current_loaded_model: typing.Optional[model.base.BaseModel] = None
 
-    def register_model_type(self, name: str, model_type: typing.Type[model.base.BaseModel]):
+        @self.application.get("/models")
+        async def get_models() -> list[str]:
+            """
+            Get the list of models available
+            :return: the list of models available
+            """
+
+            # list the models found
+            return list(self.models.keys())
+
+        @self.application.get("/models/{model_name}")
+        async def get_model(model_name: str) -> dict:
+            """
+            Get information about a specific model
+            :param model_name: the name of the model
+            :return: the information about the corresponding model
+            """
+
+            # get the corresponding model
+            model = self.models.get(model_name)
+            if model is None:
+                raise fastapi.HTTPException(status_code=404, detail="Model not found")
+
+            # return the model information
+            return model.get_information()
+
+
+    def register_model_type(self, name: str, model_type: "typing.Type[model.base.BaseModel]"):
         self.model_types[name] = model_type
 
     def reload(self):
+        # reset the model list (avoid shadowing the `model` module import)
+        for loaded_model in self.models.values():
+            loaded_model.unload()
+        self.models.clear()
+
+        # load all the models in the library
         for model_path in self.model_library.iterdir():
             model_name: str = model_path.name
             model_configuration_path: Path = model_path / "config.json"
diff --git a/source/model/DummyModel.py b/source/model/DummyModel.py
deleted file mode 100644
index e18aeb3..0000000
--- a/source/model/DummyModel.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import json
-
-from source.model import base
-
-
-class DummyModel(base.BaseModel):
-    """
-    A dummy model, mainly used to test the API and the manager.
-    simply send back the request made to it.
-    """
-
-    def _load(self) -> None:
-        pass
-
-    def _unload(self) -> None:
-        pass
-
-    def _infer(self, payload: dict) -> str | bytes:
-        return json.dumps(payload)
diff --git a/source/model/PythonModel.py b/source/model/PythonModel.py
index d82dfe3..8a85d7f 100644
--- a/source/model/PythonModel.py
+++ b/source/model/PythonModel.py
@@ -1,9 +1,14 @@
 import importlib.util
 import subprocess
 import sys
+import typing
 import uuid
+import inspect
 from pathlib import Path
 
+import fastapi
+
+from source import utils
 from source.manager import ModelManager
 from source.model import base
 
@@ -16,6 +21,8 @@ class PythonModel(base.BaseModel):
     def __init__(self, manager: ModelManager, configuration: dict, path: Path):
         super().__init__(manager, configuration, path)
 
+        ## Configuration
+
         # get the name of the file containing the model code
         file = configuration.get("file")
         if file is None:
@@ -36,11 +43,28 @@
         # load the module
         module_spec.loader.exec_module(self.module)
 
+        ## Api
+
+        # load the inputs data into the inference function signature (used by FastAPI)
+        parameters = utils.parameters.load(configuration.get("inputs", {}))
+
+        # create an endpoint wrapping the inference inside a fastapi call
+        async def infer_api(*args, **kwargs):
+            return fastapi.responses.StreamingResponse(
+                content=self.infer(*args, **kwargs),
+                media_type=self.output_type,
+            )
+
+        infer_api.__signature__ = inspect.Signature(parameters=parameters)
+
+        # add the inference endpoint on the API
+        self.manager.application.add_api_route(f"/models/{self.name}/infer", infer_api, methods=["POST"])
+
     def _load(self) -> None:
         return self.module.load(self)
 
     def _unload(self) -> None:
         return self.module.unload(self)
 
-    def _infer(self, payload: dict) -> str | bytes:
-        return self.module.infer(self, payload)
+    def _infer(self, *args, **kwargs) -> typing.Iterator[bytes]:
+        return self.module.infer(self, *args, **kwargs)
diff --git a/source/model/__init__.py b/source/model/__init__.py
index e4bff02..31f365d 100644
--- a/source/model/__init__.py
+++ b/source/model/__init__.py
@@ -1,4 +1,3 @@
 from . import base
 
-from .DummyModel import DummyModel
 from .PythonModel import PythonModel
diff --git a/source/model/base/BaseModel.py b/source/model/base/BaseModel.py
index 6137483..ce763b0 100644
--- a/source/model/base/BaseModel.py
+++ b/source/model/base/BaseModel.py
@@ -1,7 +1,9 @@
 import abc
 import gc
+import typing
 from pathlib import Path
 
+from source import api
 from source.manager import ModelManager
 
 
@@ -10,13 +12,13 @@ class BaseModel(abc.ABC):
     Represent a model.
     """
 
-    def __init__(self, manager: ModelManager, configuration: dict, path: Path):
+    def __init__(self, manager: ModelManager, configuration: dict[str, typing.Any], path: Path):
         # the environment directory of the model
         self.path = path
         # the model manager
         self.manager = manager
         # the mimetype of the model responses
-        self.response_mimetype: str = configuration.get("response_mimetype", "application/json")
+        self.output_type: str = configuration.get("output_type", "application/json")
 
         self._loaded = False
 
@@ -101,13 +103,11 @@
         """
         Unload the model
         Do not call manually, use `unload` instead.
-        :return:
         """
 
-    def infer(self, payload: dict) -> str | bytes:
+    def infer(self, *args, **kwargs) -> typing.Iterator[bytes]:
         """
         Infer our payload through the model within the model manager
-        :param payload: the payload to give to the model
         :return: the response of the model
         """
 
@@ -115,12 +115,11 @@
         self.load()
 
         # model specific inference part
-        return self._infer(payload)
+        return self._infer(*args, **kwargs)
 
     @abc.abstractmethod
-    def _infer(self, payload: dict) -> str | bytes:
+    def _infer(self, *args, **kwargs) -> typing.Iterator[bytes]:
         """
         Infer our payload through the model
-        :param payload: the payload to give to the model
         :return: the response of the model
         """
diff --git a/source/utils/__init__.py b/source/utils/__init__.py
new file mode 100644
index 0000000..f6bd50e
--- /dev/null
+++ b/source/utils/__init__.py
@@ -0,0 +1 @@
+from . import parameters
diff --git a/source/utils/parameters.py b/source/utils/parameters.py
new file mode 100644
index 0000000..5304bb1
--- /dev/null
+++ b/source/utils/parameters.py
@@ -0,0 +1,54 @@
+import inspect
+from datetime import datetime
+
+import fastapi
+
+
+# the list of types and their name that can be used by the API
+types: dict[str, type] = {
+    "bool": bool,
+    "int": int,
+    "float": float,
+    "str": str,
+    "bytes": bytes,
+    "list": list,
+    "tuple": tuple,
+    "set": set,
+    "dict": dict,
+    "datetime": datetime,
+    "file": fastapi.UploadFile,
+}
+
+
+def load(parameters_definition: dict[str, dict]) -> list[inspect.Parameter]:
+    """
+    Load a list of Python function parameters from their definitions.
+    :param parameters_definition: the definitions of the parameters
+    :return: the Python function parameters
+
+    Examples:
+    >>> parameters_definition = {
+    ...     "boolean": {"type": "bool", "default": False},
+    ...     "list": {"type": "list", "default": [1, 2, 3]},
+    ...     "datetime": {"type": "datetime"},
+    ...     "file": {"type": "file"},
+    ... }
+    >>> parameters = load(parameters_definition)
+    """
+
+    parameters: list[inspect.Parameter] = []
+
+    for name, definition in parameters_definition.items():
+        # deserialize the parameter
+        parameter = inspect.Parameter(
+            name,
+            inspect.Parameter.POSITIONAL_OR_KEYWORD,
+            default=definition.get("default", inspect.Parameter.empty),
+            annotation=types[definition["type"]],
+        )
+        parameters.append(parameter)
+
+    # sort the parameters so that non-default arguments always end up before default ones
+    parameters.sort(key=lambda parameter: parameter.default is inspect.Parameter.empty, reverse=True)
+
+    return parameters
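
Note (not part of the patch): a minimal client sketch against the endpoints this
change generates, useful as a smoke test. It assumes the server started by
source/__main__.py is listening on localhost:8000, that FastAPI binds the scalar
"prompt" input declared in samples/models/python-bert-1/config.json as a query
parameter (its default behaviour for non-file scalars), and that the third-party
requests package is installed; the base URL is illustrative only.

    import requests

    BASE_URL = "http://localhost:8000"  # assumed address of the running server

    # list the models discovered by the ModelManager (GET /models)
    print(requests.get(f"{BASE_URL}/models").json())

    # fetch the metadata of a single model (GET /models/{model_name})
    print(requests.get(f"{BASE_URL}/models/python-bert-1").json())

    # call the generated inference endpoint (POST /models/{model_name}/infer);
    # the response streams the bytes yielded by the model's infer() generator
    with requests.post(
        f"{BASE_URL}/models/python-bert-1/infer",
        params={"prompt": "Hello world!"},
        stream=True,
    ) as response:
        response.raise_for_status()
        for chunk in response.iter_content(chunk_size=None):
            print(chunk.decode("utf-8"))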