Mirror of https://git.isriupjv.fr/ISRI/ai-server (synced 2025-04-24 10:08:11 +02:00)
Added support for input parameters recognised by the API. Models are now exposed through separate endpoints so that their inputs are easier to recognise.
parent 900c58ffcb
commit 7bd84c8570
17 changed files with 163 additions and 128 deletions
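In practice, each model is now exposed on its own inference endpoint, with parameters generated from the `inputs` section of its configuration. A minimal client session might look like the sketch below; the host, the model name taken from the sample directory, and the use of a plain HTTP client are assumptions based on the routes added in this diff:

    import requests

    # list the available models, then call one model's dedicated endpoint
    models = requests.get("http://localhost:8000/models").json()
    response = requests.post(
        "http://localhost:8000/models/dummy/infer",  # "dummy": the sample model below
        stream=True,
    )
    for chunk in response.iter_content(chunk_size=None):
        print(chunk)  # b'{"hello": "world!"}'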
requirements.txt
@@ -2,6 +2,7 @@
 fastapi
 uvicorn
 pydantic
+python-multipart

 # AI
 accelerate

samples/models/dummy/config.json
@@ -1,3 +1,6 @@
 {
-    "type": "dummy"
+    "type": "python",
+    "file": "model.py",
+
+    "inputs": {}
 }

samples/models/dummy/model.py (new file, +12)
@@ -0,0 +1,12 @@
+import json
+import typing
+
+
+def load(model) -> None:
+    pass
+
+def unload(model) -> None:
+    pass
+
+def infer(model) -> typing.Iterator[bytes]:
+    yield json.dumps({"hello": "world!"}).encode("utf-8")

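Because a "python" model is a plain module, its contract can be exercised without the server. A quick sanity check of this dummy module, loaded the same way `PythonModel` loads it further down in this diff (the module name and the `SimpleNamespace` stand-in are illustrative):

    import importlib.util
    import types

    # load the sample module from its file, as PythonModel does below
    spec = importlib.util.spec_from_file_location("dummy_model", "samples/models/dummy/model.py")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    model = types.SimpleNamespace()  # stand-in for the manager's model object
    module.load(model)
    print(b"".join(module.infer(model)))  # b'{"hello": "world!"}'
    module.unload(model)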
@@ -2,6 +2,10 @@
     "type": "python",
     "file": "model.py",

+    "inputs": {
+        "prompt": {"type": "str"}
+    },
+
     "requirements": [
         "transformers",
         "torch",

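The names allowed under "type" in each input map to the type table introduced in source/utils/parameters.py at the end of this diff. Purely as an illustration, a configuration mixing several of the recognised types might look like this (the parameter names and defaults are made up):

    {
        "type": "python",
        "file": "model.py",

        "inputs": {
            "prompt": {"type": "str"},
            "temperature": {"type": "float", "default": 1.0},
            "attachment": {"type": "file"}
        }
    }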
@@ -1,4 +1,5 @@
 import json
+import typing

 import torch
 import transformers
@@ -7,22 +8,22 @@ import transformers
 MODEL_NAME: str = "huawei-noah/TinyBERT_General_4L_312D"


-def load(model):
+def load(model) -> None:
     model.model = transformers.AutoModel.from_pretrained(MODEL_NAME)
     model.tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)

-def unload(model):
+def unload(model) -> None:
     model.model = None
     model.tokenizer = None

-def infer(model, payload: dict) -> str:
-    inputs = model.tokenizer(payload["prompt"], return_tensors="pt")
+def infer(model, prompt: str) -> typing.Iterator[bytes]:
+    inputs = model.tokenizer(prompt, return_tensors="pt")

     with torch.no_grad():
         outputs = model.model(**inputs)

     embeddings = outputs.last_hidden_state

-    return json.dumps({
+    yield json.dumps({
         "data": embeddings.tolist()
-    })
+    }).encode("utf-8")

@@ -1,4 +1,5 @@
 import json
+import typing

 import torch
 import transformers
@@ -7,22 +8,22 @@ import transformers
 MODEL_NAME: str = "huawei-noah/TinyBERT_General_4L_312D"


-def load(model):
+def load(model) -> None:
     model.model = transformers.AutoModel.from_pretrained(MODEL_NAME)
     model.tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)

-def unload(model):
+def unload(model) -> None:
     model.model = None
     model.tokenizer = None

-def infer(model, payload: dict) -> str:
-    inputs = model.tokenizer(payload["prompt"], return_tensors="pt")
+def infer(model, prompt: str) -> typing.Iterator[bytes]:
+    inputs = model.tokenizer(prompt, return_tensors="pt")

     with torch.no_grad():
         outputs = model.model(**inputs)

     embeddings = outputs.last_hidden_state

-    return json.dumps({
+    yield json.dumps({
         "data": embeddings.tolist()
-    })
+    }).encode("utf-8")

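The change of `infer` from returning a `str` to yielding `bytes` matches the `StreamingResponse` wrapper added in `PythonModel` below: FastAPI consumes an iterator of byte chunks, so a model may now emit its output incrementally rather than as one blob. A hypothetical multi-chunk variant, just to make the contract concrete:

    import typing

    def infer(model, prompt: str) -> typing.Iterator[bytes]:
        # hypothetical: stream one chunk per word instead of a single blob
        for word in prompt.split():
            yield (word + " ").encode("utf-8")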
@@ -7,13 +7,9 @@ application = api.Application()


 # create the model controller
-model_controller = manager.ModelManager(os.environ["MODEL_LIBRARY"])
-model_controller.register_model_type("dummy", model.DummyModel)
+model_controller = manager.ModelManager(application, os.environ["MODEL_LIBRARY"])
 model_controller.register_model_type("python", model.PythonModel)
 model_controller.reload()

-api.route.models.load(application, model_controller)
-
-
 # serve the application
 application.serve("0.0.0.0", 8000)

source/api/__init__.py
@@ -1,3 +1 @@
-from . import route
-
 from .Application import Application

source/api/route/__init__.py (deleted)
@@ -1 +0,0 @@
-from . import models

source/api/route/models.py (deleted)
@@ -1,74 +0,0 @@
-import sys
-import traceback
-
-import fastapi
-import pydantic
-
-from source.api import Application
-from source import manager
-
-
-class InferenceRequest(pydantic.BaseModel):
-    """
-    Represent a request made when inferring a model
-    """
-
-    request: dict
-
-
-def load(application: Application, model_manager: manager.ModelManager):
-    @application.get("/models")
-    async def get_models() -> list[str]:
-        """
-        Get the list of models available
-        :return: the list of models available
-        """
-
-        # reload the model list
-        model_manager.reload()
-        # list the models found
-        return list(model_manager.models.keys())
-
-    @application.get("/models/{model_name}")
-    async def get_model(model_name: str) -> dict:
-        """
-        Get information about a specific model
-        :param model_name: the name of the model
-        :return: the information about the corresponding model
-        """
-
-        # get the corresponding model
-        model = model_manager.models.get(model_name)
-        if model is None:
-            raise fastapi.HTTPException(status_code=404, detail="Model not found")
-
-        # return the model information
-        return model.get_information()
-
-
-    @application.post("/models/{model_name}/infer")
-    async def infer_model(model_name: str, request: InferenceRequest) -> fastapi.Response:
-        """
-        Run an inference through the selected model
-        :param model_name: the name of the model
-        :param request: the data to infer to the model
-        :return: the model response
-        """
-
-        # get the corresponding model
-        model = model_manager.models.get(model_name)
-        if model is None:
-            raise fastapi.HTTPException(status_code=404, detail="Model not found")
-
-        # infer the data through the model
-        try:
-            response = model.infer(request.request)
-        except Exception:
-            print(traceback.format_exc(), file=sys.stderr)
-            raise fastapi.HTTPException(status_code=500, detail="An error occurred while inferring the model.")
-
-        # pack the model response into a fastapi response
-        return fastapi.Response(
-            content=response,
-            media_type=model.response_mimetype,
-        )

@@ -4,11 +4,14 @@ import typing
 import warnings
 from pathlib import Path

-from source import model
+import fastapi
+
+from source import model, api


 class ModelManager:
-    def __init__(self, model_library: os.PathLike | str):
+    def __init__(self, application: api.Application, model_library: os.PathLike | str):
+        self.application: api.Application = application
         self.model_library: Path = Path(model_library)

         # the model types
@@ -20,16 +23,43 @@ class ModelManager:
         # TODO(Faraphel): load more than one model at a time ? require a way more complex manager to handle memory issue
         self.current_loaded_model: typing.Optional[model.base.BaseModel] = None

-    def register_model_type(self, name: str, model_type: typing.Type[model.base.BaseModel]):
+        @self.application.get("/models")
+        async def get_models() -> list[str]:
+            """
+            Get the list of models available
+            :return: the list of models available
+            """
+
+            # list the models found
+            return list(self.models.keys())
+
+        @self.application.get("/models/{model_name}")
+        async def get_model(model_name: str) -> dict:
+            """
+            Get information about a specific model
+            :param model_name: the name of the model
+            :return: the information about the corresponding model
+            """
+
+            # get the corresponding model
+            model = self.models.get(model_name)
+            if model is None:
+                raise fastapi.HTTPException(status_code=404, detail="Model not found")
+
+            # return the model information
+            return model.get_information()
+
+
+    def register_model_type(self, name: str, model_type: "typing.Type[model.base.BaseModel]"):
         self.model_types[name] = model_type

     def reload(self):
         # reset the model list
         for model in self.models.values():
             model.unload()
         self.models.clear()

         # load all the models in the library
         for model_path in self.model_library.iterdir():
             model_name: str = model_path.name
             model_configuration_path: Path = model_path / "config.json"

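Moving these endpoints into `ModelManager.__init__` (replacing the deleted `source/api/route/models.py` earlier in this diff) relies on the handlers being closures over `self`, registered at construction time, so no module-level manager is needed. The pattern in isolation, as a minimal sketch with illustrative names:

    import fastapi

    class Service:
        def __init__(self, application: fastapi.FastAPI):
            self.items: dict[str, str] = {"a": "1"}

            # the handler closes over `self` and is registered on construction
            @application.get("/items")
            async def get_items() -> list[str]:
                return list(self.items.keys())

    app = fastapi.FastAPI()
    service = Service(app)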
source/model/DummyModel.py (deleted)
@@ -1,19 +0,0 @@
-import json
-
-from source.model import base
-
-
-class DummyModel(base.BaseModel):
-    """
-    A dummy model, mainly used to test the API and the manager.
-    simply send back the request made to it.
-    """
-
-    def _load(self) -> None:
-        pass
-
-    def _unload(self) -> None:
-        pass
-
-    def _infer(self, payload: dict) -> str | bytes:
-        return json.dumps(payload)

source/model/PythonModel.py
@@ -1,9 +1,14 @@
 import importlib.util
 import subprocess
 import sys
+import typing
 import uuid
+import inspect
 from pathlib import Path

+import fastapi
+
+from source import utils
 from source.manager import ModelManager
 from source.model import base

@@ -16,6 +21,8 @@ class PythonModel(base.BaseModel):
     def __init__(self, manager: ModelManager, configuration: dict, path: Path):
         super().__init__(manager, configuration, path)

+        ## Configuration
+
         # get the name of the file containing the model code
         file = configuration.get("file")
         if file is None:
@@ -36,11 +43,28 @@ class PythonModel(base.BaseModel):
         # load the module
         module_spec.loader.exec_module(self.module)

+        ## Api
+
+        # load the inputs data into the inference function signature (used by FastAPI)
+        parameters = utils.parameters.load(configuration.get("inputs", {}))
+
+        # create an endpoint wrapping the inference inside a fastapi call
+        async def infer_api(*args, **kwargs):
+            return fastapi.responses.StreamingResponse(
+                content=self.infer(*args, **kwargs),
+                media_type=self.output_type,
+            )
+
+        infer_api.__signature__ = inspect.Signature(parameters=parameters)
+
+        # add the inference endpoint on the API
+        self.manager.application.add_api_route(f"/models/{self.name}/infer", infer_api, methods=["POST"])
+
     def _load(self) -> None:
         return self.module.load(self)

     def _unload(self) -> None:
         return self.module.unload(self)

-    def _infer(self, payload: dict) -> str | bytes:
-        return self.module.infer(self, payload)
+    def _infer(self, *args, **kwargs) -> typing.Iterator[bytes]:
+        return self.module.infer(self, *args, **kwargs)

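The key trick here is the assignment to `infer_api.__signature__`: FastAPI introspects a route handler through `inspect.signature()`, which honours a manually assigned `__signature__`, so a generic `*args, **kwargs` wrapper can advertise per-model parameters for validation and OpenAPI docs. A standalone sketch of the mechanism (route path and parameter names are illustrative):

    import inspect

    import fastapi

    app = fastapi.FastAPI()

    async def infer_api(**kwargs):
        # generic handler; the real parameters are declared via __signature__
        return kwargs

    infer_api.__signature__ = inspect.Signature(parameters=[
        inspect.Parameter("prompt", inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=str),
        inspect.Parameter("temperature", inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=float, default=1.0),
    ])

    # FastAPI now validates and documents `prompt` and `temperature` for this route
    app.add_api_route("/models/demo/infer", infer_api, methods=["POST"])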
source/model/__init__.py
@@ -1,4 +1,3 @@
 from . import base

-from .DummyModel import DummyModel
 from .PythonModel import PythonModel

source/model/base.py
@@ -1,7 +1,9 @@
 import abc
 import gc
+import typing
 from pathlib import Path

+from source import api
 from source.manager import ModelManager


@@ -10,13 +12,13 @@ class BaseModel(abc.ABC):
     Represent a model.
     """

-    def __init__(self, manager: ModelManager, configuration: dict, path: Path):
+    def __init__(self, manager: ModelManager, configuration: dict[str, typing.Any], path: Path):
         # the environment directory of the model
         self.path = path
         # the model manager
         self.manager = manager
         # the mimetype of the model responses
-        self.response_mimetype: str = configuration.get("response_mimetype", "application/json")
+        self.output_type: str = configuration.get("output_type", "application/json")

         self._loaded = False

@@ -101,13 +103,11 @@ class BaseModel(abc.ABC):
         """
         Unload the model
         Do not call manually, use `unload` instead.
         :return:
         """

-    def infer(self, payload: dict) -> str | bytes:
+    def infer(self, *args, **kwargs) -> typing.Iterator[bytes]:
         """
         Infer our payload through the model within the model manager
-        :param payload: the payload to give to the model
         :return: the response of the model
         """
-
@@ -115,12 +115,11 @@ class BaseModel(abc.ABC):
             self.load()

         # model specific inference part
-        return self._infer(payload)
+        return self._infer(*args, **kwargs)

     @abc.abstractmethod
-    def _infer(self, payload: dict) -> str | bytes:
+    def _infer(self, *args, **kwargs) -> typing.Iterator[bytes]:
         """
         Infer our payload through the model
-        :param payload: the payload to give to the model
         :return: the response of the model
         """

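With this change, a custom model type only has to implement the three hooks against the new generator-based contract. A minimal subclass sketch (the `EchoModel` name is hypothetical and not part of this commit; it assumes the repository is importable):

    import json
    import typing

    from source.model import base


    class EchoModel(base.BaseModel):
        """Hypothetical model type that echoes its keyword arguments."""

        def _load(self) -> None:
            pass

        def _unload(self) -> None:
            pass

        def _infer(self, **kwargs) -> typing.Iterator[bytes]:
            yield json.dumps(kwargs).encode("utf-8")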
source/utils/__init__.py (new file, +1)
@@ -0,0 +1 @@
+from . import parameters

source/utils/parameters.py (new file, +54)
@@ -0,0 +1,54 @@
+import inspect
+from datetime import datetime
+
+import fastapi
+
+
+# the list of types and their name that can be used by the API
+types: dict[str, type] = {
+    "bool": bool,
+    "int": int,
+    "float": float,
+    "str": str,
+    "bytes": bytes,
+    "list": list,
+    "tuple": tuple,
+    "set": set,
+    "dict": dict,
+    "datetime": datetime,
+    "file": fastapi.UploadFile,
+}
+
+
+def load(parameters_definition: dict[str, dict]) -> list[inspect.Parameter]:
+    """
+    Load a list of python function parameters from their definitions.
+    :param parameters_definition: the definitions of the parameters
+    :return: the python function parameters
+
+    Examples:
+        >>> parameters_definition = {
+        ...     "boolean": {"type": "bool", "default": False},
+        ...     "list": {"type": "list", "default": [1, 2, 3]},
+        ...     "datetime": {"type": "datetime"},
+        ...     "file": {"type": "file"},
+        ... }
+        >>> parameters = load(parameters_definition)
+    """
+
+    parameters: list[inspect.Parameter] = []
+
+    for name, definition in parameters_definition.items():
+        # deserialize the parameter
+        parameter = inspect.Parameter(
+            name,
+            inspect.Parameter.POSITIONAL_OR_KEYWORD,
+            default=definition.get("default", inspect.Parameter.empty),
+            annotation=types[definition["type"]],
+        )
+        parameters.append(parameter)
+
+    # sort the parameters so that non-default arguments always end up before default ones
+    parameters.sort(key=lambda parameter: parameter.default is inspect.Parameter.empty, reverse=True)
+
+    return parameters
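A quick usage sketch of this helper (assuming the package layout above is importable): the sort at the end guarantees that required parameters precede defaulted ones, which is a precondition for building a valid `inspect.Signature`.

    import inspect

    from source.utils import parameters

    params = parameters.load({
        "verbose": {"type": "bool", "default": False},
        "prompt": {"type": "str"},
    })

    # required parameters are sorted first, so a valid signature can be built
    print(inspect.Signature(parameters=params))  # (prompt: str, verbose: bool = False)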