[EAGLE-3792]-Test case for model upload #256

Merged: 7 commits, Jan 10, 2024
Changes shown are from 2 commits
32 changes: 32 additions & 0 deletions .github/workflows/run_triton_test.yaml
@@ -0,0 +1,32 @@
name: Run triton test

on:
push:
branches: [ master ]
pull_request:

jobs:
triton-model-test:
runs-on: ubuntu-latest

container:
image: nvcr.io/nvidia/tritonserver:23.10-py3
phatvo9 marked this conversation as resolved.

steps:
- name: Install dependencies
run: |
Contributor: The indentation is broken here. You either need to indent lines 17-20 to match line 16, or remove 2 spaces from line 16 to match lines 17-20, and then also fix line 22 and below.

Contributor Author: Thanks for spotting that error.

python -m pip install --upgrade pip
pip install -r requirements.txt
pip install -r tests/requirements.txt

- name: start triton
run: |
tritonserver --model-repository ./tests/dummy_triton_models --exit-on-error false &
phatvo9 marked this conversation as resolved.
background: true
Contributor: This doesn't seem to actually do anything.


- name: Wait for Triton to Initialize
run: sleep 10

- name: Start Model Tests
run: |
python3 -m pytest tests/dummy_triton_models/_test_all_dummies.py -s
phatvo9 marked this conversation as resolved.
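The two review comments above point at concrete problems in this workflow file: the pip install lines are not indented consistently with their run: | block, and background: true is not a recognized GitHub Actions step key (the trailing & in the run command is what actually puts tritonserver in the background), which lines up with the reviewer's remark. A minimal sketch of the steps with both points addressed; the container image, paths, and commands come from the diff, while the step names and exact indentation are editorial assumptions:

  steps:
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt
        pip install -r tests/requirements.txt

    - name: Start triton
      # '&' backgrounds the server for later steps; GitHub Actions has no 'background:' step key
      run: |
        tritonserver --model-repository ./tests/dummy_triton_models --exit-on-error false &

    - name: Wait for Triton to Initialize
      run: sleep 10

    - name: Start Model Tests
      run: |
        python3 -m pytest tests/dummy_triton_models/_test_all_dummies.py -s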
3 changes: 3 additions & 0 deletions clarifai/models/model_serving/models/default_test.py
@@ -67,6 +67,9 @@ def intitialize(self,
is_instance_kind_gpu: bool = True,
inference_parameters: Union[str, Dict[str, Any]] = ""):
import sys
    # drop any previously imported 'inference' module so the repo version under test is imported fresh
if 'inference' in sys.modules:
del sys.modules['inference']
sys.path.append(repo_version_dir)
self.model_type = model_type
self.is_instance_kind_gpu = is_instance_kind_gpu
171 changes: 171 additions & 0 deletions tests/dummy_triton_models/_test_all_dummies.py
@@ -0,0 +1,171 @@
import time

import numpy as np
import pytest
from tritonclient.grpc import InferenceServerClient, InferInput, InferRequestedOutput
from tritonclient.utils import np_to_triton_dtype

MAX_BATCH_SIZE = 4
MAX_TRIES = 5
INTERVAL = 3
count = 0
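# Wait for the Triton server launched by the CI workflow to report live, retrying up to MAX_TRIES times before any tests run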
while count < MAX_TRIES:
try:
_ = InferenceServerClient('localhost:8001').is_server_live()
break
except Exception as e:
print(e)
count += 1
time.sleep(INTERVAL)


@pytest.fixture
def triton_client():
return InferenceServerClient('localhost:8001')


def make_input(name, inputs):
model_input = InferInput(name, inputs.shape, np_to_triton_dtype(inputs.dtype))
model_input.set_data_from_numpy(inputs)
return model_input


def make_random_image_input(name="image", bs=1, size=256):
image = np.random.rand(bs, size, size, 3) * 255
image = image.astype("uint8")
return make_input(name, image)


def make_text_input(name="text", text="this is text", bs=1):
text = np.array([text] * bs, dtype=np.object_).reshape(-1, 1)
return make_input(name, text)


def inference(triton_client, model_name, input_: list, output_names: list):
res = triton_client.infer(
model_name=model_name,
inputs=input_,
outputs=[InferRequestedOutput(each) for each in output_names])
return {output_name: res.as_numpy(output_name) for output_name in output_names}


def execute_test_image_as_input(triton_client, model_name, input_name, output_names):
single_input = make_random_image_input(name=input_name, bs=1, size=256)
res = inference(triton_client, model_name, [single_input], output_names=output_names)
outputs = [res[each] for each in output_names]

if len(outputs) > 1:
assert all(len(each[0]) == 1
for each in outputs), f"[{model_name}], All predictions must have same length"
elif model_name == "visual-classifier":
    assert (outputs[0] <= 1.).all()  # every softmax score must be a valid probability
else:
assert len(outputs[0].shape)

# Test bs > 1
multi_input = make_random_image_input(name=input_name, bs=2, size=256)
res = inference(triton_client, model_name, [multi_input], output_names=output_names)
outputs = [res[each] for each in output_names]

if len(outputs) > 1:
assert all(len(each[0]) == 1
for each in outputs), f"[{model_name}], All predictions must have same length"
elif model_name == "visual-classifier":
    assert (outputs[0] <= 1.).all()  # every softmax score must be a valid probability
else:
assert len(outputs[0].shape)

# Test bs > max_batch_size
with pytest.raises(Exception):
multi_input = make_random_image_input(name=input_name, bs=10, size=256)
res = inference(triton_client, model_name, [multi_input], output_names=output_names)


def execute_test_text_as_input(triton_client, model_name, input_name, output_names):
single_input = make_text_input(name=input_name, bs=1)
res = inference(triton_client, model_name, [single_input], output_names=output_names)
outputs = [res[each] for each in output_names]

if model_name == "text-to-image":
assert len(outputs[0][0].shape) == 3
elif model_name == "text-classifier":
    assert (outputs[0] <= 1.).all()  # every softmax score must be a valid probability
else:
assert len(outputs[0].shape)

# Test bs > 1
multi_input = make_text_input(name=input_name, bs=2)
res = inference(triton_client, model_name, [multi_input], output_names=output_names)
outputs = [res[each] for each in output_names]

if model_name == "text-to-image":
assert len(outputs[0][0].shape) == 3
elif model_name == "text-classifier":
    assert (outputs[0] <= 1.).all()  # every softmax score must be a valid probability
else:
assert len(outputs[0].shape)

# Test bs > max_batch_size
with pytest.raises(Exception):
multi_input = make_text_input(name=input_name, bs=10)
res = inference(triton_client, model_name, [multi_input], output_names=output_names)


class TestModelTypes:

# --------- Image Input --------- #
def test_visual_detector(self, triton_client):
model_name = "visual-detector"
input_name = "image"
output_names = ["predicted_bboxes", "predicted_labels", "predicted_scores"]
execute_test_image_as_input(triton_client, model_name, input_name, output_names)

def test_visual_classifier(self, triton_client):
model_name = "visual-classifier"
input_name = "image"
output_names = ["softmax_predictions"]
execute_test_image_as_input(triton_client, model_name, input_name, output_names)

def test_visual_embedder(self, triton_client):
model_name = "visual-embedder"
input_name = "image"
output_names = ["embeddings"]
execute_test_image_as_input(triton_client, model_name, input_name, output_names)

def test_visual_segmenter(self, triton_client):
model_name = "visual-segmenter"
input_name = "image"
output_names = ["predicted_mask"]
execute_test_image_as_input(triton_client, model_name, input_name, output_names)

# --------- Text Input --------- #
def test_text_to_image(self, triton_client):
model_name = "text-to-image"
input_name = "text"
output_names = ["image"]
execute_test_text_as_input(triton_client, model_name, input_name, output_names)

def test_text_classifier(self, triton_client):
model_name = "text-classifier"
input_name = "text"
output_names = ["softmax_predictions"]
execute_test_text_as_input(triton_client, model_name, input_name, output_names)

def test_text_embedder(self, triton_client):
model_name = "text-embedder"
input_name = "text"
output_names = ["embeddings"]
execute_test_text_as_input(triton_client, model_name, input_name, output_names)

def test_text_to_text(self, triton_client):
model_name = "text-to-text"
input_name = "text"
output_names = ["text"]
execute_test_text_as_input(triton_client, model_name, input_name, output_names)

# --------- Multimodal Inputs --------- #
def test_multimodal_embedder(self, triton_client):
model_name = "multimodal-embedder"
output_names = ["embeddings"]
execute_test_image_as_input(triton_client, model_name, "image", output_names)
execute_test_text_as_input(triton_client, model_name, "text", output_names)
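Not part of the test suite: the helpers above also work for poking at a single model interactively. A quick sketch, assuming a Triton server with the dummy models is already running on localhost:8001; unlike the test class, it sends both of the multimodal embedder's inputs in one request:

  client = InferenceServerClient('localhost:8001')
  inputs = [
      make_random_image_input(name="image", bs=1, size=256),
      make_text_input(name="text", text="a photo of a cat", bs=1),
  ]
  result = inference(client, "multimodal-embedder", inputs, output_names=["embeddings"])
  print(result["embeddings"].shape)  # the dummy model returns 768-dim embeddings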
Empty file.
56 changes: 56 additions & 0 deletions tests/dummy_triton_models/multimodal-embedder/1/inference.py
@@ -0,0 +1,56 @@
# This file contains boilerplate code that lets users write their model
# inference code, which then interacts with the Triton Inference Server
# Python backend to serve end-user requests.
# The module name, module path, class name & get_predictions() method name MUST be kept as is,
# but other methods may be added within the class as deemed fit, provided
# they are invoked from the main get_predictions() inference method
# whenever they play a role in any step of model inference.
"""User model inference script."""

import os
from pathlib import Path
import numpy as np
from clarifai.models.model_serving.model_config import ModelTypes, get_model_config

config = get_model_config(ModelTypes.multimodal_embedder)


class InferenceModel:
"""User model inference class."""

def __init__(self) -> None:
"""
    Load inference-time artifacts that are called frequently (e.g. models, tokenizers, etc.)
in this method so they are loaded only once for faster inference.
"""
self.base_path: Path = os.path.dirname(__file__)
## sample model loading code:
#self.checkpoint_path: Path = os.path.join(self.base_path, "your checkpoint filename/path")
#self.model: Callable = <load_your_model_here from checkpoint or folder>

@config.inference.wrap_func
def get_predictions(self, input_data, **kwargs):
"""
Main model inference method.

Args:
-----
input_data: A single input data item to predict on.
Input data can be an image or text, etc depending on the model type.

Returns:
--------
One of the clarifai.models.model_serving.models.output types. Refer to the README/docs
"""
outputs = []
for inp_data in input_data:
image, text = inp_data["image"], inp_data["text"]
if text is not None:
assert isinstance(text, str), "Incorrect type of text, expected str"
embeddings = np.zeros(768)
else:
assert isinstance(image, np.ndarray), "Incorrect type of image, expected np.ndarray"
embeddings = np.ones(768)
outputs.append(config.inference.return_type(embedding_vector=embeddings))

return outputs
74 changes: 74 additions & 0 deletions tests/dummy_triton_models/multimodal-embedder/1/model.py
@@ -0,0 +1,74 @@
# Copyright 2023 Clarifai, Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Triton inference server Python Backend Model."""

import os
import sys

try:
import triton_python_backend_utils as pb_utils
except ModuleNotFoundError:
pass
from google.protobuf import text_format
from tritonclient.grpc.model_config_pb2 import ModelConfig
from clarifai.models.model_serving.model_config.inference_parameter import parse_req_parameters


class TritonPythonModel:
"""
Triton Python BE Model.
"""

def initialize(self, args):
"""
Triton server init.
"""
args["model_repository"] = args["model_repository"].replace("/1/model.py", "")
sys.path.append(os.path.dirname(__file__))
from inference import InferenceModel

self.inference_obj = InferenceModel()

# Read input_name from config file
self.config_msg = ModelConfig()
with open(os.path.join(args["model_repository"], "config.pbtxt"), "r") as f:
cfg = f.read()
text_format.Merge(cfg, self.config_msg)
self.input_names = [inp.name for inp in self.config_msg.input]

def execute(self, requests):
"""
Serve model inference requests.
"""
responses = []

for request in requests:
parameters = request.parameters()
parameters = parse_req_parameters(parameters) if parameters else {}

if len(self.input_names) == 1:
in_batch = pb_utils.get_input_tensor_by_name(request, self.input_names[0])
in_batch = in_batch.as_numpy()
inference_response = self.inference_obj.get_predictions(in_batch, **parameters)
else:
multi_in_batch_dict = {}
for input_name in self.input_names:
in_batch = pb_utils.get_input_tensor_by_name(request, input_name)
in_batch = in_batch.as_numpy() if in_batch is not None else []
multi_in_batch_dict.update({input_name: in_batch})

inference_response = self.inference_obj.get_predictions(multi_in_batch_dict, **parameters)

responses.append(inference_response)

return responses
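For the multimodal embedder, the multi-input branch above hands get_predictions a dict keyed by the input names read from config.pbtxt, one numpy batch per input. A rough illustration of that payload, with shapes borrowed from the tests (the exact dtypes and the per-item unpacking are handled by the model config and the wrap_func decorator, so treat the details as assumptions):

  multi_in_batch_dict = {
      "image": (np.random.rand(2, 256, 256, 3) * 255).astype("uint8"),  # batch of 2 images
      "text": np.array([["a photo of a cat"], ["hello"]], dtype=np.object_),  # batch of 2 strings
  }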