From 8771ab093b2e9ac214e13671275ada5c5b97b3a3 Mon Sep 17 00:00:00 2001
From: phtvo
Date: Thu, 28 Dec 2023 20:27:09 +0700
Subject: [PATCH 1/7] add tests

---
 .../model_serving/models/default_test.py      |   3 +
 .../dummy_triton_models/_test_all_dummies.py  | 171 ++++++++++++++++++
 .../multimodal-embedder/1/__init__.py         |   0
 .../multimodal-embedder/1/inference.py        |  56 ++++++
 .../multimodal-embedder/1/model.py            |  74 ++++++++
 .../1/test_multimodal_embedder.py             |  64 +++++++
 .../multimodal-embedder/config.pbtxt          |  29 +++
 .../multimodal-embedder/requirements.txt      |   2 +
 .../text-classifier/1/__init__.py             |   0
 .../text-classifier/1/inference.py            |  55 ++++++
 .../text-classifier/1/model.py                |  74 ++++++++
 .../text-classifier/1/test_text_classifier.py |  64 +++++++
 .../text-classifier/config.pbtxt              |  21 +++
 .../text-classifier/labels.txt                |   4 +
 .../text-classifier/requirements.txt          |   2 +
 .../text-embedder/1/__init__.py               |   0
 .../text-embedder/1/inference.py              |  55 ++++++
 .../text-embedder/1/model.py                  |  74 ++++++++
 .../text-embedder/1/test_text_embedder.py     |  64 +++++++
 .../text-embedder/config.pbtxt                |  20 ++
 .../text-embedder/requirements.txt            |   2 +
 .../text-to-image/1/__init__.py               |   0
 .../text-to-image/1/inference.py              |  55 ++++++
 .../text-to-image/1/model.py                  |  74 ++++++++
 .../text-to-image/1/test_text_to_image.py     |  64 +++++++
 .../text-to-image/config.pbtxt                |  22 +++
 .../text-to-image/requirements.txt            |   2 +
 .../text-to-text/1/__init__.py                |   0
 .../text-to-text/1/inference.py               |  55 ++++++
 .../text-to-text/1/model.py                   |  74 ++++++++
 .../text-to-text/1/test_text_to_text.py       |  64 +++++++
 .../text-to-text/config.pbtxt                 |  20 ++
 .../text-to-text/requirements.txt             |   2 +
 .../visual-classifier/1/__init__.py           |   0
 .../visual-classifier/1/inference.py          |  55 ++++++
 .../visual-classifier/1/model.py              |  74 ++++++++
 .../1/test_visual_classifier.py               |  64 +++++++
 .../visual-classifier/config.pbtxt            |  23 +++
 .../visual-classifier/labels.txt              |   5 +
 .../visual-classifier/requirements.txt        |   2 +
 .../visual-detector/1/__init__.py             |   0
 .../visual-detector/1/inference.py            |  58 ++++++
 .../visual-detector/1/model.py                |  74 ++++++++
 .../visual-detector/1/test_visual_detector.py |  64 +++++++
 .../visual-detector/config.pbtxt              |  36 ++++
 .../visual-detector/labels.txt                |   2 +
 .../visual-detector/requirements.txt          |   2 +
 .../visual-embedder/1/__init__.py             |   0
 .../visual-embedder/1/inference.py            |  54 ++++++
 .../visual-embedder/1/model.py                |  74 ++++++++
 .../visual-embedder/1/test_visual_embedder.py |  64 +++++++
 .../visual-embedder/config.pbtxt              |  22 +++
 .../visual-embedder/requirements.txt          |   2 +
 .../visual-segmenter/1/__init__.py            |   0
 .../visual-segmenter/1/inference.py           |  54 ++++++
 .../visual-segmenter/1/model.py               |  74 ++++++++
 .../1/test_visual_segmenter.py                |  64 +++++++
 .../visual-segmenter/config.pbtxt             |  24 +++
 .../visual-segmenter/labels.txt               |   2 +
 .../visual-segmenter/requirements.txt         |   2 +
 60 files changed, 2161 insertions(+)
 create mode 100644 tests/dummy_triton_models/_test_all_dummies.py
 create mode 100644 tests/dummy_triton_models/multimodal-embedder/1/__init__.py
 create mode 100644 tests/dummy_triton_models/multimodal-embedder/1/inference.py
 create mode 100644 tests/dummy_triton_models/multimodal-embedder/1/model.py
 create mode 100644 tests/dummy_triton_models/multimodal-embedder/1/test_multimodal_embedder.py
 create mode 100644 tests/dummy_triton_models/multimodal-embedder/config.pbtxt
 create mode 100644 tests/dummy_triton_models/multimodal-embedder/requirements.txt
 create mode 100644 tests/dummy_triton_models/text-classifier/1/__init__.py
 create mode 100644 tests/dummy_triton_models/text-classifier/1/inference.py
 create mode 100644 tests/dummy_triton_models/text-classifier/1/model.py
 create mode 100644 tests/dummy_triton_models/text-classifier/1/test_text_classifier.py
 create mode 100644 tests/dummy_triton_models/text-classifier/config.pbtxt
 create mode 100644 tests/dummy_triton_models/text-classifier/labels.txt
 create mode 100644 tests/dummy_triton_models/text-classifier/requirements.txt
 create mode 100644 tests/dummy_triton_models/text-embedder/1/__init__.py
 create mode 100644 tests/dummy_triton_models/text-embedder/1/inference.py
 create mode 100644 tests/dummy_triton_models/text-embedder/1/model.py
 create mode 100644 tests/dummy_triton_models/text-embedder/1/test_text_embedder.py
 create mode 100644 tests/dummy_triton_models/text-embedder/config.pbtxt
 create mode 100644 tests/dummy_triton_models/text-embedder/requirements.txt
 create mode 100644 tests/dummy_triton_models/text-to-image/1/__init__.py
 create mode 100644 tests/dummy_triton_models/text-to-image/1/inference.py
 create mode 100644 tests/dummy_triton_models/text-to-image/1/model.py
 create mode 100644 tests/dummy_triton_models/text-to-image/1/test_text_to_image.py
 create mode 100644 tests/dummy_triton_models/text-to-image/config.pbtxt
 create mode 100644 tests/dummy_triton_models/text-to-image/requirements.txt
 create mode 100644 tests/dummy_triton_models/text-to-text/1/__init__.py
 create mode 100644 tests/dummy_triton_models/text-to-text/1/inference.py
 create mode 100644 tests/dummy_triton_models/text-to-text/1/model.py
 create mode 100644 tests/dummy_triton_models/text-to-text/1/test_text_to_text.py
 create mode 100644 tests/dummy_triton_models/text-to-text/config.pbtxt
 create mode 100644 tests/dummy_triton_models/text-to-text/requirements.txt
 create mode 100644 tests/dummy_triton_models/visual-classifier/1/__init__.py
 create mode 100644 tests/dummy_triton_models/visual-classifier/1/inference.py
 create mode 100644 tests/dummy_triton_models/visual-classifier/1/model.py
 create mode 100644 tests/dummy_triton_models/visual-classifier/1/test_visual_classifier.py
 create mode 100644 tests/dummy_triton_models/visual-classifier/config.pbtxt
 create mode 100644 tests/dummy_triton_models/visual-classifier/labels.txt
 create mode 100644 tests/dummy_triton_models/visual-classifier/requirements.txt
 create mode 100644 tests/dummy_triton_models/visual-detector/1/__init__.py
 create mode 100644 tests/dummy_triton_models/visual-detector/1/inference.py
 create mode 100644 tests/dummy_triton_models/visual-detector/1/model.py
 create mode 100644 tests/dummy_triton_models/visual-detector/1/test_visual_detector.py
 create mode 100644 tests/dummy_triton_models/visual-detector/config.pbtxt
 create mode 100644 tests/dummy_triton_models/visual-detector/labels.txt
 create mode 100644 tests/dummy_triton_models/visual-detector/requirements.txt
 create mode 100644 tests/dummy_triton_models/visual-embedder/1/__init__.py
 create mode 100644 tests/dummy_triton_models/visual-embedder/1/inference.py
 create mode 100644 tests/dummy_triton_models/visual-embedder/1/model.py
 create mode 100644 tests/dummy_triton_models/visual-embedder/1/test_visual_embedder.py
 create mode 100644 tests/dummy_triton_models/visual-embedder/config.pbtxt
 create mode 100644 tests/dummy_triton_models/visual-embedder/requirements.txt
 create mode 100644 tests/dummy_triton_models/visual-segmenter/1/__init__.py
 create mode 100644 tests/dummy_triton_models/visual-segmenter/1/inference.py
 create mode 100644 tests/dummy_triton_models/visual-segmenter/1/model.py
 create mode 100644 tests/dummy_triton_models/visual-segmenter/1/test_visual_segmenter.py
 create mode 100644 tests/dummy_triton_models/visual-segmenter/config.pbtxt
 create mode 100644 tests/dummy_triton_models/visual-segmenter/labels.txt
 create mode 100644 tests/dummy_triton_models/visual-segmenter/requirements.txt
diff --git a/clarifai/models/model_serving/models/default_test.py b/clarifai/models/model_serving/models/default_test.py
index 84cfb382..62af20f8 100644
--- a/clarifai/models/model_serving/models/default_test.py
+++ b/clarifai/models/model_serving/models/default_test.py
@@ -67,6 +67,9 @@ def intitialize(self,
                  is_instance_kind_gpu: bool = True,
                  inference_parameters: Union[str, Dict[str, Any]] = ""):
     import sys
+    # Clear any previously imported `inference` module so this repo's inference.py is loaded
+    if 'inference' in sys.modules:
+      del sys.modules['inference']
     sys.path.append(repo_version_dir)
     self.model_type = model_type
     self.is_instance_kind_gpu = is_instance_kind_gpu
diff --git a/tests/dummy_triton_models/_test_all_dummies.py b/tests/dummy_triton_models/_test_all_dummies.py
new file mode 100644
index 00000000..2ac06bf5
--- /dev/null
+++ b/tests/dummy_triton_models/_test_all_dummies.py
@@ -0,0 +1,171 @@
+import time
+
+import numpy as np
+import pytest
+from tritonclient.grpc import InferenceServerClient, InferInput, InferRequestedOutput
+from tritonclient.utils import np_to_triton_dtype
+
+MAX_BATCH_SIZE = 4
+MAX_TRIES = 5
+INTERVAL = 3
+count = 0
+while count < MAX_TRIES:
+  try:
+    _ = InferenceServerClient('localhost:8001').is_server_live()
+    break
+  except Exception as e:
+    print(e)
+    count += 1
+    time.sleep(INTERVAL)
+
+
+@pytest.fixture
+def triton_client():
+  return InferenceServerClient('localhost:8001')
+
+
+def make_input(name, inputs):
+  model_input = InferInput(name, inputs.shape, np_to_triton_dtype(inputs.dtype))
+  model_input.set_data_from_numpy(inputs)
+  return model_input
+
+
+def make_random_image_input(name="image", bs=1, size=256):
+  image = np.random.rand(bs, size, size, 3) * 255
+  image = image.astype("uint8")
+  return make_input(name, image)
+
+
+def make_text_input(name="text", text="this is text", bs=1):
+  text = np.array([text] * bs, dtype=np.object_).reshape(-1, 1)
+  return make_input(name, text)
+
+
+def inference(triton_client, model_name, input_: list, output_names: list):
+  res = triton_client.infer(
+      model_name=model_name,
+      inputs=input_,
+      outputs=[InferRequestedOutput(each) for each in output_names])
+  return {output_name: res.as_numpy(output_name) for output_name in output_names}
+
+
+def execute_test_image_as_input(triton_client, model_name, input_name, output_names):
+  single_input = make_random_image_input(name=input_name, bs=1, size=256)
+  res = inference(triton_client, model_name, [single_input], output_names=output_names)
+  outputs = [res[each] for each in output_names]
+
+  if len(outputs) > 1:
+    assert all(len(each[0]) == 1
+               for each in outputs), f"[{model_name}], all predictions must have the same length"
+  elif model_name == "visual-classifier":
+    assert np.all(outputs[0] <= 1.)
+  else:
+    assert len(outputs[0].shape)
+
+  # Test bs > 1
+  multi_input = make_random_image_input(name=input_name, bs=2, size=256)
+  res = inference(triton_client, model_name, [multi_input], output_names=output_names)
+  outputs = [res[each] for each in output_names]
+
+  if len(outputs) > 1:
+    assert all(len(each[0]) == 1
+               for each in outputs), f"[{model_name}], all predictions must have the same length"
+  elif model_name == "visual-classifier":
+    assert np.all(outputs[0] <= 1.)
+  else:
+    assert len(outputs[0].shape)
+
+  # Test bs > max_batch_size
+  with pytest.raises(Exception):
+    multi_input = make_random_image_input(name=input_name, bs=10, size=256)
+    res = inference(triton_client, model_name, [multi_input], output_names=output_names)
+
+
+def execute_test_text_as_input(triton_client, model_name, input_name, output_names):
+  single_input = make_text_input(name=input_name, bs=1)
+  res = inference(triton_client, model_name, [single_input], output_names=output_names)
+  outputs = [res[each] for each in output_names]
+
+  if model_name == "text-to-image":
+    assert len(outputs[0][0].shape) == 3
+  elif model_name == "text-classifier":
+    assert np.all(outputs[0] <= 1.)
+  else:
+    assert len(outputs[0].shape)
+
+  # Test bs > 1
+  multi_input = make_text_input(name=input_name, bs=2)
+  res = inference(triton_client, model_name, [multi_input], output_names=output_names)
+  outputs = [res[each] for each in output_names]
+
+  if model_name == "text-to-image":
+    assert len(outputs[0][0].shape) == 3
+  elif model_name == "text-classifier":
+    assert np.all(outputs[0] <= 1.)
+  else:
+    assert len(outputs[0].shape)
+
+  # Test bs > max_batch_size
+  with pytest.raises(Exception):
+    multi_input = make_text_input(name=input_name, bs=10)
+    res = inference(triton_client, model_name, [multi_input], output_names=output_names)
+
+
+class TestModelTypes:
+
+  # --------- Image Input --------- #
+  def test_visual_detector(self, triton_client):
+    model_name = "visual-detector"
+    input_name = "image"
+    output_names = ["predicted_bboxes", "predicted_labels", "predicted_scores"]
+    execute_test_image_as_input(triton_client, model_name, input_name, output_names)
+
+  def test_visual_classifier(self, triton_client):
+    model_name = "visual-classifier"
+    input_name = "image"
+    output_names = ["softmax_predictions"]
+    execute_test_image_as_input(triton_client, model_name, input_name, output_names)
+
+  def test_visual_embedder(self, triton_client):
+    model_name = "visual-embedder"
+    input_name = "image"
+    output_names = ["embeddings"]
+    execute_test_image_as_input(triton_client, model_name, input_name, output_names)
+
+  def test_visual_segmenter(self, triton_client):
+    model_name = "visual-segmenter"
+    input_name = "image"
+    output_names = ["predicted_mask"]
+    execute_test_image_as_input(triton_client, model_name, input_name, output_names)
+
+  # --------- Text Input --------- #
+  def test_text_to_image(self, triton_client):
+    model_name = "text-to-image"
+    input_name = "text"
+    output_names = ["image"]
+    execute_test_text_as_input(triton_client, model_name, input_name, output_names)
+
+  def test_text_classifier(self, triton_client):
+    model_name = "text-classifier"
+    input_name = "text"
+    output_names = ["softmax_predictions"]
+    execute_test_text_as_input(triton_client, model_name, input_name, output_names)
+
+  def test_text_embedder(self, triton_client):
+    model_name = "text-embedder"
+    input_name = "text"
+    output_names = ["embeddings"]
+    execute_test_text_as_input(triton_client, model_name, input_name, output_names)
+
+  def test_text_to_text(self, triton_client):
+    model_name = "text-to-text"
+    input_name = "text"
+    output_names = ["text"]
+    execute_test_text_as_input(triton_client, model_name, input_name, output_names)
+
+  # --------- Multimodal Inputs --------- #
+  def test_multimodal_embedder(self, triton_client):
+    model_name = "multimodal-embedder"
+    output_names = ["embeddings"]
+    execute_test_image_as_input(triton_client, model_name, "image", output_names)
+    execute_test_text_as_input(triton_client, model_name, "text", output_names)
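The module-level loop above blocks import until the Triton server answers `is_server_live()`, retrying `MAX_TRIES` times every `INTERVAL` seconds. A minimal sketch (not part of the patch) of the same wait expressed as a reusable helper, under the same 5-try / 3-second assumptions:

```python
# Sketch of the wait-for-server pattern used at import time in _test_all_dummies.py.
import time

from tritonclient.grpc import InferenceServerClient


def wait_for_server(url="localhost:8001", max_tries=5, interval=3.0):
  """Return True once the Triton gRPC endpoint reports live, else False."""
  for _ in range(max_tries):
    try:
      if InferenceServerClient(url).is_server_live():
        return True
    except Exception as e:  # server not up yet; retry after a pause
      print(e)
    time.sleep(interval)
  return False


if __name__ == "__main__":
  print("server live:", wait_for_server())
```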
diff --git a/tests/dummy_triton_models/multimodal-embedder/1/__init__.py b/tests/dummy_triton_models/multimodal-embedder/1/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/dummy_triton_models/multimodal-embedder/1/inference.py b/tests/dummy_triton_models/multimodal-embedder/1/inference.py
new file mode 100644
index 00000000..e210d62a
--- /dev/null
+++ b/tests/dummy_triton_models/multimodal-embedder/1/inference.py
@@ -0,0 +1,56 @@
+# This file contains boilerplate code to allow users to write their model
+# inference code, which will then interact with the Triton Inference Server
+# Python backend to serve end-user requests.
+# The module name, module path, class name & get_predictions() method name MUST be maintained as is,
+# but other methods may be added within the class as deemed fit, provided
+# they are invoked within the main get_predictions() inference method
+# if they play a role in any step of model inference.
+"""User model inference script."""
+
+import os
+from pathlib import Path
+import numpy as np
+from clarifai.models.model_serving.model_config import ModelTypes, get_model_config
+
+config = get_model_config(ModelTypes.multimodal_embedder)
+
+
+class InferenceModel:
+  """User model inference class."""
+
+  def __init__(self) -> None:
+    """
+    Load inference-time artifacts that are used frequently, e.g. models, tokenizers, etc.,
+    in this method so they are loaded only once for faster inference.
+    """
+    self.base_path: Path = os.path.dirname(__file__)
+    ## sample model loading code:
+    #self.checkpoint_path: Path = os.path.join(self.base_path, "your checkpoint filename/path")
+    #self.model: Callable =
+
+  @config.inference.wrap_func
+  def get_predictions(self, input_data, **kwargs):
+    """
+    Main model inference method.
+
+    Args:
+    -----
+      input_data: A single input data item to predict on.
+        Input data can be an image or text, etc., depending on the model type.
+
+    Returns:
+    --------
+      One of the clarifai.models.model_serving.models.output types. Refer to the README/docs.
+    """
+    outputs = []
+    for inp_data in input_data:
+      image, text = inp_data["image"], inp_data["text"]
+      if text is not None:
+        assert isinstance(text, str), "Incorrect type of text, expected str"
+        embeddings = np.zeros(768)
+      else:
+        assert isinstance(image, np.ndarray), "Incorrect type of image, expected np.ndarray"
+        embeddings = np.ones(768)
+      outputs.append(config.inference.return_type(embedding_vector=embeddings))
+
+    return outputs
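For the multimodal type, the `config.inference.wrap_func` wrapper hands `get_predictions` a batch of dicts keyed by the config's input names, with `None` standing in for an input the client omitted. A standalone sketch of that branching contract in plain numpy (no Clarifai imports; the batch literals below are illustrative):

```python
import numpy as np


def dummy_embed(batch):
  """Mirrors the dummy model above: zeros for text inputs, ones for image inputs."""
  outputs = []
  for item in batch:
    if item["text"] is not None:
      assert isinstance(item["text"], str)
      outputs.append(np.zeros(768))
    else:
      assert isinstance(item["image"], np.ndarray)
      outputs.append(np.ones(768))
  return outputs


embeddings = dummy_embed([
    {"image": None, "text": "a query"},
    {"image": np.zeros((64, 64, 3), dtype=np.uint8), "text": None},
])
print(embeddings[0][:3], embeddings[1][:3])  # [0. 0. 0.] [1. 1. 1.]
```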
+"""Triton inference server Python Backend Model.""" + +import os +import sys + +try: + import triton_python_backend_utils as pb_utils +except ModuleNotFoundError: + pass +from google.protobuf import text_format +from tritonclient.grpc.model_config_pb2 import ModelConfig +from clarifai.models.model_serving.model_config.inference_parameter import parse_req_parameters + + +class TritonPythonModel: + """ + Triton Python BE Model. + """ + + def initialize(self, args): + """ + Triton server init. + """ + args["model_repository"] = args["model_repository"].replace("/1/model.py", "") + sys.path.append(os.path.dirname(__file__)) + from inference import InferenceModel + + self.inference_obj = InferenceModel() + + # Read input_name from config file + self.config_msg = ModelConfig() + with open(os.path.join(args["model_repository"], "config.pbtxt"), "r") as f: + cfg = f.read() + text_format.Merge(cfg, self.config_msg) + self.input_names = [inp.name for inp in self.config_msg.input] + + def execute(self, requests): + """ + Serve model inference requests. + """ + responses = [] + + for request in requests: + parameters = request.parameters() + parameters = parse_req_parameters(parameters) if parameters else {} + + if len(self.input_names) == 1: + in_batch = pb_utils.get_input_tensor_by_name(request, self.input_names[0]) + in_batch = in_batch.as_numpy() + inference_response = self.inference_obj.get_predictions(in_batch, **parameters) + else: + multi_in_batch_dict = {} + for input_name in self.input_names: + in_batch = pb_utils.get_input_tensor_by_name(request, input_name) + in_batch = in_batch.as_numpy() if in_batch is not None else [] + multi_in_batch_dict.update({input_name: in_batch}) + + inference_response = self.inference_obj.get_predictions(multi_in_batch_dict, **parameters) + + responses.append(inference_response) + + return responses diff --git a/tests/dummy_triton_models/multimodal-embedder/1/test_multimodal_embedder.py b/tests/dummy_triton_models/multimodal-embedder/1/test_multimodal_embedder.py new file mode 100644 index 00000000..1874c676 --- /dev/null +++ b/tests/dummy_triton_models/multimodal-embedder/1/test_multimodal_embedder.py @@ -0,0 +1,64 @@ +import logging +import os +import unittest + +from clarifai.models.model_serving.models.default_test import DefaultTestInferenceModel + + +class CustomTestInferenceModel(DefaultTestInferenceModel): + """ + Run this file to test your implementation of InferenceModel in inference.py with default tests of Triton configuration and its output values based on basic predefined inputs + If you want to write custom testcase or just test output value. + Please follow these instrucitons: + 1. Name your test function with prefix "test" so that pytest can execute + 2. In order to obtain output of InferenceModel, call `self.triton_get_predictions(input_data)`. + 3. If your input is `image` and you have set custom size of it when building model repository, + call `self.preprocess(image)` to obtain correct resized input + 4. 
Run this test by calling + ```bash + pytest ./your_triton_folder/1/test.py + #to see std output + pytest --log-cli-level=INFO -s ./your_triton_folder/1/test.py + ``` + + ### Examples: + + test text-to-image output + ``` + def test_text_to_image_output(self): + text = "Test text" + output = self.triton_get_predictions(text) + image = output.image # uint8 np.ndarray image + #show or save + ``` + + test visual-classifier output + ``` + def test_visual_classifier(self): + image = cv2.imread("your/local/image.jpg") # Keep in mind of format of image (BGR or RGB) + output = self.triton_get_predictions(image) + scores = output.predicted_scores # np.ndarray + #process scores to get class id and its score + logger.info(result) + """ + + # Insert your inference parameters json path here + # or insert a dictionary of your_parameter_name and value, e.g dict(x=1.5, y="text", c=True) + # or Leave it as "" if you don't have it. + inference_parameters = "" + + ########### Initialization. Do not change it ########### + __test__ = True + + def setUp(self) -> None: + logging.info("Initializing...") + model_type = "multimodal-embedder" # your model type + self.intitialize( + model_type, + repo_version_dir=os.path.dirname(__file__), + is_instance_kind_gpu=True, + inference_parameters=self.inference_parameters) + + ######################################################## + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/dummy_triton_models/multimodal-embedder/config.pbtxt b/tests/dummy_triton_models/multimodal-embedder/config.pbtxt new file mode 100644 index 00000000..7a49e967 --- /dev/null +++ b/tests/dummy_triton_models/multimodal-embedder/config.pbtxt @@ -0,0 +1,29 @@ +name: "multimodal-embedder" +max_batch_size: 4 +input { + name: "image" + data_type: TYPE_UINT8 + dims: -1 + dims: -1 + dims: 3 + optional: true +} +input { + name: "text" + data_type: TYPE_STRING + dims: 1 + optional: true +} +output { + name: "embeddings" + data_type: TYPE_FP32 + dims: -1 +} +instance_group { + count: 1 + kind: KIND_GPU +} +dynamic_batching { + max_queue_delay_microseconds: 500 +} +backend: "python" diff --git a/tests/dummy_triton_models/multimodal-embedder/requirements.txt b/tests/dummy_triton_models/multimodal-embedder/requirements.txt new file mode 100644 index 00000000..6f35289e --- /dev/null +++ b/tests/dummy_triton_models/multimodal-embedder/requirements.txt @@ -0,0 +1,2 @@ +clarifai>=9.11.0 +tritonclient[all] diff --git a/tests/dummy_triton_models/text-classifier/1/__init__.py b/tests/dummy_triton_models/text-classifier/1/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/dummy_triton_models/text-classifier/1/inference.py b/tests/dummy_triton_models/text-classifier/1/inference.py new file mode 100644 index 00000000..db774567 --- /dev/null +++ b/tests/dummy_triton_models/text-classifier/1/inference.py @@ -0,0 +1,55 @@ +# This file contains boilerplate code to allow users write their model +# inference code that will then interact with the Triton Inference Server +# Python backend to serve end user requests. 
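Both inputs in the config above are marked `optional: true`, so a client may send either tensor alone and the Python backend passes `None` through for the missing one. A sketch of a text-only request against this model, assuming the same local server the tests use:

```python
# Text-only call to the multimodal embedder; the optional "image" input is omitted.
import numpy as np
from tritonclient.grpc import InferenceServerClient, InferInput, InferRequestedOutput
from tritonclient.utils import np_to_triton_dtype

text = np.array(["a text-only query"], dtype=np.object_).reshape(-1, 1)
text_input = InferInput("text", text.shape, np_to_triton_dtype(text.dtype))
text_input.set_data_from_numpy(text)

client = InferenceServerClient("localhost:8001")
res = client.infer(
    model_name="multimodal-embedder",
    inputs=[text_input],
    outputs=[InferRequestedOutput("embeddings")])
print(res.as_numpy("embeddings").shape)  # (1, 768) from the dummy model
```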
diff --git a/tests/dummy_triton_models/text-classifier/1/__init__.py b/tests/dummy_triton_models/text-classifier/1/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/dummy_triton_models/text-classifier/1/inference.py b/tests/dummy_triton_models/text-classifier/1/inference.py
new file mode 100644
index 00000000..db774567
--- /dev/null
+++ b/tests/dummy_triton_models/text-classifier/1/inference.py
@@ -0,0 +1,55 @@
+# This file contains boilerplate code to allow users to write their model
+# inference code, which will then interact with the Triton Inference Server
+# Python backend to serve end-user requests.
+# The module name, module path, class name & get_predictions() method name MUST be maintained as is,
+# but other methods may be added within the class as deemed fit, provided
+# they are invoked within the main get_predictions() inference method
+# if they play a role in any step of model inference.
+"""User model inference script."""
+
+import os
+from pathlib import Path
+import numpy as np
+from clarifai.models.model_serving.model_config import ModelTypes, get_model_config
+
+config = get_model_config(ModelTypes.text_classifier)
+
+
+class InferenceModel:
+  """User model inference class."""
+
+  def __init__(self) -> None:
+    """
+    Load inference-time artifacts that are used frequently, e.g. models, tokenizers, etc.,
+    in this method so they are loaded only once for faster inference.
+    """
+    self.base_path: Path = os.path.dirname(__file__)
+    ## sample model loading code:
+    #self.checkpoint_path: Path = os.path.join(self.base_path, "your checkpoint filename/path")
+    #self.model: Callable =
+
+  #Add relevant model type decorator to the method below (see docs/model_types for ref.)
+  @config.inference.wrap_func
+  def get_predictions(self, input_data, **kwargs):
+    """
+    Main model inference method.
+
+    Args:
+    -----
+      input_data: A single input data item to predict on.
+        Input data can be an image or text, etc., depending on the model type.
+
+    Returns:
+    --------
+      One of the clarifai.models.model_serving.models.output types. Refer to the README/docs.
+    """
+    # Delete/Comment out the line below and add your inference code
+    outputs = []
+
+    for inp in input_data:
+      assert isinstance(inp, str), "Incorrect type of text, expected str"
+      output = np.random.rand(4)
+      output = config.inference.return_type(output)
+      outputs.append(output)
+
+    return outputs
diff --git a/tests/dummy_triton_models/text-classifier/1/model.py b/tests/dummy_triton_models/text-classifier/1/model.py
new file mode 100644
index 00000000..36b54b37
--- /dev/null
+++ b/tests/dummy_triton_models/text-classifier/1/model.py
@@ -0,0 +1,74 @@
+# Copyright 2023 Clarifai, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Triton inference server Python Backend Model."""
+
+import os
+import sys
+
+try:
+  import triton_python_backend_utils as pb_utils
+except ModuleNotFoundError:
+  pass
+from google.protobuf import text_format
+from tritonclient.grpc.model_config_pb2 import ModelConfig
+from clarifai.models.model_serving.model_config.inference_parameter import parse_req_parameters
+
+
+class TritonPythonModel:
+  """
+  Triton Python BE Model.
+  """
+
+  def initialize(self, args):
+    """
+    Triton server init.
+    """
+    args["model_repository"] = args["model_repository"].replace("/1/model.py", "")
+    sys.path.append(os.path.dirname(__file__))
+    from inference import InferenceModel
+
+    self.inference_obj = InferenceModel()
+
+    # Read input names from the config file
+    self.config_msg = ModelConfig()
+    with open(os.path.join(args["model_repository"], "config.pbtxt"), "r") as f:
+      cfg = f.read()
+    text_format.Merge(cfg, self.config_msg)
+    self.input_names = [inp.name for inp in self.config_msg.input]
+
+  def execute(self, requests):
+    """
+    Serve model inference requests.
+    """
+    responses = []
+
+    for request in requests:
+      parameters = request.parameters()
+      parameters = parse_req_parameters(parameters) if parameters else {}
+
+      if len(self.input_names) == 1:
+        in_batch = pb_utils.get_input_tensor_by_name(request, self.input_names[0])
+        in_batch = in_batch.as_numpy()
+        inference_response = self.inference_obj.get_predictions(in_batch, **parameters)
+      else:
+        multi_in_batch_dict = {}
+        for input_name in self.input_names:
+          in_batch = pb_utils.get_input_tensor_by_name(request, input_name)
+          in_batch = in_batch.as_numpy() if in_batch is not None else []
+          multi_in_batch_dict.update({input_name: in_batch})
+
+        inference_response = self.inference_obj.get_predictions(multi_in_batch_dict, **parameters)
+
+      responses.append(inference_response)
+
+    return responses
diff --git a/tests/dummy_triton_models/text-classifier/1/test_text_classifier.py b/tests/dummy_triton_models/text-classifier/1/test_text_classifier.py
new file mode 100644
index 00000000..012a7732
--- /dev/null
+++ b/tests/dummy_triton_models/text-classifier/1/test_text_classifier.py
@@ -0,0 +1,64 @@
+import logging
+import os
+import unittest
+
+from clarifai.models.model_serving.models.default_test import DefaultTestInferenceModel
+
+
+class CustomTestInferenceModel(DefaultTestInferenceModel):
+  """
+  Run this file to test your implementation of InferenceModel in inference.py with the default tests of the Triton configuration and its output values on basic predefined inputs.
+  If you want to write a custom test case or just check output values,
+  please follow these instructions:
+  1. Name your test function with the prefix "test" so that pytest can execute it
+  2. To obtain the output of InferenceModel, call `self.triton_get_predictions(input_data)`.
+  3. If your input is `image` and you have set a custom size for it when building the model repository,
+  call `self.preprocess(image)` to obtain a correctly resized input
+  4. Run this test by calling
+  ```bash
+  pytest ./your_triton_folder/1/test.py
+  #to see std output
+  pytest --log-cli-level=INFO -s ./your_triton_folder/1/test.py
+  ```
+
+  ### Examples:
+
+  test text-to-image output
+  ```
+  def test_text_to_image_output(self):
+    text = "Test text"
+    output = self.triton_get_predictions(text)
+    image = output.image  # uint8 np.ndarray image
+    #show or save
+  ```
+
+  test visual-classifier output
+  ```
+  def test_visual_classifier(self):
+    image = cv2.imread("your/local/image.jpg")  # Keep in mind the format of the image (BGR or RGB)
+    output = self.triton_get_predictions(image)
+    scores = output.predicted_scores  # np.ndarray
+    #process scores to get the class id and its score
+    logger.info(result)
+  ```
+  """
+
+  # Insert your inference parameters json path here,
+  # or insert a dictionary of your_parameter_name and value, e.g. dict(x=1.5, y="text", c=True),
+  # or leave it as "" if you don't have it.
+  inference_parameters = ""
+
+  ########### Initialization. Do not change it ###########
+  __test__ = True
+
+  def setUp(self) -> None:
+    logging.info("Initializing...")
+    model_type = "text-classifier"  # your model type
+    self.intitialize(
+        model_type,
+        repo_version_dir=os.path.dirname(__file__),
+        is_instance_kind_gpu=True,
+        inference_parameters=self.inference_parameters)
+
+  ########################################################
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/tests/dummy_triton_models/text-classifier/config.pbtxt b/tests/dummy_triton_models/text-classifier/config.pbtxt
new file mode 100644
index 00000000..542d0929
--- /dev/null
+++ b/tests/dummy_triton_models/text-classifier/config.pbtxt
@@ -0,0 +1,21 @@
+name: "text-classifier"
+max_batch_size: 4
+input {
+  name: "text"
+  data_type: TYPE_STRING
+  dims: 1
+}
+output {
+  name: "softmax_predictions"
+  data_type: TYPE_FP32
+  dims: -1
+  label_filename: "labels.txt"
+}
+instance_group {
+  count: 1
+  kind: KIND_GPU
+}
+dynamic_batching {
+  max_queue_delay_microseconds: 500
+}
+backend: "python"
diff --git a/tests/dummy_triton_models/text-classifier/labels.txt b/tests/dummy_triton_models/text-classifier/labels.txt
new file mode 100644
index 00000000..94ebaf90
--- /dev/null
+++ b/tests/dummy_triton_models/text-classifier/labels.txt
@@ -0,0 +1,4 @@
+1
+2
+3
+4
diff --git a/tests/dummy_triton_models/text-classifier/requirements.txt b/tests/dummy_triton_models/text-classifier/requirements.txt
new file mode 100644
index 00000000..6f35289e
--- /dev/null
+++ b/tests/dummy_triton_models/text-classifier/requirements.txt
@@ -0,0 +1,2 @@
+clarifai>=9.11.0
+tritonclient[all]
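`label_filename` in the config above ties the `softmax_predictions` output to `labels.txt`, whose line order must match the index order of the score vector (four lines for the dummy's `np.random.rand(4)`). A small sketch of mapping a score row back to class names on the client side (the scores below are a stand-in for one output row):

```python
import numpy as np

# labels.txt: one class name per line, in index order.
with open("labels.txt") as f:
  labels = [line.strip() for line in f if line.strip()]

scores = np.array([0.1, 0.7, 0.15, 0.05])  # stand-in for one "softmax_predictions" row
top = int(np.argmax(scores))
print(f"predicted class: {labels[top]} (score={scores[top]:.2f})")
```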
diff --git a/tests/dummy_triton_models/text-embedder/1/__init__.py b/tests/dummy_triton_models/text-embedder/1/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/dummy_triton_models/text-embedder/1/inference.py b/tests/dummy_triton_models/text-embedder/1/inference.py
new file mode 100644
index 00000000..009cf3bc
--- /dev/null
+++ b/tests/dummy_triton_models/text-embedder/1/inference.py
@@ -0,0 +1,55 @@
+# This file contains boilerplate code to allow users to write their model
+# inference code, which will then interact with the Triton Inference Server
+# Python backend to serve end-user requests.
+# The module name, module path, class name & get_predictions() method name MUST be maintained as is,
+# but other methods may be added within the class as deemed fit, provided
+# they are invoked within the main get_predictions() inference method
+# if they play a role in any step of model inference.
+"""User model inference script."""
+
+import os
+from pathlib import Path
+import numpy as np
+from clarifai.models.model_serving.model_config import ModelTypes, get_model_config
+
+config = get_model_config(ModelTypes.text_embedder)
+
+
+class InferenceModel:
+  """User model inference class."""
+
+  def __init__(self) -> None:
+    """
+    Load inference-time artifacts that are used frequently, e.g. models, tokenizers, etc.,
+    in this method so they are loaded only once for faster inference.
+    """
+    self.base_path: Path = os.path.dirname(__file__)
+    ## sample model loading code:
+    #self.checkpoint_path: Path = os.path.join(self.base_path, "your checkpoint filename/path")
+    #self.model: Callable =
+
+  #Add relevant model type decorator to the method below (see docs/model_types for ref.)
+  @config.inference.wrap_func
+  def get_predictions(self, input_data, **kwargs):
+    """
+    Main model inference method.
+
+    Args:
+    -----
+      input_data: A single input data item to predict on.
+        Input data can be an image or text, etc., depending on the model type.
+
+    Returns:
+    --------
+      One of the clarifai.models.model_serving.models.output types. Refer to the README/docs.
+    """
+    # Delete/Comment out the line below and add your inference code
+    outputs = []
+
+    for inp in input_data:
+      assert isinstance(inp, str), "Incorrect type of text, expected str"
+      output = np.random.randn(768)
+      output = config.inference.return_type(output)
+      outputs.append(output)
+
+    return outputs
diff --git a/tests/dummy_triton_models/text-embedder/1/model.py b/tests/dummy_triton_models/text-embedder/1/model.py
new file mode 100644
index 00000000..36b54b37
--- /dev/null
+++ b/tests/dummy_triton_models/text-embedder/1/model.py
@@ -0,0 +1,74 @@
+# Copyright 2023 Clarifai, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Triton inference server Python Backend Model."""
+
+import os
+import sys
+
+try:
+  import triton_python_backend_utils as pb_utils
+except ModuleNotFoundError:
+  pass
+from google.protobuf import text_format
+from tritonclient.grpc.model_config_pb2 import ModelConfig
+from clarifai.models.model_serving.model_config.inference_parameter import parse_req_parameters
+
+
+class TritonPythonModel:
+  """
+  Triton Python BE Model.
+  """
+
+  def initialize(self, args):
+    """
+    Triton server init.
+    """
+    args["model_repository"] = args["model_repository"].replace("/1/model.py", "")
+    sys.path.append(os.path.dirname(__file__))
+    from inference import InferenceModel
+
+    self.inference_obj = InferenceModel()
+
+    # Read input names from the config file
+    self.config_msg = ModelConfig()
+    with open(os.path.join(args["model_repository"], "config.pbtxt"), "r") as f:
+      cfg = f.read()
+    text_format.Merge(cfg, self.config_msg)
+    self.input_names = [inp.name for inp in self.config_msg.input]
+
+  def execute(self, requests):
+    """
+    Serve model inference requests.
+    """
+    responses = []
+
+    for request in requests:
+      parameters = request.parameters()
+      parameters = parse_req_parameters(parameters) if parameters else {}
+
+      if len(self.input_names) == 1:
+        in_batch = pb_utils.get_input_tensor_by_name(request, self.input_names[0])
+        in_batch = in_batch.as_numpy()
+        inference_response = self.inference_obj.get_predictions(in_batch, **parameters)
+      else:
+        multi_in_batch_dict = {}
+        for input_name in self.input_names:
+          in_batch = pb_utils.get_input_tensor_by_name(request, input_name)
+          in_batch = in_batch.as_numpy() if in_batch is not None else []
+          multi_in_batch_dict.update({input_name: in_batch})
+
+        inference_response = self.inference_obj.get_predictions(multi_in_batch_dict, **parameters)
+
+      responses.append(inference_response)
+
+    return responses
diff --git a/tests/dummy_triton_models/text-embedder/1/test_text_embedder.py b/tests/dummy_triton_models/text-embedder/1/test_text_embedder.py
new file mode 100644
index 00000000..5c8bcd90
--- /dev/null
+++ b/tests/dummy_triton_models/text-embedder/1/test_text_embedder.py
@@ -0,0 +1,64 @@
+import logging
+import os
+import unittest
+
+from clarifai.models.model_serving.models.default_test import DefaultTestInferenceModel
+
+
+class CustomTestInferenceModel(DefaultTestInferenceModel):
+  """
+  Run this file to test your implementation of InferenceModel in inference.py with the default tests of the Triton configuration and its output values on basic predefined inputs.
+  If you want to write a custom test case or just check output values,
+  please follow these instructions:
+  1. Name your test function with the prefix "test" so that pytest can execute it
+  2. To obtain the output of InferenceModel, call `self.triton_get_predictions(input_data)`.
+  3. If your input is `image` and you have set a custom size for it when building the model repository,
+  call `self.preprocess(image)` to obtain a correctly resized input
+  4. Run this test by calling
+  ```bash
+  pytest ./your_triton_folder/1/test.py
+  #to see std output
+  pytest --log-cli-level=INFO -s ./your_triton_folder/1/test.py
+  ```
+
+  ### Examples:
+
+  test text-to-image output
+  ```
+  def test_text_to_image_output(self):
+    text = "Test text"
+    output = self.triton_get_predictions(text)
+    image = output.image  # uint8 np.ndarray image
+    #show or save
+  ```
+
+  test visual-classifier output
+  ```
+  def test_visual_classifier(self):
+    image = cv2.imread("your/local/image.jpg")  # Keep in mind the format of the image (BGR or RGB)
+    output = self.triton_get_predictions(image)
+    scores = output.predicted_scores  # np.ndarray
+    #process scores to get the class id and its score
+    logger.info(result)
+  ```
+  """
+
+  # Insert your inference parameters json path here,
+  # or insert a dictionary of your_parameter_name and value, e.g. dict(x=1.5, y="text", c=True),
+  # or leave it as "" if you don't have it.
+  inference_parameters = ""
+
+  ########### Initialization. Do not change it ###########
+  __test__ = True
+
+  def setUp(self) -> None:
+    logging.info("Initializing...")
+    model_type = "text-embedder"  # your model type
+    self.intitialize(
+        model_type,
+        repo_version_dir=os.path.dirname(__file__),
+        is_instance_kind_gpu=True,
+        inference_parameters=self.inference_parameters)
+
+  ########################################################
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/tests/dummy_triton_models/text-embedder/config.pbtxt b/tests/dummy_triton_models/text-embedder/config.pbtxt
new file mode 100644
index 00000000..98bf2867
--- /dev/null
+++ b/tests/dummy_triton_models/text-embedder/config.pbtxt
@@ -0,0 +1,20 @@
+name: "text-embedder"
+max_batch_size: 4
+input {
+  name: "text"
+  data_type: TYPE_STRING
+  dims: 1
+}
+output {
+  name: "embeddings"
+  data_type: TYPE_FP32
+  dims: -1
+}
+instance_group {
+  count: 1
+  kind: KIND_GPU
+}
+dynamic_batching {
+  max_queue_delay_microseconds: 500
+}
+backend: "python"
diff --git a/tests/dummy_triton_models/text-embedder/requirements.txt b/tests/dummy_triton_models/text-embedder/requirements.txt
new file mode 100644
index 00000000..6f35289e
--- /dev/null
+++ b/tests/dummy_triton_models/text-embedder/requirements.txt
@@ -0,0 +1,2 @@
+clarifai>=9.11.0
+tritonclient[all]
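A typical downstream use of the `embeddings` output is similarity search. A minimal cosine-similarity sketch (the vectors are stand-ins for two output rows of the model above):

```python
import numpy as np


def cosine(a, b):
  """Cosine similarity; the epsilon guards against zero-norm vectors."""
  return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-12))


emb_a = np.random.randn(768)  # stand-in for the embedding of one text
emb_b = np.random.randn(768)  # stand-in for another
print(f"similarity: {cosine(emb_a, emb_b):.3f}")
```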
diff --git a/tests/dummy_triton_models/text-to-image/1/__init__.py b/tests/dummy_triton_models/text-to-image/1/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/dummy_triton_models/text-to-image/1/inference.py b/tests/dummy_triton_models/text-to-image/1/inference.py
new file mode 100644
index 00000000..4fe0f2bc
--- /dev/null
+++ b/tests/dummy_triton_models/text-to-image/1/inference.py
@@ -0,0 +1,55 @@
+# This file contains boilerplate code to allow users to write their model
+# inference code, which will then interact with the Triton Inference Server
+# Python backend to serve end-user requests.
+# The module name, module path, class name & get_predictions() method name MUST be maintained as is,
+# but other methods may be added within the class as deemed fit, provided
+# they are invoked within the main get_predictions() inference method
+# if they play a role in any step of model inference.
+"""User model inference script."""
+
+import os
+from pathlib import Path
+import numpy as np
+from clarifai.models.model_serving.model_config import ModelTypes, get_model_config
+
+config = get_model_config(ModelTypes.text_to_image)
+
+
+class InferenceModel:
+  """User model inference class."""
+
+  def __init__(self) -> None:
+    """
+    Load inference-time artifacts that are used frequently, e.g. models, tokenizers, etc.,
+    in this method so they are loaded only once for faster inference.
+    """
+    self.base_path: Path = os.path.dirname(__file__)
+    ## sample model loading code:
+    #self.checkpoint_path: Path = os.path.join(self.base_path, "your checkpoint filename/path")
+    #self.model: Callable =
+
+  #Add relevant model type decorator to the method below (see docs/model_types for ref.)
+  @config.inference.wrap_func
+  def get_predictions(self, input_data, **kwargs):
+    """
+    Main model inference method.
+
+    Args:
+    -----
+      input_data: A single input data item to predict on.
+        Input data can be an image or text, etc., depending on the model type.
+
+    Returns:
+    --------
+      One of the clarifai.models.model_serving.models.output types. Refer to the README/docs.
+    """
+    # Delete/Comment out the line below and add your inference code
+    outputs = []
+
+    for inp in input_data:
+      assert isinstance(inp, str), "Incorrect type of text, expected str"
+      output = np.random.rand(512, 512, 3) * 255
+      output = config.inference.return_type(output.astype("uint8"))
+      outputs.append(output)
+
+    return outputs
diff --git a/tests/dummy_triton_models/text-to-image/1/model.py b/tests/dummy_triton_models/text-to-image/1/model.py
new file mode 100644
index 00000000..36b54b37
--- /dev/null
+++ b/tests/dummy_triton_models/text-to-image/1/model.py
@@ -0,0 +1,74 @@
+# Copyright 2023 Clarifai, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Triton inference server Python Backend Model."""
+
+import os
+import sys
+
+try:
+  import triton_python_backend_utils as pb_utils
+except ModuleNotFoundError:
+  pass
+from google.protobuf import text_format
+from tritonclient.grpc.model_config_pb2 import ModelConfig
+from clarifai.models.model_serving.model_config.inference_parameter import parse_req_parameters
+
+
+class TritonPythonModel:
+  """
+  Triton Python BE Model.
+  """
+
+  def initialize(self, args):
+    """
+    Triton server init.
+    """
+    args["model_repository"] = args["model_repository"].replace("/1/model.py", "")
+    sys.path.append(os.path.dirname(__file__))
+    from inference import InferenceModel
+
+    self.inference_obj = InferenceModel()
+
+    # Read input names from the config file
+    self.config_msg = ModelConfig()
+    with open(os.path.join(args["model_repository"], "config.pbtxt"), "r") as f:
+      cfg = f.read()
+    text_format.Merge(cfg, self.config_msg)
+    self.input_names = [inp.name for inp in self.config_msg.input]
+
+  def execute(self, requests):
+    """
+    Serve model inference requests.
+    """
+    responses = []
+
+    for request in requests:
+      parameters = request.parameters()
+      parameters = parse_req_parameters(parameters) if parameters else {}
+
+      if len(self.input_names) == 1:
+        in_batch = pb_utils.get_input_tensor_by_name(request, self.input_names[0])
+        in_batch = in_batch.as_numpy()
+        inference_response = self.inference_obj.get_predictions(in_batch, **parameters)
+      else:
+        multi_in_batch_dict = {}
+        for input_name in self.input_names:
+          in_batch = pb_utils.get_input_tensor_by_name(request, input_name)
+          in_batch = in_batch.as_numpy() if in_batch is not None else []
+          multi_in_batch_dict.update({input_name: in_batch})
+
+        inference_response = self.inference_obj.get_predictions(multi_in_batch_dict, **parameters)
+
+      responses.append(inference_response)
+
+    return responses
diff --git a/tests/dummy_triton_models/text-to-image/1/test_text_to_image.py b/tests/dummy_triton_models/text-to-image/1/test_text_to_image.py
new file mode 100644
index 00000000..a277ed7e
--- /dev/null
+++ b/tests/dummy_triton_models/text-to-image/1/test_text_to_image.py
@@ -0,0 +1,64 @@
+import logging
+import os
+import unittest
+
+from clarifai.models.model_serving.models.default_test import DefaultTestInferenceModel
+
+
+class CustomTestInferenceModel(DefaultTestInferenceModel):
+  """
+  Run this file to test your implementation of InferenceModel in inference.py with the default tests of the Triton configuration and its output values on basic predefined inputs.
+  If you want to write a custom test case or just check output values,
+  please follow these instructions:
+  1. Name your test function with the prefix "test" so that pytest can execute it
+  2. To obtain the output of InferenceModel, call `self.triton_get_predictions(input_data)`.
+  3. If your input is `image` and you have set a custom size for it when building the model repository,
+  call `self.preprocess(image)` to obtain a correctly resized input
+  4. Run this test by calling
+  ```bash
+  pytest ./your_triton_folder/1/test.py
+  #to see std output
+  pytest --log-cli-level=INFO -s ./your_triton_folder/1/test.py
+  ```
+
+  ### Examples:
+
+  test text-to-image output
+  ```
+  def test_text_to_image_output(self):
+    text = "Test text"
+    output = self.triton_get_predictions(text)
+    image = output.image  # uint8 np.ndarray image
+    #show or save
+  ```
+
+  test visual-classifier output
+  ```
+  def test_visual_classifier(self):
+    image = cv2.imread("your/local/image.jpg")  # Keep in mind the format of the image (BGR or RGB)
+    output = self.triton_get_predictions(image)
+    scores = output.predicted_scores  # np.ndarray
+    #process scores to get the class id and its score
+    logger.info(result)
+  ```
+  """
+
+  # Insert your inference parameters json path here,
+  # or insert a dictionary of your_parameter_name and value, e.g. dict(x=1.5, y="text", c=True),
+  # or leave it as "" if you don't have it.
+  inference_parameters = ""
+
+  ########### Initialization. Do not change it ###########
+  __test__ = True
+
+  def setUp(self) -> None:
+    logging.info("Initializing...")
+    model_type = "text-to-image"  # your model type
+    self.intitialize(
+        model_type,
+        repo_version_dir=os.path.dirname(__file__),
+        is_instance_kind_gpu=True,
+        inference_parameters=self.inference_parameters)
+
+  ########################################################
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/tests/dummy_triton_models/text-to-image/config.pbtxt b/tests/dummy_triton_models/text-to-image/config.pbtxt
new file mode 100644
index 00000000..09cf8fbf
--- /dev/null
+++ b/tests/dummy_triton_models/text-to-image/config.pbtxt
@@ -0,0 +1,22 @@
+name: "text-to-image"
+max_batch_size: 4
+input {
+  name: "text"
+  data_type: TYPE_STRING
+  dims: 1
+}
+output {
+  name: "image"
+  data_type: TYPE_UINT8
+  dims: -1
+  dims: -1
+  dims: 3
+}
+instance_group {
+  count: 1
+  kind: KIND_GPU
+}
+dynamic_batching {
+  max_queue_delay_microseconds: 500
+}
+backend: "python"
diff --git a/tests/dummy_triton_models/text-to-image/requirements.txt b/tests/dummy_triton_models/text-to-image/requirements.txt
new file mode 100644
index 00000000..6f35289e
--- /dev/null
+++ b/tests/dummy_triton_models/text-to-image/requirements.txt
@@ -0,0 +1,2 @@
+clarifai>=9.11.0
+tritonclient[all]
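The `image` output above is an HxWx3 uint8 tensor. A sketch of persisting one output row with Pillow (assumed installed; it is not in this model's requirements.txt):

```python
import numpy as np
from PIL import Image

# Stand-in for one "image" output row; the dummy model emits 512x512x3 uint8.
image_array = (np.random.rand(512, 512, 3) * 255).astype("uint8")
Image.fromarray(image_array).save("generated.png")
```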
diff --git a/tests/dummy_triton_models/text-to-text/1/__init__.py b/tests/dummy_triton_models/text-to-text/1/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/dummy_triton_models/text-to-text/1/inference.py b/tests/dummy_triton_models/text-to-text/1/inference.py
new file mode 100644
index 00000000..26bc19f0
--- /dev/null
+++ b/tests/dummy_triton_models/text-to-text/1/inference.py
@@ -0,0 +1,55 @@
+# This file contains boilerplate code to allow users to write their model
+# inference code, which will then interact with the Triton Inference Server
+# Python backend to serve end-user requests.
+# The module name, module path, class name & get_predictions() method name MUST be maintained as is,
+# but other methods may be added within the class as deemed fit, provided
+# they are invoked within the main get_predictions() inference method
+# if they play a role in any step of model inference.
+"""User model inference script."""
+
+import os
+from pathlib import Path
+import numpy as np
+from clarifai.models.model_serving.model_config import ModelTypes, get_model_config
+
+config = get_model_config(ModelTypes.text_to_text)
+
+
+class InferenceModel:
+  """User model inference class."""
+
+  def __init__(self) -> None:
+    """
+    Load inference-time artifacts that are used frequently, e.g. models, tokenizers, etc.,
+    in this method so they are loaded only once for faster inference.
+    """
+    self.base_path: Path = os.path.dirname(__file__)
+    ## sample model loading code:
+    #self.checkpoint_path: Path = os.path.join(self.base_path, "your checkpoint filename/path")
+    #self.model: Callable =
+
+  #Add relevant model type decorator to the method below (see docs/model_types for ref.)
+  @config.inference.wrap_func
+  def get_predictions(self, input_data, **kwargs):
+    """
+    Main model inference method.
+
+    Args:
+    -----
+      input_data: A single input data item to predict on.
+        Input data can be an image or text, etc., depending on the model type.
+
+    Returns:
+    --------
+      One of the clarifai.models.model_serving.models.output types. Refer to the README/docs.
+    """
+    # Delete/Comment out the line below and add your inference code
+    outputs = []
+
+    for inp in input_data:
+      assert isinstance(inp, str), "Incorrect type of text, expected str"
+      output = np.asarray(f"Dummy output: {inp}", dtype=object)
+      output = config.inference.return_type(output)
+      outputs.append(output)
+
+    return outputs
diff --git a/tests/dummy_triton_models/text-to-text/1/model.py b/tests/dummy_triton_models/text-to-text/1/model.py
new file mode 100644
index 00000000..36b54b37
--- /dev/null
+++ b/tests/dummy_triton_models/text-to-text/1/model.py
@@ -0,0 +1,74 @@
+# Copyright 2023 Clarifai, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Triton inference server Python Backend Model."""
+
+import os
+import sys
+
+try:
+  import triton_python_backend_utils as pb_utils
+except ModuleNotFoundError:
+  pass
+from google.protobuf import text_format
+from tritonclient.grpc.model_config_pb2 import ModelConfig
+from clarifai.models.model_serving.model_config.inference_parameter import parse_req_parameters
+
+
+class TritonPythonModel:
+  """
+  Triton Python BE Model.
+  """
+
+  def initialize(self, args):
+    """
+    Triton server init.
+    """
+    args["model_repository"] = args["model_repository"].replace("/1/model.py", "")
+    sys.path.append(os.path.dirname(__file__))
+    from inference import InferenceModel
+
+    self.inference_obj = InferenceModel()
+
+    # Read input names from the config file
+    self.config_msg = ModelConfig()
+    with open(os.path.join(args["model_repository"], "config.pbtxt"), "r") as f:
+      cfg = f.read()
+    text_format.Merge(cfg, self.config_msg)
+    self.input_names = [inp.name for inp in self.config_msg.input]
+
+  def execute(self, requests):
+    """
+    Serve model inference requests.
+    """
+    responses = []
+
+    for request in requests:
+      parameters = request.parameters()
+      parameters = parse_req_parameters(parameters) if parameters else {}
+
+      if len(self.input_names) == 1:
+        in_batch = pb_utils.get_input_tensor_by_name(request, self.input_names[0])
+        in_batch = in_batch.as_numpy()
+        inference_response = self.inference_obj.get_predictions(in_batch, **parameters)
+      else:
+        multi_in_batch_dict = {}
+        for input_name in self.input_names:
+          in_batch = pb_utils.get_input_tensor_by_name(request, input_name)
+          in_batch = in_batch.as_numpy() if in_batch is not None else []
+          multi_in_batch_dict.update({input_name: in_batch})
+
+        inference_response = self.inference_obj.get_predictions(multi_in_batch_dict, **parameters)
+
+      responses.append(inference_response)
+
+    return responses
diff --git a/tests/dummy_triton_models/text-to-text/1/test_text_to_text.py b/tests/dummy_triton_models/text-to-text/1/test_text_to_text.py
new file mode 100644
index 00000000..1d7ff13d
--- /dev/null
+++ b/tests/dummy_triton_models/text-to-text/1/test_text_to_text.py
@@ -0,0 +1,64 @@
+import logging
+import os
+import unittest
+
+from clarifai.models.model_serving.models.default_test import DefaultTestInferenceModel
+
+
+class CustomTestInferenceModel(DefaultTestInferenceModel):
+  """
+  Run this file to test your implementation of InferenceModel in inference.py with the default tests of the Triton configuration and its output values on basic predefined inputs.
+  If you want to write a custom test case or just check output values,
+  please follow these instructions:
+  1. Name your test function with the prefix "test" so that pytest can execute it
+  2. To obtain the output of InferenceModel, call `self.triton_get_predictions(input_data)`.
+  3. If your input is `image` and you have set a custom size for it when building the model repository,
+  call `self.preprocess(image)` to obtain a correctly resized input
+  4. Run this test by calling
+  ```bash
+  pytest ./your_triton_folder/1/test.py
+  #to see std output
+  pytest --log-cli-level=INFO -s ./your_triton_folder/1/test.py
+  ```
+
+  ### Examples:
+
+  test text-to-image output
+  ```
+  def test_text_to_image_output(self):
+    text = "Test text"
+    output = self.triton_get_predictions(text)
+    image = output.image  # uint8 np.ndarray image
+    #show or save
+  ```
+
+  test visual-classifier output
+  ```
+  def test_visual_classifier(self):
+    image = cv2.imread("your/local/image.jpg")  # Keep in mind the format of the image (BGR or RGB)
+    output = self.triton_get_predictions(image)
+    scores = output.predicted_scores  # np.ndarray
+    #process scores to get the class id and its score
+    logger.info(result)
+  ```
+  """
+
+  # Insert your inference parameters json path here,
+  # or insert a dictionary of your_parameter_name and value, e.g. dict(x=1.5, y="text", c=True),
+  # or leave it as "" if you don't have it.
+  inference_parameters = ""
+
+  ########### Initialization. Do not change it ###########
+  __test__ = True
+
+  def setUp(self) -> None:
+    logging.info("Initializing...")
+    model_type = "text-to-text"  # your model type
+    self.intitialize(
+        model_type,
+        repo_version_dir=os.path.dirname(__file__),
+        is_instance_kind_gpu=True,
+        inference_parameters=self.inference_parameters)
+
+  ########################################################
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/tests/dummy_triton_models/text-to-text/config.pbtxt b/tests/dummy_triton_models/text-to-text/config.pbtxt
new file mode 100644
index 00000000..7559f732
--- /dev/null
+++ b/tests/dummy_triton_models/text-to-text/config.pbtxt
@@ -0,0 +1,20 @@
+name: "text-to-text"
+max_batch_size: 4
+input {
+  name: "text"
+  data_type: TYPE_STRING
+  dims: 1
+}
+output {
+  name: "text"
+  data_type: TYPE_STRING
+  dims: 1
+}
+instance_group {
+  count: 1
+  kind: KIND_GPU
+}
+dynamic_batching {
+  max_queue_delay_microseconds: 500
+}
+backend: "python"
diff --git a/tests/dummy_triton_models/text-to-text/requirements.txt b/tests/dummy_triton_models/text-to-text/requirements.txt
new file mode 100644
index 00000000..6f35289e
--- /dev/null
+++ b/tests/dummy_triton_models/text-to-text/requirements.txt
@@ -0,0 +1,2 @@
+clarifai>=9.11.0
+tritonclient[all]
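One caveat when reading this model's TYPE_STRING output through tritonclient: `as_numpy` generally returns object arrays of raw bytes, so text usually needs an explicit decode. A short sketch (the array below is a stand-in for a response tensor):

```python
import numpy as np

raw = np.array([b"Dummy output: this is text"], dtype=np.object_)  # stand-in
decoded = [x.decode("utf-8") if isinstance(x, bytes) else x for x in raw]
print(decoded[0])
```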
Refer to the README/docs + """ + # Delete/Comment out line below and add your inference code + outputs = [] + + for inp in input_data: + assert isinstance(inp, np.ndarray), "Incorrect type of image, expected np.ndarray" + output = np.random.rand(5) + output = config.inference.return_type(output) + outputs.append(output) + + return outputs diff --git a/tests/dummy_triton_models/visual-classifier/1/model.py b/tests/dummy_triton_models/visual-classifier/1/model.py new file mode 100644 index 00000000..36b54b37 --- /dev/null +++ b/tests/dummy_triton_models/visual-classifier/1/model.py @@ -0,0 +1,74 @@ +# Copyright 2023 Clarifai, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Triton inference server Python Backend Model.""" + +import os +import sys + +try: + import triton_python_backend_utils as pb_utils +except ModuleNotFoundError: + pass +from google.protobuf import text_format +from tritonclient.grpc.model_config_pb2 import ModelConfig +from clarifai.models.model_serving.model_config.inference_parameter import parse_req_parameters + + +class TritonPythonModel: + """ + Triton Python BE Model. + """ + + def initialize(self, args): + """ + Triton server init. + """ + args["model_repository"] = args["model_repository"].replace("/1/model.py", "") + sys.path.append(os.path.dirname(__file__)) + from inference import InferenceModel + + self.inference_obj = InferenceModel() + + # Read input_name from config file + self.config_msg = ModelConfig() + with open(os.path.join(args["model_repository"], "config.pbtxt"), "r") as f: + cfg = f.read() + text_format.Merge(cfg, self.config_msg) + self.input_names = [inp.name for inp in self.config_msg.input] + + def execute(self, requests): + """ + Serve model inference requests. 
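+
+    Inference parameters serialized on each request are decoded with
+    parse_req_parameters and forwarded to get_predictions as kwargs. Models
+    with a single input in config.pbtxt receive a bare numpy batch; models
+    with several inputs receive a dict keyed by input name.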
+    """
+    responses = []
+
+    for request in requests:
+      parameters = request.parameters()
+      parameters = parse_req_parameters(parameters) if parameters else {}
+
+      if len(self.input_names) == 1:
+        in_batch = pb_utils.get_input_tensor_by_name(request, self.input_names[0])
+        in_batch = in_batch.as_numpy()
+        inference_response = self.inference_obj.get_predictions(in_batch, **parameters)
+      else:
+        multi_in_batch_dict = {}
+        for input_name in self.input_names:
+          in_batch = pb_utils.get_input_tensor_by_name(request, input_name)
+          in_batch = in_batch.as_numpy() if in_batch is not None else []
+          multi_in_batch_dict.update({input_name: in_batch})
+
+        inference_response = self.inference_obj.get_predictions(multi_in_batch_dict, **parameters)
+
+      responses.append(inference_response)
+
+    return responses
diff --git a/tests/dummy_triton_models/visual-classifier/1/test_visual_classifier.py b/tests/dummy_triton_models/visual-classifier/1/test_visual_classifier.py
new file mode 100644
index 00000000..dcb5fa6d
--- /dev/null
+++ b/tests/dummy_triton_models/visual-classifier/1/test_visual_classifier.py
@@ -0,0 +1,64 @@
+import logging
+import os
+import unittest
+
+from clarifai.models.model_serving.models.default_test import DefaultTestInferenceModel
+
+
+class CustomTestInferenceModel(DefaultTestInferenceModel):
+  """
+  Run this file to test your implementation of InferenceModel in inference.py with the
+  default tests of the Triton configuration and of its output values on basic predefined inputs.
+  If you want to write custom test cases or just check output values, please follow these instructions:
+  1. Name your test functions with the prefix "test" so that pytest can collect them.
+  2. To obtain the output of InferenceModel, call `self.triton_get_predictions(input_data)`.
+  3. If your input is an `image` and you set a custom size for it when building the model repository,
+     call `self.preprocess(image)` to obtain a correctly resized input.
+  4. Run the tests with:
+  ```bash
+  pytest ./your_triton_folder/1/test.py
+  # add these flags to see the log output
+  pytest --log-cli-level=INFO -s ./your_triton_folder/1/test.py
+  ```
+
+  ### Examples:
+
+  Test text-to-image output:
+  ```
+  def test_text_to_image_output(self):
+    text = "Test text"
+    output = self.triton_get_predictions(text)
+    image = output.image  # uint8 np.ndarray image
+    # show or save it here
+  ```
+
+  Test visual-classifier output:
+  ```
+  def test_visual_classifier(self):
+    image = cv2.imread("your/local/image.jpg")  # mind the channel order (cv2 loads BGR)
+    output = self.triton_get_predictions(image)
+    scores = output.predicted_scores  # np.ndarray
+    logging.info(scores)  # process scores to get class ids and their scores
+  ```
+  """
+
+  # Insert your inference-parameters JSON path here,
+  # or a dictionary of parameter names and values, e.g. dict(x=1.5, y="text", c=True),
+  # or leave it as "" if you don't have any.
+  inference_parameters = ""
+
+  ########### Initialization. Do not change it ###########
+  __test__ = True
+
+  def setUp(self) -> None:
+    logging.info("Initializing...")
+    model_type = "visual-classifier"  # your model type
+    self.intitialize(
+        model_type,
+        repo_version_dir=os.path.dirname(__file__),
+        is_instance_kind_gpu=True,
+        inference_parameters=self.inference_parameters)
+
+  ########################################################
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/tests/dummy_triton_models/visual-classifier/config.pbtxt b/tests/dummy_triton_models/visual-classifier/config.pbtxt
new file mode 100644
index 00000000..3bc9a28b
--- /dev/null
+++ b/tests/dummy_triton_models/visual-classifier/config.pbtxt
@@ -0,0 +1,23 @@
+name: "visual-classifier"
+max_batch_size: 4
+input {
+  name: "image"
+  data_type: TYPE_UINT8
+  dims: -1
+  dims: -1
+  dims: 3
+}
+output {
+  name: "softmax_predictions"
+  data_type: TYPE_FP32
+  dims: -1
+  label_filename: "labels.txt"
+}
+instance_group {
+  count: 1
+  kind: KIND_GPU
+}
+dynamic_batching {
+  max_queue_delay_microseconds: 500
+}
+backend: "python"
diff --git a/tests/dummy_triton_models/visual-classifier/labels.txt b/tests/dummy_triton_models/visual-classifier/labels.txt
new file mode 100644
index 00000000..94053253
--- /dev/null
+++ b/tests/dummy_triton_models/visual-classifier/labels.txt
@@ -0,0 +1,5 @@
+a
+b
+c
+d
+e
diff --git a/tests/dummy_triton_models/visual-classifier/requirements.txt b/tests/dummy_triton_models/visual-classifier/requirements.txt
new file mode 100644
index 00000000..6f35289e
--- /dev/null
+++ b/tests/dummy_triton_models/visual-classifier/requirements.txt
@@ -0,0 +1,2 @@
+clarifai>=9.11.0
+tritonclient[all]
diff --git a/tests/dummy_triton_models/visual-detector/1/__init__.py b/tests/dummy_triton_models/visual-detector/1/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/dummy_triton_models/visual-detector/1/inference.py b/tests/dummy_triton_models/visual-detector/1/inference.py
new file mode 100644
index 00000000..d8a4dcd7
--- /dev/null
+++ b/tests/dummy_triton_models/visual-detector/1/inference.py
@@ -0,0 +1,58 @@
+# This file contains boilerplate code to allow users to write their model
+# inference code that will then interact with the Triton Inference Server
+# Python backend to serve end-user requests.
+# The module name, module path, class name & get_predictions() method name MUST be maintained as-is,
+# but other methods may be added within the class as deemed fit, provided
+# they are invoked within the main get_predictions() inference method
+# if they play a role in any step of model inference.
+"""User model inference script."""
+
+import os
+from pathlib import Path
+import numpy as np
+from clarifai.models.model_serving.model_config import ModelTypes, get_model_config
+
+config = get_model_config(ModelTypes.visual_detector)
+
+
+class InferenceModel:
+  """User model inference class."""
+
+  def __init__(self) -> None:
+    """
+    Load inference-time artifacts that are used frequently, e.g. models, tokenizers, etc.,
+    in this method so they are loaded only once, for faster inference.
+    """
+    self.base_path: Path = os.path.dirname(__file__)
+    ## sample model loading code:
+    #self.checkpoint_path: Path = os.path.join(self.base_path, "your checkpoint filename/path")
+    #self.model: Callable =
+
+  # Add the relevant model type decorator to the method below (see docs/model_types for ref.)
+  @config.inference.wrap_func
+  def get_predictions(self, input_data, **kwargs):
+    """
+    Main model inference method.
+ + Args: + ----- + input_data: A single input data item to predict on. + Input data can be an image or text, etc depending on the model type. + + Returns: + -------- + One of the clarifai.models.model_serving.models.output types. Refer to the README/docs + """ + # Delete/Comment out line below and add your inference code + outputs = [] + + for inp in input_data: + assert isinstance(inp, np.ndarray), "Incorrect type of image, expected np.ndarray" + bboxes = np.random.rand(1, 4) + classes = np.random.randint(0, 1, size=(1, 1)) + scores = np.random.rand(1, 1) + output = config.inference.return_type( + predicted_bboxes=bboxes, predicted_labels=classes, predicted_scores=scores) + outputs.append(output) + + return outputs diff --git a/tests/dummy_triton_models/visual-detector/1/model.py b/tests/dummy_triton_models/visual-detector/1/model.py new file mode 100644 index 00000000..36b54b37 --- /dev/null +++ b/tests/dummy_triton_models/visual-detector/1/model.py @@ -0,0 +1,74 @@ +# Copyright 2023 Clarifai, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Triton inference server Python Backend Model.""" + +import os +import sys + +try: + import triton_python_backend_utils as pb_utils +except ModuleNotFoundError: + pass +from google.protobuf import text_format +from tritonclient.grpc.model_config_pb2 import ModelConfig +from clarifai.models.model_serving.model_config.inference_parameter import parse_req_parameters + + +class TritonPythonModel: + """ + Triton Python BE Model. + """ + + def initialize(self, args): + """ + Triton server init. + """ + args["model_repository"] = args["model_repository"].replace("/1/model.py", "") + sys.path.append(os.path.dirname(__file__)) + from inference import InferenceModel + + self.inference_obj = InferenceModel() + + # Read input_name from config file + self.config_msg = ModelConfig() + with open(os.path.join(args["model_repository"], "config.pbtxt"), "r") as f: + cfg = f.read() + text_format.Merge(cfg, self.config_msg) + self.input_names = [inp.name for inp in self.config_msg.input] + + def execute(self, requests): + """ + Serve model inference requests. 
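+
+    Requests arrive already batched by Triton's dynamic batcher (configured
+    via dynamic_batching in config.pbtxt); each request is handled on its
+    own and contributes exactly one response, preserving the one response
+    per request contract the Python backend expects.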
+    """
+    responses = []
+
+    for request in requests:
+      parameters = request.parameters()
+      parameters = parse_req_parameters(parameters) if parameters else {}
+
+      if len(self.input_names) == 1:
+        in_batch = pb_utils.get_input_tensor_by_name(request, self.input_names[0])
+        in_batch = in_batch.as_numpy()
+        inference_response = self.inference_obj.get_predictions(in_batch, **parameters)
+      else:
+        multi_in_batch_dict = {}
+        for input_name in self.input_names:
+          in_batch = pb_utils.get_input_tensor_by_name(request, input_name)
+          in_batch = in_batch.as_numpy() if in_batch is not None else []
+          multi_in_batch_dict.update({input_name: in_batch})
+
+        inference_response = self.inference_obj.get_predictions(multi_in_batch_dict, **parameters)
+
+      responses.append(inference_response)
+
+    return responses
diff --git a/tests/dummy_triton_models/visual-detector/1/test_visual_detector.py b/tests/dummy_triton_models/visual-detector/1/test_visual_detector.py
new file mode 100644
index 00000000..50fda157
--- /dev/null
+++ b/tests/dummy_triton_models/visual-detector/1/test_visual_detector.py
@@ -0,0 +1,64 @@
+import logging
+import os
+import unittest
+
+from clarifai.models.model_serving.models.default_test import DefaultTestInferenceModel
+
+
+class CustomTestInferenceModel(DefaultTestInferenceModel):
+  """
+  Run this file to test your implementation of InferenceModel in inference.py with the
+  default tests of the Triton configuration and of its output values on basic predefined inputs.
+  If you want to write custom test cases or just check output values, please follow these instructions:
+  1. Name your test functions with the prefix "test" so that pytest can collect them.
+  2. To obtain the output of InferenceModel, call `self.triton_get_predictions(input_data)`.
+  3. If your input is an `image` and you set a custom size for it when building the model repository,
+     call `self.preprocess(image)` to obtain a correctly resized input.
+  4. Run the tests with:
+  ```bash
+  pytest ./your_triton_folder/1/test.py
+  # add these flags to see the log output
+  pytest --log-cli-level=INFO -s ./your_triton_folder/1/test.py
+  ```
+
+  ### Examples:
+
+  Test text-to-image output:
+  ```
+  def test_text_to_image_output(self):
+    text = "Test text"
+    output = self.triton_get_predictions(text)
+    image = output.image  # uint8 np.ndarray image
+    # show or save it here
+  ```
+
+  Test visual-classifier output:
+  ```
+  def test_visual_classifier(self):
+    image = cv2.imread("your/local/image.jpg")  # mind the channel order (cv2 loads BGR)
+    output = self.triton_get_predictions(image)
+    scores = output.predicted_scores  # np.ndarray
+    logging.info(scores)  # process scores to get class ids and their scores
+  ```
+  """
+
+  # Insert your inference-parameters JSON path here,
+  # or a dictionary of parameter names and values, e.g. dict(x=1.5, y="text", c=True),
+  # or leave it as "" if you don't have any.
+  inference_parameters = ""
+
+  ########### Initialization. Do not change it ###########
+  __test__ = True
+
+  def setUp(self) -> None:
+    logging.info("Initializing...")
+    model_type = "visual-detector"  # your model type
+    self.intitialize(
+        model_type,
+        repo_version_dir=os.path.dirname(__file__),
+        is_instance_kind_gpu=True,
+        inference_parameters=self.inference_parameters)
+
+  ########################################################
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/tests/dummy_triton_models/visual-detector/config.pbtxt b/tests/dummy_triton_models/visual-detector/config.pbtxt
new file mode 100644
index 00000000..8a12b6fc
--- /dev/null
+++ b/tests/dummy_triton_models/visual-detector/config.pbtxt
@@ -0,0 +1,36 @@
+name: "visual-detector"
+max_batch_size: 4
+input {
+  name: "image"
+  data_type: TYPE_UINT8
+  dims: -1
+  dims: -1
+  dims: 3
+}
+output {
+  name: "predicted_bboxes"
+  data_type: TYPE_FP32
+  dims: -1
+  dims: 4
+}
+output {
+  name: "predicted_labels"
+  data_type: TYPE_INT32
+  dims: -1
+  dims: 1
+  label_filename: "labels.txt"
+}
+output {
+  name: "predicted_scores"
+  data_type: TYPE_FP32
+  dims: -1
+  dims: 1
+}
+instance_group {
+  count: 1
+  kind: KIND_GPU
+}
+dynamic_batching {
+  max_queue_delay_microseconds: 500
+}
+backend: "python"
diff --git a/tests/dummy_triton_models/visual-detector/labels.txt b/tests/dummy_triton_models/visual-detector/labels.txt
new file mode 100644
index 00000000..0d0547b0
--- /dev/null
+++ b/tests/dummy_triton_models/visual-detector/labels.txt
@@ -0,0 +1,2 @@
+cat
+meow
diff --git a/tests/dummy_triton_models/visual-detector/requirements.txt b/tests/dummy_triton_models/visual-detector/requirements.txt
new file mode 100644
index 00000000..6f35289e
--- /dev/null
+++ b/tests/dummy_triton_models/visual-detector/requirements.txt
@@ -0,0 +1,2 @@
+clarifai>=9.11.0
+tritonclient[all]
diff --git a/tests/dummy_triton_models/visual-embedder/1/__init__.py b/tests/dummy_triton_models/visual-embedder/1/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/dummy_triton_models/visual-embedder/1/inference.py b/tests/dummy_triton_models/visual-embedder/1/inference.py
new file mode 100644
index 00000000..62e321f6
--- /dev/null
+++ b/tests/dummy_triton_models/visual-embedder/1/inference.py
@@ -0,0 +1,54 @@
+# This file contains boilerplate code to allow users to write their model
+# inference code that will then interact with the Triton Inference Server
+# Python backend to serve end-user requests.
+# The module name, module path, class name & get_predictions() method name MUST be maintained as-is,
+# but other methods may be added within the class as deemed fit, provided
+# they are invoked within the main get_predictions() inference method
+# if they play a role in any step of model inference.
+"""User model inference script."""
+
+import os
+from pathlib import Path
+import numpy as np
+from clarifai.models.model_serving.model_config import ModelTypes, get_model_config
+
+config = get_model_config(ModelTypes.visual_embedder)
+
+
+class InferenceModel:
+  """User model inference class."""
+
+  def __init__(self) -> None:
+    """
+    Load inference-time artifacts that are used frequently, e.g. models, tokenizers, etc.,
+    in this method so they are loaded only once, for faster inference.
+    """
+    self.base_path: Path = os.path.dirname(__file__)
+    ## sample model loading code:
+    #self.checkpoint_path: Path = os.path.join(self.base_path, "your checkpoint filename/path")
+    #self.model: Callable =
+
+  # Add the relevant model type decorator to the method below (see docs/model_types for ref.)
+ @config.inference.wrap_func + def get_predictions(self, input_data, **kwargs): + """ + Main model inference method. + + Args: + ----- + input_data: A single input data item to predict on. + Input data can be an image or text, etc depending on the model type. + + Returns: + -------- + One of the clarifai.models.model_serving.models.output types. Refer to the README/docs + """ + outputs = [] + + for inp in input_data: + assert isinstance(inp, np.ndarray), "Incorrect type of image, expected np.ndarray" + output = np.random.randn(768) + output = config.inference.return_type(output) + outputs.append(output) + + return outputs diff --git a/tests/dummy_triton_models/visual-embedder/1/model.py b/tests/dummy_triton_models/visual-embedder/1/model.py new file mode 100644 index 00000000..36b54b37 --- /dev/null +++ b/tests/dummy_triton_models/visual-embedder/1/model.py @@ -0,0 +1,74 @@ +# Copyright 2023 Clarifai, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Triton inference server Python Backend Model.""" + +import os +import sys + +try: + import triton_python_backend_utils as pb_utils +except ModuleNotFoundError: + pass +from google.protobuf import text_format +from tritonclient.grpc.model_config_pb2 import ModelConfig +from clarifai.models.model_serving.model_config.inference_parameter import parse_req_parameters + + +class TritonPythonModel: + """ + Triton Python BE Model. + """ + + def initialize(self, args): + """ + Triton server init. + """ + args["model_repository"] = args["model_repository"].replace("/1/model.py", "") + sys.path.append(os.path.dirname(__file__)) + from inference import InferenceModel + + self.inference_obj = InferenceModel() + + # Read input_name from config file + self.config_msg = ModelConfig() + with open(os.path.join(args["model_repository"], "config.pbtxt"), "r") as f: + cfg = f.read() + text_format.Merge(cfg, self.config_msg) + self.input_names = [inp.name for inp in self.config_msg.input] + + def execute(self, requests): + """ + Serve model inference requests. 
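+
+    The get_predictions call is wrapped by the model-type decorator applied
+    in inference.py, which packages the returned outputs into a Triton
+    InferenceResponse, so no response objects need to be built here.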
+    """
+    responses = []
+
+    for request in requests:
+      parameters = request.parameters()
+      parameters = parse_req_parameters(parameters) if parameters else {}
+
+      if len(self.input_names) == 1:
+        in_batch = pb_utils.get_input_tensor_by_name(request, self.input_names[0])
+        in_batch = in_batch.as_numpy()
+        inference_response = self.inference_obj.get_predictions(in_batch, **parameters)
+      else:
+        multi_in_batch_dict = {}
+        for input_name in self.input_names:
+          in_batch = pb_utils.get_input_tensor_by_name(request, input_name)
+          in_batch = in_batch.as_numpy() if in_batch is not None else []
+          multi_in_batch_dict.update({input_name: in_batch})
+
+        inference_response = self.inference_obj.get_predictions(multi_in_batch_dict, **parameters)
+
+      responses.append(inference_response)
+
+    return responses
diff --git a/tests/dummy_triton_models/visual-embedder/1/test_visual_embedder.py b/tests/dummy_triton_models/visual-embedder/1/test_visual_embedder.py
new file mode 100644
index 00000000..2c142003
--- /dev/null
+++ b/tests/dummy_triton_models/visual-embedder/1/test_visual_embedder.py
@@ -0,0 +1,64 @@
+import logging
+import os
+import unittest
+
+from clarifai.models.model_serving.models.default_test import DefaultTestInferenceModel
+
+
+class CustomTestInferenceModel(DefaultTestInferenceModel):
+  """
+  Run this file to test your implementation of InferenceModel in inference.py with the
+  default tests of the Triton configuration and of its output values on basic predefined inputs.
+  If you want to write custom test cases or just check output values, please follow these instructions:
+  1. Name your test functions with the prefix "test" so that pytest can collect them.
+  2. To obtain the output of InferenceModel, call `self.triton_get_predictions(input_data)`.
+  3. If your input is an `image` and you set a custom size for it when building the model repository,
+     call `self.preprocess(image)` to obtain a correctly resized input.
+  4. Run the tests with:
+  ```bash
+  pytest ./your_triton_folder/1/test.py
+  # add these flags to see the log output
+  pytest --log-cli-level=INFO -s ./your_triton_folder/1/test.py
+  ```
+
+  ### Examples:
+
+  Test text-to-image output:
+  ```
+  def test_text_to_image_output(self):
+    text = "Test text"
+    output = self.triton_get_predictions(text)
+    image = output.image  # uint8 np.ndarray image
+    # show or save it here
+  ```
+
+  Test visual-classifier output:
+  ```
+  def test_visual_classifier(self):
+    image = cv2.imread("your/local/image.jpg")  # mind the channel order (cv2 loads BGR)
+    output = self.triton_get_predictions(image)
+    scores = output.predicted_scores  # np.ndarray
+    logging.info(scores)  # process scores to get class ids and their scores
+  ```
+  """
+
+  # Insert your inference-parameters JSON path here,
+  # or a dictionary of parameter names and values, e.g. dict(x=1.5, y="text", c=True),
+  # or leave it as "" if you don't have any.
+  inference_parameters = ""
+
+  ########### Initialization. Do not change it ###########
+  __test__ = True
+
+  def setUp(self) -> None:
+    logging.info("Initializing...")
+    model_type = "visual-embedder"  # your model type
+    self.intitialize(
+        model_type,
+        repo_version_dir=os.path.dirname(__file__),
+        is_instance_kind_gpu=True,
+        inference_parameters=self.inference_parameters)
+
+  ########################################################
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/tests/dummy_triton_models/visual-embedder/config.pbtxt b/tests/dummy_triton_models/visual-embedder/config.pbtxt
new file mode 100644
index 00000000..16ff7c8f
--- /dev/null
+++ b/tests/dummy_triton_models/visual-embedder/config.pbtxt
@@ -0,0 +1,22 @@
+name: "visual-embedder"
+max_batch_size: 4
+input {
+  name: "image"
+  data_type: TYPE_UINT8
+  dims: -1
+  dims: -1
+  dims: 3
+}
+output {
+  name: "embeddings"
+  data_type: TYPE_FP32
+  dims: -1
+}
+instance_group {
+  count: 1
+  kind: KIND_GPU
+}
+dynamic_batching {
+  max_queue_delay_microseconds: 500
+}
+backend: "python"
diff --git a/tests/dummy_triton_models/visual-embedder/requirements.txt b/tests/dummy_triton_models/visual-embedder/requirements.txt
new file mode 100644
index 00000000..6f35289e
--- /dev/null
+++ b/tests/dummy_triton_models/visual-embedder/requirements.txt
@@ -0,0 +1,2 @@
+clarifai>=9.11.0
+tritonclient[all]
diff --git a/tests/dummy_triton_models/visual-segmenter/1/__init__.py b/tests/dummy_triton_models/visual-segmenter/1/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/dummy_triton_models/visual-segmenter/1/inference.py b/tests/dummy_triton_models/visual-segmenter/1/inference.py
new file mode 100644
index 00000000..29a84a44
--- /dev/null
+++ b/tests/dummy_triton_models/visual-segmenter/1/inference.py
@@ -0,0 +1,54 @@
+# This file contains boilerplate code to allow users to write their model
+# inference code that will then interact with the Triton Inference Server
+# Python backend to serve end-user requests.
+# The module name, module path, class name & get_predictions() method name MUST be maintained as-is,
+# but other methods may be added within the class as deemed fit, provided
+# they are invoked within the main get_predictions() inference method
+# if they play a role in any step of model inference.
+"""User model inference script."""
+
+import os
+from pathlib import Path
+import numpy as np
+from clarifai.models.model_serving.model_config import ModelTypes, get_model_config
+
+config = get_model_config(ModelTypes.visual_segmenter)
+
+
+class InferenceModel:
+  """User model inference class."""
+
+  def __init__(self) -> None:
+    """
+    Load inference-time artifacts that are used frequently, e.g. models, tokenizers, etc.,
+    in this method so they are loaded only once, for faster inference.
+    """
+    self.base_path: Path = os.path.dirname(__file__)
+    ## sample model loading code:
+    #self.checkpoint_path: Path = os.path.join(self.base_path, "your checkpoint filename/path")
+    #self.model: Callable =
+
+  # Add the relevant model type decorator to the method below (see docs/model_types for ref.)
+  @config.inference.wrap_func
+  def get_predictions(self, input_data, **kwargs):
+    """
+    Main model inference method.
+
+    Args:
+    -----
+    input_data: A single input data item to predict on.
+      Input data can be an image or text, etc. depending on the model type.
+
+    Returns:
+    --------
+    One of the clarifai.models.model_serving.models.output types.
Refer to the README/docs + """ + outputs = [] + + for inp in input_data: + assert isinstance(inp, np.ndarray), "Incorrect type of image, expected np.ndarray" + output = np.random.randint(0, 1, size=(200, 200)) + output = config.inference.return_type(output) + outputs.append(output) + + return outputs diff --git a/tests/dummy_triton_models/visual-segmenter/1/model.py b/tests/dummy_triton_models/visual-segmenter/1/model.py new file mode 100644 index 00000000..36b54b37 --- /dev/null +++ b/tests/dummy_triton_models/visual-segmenter/1/model.py @@ -0,0 +1,74 @@ +# Copyright 2023 Clarifai, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Triton inference server Python Backend Model.""" + +import os +import sys + +try: + import triton_python_backend_utils as pb_utils +except ModuleNotFoundError: + pass +from google.protobuf import text_format +from tritonclient.grpc.model_config_pb2 import ModelConfig +from clarifai.models.model_serving.model_config.inference_parameter import parse_req_parameters + + +class TritonPythonModel: + """ + Triton Python BE Model. + """ + + def initialize(self, args): + """ + Triton server init. + """ + args["model_repository"] = args["model_repository"].replace("/1/model.py", "") + sys.path.append(os.path.dirname(__file__)) + from inference import InferenceModel + + self.inference_obj = InferenceModel() + + # Read input_name from config file + self.config_msg = ModelConfig() + with open(os.path.join(args["model_repository"], "config.pbtxt"), "r") as f: + cfg = f.read() + text_format.Merge(cfg, self.config_msg) + self.input_names = [inp.name for inp in self.config_msg.input] + + def execute(self, requests): + """ + Serve model inference requests. 
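+
+    self.input_names was read from config.pbtxt during initialize(), which
+    is what lets this generic model.py be reused unchanged across all of
+    the dummy model types.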
+    """
+    responses = []
+
+    for request in requests:
+      parameters = request.parameters()
+      parameters = parse_req_parameters(parameters) if parameters else {}
+
+      if len(self.input_names) == 1:
+        in_batch = pb_utils.get_input_tensor_by_name(request, self.input_names[0])
+        in_batch = in_batch.as_numpy()
+        inference_response = self.inference_obj.get_predictions(in_batch, **parameters)
+      else:
+        multi_in_batch_dict = {}
+        for input_name in self.input_names:
+          in_batch = pb_utils.get_input_tensor_by_name(request, input_name)
+          in_batch = in_batch.as_numpy() if in_batch is not None else []
+          multi_in_batch_dict.update({input_name: in_batch})
+
+        inference_response = self.inference_obj.get_predictions(multi_in_batch_dict, **parameters)
+
+      responses.append(inference_response)
+
+    return responses
diff --git a/tests/dummy_triton_models/visual-segmenter/1/test_visual_segmenter.py b/tests/dummy_triton_models/visual-segmenter/1/test_visual_segmenter.py
new file mode 100644
index 00000000..7c84a834
--- /dev/null
+++ b/tests/dummy_triton_models/visual-segmenter/1/test_visual_segmenter.py
@@ -0,0 +1,64 @@
+import logging
+import os
+import unittest
+
+from clarifai.models.model_serving.models.default_test import DefaultTestInferenceModel
+
+
+class CustomTestInferenceModel(DefaultTestInferenceModel):
+  """
+  Run this file to test your implementation of InferenceModel in inference.py with the
+  default tests of the Triton configuration and of its output values on basic predefined inputs.
+  If you want to write custom test cases or just check output values, please follow these instructions:
+  1. Name your test functions with the prefix "test" so that pytest can collect them.
+  2. To obtain the output of InferenceModel, call `self.triton_get_predictions(input_data)`.
+  3. If your input is an `image` and you set a custom size for it when building the model repository,
+     call `self.preprocess(image)` to obtain a correctly resized input.
+  4. Run the tests with:
+  ```bash
+  pytest ./your_triton_folder/1/test.py
+  # add these flags to see the log output
+  pytest --log-cli-level=INFO -s ./your_triton_folder/1/test.py
+  ```
+
+  ### Examples:
+
+  Test text-to-image output:
+  ```
+  def test_text_to_image_output(self):
+    text = "Test text"
+    output = self.triton_get_predictions(text)
+    image = output.image  # uint8 np.ndarray image
+    # show or save it here
+  ```
+
+  Test visual-classifier output:
+  ```
+  def test_visual_classifier(self):
+    image = cv2.imread("your/local/image.jpg")  # mind the channel order (cv2 loads BGR)
+    output = self.triton_get_predictions(image)
+    scores = output.predicted_scores  # np.ndarray
+    logging.info(scores)  # process scores to get class ids and their scores
+  ```
+  """
+
+  # Insert your inference-parameters JSON path here,
+  # or a dictionary of parameter names and values, e.g. dict(x=1.5, y="text", c=True),
+  # or leave it as "" if you don't have any.
+  inference_parameters = ""
+
+  ########### Initialization. Do not change it ###########
+  __test__ = True
+
+  def setUp(self) -> None:
+    logging.info("Initializing...")
+    model_type = "visual-segmenter"  # your model type
+    self.intitialize(
+        model_type,
+        repo_version_dir=os.path.dirname(__file__),
+        is_instance_kind_gpu=True,
+        inference_parameters=self.inference_parameters)
+
+  ########################################################
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/tests/dummy_triton_models/visual-segmenter/config.pbtxt b/tests/dummy_triton_models/visual-segmenter/config.pbtxt
new file mode 100644
index 00000000..2db1d050
--- /dev/null
+++ b/tests/dummy_triton_models/visual-segmenter/config.pbtxt
@@ -0,0 +1,24 @@
+name: "visual-segmenter"
+max_batch_size: 4
+input {
+  name: "image"
+  data_type: TYPE_UINT8
+  dims: -1
+  dims: -1
+  dims: 3
+}
+output {
+  name: "predicted_mask"
+  data_type: TYPE_INT64
+  dims: -1
+  dims: -1
+  label_filename: "labels.txt"
+}
+instance_group {
+  count: 1
+  kind: KIND_GPU
+}
+dynamic_batching {
+  max_queue_delay_microseconds: 500
+}
+backend: "python"
diff --git a/tests/dummy_triton_models/visual-segmenter/labels.txt b/tests/dummy_triton_models/visual-segmenter/labels.txt
new file mode 100644
index 00000000..0d66ea1a
--- /dev/null
+++ b/tests/dummy_triton_models/visual-segmenter/labels.txt
@@ -0,0 +1,2 @@
+0
+1
diff --git a/tests/dummy_triton_models/visual-segmenter/requirements.txt b/tests/dummy_triton_models/visual-segmenter/requirements.txt
new file mode 100644
index 00000000..6f35289e
--- /dev/null
+++ b/tests/dummy_triton_models/visual-segmenter/requirements.txt
@@ -0,0 +1,2 @@
+clarifai>=9.11.0
+tritonclient[all]

From 1f9aed1d19283771c48efed0db802b9efa3f4669 Mon Sep 17 00:00:00 2001
From: phtvo
Date: Thu, 28 Dec 2023 20:31:38 +0700
Subject: [PATCH 2/7] run test triton models

---
 .github/workflows/run_triton_test.yaml | 32 ++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 .github/workflows/run_triton_test.yaml

diff --git a/.github/workflows/run_triton_test.yaml b/.github/workflows/run_triton_test.yaml
new file mode 100644
index 00000000..e5b130c0
--- /dev/null
+++ b/.github/workflows/run_triton_test.yaml
@@ -0,0 +1,32 @@
+name: Run triton test
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+
+jobs:
+  triton-model-test:
+    runs-on: ubuntu-latest
+
+    container:
+      image: nvcr.io/nvidia/tritonserver:23.10-py3
+
+    steps:
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install -r tests/requirements.txt
+
+      - name: start triton
+        run: |
+          tritonserver --model-repository ./tests/dummy_triton_models --exit-on-error false &
+        background: true
+
+      - name: Wait for Triton to Initialize
+        run: sleep 10
+
+      - name: Start Model Tests
+        run: |
+          python3 -m pytest tests/dummy_triton_models/_test_all_dummies.py -s

From 408b6ad42899a5441ff8be445b0acd5935c41071 Mon Sep 17 00:00:00 2001
From: phtvo
Date: Fri, 29 Dec 2023 14:41:02 +0700
Subject: [PATCH 3/7] fix triton test workflow

---
 .github/workflows/run_triton_test.yaml | 33 +++++++++++++------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/run_triton_test.yaml b/.github/workflows/run_triton_test.yaml
index e5b130c0..e5531bd2 100644
--- a/.github/workflows/run_triton_test.yaml
+++ b/.github/workflows/run_triton_test.yaml
@@ -3,7 +3,11 @@ name: Run triton test
 on:
   push:
     branches: [ master ]
+    paths:
+      - 'clarifai/models/**'
   pull_request:
+    paths:
+      - 'clarifai/models/**'

 jobs:
   triton-model-test:
     runs-on: ubuntu-latest
@@ -13,7
+17,17 @@ jobs: image: nvcr.io/nvidia/tritonserver:23.10-py3 steps: - - name: Install dependencies + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install -r tests/requirements.txt + - name: start triton run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - pip install -r tests/requirements.txt - - - name: start triton - run: | - tritonserver --model-repository ./tests/dummy_triton_models --exit-on-error false & - background: true - - - name: Wait for Triton to Initialize - run: sleep 10 - - - name: Start Model Tests - run: | - python3 -m pytest tests/dummy_triton_models/_test_all_dummies.py -s + tritonserver --model-repository ./tests/dummy_triton_models --exit-on-error false & + background: true + - name: Wait for Triton to Initialize + run: sleep 10 + - name: Start Model Tests + run: | + python3 -m pytest tests/dummy_triton_models/_test_all_dummies.py -s From f30f85b28ad473af9b28e77113843f884a9c2bf2 Mon Sep 17 00:00:00 2001 From: phtvo Date: Fri, 29 Dec 2023 14:47:32 +0700 Subject: [PATCH 4/7] fix triton test workflow --- .github/workflows/run_triton_test.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/run_triton_test.yaml b/.github/workflows/run_triton_test.yaml index e5531bd2..ffacc2f4 100644 --- a/.github/workflows/run_triton_test.yaml +++ b/.github/workflows/run_triton_test.yaml @@ -11,11 +11,10 @@ on: jobs: triton-model-test: - runs-on: ubuntu-latest + runs-on: ubuntu-latest container: image: nvcr.io/nvidia/tritonserver:23.10-py3 - steps: - name: Install dependencies run: | From 13e68123a2e84bab0bbf0116e2a4bee5e56805b5 Mon Sep 17 00:00:00 2001 From: phtvo Date: Fri, 29 Dec 2023 15:34:03 +0700 Subject: [PATCH 5/7] fix test workflow --- .github/workflows/run_triton_test.yaml | 17 +++++++---------- tests/dummy_triton_models/_test_all_dummies.py | 6 +++--- .../multimodal-embedder/config.pbtxt | 2 +- .../text-classifier/config.pbtxt | 2 +- .../text-embedder/config.pbtxt | 2 +- .../text-to-image/config.pbtxt | 2 +- .../text-to-text/config.pbtxt | 2 +- .../visual-classifier/config.pbtxt | 2 +- .../visual-detector/config.pbtxt | 2 +- .../visual-embedder/config.pbtxt | 2 +- .../visual-segmenter/config.pbtxt | 2 +- 11 files changed, 19 insertions(+), 22 deletions(-) diff --git a/.github/workflows/run_triton_test.yaml b/.github/workflows/run_triton_test.yaml index ffacc2f4..252db2f9 100644 --- a/.github/workflows/run_triton_test.yaml +++ b/.github/workflows/run_triton_test.yaml @@ -16,17 +16,14 @@ jobs: container: image: nvcr.io/nvidia/tritonserver:23.10-py3 steps: + - name: Checkout Repository + uses: actions/checkout@v4 - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - pip install -r tests/requirements.txt - - name: start triton run: | - tritonserver --model-repository ./tests/dummy_triton_models --exit-on-error false & - background: true - - name: Wait for Triton to Initialize - run: sleep 10 - - name: Start Model Tests + python3 -m pip install --upgrade pip + pip install -r requirements.txt + pip install -r tests/requirements.txt + - name: start triton run: | + tritonserver --model-repository ./tests/dummy_triton_models --exit-on-error false &>/dev/null & python3 -m pytest tests/dummy_triton_models/_test_all_dummies.py -s diff --git a/tests/dummy_triton_models/_test_all_dummies.py b/tests/dummy_triton_models/_test_all_dummies.py index 2ac06bf5..016c17c6 100644 --- 
a/tests/dummy_triton_models/_test_all_dummies.py +++ b/tests/dummy_triton_models/_test_all_dummies.py @@ -7,11 +7,11 @@ MAX_BATCH_SIZE = 4 MAX_TRIES = 5 -INTERVAL = 3 +INTERVAL = 10 count = 0 while count < MAX_TRIES: try: - _ = InferenceServerClient('localhost:8001').is_server_live() + _ = InferenceServerClient('0.0.0.0:8001').is_server_live() break except Exception as e: print(e) @@ -21,7 +21,7 @@ @pytest.fixture def triton_client(): - return InferenceServerClient('localhost:8001') + return InferenceServerClient('0.0.0.0:8001') def make_input(name, inputs): diff --git a/tests/dummy_triton_models/multimodal-embedder/config.pbtxt b/tests/dummy_triton_models/multimodal-embedder/config.pbtxt index 7a49e967..00c212a5 100644 --- a/tests/dummy_triton_models/multimodal-embedder/config.pbtxt +++ b/tests/dummy_triton_models/multimodal-embedder/config.pbtxt @@ -21,7 +21,7 @@ output { } instance_group { count: 1 - kind: KIND_GPU + kind: KIND_CPU # Use CPU only for test } dynamic_batching { max_queue_delay_microseconds: 500 diff --git a/tests/dummy_triton_models/text-classifier/config.pbtxt b/tests/dummy_triton_models/text-classifier/config.pbtxt index 542d0929..c4c26391 100644 --- a/tests/dummy_triton_models/text-classifier/config.pbtxt +++ b/tests/dummy_triton_models/text-classifier/config.pbtxt @@ -13,7 +13,7 @@ output { } instance_group { count: 1 - kind: KIND_GPU + kind: KIND_CPU # Use CPU only for test } dynamic_batching { max_queue_delay_microseconds: 500 diff --git a/tests/dummy_triton_models/text-embedder/config.pbtxt b/tests/dummy_triton_models/text-embedder/config.pbtxt index 98bf2867..f41df507 100644 --- a/tests/dummy_triton_models/text-embedder/config.pbtxt +++ b/tests/dummy_triton_models/text-embedder/config.pbtxt @@ -12,7 +12,7 @@ output { } instance_group { count: 1 - kind: KIND_GPU + kind: KIND_CPU # Use CPU only for test } dynamic_batching { max_queue_delay_microseconds: 500 diff --git a/tests/dummy_triton_models/text-to-image/config.pbtxt b/tests/dummy_triton_models/text-to-image/config.pbtxt index 09cf8fbf..031103d1 100644 --- a/tests/dummy_triton_models/text-to-image/config.pbtxt +++ b/tests/dummy_triton_models/text-to-image/config.pbtxt @@ -14,7 +14,7 @@ output { } instance_group { count: 1 - kind: KIND_GPU + kind: KIND_CPU # Use CPU only for test } dynamic_batching { max_queue_delay_microseconds: 500 diff --git a/tests/dummy_triton_models/text-to-text/config.pbtxt b/tests/dummy_triton_models/text-to-text/config.pbtxt index 7559f732..e60a77fc 100644 --- a/tests/dummy_triton_models/text-to-text/config.pbtxt +++ b/tests/dummy_triton_models/text-to-text/config.pbtxt @@ -12,7 +12,7 @@ output { } instance_group { count: 1 - kind: KIND_GPU + kind: KIND_CPU # Use CPU only for test } dynamic_batching { max_queue_delay_microseconds: 500 diff --git a/tests/dummy_triton_models/visual-classifier/config.pbtxt b/tests/dummy_triton_models/visual-classifier/config.pbtxt index 3bc9a28b..23d5881a 100644 --- a/tests/dummy_triton_models/visual-classifier/config.pbtxt +++ b/tests/dummy_triton_models/visual-classifier/config.pbtxt @@ -15,7 +15,7 @@ output { } instance_group { count: 1 - kind: KIND_GPU + kind: KIND_CPU # Use CPU only for test } dynamic_batching { max_queue_delay_microseconds: 500 diff --git a/tests/dummy_triton_models/visual-detector/config.pbtxt b/tests/dummy_triton_models/visual-detector/config.pbtxt index 8a12b6fc..b6064ae3 100644 --- a/tests/dummy_triton_models/visual-detector/config.pbtxt +++ b/tests/dummy_triton_models/visual-detector/config.pbtxt @@ -28,7 +28,7 @@ output { } 
instance_group { count: 1 - kind: KIND_GPU + kind: KIND_CPU # Use CPU only for test } dynamic_batching { max_queue_delay_microseconds: 500 diff --git a/tests/dummy_triton_models/visual-embedder/config.pbtxt b/tests/dummy_triton_models/visual-embedder/config.pbtxt index 16ff7c8f..832d9afa 100644 --- a/tests/dummy_triton_models/visual-embedder/config.pbtxt +++ b/tests/dummy_triton_models/visual-embedder/config.pbtxt @@ -14,7 +14,7 @@ output { } instance_group { count: 1 - kind: KIND_GPU + kind: KIND_CPU # Use CPU only for test } dynamic_batching { max_queue_delay_microseconds: 500 diff --git a/tests/dummy_triton_models/visual-segmenter/config.pbtxt b/tests/dummy_triton_models/visual-segmenter/config.pbtxt index 2db1d050..09f00d84 100644 --- a/tests/dummy_triton_models/visual-segmenter/config.pbtxt +++ b/tests/dummy_triton_models/visual-segmenter/config.pbtxt @@ -16,7 +16,7 @@ output { } instance_group { count: 1 - kind: KIND_GPU + kind: KIND_CPU # Use CPU only for test } dynamic_batching { max_queue_delay_microseconds: 500 From f17ba4ff254f83ad0171c3cd62c2f67d6b89f005 Mon Sep 17 00:00:00 2001 From: phtvo Date: Tue, 2 Jan 2024 22:40:20 +0700 Subject: [PATCH 6/7] downgrade tritonserver --- .github/workflows/run_triton_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_triton_test.yaml b/.github/workflows/run_triton_test.yaml index 252db2f9..edbf342e 100644 --- a/.github/workflows/run_triton_test.yaml +++ b/.github/workflows/run_triton_test.yaml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest container: - image: nvcr.io/nvidia/tritonserver:23.10-py3 + image: nvcr.io/nvidia/tritonserver:23.03-py3 steps: - name: Checkout Repository uses: actions/checkout@v4 From f7b3e2222b524a6e20b4715802ddef7c430bdd0b Mon Sep 17 00:00:00 2001 From: phtvo Date: Tue, 2 Jan 2024 23:24:32 +0700 Subject: [PATCH 7/7] fix triton test --- .github/workflows/run_triton_test.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/run_triton_test.yaml b/.github/workflows/run_triton_test.yaml index edbf342e..541a553e 100644 --- a/.github/workflows/run_triton_test.yaml +++ b/.github/workflows/run_triton_test.yaml @@ -15,6 +15,7 @@ jobs: runs-on: ubuntu-latest container: image: nvcr.io/nvidia/tritonserver:23.03-py3 + options: --shm-size 2g steps: - name: Checkout Repository uses: actions/checkout@v4
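
For local debugging outside this workflow, a served dummy can also be queried directly with `tritonclient` once `tritonserver --model-repository ./tests/dummy_triton_models` is running. Below is a minimal sketch against the `text-to-text` model; it mirrors what `_test_all_dummies.py` automates. The model name, tensor name, and gRPC port come from the files above; everything else is illustrative:

```python
import numpy as np
from tritonclient.grpc import InferenceServerClient, InferInput

# gRPC endpoint used by the test suite.
client = InferenceServerClient("0.0.0.0:8001")

# config.pbtxt declares input "text" as TYPE_STRING with dims: 1 and
# max_batch_size: 4, so a full tensor has shape (batch, 1).
batch = np.array([[b"some test text"]], dtype=np.object_)

# TYPE_STRING tensors are sent with the client-side "BYTES" datatype.
text_input = InferInput("text", list(batch.shape), "BYTES")
text_input.set_data_from_numpy(batch)

result = client.infer("text-to-text", [text_input])
print(result.as_numpy("text"))  # the dummy returns a (batch, 1) string tensor
```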