[EAGLE-3698]-split-handle batch #236

Merged · 10 commits · Dec 4, 2023
2 changes: 1 addition & 1 deletion clarifai/models/api.py
@@ -18,8 +18,8 @@
from google.protobuf.json_format import MessageToDict
from google.protobuf.struct_pb2 import Struct, Value

from clarifai.auth.helper import ClarifaiAuthHelper
from clarifai.client import create_stub
from clarifai.client.auth.helper import ClarifaiAuthHelper


def _make_default_value_proto(dtype, value):
26 changes: 17 additions & 9 deletions clarifai/models/model_serving/README.md
@@ -10,7 +10,9 @@ A step by step guide to building your own triton inference model and deploying i
```console
clarifai-model-upload-init --model_name <Your model name> \
--model_type <select model type from available ones> \
--repo_dir <directory in which to create your model repository>
--repo_dir <directory in which to create your model repository> \
--image_shape <(H, W) dims for models with an image input type. H and W each have a max value of 1024> \
--max_bs <Max batch size. Default is 1.>
```
2. 1. Edit the `requirements.txt` file with the dependencies needed to run inference on your model, and the `labels.txt` file (if present in the directory) with the labels your model predicts.
   2. Add your model loading and inference code inside the `inference.py` script of the generated model repository, under `__init__()` and `get_predictions()` respectively. Refer to the [Inference Script section]() for a description of this file.
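
For reference, a hypothetical invocation of the command from step 1 with the new flags filled in. The model name, model type string, repository path, and `--image_shape` value below are illustrative only; check `clarifai-model-upload-init --help` for the exact accepted formats:

```console
clarifai-model-upload-init --model_name my_visual_classifier \
    --model_type visual_classifier \
    --repo_dir ./my_visual_classifier_repo \
    --image_shape "(224, 224)" \
    --max_bs 4
```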
@@ -81,7 +83,6 @@ A generated triton model repository looks as illustrated in the directory tree a
| `config.pbtxt` | Contains the triton model configuration used by the triton inference server to guide inference request processing. |
| `requirements.txt` | Contains dependencies needed by a user model to successfully make predictions.|
| `labels.txt` | Contains the labels the model is trained to predict, listed one per line. The order of labels should match the model's predicted class indexes. |
| `triton_conda.yaml` | Contains dependencies available in pre-configured execution environment. |
| `1/inference.py` | The inference script where users write their inference code. |
| `1/model.py` | The triton python backend model file run to serve inference requests. |
| `1/test.py` | Contains some predefined tests in order to test inference implementation and dependencies locally. |
@@ -97,7 +98,11 @@ This script is composed of a single class that contains a default init method an

import os
from pathlib import Path
from typing import Callable

from clarifai.models.model_serving.model_config import (ModelTypes, get_model_config)

config = get_model_config("MODEL_TYPE_PLACEHOLDER") # Input your model type


class InferenceModel:
"""User model inference class."""
@@ -112,29 +117,32 @@ class InferenceModel:
#self.checkpoint_path: Path = os.path.join(self.base_path, "your checkpoint filename/path")
#self.model: Callable = <load_your_model_here from checkpoint or folder>

#Add relevant model type decorator to the method below (see docs/model_types for ref.)
def get_predictions(self, input_data, **kwargs):
@config.inference.wrap_func
def get_predictions(self, input_data: list, **kwargs) -> list:
"""
Main model inference method.

Args:
-----
input_data: A single input data item to predict on.
input_data: A list of input data items to predict on.
Input data can be an image or text, etc depending on the model type.

**kwargs: your inference parameters.

Returns:
--------
One of the clarifai.models.model_serving.models.output types. Refer to the README/docs
A list of one of the `clarifai.models.model_serving.models.output` types, or `config.inference.return_type(your_output)`. Refer to the README/docs
"""

# Delete/Comment out line below and add your inference code
raise NotImplementedError()
```

- `__init__()` is used for one-time loading of inference-time artifacts such as models, tokenizers, etc. that are frequently used during inference, in order to improve inference speed.

- `get_predictions()` takes an input data item whose type depends on the task the model solves, & returns predictions for an input data item.
- `get_predictions()` takes a list of input data items, whose type depends on the task the model solves, and returns a list of predictions.

`get_predictions()` should return any of the output types defined under [output](docs/output.md) and the predict function MUST be decorated with a task corresponding [model type decorator](docs/model_types.md). The model type decorators are responsible for passing input request batches for prediction and formatting the resultant predictions into triton inference responses.
`get_predictions()` should return a list of any of the output types defined under [output](docs/output.md), and the predict function MUST be decorated with the task-specific [@config.inference.wrap_func](docs/model_types.md). The model type decorators are responsible for passing batches of input requests to the prediction function and for formatting the resulting predictions into triton inference responses.

Additional methods can be added to this script's `InferenceModel` class as deemed necessary for model inference, provided they are invoked inside `get_predictions()` if they are used at inference time.
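
For orientation, here is a minimal sketch of an implementation that follows this contract. It assumes a text classifier that returns dummy two-class scores; the names `ModelTypes`, `get_model_config`, `config.inference.wrap_func`, and `ClassifierOutput` come from this repository, while the toy scoring logic is purely illustrative (the Hugging Face examples further down in this PR show real implementations):

```python
import numpy as np

from clarifai.models.model_serving.model_config import ModelTypes, get_model_config
from clarifai.models.model_serving.models.output import ClassifierOutput

config = get_model_config(ModelTypes.text_classifier)


class InferenceModel:
  """Toy text classifier that scores each input in the batch independently."""

  def __init__(self) -> None:
    # One-time setup; a real model would load a checkpoint/tokenizer here.
    self.num_labels = 2

  @config.inference.wrap_func
  def get_predictions(self, input_data: list, **kwargs) -> list:
    # input_data is a batch (list); return one output per item, in the same order.
    outputs = []
    for _ in input_data:
      # Uniform dummy scores standing in for real model probabilities.
      scores = np.full(self.num_labels, 1.0 / self.num_labels)
      outputs.append(ClassifierOutput(predicted_scores=scores))
    return outputs
```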

2 changes: 1 addition & 1 deletion clarifai/models/model_serving/cli/deploy_cli.py
@@ -13,7 +13,7 @@
"""Commandline interface for model upload utils."""
import argparse

from clarifai.auth.helper import ClarifaiAuthHelper
from clarifai.client.auth.helper import ClarifaiAuthHelper
from clarifai.models.api import Models
from clarifai.models.model_serving.model_config import MODEL_TYPES, get_model_config
from clarifai.models.model_serving.model_config.inference_parameter import InferParamManager
2 changes: 2 additions & 0 deletions clarifai/models/model_serving/cli/repository.py
@@ -60,6 +60,7 @@ def model_upload_init():
default=".",
required=True,
help="Directory to create triton repository.")
parser.add_argument("--max_bs", type=int, default=1, required=False, help="Max batch size")

args = parser.parse_args()

@@ -77,6 +78,7 @@ def model_upload_init():
model_name=args.model_name,
model_version="1",
image_shape=args.image_shape,
max_batch_size=args.max_bs,
)

triton_repo = TritonModelRepository(model_config)
@@ -6,4 +6,7 @@ These can be used on the fly with minimal or no changes to test deploy image cla

Required files to run tests locally:

* Download the [model checkpoint from huggingface](https://huggingface.co/nateraw/vit-age-classifier/tree/main) and store it under `age_vit/1/vit-age-classifier/`
* Download the [model checkpoint from huggingface](https://huggingface.co/nateraw/vit-age-classifier/tree/main) and store it under `age_vit/1/checkpoint/`
```
huggingface-cli download nateraw/vit-age-classifier --local-dir age_vit/1/checkpoint/ --local-dir-use-symlinks False
```
@@ -13,11 +13,13 @@

import torch
from scipy.special import softmax
from transformers import ViTFeatureExtractor, ViTForImageClassification
from transformers import AutoImageProcessor, ViTForImageClassification

from clarifai.models.model_serving.models.model_types import visual_classifier
from clarifai.models.model_serving.model_config import ModelTypes, get_model_config
from clarifai.models.model_serving.models.output import ClassifierOutput

config = get_model_config(ModelTypes.visual_classifier)


class InferenceModel:
"""User model inference class."""
@@ -28,29 +30,35 @@ def __init__(self) -> None:
in this method so they are loaded only once for faster inference.
"""
self.base_path: Path = os.path.dirname(__file__)
self.huggingface_model_path: Path = os.path.join(self.base_path, "vit-age-classifier")
self.transforms = ViTFeatureExtractor.from_pretrained(self.huggingface_model_path)
self.huggingface_model_path: Path = os.path.join(self.base_path, "checkpoint")
self.transforms = AutoImageProcessor.from_pretrained(self.huggingface_model_path)
self.model: Callable = ViTForImageClassification.from_pretrained(self.huggingface_model_path)
self.device = "cuda:0" if torch.cuda.is_available() else "cpu"

@visual_classifier
def get_predictions(self, input_data) -> ClassifierOutput:
@config.inference.wrap_func
def get_predictions(self, input_data: list, **kwargs) -> list:
"""
Main model inference method.

Args:
-----
input_data: A single input data item to predict on.
input_data: A list of input data items to predict on.
Input data can be an image or text, etc depending on the model type.

**kwargs: your inference parameters.

Returns:
--------
One of the clarifai.models.model_serving.models.output types. Refer to the README/docs
A list of one of the `clarifai.models.model_serving.models.output` types, or `config.inference.return_type(your_output)`. Refer to the README/docs
"""
# Transform image and pass it to the model
inputs = self.transforms(input_data, return_tensors='pt')
output = self.model(**inputs)
pred_scores = softmax(
output[0][0].detach().numpy()) # alt: softmax(output.logits[0].detach().numpy())
with torch.no_grad():
preds = self.model(**inputs).logits
outputs = []
for pred in preds:
pred_scores = softmax(
pred.detach().numpy()) # alt: softmax(output.logits[0].detach().numpy())
outputs.append(ClassifierOutput(predicted_scores=pred_scores))

return ClassifierOutput(predicted_scores=pred_scores)
return outputs
@@ -21,6 +21,7 @@
pass
from google.protobuf import text_format
from tritonclient.grpc.model_config_pb2 import ModelConfig
from clarifai.models.model_serving.model_config.inference_parameter import parse_req_parameters


class TritonPythonModel:
@@ -37,14 +38,13 @@ def initialize(self, args):
from inference import InferenceModel

self.inference_obj = InferenceModel()
self.device = "cuda:0" if "GPU" in args["model_instance_kind"] else "cpu"

# Read input_name from config file
self.config_msg = ModelConfig()
with open(os.path.join(args["model_repository"], "config.pbtxt"), "r") as f:
cfg = f.read()
text_format.Merge(cfg, self.config_msg)
self.input_name = [inp.name for inp in self.config_msg.input][0]
self.input_names = [inp.name for inp in self.config_msg.input]

def execute(self, requests):
"""
@@ -53,9 +53,22 @@ def execute(self, requests):
responses = []

for request in requests:
in_batch = pb_utils.get_input_tensor_by_name(request, self.input_name)
in_batch = in_batch.as_numpy()
inference_response = self.inference_obj.get_predictions(in_batch)
parameters = request.parameters()
parameters = parse_req_parameters(parameters) if parameters else {}

if len(self.input_names) == 1:
in_batch = pb_utils.get_input_tensor_by_name(request, self.input_names[0])
in_batch = in_batch.as_numpy()
inference_response = self.inference_obj.get_predictions(in_batch, **parameters)
else:
multi_in_batch_dict = {}
for input_name in self.input_names:
in_batch = pb_utils.get_input_tensor_by_name(request, input_name)
in_batch = in_batch.as_numpy() if in_batch is not None else []
multi_in_batch_dict.update({input_name: in_batch})

inference_response = self.inference_obj.get_predictions(multi_in_batch_dict, **parameters)

responses.append(inference_response)

return responses
@@ -1,4 +1,4 @@
clarifai>9.5.3 # for model upload features
clarifai>9.10.5
tritonclient[all]
torch==1.13.1
transformers==4.30.2
@@ -6,4 +6,7 @@ These can be used on the fly with minimal or no changes to test deploy text clas

Required files to run tests locally:

* Download the [model checkpoint & sentencepiece bpe model from huggingface](https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment/tree/main) and store it under `xlm-roberta/1/twitter-xlm-roberta-base-sentiment/`
* Download the [model checkpoint](https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment/tree/main) and store it under `xlm-roberta/1/checkpoint/`
```
huggingface-cli download cardiffnlp/twitter-xlm-roberta-base-sentiment --local-dir xlm-roberta/1/checkpoint/ --local-dir-use-symlinks False
```
@@ -15,9 +15,11 @@
from scipy.special import softmax
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from clarifai.models.model_serving.models.model_types import text_classifier
from clarifai.models.model_serving.model_config import ModelTypes, get_model_config
from clarifai.models.model_serving.models.output import ClassifierOutput

config = get_model_config(ModelTypes.text_classifier)


class InferenceModel:
"""User model inference class."""
@@ -28,28 +30,33 @@ def __init__(self) -> None:
in this method so they are loaded only once for faster inference.
"""
self.base_path: Path = os.path.dirname(__file__)
self.checkpoint_path: Path = os.path.join(self.base_path, "twitter-xlm-roberta-base-sentiment")
self.checkpoint_path: Path = os.path.join(self.base_path, "checkpoint")
self.model: Callable = AutoModelForSequenceClassification.from_pretrained(self.checkpoint_path)
self.tokenizer: Callable = AutoTokenizer.from_pretrained(self.checkpoint_path)
self.device = "cuda:0" if torch.cuda.is_available() else "cpu"

@text_classifier
def get_predictions(self, input_data) -> ClassifierOutput:
@config.inference.wrap_func
def get_predictions(self, input_data: list, **kwargs) -> list:
"""
Main model inference method.

Args:
-----
input_data: A single input data item to predict on.
input_data: A list of input data items to predict on.
Input data can be an image or text, etc depending on the model type.

**kwargs: your inference parameters.

Returns:
--------
One of the clarifai.models.model_serving.models.output types. Refer to the README/docs
A list of one of the `clarifai.models.model_serving.models.output` types, or `config.inference.return_type(your_output)`. Refer to the README/docs
"""
encoded_input = self.tokenizer(input_data, return_tensors='pt')
output = self.model(**encoded_input)
scores = output[0][0].detach().numpy()
scores = softmax(scores)

return ClassifierOutput(predicted_scores=scores)
outputs = []
for inp in input_data:
encoded_input = self.tokenizer(inp, return_tensors='pt')
output = self.model(**encoded_input)
scores = output[0][0].detach().numpy()
scores = softmax(scores)
outputs.append(ClassifierOutput(predicted_scores=scores))

return outputs
@@ -21,6 +21,7 @@
pass
from google.protobuf import text_format
from tritonclient.grpc.model_config_pb2 import ModelConfig
from clarifai.models.model_serving.model_config.inference_parameter import parse_req_parameters


class TritonPythonModel:
@@ -37,14 +38,13 @@ def initialize(self, args):
from inference import InferenceModel

self.inference_obj = InferenceModel()
self.device = "cuda:0" if "GPU" in args["model_instance_kind"] else "cpu"

# Read input_name from config file
self.config_msg = ModelConfig()
with open(os.path.join(args["model_repository"], "config.pbtxt"), "r") as f:
cfg = f.read()
text_format.Merge(cfg, self.config_msg)
self.input_name = [inp.name for inp in self.config_msg.input][0]
self.input_names = [inp.name for inp in self.config_msg.input]

def execute(self, requests):
"""
@@ -53,9 +53,22 @@ def execute(self, requests):
responses = []

for request in requests:
in_batch = pb_utils.get_input_tensor_by_name(request, self.input_name)
in_batch = in_batch.as_numpy()
inference_response = self.inference_obj.get_predictions(in_batch)
parameters = request.parameters()
parameters = parse_req_parameters(parameters) if parameters else {}

if len(self.input_names) == 1:
in_batch = pb_utils.get_input_tensor_by_name(request, self.input_names[0])
in_batch = in_batch.as_numpy()
inference_response = self.inference_obj.get_predictions(in_batch, **parameters)
else:
multi_in_batch_dict = {}
for input_name in self.input_names:
in_batch = pb_utils.get_input_tensor_by_name(request, input_name)
in_batch = in_batch.as_numpy() if in_batch is not None else []
multi_in_batch_dict.update({input_name: in_batch})

inference_response = self.inference_obj.get_predictions(multi_in_batch_dict, **parameters)

responses.append(inference_response)

return responses
@@ -1,5 +1,5 @@
name: "xlm-roberta"
max_batch_size: 1
max_batch_size: 2
input {
name: "text"
data_type: TYPE_STRING
@@ -1,4 +1,4 @@
clarifai>9.5.3 # for model upload features
clarifai>9.10.5
tritonclient[all]
torch==1.13.1
transformers==4.30.2
@@ -4,6 +4,7 @@ These can be used on the fly with minimal or no changes to test deploy text to i

* ### [sd-v1.5 (Stable-Diffusion-v1.5)](./sd-v1.5/)

Requirements to run tests locally:

* Download/Clone the [huggingface model](https://huggingface.co/runwayml/stable-diffusion-v1-5) into the **sd-v1.5/1/** directory then start the triton server.
* Download the [model checkpoint](https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main) and store it under `sd-v1.5/1/checkpoint`
```
huggingface-cli download runwayml/stable-diffusion-v1-5 --local-dir sd-v1.5/1/checkpoint --local-dir-use-symlinks False
```