[EAGLE-3792]-Test case for model upload #256

Merged: 7 commits, Jan 10, 2024
Changes shown are from 2 commits
32 changes: 32 additions & 0 deletions .github/workflows/run_triton_test.yaml
@@ -0,0 +1,32 @@
name: Run triton test

on:
push:
branches: [ master ]
pull_request:

jobs:
triton-model-test:
runs-on: ubuntu-latest

container:
image: nvcr.io/nvidia/tritonserver:23.10-py3
phatvo9 marked this conversation as resolved.

steps:
- name: Install dependencies
run: |
Contributor: The indentation is broken here. You either need to indent lines 17-20 to match line 16, or remove 2 spaces from line 16 to match lines 17-20, and then also fix line 22 and below.

Contributor Author: Thanks for spotting that error.

python -m pip install --upgrade pip
pip install -r requirements.txt
pip install -r tests/requirements.txt

- name: start triton
run: |
tritonserver --model-repository ./tests/dummy_triton_models --exit-on-error false &
phatvo9 marked this conversation as resolved.
background: true
Contributor: This doesn't seem to actually do anything.


- name: Wait for Triton to Initialize
run: sleep 10

- name: Start Model Tests
run: |
python3 -m pytest tests/dummy_triton_models/_test_all_dummies.py -s
phatvo9 marked this conversation as resolved.
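The two review comments above point at concrete problems in this workflow file: the pip install lines are not indented consistently with their run: | block, and background: true is not a recognized GitHub Actions step key (the trailing & in the run command is what actually puts tritonserver in the background), which lines up with the reviewer's remark. A minimal sketch of the steps with both points addressed; the container image, paths, and commands come from the diff, while the step names and exact indentation are editorial assumptions:

  steps:
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt
        pip install -r tests/requirements.txt

    - name: Start triton
      # '&' backgrounds the server for later steps; GitHub Actions has no 'background:' step key
      run: |
        tritonserver --model-repository ./tests/dummy_triton_models --exit-on-error false &

    - name: Wait for Triton to Initialize
      run: sleep 10

    - name: Start Model Tests
      run: |
        python3 -m pytest tests/dummy_triton_models/_test_all_dummies.py -s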
3 changes: 3 additions & 0 deletions clarifai/models/model_serving/models/default_test.py
@@ -67,6 +67,9 @@ def intitialize(self,
is_instance_kind_gpu: bool = True,
inference_parameters: Union[str, Dict[str, Any]] = ""):
import sys
    # drop any previously imported 'inference' module so the repo version under test is imported fresh
if 'inference' in sys.modules:
del sys.modules['inference']
sys.path.append(repo_version_dir)
self.model_type = model_type
self.is_instance_kind_gpu = is_instance_kind_gpu
171 changes: 171 additions & 0 deletions tests/dummy_triton_models/_test_all_dummies.py
@@ -0,0 +1,171 @@
import time

import numpy as np
import pytest
from tritonclient.grpc import InferenceServerClient, InferInput, InferRequestedOutput
from tritonclient.utils import np_to_triton_dtype

MAX_BATCH_SIZE = 4
MAX_TRIES = 5
INTERVAL = 3
count = 0
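# Wait for the Triton server launched by the CI workflow to report live, retrying up to MAX_TRIES times before any tests run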
while count < MAX_TRIES:
try:
_ = InferenceServerClient('localhost:8001').is_server_live()
break
except Exception as e:
print(e)
count += 1
time.sleep(INTERVAL)


@pytest.fixture
def triton_client():
return InferenceServerClient('localhost:8001')


def make_input(name, inputs):
model_input = InferInput(name, inputs.shape, np_to_triton_dtype(inputs.dtype))
model_input.set_data_from_numpy(inputs)
return model_input


def make_random_image_input(name="image", bs=1, size=256):
image = np.random.rand(bs, size, size, 3) * 255
image = image.astype("uint8")
return make_input(name, image)


def make_text_input(name="text", text="this is text", bs=1):
text = np.array([text] * bs, dtype=np.object_).reshape(-1, 1)
return make_input(name, text)


def inference(triton_client, model_name, input_: list, output_names: list):
res = triton_client.infer(
model_name=model_name,
inputs=input_,
outputs=[InferRequestedOutput(each) for each in output_names])
return {output_name: res.as_numpy(output_name) for output_name in output_names}


def execute_test_image_as_input(triton_client, model_name, input_name, output_names):
single_input = make_random_image_input(name=input_name, bs=1, size=256)
res = inference(triton_client, model_name, [single_input], output_names=output_names)
outputs = [res[each] for each in output_names]

if len(outputs) > 1:
assert all(len(each[0]) == 1
for each in outputs), f"[{model_name}], All predictions must have same length"
elif model_name == "visual-classifier":
    assert (outputs[0] <= 1.).all()  # every softmax score must be a valid probability
else:
assert len(outputs[0].shape)

# Test bs > 1
multi_input = make_random_image_input(name=input_name, bs=2, size=256)
res = inference(triton_client, model_name, [multi_input], output_names=output_names)
outputs = [res[each] for each in output_names]

if len(outputs) > 1:
assert all(len(each[0]) == 1
for each in outputs), f"[{model_name}], All predictions must have same length"
elif model_name == "visual-classifier":
    assert (outputs[0] <= 1.).all()  # every softmax score must be a valid probability
else:
assert len(outputs[0].shape)

# Test bs > max_batch_size
with pytest.raises(Exception):
multi_input = make_random_image_input(name=input_name, bs=10, size=256)
res = inference(triton_client, model_name, [multi_input], output_names=output_names)


def execute_test_text_as_input(triton_client, model_name, input_name, output_names):
single_input = make_text_input(name=input_name, bs=1)
res = inference(triton_client, model_name, [single_input], output_names=output_names)
outputs = [res[each] for each in output_names]

if model_name == "text-to-image":
assert len(outputs[0][0].shape) == 3
elif model_name == "text-classifier":
    assert (outputs[0] <= 1.).all()  # every softmax score must be a valid probability
else:
assert len(outputs[0].shape)

# Test bs > 1
multi_input = make_text_input(name=input_name, bs=2)
res = inference(triton_client, model_name, [multi_input], output_names=output_names)
outputs = [res[each] for each in output_names]

if model_name == "text-to-image":
assert len(outputs[0][0].shape) == 3
elif model_name == "text-classifier":
    assert (outputs[0] <= 1.).all()  # every softmax score must be a valid probability
else:
assert len(outputs[0].shape)

# Test bs > max_batch_size
with pytest.raises(Exception):
multi_input = make_text_input(name=input_name, bs=10)
res = inference(triton_client, model_name, [multi_input], output_names=output_names)


class TestModelTypes:

# --------- Image Input --------- #
def test_visual_detector(self, triton_client):
model_name = "visual-detector"
input_name = "image"
output_names = ["predicted_bboxes", "predicted_labels", "predicted_scores"]
execute_test_image_as_input(triton_client, model_name, input_name, output_names)

def test_visual_classifier(self, triton_client):
model_name = "visual-classifier"
input_name = "image"
output_names = ["softmax_predictions"]
execute_test_image_as_input(triton_client, model_name, input_name, output_names)

def test_visual_embedder(self, triton_client):
model_name = "visual-embedder"
input_name = "image"
output_names = ["embeddings"]
execute_test_image_as_input(triton_client, model_name, input_name, output_names)

def test_visual_segmenter(self, triton_client):
model_name = "visual-segmenter"
input_name = "image"
output_names = ["predicted_mask"]
execute_test_image_as_input(triton_client, model_name, input_name, output_names)

# --------- Text Input --------- #
def test_text_to_image(self, triton_client):
model_name = "text-to-image"
input_name = "text"
output_names = ["image"]
execute_test_text_as_input(triton_client, model_name, input_name, output_names)

def test_text_classifier(self, triton_client):
model_name = "text-classifier"
input_name = "text"
output_names = ["softmax_predictions"]
execute_test_text_as_input(triton_client, model_name, input_name, output_names)

def test_text_embedder(self, triton_client):
model_name = "text-embedder"
input_name = "text"
output_names = ["embeddings"]
execute_test_text_as_input(triton_client, model_name, input_name, output_names)

def test_text_to_text(self, triton_client):
model_name = "text-to-text"
input_name = "text"
output_names = ["text"]
execute_test_text_as_input(triton_client, model_name, input_name, output_names)

# --------- Multimodal Inputs --------- #
def test_multimodal_embedder(self, triton_client):
model_name = "multimodal-embedder"
output_names = ["embeddings"]
execute_test_image_as_input(triton_client, model_name, "image", output_names)
execute_test_text_as_input(triton_client, model_name, "text", output_names)
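Not part of the test suite: the helpers above also work for poking at a single model interactively. A quick sketch, assuming a Triton server with the dummy models is already running on localhost:8001; unlike the test class, it sends both of the multimodal embedder's inputs in one request:

  client = InferenceServerClient('localhost:8001')
  inputs = [
      make_random_image_input(name="image", bs=1, size=256),
      make_text_input(name="text", text="a photo of a cat", bs=1),
  ]
  result = inference(client, "multimodal-embedder", inputs, output_names=["embeddings"])
  print(result["embeddings"].shape)  # the dummy model returns 768-dim embeddings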
Empty file.
56 changes: 56 additions & 0 deletions tests/dummy_triton_models/multimodal-embedder/1/inference.py
@@ -0,0 +1,56 @@
# This file contains boilerplate code that lets users write their model
# inference code, which then interacts with the Triton Inference Server
# Python backend to serve end-user requests.
# The module name, module path, class name & get_predictions() method name MUST be kept as is,
# but other methods may be added within the class as deemed fit, provided
# they are invoked from the main get_predictions() inference method
# whenever they play a role in any step of model inference.
"""User model inference script."""

import os
from pathlib import Path
import numpy as np
from clarifai.models.model_serving.model_config import ModelTypes, get_model_config

config = get_model_config(ModelTypes.multimodal_embedder)


class InferenceModel:
"""User model inference class."""

def __init__(self) -> None:
"""
    Load inference-time artifacts that are called frequently (e.g. models, tokenizers, etc.)
in this method so they are loaded only once for faster inference.
"""
self.base_path: Path = os.path.dirname(__file__)
## sample model loading code:
#self.checkpoint_path: Path = os.path.join(self.base_path, "your checkpoint filename/path")
#self.model: Callable = <load_your_model_here from checkpoint or folder>

@config.inference.wrap_func
def get_predictions(self, input_data, **kwargs):
"""
Main model inference method.

Args:
-----
input_data: A single input data item to predict on.
Input data can be an image or text, etc depending on the model type.

Returns:
--------
One of the clarifai.models.model_serving.models.output types. Refer to the README/docs
"""
outputs = []
for inp_data in input_data:
image, text = inp_data["image"], inp_data["text"]
if text is not None:
assert isinstance(text, str), "Incorrect type of text, expected str"
embeddings = np.zeros(768)
else:
assert isinstance(image, np.ndarray), "Incorrect type of image, expected np.ndarray"
embeddings = np.ones(768)
outputs.append(config.inference.return_type(embedding_vector=embeddings))

return outputs
74 changes: 74 additions & 0 deletions tests/dummy_triton_models/multimodal-embedder/1/model.py
@@ -0,0 +1,74 @@
# Copyright 2023 Clarifai, Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Triton inference server Python Backend Model."""

import os
import sys

try:
import triton_python_backend_utils as pb_utils
except ModuleNotFoundError:
pass
from google.protobuf import text_format
from tritonclient.grpc.model_config_pb2 import ModelConfig
from clarifai.models.model_serving.model_config.inference_parameter import parse_req_parameters


class TritonPythonModel:
"""
Triton Python BE Model.
"""

def initialize(self, args):
"""
Triton server init.
"""
args["model_repository"] = args["model_repository"].replace("/1/model.py", "")
sys.path.append(os.path.dirname(__file__))
from inference import InferenceModel

self.inference_obj = InferenceModel()

# Read input_name from config file
self.config_msg = ModelConfig()
with open(os.path.join(args["model_repository"], "config.pbtxt"), "r") as f:
cfg = f.read()
text_format.Merge(cfg, self.config_msg)
self.input_names = [inp.name for inp in self.config_msg.input]

def execute(self, requests):
"""
Serve model inference requests.
"""
responses = []

for request in requests:
parameters = request.parameters()
parameters = parse_req_parameters(parameters) if parameters else {}

if len(self.input_names) == 1:
in_batch = pb_utils.get_input_tensor_by_name(request, self.input_names[0])
in_batch = in_batch.as_numpy()
inference_response = self.inference_obj.get_predictions(in_batch, **parameters)
else:
multi_in_batch_dict = {}
for input_name in self.input_names:
in_batch = pb_utils.get_input_tensor_by_name(request, input_name)
in_batch = in_batch.as_numpy() if in_batch is not None else []
multi_in_batch_dict.update({input_name: in_batch})

inference_response = self.inference_obj.get_predictions(multi_in_batch_dict, **parameters)

responses.append(inference_response)

return responses
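For the multimodal embedder, the multi-input branch above hands get_predictions a dict keyed by the input names read from config.pbtxt, one numpy batch per input. A rough illustration of that payload, with shapes borrowed from the tests (the exact dtypes and the per-item unpacking are handled by the model config and the wrap_func decorator, so treat the details as assumptions):

  multi_in_batch_dict = {
      "image": (np.random.rand(2, 256, 256, 3) * 255).astype("uint8"),  # batch of 2 images
      "text": np.array([["a photo of a cat"], ["hello"]], dtype=np.object_),  # batch of 2 strings
  }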