wip: deploy to triton scripts for launch #349

Draft: wants to merge 2 commits into base: master
10 changes: 10 additions & 0 deletions examples/launch/nvidia-triton/Dockerfile
@@ -0,0 +1,10 @@
FROM nvcr.io/nvidia/tritonserver:22.11-py3

RUN pip install wandb
RUN mkdir /code
RUN mkdir /model_repository
COPY ./deploy.py /code/deploy.py

WORKDIR /code

ENTRYPOINT ["python3", "deploy.py"]
40 changes: 40 additions & 0 deletions examples/launch/nvidia-triton/deploy.py
@@ -0,0 +1,40 @@
import os
import shutil
import subprocess
import wandb


settings = wandb.Settings()
settings.update({"enable_job_creation": True})

config = {
    "wandb_model_artifact": "jzhao/examples-examples_keras_keras-cnn-fashion/model-fresh-dream-1:v8",
}

run = wandb.init(entity="jzhao", config=config, project="examples-examples_keras_keras-cnn-fashion", settings=settings)
config = run.config

model_artifact = config["wandb_model_artifact"]
artifact = run.use_artifact(config["wandb_model_artifact"])
artifact_name, artifact_version_str = artifact.name.split(":")
artifact_version = artifact_version_str.strip("v")
path = artifact.download()


# DEPLOY TO REMOTE MODEL REPOSITORY (S3, GCP, NFS)
# assume the artifact is a TensorFlow SavedModel; create a "model.savedmodel" folder for it
model_path = os.path.join("/model_repository", artifact_name, artifact_version, "model.savedmodel")
# create the local model repository for Triton
os.makedirs(model_path, exist_ok=True)
# copy over the saved model files to triton
shutil.copytree(path, model_path, dirs_exist_ok=True)

# CALL TRITON LOAD MODEL URL

# VERIFY MODEL IS RUNNING

run.finish()

# start the triton server
proc = subprocess.run(["tritonserver", "--model-repository", "/model_repository"])
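
The two placeholder comments above (CALL TRITON LOAD MODEL URL and VERIFY MODEL IS RUNNING) are implemented more completely in deploy_remote.py below. For the local case, a minimal sketch of those two steps could reuse the same tritonclient calls; it assumes the Triton server has already been started separately with --model-control-mode=explicit and is reachable at localhost:8000, neither of which is set up by the current script.

import tritonclient.http as httpclient

# Sketch only: assumes a Triton server is already running at localhost:8000
# and was started with --model-control-mode=explicit so models can be loaded on demand.
triton_client = httpclient.InferenceServerClient(url="localhost:8000")

# CALL TRITON LOAD MODEL URL: ask the server to load the model from /model_repository
triton_client.load_model(artifact_name)

# VERIFY MODEL IS RUNNING: check the per-model readiness endpoint
if not triton_client.is_model_ready(artifact_name):
    raise Exception(f"Triton: failed to load model {artifact_name}")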
81 changes: 81 additions & 0 deletions examples/launch/nvidia-triton/deploy_remote.py
@@ -0,0 +1,81 @@
import json
import os
import sys

import boto3
import tritonclient.http as httpclient
import wandb


settings = wandb.Settings()
settings.update({"enable_job_creation": True})

config = {
    "triton_url": "localhost:8000",
    "triton_bucket": "wandb-jason-test",
    "triton_model_repository_path": "model_repository",
    "wandb_model_artifact": "jzhao/examples-examples_keras_keras-cnn-fashion/model-fresh-dream-1:v8",
}

run = wandb.init(entity="jzhao", config=config, project="examples-examples_keras_keras-cnn-fashion", settings=settings)
config = run.config

s3_client = boto3.client("s3")
triton_client = httpclient.InferenceServerClient(url=config["triton_url"])


model_artifact = config["wandb_model_artifact"]
artifact = run.use_artifact(config["wandb_model_artifact"])
artifact_name, artifact_version_str = artifact.name.split(":")
# Triton model version numbers must be integers
artifact_version = artifact_version_str.strip("v")

# Triton model API doesn't handle versions very well
for model in triton_client.get_model_repository_index():
    if model.get("name") != artifact_name:
        continue
    if model.get("version", None) is None or model.get("state", None) is None:
        continue
    if model.get("version") == artifact_version and model.get("state") == "READY":
        print(f"Model {artifact_name} is already loaded, skipping deployment")
        sys.exit(0)

path = artifact.download()

# copy the model contents to the remote model repository; assume the repository lives in S3 for now
print("Uploading model to Triton model repository...")
remote_path = os.path.join(config["triton_model_repository_path"], artifact_name, artifact_version, "model.savedmodel")

for root, _, files in os.walk(path):
    for f in files:
        full_path = os.path.join(root, f)
        rel_path = os.path.relpath(full_path, path)
        remote_obj_path = os.path.join(remote_path, rel_path)
        print(f"Uploading {rel_path} to {remote_obj_path}")
        s3_client.upload_file(full_path, config["triton_bucket"], remote_obj_path)


print("Finished uploading model to Triton model repository")


# verify the model exists in the repository before loading it; note we cannot verify version numbers prior to loading
model_exists = False
for model in triton_client.get_model_repository_index():
    if model["name"] == artifact_name:
        model_exists = True

if not model_exists:
    raise Exception(f"Triton: Failed to add model {artifact_name} to repository")

# generate model config policy to load specific version
# see: https://github.com/triton-inference-server/server/issues/4416
version_config = {"version_policy": {"specific": {"versions": [artifact_version]}}}
triton_client.load_model(artifact_name, config=json.dumps(version_config))
if not triton_client.is_model_ready(artifact_name):
    raise Exception(f"Triton: Failed to load model {artifact_name}")

print("Successfully loaded Triton model")

run.log_code()
run.finish()
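
Once load_model succeeds and is_model_ready returns true, one way to verify the deployment end to end is to send a small inference request through the same client. A minimal sketch follows; the input shape and FP32 datatype are assumptions about the Keras fashion-MNIST model, and the tensor names are read from the server's model metadata rather than hard-coded.

import numpy as np

# Sketch only: the (1, 28, 28, 1) shape and FP32 datatype are assumptions about
# the fashion-MNIST Keras model; tensor names come from the model metadata.
metadata = triton_client.get_model_metadata(artifact_name, model_version=artifact_version)
input_name = metadata["inputs"][0]["name"]
output_name = metadata["outputs"][0]["name"]

dummy = np.zeros((1, 28, 28, 1), dtype=np.float32)
infer_input = httpclient.InferInput(input_name, list(dummy.shape), "FP32")
infer_input.set_data_from_numpy(dummy)

result = triton_client.infer(artifact_name, inputs=[infer_input], model_version=artifact_version)
print("Inference output shape:", result.as_numpy(output_name).shape)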
2 changes: 2 additions & 0 deletions examples/launch/nvidia-triton/requirements.txt
@@ -0,0 +1,2 @@
boto3
tritonclient[http]