From b91f03f2bbd6d48f61efbad0049b50c88763e277 Mon Sep 17 00:00:00 2001 From: pvijayakrish Date: Tue, 29 Oct 2024 09:47:43 -0700 Subject: [PATCH 1/5] Update server versions post 24.09 --- Dockerfile.sdk | 2 +- README.md | 10 +++++----- TRITON_VERSION | 2 +- build.py | 12 ++++++------ deploy/aws/values.yaml | 2 +- deploy/fleetcommand/Chart.yaml | 2 +- deploy/fleetcommand/values.yaml | 6 +++--- deploy/gcp/values.yaml | 2 +- .../perf-analyzer-script/triton_client.yaml | 2 +- .../server-deployer/build_and_push.sh | 6 +++--- .../server-deployer/chart/triton/Chart.yaml | 4 ++-- .../server-deployer/chart/triton/values.yaml | 6 +++--- .../server-deployer/data-test/schema.yaml | 2 +- .../server-deployer/schema.yaml | 4 ++-- .../gke-marketplace-app/trt-engine/README.md | 6 +++--- deploy/k8s-onprem/values.yaml | 2 +- deploy/oci/values.yaml | 2 +- docs/customization_guide/build.md | 6 +++--- docs/customization_guide/compose.md | 18 +++++++++--------- docs/customization_guide/test.md | 2 +- docs/generate_docs.py | 4 ++-- docs/user_guide/custom_operations.md | 6 +++--- docs/user_guide/performance_tuning.md | 4 ++-- qa/common/gen_jetson_trt_models | 2 +- qa/common/gen_qa_custom_ops | 2 +- qa/common/gen_qa_model_repository | 6 +++--- 26 files changed, 61 insertions(+), 61 deletions(-) diff --git a/Dockerfile.sdk b/Dockerfile.sdk index e68d76752b..36995dbf6c 100644 --- a/Dockerfile.sdk +++ b/Dockerfile.sdk @@ -29,7 +29,7 @@ # # Base image on the minimum Triton container -ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.09-py3-min +ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.10-py3-min ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo ARG TRITON_PA_REPO_SUBDIR=perfanalyzerrepo diff --git a/README.md b/README.md index 36ef51f279..9acd16ff52 100644 --- a/README.md +++ b/README.md @@ -32,8 +32,8 @@ >[!WARNING] >You are currently on the `main` branch which tracks under-development progress ->towards the next release. The current release is version [2.50.0](https://github.com/triton-inference-server/server/releases/latest) ->and corresponds to the 24.09 container release on NVIDIA GPU Cloud (NGC). +>towards the next release. The current release is version [2.51.0](https://github.com/triton-inference-server/server/releases/latest) +>and corresponds to the 24.10 container release on NVIDIA GPU Cloud (NGC). Triton Inference Server is an open source inference serving software that streamlines AI inferencing. 
Triton enables teams to deploy any AI model from @@ -91,16 +91,16 @@ Inference Server with the ```bash # Step 1: Create the example model repository -git clone -b r24.09 https://github.com/triton-inference-server/server.git +git clone -b r24.10 https://github.com/triton-inference-server/server.git cd server/docs/examples ./fetch_models.sh # Step 2: Launch triton from the NGC Triton container -docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.09-py3 tritonserver --model-repository=/models +docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.10-py3 tritonserver --model-repository=/models # Step 3: Sending an Inference Request # In a separate console, launch the image_client example from the NGC Triton SDK container -docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.09-py3-sdk +docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.10-py3-sdk /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg # Inference should return the following diff --git a/TRITON_VERSION b/TRITON_VERSION index 124ddb483d..c75284b908 100644 --- a/TRITON_VERSION +++ b/TRITON_VERSION @@ -1 +1 @@ -2.51.0dev \ No newline at end of file +2.52.0dev \ No newline at end of file diff --git a/build.py b/build.py index 14301f843d..dbea6f083e 100755 --- a/build.py +++ b/build.py @@ -70,14 +70,14 @@ # incorrectly load the other version of the openvino libraries. # TRITON_VERSION_MAP = { - "2.51.0dev": ( - "24.10dev", # triton container - "24.09", # upstream container + "2.52.0dev": ( + "24.11dev", # triton container + "24.10", # upstream container "1.19.2", # ORT - "2024.0.0", # ORT OpenVINO - "2024.0.0", # Standalone OpenVINO + "2024.4.0", # ORT OpenVINO + "2024.4.0", # Standalone OpenVINO "3.2.6", # DCGM version - "0.5.3.post1", # vLLM version + "0.5.5", # vLLM version ) } diff --git a/deploy/aws/values.yaml b/deploy/aws/values.yaml index bd8ae0fe3b..1e0eb97960 100644 --- a/deploy/aws/values.yaml +++ b/deploy/aws/values.yaml @@ -27,7 +27,7 @@ replicaCount: 1 image: - imageName: nvcr.io/nvidia/tritonserver:24.09-py3 + imageName: nvcr.io/nvidia/tritonserver:24.10-py3 pullPolicy: IfNotPresent modelRepositoryPath: s3://triton-inference-server-repository/model_repository numGpus: 1 diff --git a/deploy/fleetcommand/Chart.yaml b/deploy/fleetcommand/Chart.yaml index 8feee92b3c..4e3c87c387 100644 --- a/deploy/fleetcommand/Chart.yaml +++ b/deploy/fleetcommand/Chart.yaml @@ -26,7 +26,7 @@ apiVersion: v1 # appVersion is the Triton version; update when changing release -appVersion: "2.50.0" +appVersion: "2.51.0" description: Triton Inference Server (Fleet Command) name: triton-inference-server # version is the Chart version; update when changing anything in the chart diff --git a/deploy/fleetcommand/values.yaml b/deploy/fleetcommand/values.yaml index dc5f37ca3b..6d8357cfdc 100644 --- a/deploy/fleetcommand/values.yaml +++ b/deploy/fleetcommand/values.yaml @@ -27,7 +27,7 @@ replicaCount: 1 image: - imageName: nvcr.io/nvidia/tritonserver:24.09-py3 + imageName: nvcr.io/nvidia/tritonserver:24.10-py3 pullPolicy: IfNotPresent numGpus: 1 serverCommand: tritonserver @@ -47,13 +47,13 @@ image: # # To set model control mode, uncomment and configure below # TODO: Fix the following url, it is invalid - # See https://github.com/triton-inference-server/server/blob/r24.09/docs/model_management.md + # See https://github.com/triton-inference-server/server/blob/r24.10/docs/model_management.md # for 
more details #- --model-control-mode=explicit|poll|none # # Additional server args # - # see https://github.com/triton-inference-server/server/blob/r24.09/README.md + # see https://github.com/triton-inference-server/server/blob/r24.10/README.md # for more details service: diff --git a/deploy/gcp/values.yaml b/deploy/gcp/values.yaml index c5427c151e..d52c652709 100644 --- a/deploy/gcp/values.yaml +++ b/deploy/gcp/values.yaml @@ -27,7 +27,7 @@ replicaCount: 1 image: - imageName: nvcr.io/nvidia/tritonserver:24.09-py3 + imageName: nvcr.io/nvidia/tritonserver:24.10-py3 pullPolicy: IfNotPresent modelRepositoryPath: gs://triton-inference-server-repository/model_repository numGpus: 1 diff --git a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml index a63a12ce34..d50d3bd610 100644 --- a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml +++ b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml @@ -33,7 +33,7 @@ metadata: namespace: default spec: containers: - - image: nvcr.io/nvidia/tritonserver:24.09-py3-sdk + - image: nvcr.io/nvidia/tritonserver:24.10-py3-sdk imagePullPolicy: Always name: nv-triton-client securityContext: diff --git a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh index 19d84816a0..4aff10903f 100755 --- a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh +++ b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh @@ -27,9 +27,9 @@ export REGISTRY=gcr.io/$(gcloud config get-value project | tr ':' '/') export APP_NAME=tritonserver -export MAJOR_VERSION=2.50 -export MINOR_VERSION=2.50.0 -export NGC_VERSION=24.09-py3 +export MAJOR_VERSION=2.51 +export MINOR_VERSION=2.51.0 +export NGC_VERSION=24.10-py3 docker pull nvcr.io/nvidia/$APP_NAME:$NGC_VERSION diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml index e9f8880a0b..027deb1d2f 100644 --- a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml +++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml @@ -25,7 +25,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
apiVersion: v1 -appVersion: "2.50" +appVersion: "2.51" description: Triton Inference Server name: triton-inference-server -version: 2.50.0 +version: 2.51.0 diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml index 450d8f735c..76a457bc86 100644 --- a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml +++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml @@ -31,14 +31,14 @@ maxReplicaCount: 3 tritonProtocol: HTTP # HPA GPU utilization autoscaling target HPATargetAverageValue: 85 -modelRepositoryPath: gs://triton_sample_models/24.09 -publishedVersion: '2.50.0' +modelRepositoryPath: gs://triton_sample_models/24.10 +publishedVersion: '2.51.0' gcpMarketplace: true image: registry: gcr.io repository: nvidia-ngc-public/tritonserver - tag: 24.09-py3 + tag: 24.10-py3 pullPolicy: IfNotPresent # modify the model repository here to match your GCP storage bucket numGpus: 1 diff --git a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml index 16494b5261..be46874dba 100644 --- a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml +++ b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml @@ -27,7 +27,7 @@ x-google-marketplace: schemaVersion: v2 applicationApiVersion: v1beta1 - publishedVersion: '2.50.0' + publishedVersion: '2.51.0' publishedVersionMetadata: releaseNote: >- Initial release. diff --git a/deploy/gke-marketplace-app/server-deployer/schema.yaml b/deploy/gke-marketplace-app/server-deployer/schema.yaml index f3525a52f1..545abda602 100644 --- a/deploy/gke-marketplace-app/server-deployer/schema.yaml +++ b/deploy/gke-marketplace-app/server-deployer/schema.yaml @@ -27,7 +27,7 @@ x-google-marketplace: schemaVersion: v2 applicationApiVersion: v1beta1 - publishedVersion: '2.50.0' + publishedVersion: '2.51.0' publishedVersionMetadata: releaseNote: >- Initial release. @@ -89,7 +89,7 @@ properties: modelRepositoryPath: type: string title: Bucket where models are stored. Please make sure the user/service account to create the GKE app has permission to this GCS bucket. Read Triton documentation on configs and formatting details, supporting TensorRT, TensorFlow, Pytorch, Onnx ... etc. - default: gs://triton_sample_models/24.09 + default: gs://triton_sample_models/24.10 image.ldPreloadPath: type: string title: Leave this empty by default. Triton allows users to create custom layers for backend such as TensorRT plugin or Tensorflow custom ops, the compiled shared library must be provided via LD_PRELOAD environment variable. 
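The `image.ldPreloadPath` field described in the schema above relies on Triton's standard custom-operation mechanism: a compiled plugin shared library is made visible to the server through the `LD_PRELOAD` environment variable. A minimal sketch of what the deployer effectively arranges inside the container is shown below; the library name and plugin path are illustrative placeholders, not values taken from this chart.

```bash
# Hypothetical example only: preload a custom-op shared library before
# launching Triton so the plugin kernels are registered when models load.
# /plugins/libcustomplugin.so is a placeholder; the model repository path
# matches the chart's modelRepositoryPath default.
LD_PRELOAD=/plugins/libcustomplugin.so \
  tritonserver --model-repository=gs://triton_sample_models/24.10
```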
diff --git a/deploy/gke-marketplace-app/trt-engine/README.md b/deploy/gke-marketplace-app/trt-engine/README.md
index 0c8012eb68..846d95f9f4 100644
--- a/deploy/gke-marketplace-app/trt-engine/README.md
+++ b/deploy/gke-marketplace-app/trt-engine/README.md
@@ -33,7 +33,7 @@
 ```
 docker run --gpus all -it --network host \
   --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
-  -v ~:/scripts nvcr.io/nvidia/tensorrt:24.09-py3
+  -v ~:/scripts nvcr.io/nvidia/tensorrt:24.10-py3
 
 pip install onnx six torch tf2onnx tensorflow
 
@@ -57,7 +57,7 @@ mkdir -p engines
 
 python3 builder.py -m models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/model.ckpt -o engines/bert_large_int8_bs1_s128.engine -b 1 -s 128 -c models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/ -v models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/vocab.txt --int8 --fp16 --strict --calib-num 1 -iln -imh
 
-gsutil cp bert_large_int8_bs1_s128.engine gs://triton_sample_models/24.09/bert/1/model.plan
+gsutil cp bert_large_int8_bs1_s128.engine gs://triton_sample_models/24.10/bert/1/model.plan
 ```
 
-For each Triton upgrade, container version used to generate the model, and the model path in GCS `gs://triton_sample_models/24.09/` should be updated accordingly with the correct version.
+For each Triton upgrade, the container version used to generate the model and the model path in GCS `gs://triton_sample_models/24.10/` should be updated to match the release.
diff --git a/deploy/k8s-onprem/values.yaml b/deploy/k8s-onprem/values.yaml
index ccee5e9c24..3dce86900e 100644
--- a/deploy/k8s-onprem/values.yaml
+++ b/deploy/k8s-onprem/values.yaml
@@ -29,7 +29,7 @@ tags:
   loadBalancing: true
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:24.09-py3
+  imageName: nvcr.io/nvidia/tritonserver:24.10-py3
   pullPolicy: IfNotPresent
   modelRepositoryServer: < Replace with the IP Address of your file server >
   modelRepositoryPath: /srv/models
diff --git a/deploy/oci/values.yaml b/deploy/oci/values.yaml
index 55b8193ee2..c4226eb993 100644
--- a/deploy/oci/values.yaml
+++ b/deploy/oci/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:24.09-py3
+  imageName: nvcr.io/nvidia/tritonserver:24.10-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: s3://https://.compat.objectstorage..oraclecloud.com:443/triton-inference-server-repository
   numGpus: 1
diff --git a/docs/customization_guide/build.md b/docs/customization_guide/build.md
index 56e5875776..9a03c2e97f 100644
--- a/docs/customization_guide/build.md
+++ b/docs/customization_guide/build.md
@@ -173,7 +173,7 @@ $ ./build.py ... --repo-tag=common:<container-tag> --repo-tag=core:<container-tag>
 will default to the branch name. For example, if you are building on the
-r24.09 branch, `<container-tag>` will default to r24.09. If you are
+r24.10 branch, `<container-tag>` will default to r24.10. If you are
 building on any other branch (including the *main* branch) then
 `<container-tag>` will default to "main". Therefore, you typically do
 not need to provide `<container-tag>` at all (nor the preceding
@@ -334,8 +334,8 @@ python build.py --cmake-dir=<path/to/repo>/build --build-dir=/tmp/citritonbuild
 If you are building on *main* branch then `<container-tag>` will
 default to "main". If you are building on a release branch then
 `<container-tag>` will default to the branch name. For example, if you
-are building on the r24.09 branch, `<container-tag>` will default to
-r24.09. Therefore, you typically do not need to provide `<container-tag>`
+are building on the r24.10 branch, `<container-tag>` will default to
+r24.10. Therefore, you typically do not need to provide `<container-tag>`
 at all (nor the preceding colon).
 You can use a different `<container-tag>` for a component to instead use the
 corresponding branch/tag in the build. For example, if you have a branch called
diff --git a/docs/customization_guide/compose.md b/docs/customization_guide/compose.md
index 0c6afc1e0b..98c770b9b2 100644
--- a/docs/customization_guide/compose.md
+++ b/docs/customization_guide/compose.md
@@ -46,8 +46,8 @@ The `compose.py` script can be found in the
 Simply clone the repository and run `compose.py` to create a custom container.
 Note: Created container version will depend on the branch that was cloned.
 For example branch
- [r24.09](https://github.com/triton-inference-server/server/tree/r24.09)
-should be used to create a image based on the NGC 24.09 Triton release.
+ [r24.10](https://github.com/triton-inference-server/server/tree/r24.10)
+should be used to create an image based on the NGC 24.10 Triton release.
 
 `compose.py` provides `--backend`, `--repoagent` options that allow you to
 specify which backends and repository agents to include in the custom image.
@@ -79,20 +79,20 @@ For example, running
 ```
 python3 compose.py --backend pytorch --repoagent checksum
 ```
-on branch [r24.09](https://github.com/triton-inference-server/server/tree/r24.09) pulls:
-- `min` container `nvcr.io/nvidia/tritonserver:24.09-py3-min`
-- `full` container `nvcr.io/nvidia/tritonserver:24.09-py3`
+on branch [r24.10](https://github.com/triton-inference-server/server/tree/r24.10) pulls:
+- `min` container `nvcr.io/nvidia/tritonserver:24.10-py3-min`
+- `full` container `nvcr.io/nvidia/tritonserver:24.10-py3`
 
 Alternatively, users can specify the version of Triton container
 to pull from any branch by either:
 1. Adding flag `--container-version <xx.yy>` to branch
 ```
-python3 compose.py --backend pytorch --repoagent checksum --container-version 24.09
+python3 compose.py --backend pytorch --repoagent checksum --container-version 24.10
 ```
 2. Specifying `--image min,<min container image> --image full,<full container image>`.
    The user is responsible for specifying compatible `min` and `full` containers.
 ```
-python3 compose.py --backend pytorch --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:24.09-py3-min --image full,nvcr.io/nvidia/tritonserver:24.09-py3
+python3 compose.py --backend pytorch --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:24.10-py3-min --image full,nvcr.io/nvidia/tritonserver:24.10-py3
 ```
 Method 1 and 2 will result in the same composed container. Furthermore,
 `--image` flag overrides the `--container-version` flag when both are specified.
 
@@ -103,8 +103,8 @@ Note:
 2. vLLM and TensorRT-LLM backends are currently not supported backends for
 `compose.py`. If you want to build additional backends on top of these backends,
 it would be better to [build it yourself](#build-it-yourself) by using
-`nvcr.io/nvidia/tritonserver:24.09-vllm-python-py3` or
-`nvcr.io/nvidia/tritonserver:24.09-trtllm-python-py3` as a `min` container.
+`nvcr.io/nvidia/tritonserver:24.10-vllm-python-py3` or
+`nvcr.io/nvidia/tritonserver:24.10-trtllm-python-py3` as a `min` container.
 
 ### CPU-only container composition
diff --git a/docs/customization_guide/test.md b/docs/customization_guide/test.md
index 8487e6e3ad..5cdfecf034 100644
--- a/docs/customization_guide/test.md
+++ b/docs/customization_guide/test.md
@@ -49,7 +49,7 @@ $ ./gen_qa_custom_ops
 ```
 
 This will create multiple model repositories in /tmp/\<version\>/qa_*
-(for example /tmp/24.09/qa_model_repository).
+(for example /tmp/24.10/qa_model_repository).
The TensorRT models will be created for the GPU on the system that CUDA considers device 0 (zero). If you have multiple GPUs on your system see the documentation in the scripts for how to target a specific GPU. diff --git a/docs/generate_docs.py b/docs/generate_docs.py index cb7ed02d9f..f2ef78c228 100755 --- a/docs/generate_docs.py +++ b/docs/generate_docs.py @@ -43,11 +43,11 @@ """ TODO: Needs to handle cross-branch linkage. -For example, server/docs/user_guide/architecture.md on branch 24.09 links to +For example, server/docs/user_guide/architecture.md on branch 24.10 links to server/docs/user_guide/model_analyzer.md on main branch. In this case, the hyperlink of model_analyzer.md should be a URL instead of relative path. -Another example can be server/docs/user_guide/model_analyzer.md on branch 24.09 +Another example can be server/docs/user_guide/model_analyzer.md on branch 24.10 links to a file in server repo with relative path. Currently all URLs are hardcoded to main branch. We need to make sure that the URL actually points to the correct branch. We also need to handle cases like deprecated or removed files from diff --git a/docs/user_guide/custom_operations.md b/docs/user_guide/custom_operations.md index 88a7037c7f..fdc9f2044f 100644 --- a/docs/user_guide/custom_operations.md +++ b/docs/user_guide/custom_operations.md @@ -64,7 +64,7 @@ simple way to ensure you are using the correct version of TensorRT is to use the [NGC TensorRT container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorrt) corresponding to the Triton container. For example, if you are using -the 24.09 version of Triton, use the 24.09 version of the TensorRT +the 24.10 version of Triton, use the 24.10 version of the TensorRT container. ## TensorFlow @@ -123,7 +123,7 @@ simple way to ensure you are using the correct version of TensorFlow is to use the [NGC TensorFlow container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorflow) corresponding to the Triton container. For example, if you are using -the 24.09 version of Triton, use the 24.09 version of the TensorFlow +the 24.10 version of Triton, use the 24.10 version of the TensorFlow container. ## PyTorch @@ -167,7 +167,7 @@ simple way to ensure you are using the correct version of PyTorch is to use the [NGC PyTorch container](https://ngc.nvidia.com/catalog/containers/nvidia:pytorch) corresponding to the Triton container. For example, if you are using -the 24.09 version of Triton, use the 24.09 version of the PyTorch +the 24.10 version of Triton, use the 24.10 version of the PyTorch container. ## ONNX diff --git a/docs/user_guide/performance_tuning.md b/docs/user_guide/performance_tuning.md index efea32a63b..3f3202457d 100644 --- a/docs/user_guide/performance_tuning.md +++ b/docs/user_guide/performance_tuning.md @@ -235,7 +235,7 @@ with a `tritonserver` binary. 
```bash # Start server container -docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:24.09-py3 +docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:24.10-py3 # Start serving your models tritonserver --model-repository=/mnt/models @@ -284,7 +284,7 @@ by setting the `-u` flag, such as `perf_analyzer -m densenet_onnx -u ```bash # Start the SDK container interactively -docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:24.09-py3-sdk +docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:24.10-py3-sdk # Benchmark model being served from step 3 perf_analyzer -m densenet_onnx --concurrency-range 1:4 diff --git a/qa/common/gen_jetson_trt_models b/qa/common/gen_jetson_trt_models index 21e9fe53ff..31c7294fc7 100755 --- a/qa/common/gen_jetson_trt_models +++ b/qa/common/gen_jetson_trt_models @@ -34,7 +34,7 @@ # Make all generated files accessible outside of container umask 0000 # Set the version of the models -TRITON_VERSION=${TRITON_VERSION:=24.09} +TRITON_VERSION=${TRITON_VERSION:=24.10} # Set the CUDA device to use CUDA_DEVICE=${RUNNER_ID:=0} # Set TensorRT image diff --git a/qa/common/gen_qa_custom_ops b/qa/common/gen_qa_custom_ops index 286052914b..ad2b22bc71 100755 --- a/qa/common/gen_qa_custom_ops +++ b/qa/common/gen_qa_custom_ops @@ -37,7 +37,7 @@ ## ############################################################################ -TRITON_VERSION=${TRITON_VERSION:=24.09} +TRITON_VERSION=${TRITON_VERSION:=24.10} NVIDIA_UPSTREAM_VERSION=${NVIDIA_UPSTREAM_VERSION:=$TRITON_VERSION} TENSORFLOW_IMAGE=${TENSORFLOW_IMAGE:=nvcr.io/nvidia/tensorflow:$NVIDIA_UPSTREAM_VERSION-tf2-py3} PYTORCH_IMAGE=${PYTORCH_IMAGE:=nvcr.io/nvidia/pytorch:$NVIDIA_UPSTREAM_VERSION-py3} diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository index f26ba863ce..18e3d00682 100755 --- a/qa/common/gen_qa_model_repository +++ b/qa/common/gen_qa_model_repository @@ -48,14 +48,14 @@ ## ############################################################################ -TRITON_VERSION=${TRITON_VERSION:=24.09} +TRITON_VERSION=${TRITON_VERSION:=24.10} # ONNX. Use ONNX_OPSET 0 to use the default for ONNX version -ONNX_VERSION=1.13.0 +ONNX_VERSION=1.19.2 ONNX_OPSET=0 # OPENVINO version -OPENVINO_VERSION=2023.3.0 +OPENVINO_VERSION=2024.4.0 UBUNTU_IMAGE=${UBUNTU_IMAGE:=ubuntu:22.04} PYTORCH_IMAGE=${PYTORCH_IMAGE:=nvcr.io/nvidia/pytorch:$TRITON_VERSION-py3} From 4b82a2a4899e315dcfa8bebd1b35c83671545a7f Mon Sep 17 00:00:00 2001 From: Misha Chornyi <99709299+mc-nv@users.noreply.github.com> Date: Thu, 24 Oct 2024 11:35:23 -0700 Subject: [PATCH 2/5] Fix array size for jetson tests (#7734) --- qa/L0_trt_dla/dla_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qa/L0_trt_dla/dla_test.py b/qa/L0_trt_dla/dla_test.py index d71d277ac4..c1fdf47e98 100755 --- a/qa/L0_trt_dla/dla_test.py +++ b/qa/L0_trt_dla/dla_test.py @@ -91,7 +91,7 @@ def test_resnet50(self): # Validate the results by comparing with precomputed values. 
# VULTURE class corresponds with index 23 - EXPECTED_CLASS_INDEX = 23 + EXPECTED_CLASS_INDEX = 418 for i in range(batch_size): self.assertEqual(output_data[i][0][0], EXPECTED_CLASS_INDEX) From e05e2a8a227aa29823a73ca9ea439e25b3794b58 Mon Sep 17 00:00:00 2001 From: pvijayakrish Date: Fri, 18 Oct 2024 09:53:57 -0700 Subject: [PATCH 3/5] Specifying the TRT version to clone for model generation --- qa/common/gen_qa_model_repository | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository index 18e3d00682..ef6a36803d 100755 --- a/qa/common/gen_qa_model_repository +++ b/qa/common/gen_qa_model_repository @@ -57,6 +57,9 @@ ONNX_OPSET=0 # OPENVINO version OPENVINO_VERSION=2024.4.0 +#TRT_VERSION +TRT_VERSION=${TRT_VERSION:-10.5.0.18} + UBUNTU_IMAGE=${UBUNTU_IMAGE:=ubuntu:22.04} PYTORCH_IMAGE=${PYTORCH_IMAGE:=nvcr.io/nvidia/pytorch:$TRITON_VERSION-py3} TENSORFLOW_IMAGE=${TENSORFLOW_IMAGE:=nvcr.io/nvidia/tensorflow:$TRITON_VERSION-tf2-py3} @@ -512,7 +515,7 @@ chmod -R 777 $FORMATDESTDIR python3 $SRCDIR/gen_qa_trt_data_dependent_shape.py --models_dir=$DATADEPENDENTDIR chmod -R 777 $DATADEPENDENTDIR # Make shared library for custom Hardmax plugin. -(git clone -b release/10.0 https://github.com/NVIDIA/TensorRT.git && \ +(git clone -b release/$(echo ${TRT_VERSION} | cut -d '.' -f 1-2) https://github.com/NVIDIA/TensorRT.git && \ cd /workspace/TensorRT/samples/python/onnx_custom_plugin && rm -rf build && mkdir build && \ cd build && cmake .. && make -j && cp libcustomHardmaxPlugin.so $PLGDESTDIR/.) LD_PRELOAD=$PLGDESTDIR/libcustomHardmaxPlugin.so python3 $SRCDIR/gen_qa_trt_plugin_models.py --models_dir=$PLGDESTDIR From c59117a35bf22903e75cbfcad04599ec2b57a13c Mon Sep 17 00:00:00 2001 From: pvijayakrish Date: Wed, 30 Oct 2024 16:00:11 -0700 Subject: [PATCH 4/5] Update to use the latest ONNX version --- qa/common/gen_qa_model_repository | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository index ef6a36803d..84fcaa49a8 100755 --- a/qa/common/gen_qa_model_repository +++ b/qa/common/gen_qa_model_repository @@ -51,7 +51,7 @@ TRITON_VERSION=${TRITON_VERSION:=24.10} # ONNX. Use ONNX_OPSET 0 to use the default for ONNX version -ONNX_VERSION=1.19.2 +ONNX_VERSION=1.17.0 ONNX_OPSET=0 # OPENVINO version From 1af618a24ad409fae3f931ce673e1153cd395d0f Mon Sep 17 00:00:00 2001 From: pvijayakrish Date: Fri, 1 Nov 2024 08:02:07 -0700 Subject: [PATCH 5/5] Update TRT version fetch and openvino version --- qa/common/gen_qa_model_repository | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository index 84fcaa49a8..701f745fbf 100755 --- a/qa/common/gen_qa_model_repository +++ b/qa/common/gen_qa_model_repository @@ -55,10 +55,7 @@ ONNX_VERSION=1.17.0 ONNX_OPSET=0 # OPENVINO version -OPENVINO_VERSION=2024.4.0 - -#TRT_VERSION -TRT_VERSION=${TRT_VERSION:-10.5.0.18} +OPENVINO_VERSION=2023.3.0 UBUNTU_IMAGE=${UBUNTU_IMAGE:=ubuntu:22.04} PYTORCH_IMAGE=${PYTORCH_IMAGE:=nvcr.io/nvidia/pytorch:$TRITON_VERSION-py3} @@ -515,7 +512,10 @@ chmod -R 777 $FORMATDESTDIR python3 $SRCDIR/gen_qa_trt_data_dependent_shape.py --models_dir=$DATADEPENDENTDIR chmod -R 777 $DATADEPENDENTDIR # Make shared library for custom Hardmax plugin. -(git clone -b release/$(echo ${TRT_VERSION} | cut -d '.' 
-f 1-2) https://github.com/NVIDIA/TensorRT.git && \ + +docker pull ${TENSORRT_IMAGE} +TENSORRT_VERSION="$(docker inspect ${TENSORRT_IMAGE} --format '{{index .Config.Labels "com.nvidia.tensorrt.version"}}' | cut -d . -f -2)" +(git clone -b release/${TENSORRT_VERSION} https://github.com/NVIDIA/TensorRT.git && \ cd /workspace/TensorRT/samples/python/onnx_custom_plugin && rm -rf build && mkdir build && \ cd build && cmake .. && make -j && cp libcustomHardmaxPlugin.so $PLGDESTDIR/.) LD_PRELOAD=$PLGDESTDIR/libcustomHardmaxPlugin.so python3 $SRCDIR/gen_qa_trt_plugin_models.py --models_dir=$PLGDESTDIR
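The last hunk above replaces the hardcoded `TRT_VERSION` with a TensorRT version read from the container image label, so the cloned `release/<major.minor>` branch always matches the container actually used for model generation. A minimal standalone sketch of that lookup follows; it assumes the NGC image carries the `com.nvidia.tensorrt.version` label, and the default image tag is only an example.

```bash
# Illustrative sketch of the version lookup added in the patch above.
# The image tag is an example; any image exposing the
# com.nvidia.tensorrt.version label would behave the same way.
TENSORRT_IMAGE=${TENSORRT_IMAGE:=nvcr.io/nvidia/tensorrt:24.10-py3}

docker pull ${TENSORRT_IMAGE}

# The full label value looks like "10.5.0.18"; keep only the major.minor
# fields ("10.5") so it matches the NVIDIA/TensorRT release branch naming.
TENSORRT_VERSION="$(docker inspect ${TENSORRT_IMAGE} \
  --format '{{index .Config.Labels "com.nvidia.tensorrt.version"}}' | cut -d . -f -2)"

# Clone the matching release branch, e.g. release/10.5.
git clone -b release/${TENSORRT_VERSION} https://github.com/NVIDIA/TensorRT.git
```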