From a0b19893d698ddd9bb81bb9a131e563ca8c983a0 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 28 Oct 2024 08:57:59 -0700 Subject: [PATCH 01/31] add test tensorrt workflow --- .github/workflows/build-test-linux.yml | 2 +- .github/workflows/build-test-windows.yml | 2 +- toolchains/ci_workspaces/MODULE.bazel.tmpl | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build-test-linux.yml b/.github/workflows/build-test-linux.yml index 1da61a033c..0af6c631d0 100644 --- a/.github/workflows/build-test-linux.yml +++ b/.github/workflows/build-test-linux.yml @@ -1,7 +1,7 @@ name: Build and test Linux wheels on: - pull_request: + # pull_request: push: branches: - main diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index c926b929ef..dd26da3388 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -1,7 +1,7 @@ name: Build and test Windows wheels on: - pull_request: + # pull_request: push: branches: - main diff --git a/toolchains/ci_workspaces/MODULE.bazel.tmpl b/toolchains/ci_workspaces/MODULE.bazel.tmpl index 49ad6f473a..4d5a456ceb 100644 --- a/toolchains/ci_workspaces/MODULE.bazel.tmpl +++ b/toolchains/ci_workspaces/MODULE.bazel.tmpl @@ -67,20 +67,20 @@ http_archive( http_archive( name = "tensorrt", build_file = "@//third_party/tensorrt/archive:BUILD", - sha256 = "adff1cd5abe5d87013806172351e58fd024e5bf0fc61d49ef4b84cd38ed99081", - strip_prefix = "TensorRT-10.3.0.26", + sha256 = "${TENSORRT_SHA256}", + strip_prefix = "${TENSORRT_STRIP_PREFIX}", urls = [ - "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/tars/TensorRT-10.3.0.26.Linux.x86_64-gnu.cuda-12.5.tar.gz", + ${TENSORRT_URLS}, ], ) http_archive( name = "tensorrt_win", build_file = "@//third_party/tensorrt/archive:BUILD", - sha256 = "2bb4bcb79e8c33575816d874b0512ea28c302af1c06ee6d224da71aa182f75e0", - strip_prefix = "TensorRT-10.3.0.26", + sha256 = "${TENSORRT_SHA256}", + strip_prefix = "${TENSORRT_STRIP_PREFIX}", urls = [ - "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/zip/TensorRT-10.3.0.26.Windows.win10.cuda-12.5.zip", + ${TENSORRT_URLS}, ], ) From 09689ee569e3afc54857274498b4da223e2ef173 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 28 Oct 2024 08:58:22 -0700 Subject: [PATCH 02/31] test --- .../scripts/generate-tensorrt-test-matrix.py | 94 ++++++++ .github/workflows/build-tensorrt-linux.yml | 220 ++++++++++++++++++ .../workflows/build-test-tensorrt-linux.yml | 69 ++++++ 3 files changed, 383 insertions(+) create mode 100644 .github/scripts/generate-tensorrt-test-matrix.py create mode 100644 .github/workflows/build-tensorrt-linux.yml create mode 100644 .github/workflows/build-test-tensorrt-linux.yml diff --git a/.github/scripts/generate-tensorrt-test-matrix.py b/.github/scripts/generate-tensorrt-test-matrix.py new file mode 100644 index 0000000000..23434b9841 --- /dev/null +++ b/.github/scripts/generate-tensorrt-test-matrix.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 + +import argparse +import copy +import json +import sys + +CUDA_VERSIONS_DICT = { + "nightly": ["cu121"], + "test": ["cu121", "cu124"], + "release": ["cu121", "cu124"], +} + +PYTHON_VERSIONS_DICT = { + "nightly": ["3.9"], + "test": ["3.9", "3.12"], + "release": ["3.9", "3.12"], +} + +TENSORRT_VERSIONS_DICT = { + "windows": { + "10.4": { + "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/zip/TensorRT-10.4.0.26.Windows.win10.cuda-12.6.zip", + "strip_prefix": "TensorRT-10.4.0.26", + "sha256": "3a7de83778b9e9f812fd8901e07e0d7d6fc54ce633fcff2e340f994df2c6356c", + }, + "10.5": { + "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/zip/TensorRT-10.5.0.18.Windows.win10.cuda-12.6.zip", + "strip_prefix": "TensorRT-10.5.0.18", + "sha256": "e6436f4164db4e44d727354dccf7d93755efb70d6fbfd6fa95bdfeb2e7331b24", + }, + }, + "linux": { + "10.4": { + "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/tars/TensorRT-10.4.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz", + "strip_prefix": "TensorRT-10.4.0.26", + "sha256": "cb0273ecb3ba4db8993a408eedd354712301a6c7f20704c52cdf9f78aa97bbdb", + }, + "10.5": { + "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/tars/TensorRT-10.5.0.18.Linux.x86_64-gnu.cuda-12.6.tar.gz", + "strip_prefix": "TensorRT-10.5.0.18", + "sha256": "f404d379d639552a3e026cd5267213bd6df18a4eb899d6e47815bbdb34854958", + }, + }, +} + + +def main(args: list[str]) -> None: + parser = argparse.ArgumentParser() + parser.add_argument( + "--matrix", + help="matrix", + type=str, + default="", + ) + + options = parser.parse_args(args) + if options.matrix == "": + raise Exception(f"--matrix is empty, please provide the matrix json str") + + matrix_dict = json.loads(options.matrix) + includes = matrix_dict["include"] + assert len(includes) > 0 + channel = includes[0].channel + if "windows" in includes[0].validation_runner: + arch = "windows" + elif "linux" in includes[0].validation_runner: + arch = "linux" + else: + raise Exception( + f"{includes[0].validation_runner} is not the supported arch, currently only support windows and linux" + ) + + cuda_versions = CUDA_VERSIONS_DICT[channel] + python_versions = PYTHON_VERSIONS_DICT[channel] + tensorrt_versions = TENSORRT_VERSIONS_DICT[arch] + + filtered_includes = [] + for item in includes: + if ( + item["desired_cuda"] in cuda_versions + and item["python_version"] in python_versions + ): + for _, tensorrt_json in tensorrt_versions.items(): + new_item = copy.deepcopy(item) + new_item["tensorrt"] = tensorrt_json + filtered_includes.append(new_item) + filtered_matrix_dict = {} + filtered_matrix_dict["include"] = filtered_includes + print(json.dumps(filtered_matrix_dict)) + + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/.github/workflows/build-tensorrt-linux.yml b/.github/workflows/build-tensorrt-linux.yml new file mode 100644 index 0000000000..e27849d2d2 --- /dev/null +++ b/.github/workflows/build-tensorrt-linux.yml @@ -0,0 +1,220 @@ +name: Build Torch-TensorRT wheel on Linux + +on: + workflow_call: + inputs: + repository: + description: 'Repository to checkout, defaults to ""' + default: "" + type: string + ref: + description: 'Reference to checkout, defaults to "nightly"' + default: "nightly" + type: string + test-infra-repository: + description: "Test infra repository to use" + default: "pytorch/test-infra" + type: string + test-infra-ref: + description: "Test infra reference to use" + default: "" + type: string + build-matrix: + description: "Build matrix to utilize" + default: "" + type: string + pre-script: + description: "Pre script to run prior to build" + default: "" + type: string + post-script: + description: "Post script to run prior to build" + default: "" + type: string + smoke-test-script: + description: "Script for Smoke Test for a specific domain" + default: "" + type: string + env-var-script: + description: "Script that sets Domain-Specific Environment Variables" + default: "" + type: string + package-name: + description: "Name of the actual python package that is imported" + default: "" + type: string + trigger-event: + description: "Trigger Event in caller that determines whether or not to upload" + default: "" + type: string + cache-path: + description: "The path(s) on the runner to cache or restore. The path is relative to repository." + default: "" + type: string + cache-key: + description: "The key created when saving a cache and the key used to search for a cache." + default: "" + type: string + architecture: + description: Architecture to build for x86_64 for default Linux, or aarch64 for Linux aarch64 builds + required: false + type: string + default: x86_64 + submodules: + description: Works as stated in actions/checkout, but the default value is recursive + required: false + type: string + default: recursive + setup-miniconda: + description: Set to true if setup-miniconda is needed + required: false + type: boolean + default: true + +permissions: + id-token: write + contents: read + +jobs: + build: + strategy: + fail-fast: false + matrix: ${{ fromJSON(inputs.build-matrix) }} + env: + PYTHON_VERSION: ${{ matrix.python_version }} + PACKAGE_TYPE: wheel + REPOSITORY: ${{ inputs.repository }} + REF: ${{ inputs.ref }} + CU_VERSION: ${{ matrix.desired_cuda }} + UPLOAD_TO_BASE_BUCKET: ${{ matrix.upload_to_base_bucket }} + ARCH: ${{ inputs.architecture }} + TENSORRT_STRIP_PREFIX: ${{ matrix.tensorrt.strip_prefix }} + TENSORRT_URLS: ${{ matrix.tensorrt.urls }} + TENSORRT_SHA256: ${{ matrix.tensorrt.sha256 }} + name: release_${{ matrix.build_name }} + runs-on: ${{ matrix.validation_runner }} + container: + image: ${{ matrix.container_image }} + options: ${{ matrix.gpu_arch_type == 'cuda' && '--gpus all' || ' ' }} + # If a build is taking longer than 120 minutes on these runners we need + # to have a conversation + timeout-minutes: 120 + + steps: + - name: Clean workspace + shell: bash -l {0} + run: | + set -x + echo "::group::Cleanup debug output" + rm -rf "${GITHUB_WORKSPACE}" + mkdir -p "${GITHUB_WORKSPACE}" + if [[ "${{ inputs.architecture }}" = "aarch64" ]]; then + rm -rf "${RUNNER_TEMP}/*" + fi + echo "::endgroup::" + - uses: actions/checkout@v3 + with: + # Support the use case where we need to checkout someone's fork + repository: ${{ inputs.test-infra-repository }} + ref: ${{ inputs.test-infra-ref }} + path: test-infra + - uses: actions/checkout@v3 + if: ${{ env.ARCH == 'aarch64' }} + with: + # Support the use case where we need to checkout someone's fork + repository: "pytorch/builder" + ref: "main" + path: builder + - name: Set linux aarch64 CI + if: ${{ inputs.architecture == 'aarch64' }} + shell: bash -l {0} + env: + DESIRED_PYTHON: ${{ matrix.python_version }} + run: | + set +e + # TODO: This is temporary aarch64 setup script, this should be integrated into aarch64 docker. + ${GITHUB_WORKSPACE}/builder/aarch64_linux/aarch64_ci_setup.sh + echo "/opt/conda/bin" >> $GITHUB_PATH + set -e + - uses: ./test-infra/.github/actions/set-channel + - name: Set PYTORCH_VERSION + if: ${{ env.CHANNEL == 'test' }} + run: | + # When building RC, set the version to be the current candidate version, + # otherwise, leave it alone so nightly will pick up the latest + echo "PYTORCH_VERSION=${{ matrix.stable_version }}" >> "${GITHUB_ENV}" + - uses: ./test-infra/.github/actions/setup-binary-builds + env: + PLATFORM: ${{ inputs.architecture == 'aarch64' && 'linux-aarch64' || ''}} + with: + repository: ${{ inputs.repository }} + ref: ${{ inputs.ref }} + submodules: ${{ inputs.submodules }} + setup-miniconda: ${{ inputs.setup-miniconda }} + python-version: ${{ env.PYTHON_VERSION }} + cuda-version: ${{ env.CU_VERSION }} + arch: ${{ env.ARCH }} + - name: Combine Env Var and Build Env Files + if: ${{ inputs.env-var-script != '' }} + working-directory: ${{ inputs.repository }} + shell: bash -l {0} + run: | + cat "${{ inputs.env-var-script }}" >> "${BUILD_ENV_FILE}" + - name: Install torch dependency + shell: bash -l {0} + run: | + set -x + # shellcheck disable=SC1090 + source "${BUILD_ENV_FILE}" + # shellcheck disable=SC2086 + ${CONDA_RUN} ${PIP_INSTALL_TORCH} + - name: Run Pre-Script with Caching + if: ${{ inputs.pre-script != '' }} + uses: ./test-infra/.github/actions/run-script-with-cache + with: + cache-path: ${{ inputs.cache-path }} + cache-key: ${{ inputs.cache-key }} + repository: ${{ inputs.repository }} + script: ${{ inputs.pre-script }} + - name: Build clean + working-directory: ${{ inputs.repository }} + shell: bash -l {0} + run: | + set -x + source "${BUILD_ENV_FILE}" + ${CONDA_RUN} python setup.py clean + - name: Build the wheel (bdist_wheel) + working-directory: ${{ inputs.repository }} + shell: bash -l {0} + run: | + set -x + source "${BUILD_ENV_FILE}" + ${CONDA_RUN} python setup.py bdist_wheel + + - name: Run Post-Script + if: ${{ inputs.post-script != '' }} + uses: ./test-infra/.github/actions/run-script-with-cache + with: + repository: ${{ inputs.repository }} + script: ${{ inputs.post-script }} + - name: Smoke Test + shell: bash -l {0} + env: + PACKAGE_NAME: ${{ inputs.package-name }} + SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }} + run: | + set -x + source "${BUILD_ENV_FILE}" + # TODO: add smoke test for the auditwheel tarball built + + # NB: Only upload to GitHub after passing smoke tests + - name: Upload wheel to GitHub + continue-on-error: true + uses: actions/upload-artifact@v3 + with: + name: ${{ env.ARTIFACT_NAME }} + path: ${{ inputs.repository }}/dist + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }}-${{ inputs.cxx11-tarball-release }} + cancel-in-progress: true \ No newline at end of file diff --git a/.github/workflows/build-test-tensorrt-linux.yml b/.github/workflows/build-test-tensorrt-linux.yml new file mode 100644 index 0000000000..4db674d15e --- /dev/null +++ b/.github/workflows/build-test-tensorrt-linux.yml @@ -0,0 +1,69 @@ +name: Build and test Torch-TensorRT on Linux + +on: + pull_request: + workflow_dispatch: + +permissions: + id-token: write + contents: read + packages: write + +jobs: + generate-matrix: + uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: wheel + os: linux + test-infra-repository: pytorch/test-infra + test-infra-ref: main + with-rocm: false + with-cpu: false + + generate-tensorrt-matrix: + needs: [generate-matrix] + outputs: + matrix: ${{ steps.generate.outputs.matrix }} + runs-on: ubuntu-latest + steps: + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + - uses: actions/checkout@v3 + with: + repository: pytorch/tensorrt + - name: Generate tensorrt matrix + id: generate + run: | + set -eou pipefail + MATRIX_BLOB=${{ toJSON(needs.generate-matrix.outputs.matrix) }} + MATRIX_BLOB="$(python3 .github/scripts/generate-tensorrt-test-matrix.py --matrix "${MATRIX_BLOB}")" + echo "${MATRIX_BLOB}" + echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}" + + build: + needs: [generate-tensorrt-matrix] + name: Build torch-tensorrt whl package + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + pre-script: packaging/pre_build_script.sh + env-var-script: packaging/env_vars.txt + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + package-name: torch_tensorrt + uses: ./.github/workflows/build-torch-tensorrt-linux.yml + with: + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + env-var-script: ${{ matrix.env-var-script }} + post-script: ${{ matrix.post-script }} + package-name: ${{ matrix.package-name }} + smoke-test-script: ${{ matrix.smoke-test-script }} + trigger-event: ${{ github.event_name }} \ No newline at end of file From 3206da72379cac59e44fcc0c0d64c8d65db61aeb Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 28 Oct 2024 09:00:59 -0700 Subject: [PATCH 03/31] test --- .github/workflows/build-test-tensorrt-linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-test-tensorrt-linux.yml b/.github/workflows/build-test-tensorrt-linux.yml index 4db674d15e..3275f0762d 100644 --- a/.github/workflows/build-test-tensorrt-linux.yml +++ b/.github/workflows/build-test-tensorrt-linux.yml @@ -54,7 +54,7 @@ jobs: post-script: packaging/post_build_script.sh smoke-test-script: packaging/smoke_test_script.sh package-name: torch_tensorrt - uses: ./.github/workflows/build-torch-tensorrt-linux.yml + uses: ./.github/workflows/build-tensorrt-linux.yml with: repository: ${{ matrix.repository }} ref: "" From 0a649869af84dd89576ef14803a87cc058116a34 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 28 Oct 2024 09:08:24 -0700 Subject: [PATCH 04/31] test --- .../scripts/generate-tensorrt-test-matrix.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/.github/scripts/generate-tensorrt-test-matrix.py b/.github/scripts/generate-tensorrt-test-matrix.py index 23434b9841..12a55cccbd 100644 --- a/.github/scripts/generate-tensorrt-test-matrix.py +++ b/.github/scripts/generate-tensorrt-test-matrix.py @@ -61,10 +61,21 @@ def main(args: list[str]) -> None: matrix_dict = json.loads(options.matrix) includes = matrix_dict["include"] assert len(includes) > 0 - channel = includes[0].channel - if "windows" in includes[0].validation_runner: + if "channel" not in includes[0]: + raise Exception(f"channel field is missing from the matrix: {options.matrix}") + channel = includes[0]["channel"] + if channel not in ("nightly", "test", "release"): + raise Exception( + f"channel field: {channel} is not supported, currently supported value: nightly, test, release" + ) + + if "validation_runner" not in includes[0]: + raise Exception( + f"validation_runner field is missing from the matrix: {options.matrix}" + ) + if "windows" in includes[0]["validation_runner"]: arch = "windows" - elif "linux" in includes[0].validation_runner: + elif "linux" in includes[0]["validation_runner"]: arch = "linux" else: raise Exception( From a02d9443cb75fa033f53597df91756d0655b705a Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 28 Oct 2024 09:31:08 -0700 Subject: [PATCH 05/31] test --- .github/scripts/generate-tensorrt-test-matrix.py | 11 ++++++----- .github/workflows/build-tensorrt-linux.yml | 1 + toolchains/ci_workspaces/MODULE.bazel.tmpl | 4 ++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/scripts/generate-tensorrt-test-matrix.py b/.github/scripts/generate-tensorrt-test-matrix.py index 12a55cccbd..6fddbf6aca 100644 --- a/.github/scripts/generate-tensorrt-test-matrix.py +++ b/.github/scripts/generate-tensorrt-test-matrix.py @@ -19,24 +19,24 @@ TENSORRT_VERSIONS_DICT = { "windows": { - "10.4": { + "10.4.0": { "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/zip/TensorRT-10.4.0.26.Windows.win10.cuda-12.6.zip", "strip_prefix": "TensorRT-10.4.0.26", "sha256": "3a7de83778b9e9f812fd8901e07e0d7d6fc54ce633fcff2e340f994df2c6356c", }, - "10.5": { + "10.5.0": { "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/zip/TensorRT-10.5.0.18.Windows.win10.cuda-12.6.zip", "strip_prefix": "TensorRT-10.5.0.18", "sha256": "e6436f4164db4e44d727354dccf7d93755efb70d6fbfd6fa95bdfeb2e7331b24", }, }, "linux": { - "10.4": { + "10.4.0": { "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/tars/TensorRT-10.4.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz", "strip_prefix": "TensorRT-10.4.0.26", "sha256": "cb0273ecb3ba4db8993a408eedd354712301a6c7f20704c52cdf9f78aa97bbdb", }, - "10.5": { + "10.5.0": { "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/tars/TensorRT-10.5.0.18.Linux.x86_64-gnu.cuda-12.6.tar.gz", "strip_prefix": "TensorRT-10.5.0.18", "sha256": "f404d379d639552a3e026cd5267213bd6df18a4eb899d6e47815bbdb34854958", @@ -92,8 +92,9 @@ def main(args: list[str]) -> None: item["desired_cuda"] in cuda_versions and item["python_version"] in python_versions ): - for _, tensorrt_json in tensorrt_versions.items(): + for tensorrt_version, tensorrt_json in tensorrt_versions.items(): new_item = copy.deepcopy(item) + tensorrt_json["version"] = tensorrt_version new_item["tensorrt"] = tensorrt_json filtered_includes.append(new_item) filtered_matrix_dict = {} diff --git a/.github/workflows/build-tensorrt-linux.yml b/.github/workflows/build-tensorrt-linux.yml index e27849d2d2..2c68e97ab3 100644 --- a/.github/workflows/build-tensorrt-linux.yml +++ b/.github/workflows/build-tensorrt-linux.yml @@ -89,6 +89,7 @@ jobs: UPLOAD_TO_BASE_BUCKET: ${{ matrix.upload_to_base_bucket }} ARCH: ${{ inputs.architecture }} TENSORRT_STRIP_PREFIX: ${{ matrix.tensorrt.strip_prefix }} + TENSORRT_VERSION: ${{ matrix.tensorrt.version }} TENSORRT_URLS: ${{ matrix.tensorrt.urls }} TENSORRT_SHA256: ${{ matrix.tensorrt.sha256 }} name: release_${{ matrix.build_name }} diff --git a/toolchains/ci_workspaces/MODULE.bazel.tmpl b/toolchains/ci_workspaces/MODULE.bazel.tmpl index 4d5a456ceb..aff07a0383 100644 --- a/toolchains/ci_workspaces/MODULE.bazel.tmpl +++ b/toolchains/ci_workspaces/MODULE.bazel.tmpl @@ -70,7 +70,7 @@ http_archive( sha256 = "${TENSORRT_SHA256}", strip_prefix = "${TENSORRT_STRIP_PREFIX}", urls = [ - ${TENSORRT_URLS}, + "${TENSORRT_URLS}", ], ) @@ -80,7 +80,7 @@ http_archive( sha256 = "${TENSORRT_SHA256}", strip_prefix = "${TENSORRT_STRIP_PREFIX}", urls = [ - ${TENSORRT_URLS}, + "${TENSORRT_URLS}", ], ) From 9c0ca36007171446f0ac5506241e741d2c767305 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 28 Oct 2024 10:02:40 -0700 Subject: [PATCH 06/31] test --- .github/scripts/generate-tensorrt-test-matrix.py | 6 +++--- .github/workflows/build-tensorrt-linux.yml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/scripts/generate-tensorrt-test-matrix.py b/.github/scripts/generate-tensorrt-test-matrix.py index 6fddbf6aca..345d6c8c9b 100644 --- a/.github/scripts/generate-tensorrt-test-matrix.py +++ b/.github/scripts/generate-tensorrt-test-matrix.py @@ -6,15 +6,15 @@ import sys CUDA_VERSIONS_DICT = { - "nightly": ["cu121"], + "nightly": ["cu124"], "test": ["cu121", "cu124"], "release": ["cu121", "cu124"], } PYTHON_VERSIONS_DICT = { "nightly": ["3.9"], - "test": ["3.9", "3.12"], - "release": ["3.9", "3.12"], + "test": ["3.9", "3.10", "3.11", "3.12"], + "release": ["3.9", "3.10", "3.11", "3.12"], } TENSORRT_VERSIONS_DICT = { diff --git a/.github/workflows/build-tensorrt-linux.yml b/.github/workflows/build-tensorrt-linux.yml index 2c68e97ab3..d3507b23c9 100644 --- a/.github/workflows/build-tensorrt-linux.yml +++ b/.github/workflows/build-tensorrt-linux.yml @@ -92,7 +92,7 @@ jobs: TENSORRT_VERSION: ${{ matrix.tensorrt.version }} TENSORRT_URLS: ${{ matrix.tensorrt.urls }} TENSORRT_SHA256: ${{ matrix.tensorrt.sha256 }} - name: release_${{ matrix.build_name }} + name: build_tensorrt${{ matrix.tensorrt.version }}_py${{matrix.python_version}}_cu${{matrix.desired_cuda}} runs-on: ${{ matrix.validation_runner }} container: image: ${{ matrix.container_image }} @@ -217,5 +217,5 @@ jobs: path: ${{ inputs.repository }}/dist concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }}-${{ inputs.cxx11-tarball-release }} + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} cancel-in-progress: true \ No newline at end of file From 67cbaf3b6d7f496cdd11d591f93aa678e0514ee6 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Fri, 1 Nov 2024 11:14:41 -0700 Subject: [PATCH 07/31] test --- .github/workflows/build-tensorrt-linux.yml | 4 ++-- packaging/pre_build_script.sh | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-tensorrt-linux.yml b/.github/workflows/build-tensorrt-linux.yml index d3507b23c9..3296a14c0f 100644 --- a/.github/workflows/build-tensorrt-linux.yml +++ b/.github/workflows/build-tensorrt-linux.yml @@ -92,7 +92,7 @@ jobs: TENSORRT_VERSION: ${{ matrix.tensorrt.version }} TENSORRT_URLS: ${{ matrix.tensorrt.urls }} TENSORRT_SHA256: ${{ matrix.tensorrt.sha256 }} - name: build_tensorrt${{ matrix.tensorrt.version }}_py${{matrix.python_version}}_cu${{matrix.desired_cuda}} + name: build_tensorrt${{ matrix.tensorrt.version }}_py${{matrix.python_version}}_${{matrix.desired_cuda}} runs-on: ${{ matrix.validation_runner }} container: image: ${{ matrix.container_image }} @@ -213,7 +213,7 @@ jobs: continue-on-error: true uses: actions/upload-artifact@v3 with: - name: ${{ env.ARTIFACT_NAME }} + name: torch_tensorrt_${{env.TENSORRT_VERSION}}_py${{env.PYTHON_VERSION}}_${{env.CU_VERSION}} path: ${{ inputs.repository }}/dist concurrency: diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh index 1dbfb1c3eb..1fa609088f 100755 --- a/packaging/pre_build_script.sh +++ b/packaging/pre_build_script.sh @@ -21,6 +21,13 @@ pip install --force-reinstall --pre ${TORCH_TORCHVISION} --index-url ${INDEX_URL export TORCH_BUILD_NUMBER=$(python -c "import torch, urllib.parse as ul; print(ul.quote_plus(torch.__version__))") export TORCH_INSTALL_PATH=$(python -c "import torch, os; print(os.path.dirname(torch.__file__))") +# replace current tensorrt version to the upgraded tensorrt version +current_version="10.3.0" +sed -i -e "s/tensorrt-cu12==${current_version}/tensorrt-cu12==${TENSORRT_VERSION}/g" \ + -e "s/tensorrt-cu12-bindings==${current_version}/tensorrt-cu12-bindings==${TENSORRT_VERSION}/g" \ + -e "s/tensorrt-cu12-libs==${current_version}/tensorrt-cu12-libs==${TENSORRT_VERSION}/g" \ + pyproject.toml + if [[ "${CU_VERSION::4}" < "cu12" ]]; then # replace dependencies from tensorrt-cu12-bindings/libs to tensorrt-cu11-bindings/libs sed -i -e "s/tensorrt-cu12==/tensorrt-${CU_VERSION::4}==/g" \ From 7765146413a332513aa42080d340b0e72647815a Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Sun, 3 Nov 2024 13:33:02 -0800 Subject: [PATCH 08/31] add some test --- .../workflows/build-test-tensorrt-linux.yml | 38 ++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-test-tensorrt-linux.yml b/.github/workflows/build-test-tensorrt-linux.yml index 3275f0762d..b1a87fb835 100644 --- a/.github/workflows/build-test-tensorrt-linux.yml +++ b/.github/workflows/build-test-tensorrt-linux.yml @@ -66,4 +66,40 @@ jobs: post-script: ${{ matrix.post-script }} package-name: ${{ matrix.package-name }} smoke-test-script: ${{ matrix.smoke-test-script }} - trigger-event: ${{ github.event_name }} \ No newline at end of file + trigger-event: ${{ github.event_name }} + + tests-py-torchscript-fe: + name: Test torchscript frontend [Python] + needs: [generate-tensorrt-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-torchscript-fe + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + script: | + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH + pushd . + cd tests/modules + python hub.py + popd + pushd . + cd tests/py/ts + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_api_test_results.xml api/ + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/ + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ + popd From 8a199aab8436aa125fd57ac11c66dca0b438688c Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 4 Nov 2024 10:51:12 -0800 Subject: [PATCH 09/31] test --- .github/workflows/build-tensorrt-linux.yml | 6 +++++- .github/workflows/build-test-tensorrt-linux.yml | 2 ++ .github/workflows/linux-test.yml | 11 +++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-tensorrt-linux.yml b/.github/workflows/build-tensorrt-linux.yml index 3296a14c0f..742dd6ddbe 100644 --- a/.github/workflows/build-tensorrt-linux.yml +++ b/.github/workflows/build-tensorrt-linux.yml @@ -70,6 +70,10 @@ on: required: false type: boolean default: true + upload-artifact: + description: 'Name to give artifacts uploaded from ${inputs.repository}/dist' + default: '' + type: string permissions: id-token: write @@ -213,7 +217,7 @@ jobs: continue-on-error: true uses: actions/upload-artifact@v3 with: - name: torch_tensorrt_${{env.TENSORRT_VERSION}}_py${{env.PYTHON_VERSION}}_${{env.CU_VERSION}} + name: ${{ inputs.upload-artifact }} path: ${{ inputs.repository }}/dist concurrency: diff --git a/.github/workflows/build-test-tensorrt-linux.yml b/.github/workflows/build-test-tensorrt-linux.yml index b1a87fb835..eaac67dbbe 100644 --- a/.github/workflows/build-test-tensorrt-linux.yml +++ b/.github/workflows/build-test-tensorrt-linux.yml @@ -67,6 +67,7 @@ jobs: package-name: ${{ matrix.package-name }} smoke-test-script: ${{ matrix.smoke-test-script }} trigger-event: ${{ github.event_name }} + upload-artifact: torch_tensorrt_${{ matrix.tensorrt.version }}_py${{ matrix.python_version }}_${{ matrix.desired_cuda }} tests-py-torchscript-fe: name: Test torchscript frontend [Python] @@ -89,6 +90,7 @@ jobs: test-infra-ref: main build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} pre-script: ${{ matrix.pre-script }} + download-artifact: torch_tensorrt_${{ matrix.tensorrt.version }}_py${{ matrix.python_version }}_${{ matrix.desired_cuda }} script: | export USE_HOST_DEPS=1 export CI_BUILD=1 diff --git a/.github/workflows/linux-test.yml b/.github/workflows/linux-test.yml index 7b8a826453..ad9b5db597 100644 --- a/.github/workflows/linux-test.yml +++ b/.github/workflows/linux-test.yml @@ -49,6 +49,10 @@ on: description: "Prevents a job from failing when a step fails. Set to true to allow a job to pass when exec script step fails." default: false type: boolean + download-artifact: + description: 'Name of the artifacts to be downloaded' + default: '' + type: string upload-artifact: description: 'Name to give artifacts uploaded from ${RUNNER_ARTIFACT_DIR}' default: '' @@ -112,10 +116,17 @@ jobs: repository: ${{ inputs.repository }} script: ${{ inputs.pre-script }} - name: Download artifacts + if: ${{ inputs.download-artifact == '' }} uses: actions/download-artifact@v3 with: name: ${{ env.ARTIFACT_NAME }} path: /opt/torch-tensorrt-builds/ + - name: Download artifacts + if: ${{ inputs.download-artifact != '' }} + uses: actions/download-artifact@v3 + with: + name: ${{ inputs.download-artifact }} + path: /opt/torch-tensorrt-builds/ # - name: Install torch and torch-tensorrt # if: ${{ inputs.pre-script != '' }} # uses: ./test-infra/.github/actions/run-script-with-cache From 546a5746787f2ec4616fdf4f3bb8b1253c873e5a Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 4 Nov 2024 13:50:58 -0800 Subject: [PATCH 10/31] test --- .github/workflows/build-tensorrt-linux.yml | 2 +- .github/workflows/build-test-tensorrt-linux.yml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-tensorrt-linux.yml b/.github/workflows/build-tensorrt-linux.yml index 742dd6ddbe..dafc995546 100644 --- a/.github/workflows/build-tensorrt-linux.yml +++ b/.github/workflows/build-tensorrt-linux.yml @@ -1,4 +1,4 @@ -name: Build Torch-TensorRT wheel on Linux +name: Build Torch-TensorRT wheel on Linux with specified tensorRT version on: workflow_call: diff --git a/.github/workflows/build-test-tensorrt-linux.yml b/.github/workflows/build-test-tensorrt-linux.yml index eaac67dbbe..782e6bd683 100644 --- a/.github/workflows/build-test-tensorrt-linux.yml +++ b/.github/workflows/build-test-tensorrt-linux.yml @@ -1,4 +1,4 @@ -name: Build and test Torch-TensorRT on Linux +name: Build and test Torch-TensorRT on Linux with specified tensorRT version on: pull_request: @@ -67,7 +67,7 @@ jobs: package-name: ${{ matrix.package-name }} smoke-test-script: ${{ matrix.smoke-test-script }} trigger-event: ${{ github.event_name }} - upload-artifact: torch_tensorrt_${{ matrix.tensorrt.version }}_py${{ matrix.python_version }}_${{ matrix.desired_cuda }} + upload-artifact: torch_tensorrt_${{ needs.generate-tensorrt-matrix.outputs.matrix.tensorrt.version }}_py${{ needs.generate-tensorrt-matrix.outputs.matrix.python_version }}_${{ needs.generate-tensorrt-matrix.outputs.matrix.desired_cuda }} tests-py-torchscript-fe: name: Test torchscript frontend [Python] @@ -83,14 +83,14 @@ jobs: smoke-test-script: packaging/smoke_test_script.sh uses: ./.github/workflows/linux-test.yml with: - job-name: tests-py-torchscript-fe + job-name: tests-py-torchscript-fe-tensorrt-${{ needs.generate-tensorrt-matrix.outputs.matrix.tensorrt.version }}-py${{ needs.generate-tensorrt-matrix.outputs.matrix.python_version }} repository: "pytorch/tensorrt" ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} pre-script: ${{ matrix.pre-script }} - download-artifact: torch_tensorrt_${{ matrix.tensorrt.version }}_py${{ matrix.python_version }}_${{ matrix.desired_cuda }} + download-artifact: ${{ needs.generate-tensorrt-matrix.outputs.matrix.tensorrt.version }}_py${{ needs.generate-tensorrt-matrix.outputs.matrix.python_version }}_${{ needs.generate-tensorrt-matrix.outputs.matrix.desired_cuda }} script: | export USE_HOST_DEPS=1 export CI_BUILD=1 From 41aec8b0ddb65298c65052ec7d4cc1fb0bf33a63 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 4 Nov 2024 14:39:20 -0800 Subject: [PATCH 11/31] test --- .../scripts/generate-tensorrt-test-matrix.py | 10 ++ .github/workflows/build-tensorrt-linux.yml | 7 +- .github/workflows/build-test-linux.yml | 2 +- .../workflows/build-test-tensorrt-linux.yml | 1 - .github/workflows/build-test-windows.yml | 2 +- packaging/pre_build_script.sh | 16 ++- toolchains/ci_workspaces/MODULE.bazel.tmpl | 12 +- .../ci_workspaces/MODULE_tensorrt.bazel.tmpl | 115 ++++++++++++++++++ 8 files changed, 146 insertions(+), 19 deletions(-) create mode 100644 toolchains/ci_workspaces/MODULE_tensorrt.bazel.tmpl diff --git a/.github/scripts/generate-tensorrt-test-matrix.py b/.github/scripts/generate-tensorrt-test-matrix.py index 345d6c8c9b..f978136a35 100644 --- a/.github/scripts/generate-tensorrt-test-matrix.py +++ b/.github/scripts/generate-tensorrt-test-matrix.py @@ -29,6 +29,11 @@ "strip_prefix": "TensorRT-10.5.0.18", "sha256": "e6436f4164db4e44d727354dccf7d93755efb70d6fbfd6fa95bdfeb2e7331b24", }, + "10.6.0": { + "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/zip/TensorRT-10.6.0.26.Windows.win10.cuda-12.6.zip", + "strip_prefix": "TensorRT-10.6.0.26", + "sha256": "6c6d92c108a1b3368423e8f69f08d31269830f1e4c9da43b37ba34a176797254", + }, }, "linux": { "10.4.0": { @@ -41,6 +46,11 @@ "strip_prefix": "TensorRT-10.5.0.18", "sha256": "f404d379d639552a3e026cd5267213bd6df18a4eb899d6e47815bbdb34854958", }, + "10.6.0": { + "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/tars/TensorRT-10.6.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz", + "strip_prefix": "TensorRT-10.6.0.26", + "sha256": "f404d379d639552a3e026cd5267213bd6df18a4eb899d6e47815bbdb34854958", + }, }, } diff --git a/.github/workflows/build-tensorrt-linux.yml b/.github/workflows/build-tensorrt-linux.yml index dafc995546..e764a2f2c8 100644 --- a/.github/workflows/build-tensorrt-linux.yml +++ b/.github/workflows/build-tensorrt-linux.yml @@ -70,10 +70,6 @@ on: required: false type: boolean default: true - upload-artifact: - description: 'Name to give artifacts uploaded from ${inputs.repository}/dist' - default: '' - type: string permissions: id-token: write @@ -96,6 +92,7 @@ jobs: TENSORRT_VERSION: ${{ matrix.tensorrt.version }} TENSORRT_URLS: ${{ matrix.tensorrt.urls }} TENSORRT_SHA256: ${{ matrix.tensorrt.sha256 }} + ARTIFACT_NAME: torch_tensorrt_${{ matrix.tensorrt.version }}_py${{ matrix.python_version }}_${{ matrix.desired_cuda }} name: build_tensorrt${{ matrix.tensorrt.version }}_py${{matrix.python_version}}_${{matrix.desired_cuda}} runs-on: ${{ matrix.validation_runner }} container: @@ -217,7 +214,7 @@ jobs: continue-on-error: true uses: actions/upload-artifact@v3 with: - name: ${{ inputs.upload-artifact }} + name: ${{ env.ARTIFACT_NAME }} path: ${{ inputs.repository }}/dist concurrency: diff --git a/.github/workflows/build-test-linux.yml b/.github/workflows/build-test-linux.yml index 0af6c631d0..1da61a033c 100644 --- a/.github/workflows/build-test-linux.yml +++ b/.github/workflows/build-test-linux.yml @@ -1,7 +1,7 @@ name: Build and test Linux wheels on: - # pull_request: + pull_request: push: branches: - main diff --git a/.github/workflows/build-test-tensorrt-linux.yml b/.github/workflows/build-test-tensorrt-linux.yml index 782e6bd683..d695c4287f 100644 --- a/.github/workflows/build-test-tensorrt-linux.yml +++ b/.github/workflows/build-test-tensorrt-linux.yml @@ -67,7 +67,6 @@ jobs: package-name: ${{ matrix.package-name }} smoke-test-script: ${{ matrix.smoke-test-script }} trigger-event: ${{ github.event_name }} - upload-artifact: torch_tensorrt_${{ needs.generate-tensorrt-matrix.outputs.matrix.tensorrt.version }}_py${{ needs.generate-tensorrt-matrix.outputs.matrix.python_version }}_${{ needs.generate-tensorrt-matrix.outputs.matrix.desired_cuda }} tests-py-torchscript-fe: name: Test torchscript frontend [Python] diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index dd26da3388..c926b929ef 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -1,7 +1,7 @@ name: Build and test Windows wheels on: - # pull_request: + pull_request: push: branches: - main diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh index 1fa609088f..6b107b63b0 100755 --- a/packaging/pre_build_script.sh +++ b/packaging/pre_build_script.sh @@ -21,12 +21,14 @@ pip install --force-reinstall --pre ${TORCH_TORCHVISION} --index-url ${INDEX_URL export TORCH_BUILD_NUMBER=$(python -c "import torch, urllib.parse as ul; print(ul.quote_plus(torch.__version__))") export TORCH_INSTALL_PATH=$(python -c "import torch, os; print(os.path.dirname(torch.__file__))") -# replace current tensorrt version to the upgraded tensorrt version -current_version="10.3.0" -sed -i -e "s/tensorrt-cu12==${current_version}/tensorrt-cu12==${TENSORRT_VERSION}/g" \ +if [[ ${TENSORRT_VERSION} != "" ]]; then + # this is the upgraded TensorRT version, replace current tensorrt version to the upgrade tensorRT version in the pyproject.toml + current_version=$(cat dev_dep_versions.yml | grep __tensorrt_version__ | sed 's/__tensorrt_version__: //g' | sed 's/"//g') + sed -i -e "s/tensorrt-cu12==${current_version}/tensorrt-cu12==${TENSORRT_VERSION}/g" \ -e "s/tensorrt-cu12-bindings==${current_version}/tensorrt-cu12-bindings==${TENSORRT_VERSION}/g" \ -e "s/tensorrt-cu12-libs==${current_version}/tensorrt-cu12-libs==${TENSORRT_VERSION}/g" \ - pyproject.toml + pyproject.toml +fi if [[ "${CU_VERSION::4}" < "cu12" ]]; then # replace dependencies from tensorrt-cu12-bindings/libs to tensorrt-cu11-bindings/libs @@ -36,7 +38,11 @@ if [[ "${CU_VERSION::4}" < "cu12" ]]; then pyproject.toml fi -cat toolchains/ci_workspaces/MODULE.bazel.tmpl | envsubst > MODULE.bazel +if [[ ${TENSORRT_VERSION} != "" ]]; then + cat toolchains/ci_workspaces/MODULE_tensorrt.bazel.tmpl | envsubst > MODULE.bazel +else + cat toolchains/ci_workspaces/MODULE.bazel.tmpl | envsubst > MODULE.bazel +fi cat MODULE.bazel export CI_BUILD=1 diff --git a/toolchains/ci_workspaces/MODULE.bazel.tmpl b/toolchains/ci_workspaces/MODULE.bazel.tmpl index aff07a0383..49ad6f473a 100644 --- a/toolchains/ci_workspaces/MODULE.bazel.tmpl +++ b/toolchains/ci_workspaces/MODULE.bazel.tmpl @@ -67,20 +67,20 @@ http_archive( http_archive( name = "tensorrt", build_file = "@//third_party/tensorrt/archive:BUILD", - sha256 = "${TENSORRT_SHA256}", - strip_prefix = "${TENSORRT_STRIP_PREFIX}", + sha256 = "adff1cd5abe5d87013806172351e58fd024e5bf0fc61d49ef4b84cd38ed99081", + strip_prefix = "TensorRT-10.3.0.26", urls = [ - "${TENSORRT_URLS}", + "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/tars/TensorRT-10.3.0.26.Linux.x86_64-gnu.cuda-12.5.tar.gz", ], ) http_archive( name = "tensorrt_win", build_file = "@//third_party/tensorrt/archive:BUILD", - sha256 = "${TENSORRT_SHA256}", - strip_prefix = "${TENSORRT_STRIP_PREFIX}", + sha256 = "2bb4bcb79e8c33575816d874b0512ea28c302af1c06ee6d224da71aa182f75e0", + strip_prefix = "TensorRT-10.3.0.26", urls = [ - "${TENSORRT_URLS}", + "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/zip/TensorRT-10.3.0.26.Windows.win10.cuda-12.5.zip", ], ) diff --git a/toolchains/ci_workspaces/MODULE_tensorrt.bazel.tmpl b/toolchains/ci_workspaces/MODULE_tensorrt.bazel.tmpl new file mode 100644 index 0000000000..aff07a0383 --- /dev/null +++ b/toolchains/ci_workspaces/MODULE_tensorrt.bazel.tmpl @@ -0,0 +1,115 @@ +module( + name = "torch_tensorrt", + repo_name = "org_pytorch_tensorrt", + version = "${BUILD_VERSION}" +) + +bazel_dep(name = "googletest", version = "1.14.0") +bazel_dep(name = "platforms", version = "0.0.10") +bazel_dep(name = "rules_cc", version = "0.0.9") +bazel_dep(name = "rules_python", version = "0.34.0") + +python = use_extension("@rules_python//python/extensions:python.bzl", "python") +python.toolchain( + ignore_root_user_error = True, + python_version = "3.11", +) + +bazel_dep(name = "rules_pkg", version = "1.0.1") +git_override( + module_name = "rules_pkg", + commit = "17c57f4", + remote = "https://github.com/narendasan/rules_pkg", +) + +local_repository = use_repo_rule("@bazel_tools//tools/build_defs/repo:local.bzl", "local_repository") + +# External dependency for torch_tensorrt if you already have precompiled binaries. +local_repository( + name = "torch_tensorrt", + path = "/opt/conda/lib/python3.8/site-packages/torch_tensorrt", +) + + +new_local_repository = use_repo_rule("@bazel_tools//tools/build_defs/repo:local.bzl", "new_local_repository") + +# CUDA should be installed on the system locally +new_local_repository( + name = "cuda", + build_file = "@//third_party/cuda:BUILD", + path = "${CUDA_HOME}", +) + +new_local_repository( + name = "cuda_win", + build_file = "@//third_party/cuda:BUILD", + path = "${CUDA_HOME}", +) + + +http_archive = use_repo_rule("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +############################################################################################################# +# Tarballs and fetched dependencies (default - use in cases when building from precompiled bin and tarballs) +############################################################################################################# + +http_archive( + name = "libtorch", + build_file = "@//third_party/libtorch:BUILD", + strip_prefix = "libtorch", + urls = ["https://download.pytorch.org/libtorch/${CHANNEL}/${CU_VERSION}/libtorch-cxx11-abi-shared-with-deps-latest.zip"], +) + +# Download these tarballs manually from the NVIDIA website +# Either place them in the distdir directory in third_party and use the --distdir flag +# or modify the urls to "file:////.tar.gz + +http_archive( + name = "tensorrt", + build_file = "@//third_party/tensorrt/archive:BUILD", + sha256 = "${TENSORRT_SHA256}", + strip_prefix = "${TENSORRT_STRIP_PREFIX}", + urls = [ + "${TENSORRT_URLS}", + ], +) + +http_archive( + name = "tensorrt_win", + build_file = "@//third_party/tensorrt/archive:BUILD", + sha256 = "${TENSORRT_SHA256}", + strip_prefix = "${TENSORRT_STRIP_PREFIX}", + urls = [ + "${TENSORRT_URLS}", + ], +) + + +#################################################################################### +# Locally installed dependencies (use in cases of custom dependencies or aarch64) +#################################################################################### + +# NOTE: In the case you are using just the pre-cxx11-abi path or just the cxx11 abi path +# with your local libtorch, just point deps at the same path to satisfy bazel. + +# NOTE: NVIDIA's aarch64 PyTorch (python) wheel file uses the CXX11 ABI unlike PyTorch's standard +# x86_64 python distribution. If using NVIDIA's version just point to the root of the package +# for both versions here and do not use --config=pre-cxx11-abi + +new_local_repository( + name = "libtorch_win", + path = "${TORCH_INSTALL_PATH}", + build_file = "third_party/libtorch/BUILD" +) + +new_local_repository( + name = "libtorch_pre_cxx11_abi", + path = "${TORCH_INSTALL_PATH}", + build_file = "third_party/libtorch/BUILD" +) + +#new_local_repository( +# name = "tensorrt", +# path = "/usr/", +# build_file = "@//third_party/tensorrt/local:BUILD" +#) From a55428556ab411eed5d6dd8f59b85428c204e11e Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 4 Nov 2024 14:55:50 -0800 Subject: [PATCH 12/31] test --- .github/scripts/generate-tensorrt-test-matrix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/generate-tensorrt-test-matrix.py b/.github/scripts/generate-tensorrt-test-matrix.py index f978136a35..54dc8fd792 100644 --- a/.github/scripts/generate-tensorrt-test-matrix.py +++ b/.github/scripts/generate-tensorrt-test-matrix.py @@ -49,7 +49,7 @@ "10.6.0": { "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/tars/TensorRT-10.6.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz", "strip_prefix": "TensorRT-10.6.0.26", - "sha256": "f404d379d639552a3e026cd5267213bd6df18a4eb899d6e47815bbdb34854958", + "sha256": "33d3c2f3f4c84dc7991a4337a6fde9ed33f5c8e5c4f03ac2eb6b994a382b03a0", }, }, } From 7102fa5a1136b657f5428b2c16edcc762d05166b Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 4 Nov 2024 16:10:14 -0800 Subject: [PATCH 13/31] test --- .github/scripts/generate-tensorrt-test-matrix.py | 2 +- .github/workflows/build-tensorrt-linux.yml | 4 ++-- .github/workflows/build-test-tensorrt-linux.yml | 1 - .github/workflows/linux-test.yml | 11 ++++------- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/.github/scripts/generate-tensorrt-test-matrix.py b/.github/scripts/generate-tensorrt-test-matrix.py index 54dc8fd792..297581a736 100644 --- a/.github/scripts/generate-tensorrt-test-matrix.py +++ b/.github/scripts/generate-tensorrt-test-matrix.py @@ -66,7 +66,7 @@ def main(args: list[str]) -> None: options = parser.parse_args(args) if options.matrix == "": - raise Exception(f"--matrix is empty, please provide the matrix json str") + raise Exception("--matrix is empty, please provide the matrix json str") matrix_dict = json.loads(options.matrix) includes = matrix_dict["include"] diff --git a/.github/workflows/build-tensorrt-linux.yml b/.github/workflows/build-tensorrt-linux.yml index e764a2f2c8..be20e303e2 100644 --- a/.github/workflows/build-tensorrt-linux.yml +++ b/.github/workflows/build-tensorrt-linux.yml @@ -92,7 +92,7 @@ jobs: TENSORRT_VERSION: ${{ matrix.tensorrt.version }} TENSORRT_URLS: ${{ matrix.tensorrt.urls }} TENSORRT_SHA256: ${{ matrix.tensorrt.sha256 }} - ARTIFACT_NAME: torch_tensorrt_${{ matrix.tensorrt.version }}_py${{ matrix.python_version }}_${{ matrix.desired_cuda }} + UPLOAD_ARTIFACT_NAME: torch_tensorrt_${{ matrix.tensorrt.version }}_py${{ matrix.python_version }}_${{ matrix.desired_cuda }} name: build_tensorrt${{ matrix.tensorrt.version }}_py${{matrix.python_version}}_${{matrix.desired_cuda}} runs-on: ${{ matrix.validation_runner }} container: @@ -214,7 +214,7 @@ jobs: continue-on-error: true uses: actions/upload-artifact@v3 with: - name: ${{ env.ARTIFACT_NAME }} + name: ${{ env.UPLOAD_ARTIFACT_NAME }} path: ${{ inputs.repository }}/dist concurrency: diff --git a/.github/workflows/build-test-tensorrt-linux.yml b/.github/workflows/build-test-tensorrt-linux.yml index d695c4287f..cac152cc48 100644 --- a/.github/workflows/build-test-tensorrt-linux.yml +++ b/.github/workflows/build-test-tensorrt-linux.yml @@ -89,7 +89,6 @@ jobs: test-infra-ref: main build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} pre-script: ${{ matrix.pre-script }} - download-artifact: ${{ needs.generate-tensorrt-matrix.outputs.matrix.tensorrt.version }}_py${{ needs.generate-tensorrt-matrix.outputs.matrix.python_version }}_${{ needs.generate-tensorrt-matrix.outputs.matrix.desired_cuda }} script: | export USE_HOST_DEPS=1 export CI_BUILD=1 diff --git a/.github/workflows/linux-test.yml b/.github/workflows/linux-test.yml index ad9b5db597..116f70a892 100644 --- a/.github/workflows/linux-test.yml +++ b/.github/workflows/linux-test.yml @@ -49,10 +49,6 @@ on: description: "Prevents a job from failing when a step fails. Set to true to allow a job to pass when exec script step fails." default: false type: boolean - download-artifact: - description: 'Name of the artifacts to be downloaded' - default: '' - type: string upload-artifact: description: 'Name to give artifacts uploaded from ${RUNNER_ARTIFACT_DIR}' default: '' @@ -72,6 +68,7 @@ jobs: SCRIPT: ${{ inputs.script }} RUNNER_TEST_RESULTS_DIR: /tmp/test_results ARCH: ${{ inputs.architecture }} + DOWNLOAD_ARTIFACT_NAME: torch_tensorrt_${{ matrix.tensorrt.version }}_py${{ matrix.python_version }}_${{ matrix.desired_cuda }} name: ${{ inputs.job-name }}-${{ matrix.desired_cuda }} runs-on: ${{ matrix.validation_runner }} container: @@ -116,16 +113,16 @@ jobs: repository: ${{ inputs.repository }} script: ${{ inputs.pre-script }} - name: Download artifacts - if: ${{ inputs.download-artifact == '' }} + if: ${{ matrix.tensorrt == '' }} uses: actions/download-artifact@v3 with: name: ${{ env.ARTIFACT_NAME }} path: /opt/torch-tensorrt-builds/ - name: Download artifacts - if: ${{ inputs.download-artifact != '' }} + if: ${{ matrix.tensorrt != '' }} uses: actions/download-artifact@v3 with: - name: ${{ inputs.download-artifact }} + name: ${{ env.DOWNLOAD_ARTIFACT_NAME }} path: /opt/torch-tensorrt-builds/ # - name: Install torch and torch-tensorrt # if: ${{ inputs.pre-script != '' }} From 0fd94e6751b373753b167730ce3fe4d2be6aec88 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 4 Nov 2024 17:05:51 -0800 Subject: [PATCH 14/31] test --- .github/workflows/build-tensorrt-linux.yml | 2 +- .../workflows/build-test-tensorrt-linux.yml | 33 ++++++++++++++++++- .github/workflows/linux-test.yml | 4 +-- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-tensorrt-linux.yml b/.github/workflows/build-tensorrt-linux.yml index be20e303e2..31915fcbd5 100644 --- a/.github/workflows/build-tensorrt-linux.yml +++ b/.github/workflows/build-tensorrt-linux.yml @@ -92,7 +92,7 @@ jobs: TENSORRT_VERSION: ${{ matrix.tensorrt.version }} TENSORRT_URLS: ${{ matrix.tensorrt.urls }} TENSORRT_SHA256: ${{ matrix.tensorrt.sha256 }} - UPLOAD_ARTIFACT_NAME: torch_tensorrt_${{ matrix.tensorrt.version }}_py${{ matrix.python_version }}_${{ matrix.desired_cuda }} + UPLOAD_ARTIFACT_NAME: pytorch_tensorrt_${{ matrix.tensorrt.version }}_${{ matrix.python_version }}_${{ matrix.desired_cuda }}_${{ inputs.architecture }} name: build_tensorrt${{ matrix.tensorrt.version }}_py${{matrix.python_version}}_${{matrix.desired_cuda}} runs-on: ${{ matrix.validation_runner }} container: diff --git a/.github/workflows/build-test-tensorrt-linux.yml b/.github/workflows/build-test-tensorrt-linux.yml index cac152cc48..6907fd862b 100644 --- a/.github/workflows/build-test-tensorrt-linux.yml +++ b/.github/workflows/build-test-tensorrt-linux.yml @@ -82,7 +82,7 @@ jobs: smoke-test-script: packaging/smoke_test_script.sh uses: ./.github/workflows/linux-test.yml with: - job-name: tests-py-torchscript-fe-tensorrt-${{ needs.generate-tensorrt-matrix.outputs.matrix.tensorrt.version }}-py${{ needs.generate-tensorrt-matrix.outputs.matrix.python_version }} + job-name: tests-py-torchscript-fe repository: "pytorch/tensorrt" ref: "" test-infra-repository: pytorch/test-infra @@ -103,3 +103,34 @@ jobs: python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/ python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ popd + + tests-py-dynamo-core: + name: Test dynamo core [Python] + needs: [generate-tensorrt-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-dynamo-core + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + script: | + export USE_HOST_DEPS=1 + export CI_BUILD=1 + pushd . + cd tests/py/dynamo + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml runtime/test_000_* + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/ + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ + popd \ No newline at end of file diff --git a/.github/workflows/linux-test.yml b/.github/workflows/linux-test.yml index 116f70a892..6ddc601f2c 100644 --- a/.github/workflows/linux-test.yml +++ b/.github/workflows/linux-test.yml @@ -68,8 +68,8 @@ jobs: SCRIPT: ${{ inputs.script }} RUNNER_TEST_RESULTS_DIR: /tmp/test_results ARCH: ${{ inputs.architecture }} - DOWNLOAD_ARTIFACT_NAME: torch_tensorrt_${{ matrix.tensorrt.version }}_py${{ matrix.python_version }}_${{ matrix.desired_cuda }} - name: ${{ inputs.job-name }}-${{ matrix.desired_cuda }} + DOWNLOAD_ARTIFACT_NAME: pytorch_tensorrt_${{ matrix.tensorrt.version }}_${{ matrix.python_version }}_${{ matrix.desired_cuda }}_${{ inputs.architecture }} + name: ${{ inputs.job-name }}-${{ matrix.tensorrt.version }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }} runs-on: ${{ matrix.validation_runner }} container: image: ${{ matrix.container_image }} From 6cc2faa0350c68a057bc5d6914c2c3dc6d5c6a1c Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 5 Nov 2024 11:20:53 -0800 Subject: [PATCH 15/31] resolve comments --- .github/workflows/build-tensorrt-linux.yml | 2 +- .github/workflows/build-test-tensorrt-linux.yml | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-tensorrt-linux.yml b/.github/workflows/build-tensorrt-linux.yml index 31915fcbd5..7581c38ae8 100644 --- a/.github/workflows/build-tensorrt-linux.yml +++ b/.github/workflows/build-tensorrt-linux.yml @@ -1,4 +1,4 @@ -name: Build Torch-TensorRT wheel on Linux with specified tensorRT version +name: Build Torch-TensorRT wheel on Linux with Future TensorRT Versions on: workflow_call: diff --git a/.github/workflows/build-test-tensorrt-linux.yml b/.github/workflows/build-test-tensorrt-linux.yml index 6907fd862b..20a258a2b1 100644 --- a/.github/workflows/build-test-tensorrt-linux.yml +++ b/.github/workflows/build-test-tensorrt-linux.yml @@ -1,7 +1,6 @@ -name: Build and test Torch-TensorRT on Linux with specified tensorRT version +name: Build and Test Torch-TensorRT on Linux with Future TensorRT Versions on: - pull_request: workflow_dispatch: permissions: From e9af038cfc073169b897b75c3cdf18e30b7783c4 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Thu, 7 Nov 2024 07:36:43 -0800 Subject: [PATCH 16/31] add more tests --- .../workflows/build-test-tensorrt-linux.yml | 187 +++++++++++++++++- 1 file changed, 185 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-test-tensorrt-linux.yml b/.github/workflows/build-test-tensorrt-linux.yml index 20a258a2b1..6e86806066 100644 --- a/.github/workflows/build-test-tensorrt-linux.yml +++ b/.github/workflows/build-test-tensorrt-linux.yml @@ -1,6 +1,7 @@ name: Build and Test Torch-TensorRT on Linux with Future TensorRT Versions on: + pull_request: workflow_dispatch: permissions: @@ -103,6 +104,124 @@ jobs: python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ popd + tests-py-dynamo-converters: + name: Test dynamo converters [Python] + needs: [generate-tensorrt-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-dynamo-converters + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + script: | + export USE_HOST_DEPS=1 + export CI_BUILD=1 + pushd . + cd tests/py/dynamo + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 8 conversion/ + popd + + tests-py-dynamo-fe: + name: Test dynamo frontend [Python] + needs: [generate-tensorrt-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-dynamo-fe + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + script: | + export USE_HOST_DEPS=1 + export CI_BUILD=1 + pushd . + cd tests/py/dynamo + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/ + popd + + tests-py-dynamo-serde: + name: Test dynamo export serde [Python] + needs: [generate-tensorrt-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-dynamo-serde + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + script: | + export USE_HOST_DEPS=1 + export CI_BUILD=1 + pushd . + cd tests/py/dynamo + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py + popd + + tests-py-torch-compile-be: + name: Test torch compile backend [Python] + needs: [generate-tensorrt-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-torch-compile-be + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + script: | + export USE_HOST_DEPS=1 + export CI_BUILD=1 + pushd . + cd tests/py/dynamo + python -m pytest -ra -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/ + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_complete_be_e2e_test_results.xml --ir torch_compile models/test_models.py + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py + popd + tests-py-dynamo-core: name: Test dynamo core [Python] needs: [generate-tensorrt-matrix, build] @@ -129,7 +248,71 @@ jobs: export CI_BUILD=1 pushd . cd tests/py/dynamo - python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml runtime/test_000_* + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore runtime/test_002_cudagraphs_py.py --ignore runtime/test_002_cudagraphs_cpp.py runtime/ python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/ python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ - popd \ No newline at end of file + popd + + tests-py-dynamo-cudagraphs: + name: Test dynamo cudagraphs [Python] + needs: [generate-tensorrt-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-dynamo-cudagraphs + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + script: | + export USE_HOST_DEPS=1 + export CI_BUILD=1 + pushd . + cd tests/py/dynamo + nvidia-smi + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_cpp_test_results.xml runtime/test_002_cudagraphs_cpp.py || true + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_py_test_results.xml runtime/test_002_cudagraphs_py.py || true + popd + + tests-py-core: + name: Test core [Python] + needs: [generate-tensorrt-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-core + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + script: | + export USE_HOST_DEPS=1 + export CI_BUILD=1 + pushd . + cd tests/py/core + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . + popd + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} + cancel-in-progress: true \ No newline at end of file From 1bf5673659c50e030edf653242fb41b4358bb8f3 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Fri, 8 Nov 2024 14:21:13 -0800 Subject: [PATCH 17/31] merge main into the branch --- .github/workflows/build-test-tensorrt-linux.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build-test-tensorrt-linux.yml b/.github/workflows/build-test-tensorrt-linux.yml index 6e86806066..3f4abb9add 100644 --- a/.github/workflows/build-test-tensorrt-linux.yml +++ b/.github/workflows/build-test-tensorrt-linux.yml @@ -1,7 +1,6 @@ name: Build and Test Torch-TensorRT on Linux with Future TensorRT Versions on: - pull_request: workflow_dispatch: permissions: From 1f92a78a09a504e7e6982ca047535a9fbf9fe479 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Fri, 8 Nov 2024 14:30:05 -0800 Subject: [PATCH 18/31] add comments --- .github/scripts/generate-tensorrt-test-matrix.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/scripts/generate-tensorrt-test-matrix.py b/.github/scripts/generate-tensorrt-test-matrix.py index 297581a736..e5a9b9fae4 100644 --- a/.github/scripts/generate-tensorrt-test-matrix.py +++ b/.github/scripts/generate-tensorrt-test-matrix.py @@ -5,18 +5,25 @@ import json import sys +# please update the cuda version you want to test with the future tensorRT version here +# channel: nightly if the future tensorRT version test workflow is triggered from the main branch or your personal branch +# channel: test if the future tensorRT version test workflow is triggered from the release branch(release/2.5 etc....) CUDA_VERSIONS_DICT = { "nightly": ["cu124"], "test": ["cu121", "cu124"], "release": ["cu121", "cu124"], } +# please update the python version you want to test with the future tensorRT version here +# channel: nightly if the future tensorRT version test workflow is triggered from the main branch or your personal branch +# channel: test if the future tensorRT version test workflow is triggered from the release branch(release/2.5 etc....) PYTHON_VERSIONS_DICT = { "nightly": ["3.9"], "test": ["3.9", "3.10", "3.11", "3.12"], "release": ["3.9", "3.10", "3.11", "3.12"], } +# please update the future tensorRT version you want to test here TENSORRT_VERSIONS_DICT = { "windows": { "10.4.0": { From f047aa13e7d21603d67530743ffcce7341d0599c Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 11 Nov 2024 21:29:25 -0800 Subject: [PATCH 19/31] add future tensorrt test workflow on windows --- .github/workflows/build-tensorrt-windows.yml | 229 ++++++++++++ .../workflows/build-test-tensorrt-windows.yml | 325 ++++++++++++++++++ .github/workflows/windows-test.yml | 10 +- packaging/pre_build_script_windows.sh | 16 +- 4 files changed, 578 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/build-tensorrt-windows.yml create mode 100644 .github/workflows/build-test-tensorrt-windows.yml diff --git a/.github/workflows/build-tensorrt-windows.yml b/.github/workflows/build-tensorrt-windows.yml new file mode 100644 index 0000000000..4b86910768 --- /dev/null +++ b/.github/workflows/build-tensorrt-windows.yml @@ -0,0 +1,229 @@ +name: Build Torch-TensorRT wheel on Windows with Future TensorRT Versions + +on: + workflow_call: + inputs: + repository: + description: 'Repository to checkout, defaults to ""' + default: "" + type: string + ref: + description: 'Reference to checkout, defaults to "nightly"' + default: "nightly" + type: string + test-infra-repository: + description: "Test infra repository to use" + default: "pytorch/test-infra" + type: string + test-infra-ref: + description: "Test infra reference to use" + default: "" + type: string + build-matrix: + description: "Build matrix to utilize" + default: "" + type: string + pre-script: + description: "Pre script to run prior to build" + default: "" + type: string + env-script: + description: "Script to setup environment variables for the build" + default: "" + type: string + wheel-build-params: + description: "Additional parameters for bdist_wheel" + default: "" + type: string + post-script: + description: "Post script to run prior to build" + default: "" + type: string + smoke-test-script: + description: "Script for Smoke Test for a specific domain" + default: "" + type: string + package-name: + description: "Name of the actual python package that is imported" + default: "" + type: string + trigger-event: + description: "Trigger Event in caller that determines whether or not to upload" + default: "" + type: string + cache-path: + description: "The path(s) on the runner to cache or restore. The path is relative to repository." + default: "" + type: string + cache-key: + description: "The key created when saving a cache and the key used to search for a cache." + default: "" + type: string + submodules: + description: "Works as stated in actions/checkout, but the default value is recursive" + required: false + type: string + default: recursive + timeout: + description: 'Timeout for the job (in minutes)' + default: 60 + type: number + +permissions: + id-token: write + contents: read + +jobs: + build: + strategy: + fail-fast: false + matrix: ${{ fromJSON(inputs.build-matrix) }} + env: + PYTHON_VERSION: ${{ matrix.python_version }} + PACKAGE_TYPE: wheel + REPOSITORY: ${{ inputs.repository }} + REF: ${{ inputs.ref }} + CU_VERSION: ${{ matrix.desired_cuda }} + UPLOAD_TO_BASE_BUCKET: ${{ matrix.upload_to_base_bucket }} + ARCH: win_amd64 + TENSORRT_STRIP_PREFIX: ${{ matrix.tensorrt.strip_prefix }} + TENSORRT_VERSION: ${{ matrix.tensorrt.version }} + TENSORRT_URLS: ${{ matrix.tensorrt.urls }} + TENSORRT_SHA256: ${{ matrix.tensorrt.sha256 }} + UPLOAD_ARTIFACT_NAME: pytorch_tensorrt_${{ matrix.tensorrt.version }}_${{ matrix.python_version }}_${{ matrix.desired_cuda }}_win_amd64 + name: build_tensorrt${{ matrix.tensorrt.version }}_py${{matrix.python_version}}_${{matrix.desired_cuda}} + runs-on: ${{ matrix.validation_runner }} + defaults: + run: + shell: bash -l {0} + # If a build is taking longer than 60 minutes on these runners we need + # to have a conversation + timeout-minutes: 120 + steps: + - uses: actions/checkout@v3 + with: + # Support the use case where we need to checkout someone's fork + repository: ${{ inputs.test-infra-repository }} + ref: ${{ inputs.test-infra-ref }} + path: test-infra + - uses: ./test-infra/.github/actions/setup-ssh + name: Setup SSH + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + activate-with-label: false + instructions: "SSH with rdesktop using ssh -L 3389:localhost:3389 %%username%%@%%hostname%%" + - name: Add Conda scripts to GitHub path + run: | + echo "C:/Jenkins/Miniconda3/Scripts" >> $GITHUB_PATH + - uses: ./test-infra/.github/actions/set-channel + - name: Set PYTORCH_VERSION + if: ${{ env.CHANNEL == 'test' }} + run: | + # When building RC, set the version to be the current candidate version, + # otherwise, leave it alone so nightly will pick up the latest + echo "PYTORCH_VERSION=${{ matrix.stable_version }}" >> "${GITHUB_ENV}" + - uses: ./test-infra/.github/actions/setup-binary-builds + with: + repository: ${{ inputs.repository }} + ref: ${{ inputs.ref }} + submodules: ${{ inputs.submodules }} + setup-miniconda: false + python-version: ${{ env.PYTHON_VERSION }} + cuda-version: ${{ env.CU_VERSION }} + arch: ${{ env.ARCH }} + - name: Install XPU support package + if: ${{ matrix.gpu_arch_type == 'xpu' }} + run: | + cmd //c .\\test-infra\\.github\\scripts\\install_xpu.bat + - name: Install torch dependency + run: | + source "${BUILD_ENV_FILE}" + # shellcheck disable=SC2086 + ${CONDA_RUN} ${PIP_INSTALL_TORCH} + - name: Run Pre-Script with Caching + if: ${{ inputs.pre-script != '' }} + uses: ./test-infra/.github/actions/run-script-with-cache + with: + cache-path: ${{ inputs.cache-path }} + cache-key: ${{ inputs.cache-key }} + repository: ${{ inputs.repository }} + script: ${{ inputs.pre-script }} + is_windows: 'enabled' + - name: Build clean + working-directory: ${{ inputs.repository }} + env: + ENV_SCRIPT: ${{ inputs.env-script }} + run: | + source "${BUILD_ENV_FILE}" + if [[ -z "${ENV_SCRIPT}" ]]; then + ${CONDA_RUN} python setup.py clean + else + if [[ ! -f ${ENV_SCRIPT} ]]; then + echo "::error::Specified env-script file (${ENV_SCRIPT}) not found" + exit 1 + else + ${CONDA_RUN} ${ENV_SCRIPT} python setup.py clean + fi + fi + - name: Build the wheel (bdist_wheel) + working-directory: ${{ inputs.repository }} + env: + ENV_SCRIPT: ${{ inputs.env-script }} + BUILD_PARAMS: ${{ inputs.wheel-build-params }} + run: | + source "${BUILD_ENV_FILE}" + + if [[ "$CU_VERSION" == "cpu" ]]; then + # CUDA and CPU are ABI compatible on the CPU-only parts, so strip + # in this case + export PYTORCH_VERSION="$(${CONDA_RUN} pip show torch | grep ^Version: | sed 's/Version: *//' | sed 's/+.\+//')" + else + export PYTORCH_VERSION="$(${CONDA_RUN} pip show torch | grep ^Version: | sed 's/Version: *//')" + fi + + if [[ -z "${ENV_SCRIPT}" ]]; then + ${CONDA_RUN} python setup.py bdist_wheel + else + ${CONDA_RUN} ${ENV_SCRIPT} python setup.py bdist_wheel ${BUILD_PARAMS} + fi + - name: Run post-script + working-directory: ${{ inputs.repository }} + env: + POST_SCRIPT: ${{ inputs.post-script }} + ENV_SCRIPT: ${{ inputs.env-script }} + if: ${{ inputs.post-script != '' }} + run: | + set -euxo pipefail + source "${BUILD_ENV_FILE}" + ${CONDA_RUN} ${ENV_SCRIPT} ${POST_SCRIPT} + - name: Smoke Test + env: + ENV_SCRIPT: ${{ inputs.env-script }} + PACKAGE_NAME: ${{ inputs.package-name }} + SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }} + run: | + source "${BUILD_ENV_FILE}" + WHEEL_NAME=$(ls "${{ inputs.repository }}/dist/") + echo "$WHEEL_NAME" + ${CONDA_RUN} pip install "${{ inputs.repository }}/dist/$WHEEL_NAME" + if [[ ! -f "${{ inputs.repository }}"/${SMOKE_TEST_SCRIPT} ]]; then + echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} not found" + ${CONDA_RUN} "${{ inputs.repository }}/${ENV_SCRIPT}" python -c "import ${PACKAGE_NAME}; print('package version is ', ${PACKAGE_NAME}.__version__)" + else + echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} found" + ${CONDA_RUN} "${{ inputs.repository }}/${ENV_SCRIPT}" python "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT}" + fi + # NB: Only upload to GitHub after passing smoke tests + - name: Upload wheel to GitHub + continue-on-error: true + uses: actions/upload-artifact@v3 + with: + name: ${{ env.UPLOAD_ARTIFACT_NAME }} + path: ${{ inputs.repository }}/dist/ + - uses: ./test-infra/.github/actions/teardown-windows + if: always() + name: Teardown Windows + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} + cancel-in-progress: true \ No newline at end of file diff --git a/.github/workflows/build-test-tensorrt-windows.yml b/.github/workflows/build-test-tensorrt-windows.yml new file mode 100644 index 0000000000..1c4d2ef811 --- /dev/null +++ b/.github/workflows/build-test-tensorrt-windows.yml @@ -0,0 +1,325 @@ +name: Build and Test Torch-TensorRT on Windows with Future TensorRT Versions + +on: + pull_request: + workflow_dispatch: + +permissions: + id-token: write + contents: read + packages: write + +jobs: + generate-matrix: + uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: wheel + os: windows + test-infra-repository: pytorch/test-infra + test-infra-ref: main + with-rocm: false + with-cpu: false + + generate-tensorrt-matrix: + needs: [generate-matrix] + outputs: + matrix: ${{ steps.generate.outputs.matrix }} + runs-on: ubuntu-latest + steps: + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + - uses: actions/checkout@v3 + with: + repository: pytorch/tensorrt + - name: Generate tensorrt matrix + id: generate + run: | + set -eou pipefail + MATRIX_BLOB=${{ toJSON(needs.generate-matrix.outputs.matrix) }} + MATRIX_BLOB="$(python3 .github/scripts/generate-tensorrt-test-matrix.py --matrix "${MATRIX_BLOB}")" + echo "${MATRIX_BLOB}" + echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}" + + substitute-runner: + needs: generate-tensorrt-matrix + outputs: + matrix: ${{ steps.substitute.outputs.matrix }} + runs-on: ubuntu-latest + steps: + - name: Substitute runner + id: substitute + run: | + echo matrix="$(echo '${{ needs.generate-tensorrt-matrix.outputs.matrix }}' | sed -e 's/windows.g4dn.xlarge/windows.g5.4xlarge.nvidia.gpu/g')" >> ${GITHUB_OUTPUT} + + build: + needs: substitute-runner + name: Build torch-tensorrt whl package + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + pre-script: packaging/pre_build_script_windows.sh + env-script: packaging/vc_env_helper.bat + smoke-test-script: packaging/smoke_test_windows.py + package-name: torch_tensorrt + uses: ./.github/workflows/build-tensorrt-windows.yml + with: + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + env-script: ${{ matrix.env-script }} + smoke-test-script: ${{ matrix.smoke-test-script }} + package-name: ${{ matrix.package-name }} + trigger-event: ${{ github.event_name }} + timeout: 120 + + tests-py-torchscript-fe: + name: Test torchscript frontend [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-torchscript-fe + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + script: | + set -x + export USE_HOST_DEPS=1 + export CI_BUILD=1 + pushd . + cd tests/modules + python hub.py + popd + pushd . + cd tests/py/ts + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_api_test_results.xml api/ + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/ + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ + popd + + # tests-py-dynamo-converters: + # name: Test dynamo converters [Python] + # needs: [generate-tensorrt-matrix, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # pre-script: packaging/pre_build_script.sh + # post-script: packaging/post_build_script.sh + # smoke-test-script: packaging/smoke_test_script.sh + # uses: ./.github/workflows/linux-test.yml + # with: + # job-name: tests-py-dynamo-converters + # repository: "pytorch/tensorrt" + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} + # pre-script: ${{ matrix.pre-script }} + # script: | + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/py/dynamo + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 8 conversion/ + # popd + + # tests-py-dynamo-fe: + # name: Test dynamo frontend [Python] + # needs: [generate-tensorrt-matrix, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # pre-script: packaging/pre_build_script.sh + # post-script: packaging/post_build_script.sh + # smoke-test-script: packaging/smoke_test_script.sh + # uses: ./.github/workflows/linux-test.yml + # with: + # job-name: tests-py-dynamo-fe + # repository: "pytorch/tensorrt" + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} + # pre-script: ${{ matrix.pre-script }} + # script: | + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/py/dynamo + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/ + # popd + + # tests-py-dynamo-serde: + # name: Test dynamo export serde [Python] + # needs: [generate-tensorrt-matrix, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # pre-script: packaging/pre_build_script.sh + # post-script: packaging/post_build_script.sh + # smoke-test-script: packaging/smoke_test_script.sh + # uses: ./.github/workflows/linux-test.yml + # with: + # job-name: tests-py-dynamo-serde + # repository: "pytorch/tensorrt" + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} + # pre-script: ${{ matrix.pre-script }} + # script: | + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/py/dynamo + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py + # popd + + # tests-py-torch-compile-be: + # name: Test torch compile backend [Python] + # needs: [generate-tensorrt-matrix, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # pre-script: packaging/pre_build_script.sh + # post-script: packaging/post_build_script.sh + # smoke-test-script: packaging/smoke_test_script.sh + # uses: ./.github/workflows/linux-test.yml + # with: + # job-name: tests-py-torch-compile-be + # repository: "pytorch/tensorrt" + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} + # pre-script: ${{ matrix.pre-script }} + # script: | + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/py/dynamo + # python -m pytest -ra -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/ + # python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_complete_be_e2e_test_results.xml --ir torch_compile models/test_models.py + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py + # popd + + # tests-py-dynamo-core: + # name: Test dynamo core [Python] + # needs: [generate-tensorrt-matrix, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # pre-script: packaging/pre_build_script.sh + # post-script: packaging/post_build_script.sh + # smoke-test-script: packaging/smoke_test_script.sh + # uses: ./.github/workflows/linux-test.yml + # with: + # job-name: tests-py-dynamo-core + # repository: "pytorch/tensorrt" + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} + # pre-script: ${{ matrix.pre-script }} + # script: | + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/py/dynamo + # python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore runtime/test_002_cudagraphs_py.py --ignore runtime/test_002_cudagraphs_cpp.py runtime/ + # python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/ + # python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ + # popd + + # tests-py-dynamo-cudagraphs: + # name: Test dynamo cudagraphs [Python] + # needs: [generate-tensorrt-matrix, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # pre-script: packaging/pre_build_script.sh + # post-script: packaging/post_build_script.sh + # smoke-test-script: packaging/smoke_test_script.sh + # uses: ./.github/workflows/linux-test.yml + # with: + # job-name: tests-py-dynamo-cudagraphs + # repository: "pytorch/tensorrt" + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} + # pre-script: ${{ matrix.pre-script }} + # script: | + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/py/dynamo + # nvidia-smi + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_cpp_test_results.xml runtime/test_002_cudagraphs_cpp.py || true + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_py_test_results.xml runtime/test_002_cudagraphs_py.py || true + # popd + + # tests-py-core: + # name: Test core [Python] + # needs: [generate-tensorrt-matrix, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # pre-script: packaging/pre_build_script.sh + # post-script: packaging/post_build_script.sh + # smoke-test-script: packaging/smoke_test_script.sh + # uses: ./.github/workflows/linux-test.yml + # with: + # job-name: tests-py-core + # repository: "pytorch/tensorrt" + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} + # pre-script: ${{ matrix.pre-script }} + # script: | + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/py/core + # python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . + # popd + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} + cancel-in-progress: true \ No newline at end of file diff --git a/.github/workflows/windows-test.yml b/.github/workflows/windows-test.yml index 6d1d64ad96..14fa1d51c0 100644 --- a/.github/workflows/windows-test.yml +++ b/.github/workflows/windows-test.yml @@ -53,7 +53,8 @@ jobs: CU_VERSION: ${{ matrix.desired_cuda }} SCRIPT: ${{ inputs.script }} PYTHONUTF8: 1 - name: ${{ inputs.job-name }}-${{ matrix.desired_cuda }} + DOWNLOAD_ARTIFACT_NAME: pytorch_tensorrt_${{ matrix.tensorrt.version }}_${{ matrix.python_version }}_${{ matrix.desired_cuda }}_win_amd64 + name: ${{ inputs.job-name }}-${{ matrix.tensorrt.version }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }} runs-on: windows.8xlarge.nvidia.gpu.nonephemeral defaults: run: @@ -103,10 +104,17 @@ jobs: script: ${{ inputs.pre-script }} is_windows: 'enabled' - name: Download artifacts + if: ${{ matrix.tensorrt == '' }} uses: actions/download-artifact@v3 with: name: ${{ env.ARTIFACT_NAME }} path: ${{ runner.temp }}/artifacts/ + - name: Download artifacts + if: ${{ matrix.tensorrt != '' }} + uses: actions/download-artifact@v3 + with: + name: ${{ env.DOWNLOAD_ARTIFACT_NAME }} + path: ${{ runner.temp }}/artifacts/ - name: Pack script continue-on-error: ${{ inputs.continue-on-error }} working-directory: ${{ inputs.repository }} diff --git a/packaging/pre_build_script_windows.sh b/packaging/pre_build_script_windows.sh index 0912598bc0..246db69ed8 100644 --- a/packaging/pre_build_script_windows.sh +++ b/packaging/pre_build_script_windows.sh @@ -4,6 +4,15 @@ pip install -U numpy packaging pyyaml setuptools wheel choco install bazelisk -y +if [[ ${TENSORRT_VERSION} != "" ]]; then + # this is the upgraded TensorRT version, replace current tensorrt version to the upgrade tensorRT version in the pyproject.toml + current_version=$(cat dev_dep_versions.yml | grep __tensorrt_version__ | sed 's/__tensorrt_version__: //g' | sed 's/"//g') + sed -i -e "s/tensorrt-cu12==${current_version}/tensorrt-cu12==${TENSORRT_VERSION}/g" \ + -e "s/tensorrt-cu12-bindings==${current_version}/tensorrt-cu12-bindings==${TENSORRT_VERSION}/g" \ + -e "s/tensorrt-cu12-libs==${current_version}/tensorrt-cu12-libs==${TENSORRT_VERSION}/g" \ + pyproject.toml +fi + if [[ "${CU_VERSION::4}" < "cu12" ]]; then # replace dependencies from tensorrt-cu12-bindings/libs to tensorrt-cu11-bindings/libs sed -i -e "s/tensorrt-cu12==/tensorrt-${CU_VERSION::4}==/g" \ @@ -24,7 +33,12 @@ pip install --force-reinstall --pre ${TORCH_TORCHVISION} --index-url ${INDEX_URL export CUDA_HOME="$(echo ${CUDA_PATH} | sed -e 's#\\#\/#g')" export TORCH_INSTALL_PATH="$(python -c "import torch, os; print(os.path.dirname(torch.__file__))" | sed -e 's#\\#\/#g')" -cat toolchains/ci_workspaces/MODULE.bazel.tmpl | envsubst > MODULE.bazel + +if [[ ${TENSORRT_VERSION} != "" ]]; then + cat toolchains/ci_workspaces/MODULE_tensorrt.bazel.tmpl | envsubst > MODULE.bazel +else + cat toolchains/ci_workspaces/MODULE.bazel.tmpl | envsubst > MODULE.bazel +fi cat MODULE.bazel echo "RELEASE=1" >> ${GITHUB_ENV} From 80a4a7f9b0e3543563d7ebb28862b048bb89993b Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 11 Nov 2024 21:30:57 -0800 Subject: [PATCH 20/31] test --- .github/workflows/build-test-linux.yml | 2 +- .github/workflows/build-test-windows.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-test-linux.yml b/.github/workflows/build-test-linux.yml index c462069d61..1a32fa87c0 100644 --- a/.github/workflows/build-test-linux.yml +++ b/.github/workflows/build-test-linux.yml @@ -1,7 +1,7 @@ name: Build and test Linux wheels on: - pull_request: + # pull_request: push: branches: - main diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index c926b929ef..dd26da3388 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -1,7 +1,7 @@ name: Build and test Windows wheels on: - pull_request: + # pull_request: push: branches: - main From 78878f64dca725ad1402c145ad49f5f1f248b468 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 12 Nov 2024 05:29:14 -0800 Subject: [PATCH 21/31] test --- .github/workflows/build-test-tensorrt-windows.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-test-tensorrt-windows.yml b/.github/workflows/build-test-tensorrt-windows.yml index 1c4d2ef811..ec8cdfc47d 100644 --- a/.github/workflows/build-test-tensorrt-windows.yml +++ b/.github/workflows/build-test-tensorrt-windows.yml @@ -50,7 +50,8 @@ jobs: - name: Substitute runner id: substitute run: | - echo matrix="$(echo '${{ needs.generate-tensorrt-matrix.outputs.matrix }}' | sed -e 's/windows.g4dn.xlarge/windows.g5.4xlarge.nvidia.gpu/g')" >> ${GITHUB_OUTPUT} + # echo matrix="$(echo '${{ needs.generate-tensorrt-matrix.outputs.matrix }}' | sed -e 's/windows.g4dn.xlarge/windows.g5.4xlarge.nvidia.gpu/g')" >> ${GITHUB_OUTPUT} + echo matrix="$(echo '${{ needs.generate-tensorrt-matrix.outputs.matrix }}')" >> ${GITHUB_OUTPUT} build: needs: substitute-runner From a6246107366b41852eee6bb6dccc96b60dfdbec8 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 12 Nov 2024 07:03:49 -0800 Subject: [PATCH 22/31] test --- .github/workflows/build-test-tensorrt-windows.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build-test-tensorrt-windows.yml b/.github/workflows/build-test-tensorrt-windows.yml index ec8cdfc47d..a07b846865 100644 --- a/.github/workflows/build-test-tensorrt-windows.yml +++ b/.github/workflows/build-test-tensorrt-windows.yml @@ -50,8 +50,7 @@ jobs: - name: Substitute runner id: substitute run: | - # echo matrix="$(echo '${{ needs.generate-tensorrt-matrix.outputs.matrix }}' | sed -e 's/windows.g4dn.xlarge/windows.g5.4xlarge.nvidia.gpu/g')" >> ${GITHUB_OUTPUT} - echo matrix="$(echo '${{ needs.generate-tensorrt-matrix.outputs.matrix }}')" >> ${GITHUB_OUTPUT} + echo matrix="$(echo '${{ needs.generate-tensorrt-matrix.outputs.matrix }}' | sed -e 's/windows.g4dn.xlarge/windows.g6.4xlarge.nvidia.gpu/g')" >> ${GITHUB_OUTPUT} build: needs: substitute-runner From 5b7352c7aaad461e0a9c1a2789ce2fdb17dcde6a Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 12 Nov 2024 10:21:20 -0800 Subject: [PATCH 23/31] test with other windows nodes --- .github/workflows/build-test-tensorrt-windows.yml | 9 ++++++--- .github/workflows/windows-test.yml | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-test-tensorrt-windows.yml b/.github/workflows/build-test-tensorrt-windows.yml index a07b846865..ecd899314c 100644 --- a/.github/workflows/build-test-tensorrt-windows.yml +++ b/.github/workflows/build-test-tensorrt-windows.yml @@ -50,7 +50,7 @@ jobs: - name: Substitute runner id: substitute run: | - echo matrix="$(echo '${{ needs.generate-tensorrt-matrix.outputs.matrix }}' | sed -e 's/windows.g4dn.xlarge/windows.g6.4xlarge.nvidia.gpu/g')" >> ${GITHUB_OUTPUT} + echo matrix="$(echo '${{ needs.generate-tensorrt-matrix.outputs.matrix }}' | sed -e 's/windows.g4dn.xlarge/windows.g5.4xlarge.nvidia.gpu/g')" >> ${GITHUB_OUTPUT} build: needs: substitute-runner @@ -102,6 +102,9 @@ jobs: export CI_BUILD=1 pushd . cd tests/modules + python -c "import torch; print(torch.cuda.is_available())" + python -c "import torch; print(torch.cuda.get_device_name(0))" + python -c "import torch; print(torch.cuda.get_device_properties(0))" python hub.py popd pushd . @@ -321,5 +324,5 @@ jobs: # popd concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} - cancel-in-progress: true \ No newline at end of file + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }}-2 + cancel-in-progress: false \ No newline at end of file diff --git a/.github/workflows/windows-test.yml b/.github/workflows/windows-test.yml index 14fa1d51c0..0026a13cf9 100644 --- a/.github/workflows/windows-test.yml +++ b/.github/workflows/windows-test.yml @@ -55,7 +55,7 @@ jobs: PYTHONUTF8: 1 DOWNLOAD_ARTIFACT_NAME: pytorch_tensorrt_${{ matrix.tensorrt.version }}_${{ matrix.python_version }}_${{ matrix.desired_cuda }}_win_amd64 name: ${{ inputs.job-name }}-${{ matrix.tensorrt.version }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }} - runs-on: windows.8xlarge.nvidia.gpu.nonephemeral + runs-on: windows.4xlarge.nvidia.gpu defaults: run: shell: bash -l {0} From d984c902ae600b4aef68be95314547eda6a14b35 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 12 Nov 2024 10:59:35 -0800 Subject: [PATCH 24/31] test --- .github/workflows/windows-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/windows-test.yml b/.github/workflows/windows-test.yml index 0026a13cf9..b2fa3e06c4 100644 --- a/.github/workflows/windows-test.yml +++ b/.github/workflows/windows-test.yml @@ -55,7 +55,7 @@ jobs: PYTHONUTF8: 1 DOWNLOAD_ARTIFACT_NAME: pytorch_tensorrt_${{ matrix.tensorrt.version }}_${{ matrix.python_version }}_${{ matrix.desired_cuda }}_win_amd64 name: ${{ inputs.job-name }}-${{ matrix.tensorrt.version }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }} - runs-on: windows.4xlarge.nvidia.gpu + runs-on: windows.g5.4xlarge.nvidia.gpu defaults: run: shell: bash -l {0} From 1928e8a6bcd055b3dcd52105aab0171f232e2ff0 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 12 Nov 2024 12:32:59 -0800 Subject: [PATCH 25/31] test --- py/torch_tensorrt/dynamo/lowering/_decompositions.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/py/torch_tensorrt/dynamo/lowering/_decompositions.py b/py/torch_tensorrt/dynamo/lowering/_decompositions.py index dda014890d..5fcccb5c77 100644 --- a/py/torch_tensorrt/dynamo/lowering/_decompositions.py +++ b/py/torch_tensorrt/dynamo/lowering/_decompositions.py @@ -4,8 +4,8 @@ import torch from torch._decomp import register_decomposition -from torch._export.utils import _decomp_table_to_post_autograd_aten from torch._ops import OpOverload +from torch.export import default_decompositions from torch_tensorrt.dynamo._defaults import default_device from torch_tensorrt.dynamo.conversion.converter_utils import get_positive_dim from torch_tensorrt.dynamo.utils import to_torch_device @@ -412,7 +412,8 @@ def get_decompositions( return {**CORE_ATEN_DECOMPOSITIONS_FILTERED, **TORCH_TRT_DECOMPOSITIONS} else: # changes made here due to torch2.6 changes https://github.com/pytorch/pytorch/pull/135080 - decomp_table = _decomp_table_to_post_autograd_aten() + # changes made here due to torch2.6 changes https://github.com/pytorch/pytorch/pull/140085 + decomp_table = default_decompositions() DECOMP_TABLE_FILTERED: Dict[OpOverload, Callable[[Any], Any]] = { decomp: decomp_table[decomp] for decomp in decomp_table From 8fc3482cb57d9ca6c6fb80f431ac1bf7b765b7a3 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 12 Nov 2024 13:03:55 -0800 Subject: [PATCH 26/31] change decomposition default table due to upstream torch change --- py/torch_tensorrt/dynamo/lowering/_decompositions.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/py/torch_tensorrt/dynamo/lowering/_decompositions.py b/py/torch_tensorrt/dynamo/lowering/_decompositions.py index dda014890d..5fcccb5c77 100644 --- a/py/torch_tensorrt/dynamo/lowering/_decompositions.py +++ b/py/torch_tensorrt/dynamo/lowering/_decompositions.py @@ -4,8 +4,8 @@ import torch from torch._decomp import register_decomposition -from torch._export.utils import _decomp_table_to_post_autograd_aten from torch._ops import OpOverload +from torch.export import default_decompositions from torch_tensorrt.dynamo._defaults import default_device from torch_tensorrt.dynamo.conversion.converter_utils import get_positive_dim from torch_tensorrt.dynamo.utils import to_torch_device @@ -412,7 +412,8 @@ def get_decompositions( return {**CORE_ATEN_DECOMPOSITIONS_FILTERED, **TORCH_TRT_DECOMPOSITIONS} else: # changes made here due to torch2.6 changes https://github.com/pytorch/pytorch/pull/135080 - decomp_table = _decomp_table_to_post_autograd_aten() + # changes made here due to torch2.6 changes https://github.com/pytorch/pytorch/pull/140085 + decomp_table = default_decompositions() DECOMP_TABLE_FILTERED: Dict[OpOverload, Callable[[Any], Any]] = { decomp: decomp_table[decomp] for decomp in decomp_table From 4ed8e28c195a78a49e6d7a6718c7fe984f958c33 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 12 Nov 2024 13:32:11 -0800 Subject: [PATCH 27/31] test --- .github/workflows/build-test-linux.yml | 2 +- .../workflows/build-test-tensorrt-windows.yml | 390 ++++++++---------- .github/workflows/build-test-windows.yml | 2 +- .github/workflows/windows-test.yml | 2 +- 4 files changed, 185 insertions(+), 211 deletions(-) diff --git a/.github/workflows/build-test-linux.yml b/.github/workflows/build-test-linux.yml index 1a32fa87c0..c462069d61 100644 --- a/.github/workflows/build-test-linux.yml +++ b/.github/workflows/build-test-linux.yml @@ -1,7 +1,7 @@ name: Build and test Linux wheels on: - # pull_request: + pull_request: push: branches: - main diff --git a/.github/workflows/build-test-tensorrt-windows.yml b/.github/workflows/build-test-tensorrt-windows.yml index ecd899314c..626c4374da 100644 --- a/.github/workflows/build-test-tensorrt-windows.yml +++ b/.github/workflows/build-test-tensorrt-windows.yml @@ -97,14 +97,10 @@ jobs: build-matrix: ${{ needs.substitute-runner.outputs.matrix }} pre-script: packaging/driver_upgrade.bat script: | - set -x export USE_HOST_DEPS=1 export CI_BUILD=1 pushd . cd tests/modules - python -c "import torch; print(torch.cuda.is_available())" - python -c "import torch; print(torch.cuda.get_device_name(0))" - python -c "import torch; print(torch.cuda.get_device_properties(0))" python hub.py popd pushd . @@ -114,215 +110,193 @@ jobs: python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ popd - # tests-py-dynamo-converters: - # name: Test dynamo converters [Python] - # needs: [generate-tensorrt-matrix, build] - # strategy: - # fail-fast: false - # matrix: - # include: - # - repository: pytorch/tensorrt - # package-name: torch_tensorrt - # pre-script: packaging/pre_build_script.sh - # post-script: packaging/post_build_script.sh - # smoke-test-script: packaging/smoke_test_script.sh - # uses: ./.github/workflows/linux-test.yml - # with: - # job-name: tests-py-dynamo-converters - # repository: "pytorch/tensorrt" - # ref: "" - # test-infra-repository: pytorch/test-infra - # test-infra-ref: main - # build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} - # pre-script: ${{ matrix.pre-script }} - # script: | - # export USE_HOST_DEPS=1 - # export CI_BUILD=1 - # pushd . - # cd tests/py/dynamo - # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 8 conversion/ - # popd + tests-py-dynamo-converters: + name: Test dynamo converters [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-dynamo-converters + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + script: | + export USE_HOST_DEPS=1 + export CI_BUILD=1 + pushd . + cd tests/py/dynamo + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 8 conversion/ + popd - # tests-py-dynamo-fe: - # name: Test dynamo frontend [Python] - # needs: [generate-tensorrt-matrix, build] - # strategy: - # fail-fast: false - # matrix: - # include: - # - repository: pytorch/tensorrt - # package-name: torch_tensorrt - # pre-script: packaging/pre_build_script.sh - # post-script: packaging/post_build_script.sh - # smoke-test-script: packaging/smoke_test_script.sh - # uses: ./.github/workflows/linux-test.yml - # with: - # job-name: tests-py-dynamo-fe - # repository: "pytorch/tensorrt" - # ref: "" - # test-infra-repository: pytorch/test-infra - # test-infra-ref: main - # build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} - # pre-script: ${{ matrix.pre-script }} - # script: | - # export USE_HOST_DEPS=1 - # export CI_BUILD=1 - # pushd . - # cd tests/py/dynamo - # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/ - # popd + tests-py-dynamo-fe: + name: Test dynamo frontend [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-dynamo-fe + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + script: | + export USE_HOST_DEPS=1 + export CI_BUILD=1 + pushd . + cd tests/py/dynamo + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/ + popd - # tests-py-dynamo-serde: - # name: Test dynamo export serde [Python] - # needs: [generate-tensorrt-matrix, build] - # strategy: - # fail-fast: false - # matrix: - # include: - # - repository: pytorch/tensorrt - # package-name: torch_tensorrt - # pre-script: packaging/pre_build_script.sh - # post-script: packaging/post_build_script.sh - # smoke-test-script: packaging/smoke_test_script.sh - # uses: ./.github/workflows/linux-test.yml - # with: - # job-name: tests-py-dynamo-serde - # repository: "pytorch/tensorrt" - # ref: "" - # test-infra-repository: pytorch/test-infra - # test-infra-ref: main - # build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} - # pre-script: ${{ matrix.pre-script }} - # script: | - # export USE_HOST_DEPS=1 - # export CI_BUILD=1 - # pushd . - # cd tests/py/dynamo - # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py - # popd + tests-py-dynamo-serde: + name: Test dynamo export serde [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-dynamo-serde + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + script: | + export USE_HOST_DEPS=1 + export CI_BUILD=1 + pushd . + cd tests/py/dynamo + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py + popd - # tests-py-torch-compile-be: - # name: Test torch compile backend [Python] - # needs: [generate-tensorrt-matrix, build] - # strategy: - # fail-fast: false - # matrix: - # include: - # - repository: pytorch/tensorrt - # package-name: torch_tensorrt - # pre-script: packaging/pre_build_script.sh - # post-script: packaging/post_build_script.sh - # smoke-test-script: packaging/smoke_test_script.sh - # uses: ./.github/workflows/linux-test.yml - # with: - # job-name: tests-py-torch-compile-be - # repository: "pytorch/tensorrt" - # ref: "" - # test-infra-repository: pytorch/test-infra - # test-infra-ref: main - # build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} - # pre-script: ${{ matrix.pre-script }} - # script: | - # export USE_HOST_DEPS=1 - # export CI_BUILD=1 - # pushd . - # cd tests/py/dynamo - # python -m pytest -ra -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/ - # python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_complete_be_e2e_test_results.xml --ir torch_compile models/test_models.py - # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py - # popd + tests-py-torch-compile-be: + name: Test torch compile backend [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-torch-compile-be + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + script: | + export USE_HOST_DEPS=1 + export CI_BUILD=1 + pushd . + cd tests/py/dynamo + python -m pytest -ra -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/ + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_complete_be_e2e_test_results.xml --ir torch_compile models/test_models.py + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py + popd - # tests-py-dynamo-core: - # name: Test dynamo core [Python] - # needs: [generate-tensorrt-matrix, build] - # strategy: - # fail-fast: false - # matrix: - # include: - # - repository: pytorch/tensorrt - # package-name: torch_tensorrt - # pre-script: packaging/pre_build_script.sh - # post-script: packaging/post_build_script.sh - # smoke-test-script: packaging/smoke_test_script.sh - # uses: ./.github/workflows/linux-test.yml - # with: - # job-name: tests-py-dynamo-core - # repository: "pytorch/tensorrt" - # ref: "" - # test-infra-repository: pytorch/test-infra - # test-infra-ref: main - # build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} - # pre-script: ${{ matrix.pre-script }} - # script: | - # export USE_HOST_DEPS=1 - # export CI_BUILD=1 - # pushd . - # cd tests/py/dynamo - # python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore runtime/test_002_cudagraphs_py.py --ignore runtime/test_002_cudagraphs_cpp.py runtime/ - # python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/ - # python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ - # popd + tests-py-dynamo-core: + name: Test dynamo core [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-dynamo-core + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + script: | + export USE_HOST_DEPS=1 + export CI_BUILD=1 + pushd . + cd tests/py/dynamo + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore runtime/test_002_cudagraphs_py.py --ignore runtime/test_002_cudagraphs_cpp.py runtime/ + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/ + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ + popd - # tests-py-dynamo-cudagraphs: - # name: Test dynamo cudagraphs [Python] - # needs: [generate-tensorrt-matrix, build] - # strategy: - # fail-fast: false - # matrix: - # include: - # - repository: pytorch/tensorrt - # package-name: torch_tensorrt - # pre-script: packaging/pre_build_script.sh - # post-script: packaging/post_build_script.sh - # smoke-test-script: packaging/smoke_test_script.sh - # uses: ./.github/workflows/linux-test.yml - # with: - # job-name: tests-py-dynamo-cudagraphs - # repository: "pytorch/tensorrt" - # ref: "" - # test-infra-repository: pytorch/test-infra - # test-infra-ref: main - # build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} - # pre-script: ${{ matrix.pre-script }} - # script: | - # export USE_HOST_DEPS=1 - # export CI_BUILD=1 - # pushd . - # cd tests/py/dynamo - # nvidia-smi - # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_cpp_test_results.xml runtime/test_002_cudagraphs_cpp.py || true - # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_py_test_results.xml runtime/test_002_cudagraphs_py.py || true - # popd + tests-py-dynamo-cudagraphs: + name: Test dynamo cudagraphs [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-dynamo-cudagraphs + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + script: | + export USE_HOST_DEPS=1 + export CI_BUILD=1 + pushd . + cd tests/py/dynamo + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_cpp_test_results.xml runtime/test_002_cudagraphs_cpp.py + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_py_test_results.xml runtime/test_002_cudagraphs_py.py + popd - # tests-py-core: - # name: Test core [Python] - # needs: [generate-tensorrt-matrix, build] - # strategy: - # fail-fast: false - # matrix: - # include: - # - repository: pytorch/tensorrt - # package-name: torch_tensorrt - # pre-script: packaging/pre_build_script.sh - # post-script: packaging/post_build_script.sh - # smoke-test-script: packaging/smoke_test_script.sh - # uses: ./.github/workflows/linux-test.yml - # with: - # job-name: tests-py-core - # repository: "pytorch/tensorrt" - # ref: "" - # test-infra-repository: pytorch/test-infra - # test-infra-ref: main - # build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} - # pre-script: ${{ matrix.pre-script }} - # script: | - # export USE_HOST_DEPS=1 - # export CI_BUILD=1 - # pushd . - # cd tests/py/core - # python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . - # popd + tests-py-core: + name: Test core [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-core + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + script: | + export USE_HOST_DEPS=1 + export CI_BUILD=1 + pushd . + cd tests/py/core + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . + popd concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }}-2 - cancel-in-progress: false \ No newline at end of file + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} + cancel-in-progress: true \ No newline at end of file diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index dd26da3388..c926b929ef 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -1,7 +1,7 @@ name: Build and test Windows wheels on: - # pull_request: + pull_request: push: branches: - main diff --git a/.github/workflows/windows-test.yml b/.github/workflows/windows-test.yml index b2fa3e06c4..13feedfa8c 100644 --- a/.github/workflows/windows-test.yml +++ b/.github/workflows/windows-test.yml @@ -55,7 +55,7 @@ jobs: PYTHONUTF8: 1 DOWNLOAD_ARTIFACT_NAME: pytorch_tensorrt_${{ matrix.tensorrt.version }}_${{ matrix.python_version }}_${{ matrix.desired_cuda }}_win_amd64 name: ${{ inputs.job-name }}-${{ matrix.tensorrt.version }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }} - runs-on: windows.g5.4xlarge.nvidia.gpu + runs-on: ${{ matrix.validation_runner }} defaults: run: shell: bash -l {0} From 7e3c1faabedce2f7749b6586d9dcc54e439c8003 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 12 Nov 2024 13:56:12 -0800 Subject: [PATCH 28/31] test --- .github/workflows/build-test-windows.yml | 32 ++++++++++++------------ 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index c926b929ef..10073d5ef8 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -66,7 +66,7 @@ jobs: tests-py-torchscript-fe: name: Test torchscript frontend [Python] - needs: [generate-matrix, build] + needs: [substitute-runner, build] strategy: fail-fast: false matrix: @@ -80,7 +80,7 @@ jobs: ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main - build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} pre-script: packaging/driver_upgrade.bat script: | export USE_HOST_DEPS=1 @@ -98,7 +98,7 @@ jobs: tests-py-dynamo-converters: name: Test dynamo converters [Python] - needs: [generate-matrix, build] + needs: [substitute-runner, build] strategy: fail-fast: false matrix: @@ -112,7 +112,7 @@ jobs: ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main - build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} pre-script: packaging/driver_upgrade.bat script: | export USE_HOST_DEPS=1 @@ -124,7 +124,7 @@ jobs: tests-py-dynamo-fe: name: Test dynamo frontend [Python] - needs: [generate-matrix, build] + needs: [substitute-runner, build] strategy: fail-fast: false matrix: @@ -138,7 +138,7 @@ jobs: ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main - build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} pre-script: packaging/driver_upgrade.bat script: | export USE_HOST_DEPS=1 @@ -150,7 +150,7 @@ jobs: tests-py-dynamo-serde: name: Test dynamo export serde [Python] - needs: [generate-matrix, build] + needs: [substitute-runner, build] strategy: fail-fast: false matrix: @@ -164,7 +164,7 @@ jobs: ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main - build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} pre-script: packaging/driver_upgrade.bat script: | export USE_HOST_DEPS=1 @@ -176,7 +176,7 @@ jobs: tests-py-torch-compile-be: name: Test torch compile backend [Python] - needs: [generate-matrix, build] + needs: [substitute-runner, build] strategy: fail-fast: false matrix: @@ -190,7 +190,7 @@ jobs: ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main - build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} pre-script: packaging/driver_upgrade.bat script: | export USE_HOST_DEPS=1 @@ -204,7 +204,7 @@ jobs: tests-py-dynamo-core: name: Test dynamo core [Python] - needs: [generate-matrix, build] + needs: [substitute-runner, build] strategy: fail-fast: false matrix: @@ -218,7 +218,7 @@ jobs: ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main - build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} pre-script: packaging/driver_upgrade.bat script: | export USE_HOST_DEPS=1 @@ -232,7 +232,7 @@ jobs: tests-py-dynamo-cudagraphs: name: Test dynamo cudagraphs [Python] - needs: [generate-matrix, build] + needs: [substitute-runner, build] strategy: fail-fast: false matrix: @@ -246,7 +246,7 @@ jobs: ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main - build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} pre-script: packaging/driver_upgrade.bat script: | export USE_HOST_DEPS=1 @@ -259,7 +259,7 @@ jobs: tests-py-core: name: Test core [Python] - needs: [generate-matrix, build] + needs: [substitute-runner, build] strategy: fail-fast: false matrix: @@ -273,7 +273,7 @@ jobs: ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main - build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} pre-script: packaging/driver_upgrade.bat script: | export USE_HOST_DEPS=1 From dbd740b1820fa10ff3cc05482e47a83d9170373b Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Wed, 13 Nov 2024 10:31:14 -0800 Subject: [PATCH 29/31] test --- .github/workflows/build-test-tensorrt-windows.yml | 14 +++++++------- packaging/pre_build_script_windows.sh | 4 +++- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build-test-tensorrt-windows.yml b/.github/workflows/build-test-tensorrt-windows.yml index 626c4374da..7d19c0f051 100644 --- a/.github/workflows/build-test-tensorrt-windows.yml +++ b/.github/workflows/build-test-tensorrt-windows.yml @@ -126,7 +126,7 @@ jobs: ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main - build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} pre-script: packaging/driver_upgrade.bat script: | export USE_HOST_DEPS=1 @@ -152,7 +152,7 @@ jobs: ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main - build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} pre-script: packaging/driver_upgrade.bat script: | export USE_HOST_DEPS=1 @@ -178,7 +178,7 @@ jobs: ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main - build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} pre-script: packaging/driver_upgrade.bat script: | export USE_HOST_DEPS=1 @@ -204,7 +204,7 @@ jobs: ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main - build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} pre-script: packaging/driver_upgrade.bat script: | export USE_HOST_DEPS=1 @@ -232,7 +232,7 @@ jobs: ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main - build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} pre-script: packaging/driver_upgrade.bat script: | export USE_HOST_DEPS=1 @@ -260,7 +260,7 @@ jobs: ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main - build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} pre-script: packaging/driver_upgrade.bat script: | export USE_HOST_DEPS=1 @@ -287,7 +287,7 @@ jobs: ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main - build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} pre-script: packaging/driver_upgrade.bat script: | export USE_HOST_DEPS=1 diff --git a/packaging/pre_build_script_windows.sh b/packaging/pre_build_script_windows.sh index 246db69ed8..5e6f32f569 100644 --- a/packaging/pre_build_script_windows.sh +++ b/packaging/pre_build_script_windows.sh @@ -1,9 +1,11 @@ -set -exou pipefail +set -x pip install -U numpy packaging pyyaml setuptools wheel choco install bazelisk -y +echo TENSORRT_VERSION=${TENSORRT_VERSION} + if [[ ${TENSORRT_VERSION} != "" ]]; then # this is the upgraded TensorRT version, replace current tensorrt version to the upgrade tensorRT version in the pyproject.toml current_version=$(cat dev_dep_versions.yml | grep __tensorrt_version__ | sed 's/__tensorrt_version__: //g' | sed 's/"//g') From 1fdb135402482cd0e9df30a9fa208857de533f5c Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Thu, 14 Nov 2024 14:00:54 -0800 Subject: [PATCH 30/31] test --- .github/workflows/build-test-tensorrt-windows.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build-test-tensorrt-windows.yml b/.github/workflows/build-test-tensorrt-windows.yml index 7d19c0f051..b6eb1d765c 100644 --- a/.github/workflows/build-test-tensorrt-windows.yml +++ b/.github/workflows/build-test-tensorrt-windows.yml @@ -1,7 +1,6 @@ name: Build and Test Torch-TensorRT on Windows with Future TensorRT Versions on: - pull_request: workflow_dispatch: permissions: From bb9b2b648496f43559a23f7254d081664a5b8ad4 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Thu, 14 Nov 2024 14:01:45 -0800 Subject: [PATCH 31/31] test --- py/torch_tensorrt/dynamo/lowering/_decompositions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/py/torch_tensorrt/dynamo/lowering/_decompositions.py b/py/torch_tensorrt/dynamo/lowering/_decompositions.py index 5fcccb5c77..9f5dc0de66 100644 --- a/py/torch_tensorrt/dynamo/lowering/_decompositions.py +++ b/py/torch_tensorrt/dynamo/lowering/_decompositions.py @@ -412,7 +412,6 @@ def get_decompositions( return {**CORE_ATEN_DECOMPOSITIONS_FILTERED, **TORCH_TRT_DECOMPOSITIONS} else: # changes made here due to torch2.6 changes https://github.com/pytorch/pytorch/pull/135080 - # changes made here due to torch2.6 changes https://github.com/pytorch/pytorch/pull/140085 decomp_table = default_decompositions() DECOMP_TABLE_FILTERED: Dict[OpOverload, Callable[[Any], Any]] = { decomp: decomp_table[decomp]