Skip to content

Commit

Permalink
Merge branch 'main' into instance_norm_decomposition
Browse files Browse the repository at this point in the history
  • Loading branch information
HolyWu committed Nov 15, 2024
2 parents 3c9ac96 + 0841f34 commit 30293aa
Show file tree
Hide file tree
Showing 171 changed files with 2,652 additions and 358 deletions.
123 changes: 123 additions & 0 deletions .github/scripts/generate-tensorrt-test-matrix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#!/usr/bin/env python3

import argparse
import copy
import json
import sys

# CUDA builds to test against the future TensorRT versions, keyed by channel.
# Update the lists below to change which CUDA versions are exercised.
#   nightly: the workflow was triggered from the main branch or a personal branch
#   test:    the workflow was triggered from a release branch (release/2.5 etc.)
_STABLE_CUDA_VERSIONS = ("cu121", "cu124")

CUDA_VERSIONS_DICT = {
    "nightly": ["cu124"],
    # "test" and "release" currently cover the same CUDA builds; each gets its
    # own list object so one can be edited without affecting the other.
    "test": list(_STABLE_CUDA_VERSIONS),
    "release": list(_STABLE_CUDA_VERSIONS),
}

# Python versions to test against the future TensorRT versions, keyed by channel.
# Update the lists below to change which Python versions are exercised.
#   nightly: the workflow was triggered from the main branch or a personal branch
#   test:    the workflow was triggered from a release branch (release/2.5 etc.)
_STABLE_PYTHON_VERSIONS = ("3.9", "3.10", "3.11", "3.12")

PYTHON_VERSIONS_DICT = {
    "nightly": ["3.9"],
    # "test" and "release" currently cover the same interpreters; each gets its
    # own list object so one can be edited without affecting the other.
    "test": list(_STABLE_PYTHON_VERSIONS),
    "release": list(_STABLE_PYTHON_VERSIONS),
}

# Future TensorRT versions to test. Add or remove entries in the per-platform
# tables below; each entry maps a TensorRT version to the download URL of its
# archive, the top-level directory name inside the archive, and the archive's
# sha256 checksum.
_WINDOWS_TENSORRT_VERSIONS = {
    "10.4.0": {
        "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/zip/TensorRT-10.4.0.26.Windows.win10.cuda-12.6.zip",
        "strip_prefix": "TensorRT-10.4.0.26",
        "sha256": "3a7de83778b9e9f812fd8901e07e0d7d6fc54ce633fcff2e340f994df2c6356c",
    },
    "10.5.0": {
        "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/zip/TensorRT-10.5.0.18.Windows.win10.cuda-12.6.zip",
        "strip_prefix": "TensorRT-10.5.0.18",
        "sha256": "e6436f4164db4e44d727354dccf7d93755efb70d6fbfd6fa95bdfeb2e7331b24",
    },
    "10.6.0": {
        "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/zip/TensorRT-10.6.0.26.Windows.win10.cuda-12.6.zip",
        "strip_prefix": "TensorRT-10.6.0.26",
        "sha256": "6c6d92c108a1b3368423e8f69f08d31269830f1e4c9da43b37ba34a176797254",
    },
}

_LINUX_TENSORRT_VERSIONS = {
    "10.4.0": {
        "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/tars/TensorRT-10.4.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz",
        "strip_prefix": "TensorRT-10.4.0.26",
        "sha256": "cb0273ecb3ba4db8993a408eedd354712301a6c7f20704c52cdf9f78aa97bbdb",
    },
    "10.5.0": {
        "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/tars/TensorRT-10.5.0.18.Linux.x86_64-gnu.cuda-12.6.tar.gz",
        "strip_prefix": "TensorRT-10.5.0.18",
        "sha256": "f404d379d639552a3e026cd5267213bd6df18a4eb899d6e47815bbdb34854958",
    },
    "10.6.0": {
        "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/tars/TensorRT-10.6.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz",
        "strip_prefix": "TensorRT-10.6.0.26",
        "sha256": "33d3c2f3f4c84dc7991a4337a6fde9ed33f5c8e5c4f03ac2eb6b994a382b03a0",
    },
}

# Keyed by the platform name that is matched against the matrix's
# "validation_runner" value.
TENSORRT_VERSIONS_DICT = {
    "windows": _WINDOWS_TENSORRT_VERSIONS,
    "linux": _LINUX_TENSORRT_VERSIONS,
}


def main(args: list[str]) -> None:
    """Expand a build matrix with the TensorRT versions under test.

    Parses ``--matrix`` (a JSON string holding an ``include`` list), keeps the
    entries whose ``desired_cuda`` and ``python_version`` are enabled for the
    matrix's channel, emits one copy of every kept entry per TensorRT version
    configured for the runner's platform, and prints the resulting matrix as
    JSON on stdout.

    The channel and the platform are both read from the first ``include``
    entry only.

    Args:
        args: Command-line arguments, without the program name.

    Raises:
        Exception: If ``--matrix`` is empty, its ``include`` list is empty,
            the ``channel`` or ``validation_runner`` field is missing, or
            either names an unsupported value.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--matrix",
        help="matrix",
        type=str,
        default="",
    )

    options = parser.parse_args(args)
    if options.matrix == "":
        raise Exception("--matrix is empty, please provide the matrix json str")

    matrix_dict = json.loads(options.matrix)
    includes = matrix_dict["include"]
    # Raise explicitly instead of using `assert`, which is stripped under -O.
    if not includes:
        raise Exception(f"include field is empty in the matrix: {options.matrix}")
    first = includes[0]

    if "channel" not in first:
        raise Exception(f"channel field is missing from the matrix: {options.matrix}")
    channel = first["channel"]
    if channel not in ("nightly", "test", "release"):
        raise Exception(
            f"channel field: {channel} is not supported, currently supported value: nightly, test, release"
        )

    if "validation_runner" not in first:
        raise Exception(
            f"validation_runner field is missing from the matrix: {options.matrix}"
        )
    validation_runner = first["validation_runner"]
    if "windows" in validation_runner:
        arch = "windows"
    elif "linux" in validation_runner:
        arch = "linux"
    else:
        # `first` is a dict, so the runner must be read by key; attribute
        # access here would raise AttributeError and hide this message.
        raise Exception(
            f"{validation_runner} is not the supported arch, currently only support windows and linux"
        )

    cuda_versions = CUDA_VERSIONS_DICT[channel]
    python_versions = PYTHON_VERSIONS_DICT[channel]
    tensorrt_versions = TENSORRT_VERSIONS_DICT[arch]

    filtered_includes = []
    for item in includes:
        if (
            item["desired_cuda"] not in cuda_versions
            or item["python_version"] not in python_versions
        ):
            continue
        for tensorrt_version, tensorrt_json in tensorrt_versions.items():
            new_item = copy.deepcopy(item)
            # Build a fresh dict so the module-level TensorRT table is not
            # mutated and output entries do not alias each other.
            new_item["tensorrt"] = {**tensorrt_json, "version": tensorrt_version}
            filtered_includes.append(new_item)

    print(json.dumps({"include": filtered_includes}))


if __name__ == "__main__":
    main(sys.argv[1:])
222 changes: 222 additions & 0 deletions .github/workflows/build-tensorrt-linux.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
# Reusable workflow (invoked through `workflow_call`) that builds the
# Torch-TensorRT wheel on Linux once per entry of the supplied build matrix.
name: Build Torch-TensorRT wheel on Linux with Future TensorRT Versions

on:
  workflow_call:
    inputs:
      repository:
        description: 'Repository to checkout, defaults to ""'
        default: ""
        type: string
      ref:
        description: 'Reference to checkout, defaults to "nightly"'
        default: "nightly"
        type: string
      test-infra-repository:
        description: "Test infra repository to use"
        default: "pytorch/test-infra"
        type: string
      test-infra-ref:
        description: "Test infra reference to use"
        default: ""
        type: string
      # JSON string; it is parsed with fromJSON() to form the job matrix.
      build-matrix:
        description: "Build matrix to utilize"
        default: ""
        type: string
      pre-script:
        description: "Pre script to run prior to build"
        default: ""
        type: string
      # The Run Post-Script step executes after the wheel has been built.
      post-script:
        description: "Post script to run after the build"
        default: ""
        type: string
      smoke-test-script:
        description: "Script for Smoke Test for a specific domain"
        default: ""
        type: string
      env-var-script:
        description: "Script that sets Domain-Specific Environment Variables"
        default: ""
        type: string
      package-name:
        description: "Name of the actual python package that is imported"
        default: ""
        type: string
      trigger-event:
        description: "Trigger Event in caller that determines whether or not to upload"
        default: ""
        type: string
      cache-path:
        description: "The path(s) on the runner to cache or restore. The path is relative to repository."
        default: ""
        type: string
      cache-key:
        description: "The key created when saving a cache and the key used to search for a cache."
        default: ""
        type: string
      architecture:
        description: Architecture to build for x86_64 for default Linux, or aarch64 for Linux aarch64 builds
        required: false
        type: string
        default: x86_64
      submodules:
        description: Works as stated in actions/checkout, but the default value is recursive
        required: false
        type: string
        default: recursive
      setup-miniconda:
        description: Set to true if setup-miniconda is needed
        required: false
        type: boolean
        default: true

# Token permissions granted to the jobs in this workflow.
permissions:
  # Allows jobs to request an OpenID Connect (OIDC) token.
  # NOTE(review): no step in this file requests one directly; presumably one of
  # the test-infra actions needs it. Confirm before tightening.
  id-token: write
  # Read-only access to repository contents (enough for checkout).
  contents: read

jobs:
  # Builds one wheel per matrix entry, i.e. per combination of Python version,
  # CUDA version and TensorRT version contained in `inputs.build-matrix`.
  build:
    strategy:
      # Let the remaining matrix entries keep running when one of them fails.
      fail-fast: false
      matrix: ${{ fromJSON(inputs.build-matrix) }}
    env:
      PYTHON_VERSION: ${{ matrix.python_version }}
      PACKAGE_TYPE: wheel
      REPOSITORY: ${{ inputs.repository }}
      REF: ${{ inputs.ref }}
      CU_VERSION: ${{ matrix.desired_cuda }}
      UPLOAD_TO_BASE_BUCKET: ${{ matrix.upload_to_base_bucket }}
      ARCH: ${{ inputs.architecture }}
      # The TENSORRT_* values come from the `tensorrt` object of the matrix
      # entry (presumably added by
      # .github/scripts/generate-tensorrt-test-matrix.py; confirm in the caller).
      TENSORRT_STRIP_PREFIX: ${{ matrix.tensorrt.strip_prefix }}
      TENSORRT_VERSION: ${{ matrix.tensorrt.version }}
      TENSORRT_URLS: ${{ matrix.tensorrt.urls }}
      TENSORRT_SHA256: ${{ matrix.tensorrt.sha256 }}
      # Artifact name composed from the TensorRT, Python and CUDA versions plus
      # the architecture.
      UPLOAD_ARTIFACT_NAME: pytorch_tensorrt_${{ matrix.tensorrt.version }}_${{ matrix.python_version }}_${{ matrix.desired_cuda }}_${{ inputs.architecture }}
    name: build_tensorrt${{ matrix.tensorrt.version }}_py${{matrix.python_version}}_${{matrix.desired_cuda}}
    runs-on: ${{ matrix.validation_runner }}
    container:
      image: ${{ matrix.container_image }}
      # Pass the GPUs through to the container only for CUDA matrix entries.
      options: ${{ matrix.gpu_arch_type == 'cuda' && '--gpus all' || ' ' }}
    # If a build is taking longer than 120 minutes on these runners we need
    # to have a conversation
    timeout-minutes: 120

    steps:
      # Start from an empty workspace directory.
      # NOTE(review): in the aarch64 branch the glob sits inside the quotes, so
      # bash does not expand it and only a path literally named `*` would be
      # removed; RUNNER_TEMP is effectively left untouched. Confirm the intent
      # before making the glob effective, since the runner may keep files it
      # still needs in that directory.
      - name: Clean workspace
        shell: bash -l {0}
        run: |
          set -x
          echo "::group::Cleanup debug output"
          rm -rf "${GITHUB_WORKSPACE}"
          mkdir -p "${GITHUB_WORKSPACE}"
          if [[ "${{ inputs.architecture }}" = "aarch64" ]]; then
            rm -rf "${RUNNER_TEMP}/*"
          fi
          echo "::endgroup::"
      # Check out the test-infra repository into ./test-infra; the local
      # actions used by later steps are loaded from there.
      - uses: actions/checkout@v3
        with:
          # Support the use case where we need to checkout someone's fork
          repository: ${{ inputs.test-infra-repository }}
          ref: ${{ inputs.test-infra-ref }}
          path: test-infra
      # aarch64 only: check out pytorch/builder, which provides the setup
      # script run by the next step.
      - uses: actions/checkout@v3
        if: ${{ env.ARCH == 'aarch64' }}
        with:
          # Support the use case where we need to checkout someone's fork
          repository: "pytorch/builder"
          ref: "main"
          path: builder
      # aarch64 only: run the setup script with errexit disabled (`set +e`) and
      # add /opt/conda/bin to PATH for the following steps.
      - name: Set linux aarch64 CI
        if: ${{ inputs.architecture == 'aarch64' }}
        shell: bash -l {0}
        env:
          DESIRED_PYTHON: ${{ matrix.python_version }}
        run: |
          set +e
          # TODO: This is temporary aarch64 setup script, this should be integrated into aarch64 docker.
          ${GITHUB_WORKSPACE}/builder/aarch64_linux/aarch64_ci_setup.sh
          echo "/opt/conda/bin" >> $GITHUB_PATH
          set -e
      # Presumably exports CHANNEL, which the next step's condition reads;
      # confirm against the test-infra action.
      - uses: ./test-infra/.github/actions/set-channel
      - name: Set PYTORCH_VERSION
        if: ${{ env.CHANNEL == 'test' }}
        run: |
          # When building RC, set the version to be the current candidate version,
          # otherwise, leave it alone so nightly will pick up the latest
          echo "PYTORCH_VERSION=${{ matrix.stable_version }}" >> "${GITHUB_ENV}"
      # Later steps source BUILD_ENV_FILE and use CONDA_RUN / PIP_INSTALL_TORCH;
      # presumably this action is what provides them (confirm in test-infra).
      - uses: ./test-infra/.github/actions/setup-binary-builds
        env:
          PLATFORM: ${{ inputs.architecture == 'aarch64' && 'linux-aarch64' || ''}}
        with:
          repository: ${{ inputs.repository }}
          ref: ${{ inputs.ref }}
          submodules: ${{ inputs.submodules }}
          setup-miniconda: ${{ inputs.setup-miniconda }}
          python-version: ${{ env.PYTHON_VERSION }}
          cuda-version: ${{ env.CU_VERSION }}
          arch: ${{ env.ARCH }}
      # Append the caller's env-var script to the build env file so that every
      # later `source "${BUILD_ENV_FILE}"` picks it up.
      - name: Combine Env Var and Build Env Files
        if: ${{ inputs.env-var-script != '' }}
        working-directory: ${{ inputs.repository }}
        shell: bash -l {0}
        run: |
          cat "${{ inputs.env-var-script }}" >> "${BUILD_ENV_FILE}"
      - name: Install torch dependency
        shell: bash -l {0}
        run: |
          set -x
          # shellcheck disable=SC1090
          source "${BUILD_ENV_FILE}"
          # shellcheck disable=SC2086
          ${CONDA_RUN} ${PIP_INSTALL_TORCH}
      # Optional caller-supplied script, run before the build.
      - name: Run Pre-Script with Caching
        if: ${{ inputs.pre-script != '' }}
        uses: ./test-infra/.github/actions/run-script-with-cache
        with:
          cache-path: ${{ inputs.cache-path }}
          cache-key: ${{ inputs.cache-key }}
          repository: ${{ inputs.repository }}
          script: ${{ inputs.pre-script }}
      - name: Build clean
        working-directory: ${{ inputs.repository }}
        shell: bash -l {0}
        run: |
          set -x
          source "${BUILD_ENV_FILE}"
          ${CONDA_RUN} python setup.py clean
      - name: Build the wheel (bdist_wheel)
        working-directory: ${{ inputs.repository }}
        shell: bash -l {0}
        run: |
          set -x
          source "${BUILD_ENV_FILE}"
          ${CONDA_RUN} python setup.py bdist_wheel
      # Optional caller-supplied script, run after the wheel has been built.
      - name: Run Post-Script
        if: ${{ inputs.post-script != '' }}
        uses: ./test-infra/.github/actions/run-script-with-cache
        with:
          repository: ${{ inputs.repository }}
          script: ${{ inputs.post-script }}
      # Placeholder: the script only sources the build env file. PACKAGE_NAME
      # and SMOKE_TEST_SCRIPT are exported but not used yet (see the TODO).
      - name: Smoke Test
        shell: bash -l {0}
        env:
          PACKAGE_NAME: ${{ inputs.package-name }}
          SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }}
        run: |
          set -x
          source "${BUILD_ENV_FILE}"
          # TODO: add smoke test for the auditwheel tarball built
      # NB: Only upload to GitHub after passing smoke tests
      # `continue-on-error` keeps a failed upload from failing the job.
      - name: Upload wheel to GitHub
        continue-on-error: true
        uses: actions/upload-artifact@v3
        with:
          name: ${{ env.UPLOAD_ARTIFACT_NAME }}
          path: ${{ inputs.repository }}/dist

# Runs that resolve to the same group cancel any run of that group that is
# still in progress.
concurrency:
  # NOTE(review): `inputs.job-name` is not among the inputs declared under
  # `on.workflow_call.inputs` in this file, so that segment presumably expands
  # to an empty string. Confirm whether it should be dropped or declared.
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }}
  cancel-in-progress: true
Loading

0 comments on commit 30293aa

Please sign in to comment.