Skip to content

Commit

Permalink
Disable test_nvidia_a100 + test_amd_mi250 jobs until runners are online. (iree-org#17549)
Browse files Browse the repository at this point in the history

Similar to iree-org#17527, which disabled
the benchmark job.

The a100 jobs have been queueing and then timing out for over a week:

![image](https://github.com/iree-org/iree/assets/4010439/d77c8324-0d71-465e-a8fd-f7a6c32fc90b)

The mi250 runner also went offline this morning.

skip-ci: config change
  • Loading branch information
ScottTodd authored Jun 3, 2024
1 parent 778d00d commit 7b319cb
Showing 1 changed file with 106 additions and 104 deletions.
210 changes: 106 additions & 104 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -326,108 +326,110 @@ jobs:
./build_tools/scripts/check_vulkan.sh
./build_tools/cmake/ctest_all.sh ${BUILD_DIR}"
# Job: fetches the install archive produced by `build_all`, builds the tests
# against it, and runs the GPU-labelled tests on a self-hosted runner that
# carries the `a100` label.
test_nvidia_a100:
# Needs `setup` (enabled-jobs, runner labels) and `build_all` (install dir outputs).
needs: [setup, build_all]
# Runs only when `setup` lists this job name in its `enabled-jobs` output.
if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'test_nvidia_a100')
env:
BUILD_DIR: build-tests
# Install directory name, archive name and download URL all come from `build_all`.
INSTALL_DIR: ${{ needs.build_all.outputs.install-dir }}
INSTALL_DIR_ARCHIVE: ${{ needs.build_all.outputs.install-dir-archive }}
INSTALL_DIR_GCS_URL: ${{ needs.build_all.outputs.install-dir-gcs-url }}
# Backend toggles: CPU and HIP are set to 1, Vulkan and CUDA to 0.
# Presumably 1 means "skip this backend" -- confirm against the build/test scripts.
IREE_CPU_DISABLE: 1
IREE_VULKAN_DISABLE: 0
IREE_CUDA_DISABLE: 0
IREE_HIP_DISABLE: 1
# Runner selection: every label below must match; group and environment are
# supplied by the `setup` job.
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
- environment=${{ needs.setup.outputs.runner-env }}
- a100
- os-family=Linux
steps:
- name: "Checking out repository"
# Action is pinned to a full commit SHA; the trailing comment records the tag.
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
- name: "Checking out runtime submodules"
run: ./build_tools/scripts/git/update_runtime_submodules.sh
# The two check scripts are invoked directly here (not through docker_run.sh);
# they are invoked again inside the container in the final step.
- name: Querying GPU information
run: |
./build_tools/scripts/check_cuda.sh
./build_tools/scripts/check_vulkan.sh
# Fetch the archive from the URL exported by `build_all`, then unpack it.
- name: "Downloading install dir archive"
run: wget "${INSTALL_DIR_GCS_URL}" -O "${INSTALL_DIR_ARCHIVE}"
- name: "Extracting install directory"
run: tar -xf "${INSTALL_DIR_ARCHIVE}"
# Builds the tests through docker_run.sh using an image pinned by sha256
# digest. The four backend toggles are forwarded by name; ${INSTALL_DIR} is
# expanded by the step's shell before docker_run.sh is called.
- name: "Building tests"
run: |
./build_tools/github_actions/docker_run.sh \
--env IREE_CPU_DISABLE \
--env IREE_VULKAN_DISABLE \
--env IREE_CUDA_DISABLE \
--env IREE_HIP_DISABLE \
gcr.io/iree-oss/nvidia@sha256:82fa00b5cdda1b35634796cd0f88cb5d6d22d80328b94bfb51e5f2820598ba23 \
./build_tools/pkgci/build_tests_using_package.sh ${INSTALL_DIR}
# Runs the tests through docker_run.sh with the same pinned image and
# `--gpus all`. The three scripts run in one `bash -euo pipefail -c` command
# string; ${BUILD_DIR} sits inside double quotes, so the step's shell expands
# it before the container starts (BUILD_DIR itself is not forwarded).
# NOTE(review): IREE_CPU_DISABLE is forwarded to the build step above but not
# to this step -- confirm that is intentional.
- name: "Running GPU tests"
# Step-scoped variables; each is forwarded by name with `--env` below.
env:
IREE_CTEST_LABEL_REGEX: ^requires-gpu-sm80|^requires-gpu|^driver=vulkan$|^driver=cuda$
IREE_NVIDIA_SM80_TESTS_DISABLE: 0
IREE_MULTI_DEVICE_TESTS_DISABLE: 1
run: |
./build_tools/github_actions/docker_run.sh \
--env IREE_VULKAN_DISABLE \
--env IREE_CUDA_DISABLE \
--env IREE_HIP_DISABLE \
--env IREE_CTEST_LABEL_REGEX \
--env IREE_NVIDIA_SM80_TESTS_DISABLE \
--env IREE_MULTI_DEVICE_TESTS_DISABLE \
--env IREE_VULKAN_F16_DISABLE=0 \
--env IREE_NVIDIA_GPU_TESTS_DISABLE=0 \
--env CTEST_PARALLEL_LEVEL=4 \
--env NVIDIA_DRIVER_CAPABILITIES=all \
--gpus all \
gcr.io/iree-oss/nvidia@sha256:82fa00b5cdda1b35634796cd0f88cb5d6d22d80328b94bfb51e5f2820598ba23 \
bash -euo pipefail -c \
"./build_tools/scripts/check_cuda.sh
./build_tools/scripts/check_vulkan.sh
./build_tools/cmake/ctest_all.sh ${BUILD_DIR}"
# Job: fetches the install archive produced by `build_all`, builds the tests
# against it, and runs the GPU-labelled tests on the
# `nodai-amdgpu-mi250-x86-64` runner. Every step calls its script directly;
# none of them goes through docker_run.sh.
test_amd_mi250:
# Needs `setup` (enabled-jobs) and `build_all` (install dir outputs).
needs: [setup, build_all]
# Runs only when `setup` lists this job name in its `enabled-jobs` output.
if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'test_amd_mi250')
env:
BUILD_DIR: build-tests
# Install directory name, archive name and download URL all come from `build_all`.
INSTALL_DIR: ${{ needs.build_all.outputs.install-dir }}
INSTALL_DIR_ARCHIVE: ${{ needs.build_all.outputs.install-dir-archive }}
INSTALL_DIR_GCS_URL: ${{ needs.build_all.outputs.install-dir-gcs-url }}
# Backend toggles: CPU, Vulkan and CUDA are set to 1, HIP to 0.
# Presumably 1 means "skip this backend" -- confirm against the build/test scripts.
IREE_CPU_DISABLE: 1
IREE_VULKAN_DISABLE: 1
IREE_CUDA_DISABLE: 1
IREE_HIP_DISABLE: 0
# HIP target chip used for the tests (presumably the MI250 architecture name -- confirm).
IREE_HIP_TEST_TARGET_CHIP: "gfx90a"
# Single runner label, unlike the multi-label self-hosted selection used by test_nvidia_a100.
runs-on: nodai-amdgpu-mi250-x86-64
steps:
- name: "Checking out repository"
# Action is pinned to a full commit SHA; the trailing comment records the tag.
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
- name: "Checking out runtime submodules"
run: ./build_tools/scripts/git/update_runtime_submodules.sh
# Fetch the archive from the URL exported by `build_all`, then unpack it.
- name: "Downloading install dir archive"
run: wget "${INSTALL_DIR_GCS_URL}" -O "${INSTALL_DIR_ARCHIVE}"
- name: "Extracting install directory"
run: tar -xf "${INSTALL_DIR_ARCHIVE}"
# Builds the tests against the extracted install directory.
- name: "Building tests"
run: |
./build_tools/pkgci/build_tests_using_package.sh ${INSTALL_DIR}
# Runs ctest_all.sh on the build directory with step-scoped variables.
# NOTE(review): IREE_CUDA_DISABLE, IREE_CPU_DISABLE and IREE_HIP_DISABLE repeat
# the job-level values above and look redundant -- confirm before removing.
# NOTE(review): IREE_NVIDIA_GPU_TESTS_DISABLE is 0 on this AMD job -- confirm
# that is intended and not a leftover from the NVIDIA job.
- name: "Running GPU tests"
env:
IREE_CTEST_LABEL_REGEX: ^requires-gpu|^driver=hip$
IREE_NVIDIA_SM80_TESTS_DISABLE: 1
IREE_MULTI_DEVICE_TESTS_DISABLE: 0
IREE_AMD_RDNA3_TESTS_DISABLE: 1
IREE_NVIDIA_GPU_TESTS_DISABLE: 0
IREE_CUDA_DISABLE: 1
IREE_CPU_DISABLE: 1
IREE_HIP_DISABLE: 0
run: |
./build_tools/cmake/ctest_all.sh ${BUILD_DIR}
# TODO: re-enable when a100 runners are available again
# test_nvidia_a100:
# needs: [setup, build_all]
# if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'test_nvidia_a100')
# env:
# BUILD_DIR: build-tests
# INSTALL_DIR: ${{ needs.build_all.outputs.install-dir }}
# INSTALL_DIR_ARCHIVE: ${{ needs.build_all.outputs.install-dir-archive }}
# INSTALL_DIR_GCS_URL: ${{ needs.build_all.outputs.install-dir-gcs-url }}
# IREE_CPU_DISABLE: 1
# IREE_VULKAN_DISABLE: 0
# IREE_CUDA_DISABLE: 0
# IREE_HIP_DISABLE: 1
# runs-on:
# - self-hosted # must come first
# - runner-group=${{ needs.setup.outputs.runner-group }}
# - environment=${{ needs.setup.outputs.runner-env }}
# - a100
# - os-family=Linux
# steps:
# - name: "Checking out repository"
# uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
# - name: "Checking out runtime submodules"
# run: ./build_tools/scripts/git/update_runtime_submodules.sh
# - name: Querying GPU information
# run: |
# ./build_tools/scripts/check_cuda.sh
# ./build_tools/scripts/check_vulkan.sh
# - name: "Downloading install dir archive"
# run: wget "${INSTALL_DIR_GCS_URL}" -O "${INSTALL_DIR_ARCHIVE}"
# - name: "Extracting install directory"
# run: tar -xf "${INSTALL_DIR_ARCHIVE}"
# - name: "Building tests"
# run: |
# ./build_tools/github_actions/docker_run.sh \
# --env IREE_CPU_DISABLE \
# --env IREE_VULKAN_DISABLE \
# --env IREE_CUDA_DISABLE \
# --env IREE_HIP_DISABLE \
# gcr.io/iree-oss/nvidia@sha256:82fa00b5cdda1b35634796cd0f88cb5d6d22d80328b94bfb51e5f2820598ba23 \
# ./build_tools/pkgci/build_tests_using_package.sh ${INSTALL_DIR}
# - name: "Running GPU tests"
# env:
# IREE_CTEST_LABEL_REGEX: ^requires-gpu-sm80|^requires-gpu|^driver=vulkan$|^driver=cuda$
# IREE_NVIDIA_SM80_TESTS_DISABLE: 0
# IREE_MULTI_DEVICE_TESTS_DISABLE: 1
# run: |
# ./build_tools/github_actions/docker_run.sh \
# --env IREE_VULKAN_DISABLE \
# --env IREE_CUDA_DISABLE \
# --env IREE_HIP_DISABLE \
# --env IREE_CTEST_LABEL_REGEX \
# --env IREE_NVIDIA_SM80_TESTS_DISABLE \
# --env IREE_MULTI_DEVICE_TESTS_DISABLE \
# --env IREE_VULKAN_F16_DISABLE=0 \
# --env IREE_NVIDIA_GPU_TESTS_DISABLE=0 \
# --env CTEST_PARALLEL_LEVEL=4 \
# --env NVIDIA_DRIVER_CAPABILITIES=all \
# --gpus all \
# gcr.io/iree-oss/nvidia@sha256:82fa00b5cdda1b35634796cd0f88cb5d6d22d80328b94bfb51e5f2820598ba23 \
# bash -euo pipefail -c \
# "./build_tools/scripts/check_cuda.sh
# ./build_tools/scripts/check_vulkan.sh
# ./build_tools/cmake/ctest_all.sh ${BUILD_DIR}"

# TODO: re-enable when mi250 runners are available again
# test_amd_mi250:
# needs: [setup, build_all]
# if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'test_amd_mi250')
# env:
# BUILD_DIR: build-tests
# INSTALL_DIR: ${{ needs.build_all.outputs.install-dir }}
# INSTALL_DIR_ARCHIVE: ${{ needs.build_all.outputs.install-dir-archive }}
# INSTALL_DIR_GCS_URL: ${{ needs.build_all.outputs.install-dir-gcs-url }}
# IREE_CPU_DISABLE: 1
# IREE_VULKAN_DISABLE: 1
# IREE_CUDA_DISABLE: 1
# IREE_HIP_DISABLE: 0
# IREE_HIP_TEST_TARGET_CHIP: "gfx90a"
# runs-on: nodai-amdgpu-mi250-x86-64
# steps:
# - name: "Checking out repository"
# uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
# - name: "Checking out runtime submodules"
# run: ./build_tools/scripts/git/update_runtime_submodules.sh
# - name: "Downloading install dir archive"
# run: wget "${INSTALL_DIR_GCS_URL}" -O "${INSTALL_DIR_ARCHIVE}"
# - name: "Extracting install directory"
# run: tar -xf "${INSTALL_DIR_ARCHIVE}"
# - name: "Building tests"
# run: |
# ./build_tools/pkgci/build_tests_using_package.sh ${INSTALL_DIR}
# - name: "Running GPU tests"
# env:
# IREE_CTEST_LABEL_REGEX: ^requires-gpu|^driver=hip$
# IREE_NVIDIA_SM80_TESTS_DISABLE: 1
# IREE_MULTI_DEVICE_TESTS_DISABLE: 0
# IREE_AMD_RDNA3_TESTS_DISABLE: 1
# IREE_NVIDIA_GPU_TESTS_DISABLE: 0
# IREE_CUDA_DISABLE: 1
# IREE_CPU_DISABLE: 1
# IREE_HIP_DISABLE: 0
# run: |
# ./build_tools/cmake/ctest_all.sh ${BUILD_DIR}

# TODO(saienduri): re-enable when iree/hal/drivers/hip/dynamic_symbols_test is fixed
# test_amd_w7900:
Expand Down Expand Up @@ -919,8 +921,8 @@ jobs:

# Accelerators
- test_nvidia_gpu
- test_nvidia_a100
- test_amd_mi250
# - test_nvidia_a100
# - test_amd_mi250
# - test_amd_w7900

# Configurations
Expand Down

0 comments on commit 7b319cb

Please sign in to comment.