From b70c817c1712b14f7df910c9210005de14f60704 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Mon, 2 Dec 2024 13:34:08 +0100 Subject: [PATCH 1/6] secure ci --- .github/workflows/test_cli_rocm_pytorch.yaml | 56 ++++---------------- 1 file changed, 10 insertions(+), 46 deletions(-) diff --git a/.github/workflows/test_cli_rocm_pytorch.yaml b/.github/workflows/test_cli_rocm_pytorch.yaml index a212da5b..2a81cc24 100644 --- a/.github/workflows/test_cli_rocm_pytorch.yaml +++ b/.github/workflows/test_cli_rocm_pytorch.yaml @@ -31,27 +31,11 @@ jobs: contains( github.event.pull_request.labels.*.name, 'cli_rocm_pytorch_single_gpu') }} - runs-on: [self-hosted, amd-gpu, single-gpu] - - container: - image: ghcr.io/huggingface/optimum-benchmark:latest-rocm - options: --ipc host - --group-add video - --device /dev/kfd - --device /dev/dri - --env ROCR_VISIBLE_DEVICES - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Install dependencies - run: | - pip install -e .[testing,diffusers,timm,peft,autoawq,auto-gptq] - - - name: Run tests - run: | - pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not bnb" + uses: huggingface/hf-workflows/.github/workflows/optimum_benchmark_instinct_ci.yaml@main + with: + machine_type: single-gpu + install_extras: testing,diffusers,timm,peft,autoawq,auto-gptq + pytest_keywords: "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not bnb" run_cli_rocm_pytorch_multi_gpu_tests: if: ${{ @@ -64,28 +48,8 @@ jobs: contains( github.event.pull_request.labels.*.name, 'cli_rocm_pytorch_multi_gpu') }} - runs-on: [self-hosted, amd-gpu, multi-gpu] - - container: - image: ghcr.io/huggingface/optimum-benchmark:latest-rocm - options: --ipc host - --group-add video - --device /dev/kfd - --device /dev/dri - --env ROCR_VISIBLE_DEVICES - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Install dependencies - run: | - pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq,auto-gptq] "deepspeed<0.15" - - - name: Run tests (parallel) - run: | - pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map)" - - - name: Run tests (sequential) - run: | - FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and (deepspeed_inference)" + uses: huggingface/hf-workflows/.github/workflows/optimum_benchmark_instinct_ci.yaml@main + with: + machine_type: multi-gpu + install_extras: testing,diffusers,timm,peft,autoawq,auto-gptq + pytest_keywords: "cli and cuda and pytorch and (dp or ddp or device_map)" From c788f82d715244bc5792eabbc768cfa8f2773579 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 3 Dec 2024 10:51:10 +0100 Subject: [PATCH 2/6] api ci --- .github/workflows/test_api_rocm.yaml | 31 +++++----------------------- 1 file changed, 5 insertions(+), 26 deletions(-) diff --git a/.github/workflows/test_api_rocm.yaml b/.github/workflows/test_api_rocm.yaml index 170c2a0e..5ac41462 100644 --- a/.github/workflows/test_api_rocm.yaml +++ b/.github/workflows/test_api_rocm.yaml @@ -29,29 +29,8 @@ jobs: contains( github.event.pull_request.labels.*.name, 'api_rocm') }} - runs-on: [self-hosted, amd-gpu, single-gpu] - - container: - image: ghcr.io/huggingface/optimum-benchmark:latest-rocm - options: --ipc host - --shm-size "16gb" - --group-add video - --device /dev/kfd - --device /dev/dri - --env ROCR_VISIBLE_DEVICES - --env HIP_VISIBLE_DEVICES=0 - - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Install dependencies - run: | - pip install -e .[testing,timm,diffusers,codecarbon] - - - name: Run tests - env: - HF_TOKEN: ${{ secrets.HF_TOKEN }} - PUSH_REPO_ID: optimum-benchmark/rocm - run: | - pytest tests/test_api.py -x -s -k "api and cuda" + uses: huggingface/hf-workflows/.github/workflows/optimum_benchmark_instinct_ci.yaml@main + with: + machine_type: single-gpu + install_extras: timm,diffusers,codecarbon + pytest_keywords: "api and cuda" From e5d1045b3151e0f5d2c6054c551dcd3618c7c781 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 3 Dec 2024 11:14:30 +0100 Subject: [PATCH 3/6] fix --- .github/workflows/test_api_rocm.yaml | 4 ++-- .github/workflows/test_cli_rocm_pytorch.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test_api_rocm.yaml b/.github/workflows/test_api_rocm.yaml index 5ac41462..0a073604 100644 --- a/.github/workflows/test_api_rocm.yaml +++ b/.github/workflows/test_api_rocm.yaml @@ -32,5 +32,5 @@ jobs: uses: huggingface/hf-workflows/.github/workflows/optimum_benchmark_instinct_ci.yaml@main with: machine_type: single-gpu - install_extras: timm,diffusers,codecarbon - pytest_keywords: "api and cuda" + install_extras: testing,timm,diffusers,codecarbon + pytest_keywords: api and cuda diff --git a/.github/workflows/test_cli_rocm_pytorch.yaml b/.github/workflows/test_cli_rocm_pytorch.yaml index 2a81cc24..bd01aa2a 100644 --- a/.github/workflows/test_cli_rocm_pytorch.yaml +++ b/.github/workflows/test_cli_rocm_pytorch.yaml @@ -35,7 +35,7 @@ jobs: with: machine_type: single-gpu install_extras: testing,diffusers,timm,peft,autoawq,auto-gptq - pytest_keywords: "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not bnb" + pytest_keywords: cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not bnb run_cli_rocm_pytorch_multi_gpu_tests: if: ${{ @@ -52,4 +52,4 @@ jobs: with: machine_type: multi-gpu install_extras: testing,diffusers,timm,peft,autoawq,auto-gptq - pytest_keywords: "cli and cuda and pytorch and (dp or ddp or device_map)" + pytest_keywords: cli and cuda and pytorch and (dp or ddp or device_map) From 06c1f7b35183a4b58e8f4f710970c10abec18a46 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 3 Dec 2024 11:17:51 +0100 Subject: [PATCH 4/6] add secrets --- .github/workflows/test_api_rocm.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test_api_rocm.yaml b/.github/workflows/test_api_rocm.yaml index 0a073604..cc0645d7 100644 --- a/.github/workflows/test_api_rocm.yaml +++ b/.github/workflows/test_api_rocm.yaml @@ -34,3 +34,5 @@ jobs: machine_type: single-gpu install_extras: testing,timm,diffusers,codecarbon pytest_keywords: api and cuda + secrets: + HF_TOKEN: ${{ secrets.HF_TOKEN }} From 72dd85e699aa856318e142db53483739d5f6e69f Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 3 Dec 2024 15:52:45 +0100 Subject: [PATCH 5/6] fix --- .github/workflows/test_cli_rocm_pytorch.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_cli_rocm_pytorch.yaml b/.github/workflows/test_cli_rocm_pytorch.yaml index bd01aa2a..112cb924 100644 --- a/.github/workflows/test_cli_rocm_pytorch.yaml +++ b/.github/workflows/test_cli_rocm_pytorch.yaml @@ -51,5 +51,5 @@ jobs: uses: huggingface/hf-workflows/.github/workflows/optimum_benchmark_instinct_ci.yaml@main with: machine_type: multi-gpu - install_extras: testing,diffusers,timm,peft,autoawq,auto-gptq + install_extras: testing,diffusers,timm,peft pytest_keywords: cli and cuda and pytorch and (dp or ddp or device_map) From f5b5f2b545c0ac755b50e212515bfe4f7e65a607 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Mon, 9 Dec 2024 14:09:39 +0100 Subject: [PATCH 6/6] test mi250 --- .github/workflows/test_api_rocm.yaml | 2 +- .github/workflows/test_cli_rocm_pytorch.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_api_rocm.yaml b/.github/workflows/test_api_rocm.yaml index cc0645d7..f6f20aa4 100644 --- a/.github/workflows/test_api_rocm.yaml +++ b/.github/workflows/test_api_rocm.yaml @@ -29,7 +29,7 @@ jobs: contains( github.event.pull_request.labels.*.name, 'api_rocm') }} - uses: huggingface/hf-workflows/.github/workflows/optimum_benchmark_instinct_ci.yaml@main + uses: huggingface/hf-workflows/.github/workflows/optimum_benchmark_instinct_ci.yaml@testing with: machine_type: single-gpu install_extras: testing,timm,diffusers,codecarbon diff --git a/.github/workflows/test_cli_rocm_pytorch.yaml b/.github/workflows/test_cli_rocm_pytorch.yaml index 112cb924..3057b726 100644 --- a/.github/workflows/test_cli_rocm_pytorch.yaml +++ b/.github/workflows/test_cli_rocm_pytorch.yaml @@ -31,7 +31,7 @@ jobs: contains( github.event.pull_request.labels.*.name, 'cli_rocm_pytorch_single_gpu') }} - uses: huggingface/hf-workflows/.github/workflows/optimum_benchmark_instinct_ci.yaml@main + uses: huggingface/hf-workflows/.github/workflows/optimum_benchmark_instinct_ci.yaml@testing with: machine_type: single-gpu install_extras: testing,diffusers,timm,peft,autoawq,auto-gptq @@ -48,7 +48,7 @@ jobs: contains( github.event.pull_request.labels.*.name, 'cli_rocm_pytorch_multi_gpu') }} - uses: huggingface/hf-workflows/.github/workflows/optimum_benchmark_instinct_ci.yaml@main + uses: huggingface/hf-workflows/.github/workflows/optimum_benchmark_instinct_ci.yaml@testing with: machine_type: multi-gpu install_extras: testing,diffusers,timm,peft