From 5cc512713657609cbaf9c48ad1d363236836f231 Mon Sep 17 00:00:00 2001
From: Quentin Bourgerie
Date: Thu, 12 Dec 2024 14:46:37 +0100
Subject: [PATCH] chore(ci): GPU benchmarks

---
Review notes (this section is not part of the commit message):
- Toolchain variables (CUDA_PATH, CC, CXX, CUDAHOSTCXX, CUDACXX) are now
  computed in the "Setup GPU environment" step; `github.env.*` does not
  resolve to workflow variables.
- Expression values and secrets used by shell scripts are passed through
  `env` instead of being interpolated into the script text.
- The blob hash on the `index` line below was not recomputed after these
  edits; apply with `git apply` / `git am` without `--3way`, or regenerate
  the patch.

 .../concrete_compiler_benchmark_gpu.yml       | 206 ++++++++++++++++++
 ci/slab.toml                                  |   7 +
 2 files changed, 213 insertions(+)
 create mode 100644 .github/workflows/concrete_compiler_benchmark_gpu.yml

diff --git a/.github/workflows/concrete_compiler_benchmark_gpu.yml b/.github/workflows/concrete_compiler_benchmark_gpu.yml
new file mode 100644
index 0000000000..08455f9db1
--- /dev/null
+++ b/.github/workflows/concrete_compiler_benchmark_gpu.yml
@@ -0,0 +1,206 @@
+name: concrete-compiler benchmark linux-gpu
+
+on:
+  workflow_dispatch:
+  pull_request:
+    paths:
+      - .github/workflows/concrete_compiler_benchmark_gpu.yml
+      - backends/concrete-cuda/**
+  push:
+    branches:
+      - 'main'
+      - 'release/*'
+
+env:
+  DOCKER_IMAGE_TEST: ghcr.io/zama-ai/concrete-compiler
+  # Quoted so the versions stay strings instead of being typed as numbers.
+  CUDA_VERSION: "11.8"
+  GCC_VERSION: "8"
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
+concurrency:
+  group: concrete_compiler_benchmark_gpu-${{ github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+jobs:
+  # Starts an instance with the `gpu-bench` profile; its label is the `runs-on` of the benchmark jobs.
+  setup-instance:
+    runs-on: ubuntu-latest
+    outputs:
+      runner-name: ${{ steps.start-instance.outputs.label }}
+    steps:
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7 # v1.3.0
+        with:
+          mode: start
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          backend: aws
+          profile: gpu-bench
+
+  # Builds and runs the GPU benchmarks, then uploads the raw JSON results.
+  build-and-run-benchmarks:
+    needs: setup-instance
+    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
+    outputs:
+      bench_date: ${{ steps.benchmark-details.outputs.bench_date }}
+      commit_date: ${{ steps.benchmark-details.outputs.commit_date }}
+      commit_hash: ${{ steps.benchmark-details.outputs.commit_hash }}
+    env:
+      CUDA_SUPPORT: "ON"
+    steps:
+      - name: Setup GPU environment
+        # Toolchain paths are derived in the shell from the workflow-level
+        # CUDA_VERSION / GCC_VERSION: `github.env` is the path of the env file,
+        # not a map of variables, and the `env` context cannot be used in a
+        # job-level `env` block.
+        run: |
+          CUDA_PATH="/usr/local/cuda-${CUDA_VERSION}"
+          {
+            echo "CUDA_PATH=${CUDA_PATH}"
+            echo "CC=/usr/bin/gcc-${GCC_VERSION}"
+            echo "CXX=/usr/bin/g++-${GCC_VERSION}"
+            echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}"
+            echo "CUDACXX=${CUDA_PATH}/bin/nvcc"
+            echo "LD_LIBRARY_PATH=${CUDA_PATH}/lib:${LD_LIBRARY_PATH}"
+          } >> "${GITHUB_ENV}"
+          echo "${CUDA_PATH}/bin" >> "${GITHUB_PATH}"
+          env
+      - name: Checkout concrete
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          submodules: recursive
+          fetch-depth: 0
+      - name: Output benchmark details
+        id: benchmark-details
+        run: |
+          {
+            echo "bench_date=$(date --iso-8601=seconds)";
+            echo "commit_date=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
+            echo "commit_hash=$(git describe --tags --dirty)";
+          } >> "$GITHUB_OUTPUT"
+      - name: Set up home
+        # "Install rust" step requires root user to have a HOME directory which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+      - name: Setup rust toolchain for concrete-cpu
+        uses: ./.github/actions/setup_rust_toolchain_for_concrete_cpu
+      - name: Build compiler benchmarks
+        run: |
+          set -e
+          git config --global --add safe.directory '*'
+          cd compilers/concrete-compiler/compiler
+          make BINDINGS_PYTHON_ENABLED=OFF CUDA_SUPPORT=${{ env.CUDA_SUPPORT }} build-benchmarks
+      - name: Run compiler benchmarks
+        run: |
+          set -e
+          cd compilers/concrete-compiler/compiler
+          make run-gpu-benchmarks
+      - name: Upload raw results artifact
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
+        with:
+          name: compiler-benchmarks-result
+          path: compilers/concrete-compiler/compiler/benchmarks_results.json
+      - name: Slack Notification
+        if: ${{ failure() && github.ref == 'refs/heads/main' }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "build-and-run-benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+
+  # Parses the raw results and sends them to Slab.
+  parse-and-send-results:
+    name: Parse and send results
+    needs: [setup-instance, build-and-run-benchmarks]
+    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
+    steps:
+      - name: Download compiler-benchmarks-result
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
+        with:
+          name: compiler-benchmarks-result
+      - name: Parse results
+        shell: bash
+        # Expression values go through `env` so that they reach the script as
+        # data and are never spliced into the shell source.
+        env:
+          COMMIT_HASH: ${{ needs.build-and-run-benchmarks.outputs.commit_hash }}
+          BRANCH_NAME: ${{ github.ref_name }}
+          COMMIT_DATE: ${{ needs.build-and-run-benchmarks.outputs.commit_date }}
+          BENCH_DATE: ${{ needs.build-and-run-benchmarks.outputs.bench_date }}
+        run: |
+          # TODO output setup-instance (https://github.com/zama-ai/slab-github-runner/issues/38)
+          # NOTE(review): no checkout in this job; ci/benchmark_parser.py is
+          # presumably still in the workspace from the previous job - confirm.
+          python3 ./ci/benchmark_parser.py benchmarks_results.json parsed_benchmark_results.json \
+            --database compiler_benchmarks \
+            --hardware "p3.8xlarge" \
+            --project-version "${COMMIT_HASH}" \
+            --branch "${BRANCH_NAME}" \
+            --commit-date "${COMMIT_DATE}" \
+            --bench-date "${BENCH_DATE}" \
+            --throughput
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
+        with:
+          name: compiler-benchmarks-parsed-result
+          path: parsed_benchmark_results.json
+      - name: Checkout Slab repo
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+      - name: Send data to Slab
+        shell: bash
+        env:
+          JOB_SECRET: ${{ secrets.JOB_SECRET }}
+          SLAB_URL: ${{ secrets.SLAB_URL }}
+        run: |
+          echo "Computing HMac on downloaded artifact"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh parsed_benchmark_results.json "${JOB_SECRET}")"
+          echo "Sending results to Slab..."
+          # NOTE(review): -k disables TLS certificate verification - confirm it is needed.
+          curl -v -k \
+            -H "Content-Type: application/json" \
+            -H "X-Slab-Repository: ${{ github.repository }}" \
+            -H "X-Slab-Command: store_data" \
+            -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+            -d @parsed_benchmark_results.json \
+            "${SLAB_URL}"
+      - name: Slack Notification
+        if: ${{ failure() && github.ref == 'refs/heads/main' }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "parse-and-send-results finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+
+  # Stops the instance even when an earlier job failed (`always()`).
+  teardown-instance:
+    name: Teardown instance
+    needs: [ setup-instance, parse-and-send-results ]
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Stop instance
+        id: stop-instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7 # v1.3.0
+        with:
+          mode: stop
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          label: ${{ needs.setup-instance.outputs.runner-name }}
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "Instance teardown finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
diff --git a/ci/slab.toml b/ci/slab.toml
index aa62a7d072..63ba98d446 100644
--- a/ci/slab.toml
+++ b/ci/slab.toml
@@ -16,6 +16,13 @@ instance_type = "p3.2xlarge"
 subnet_id = "subnet-8123c9e7"
 security_group= ["sg-017afab1f328af917", ]
 
+[backend.aws.gpu-bench]
+region = "us-east-1"
+image_id = "ami-07daa9292f60ade73"
+instance_type = "p3.8xlarge"
+subnet_id = "subnet-8123c9e7"
+security_group= ["sg-017afab1f328af917", ]
+
 [backend.aws.slurm-cluster]
 region = "eu-west-3"
 image_id = "ami-0bb5bb9cb747b5ddd"