zama-ai · BourgerieQuentin · Dec 12, 2024
diff --git a/.github/workflows/concrete_compiler_benchmark_gpu.yml b/.github/workflows/concrete_compiler_benchmark_gpu.yml
@@ -0,0 +1,185 @@
+# GPU benchmarks for concrete-compiler.
+name: concrete-compiler benchmark linux-gpu
+
+on:
+  # Pushes to main and to release branches.
+  push:
+    branches: ['main', 'release/*']
+  # Pull requests that modify this workflow file or the CUDA backend.
+  pull_request:
+    paths:
+      - '.github/workflows/concrete_compiler_benchmark_gpu.yml'
+      - 'backends/concrete-cuda/**'
+  # Manually triggered runs.
+  workflow_dispatch:
+
+# Workflow-wide environment, visible to every job and step below.
+env:
+  DOCKER_IMAGE_TEST: ghcr.io/zama-ai/concrete-compiler
+  # Toolchain versions are interpolated into filesystem paths
+  # (/usr/local/cuda-<version>, /usr/bin/gcc-<version>), so they are quoted to
+  # stay strings: an unquoted value such as 12.10 would be read as the number 12.1.
+  CUDA_VERSION: "12.6"
+  GCC_VERSION: "11"
+  # Link to the current run, embedded in the Slack failure messages.
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  # Slack settings; presumably read from the environment by the
+  # rtCamp/action-slack-notify steps, which do not set them themselves.
+  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
+# Runs are grouped per git ref. A newer run cancels the one in progress for the
+# same ref, except on main where in-progress runs are never cancelled.
+concurrency:
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+  group: "concrete_compiler_benchmark_gpu-${{ github.ref }}"
+
+jobs:
+  # Starts a runner through the Slab action and exposes its label so that
+  # later jobs can select it with `runs-on`.
+  setup-instance:
+    runs-on: ubuntu-latest
+    outputs:
+      runner-name: ${{ steps.start-instance.outputs.label }}
+    steps:
+      - id: start-instance
+        name: Start instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7 # v1.3.0
+        with:
+          mode: start
+          backend: aws
+          # Profile name; expected to match [backend.aws.gpu-bench] in ci/slab.toml.
+          profile: gpu-bench
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+
+  # Builds the compiler benchmarks with CUDA support on the runner started by
+  # setup-instance, runs them, and uploads the raw JSON results as an artifact.
+  build-and-run-benchmarks:
+    needs: setup-instance
+    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
+    # Run metadata consumed by the parse-and-send-results job.
+    outputs:
+      bench_date: ${{ steps.benchmark-details.outputs.bench_date }}
+      commit_date: ${{ steps.benchmark-details.outputs.commit_date }}
+      commit_hash: ${{ steps.benchmark-details.outputs.commit_hash }}
+    env:
+      # Quoted so the value is always the literal string "ON", never a YAML boolean.
+      CUDA_SUPPORT: "ON"
+
+    steps:
+      - name: Setup GPU environment
+        run: |
+          # CUDA_PATH is not defined anywhere in this workflow, and values
+          # appended to GITHUB_ENV only become visible in *later* steps, so the
+          # path is computed once here and reused for every derived variable.
+          CUDA_PATH="/usr/local/cuda-${{ env.CUDA_VERSION }}"
+          {
+            echo "CUDA_PATH=${CUDA_PATH}"
+            # Only append the previous value when there is one, to avoid a
+            # trailing ':' (an empty entry) in LD_LIBRARY_PATH.
+            echo "LD_LIBRARY_PATH=${CUDA_PATH}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+            echo "CC=/usr/bin/gcc-${{ env.GCC_VERSION }}"
+            echo "CXX=/usr/bin/g++-${{ env.GCC_VERSION }}"
+            echo "CUDAHOSTCXX=/usr/bin/g++-${{ env.GCC_VERSION }}"
+            echo "CUDACXX=${CUDA_PATH}/bin/nvcc"
+          } >> "${GITHUB_ENV}"
+          echo "${CUDA_PATH}/bin" >> "${GITHUB_PATH}"
+      - name: Checkout concrete
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          submodules: recursive
+          # Full history, so that `git describe --tags` below can reach the tags.
+          fetch-depth: 0
+      - name: Output benchmark details
+        id: benchmark-details
+        run: |
+          {
+            echo "bench_date=$(date --iso-8601=seconds)"
+            echo "commit_date=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
+            echo "commit_hash=$(git describe --tags --dirty)"
+          } >> "$GITHUB_OUTPUT"
+      - name: Set up home
+        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+      - name: Setup rust toolchain for concrete-cpu
+        uses: ./.github/actions/setup_rust_toolchain_for_concrete_cpu
+      - name: Build compiler benchmarks
+        run: |
+          set -e
+          echo "Debug env:"
+          env
+          git config --global --add safe.directory '*'
+          cd compilers/concrete-compiler/compiler
+          make BINDINGS_PYTHON_ENABLED=OFF CUDA_SUPPORT=ON build-benchmarks
+      - name: Run compiler benchmarks
+        run: |
+          set -e
+          cd compilers/concrete-compiler/compiler
+          make CUDA_SUPPORT=ON run-gpu-benchmarks
+      - name: Upload raw results artifact
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
+        with:
+          name: compiler-benchmarks-result
+          path: compilers/concrete-compiler/compiler/benchmarks_results.json
+      # Only failures on main are reported; a failing notification never fails the job.
+      - name: Slack Notification
+        if: ${{ failure() && github.ref == 'refs/heads/main' }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "build-and-run-benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+
+  # Converts the raw benchmark results into the database format and posts them,
+  # HMAC-signed, to Slab.
+  # NOTE(review): this job has no checkout of this repository; ./ci/benchmark_parser.py
+  # is presumably found in the workspace left behind by build-and-run-benchmarks on
+  # the same runner — confirm.
+  parse-and-send-results:
+    name: Parse and send results
+    needs: [setup-instance, build-and-run-benchmarks]
+    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
+    steps:
+      - name: Download compiler-benchmarks-result
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
+        with:
+          name: compiler-benchmarks-result
+      - name: Parse results
+        shell: bash
+        # Context values are handed over through the environment and quoted in
+        # the script, so that unusual characters (e.g. in a branch name) are
+        # never interpreted by the shell.
+        env:
+          PROJECT_VERSION: ${{ needs.build-and-run-benchmarks.outputs.commit_hash }}
+          BRANCH: ${{ github.ref_name }}
+          COMMIT_DATE: ${{ needs.build-and-run-benchmarks.outputs.commit_date }}
+          BENCH_DATE: ${{ needs.build-and-run-benchmarks.outputs.bench_date }}
+        run: |
+          # TODO output setup-instance (https://github.com/zama-ai/slab-github-runner/issues/38)
+          # Until then the hardware label is hard-coded; it must match the
+          # instance_type of the gpu-bench profile in ci/slab.toml.
+          python3 ./ci/benchmark_parser.py benchmarks_results.json parsed_benchmark_results.json \
+          --database compiler_benchmarks \
+          --hardware "p3.2xlarge" \
+          --project-version "${PROJECT_VERSION}" \
+          --branch "${BRANCH}" \
+          --commit-date "${COMMIT_DATE}" \
+          --bench-date "${BENCH_DATE}" \
+          --throughput
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
+        with:
+          name: compiler-benchmarks-parsed-result
+          path: parsed_benchmark_results.json
+      - name: Checkout Slab repo
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+      - name: Send data to Slab
+        shell: bash
+        # Secrets are passed through the environment instead of being pasted
+        # into the script text.
+        env:
+          JOB_SECRET: ${{ secrets.JOB_SECRET }}
+          SLAB_URL: ${{ secrets.SLAB_URL }}
+        run: |
+          echo "Computing HMac on downloaded artifact"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh parsed_benchmark_results.json "${JOB_SECRET}")"
+          echo "Sending results to Slab..."
+          # NOTE(review): -k disables TLS certificate verification — confirm it is still required.
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @parsed_benchmark_results.json \
+          "${SLAB_URL}"
+      # Only failures on main are reported; a failing notification never fails the job.
+      - name: Slack Notification
+        if: ${{ failure() && github.ref == 'refs/heads/main' }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "parse-and-send-results finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+
+  # Stops the runner started by setup-instance. `always()` makes this run even
+  # when an upstream job failed or was cancelled; it is skipped only when
+  # setup-instance itself was skipped.
+  teardown-instance:
+    name: Teardown instance
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    needs:
+      - setup-instance
+      - parse-and-send-results
+    runs-on: ubuntu-latest
+    steps:
+      - id: stop-instance
+        name: Stop instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7 # v1.3.0
+        with:
+          mode: stop
+          label: ${{ needs.setup-instance.outputs.runner-name }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+      # Reported on any ref; a failing notification never fails the job.
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        env:
+          SLACK_MESSAGE: "Instance teardown finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_COLOR: ${{ job.status }}
diff --git a/ci/slab.toml b/ci/slab.toml
@@ -16,6 +16,13 @@ instance_type = "p3.2xlarge"
 subnet_id = "subnet-8123c9e7"
 security_group= ["sg-017afab1f328af917", ]
 
+[backend.aws.gpu-bench]  # AWS profile requested by the GPU benchmark workflow (backend: aws, profile: gpu-bench)
+region = "us-east-1"
+image_id = "ami-0b362b8f9eadd7c30"
+instance_type = "p3.2xlarge"  # keep in sync with the --hardware value the workflow passes to ci/benchmark_parser.py
+subnet_id = "subnet-8123c9e7"
+security_group= ["sg-017afab1f328af917", ]
+
 [backend.aws.slurm-cluster]
 region = "eu-west-3"
 image_id = "ami-0bb5bb9cb747b5ddd"