Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(ci): Add GPU benchmark workflow #1184

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 185 additions & 0 deletions .github/workflows/concrete_compiler_benchmark_gpu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
# GPU benchmark workflow for concrete-compiler: starts a GPU instance through
# Slab, builds and runs the compiler benchmarks with CUDA support, then sends
# the parsed results to Slab.
name: concrete-compiler benchmark linux-gpu

on:
  # Manual runs from the Actions tab.
  workflow_dispatch:
  # Pull requests only trigger a run when this workflow or the CUDA backend changes.
  pull_request:
    paths:
      - .github/workflows/concrete_compiler_benchmark_gpu.yml
      - backends/concrete-cuda/**
  push:
    branches:
      - 'main'
      - 'release/*'

env:
  DOCKER_IMAGE_TEST: ghcr.io/zama-ai/concrete-compiler
  # Versions are quoted so they stay strings: an unquoted 12.10 would be
  # parsed as the float 12.1 and point at the wrong toolkit directory.
  CUDA_VERSION: "12.6"
  GCC_VERSION: "11"
  # Link to the current run, embedded in the Slack notifications.
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}

# One group per ref: a newer run cancels the in-progress one, except on main
# where every run is allowed to finish.
concurrency:
  group: concrete_compiler_benchmark_gpu-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

jobs:
# Starts the "gpu-bench" AWS profile through the Slab runner action and
# publishes the action's `label` output, which the following jobs use as
# their `runs-on` value.
setup-instance:
  runs-on: ubuntu-latest
  steps:
    - id: start-instance
      name: Start instance
      uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7 # v1.3.0
      with:
        mode: start
        backend: aws
        profile: gpu-bench
        slab-url: ${{ secrets.SLAB_BASE_URL }}
        job-secret: ${{ secrets.JOB_SECRET }}
        github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
  outputs:
    runner-name: ${{ steps.start-instance.outputs.label }}

# Builds the compiler benchmarks with CUDA support on the instance started by
# setup-instance, runs the GPU benchmarks and uploads the raw JSON results.
# The benchmark/commit metadata is exported as job outputs for the parsing job.
build-and-run-benchmarks:
  needs: setup-instance
  runs-on: ${{ needs.setup-instance.outputs.runner-name }}
  outputs:
    bench_date: ${{ steps.benchmark-details.outputs.bench_date }}
    commit_date: ${{ steps.benchmark-details.outputs.commit_date }}
    commit_hash: ${{ steps.benchmark-details.outputs.commit_hash }}
  env:
    # Quoted so that no YAML loader can read the value as a boolean.
    CUDA_SUPPORT: "ON"

  steps:
    - name: Setup GPU environment
      run: |
        # CUDA_PATH must be resolved in this shell before it is used: values
        # written to GITHUB_ENV only become visible in *later* steps, so
        # neither $CUDA_PATH nor the `env` expression context holds it here.
        CUDA_PATH="/usr/local/cuda-${CUDA_VERSION}"
        {
          echo "CUDA_PATH=${CUDA_PATH}"
          # Only append the previous value when there is one, to avoid a
          # trailing ':' (which would add the current directory to the search path).
          echo "LD_LIBRARY_PATH=${CUDA_PATH}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
          echo "CC=/usr/bin/gcc-${GCC_VERSION}"
          echo "CXX=/usr/bin/g++-${GCC_VERSION}"
          echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}"
          echo "CUDACXX=${CUDA_PATH}/bin/nvcc"
        } >> "${GITHUB_ENV}"
        echo "${CUDA_PATH}/bin" >> "${GITHUB_PATH}"
    - name: Checkout concrete
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      with:
        submodules: recursive
        # Full history and tags are needed by `git describe --tags` below.
        fetch-depth: 0
    - name: Output benchmark details
      id: benchmark-details
      run: |
        {
          echo "bench_date=$(date --iso-8601=seconds)";
          echo "commit_date=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
          echo "commit_hash=$(git describe --tags --dirty)";
        } >> "$GITHUB_OUTPUT"
    - name: Set up home
      # "Install rust" step require root user to have a HOME directory which is not set.
      run: |
        echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
    - name: Setup rust toolchain for concrete-cpu
      uses: ./.github/actions/setup_rust_toolchain_for_concrete_cpu
    - name: Build compiler benchmarks
      run: |
        set -e
        echo "Debug env:"
        env
        git config --global --add safe.directory '*'
        cd compilers/concrete-compiler/compiler
        make BINDINGS_PYTHON_ENABLED=OFF CUDA_SUPPORT=ON build-benchmarks
    - name: Run compiler benchmarks
      run: |
        set -e
        cd compilers/concrete-compiler/compiler
        make CUDA_SUPPORT=ON run-gpu-benchmarks
    - name: Upload raw results artifact
      uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
      with:
        name: compiler-benchmarks-result
        path: compilers/concrete-compiler/compiler/benchmarks_results.json
    - name: Slack Notification
      # Only failures on main are reported; a failing notification never fails the job.
      if: ${{ failure() && github.ref == 'refs/heads/main' }}
      continue-on-error: true
      uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
      env:
        SLACK_COLOR: ${{ job.status }}
        SLACK_MESSAGE: "build-and-run-benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

# Converts the raw benchmark JSON into the Slab format, uploads it as an
# artifact and posts it to Slab with an HMAC signature.
# NOTE(review): ./ci/benchmark_parser.py is used without a checkout in this
# job; this presumably relies on the workspace left by build-and-run-benchmarks
# on the same runner — confirm.
parse-and-send-results:
  name: Parse and send results
  needs: [setup-instance, build-and-run-benchmarks]
  runs-on: ${{ needs.setup-instance.outputs.runner-name }}
  steps:
    - name: Download compiler-benchmarks-result
      uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
      with:
        name: compiler-benchmarks-result
    - name: Parse results
      shell: bash
      # Untrusted or free-form values (branch name, git describe output) are
      # passed through the environment and quoted in the script instead of
      # being spliced into the shell source, which prevents script injection
      # and word splitting.
      env:
        PROJECT_VERSION: ${{ needs.build-and-run-benchmarks.outputs.commit_hash }}
        BRANCH_NAME: ${{ github.ref_name }}
        COMMIT_DATE: ${{ needs.build-and-run-benchmarks.outputs.commit_date }}
        BENCH_DATE: ${{ needs.build-and-run-benchmarks.outputs.bench_date }}
      run: |
        # TODO output setup-instance (https://github.com/zama-ai/slab-github-runner/issues/38)
        # --hardware must match instance_type of the gpu-bench profile in ci/slab.toml.
        python3 ./ci/benchmark_parser.py benchmarks_results.json parsed_benchmark_results.json \
          --database compiler_benchmarks \
          --hardware "p3.2xlarge" \
          --project-version "${PROJECT_VERSION}" \
          --branch "${BRANCH_NAME}" \
          --commit-date "${COMMIT_DATE}" \
          --bench-date "${BENCH_DATE}" \
          --throughput
    - name: Upload parsed results artifact
      uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
      with:
        name: compiler-benchmarks-parsed-result
        path: parsed_benchmark_results.json
    - name: Checkout Slab repo
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      with:
        repository: zama-ai/slab
        path: slab
        token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
    - name: Send data to Slab
      shell: bash
      # Secrets are handed over as environment variables so they are never
      # expanded into the script text and are safely quoted at use.
      env:
        JOB_SECRET: ${{ secrets.JOB_SECRET }}
        SLAB_URL: ${{ secrets.SLAB_URL }}
      run: |
        echo "Computing HMac on downloaded artifact"
        SIGNATURE="$(slab/scripts/hmac_calculator.sh parsed_benchmark_results.json "${JOB_SECRET}")"
        echo "Sending results to Slab..."
        # NOTE(review): -k disables TLS certificate verification; kept as in
        # the original, drop it if the Slab endpoint has a valid certificate.
        curl -v -k \
          -H "Content-Type: application/json" \
          -H "X-Slab-Repository: ${{ github.repository }}" \
          -H "X-Slab-Command: store_data" \
          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
          -d @parsed_benchmark_results.json \
          "${SLAB_URL}"
    - name: Slack Notification
      # Only failures on main are reported; a failing notification never fails the job.
      if: ${{ failure() && github.ref == 'refs/heads/main' }}
      continue-on-error: true
      uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
      env:
        SLACK_COLOR: ${{ job.status }}
        SLACK_MESSAGE: "parse-and-send-results finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

# Stops the instance identified by the setup-instance runner label. The job
# runs whatever the outcome of the previous jobs, unless setup-instance itself
# was skipped, and reports its own failure to Slack.
teardown-instance:
  name: Teardown instance
  runs-on: ubuntu-latest
  needs: [ setup-instance, parse-and-send-results ]
  if: ${{ always() && needs.setup-instance.result != 'skipped' }}
  steps:
    - id: stop-instance
      name: Stop instance
      uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7 # v1.3.0
      with:
        mode: stop
        label: ${{ needs.setup-instance.outputs.runner-name }}
        slab-url: ${{ secrets.SLAB_BASE_URL }}
        job-secret: ${{ secrets.JOB_SECRET }}
        github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
    - name: Slack Notification
      uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
      # Sent on any failure of this job, on every branch.
      if: ${{ failure() }}
      continue-on-error: true
      env:
        SLACK_MESSAGE: "Instance teardown finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
        SLACK_COLOR: ${{ job.status }}
7 changes: 7 additions & 0 deletions ci/slab.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ instance_type = "p3.2xlarge"
subnet_id = "subnet-8123c9e7"
security_group= ["sg-017afab1f328af917", ]

# AWS profile selected by `profile: gpu-bench` in the GPU benchmark workflow.
[backend.aws.gpu-bench]
region         = "us-east-1"
image_id       = "ami-0b362b8f9eadd7c30"
instance_type  = "p3.2xlarge"
subnet_id      = "subnet-8123c9e7"
security_group = ["sg-017afab1f328af917"]

[backend.aws.slurm-cluster]
region = "eu-west-3"
image_id = "ami-0bb5bb9cb747b5ddd"
Expand Down
Loading