# .github/workflows/cifar_benchmark.yaml — CIFAR-10 benchmark CML (run #52)
# Workflow name shown in the GitHub Actions UI.
name: CIFAR-10 benchmark CML

# Manual trigger only: all parameters are provided (or defaulted) at dispatch time.
on:
  workflow_dispatch:
    inputs:
      git-ref:
        description: Repo reference (branch, tag or SHA)
        default: "main"
        required: true
        type: string
      benchmark:
        description: Benchmark to run (cifar-10-8b or cifar-10-16b)
        default: "cifar-10-16b"
        type: choice
        options:
          - "cifar-10-8b"
          - "cifar-10-16b"
      instance_type:
        # NOTE(review): this input is not referenced by any job below — the
        # setup job provisions with a fixed `profile: cpu-big`. Confirm whether
        # it should be forwarded to the slab-github-runner step.
        description: Instance type on which to launch benchmarks
        default: "hpc7a.96xlarge"
        type: choice
        options:
          - "m6i.metal"
          - "u-6tb1.112xlarge"
          - "hpc7a.96xlarge"
      num_samples:
        description: Number of samples to use
        default: "3"
        type: string
        required: true
      p_error:
        description: P-error to use
        default: "0.01"
        type: string
        required: true

# FIXME: Add recurrent launching
# https://github.com/zama-ai/concrete-ml-internal/issues/1851
# Global environment variables
env:
  # Github action url (used by slack notification)
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  AGENT_TOOLSDIRECTORY: /opt/hostedtoolcache
  RUNNER_TOOL_CACHE: /opt/hostedtoolcache

# We need to use other settings than the CI here to be able to launch benchmarks
# on AWS Ireland that has the hpc7a (which are the fastest machines atm).
# We might clash with TFHE-rs benchmarks on hpc7a since they launch benches on all
# updates of main.
# Jobs
jobs:
  # Provision the self-hosted EC2 runner that the benchmark job runs on.
  # Exposes the runner label and instance id for the downstream jobs.
  setup-ec2:
    name: Setup EC2 instance
    runs-on: ubuntu-latest
    outputs:
      runner-name: ${{ steps.start-instance.outputs.label }}
      instance-id: ${{ steps.start-instance.outputs.ec2-instance-id }}
    steps:
      - name: Start instance
        id: start-instance
        uses: zama-ai/slab-github-runner@ab65ad70bb9f9e9251e4915ea5612bcad23cd9b1
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          profile: cpu-big
run-cifar-10:
needs: [start-ec2]

Check failure on line 72 in .github/workflows/cifar_benchmark.yaml

View workflow run for this annotation

GitHub Actions / CIFAR-10 benchmark CML

Invalid workflow file

The workflow is not valid. .github/workflows/cifar_benchmark.yaml (Line: 72, Col: 13): Job 'run-cifar-10' depends on unknown job 'start-ec2'. .github/workflows/cifar_benchmark.yaml (Line: 180, Col: 25): Job 'teardown-ec2' depends on job 'run-cifar-10' which creates a cycle in the dependency graph.
name: Run benchmark
runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
env:
PIP_INDEX_URL: ${{ secrets.PIP_INDEX_URL }}
PIP_EXTRA_INDEX_URL: ${{ secrets.PIP_EXTRA_INDEX_URL }}
steps:
- name: Add masks
run: |
echo "::add-mask::${{ secrets.INTERNAL_PYPI_URL_FOR_MASK }}"
echo "::add-mask::${{ secrets.INTERNAL_REPO_URL_FOR_MASK }}"
echo "::add-mask::${{ secrets.INTERNAL_PYPI_URL }}"
echo "::add-mask::${{ secrets.INTERNAL_REPO_URL }}"
- name: Checkout code
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
with:
lfs: true
ref: ${{ github.event.inputs.git-ref }}
- name: Set up Python
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c
with:
python-version: "3.8"
- name: Install dependencies
id: install-deps
run: |
apt update
apt install --no-install-recommends -y gnome-keyring
apt install -y graphviz* graphviz-dev libgraphviz-dev pkg-config python3-dev
apt-mark hold docker.io
./script/make_utils/setup_os_deps.sh
make setup_env
# CIFAR-10-8b benchmark
- name: Benchmark - CIFAR-10-8b
if: github.event.inputs.benchmark == 'cifar-10-8b'
run: |
source .venv/bin/activate
NUM_SAMPLES=${{ github.event.inputs.num_samples }} python3 ./use_case_examples/cifar/cifar_brevitas_with_model_splitting/infer_fhe.py
python3 ./benchmarks/convert_cifar.py --model-name "8-bit-split-v0"
# CIFAR-10-16b benchmark
- name: Benchmark - CIFAR-10-16b
if: github.event.inputs.benchmark == 'cifar-10-16b'
run: |
source .venv/bin/activate
NUM_SAMPLES=${{ github.event.inputs.num_samples }} P_ERROR=${{ github.event.inputs.p_error }} python3 ./use_case_examples/cifar/cifar_brevitas_training/evaluate_one_example_fhe.py
python3 ./benchmarks/convert_cifar.py --model-name "16-bits-trained-v0"
- name: Archive raw predictions
uses: actions/[email protected]
with:
name: predictions.csv
path: inference_results.csv
- name: Archive metrics
uses: actions/[email protected]
with:
name: metrics.json
path: to_upload.json
- name: Archive MLIR
uses: actions/[email protected]
with:
name: mlir.txt
path: cifar10.mlir
- name: Archive Graph
uses: actions/[email protected]
with:
name: graph.txt
path: cifar10.graph
- name: Archive client
uses: actions/[email protected]
with:
name: client.zip
path: client_server/client.zip
- name: Archive server
uses: actions/[email protected]
with:
name: server.zip
path: client_server/server.zip
# We need to keep this as the last step to avoid not uploading the artifacts
# if the step crashes
- name: Upload results
id: upload-results
run: |
# Log the json
cat to_upload.json | jq
# We need to sleep to avoid log issues
sleep 1.
# Upload the json to the benchmark database
curl --fail-with-body \
-H "Authorization: Bearer ${{ secrets.NEW_ML_PROGRESS_TRACKER_TOKEN }}" \
-H "Content-Type: application/json; charset=UTF-8" \
-d @to_upload.json \
-X POST "${{ secrets.NEW_ML_PROGRESS_TRACKER_URL }}experiment"
teardown-ec2:
name: Teardown EC2 instance (fast-tests)
if: ${{ always() }}
needs: [ setup-ec2, run-cifar-10 ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@ab65ad70bb9f9e9251e4915ea5612bcad23cd9b1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
profile: cpu-big
label: ${{ needs.setup-ec2.outputs.runner-name }}
slack-notification:
runs-on: ubuntu-20.04
needs: [run-cifar-10]
steps:
- name: Slack Notification
if: ${{ always() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
env:
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_COLOR: ${{ needs.run-cifar-10.result }}
SLACK_MESSAGE: "Benchmark action: ${{ github.event.inputs.benchmark }} (${{ env.ACTION_RUN_URL }}) ended with result: ${{ needs.run-cifar-10.result }}"
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}