chore: expose p-error to cifar-16b benchmark and slab

Update the CIFAR workflow so that it can run on hpc7a instances through Slab, and expose p-error as an input parameter of the CIFAR benchmark workflow.
zama-ai · Feb 5, 2024 · 986fb38 · 986fb38
1 parent f07092a
commit 986fb38
Show file tree

Hide file tree

Showing 5 changed files with 84 additions and 87 deletions.
diff --git a/.github/workflows/cifar_benchmark.yaml b/.github/workflows/cifar_benchmark.yaml
@@ -14,18 +14,16 @@ on:
         options:
           - "cifar-10-8b"
           - "cifar-10-16b"
-      instance_type:
-        description: Instance type on which to launch benchmarks
-        default: "m6i.metal"
-        type: choice
-        options:
-          - "m6i.metal"
-          - "u-6tb1.112xlarge"
       num_samples:
         description: Number of samples to use
         default: "3"
         type: string
         required: true
+      p_error:
+        description: P-error to use
+        default: "0.01"
+        type: string
+        required: true
 
   # FIXME: Add recurrent launching
   # https://github.com/zama-ai/concrete-ml-internal/issues/1851
@@ -36,53 +34,32 @@ env:
   ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
   AGENT_TOOLSDIRECTORY: /opt/hostedtoolcache
   RUNNER_TOOL_CACHE: /opt/hostedtoolcache
+  SLAB_PROFILE: big-cpu
+
 
 # Jobs
 jobs:
-  start-cifar-runner:
-    name: Launch AWS instances
-    runs-on: ubuntu-20.04
-    defaults:
-      run:
-        shell: bash
-    container:
-      image: ubuntu:20.04
+  setup-ec2:
+    name: Setup EC2 instance
+    runs-on: ubuntu-latest
     outputs:
-      label: ${{ steps.start-cifar10-8bit-runner.outputs.label }}
-      ec2-instance-id: ${{ steps.start-cifar10-8bit-runner.outputs.ec2-instance-id || '' }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
+      instance-id: ${{ steps.start-instance.outputs.ec2-instance-id }}
     steps:
-      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a
-        with:
-          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          aws-region: ${{ secrets.AWS_REGION }}
-      - name: Start CIFAR-10 8-bit runner
-        id: start-cifar10-8bit-runner
-        uses: machulav/ec2-github-runner@2c4d1dcf2c54673ed3bfd194c4b6919ed396a209
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@ab65ad70bb9f9e9251e4915ea5612bcad23cd9b1
         with:
           mode: start
-          github-token: ${{ secrets.EC2_RUNNER_BOT_TOKEN }}
-          ec2-image-id: ${{ secrets.AWS_EC2_AMI }}
-          ec2-instance-type: ${{ github.event.inputs.instance_type }}
-          subnet-id: ${{ secrets.AWS_EC2_SUBNET_ID }}
-          security-group-id: ${{ secrets.AWS_EC2_SECURITY_GROUP_ID }}
-          aws-resource-tags: >
-            [
-              {"Key": "Name", "Value": "cml-benchmark-cifar10"},
-              {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
-              {"Key": "Actor", "Value": "${{ github.actor }}"},
-              {"Key": "Action", "Value": "${{ github.action }}"},
-              {"Key": "GitHash", "Value": "${{ github.sha }}"},
-              {"Key": "RefName", "Value": "${{ github.ref_name }}"},
-              {"Key": "RunId", "Value": "${{ github.run_id }}"},
-              {"Key": "Team", "Value": "CML"}
-            ]
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          profile: ${{ env.SLAB_PROFILE }}
 
   run-cifar-10:
-    needs: [start-cifar-runner]
+    needs: [setup-ec2]
     name: Run benchmark
-    runs-on: ${{ needs.start-cifar-runner.outputs.label }}
+    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
     env:
       PIP_INDEX_URL: ${{ secrets.PIP_INDEX_URL }}
       PIP_EXTRA_INDEX_URL: ${{ secrets.PIP_EXTRA_INDEX_URL }}
@@ -128,7 +105,7 @@ jobs:
         if: github.event.inputs.benchmark == 'cifar-10-16b'
         run: |
           source .venv/bin/activate
-          NUM_SAMPLES=${{ github.event.inputs.num_samples }} python3 ./use_case_examples/cifar/cifar_brevitas_training/evaluate_one_example_fhe.py
+          NUM_SAMPLES=${{ github.event.inputs.num_samples }} P_ERROR=${{ github.event.inputs.p_error }} python3 ./use_case_examples/cifar/cifar_brevitas_training/evaluate_one_example_fhe.py
           python3 ./benchmarks/convert_cifar.py --model-name "16-bits-trained-v0"
 
       - name: Archive raw predictions
@@ -185,29 +162,22 @@ jobs:
           -d @to_upload.json \
           -X POST "${{ secrets.NEW_ML_PROGRESS_TRACKER_URL }}experiment"
 
-
-  stop-runner:
-    name: Stop EC2 runner
-    needs: [run-cifar-10, start-cifar-runner]
-    runs-on: ubuntu-20.04
-    timeout-minutes: 2
+  teardown-ec2:
+    name: Teardown EC2 instance (fast-tests)
     if: ${{ always() }}
+    needs: [ setup-ec2, run-cifar-10 ]
+    runs-on: ubuntu-latest
     steps:
-      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a
-        with:
-          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          aws-region: ${{ secrets.AWS_REGION }}
-
-      - name: Stop EC2 runner
-        uses: machulav/ec2-github-runner@2c4d1dcf2c54673ed3bfd194c4b6919ed396a209
-        if: ${{ always() }}
+      - name: Stop instance
+        id: stop-instance
+        uses: zama-ai/slab-github-runner@ab65ad70bb9f9e9251e4915ea5612bcad23cd9b1
         with:
           mode: stop
-          github-token: ${{ secrets.EC2_RUNNER_BOT_TOKEN }}
-          label: ${{ needs.start-cifar-runner.outputs.label }}
-          ec2-instance-id: ${{ needs.start-cifar-runner.outputs.ec2-instance-id }}
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          profile: ${{ env.SLAB_PROFILE }}
+          label: ${{ needs.setup-ec2.outputs.runner-name }}
 
   slack-notification:
     runs-on: ubuntu-20.04

diff --git a/README.md b/README.md
@@ -193,7 +193,6 @@ To cite Concrete ML, notably in academic papers, please use the following entry,
   <img src="https://github.com/zama-ai/concrete-ml/assets/157474013/8ef18a7e-671b-495c-8346-fa75227d0af3">
 </a>
 
-
 ## License.
 
 This software is distributed under the BSD-3-Clause-Clear license. If you have any questions, please contact us at [email protected].
diff --git a/benchmarks/convert_cifar.py b/benchmarks/convert_cifar.py
@@ -6,7 +6,6 @@
 import platform
 import socket
 import subprocess
-from importlib.metadata import version
 from pathlib import Path
 from typing import Any, Dict, List, Union
 
@@ -104,6 +103,8 @@ def value_else_none(value):
 def main(model_name):
     # Get metrics
     results = pd.read_csv("./inference_results.csv")
+    with open("./metadata.json", "r", encoding="utf-8") as file:
+        metadata = json.load(file)
     assert isinstance(results, pd.DataFrame)
     timing_columns = [col for col in results.columns if col.endswith("_time")]
     timings = results[timing_columns]
@@ -158,40 +159,44 @@ def main(model_name):
     # Create experiments
     experiments = []
     dataset_name = "CIFAR-10"
-    experiment_representation: Dict[str, Any] = {}
-    experiment_representation["experiment_name"] = f"cifar-10-{model_name}"
-    experiment_representation["experiment_metadata"] = {
-        "model_name": model_name,
-        "dataset_name": dataset_name,
-        "cml_version": version("concrete-ml"),
-        "cnp_version": version("concrete-python"),
-    }
-    experiment_representation["git_hash"] = current_git_hash
-    experiment_representation["git_timestamp"] = current_git_hash_timestamp
-    experiment_representation["experiment_timestamp"] = current_timestamp
+    experiment_data: Dict[str, Any] = {}
+    experiment_data["experiment_name"] = f"cifar-10-{model_name}"
+    experiment_data["experiment_metadata"] = metadata
+    experiment_data["experiment_metadata"].update(
+        {
+            "model_name": model_name,
+            "dataset_name": dataset_name,
+        }
+    )
+    experiment_data["git_hash"] = current_git_hash
+    experiment_data["git_timestamp"] = current_git_hash_timestamp
+    experiment_data["experiment_timestamp"] = current_timestamp
 
-    experiment_representation["metrics"] = []
+    experiment_data["metrics"] = []
     for key, value in timing_means.items():
-        experiment_representation["metrics"].append(
+        experiment_data["metrics"].append(
             {"metric_name": f"{key}_mean", "value": value_else_none(value)}
         )
     for key, value in timing_stds.items():
-        experiment_representation["metrics"].append(
+        experiment_data["metrics"].append(
             {"metric_name": f"{key}_std", "value": value_else_none(value)}
         )
-    experiment_representation["metrics"].append(
+    experiment_data["metrics"].append(
         {"metric_name": "num_samples", "value": value_else_none(num_samples)}
     )
-    experiment_representation["metrics"].append(
+    experiment_data["metrics"].append(
         {"metric_name": "top_1_acc", "value": value_else_none(top_1_acc)}
     )
-    experiment_representation["metrics"].append(
+    experiment_data["metrics"].append(
         {"metric_name": "top_1_acc_diff", "value": value_else_none(top_1_acc_diff)}
     )
-    experiment_representation["metrics"].append(
-        {"metric_name": "chaos_distance_mean", "value": value_else_none(chaos_distance_mean)}
+    experiment_data["metrics"].append(
+        {
+            "metric_name": "chaos_distance_mean",
+            "value": value_else_none(chaos_distance_mean),
+        }
     )
-    experiments.append(experiment_representation)
+    experiments.append(experiment_data)
     session_data["experiments"] = experiments
 
     # Dump modified file

diff --git a/ci/slab.toml b/ci/slab.toml
@@ -6,6 +6,11 @@ instance_type = "m6i.metal"
 subnet_id = "subnet-a029b7ed"
 security_group= ["sg-0bf1c1d79c97bc88f", ]
 
+[profile.big-cpu]
+region = "eu-west-1"
+image_id = "ami-0898af27b3e2421d8"
+instance_type = "hpc7a.96xlarge"
+
 # Trigger benchmarks.
 [command.bench]
 workflow = "single_benchmark.yaml"

diff --git a/use_case_examples/cifar/cifar_brevitas_training/evaluate_one_example_fhe.py b/use_case_examples/cifar/cifar_brevitas_training/evaluate_one_example_fhe.py
@@ -1,6 +1,8 @@
+import json
 import os
 import time
 from functools import partial
+from importlib.metadata import version
 from pathlib import Path
 
 import torch
@@ -20,6 +22,7 @@
 # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3953
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 NUM_SAMPLES = int(os.environ.get("NUM_SAMPLES", 1))
+P_ERROR = float(os.environ.get("P_ERROR", 0.01))
 
 
 def measure_execution_time(func):
@@ -83,7 +86,13 @@ def wrapper(*args, **kwargs):
 print("Compiling the model.")
 quantized_numpy_module, compilation_execution_time = measure_execution_time(
     compile_brevitas_qat_model
-)(torch_model, x, configuration=configuration, rounding_threshold_bits=6, p_error=0.01)
+)(
+    torch_model,
+    x,
+    configuration=configuration,
+    rounding_threshold_bits=6,
+    p_error=P_ERROR,
+)
 assert isinstance(quantized_numpy_module, QuantizedModule)
 
 print(f"Compilation time took {compilation_execution_time} seconds")
@@ -174,6 +183,7 @@ def wrapper(*args, **kwargs):
         "decryption_time": decryption_execution_time,
         "inference_time": clear_inference_time,
         "label": labels[image_index].item(),
+        "p_error": P_ERROR,
     }
 
     for prediction_index, prediction in enumerate(expected_quantized_prediction[0]):
@@ -195,3 +205,11 @@ def wrapper(*args, **kwargs):
     # Write the data rows
     for result in all_results:
         file.write(",".join(str(result[column]) for column in columns) + "\n")
+
+metadata = {
+    "p_error": P_ERROR,
+    "cml_version": version("concrete-ml"),
+    "cnp_version": version("concrete-python"),
+}
+with open("metadata.json", "w") as file:
+    json.dump(metadata, file)