chore: working on CIFAR upload to the DB
fd0r committed Jan 18, 2024
1 parent 5a4e984 commit 58dc450
Showing 3 changed files with 94 additions and 17 deletions.
29 changes: 19 additions & 10 deletions .github/workflows/cifar_benchmark.yaml
@@ -131,16 +131,6 @@ jobs:
NUM_SAMPLES=${{ github.event.inputs.num_samples }} python3 ./use_case_examples/cifar/cifar_brevitas_training/evaluate_one_example_fhe.py
python3 ./benchmarks/convert_cifar.py --model-name "16-bits-trained-v0"
- name: Upload results
if: ${{ github.repository == 'zama-ai/concrete-ml-internal' }}
id: upload-results
run: |
curl \
-H "Authorization: Bearer ${{ secrets.NEW_ML_PROGRESS_TRACKER_TOKEN }}" \
-H "Content-Type: application/json" \
-d @to_upload.json \
-X POST "${{ secrets.NEW_ML_PROGRESS_TRACKER_URL }}experiment"
- name: Archive raw predictions
uses: actions/[email protected]
with:
@@ -177,6 +167,25 @@ jobs:
name: server.zip
path: client_server/server.zip

# We keep this as the last step so that a failure here does not prevent
# the artifacts above from being uploaded
- name: Upload results
id: upload-results
run: |
# Log the json
cat to_upload.json | jq
# We need to sleep to avoid log issues
sleep 1.
# Upload the json to the benchmark database
curl --fail-with-body \
-H "Authorization: Bearer ${{ secrets.NEW_ML_PROGRESS_TRACKER_TOKEN }}" \
-H "Content-Type: application/json; charset=UTF-8" \
--json @to_upload.json \
-X POST "${{ secrets.NEW_ML_PROGRESS_TRACKER_URL }}experiment"
stop-runner:
name: Stop EC2 runner
needs: [run-cifar-10, start-cifar-runner]
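For reference, the new upload step can be exercised outside of CI with a short Python sketch. This is a minimal, hedged equivalent of the curl call above, using only the standard library; TRACKER_URL and TRACKER_TOKEN are placeholder environment variable names, not the actual repository secrets.

import json
import os
import urllib.request

# Load and re-serialize the payload so malformed JSON fails early (similar to the jq check).
with open("to_upload.json", "r", encoding="utf-8") as f:
    payload = json.dumps(json.load(f)).encode("utf-8")

# The workflow concatenates the base URL and "experiment" without adding a slash,
# so TRACKER_URL is assumed to end with one.
request = urllib.request.Request(
    url=os.environ["TRACKER_URL"] + "experiment",
    data=payload,
    headers={
        "Authorization": f"Bearer {os.environ['TRACKER_TOKEN']}",
        "Content-Type": "application/json; charset=UTF-8",
    },
    method="POST",
)

with urllib.request.urlopen(request) as response:
    print(response.status, response.read().decode("utf-8"))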
2 changes: 1 addition & 1 deletion .github/workflows/single_benchmark.yaml
@@ -182,7 +182,7 @@ jobs:
- name: Upload results
id: upload-results
run: |
curl \
curl --fail-with-body \
-H "Authorization: Bearer ${{ secrets.NEW_ML_PROGRESS_TRACKER_TOKEN }}" \
-H "Content-Type: application/json" \
-d @converted.json \
80 changes: 74 additions & 6 deletions benchmarks/convert_cifar.py
@@ -3,12 +3,20 @@
import argparse
import datetime
import json
import logging
import platform
import re
import socket
import subprocess
import uuid
from importlib.metadata import version
from pathlib import Path
from typing import Any, Dict, List, Union

import cpuinfo
import numpy as np
import pandas as pd
import psutil
from convert import get_git_hash, get_git_hash_date, git_iso_to_python_iso, is_git_diff


@@ -28,6 +36,67 @@ def minimum_bribes(q):
return bribes


def get_size(bytes_count: float, suffix="B"):
"""
Scale bytes to its proper format
e.g:
1253656 => '1.20MB'
1253656678 => '1.17GB'
"""
factor = 1024
for unit in ["", "K", "M", "G", "T", "P"]:
if bytes_count < factor:
return f"{bytes_count:.2f} {unit}{suffix}"
bytes_count /= factor
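A quick sanity check of this helper (a minimal sketch; the space before the unit comes from the f-string above):

print(get_size(1253656))       # "1.20 MB"
print(get_size(1253656678))    # "1.17 GB"
print(get_size(psutil.virtual_memory().total))  # total RAM, e.g. "31.26 GB"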


def get_system_information():
# From https://stackoverflow.com/questions/3103178/how-to-get-the-system-info-with-python
info = {}
# What is naturally dumped by python-progress-tracker
info["ram"] = get_size(psutil.virtual_memory().total)
info["cpu"] = cpuinfo.get_cpu_info()["brand_raw"]
info["os"] = f"{platform.system()} {platform.release()}"

# Added metadata about the system
info["platform"] = platform.system()
info["platform-release"] = platform.release()
info["platform-version"] = platform.version()
info["architecture"] = platform.machine()
info["hostname"] = socket.gethostname()
info["processor"] = platform.processor()
info["physical_cores"] = psutil.cpu_count(logical=False)
info["total_cores"] = psutil.cpu_count(logical=True)
uname = platform.uname()
info["machine"] = uname.machine
info["processor"] = uname.processor
info["system"] = uname.system
info["node_name"] = uname.node
info["release"] = uname.release
info["version"] = uname.version
info["swap"] = get_size(psutil.swap_memory().total)

return info


def get_ec2_metadata():
res = {}
try:
output = subprocess.check_output("ec2metadata", shell=True, encoding="utf-8")
for line in output.split("\n"):
if line:
splitted = line.split(": ")
if len(splitted) == 2:
key, value = splitted
res[key] = value
else:
print(line)
return res
except Exception as exception:
print(exception)
return res
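To illustrate what this parser expects, here is a minimal sketch that applies the same split logic to fabricated ec2metadata output (the instance values below are made up):

# Illustrative ec2metadata output; one "key: value" pair per line.
sample = (
    "instance-id: i-0abc123def4567890\n"
    "instance-type: m5.xlarge\n"
    "local-hostname: ip-10-0-0-1.eu-west-1.compute.internal\n"
)
parsed = {}
for line in sample.split("\n"):
    if line:
        parts = line.split(": ")
        if len(parts) == 2:
            key, value = parts
            parsed[key] = value
assert parsed["instance-type"] == "m5.xlarge"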


def main(model_name):
# Get metrics
results = pd.read_csv("./inference_results.csv")
@@ -73,14 +142,13 @@ def main(model_name):
# Collect everything
session_data: Dict[str, Union[Dict, List]] = {}

ec2_metadata = get_ec2_metadata()

# Create machine
# We should probably add the platform to the DB too
session_data["machine"] = {
"machine_name": None,
"machine_specs": {
"cpu": None,
"ram": None,
"os": None,
},
"machine_name": ec2_metadata.get("instance-type", socket.gethostname()),
"machine_specs": get_system_information(),
}

# Create experiments
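Put together, the machine entry sent to the tracker now looks roughly like the sketch below. All values are illustrative; the real content depends on the runner and on whether ec2metadata is available (otherwise the hostname is used as the machine name).

# Illustrative only: the values below are made up for the example.
machine = {
    "machine_name": "m5.xlarge",  # ec2 "instance-type", or socket.gethostname() as fallback
    "machine_specs": {
        "ram": "30.88 GB",
        "cpu": "Intel(R) Xeon(R) Platinum 8175M CPU @ 2.50GHz",
        "os": "Linux 5.15.0-1051-aws",
        "physical_cores": 2,
        "total_cores": 4,
        # ... plus the platform/uname/swap fields added by get_system_information()
    },
}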
