Skip to content

Commit

Permalink
chore: fix gpu notebook action (#856)
Browse files Browse the repository at this point in the history
  • Loading branch information
andrei-stoian-zama authored Sep 6, 2024
1 parent 810e8a8 commit dd84628
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 46 deletions.
69 changes: 30 additions & 39 deletions .github/workflows/refresh-notebooks-gpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@ env:
jobs:
start-runner-linux:
name: Start EC2 runner
runs-on: ubuntu-20.04
runs-on: ubuntu-latest
outputs:
label-38: ${{ steps.start-ec2-runner-38.outputs.label }}
ec2-instance-id-38: ${{ steps.start-ec2-runner-38.outputs.ec2-instance-id || '' }}
label-38: ${{ steps.start-gpu-machine.outputs.label }}
steps:
- name: Checkout Code
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
Expand All @@ -25,51 +24,44 @@ jobs:
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ secrets.AWS_REGION }}
aws-region: "us-east-1"

- name: Start EC2 runner python 38
id: start-ec2-runner-38
uses: machulav/ec2-github-runner@fcfb31a5760dad1314a64a0e172b78ec6fc8a17e
- name: Start instance
id: start-gpu-machine
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
with:
mode: start
github-token: ${{ secrets.EC2_RUNNER_BOT_TOKEN }}
ec2-image-id: ${{ secrets.AWS_EC2_AMI }}
ec2-instance-type: "p3.2xlarge"
subnet-id: ${{ secrets.AWS_EC2_SUBNET_ID }}
security-group-id: ${{ secrets.AWS_EC2_SECURITY_GROUP_ID }}
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: hyperstack
profile: single-h100

refresh-notebooks:
needs: [start-runner-linux]

runs-on: ${{ needs.start-runner-linux.outputs.label-38 }}
# Run in a clean container
container:
image: ubuntu:20.04
defaults:
run:
shell: bash
env:
PIP_INDEX_URL: ${{ secrets.PIP_INDEX_URL }}
PIP_EXTRA_INDEX_URL: ${{ secrets.PIP_EXTRA_INDEX_URL }}
KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}

steps:
- name: Add masks
run: |
echo "::add-mask::${{ secrets.INTERNAL_PYPI_URL_FOR_MASK }}"
echo "::add-mask::${{ secrets.INTERNAL_REPO_URL_FOR_MASK }}"
# Replace default archive.ubuntu.com from docker image with fr mirror
# original archive showed performance issues and is farther away
- name: Docker container related setup and git installation
- name: Git installation
run: |
TZ=Europe/Paris
echo "TZ=${TZ}" >> "$GITHUB_ENV"
ln -snf /usr/share/zoneinfo/${TZ} /etc/localtime && echo ${TZ} > /etc/timezone
sed -i 's|^deb http://archive|deb http://fr.archive|g' /etc/apt/sources.list
apt update && apt install git git-lfs -y
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Checkout Code
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
with:
Expand All @@ -84,9 +76,14 @@ jobs:
run: |
./script/make_utils/setup_os_deps.sh
make setup_env
source .venv/bin/activate
CP_VERSION=$(pip freeze | grep concrete-python)
pip uninstall -y concrete-python
pip install --extra-index-url https://pypi.zama.ai/gpu ${CP_VERSION}
- name: Refresh Notebooks
run: |
export CML_USE_GPU=1
make jupyter_execute_gpu
- name: Prepare PR Body
Expand All @@ -113,24 +110,18 @@ jobs:
stop-runner-linux:
name: Stop EC2 runner
needs: [refresh-notebooks, start-runner-linux]
runs-on: ubuntu-20.04
runs-on: ubuntu-latest
if: ${{ always() && (needs.start-runner-linux.result != 'skipped') }}
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ secrets.AWS_REGION }}

- name: Stop EC2 runner python 38
uses: machulav/ec2-github-runner@fcfb31a5760dad1314a64a0e172b78ec6fc8a17e
if: ${{ always() && needs.start-runner-linux.outputs.ec2-instance-id-38 }}
with:
github-token: ${{ secrets.EC2_RUNNER_BOT_TOKEN }}
label: ${{ needs.start-runner-linux.outputs.label-38 }}
ec2-instance-id: ${{ needs.start-runner-linux.outputs.ec2-instance-id-38 }}
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.start-runner-linux.outputs.label-38 }}

send-report:
if: ${{ always() }}
Expand All @@ -142,7 +133,7 @@ jobs:
]

name: Send Slack notification
runs-on: ubuntu-20.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332

Expand Down
6 changes: 6 additions & 0 deletions ci/slab.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ region = "eu-west-1"
image_id = "ami-0898af27b3e2421d8"
instance_type = "hpc7a.96xlarge"


[backend.hyperstack.single-h100]
environment_name = "canada"
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
flavor_name = "n3-H100x1"

# Trigger benchmarks.
[command.bench]
workflow = "single_benchmark.yaml"
Expand Down
2 changes: 1 addition & 1 deletion docs/advanced_examples/DecisionTreeRegressor.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,7 @@
"source": [
"from concrete.compiler import check_gpu_available\n",
"\n",
"use_gpu_if_available = True\n",
"use_gpu_if_available = False\n",
"device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"\n",
"\n",
"x_train_subset = x_train[:500]\n",
Expand Down
2 changes: 1 addition & 1 deletion docs/advanced_examples/XGBClassifier.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"\n",
"from concrete.ml.sklearn import XGBClassifier as ConcreteXGBClassifier\n",
"\n",
"use_gpu_if_available = True\n",
"use_gpu_if_available = False\n",
"device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"\n",
"\n",
"%matplotlib inline"
Expand Down
13 changes: 8 additions & 5 deletions src/concrete/ml/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,11 +728,14 @@ def check_compilation_device_is_valid_and_is_cuda(device: str) -> bool:
device = check_device_is_valid(device)

# Allow forcing device to GPU for tests
if (
os.environ.get("CML_USE_GPU", False) == "1"
and check_gpu_available()
and not device == "cuda"
): # pragma: no cover
if os.environ.get("CML_USE_GPU") == "1" and device != "cuda":  # pragma: no cover
    # The CML_USE_GPU=1 environment variable overrides the requested device with "cuda".
    # Fail loudly instead of silently staying on the original device when the GPU check
    # does not pass, so a misconfigured GPU job cannot quietly run on another device.
    # NOTE(review): check_gpu_enabled is defined outside this hunk; per the message below it
    # is presumed to report whether the installed Concrete runtime has CUDA support.
    if not check_gpu_enabled():
        # Adjacent string literals are concatenated verbatim: each fragment must carry its
        # own trailing space, otherwise words get glued together in the final message.
        raise ValueError(
            "CUDA FHE execution was requested with CML_USE_GPU but the Concrete runtime "
            "that is installed on this system does not support CUDA. Please "
            "install a GPU-enabled Concrete-Python package."
        )

    print(f"Compilation device override, was '{device}' -> change to 'cuda'")
    device = "cuda"

Expand Down

0 comments on commit dd84628

Please sign in to comment.