Skip to content

Commit

Permalink
feat: Add GPU support (#849)
Browse files Browse the repository at this point in the history
  • Loading branch information
andrei-stoian-zama authored Sep 2, 2024
1 parent ee58a68 commit 945aead
Show file tree
Hide file tree
Showing 26 changed files with 1,694 additions and 2,369 deletions.
174 changes: 174 additions & 0 deletions .github/workflows/refresh-notebooks-gpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
# Manually-triggered workflow: re-executes the GPU notebooks on an EC2 GPU
# instance and opens a PR with the refreshed outputs.
name: Refresh Notebooks GPU

# Manual trigger only — GPU instances are costly, so no schedule/push runs.
on:
  workflow_dispatch:

concurrency:
  # One run per ref/event/workflow triple; never cancel an in-flight refresh.
  group: "${{ github.ref }}-${{ github.event_name }}-${{ github.workflow }}"
  cancel-in-progress: false

env:
  # Direct link to this workflow run, used in the final Slack report.
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
jobs:
  # Provision a self-hosted GPU runner (p3.2xlarge) on AWS EC2.
  start-runner-linux:
    name: Start EC2 runner
    runs-on: ubuntu-20.04
    outputs:
      label-38: ${{ steps.start-ec2-runner-38.outputs.label }}
      # Default to '' so downstream `if:` checks see a falsy value when the
      # start step never produced an instance id.
      ec2-instance-id-38: ${{ steps.start-ec2-runner-38.outputs.ec2-instance-id || '' }}
    steps:
      - name: Checkout Code
        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      - name: Start EC2 runner python 38
        id: start-ec2-runner-38
        uses: machulav/ec2-github-runner@fcfb31a5760dad1314a64a0e172b78ec6fc8a17e
        with:
          mode: start
          github-token: ${{ secrets.EC2_RUNNER_BOT_TOKEN }}
          ec2-image-id: ${{ secrets.AWS_EC2_AMI }}
          # p3.2xlarge: GPU instance type for notebook execution.
          ec2-instance-type: "p3.2xlarge"
          subnet-id: ${{ secrets.AWS_EC2_SUBNET_ID }}
          security-group-id: ${{ secrets.AWS_EC2_SECURITY_GROUP_ID }}

refresh-notebooks:
needs: [start-runner-linux]

runs-on: ${{ needs.start-runner-linux.outputs.label-38 }}
# Run in a clean container
container:
image: ubuntu:20.04
defaults:
run:
shell: bash
env:
PIP_INDEX_URL: ${{ secrets.PIP_INDEX_URL }}
PIP_EXTRA_INDEX_URL: ${{ secrets.PIP_EXTRA_INDEX_URL }}
KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}

steps:
- name: Add masks
run: |
echo "::add-mask::${{ secrets.INTERNAL_PYPI_URL_FOR_MASK }}"
echo "::add-mask::${{ secrets.INTERNAL_REPO_URL_FOR_MASK }}"
# Replace default archive.ubuntu.com from docker image with fr mirror
# original archive showed performance issues and is farther away
- name: Docker container related setup and git installation
run: |
TZ=Europe/Paris
echo "TZ=${TZ}" >> "$GITHUB_ENV"
ln -snf /usr/share/zoneinfo/${TZ} /etc/localtime && echo ${TZ} > /etc/timezone
sed -i 's|^deb http://archive|deb http://fr.archive|g' /etc/apt/sources.list
apt update && apt install git git-lfs -y
- name: Checkout Code
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
with:
lfs: true

- name: 'Set up Python 3.8'
uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f
with:
python-version: '3.8'

- name: Install dependencies
run: |
./script/make_utils/setup_os_deps.sh
make setup_env
- name: Refresh Notebooks
run: |
make jupyter_execute_gpu
- name: Prepare PR Body
run: |
SUCCESSFUL_NOTEBOOKS=$(cat ./successful_notebooks.txt | tr '\n' ' ' | sed 's/ /\\n- /g')
FAILED_NOTEBOOKS=$(cat ./failed_notebooks.txt | tr '\n' ' ' | sed 's/ /\\n- /g')
PR_BODY="Automatic PR with notebook refresh for ${{ github.ref_name }}.\\n"
PR_BODY+="## Successful Notebooks\\n- $SUCCESSFUL_NOTEBOOKS\\n"
PR_BODY+="## Failed Notebooks\\n- $FAILED_NOTEBOOKS"
echo "PR_BODY=${PR_BODY}" >> "$GITHUB_ENV"
- name: Open PR
uses: peter-evans/create-pull-request@c5a7806660adbe173f04e3e038b0ccdcd758773c
with:
token: ${{ secrets.BOT_TOKEN }}
commit-message: "chore: refresh notebooks"
branch: "refresh-notebooks-for-${{ github.ref_name }}"
base: "${{ github.ref_name }}"
title: "Refresh notebooks for ${{ github.ref_name }}"
body: ${{ env.PR_BODY }}
add-paths: |
docs/**/*.ipynb
stop-runner-linux:
name: Stop EC2 runner
needs: [refresh-notebooks, start-runner-linux]
runs-on: ubuntu-20.04
if: ${{ always() && (needs.start-runner-linux.result != 'skipped') }}
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ secrets.AWS_REGION }}

- name: Stop EC2 runner python 38
uses: machulav/ec2-github-runner@fcfb31a5760dad1314a64a0e172b78ec6fc8a17e
if: ${{ always() && needs.start-runner-linux.outputs.ec2-instance-id-38 }}
with:
github-token: ${{ secrets.EC2_RUNNER_BOT_TOKEN }}
label: ${{ needs.start-runner-linux.outputs.label-38 }}
ec2-instance-id: ${{ needs.start-runner-linux.outputs.ec2-instance-id-38 }}
mode: stop

send-report:
if: ${{ always() }}
needs:
[
start-runner-linux,
refresh-notebooks,
stop-runner-linux,
]

name: Send Slack notification
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332

- name: Prepare whole job status
if: ${{ always() }}
continue-on-error: true
env:
NEEDS_JSON: ${{ toJSON(needs) }}
run: |
echo "${NEEDS_JSON}" > /tmp/needs_context.json
JOB_STATUS=$(python3 ./script/actions_utils/actions_combine_status.py \
--needs_context_json /tmp/needs_context.json)
echo "JOB_STATUS=${JOB_STATUS}" >> "$GITHUB_ENV"
- name: Slack Notification
if: ${{ always() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
env:
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_COLOR: ${{ env.JOB_STATUS || 'failure' }}
SLACK_MESSAGE: "Full run finished with status ${{ env.JOB_STATUS || 'failure' }} \
(${{ env.ACTION_RUN_URL }})\n\
- start-runner-linux: ${{ needs.start-runner-linux.result || 'Did not run.'}}\n\n\
- refresh-notebooks: ${{ needs.refresh-notebooks.result || 'Did not run.' }}\n\n\
- stop-runner-linux: ${{ needs.stop-runner-linux.result || 'Did not run.'}}"
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,11 @@ jupyter_execute:
poetry run env ./script/make_utils/jupyter.sh --run_all_notebooks
"$(MAKE)" finalize_nb

# Run every GPU-enabled notebook through jupyter.sh (--run_all_notebooks_gpu),
# then call finalize_nb to sanitize the executed notebooks.
# NOTE(review): recipe lines must be tab-indented in the committed Makefile;
# indentation is not visible in this view — confirm against the repository.
.PHONY: jupyter_execute_gpu # Execute all GPU jupyter notebooks sequentially and sanitize
jupyter_execute_gpu:
poetry run env ./script/make_utils/jupyter.sh --run_all_notebooks_gpu
"$(MAKE)" finalize_nb

.PHONY: jupyter_execute_one # Execute one jupyter notebook and sanitize
jupyter_execute_one:
poetry run env ./script/make_utils/jupyter.sh --run_notebook "$${NOTEBOOK}"
Expand Down
20 changes: 20 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ def pytest_addoption(parser):
help="To do longer tests.",
)

parser.addoption(
"--use_gpu",
action="store_true",
help="Force GPU compilation and execution in tests.",
)

parser.addoption(
"--no-flaky", action="store_true", default=False, help="Don't run known flaky tests."
)
Expand Down Expand Up @@ -275,6 +281,20 @@ def is_weekly_option(request):
return is_weekly


@pytest.fixture
def get_device_for_compilation(request):
    """Provide a callable selecting the hardware device for circuit compilation in tests.

    The returned callable takes an ``fhe_mode`` string and returns either
    ``"cuda"`` or ``"cpu"``.
    """

    def _select_device(fhe_mode):
        # Only actual FHE execution may target the GPU; every other mode
        # compiles for CPU regardless of the --use_gpu flag.
        if fhe_mode != "execute":
            return "cpu"
        return "cuda" if request.config.getoption("--use_gpu") else "cpu"

    return _select_device


# Method is not ideal as some MLIR can contain TLUs but not the associated graph
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/2381
def check_graph_input_has_no_tlu_impl(graph: CPGraph):
Expand Down
1 change: 1 addition & 0 deletions docs/SUMMARY.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
- [Production deployment](guides/client_server.md)
- [Hybrid models](guides/hybrid-models.md)
- [Serialization](guides/serialization.md)
- [GPU acceleration](guides/using_gpu.md)

## Tutorials

Expand Down
33 changes: 23 additions & 10 deletions docs/advanced_examples/ClientServer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,13 @@
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Compilation device override, was 'cpu' -> change to 'cuda'\n"
]
},
{
"name": "stdout",
"output_type": "stream",
Expand All @@ -102,6 +109,8 @@
}
],
"source": [
"from concrete.compiler import check_gpu_available\n",
"\n",
"# Let's first get some data and train a model.\n",
"X, y = load_breast_cancer(return_X_y=True)\n",
"\n",
Expand All @@ -114,10 +123,14 @@
"if platform.system() == \"Darwin\":\n",
" n_estimators = 9\n",
"\n",
"\n",
"use_gpu_if_available = False\n",
"device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"\n",
"\n",
"# Train the model and compile it\n",
"model_dev = XGBClassifier(n_bits=2, n_estimators=n_estimators, max_depth=3)\n",
"model_dev.fit(X_model_owner, y_model_owner)\n",
"model_dev.compile(X_model_owner)\n",
"model_dev.compile(X_model_owner, device=device)\n",
"\n",
"print(\"Model trained and compiled.\")"
]
Expand Down Expand Up @@ -145,9 +158,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"total 72K\r\n",
"-rw-r--r-- 1 root root 59K Jun 20 17:23 client.zip\r\n",
"-rw-r--r-- 1 root root 8.6K Jun 20 17:23 server.zip\r\n"
"total 64K\r\n",
"-rw-rw-r-- 1 stoiana stoiana 59K août 31 14:34 client.zip\r\n",
"-rw-rw-r-- 1 stoiana stoiana 2,6K août 31 14:34 server.zip\r\n"
]
}
],
Expand Down Expand Up @@ -175,8 +188,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"total 12K\r\n",
"-rw-r--r-- 1 root root 8.6K Jun 20 17:23 server.zip\r\n"
"total 4,0K\r\n",
"-rw-rw-r-- 1 stoiana stoiana 2,6K août 31 14:34 server.zip\r\n"
]
}
],
Expand All @@ -196,7 +209,7 @@
"output_type": "stream",
"text": [
"total 60K\r\n",
"-rw-r--r-- 1 root root 59K Jun 20 17:23 client.zip\r\n"
"-rw-rw-r-- 1 stoiana stoiana 59K août 31 14:34 client.zip\r\n"
]
}
],
Expand All @@ -222,7 +235,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"KeySetCache: miss, regenerating /tmp/tmpywfsyoq6/12032042141691665494\n"
"KeySetCache: miss, regenerating /tmp/tmpqbqd0vq6/3497183917896914639\n"
]
}
],
Expand All @@ -243,7 +256,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Evaluation keys size: 29.86 MB\n"
"Evaluation keys size: 29.14 MB\n"
]
}
],
Expand All @@ -267,7 +280,7 @@
"output_type": "stream",
"text": [
"Encrypted data is 4.10 times larger than the clear data\n",
"The average execution time is 0.36 seconds per sample.\n"
"The average execution time is 0.55 seconds per sample.\n"
]
}
],
Expand Down
Loading

0 comments on commit 945aead

Please sign in to comment.