From 0d604095e4f4fe0a220b62086ad60bcca2974d84 Mon Sep 17 00:00:00 2001 From: Mohammad Wasil Date: Tue, 23 Apr 2024 20:48:29 +0200 Subject: [PATCH 1/6] parameterize context to differentiate between base and ml nb --- .../workflows/docker-build-test-upload.yml | 19 +++++++------------ .github/workflows/docker.yml | 7 +++++++ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/.github/workflows/docker-build-test-upload.yml b/.github/workflows/docker-build-test-upload.yml index 707d864..47ee9f1 100644 --- a/.github/workflows/docker-build-test-upload.yml +++ b/.github/workflows/docker-build-test-upload.yml @@ -24,6 +24,11 @@ on: required: false type: string default: default + context: + description: Path to Dockerfile location + required: false + type: string + default: default registry: description: Registry required: false @@ -106,22 +111,12 @@ jobs: # generate dockerfile cd base-gpu-notebook && bash generate_dockerfile.sh && cd .. - - name: Build base image 🛠 - if: contains(inputs.image, 'base-gpu-notebook') - id: build_base_image - uses: docker/build-push-action@v5 - with: - context: ${{ inputs.image }}/.build/${{ inputs.variant }}/ - push: ${{ inputs.push }} - tags: ${{ inputs.registry }}/${{ env.OWNER }}/${{ inputs.image }}:${{ inputs.variant }} - - name: Build image 🛠 - if: | - inputs.parent-image != '' || !contains(inputs.image, 'base-gpu-notebook') + if: inputs.parent-image != '' id: build_image uses: docker/build-push-action@v5 with: - context: ${{ inputs.image }}/${{ inputs.variant }}/ + context: ${{ inputs.context }} push: ${{ inputs.push }} tags: ${{ inputs.registry }}/${{ env.OWNER }}/${{ inputs.image }}:${{ inputs.variant }} diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index ae443de..f293d64 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -20,6 +20,7 @@ on: branches: - main - master + - ci-test paths: - ".github/workflows/docker.yml" - ".github/workflows/docker-build-test-upload.yml" 
@@ -56,6 +57,7 @@ jobs: parent-variant: cuda11-pytorch-2.2.2 image: base-gpu-notebook variant: cuda11-pytorch-2.2.2 + context: base-gpu-notebook/.build/cuda11-pytorch-2.2.2/ push: ${{ github.event_name == 'push' }} runs-on: ubuntu-latest @@ -68,6 +70,7 @@ jobs: parent-variant: cuda11-pytorch-2.2.2 image: ml-notebook variant: cuda11-pytorch-2.2.2 + context: ml-notebook/cuda11-pytorch-2.2.2/ push: ${{ github.event_name == 'push' }} runs-on: ubuntu-latest @@ -80,6 +83,7 @@ jobs: parent-variant: cuda11-pytorch-2.2.2 image: nlp-notebook variant: cuda11-pytorch-2.2.2 + context: nlp-notebook/cuda11-pytorch-2.2.2/ push: ${{ github.event_name == 'push' }} runs-on: ubuntu-latest @@ -103,6 +107,7 @@ jobs: parent-variant: cuda12-pytorch-2.2.2 image: base-gpu-notebook variant: cuda12-pytorch-2.2.2 + context: base-gpu-notebook/.build/cuda12-pytorch-2.2.2/ push: ${{ github.event_name == 'push' }} runs-on: ubuntu-latest @@ -115,6 +120,7 @@ jobs: parent-variant: cuda12-pytorch-2.2.2 image: ml-notebook variant: cuda12-pytorch-2.2.2 + context: ml-notebook/cuda12-pytorch-2.2.2/ push: ${{ github.event_name == 'push' }} runs-on: ubuntu-latest @@ -127,6 +133,7 @@ jobs: parent-variant: cuda11-pytorch-2.2.2 image: nlp-notebook variant: cuda12-pytorch-2.2.2 + context: nlp-notebook/cuda11-pytorch-2.2.2/ push: ${{ github.event_name == 'push' }} runs-on: ubuntu-latest From 0170a5e16b74e70f52578afc74ed5c08fe642307 Mon Sep 17 00:00:00 2001 From: Mohammad Wasil Date: Tue, 23 Apr 2024 21:11:09 +0200 Subject: [PATCH 2/6] fix registry --- .github/workflows/docker-build-test-upload.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-build-test-upload.yml b/.github/workflows/docker-build-test-upload.yml index 47ee9f1..5586d76 100644 --- a/.github/workflows/docker-build-test-upload.yml +++ b/.github/workflows/docker-build-test-upload.yml @@ -131,7 +131,7 @@ jobs: if: inputs.parent-image != '' run: | mkdir -p /tmp/a2s/images/ - docker save ${{ env.registry 
}}/${{ env.OWNER }}/${{ inputs.image }}:${{ inputs.variant }} | zstd > /tmp/a2s/images/${{ inputs.image }}--${{ inputs.variant }}.tar.zst + docker save ${{ inputs.registry }}/${{ env.OWNER }}/${{ inputs.image }}:${{ inputs.variant }} | zstd > /tmp/a2s/images/${{ inputs.image }}--${{ inputs.variant }}.tar.zst shell: bash - name: Upload image as artifact 💾 From 854f8a27ace75526aca9b8421fdd9915f7541b22 Mon Sep 17 00:00:00 2001 From: Mohammad Wasil Date: Tue, 23 Apr 2024 21:51:01 +0200 Subject: [PATCH 3/6] change default registry to quay.io --- build_and_publish.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/build_and_publish.sh b/build_and_publish.sh index 8ba2e21..ef891df 100644 --- a/build_and_publish.sh +++ b/build_and_publish.sh @@ -53,11 +53,10 @@ parse_args() { if [ -z "$CONTAINER_REGISTRY" ] then echo "Container registry is not set!. Using docker hub registry" - CONTAINER_REG_OWNER=ghcr.io/a2s-institute/docker-stacks + CONTAINER_REG_OWNER=quay.io/a2s-institute else echo "Using $CONTAINER_REGISTRY registry" - OWNER=a2s-institute/docker-stacks - CONTAINER_REG_OWNER=$CONTAINER_REGISTRY/$OWNER + CONTAINER_REG_OWNER=$CONTAINER_REGISTRY/a2s-institute fi echo "Container registry/owner = $CONTAINER_REG_OWNER" From 8f0675de4da0531dc14034c970de88ab84fe5472 Mon Sep 17 00:00:00 2001 From: Mohammad Wasil Date: Tue, 23 Apr 2024 21:55:51 +0200 Subject: [PATCH 4/6] fix edgetpu compiler issue --- ml-notebook/cuda11-pytorch-2.2.2/Dockerfile | 6 +++--- ml-notebook/cuda12-pytorch-2.2.2/Dockerfile | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/ml-notebook/cuda11-pytorch-2.2.2/Dockerfile b/ml-notebook/cuda11-pytorch-2.2.2/Dockerfile index b8f7b1b..b98be57 100644 --- a/ml-notebook/cuda11-pytorch-2.2.2/Dockerfile +++ b/ml-notebook/cuda11-pytorch-2.2.2/Dockerfile @@ -7,10 +7,10 @@ LABEL maintainer="Mohammad Wasil " USER root # Install apt packages - +RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add - 
&& \ + echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | sudo tee /etc/apt/sources.list.d/coral-edgetpu.list + RUN apt update -y && \ - curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \ - "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | tee /etc/apt/sources.list.d/coral-edgetpu.list && \ apt install -y edgetpu-compiler && \ apt install -y libxkbcommon0 libxkbcommon-x11-0 && \ apt install -y build-essential && \ diff --git a/ml-notebook/cuda12-pytorch-2.2.2/Dockerfile b/ml-notebook/cuda12-pytorch-2.2.2/Dockerfile index b921822..c6f891c 100644 --- a/ml-notebook/cuda12-pytorch-2.2.2/Dockerfile +++ b/ml-notebook/cuda12-pytorch-2.2.2/Dockerfile @@ -7,9 +7,10 @@ LABEL maintainer="Mohammad Wasil " USER root # Install apt packages +RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add - && \ + echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | sudo tee /etc/apt/sources.list.d/coral-edgetpu.list + RUN apt update -y && \ - curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \ - echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | tee /etc/apt/sources.list.d/coral-edgetpu.list && \ apt install -y edgetpu-compiler && \ apt install -y libxkbcommon0 libxkbcommon-x11-0 && \ apt install -y build-essential && \ From b3772b38317ed36bb0302232b79b45c83478d3ed Mon Sep 17 00:00:00 2001 From: Mohammad Wasil Date: Tue, 23 Apr 2024 22:31:34 +0200 Subject: [PATCH 5/6] update versions --- README.md | 65 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 9ca0e7d..bab1d1e 100644 --- a/README.md +++ b/README.md @@ -1,44 +1,67 @@ -[![Release 
cuda11.3.1-ubuntu20.04](https://github.com/a2s-institute/docker-stacks/actions/workflows/cuda11.3.1-ubuntu20.04.yml/badge.svg)](https://github.com/a2s-institute/docker-stacks/actions?workflow=cuda11.3.1-ubuntu20.04) -[![Release cuda11.8.0-ubuntu22.04](https://github.com/a2s-institute/docker-stacks/actions/workflows/cuda11.8.0-ubuntu22.04.yml/badge.svg)](https://github.com/a2s-institute/docker-stacks/actions?workflow=cuda11.8.0-ubuntu22.04) -[![Docker Repository on Quay](https://quay.io/repository/a2s-institute/docker-stacks/gpu-notebook/status "Docker Repository on Quay")](https://quay.io/repository/a2s-institute/docker-stacks/gpu-notebook) +# A2S Institute Docker Images -# a2s-institute docker images +Our stacks provide GPU-enabled Jupyter Notebook in Docker containers, which can also run on Kubernetes. The images are based on [Jupyter docker-stacks jupyter/pytorch-notebook](https://github.com/jupyter/docker-stacks/tree/main/images/pytorch-notebook). All images are published on our [ghcr.io](https://github.com/orgs/a2s-institute/packages) and [quay.io](https://quay.io/user/a2s-institute/). -Our stacks provide GPU-enabled Jupyter Notebook in Docker containers, which can also be run on Kubernetes. The image is based on [released cuda version](https://hub.docker.com/r/nvidia/cuda/tags?page=1&name=12.) on docker hub and the Jupyter stacks are based on [jupyter/docker-stacks](https://github.com/jupyter/docker-stacks/). All images are published on our [github registry](https://github.com/orgs/a2s-institute/packages). +The stacks contain several machine learning packages such as TensorFlow, PyTorch, scikit-learn, and other machine learning tools. All images also include VSCode and xfce4 desktop environment. -The stacks contain several machine learning packages such as TensorFlow, PyTorch, scikit-learn, and other machine learning tools. 
+## Docker stack structure +* [base-gpu-notebook](https://github.com/a2s-institute/docker-stacks/tree/master/base-gpu-notebook): contains Jupyter related libraries and also includes different cuda and pytorch versions. It also has VSCode and xfce4 desktop environment. + * [ml-notebook](https://github.com/a2s-institute/docker-stacks/tree/master/ml-notebook): depends on `base-gpu-notebook` and includes several machine learning libraries such as TensorFlow, Keras, scipy, opencv, etc. + * [nlp-notebook](https://github.com/a2s-institute/docker-stacks/tree/master/nlp-notebook): depends on `ml-notebook` and includes NLP libraries such as spaCy, NLTK, llama-cpp-python and wikipedia-api. -## Building and running gpu-notebook in a local Docker container +## Available versions +* `base-gpu-notebook:cuda11-pytorch-2.2.2` +* `base-gpu-notebook:cuda12-pytorch-2.2.2` +* `ml-notebook:cuda11-pytorch-2.2.2` +* `ml-notebook:cuda12-pytorch-2.2.2` +* `nlp-notebook:cuda11-pytorch-2.2.2` +* `nlp-notebook:cuda12-pytorch-2.2.2` + +
+ Older images + +- `ghcr.io/a2s-institute/docker-stacks/gpu-notebook:cuda11.3.1-ubuntu20.04` (no vscode and xfce desktop) +- `ghcr.io/a2s-institute/docker-stacks/gpu-notebook:cuda11.8.0-ubuntu22.04` (no vscode and xfce desktop) +- `ghcr.io/a2s-institute/docker-stacks/gpu-notebook:cuda12.1.0-ubuntu22.04` (no vscode and xfce desktop) +- `ghcr.io/a2s-institute/docker-stacks/gpu-notebook:cuda12.1.0-ubuntu22.04` (no vscode and xfce desktop) + +
+ +## Building and running A2S images locally The base image contains several packages for deep learning projects with NVidia GPU support. * Build notebook image with gpu support ``` - bash build_and_publish.sh --registry ghcr.io --publish "" --cuda-version cuda11.8.0-ubuntu22.04 - ``` + # cuda11 and pytorch 2.2.2 + bash build_and_publish.sh --registry ghcr.io --publish "" \ + --image gpu-base-notebook --tag cuda11-pytorch-2.2.2 - You can build this image using different cuda versions available [here](https://hub.docker.com/r/nvidia/cuda/tags). + # cuda12 and pytorch 2.2.2 + bash build_and_publish.sh --registry ghcr.io --publish "" \ + --image gpu-base-notebook --tag cuda12-pytorch-2.2.2 + ``` * Run the image locally ``` - docker run --gpus all --name gpu-notebook -it --rm -d -p 8880:8888 ghcr.io/b-it-bots/docker/gpu-notebook:cuda11.8.0-ubuntu22.04 + # with GPU + docker run --gpus all --name ml-notebook -it --rm -d -p 8888:8888 \ + quay.io/a2s-institute/ml-notebook:cuda12-pytorch-2.2.2 + + # without GPU + docker run --name ml-notebook -it --rm -d -p 8888:8888 \ + quay.io/a2s-institute/ml-notebook:cuda12-pytorch-2.2.2 ``` -* Login to the container +* Check Jupyter Notebook token via log and open the link ``` - docker exec -ti gpu-notebook bash + docker logs --follow ml-notebook - # check nvidia - nvidia-smi ``` -## Available images - -* `cuda11.3.1-ubuntu20.04` (python=3.10, pytorch=1.12.1) -* `cuda11.8.9-ubuntu22.04` (python=3.11, pytorch=2.0.0) - ## Monitoring You can monitor the GPU usage using nvtop -![nvtop gpu monitoring](figures/nvtop.png) +nvtop gpu monitoring + From 068711b58dc451fff91fa2841a5bdc141f201a05 Mon Sep 17 00:00:00 2001 From: Mohammad Wasil Date: Tue, 23 Apr 2024 22:36:36 +0200 Subject: [PATCH 6/6] fix cuda version for nlp image --- .github/workflows/docker.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index f293d64..d83b34f 100644 --- a/.github/workflows/docker.yml +++ 
b/.github/workflows/docker.yml @@ -130,10 +130,10 @@ jobs: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: ml-notebook - parent-variant: cuda11-pytorch-2.2.2 + parent-variant: cuda12-pytorch-2.2.2 image: nlp-notebook variant: cuda12-pytorch-2.2.2 - context: nlp-notebook/cuda11-pytorch-2.2.2/ + context: nlp-notebook/cuda12-pytorch-2.2.2/ push: ${{ github.event_name == 'push' }} runs-on: ubuntu-latest