Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge multiple Dockerfiles into a single one #2167

Merged
merged 38 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
89c7251
Merge multiple Dockerfiles into a single one
SimonYansenZhao Sep 16, 2024
877066b
Correct pacakge path
SimonYansenZhao Sep 16, 2024
50898de
Set DEBIAN_FRONTEND=noninteractive
SimonYansenZhao Sep 16, 2024
5ee0343
set ENV DEBIAN_FRONTEND noninteractive
SimonYansenZhao Sep 16, 2024
7a40c2f
Debugging
SimonYansenZhao Sep 16, 2024
deb6afc
Correct gpu base image digest
SimonYansenZhao Sep 16, 2024
1688d89
Debugging
SimonYansenZhao Sep 16, 2024
b494c05
Debugging
SimonYansenZhao Sep 17, 2024
3f59bb3
Move ARGs to inner scope
SimonYansenZhao Sep 20, 2024
d0c64f1
Move SHELL to the last stage
SimonYansenZhao Sep 20, 2024
03c891f
Add DEBIAN_FRONTEND=noninteractive
SimonYansenZhao Sep 20, 2024
e56221c
Redeclare ARGs
SimonYansenZhao Sep 20, 2024
424bd1a
Remove options for venv and virtualenv
SimonYansenZhao Sep 20, 2024
4df5e5a
Source .bashrc to activate conda env
SimonYansenZhao Sep 20, 2024
55206ae
Correct conda path
SimonYansenZhao Sep 26, 2024
47a2b6e
Activate conda env
SimonYansenZhao Sep 26, 2024
35a0b21
Init conda
SimonYansenZhao Sep 26, 2024
2be5324
Correct extras
SimonYansenZhao Sep 26, 2024
ea39fc0
Set interactive bash
SimonYansenZhao Sep 26, 2024
a5325e2
Source .bashrc
SimonYansenZhao Sep 26, 2024
393c1a0
Activate conda env by setting system env vars
SimonYansenZhao Sep 26, 2024
2aebacf
Remove env vars defined in RUNs
SimonYansenZhao Sep 26, 2024
3d8560e
Correct conda activate
SimonYansenZhao Sep 27, 2024
040ba1b
Correct dev container setup
SimonYansenZhao Sep 28, 2024
06accbf
Merge branch 'staging' into simonz/dockerfile
SimonYansenZhao Nov 12, 2024
3d509e0
Update docs
SimonYansenZhao Nov 13, 2024
cfccc64
Correct ARGs in multiple stages
SimonYansenZhao Nov 13, 2024
a027502
Remove CONDA_PREFIX
SimonYansenZhao Nov 13, 2024
abcaf42
Update SETUP.md
SimonYansenZhao Nov 13, 2024
4353c35
Correct JAVA_HOME
SimonYansenZhao Nov 13, 2024
651fe6a
Update SETUP.md
SimonYansenZhao Nov 14, 2024
d5b8c5d
Update SETUP.md
SimonYansenZhao Nov 14, 2024
d868f56
Update SETUP.md
SimonYansenZhao Nov 14, 2024
f808391
Update SETUP.md
SimonYansenZhao Nov 14, 2024
f903aaf
Update SETUP.md
SimonYansenZhao Nov 14, 2024
df461b5
Update SETUP.md
SimonYansenZhao Nov 14, 2024
367c877
Update tests/README.md
SimonYansenZhao Nov 14, 2024
720b09e
Update SETUP.md
SimonYansenZhao Nov 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 29 additions & 25 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,22 +1,18 @@
{
"name": "Recommenders",
// Version list: https://github.com/devcontainers/images/tree/main/src/base-ubuntu
// Includes: curl, wget, ca-certificates, git, Oh My Zsh!,
"image": "mcr.microsoft.com/devcontainers/base:ubuntu-24.04",
"hostRequirements": {
"cpus": 4,
"memory": "16gb",
"storage": "32gb"
},
"features": {
// https://github.com/devcontainers/features/blob/main/src/anaconda/devcontainer-feature.json
"ghcr.io/devcontainers/features/anaconda:1": {
"version": "2024.06-1"
"build": {
"dockerfile": "../tools/docker/Dockerfile",
"context": "..",
"target": "deps",
"args": {
"COMPUTE": "cpu",
"PYTHON_VERSION": "3.11"
}
},
"customizations": {
"vscode": {
// Set *default* container specific settings.json values on container create.
// Set default container specific settings.json values on container
// create
"settings": {
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
Expand All @@ -27,24 +23,32 @@
},
"isort.args": ["--profile", "black"],
"python.analysis.autoImportCompletions": true,
"python.defaultInterpreterPath": "/usr/local/conda/envs/Recommenders/bin/python",
// Conda env name *must* align with the one in Dockerfile
"python.defaultInterpreterPath": "/root/conda/envs/Recommenders/bin/python",
"python.testing.pytestEnabled": true,
// set the directory where all tests are
// Test directory
"python.testing.pytestArgs": ["tests"]
},
// Add the IDs of extensions you want installed when the container is created.
// VS Code extensions to install on container create
"extensions": [
"ms-python.black-formatter", // https://marketplace.visualstudio.com/items?itemName=ms-python.black-formatter
"ms-python.isort", // https://marketplace.visualstudio.com/items?itemName=ms-python.isort
"ms-python.mypy-type-checker", // https://marketplace.visualstudio.com/items?itemName=ms-python.mypy-type-checker
"ms-python.pylint", // https://marketplace.visualstudio.com/items?itemName=ms-python.pylint
"ms-python.python", // https://marketplace.visualstudio.com/items?itemName=ms-python.python
"ms-toolsai.datawrangler", // https://marketplace.visualstudio.com/items?itemName=ms-toolsai.datawrangler
"ms-toolsai.jupyter" // https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter
// https://marketplace.visualstudio.com/items?itemName=ms-python.black-formatter
"ms-python.black-formatter",
// https://marketplace.visualstudio.com/items?itemName=ms-python.isort
"ms-python.isort",
// https://marketplace.visualstudio.com/items?itemName=ms-python.mypy-type-checker
"ms-python.mypy-type-checker",
// https://marketplace.visualstudio.com/items?itemName=ms-python.pylint
"ms-python.pylint",
// https://marketplace.visualstudio.com/items?itemName=ms-python.python
"ms-python.python",
// https://marketplace.visualstudio.com/items?itemName=ms-toolsai.datawrangler
SimonYansenZhao marked this conversation as resolved.
Show resolved Hide resolved
"ms-toolsai.datawrangler",
// https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter
"ms-toolsai.jupyter"
]
}
},

// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "conda create -n Recommenders -c conda-forge -y python=3.10 openjdk=21 pip && conda init bash && bash -c -i 'conda activate Recommenders && pip install -e .[dev,spark]' && conda config --set auto_activate_base false"
// Install Recommenders in development mode after container create
"postCreateCommand": "bash -i -c 'conda activate Recommenders && conda install -c conda-forge -y openjdk=21 && pip install -e .[dev,spark]'"
}
85 changes: 85 additions & 0 deletions SETUP.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,91 @@ git checkout staging
pip install -e .[all]
```

We also provide [devcontainer.json](./.devcontainer/devcontainer.json)
SimonYansenZhao marked this conversation as resolved.
Show resolved Hide resolved
and [Dockerfile](./tools/docker/Dockerfile) for developers to
facilitate the development on
[Dev Containers with VS Code](https://code.visualstudio.com/docs/devcontainers/containers)
and [GitHub Codespaces](https://github.com/features/codespaces).

<details>
<summary><strong><em>VS Code Dev Containers</em></strong></summary>

The typical scenario using Docker containers for development is as
follows. Say, we want to develop applications for a specific
environment, so
1. we create a container with the dependencies required,
1. and mount the folder containing the code to the container,
1. then code parsing, debugging and testing are all performed against
the container.
SimonYansenZhao marked this conversation as resolved.
Show resolved Hide resolved
This workflow separates the development environment from your local
environment, so that your local environment won't be affected. The
container used here for this end is called Dev Container in the
VS Code Dev Containers extension. The extension automates this
development workflow with Docker containers, making it largely
painless.
SimonYansenZhao marked this conversation as resolved.
Show resolved Hide resolved

To use VS Code Dev Containers, your local machine must have the
following applications installed:
* [Docker](https://docs.docker.com/get-started/get-docker/)
* [VS Code Remote Development Extension Pack](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.vscode-remote-extensionpack)

Then
* When you open your local Recommenders folder in VS Code, it will
detect [devcontainer.json](./.devcontainer/devcontainer.json), and
prompt you to **Reopen in Container**. If you'd like to reopen,
it will create a container with the required environment described
in devcontainer.json, install a VS Code server in the container,
SimonYansenZhao marked this conversation as resolved.
Show resolved Hide resolved
and mount the folder into the container.
+ If you don't see the prompt, you can use the command
**Dev Containers: Reopen in Container**
* If you don't have a local clone of Recommenders, you can also use
the command **Dev Containers: Clone Repository in Container Volume**,
and type in a branch/PR URL of Recommenders you'd like to develop
on, such as https://github.com/recommenders-team/recommenders,
https://github.com/recommenders-team/recommenders/tree/staging, or
https://github.com/recommenders-team/recommenders/pull/2098. VS
Code will create a container with the environment described in
devcontainer.json, and clone the specified branch of Recommenders
SimonYansenZhao marked this conversation as resolved.
Show resolved Hide resolved
into the container.

Once everything is set up, VS Code will act as a client to the server
in the container, and all subsequent operations on VS Code will be
performed against the container.

</details>

<details>
<summary><strong><em>GitHub Codespaces</em></strong></summary>

GitHub Codespaces also uses devcontainer.json and Dockerfile in the
SimonYansenZhao marked this conversation as resolved.
Show resolved Hide resolved
repo to create the environment on a VM for you to develop on the Web
VS Code. To use the GitHub Codespaces on Recommenders, you can go to
[Recommenders](https://github.com/recommenders-team/recommenders)
$\to$ switch to the branch of interest $\to$ Code $\to$ Codespaces
$\to$ Create codespaces on the branch.

</details>

<details>
<summary><strong><em>devcontainer.json & Dockerfile</em></strong></summary>

[devcontainer.json](./.devcontainer/devcontainer.json) describes:
* the Dockerfile to use with configurable build arguments, such as
`COMPUTE` and `PYTHON_VERSION`.
* settings on VS Code server, such as Python interpreter path in the
container, Python formatter.
* extensions on VS Code server, such as black-formatter, pylint.
* how to create the Conda environment for Recommenders in
`postCreateCommand`

[Dockerfile](./tools/docker/Dockerfile) is used in 3 places:
SimonYansenZhao marked this conversation as resolved.
Show resolved Hide resolved
* Dev containers on VS Code and GitHub Codespaces
* [Testing workflows on AzureML](./tests/README.md)
* [Jupyter notebook examples on Docker](./tools/docker/README.md)

</details>


## Test Environments

Depending on the type of recommender system and the notebook that needs to be run, there are different computational requirements.
Expand Down
23 changes: 20 additions & 3 deletions tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,26 @@ GitHub workflows `azureml-unit-tests.yml`, `azureml-cpu-nightly.yml`, `azureml-g

There are four scripts used with each workflow, all of which are located in [ci/azureml_tests](./ci/azureml_tests/):

* `submit_groupwise_azureml_pytest.py`: this script uses parameters in the workflow yml to set up the AzureML environment for testing using the AzureML SDK.
* `run_groupwise_pytest.py`: this script uses pytest to run the tests of the libraries and notebooks. This script runs in an AzureML workspace with the environment created by the script above.
* `test_groups.py`: this script defines the groups of tests. If the tests are part of the unit tests, the total compute time of each group should be less than 15min. If the tests are part of the nightly builds, the total time of each group should be less than 35min.
* [`submit_groupwise_azureml_pytest.py`](./ci/azureml_tests/submit_groupwise_azureml_pytest.py):
this script uses parameters in the workflow yml to set up the
AzureML environment for testing using the AzureML SDK.
* [`run_groupwise_pytest.py`](./ci/azureml_tests/run_groupwise_pytest.py):
this script uses pytest to run the tests of the libraries and
notebooks. This script runs in an AzureML workspace with the
environment created by the script above.
* [`aml_utils.py`](./ci/azureml_tests/aml_utils.py): this script
defines several utility functions using
[the AzureML Python SDK v2](https://learn.microsoft.com/en-us/azure/machine-learning/concept-v2?view=azureml-api-2).
These functions are used by the scripts above to set up the compute and
SimonYansenZhao marked this conversation as resolved.
Show resolved Hide resolved
the environment for the tests on AzureML. For example, the
environment with all dependencies of Recommenders is created by the
function `get_or_create_environment` via the [Dockerfile](../tools/docker/Dockerfile).
More details on Docker support can be found at [tools/docker/README.md](../tools/docker/README.md).
* [`test_groups.py`](./ci/azureml_tests/test_groups.py): this script
defines the groups of tests. If the tests are part of the unit
tests, the total compute time of each group should be less than
15min. If the tests are part of the nightly builds, the total time
of each group should be less than 35min.

## How to contribute tests to the repository

Expand Down
110 changes: 34 additions & 76 deletions tests/ci/azureml_tests/aml_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@
* https://learn.microsoft.com/en-us/azure/machine-learning/reference-migrate-sdk-v1-mlflow-tracking?view=azureml-api-2&tabs=aml%2Ccli%2Cmlflow
"""
import pathlib
import tempfile
import re

from azure.ai.ml import MLClient, command
from azure.ai.ml.entities import AmlCompute, BuildContext, Environment, Workspace
from azure.ai.ml.exceptions import JobException
from azure.core.exceptions import ResourceExistsError
from azure.identity import DefaultAzureCredential


def get_client(subscription_id, resource_group, workspace_name):
"""
Get the client with specified AzureML workspace, or create one if not existing.
Expand Down Expand Up @@ -61,9 +62,8 @@ def get_or_create_environment(
environment_name,
use_gpu,
use_spark,
conda_pkg_jdk,
conda_openjdk_version,
python_version,
commit_sha,
):
"""
AzureML requires the run environment to be setup prior to submission.
Expand All @@ -77,81 +77,39 @@ def get_or_create_environment(
added to the conda environment, else False
use_spark (bool): True if PySpark packages should be
added to the conda environment, else False
conda_pkg_jdk (str): "openjdk=8" by default
python_version (str): python version, such as "3.9"
commit_sha (str): the commit that triggers the workflow
conda_openjdk_version (str): "21" by default
python_version (str): python version, such as "3.11"
"""
conda_env_name = "reco"
conda_env_yml = "environment.yml"
condafile = fr"""
name: {conda_env_name}
channels:
- conda-forge
dependencies:
- python={python_version}
- {conda_pkg_jdk}
- pip
- pip:
- recommenders[dev{",gpu" if use_gpu else ""}{",spark" if use_spark else ""}]@git+https://github.com/recommenders-team/recommenders.git@{commit_sha}
"""
# See https://github.com/Azure/AzureML-Containers/blob/master/base/cpu/openmpi4.1.0-ubuntu22.04
image = "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04"
# See https://github.com/Azure/AzureML-Containers/blob/master/base/gpu/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04
dockerfile = fr"""# syntax=docker/dockerfile:1
FROM nvcr.io/nvidia/cuda:12.5.1-devel-ubuntu22.04
SHELL ["/bin/bash", "-c"]
USER root:root
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
ENV DEBIAN_FRONTEND noninteractive
RUN apt-get update && \
apt-get install -y wget git-all && \
apt-get clean -y && \
rm -rf /var/lib/apt/lists/*

# Install Conda
ENV CONDA_PREFIX /opt/miniconda
RUN wget -qO /tmp/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py311_24.5.0-0-Linux-x86_64.sh && \
bash /tmp/miniconda.sh -bf -p ${{CONDA_PREFIX}} && \
${{CONDA_PREFIX}}/bin/conda update --all -c conda-forge -y && \
${{CONDA_PREFIX}}/bin/conda clean -ay && \
rm -rf ${{CONDA_PREFIX}}/pkgs && \
rm /tmp/miniconda.sh && \
find / -type d -name __pycache__ | xargs rm -rf

# Create Conda environment
COPY {conda_env_yml} /tmp/{conda_env_yml}
RUN ${{CONDA_PREFIX}}/bin/conda env create -f /tmp/{conda_env_yml}

# Activate Conda environment
ENV CONDA_DEFAULT_ENV {conda_env_name}
ENV CONDA_PREFIX ${{CONDA_PREFIX}}/envs/${{CONDA_DEFAULT_ENV}}
ENV PATH="${{CONDA_PREFIX}}/bin:${{PATH}}" LD_LIBRARY_PATH="${{CONDA_PREFIX}}/lib:$LD_LIBRARY_PATH"
"""

with tempfile.TemporaryDirectory() as tmpdir:
tmpdir = pathlib.Path(tmpdir)
dockerfile_path = tmpdir / "Dockerfile"
condafile_path = tmpdir / conda_env_yml
build = BuildContext(path=tmpdir, dockerfile_path=dockerfile_path.name)

with open(dockerfile_path, "w") as file:
file.write(dockerfile)
with open(condafile_path, "w") as file:
file.write(condafile)

try:
client.environments.create_or_update(
Environment(
name=environment_name,
image=None if use_gpu else image,
build=build if use_gpu else None,
conda_file=None if use_gpu else condafile_path,
)
compute = "gpu" if use_gpu else "cpu"
extras = (
"[dev" + (",gpu" if use_gpu else "") + (",spark" if use_spark else "") + "]"
)
dockerfile = pathlib.Path("tools/docker/Dockerfile")

# Docker's --build-arg is not supported by AzureML Python SDK v2 as shown
# in [the issue #33902](https://github.com/Azure/azure-sdk-for-python/issues/33902)
# so the build args are configured by regex substitution
text = dockerfile.read_text()
text = re.sub(r"(ARG\sCOMPUTE=).*", rf'\1"{compute}"', text)
text = re.sub(r"(ARG\sEXTRAS=).*", rf'\1"{extras}"', text)
text = re.sub(r"(ARG\sGIT_REF=).*", r'\1""', text)
text = re.sub(r"(ARG\sJDK_VERSION=).*", rf'\1"{conda_openjdk_version}"', text)
text = re.sub(r"(ARG\sPYTHON_VERSION=).*", rf'\1"{python_version}"', text)
dockerfile.write_text(text)

try:
client.environments.create_or_update(
Environment(
name=environment_name,
build=BuildContext(
# Set path for Docker to access to Recommenders root
path=".",
dockerfile_path=dockerfile,
),
)
except ResourceExistsError:
pass
)
except ResourceExistsError:
pass


def run_tests(
Expand Down
Loading