diff --git a/changes/227.added b/changes/227.added new file mode 100644 index 00000000..5b64c1be --- /dev/null +++ b/changes/227.added @@ -0,0 +1 @@ +Add Docker GPU base image, activated through the `Dependency` object with the variable `use_gpu=True`. The Docker image used is `nvidia/cuda:11.8.0-runtime-ubuntu22.04`. diff --git a/changes/242.changed b/changes/242.changed new file mode 100644 index 00000000..424e2bd4 --- /dev/null +++ b/changes/242.changed @@ -0,0 +1 @@ +Remove packages named `build-essential` and `*-dev` after building dependencies to reduce exposure to CVEs diff --git a/substrafl/algorithms/pytorch/torch_base_algo.py b/substrafl/algorithms/pytorch/torch_base_algo.py index 5752e1c7..d9fcdaa2 100644 --- a/substrafl/algorithms/pytorch/torch_base_algo.py +++ b/substrafl/algorithms/pytorch/torch_base_algo.py @@ -63,8 +63,7 @@ def __init__( np.random.seed(seed) torch.manual_seed(seed) - self._device = self._get_torch_device(disable_gpu=disable_gpu) - + self.disable_gpu = disable_gpu self._model = model.to(self._device) self._optimizer = optimizer # Move the optimizer to GPU if needed @@ -212,18 +211,16 @@ def _local_train( if self._scheduler is not None: self._scheduler.step() - def _get_torch_device(self, disable_gpu: bool) -> torch.device: + @property + def _device(self) -> torch.device: """Get the torch device, CPU or GPU, depending on availability and user input. - Args: - disable_gpu (bool): whether to use GPUs if available or not. 
- Returns: torch.device: Torch device """ device = torch.device("cpu") - if not disable_gpu and torch.cuda.is_available(): + if not self.disable_gpu and torch.cuda.is_available(): device = torch.device("cuda") return device @@ -249,8 +246,11 @@ def _update_from_checkpoint(self, path: Path) -> dict: return checkpoint """ assert path.is_file(), f'Cannot load the model - does not exist {list(path.parent.glob("*"))}' - checkpoint = torch.load(path, map_location=self._device) + checkpoint = torch.load(path) # TO CHANGE + self.disable_gpu = checkpoint.pop("disable_gpu") + self._model.load_state_dict(checkpoint.pop("model_state_dict")) + self._model.to(self._device) if self._optimizer is not None: self._optimizer.load_state_dict(checkpoint.pop("optimizer_state_dict")) @@ -307,6 +307,9 @@ def _get_state_to_save(self) -> dict: checkpoint = { "model_state_dict": self._model.state_dict(), "index_generator": self._index_generator, + "disable_gpu": self.disable_gpu, + "random_rng_state": random.getstate(), + "numpy_rng_state": np.random.get_state(), } if self._optimizer is not None: checkpoint["optimizer_state_dict"] = self._optimizer.state_dict() @@ -314,10 +317,6 @@ def _get_state_to_save(self) -> dict: if self._scheduler is not None: checkpoint["scheduler_state_dict"] = self._scheduler.state_dict() - checkpoint["random_rng_state"] = random.getstate() - - checkpoint["numpy_rng_state"] = np.random.get_state() - if self._device == torch.device("cpu"): checkpoint["torch_rng_state"] = torch.get_rng_state() else: diff --git a/substrafl/dependency/schemas.py b/substrafl/dependency/schemas.py index cd686fb9..9bef2c2d 100644 --- a/substrafl/dependency/schemas.py +++ b/substrafl/dependency/schemas.py @@ -55,6 +55,8 @@ class Dependency(BaseModel): force_included_paths (List[pathlib.Path]): Force include files otherwise excluded by `excluded_paths` and `excluded_regex` Default to [] + use_gpu (bool): Use nvidia docker image with cuda driver. Allow docker image to access GPU. 
The docker image + will be longer to build. It is recommended to pass ``use_gpu`` to ``True`` only if you want to use GPUs. """ editable_mode: bool = False @@ -65,6 +67,7 @@ class Dependency(BaseModel): excluded_paths: List[Path] = Field(default_factory=list) excluded_regex: List[str] = Field(default_factory=list) force_included_paths: List[Path] = Field(default_factory=list) + use_gpu: bool = False _wheels: List[Path] = [] _local_paths: List[Path] = [] _cache_directory: Optional[Path] = None diff --git a/substrafl/remote/operations.py b/substrafl/remote/operations.py index 5e2daccc..f049ea2d 100644 --- a/substrafl/remote/operations.py +++ b/substrafl/remote/operations.py @@ -1,6 +1,7 @@ """Dataclasses describing the operations to execute on the remote. """ + from dataclasses import dataclass from typing import Any from typing import List diff --git a/substrafl/remote/register/register.py b/substrafl/remote/register/register.py index 61b60b83..dd1f51cf 100644 --- a/substrafl/remote/register/register.py +++ b/substrafl/remote/register/register.py @@ -29,21 +29,38 @@ MINIMAL_PYTHON_VERSION = 9 # 3.9 MAXIMAL_PYTHON_VERSION = 12 # 3.12 -_DEFAULT_BASE_DOCKER_IMAGE = "python:{python_version}-slim" - -DOCKERFILE_TEMPLATE = """ -FROM {docker_image} +_CPU_BASE_IMAGE = """ +FROM python:{python_version}-slim # update image -RUN apt update -y +RUN apt-get update -y +""" + +_GPU_BASE_IMAGE = """ +FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04 +# update image & install Python +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update -y\ + && apt-get install -y software-properties-common\ + && add-apt-repository -y ppa:deadsnakes/ppa\ + && apt-get -y upgrade\ + && apt-get install -y python{python_version} python{python_version}-venv python3-pip\ + && apt-get clean\ + && rm -rf /var/lib/apt/lists/* + +""" + +DOCKERFILE_TEMPLATE = """{base_docker_image} # create a non-root user RUN addgroup --gid 1001 group RUN adduser --disabled-password --gecos "" --uid 1001 --gid 1001 --home 
/home/user user -ENV PYTHONPATH /home/user WORKDIR /home/user USER user +RUN python{python_version} -m venv /home/user/venv +ENV PATH="/home/user/venv/bin:$PATH" VIRTUAL_ENV="/home/user/venv" + # install dependencies RUN python{python_version} -m pip install -U pip @@ -56,6 +73,10 @@ # Install requirements RUN python{python_version} -m pip install --no-cache-dir -r requirements.txt +USER root +RUN apt-get purge -y --auto-remove build-essential *-dev +USER user + # Copy all other files COPY function.py . COPY {internal_dir}/cls_cloudpickle {internal_dir}/ @@ -110,15 +131,19 @@ def _check_python_version(python_major_minor: str) -> None: ) -def _get_base_docker_image(python_major_minor: str, editable_mode: bool) -> str: +def _get_base_docker_image(python_major_minor: str, use_gpu: bool) -> str: """Get the base Docker image for the Dockerfile""" - _check_python_version(python_major_minor) - substratools_image = _DEFAULT_BASE_DOCKER_IMAGE.format( - python_version=python_major_minor, - ) + if use_gpu: + base_docker_image = _GPU_BASE_IMAGE.format( + python_version=python_major_minor, + ) + else: + base_docker_image = _CPU_BASE_IMAGE.format( + python_version=python_major_minor, + ) - return substratools_image + return base_docker_image def _generate_copy_local_files(local_files: typing.List[Path]) -> str: @@ -132,10 +157,11 @@ def _create_dockerfile(install_libraries: bool, dependencies: Dependency, operat # Cloudpickle will crash if we don't deserialize with the same major.minor python_major_minor = ".".join(python_version().split(".")[:2]) + # check that the Python version is supported + _check_python_version(python_major_minor) + # Get the base Docker image - substratools_image = _get_base_docker_image( - python_major_minor=python_major_minor, editable_mode=dependencies.editable_mode - ) + base_docker_image = _get_base_docker_image(python_major_minor=python_major_minor, use_gpu=dependencies.use_gpu) # Build Substrafl, Substra and Substratools, and local dependencies 
wheels if necessary if install_libraries: # generate the copy wheel command @@ -148,7 +174,7 @@ def _create_dockerfile(install_libraries: bool, dependencies: Dependency, operat copy_local_code_cmd = _generate_copy_local_files(dependencies._local_paths) return DOCKERFILE_TEMPLATE.format( - docker_image=substratools_image, + base_docker_image=base_docker_image, python_version=python_major_minor, copy_wheels=copy_wheels_cmd, copy_local_code=copy_local_code_cmd, diff --git a/substrafl/remote/serializers/__init__.py b/substrafl/remote/serializers/__init__.py index 7c49c69b..bb4d6d92 100644 --- a/substrafl/remote/serializers/__init__.py +++ b/substrafl/remote/serializers/__init__.py @@ -1,6 +1,7 @@ """ Serializers to save the user code and wrap it in the Substra algo code. """ + from substrafl.remote.serializers.pickle_serializer import PickleSerializer from substrafl.remote.serializers.serializer import Serializer diff --git a/substrafl/strategies/schemas.py b/substrafl/strategies/schemas.py index 505d46ad..94b68aba 100644 --- a/substrafl/strategies/schemas.py +++ b/substrafl/strategies/schemas.py @@ -1,5 +1,6 @@ """Schemas used in the strategies. 
""" + from enum import Enum from typing import List diff --git a/tests/dependency/installable_library/setup.py b/tests/dependency/installable_library/setup.py index ec210da8..d665756f 100644 --- a/tests/dependency/installable_library/setup.py +++ b/tests/dependency/installable_library/setup.py @@ -1,4 +1,5 @@ """Packaging settings.""" + from setuptools import setup setup( diff --git a/tests/dependency/installable_library2/setup.py b/tests/dependency/installable_library2/setup.py index 0c00ca26..1ff3f8d9 100644 --- a/tests/dependency/installable_library2/setup.py +++ b/tests/dependency/installable_library2/setup.py @@ -1,4 +1,5 @@ """Packaging settings.""" + from setuptools import setup setup( diff --git a/tests/installable_library/setup.py b/tests/installable_library/setup.py index 95b65e9c..c003a2be 100644 --- a/tests/installable_library/setup.py +++ b/tests/installable_library/setup.py @@ -1,4 +1,5 @@ """Packaging settings.""" + from setuptools import find_packages from setuptools import setup diff --git a/tests/remote/register/test_register.py b/tests/remote/register/test_register.py index 8c34f946..56c2bd3c 100644 --- a/tests/remote/register/test_register.py +++ b/tests/remote/register/test_register.py @@ -12,6 +12,7 @@ from substrafl.remote.decorators import remote_data from substrafl.remote.register import register from substrafl.remote.register.register import _create_dockerfile +from substrafl.remote.register.register import _get_base_docker_image class RemoteClass: @@ -37,14 +38,41 @@ def test_check_python_version(version): register._check_python_version(version) -@pytest.mark.parametrize("version", ["3.9", "3.10", "3.11"]) +@pytest.mark.parametrize("version", ["3.9", "3.10", "3.11", "3.12"]) def test_check_python_version_valid(version): """Does not raise for supported versions""" register._check_python_version(version) -def test_create_dockerfile(tmp_path, mocker, local_installable_module): - 
mocker.patch("substrafl.remote.register.register._get_base_docker_image", return_value="substratools-mocked") +def test_get_base_docker_image_cpu(): + expected_dockerfile = """ +FROM python:3.12-slim + +# update image +RUN apt-get update -y +""" + assert expected_dockerfile == _get_base_docker_image("3.12", use_gpu=False) + + +def test_get_base_docker_image_gpu(): + expected_dockerfile = """ +FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04 + +# update image & install Python +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update -y\ + && apt-get install -y software-properties-common\ + && add-apt-repository -y ppa:deadsnakes/ppa\ + && apt-get -y upgrade\ + && apt-get install -y python3.11 python3.11-venv python3-pip\ + && apt-get clean\ + && rm -rf /var/lib/apt/lists/* + +""" + assert expected_dockerfile == _get_base_docker_image("3.11", use_gpu=True) + + +def test_create_dockerfile(tmp_path, local_installable_module): python_version = f"{sys.version_info.major}.{sys.version_info.minor}" substrafl_wheel = f"substrafl_internal/dist/substrafl-{substrafl.__version__}-py3-none-any.whl" substra_wheel = f"substrafl_internal/dist/substra-{substra.__version__}-py3-none-any.whl" @@ -61,22 +89,25 @@ def test_create_dockerfile(tmp_path, mocker, local_installable_module): pypi_dependencies=[], local_installable_dependencies=[local_installable_dependencies], local_code=[local_code_folder], + use_gpu=False, ) dependencies._compute_in_cache_directory expected_dockerfile = f""" -FROM substratools-mocked +FROM python:{python_version}-slim # update image -RUN apt update -y +RUN apt-get update -y # create a non-root user RUN addgroup --gid 1001 group RUN adduser --disabled-password --gecos "" --uid 1001 --gid 1001 --home /home/user user -ENV PYTHONPATH /home/user WORKDIR /home/user USER user +RUN python{python_version} -m venv /home/user/venv +ENV PATH="/home/user/venv/bin:$PATH" VIRTUAL_ENV="/home/user/venv" + # install dependencies RUN python{python_version} -m pip install -U pip @@ 
-92,6 +123,10 @@ def test_create_dockerfile(tmp_path, mocker, local_installable_module): # Install requirements RUN python{python_version} -m pip install --no-cache-dir -r requirements.txt +USER root +RUN apt-get purge -y --auto-remove build-essential *-dev +USER user + # Copy all other files COPY function.py . COPY substrafl_internal/cls_cloudpickle substrafl_internal/ diff --git a/tests/settings.py b/tests/settings.py index 52aaa9e7..0abd5995 100644 --- a/tests/settings.py +++ b/tests/settings.py @@ -1,4 +1,5 @@ """Global settings for all tests environment.""" + import functools from pathlib import Path from typing import List