diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index aea70c9..4469ee1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,7 @@ repos: # PEP 8 compliant opinionated formatter. - repo: https://github.com/psf/black - rev: 23.9.1 + rev: 23.10.1 hooks: - id: black exclude: (docs/) @@ -23,7 +23,7 @@ repos: # Cleaning unused imports. - repo: https://github.com/hadialqattan/pycln - rev: v2.2.2 + rev: v2.3.0 hooks: - id: pycln args: ["-a"] @@ -38,7 +38,7 @@ repos: # Used to have proper type annotations for library code. - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.5.1 + rev: v1.6.1 hooks: - id: mypy args: [] @@ -73,19 +73,13 @@ repos: hooks: - id: pylint exclude: (/test_|tests/|docs/) - # # You can add additional plugins for pylint here, - # here is an example for pydantic, remember to enable it in pyproject.toml - # additional_dependencies: - # - 'pylint_pydantic' - # args: - # # pylint can have issue with python libraries based on C - # # if it fails to find some objects likely you need to add them - # # here: - # ["--extension-pkg-whitelist=pydantic"] + additional_dependencies: + - 'docker>=6.1.3' # Finds common security issues in Python code. - - repo: https://github.com/Lucas-C/pre-commit-hooks-bandit - rev: v1.0.6 + - repo: https://github.com/PyCQA/bandit + rev: 1.7.5 hooks: - - id: python-bandit-vulnerability-check - args: [-c, pyproject.toml, --recursive, src, -ll] + - id: bandit + args: [-c, pyproject.toml, --recursive, src] + additional_dependencies: [".[toml]"] # required for pyproject.toml support diff --git a/pyproject.toml b/pyproject.toml index 7429fde..b210cd5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,39 +13,15 @@ known_first_party=[ 'ds_pycontain' ] known_third_party=[ # Most popular libraries. Extend if necessary. 
- 'IPython', - 'PIL', - 'cv2', 'dotenv', - 'editdistance', - 'fastapi', - 'fire', - 'hydra', - 'joblib', - 'loguru', - 'luigi', - 'matplotlib', - 'neptune', - 'neptune_config', - 'nltk', - 'numpy', - 'omegaconf', - 'pandas', 'pqdm', 'pydantic', 'pytest', - 'pytorch_lightning', 'requests', - 'scipy', 'setuptools', - 'shapely', - 'skimage', - 'sklearn', - 'streamlit', - 'torch', - 'torchvision', 'tqdm', 'typer', + 'docker', ] skip_gitignore=true @@ -95,6 +71,10 @@ module = "ds_pycontain.*" ignore_missing_imports = false disallow_untyped_defs = true +[[tool.mypy.overrides]] +module = "docker.*" +ignore_errors = true + [tool.pylint.basic] good-names="""i,j,x,y,z,x1,y1,z1,x2,y2,z2,cv,df,dx,dy,dz,w,h,c,b,g,qa,q,a"""" max-args=8 @@ -140,3 +120,6 @@ min-similarity-lines=10 [tool.bandit] exclude_dirs = ["venv",] +# B101 disables errors for asserts in the code +# remember to not use asserts for security and control flows +skips = ["B101"] diff --git a/src/ds_pycontain/__init__.py b/src/ds_pycontain/__init__.py index f732f7f..7ea6db5 100644 --- a/src/ds_pycontain/__init__.py +++ b/src/ds_pycontain/__init__.py @@ -1,4 +1,5 @@ -""" ds_pycontain """ +""" ds_pycontain is a Python package for managing Docker containers and images. 
""" from .__version__ import __version__ +from .docker_containers import DockerContainer, DockerImage, generate_random_container_tag, get_docker_client -__all__ = ["__version__"] +__all__ = ["__version__", "DockerContainer", "DockerImage", "generate_random_container_tag", "get_docker_client"] diff --git a/src/ds_pycontain/docker_containers.py b/src/ds_pycontain/docker_containers.py new file mode 100644 index 0000000..de3f6ac --- /dev/null +++ b/src/ds_pycontain/docker_containers.py @@ -0,0 +1,310 @@ +import io +import uuid +from datetime import datetime +from functools import lru_cache +from pathlib import Path +from types import TracebackType +from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Type, Union + +import docker + + +@lru_cache(maxsize=1) +def get_docker_client(**kwargs: Any) -> docker.DockerClient: # type: ignore[name-defined] + """cached version to retrieve docker client. By default it will use environment + variables to connect to docker daemon. + + :param kwargs: additional arguments to pass to docker client + :return: docker client object + """ + return docker.from_env(**kwargs) # type: ignore[attr-defined] + + +def generate_random_container_tag() -> str: + """Generates a random tag for a docker container. + Format: ds_pycontain_runner:YYYY-MM-DD-HH-MM-SS-<8 random chars> + + :return: random tag for a docker container. + """ + timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + return f"ds_pycontain_runner:{timestamp}-{uuid.uuid4().hex[:8]}" + + +class DockerImage: + """Represents a locally available docker image as a tag. + You can either use existing docker image or build a new one from Dockerfile. 
+ + :example usage: + + >>> image = DockerImage.from_tag("alpine") + >>> image = DockerImage.from_tag("python", tag="3.9-slim") + >>> image = DockerImage.from_dockerfile("example/Dockerfile") + >>> image = DockerImage.from_dockerfile("path/to/dir_with_Dockerfile/", name="cow") + """ + + def __init__(self, name: str): + """Note that it does not pull the image from the internet. + It only represents a tag so it must exist on your system. + It throws ValueError if docker image by that name does not exist locally. + + :param name: docker image name with tag, e.g. "alpine:latest" + :raises ValueError: if docker image by that name does not exist locally. + """ + splitted_name = name.split(":") + if len(splitted_name) == 1: + # by default, image has latest tag. + self.name = name + ":latest" + else: + self.name = name + + if not self.exists(name): + raise ValueError( + f"Invalid value: name={name} does not exist on your system." "Use DockerImage.from_tag() to pull it." + ) + + def __repr__(self) -> str: + """String representation of the object. + :return: string representation of the object (container name).""" + return f"DockerImage(name={self.name})" + + @classmethod + def exists(cls, name: str) -> bool: + """Checks if the docker image exists. + :param name: docker image name with tag, e.g. "alpine:latest" + :return: True if docker image exists, False otherwise + """ + docker_client = get_docker_client() + return len(docker_client.images.list(name=name)) > 0 + + @classmethod + def remove(cls, name: str) -> None: + """WARNING: Removes image from the system, be cautious with this function. + It is irreversible operation!. + :param name: docker image name with tag, e.g. 
"alpine:latest" + """ + if cls.exists(name): + docker_client = get_docker_client() + docker_client.images.remove(name) + + @classmethod + def from_tag( + cls, + repository: str, + tag: str = "latest", + auth_config: Optional[Dict[str, str]] = None, + ) -> "DockerImage": + """Use image with a given repository and tag. It is going to pull it if it is + not present on the system. + Example: repository = "alpine" (will get "latest" tag) + Example: repository = "python" tag = "3.9-slim" + + :param repository: docker image repository, e.g. "alpine". + :param tag: docker image tag, e.g. "latest". + :param auth_config: authentication configuration for private repositories. + :return: DockerImage object representing pulled image on the system. + """ + docker_client = get_docker_client() + name = f"{repository}:{tag}" + if len(docker_client.images.list(name=name)) > 0: + return cls(name=name) + docker_client.images.pull(repository=repository, tag=tag, auth_config=auth_config) + return cls(name=name) + + @classmethod + def from_dockerfile( + cls, + dockerfile_path: Union[Path, str], + name: Union[str, Callable[[], str]] = generate_random_container_tag, + **kwargs: Any, + ) -> "DockerImage": + """Build a new image from Dockerfile given its file path. + + :param dockerfile_path: path to Dockerfile + :param name: name of the image to build or name generator function + defaults to generate_random_container_tag() + :param kwargs: additional arguments to pass to docker client images.build() + :return: DockerImage object representing built image on the system. + :raises ValueError: if dockerfile_path is not a valid path to Dockerfile. 
+ """ + + img_name = name if isinstance(name, str) and name else generate_random_container_tag() + dockerfile = Path(dockerfile_path) + + docker_client = get_docker_client() + + if dockerfile.is_dir(): + if not (dockerfile / "Dockerfile").exists(): + raise ValueError(f"Directory {dockerfile} does not contain a Dockerfile.") + docker_client.images.build(path=str(dockerfile), tag=img_name, rm=True, **kwargs) + elif dockerfile.name == "Dockerfile" and dockerfile.is_file(): + with open(dockerfile, "rb") as df: + docker_client.images.build(fileobj=df, tag=img_name, rm=True, **kwargs) + else: + raise ValueError(f"Invalid parameter: dockerfile: {dockerfile}") + + return cls(name=img_name) + + @classmethod + def from_dockerfile_content( + cls, + dockerfile_str: str, + name: Union[str, Callable[[], str]] = generate_random_container_tag, + **kwargs: Any, + ) -> "DockerImage": + """Build a new image from Dockerfile given a string with Dockerfile content. + + :param dockerfile_str: string with Dockerfile content. + :param name: name of the image to build or name generator function + defaults to generate_random_container_tag() + :param kwargs: additional arguments to pass to docker client images.build() + :return: DockerImage object representing built image on the system. + """ + + img_name = name if isinstance(name, str) and name else generate_random_container_tag() + + buff = io.BytesIO(dockerfile_str.encode("utf-8")) + + docker_client = get_docker_client() + + docker_client.images.build(fileobj=buff, tag=img_name, rm=True, path=str(Path.cwd()), **kwargs) + + return cls(name=img_name) + + +class DockerContainer: + """An isolated environment for running commands, based on docker container. 
+ + Examples: + If you need to run container for a single job: + >>> container = DockerContainer(DockerImage.from_tag("alpine")) + >>> status_code, logs = container.spawn_run("echo hello world") + + To run a container in background and execute commands: + >>> with DockerContainer(DockerImage.from_tag("alpine")) as container: + >>> status_code, logs = container.run("echo hello world") + """ + + def __init__(self, image: DockerImage, **kwargs: Any): + """Wraps docker image to control container interaction. + NOTE: **kwargs are passed to docker client containers.run() method so you can + use them as you wish. + + :param image: docker image to use for container + :param kwargs: additional arguments to pass to docker client containers.run() + """ + self.image = image + self._client = get_docker_client() + self._container = None + self._run_kwargs = kwargs + + def __enter__(self) -> "DockerContainer": + """Enters container context. It means that container is started and you can + execute commands inside it. + """ + self.unsafe_start() + return self + + def unsafe_start(self) -> None: + """Starts container without entering it. + Please prefer to use with DockerContainer statement. + """ + assert self._container is None, "You cannot re-entry container" + # tty=True is required to keep container alive + self._container = self._client.containers.run( + self.image.name, + detach=True, + tty=True, + **self._run_kwargs, + ) + + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc: Optional[BaseException], + traceback: Optional[TracebackType], + ) -> bool: + """Cleanup container on exit. + :param exc_type: exception type + :param exc: exception instance (unused) + :param traceback: traceback object (unused) + :return: True if exception was handled, False otherwise + """ + assert self._container is not None, "You cannot exit unstarted container." + if exc_type is not None: + # re-throw exception. 
try to stop container and remove it + try: + self._cleanup() + except Exception as e: # pylint: disable=broad-except + print("Failed to stop and remove container to cleanup exception.", e) + return False + self.unsafe_exit() + return True + + def unsafe_exit(self) -> None: + """Cleanup container on exit. Please prefer to use `with` statement.""" + if self._container is None: + return + self._cleanup() + self._container = None + + def spawn_run(self, command: Union[str, List[str]], **kwargs: Any) -> Tuple[int, bytes]: + """Run a script in the isolated environment which is docker container with the + same lifetime as this function call. + + You can also pass all arguments that docker client containers.run() accepts. + It blocks till command is finished. + + :param command: command to execute in the container + :param kwargs: additional arguments to pass to docker client containers.run() + :return: tuple of exit code and logs + """ + # we can update here kwargs with self._run_kwargs so user can override them + custom_kwargs = self._run_kwargs.copy().update(kwargs) if kwargs else self._run_kwargs + # There is a known issue with auto_remove=True and docker-py: + # https://github.com/docker/docker-py/issues/1813 + # so as workaround we detach, wait & and remove container manually + container = self._client.containers.run(self.image.name, command=command, detach=True, **custom_kwargs) + status_code = container.wait().get("StatusCode", 1) + logs = container.logs() + container.remove() + return status_code, logs + + @property + def docker_container(self) -> docker.models.containers.Container: # type: ignore[name-defined] + """Returns docker container object. + :return: docker container object""" + assert self._container is not None, "You cannot access container that was not entered" + return self._container + + @property + def name(self) -> str: + """Name of the container if it exists, empty string otherwise. 
+ :return: container name as string.""" + if self._container: + return self._container.name + return "" + + def run( + self, command: Union[str, List[str]], **kwargs: Any + ) -> Tuple[int, Union[bytes, Tuple[bytes, bytes], Generator[bytes, None, None]]]: + """Run a script in the isolated environment which is docker container. + You can send any args which docker-py exec_run accepts: + https://docker-py.readthedocs.io/en/stable/containers.html#docker.models.containers.Container.exec_run + Return is a tuple of exit code and output which is controlled by arguments: + stream, socket and demux. + + :param command: command to execute in the container + :param kwargs: additional arguments to pass to docker client containers.run() + :return: tuple of exit code and output (stream, socket or demux) + """ + assert self._container is not None, "You cannot execute command in container that was not entered" + + exit_code, output = self._container.exec_run(cmd=command, **kwargs) + return exit_code, output + + def _cleanup(self) -> None: + """Stops and removes container.""" + if self._container is None: + return + self._container.stop() + self._container.remove() diff --git a/tests/docker_test_data/Dockerfile b/tests/docker_test_data/Dockerfile new file mode 100644 index 0000000..7c19ef9 --- /dev/null +++ b/tests/docker_test_data/Dockerfile @@ -0,0 +1,5 @@ +# This is a test dockerfile that will be used to test the docker_containers. +FROM python:3.11-alpine +RUN pip install --no-cache-dir cowsay==6.0 +# This runs cowsay and it requires arguments like -t "hello world". 
def test_generate_random_container_tag() -> None:
    """Generated tags carry the documented prefix and differ between calls."""
    tag = generate_random_container_tag()
    assert tag.startswith("ds_pycontain_runner:")
    assert len(tag) > len("ds_pycontain_runner:")
    new_tag = generate_random_container_tag()
    assert tag != new_tag, "Tags should be different"


@pytest.mark.requires("docker")
def test_docker_image_throws_for_bad_name() -> None:
    """Constructing DockerImage for a missing local image raises ValueError."""
    with pytest.raises(ValueError):
        DockerImage(name="docker_image_which_should_not_exist_42")


def run_container_cowsay(image: DockerImage) -> None:
    """Helper for testing - runs cowsay command and verifies it works.

    The image is removed afterwards, whether or not the assertions pass.
    It is a plain helper, not a test, so it carries no pytest marker.
    """
    # note that the command is only the argument list: it is executed
    # by the ENTRYPOINT defined in the Dockerfile (python3 -m cowsay).
    try:
        container = DockerContainer(image)
        ret_code, log = container.spawn_run('-t "I like langchain!"')
        assert ret_code == 0
        assert log.find(b"I like langchain") >= 0, "Cowsay should say same words"
    finally:
        # DockerImage.remove() expects the image name, not the DockerImage object.
        DockerImage.remove(image.name)


@pytest.mark.requires("docker")
def test_build_image_from_dockerfile() -> None:
    """An image can be built from an explicit path to a Dockerfile."""
    dockerfile_path = Path(__file__).parent / "docker_test_data/Dockerfile"
    image = DockerImage.from_dockerfile(dockerfile_path, name="cow")
    run_container_cowsay(image)


@pytest.mark.requires("docker")
def test_build_image_from_dockerfile_dirpath() -> None:
    """An image can be built from a directory that contains a Dockerfile."""
    dockerfile_dir = Path(__file__).parent / "docker_test_data/"
    image = DockerImage.from_dockerfile(dockerfile_dir)
    run_container_cowsay(image)


@pytest.mark.requires("docker")
def test_build_image_from_dockerfile_string_content() -> None:
    """An image can be built from Dockerfile content given as a string."""
    dockerfile_str = "FROM alpine\nRUN touch /animal.txt"
    image = DockerImage.from_dockerfile_content(dockerfile_str)
    # Remove the randomly tagged image so repeated test runs do not pile up images.
    DockerImage.remove(image.name)


@pytest.mark.requires("docker")
def test_docker_spawn_run_works() -> None:
    """spawn_run accepts both list and string commands and returns their logs."""
    container = DockerContainer(DockerImage.from_tag("alpine"))
    status_code, logs = container.spawn_run(["echo", "hello", "world"])
    assert status_code == 0
    assert logs.find(b"hello world") >= 0

    status_code, logs = container.spawn_run("echo good bye")
    assert status_code == 0
    assert logs.find(b"good bye") >= 0


@pytest.mark.requires("docker")
def test_docker_spawn_run_return_nonzero_status_code() -> None:
    """spawn_run reports the command's non-zero exit code together with its logs."""
    container = DockerContainer(DockerImage.from_tag("alpine"))
    status_code, logs = container.spawn_run("sh -c 'echo hey && exit 1'")
    assert status_code == 1
    assert logs.find(b"hey") >= 0


@pytest.mark.requires("docker")
def test_docker_container_background_run_works() -> None:
    """A container entered via `with` keeps state between runs and is removed on exit."""
    client = get_docker_client()
    container_name: str
    with DockerContainer(DockerImage.from_tag("alpine")) as container:
        container_name = container.name
        assert len(client.containers.list(filters={"name": container_name})) == 1
        ret_code, output = container.run("touch /animal.txt")
        assert ret_code == 0

        ret_code, output = container.run("ls /")
        assert ret_code == 0
        assert cast(bytes, output).find(b"animal.txt") >= 0

    assert len(client.containers.list(filters={"name": container_name})) == 0