From 5f3c20a5c0d6d1d14d7bee838cf4c464f244eb59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Gr=C3=B3dek?= Date: Thu, 24 Aug 2023 17:51:03 +0200 Subject: [PATCH 1/8] Add optional dependency for docker --- libs/langchain/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml index e38c5721ce04c..871603a47d283 100644 --- a/libs/langchain/pyproject.toml +++ b/libs/langchain/pyproject.toml @@ -70,6 +70,7 @@ pytesseract = {version = "^0.3.10", optional=true} html2text = {version="^2020.1.16", optional=true} numexpr = "^2.8.4" duckduckgo-search = {version="^3.8.3", optional=true} +docker = {version = "^6.1.3", optional = true} azure-cosmos = {version="^4.4.0b1", optional=true} lark = {version="^1.1.5", optional=true} lancedb = {version = "^0.1", optional = true} From 3609d5dbcd5bea0aa9c762441ed501fe69bc7211 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Gr=C3=B3dek?= Date: Thu, 24 Aug 2023 17:51:41 +0200 Subject: [PATCH 2/8] Add utility to wrap docker API Goal is to execute code and commands in isolated env. --- .../langchain/utilities/docker_containers.py | 236 ++++++++++++++++++ .../utilities/docker_test_data/Dockerfile | 7 + .../utilities/test_docker_containers.py | 85 +++++++ 3 files changed, 328 insertions(+) create mode 100644 libs/langchain/langchain/utilities/docker_containers.py create mode 100644 libs/langchain/tests/integration_tests/utilities/docker_test_data/Dockerfile create mode 100644 libs/langchain/tests/integration_tests/utilities/test_docker_containers.py diff --git a/libs/langchain/langchain/utilities/docker_containers.py b/libs/langchain/langchain/utilities/docker_containers.py new file mode 100644 index 0000000000000..bfec211fa8dd6 --- /dev/null +++ b/libs/langchain/langchain/utilities/docker_containers.py @@ -0,0 +1,236 @@ +from functools import lru_cache +from pathlib import Path +from types import TracebackType +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Generator, + List, + Optional, + Tuple, + Type, + Union, +) + +import docker +import docker.errors + +if TYPE_CHECKING: + from docker.models.containers import Container + + +@lru_cache(maxsize=1) +def get_docker_client(**kwargs: Any) -> docker.DockerClient: + """cached version to retrieve docker client. By default it will use environment + variables to connect to docker daemon. + """ + return docker.from_env(**kwargs) + + +def generate_langchain_container_tag() -> str: + """Generates a random tag for a docker container.""" + import uuid + from datetime import datetime + + timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + return f"langchain_runner:{timestamp}-{uuid.uuid4().hex[:8]}" + + +class DockerImage: + """Represents a locally available docker image as a tag. + You can either use existing docker image or build a new one from Dockerfile. + + Examples: + >>> image = DockerImage.from_tag("alpine") + >>> image = DockerImage.from_tag("python", tag="3.9-slim") + >>> image = DockerImage.from_dockerfile("example/Dockerfile") + >>> image = DockerImage.from_dockerfile("path/to/dir_with_Dockerfile/", name="cow") + """ + + def __init__(self, name: str): + """Note that it does not pull the image from the internet. + It only represents a tag so it must exist on your system. + """ + self.name = name + # check if image exists + docker_client = get_docker_client() + if len(docker_client.images.list(name=name)) < 1: + raise ValueError( + f"Invalid value: name={name} does not exist on your system." + "Use DockerImage.from_tag() to pull it." + ) + + def __repr__(self) -> str: + return f"DockerImage(name={self.name})" + + @staticmethod + def from_tag( + repository: str, + tag: str = "latest", + auth_config: Optional[Dict[str, str]] = None, + ) -> "DockerImage": + """Use image with a given repository and tag. It is going to pull it if it is + not present on the system. + Example: repository = "alpine" (will get "latest" tag) + Example: repository = "python" tag = "3.9-slim" + """ + docker_client = get_docker_client() + docker_client.images.pull( + repository=repository, tag=tag, auth_config=auth_config + ) + return DockerImage(name=f"{repository}:{tag}") + + @staticmethod + def from_dockerfile( + dockerfile_path: Union[Path, str], + name: Union[str, Callable[[], str]] = generate_langchain_container_tag, + **kwargs: Any, + ) -> "DockerImage": + """Build a new image from Dockerfile given its file path.""" + + img_name = ( + name + if isinstance(name, str) and name + else generate_langchain_container_tag() + ) + dockerfile = Path(dockerfile_path) + + docker_client = get_docker_client() + + if dockerfile.is_dir(): + if not (dockerfile / "Dockerfile").exists(): + raise ValueError( + f"Directory {dockerfile} does not contain a Dockerfile." + ) + docker_client.images.build( + path=str(dockerfile), tag=img_name, rm=True, **kwargs + ) + elif dockerfile.name == "Dockerfile" and dockerfile.is_file(): + docker_client.images.build( + fileobj=dockerfile.open("rb"), tag=img_name, rm=True, **kwargs + ) + else: + raise ValueError(f"Invalid parameter: dockerfile: {dockerfile}") + + return DockerImage(name=img_name) + + +class DockerContainer: + """An isolated environment for running commands, based on docker container. + + Examples: + If you need to run container for a single job: + >>> container = DockerContainer(DockerImage.from_tag("alpine")) + >>> status_code, logs = container.spawn_run("echo hello world") + + To run a container in background and execute commands: + >>> with DockerContainer(DockerImage.from_tag("alpine")) as container: + >>> status_code, logs = container.run("echo hello world") + """ + + def __init__(self, image: DockerImage, **kwargs: Any): + """Wraps docker image to control container interaction. + NOTE: **kwargs are passed to docker client containers.run() method so you can + use them as you wish. + """ + self.image = image + self._client = get_docker_client() + self._container = None + self._run_kwargs = kwargs + + def __enter__(self) -> "DockerContainer": + """Enters container context. It means that container is started and you can + execute commands inside it. + """ + assert self._container is None, "You cannot re-entry container" + # tty=True is required to keep container alive + self._container = self._client.containers.run( + self.image.name, + detach=True, + tty=True, + **self._run_kwargs, + ) + return self + + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc: Optional[BaseException], + traceback: Optional[TracebackType], + ) -> bool: + """Cleanup container on exit.""" + if exc_type is not None: + # re-throw exception. try to stop container and remove it + try: + self._cleanup() + except Exception as e: + print("Failed to stop and remove container to cleanup exception.", e) + return False + else: + self._cleanup() + self._container = None + return True + + def spawn_run( + self, command: Union[str, List[str]], **kwargs: Any + ) -> Tuple[int, bytes]: + """Run a script in the isolated environment which is docker container with the + same lifetime as this function call. + + You can also pass all arguments that docker client containers.run() accepts. + It blocks till command is finished. + """ + # we can update here kwargs with self._run_kwargs so user can override them + custom_kwargs = ( + self._run_kwargs.copy().update(kwargs) if kwargs else self._run_kwargs + ) + # There is a known issue with auto_remove=True and docker-py: + # https://github.com/docker/docker-py/issues/1813 + # so as workaround we detach, wait & and remove container manually + container = self._client.containers.run( + self.image.name, command=command, detach=True, **custom_kwargs + ) + status_code = container.wait().get("StatusCode", 1) + logs = container.logs() + container.remove() + return status_code, logs + + @property + def docker_container(self) -> Container: + """Returns docker container object.""" + assert ( + self._container is not None + ), "You cannot access container that was not entered" + return self._container + + @property + def name(self) -> str: + """Name of the container if it exists, empty string otherwise.""" + if self._container: + return self._container.name + return "" + + def run( + self, command: Union[str, List[str]], **kwargs: Any + ) -> Tuple[int, Union[bytes, Tuple[bytes, bytes], Generator[bytes, None, None]]]: + """Run a script in the isolated environment which is docker container. + You can send any args which docker-py exec_run accepts: + https://docker-py.readthedocs.io/en/stable/containers.html#docker.models.containers.Container.exec_run + Return is a tuple of exit code and output which is controlled by arguments: + stream, socket and demux. + """ + assert ( + self._container is not None + ), "You cannot execute command in container that was not entered" + + exit_code, output = self._container.exec_run(cmd=command, **kwargs) + return exit_code, output + + def _cleanup(self) -> None: + """Stops and removes container.""" + if self._container is None: + return + self._container.stop() + self._container.remove() diff --git a/libs/langchain/tests/integration_tests/utilities/docker_test_data/Dockerfile b/libs/langchain/tests/integration_tests/utilities/docker_test_data/Dockerfile new file mode 100644 index 0000000000000..fbc0d7f11be5f --- /dev/null +++ b/libs/langchain/tests/integration_tests/utilities/docker_test_data/Dockerfile @@ -0,0 +1,7 @@ +# This is a test dockerfile that will be used to test the docker_containers. +FROM python:3.11-slim +RUN pip install cowsay +# This runs cowsay with moo so that we can test that the image works +# and additionally every other run command within container +# will execute but with moo prefix for the user input. +ENTRYPOINT ["python3", "-m", "cowsay", "moo"] \ No newline at end of file diff --git a/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py b/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py new file mode 100644 index 0000000000000..56096d52e7a2f --- /dev/null +++ b/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py @@ -0,0 +1,85 @@ +from pathlib import Path +from typing import cast + +import pytest +from docker_containers.docker_containers import ( + DockerContainer, + DockerImage, + generate_langchain_container_tag, + get_docker_client, +) + + +def test_generate_langchain_container_tag() -> None: + tag = generate_langchain_container_tag() + assert tag.startswith("langchain") + assert len(tag) > len("langchain") + new_tag = generate_langchain_container_tag() + assert tag != new_tag, "Tags should be different" + + +def test_docker_image_throws_for_bad_name() -> None: + with pytest.raises(ValueError): + DockerImage(name="riddiculus_docker_image_which_should_not_exist_42") + + +def run_container_cowsay(image: DockerImage) -> None: + """Helper for testing - runs cowsay command and verifies it works.""" + # note that our cowsay adds moo prefix as commands are executed + # by ENTRYPOINT defined in dockerfile. + try: + container = DockerContainer(image) + ret_code, log = container.spawn_run("I like langchain!") + assert ret_code == 0, "Cowsay should return 0" + assert ( + log.find(b"moo I like langchain") >= 0 + ), "Cowsay should say same words with moo" + finally: + docker_client = get_docker_client() + docker_client.images.remove(image.name) + + +def test_build_image_from_dockerfile() -> None: + dockerfile_path = Path(__file__).parent / "docker_test_data/Dockerfile" + image = DockerImage.from_dockerfile(dockerfile_path, name="cow") + run_container_cowsay(image) + + +def test_build_image_from_dockerfile_dirpath() -> None: + dockerfile_dir = Path(__file__).parent / "docker_test_data/" + image = DockerImage.from_dockerfile(dockerfile_dir) + run_container_cowsay(image) + + +def test_docker_spawn_run_works() -> None: + container = DockerContainer(DockerImage.from_tag("alpine")) + status_code, logs = container.spawn_run(["echo", "hello", "world"]) + assert status_code == 0 + assert logs.find(b"hello world") >= 0 + + status_code, logs = container.spawn_run("echo good bye") + assert status_code == 0 + assert logs.find(b"good bye") >= 0 + + +def test_docker_spawn_run_return_nonzero_status_code() -> None: + container = DockerContainer(DockerImage.from_tag("alpine")) + status_code, logs = container.spawn_run("sh -c 'echo langusta && exit 1'") + assert status_code == 1 + assert logs.find(b"langusta") >= 0 + + +def test_docker_container_background_run_works() -> None: + client = get_docker_client() + container_name: str + with DockerContainer(DockerImage.from_tag("alpine")) as container: + container_name = container.name + assert len(client.containers.list(filters={"name": container_name})) == 1 + ret_code, output = container.run("touch /animal.txt") + assert ret_code == 0 + + ret_code, output = container.run("ls /") + assert ret_code == 0 + assert cast(bytes, output).find(b"animal.txt") >= 0 + + assert len(client.containers.list(filters={"name": container_name})) == 0 From 2d1cd6e4b767cd4db636e29391d05d23e54ec5aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Gr=C3=B3dek?= Date: Fri, 25 Aug 2023 16:13:14 +0200 Subject: [PATCH 3/8] Small review fixes use classmethod instead of staticmethod tiny name changes in tests --- .../langchain/langchain/utilities/docker_containers.py | 10 ++++++---- .../utilities/test_docker_containers.py | 10 +++++----- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/libs/langchain/langchain/utilities/docker_containers.py b/libs/langchain/langchain/utilities/docker_containers.py index bfec211fa8dd6..2efca4ccfd361 100644 --- a/libs/langchain/langchain/utilities/docker_containers.py +++ b/libs/langchain/langchain/utilities/docker_containers.py @@ -65,8 +65,9 @@ def __init__(self, name: str): def __repr__(self) -> str: return f"DockerImage(name={self.name})" - @staticmethod + @classmethod def from_tag( + cls, repository: str, tag: str = "latest", auth_config: Optional[Dict[str, str]] = None, @@ -80,10 +81,11 @@ def from_tag( docker_client.images.pull( repository=repository, tag=tag, auth_config=auth_config ) - return DockerImage(name=f"{repository}:{tag}") + return cls(name=f"{repository}:{tag}") - @staticmethod + @classmethod def from_dockerfile( + cls, dockerfile_path: Union[Path, str], name: Union[str, Callable[[], str]] = generate_langchain_container_tag, **kwargs: Any, @@ -114,7 +116,7 @@ def from_dockerfile( else: raise ValueError(f"Invalid parameter: dockerfile: {dockerfile}") - return DockerImage(name=img_name) + return cls(name=img_name) class DockerContainer: diff --git a/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py b/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py index 56096d52e7a2f..c7dfd06c61d3e 100644 --- a/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py +++ b/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py @@ -20,17 +20,17 @@ def test_generate_langchain_container_tag() -> None: def test_docker_image_throws_for_bad_name() -> None: with pytest.raises(ValueError): - DockerImage(name="riddiculus_docker_image_which_should_not_exist_42") + DockerImage(name="docker_image_which_should_not_exist_42") def run_container_cowsay(image: DockerImage) -> None: """Helper for testing - runs cowsay command and verifies it works.""" - # note that our cowsay adds moo prefix as commands are executed + # note that our `cowsay` adds moo prefix as commands are executed # by ENTRYPOINT defined in dockerfile. try: container = DockerContainer(image) ret_code, log = container.spawn_run("I like langchain!") - assert ret_code == 0, "Cowsay should return 0" + assert ret_code == 0 assert ( log.find(b"moo I like langchain") >= 0 ), "Cowsay should say same words with moo" @@ -64,9 +64,9 @@ def test_docker_spawn_run_works() -> None: def test_docker_spawn_run_return_nonzero_status_code() -> None: container = DockerContainer(DockerImage.from_tag("alpine")) - status_code, logs = container.spawn_run("sh -c 'echo langusta && exit 1'") + status_code, logs = container.spawn_run("sh -c 'echo hey && exit 1'") assert status_code == 1 - assert logs.find(b"langusta") >= 0 + assert logs.find(b"hey") >= 0 def test_docker_container_background_run_works() -> None: From 77f0b5b344208a91b4f2d6ef0d98c8d6cb7db314 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Gr=C3=B3dek?= Date: Mon, 28 Aug 2023 08:15:56 +0200 Subject: [PATCH 4/8] Mark test_docker_containers as requiring docker package --- .../integration_tests/utilities/test_docker_containers.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py b/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py index c7dfd06c61d3e..b7b3018f380af 100644 --- a/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py +++ b/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py @@ -18,11 +18,13 @@ def test_generate_langchain_container_tag() -> None: assert tag != new_tag, "Tags should be different" +@pytest.mark.requires("docker") def test_docker_image_throws_for_bad_name() -> None: with pytest.raises(ValueError): DockerImage(name="docker_image_which_should_not_exist_42") +@pytest.mark.requires("docker") def run_container_cowsay(image: DockerImage) -> None: """Helper for testing - runs cowsay command and verifies it works.""" # note that our `cowsay` adds moo prefix as commands are executed @@ -39,18 +41,21 @@ def run_container_cowsay(image: DockerImage) -> None: docker_client.images.remove(image.name) +@pytest.mark.requires("docker") def test_build_image_from_dockerfile() -> None: dockerfile_path = Path(__file__).parent / "docker_test_data/Dockerfile" image = DockerImage.from_dockerfile(dockerfile_path, name="cow") run_container_cowsay(image) +@pytest.mark.requires("docker") def test_build_image_from_dockerfile_dirpath() -> None: dockerfile_dir = Path(__file__).parent / "docker_test_data/" image = DockerImage.from_dockerfile(dockerfile_dir) run_container_cowsay(image) +@pytest.mark.requires("docker") def test_docker_spawn_run_works() -> None: container = DockerContainer(DockerImage.from_tag("alpine")) status_code, logs = container.spawn_run(["echo", "hello", "world"]) @@ -62,6 +67,7 @@ def test_docker_spawn_run_works() -> None: assert logs.find(b"good bye") >= 0 +@pytest.mark.requires("docker") def test_docker_spawn_run_return_nonzero_status_code() -> None: container = DockerContainer(DockerImage.from_tag("alpine")) status_code, logs = container.spawn_run("sh -c 'echo hey && exit 1'") @@ -69,6 +75,7 @@ def test_docker_spawn_run_return_nonzero_status_code() -> None: assert logs.find(b"hey") >= 0 +@pytest.mark.requires("docker") def test_docker_container_background_run_works() -> None: client = get_docker_client() container_name: str From 98e528facf238f092bc139b4a1d907fb3a0494f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Gr=C3=B3dek?= Date: Mon, 28 Aug 2023 08:21:00 +0200 Subject: [PATCH 5/8] Update poetry.lock --- libs/langchain/poetry.lock | 13 ++----------- libs/langchain/pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/libs/langchain/poetry.lock b/libs/langchain/poetry.lock index e21acf426b83b..64483a003c0f6 100644 --- a/libs/langchain/poetry.lock +++ b/libs/langchain/poetry.lock @@ -3542,6 +3542,7 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, + {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, ] [[package]] @@ -8148,10 +8149,8 @@ description = "Fast and Safe Tensor serialization" optional = true python-versions = "*" files = [ - {file = "safetensors-0.3.2-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:4c7827b64b1da3f082301b5f5a34331b8313104c14f257099a12d32ac621c5cd"}, {file = "safetensors-0.3.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b6a66989075c2891d743153e8ba9ca84ee7232c8539704488f454199b8b8f84d"}, {file = "safetensors-0.3.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:670d6bc3a3b377278ce2971fa7c36ebc0a35041c4ea23b9df750a39380800195"}, - {file = "safetensors-0.3.2-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:67ef2cc747c88e3a8d8e4628d715874c0366a8ff1e66713a9d42285a429623ad"}, {file = "safetensors-0.3.2-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:564f42838721925b5313ae864ba6caa6f4c80a9fbe63cf24310c3be98ab013cd"}, {file = "safetensors-0.3.2-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:7f80af7e4ab3188daaff12d43d078da3017a90d732d38d7af4eb08b6ca2198a5"}, {file = "safetensors-0.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec30d78f20f1235b252d59cbb9755beb35a1fde8c24c89b3c98e6a1804cfd432"}, @@ -8160,9 +8159,7 @@ files = [ {file = "safetensors-0.3.2-cp310-cp310-win32.whl", hash = "sha256:2961c1243fd0da46aa6a1c835305cc4595486f8ac64632a604d0eb5f2de76175"}, {file = "safetensors-0.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:c813920482c337d1424d306e1b05824a38e3ef94303748a0a287dea7a8c4f805"}, {file = "safetensors-0.3.2-cp311-cp311-macosx_10_11_universal2.whl", hash = "sha256:707df34bd9b9047e97332136ad98e57028faeccdb9cfe1c3b52aba5964cc24bf"}, - {file = "safetensors-0.3.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:23d1d9f74208c9dfdf852a9f986dac63e40092385f84bf0789d599efa8e6522f"}, {file = "safetensors-0.3.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:becc5bb85b2947eae20ed23b407ebfd5277d9a560f90381fe2c42e6c043677ba"}, - {file = "safetensors-0.3.2-cp311-cp311-macosx_12_0_universal2.whl", hash = "sha256:c1913c6c549b1805e924f307159f0ee97b73ae3ce150cd2401964da015e0fa0b"}, {file = "safetensors-0.3.2-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:30a75707be5cc9686490bde14b9a371cede4af53244ea72b340cfbabfffdf58a"}, {file = "safetensors-0.3.2-cp311-cp311-macosx_13_0_universal2.whl", hash = "sha256:54ad6af663e15e2b99e2ea3280981b7514485df72ba6d014dc22dae7ba6a5e6c"}, {file = "safetensors-0.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37764b3197656ef507a266c453e909a3477dabc795962b38e3ad28226f53153b"}, @@ -8170,28 +8167,22 @@ files = [ {file = "safetensors-0.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ada0fac127ff8fb04834da5c6d85a8077e6a1c9180a11251d96f8068db922a17"}, {file = "safetensors-0.3.2-cp311-cp311-win32.whl", hash = "sha256:155b82dbe2b0ebff18cde3f76b42b6d9470296e92561ef1a282004d449fa2b4c"}, {file = "safetensors-0.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:a86428d196959619ce90197731be9391b5098b35100a7228ef4643957648f7f5"}, - {file = "safetensors-0.3.2-cp37-cp37m-macosx_10_11_x86_64.whl", hash = "sha256:91e796b6e465d9ffaca4c411d749f236c211e257f3a8e9b25a5ffc1a42d3bfa7"}, {file = "safetensors-0.3.2-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:c1f8ab41ed735c5b581f451fd15d9602ff51aa88044bfa933c5fa4b1d0c644d1"}, - {file = "safetensors-0.3.2-cp37-cp37m-macosx_12_0_x86_64.whl", hash = "sha256:e6a8ff5652493598c45cd27f5613c193d3f15e76e0f81613d399c487a7b8cc50"}, {file = "safetensors-0.3.2-cp37-cp37m-macosx_13_0_x86_64.whl", hash = "sha256:bc9cfb3c9ea2aec89685b4d656f9f2296f0f0d67ecf2bebf950870e3be89b3db"}, {file = "safetensors-0.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ace5d471e3d78e0d93f952707d808b5ab5eac77ddb034ceb702e602e9acf2be9"}, {file = "safetensors-0.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de3e20a388b444381bcda1a3193cce51825ddca277e4cf3ed1fe8d9b2d5722cd"}, {file = "safetensors-0.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d7d70d48585fe8df00725aa788f2e64fd24a4c9ae07cd6be34f6859d0f89a9c"}, {file = "safetensors-0.3.2-cp37-cp37m-win32.whl", hash = "sha256:6ff59bc90cdc857f68b1023be9085fda6202bbe7f2fd67d06af8f976d6adcc10"}, {file = "safetensors-0.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:8b05c93da15fa911763a89281906ca333ed800ab0ef1c7ce53317aa1a2322f19"}, - {file = "safetensors-0.3.2-cp38-cp38-macosx_10_11_x86_64.whl", hash = "sha256:94857abc019b49a22a0065cc7741c48fb788aa7d8f3f4690c092c56090227abe"}, {file = "safetensors-0.3.2-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:8969cfd9e8d904e8d3c67c989e1bd9a95e3cc8980d4f95e4dcd43c299bb94253"}, - {file = "safetensors-0.3.2-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:da482fa011dc88fe7376d8f8b42c0ccef2f260e0cbc847ceca29c708bf75a868"}, {file = "safetensors-0.3.2-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:f54148ac027556eb02187e9bc1556c4d916c99ca3cb34ca36a7d304d675035c1"}, {file = "safetensors-0.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:caec25fedbcf73f66c9261984f07885680f71417fc173f52279276c7f8a5edd3"}, {file = "safetensors-0.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:50224a1d99927ccf3b75e27c3d412f7043280431ab100b4f08aad470c37cf99a"}, {file = "safetensors-0.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa98f49e95f02eb750d32c4947e7d5aa43883149ebd0414920866446525b70f0"}, {file = "safetensors-0.3.2-cp38-cp38-win32.whl", hash = "sha256:33409df5e28a83dc5cc5547a3ac17c0f1b13a1847b1eb3bc4b3be0df9915171e"}, {file = "safetensors-0.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:e04a7cbbb3856159ab99e3adb14521544f65fcb8548cce773a1435a0f8d78d27"}, - {file = "safetensors-0.3.2-cp39-cp39-macosx_10_11_x86_64.whl", hash = "sha256:f39f3d951543b594c6bc5082149d994c47ca487fd5d55b4ce065ab90441aa334"}, {file = "safetensors-0.3.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:7c864cf5dcbfb608c5378f83319c60cc9c97263343b57c02756b7613cd5ab4dd"}, {file = "safetensors-0.3.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:14e8c19d6dc51d4f70ee33c46aff04c8ba3f95812e74daf8036c24bc86e75cae"}, - {file = "safetensors-0.3.2-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:41b10b0a6dfe8fdfbe4b911d64717d5647e87fbd7377b2eb3d03fb94b59810ea"}, {file = "safetensors-0.3.2-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:042a60f633c3c7009fdf6a7c182b165cb7283649d2a1e9c7a4a1c23454bd9a5b"}, {file = "safetensors-0.3.2-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:fafd95e5ef41e8f312e2a32b7031f7b9b2a621b255f867b221f94bb2e9f51ae8"}, {file = "safetensors-0.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ed77cf358abce2307f03634694e0b2a29822e322a1623e0b1aa4b41e871bf8b"}, @@ -10447,4 +10438,4 @@ text-helpers = ["chardet"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "43a6bd42efc0baf917418087f788aaf3b1bc793cb4aa81de99c52ed6a7d54d26" +content-hash = "f89d812d676bdc77e633ec4bb81c9f5d44c0abf64e8102eb8d095362bba01428" diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml index 871603a47d283..1d0da2336acab 100644 --- a/libs/langchain/pyproject.toml +++ b/libs/langchain/pyproject.toml @@ -70,7 +70,6 @@ pytesseract = {version = "^0.3.10", optional=true} html2text = {version="^2020.1.16", optional=true} numexpr = "^2.8.4" duckduckgo-search = {version="^3.8.3", optional=true} -docker = {version = "^6.1.3", optional = true} azure-cosmos = {version="^4.4.0b1", optional=true} lark = {version="^1.1.5", optional=true} lancedb = {version = "^0.1", optional = true} @@ -128,6 +127,7 @@ xata = {version = "^1.0.0a7", optional = true} xmltodict = {version = "^0.13.0", optional = true} markdownify = {version = "^0.11.6", optional = true} assemblyai = {version = "^0.17.0", optional = true} +docker = {version = "^6.1.3", optional = true} [tool.poetry.group.test.dependencies] From 41960a6ac01e6bbf3e34e40ba4e4967edc33e7b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Gr=C3=B3dek?= Date: Wed, 30 Aug 2023 12:35:39 +0200 Subject: [PATCH 6/8] Fix docker containers import path --- .../tests/integration_tests/utilities/test_docker_containers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py b/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py index b7b3018f380af..48e4c49ac3183 100644 --- a/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py +++ b/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py @@ -2,7 +2,7 @@ from typing import cast import pytest -from docker_containers.docker_containers import ( +from langchain.utilities.docker_containers import ( DockerContainer, DockerImage, generate_langchain_container_tag, From c5b8c67902db8402f3a674431d3e3684d697b122 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Gr=C3=B3dek?= Date: Wed, 13 Sep 2023 12:36:38 +0200 Subject: [PATCH 7/8] Fix failing tests and add helper functions for docker image --- .../langchain/utilities/docker_containers.py | 52 ++++++++++++++++--- .../utilities/docker_test_data/Dockerfile | 10 ++-- .../utilities/test_docker_containers.py | 10 ++-- 3 files changed, 52 insertions(+), 20 deletions(-) diff --git a/libs/langchain/langchain/utilities/docker_containers.py b/libs/langchain/langchain/utilities/docker_containers.py index 2efca4ccfd361..c86409d4167af 100644 --- a/libs/langchain/langchain/utilities/docker_containers.py +++ b/libs/langchain/langchain/utilities/docker_containers.py @@ -52,11 +52,16 @@ class DockerImage: def __init__(self, name: str): """Note that it does not pull the image from the internet. It only represents a tag so it must exist on your system. + It throws ValueError if docker image by that name does not exist locally. """ - self.name = name - # check if image exists - docker_client = get_docker_client() - if len(docker_client.images.list(name=name)) < 1: + splitted_name = name.split(":") + if len(splitted_name) == 1: + # by default, image has latest tag. + self.name = name + ":latest" + else: + self.name = name + + if not self.exists(name): raise ValueError( f"Invalid value: name={name} does not exist on your system." "Use DockerImage.from_tag() to pull it." @@ -65,6 +70,21 @@ def __init__(self, name: str): def __repr__(self) -> str: return f"DockerImage(name={self.name})" + @classmethod + def exists(cls, name: str) -> bool: + """Checks if the docker image exists""" + docker_client = get_docker_client() + return len(docker_client.images.list(name=name)) > 0 + + @classmethod + def remove(cls, name: str) -> None: + """WARNING: Removes image from the system, be cautious with this function. + It is irreversible operation!. + """ + if cls.exists(name): + docker_client = get_docker_client() + docker_client.images.remove(name) + @classmethod def from_tag( cls, @@ -78,10 +98,13 @@ def from_tag( Example: repository = "python" tag = "3.9-slim" """ docker_client = get_docker_client() + name = f"{repository}:{tag}" + if len(docker_client.images.list(name=name)) > 0: + return cls(name=name) docker_client.images.pull( repository=repository, tag=tag, auth_config=auth_config ) - return cls(name=f"{repository}:{tag}") + return cls(name=name) @classmethod def from_dockerfile( @@ -146,6 +169,13 @@ def __enter__(self) -> "DockerContainer": """Enters container context. It means that container is started and you can execute commands inside it. """ + self.unsafe_start() + return self + + def unsafe_start(self) -> None: + """Starts container without entering it. + Please prefer to use with DockerContainer statement. + """ assert self._container is None, "You cannot re-entry container" # tty=True is required to keep container alive self._container = self._client.containers.run( @@ -154,7 +184,6 @@ def __enter__(self) -> "DockerContainer": tty=True, **self._run_kwargs, ) - return self def __exit__( self, @@ -163,6 +192,7 @@ def __exit__( traceback: Optional[TracebackType], ) -> bool: """Cleanup container on exit.""" + assert self._container is not None, "You cannot exit unstarted container." if exc_type is not None: # re-throw exception. try to stop container and remove it try: @@ -171,10 +201,16 @@ def __exit__( print("Failed to stop and remove container to cleanup exception.", e) return False else: - self._cleanup() - self._container = None + self.unsafe_exit() return True + def unsafe_exit(self): + """Cleanup container on exit. Please prefer to use `with` statement.""" + if self._container is None: + return + self._cleanup() + self._container = None + def spawn_run( self, command: Union[str, List[str]], **kwargs: Any ) -> Tuple[int, bytes]: diff --git a/libs/langchain/tests/integration_tests/utilities/docker_test_data/Dockerfile b/libs/langchain/tests/integration_tests/utilities/docker_test_data/Dockerfile index fbc0d7f11be5f..7c19ef9a14310 100644 --- a/libs/langchain/tests/integration_tests/utilities/docker_test_data/Dockerfile +++ b/libs/langchain/tests/integration_tests/utilities/docker_test_data/Dockerfile @@ -1,7 +1,5 @@ # This is a test dockerfile that will be used to test the docker_containers. -FROM python:3.11-slim -RUN pip install cowsay -# This runs cowsay with moo so that we can test that the image works -# and additionally every other run command within container -# will execute but with moo prefix for the user input. -ENTRYPOINT ["python3", "-m", "cowsay", "moo"] \ No newline at end of file +FROM python:3.11-alpine +RUN pip install --no-cache-dir cowsay==6.0 +# This runs cowsay and it requires arguments like -t "hello world". +ENTRYPOINT ["python3", "-m", "cowsay"] \ No newline at end of file diff --git a/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py b/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py index 48e4c49ac3183..459cd9f2176d3 100644 --- a/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py +++ b/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py @@ -2,6 +2,7 @@ from typing import cast import pytest + from langchain.utilities.docker_containers import ( DockerContainer, DockerImage, @@ -31,14 +32,11 @@ def run_container_cowsay(image: DockerImage) -> None: # by ENTRYPOINT defined in dockerfile. try: container = DockerContainer(image) - ret_code, log = container.spawn_run("I like langchain!") + ret_code, log = container.spawn_run('-t "I like langchain!"') assert ret_code == 0 - assert ( - log.find(b"moo I like langchain") >= 0 - ), "Cowsay should say same words with moo" + assert log.find(b"I like langchain") >= 0, "Cowsay should say same words" finally: - docker_client = get_docker_client() - docker_client.images.remove(image.name) + DockerImage.remove(image) @pytest.mark.requires("docker") From 88455be8cf76c09fa01b0ff6602769a6888c8d62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Gr=C3=B3dek?= Date: Wed, 20 Sep 2023 08:45:02 +0200 Subject: [PATCH 8/8] Add function to build docker from in-memory dockerfile --- .../langchain/utilities/docker_containers.py | 27 +++++++++++++++++++ .../utilities/test_docker_containers.py | 6 +++++ 2 files changed, 33 insertions(+) diff --git a/libs/langchain/langchain/utilities/docker_containers.py b/libs/langchain/langchain/utilities/docker_containers.py index c86409d4167af..08d1bf567ffc2 100644 --- a/libs/langchain/langchain/utilities/docker_containers.py +++ b/libs/langchain/langchain/utilities/docker_containers.py @@ -141,6 +141,33 @@ def from_dockerfile( return cls(name=img_name) + @classmethod + def from_dockerfile_content( + cls, + dockerfile_str: str, + name: Union[str, Callable[[], str]] = generate_langchain_container_tag, + **kwargs: Any, + ) -> "DockerImage": + """Build a new image from Dockerfile given a string with Dockerfile content.""" + + img_name = ( + name + if isinstance(name, str) and name + else generate_langchain_container_tag() + ) + import io + + buff = io.BytesIO(dockerfile_str.encode("utf-8")) + + docker_client = get_docker_client() + from pathlib import Path + + docker_client.images.build( + fileobj=buff, tag=img_name, rm=True, path=str(Path.cwd()), **kwargs + ) + + return cls(name=img_name) + class DockerContainer: """An isolated environment for running commands, based on docker container. diff --git a/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py b/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py index 459cd9f2176d3..4cfa458226489 100644 --- a/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py +++ b/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py @@ -53,6 +53,12 @@ def test_build_image_from_dockerfile_dirpath() -> None: run_container_cowsay(image) +@pytest.mark.requires("docker") +def test_build_image_from_dockerfile_string_content() -> None: + dockerfile_str = "FROM alpine\nRUN touch /animal.txt" + DockerImage.from_dockerfile_content(dockerfile_str) + + @pytest.mark.requires("docker") def test_docker_spawn_run_works() -> None: container = DockerContainer(DockerImage.from_tag("alpine"))