diff --git a/libs/langchain/langchain/utilities/docker_containers.py b/libs/langchain/langchain/utilities/docker_containers.py
new file mode 100644
index 0000000000000..08d1bf567ffc2
--- /dev/null
+++ b/libs/langchain/langchain/utilities/docker_containers.py
@@ -0,0 +1,342 @@
+import io
+import logging
+import uuid
+from datetime import datetime
+from functools import lru_cache
+from pathlib import Path
+from types import TracebackType
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    Generator,
+    List,
+    Optional,
+    Tuple,
+    Type,
+    Union,
+)
+
+import docker
+import docker.errors
+
+if TYPE_CHECKING:
+    from docker.models.containers import Container
+
+logger = logging.getLogger(__name__)
+
+
+@lru_cache(maxsize=1)
+def get_docker_client(**kwargs: Any) -> docker.DockerClient:
+    """Return a cached docker client.
+
+    The client is created with ``docker.from_env`` so, by default, environment
+    variables are used to connect to the docker daemon. ``lru_cache`` keeps only
+    the most recent client and requires every value in ``kwargs`` to be hashable.
+    """
+    return docker.from_env(**kwargs)
+
+
+def generate_langchain_container_tag() -> str:
+    """Generate a unique ``langchain_runner:<timestamp>-<random hex>`` name."""
+    timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+    return f"langchain_runner:{timestamp}-{uuid.uuid4().hex[:8]}"
+
+
+def _resolve_image_name(name: Union[str, Callable[[], str]]) -> str:
+    """Turn the ``name`` argument of the image builders into a concrete name."""
+    if callable(name):
+        # Honour user supplied factories, not only the default one.
+        return name()
+    # An empty name falls back to a generated one.
+    return name or generate_langchain_container_tag()
+
+
+class DockerImage:
+    """Represents a locally available docker image as a tag.
+
+    You can either use an existing docker image or build a new one from a
+    Dockerfile.
+
+    Examples:
+        >>> image = DockerImage.from_tag("alpine")
+        >>> image = DockerImage.from_tag("python", tag="3.9-slim")
+        >>> image = DockerImage.from_dockerfile("example/Dockerfile")
+        >>> image = DockerImage.from_dockerfile("path/to/dir_with_Dockerfile/", name="cow")
+    """
+
+    def __init__(self, name: str):
+        """Wrap an image that already exists locally.
+
+        Note that it does not pull the image from the internet; use
+        ``DockerImage.from_tag`` for that.
+
+        Args:
+            name: Image name, optionally with a tag. ``latest`` is used when no
+                tag is given.
+
+        Raises:
+            ValueError: If the image does not exist locally.
+        """
+        # Look for the tag separator in the last path component only, so that a
+        # registry port ("localhost:5000/image") is not mistaken for a tag.
+        if ":" in name.rsplit("/", 1)[-1]:
+            self.name = name
+        else:
+            # by default, image has latest tag.
+            self.name = name + ":latest"
+
+        # Check the normalised name so that the image this object refers to is
+        # the one that is verified.
+        if not self.exists(self.name):
+            raise ValueError(
+                f"Invalid value: name={name} does not exist on your system. "
+                "Use DockerImage.from_tag() to pull it."
+            )
+
+    def __repr__(self) -> str:
+        return f"DockerImage(name={self.name})"
+
+    @classmethod
+    def exists(cls, name: str) -> bool:
+        """Check whether a docker image called ``name`` exists locally."""
+        return len(get_docker_client().images.list(name=name)) > 0
+
+    @classmethod
+    def remove(cls, name: Union[str, "DockerImage"]) -> None:
+        """Remove an image from the system if it exists.
+
+        WARNING: be cautious with this function, it is an irreversible operation!
+
+        Args:
+            name: Name of the image, or the ``DockerImage`` itself.
+        """
+        image_name = name.name if isinstance(name, DockerImage) else name
+        if cls.exists(image_name):
+            get_docker_client().images.remove(image_name)
+
+    @classmethod
+    def from_tag(
+        cls,
+        repository: str,
+        tag: str = "latest",
+        auth_config: Optional[Dict[str, str]] = None,
+    ) -> "DockerImage":
+        """Use the image with a given repository and tag.
+
+        The image is pulled if it is not present on the system.
+        Example: repository = "alpine" (will get "latest" tag)
+        Example: repository = "python" tag = "3.9-slim"
+        """
+        name = f"{repository}:{tag}"
+        if not cls.exists(name):
+            get_docker_client().images.pull(
+                repository=repository, tag=tag, auth_config=auth_config
+            )
+        return cls(name=name)
+
+    @classmethod
+    def from_dockerfile(
+        cls,
+        dockerfile_path: Union[Path, str],
+        name: Union[str, Callable[[], str]] = generate_langchain_container_tag,
+        **kwargs: Any,
+    ) -> "DockerImage":
+        """Build a new image from a Dockerfile given its path.
+
+        Args:
+            dockerfile_path: Path to a file named ``Dockerfile`` or to a
+                directory containing one.
+            name: Image name, or a callable that returns one.
+            **kwargs: Passed to the docker client ``images.build()`` method.
+
+        Raises:
+            ValueError: If ``dockerfile_path`` is neither of the above.
+        """
+        img_name = _resolve_image_name(name)
+        dockerfile = Path(dockerfile_path)
+        docker_client = get_docker_client()
+
+        if dockerfile.is_dir():
+            if not (dockerfile / "Dockerfile").exists():
+                raise ValueError(
+                    f"Directory {dockerfile} does not contain a Dockerfile."
+                )
+            docker_client.images.build(
+                path=str(dockerfile), tag=img_name, rm=True, **kwargs
+            )
+        elif dockerfile.name == "Dockerfile" and dockerfile.is_file():
+            # Close the file handle as soon as the build is done.
+            with dockerfile.open("rb") as fileobj:
+                docker_client.images.build(
+                    fileobj=fileobj, tag=img_name, rm=True, **kwargs
+                )
+        else:
+            raise ValueError(f"Invalid parameter: dockerfile: {dockerfile}")
+
+        return cls(name=img_name)
+
+    @classmethod
+    def from_dockerfile_content(
+        cls,
+        dockerfile_str: str,
+        name: Union[str, Callable[[], str]] = generate_langchain_container_tag,
+        **kwargs: Any,
+    ) -> "DockerImage":
+        """Build a new image from a string with the Dockerfile content.
+
+        Args:
+            dockerfile_str: Content of the Dockerfile.
+            name: Image name, or a callable that returns one.
+            **kwargs: Passed to the docker client ``images.build()`` method.
+        """
+        img_name = _resolve_image_name(name)
+        buff = io.BytesIO(dockerfile_str.encode("utf-8"))
+        get_docker_client().images.build(
+            fileobj=buff, tag=img_name, rm=True, path=str(Path.cwd()), **kwargs
+        )
+        return cls(name=img_name)
+
+
+class DockerContainer:
+    """An isolated environment for running commands, based on docker container.
+
+    Examples:
+        If you need to run container for a single job:
+        >>> container = DockerContainer(DockerImage.from_tag("alpine"))
+        >>> status_code, logs = container.spawn_run("echo hello world")
+
+        To run a container in background and execute commands:
+        >>> with DockerContainer(DockerImage.from_tag("alpine")) as container:
+        >>>     status_code, logs = container.run("echo hello world")
+    """
+
+    def __init__(self, image: DockerImage, **kwargs: Any):
+        """Wrap a docker image to control container interaction.
+
+        NOTE: **kwargs are passed to docker client containers.run() method so you
+        can use them as you wish.
+        """
+        self.image = image
+        self._client = get_docker_client()
+        self._container: Optional["Container"] = None
+        self._run_kwargs = kwargs
+
+    def __enter__(self) -> "DockerContainer":
+        """Start the container so that commands can be executed inside it."""
+        self.unsafe_start()
+        return self
+
+    def unsafe_start(self) -> None:
+        """Start the container without entering the context.
+
+        Please prefer to use the ``with DockerContainer(...)`` statement.
+        """
+        assert self._container is None, "You cannot re-entry container"
+        # tty=True is required to keep container alive
+        self._container = self._client.containers.run(
+            self.image.name,
+            detach=True,
+            tty=True,
+            **self._run_kwargs,
+        )
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc: Optional[BaseException],
+        traceback: Optional[TracebackType],
+    ) -> bool:
+        """Stop and remove the container. Exceptions are never suppressed."""
+        assert self._container is not None, "You cannot exit unstarted container."
+        if exc_type is None:
+            self.unsafe_exit()
+            return False
+        # An exception is propagating: clean up on a best-effort basis so that a
+        # cleanup failure does not replace the original error.
+        try:
+            self._cleanup()
+        except Exception:
+            logger.exception(
+                "Failed to stop and remove container to cleanup exception."
+            )
+        finally:
+            # Forget the container so that this object can be started again.
+            self._container = None
+        return False
+
+    def unsafe_exit(self) -> None:
+        """Stop and remove the container. Please prefer the ``with`` statement."""
+        if self._container is None:
+            return
+        self._cleanup()
+        self._container = None
+
+    def spawn_run(
+        self, command: Union[str, List[str]], **kwargs: Any
+    ) -> Tuple[int, bytes]:
+        """Run a command in a new container that lives only for this call.
+
+        You can also pass all arguments that docker client containers.run()
+        accepts; they take precedence over the ones given to the constructor.
+        It blocks till command is finished.
+
+        Returns:
+            The exit status code and the logs of the container.
+        """
+        # Merge into a new dict: ``dict.update`` returns None, and the kwargs
+        # stored on the instance must stay untouched.
+        custom_kwargs = {**self._run_kwargs, **kwargs}
+        # There is a known issue with auto_remove=True and docker-py:
+        # https://github.com/docker/docker-py/issues/1813
+        # so as workaround we detach, wait & and remove container manually
+        container = self._client.containers.run(
+            self.image.name, command=command, detach=True, **custom_kwargs
+        )
+        try:
+            status_code = container.wait().get("StatusCode", 1)
+            logs = container.logs()
+        finally:
+            # Do not leak the container if waiting or reading the logs fails.
+            container.remove(force=True)
+        return status_code, logs
+
+    @property
+    def docker_container(self) -> "Container":
+        """Return the docker container object."""
+        assert (
+            self._container is not None
+        ), "You cannot access container that was not entered"
+        return self._container
+
+    @property
+    def name(self) -> str:
+        """Name of the container if it exists, empty string otherwise."""
+        if self._container is not None:
+            return self._container.name
+        return ""
+
+    def run(
+        self, command: Union[str, List[str]], **kwargs: Any
+    ) -> Tuple[int, Union[bytes, Tuple[bytes, bytes], Generator[bytes, None, None]]]:
+        """Run a command inside the started container.
+
+        You can send any args which docker-py exec_run accepts:
+        https://docker-py.readthedocs.io/en/stable/containers.html#docker.models.containers.Container.exec_run
+        Return is a tuple of exit code and output which is controlled by arguments:
+        stream, socket and demux.
+        """
+        assert (
+            self._container is not None
+        ), "You cannot execute command in container that was not entered"
+
+        exit_code, output = self._container.exec_run(cmd=command, **kwargs)
+        return exit_code, output
+
+    def _cleanup(self) -> None:
+        """Stop and remove the container, if there is one."""
+        if self._container is None:
+            return
+        self._container.stop()
+        self._container.remove()
diff --git a/libs/langchain/poetry.lock b/libs/langchain/poetry.lock index e21acf426b83b..64483a003c0f6 100644 --- a/libs/langchain/poetry.lock +++ b/libs/langchain/poetry.lock @@ -3542,6 +3542,7 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, + {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, ] [[package]] @@ -8148,10 +8149,8 @@ description = "Fast and Safe Tensor serialization" optional = true python-versions = "*" files = [ - {file = "safetensors-0.3.2-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:4c7827b64b1da3f082301b5f5a34331b8313104c14f257099a12d32ac621c5cd"}, {file = "safetensors-0.3.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b6a66989075c2891d743153e8ba9ca84ee7232c8539704488f454199b8b8f84d"}, {file = "safetensors-0.3.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:670d6bc3a3b377278ce2971fa7c36ebc0a35041c4ea23b9df750a39380800195"}, - {file = "safetensors-0.3.2-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:67ef2cc747c88e3a8d8e4628d715874c0366a8ff1e66713a9d42285a429623ad"}, {file = "safetensors-0.3.2-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:564f42838721925b5313ae864ba6caa6f4c80a9fbe63cf24310c3be98ab013cd"}, {file = "safetensors-0.3.2-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:7f80af7e4ab3188daaff12d43d078da3017a90d732d38d7af4eb08b6ca2198a5"}, {file = 
"safetensors-0.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec30d78f20f1235b252d59cbb9755beb35a1fde8c24c89b3c98e6a1804cfd432"}, @@ -8160,9 +8159,7 @@ files = [ {file = "safetensors-0.3.2-cp310-cp310-win32.whl", hash = "sha256:2961c1243fd0da46aa6a1c835305cc4595486f8ac64632a604d0eb5f2de76175"}, {file = "safetensors-0.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:c813920482c337d1424d306e1b05824a38e3ef94303748a0a287dea7a8c4f805"}, {file = "safetensors-0.3.2-cp311-cp311-macosx_10_11_universal2.whl", hash = "sha256:707df34bd9b9047e97332136ad98e57028faeccdb9cfe1c3b52aba5964cc24bf"}, - {file = "safetensors-0.3.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:23d1d9f74208c9dfdf852a9f986dac63e40092385f84bf0789d599efa8e6522f"}, {file = "safetensors-0.3.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:becc5bb85b2947eae20ed23b407ebfd5277d9a560f90381fe2c42e6c043677ba"}, - {file = "safetensors-0.3.2-cp311-cp311-macosx_12_0_universal2.whl", hash = "sha256:c1913c6c549b1805e924f307159f0ee97b73ae3ce150cd2401964da015e0fa0b"}, {file = "safetensors-0.3.2-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:30a75707be5cc9686490bde14b9a371cede4af53244ea72b340cfbabfffdf58a"}, {file = "safetensors-0.3.2-cp311-cp311-macosx_13_0_universal2.whl", hash = "sha256:54ad6af663e15e2b99e2ea3280981b7514485df72ba6d014dc22dae7ba6a5e6c"}, {file = "safetensors-0.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37764b3197656ef507a266c453e909a3477dabc795962b38e3ad28226f53153b"}, @@ -8170,28 +8167,22 @@ files = [ {file = "safetensors-0.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ada0fac127ff8fb04834da5c6d85a8077e6a1c9180a11251d96f8068db922a17"}, {file = "safetensors-0.3.2-cp311-cp311-win32.whl", hash = "sha256:155b82dbe2b0ebff18cde3f76b42b6d9470296e92561ef1a282004d449fa2b4c"}, {file = "safetensors-0.3.2-cp311-cp311-win_amd64.whl", hash = 
"sha256:a86428d196959619ce90197731be9391b5098b35100a7228ef4643957648f7f5"}, - {file = "safetensors-0.3.2-cp37-cp37m-macosx_10_11_x86_64.whl", hash = "sha256:91e796b6e465d9ffaca4c411d749f236c211e257f3a8e9b25a5ffc1a42d3bfa7"}, {file = "safetensors-0.3.2-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:c1f8ab41ed735c5b581f451fd15d9602ff51aa88044bfa933c5fa4b1d0c644d1"}, - {file = "safetensors-0.3.2-cp37-cp37m-macosx_12_0_x86_64.whl", hash = "sha256:e6a8ff5652493598c45cd27f5613c193d3f15e76e0f81613d399c487a7b8cc50"}, {file = "safetensors-0.3.2-cp37-cp37m-macosx_13_0_x86_64.whl", hash = "sha256:bc9cfb3c9ea2aec89685b4d656f9f2296f0f0d67ecf2bebf950870e3be89b3db"}, {file = "safetensors-0.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ace5d471e3d78e0d93f952707d808b5ab5eac77ddb034ceb702e602e9acf2be9"}, {file = "safetensors-0.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de3e20a388b444381bcda1a3193cce51825ddca277e4cf3ed1fe8d9b2d5722cd"}, {file = "safetensors-0.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d7d70d48585fe8df00725aa788f2e64fd24a4c9ae07cd6be34f6859d0f89a9c"}, {file = "safetensors-0.3.2-cp37-cp37m-win32.whl", hash = "sha256:6ff59bc90cdc857f68b1023be9085fda6202bbe7f2fd67d06af8f976d6adcc10"}, {file = "safetensors-0.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:8b05c93da15fa911763a89281906ca333ed800ab0ef1c7ce53317aa1a2322f19"}, - {file = "safetensors-0.3.2-cp38-cp38-macosx_10_11_x86_64.whl", hash = "sha256:94857abc019b49a22a0065cc7741c48fb788aa7d8f3f4690c092c56090227abe"}, {file = "safetensors-0.3.2-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:8969cfd9e8d904e8d3c67c989e1bd9a95e3cc8980d4f95e4dcd43c299bb94253"}, - {file = "safetensors-0.3.2-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:da482fa011dc88fe7376d8f8b42c0ccef2f260e0cbc847ceca29c708bf75a868"}, {file = "safetensors-0.3.2-cp38-cp38-macosx_13_0_x86_64.whl", hash = 
"sha256:f54148ac027556eb02187e9bc1556c4d916c99ca3cb34ca36a7d304d675035c1"}, {file = "safetensors-0.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:caec25fedbcf73f66c9261984f07885680f71417fc173f52279276c7f8a5edd3"}, {file = "safetensors-0.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:50224a1d99927ccf3b75e27c3d412f7043280431ab100b4f08aad470c37cf99a"}, {file = "safetensors-0.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa98f49e95f02eb750d32c4947e7d5aa43883149ebd0414920866446525b70f0"}, {file = "safetensors-0.3.2-cp38-cp38-win32.whl", hash = "sha256:33409df5e28a83dc5cc5547a3ac17c0f1b13a1847b1eb3bc4b3be0df9915171e"}, {file = "safetensors-0.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:e04a7cbbb3856159ab99e3adb14521544f65fcb8548cce773a1435a0f8d78d27"}, - {file = "safetensors-0.3.2-cp39-cp39-macosx_10_11_x86_64.whl", hash = "sha256:f39f3d951543b594c6bc5082149d994c47ca487fd5d55b4ce065ab90441aa334"}, {file = "safetensors-0.3.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:7c864cf5dcbfb608c5378f83319c60cc9c97263343b57c02756b7613cd5ab4dd"}, {file = "safetensors-0.3.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:14e8c19d6dc51d4f70ee33c46aff04c8ba3f95812e74daf8036c24bc86e75cae"}, - {file = "safetensors-0.3.2-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:41b10b0a6dfe8fdfbe4b911d64717d5647e87fbd7377b2eb3d03fb94b59810ea"}, {file = "safetensors-0.3.2-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:042a60f633c3c7009fdf6a7c182b165cb7283649d2a1e9c7a4a1c23454bd9a5b"}, {file = "safetensors-0.3.2-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:fafd95e5ef41e8f312e2a32b7031f7b9b2a621b255f867b221f94bb2e9f51ae8"}, {file = "safetensors-0.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ed77cf358abce2307f03634694e0b2a29822e322a1623e0b1aa4b41e871bf8b"}, @@ -10447,4 +10438,4 @@ text-helpers = ["chardet"] [metadata] lock-version = "2.0" python-versions = 
">=3.8.1,<4.0" -content-hash = "43a6bd42efc0baf917418087f788aaf3b1bc793cb4aa81de99c52ed6a7d54d26" +content-hash = "f89d812d676bdc77e633ec4bb81c9f5d44c0abf64e8102eb8d095362bba01428" diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml index e38c5721ce04c..1d0da2336acab 100644 --- a/libs/langchain/pyproject.toml +++ b/libs/langchain/pyproject.toml @@ -127,6 +127,7 @@ xata = {version = "^1.0.0a7", optional = true} xmltodict = {version = "^0.13.0", optional = true} markdownify = {version = "^0.11.6", optional = true} assemblyai = {version = "^0.17.0", optional = true} +docker = {version = "^6.1.3", optional = true} [tool.poetry.group.test.dependencies] diff --git a/libs/langchain/tests/integration_tests/utilities/docker_test_data/Dockerfile b/libs/langchain/tests/integration_tests/utilities/docker_test_data/Dockerfile new file mode 100644 index 0000000000000..7c19ef9a14310 --- /dev/null +++ b/libs/langchain/tests/integration_tests/utilities/docker_test_data/Dockerfile @@ -0,0 +1,5 @@ +# This is a test dockerfile that will be used to test the docker_containers. +FROM python:3.11-alpine +RUN pip install --no-cache-dir cowsay==6.0 +# This runs cowsay and it requires arguments like -t "hello world". 
+ENTRYPOINT ["python3", "-m", "cowsay"]
\ No newline at end of file
diff --git a/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py b/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py
new file mode 100644
index 0000000000000..4cfa458226489
--- /dev/null
+++ b/libs/langchain/tests/integration_tests/utilities/test_docker_containers.py
@@ -0,0 +1,104 @@
+from pathlib import Path
+from typing import cast
+
+import pytest
+
+from langchain.utilities.docker_containers import (
+    DockerContainer,
+    DockerImage,
+    generate_langchain_container_tag,
+    get_docker_client,
+)
+
+
+def test_generate_langchain_container_tag() -> None:
+    tag = generate_langchain_container_tag()
+    assert tag.startswith("langchain")
+    assert len(tag) > len("langchain")
+    new_tag = generate_langchain_container_tag()
+    assert tag != new_tag, "Tags should be different"
+
+
+@pytest.mark.requires("docker")
+def test_docker_image_throws_for_bad_name() -> None:
+    with pytest.raises(ValueError):
+        DockerImage(name="docker_image_which_should_not_exist_42")
+
+
+def run_container_cowsay(image: DockerImage) -> None:
+    """Helper for testing - runs cowsay command and verifies it works.
+
+    The image is removed afterwards, whether or not the checks pass.
+    """
+    # The command holds arguments only: the ENTRYPOINT defined in the test
+    # Dockerfile already runs cowsay.
+    try:
+        container = DockerContainer(image)
+        ret_code, log = container.spawn_run('-t "I like langchain!"')
+        assert ret_code == 0
+        assert log.find(b"I like langchain") >= 0, "Cowsay should say same words"
+    finally:
+        # DockerImage.remove() is declared to take the image name.
+        DockerImage.remove(image.name)
+
+
+@pytest.mark.requires("docker")
+def test_build_image_from_dockerfile() -> None:
+    dockerfile_path = Path(__file__).parent / "docker_test_data/Dockerfile"
+    image = DockerImage.from_dockerfile(dockerfile_path, name="cow")
+    run_container_cowsay(image)
+
+
+@pytest.mark.requires("docker")
+def test_build_image_from_dockerfile_dirpath() -> None:
+    dockerfile_dir = Path(__file__).parent / "docker_test_data/"
+    image = DockerImage.from_dockerfile(dockerfile_dir)
+    run_container_cowsay(image)
+
+
+@pytest.mark.requires("docker")
+def test_build_image_from_dockerfile_string_content() -> None:
+    dockerfile_str = "FROM alpine\nRUN touch /animal.txt"
+    image = DockerImage.from_dockerfile_content(dockerfile_str)
+    try:
+        assert DockerImage.exists(image.name)
+    finally:
+        # Do not leave the freshly built image behind.
+        DockerImage.remove(image.name)
+
+
+@pytest.mark.requires("docker")
+def test_docker_spawn_run_works() -> None:
+    container = DockerContainer(DockerImage.from_tag("alpine"))
+    status_code, logs = container.spawn_run(["echo", "hello", "world"])
+    assert status_code == 0
+    assert logs.find(b"hello world") >= 0
+
+    status_code, logs = container.spawn_run("echo good bye")
+    assert status_code == 0
+    assert logs.find(b"good bye") >= 0
+
+
+@pytest.mark.requires("docker")
+def test_docker_spawn_run_return_nonzero_status_code() -> None:
+    container = DockerContainer(DockerImage.from_tag("alpine"))
+    status_code, logs = container.spawn_run("sh -c 'echo hey && exit 1'")
+    assert status_code == 1
+    assert logs.find(b"hey") >= 0
+
+
+@pytest.mark.requires("docker")
+def test_docker_container_background_run_works() -> None:
+    client = get_docker_client()
+    container_name: str
+    with DockerContainer(DockerImage.from_tag("alpine")) as container:
+        container_name = container.name
+        assert len(client.containers.list(filters={"name": container_name})) == 1
+        ret_code, output = container.run("touch /animal.txt")
+        assert ret_code == 0
+
+        ret_code, output = container.run("ls /")
+        assert ret_code == 0
+        assert cast(bytes, output).find(b"animal.txt") >= 0
+
+    assert len(client.containers.list(filters={"name": container_name})) == 0