diff --git a/libs/community/langchain_community/document_loaders/directory.py b/libs/community/langchain_community/document_loaders/directory.py index 7cfa456487cc5..3cb2ad1309a0a 100644 --- a/libs/community/langchain_community/document_loaders/directory.py +++ b/libs/community/langchain_community/document_loaders/directory.py @@ -2,7 +2,7 @@ import logging import random from pathlib import Path -from typing import Any, List, Optional, Type, Union +from typing import Any, List, Optional, Sequence, Type, Union from langchain_core.documents import Document @@ -41,6 +41,7 @@ def __init__( use_multithreading: bool = False, max_concurrency: int = 4, *, + exclude: Union[Sequence[str], str] = (), sample_size: int = 0, randomize_sample: bool = False, sample_seed: Union[int, None] = None, @@ -51,6 +52,8 @@ def __init__( path: Path to directory. glob: Glob pattern to use to find files. Defaults to "**/[!.]*" (all files except hidden). + exclude: A pattern or list of patterns to exclude from results. + Use glob syntax. silent_errors: Whether to silently ignore errors. Defaults to False. load_hidden: Whether to load hidden files. Defaults to False. loader_cls: Loader class to use for loading files. @@ -64,11 +67,38 @@ def __init__( directory. randomize_sample: Shuffle the files to get a random sample. sample_seed: set the seed of the random shuffle for reproducibility. + + Examples: + + .. code-block:: python + from langchain_community.document_loaders import DirectoryLoader + + # Load all non-hidden files in a directory. + loader = DirectoryLoader("/path/to/directory") + + # Load all text files in a directory without recursion. + loader = DirectoryLoader("/path/to/directory", glob="*.txt") + + # Recursively load all text files in a directory. + loader = DirectoryLoader( + "/path/to/directory", glob="*.txt", recursive=True + ) + + # Load all files in a directory, except for py files. + loader = DirectoryLoader("/path/to/directory", exclude="*.py") + + # Load all files in a directory, except for py or pyc files. + loader = DirectoryLoader( + "/path/to/directory", exclude=["*.py", "*.pyc"] + ) """ if loader_kwargs is None: loader_kwargs = {} + if isinstance(exclude, str): + exclude = (exclude,) self.path = path self.glob = glob + self.exclude = exclude self.load_hidden = load_hidden self.loader_cls = loader_cls self.loader_kwargs = loader_kwargs @@ -118,7 +148,13 @@ def load(self) -> List[Document]: raise ValueError(f"Expected directory, got file: '{self.path}'") docs: List[Document] = [] - items = list(p.rglob(self.glob) if self.recursive else p.glob(self.glob)) + + paths = p.rglob(self.glob) if self.recursive else p.glob(self.glob) + items = [ + path + for path in paths + if not (self.exclude and any(path.match(glob) for glob in self.exclude)) + ] if self.sample_size > 0: if self.randomize_sample: diff --git a/libs/community/poetry.lock b/libs/community/poetry.lock index c5ea8eae37ae4..c4b8da543a557 100644 --- a/libs/community/poetry.lock +++ b/libs/community/poetry.lock @@ -1457,6 +1457,21 @@ files = [ marshmallow = ">=3.18.0,<4.0.0" typing-inspect = ">=0.4.0,<1" +[[package]] +name = "dataclasses-json-speakeasy" +version = "0.5.11" +description = "Easily serialize dataclasses to and from JSON." +optional = true +python-versions = ">=3.7,<4.0" +files = [ + {file = "dataclasses_json_speakeasy-0.5.11-py3-none-any.whl", hash = "sha256:ac52a069a01e8521015d682f37849bfdf056c36fa3f81497055e201fec684104"}, + {file = "dataclasses_json_speakeasy-0.5.11.tar.gz", hash = "sha256:418a987cea2ccf4e4be662f39faa5cc79b47b147c9d1a69d6928d6a27e0c17e8"}, +] + +[package.dependencies] +marshmallow = ">=3.18.0,<4.0.0" +typing-inspect = ">=0.4.0,<1" + [[package]] name = "datasets" version = "2.16.1" @@ -1750,6 +1765,20 @@ elastic-transport = ">=8,<9" async = ["aiohttp (>=3,<4)"] requests = ["requests (>=2.4.0,<3.0.0)"] +[[package]] +name = "emoji" +version = "2.10.1" +description = "Emoji for Python" +optional = true +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "emoji-2.10.1-py2.py3-none-any.whl", hash = "sha256:11fb369ea79d20c14efa4362c732d67126df294a7959a2c98bfd7447c12a218e"}, + {file = "emoji-2.10.1.tar.gz", hash = "sha256:16287283518fb7141bde00198f9ffff4e1c1cb570efb68b2f1ec50975c3a581d"}, +] + +[package.extras] +dev = ["coverage", "coveralls", "pytest"] + [[package]] name = "entrypoints" version = "0.4" @@ -1938,6 +1967,17 @@ docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1 testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"] typing = ["typing-extensions (>=4.8)"] +[[package]] +name = "filetype" +version = "1.2.0" +description = "Infer file type and MIME type of any file/buffer. No external dependencies." +optional = true +python-versions = "*" +files = [ + {file = "filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25"}, + {file = "filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb"}, +] + [[package]] name = "fiona" version = "1.9.5" @@ -3215,6 +3255,17 @@ files = [ [package.dependencies] jsonpointer = ">=1.9" +[[package]] +name = "jsonpath-python" +version = "1.0.6" +description = "A more powerful JSONPath implementation in modern python" +optional = true +python-versions = ">=3.6" +files = [ + {file = "jsonpath-python-1.0.6.tar.gz", hash = "sha256:dd5be4a72d8a2995c3f583cf82bf3cd1a9544cfdabf2d22595b67aff07349666"}, + {file = "jsonpath_python-1.0.6-py3-none-any.whl", hash = "sha256:1e3b78df579f5efc23565293612decee04214609208a2335884b3ee3f786b575"}, +] + [[package]] name = "jsonpointer" version = "2.4" @@ -3650,7 +3701,7 @@ files = [ [[package]] name = "langchain-core" -version = "0.1.21" +version = "0.1.22" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -3674,6 +3725,20 @@ extended-testing = ["jinja2 (>=3,<4)"] type = "directory" url = "../core" +[[package]] +name = "langdetect" +version = "1.0.9" +description = "Language detection library ported from Google's language-detection." +optional = true +python-versions = "*" +files = [ + {file = "langdetect-1.0.9-py2-none-any.whl", hash = "sha256:7cbc0746252f19e76f77c0b1690aadf01963be835ef0cd4b56dddf2a8f1dfc2a"}, + {file = "langdetect-1.0.9.tar.gz", hash = "sha256:cbc1fef89f8d062739774bd51eda3da3274006b3661d199c2655f6b3f6d605a0"}, +] + +[package.dependencies] +six = "*" + [[package]] name = "langsmith" version = "0.0.87" @@ -6406,6 +6471,20 @@ files = [ [package.extras] cli = ["click (>=5.0)"] +[[package]] +name = "python-iso639" +version = "2024.2.7" +description = "Look-up utilities for ISO 639 language codes and names" +optional = true +python-versions = ">=3.8" +files = [ + {file = "python-iso639-2024.2.7.tar.gz", hash = "sha256:c323233348c34d57c601e3e6d824088e492896bcb97a61a87f7d93401a305377"}, + {file = "python_iso639-2024.2.7-py3-none-any.whl", hash = "sha256:7b149623ff74230f4ee3061fb01d18e57a8d07c5fee2aa72907f39b7f6d16cbc"}, +] + +[package.extras] +dev = ["black (==24.1.1)", "build (==1.0.3)", "flake8 (==7.0.0)", "pytest (==8.0.0)", "twine (==4.0.2)"] + [[package]] name = "python-json-logger" version = "2.0.7" @@ -6434,6 +6513,17 @@ jsonschema = ">=4.18" Markdown = ">=2.4" six = ">=1.5.2" +[[package]] +name = "python-magic" +version = "0.4.27" +description = "File type identification using libmagic" +optional = true +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "python-magic-0.4.27.tar.gz", hash = "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b"}, + {file = "python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3"}, +] + [[package]] name = "pytz" version = "2023.4" @@ -6508,7 +6598,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -8319,6 +8408,126 @@ tzdata = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] devenv = ["check-manifest", "pytest (>=4.3)", "pytest-cov", "pytest-mock (>=3.3)", "zest.releaser"] +[[package]] +name = "unstructured" +version = "0.12.4" +description = "A library that prepares raw documents for downstream ML tasks." +optional = true +python-versions = ">=3.9.0,<3.12" +files = [ + {file = "unstructured-0.12.4-py3-none-any.whl", hash = "sha256:f1aa046297a3afba3aa16895e513aca6a93802ef73b7a18080656435c4deb217"}, + {file = "unstructured-0.12.4.tar.gz", hash = "sha256:019cf52e9e2bfa286e61ffa0d7d336e1645280f9a0f165e697583143fcfe708a"}, +] + +[package.dependencies] +backoff = "*" +beautifulsoup4 = "*" +chardet = "*" +dataclasses-json = "*" +emoji = "*" +filetype = "*" +langdetect = "*" +lxml = "*" +nltk = "*" +numpy = "*" +python-iso639 = "*" +python-magic = "*" +rapidfuzz = "*" +requests = "*" +tabulate = "*" +typing-extensions = "*" +unstructured-client = ">=0.15.1" +wrapt = "*" + +[package.extras] +airtable = ["pyairtable"] +all-docs = ["markdown", "msg-parser", "networkx", "onnx", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pikepdf", "pillow-heif", "pypandoc", "pypdf", "python-docx", "python-pptx (<=0.6.23)", "unstructured-inference (==0.7.23)", "unstructured.pytesseract (>=0.3.12)", "xlrd"] +azure = ["adlfs", "fsspec"] +azure-cognitive-search = ["azure-search-documents"] +bedrock = ["boto3", "langchain-community"] +biomed = ["bs4"] +box = ["boxfs", "fsspec"] +chroma = ["chromadb"] +confluence = ["atlassian-python-api"] +csv = ["pandas"] +databricks-volumes = ["databricks-sdk"] +delta-table = ["deltalake", "fsspec"] +discord = ["discord-py"] +doc = ["python-docx"] +docx = ["python-docx"] +dropbox = ["dropboxdrivefs", "fsspec"] +elasticsearch = ["elasticsearch"] +embed-huggingface = ["huggingface", "langchain-community", "sentence-transformers"] +epub = ["pypandoc"] +gcs = ["bs4", "fsspec", "gcsfs"] +github = ["pygithub (>1.58.0)"] +gitlab = ["python-gitlab"] +google-drive = ["google-api-python-client"] +hubspot = ["hubspot-api-client", "urllib3"] +huggingface = ["langdetect", "sacremoses", "sentencepiece", "torch", "transformers"] +image = ["onnx", "pdf2image", "pdfminer.six", "pikepdf", "pillow-heif", "pypdf", "unstructured-inference (==0.7.23)", "unstructured.pytesseract (>=0.3.12)"] +jira = ["atlassian-python-api"] +local-inference = ["markdown", "msg-parser", "networkx", "onnx", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pikepdf", "pillow-heif", "pypandoc", "pypdf", "python-docx", "python-pptx (<=0.6.23)", "unstructured-inference (==0.7.23)", "unstructured.pytesseract (>=0.3.12)", "xlrd"] +md = ["markdown"] +mongodb = ["pymongo"] +msg = ["msg-parser"] +notion = ["htmlBuilder", "notion-client"] +odt = ["pypandoc", "python-docx"] +onedrive = ["Office365-REST-Python-Client", "bs4", "msal"] +openai = ["langchain-community", "openai", "tiktoken"] +opensearch = ["opensearch-py"] +org = ["pypandoc"] +outlook = ["Office365-REST-Python-Client", "msal"] +paddleocr = ["unstructured.paddleocr (==2.6.1.3)"] +pdf = ["onnx", "pdf2image", "pdfminer.six", "pikepdf", "pillow-heif", "pypdf", "unstructured-inference (==0.7.23)", "unstructured.pytesseract (>=0.3.12)"] +pinecone = ["pinecone-client (==2.2.4)"] +postgres = ["psycopg2-binary"] +ppt = ["python-pptx (<=0.6.23)"] +pptx = ["python-pptx (<=0.6.23)"] +qdrant = ["qdrant-client"] +reddit = ["praw"] +rst = ["pypandoc"] +rtf = ["pypandoc"] +s3 = ["fsspec", "s3fs"] +salesforce = ["simple-salesforce"] +sftp = ["fsspec", "paramiko"] +sharepoint = ["Office365-REST-Python-Client", "msal"] +slack = ["slack-sdk"] +tsv = ["pandas"] +weaviate = ["weaviate-client"] +wikipedia = ["wikipedia"] +xlsx = ["networkx", "openpyxl", "pandas", "xlrd"] + +[[package]] +name = "unstructured-client" +version = "0.18.0" +description = "Python Client SDK for Unstructured API" +optional = true +python-versions = ">=3.8" +files = [ + {file = "unstructured-client-0.18.0.tar.gz", hash = "sha256:b5f1866b6a48d2e28645e37e86c9d58b1ee7df2d88e79adf873572338c027aa8"}, + {file = "unstructured_client-0.18.0-py3-none-any.whl", hash = "sha256:36d8c5cb01b97a87e271e11d4d5a063d1c5b85fc5fd7f07819c35a9bef74821f"}, +] + +[package.dependencies] +certifi = ">=2023.7.22" +charset-normalizer = ">=3.2.0" +dataclasses-json-speakeasy = ">=0.5.11" +idna = ">=3.4" +jsonpath-python = ">=1.0.6" +marshmallow = ">=3.19.0" +mypy-extensions = ">=1.0.0" +packaging = ">=23.1" +python-dateutil = ">=2.8.2" +requests = ">=2.31.0" +six = ">=1.16.0" +typing-extensions = ">=4.7.1" +typing-inspect = ">=0.9.0" +urllib3 = ">=1.26.18" + +[package.extras] +dev = ["pylint (==2.16.2)"] + [[package]] name = "update-checker" version = "0.18.0" @@ -8998,9 +9207,9 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [extras] cli = ["typer"] -extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "azure-ai-documentintelligence", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "cohere", "databricks-vectorsearch", "datasets", "dgml-utils", "elasticsearch", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "geopandas", "gitpython", "google-cloud-documentai", "gql", "gradientai", "hdbcli", "hologres-vector", "html2text", "httpx", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "nvidia-riva-client", "oci", "openai", "openapi-pydantic", "oracle-ads", "pandas", "pdfminer-six", "pgvector", "praw", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "rdflib", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict", "zhipuai"] +extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "azure-ai-documentintelligence", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "cohere", "databricks-vectorsearch", "datasets", "dgml-utils", "elasticsearch", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "geopandas", "gitpython", "google-cloud-documentai", "gql", "gradientai", "hdbcli", "hologres-vector", "html2text", "httpx", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "nvidia-riva-client", "oci", "openai", "openapi-pydantic", "oracle-ads", "pandas", "pdfminer-six", "pgvector", "praw", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "rdflib", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "unstructured", "upstash-redis", "xata", "xmltodict", "zhipuai"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "fe633ab7d246239420a26bebf8bcf857edcf0778f75b3eb2a4b1314cb13645c8" +content-hash = "53a097241464b24e955d3b8fc11d2a6ad1c1a17b85f6dafa5f9a2f380c6c95ae" diff --git a/libs/community/pyproject.toml b/libs/community/pyproject.toml index ac6b9a3a9940d..cd6d81d8c7dfe 100644 --- a/libs/community/pyproject.toml +++ b/libs/community/pyproject.toml @@ -92,6 +92,7 @@ hdbcli = {version = "^2.19.21", optional = true} oci = {version = "^2.119.1", optional = true} rdflib = {version = "7.0.0", optional = true} nvidia-riva-client = {version = "^2.14.0", optional = true} +unstructured = {version = "^0.12.4", optional = true, python = ">=3.9.0,<3.12"} [tool.poetry.group.test] optional = true @@ -256,7 +257,8 @@ extended_testing = [ "elasticsearch", "hdbcli", "oci", - "rdflib" + "rdflib", + "unstructured", ] [tool.ruff] diff --git a/libs/community/tests/unit_tests/document_loaders/test_directory.py b/libs/community/tests/unit_tests/document_loaders/test_directory.py index dc028e4814c70..147595015c256 100644 --- a/libs/community/tests/unit_tests/document_loaders/test_directory.py +++ b/libs/community/tests/unit_tests/document_loaders/test_directory.py @@ -1,3 +1,5 @@ +from pathlib import Path + import pytest from langchain_community.document_loaders import DirectoryLoader @@ -17,3 +19,21 @@ def test_raise_error_if_path_is_not_directory() -> None: loader.load() assert str(e.value) == f"Expected directory, got file: '{__file__}'" + + +@pytest.mark.requires("unstructured") +def test_exclude_ignores_matching_files(tmp_path: Path) -> None: + txt_file = tmp_path / "test.txt" + py_file = tmp_path / "test.py" + txt_file.touch() + py_file.touch() + + loader = DirectoryLoader(str(tmp_path), exclude=["*.py"]) + data = loader.load() + + assert len(data) == 1 + + +def test_exclude_as_string_converts_to_sequence() -> None: + loader = DirectoryLoader("./some_directory", exclude="*.py") + assert loader.exclude == ("*.py",) diff --git a/poetry.lock b/poetry.lock index d712c5ab7d230..7419f5bea38a6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1694,7 +1694,7 @@ files = [ [[package]] name = "langchain" -version = "0.1.5" +version = "0.1.6" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -1706,8 +1706,8 @@ aiohttp = "^3.8.3" async-timeout = {version = "^4.0.0", markers = "python_version < \"3.11\""} dataclasses-json = ">= 0.5.7, < 0.7" jsonpatch = "^1.33" -langchain-community = ">=0.0.17,<0.1" -langchain-core = ">=0.1.16,<0.2" +langchain-community = ">=0.0.18,<0.1" +langchain-core = ">=0.1.22,<0.2" langsmith = ">=0.0.83,<0.1" numpy = "^1" pydantic = ">=1,<3" @@ -1737,7 +1737,7 @@ url = "libs/langchain" [[package]] name = "langchain-community" -version = "0.0.17" +version = "0.0.19" description = "Community contributed LangChain integrations." optional = false python-versions = ">=3.8.1,<4.0" @@ -1747,7 +1747,7 @@ develop = true [package.dependencies] aiohttp = "^3.8.3" dataclasses-json = ">= 0.5.7, < 0.7" -langchain-core = ">=0.1.16,<0.2" +langchain-core = ">=0.1.21,<0.2" langsmith = ">=0.0.83,<0.1" numpy = "^1" PyYAML = ">=5.3" @@ -1757,7 +1757,7 @@ tenacity = "^8.1.0" [package.extras] cli = ["typer (>=0.9.0,<0.10.0)"] -extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "azure-ai-documentintelligence (>=1.0.0b1,<2.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cohere (>=4,<5)", "dashvector (>=1.0.1,<2.0.0)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "elasticsearch (>=8.12.0,<9.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "gradientai (>=1.4.0,<2.0.0)", "hdbcli (>=2.19.21,<3.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "httpx (>=0.24.1,<0.25.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "oci (>=2.119.1,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "oracle-ads (>=2.9.1,<3.0.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "rdflib (==7.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)", "zhipuai (>=1.0.7,<2.0.0)"] +extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "azure-ai-documentintelligence (>=1.0.0b1,<2.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cohere (>=4,<5)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "elasticsearch (>=8.12.0,<9.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "gradientai (>=1.4.0,<2.0.0)", "hdbcli (>=2.19.21,<3.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "httpx (>=0.24.1,<0.25.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "nvidia-riva-client (>=2.14.0,<3.0.0)", "oci (>=2.119.1,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "oracle-ads (>=2.9.1,<3.0.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "rdflib (==7.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "unstructured (>=0.12.4,<0.13.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)", "zhipuai (>=1.0.7,<2.0.0)"] [package.source] type = "directory" @@ -1765,7 +1765,7 @@ url = "libs/community" [[package]] name = "langchain-core" -version = "0.1.18" +version = "0.1.22" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -1775,7 +1775,7 @@ develop = true [package.dependencies] anyio = ">=3,<5" jsonpatch = "^1.33" -langsmith = ">=0.0.83,<0.1" +langsmith = "^0.0.87" packaging = "^23.2" pydantic = ">=1,<3" PyYAML = ">=5.3" @@ -1830,13 +1830,13 @@ url = "libs/partners/openai" [[package]] name = "langsmith" -version = "0.0.86" +version = "0.0.87" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langsmith-0.0.86-py3-none-any.whl", hash = "sha256:7af15c36edb8c9fd9ae5c6d4fb940eb1da668b630a703d63c90c91e9be53aefb"}, - {file = "langsmith-0.0.86.tar.gz", hash = "sha256:c1572824664810c4425b17f2d1e9a59d53992e6898df22a37236c62d3c80f59e"}, + {file = "langsmith-0.0.87-py3-none-any.whl", hash = "sha256:8903d3811b9fc89eb18f5961c8e6935fbd2d0f119884fbf30dc70b8f8f4121fc"}, + {file = "langsmith-0.0.87.tar.gz", hash = "sha256:36c4cc47e5b54be57d038036a30fb19ce6e4c73048cd7a464b8f25b459694d34"}, ] [package.dependencies] @@ -1925,16 +1925,6 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -2946,7 +2936,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},