From 758178257515b8a996c02d9ed4084f6d1b5da2b8 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Thu, 7 Dec 2023 13:41:09 -0800 Subject: [PATCH 01/18] core[patch], langchain[patch]: import CI --- .github/workflows/_pydantic_compatibility.yml | 4 ++++ libs/core/Makefile | 6 +++++- libs/core/scripts/{check_imports.sh => lint_imports.sh} | 0 libs/langchain/Makefile | 7 ++++++- .../scripts/{check_imports.sh => lint_imports.sh} | 0 5 files changed, 15 insertions(+), 2 deletions(-) rename libs/core/scripts/{check_imports.sh => lint_imports.sh} (100%) rename libs/langchain/scripts/{check_imports.sh => lint_imports.sh} (100%) diff --git a/.github/workflows/_pydantic_compatibility.yml b/.github/workflows/_pydantic_compatibility.yml index 680d961c30549..72f47cdfbab4d 100644 --- a/.github/workflows/_pydantic_compatibility.yml +++ b/.github/workflows/_pydantic_compatibility.yml @@ -44,6 +44,10 @@ jobs: shell: bash run: poetry install + - name: Check imports with base dependencies + shell: bash + run: make check_imports + - name: Install langchain editable working-directory: ${{ inputs.working-directory }} if: ${{ inputs.langchain-location }} diff --git a/libs/core/Makefile b/libs/core/Makefile index 6ab8c21d5adf0..b5a20ce0619db 100644 --- a/libs/core/Makefile +++ b/libs/core/Makefile @@ -15,6 +15,10 @@ tests: test_watch: poetry run ptw --snapshot-update --now . -- -vv -x tests/unit_tests +check_imports: langchain_core/**/*.py + for f in $^ ; do \ + python -c "from importlib.machinery import SourceFileLoader; SourceFileLoader('x', '$$f').load_module()" ; \ + done ###################### # LINTING AND FORMATTING @@ -27,7 +31,7 @@ lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/experimenta lint lint_diff: ./scripts/check_pydantic.sh . - ./scripts/check_imports.sh + ./scripts/lint_imports.sh poetry run ruff . [ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff [ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I $(PYTHON_FILES) diff --git a/libs/core/scripts/check_imports.sh b/libs/core/scripts/lint_imports.sh similarity index 100% rename from libs/core/scripts/check_imports.sh rename to libs/core/scripts/lint_imports.sh diff --git a/libs/langchain/Makefile b/libs/langchain/Makefile index 3c6e8ed1840a0..713ff9d076fc1 100644 --- a/libs/langchain/Makefile +++ b/libs/langchain/Makefile @@ -40,6 +40,11 @@ docker_tests: docker build -t my-langchain-image:test . docker run --rm my-langchain-image:test +check_imports: langchain/**/*.py + for f in $^ ; do \ + python -c "from importlib.machinery import SourceFileLoader; SourceFileLoader('x', '$$f').load_module()" ; \ + done + ###################### # LINTING AND FORMATTING ###################### @@ -51,7 +56,7 @@ lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/langchain - lint lint_diff: ./scripts/check_pydantic.sh . - ./scripts/check_imports.sh + ./scripts/lint_imports.sh poetry run ruff . 
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff [ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I $(PYTHON_FILES) diff --git a/libs/langchain/scripts/check_imports.sh b/libs/langchain/scripts/lint_imports.sh similarity index 100% rename from libs/langchain/scripts/check_imports.sh rename to libs/langchain/scripts/lint_imports.sh From e41cb7f94a40000b57c88d8c779bd2377eb9675d Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Thu, 7 Dec 2023 13:43:49 -0800 Subject: [PATCH 02/18] poetry run --- .github/workflows/_pydantic_compatibility.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_pydantic_compatibility.yml b/.github/workflows/_pydantic_compatibility.yml index 72f47cdfbab4d..49f85387fee62 100644 --- a/.github/workflows/_pydantic_compatibility.yml +++ b/.github/workflows/_pydantic_compatibility.yml @@ -46,7 +46,7 @@ jobs: - name: Check imports with base dependencies shell: bash - run: make check_imports + run: poetry run make check_imports - name: Install langchain editable working-directory: ${{ inputs.working-directory }} From b163c5b7f9167bc705bc7f9c8f987ba2d25be3ea Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Thu, 7 Dec 2023 13:53:07 -0800 Subject: [PATCH 03/18] experimental --- libs/experimental/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libs/experimental/Makefile b/libs/experimental/Makefile index e426a37b1ba41..affbc7777d7eb 100644 --- a/libs/experimental/Makefile +++ b/libs/experimental/Makefile @@ -21,6 +21,11 @@ extended_tests: integration_tests: poetry run pytest tests/integration_tests +check_imports: langchain_experimental/**/*.py + for f in $^ ; do \ + python -c "from importlib.machinery import SourceFileLoader; SourceFileLoader('x', '$$f').load_module()" ; \ + done + ###################### # LINTING AND FORMATTING From 86ff4cace201dd85d2c282941be29c0b7560546a Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Thu, 7 Dec 2023 13:55:21 -0800 Subject: [PATCH 04/18] rename --- .github/workflows/_all_ci.yml | 2 +- .../{_pydantic_compatibility.yml => _dependencies.yml} | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename .github/workflows/{_pydantic_compatibility.yml => _dependencies.yml} (97%) diff --git a/.github/workflows/_all_ci.yml b/.github/workflows/_all_ci.yml index e72bf101ba1a5..893a3f9a3c666 100644 --- a/.github/workflows/_all_ci.yml +++ b/.github/workflows/_all_ci.yml @@ -54,7 +54,7 @@ jobs: secrets: inherit pydantic-compatibility: - uses: ./.github/workflows/_pydantic_compatibility.yml + uses: ./.github/workflows/_dependencies.yml with: working-directory: ${{ inputs.working-directory }} secrets: inherit diff --git a/.github/workflows/_pydantic_compatibility.yml b/.github/workflows/_dependencies.yml similarity index 97% rename from .github/workflows/_pydantic_compatibility.yml rename to .github/workflows/_dependencies.yml index 49f85387fee62..71777485d7da1 100644 --- a/.github/workflows/_pydantic_compatibility.yml +++ b/.github/workflows/_dependencies.yml @@ -1,4 +1,4 @@ -name: pydantic v1/v2 compatibility +name: dependencies on: workflow_call: @@ -28,7 +28,7 @@ jobs: - "3.9" - "3.10" - "3.11" - name: Pydantic v1/v2 compatibility - Python ${{ matrix.python-version }} + name: dependencies - Python ${{ matrix.python-version }} steps: - uses: actions/checkout@v4 From c840c1ae813b1a0a5f168357ac4616ec777d3d61 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Thu, 7 Dec 2023 13:59:59 -0800 Subject: [PATCH 05/18] should fail with optional groups --- 
libs/core/pyproject.toml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index 3f54dfc2b56c5..bb86956024b3f 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -15,14 +15,23 @@ langsmith = "~0.0.63" tenacity = "^8.1.0" jsonpatch = "^1.33" +[tool.poetry.group.lint] +optional = true + [tool.poetry.group.lint.dependencies] ruff = "^0.1.5" +[tool.poetry.group.typing] +optional = true + [tool.poetry.group.typing.dependencies] mypy = "^0.991" types-pyyaml = "^6.0.12.2" types-requests = "^2.28.11.5" +[tool.poetry.group.dev] +optional = true + [tool.poetry.group.dev.dependencies] jupyter = "^1.0.0" setuptools = "^67.6.1" From fa911d0d5b01b9886f2c0e95914494007c9b0065 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Thu, 7 Dec 2023 14:02:38 -0800 Subject: [PATCH 06/18] lock --- libs/core/poetry.lock | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/libs/core/poetry.lock b/libs/core/poetry.lock index 99d60bc7d4d4e..37a6fbf35e05b 100644 --- a/libs/core/poetry.lock +++ b/libs/core/poetry.lock @@ -1147,16 +1147,6 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -1922,7 +1912,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -1930,15 +1919,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -1955,7 +1937,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -1963,7 +1944,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -2706,4 +2686,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "b08d47f726dd194af0f801d300402b174c8db96a4184cc1136cb8e5a0e287190" +content-hash = "349519c04921a656c9acabbb86f70ade8bbb96089f6b14d8f10bb0cb30f41b81" From e2e26e5966bc7cf8a96eecb98a5d46c116ee6271 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Thu, 7 Dec 2023 14:08:53 -0800 Subject: [PATCH 07/18] exit code --- libs/core/Makefile | 2 +- libs/experimental/Makefile | 2 +- libs/langchain/Makefile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/core/Makefile b/libs/core/Makefile index b5a20ce0619db..3e37a4ca5180d 100644 --- a/libs/core/Makefile +++ b/libs/core/Makefile @@ -17,7 +17,7 @@ test_watch: check_imports: langchain_core/**/*.py for f in $^ ; do \ - python -c "from importlib.machinery import SourceFileLoader; SourceFileLoader('x', '$$f').load_module()" ; \ + python -c "from importlib.machinery import SourceFileLoader; SourceFileLoader('x', '$$f').load_module()" || exit 1; \ done ###################### diff --git a/libs/experimental/Makefile b/libs/experimental/Makefile index affbc7777d7eb..df73cebbf9867 100644 --- a/libs/experimental/Makefile +++ b/libs/experimental/Makefile @@ -23,7 +23,7 @@ integration_tests: check_imports: langchain_experimental/**/*.py for f in $^ ; do \ - python -c "from importlib.machinery import SourceFileLoader; SourceFileLoader('x', '$$f').load_module()" ; \ + python -c "from importlib.machinery import 
SourceFileLoader; SourceFileLoader('x', '$$f').load_module()" || exit 1; \ done diff --git a/libs/langchain/Makefile b/libs/langchain/Makefile index 713ff9d076fc1..b934869c34cbd 100644 --- a/libs/langchain/Makefile +++ b/libs/langchain/Makefile @@ -42,7 +42,7 @@ docker_tests: check_imports: langchain/**/*.py for f in $^ ; do \ - python -c "from importlib.machinery import SourceFileLoader; SourceFileLoader('x', '$$f').load_module()" ; \ + python -c "from importlib.machinery import SourceFileLoader; SourceFileLoader('x', '$$f').load_module()" || exit 1; \ done ###################### From 60289460f431ecab31a4084caad024bdfea4c53d Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Thu, 7 Dec 2023 14:23:42 -0800 Subject: [PATCH 08/18] revert dep changes --- libs/core/poetry.lock | 22 +++++++++++++++++++++- libs/core/pyproject.toml | 9 --------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/libs/core/poetry.lock b/libs/core/poetry.lock index 37a6fbf35e05b..99d60bc7d4d4e 100644 --- a/libs/core/poetry.lock +++ b/libs/core/poetry.lock @@ -1147,6 +1147,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -1912,6 +1922,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -1919,8 +1930,15 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -1937,6 +1955,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -1944,6 +1963,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -2686,4 +2706,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "349519c04921a656c9acabbb86f70ade8bbb96089f6b14d8f10bb0cb30f41b81" +content-hash = "b08d47f726dd194af0f801d300402b174c8db96a4184cc1136cb8e5a0e287190" diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index bb86956024b3f..3f54dfc2b56c5 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -15,23 +15,14 @@ langsmith = "~0.0.63" tenacity = "^8.1.0" jsonpatch = "^1.33" -[tool.poetry.group.lint] -optional = true - [tool.poetry.group.lint.dependencies] ruff = "^0.1.5" -[tool.poetry.group.typing] -optional = true - [tool.poetry.group.typing.dependencies] mypy = "^0.991" types-pyyaml = "^6.0.12.2" types-requests = "^2.28.11.5" -[tool.poetry.group.dev] -optional = true - [tool.poetry.group.dev.dependencies] jupyter = "^1.0.0" setuptools = "^67.6.1" From f9e4cf129a079fd874b35c73bcd27dcf8798ff86 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Thu, 7 Dec 2023 14:41:38 -0800 Subject: [PATCH 09/18] fix experimental --- .../data_anonymizer/deanonymizer_mapping.py | 11 +-- .../data_anonymizer/presidio.py | 75 ++++++++++++++----- 2 files changed, 63 insertions(+), 23 deletions(-) diff --git a/libs/experimental/langchain_experimental/data_anonymizer/deanonymizer_mapping.py b/libs/experimental/langchain_experimental/data_anonymizer/deanonymizer_mapping.py index 9db586c2848c3..72d2350d9da3e 100644 --- 
a/libs/experimental/langchain_experimental/data_anonymizer/deanonymizer_mapping.py +++ b/libs/experimental/langchain_experimental/data_anonymizer/deanonymizer_mapping.py @@ -1,10 +1,11 @@ import re from collections import defaultdict from dataclasses import dataclass, field -from typing import Dict, List +from typing import TYPE_CHECKING, Dict, List -from presidio_analyzer import RecognizerResult -from presidio_anonymizer.entities import EngineResult +if TYPE_CHECKING: + from presidio_analyzer import RecognizerResult + from presidio_anonymizer.entities import EngineResult MappingDataType = Dict[str, Dict[str, str]] @@ -62,8 +63,8 @@ def update(self, new_mapping: MappingDataType) -> None: def create_anonymizer_mapping( original_text: str, - analyzer_results: List[RecognizerResult], - anonymizer_results: EngineResult, + analyzer_results: List["RecognizerResult"], + anonymizer_results: "EngineResult", is_reversed: bool = False, ) -> MappingDataType: """Creates or updates the mapping used to anonymize and/or deanonymize text. diff --git a/libs/experimental/langchain_experimental/data_anonymizer/presidio.py b/libs/experimental/langchain_experimental/data_anonymizer/presidio.py index 1b20607679e66..6161d47a6e74c 100644 --- a/libs/experimental/langchain_experimental/data_anonymizer/presidio.py +++ b/libs/experimental/langchain_experimental/data_anonymizer/presidio.py @@ -23,28 +23,62 @@ get_pseudoanonymizer_mapping, ) -try: - from presidio_analyzer import AnalyzerEngine +if TYPE_CHECKING: + from presidio_analyzer import AnalyzerEngine, EntityRecognizer from presidio_analyzer.nlp_engine import NlpEngineProvider - -except ImportError as e: - raise ImportError( - "Could not import presidio_analyzer, please install with " - "`pip install presidio-analyzer`. You will also need to download a " - "spaCy model to use the analyzer, e.g. " - "`python -m spacy download en_core_web_lg`." - ) from e -try: from presidio_anonymizer import AnonymizerEngine from presidio_anonymizer.entities import OperatorConfig -except ImportError as e: - raise ImportError( - "Could not import presidio_anonymizer, please install with " - "`pip install presidio-anonymizer`." - ) from e -if TYPE_CHECKING: - from presidio_analyzer import EntityRecognizer + +def _import_analyzer_engine() -> "AnalyzerEngine": + try: + from presidio_analyzer import AnalyzerEngine + + except ImportError as e: + raise ImportError( + "Could not import presidio_analyzer, please install with " + "`pip install presidio-analyzer`. You will also need to download a " + "spaCy model to use the analyzer, e.g. " + "`python -m spacy download en_core_web_lg`." + ) from e + return AnalyzerEngine + + +def _import_nlp_engine_provider() -> "NlpEngineProvider": + try: + from presidio_analyzer.nlp_engine import NlpEngineProvider + + except ImportError as e: + raise ImportError( + "Could not import presidio_analyzer, please install with " + "`pip install presidio-analyzer`. You will also need to download a " + "spaCy model to use the analyzer, e.g. " + "`python -m spacy download en_core_web_lg`." + ) from e + return NlpEngineProvider + + +def _import_anonymizer_engine() -> "AnonymizerEngine": + try: + from presidio_anonymizer import AnonymizerEngine + except ImportError as e: + raise ImportError( + "Could not import presidio_anonymizer, please install with " + "`pip install presidio-anonymizer`." 
+ ) from e + return AnonymizerEngine + + +def _import_operator_config() -> "OperatorConfig": + try: + from presidio_anonymizer.entities import OperatorConfig + except ImportError as e: + raise ImportError( + "Could not import presidio_anonymizer, please install with " + "`pip install presidio-anonymizer`." + ) from e + return OperatorConfig + # Configuring Anonymizer for multiple languages # Detailed description and examples can be found here: @@ -89,6 +123,11 @@ def __init__( Defaults to None, in which case faker will be seeded randomly and provide random values. """ + OperatorConfig = _import_operator_config() + AnalyzerEngine = _import_analyzer_engine() + NlpEngineProvider = _import_nlp_engine_provider() + AnonymizerEngine = _import_anonymizer_engine() + self.analyzed_fields = ( analyzed_fields if analyzed_fields is not None From 809cae6f95d61fe898e748527529c130bf0e5e77 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Thu, 7 Dec 2023 14:44:05 -0800 Subject: [PATCH 10/18] merge --- .github/workflows/_dependencies.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/_dependencies.yml b/.github/workflows/_dependencies.yml index bb6a635b76f99..af01a7eafa77d 100644 --- a/.github/workflows/_dependencies.yml +++ b/.github/workflows/_dependencies.yml @@ -42,12 +42,16 @@ jobs: - name: Install dependencies shell: bash - run: poetry install --with test + run: poetry install - name: Check imports with base dependencies shell: bash run: poetry run make check_imports + - name: Install test dependencies + shell: bash + run: poetry install --with test + - name: Install langchain editable working-directory: ${{ inputs.working-directory }} if: ${{ inputs.langchain-location }} From 84025a84663a93f313e5d74f29ecdb89480f1728 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Thu, 7 Dec 2023 14:45:17 -0800 Subject: [PATCH 11/18] another name --- .github/workflows/_all_ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_all_ci.yml b/.github/workflows/_all_ci.yml index ff1c4d2cd67a9..37aacd426531f 100644 --- a/.github/workflows/_all_ci.yml +++ b/.github/workflows/_all_ci.yml @@ -52,7 +52,7 @@ jobs: working-directory: ${{ inputs.working-directory }} secrets: inherit - pydantic-compatibility: + dependencies: uses: ./.github/workflows/_dependencies.yml with: working-directory: ${{ inputs.working-directory }} From f3e61d8b710fb5d1cf82b361eea89c1148a57a29 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Thu, 7 Dec 2023 15:32:05 -0800 Subject: [PATCH 12/18] tiktoken --- libs/langchain/langchain/utilities/github.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/libs/langchain/langchain/utilities/github.py b/libs/langchain/langchain/utilities/github.py index 940708718332b..9983951be4d17 100644 --- a/libs/langchain/langchain/utilities/github.py +++ b/libs/langchain/langchain/utilities/github.py @@ -5,7 +5,6 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional import requests -import tiktoken from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator from langchain.utils import get_from_dict_or_env @@ -15,6 +14,18 @@ from github.PullRequest import PullRequest +def _import_tiktoken() -> Any: + """Import tiktoken.""" + try: + import tiktoken + except ImportError: + raise ImportError( + "tiktoken is not installed. 
" + "Please install it with `pip install tiktoken`" + ) + return tiktoken + + class GitHubAPIWrapper(BaseModel): """Wrapper for GitHub API.""" @@ -385,6 +396,7 @@ def list_pull_request_files(self, pr_number: int) -> List[Dict[str, Any]]: dict: A dictionary containing the issue's title, body, and comments as a string """ + tiktoken = _import_tiktoken() MAX_TOKENS_FOR_FILES = 3_000 pr_files = [] pr = self.github_repo_instance.get_pull(number=int(pr_number)) @@ -453,6 +465,7 @@ def get_pull_request(self, pr_number: int) -> Dict[str, Any]: total_tokens = 0 def get_tokens(text: str) -> int: + tiktoken = _import_tiktoken() return len(tiktoken.get_encoding("cl100k_base").encode(text)) def add_to_dict(data_dict: Dict[str, Any], key: str, value: str) -> None: From 358fafeb40633949ab67172cc8993b15c0462d1b Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Thu, 7 Dec 2023 15:42:46 -0800 Subject: [PATCH 13/18] pgvector --- .../vectorstores/_pgvector_data_models.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/libs/langchain/langchain/vectorstores/_pgvector_data_models.py b/libs/langchain/langchain/vectorstores/_pgvector_data_models.py index 1a4b60776537b..062d64896b65d 100644 --- a/libs/langchain/langchain/vectorstores/_pgvector_data_models.py +++ b/libs/langchain/langchain/vectorstores/_pgvector_data_models.py @@ -1,12 +1,24 @@ -from typing import Optional, Tuple +from typing import TYPE_CHECKING, Optional, Tuple import sqlalchemy -from pgvector.sqlalchemy import Vector from sqlalchemy.dialects.postgresql import JSON, UUID from sqlalchemy.orm import Session, relationship from langchain.vectorstores.pgvector import BaseModel +if TYPE_CHECKING: + from pgvector.sqlalchemy import Vector + + +def _import_vector() -> None: + try: + from pgvector.sqlalchemy import Vector + except ImportError: + raise ImportError( + "The `pgvector` library is required to use the PGVectorStore." 
+ ) + return Vector + class CollectionStore(BaseModel): """Collection store.""" @@ -63,7 +75,7 @@ class EmbeddingStore(BaseModel): ) collection = relationship(CollectionStore, back_populates="embeddings") - embedding: Vector = sqlalchemy.Column(Vector(None)) + embedding: Vector = sqlalchemy.Column(_import_vector()(None)) document = sqlalchemy.Column(sqlalchemy.String, nullable=True) cmetadata = sqlalchemy.Column(JSON, nullable=True) From 7c3ed1c2ecc26fceba77f90bec2b759d92992a07 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Thu, 7 Dec 2023 15:46:19 -0800 Subject: [PATCH 14/18] lint --- .../langchain/langchain/vectorstores/_pgvector_data_models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/langchain/langchain/vectorstores/_pgvector_data_models.py b/libs/langchain/langchain/vectorstores/_pgvector_data_models.py index 062d64896b65d..afc872afdd1ab 100644 --- a/libs/langchain/langchain/vectorstores/_pgvector_data_models.py +++ b/libs/langchain/langchain/vectorstores/_pgvector_data_models.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Optional, Tuple +from typing import TYPE_CHECKING, Any, Optional, Tuple import sqlalchemy from sqlalchemy.dialects.postgresql import JSON, UUID @@ -10,7 +10,7 @@ from pgvector.sqlalchemy import Vector -def _import_vector() -> None: +def _import_vector() -> Any: try: from pgvector.sqlalchemy import Vector except ImportError: From 25b2996fb994696387802a7437090890babad06d Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Thu, 7 Dec 2023 15:58:50 -0800 Subject: [PATCH 15/18] default --- libs/langchain/langchain/vectorstores/_pgvector_data_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/langchain/langchain/vectorstores/_pgvector_data_models.py b/libs/langchain/langchain/vectorstores/_pgvector_data_models.py index afc872afdd1ab..9e8ddeed4b377 100644 --- a/libs/langchain/langchain/vectorstores/_pgvector_data_models.py +++ b/libs/langchain/langchain/vectorstores/_pgvector_data_models.py @@ -75,7 +75,7 @@ class EmbeddingStore(BaseModel): ) collection = relationship(CollectionStore, back_populates="embeddings") - embedding: Vector = sqlalchemy.Column(_import_vector()(None)) + embedding: Vector = sqlalchemy.Column(default=lambda: _import_vector()(None)) document = sqlalchemy.Column(sqlalchemy.String, nullable=True) cmetadata = sqlalchemy.Column(JSON, nullable=True) From 1477e96645a7814be4038556859d0b489a89cdd1 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Thu, 7 Dec 2023 16:13:12 -0800 Subject: [PATCH 16/18] pgvector fix --- .../vectorstores/_pgvector_data_models.py | 83 ------------------ .../langchain/vectorstores/pgvector.py | 85 +++++++++++++++++-- 2 files changed, 77 insertions(+), 91 deletions(-) delete mode 100644 libs/langchain/langchain/vectorstores/_pgvector_data_models.py diff --git a/libs/langchain/langchain/vectorstores/_pgvector_data_models.py b/libs/langchain/langchain/vectorstores/_pgvector_data_models.py deleted file mode 100644 index 9e8ddeed4b377..0000000000000 --- a/libs/langchain/langchain/vectorstores/_pgvector_data_models.py +++ /dev/null @@ -1,83 +0,0 @@ -from typing import TYPE_CHECKING, Any, Optional, Tuple - -import sqlalchemy -from sqlalchemy.dialects.postgresql import JSON, UUID -from sqlalchemy.orm import Session, relationship - -from langchain.vectorstores.pgvector import BaseModel - -if TYPE_CHECKING: - from pgvector.sqlalchemy import Vector - - -def _import_vector() -> Any: - try: - from pgvector.sqlalchemy import Vector - except ImportError: - raise ImportError( - 
"The `pgvector` library is required to use the PGVectorStore." - ) - return Vector - - -class CollectionStore(BaseModel): - """Collection store.""" - - __tablename__ = "langchain_pg_collection" - - name = sqlalchemy.Column(sqlalchemy.String) - cmetadata = sqlalchemy.Column(JSON) - - embeddings = relationship( - "EmbeddingStore", - back_populates="collection", - passive_deletes=True, - ) - - @classmethod - def get_by_name(cls, session: Session, name: str) -> Optional["CollectionStore"]: - return session.query(cls).filter(cls.name == name).first() # type: ignore - - @classmethod - def get_or_create( - cls, - session: Session, - name: str, - cmetadata: Optional[dict] = None, - ) -> Tuple["CollectionStore", bool]: - """ - Get or create a collection. - Returns [Collection, bool] where the bool is True if the collection was created. - """ - created = False - collection = cls.get_by_name(session, name) - if collection: - return collection, created - - collection = cls(name=name, cmetadata=cmetadata) - session.add(collection) - session.commit() - created = True - return collection, created - - -class EmbeddingStore(BaseModel): - """Embedding store.""" - - __tablename__ = "langchain_pg_embedding" - - collection_id = sqlalchemy.Column( - UUID(as_uuid=True), - sqlalchemy.ForeignKey( - f"{CollectionStore.__tablename__}.uuid", - ondelete="CASCADE", - ), - ) - collection = relationship(CollectionStore, back_populates="embeddings") - - embedding: Vector = sqlalchemy.Column(default=lambda: _import_vector()(None)) - document = sqlalchemy.Column(sqlalchemy.String, nullable=True) - cmetadata = sqlalchemy.Column(JSON, nullable=True) - - # custom_id : any user defined id - custom_id = sqlalchemy.Column(sqlalchemy.String, nullable=True) diff --git a/libs/langchain/langchain/vectorstores/pgvector.py b/libs/langchain/langchain/vectorstores/pgvector.py index c10686bf2b4ba..27d233b808ac6 100644 --- a/libs/langchain/langchain/vectorstores/pgvector.py +++ b/libs/langchain/langchain/vectorstores/pgvector.py @@ -22,8 +22,8 @@ import numpy as np import sqlalchemy from sqlalchemy import delete -from sqlalchemy.dialects.postgresql import UUID -from sqlalchemy.orm import Session +from sqlalchemy.dialects.postgresql import JSON, UUID +from sqlalchemy.orm import Session, relationship try: from sqlalchemy.orm import declarative_base @@ -64,6 +64,79 @@ class BaseModel(Base): uuid = sqlalchemy.Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) +def _get_collection_store() -> Any: + class CollectionStore(BaseModel): + """Collection store.""" + + __tablename__ = "langchain_pg_collection" + + name = sqlalchemy.Column(sqlalchemy.String) + cmetadata = sqlalchemy.Column(JSON) + + embeddings = relationship( + "EmbeddingStore", + back_populates="collection", + passive_deletes=True, + ) + + @classmethod + def get_by_name( + cls, session: Session, name: str + ) -> Optional["CollectionStore"]: + return session.query(cls).filter(cls.name == name).first() # type: ignore + + @classmethod + def get_or_create( + cls, + session: Session, + name: str, + cmetadata: Optional[dict] = None, + ) -> Tuple["CollectionStore", bool]: + """ + Get or create a collection. + Returns [Collection, bool] where the bool is True if the collection was created. 
+ """ + created = False + collection = cls.get_by_name(session, name) + if collection: + return collection, created + + collection = cls(name=name, cmetadata=cmetadata) + session.add(collection) + session.commit() + created = True + return collection, created + + return CollectionStore + + +def _get_embedding_store() -> Any: + from pgvector.sqlalchemy import Vector + + class EmbeddingStore(BaseModel): + """Embedding store.""" + + __tablename__ = "langchain_pg_embedding" + + collection_id = sqlalchemy.Column( + UUID(as_uuid=True), + sqlalchemy.ForeignKey( + f"{CollectionStore.__tablename__}.uuid", + ondelete="CASCADE", + ), + ) + collection = relationship(CollectionStore, back_populates="embeddings") + + embedding: Vector = sqlalchemy.Column(Vector(None)) + document = sqlalchemy.Column(sqlalchemy.String, nullable=True) + cmetadata = sqlalchemy.Column(JSON, nullable=True) + + # custom_id : any user defined id + custom_id = sqlalchemy.Column(sqlalchemy.String, nullable=True) + + return EmbeddingStore + + def _results_to_docs(docs_and_scores: Any) -> List[Document]: """Return docs from docs and scores.""" return [doc for doc, _ in docs_and_scores] @@ -138,13 +211,9 @@ def __post_init__( ) -> None: """Initialize the store.""" self.create_vector_extension() - from langchain.vectorstores._pgvector_data_models import ( - CollectionStore, - EmbeddingStore, - ) - self.CollectionStore = CollectionStore - self.EmbeddingStore = EmbeddingStore + self.CollectionStore = _get_collection_store() + self.EmbeddingStore = _get_embedding_store() self.create_tables_if_not_exists() self.create_collection() From 9a5e528af7e8b4be3784b1d38fcf8b844b52b9ed Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Thu, 7 Dec 2023 16:13:28 -0800 Subject: [PATCH 17/18] pgvector fix --- libs/langchain/langchain/vectorstores/pgvector.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/libs/langchain/langchain/vectorstores/pgvector.py b/libs/langchain/langchain/vectorstores/pgvector.py index 27d233b808ac6..fa4a7fdb456d4 100644 --- a/libs/langchain/langchain/vectorstores/pgvector.py +++ b/libs/langchain/langchain/vectorstores/pgvector.py @@ -37,9 +37,6 @@ from langchain.utils import get_from_dict_or_env from langchain.vectorstores.utils import maximal_marginal_relevance -if TYPE_CHECKING: - from langchain.vectorstores._pgvector_data_models import CollectionStore - class DistanceStrategy(str, enum.Enum): """Enumerator of the Distance strategies.""" From 6ceb8e2ad43856a23b07dcc5b2f780504e28d614 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Thu, 7 Dec 2023 16:14:48 -0800 Subject: [PATCH 18/18] simplify --- .../langchain/vectorstores/pgvector.py | 74 +++++++++---------- 1 file changed, 34 insertions(+), 40 deletions(-) diff --git a/libs/langchain/langchain/vectorstores/pgvector.py b/libs/langchain/langchain/vectorstores/pgvector.py index fa4a7fdb456d4..4f2ccb9b899ca 100644 --- a/libs/langchain/langchain/vectorstores/pgvector.py +++ b/libs/langchain/langchain/vectorstores/pgvector.py @@ -7,7 +7,6 @@ import uuid from functools import partial from typing import ( - TYPE_CHECKING, Any, Callable, Dict, @@ -61,50 +60,45 @@ class BaseModel(Base): uuid = sqlalchemy.Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) -def _get_collection_store() -> Any: - class CollectionStore(BaseModel): - """Collection store.""" +class CollectionStore(BaseModel): + """Collection store.""" - __tablename__ = "langchain_pg_collection" + __tablename__ = "langchain_pg_collection" - name = sqlalchemy.Column(sqlalchemy.String) - cmetadata = 
sqlalchemy.Column(JSON) + name = sqlalchemy.Column(sqlalchemy.String) + cmetadata = sqlalchemy.Column(JSON) - embeddings = relationship( - "EmbeddingStore", - back_populates="collection", - passive_deletes=True, - ) + embeddings = relationship( + "EmbeddingStore", + back_populates="collection", + passive_deletes=True, + ) - @classmethod - def get_by_name( - cls, session: Session, name: str - ) -> Optional["CollectionStore"]: - return session.query(cls).filter(cls.name == name).first() # type: ignore - - @classmethod - def get_or_create( - cls, - session: Session, - name: str, - cmetadata: Optional[dict] = None, - ) -> Tuple["CollectionStore", bool]: - """ - Get or create a collection. - Returns [Collection, bool] where the bool is True if the collection was created. - """ - created = False - collection = cls.get_by_name(session, name) - if collection: - return collection, created - - collection = cls(name=name, cmetadata=cmetadata) - session.add(collection) - session.commit() - created = True + @classmethod + def get_by_name(cls, session: Session, name: str) -> Optional["CollectionStore"]: + return session.query(cls).filter(cls.name == name).first() # type: ignore + + @classmethod + def get_or_create( + cls, + session: Session, + name: str, + cmetadata: Optional[dict] = None, + ) -> Tuple["CollectionStore", bool]: + """ + Get or create a collection. + Returns [Collection, bool] where the bool is True if the collection was created. + """ + created = False + collection = cls.get_by_name(session, name) + if collection: return collection, created - return CollectionStore + collection = cls(name=name, cmetadata=cmetadata) + session.add(collection) + session.commit() + created = True + return collection, created def _get_embedding_store() -> Any: @@ -209,7 +203,7 @@ def __post_init__( """Initialize the store.""" self.create_vector_extension() - self.CollectionStore = _get_collection_store() + self.CollectionStore = CollectionStore self.EmbeddingStore = _get_embedding_store() self.create_tables_if_not_exists() self.create_collection()
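
For reference, patches 01, 03, and 07 add a `check_imports` Make target that loads every module file with `SourceFileLoader(...).load_module()` and fails (`|| exit 1`) on the first file that cannot be imported, so CI can verify each package imports cleanly with only its base dependencies installed. Below is a minimal standalone sketch of the same check, not taken from the patches: it uses the non-deprecated `importlib.util` spec API instead of `load_module()`, reports every failure rather than stopping at the first, and the default package path is only an example.

```python
"""Sketch of an import check equivalent to the Makefile `check_imports` target.

Walks a package directory, executes each .py file as a throwaway module, and
fails if any file cannot be imported -- e.g. because it imports an optional
dependency at module level.
"""
import importlib.util
import sys
from pathlib import Path


def load_file(path: Path) -> None:
    """Execute a single source file as an anonymous module."""
    spec = importlib.util.spec_from_file_location("x", path)
    assert spec is not None and spec.loader is not None
    module = importlib.util.module_from_spec(spec)
    # Raises ImportError (or anything else the module does at import time)
    # if a dependency is missing.
    spec.loader.exec_module(module)


def check_imports(package_dir: str) -> int:
    """Return the number of files under package_dir that fail to import."""
    failures = 0
    for path in sorted(Path(package_dir).rglob("*.py")):
        try:
            load_file(path)
        except Exception as exc:  # report all failures instead of exiting on the first
            failures += 1
            print(f"{path}: {type(exc).__name__}: {exc}", file=sys.stderr)
    return failures


if __name__ == "__main__":
    package = sys.argv[1] if len(sys.argv) > 1 else "langchain_core"
    sys.exit(1 if check_imports(package) else 0)
```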
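The later patches (09, 12, 13, and 16) all apply the same refactor so that the check above can pass: module-level imports of optional packages (presidio, tiktoken, pgvector) move behind small `_import_*` / `_get_*` helpers that raise an actionable `ImportError` only when the feature is actually used, with `TYPE_CHECKING` imports kept for annotations. A condensed sketch of that pattern follows; `fancylib` and its API are placeholder names for illustration, not anything referenced in the diff.

```python
"""Sketch of the deferred-optional-import pattern used throughout the PR."""
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    # Evaluated only by type checkers, so annotations can reference the optional
    # package without importing it at runtime.
    from fancylib import Encoder  # placeholder optional dependency


def _import_fancylib() -> Any:
    """Import the optional dependency lazily, with an actionable error message."""
    try:
        import fancylib  # stands in for tiktoken / pgvector / presidio
    except ImportError as e:
        raise ImportError(
            "fancylib is not installed. "
            "Please install it with `pip install fancylib`."
        ) from e
    return fancylib


def count_tokens(text: str) -> int:
    """The import now happens here, when the feature is used, not at module load."""
    fancylib = _import_fancylib()
    encoder: "Encoder" = fancylib.get_encoder("default")  # illustrative API
    return len(encoder.encode(text))
```

Because the optional package is only touched inside the helper, importing the module itself succeeds with just the base dependencies, which is exactly what the `check_imports` target exercises in CI.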