diff --git a/.github/workflows/_integration_test.yml b/.github/workflows/_integration_test.yml index caf1f51..3df47bb 100644 --- a/.github/workflows/_integration_test.yml +++ b/.github/workflows/_integration_test.yml @@ -43,6 +43,8 @@ jobs: ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }} ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }} ASTRA_DB_KEYSPACE: ${{ secrets.ASTRA_DB_KEYSPACE }} + SHARED_SECRET_NAME_OPENAI: ${{ secrets.SHARED_SECRET_NAME_OPENAI }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | make integration_tests diff --git a/.github/workflows/_release.yml b/.github/workflows/_release.yml index b50db1f..d209715 100644 --- a/.github/workflows/_release.yml +++ b/.github/workflows/_release.yml @@ -156,13 +156,15 @@ jobs: run: make tests working-directory: ${{ inputs.working-directory }} -# - name: Run integration tests -# env: -# ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }} -# ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }} -# ASTRA_DB_KEYSPACE: ${{ secrets.ASTRA_DB_KEYSPACE }} -# run: make integration_tests -# working-directory: ${{ inputs.working-directory }} + - name: Run integration tests + env: + ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }} + ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }} + ASTRA_DB_KEYSPACE: ${{ secrets.ASTRA_DB_KEYSPACE }} + SHARED_SECRET_NAME_OPENAI: ${{ secrets.SHARED_SECRET_NAME_OPENAI }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: make integration_tests + working-directory: ${{ inputs.working-directory }} - name: Get minimum versions working-directory: ${{ inputs.working-directory }} diff --git a/libs/astradb/Makefile b/libs/astradb/Makefile index 6280a79..af92305 100644 --- a/libs/astradb/Makefile +++ b/libs/astradb/Makefile @@ -44,10 +44,10 @@ format format_diff: poetry run ruff --select I --fix $(PYTHON_FILES) spell_check: - poetry run codespell --toml pyproject.toml + poetry run codespell --toml pyproject.toml -I codespell_ignore_words.txt spell_fix: - poetry run codespell --toml pyproject.toml -w + poetry run codespell --toml pyproject.toml -w -I codespell_ignore_words.txt check_imports: $(shell find langchain_astradb -name '*.py') poetry run python ./scripts/check_imports.py $^ diff --git a/libs/astradb/codespell_ignore_words.txt b/libs/astradb/codespell_ignore_words.txt new file mode 100644 index 0000000..0b3a7cd --- /dev/null +++ b/libs/astradb/codespell_ignore_words.txt @@ -0,0 +1 @@ +Haa diff --git a/libs/astradb/langchain_astradb/utils/astradb.py b/libs/astradb/langchain_astradb/utils/astradb.py index 3ba98be..9321c49 100644 --- a/libs/astradb/langchain_astradb/utils/astradb.py +++ b/libs/astradb/langchain_astradb/utils/astradb.py @@ -19,6 +19,8 @@ API_ENDPOINT_ENV_VAR = "ASTRA_DB_API_ENDPOINT" NAMESPACE_ENV_VAR = "ASTRA_DB_KEYSPACE" +DEFAULT_VECTORIZE_SECRET_HEADER = "x-embedding-api-key" + logger = logging.getLogger() @@ -138,19 +140,29 @@ def __init__( collection_vector_service_options: Optional[ CollectionVectorServiceOptions ] = None, + collection_embedding_api_key: Optional[str] = None, ) -> None: super().__init__( token, api_endpoint, astra_db_client, async_astra_db_client, namespace ) + embedding_key_header = { + k: v + for k, v in { + DEFAULT_VECTORIZE_SECRET_HEADER: collection_embedding_api_key, + }.items() + if v is not None + } self.collection_name = collection_name self.collection = AstraDBCollection( collection_name=collection_name, astra_db=self.astra_db, + additional_headers=embedding_key_header, ) self.async_collection = AsyncAstraDBCollection( collection_name=collection_name, astra_db=self.async_astra_db, + additional_headers=embedding_key_header, ) if requested_indexing_policy is not None: diff --git a/libs/astradb/langchain_astradb/vectorstores.py b/libs/astradb/langchain_astradb/vectorstores.py index f2a7bf7..f154fee 100644 --- a/libs/astradb/langchain_astradb/vectorstores.py +++ b/libs/astradb/langchain_astradb/vectorstores.py @@ -156,6 +156,7 @@ def __init__( collection_vector_service_options: Optional[ CollectionVectorServiceOptions ] = None, + collection_embedding_api_key: Optional[str] = None, ) -> None: """Wrapper around DataStax Astra DB for vector-store workloads. @@ -181,8 +182,9 @@ def __init__( Args: embedding: the embeddings function or service to use. This enables client-side embedding functions or calls to external - embedding providers. Only one of `embedding` or - `collection_vector_service_options` can be provided. + embedding providers. If `embedding` is provided, arguments + `collection_vector_service_options` and + `collection_embedding_api_key` cannot be provided. collection_name: name of the Astra DB collection to create/use. token: API token for Astra DB usage. If not provided, the environment variable ASTRA_DB_APPLICATION_TOKEN is inspected. @@ -220,10 +222,16 @@ def __init__( (see docs.datastax.com/en/astra/astra-db-vector/api-reference/ data-api-commands.html#advanced-feature-indexing-clause-on-createcollection) collection_vector_service_options: specifies the use of server-side - embeddings within Astra DB. Only one of `embedding` or - `collection_vector_service_options` can be provided. - NOTE: This feature is under current development. - + embeddings within Astra DB. If passing this parameter, `embedding` + cannot be provided. + collection_embedding_api_key: for usage of server-side embeddings + within Astra DB, with this parameter one can supply an API Key + that will be passed to Astra DB with each data request. + This is useful when the service is configured for the collection, + but no corresponding secret is stored within + Astra's key management system. + This parameter cannot be provided without + specifying `collection_vector_service_options`. Note: For concurrency in synchronous :meth:`~add_texts`:, as a rule of thumb, on a @@ -242,7 +250,7 @@ def __init__( Remember you can pass concurrency settings to individual calls to :meth:`~add_texts` and :meth:`~add_documents` as well. """ - # Embedding and collection_vector_service_options are mutually exclusive, + # Embedding and the server-side embeddings are mutually exclusive, # as both specify how to produce embeddings if embedding is None and collection_vector_service_options is None: raise ValueError( @@ -256,6 +264,15 @@ def __init__( can be provided." ) + if ( + collection_vector_service_options is None + and collection_embedding_api_key is not None + ): + raise ValueError( + "`collection_embedding_api_key` cannot be provided unless" + " `collection_vector_service_options` is also passed." + ) + self.embedding_dimension: Optional[int] = None self.embedding = embedding self.collection_name = collection_name @@ -263,6 +280,7 @@ def __init__( self.api_endpoint = api_endpoint self.namespace = namespace self.collection_vector_service_options = collection_vector_service_options + self.collection_embedding_api_key = collection_embedding_api_key # Concurrency settings self.batch_size: int = batch_size or DEFAULT_BATCH_SIZE self.bulk_insert_batch_concurrency: int = ( @@ -305,6 +323,7 @@ def __init__( requested_indexing_policy=self.indexing_policy, default_indexing_policy=DEFAULT_INDEXING_OPTIONS, collection_vector_service_options=collection_vector_service_options, + collection_embedding_api_key=collection_embedding_api_key, ) self.astra_db = self.astra_env.astra_db self.async_astra_db = self.astra_env.async_astra_db diff --git a/libs/astradb/poetry.lock b/libs/astradb/poetry.lock index 1bd4ea5..27ba834 100644 --- a/libs/astradb/poetry.lock +++ b/libs/astradb/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. [[package]] name = "aiohttp" @@ -112,13 +112,13 @@ frozenlist = ">=1.1.0" [[package]] name = "annotated-types" -version = "0.6.0" +version = "0.7.0" description = "Reusable constraint types to use with typing.Annotated" optional = false python-versions = ">=3.8" files = [ - {file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"}, - {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"}, + {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, + {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, ] [package.dependencies] @@ -148,13 +148,13 @@ trio = ["trio (>=0.23)"] [[package]] name = "astrapy" -version = "1.1.0" +version = "1.2.0" description = "AstraPy is a Pythonic SDK for DataStax Astra and its Data API" optional = false python-versions = "<4.0.0,>=3.8.0" files = [ - {file = "astrapy-1.1.0-py3-none-any.whl", hash = "sha256:577151c3a9f8ed9c389402fad0b43ec17af9386974731622e2462908ca0e81a6"}, - {file = "astrapy-1.1.0.tar.gz", hash = "sha256:287421bb293060967c4620b7a9de5949fe46391898768169dba370a7ec4676c4"}, + {file = "astrapy-1.2.0-py3-none-any.whl", hash = "sha256:5d65242771934c38ebe16f330e9e517968c1437846dabdbe7e48470f7b1782e8"}, + {file = "astrapy-1.2.0.tar.gz", hash = "sha256:6ce1b421d1ae21fe73373fa36048d8d56c775367886525504f01c48cbb742842"}, ] [package.dependencies] @@ -398,13 +398,13 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} [[package]] name = "codespell" -version = "2.2.6" +version = "2.3.0" description = "Codespell" optional = false python-versions = ">=3.8" files = [ - {file = "codespell-2.2.6-py3-none-any.whl", hash = "sha256:9ee9a3e5df0990604013ac2a9f22fa8e57669c827124a2e961fe8a1da4cacc07"}, - {file = "codespell-2.2.6.tar.gz", hash = "sha256:a8c65d8eb3faa03deabab6b3bbe798bea72e1799c7e9e955d57eca4096abcff9"}, + {file = "codespell-2.3.0-py3-none-any.whl", hash = "sha256:a9c7cef2501c9cfede2110fd6d4e5e62296920efe9abfb84648df866e47f58d1"}, + {file = "codespell-2.3.0.tar.gz", hash = "sha256:360c7d10f75e65f67bad720af7007e1060a5d395670ec11a7ed1fed9dd17471f"}, ] [package.extras] @@ -424,21 +424,6 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -[[package]] -name = "dataclasses-json" -version = "0.6.6" -description = "Easily serialize dataclasses to and from JSON." -optional = false -python-versions = "<4.0,>=3.7" -files = [ - {file = "dataclasses_json-0.6.6-py3-none-any.whl", hash = "sha256:e54c5c87497741ad454070ba0ed411523d46beb5da102e221efb873801b0ba85"}, - {file = "dataclasses_json-0.6.6.tar.gz", hash = "sha256:0c09827d26fffda27f1be2fed7a7a01a29c5ddcd2eb6393ad5ebf9d77e9deae8"}, -] - -[package.dependencies] -marshmallow = ">=3.18.0,<4.0.0" -typing-inspect = ">=0.4.0,<1" - [[package]] name = "deprecation" version = "2.1.0" @@ -791,12 +776,11 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, - {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, ] [[package]] name = "langchain" -version = "0.2.0rc2" +version = "0.2.1" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -806,9 +790,8 @@ develop = false [package.dependencies] aiohttp = "^3.8.3" async-timeout = {version = "^4.0.0", markers = "python_version < \"3.11\""} -dataclasses-json = ">= 0.5.7, < 0.7" -langchain-core = ">=0.1.52,<0.3" -langchain-text-splitters = ">=0.0.1,<0.1" +langchain-core = "^0.2.0" +langchain-text-splitters = "^0.2.0" langsmith = "^0.1.17" numpy = "^1" pydantic = ">=1,<3" @@ -828,7 +811,7 @@ embeddings = ["sentence-transformers (>=2,<3)"] extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cohere (>=4,<6)", "couchbase (>=4.1.9,<5.0.0)", "dashvector (>=1.0.1,<2.0.0)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "langchain-openai (>=0.1,<0.2)", "lxml (>=4.9.3,<6.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (<2)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "rdflib (==7.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"] javascript = ["esprima (>=4.0.1,<5.0.0)"] llms = ["clarifai (>=9.1.0)", "cohere (>=4,<6)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"] -openai = ["openai (<2)", "tiktoken (>=0.3.2,<0.6.0)"] +openai = ["openai (<2)", "tiktoken (>=0.7,<1.0)"] qdrant = ["qdrant-client (>=1.3.1,<2.0.0)"] text-helpers = ["chardet (>=5.1.0,<6.0.0)"] @@ -836,12 +819,12 @@ text-helpers = ["chardet (>=5.1.0,<6.0.0)"] type = "git" url = "https://github.com/langchain-ai/langchain.git" reference = "HEAD" -resolved_reference = "be15740084e31d6850808e08ccb005f652664a48" +resolved_reference = "cccc8fbe2fe59bde0846875f67aa046aeb1105a3" subdirectory = "libs/langchain" [[package]] name = "langchain-core" -version = "0.2.0rc1" +version = "0.2.2rc1" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -863,12 +846,12 @@ extended-testing = ["jinja2 (>=3,<4)"] type = "git" url = "https://github.com/langchain-ai/langchain.git" reference = "HEAD" -resolved_reference = "be15740084e31d6850808e08ccb005f652664a48" +resolved_reference = "cccc8fbe2fe59bde0846875f67aa046aeb1105a3" subdirectory = "libs/core" [[package]] name = "langchain-text-splitters" -version = "0.0.2" +version = "0.2.0" description = "LangChain text splitting utilities" optional = false python-versions = ">=3.8.1,<4.0" @@ -876,7 +859,7 @@ files = [] develop = false [package.dependencies] -langchain-core = ">=0.1.28,<0.3" +langchain-core = "^0.2.0" [package.extras] extended-testing = ["beautifulsoup4 (>=4.12.3,<5.0.0)", "lxml (>=4.9.3,<6.0)"] @@ -885,18 +868,18 @@ extended-testing = ["beautifulsoup4 (>=4.12.3,<5.0.0)", "lxml (>=4.9.3,<6.0)"] type = "git" url = "https://github.com/langchain-ai/langchain.git" reference = "HEAD" -resolved_reference = "be15740084e31d6850808e08ccb005f652664a48" +resolved_reference = "cccc8fbe2fe59bde0846875f67aa046aeb1105a3" subdirectory = "libs/text-splitters" [[package]] name = "langsmith" -version = "0.1.59" +version = "0.1.63" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langsmith-0.1.59-py3-none-any.whl", hash = "sha256:445e3bc1d3baa1e5340cd979907a19483b9763a2ed37b863a01113d406f69345"}, - {file = "langsmith-0.1.59.tar.gz", hash = "sha256:e748a89f4dd6aa441349143e49e546c03b5dfb43376a25bfef6a5ca792fe1437"}, + {file = "langsmith-0.1.63-py3-none-any.whl", hash = "sha256:7810afdf5e3f3b472fc581a29371fb96cd843dde2149e048d1b9610325159d1e"}, + {file = "langsmith-0.1.63.tar.gz", hash = "sha256:a609405b52f6f54df442a142cbf19ab38662d54e532f96028b4c546434d4afdf"}, ] [package.dependencies] @@ -904,25 +887,6 @@ orjson = ">=3.9.14,<4.0.0" pydantic = ">=1,<3" requests = ">=2,<3" -[[package]] -name = "marshmallow" -version = "3.21.2" -description = "A lightweight library for converting complex datatypes to and from native Python datatypes." -optional = false -python-versions = ">=3.8" -files = [ - {file = "marshmallow-3.21.2-py3-none-any.whl", hash = "sha256:70b54a6282f4704d12c0a41599682c5c5450e843b9ec406308653b47c59648a1"}, - {file = "marshmallow-3.21.2.tar.gz", hash = "sha256:82408deadd8b33d56338d2182d455db632c6313aa2af61916672146bb32edc56"}, -] - -[package.dependencies] -packaging = ">=17.0" - -[package.extras] -dev = ["marshmallow[tests]", "pre-commit (>=3.5,<4.0)", "tox"] -docs = ["alabaster (==0.7.16)", "autodocsumm (==0.2.12)", "sphinx (==7.3.7)", "sphinx-issues (==4.1.0)", "sphinx-version-warning (==1.1.2)"] -tests = ["pytest", "pytz", "simplejson"] - [[package]] name = "multidict" version = "6.0.5" @@ -1438,7 +1402,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -1446,16 +1409,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -1472,7 +1427,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -1480,7 +1434,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -1488,13 +1441,13 @@ files = [ [[package]] name = "requests" -version = "2.31.0" +version = "2.32.2" description = "Python HTTP for Humans." optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, - {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, + {file = "requests-2.32.2-py3-none-any.whl", hash = "sha256:fc06670dd0ed212426dfeb94fc1b983d917c4f9847c863f313c9dfaaffb7c23c"}, + {file = "requests-2.32.2.tar.gz", hash = "sha256:dd951ff5ecf3e3b3aa26b40703ba77495dab41da839ae72ef3c8e5d8e2433289"}, ] [package.dependencies] @@ -1614,7 +1567,7 @@ files = [ ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} +greenlet = {version = "!=0.4.17", markers = "platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\""} typing-extensions = ">=4.6.0" [package.extras] @@ -1695,30 +1648,15 @@ files = [ [[package]] name = "typing-extensions" -version = "4.11.0" +version = "4.12.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.11.0-py3-none-any.whl", hash = "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"}, - {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"}, -] - -[[package]] -name = "typing-inspect" -version = "0.9.0" -description = "Runtime inspection utilities for typing module." -optional = false -python-versions = "*" -files = [ - {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, - {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, + {file = "typing_extensions-4.12.0-py3-none-any.whl", hash = "sha256:b349c66bea9016ac22978d800cfff206d5f9816951f12a7d0ec5578b0a819594"}, + {file = "typing_extensions-4.12.0.tar.gz", hash = "sha256:8cbcdc8606ebcb0d95453ad7dc5065e6237b6aa230a31e81d0f440c30fed5fd8"}, ] -[package.dependencies] -mypy-extensions = ">=0.3.0" -typing-extensions = ">=3.7.4" - [[package]] name = "urllib3" version = "2.2.1" @@ -1749,40 +1687,43 @@ files = [ [[package]] name = "watchdog" -version = "4.0.0" +version = "4.0.1" description = "Filesystem events monitoring" optional = false python-versions = ">=3.8" files = [ - {file = "watchdog-4.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:39cb34b1f1afbf23e9562501673e7146777efe95da24fab5707b88f7fb11649b"}, - {file = "watchdog-4.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c522392acc5e962bcac3b22b9592493ffd06d1fc5d755954e6be9f4990de932b"}, - {file = "watchdog-4.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6c47bdd680009b11c9ac382163e05ca43baf4127954c5f6d0250e7d772d2b80c"}, - {file = "watchdog-4.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8350d4055505412a426b6ad8c521bc7d367d1637a762c70fdd93a3a0d595990b"}, - {file = "watchdog-4.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c17d98799f32e3f55f181f19dd2021d762eb38fdd381b4a748b9f5a36738e935"}, - {file = "watchdog-4.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4986db5e8880b0e6b7cd52ba36255d4793bf5cdc95bd6264806c233173b1ec0b"}, - {file = "watchdog-4.0.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:11e12fafb13372e18ca1bbf12d50f593e7280646687463dd47730fd4f4d5d257"}, - {file = "watchdog-4.0.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5369136a6474678e02426bd984466343924d1df8e2fd94a9b443cb7e3aa20d19"}, - {file = "watchdog-4.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76ad8484379695f3fe46228962017a7e1337e9acadafed67eb20aabb175df98b"}, - {file = "watchdog-4.0.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:45cc09cc4c3b43fb10b59ef4d07318d9a3ecdbff03abd2e36e77b6dd9f9a5c85"}, - {file = "watchdog-4.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:eed82cdf79cd7f0232e2fdc1ad05b06a5e102a43e331f7d041e5f0e0a34a51c4"}, - {file = "watchdog-4.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ba30a896166f0fee83183cec913298151b73164160d965af2e93a20bbd2ab605"}, - {file = "watchdog-4.0.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d18d7f18a47de6863cd480734613502904611730f8def45fc52a5d97503e5101"}, - {file = "watchdog-4.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2895bf0518361a9728773083908801a376743bcc37dfa252b801af8fd281b1ca"}, - {file = "watchdog-4.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:87e9df830022488e235dd601478c15ad73a0389628588ba0b028cb74eb72fed8"}, - {file = "watchdog-4.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:6e949a8a94186bced05b6508faa61b7adacc911115664ccb1923b9ad1f1ccf7b"}, - {file = "watchdog-4.0.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:6a4db54edea37d1058b08947c789a2354ee02972ed5d1e0dca9b0b820f4c7f92"}, - {file = "watchdog-4.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d31481ccf4694a8416b681544c23bd271f5a123162ab603c7d7d2dd7dd901a07"}, - {file = "watchdog-4.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8fec441f5adcf81dd240a5fe78e3d83767999771630b5ddfc5867827a34fa3d3"}, - {file = "watchdog-4.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:6a9c71a0b02985b4b0b6d14b875a6c86ddea2fdbebd0c9a720a806a8bbffc69f"}, - {file = "watchdog-4.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:557ba04c816d23ce98a06e70af6abaa0485f6d94994ec78a42b05d1c03dcbd50"}, - {file = "watchdog-4.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:d0f9bd1fd919134d459d8abf954f63886745f4660ef66480b9d753a7c9d40927"}, - {file = "watchdog-4.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:f9b2fdca47dc855516b2d66eef3c39f2672cbf7e7a42e7e67ad2cbfcd6ba107d"}, - {file = "watchdog-4.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:73c7a935e62033bd5e8f0da33a4dcb763da2361921a69a5a95aaf6c93aa03a87"}, - {file = "watchdog-4.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6a80d5cae8c265842c7419c560b9961561556c4361b297b4c431903f8c33b269"}, - {file = "watchdog-4.0.0-py3-none-win32.whl", hash = "sha256:8f9a542c979df62098ae9c58b19e03ad3df1c9d8c6895d96c0d51da17b243b1c"}, - {file = "watchdog-4.0.0-py3-none-win_amd64.whl", hash = "sha256:f970663fa4f7e80401a7b0cbeec00fa801bf0287d93d48368fc3e6fa32716245"}, - {file = "watchdog-4.0.0-py3-none-win_ia64.whl", hash = "sha256:9a03e16e55465177d416699331b0f3564138f1807ecc5f2de9d55d8f188d08c7"}, - {file = "watchdog-4.0.0.tar.gz", hash = "sha256:e3e7065cbdabe6183ab82199d7a4f6b3ba0a438c5a512a68559846ccb76a78ec"}, + {file = "watchdog-4.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:da2dfdaa8006eb6a71051795856bedd97e5b03e57da96f98e375682c48850645"}, + {file = "watchdog-4.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e93f451f2dfa433d97765ca2634628b789b49ba8b504fdde5837cdcf25fdb53b"}, + {file = "watchdog-4.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ef0107bbb6a55f5be727cfc2ef945d5676b97bffb8425650dadbb184be9f9a2b"}, + {file = "watchdog-4.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:17e32f147d8bf9657e0922c0940bcde863b894cd871dbb694beb6704cfbd2fb5"}, + {file = "watchdog-4.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:03e70d2df2258fb6cb0e95bbdbe06c16e608af94a3ffbd2b90c3f1e83eb10767"}, + {file = "watchdog-4.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:123587af84260c991dc5f62a6e7ef3d1c57dfddc99faacee508c71d287248459"}, + {file = "watchdog-4.0.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:093b23e6906a8b97051191a4a0c73a77ecc958121d42346274c6af6520dec175"}, + {file = "watchdog-4.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:611be3904f9843f0529c35a3ff3fd617449463cb4b73b1633950b3d97fa4bfb7"}, + {file = "watchdog-4.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:62c613ad689ddcb11707f030e722fa929f322ef7e4f18f5335d2b73c61a85c28"}, + {file = "watchdog-4.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:d4925e4bf7b9bddd1c3de13c9b8a2cdb89a468f640e66fbfabaf735bd85b3e35"}, + {file = "watchdog-4.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cad0bbd66cd59fc474b4a4376bc5ac3fc698723510cbb64091c2a793b18654db"}, + {file = "watchdog-4.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a3c2c317a8fb53e5b3d25790553796105501a235343f5d2bf23bb8649c2c8709"}, + {file = "watchdog-4.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c9904904b6564d4ee8a1ed820db76185a3c96e05560c776c79a6ce5ab71888ba"}, + {file = "watchdog-4.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:667f3c579e813fcbad1b784db7a1aaa96524bed53437e119f6a2f5de4db04235"}, + {file = "watchdog-4.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d10a681c9a1d5a77e75c48a3b8e1a9f2ae2928eda463e8d33660437705659682"}, + {file = "watchdog-4.0.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0144c0ea9997b92615af1d94afc0c217e07ce2c14912c7b1a5731776329fcfc7"}, + {file = "watchdog-4.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:998d2be6976a0ee3a81fb8e2777900c28641fb5bfbd0c84717d89bca0addcdc5"}, + {file = "watchdog-4.0.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e7921319fe4430b11278d924ef66d4daa469fafb1da679a2e48c935fa27af193"}, + {file = "watchdog-4.0.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:f0de0f284248ab40188f23380b03b59126d1479cd59940f2a34f8852db710625"}, + {file = "watchdog-4.0.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:bca36be5707e81b9e6ce3208d92d95540d4ca244c006b61511753583c81c70dd"}, + {file = "watchdog-4.0.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:ab998f567ebdf6b1da7dc1e5accfaa7c6992244629c0fdaef062f43249bd8dee"}, + {file = "watchdog-4.0.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:dddba7ca1c807045323b6af4ff80f5ddc4d654c8bce8317dde1bd96b128ed253"}, + {file = "watchdog-4.0.1-py3-none-manylinux2014_armv7l.whl", hash = "sha256:4513ec234c68b14d4161440e07f995f231be21a09329051e67a2118a7a612d2d"}, + {file = "watchdog-4.0.1-py3-none-manylinux2014_i686.whl", hash = "sha256:4107ac5ab936a63952dea2a46a734a23230aa2f6f9db1291bf171dac3ebd53c6"}, + {file = "watchdog-4.0.1-py3-none-manylinux2014_ppc64.whl", hash = "sha256:6e8c70d2cd745daec2a08734d9f63092b793ad97612470a0ee4cbb8f5f705c57"}, + {file = "watchdog-4.0.1-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:f27279d060e2ab24c0aa98363ff906d2386aa6c4dc2f1a374655d4e02a6c5e5e"}, + {file = "watchdog-4.0.1-py3-none-manylinux2014_s390x.whl", hash = "sha256:f8affdf3c0f0466e69f5b3917cdd042f89c8c63aebdb9f7c078996f607cdb0f5"}, + {file = "watchdog-4.0.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:ac7041b385f04c047fcc2951dc001671dee1b7e0615cde772e84b01fbf68ee84"}, + {file = "watchdog-4.0.1-py3-none-win32.whl", hash = "sha256:206afc3d964f9a233e6ad34618ec60b9837d0582b500b63687e34011e15bb429"}, + {file = "watchdog-4.0.1-py3-none-win_amd64.whl", hash = "sha256:7577b3c43e5909623149f76b099ac49a1a01ca4e167d1785c76eb52fa585745a"}, + {file = "watchdog-4.0.1-py3-none-win_ia64.whl", hash = "sha256:d7b9f5f3299e8dd230880b6c55504a1f69cf1e4316275d1b215ebdd8187ec88d"}, + {file = "watchdog-4.0.1.tar.gz", hash = "sha256:eebaacf674fa25511e8867028d281e602ee6500045b57f43b08778082f7f8b44"}, ] [package.extras] @@ -1894,4 +1835,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "b84212079a0b3a10ad7759bca1557e81458e4dd7c26fba596323382d3763edd7" +content-hash = "eab37187d7130cd3a5b98edbd5e3e85e67e7db8d050a2f3077a6ad07a90166c9" diff --git a/libs/astradb/pyproject.toml b/libs/astradb/pyproject.toml index d466804..ea550f2 100644 --- a/libs/astradb/pyproject.toml +++ b/libs/astradb/pyproject.toml @@ -13,7 +13,7 @@ license = "MIT" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" langchain-core = ">=0.1.31,<0.3" -astrapy = "^1" +astrapy = "^1.2" numpy = "^1" [tool.poetry.group.test] diff --git a/libs/astradb/testing.env.sample b/libs/astradb/testing.env.sample new file mode 100644 index 0000000..2edbbd4 --- /dev/null +++ b/libs/astradb/testing.env.sample @@ -0,0 +1,11 @@ +export ASTRA_DB_APPLICATION_TOKEN="AstraCS:aaabbbccc..." +export ASTRA_DB_API_ENDPOINT="https://0123...-region.apps.astra.datastax.com" +export ASTRA_DB_KEYSPACE="default_keyspace" + +# required to test vectorize with SHARED_SECRET +export SHARED_SECRET_NAME_OPENAI="NAME_SUPPLIED_IN_ASTRA_KMS" +# required to test vectorize with HEADER +export OPENAI_API_KEY="sk-aaabbbccc..." + +# Also invoke pytest prepending `NVIDIA_VECTORIZE_AVAILABLE=1` if +# the database supports server-side nVidia embeddings as well \ No newline at end of file diff --git a/libs/astradb/tests/integration_tests/test_vectorstores.py b/libs/astradb/tests/integration_tests/test_vectorstores.py index 7faddf8..07a82f8 100644 --- a/libs/astradb/tests/integration_tests/test_vectorstores.py +++ b/libs/astradb/tests/integration_tests/test_vectorstores.py @@ -34,16 +34,60 @@ COLLECTION_NAME_DIM2 = "lc_test_d2" COLLECTION_NAME_DIM2_EUCLIDEAN = "lc_test_d2_eucl" -COLLECTION_NAME_VECTORIZE = "lc_test_vectorize" +COLLECTION_NAME_VECTORIZE_OPENAI = "lc_test_vec_openai" +COLLECTION_NAME_VECTORIZE_OPENAI_HEADER = "lc_test_vec_openai_h" +COLLECTION_NAME_VECTORIZE_NVIDIA = "lc_test_nvidia" MATCH_EPSILON = 0.0001 +# For the time being, prod-regions described only +OPENAI_VECTORIZE_REGIONS_MAP = { + "prod": {"us-east-2", "westus3", "us-east1"}, # resp. aws, azure, gcp +} + +openai_vectorize_options = CollectionVectorServiceOptions( + provider="openai", + model_name="text-embedding-3-small", + authentication={ + "providerKey": f"{os.environ.get('SHARED_SECRET_NAME_OPENAI', '')}.providerKey", + }, +) +openai_vectorize_options_header = CollectionVectorServiceOptions( + provider="openai", + model_name="text-embedding-3-small", +) +nvidia_vectorize_options = CollectionVectorServiceOptions( + provider="nvidia", + model_name="NV-Embed-QA", +) + + +def is_nvidia_vector_service_available() -> bool: + # For the time being, this is manually controlled + if os.environ.get("NVIDIA_VECTORIZE_AVAILABLE"): + try: + return int(os.environ["NVIDIA_VECTORIZE_AVAILABLE"]) != 0 + except Exception: + return False + else: + return False -def is_vector_service_available() -> bool: + +def is_openai_vector_service_available() -> bool: + env: str + if "astra.datastax.com" in os.environ.get("ASTRA_DB_API_ENDPOINT", ""): + env = "prod" + else: + env = "other" + openai_vectorize_regions = OPENAI_VECTORIZE_REGIONS_MAP.get(env, set()) return all( [ - "us-west-2" in os.environ.get("ASTRA_DB_API_ENDPOINT", ""), - "astra-dev.datastax.com" in os.environ.get("ASTRA_DB_API_ENDPOINT", ""), + any( + openai_region in os.environ.get("ASTRA_DB_API_ENDPOINT", "") + for openai_region in openai_vectorize_regions + ), + "astra.datastax.com" in os.environ.get("ASTRA_DB_API_ENDPOINT", ""), + os.environ.get("SHARED_SECRET_NAME_OPENAI"), ] ) @@ -176,31 +220,77 @@ def vectorize_store( astradb_credentials: AstraDBCredentials, ) -> Iterable[AstraDBVectorStore]: """ - astra db vector store with server-side embeddings using the nvidia model + astra db vector store with server-side embeddings using openai + shared_secret """ - # Only available in dev us-west-2 now - if not is_vector_service_available(): - pytest.skip("vectorize unavailable") + if not is_openai_vector_service_available(): + pytest.skip("vectorize/openai unavailable") - options = CollectionVectorServiceOptions( - provider="nvidia", model_name="NV-Embed-QA" + v_store = AstraDBVectorStore( + collection_vector_service_options=openai_vectorize_options, + collection_name=COLLECTION_NAME_VECTORIZE_OPENAI, + **astradb_credentials, ) + v_store.clear() + + yield v_store + + # explicitly delete the collection to avoid max collection limit + v_store.delete_collection() + + +@pytest.fixture(scope="function") +def vectorize_store_w_header( + astradb_credentials: AstraDBCredentials, +) -> Iterable[AstraDBVectorStore]: + """ + astra db vector store with server-side embeddings using openai + header + """ + if not is_openai_vector_service_available(): + pytest.skip("vectorize/openai service unavailable") + + if not os.environ.get("OPENAI_API_KEY"): + pytest.skip("OpenAI key not available") + v_store = AstraDBVectorStore( - collection_vector_service_options=options, - collection_name=COLLECTION_NAME_VECTORIZE, + collection_vector_service_options=openai_vectorize_options_header, + collection_name=COLLECTION_NAME_VECTORIZE_OPENAI_HEADER, + collection_embedding_api_key=os.environ["OPENAI_API_KEY"], **astradb_credentials, ) v_store.clear() yield v_store - # explicilty delete the collection to avoid max collection limit + # explicitly delete the collection to avoid max collection limit + v_store.delete_collection() + + +@pytest.fixture(scope="function") +def vectorize_store_nvidia( + astradb_credentials: AstraDBCredentials, +) -> Iterable[AstraDBVectorStore]: + """ + astra db vector store with server-side embeddings using the nvidia model + """ + if not is_nvidia_vector_service_available(): + pytest.skip("vectorize/nvidia unavailable") + + v_store = AstraDBVectorStore( + collection_vector_service_options=nvidia_vectorize_options, + collection_name=COLLECTION_NAME_VECTORIZE_NVIDIA, + **astradb_credentials, + ) + v_store.clear() + + yield v_store + + # explicitly delete the collection to avoid max collection limit v_store.delete_collection() @pytest.mark.skipif(not _has_env_vars(), reason="Missing Astra DB env. vars") class TestAstraDBVectorStore: - def test_astradb_vectorstore_create_delete( + def test_astradb_vectorstore_create_delete_sync( self, astradb_credentials: AstraDBCredentials ) -> None: """Create and delete.""" @@ -235,18 +325,15 @@ def test_astradb_vectorstore_create_delete( v_store_2.clear() @pytest.mark.skipif( - not is_vector_service_available(), reason="vectorize unavailable" + not is_openai_vector_service_available(), reason="vectorize unavailable" ) - def test_astradb_vectorstore_create_delete_vectorize( + def test_astradb_vectorstore_create_delete_vectorize_sync( self, astradb_credentials: AstraDBCredentials ) -> None: """Create and delete with vectorize option.""" - options = CollectionVectorServiceOptions( - provider="nvidia", model_name="NV-Embed-QA" - ) v_store = AstraDBVectorStore( - collection_vector_service_options=options, - collection_name=COLLECTION_NAME_VECTORIZE, + collection_vector_service_options=openai_vectorize_options, + collection_name=COLLECTION_NAME_VECTORIZE_OPENAI, **astradb_credentials, ) v_store.add_texts(["Sample 1"]) @@ -280,18 +367,15 @@ async def test_astradb_vectorstore_create_delete_async( await v_store_2.aclear() @pytest.mark.skipif( - not is_vector_service_available(), reason="vectorize unavailable" + not is_openai_vector_service_available(), reason="vectorize unavailable" ) async def test_astradb_vectorstore_create_delete_vectorize_async( self, astradb_credentials: AstraDBCredentials ) -> None: """Create and delete with vectorize option.""" - options = CollectionVectorServiceOptions( - provider="nvidia", model_name="NV-Embed-QA" - ) v_store = AstraDBVectorStore( - collection_vector_service_options=options, - collection_name=COLLECTION_NAME_VECTORIZE, + collection_vector_service_options=openai_vectorize_options, + collection_name=COLLECTION_NAME_VECTORIZE_OPENAI, **astradb_credentials, ) await v_store.adelete_collection() @@ -300,7 +384,7 @@ async def test_astradb_vectorstore_create_delete_vectorize_async( SKIP_COLLECTION_DELETE, reason="Collection-deletion tests are suppressed", ) - def test_astradb_vectorstore_pre_delete_collection( + def test_astradb_vectorstore_pre_delete_collection_sync( self, astradb_credentials: AstraDBCredentials ) -> None: """Use of the pre_delete_collection flag.""" @@ -369,7 +453,7 @@ async def test_astradb_vectorstore_pre_delete_collection_async( finally: await v_store.adelete_collection() - def test_astradb_vectorstore_from_x( + def test_astradb_vectorstore_from_x_sync( self, astradb_credentials: AstraDBCredentials ) -> None: """from_texts and from_documents methods.""" @@ -414,27 +498,23 @@ def test_astradb_vectorstore_from_x( v_store_2.clear() @pytest.mark.skipif( - not is_vector_service_available(), reason="vectorize unavailable" + not is_openai_vector_service_available(), reason="vectorize unavailable" ) - def test_astradb_vectorstore_from_x_vectorize( + def test_astradb_vectorstore_from_x_vectorize_sync( self, astradb_credentials: AstraDBCredentials ) -> None: """from_texts and from_documents methods with vectorize.""" - options = CollectionVectorServiceOptions( - provider="nvidia", model_name="NV-Embed-QA" - ) - AstraDBVectorStore( - collection_vector_service_options=options, - collection_name=COLLECTION_NAME_VECTORIZE, + collection_vector_service_options=openai_vectorize_options, + collection_name=COLLECTION_NAME_VECTORIZE_OPENAI, **astradb_credentials, ).clear() # from_texts v_store = AstraDBVectorStore.from_texts( texts=["Hi", "Ho"], - collection_vector_service_options=options, - collection_name=COLLECTION_NAME_VECTORIZE, + collection_vector_service_options=openai_vectorize_options, + collection_name=COLLECTION_NAME_VECTORIZE_OPENAI, **astradb_credentials, ) try: @@ -448,8 +528,8 @@ def test_astradb_vectorstore_from_x_vectorize( Document(page_content="Hee"), Document(page_content="Hoi"), ], - collection_vector_service_options=options, - collection_name=COLLECTION_NAME_VECTORIZE, + collection_vector_service_options=openai_vectorize_options, + collection_name=COLLECTION_NAME_VECTORIZE_OPENAI, **astradb_credentials, ) try: @@ -504,20 +584,17 @@ async def test_astradb_vectorstore_from_x_async( await v_store_2.aclear() @pytest.mark.skipif( - not is_vector_service_available(), reason="vectorize unavailable" + not is_openai_vector_service_available(), reason="vectorize unavailable" ) - async def test_astradb_vectorstore_from_x_async_vectorize( + async def test_astradb_vectorstore_from_x_vectorize_async( self, astradb_credentials: AstraDBCredentials ) -> None: """from_texts and from_documents methods with vectorize.""" # from_text with vectorize - options = CollectionVectorServiceOptions( - provider="nvidia", model_name="NV-Embed-QA" - ) v_store = await AstraDBVectorStore.afrom_texts( texts=["Haa", "Huu"], - collection_vector_service_options=options, - collection_name=COLLECTION_NAME_VECTORIZE, + collection_vector_service_options=openai_vectorize_options, + collection_name=COLLECTION_NAME_VECTORIZE_OPENAI, **astradb_credentials, ) try: @@ -533,8 +610,8 @@ async def test_astradb_vectorstore_from_x_async_vectorize( Document(page_content="HeeH"), Document(page_content="HooH"), ], - collection_vector_service_options=options, - collection_name=COLLECTION_NAME_VECTORIZE, + collection_vector_service_options=openai_vectorize_options, + collection_name=COLLECTION_NAME_VECTORIZE_OPENAI, **astradb_credentials, ) try: @@ -544,8 +621,16 @@ async def test_astradb_vectorstore_from_x_async_vectorize( finally: await v_store_2.adelete_collection() - @pytest.mark.parametrize("vector_store", ["store_someemb", "vectorize_store"]) - def test_astradb_vectorstore_crud( + @pytest.mark.parametrize( + "vector_store", + [ + "store_someemb", + "vectorize_store", + "vectorize_store_w_header", + "vectorize_store_nvidia", + ], + ) + def test_astradb_vectorstore_crud_sync( self, vector_store: str, request: pytest.FixtureRequest ) -> None: """Basic add/delete/update behaviour.""" @@ -605,7 +690,15 @@ def test_astradb_vectorstore_crud( res4 = vstore.similarity_search("ww", k=1, filter={"k": "w"}) assert res4[0].metadata["ord"] == 205 - @pytest.mark.parametrize("vector_store", ["store_someemb", "vectorize_store"]) + @pytest.mark.parametrize( + "vector_store", + [ + "store_someemb", + "vectorize_store", + "vectorize_store_w_header", + "vectorize_store_nvidia", + ], + ) async def test_astradb_vectorstore_crud_async( self, vector_store: str, request: pytest.FixtureRequest ) -> None: @@ -666,7 +759,9 @@ async def test_astradb_vectorstore_crud_async( res4 = await vstore.asimilarity_search("ww", k=1, filter={"k": "w"}) assert res4[0].metadata["ord"] == 205 - def test_astradb_vectorstore_mmr(self, store_parseremb: AstraDBVectorStore) -> None: + def test_astradb_vectorstore_mmr_sync( + self, store_parseremb: AstraDBVectorStore + ) -> None: """ MMR testing. We work on the unit circle with angle multiples of 2*pi/20 and prepare a store with known vectors for a controlled @@ -719,7 +814,7 @@ def _v_from_i(i: int, N: int) -> str: res_i_vals = {doc.metadata["i"] for doc in res1} assert res_i_vals == {0, 4} - def test_astradb_vectorstore_mmr_vectorize_unsupported( + def test_astradb_vectorstore_mmr_vectorize_unsupported_sync( self, vectorize_store: AstraDBVectorStore ) -> None: """ @@ -737,7 +832,15 @@ async def test_astradb_vectorstore_mmr_vectorize_unsupported_async( with pytest.raises(ValueError): await vectorize_store.amax_marginal_relevance_search("aa", k=2, fetch_k=3) - @pytest.mark.parametrize("vector_store", ["store_someemb", "vectorize_store"]) + @pytest.mark.parametrize( + "vector_store", + [ + "store_someemb", + "vectorize_store", + "vectorize_store_w_header", + "vectorize_store_nvidia", + ], + ) def test_astradb_vectorstore_metadata( self, vector_store: str, request: pytest.FixtureRequest ) -> None: @@ -804,7 +907,7 @@ def test_astradb_vectorstore_metadata( assert {doc.page_content for doc in res4} == {"q", "r"} @pytest.mark.parametrize("vector_store", ["store_parseremb"]) - def test_astradb_vectorstore_similarity_scale( + def test_astradb_vectorstore_similarity_scale_sync( self, vector_store: str, request: pytest.FixtureRequest ) -> None: """Scale of the similarity scores.""" @@ -845,7 +948,15 @@ async def test_astradb_vectorstore_similarity_scale_async( sco_near, sco_far = scores assert abs(1 - sco_near) < MATCH_EPSILON and abs(sco_far) < MATCH_EPSILON - @pytest.mark.parametrize("vector_store", ["store_someemb", "vectorize_store"]) + @pytest.mark.parametrize( + "vector_store", + [ + "store_someemb", + "vectorize_store", + "vectorize_store_w_header", + "vectorize_store_nvidia", + ], + ) def test_astradb_vectorstore_massive_delete( self, vector_store: str, request: pytest.FixtureRequest ) -> None: @@ -894,7 +1005,7 @@ def test_astradb_vectorstore_delete_collection( with pytest.raises(ValueError): _ = v_store.similarity_search("hah", k=10) - def test_astradb_vectorstore_custom_params( + def test_astradb_vectorstore_custom_params_sync( self, astradb_credentials: AstraDBCredentials ) -> None: """Custom batch size and concurrency params.""" @@ -1057,7 +1168,7 @@ def test_astradb_vectorstore_metrics( else: vstore_euc.clear() - def test_astradb_vectorstore_indexing(self) -> None: + def test_astradb_vectorstore_indexing_sync(self) -> None: """ Test that the right errors/warnings are issued depending on the compatibility of on-DB indexing settings and the requested ones. diff --git a/libs/astradb/tests/unit_tests/test_astra_db_environment.py b/libs/astradb/tests/unit_tests/test_astra_db_environment.py index 22ffb89..62f6cbc 100644 --- a/libs/astradb/tests/unit_tests/test_astra_db_environment.py +++ b/libs/astradb/tests/unit_tests/test_astra_db_environment.py @@ -17,7 +17,7 @@ def test_initialization(self) -> None: # clean environment if TOKEN_ENV_VAR in os.environ: - del os.environ["TOKEN_ENV_VAR"] + del os.environ[TOKEN_ENV_VAR] if API_ENDPOINT_ENV_VAR in os.environ: del os.environ[API_ENDPOINT_ENV_VAR] if NAMESPACE_ENV_VAR in os.environ: