diff --git a/.github/workflows/_integration_test.yml b/.github/workflows/_integration_test.yml index 8a33b8db038c2..1189907e96695 100644 --- a/.github/workflows/_integration_test.yml +++ b/.github/workflows/_integration_test.yml @@ -67,6 +67,9 @@ jobs: WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }} PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} PINECONE_ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT }} + ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }} + ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }} + ASTRA_DB_KEYSPACE: ${{ secrets.ASTRA_DB_KEYSPACE }} run: | make integration_tests diff --git a/.github/workflows/_release.yml b/.github/workflows/_release.yml index b3c35b1f6cca3..1221d6a6d6ae8 100644 --- a/.github/workflows/_release.yml +++ b/.github/workflows/_release.yml @@ -187,6 +187,9 @@ jobs: WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }} PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} PINECONE_ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT }} + ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }} + ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }} + ASTRA_DB_KEYSPACE: ${{ secrets.ASTRA_DB_KEYSPACE }} run: make integration_tests working-directory: ${{ inputs.working-directory }} diff --git a/docs/docs/integrations/document_loaders/cassandra.ipynb b/docs/docs/integrations/document_loaders/cassandra.ipynb index 49f261a18a84b..b69b1135a2bbe 100644 --- a/docs/docs/integrations/document_loaders/cassandra.ipynb +++ b/docs/docs/integrations/document_loaders/cassandra.ipynb @@ -72,57 +72,72 @@ }, { "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "### Init from a cassandra driver Session\n", "\n", "You need to create a `cassandra.cluster.Session` object, as described in the [Cassandra driver documentation](https://docs.datastax.com/en/developer/python-driver/latest/api/cassandra/cluster/#module-cassandra.cluster). The details vary (e.g. 
with network settings and authentication), but this might be something like:" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [], "source": [ "from cassandra.cluster import Cluster\n", "\n", "cluster = Cluster()\n", "session = cluster.connect()" - ], - "metadata": { - "collapsed": false - }, - "execution_count": null + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "You need to provide the name of an existing keyspace of the Cassandra instance:" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [], "source": [ "CASSANDRA_KEYSPACE = input(\"CASSANDRA_KEYSPACE = \")" - ], - "metadata": { - "collapsed": false - }, - "execution_count": null + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "Creating the document loader:" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", @@ -144,18 +159,21 @@ }, { "cell_type": "code", - "outputs": [], - "source": [ - "docs = loader.load()" - ], + "execution_count": 17, "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2024-01-19T15:47:26.399472Z", "start_time": "2024-01-19T15:47:26.389145Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, - "execution_count": 17 + "outputs": [], + "source": [ + "docs = loader.load()" + ] }, { "cell_type": "code", @@ -169,7 +187,9 @@ "outputs": [ { "data": { - "text/plain": "Document(page_content='Row(_id=\\'659bdffa16cbc4586b11a423\\', title=\\'Dangerous Men\\', reviewtext=\\'\"Dangerous Men,\" the picture\\\\\\'s production notes inform, took 26 years to reach the big screen. After having seen it, I wonder: What was the rush?\\')', metadata={'table': 'movie_reviews', 'keyspace': 'default_keyspace'})" + "text/plain": [ + "Document(page_content='Row(_id=\\'659bdffa16cbc4586b11a423\\', title=\\'Dangerous Men\\', reviewtext=\\'\"Dangerous Men,\" the picture\\\\\\'s production notes inform, took 26 years to reach the big screen. After having seen it, I wonder: What was the rush?\\')', metadata={'table': 'movie_reviews', 'keyspace': 'default_keyspace'})" + ] }, "execution_count": 19, "metadata": {}, @@ -182,17 +202,27 @@ }, { "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "### Init from cassio\n", "\n", "It's also possible to use cassio to configure the session and keyspace." - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [], "source": [ "import cassio\n", @@ -204,11 +234,16 @@ ")\n", "\n", "docs = loader.load()" - ], - "metadata": { - "collapsed": false - }, - "execution_count": null + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Attribution statement\n", + "\n", + "> Apache Cassandra, Cassandra and Apache are either registered trademarks or trademarks of the [Apache Software Foundation](http://www.apache.org/) in the United States and/or other countries." 
+ ] } ], "metadata": { @@ -233,7 +268,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/docs/integrations/llms/llm_caching.ipynb b/docs/docs/integrations/llms/llm_caching.ipynb index 791ff870b0fda..f428939a2a8c8 100644 --- a/docs/docs/integrations/llms/llm_caching.ipynb +++ b/docs/docs/integrations/llms/llm_caching.ipynb @@ -1131,6 +1131,16 @@ "print(llm(\"How come we always see one face of the moon?\"))" ] }, + { + "cell_type": "markdown", + "id": "55dc84b3-37cb-4f19-b175-40e18e06f83f", + "metadata": {}, + "source": [ + "#### Attribution statement\n", + "\n", + ">Apache Cassandra, Cassandra and Apache are either registered trademarks or trademarks of the [Apache Software Foundation](http://www.apache.org/) in the United States and/or other countries." + ] + }, { "cell_type": "markdown", "id": "8712f8fc-bb89-4164-beb9-c672778bbd91", @@ -1588,7 +1598,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.1" + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/docs/integrations/memory/astradb_chat_message_history.ipynb b/docs/docs/integrations/memory/astradb_chat_message_history.ipynb index abe6b1b2876f6..068b804f00b45 100644 --- a/docs/docs/integrations/memory/astradb_chat_message_history.ipynb +++ b/docs/docs/integrations/memory/astradb_chat_message_history.ipynb @@ -32,7 +32,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install --upgrade --quiet \"astrapy>=0.6.2\"" + "%pip install --upgrade --quiet \"astrapy>=0.7.1\"" ] }, { diff --git a/docs/docs/integrations/memory/cassandra_chat_message_history.ipynb b/docs/docs/integrations/memory/cassandra_chat_message_history.ipynb index d802bc785da35..64ead129f51c0 100644 --- a/docs/docs/integrations/memory/cassandra_chat_message_history.ipynb +++ b/docs/docs/integrations/memory/cassandra_chat_message_history.ipynb @@ -145,6 +145,24 @@ "source": [ "message_history.messages" ] + }, + { + "cell_type": "markdown", + "id": "59902d0f-e9ba-4e3d-a7e0-ce202b9d3c43", + "metadata": {}, + "source": [ + "#### Attribution statement\n", + "\n", + "> Apache Cassandra, Cassandra and Apache are either registered trademarks or trademarks of the [Apache Software Foundation](http://www.apache.org/) in the United States and/or other countries." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7efaa51c-e9ee-4dce-80a4-eb9280a0dbe5", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -163,7 +181,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/docs/integrations/providers/astradb.mdx b/docs/docs/integrations/providers/astradb.mdx index 20a94d736b361..2abe18c1bf6fb 100644 --- a/docs/docs/integrations/providers/astradb.mdx +++ b/docs/docs/integrations/providers/astradb.mdx @@ -1,21 +1,17 @@ # Astra DB -This page lists the integrations available with [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) and [Apache Cassandra®](https://cassandra.apache.org/). +> DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Apache Cassandra® and made conveniently available +> through an easy-to-use JSON API. 
### Setup Install the following Python package: ```bash -pip install "astrapy>=0.5.3" +pip install "astrapy>=0.7.1" ``` -## Astra DB - -> DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Cassandra and made conveniently available -> through an easy-to-use JSON API. - -### Vector Store +## Vector Store ```python from langchain_community.vectorstores import AstraDB @@ -29,11 +25,22 @@ vector_store = AstraDB( Learn more in the [example notebook](/docs/integrations/vectorstores/astradb). -### LLM Cache +## Chat message history + +```python +from langchain_community.chat_message_histories import AstraDBChatMessageHistory +message_history = AstraDBChatMessageHistory( + session_id="test-session", + api_endpoint="...", + token="...", +) +``` + +## LLM Cache ```python from langchain.globals import set_llm_cache -from langchain.cache import AstraDBCache +from langchain_community.cache import AstraDBCache set_llm_cache(AstraDBCache( api_endpoint="...", token="...", @@ -43,11 +50,11 @@ set_llm_cache(AstraDBCache( Learn more in the [example notebook](/docs/integrations/llms/llm_caching#astra-db-caches) (scroll to the Astra DB section). -### Semantic LLM Cache +## Semantic LLM Cache ```python from langchain.globals import set_llm_cache -from langchain.cache import AstraDBSemanticCache +from langchain_community.cache import AstraDBSemanticCache set_llm_cache(AstraDBSemanticCache( embedding=my_embedding, api_endpoint="...", @@ -57,20 +64,9 @@ set_llm_cache(AstraDBSemanticCache( Learn more in the [example notebook](/docs/integrations/llms/llm_caching#astra-db-caches) (scroll to the appropriate section). -### Chat message history - -```python -from langchain.memory import AstraDBChatMessageHistory -message_history = AstraDBChatMessageHistory( - session_id="test-session", - api_endpoint="...", - token="...", -) -``` - Learn more in the [example notebook](/docs/integrations/memory/astradb_chat_message_history). -### Document loader +## Document loader ```python from langchain_community.document_loaders import AstraDBLoader @@ -83,7 +79,7 @@ loader = AstraDBLoader( Learn more in the [example notebook](/docs/integrations/document_loaders/astradb). -### Self-querying retriever +## Self-querying retriever ```python from langchain_community.vectorstores import AstraDB @@ -106,7 +102,7 @@ retriever = SelfQueryRetriever.from_llm( Learn more in the [example notebook](/docs/integrations/retrievers/self_query/astradb). -### Store +## Store ```python from langchain_community.storage import AstraDBStore @@ -119,7 +115,7 @@ store = AstraDBStore( Learn more in the [example notebook](/docs/integrations/stores/astradb#astradbstore). -### Byte Store +## Byte Store ```python from langchain_community.storage import AstraDBByteStore @@ -131,57 +127,3 @@ store = AstraDBByteStore( ``` Learn more in the [example notebook](/docs/integrations/stores/astradb#astradbbytestore). - -## Apache Cassandra and Astra DB through CQL - -> [Cassandra](https://cassandra.apache.org/) is a NoSQL, row-oriented, highly scalable and highly available database. -> Starting with version 5.0, the database ships with [vector search capabilities](https://cassandra.apache.org/doc/trunk/cassandra/vector-search/overview.html). -> DataStax [Astra DB through CQL](https://docs.datastax.com/en/astra-serverless/docs/vector-search/quickstart.html) is a managed serverless database built on Cassandra, offering the same interface and strengths. 
-
-These databases use the CQL protocol (Cassandra Query Language).
-Hence, a different set of connectors, outlined below, shall be used.
-
-### Vector Store
-
-```python
-from langchain_community.vectorstores import Cassandra
-vector_store = Cassandra(
-    embedding=my_embedding,
-    table_name="my_store",
-)
-```
-
-Learn more in the [example notebook](/docs/integrations/vectorstores/astradb#apache-cassandra-and-astra-db-through-cql) (scroll down to the CQL-specific section).
-
-
-### Memory
-
-```python
-from langchain.memory import CassandraChatMessageHistory
-message_history = CassandraChatMessageHistory(session_id="my-session")
-```
-
-Learn more in the [example notebook](/docs/integrations/memory/cassandra_chat_message_history).
-
-
-### LLM Cache
-
-```python
-from langchain.cache import CassandraCache
-langchain.llm_cache = CassandraCache()
-```
-
-Learn more in the [example notebook](/docs/integrations/llms/llm_caching#cassandra-caches) (scroll to the Cassandra section).
-
-
-### Semantic LLM Cache
-
-```python
-from langchain.cache import CassandraSemanticCache
-cassSemanticCache = CassandraSemanticCache(
-    embedding=my_embedding,
-    table_name="my_store",
-)
-```
-
-Learn more in the [example notebook](/docs/integrations/llms/llm_caching#cassandra-caches) (scroll to the appropriate section).
diff --git a/docs/docs/integrations/providers/cassandra.mdx b/docs/docs/integrations/providers/cassandra.mdx
new file mode 100644
index 0000000000000..392f7a1767e3e
--- /dev/null
+++ b/docs/docs/integrations/providers/cassandra.mdx
@@ -0,0 +1,76 @@
+# Apache Cassandra
+
+> [Apache Cassandra®](https://cassandra.apache.org/) is a NoSQL, row-oriented, highly scalable and highly available database.
+> Starting with version 5.0, the database ships with [vector search capabilities](https://cassandra.apache.org/doc/trunk/cassandra/vector-search/overview.html).
+
+The integrations outlined on this page can be used with Cassandra as well as other CQL-compatible databases, i.e. those using the Cassandra Query Language protocol.
+
+
+## Setup
+
+Install the following Python package:
+
+```bash
+pip install "cassio>=0.1.4"
+```
+
+
+## Vector Store
+
+```python
+from langchain_community.vectorstores import Cassandra
+vector_store = Cassandra(
+    embedding=my_embedding,
+    table_name="my_store",
+)
+```
+
+Learn more in the [example notebook](/docs/integrations/vectorstores/cassandra).
+
+## Chat message history
+
+```python
+from langchain_community.chat_message_histories import CassandraChatMessageHistory
+message_history = CassandraChatMessageHistory(session_id="my-session")
+```
+
+Learn more in the [example notebook](/docs/integrations/memory/cassandra_chat_message_history).
+
+
+## LLM Cache
+
+```python
+from langchain.globals import set_llm_cache
+from langchain_community.cache import CassandraCache
+set_llm_cache(CassandraCache())
+```
+
+Learn more in the [example notebook](/docs/integrations/llms/llm_caching#cassandra-caches) (scroll to the Cassandra section).
+
+
+## Semantic LLM Cache
+
+```python
+from langchain.globals import set_llm_cache
+from langchain_community.cache import CassandraSemanticCache
+set_llm_cache(CassandraSemanticCache(
+    embedding=my_embedding,
+    table_name="my_store",
+))
+```
+
+Learn more in the [example notebook](/docs/integrations/llms/llm_caching#cassandra-caches) (scroll to the appropriate section).
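+
+As a supplementary sketch of how the cache then comes into play (assuming an LLM or chat model object `llm`, for instance a `ChatOpenAI` instance, is configured elsewhere), a semantically similar follow-up prompt may be answered from Cassandra instead of calling the model again:
+
+```python
+# `llm` is assumed to be any LangChain LLM / chat model set up elsewhere;
+# with the semantic cache set above, lookups happen transparently on each call.
+llm.invoke("Tell me the distance to the Moon")  # computed by the model, then cached
+llm.invoke("How far away is the Moon?")  # may be served from the semantic cache
+```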
+ +## Document loader + +```python +from langchain_community.document_loaders import CassandraLoader +loader = CassandraLoader(table="my_table") +docs = loader.load() +``` + +Learn more in the [example notebook](/docs/integrations/document_loaders/cassandra). + +#### Attribution statement + +> Apache Cassandra, Cassandra and Apache are either registered trademarks or trademarks of the [Apache Software Foundation](http://www.apache.org/) in the United States and/or other countries. diff --git a/docs/docs/integrations/vectorstores/astradb.ipynb b/docs/docs/integrations/vectorstores/astradb.ipynb index c4b354225d88b..2b32dd9ea713b 100644 --- a/docs/docs/integrations/vectorstores/astradb.ipynb +++ b/docs/docs/integrations/vectorstores/astradb.ipynb @@ -2,13 +2,27 @@ "cells": [ { "cell_type": "markdown", - "id": "d2d6ca14-fb7e-4172-9aa0-a3119a064b96", + "id": "66d0270a-b74f-4110-901e-7960b00297af", "metadata": {}, "source": [ "# Astra DB\n", "\n", - "This page provides a quickstart for using [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) and [Apache Cassandra®](https://cassandra.apache.org/) as a Vector Store.\n", - "\n", + "This page provides a quickstart for using [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) as a Vector Store." + ] + }, + { + "cell_type": "markdown", + "id": "ab8cd64f-3bb2-4f16-a0a9-12d7b1789bf6", + "metadata": {}, + "source": [ + "> DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Apache Cassandra® and made conveniently available through an easy-to-use JSON API." + ] + }, + { + "cell_type": "markdown", + "id": "d2d6ca14-fb7e-4172-9aa0-a3119a064b96", + "metadata": {}, + "source": [ "_Note: in addition to access to the database, an OpenAI API Key is required to run the full example._" ] }, @@ -35,7 +49,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install --upgrade --quiet \"astrapy>=0.5.3\"" + "%pip install --upgrade --quiet \"astrapy>=0.7.1\"" ] }, { @@ -44,7 +58,7 @@ "metadata": {}, "source": [ "_Note: depending on your LangChain setup, you may need to install/upgrade other dependencies needed for this demo_\n", - "_(specifically, recent versions of `datasets`, `openai`, `pypdf` and `tiktoken` are required)._" + "_(specifically, recent versions of `datasets`, `langchain-openai` and `pypdf` are required, along with `langchain-community`)._" ] }, { @@ -89,28 +103,12 @@ "embe = OpenAIEmbeddings()" ] }, - { - "cell_type": "markdown", - "id": "dd8caa76-bc41-429e-a93b-989ba13aff01", - "metadata": {}, - "source": [ - "_Keep reading to connect with Astra DB. For usage with Apache Cassandra and Astra DB through CQL, scroll to the section below._" - ] - }, { "cell_type": "markdown", "id": "22866f09-e10d-4f05-a24b-b9420129462e", "metadata": {}, "source": [ - "## Astra DB" - ] - }, - { - "cell_type": "markdown", - "id": "5fba47cc-3533-42fc-84b7-9dc14cd68b2b", - "metadata": {}, - "source": [ - "DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Cassandra and made conveniently available through an easy-to-use JSON API." 
+ "## Import the Vector Store" ] }, { @@ -128,10 +126,13 @@ "id": "68f61b01-3e09-47c1-9d67-5d6915c86626", "metadata": {}, "source": [ - "### Astra DB connection parameters\n", + "## Connection parameters\n", + "\n", + "These are found on your Astra DB dashboard:\n", "\n", "- the API Endpoint looks like `https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com`\n", - "- the Token looks like `AstraCS:6gBhNmsk135....`" + "- the Token looks like `AstraCS:6gBhNmsk135....`\n", + "- you may optionally provide a _Namespace_ such as `my_namespace`" ] }, { @@ -142,7 +143,21 @@ "outputs": [], "source": [ "ASTRA_DB_API_ENDPOINT = input(\"ASTRA_DB_API_ENDPOINT = \")\n", - "ASTRA_DB_APPLICATION_TOKEN = getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")" + "ASTRA_DB_APPLICATION_TOKEN = getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")\n", + "\n", + "desired_namespace = input(\"(optional) Namespace = \")\n", + "if desired_namespace:\n", + " ASTRA_DB_KEYSPACE = desired_namespace\n", + "else:\n", + " ASTRA_DB_KEYSPACE = None" + ] + }, + { + "cell_type": "markdown", + "id": "196268bd-a950-41c3-bede-f5b55f6a0804", + "metadata": {}, + "source": [ + "Now you can create the vector store:" ] }, { @@ -157,6 +172,7 @@ " collection_name=\"astra_vector_demo\",\n", " api_endpoint=ASTRA_DB_API_ENDPOINT,\n", " token=ASTRA_DB_APPLICATION_TOKEN,\n", + " namespace=ASTRA_DB_KEYSPACE,\n", ")" ] }, @@ -165,7 +181,7 @@ "id": "9a348678-b2f6-46ca-9a0d-2eb4cc6b66b1", "metadata": {}, "source": [ - "### Load a dataset" + "## Load a dataset" ] }, { @@ -243,7 +259,7 @@ "id": "c031760a-1fc5-4855-adf2-02ed52fe2181", "metadata": {}, "source": [ - "### Run simple searches" + "## Run searches" ] }, { @@ -318,12 +334,22 @@ " print(f\"* {res.page_content} [{res.metadata}]\")" ] }, + { + "cell_type": "markdown", + "id": "60fda5df-14e4-4fb0-bd17-65a393fab8a9", + "metadata": {}, + "source": [ + "### Async\n", + "\n", + "Note that the Astra DB vector store supports all fully async methods (`asimilarity_search`, `afrom_texts`, `adelete` and so on) natively, i.e. without thread wrapping involved." + ] + }, { "cell_type": "markdown", "id": "1cc86edd-692b-4495-906c-ccfd13b03c23", "metadata": {}, "source": [ - "### Deleting stored documents" + "## Deleting stored documents" ] }, { @@ -353,7 +379,7 @@ "id": "847181ba-77d1-4a17-b7f9-9e2c3d8efd13", "metadata": {}, "source": [ - "### A minimal RAG chain" + "## A minimal RAG chain" ] }, { @@ -452,7 +478,7 @@ "id": "177610c7-50d0-4b7b-8634-b03338054c8e", "metadata": {}, "source": [ - "### Cleanup" + "## Cleanup" ] }, { @@ -474,290 +500,6 @@ "source": [ "vstore.delete_collection()" ] - }, - { - "cell_type": "markdown", - "id": "94ebaab1-7cbf-4144-a147-7b0e32c43069", - "metadata": {}, - "source": [ - "## Apache Cassandra and Astra DB through CQL" - ] - }, - { - "cell_type": "markdown", - "id": "bc3931b4-211d-4f84-bcc0-51c127e3027c", - "metadata": {}, - "source": [ - "[Cassandra](https://cassandra.apache.org/) is a NoSQL, row-oriented, highly scalable and highly available database.Starting with version 5.0, the database ships with [vector search capabilities](https://cassandra.apache.org/doc/trunk/cassandra/vector-search/overview.html).\n", - "\n", - "DataStax [Astra DB through CQL](https://docs.datastax.com/en/astra-serverless/docs/vector-search/quickstart.html) is a managed serverless database built on Cassandra, offering the same interface and strengths." 
- ] - }, - { - "cell_type": "markdown", - "id": "a0055fbf-448d-4e46-9c40-28d43df25ca3", - "metadata": {}, - "source": [ - "#### What sets this case apart from \"Astra DB\" above?\n", - "\n", - "Thanks to LangChain having a standardized `VectorStore` interface, most of the \"Astra DB\" section above applies to this case as well. However, this time the database uses the CQL protocol, which means you'll use a _different_ class this time and instantiate it in another way.\n", - "\n", - "The cells below show how you should get your `vstore` object in this case and how you can clean up the database resources at the end: for the rest, i.e. the actual usage of the vector store, you will be able to run the very code that was shown above.\n", - "\n", - "In other words, running this demo in full with Cassandra or Astra DB through CQL means:\n", - "\n", - "- **initialization as shown below**\n", - "- \"Load a dataset\", _see above section_\n", - "- \"Run simple searches\", _see above section_\n", - "- \"MMR search\", _see above section_\n", - "- \"Deleting stored documents\", _see above section_\n", - "- \"A minimal RAG chain\", _see above section_\n", - "- **cleanup as shown below**" - ] - }, - { - "cell_type": "markdown", - "id": "23d12be2-745f-4e72-a82c-334a887bc7cd", - "metadata": {}, - "source": [ - "### Initialization" - ] - }, - { - "cell_type": "markdown", - "id": "e3212542-79be-423e-8e1f-b8d725e3cda8", - "metadata": {}, - "source": [ - "The class to use is the following:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "941af73e-a090-4fba-b23c-595757d470eb", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_community.vectorstores import Cassandra" - ] - }, - { - "cell_type": "markdown", - "id": "414d1e72-f7c9-4b6d-bf6f-16075712c7e3", - "metadata": {}, - "source": [ - "Now, depending on whether you connect to a Cassandra cluster or to Astra DB through CQL, you will provide different parameters when creating the vector store object." - ] - }, - { - "cell_type": "markdown", - "id": "48ecca56-71a4-4a91-b198-29384c44ce27", - "metadata": {}, - "source": [ - "#### Initialization (Cassandra cluster)" - ] - }, - { - "cell_type": "markdown", - "id": "55ebe958-5654-43e0-9aed-d607ffd3fa48", - "metadata": {}, - "source": [ - "In this case, you first need to create a `cassandra.cluster.Session` object, as described in the [Cassandra driver documentation](https://docs.datastax.com/en/developer/python-driver/latest/api/cassandra/cluster/#module-cassandra.cluster). The details vary (e.g. 
with network settings and authentication), but this might be something like:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4642dafb-a065-4063-b58c-3d276f5ad07e", - "metadata": {}, - "outputs": [], - "source": [ - "from cassandra.cluster import Cluster\n", - "\n", - "cluster = Cluster([\"127.0.0.1\"])\n", - "session = cluster.connect()" - ] - }, - { - "cell_type": "markdown", - "id": "624c93bf-fb46-4350-bcfa-09ca09dc068f", - "metadata": {}, - "source": [ - "You can now set the session, along with your desired keyspace name, as a global CassIO parameter:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "92a4ab28-1c4f-4dad-9671-d47e0b1dde7b", - "metadata": {}, - "outputs": [], - "source": [ - "import cassio\n", - "\n", - "CASSANDRA_KEYSPACE = input(\"CASSANDRA_KEYSPACE = \")\n", - "\n", - "cassio.init(session=session, keyspace=CASSANDRA_KEYSPACE)" - ] - }, - { - "cell_type": "markdown", - "id": "3b87a824-36f1-45b4-b54c-efec2a2de216", - "metadata": {}, - "source": [ - "Now you can create the vector store:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "853a2a88-a565-4e24-8789-d78c213954a6", - "metadata": {}, - "outputs": [], - "source": [ - "vstore = Cassandra(\n", - " embedding=embe,\n", - " table_name=\"cassandra_vector_demo\",\n", - " # session=None, keyspace=None # Uncomment on older versions of LangChain\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "768ddf7a-0c3e-4134-ad38-25ac53c3da7a", - "metadata": {}, - "source": [ - "#### Initialization (Astra DB through CQL)" - ] - }, - { - "cell_type": "markdown", - "id": "4ed4269a-b7e7-4503-9e66-5a11335c7681", - "metadata": {}, - "source": [ - "In this case you initialize CassIO with the following connection parameters:\n", - "\n", - "- the Database ID, e.g. `01234567-89ab-cdef-0123-456789abcdef`\n", - "- the Token, e.g. 
`AstraCS:6gBhNmsk135....` (it must be a \"Database Administrator\" token)\n", - "- Optionally a Keyspace name (if omitted, the default one for the database will be used)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5fa6bd74-d4b2-45c5-9757-96dddc6242fb", - "metadata": {}, - "outputs": [], - "source": [ - "ASTRA_DB_ID = input(\"ASTRA_DB_ID = \")\n", - "ASTRA_DB_APPLICATION_TOKEN = getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")\n", - "\n", - "desired_keyspace = input(\"ASTRA_DB_KEYSPACE (optional, can be left empty) = \")\n", - "if desired_keyspace:\n", - " ASTRA_DB_KEYSPACE = desired_keyspace\n", - "else:\n", - " ASTRA_DB_KEYSPACE = None" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "add6e585-17ff-452e-8ef6-7e485ead0b06", - "metadata": {}, - "outputs": [], - "source": [ - "import cassio\n", - "\n", - "cassio.init(\n", - " database_id=ASTRA_DB_ID,\n", - " token=ASTRA_DB_APPLICATION_TOKEN,\n", - " keyspace=ASTRA_DB_KEYSPACE,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "b305823c-bc98-4f3d-aabb-d7eb663ea421", - "metadata": {}, - "source": [ - "Now you can create the vector store:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f45f3038-9d59-41cc-8b43-774c6aa80295", - "metadata": {}, - "outputs": [], - "source": [ - "vstore = Cassandra(\n", - " embedding=embe,\n", - " table_name=\"cassandra_vector_demo\",\n", - " # session=None, keyspace=None # Uncomment on older versions of LangChain\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "39284918-cf8a-49bb-a2d3-aef285bb2ffa", - "metadata": {}, - "source": [ - "### Usage of the vector store" - ] - }, - { - "cell_type": "markdown", - "id": "3cc1aead-d6ec-48a3-affe-1d0cffa955a9", - "metadata": {}, - "source": [ - "_See the sections \"Load a dataset\" through \"A minimal RAG chain\" above._\n", - "\n", - "Speaking of the latter, you can check out a full RAG template for Astra DB through CQL [here](https://github.com/langchain-ai/langchain/tree/master/templates/cassandra-entomology-rag)." - ] - }, - { - "cell_type": "markdown", - "id": "096397d8-6622-4685-9f9d-7e238beca467", - "metadata": {}, - "source": [ - "### Cleanup" - ] - }, - { - "cell_type": "markdown", - "id": "cc1e74f9-5500-41aa-836f-235b1ed5f20c", - "metadata": {}, - "source": [ - "the following essentially retrieves the `Session` object from CassIO and runs a CQL `DROP TABLE` statement with it:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b5b82c33-0e77-4a37-852c-8d50edbdd991", - "metadata": {}, - "outputs": [], - "source": [ - "cassio.config.resolve_session().execute(\n", - " f\"DROP TABLE {cassio.config.resolve_keyspace()}.cassandra_vector_demo;\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "c10ece4d-ae06-42ab-baf4-4d0ac2051743", - "metadata": {}, - "source": [ - "### Learn more" - ] - }, - { - "cell_type": "markdown", - "id": "51ea8b69-7e15-458f-85aa-9fa199f95f9c", - "metadata": {}, - "source": [ - "For more information, extended quickstarts and additional usage examples, please visit the [CassIO documentation](https://cassio.org/frameworks/langchain/about/) for more on using the LangChain `Cassandra` vector store." 
- ]
   }
  ],
  "metadata": {
@@ -776,7 +518,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.9.18"
  }
 },
 "nbformat": 4,
diff --git a/docs/docs/integrations/vectorstores/cassandra.ipynb b/docs/docs/integrations/vectorstores/cassandra.ipynb
new file mode 100644
index 0000000000000..524f76a1052c9
--- /dev/null
+++ b/docs/docs/integrations/vectorstores/cassandra.ipynb
@@ -0,0 +1,651 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "d2d6ca14-fb7e-4172-9aa0-a3119a064b96",
+   "metadata": {},
+   "source": [
+    "# Apache Cassandra\n",
+    "\n",
+    "This page provides a quickstart for using [Apache Cassandra®](https://cassandra.apache.org/) as a Vector Store."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6a1a562e-3d1a-4693-b55d-08bf90943a9a",
+   "metadata": {},
+   "source": [
+    "> [Cassandra](https://cassandra.apache.org/) is a NoSQL, row-oriented, highly scalable and highly available database. Starting with version 5.0, the database ships with [vector search capabilities](https://cassandra.apache.org/doc/trunk/cassandra/vector-search/overview.html)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9cf37d7f-c18e-4e63-adea-138e5e981475",
+   "metadata": {},
+   "source": [
+    "_Note: in addition to access to the database, an OpenAI API Key is required to run the full example._"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bb9be7ce-8c70-4d46-9f11-71c42a36e928",
+   "metadata": {},
+   "source": [
+    "### Setup and general dependencies"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "dbe7c156-0413-47e3-9237-4769c4248869",
+   "metadata": {},
+   "source": [
+    "Use of the integration requires the following Python package."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8d00fcf4-9798-4289-9214-d9734690adfc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install --upgrade --quiet \"cassio>=0.1.4\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2453d83a-bc8f-41e1-a692-befe4dd90156",
+   "metadata": {},
+   "source": [
+    "_Note: depending on your LangChain setup, you may need to install/upgrade other dependencies needed for this demo_\n",
+    "_(specifically, recent versions of `datasets`, `langchain-openai` and `pypdf` are required, along with `langchain-community`)._"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b06619af-fea2-4863-8149-7f239a8c9c82",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from getpass import getpass\n",
+    "\n",
+    "from datasets import (\n",
+    "    load_dataset,\n",
+    ")\n",
+    "from langchain.schema import Document\n",
+    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+    "from langchain_community.document_loaders import PyPDFLoader\n",
+    "from langchain_core.output_parsers import StrOutputParser\n",
+    "from langchain_core.prompts import ChatPromptTemplate\n",
+    "from langchain_core.runnables import RunnablePassthrough\n",
+    "from langchain_openai import ChatOpenAI, OpenAIEmbeddings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1983f1da-0ae7-4a9b-bf4c-4ade328f7a3a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "os.environ[\"OPENAI_API_KEY\"] = getpass(\"OPENAI_API_KEY = \")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c656df06-e938-4bc5-b570-440b8b7a0189",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "embe = OpenAIEmbeddings()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "22866f09-e10d-4f05-a24b-b9420129462e",
+   "metadata": {},
+   "source": [
+    "## 
Import the Vector Store" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b32730d-176e-414c-9d91-fd3644c54211", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.vectorstores import Cassandra" + ] + }, + { + "cell_type": "markdown", + "id": "68f61b01-3e09-47c1-9d67-5d6915c86626", + "metadata": {}, + "source": [ + "## Connection parameters\n", + "\n", + "The Vector Store integration shown in this page can be used with Cassandra as well as other derived databases, such as Astra DB, which use the CQL (Cassandra Query Language) protocol.\n", + "\n", + "> DataStax [Astra DB](https://docs.datastax.com/en/astra-serverless/docs/vector-search/quickstart.html) is a managed serverless database built on Cassandra, offering the same interface and strengths.\n", + "\n", + "Depending on whether you connect to a Cassandra cluster or to Astra DB through CQL, you will provide different parameters when creating the vector store object." + ] + }, + { + "cell_type": "markdown", + "id": "36bbb3d9-4d07-4f63-b23d-c52be03f8938", + "metadata": {}, + "source": [ + "### Connecting to a Cassandra cluster\n", + "\n", + "You first need to create a `cassandra.cluster.Session` object, as described in the [Cassandra driver documentation](https://docs.datastax.com/en/developer/python-driver/latest/api/cassandra/cluster/#module-cassandra.cluster). The details vary (e.g. with network settings and authentication), but this might be something like:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d95bb1d4-d8a6-4e66-89bc-776f9c6f962b", + "metadata": {}, + "outputs": [], + "source": [ + "from cassandra.cluster import Cluster\n", + "\n", + "cluster = Cluster([\"127.0.0.1\"])\n", + "session = cluster.connect()" + ] + }, + { + "cell_type": "markdown", + "id": "8279aa78-96d6-43ad-aa21-79fd798d895d", + "metadata": {}, + "source": [ + "You can now set the session, along with your desired keyspace name, as a global CassIO parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29ececc4-e50b-4428-967f-4b6bbde12a14", + "metadata": {}, + "outputs": [], + "source": [ + "import cassio\n", + "\n", + "CASSANDRA_KEYSPACE = input(\"CASSANDRA_KEYSPACE = \")\n", + "\n", + "cassio.init(session=session, keyspace=CASSANDRA_KEYSPACE)" + ] + }, + { + "cell_type": "markdown", + "id": "0bd035a2-f0af-418f-94e5-0fbb4d51ac3c", + "metadata": {}, + "source": [ + "Now you can create the vector store:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eeb62cde-89fc-44d7-ba76-91e19cbc5898", + "metadata": {}, + "outputs": [], + "source": [ + "vstore = Cassandra(\n", + " embedding=embe,\n", + " table_name=\"cassandra_vector_demo\",\n", + " # session=None, keyspace=None # Uncomment on older versions of LangChain\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "ce240555-e5fc-431d-ac0f-bcf2f6e6a5fb", + "metadata": {}, + "source": [ + "_Note: you can also pass your session and keyspace directly as parameters when creating the vector store. 
Using the global `cassio.init` setting, however, comes in handy if your application uses Cassandra in several ways (for instance, for the vector store, chat memory and LLM response caching), as it allows you to centralize credential and DB connection management in one place._"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b598e5fa-eb62-4939-9734-091628e84db4",
+   "metadata": {},
+   "source": [
+    "### Connecting to Astra DB through CQL"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2feec7c3-7092-4252-9a3f-05eda4babe74",
+   "metadata": {},
+   "source": [
+    "In this case you initialize CassIO with the following connection parameters:\n",
+    "\n",
+    "- the Database ID, e.g. `01234567-89ab-cdef-0123-456789abcdef`\n",
+    "- the Token, e.g. `AstraCS:6gBhNmsk135....` (it must be a \"Database Administrator\" token)\n",
+    "- Optionally a Keyspace name (if omitted, the default one for the database will be used)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2f96147d-6d76-4101-bbb0-4a7f215c3d2d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ASTRA_DB_ID = input(\"ASTRA_DB_ID = \")\n",
+    "ASTRA_DB_APPLICATION_TOKEN = getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")\n",
+    "\n",
+    "desired_keyspace = input(\"ASTRA_DB_KEYSPACE (optional, can be left empty) = \")\n",
+    "if desired_keyspace:\n",
+    "    ASTRA_DB_KEYSPACE = desired_keyspace\n",
+    "else:\n",
+    "    ASTRA_DB_KEYSPACE = None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d653df1d-9dad-4980-ba52-76a47b4c5c1a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import cassio\n",
+    "\n",
+    "cassio.init(\n",
+    "    database_id=ASTRA_DB_ID,\n",
+    "    token=ASTRA_DB_APPLICATION_TOKEN,\n",
+    "    keyspace=ASTRA_DB_KEYSPACE,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e606b58b-d390-4fed-a2fc-65036c44860f",
+   "metadata": {},
+   "source": [
+    "Now you can create the vector store:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9cb552d1-e888-4550-a350-6df06b1f5aae",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "vstore = Cassandra(\n",
+    "    embedding=embe,\n",
+    "    table_name=\"cassandra_vector_demo\",\n",
+    "    # session=None, keyspace=None  # Uncomment on older versions of LangChain\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9a348678-b2f6-46ca-9a0d-2eb4cc6b66b1",
+   "metadata": {},
+   "source": [
+    "## Load a dataset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "552e56b0-301a-4b06-99c7-57ba6faa966f",
+   "metadata": {},
+   "source": [
+    "Convert each entry in the source dataset into a `Document`, then write them into the vector store:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3a1f532f-ad63-4256-9730-a183841bd8e9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "philo_dataset = load_dataset(\"datastax/philosopher-quotes\")[\"train\"]\n",
+    "\n",
+    "docs = []\n",
+    "for entry in philo_dataset:\n",
+    "    metadata = {\"author\": entry[\"author\"]}\n",
+    "    doc = Document(page_content=entry[\"quote\"], metadata=metadata)\n",
+    "    docs.append(doc)\n",
+    "\n",
+    "inserted_ids = vstore.add_documents(docs)\n",
+    "print(f\"\\nInserted {len(inserted_ids)} documents.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "79d4f436-ef04-4288-8f79-97c9abb983ed",
+   "metadata": {},
+   "source": [
+    "In the above, `metadata` dictionaries are created from the source data and are part of the `Document`."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "084d8802-ab39-4262-9a87-42eafb746f92",
+   "metadata": {},
+   "source": [
+    "Add some more entries, this time with `add_texts`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b6b157f5-eb31-4907-a78e-2e2b06893936",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "texts = [\"I think, therefore I am.\", \"To the things themselves!\"]\n",
+    "metadatas = [{\"author\": \"descartes\"}, {\"author\": \"husserl\"}]\n",
+    "ids = [\"desc_01\", \"huss_xy\"]\n",
+    "\n",
+    "inserted_ids_2 = vstore.add_texts(texts=texts, metadatas=metadatas, ids=ids)\n",
+    "print(f\"\\nInserted {len(inserted_ids_2)} documents.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "63840eb3-8b29-4017-bc2f-301bf5001f28",
+   "metadata": {},
+   "source": [
+    "_Note: you may want to speed up the execution of `add_texts` and `add_documents` by increasing the concurrency level for_\n",
+    "_these bulk operations - check out the methods' `batch_size` parameter_\n",
+    "_for more details. Depending on the network and the client machine specifications, your best-performing choice of parameters may vary._"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c031760a-1fc5-4855-adf2-02ed52fe2181",
+   "metadata": {},
+   "source": [
+    "## Run searches"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "02a77d8e-1aae-4054-8805-01c77947c49f",
+   "metadata": {},
+   "source": [
+    "This section demonstrates metadata filtering and getting the similarity scores back:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1761806a-1afd-4491-867c-25a80d92b9fe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results = vstore.similarity_search(\"Our life is what we make of it\", k=3)\n",
+    "for res in results:\n",
+    "    print(f\"* {res.page_content} [{res.metadata}]\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eebc4f7c-f61a-438e-b3c8-17e6888d8a0b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results_filtered = vstore.similarity_search(\n",
+    "    \"Our life is what we make of it\",\n",
+    "    k=3,\n",
+    "    filter={\"author\": \"plato\"},\n",
+    ")\n",
+    "for res in results_filtered:\n",
+    "    print(f\"* {res.page_content} [{res.metadata}]\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "11bbfe64-c0cd-40c6-866a-a5786538450e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results = vstore.similarity_search_with_score(\"Our life is what we make of it\", k=3)\n",
+    "for res, score in results:\n",
+    "    print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b14ea558-bfbe-41ce-807e-d70670060ada",
+   "metadata": {},
+   "source": [
+    "### MMR (Maximal-marginal-relevance) search"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "76381ce8-780a-4e3b-97b1-056d6782d7d5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results = vstore.max_marginal_relevance_search(\n",
+    "    \"Our life is what we make of it\",\n",
+    "    k=3,\n",
+    "    filter={\"author\": \"aristotle\"},\n",
+    ")\n",
+    "for res in results:\n",
+    "    print(f\"* {res.page_content} [{res.metadata}]\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1cc86edd-692b-4495-906c-ccfd13b03c23",
+   "metadata": {},
+   "source": [
+    "## Deleting stored documents"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "38a70ec4-b522-4d32-9ead-c642864fca37",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "delete_1 = vstore.delete(inserted_ids[:3])\n",
+    "print(f\"all_succeed={delete_1}\")  # True, all documents deleted"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d4cf49ed-9d29-4ed9-bdab-51a308c41b8e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "delete_2 = vstore.delete(inserted_ids[2:5])\n",
+    "print(f\"some_succeeds={delete_2}\")  # True, though some IDs were gone already"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "847181ba-77d1-4a17-b7f9-9e2c3d8efd13",
+   "metadata": {},
+   "source": [
+    "## A minimal RAG chain"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cd64b844-846f-43c5-a7dd-c26b9ed417d0",
+   "metadata": {},
+   "source": [
+    "The next cells will implement a simple RAG pipeline:\n",
+    "- download a sample PDF file and load it onto the store;\n",
+    "- create a RAG chain with LCEL (LangChain Expression Language), with the vector store at its heart;\n",
+    "- run the question-answering chain."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5cbc4dba-0d5e-4038-8fc5-de6cadd1c2a9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!curl -L \\\n",
+    "    \"https://github.com/awesome-astra/datasets/blob/main/demo-resources/what-is-philosophy/what-is-philosophy.pdf?raw=true\" \\\n",
+    "    -o \"what-is-philosophy.pdf\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "459385be-5e9c-47ff-ba53-2b7ae6166b09",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pdf_loader = PyPDFLoader(\"what-is-philosophy.pdf\")\n",
+    "splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=64)\n",
+    "docs_from_pdf = pdf_loader.load_and_split(text_splitter=splitter)\n",
+    "\n",
+    "print(f\"Documents from PDF: {len(docs_from_pdf)}.\")\n",
+    "inserted_ids_from_pdf = vstore.add_documents(docs_from_pdf)\n",
+    "print(f\"Inserted {len(inserted_ids_from_pdf)} documents.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5010a66c-4298-4e32-82b5-2da0d36a5c70",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "retriever = vstore.as_retriever(search_kwargs={\"k\": 3})\n",
+    "\n",
+    "philo_template = \"\"\"\n",
+    "You are a philosopher that draws inspiration from great thinkers of the past\n",
+    "to craft well-thought answers to user questions. Use the provided context as the basis\n",
+    "for your answers and do not make up new reasoning paths - just mix-and-match what you are given.\n",
+    "Your answers must be concise and to the point, and refrain from answering about other topics than philosophy.\n",
+    "\n",
+    "CONTEXT:\n",
+    "{context}\n",
+    "\n",
+    "QUESTION: {question}\n",
+    "\n",
+    "YOUR ANSWER:\"\"\"\n",
+    "\n",
+    "philo_prompt = ChatPromptTemplate.from_template(philo_template)\n",
+    "\n",
+    "llm = ChatOpenAI()\n",
+    "\n",
+    "chain = (\n",
+    "    {\"context\": retriever, \"question\": RunnablePassthrough()}\n",
+    "    | philo_prompt\n",
+    "    | llm\n",
+    "    | StrOutputParser()\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fcbc1296-6c7c-478b-b55b-533ba4e54ddb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chain.invoke(\"How does Russell elaborate on Peirce's idea of the security blanket?\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "869ab448-a029-4692-aefc-26b85513314d",
+   "metadata": {},
+   "source": [
+    "For more, check out a complete RAG template using Astra DB through CQL [here](https://github.com/langchain-ai/langchain/tree/master/templates/cassandra-entomology-rag)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "177610c7-50d0-4b7b-8634-b03338054c8e",
+   "metadata": {},
+   "source": [
+    "## Cleanup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0da4d19f-9878-4d3d-82c9-09cafca20322",
+   "metadata": {},
+   "source": [
+    "The following retrieves the `Session` object from CassIO and runs a CQL `DROP TABLE` statement with it:\n",
+    "\n",
+    "_(You will lose the data you stored in it.)_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fd405a13-6f71-46fa-87e6-167238e9c25e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cassio.config.resolve_session().execute(\n",
+    "    f\"DROP TABLE {cassio.config.resolve_keyspace()}.cassandra_vector_demo;\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c10ece4d-ae06-42ab-baf4-4d0ac2051743",
+   "metadata": {},
+   "source": [
+    "### Learn more"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "51ea8b69-7e15-458f-85aa-9fa199f95f9c",
+   "metadata": {},
+   "source": [
+    "For more information, extended quickstarts and additional usage examples on the LangChain `Cassandra` vector store, please visit the [CassIO documentation](https://cassio.org/frameworks/langchain/about/)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3b8ee30c-2c84-42f3-9cff-e80dbc590490",
+   "metadata": {},
+   "source": [
+    "#### Attribution statement\n",
+    "\n",
+    "> Apache Cassandra, Cassandra and Apache are either registered trademarks or trademarks of the [Apache Software Foundation](http://www.apache.org/) in the United States and/or other countries.\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.17"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/vercel.json b/docs/vercel.json
index 796d3d16b5e62..65a9861a4d531 100644
--- a/docs/vercel.json
+++ b/docs/vercel.json
@@ -594,11 +594,7 @@
     },
     {
       "source": "/docs/integrations/cassandra",
-      "destination": "/docs/integrations/providers/astradb"
-    },
-    {
-      "source": "/docs/integrations/providers/cassandra",
-      "destination": "/docs/integrations/providers/astradb"
+      "destination": "/docs/integrations/providers/cassandra"
     },
     {
       "source": "/docs/integrations/providers/providers/semadb",
       "destination": "/docs/integrations/providers/semadb"
     },
     {
       "source": "/docs/integrations/vectorstores/vectorstores/semadb",
       "destination": "/docs/integrations/vectorstores/semadb"
     },
-    {
-      "source": "/docs/integrations/vectorstores/cassandra",
-      "destination": "/docs/integrations/vectorstores/astradb"
-    },
     {
       "source": "/docs/integrations/vectorstores/async_faiss",
       "destination": "/docs/integrations/vectorstores/faiss_async"
diff --git a/libs/community/langchain_community/vectorstores/astradb.py b/libs/community/langchain_community/vectorstores/astradb.py
index e6d1a5e010072..f079246e3a9e8 100644
--- a/libs/community/langchain_community/vectorstores/astradb.py
+++ b/libs/community/langchain_community/vectorstores/astradb.py
@@ -20,6 +20,7 @@
 )
 
 import numpy as np
+from langchain_core._api.deprecation import deprecated
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
 from langchain_core.runnables import run_in_executor
@@ -61,6 +62,11 @@ def _unique_list(lst: List[T], key: Callable[[T], U]) -> List[T]:
     return new_lst
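+# A transitional note for readers of this module: the class below is deprecated
+# in favor of the dedicated partner package; new code should use
+# `from langchain_astradb import AstraDBVectorStore` instead.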
+@deprecated( + since="0.1.23", + removal="0.2.0", + alternative_import="langchain_astradb.AstraDBVectorStore", +) class AstraDB(VectorStore): """Wrapper around DataStax Astra DB for vector-store workloads. diff --git a/libs/partners/astradb/.gitignore b/libs/partners/astradb/.gitignore new file mode 100644 index 0000000000000..bdc93231f0353 --- /dev/null +++ b/libs/partners/astradb/.gitignore @@ -0,0 +1,5 @@ +__pycache__ +*.env +.mypy_cache +.ruff_cache +.pytest_cache \ No newline at end of file diff --git a/libs/partners/astradb/LICENSE b/libs/partners/astradb/LICENSE new file mode 100644 index 0000000000000..426b65090341f --- /dev/null +++ b/libs/partners/astradb/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 LangChain, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/libs/partners/astradb/Makefile b/libs/partners/astradb/Makefile new file mode 100644 index 0000000000000..ee1200c9af5e4 --- /dev/null +++ b/libs/partners/astradb/Makefile @@ -0,0 +1,66 @@ +SHELL := /bin/bash +.PHONY: all format lint test tests integration_test integration_tests spell_check help + +# Default target executed when no arguments are given to make. +all: help + +# Define a variable for the test file path. +TEST_FILE ?= tests/unit_tests/ +INTEGRATION_TEST_FILE ?= tests/integration_tests/ + +test: + poetry run pytest $(TEST_FILE) + +tests: + poetry run pytest $(TEST_FILE) + +integration_test: + poetry run pytest $(INTEGRATION_TEST_FILE) + +integration_tests: + poetry run pytest $(INTEGRATION_TEST_FILE) + +###################### +# LINTING AND FORMATTING +###################### + +# Define a variable for Python and notebook files. +PYTHON_FILES=. +MYPY_CACHE=.mypy_cache +lint format: PYTHON_FILES=. +lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/partners/astradb --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$') +lint_package: PYTHON_FILES=langchain_astradb +lint_tests: PYTHON_FILES=tests +lint_tests: MYPY_CACHE=.mypy_cache_test + +lint lint_diff lint_package lint_tests: + poetry run ruff . 
+ poetry run ruff format $(PYTHON_FILES) --diff + poetry run ruff --select I $(PYTHON_FILES) + mkdir -p $(MYPY_CACHE); poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE) + +format format_diff: + poetry run ruff format $(PYTHON_FILES) + poetry run ruff --select I --fix $(PYTHON_FILES) + +spell_check: + poetry run codespell --toml pyproject.toml + +spell_fix: + poetry run codespell --toml pyproject.toml -w + +check_imports: $(shell find langchain_astradb -name '*.py') + poetry run python ./scripts/check_imports.py $^ + +###################### +# HELP +###################### + +help: + @echo '----' + @echo 'check_imports - check imports' + @echo 'format - run code formatters' + @echo 'lint - run linters' + @echo 'test - run unit tests' + @echo 'tests - run unit tests' + @echo 'test TEST_FILE= - run all tests in file' diff --git a/libs/partners/astradb/README.md b/libs/partners/astradb/README.md new file mode 100644 index 0000000000000..a4c2dc84e1fb5 --- /dev/null +++ b/libs/partners/astradb/README.md @@ -0,0 +1,35 @@ +# langchain-astradb + +This package contains the LangChain integrations for using DataStax Astra DB. + +> DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Apache Cassandra® and made conveniently available +> through an easy-to-use JSON API. + +_**Note.** For a short transitional period, only some of the Astra DB integration classes are contained in this package (the remaining ones being still in `langchain-community`). In a short while, and surely by version 0.2 of LangChain, all of the Astra DB support will be removed from `langchain-community` and included in this package._ + +## Installation and Setup + +Installation of this partner package: + +```bash +pip install langchain-astradb +``` + +## Integrations overview + +### Vector Store + +```python +from langchain_astradb.vectorstores import AstraDBVectorStore + +my_store = AstraDBVectorStore( + embedding=my_embeddings, + collection_name="my_store", + api_endpoint="https://...", + token="AstraCS:...", +) +``` + +## Reference + +See the [LangChain docs page](https://python.langchain.com/docs/integrations/providers/astradb) for a more detailed listing. diff --git a/libs/partners/astradb/langchain_astradb/__init__.py b/libs/partners/astradb/langchain_astradb/__init__.py new file mode 100644 index 0000000000000..fc86dd73bcf46 --- /dev/null +++ b/libs/partners/astradb/langchain_astradb/__init__.py @@ -0,0 +1,5 @@ +from langchain_astradb.vectorstores import AstraDBVectorStore + +__all__ = [ + "AstraDBVectorStore", +] diff --git a/libs/partners/astradb/langchain_astradb/py.typed b/libs/partners/astradb/langchain_astradb/py.typed new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/partners/astradb/langchain_astradb/utils/mmr.py b/libs/partners/astradb/langchain_astradb/utils/mmr.py new file mode 100644 index 0000000000000..feb34ad1c23d6 --- /dev/null +++ b/libs/partners/astradb/langchain_astradb/utils/mmr.py @@ -0,0 +1,87 @@ +""" +Tools for the Maximal Marginal Relevance (MMR) reranking. +Duplicated from langchain_community to avoid cross-dependencies. 
+ +Functions "maximal_marginal_relevance" and "cosine_similarity" +are duplicated in this utility respectively from modules: + - "libs/community/langchain_community/vectorstores/utils.py" + - "libs/community/langchain_community/utils/math.py" +""" + +import logging +from typing import List, Union + +import numpy as np + +logger = logging.getLogger(__name__) + +Matrix = Union[List[List[float]], List[np.ndarray], np.ndarray] + + +def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray: + """Row-wise cosine similarity between two equal-width matrices.""" + if len(X) == 0 or len(Y) == 0: + return np.array([]) + + X = np.array(X) + Y = np.array(Y) + if X.shape[1] != Y.shape[1]: + raise ValueError( + f"Number of columns in X and Y must be the same. X has shape {X.shape} " + f"and Y has shape {Y.shape}." + ) + try: + import simsimd as simd # type: ignore + + X = np.array(X, dtype=np.float32) + Y = np.array(Y, dtype=np.float32) + Z = 1 - simd.cdist(X, Y, metric="cosine") + if isinstance(Z, float): + return np.array([Z]) + return Z + except ImportError: + logger.info( + "Unable to import simsimd, defaulting to NumPy implementation. If you want " + "to use simsimd please install with `pip install simsimd`." + ) + X_norm = np.linalg.norm(X, axis=1) + Y_norm = np.linalg.norm(Y, axis=1) + # Ignore divide by zero errors run time warnings as those are handled below. + with np.errstate(divide="ignore", invalid="ignore"): + similarity = np.dot(X, Y.T) / np.outer(X_norm, Y_norm) + similarity[np.isnan(similarity) | np.isinf(similarity)] = 0.0 + return similarity + + +def maximal_marginal_relevance( + query_embedding: np.ndarray, + embedding_list: list, + lambda_mult: float = 0.5, + k: int = 4, +) -> List[int]: + """Calculate maximal marginal relevance.""" + if min(k, len(embedding_list)) <= 0: + return [] + if query_embedding.ndim == 1: + query_embedding = np.expand_dims(query_embedding, axis=0) + similarity_to_query = cosine_similarity(query_embedding, embedding_list)[0] + most_similar = int(np.argmax(similarity_to_query)) + idxs = [most_similar] + selected = np.array([embedding_list[most_similar]]) + while len(idxs) < min(k, len(embedding_list)): + best_score = -np.inf + idx_to_add = -1 + similarity_to_selected = cosine_similarity(embedding_list, selected) + for i, query_score in enumerate(similarity_to_query): + if i in idxs: + continue + redundant_score = max(similarity_to_selected[i]) + equation_score = ( + lambda_mult * query_score - (1 - lambda_mult) * redundant_score + ) + if equation_score > best_score: + best_score = equation_score + idx_to_add = i + idxs.append(idx_to_add) + selected = np.append(selected, [embedding_list[idx_to_add]], axis=0) + return idxs diff --git a/libs/partners/astradb/langchain_astradb/vectorstores/__init__.py b/libs/partners/astradb/langchain_astradb/vectorstores/__init__.py new file mode 100644 index 0000000000000..310732d125ff9 --- /dev/null +++ b/libs/partners/astradb/langchain_astradb/vectorstores/__init__.py @@ -0,0 +1,5 @@ +from langchain_astradb.vectorstores.astradb import AstraDBVectorStore + +__all__ = [ + "AstraDBVectorStore", +] diff --git a/libs/partners/astradb/langchain_astradb/vectorstores/astradb.py b/libs/partners/astradb/langchain_astradb/vectorstores/astradb.py new file mode 100644 index 0000000000000..e501113e3083a --- /dev/null +++ b/libs/partners/astradb/langchain_astradb/vectorstores/astradb.py @@ -0,0 +1,1317 @@ +from __future__ import annotations + +import asyncio +import uuid +import warnings +from asyncio import Task +from concurrent.futures 
import ThreadPoolExecutor +from typing import ( + Any, + Callable, + Dict, + Iterable, + List, + Optional, + Set, + Tuple, + Type, + TypeVar, + cast, +) + +import numpy as np +from astrapy.db import ( + AstraDB as AstraDBClient, +) +from astrapy.db import ( + AstraDBCollection, + AsyncAstraDBCollection, +) +from astrapy.db import ( + AsyncAstraDB as AsyncAstraDBClient, +) +from langchain_core.documents import Document +from langchain_core.embeddings import Embeddings +from langchain_core.runnables import run_in_executor +from langchain_core.runnables.utils import gather_with_concurrency +from langchain_core.utils.iter import batch_iterate +from langchain_core.vectorstores import VectorStore + +from langchain_astradb.utils.mmr import maximal_marginal_relevance + +T = TypeVar("T") +U = TypeVar("U") +DocDict = Dict[str, Any] # dicts expressing entries to insert + +# Batch/concurrency default values (if parameters not provided): +# Size of batches for bulk insertions: +# (20 is the max batch size for the HTTP API at the time of writing) +DEFAULT_BATCH_SIZE = 20 +# Number of threads to insert batches concurrently: +DEFAULT_BULK_INSERT_BATCH_CONCURRENCY = 16 +# Number of threads in a batch to insert pre-existing entries: +DEFAULT_BULK_INSERT_OVERWRITE_CONCURRENCY = 10 +# Number of threads (for deleting multiple rows concurrently): +DEFAULT_BULK_DELETE_CONCURRENCY = 20 + + +def _unique_list(lst: List[T], key: Callable[[T], U]) -> List[T]: + visited_keys: Set[U] = set() + new_lst = [] + for item in lst: + item_key = key(item) + if item_key not in visited_keys: + visited_keys.add(item_key) + new_lst.append(item) + return new_lst + + +class AstraDBVectorStore(VectorStore): + """Wrapper around DataStax Astra DB for vector-store workloads. + + For quickstart and details, visit: + docs.datastax.com/en/astra/home/astra.html + + Example: + .. code-block:: python + + from langchain_astradb.vectorstores import AstraDBVectorStore + from langchain_openai.embeddings import OpenAIEmbeddings + + embeddings = OpenAIEmbeddings() + vectorstore = AstraDBVectorStore( + embedding=embeddings, + collection_name="my_store", + token="AstraCS:...", + api_endpoint="https://-.apps.astra.datastax.com" + ) + + vectorstore.add_texts(["Giraffes", "All good here"]) + results = vectorstore.similarity_search("Everything's ok", k=1) + + Constructor Args (only keyword-arguments accepted): + embedding (Embeddings): embedding function to use. + collection_name (str): name of the Astra DB collection to create/use. + token (Optional[str]): API token for Astra DB usage. + api_endpoint (Optional[str]): full URL to the API endpoint, + such as "https://-us-east1.apps.astra.datastax.com". + astra_db_client (Optional[astrapy.db.AstraDB]): + *alternative to token+api_endpoint*, + you can pass an already-created 'astrapy.db.AstraDB' instance. + async_astra_db_client (Optional[astrapy.db.AsyncAstraDB]): + same as `astra_db_client`, but the basis for the async API + of the vector store. + namespace (Optional[str]): namespace (aka keyspace) where the + collection is created. Defaults to the database's "default namespace". + metric (Optional[str]): similarity function to use out of those + available in Astra DB. If left out, it will use Astra DB API's + defaults (i.e. "cosine" - but, for performance reasons, + "dot_product" is suggested if embeddings are normalized to one). + + Advanced arguments (coming with sensible defaults): + batch_size (Optional[int]): Size of batches for bulk insertions. 
+        bulk_insert_batch_concurrency (Optional[int]): Number of threads
+            to insert batches concurrently.
+        bulk_insert_overwrite_concurrency (Optional[int]): Number of
+            threads in a batch to insert pre-existing entries.
+        bulk_delete_concurrency (Optional[int]): Number of threads
+            (for deleting multiple rows concurrently).
+        pre_delete_collection (Optional[bool]): whether to delete the collection
+            before creating it. If False and the collection already exists,
+            the collection will be used as is.
+
+    A note on concurrency: as a rule of thumb, on a typical client machine
+    it is suggested to keep the quantity
+        bulk_insert_batch_concurrency * bulk_insert_overwrite_concurrency
+    well below 1000 to avoid exhausting the client multithreading/networking
+    resources. The hardcoded defaults are somewhat conservative to meet
+    most machines' specs, but a sensible choice to test may be:
+        bulk_insert_batch_concurrency = 80
+        bulk_insert_overwrite_concurrency = 10
+    A bit of experimentation is required to nail the best results here,
+    depending on both the machine/network specs and the expected workload
+    (specifically, how often a write is an update of an existing id).
+    Remember you can pass concurrency settings to individual calls to
+    add_texts and add_documents as well.
+
+    A note on passing astra_db_client and/or async_astra_db_client instead
+    of the credentials (token, api_endpoint):
+    - if you pass only the async client when creating the store,
+      the sync methods will error when called.
+    - conversely, if you pass only the sync client, the async methods will
+      still be available, but they will wrap their sync counterparts
+      in a `run_in_executor` construct instead of using the native async.
+    """
+
+    @staticmethod
+    def _filter_to_metadata(filter_dict: Optional[Dict[str, Any]]) -> Dict[str, Any]:
+        if filter_dict is None:
+            return {}
+        else:
+            metadata_filter = {}
+            for k, v in filter_dict.items():
+                if k and k[0] == "$":
+                    if isinstance(v, list):
+                        metadata_filter[k] = [
+                            AstraDBVectorStore._filter_to_metadata(f) for f in v
+                        ]
+                    else:
+                        # assume the operand is a sub-filter and process it recursively
+                        metadata_filter[k] = AstraDBVectorStore._filter_to_metadata(v)  # type: ignore[assignment]
+                else:
+                    metadata_filter[f"metadata.{k}"] = v
+
+            return metadata_filter
+
+    def __init__(
+        self,
+        *,
+        embedding: Embeddings,
+        collection_name: str,
+        token: Optional[str] = None,
+        api_endpoint: Optional[str] = None,
+        astra_db_client: Optional[AstraDBClient] = None,
+        async_astra_db_client: Optional[AsyncAstraDBClient] = None,
+        namespace: Optional[str] = None,
+        metric: Optional[str] = None,
+        batch_size: Optional[int] = None,
+        bulk_insert_batch_concurrency: Optional[int] = None,
+        bulk_insert_overwrite_concurrency: Optional[int] = None,
+        bulk_delete_concurrency: Optional[int] = None,
+        pre_delete_collection: bool = False,
+    ) -> None:
+        """
+        Create an AstraDBVectorStore object. See the class docstring for details.
+        """
+
+        # Conflicting-arg checks:
+        if astra_db_client is not None or async_astra_db_client is not None:
+            if token is not None or api_endpoint is not None:
+                raise ValueError(
+                    "You cannot pass 'astra_db_client' or 'async_astra_db_client' to "
+                    "AstraDBVectorStore if passing 'token' and 'api_endpoint'."
+                )
+
+        self.embedding = embedding
+        self.collection_name = collection_name
+        self.token = token
+        self.api_endpoint = api_endpoint
+        self.namespace = namespace
+        # Concurrency settings
+        self.batch_size: int = batch_size or DEFAULT_BATCH_SIZE
+        self.bulk_insert_batch_concurrency: int = (
+            bulk_insert_batch_concurrency or DEFAULT_BULK_INSERT_BATCH_CONCURRENCY
+        )
+        self.bulk_insert_overwrite_concurrency: int = (
+            bulk_insert_overwrite_concurrency
+            or DEFAULT_BULK_INSERT_OVERWRITE_CONCURRENCY
+        )
+        self.bulk_delete_concurrency: int = (
+            bulk_delete_concurrency or DEFAULT_BULK_DELETE_CONCURRENCY
+        )
+        # "vector-related" settings
+        self._embedding_dimension: Optional[int] = None
+        self.metric = metric
+
+        self.astra_db = astra_db_client
+        self.async_astra_db = async_astra_db_client
+        self.collection = None
+        self.async_collection = None
+
+        if token and api_endpoint:
+            self.astra_db = AstraDBClient(
+                token=cast(str, self.token),
+                api_endpoint=cast(str, self.api_endpoint),
+                namespace=self.namespace,
+            )
+            self.async_astra_db = AsyncAstraDBClient(
+                token=cast(str, self.token),
+                api_endpoint=cast(str, self.api_endpoint),
+                namespace=self.namespace,
+            )
+
+        if self.astra_db is not None:
+            self.collection = AstraDBCollection(
+                collection_name=self.collection_name,
+                astra_db=self.astra_db,
+            )
+
+        self.async_setup_db_task: Optional[Task] = None
+        if self.async_astra_db is not None:
+            self.async_collection = AsyncAstraDBCollection(
+                collection_name=self.collection_name,
+                astra_db=self.async_astra_db,
+            )
+            try:
+                asyncio.get_running_loop()
+                self.async_setup_db_task = asyncio.create_task(
+                    self._setup_db(pre_delete_collection)
+                )
+            except RuntimeError:
+                # no running event loop: provisioning happens synchronously below
+                pass
+
+        if self.async_setup_db_task is None:
+            if not pre_delete_collection:
+                self._provision_collection()
+            else:
+                self.clear()
+
+    def _ensure_astra_db_client(self) -> None:
+        """
+        Raise if the sync client is missing. If no error is raised,
+        self.collection is also not None (as per constructor flow).
+        """
+        if not self.astra_db:
+            raise ValueError("Missing AstraDB client")
+
+    async def _setup_db(self, pre_delete_collection: bool) -> None:
+        if pre_delete_collection:
+            # _setup_db is called from the constructor only, from a place
+            # where async_astra_db is not None for sure
+            await self.async_astra_db.delete_collection(  # type: ignore[union-attr]
+                collection_name=self.collection_name,
+            )
+        await self._aprovision_collection()
+
+    async def _ensure_db_setup(self) -> None:
+        if self.async_setup_db_task:
+            await self.async_setup_db_task
+
+    def _get_embedding_dimension(self) -> int:
+        if self._embedding_dimension is None:
+            self._embedding_dimension = len(
+                self.embedding.embed_query("This is a sample sentence.")
+            )
+        return self._embedding_dimension
+
+    def _provision_collection(self) -> None:
+        """
+        Run the API invocation to create the collection on the backend.
+
+        Internal-use method: it sets no object members and only acts
+        on the underlying storage.
+        """
+        self._ensure_astra_db_client()
+        # self.astra_db is not None (by _ensure_astra_db_client)
+        self.astra_db.create_collection(  # type: ignore[union-attr]
+            dimension=self._get_embedding_dimension(),
+            collection_name=self.collection_name,
+            metric=self.metric,
+        )
+
+    async def _aprovision_collection(self) -> None:
+        """
+        Run the API invocation to create the collection on the backend.
+
+        Internal-use method: it sets no object members and only acts
+        on the underlying storage.
+ """ + if not self.async_astra_db: + await run_in_executor(None, self._provision_collection) + else: + await self.async_astra_db.create_collection( + dimension=self._get_embedding_dimension(), + collection_name=self.collection_name, + metric=self.metric, + ) + + @property + def embeddings(self) -> Embeddings: + return self.embedding + + @staticmethod + def _dont_flip_the_cos_score(similarity0to1: float) -> float: + """Keep similarity from client unchanged ad it's in [0:1] already.""" + return similarity0to1 + + def _select_relevance_score_fn(self) -> Callable[[float], float]: + """ + The underlying API calls already returns a "score proper", + i.e. one in [0, 1] where higher means more *similar*, + so here the final score transformation is not reversing the interval: + """ + return self._dont_flip_the_cos_score + + def clear(self) -> None: + """Empty the collection of all its stored entries.""" + self._ensure_astra_db_client() + # self.collection is not None (by _ensure_astra_db_client) + self.collection.delete_many(filter={}) # type: ignore[union-attr] + + async def aclear(self) -> None: + """Empty the collection of all its stored entries.""" + await self._ensure_db_setup() + if not self.async_astra_db: + return await run_in_executor(None, self.clear) + else: + # async_collection not None if so is async_astra_db (constr. flow) + await self.async_collection.delete_many({}) # type: ignore[union-attr] + + def delete_by_document_id(self, document_id: str) -> bool: + """ + Remove a single document from the store, given its document_id (str). + Return True if a document has indeed been deleted, False if ID not found. + """ + self._ensure_astra_db_client() + # self.collection is not None (by _ensure_astra_db_client) + deletion_response = self.collection.delete_one(document_id) # type: ignore[union-attr] + return ((deletion_response or {}).get("status") or {}).get( + "deletedCount", 0 + ) == 1 + + async def adelete_by_document_id(self, document_id: str) -> bool: + """ + Remove a single document from the store, given its document_id (str). + Return True if a document has indeed been deleted, False if ID not found. + """ + await self._ensure_db_setup() + if not self.async_collection: + return await run_in_executor(None, self.delete_by_document_id, document_id) + deletion_response = await self.async_collection.delete_one(document_id) + return ((deletion_response or {}).get("status") or {}).get( + "deletedCount", 0 + ) == 1 + + def delete( + self, + ids: Optional[List[str]] = None, + concurrency: Optional[int] = None, + **kwargs: Any, + ) -> Optional[bool]: + """Delete by vector ids. + + Args: + ids (Optional[List[str]]): List of ids to delete. + concurrency (Optional[int]): max number of threads issuing + single-doc delete requests. Defaults to instance-level setting. + + Returns: + Optional[bool]: True if deletion is successful, + False otherwise, None if not implemented. + """ + + if kwargs: + warnings.warn( + "Method 'delete' of AstraDBVectorStore vector store invoked with " + f"unsupported arguments ({', '.join(sorted(kwargs.keys()))}), " + "which will be ignored." 
+            )
+
+        if ids is None:
+            raise ValueError("No ids provided to delete.")
+
+        _max_workers = concurrency or self.bulk_delete_concurrency
+        with ThreadPoolExecutor(max_workers=_max_workers) as tpe:
+            deletion_outcomes = list(
+                tpe.map(
+                    self.delete_by_document_id,
+                    ids,
+                )
+            )
+        # mirror 'adelete': succeed only if every single deletion succeeded
+        return all(deletion_outcomes)
+
+    async def adelete(
+        self,
+        ids: Optional[List[str]] = None,
+        concurrency: Optional[int] = None,
+        **kwargs: Any,
+    ) -> Optional[bool]:
+        """Delete by vector ID or other criteria.
+
+        Args:
+            ids: List of ids to delete.
+            concurrency (Optional[int]): max number of concurrent delete queries.
+                Defaults to instance-level setting.
+            **kwargs: Other keyword arguments that subclasses might use.
+
+        Returns:
+            Optional[bool]: True if deletion is successful,
+            False otherwise, None if not implemented.
+        """
+        if kwargs:
+            warnings.warn(
+                "Method 'adelete' of AstraDBVectorStore invoked with "
+                f"unsupported arguments ({', '.join(sorted(kwargs.keys()))}), "
+                "which will be ignored."
+            )
+
+        if ids is None:
+            raise ValueError("No ids provided to delete.")
+
+        return all(
+            await gather_with_concurrency(
+                concurrency, *[self.adelete_by_document_id(doc_id) for doc_id in ids]
+            )
+        )
+
+    def delete_collection(self) -> None:
+        """
+        Completely delete the collection from the database (as opposed
+        to 'clear()', which empties it only).
+        Stored data is lost and unrecoverable, resources are freed.
+        Use with caution.
+        """
+        self._ensure_astra_db_client()
+        # self.astra_db is not None (by _ensure_astra_db_client)
+        self.astra_db.delete_collection(  # type: ignore[union-attr]
+            collection_name=self.collection_name,
+        )
+
+    async def adelete_collection(self) -> None:
+        """
+        Completely delete the collection from the database (as opposed
+        to 'clear()', which empties it only).
+        Stored data is lost and unrecoverable, resources are freed.
+        Use with caution.
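+
+        Example (an illustrative sketch; ``vstore`` is a placeholder for an
+        existing store instance):
+
+        .. code-block:: python
+
+            await vstore.aclear()              # keeps the (empty) collection
+            await vstore.adelete_collection()  # drops the collection itself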
+ """ + await self._ensure_db_setup() + if not self.async_astra_db: + return await run_in_executor(None, self.delete_collection) + else: + await self.async_astra_db.delete_collection( + collection_name=self.collection_name, + ) + + @staticmethod + def _get_documents_to_insert( + texts: Iterable[str], + embedding_vectors: List[List[float]], + metadatas: Optional[List[dict]] = None, + ids: Optional[List[str]] = None, + ) -> List[DocDict]: + if ids is None: + ids = [uuid.uuid4().hex for _ in texts] + if metadatas is None: + metadatas = [{} for _ in texts] + # + documents_to_insert = [ + { + "content": b_txt, + "_id": b_id, + "$vector": b_emb, + "metadata": b_md, + } + for b_txt, b_emb, b_id, b_md in zip( + texts, + embedding_vectors, + ids, + metadatas, + ) + ] + # make unique by id, keeping the last + uniqued_documents_to_insert = _unique_list( + documents_to_insert[::-1], + lambda document: document["_id"], + )[::-1] + return uniqued_documents_to_insert + + @staticmethod + def _get_missing_from_batch( + document_batch: List[DocDict], insert_result: Dict[str, Any] + ) -> Tuple[List[str], List[DocDict]]: + if "status" not in insert_result: + raise ValueError( + f"API Exception while running bulk insertion: {str(insert_result)}" + ) + batch_inserted = insert_result["status"]["insertedIds"] + # estimation of the preexisting documents that failed + missed_inserted_ids = {document["_id"] for document in document_batch} - set( + batch_inserted + ) + errors = insert_result.get("errors", []) + # careful for other sources of error other than "doc already exists" + num_errors = len(errors) + unexpected_errors = any( + error.get("errorCode") != "DOCUMENT_ALREADY_EXISTS" for error in errors + ) + if num_errors != len(missed_inserted_ids) or unexpected_errors: + raise ValueError( + f"API Exception while running bulk insertion: {str(errors)}" + ) + # deal with the missing insertions as upserts + missing_from_batch = [ + document + for document in document_batch + if document["_id"] in missed_inserted_ids + ] + return batch_inserted, missing_from_batch + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + ids: Optional[List[str]] = None, + *, + batch_size: Optional[int] = None, + batch_concurrency: Optional[int] = None, + overwrite_concurrency: Optional[int] = None, + **kwargs: Any, + ) -> List[str]: + """Run texts through the embeddings and add them to the vectorstore. + + If passing explicit ids, those entries whose id is in the store already + will be replaced. + + Args: + texts (Iterable[str]): Texts to add to the vectorstore. + metadatas (Optional[List[dict]], optional): Optional list of metadatas. + ids (Optional[List[str]], optional): Optional list of ids. + batch_size (Optional[int]): Number of documents in each API call. + Check the underlying Astra DB HTTP API specs for the max value + (20 at the time of writing this). If not provided, defaults + to the instance-level setting. + batch_concurrency (Optional[int]): number of threads to process + insertion batches concurrently. Defaults to instance-level + setting if not provided. + overwrite_concurrency (Optional[int]): number of threads to process + pre-existing documents in each batch (which require individual + API calls). Defaults to instance-level setting if not provided. + + A note on metadata: there are constraints on the allowed field names + in this dictionary, coming from the underlying Astra DB API. + For instance, the `$` (dollar sign) cannot be used in the dict keys. 
+        See this document for details:
+        docs.datastax.com/en/astra-serverless/docs/develop/dev-with-json.html
+
+        Returns:
+            List[str]: List of ids of the added texts.
+        """
+
+        if kwargs:
+            warnings.warn(
+                "Method 'add_texts' of AstraDBVectorStore vector store invoked with "
+                f"unsupported arguments ({', '.join(sorted(kwargs.keys()))}), "
+                "which will be ignored."
+            )
+        self._ensure_astra_db_client()
+
+        # materialize the iterable once: it is traversed again below when
+        # building the documents to insert.
+        texts = list(texts)
+        embedding_vectors = self.embedding.embed_documents(texts)
+        documents_to_insert = self._get_documents_to_insert(
+            texts, embedding_vectors, metadatas, ids
+        )
+
+        def _handle_batch(document_batch: List[DocDict]) -> List[str]:
+            # self.collection is not None (by _ensure_astra_db_client)
+            im_result = self.collection.insert_many(  # type: ignore[union-attr]
+                documents=document_batch,
+                options={"ordered": False},
+                partial_failures_allowed=True,
+            )
+            batch_inserted, missing_from_batch = self._get_missing_from_batch(
+                document_batch, im_result
+            )
+
+            def _handle_missing_document(missing_document: DocDict) -> str:
+                # self.collection is not None (by _ensure_astra_db_client)
+                replacement_result = self.collection.find_one_and_replace(  # type: ignore[union-attr]
+                    filter={"_id": missing_document["_id"]},
+                    replacement=missing_document,
+                )
+                return replacement_result["data"]["document"]["_id"]
+
+            _u_max_workers = (
+                overwrite_concurrency or self.bulk_insert_overwrite_concurrency
+            )
+            with ThreadPoolExecutor(max_workers=_u_max_workers) as tpe2:
+                batch_replaced = list(
+                    tpe2.map(
+                        _handle_missing_document,
+                        missing_from_batch,
+                    )
+                )
+            return batch_inserted + batch_replaced
+
+        _b_max_workers = batch_concurrency or self.bulk_insert_batch_concurrency
+        with ThreadPoolExecutor(max_workers=_b_max_workers) as tpe:
+            all_ids_nested = tpe.map(
+                _handle_batch,
+                batch_iterate(
+                    batch_size or self.batch_size,
+                    documents_to_insert,
+                ),
+            )
+        return [iid for id_list in all_ids_nested for iid in id_list]
+
+    async def aadd_texts(
+        self,
+        texts: Iterable[str],
+        metadatas: Optional[List[dict]] = None,
+        ids: Optional[List[str]] = None,
+        *,
+        batch_size: Optional[int] = None,
+        batch_concurrency: Optional[int] = None,
+        overwrite_concurrency: Optional[int] = None,
+        **kwargs: Any,
+    ) -> List[str]:
+        """Run texts through the embeddings and add them to the vectorstore.
+
+        If passing explicit ids, those entries whose id is in the store already
+        will be replaced.
+
+        Args:
+            texts (Iterable[str]): Texts to add to the vectorstore.
+            metadatas (Optional[List[dict]], optional): Optional list of metadatas.
+            ids (Optional[List[str]], optional): Optional list of ids.
+            batch_size (Optional[int]): Number of documents in each API call.
+                Check the underlying Astra DB HTTP API specs for the max value
+                (20 at the time of writing this). If not provided, defaults
+                to the instance-level setting.
+            batch_concurrency (Optional[int]): number of concurrent batch insertions.
+                Defaults to instance-level setting if not provided.
+            overwrite_concurrency (Optional[int]): number of concurrent API calls to
+                process pre-existing documents in each batch.
+                Defaults to instance-level setting if not provided.
+
+        A note on metadata: there are constraints on the allowed field names
+        in this dictionary, coming from the underlying Astra DB API.
+        For instance, the `$` (dollar sign) cannot be used in the dict keys.
+        See this document for details:
+        docs.datastax.com/en/astra-serverless/docs/develop/dev-with-json.html
+
+        Returns:
+            List[str]: List of ids of the added texts.
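+
+        Example (an illustrative sketch; ``store`` is a placeholder for a
+        store built as in the class docstring):
+
+        .. code-block:: python
+
+            inserted_ids = await store.aadd_texts(
+                texts=["Giraffes", "All good here"],
+                ids=["doc-1", "doc-2"],  # re-using an existing id overwrites it
+                batch_size=20,
+            )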
+ """ + await self._ensure_db_setup() + if not self.async_collection: + return await super().aadd_texts( + texts, + metadatas, + ids=ids, + batch_size=batch_size, + batch_concurrency=batch_concurrency, + overwrite_concurrency=overwrite_concurrency, + ) + else: + if kwargs: + warnings.warn( + "Method 'aadd_texts' of AstraDBVectorStore invoked with " + f"unsupported arguments ({', '.join(sorted(kwargs.keys()))}), " + "which will be ignored." + ) + + embedding_vectors = await self.embedding.aembed_documents(list(texts)) + documents_to_insert = self._get_documents_to_insert( + texts, embedding_vectors, metadatas, ids + ) + + async def _handle_batch(document_batch: List[DocDict]) -> List[str]: + # self.async_collection is not None here for sure + im_result = await self.async_collection.insert_many( # type: ignore[union-attr] + documents=document_batch, + options={"ordered": False}, + partial_failures_allowed=True, + ) + batch_inserted, missing_from_batch = self._get_missing_from_batch( + document_batch, im_result + ) + + async def _handle_missing_document(missing_document: DocDict) -> str: + # self.async_collection is not None here for sure + replacement_result = ( + await self.async_collection.find_one_and_replace( # type: ignore[union-attr] + filter={"_id": missing_document["_id"]}, + replacement=missing_document, + ) + ) + return replacement_result["data"]["document"]["_id"] + + _u_max_workers = ( + overwrite_concurrency or self.bulk_insert_overwrite_concurrency + ) + batch_replaced = await gather_with_concurrency( + _u_max_workers, + *[_handle_missing_document(doc) for doc in missing_from_batch], + ) + return batch_inserted + batch_replaced + + _b_max_workers = batch_concurrency or self.bulk_insert_batch_concurrency + all_ids_nested = await gather_with_concurrency( + _b_max_workers, + *[ + _handle_batch(batch) + for batch in batch_iterate( + batch_size or self.batch_size, + documents_to_insert, + ) + ], + ) + + return [iid for id_list in all_ids_nested for iid in id_list] + + def similarity_search_with_score_id_by_vector( + self, + embedding: List[float], + k: int = 4, + filter: Optional[Dict[str, Any]] = None, + ) -> List[Tuple[Document, float, str]]: + """Return docs most similar to embedding vector. + + Args: + embedding (str): Embedding to look up documents similar to. + k (int): Number of Documents to return. Defaults to 4. + Returns: + List of (Document, score, id), the most similar to the query vector. + """ + self._ensure_astra_db_client() + metadata_parameter = self._filter_to_metadata(filter) + # + hits = list( + # self.collection is not None (by _ensure_astra_db_client) + self.collection.paginated_find( # type: ignore[union-attr] + filter=metadata_parameter, + sort={"$vector": embedding}, + options={"limit": k, "includeSimilarity": True}, + projection={ + "_id": 1, + "content": 1, + "metadata": 1, + }, + ) + ) + # + return [ + ( + Document( + page_content=hit["content"], + metadata=hit["metadata"], + ), + hit["$similarity"], + hit["_id"], + ) + for hit in hits + ] + + async def asimilarity_search_with_score_id_by_vector( + self, + embedding: List[float], + k: int = 4, + filter: Optional[Dict[str, Any]] = None, + ) -> List[Tuple[Document, float, str]]: + """Return docs most similar to embedding vector. + + Args: + embedding (str): Embedding to look up documents similar to. + k (int): Number of Documents to return. Defaults to 4. + Returns: + List of (Document, score, id), the most similar to the query vector. 
+ """ + await self._ensure_db_setup() + if not self.async_collection: + return await run_in_executor( + None, + self.similarity_search_with_score_id_by_vector, + embedding, + k, + filter, + ) + metadata_parameter = self._filter_to_metadata(filter) + # + return [ + ( + Document( + page_content=hit["content"], + metadata=hit["metadata"], + ), + hit["$similarity"], + hit["_id"], + ) + async for hit in self.async_collection.paginated_find( + filter=metadata_parameter, + sort={"$vector": embedding}, + options={"limit": k, "includeSimilarity": True}, + projection={ + "_id": 1, + "content": 1, + "metadata": 1, + }, + ) + ] + + def similarity_search_with_score_id( + self, + query: str, + k: int = 4, + filter: Optional[Dict[str, Any]] = None, + ) -> List[Tuple[Document, float, str]]: + embedding_vector = self.embedding.embed_query(query) + return self.similarity_search_with_score_id_by_vector( + embedding=embedding_vector, + k=k, + filter=filter, + ) + + async def asimilarity_search_with_score_id( + self, + query: str, + k: int = 4, + filter: Optional[Dict[str, Any]] = None, + ) -> List[Tuple[Document, float, str]]: + embedding_vector = await self.embedding.aembed_query(query) + return await self.asimilarity_search_with_score_id_by_vector( + embedding=embedding_vector, + k=k, + filter=filter, + ) + + def similarity_search_with_score_by_vector( + self, + embedding: List[float], + k: int = 4, + filter: Optional[Dict[str, Any]] = None, + ) -> List[Tuple[Document, float]]: + """Return docs most similar to embedding vector. + + Args: + embedding (str): Embedding to look up documents similar to. + k (int): Number of Documents to return. Defaults to 4. + Returns: + List of (Document, score), the most similar to the query vector. + """ + return [ + (doc, score) + for (doc, score, doc_id) in self.similarity_search_with_score_id_by_vector( + embedding=embedding, + k=k, + filter=filter, + ) + ] + + async def asimilarity_search_with_score_by_vector( + self, + embedding: List[float], + k: int = 4, + filter: Optional[Dict[str, Any]] = None, + ) -> List[Tuple[Document, float]]: + """Return docs most similar to embedding vector. + + Args: + embedding (str): Embedding to look up documents similar to. + k (int): Number of Documents to return. Defaults to 4. + Returns: + List of (Document, score), the most similar to the query vector. 
+ """ + return [ + (doc, score) + for ( + doc, + score, + doc_id, + ) in await self.asimilarity_search_with_score_id_by_vector( + embedding=embedding, + k=k, + filter=filter, + ) + ] + + def similarity_search( + self, + query: str, + k: int = 4, + filter: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + embedding_vector = self.embedding.embed_query(query) + return self.similarity_search_by_vector( + embedding_vector, + k, + filter=filter, + ) + + async def asimilarity_search( + self, + query: str, + k: int = 4, + filter: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + embedding_vector = await self.embedding.aembed_query(query) + return await self.asimilarity_search_by_vector( + embedding_vector, + k, + filter=filter, + ) + + def similarity_search_by_vector( + self, + embedding: List[float], + k: int = 4, + filter: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + return [ + doc + for doc, _ in self.similarity_search_with_score_by_vector( + embedding, + k, + filter=filter, + ) + ] + + async def asimilarity_search_by_vector( + self, + embedding: List[float], + k: int = 4, + filter: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + return [ + doc + for doc, _ in await self.asimilarity_search_with_score_by_vector( + embedding, + k, + filter=filter, + ) + ] + + def similarity_search_with_score( + self, + query: str, + k: int = 4, + filter: Optional[Dict[str, Any]] = None, + ) -> List[Tuple[Document, float]]: + embedding_vector = self.embedding.embed_query(query) + return self.similarity_search_with_score_by_vector( + embedding_vector, + k, + filter=filter, + ) + + async def asimilarity_search_with_score( + self, + query: str, + k: int = 4, + filter: Optional[Dict[str, Any]] = None, + ) -> List[Tuple[Document, float]]: + embedding_vector = await self.embedding.aembed_query(query) + return await self.asimilarity_search_with_score_by_vector( + embedding_vector, + k, + filter=filter, + ) + + @staticmethod + def _get_mmr_hits( + embedding: List[float], k: int, lambda_mult: float, prefetch_hits: List[DocDict] + ) -> List[Document]: + mmr_chosen_indices = maximal_marginal_relevance( + np.array(embedding, dtype=np.float32), + [prefetch_hit["$vector"] for prefetch_hit in prefetch_hits], + k=k, + lambda_mult=lambda_mult, + ) + mmr_hits = [ + prefetch_hit + for prefetch_index, prefetch_hit in enumerate(prefetch_hits) + if prefetch_index in mmr_chosen_indices + ] + return [ + Document( + page_content=hit["content"], + metadata=hit["metadata"], + ) + for hit in mmr_hits + ] + + def max_marginal_relevance_search_by_vector( + self, + embedding: List[float], + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + filter: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance. + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + Args: + embedding: Embedding to look up documents similar to. + k: Number of Documents to return. + fetch_k: Number of Documents to fetch to pass to MMR algorithm. + lambda_mult: Number between 0 and 1 that determines the degree + of diversity among the results with 0 corresponding + to maximum diversity and 1 to minimum diversity. + Returns: + List of Documents selected by maximal marginal relevance. 
+ """ + self._ensure_astra_db_client() + metadata_parameter = self._filter_to_metadata(filter) + + prefetch_hits = list( + # self.collection is not None (by _ensure_astra_db_client) + self.collection.paginated_find( # type: ignore[union-attr] + filter=metadata_parameter, + sort={"$vector": embedding}, + options={"limit": fetch_k, "includeSimilarity": True}, + projection={ + "_id": 1, + "content": 1, + "metadata": 1, + "$vector": 1, + }, + ) + ) + + return self._get_mmr_hits(embedding, k, lambda_mult, prefetch_hits) + + async def amax_marginal_relevance_search_by_vector( + self, + embedding: List[float], + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + filter: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance. + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + Args: + embedding: Embedding to look up documents similar to. + k: Number of Documents to return. + fetch_k: Number of Documents to fetch to pass to MMR algorithm. + lambda_mult: Number between 0 and 1 that determines the degree + of diversity among the results with 0 corresponding + to maximum diversity and 1 to minimum diversity. + Returns: + List of Documents selected by maximal marginal relevance. + """ + await self._ensure_db_setup() + if not self.async_collection: + return await run_in_executor( + None, + self.max_marginal_relevance_search_by_vector, + embedding, + k, + fetch_k, + lambda_mult, + filter, + **kwargs, + ) + metadata_parameter = self._filter_to_metadata(filter) + + prefetch_hits = [ + hit + async for hit in self.async_collection.paginated_find( + filter=metadata_parameter, + sort={"$vector": embedding}, + options={"limit": fetch_k, "includeSimilarity": True}, + projection={ + "_id": 1, + "content": 1, + "metadata": 1, + "$vector": 1, + }, + ) + ] + + return self._get_mmr_hits(embedding, k, lambda_mult, prefetch_hits) + + def max_marginal_relevance_search( + self, + query: str, + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + filter: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance. + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + Args: + query (str): Text to look up documents similar to. + k (int = 4): Number of Documents to return. + fetch_k (int = 20): Number of Documents to fetch to pass to MMR algorithm. + lambda_mult (float = 0.5): Number between 0 and 1 that determines the degree + of diversity among the results with 0 corresponding + to maximum diversity and 1 to minimum diversity. + Optional. + Returns: + List of Documents selected by maximal marginal relevance. + """ + embedding_vector = self.embedding.embed_query(query) + return self.max_marginal_relevance_search_by_vector( + embedding_vector, + k, + fetch_k, + lambda_mult=lambda_mult, + filter=filter, + ) + + async def amax_marginal_relevance_search( + self, + query: str, + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + filter: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance. + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + Args: + query (str): Text to look up documents similar to. + k (int = 4): Number of Documents to return. 
+            fetch_k (int = 20): Number of Documents to fetch to pass to MMR algorithm.
+            lambda_mult (float = 0.5): Number between 0 and 1 that determines the degree
+                of diversity among the results with 0 corresponding
+                to maximum diversity and 1 to minimum diversity.
+                Optional.
+        Returns:
+            List of Documents selected by maximal marginal relevance.
+        """
+        embedding_vector = await self.embedding.aembed_query(query)
+        return await self.amax_marginal_relevance_search_by_vector(
+            embedding_vector,
+            k,
+            fetch_k,
+            lambda_mult=lambda_mult,
+            filter=filter,
+        )
+
+    @classmethod
+    def _from_kwargs(
+        cls: Type[AstraDBVectorStore],
+        embedding: Embeddings,
+        **kwargs: Any,
+    ) -> AstraDBVectorStore:
+        known_kwargs = {
+            "collection_name",
+            "token",
+            "api_endpoint",
+            "astra_db_client",
+            "async_astra_db_client",
+            "namespace",
+            "metric",
+            "batch_size",
+            "bulk_insert_batch_concurrency",
+            "bulk_insert_overwrite_concurrency",
+            "bulk_delete_concurrency",
+            "batch_concurrency",
+            "overwrite_concurrency",
+        }
+        if kwargs:
+            unknown_kwargs = set(kwargs.keys()) - known_kwargs
+            if unknown_kwargs:
+                warnings.warn(
+                    "Method 'from_texts' of AstraDBVectorStore vector store "
+                    "invoked with unsupported arguments "
+                    f"({', '.join(sorted(unknown_kwargs))}), "
+                    "which will be ignored."
+                )
+
+        collection_name: str = kwargs["collection_name"]
+        token = kwargs.get("token")
+        api_endpoint = kwargs.get("api_endpoint")
+        astra_db_client = kwargs.get("astra_db_client")
+        async_astra_db_client = kwargs.get("async_astra_db_client")
+        namespace = kwargs.get("namespace")
+        metric = kwargs.get("metric")
+
+        return cls(
+            embedding=embedding,
+            collection_name=collection_name,
+            token=token,
+            api_endpoint=api_endpoint,
+            astra_db_client=astra_db_client,
+            async_astra_db_client=async_astra_db_client,
+            namespace=namespace,
+            metric=metric,
+            batch_size=kwargs.get("batch_size"),
+            bulk_insert_batch_concurrency=kwargs.get("bulk_insert_batch_concurrency"),
+            bulk_insert_overwrite_concurrency=kwargs.get(
+                "bulk_insert_overwrite_concurrency"
+            ),
+            bulk_delete_concurrency=kwargs.get("bulk_delete_concurrency"),
+        )
+
+    @classmethod
+    def from_texts(
+        cls: Type[AstraDBVectorStore],
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        ids: Optional[List[str]] = None,
+        **kwargs: Any,
+    ) -> AstraDBVectorStore:
+        """Create an Astra DB vectorstore from raw texts.
+
+        Args:
+            texts (List[str]): the texts to insert.
+            embedding (Embeddings): the embedding function to use in the store.
+            metadatas (Optional[List[dict]]): metadata dicts for the texts.
+            ids (Optional[List[str]]): ids to associate with the texts.
+            *Additional arguments*: you can pass any argument that you would
+                pass to 'add_texts' and/or to the 'AstraDBVectorStore' constructor
+                (see these methods for details). These arguments will be
+                routed to the respective methods as they are.
+
+        Returns:
+            an `AstraDBVectorStore` vectorstore.
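+
+        Example (an illustrative sketch; the embeddings object and the
+        credentials are placeholders):
+
+        .. code-block:: python
+
+            store = AstraDBVectorStore.from_texts(
+                texts=["Giraffes", "All good here"],
+                embedding=my_embeddings,
+                collection_name="my_store",
+                api_endpoint="https://...",
+                token="AstraCS:...",
+            )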
+ """ + astra_db_store = AstraDBVectorStore._from_kwargs(embedding, **kwargs) + astra_db_store.add_texts( + texts=texts, + metadatas=metadatas, + ids=ids, + batch_size=kwargs.get("batch_size"), + batch_concurrency=kwargs.get("batch_concurrency"), + overwrite_concurrency=kwargs.get("overwrite_concurrency"), + ) + return astra_db_store + + @classmethod + async def afrom_texts( + cls: Type[AstraDBVectorStore], + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + ids: Optional[List[str]] = None, + **kwargs: Any, + ) -> AstraDBVectorStore: + """Create an Astra DB vectorstore from raw texts. + + Args: + texts (List[str]): the texts to insert. + embedding (Embeddings): the embedding function to use in the store. + metadatas (Optional[List[dict]]): metadata dicts for the texts. + ids (Optional[List[str]]): ids to associate to the texts. + *Additional arguments*: you can pass any argument that you would + to 'add_texts' and/or to the 'AstraDBVectorStore' constructor + (see these methods for details). These arguments will be + routed to the respective methods as they are. + + Returns: + an `AstraDBVectorStore` vectorstore. + """ + astra_db_store = AstraDBVectorStore._from_kwargs(embedding, **kwargs) + await astra_db_store.aadd_texts( + texts=texts, + metadatas=metadatas, + ids=ids, + batch_size=kwargs.get("batch_size"), + batch_concurrency=kwargs.get("batch_concurrency"), + overwrite_concurrency=kwargs.get("overwrite_concurrency"), + ) + return astra_db_store + + @classmethod + def from_documents( + cls: Type[AstraDBVectorStore], + documents: List[Document], + embedding: Embeddings, + **kwargs: Any, + ) -> AstraDBVectorStore: + """Create an Astra DB vectorstore from a document list. + + Utility method that defers to 'from_texts' (see that one). + + Args: see 'from_texts', except here you have to supply 'documents' + in place of 'texts' and 'metadatas'. + + Returns: + an `AstraDBVectorStore` vectorstore. + """ + return super().from_documents(documents, embedding, **kwargs) diff --git a/libs/partners/astradb/poetry.lock b/libs/partners/astradb/poetry.lock new file mode 100644 index 0000000000000..78b4da4d0b674 --- /dev/null +++ b/libs/partners/astradb/poetry.lock @@ -0,0 +1,1074 @@ +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. 
+ +[[package]] +name = "annotated-types" +version = "0.6.0" +description = "Reusable constraint types to use with typing.Annotated" +optional = false +python-versions = ">=3.8" +files = [ + {file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"}, + {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""} + +[[package]] +name = "anyio" +version = "4.2.0" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +optional = false +python-versions = ">=3.8" +files = [ + {file = "anyio-4.2.0-py3-none-any.whl", hash = "sha256:745843b39e829e108e518c489b31dc757de7d2131d53fac32bd8df268227bfee"}, + {file = "anyio-4.2.0.tar.gz", hash = "sha256:e1875bb4b4e2de1669f4bc7869b6d3f54231cdced71605e6e64c9be77e3be50f"}, +] + +[package.dependencies] +exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} +idna = ">=2.8" +sniffio = ">=1.1" +typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} + +[package.extras] +doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] +trio = ["trio (>=0.23)"] + +[[package]] +name = "astrapy" +version = "0.7.5" +description = "AstraPy is a Pythonic SDK for DataStax Astra" +optional = false +python-versions = ">=3.8.0,<4.0.0" +files = [ + {file = "astrapy-0.7.5-py3-none-any.whl", hash = "sha256:51daabbc59ed56f023233296e42372cd2a7468282f978c36ccc33a2e211beddc"}, + {file = "astrapy-0.7.5.tar.gz", hash = "sha256:72a31538c5fd06cbf91a235924bee81007d03b8c0feff1d7cf811e64a2adc7a8"}, +] + +[package.dependencies] +cassio = ">=0.1.4,<0.2.0" +deprecation = ">=2.1.0,<2.2.0" +httpx = {version = ">=0.26.0,<0.27.0", extras = ["http2"]} +toml = ">=0.10.2,<0.11.0" + +[[package]] +name = "cassandra-driver" +version = "3.29.0" +description = "DataStax Driver for Apache Cassandra" +optional = false +python-versions = "*" +files = [ + {file = "cassandra-driver-3.29.0.tar.gz", hash = "sha256:0a34f9534356e5fd33af8cdda109d5e945b6335cb50399b267c46368c4e93c98"}, + {file = "cassandra_driver-3.29.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:28d6fe5379d55e4fc96785bd2e2cba029ef171cc43fb38fc507b9ba232917ac2"}, + {file = "cassandra_driver-3.29.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:05e267412ccc9fe71ee4a81d98f2250df2429390fac4721f41dd17b65e4c41ac"}, + {file = "cassandra_driver-3.29.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84eacfc8e6461590eb1c2b9651ea809be298eb8283c2d844a6dad8058ee7928c"}, + {file = "cassandra_driver-3.29.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8feeda01bb13dce1a74b0a94172b3b06b0d9d8f33d6fb56e1910d495b0e085e5"}, + {file = "cassandra_driver-3.29.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bb0ef3297255bbade7b0c2d168c31d36ec904b1a9b42521d1d3d65c3148bbc7"}, + {file = "cassandra_driver-3.29.0-cp310-cp310-win32.whl", hash = "sha256:39d78971a4e26ef65b77caa09c0e6ccfd7b2c52b0924c328fbfdca91667eb08e"}, + {file = "cassandra_driver-3.29.0-cp310-cp310-win_amd64.whl", hash = 
"sha256:9dd713fe6388f3ba141cc2eef4737b5e4a27b0d1c1a6b0372b8ff3d2d35ccf79"}, + {file = "cassandra_driver-3.29.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:76333d38cb663065d53ca658e15185b23aa0ce434f2876c455624d90d2ee0dbf"}, + {file = "cassandra_driver-3.29.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:81ce92e0420bf18742b4bc433052c7c2e4aa72fa84898be2b26083e240ace343"}, + {file = "cassandra_driver-3.29.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5b90c2f052a102560e4fcf860f6d1ac35d3514ad36b1978cf821998f1e689f38"}, + {file = "cassandra_driver-3.29.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fecf584a7f411d247d1925c66d527f7ecc73710b230b68cdacf2044fb57ae4b"}, + {file = "cassandra_driver-3.29.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a678bc7107cc606ac8ff8cb58fe6abb0bb2a9ff5196016b3bd36926146c4dc62"}, + {file = "cassandra_driver-3.29.0-cp311-cp311-win32.whl", hash = "sha256:e9badede26005fd993e2344e8a541a4133bc46a76a90969d57a90a028b2b8ca6"}, + {file = "cassandra_driver-3.29.0-cp311-cp311-win_amd64.whl", hash = "sha256:cac6d2e6ad1a386f1b786de78102f918bcd5caac390c3e446558e5adee9464c6"}, + {file = "cassandra_driver-3.29.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:01a8b4cdb056c209c5c4aa22e0d7f427b87cb98297a6efff29ea278da9a52698"}, + {file = "cassandra_driver-3.29.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:73aa7b32dfad1f58fb00167052ab80b1b186b69baac7de4ad5cca785fff569be"}, + {file = "cassandra_driver-3.29.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7f7c446edba002b0fdd94f2b92c4752e16738ea7dce27d754103fcd086b4dcc9"}, + {file = "cassandra_driver-3.29.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6843569360fb4a446d65f6733faed1207c252745a31a1d8dc02feff8f7f86a23"}, + {file = "cassandra_driver-3.29.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1762d228bdd3f1bc5faa0812e1fcac75a36ab7504f3cfb7e9b5d2bf26a50c552"}, + {file = "cassandra_driver-3.29.0-cp312-cp312-win32.whl", hash = "sha256:dd245817e0df048b780f45ac78b1840fe12deb5aea8873df4a11e0c44a68c19a"}, + {file = "cassandra_driver-3.29.0-cp312-cp312-win_amd64.whl", hash = "sha256:002134a4152034ed66d9f9616ea391f44dfdf7c9f97d22bd4d4f64d70020b91b"}, + {file = "cassandra_driver-3.29.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:d9b652db99f69ee62bbd679a29cfbab398ebf2bfc195632d57ecb3f246baf48b"}, + {file = "cassandra_driver-3.29.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6ac82ae8240b4f4f1a3d1e6f21a4ecd9948afdfedef6f23235fac85d20d11076"}, + {file = "cassandra_driver-3.29.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1590d231503594546dfdbf6119d805e1a0b22de98a1a6ec0de79a1bacc59ecb5"}, + {file = "cassandra_driver-3.29.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fcf9dee3b120062a8224278da56ab088c2c081a79dc9e017f065dccd421b6477"}, + {file = "cassandra_driver-3.29.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb9a123ad86152d2a1ca31f4a3d91d72cbd3ed7a88a4c3cd5f6f72173a1bfbd8"}, + {file = "cassandra_driver-3.29.0-cp38-cp38-win32.whl", hash = "sha256:cc6794ca9c94e157570e2b7b5a04458259ee29c5a0d0de50a9e0c8e2da8f5455"}, + {file = "cassandra_driver-3.29.0-cp38-cp38-win_amd64.whl", hash = "sha256:096eef84ab466b090a69a4e9d85e65d57e926ff7d7897443e7b637d40277f373"}, + {file = "cassandra_driver-3.29.0-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:befb723d62ee650cb3afd9668245ee9ce6ba5394dbd58352866ff2baa0324101"}, + {file = "cassandra_driver-3.29.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4108fb2a64a8fd77948003ff0ca4d296364d9ff7381f4abe7a9db202e6378446"}, + {file = "cassandra_driver-3.29.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5cd4701cc083e047553888dbd99d2d5119b5b3da54b9e8034a80b8c8d516142c"}, + {file = "cassandra_driver-3.29.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7b94c5273bf3c2f252aed8624303c46d9d4e6dc7663f53ed9c9335e5d0dcb88"}, + {file = "cassandra_driver-3.29.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3609f2eda8ee2a6a9b2c9c84c009bf54a7695b9dfc21700b88dd0a2140c82c95"}, + {file = "cassandra_driver-3.29.0-cp39-cp39-win32.whl", hash = "sha256:aaeff4c3af3100510e329177c46da89aab6d444070f4fa370df5328b8ad488b4"}, + {file = "cassandra_driver-3.29.0-cp39-cp39-win_amd64.whl", hash = "sha256:88d9a6abd0e0af199636ff9386d0b9b81b1dd189e22c8498ecaa546256bacf24"}, +] + +[package.dependencies] +geomet = ">=0.1,<0.3" + +[package.extras] +cle = ["cryptography (>=35.0)"] +graph = ["gremlinpython (==3.4.6)"] + +[[package]] +name = "cassio" +version = "0.1.4" +description = "A framework-agnostic Python library to seamlessly integrate Apache Cassandra(R) with ML/LLM/genAI workloads." +optional = false +python-versions = ">=3.8" +files = [ + {file = "cassio-0.1.4-py3-none-any.whl", hash = "sha256:ab997879c36807ff5b9771ff35941f104c0f0e60e1595118279869b5b95c146f"}, + {file = "cassio-0.1.4.tar.gz", hash = "sha256:df495c459ee5e9194e4780ac3ea1aaf79a4443e6d06f0eeb67aac6e3cd8c47aa"}, +] + +[package.dependencies] +cassandra-driver = ">=3.28.0" +numpy = ">=1.0" +requests = ">=2" + +[[package]] +name = "certifi" +version = "2024.2.2" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"}, + {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = 
"charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = 
"charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = 
"sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + +[[package]] +name = "click" +version = "8.1.7" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.7" +files = [ + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "codespell" +version = "2.2.6" +description = "Codespell" +optional = false +python-versions = ">=3.8" +files = [ + {file = "codespell-2.2.6-py3-none-any.whl", hash = "sha256:9ee9a3e5df0990604013ac2a9f22fa8e57669c827124a2e961fe8a1da4cacc07"}, + {file = "codespell-2.2.6.tar.gz", hash = "sha256:a8c65d8eb3faa03deabab6b3bbe798bea72e1799c7e9e955d57eca4096abcff9"}, +] + +[package.extras] +dev = ["Pygments", "build", "chardet", "pre-commit", "pytest", "pytest-cov", "pytest-dependency", "ruff", "tomli", "twine"] +hard-encoding-detection = ["chardet"] +toml = ["tomli"] +types = ["chardet (>=5.1.0)", "mypy", "pytest", "pytest-cov", "pytest-dependency"] + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." 
+optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "deprecation" +version = "2.1.0" +description = "A library to handle automated deprecations" +optional = false +python-versions = "*" +files = [ + {file = "deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a"}, + {file = "deprecation-2.1.0.tar.gz", hash = "sha256:72b3bde64e5d778694b0cf68178aed03d15e15477116add3fb773e581f9518ff"}, +] + +[package.dependencies] +packaging = "*" + +[[package]] +name = "exceptiongroup" +version = "1.2.0" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"}, + {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "freezegun" +version = "1.4.0" +description = "Let your Python tests travel through time" +optional = false +python-versions = ">=3.7" +files = [ + {file = "freezegun-1.4.0-py3-none-any.whl", hash = "sha256:55e0fc3c84ebf0a96a5aa23ff8b53d70246479e9a68863f1fcac5a3e52f19dd6"}, + {file = "freezegun-1.4.0.tar.gz", hash = "sha256:10939b0ba0ff5adaecf3b06a5c2f73071d9678e507c5eaedb23c761d56ac774b"}, +] + +[package.dependencies] +python-dateutil = ">=2.7" + +[[package]] +name = "geomet" +version = "0.2.1.post1" +description = "GeoJSON <-> WKT/WKB conversion utilities" +optional = false +python-versions = ">2.6, !=3.3.*, <4" +files = [ + {file = "geomet-0.2.1.post1-py3-none-any.whl", hash = "sha256:a41a1e336b381416d6cbed7f1745c848e91defaa4d4c1bdc1312732e46ffad2b"}, + {file = "geomet-0.2.1.post1.tar.gz", hash = "sha256:91d754f7c298cbfcabd3befdb69c641c27fe75e808b27aa55028605761d17e95"}, +] + +[package.dependencies] +click = "*" +six = "*" + +[[package]] +name = "h11" +version = "0.14.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +optional = false +python-versions = ">=3.7" +files = [ + {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, + {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, +] + +[[package]] +name = "h2" +version = "4.1.0" +description = "HTTP/2 State-Machine based protocol implementation" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"}, + {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"}, +] + +[package.dependencies] +hpack = ">=4.0,<5" +hyperframe = ">=6.0,<7" + +[[package]] +name = "hpack" +version = "4.0.0" +description = "Pure-Python HPACK header compression" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"}, + {file = "hpack-4.0.0.tar.gz", hash = 
"sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"}, +] + +[[package]] +name = "httpcore" +version = "1.0.3" +description = "A minimal low-level HTTP client." +optional = false +python-versions = ">=3.8" +files = [ + {file = "httpcore-1.0.3-py3-none-any.whl", hash = "sha256:9a6a501c3099307d9fd76ac244e08503427679b1e81ceb1d922485e2f2462ad2"}, + {file = "httpcore-1.0.3.tar.gz", hash = "sha256:5c0f9546ad17dac4d0772b0808856eb616eb8b48ce94f49ed819fd6982a8a544"}, +] + +[package.dependencies] +certifi = "*" +h11 = ">=0.13,<0.15" + +[package.extras] +asyncio = ["anyio (>=4.0,<5.0)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +trio = ["trio (>=0.22.0,<0.24.0)"] + +[[package]] +name = "httpx" +version = "0.26.0" +description = "The next generation HTTP client." +optional = false +python-versions = ">=3.8" +files = [ + {file = "httpx-0.26.0-py3-none-any.whl", hash = "sha256:8915f5a3627c4d47b73e8202457cb28f1266982d1159bd5779d86a80c0eab1cd"}, + {file = "httpx-0.26.0.tar.gz", hash = "sha256:451b55c30d5185ea6b23c2c793abf9bb237d2a7dfb901ced6ff69ad37ec1dfaf"}, +] + +[package.dependencies] +anyio = "*" +certifi = "*" +h2 = {version = ">=3,<5", optional = true, markers = "extra == \"http2\""} +httpcore = "==1.*" +idna = "*" +sniffio = "*" + +[package.extras] +brotli = ["brotli", "brotlicffi"] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] + +[[package]] +name = "hyperframe" +version = "6.0.1" +description = "HTTP/2 framing layer for Python" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"}, + {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, +] + +[[package]] +name = "idna" +version = "3.6" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.6-py3-none-any.whl", hash = "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f"}, + {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"}, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "jsonpatch" +version = "1.33" +description = "Apply JSON-Patches (RFC 6902)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +files = [ + {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, + {file = "jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"}, +] + +[package.dependencies] +jsonpointer = ">=1.9" + +[[package]] +name = "jsonpointer" +version = "2.4" +description = "Identify specific nodes in a JSON document (RFC 6901)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +files = [ + {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = 
"sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, +] + +[[package]] +name = "langchain-core" +version = "0.1.23" +description = "Building applications with LLMs through composability" +optional = false +python-versions = ">=3.8.1,<4.0" +files = [] +develop = true + +[package.dependencies] +anyio = ">=3,<5" +jsonpatch = "^1.33" +langsmith = "^0.0.87" +packaging = "^23.2" +pydantic = ">=1,<3" +PyYAML = ">=5.3" +requests = "^2" +tenacity = "^8.1.0" + +[package.extras] +extended-testing = ["jinja2 (>=3,<4)"] + +[package.source] +type = "directory" +url = "../../core" + +[[package]] +name = "langsmith" +version = "0.0.87" +description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "langsmith-0.0.87-py3-none-any.whl", hash = "sha256:8903d3811b9fc89eb18f5961c8e6935fbd2d0f119884fbf30dc70b8f8f4121fc"}, + {file = "langsmith-0.0.87.tar.gz", hash = "sha256:36c4cc47e5b54be57d038036a30fb19ce6e4c73048cd7a464b8f25b459694d34"}, +] + +[package.dependencies] +pydantic = ">=1,<3" +requests = ">=2,<3" + +[[package]] +name = "mypy" +version = "0.991" +description = "Optional static typing for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mypy-0.991-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7d17e0a9707d0772f4a7b878f04b4fd11f6f5bcb9b3813975a9b13c9332153ab"}, + {file = "mypy-0.991-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0714258640194d75677e86c786e80ccf294972cc76885d3ebbb560f11db0003d"}, + {file = "mypy-0.991-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0c8f3be99e8a8bd403caa8c03be619544bc2c77a7093685dcf308c6b109426c6"}, + {file = "mypy-0.991-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc9ec663ed6c8f15f4ae9d3c04c989b744436c16d26580eaa760ae9dd5d662eb"}, + {file = "mypy-0.991-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4307270436fd7694b41f913eb09210faff27ea4979ecbcd849e57d2da2f65305"}, + {file = "mypy-0.991-cp310-cp310-win_amd64.whl", hash = "sha256:901c2c269c616e6cb0998b33d4adbb4a6af0ac4ce5cd078afd7bc95830e62c1c"}, + {file = "mypy-0.991-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d13674f3fb73805ba0c45eb6c0c3053d218aa1f7abead6e446d474529aafc372"}, + {file = "mypy-0.991-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1c8cd4fb70e8584ca1ed5805cbc7c017a3d1a29fb450621089ffed3e99d1857f"}, + {file = "mypy-0.991-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:209ee89fbb0deed518605edddd234af80506aec932ad28d73c08f1400ef80a33"}, + {file = "mypy-0.991-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37bd02ebf9d10e05b00d71302d2c2e6ca333e6c2a8584a98c00e038db8121f05"}, + {file = "mypy-0.991-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:26efb2fcc6b67e4d5a55561f39176821d2adf88f2745ddc72751b7890f3194ad"}, + {file = "mypy-0.991-cp311-cp311-win_amd64.whl", hash = "sha256:3a700330b567114b673cf8ee7388e949f843b356a73b5ab22dd7cff4742a5297"}, + {file = "mypy-0.991-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1f7d1a520373e2272b10796c3ff721ea1a0712288cafaa95931e66aa15798813"}, + {file = "mypy-0.991-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:641411733b127c3e0dab94c45af15fea99e4468f99ac88b39efb1ad677da5711"}, + {file = "mypy-0.991-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:3d80e36b7d7a9259b740be6d8d906221789b0d836201af4234093cae89ced0cd"}, + {file = "mypy-0.991-cp37-cp37m-win_amd64.whl", hash = 
"sha256:e62ebaad93be3ad1a828a11e90f0e76f15449371ffeecca4a0a0b9adc99abcef"}, + {file = "mypy-0.991-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b86ce2c1866a748c0f6faca5232059f881cda6dda2a893b9a8373353cfe3715a"}, + {file = "mypy-0.991-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ac6e503823143464538efda0e8e356d871557ef60ccd38f8824a4257acc18d93"}, + {file = "mypy-0.991-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0cca5adf694af539aeaa6ac633a7afe9bbd760df9d31be55ab780b77ab5ae8bf"}, + {file = "mypy-0.991-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a12c56bf73cdab116df96e4ff39610b92a348cc99a1307e1da3c3768bbb5b135"}, + {file = "mypy-0.991-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:652b651d42f155033a1967739788c436491b577b6a44e4c39fb340d0ee7f0d70"}, + {file = "mypy-0.991-cp38-cp38-win_amd64.whl", hash = "sha256:4175593dc25d9da12f7de8de873a33f9b2b8bdb4e827a7cae952e5b1a342e243"}, + {file = "mypy-0.991-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:98e781cd35c0acf33eb0295e8b9c55cdbef64fcb35f6d3aa2186f289bed6e80d"}, + {file = "mypy-0.991-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6d7464bac72a85cb3491c7e92b5b62f3dcccb8af26826257760a552a5e244aa5"}, + {file = "mypy-0.991-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c9166b3f81a10cdf9b49f2d594b21b31adadb3d5e9db9b834866c3258b695be3"}, + {file = "mypy-0.991-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8472f736a5bfb159a5e36740847808f6f5b659960115ff29c7cecec1741c648"}, + {file = "mypy-0.991-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5e80e758243b97b618cdf22004beb09e8a2de1af481382e4d84bc52152d1c476"}, + {file = "mypy-0.991-cp39-cp39-win_amd64.whl", hash = "sha256:74e259b5c19f70d35fcc1ad3d56499065c601dfe94ff67ae48b85596b9ec1461"}, + {file = "mypy-0.991-py3-none-any.whl", hash = "sha256:de32edc9b0a7e67c2775e574cb061a537660e51210fbf6006b0b36ea695ae9bb"}, + {file = "mypy-0.991.tar.gz", hash = "sha256:3c0165ba8f354a6d9881809ef29f1a9318a236a6d81c690094c5df32107bde06"}, +] + +[package.dependencies] +mypy-extensions = ">=0.4.3" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = ">=3.10" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +install-types = ["pip"] +python2 = ["typed-ast (>=1.4.0,<2)"] +reports = ["lxml"] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." 
+optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + +[[package]] +name = "numpy" +version = "1.24.4" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64"}, + {file = "numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1"}, + {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4"}, + {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6"}, + {file = "numpy-1.24.4-cp310-cp310-win32.whl", hash = "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc"}, + {file = "numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e"}, + {file = "numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810"}, + {file = "numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254"}, + {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7"}, + {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5"}, + {file = "numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d"}, + {file = "numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694"}, + {file = "numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61"}, + {file = "numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f"}, + {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e"}, + {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc"}, + {file = "numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2"}, + {file = "numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706"}, + {file = "numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400"}, + {file = "numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f"}, + {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9"}, + {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d"}, + {file = "numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835"}, + {file = "numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2"}, + {file = "numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463"}, +] + +[[package]] +name = "packaging" +version = "23.2" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, + {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, +] + +[[package]] +name = "pluggy" +version = "1.4.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.4.0-py3-none-any.whl", hash = "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981"}, + {file = "pluggy-1.4.0.tar.gz", hash = "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pydantic" +version = "2.6.1" +description = "Data validation using Python type hints" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic-2.6.1-py3-none-any.whl", hash = "sha256:0b6a909df3192245cb736509a92ff69e4fef76116feffec68e93a567347bae6f"}, + {file = "pydantic-2.6.1.tar.gz", hash = "sha256:4fd5c182a2488dc63e6d32737ff19937888001e2a6d86e94b3f233104a5d1fa9"}, +] + +[package.dependencies] +annotated-types = ">=0.4.0" +pydantic-core = "2.16.2" +typing-extensions = ">=4.6.1" + +[package.extras] +email = ["email-validator (>=2.0.0)"] + +[[package]] +name = "pydantic-core" +version = "2.16.2" +description = "" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic_core-2.16.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3fab4e75b8c525a4776e7630b9ee48aea50107fea6ca9f593c98da3f4d11bf7c"}, + {file = "pydantic_core-2.16.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8bde5b48c65b8e807409e6f20baee5d2cd880e0fad00b1a811ebc43e39a00ab2"}, + {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2924b89b16420712e9bb8192396026a8fbd6d8726224f918353ac19c4c043d2a"}, + {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:16aa02e7a0f539098e215fc193c8926c897175d64c7926d00a36188917717a05"}, + {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:936a787f83db1f2115ee829dd615c4f684ee48ac4de5779ab4300994d8af325b"}, + {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:459d6be6134ce3b38e0ef76f8a672924460c455d45f1ad8fdade36796df1ddc8"}, + {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9ee4febb249c591d07b2d4dd36ebcad0ccd128962aaa1801508320896575ef"}, + {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:40a0bd0bed96dae5712dab2aba7d334a6c67cbcac2ddfca7dbcc4a8176445990"}, + {file = "pydantic_core-2.16.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:870dbfa94de9b8866b37b867a2cb37a60c401d9deb4a9ea392abf11a1f98037b"}, + {file = "pydantic_core-2.16.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:308974fdf98046db28440eb3377abba274808bf66262e042c412eb2adf852731"}, + {file = "pydantic_core-2.16.2-cp310-none-win32.whl", hash = "sha256:a477932664d9611d7a0816cc3c0eb1f8856f8a42435488280dfbf4395e141485"}, + {file = "pydantic_core-2.16.2-cp310-none-win_amd64.whl", hash = "sha256:8f9142a6ed83d90c94a3efd7af8873bf7cefed2d3d44387bf848888482e2d25f"}, + {file = "pydantic_core-2.16.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:406fac1d09edc613020ce9cf3f2ccf1a1b2f57ab00552b4c18e3d5276c67eb11"}, + {file = "pydantic_core-2.16.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ce232a6170dd6532096cadbf6185271e4e8c70fc9217ebe105923ac105da9978"}, + {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a90fec23b4b05a09ad988e7a4f4e081711a90eb2a55b9c984d8b74597599180f"}, + {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8aafeedb6597a163a9c9727d8a8bd363a93277701b7bfd2749fbefee2396469e"}, + {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9957433c3a1b67bdd4c63717eaf174ebb749510d5ea612cd4e83f2d9142f3fc8"}, + {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0d7a9165167269758145756db43a133608a531b1e5bb6a626b9ee24bc38a8f7"}, + {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dffaf740fe2e147fedcb6b561353a16243e654f7fe8e701b1b9db148242e1272"}, + {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8ed79883b4328b7f0bd142733d99c8e6b22703e908ec63d930b06be3a0e7113"}, + {file = "pydantic_core-2.16.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:cf903310a34e14651c9de056fcc12ce090560864d5a2bb0174b971685684e1d8"}, + {file = "pydantic_core-2.16.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:46b0d5520dbcafea9a8645a8164658777686c5c524d381d983317d29687cce97"}, + {file = "pydantic_core-2.16.2-cp311-none-win32.whl", hash = "sha256:70651ff6e663428cea902dac297066d5c6e5423fda345a4ca62430575364d62b"}, + {file = "pydantic_core-2.16.2-cp311-none-win_amd64.whl", hash = "sha256:98dc6f4f2095fc7ad277782a7c2c88296badcad92316b5a6e530930b1d475ebc"}, + {file = "pydantic_core-2.16.2-cp311-none-win_arm64.whl", hash = "sha256:ef6113cd31411eaf9b39fc5a8848e71c72656fd418882488598758b2c8c6dfa0"}, + {file = "pydantic_core-2.16.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:88646cae28eb1dd5cd1e09605680c2b043b64d7481cdad7f5003ebef401a3039"}, + {file = "pydantic_core-2.16.2-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:7b883af50eaa6bb3299780651e5be921e88050ccf00e3e583b1e92020333304b"}, + {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bf26c2e2ea59d32807081ad51968133af3025c4ba5753e6a794683d2c91bf6e"}, + {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:99af961d72ac731aae2a1b55ccbdae0733d816f8bfb97b41909e143de735f522"}, + {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02906e7306cb8c5901a1feb61f9ab5e5c690dbbeaa04d84c1b9ae2a01ebe9379"}, + {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5362d099c244a2d2f9659fb3c9db7c735f0004765bbe06b99be69fbd87c3f15"}, + {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ac426704840877a285d03a445e162eb258924f014e2f074e209d9b4ff7bf380"}, + {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b94cbda27267423411c928208e89adddf2ea5dd5f74b9528513f0358bba019cb"}, + {file = "pydantic_core-2.16.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:6db58c22ac6c81aeac33912fb1af0e930bc9774166cdd56eade913d5f2fff35e"}, + {file = "pydantic_core-2.16.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:396fdf88b1b503c9c59c84a08b6833ec0c3b5ad1a83230252a9e17b7dfb4cffc"}, + {file = "pydantic_core-2.16.2-cp312-none-win32.whl", hash = "sha256:7c31669e0c8cc68400ef0c730c3a1e11317ba76b892deeefaf52dcb41d56ed5d"}, + {file = "pydantic_core-2.16.2-cp312-none-win_amd64.whl", hash = "sha256:a3b7352b48fbc8b446b75f3069124e87f599d25afb8baa96a550256c031bb890"}, + {file = "pydantic_core-2.16.2-cp312-none-win_arm64.whl", hash = "sha256:a9e523474998fb33f7c1a4d55f5504c908d57add624599e095c20fa575b8d943"}, + {file = "pydantic_core-2.16.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:ae34418b6b389d601b31153b84dce480351a352e0bb763684a1b993d6be30f17"}, + {file = "pydantic_core-2.16.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:732bd062c9e5d9582a30e8751461c1917dd1ccbdd6cafb032f02c86b20d2e7ec"}, + {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b52776a2e3230f4854907a1e0946eec04d41b1fc64069ee774876bbe0eab55"}, + {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ef551c053692b1e39e3f7950ce2296536728871110e7d75c4e7753fb30ca87f4"}, + {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ebb892ed8599b23fa8f1799e13a12c87a97a6c9d0f497525ce9858564c4575a4"}, + {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aa6c8c582036275997a733427b88031a32ffa5dfc3124dc25a730658c47a572f"}, + {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4ba0884a91f1aecce75202473ab138724aa4fb26d7707f2e1fa6c3e68c84fbf"}, + {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7924e54f7ce5d253d6160090ddc6df25ed2feea25bfb3339b424a9dd591688bc"}, + {file = "pydantic_core-2.16.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69a7b96b59322a81c2203be537957313b07dd333105b73db0b69212c7d867b4b"}, + {file = "pydantic_core-2.16.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:7e6231aa5bdacda78e96ad7b07d0c312f34ba35d717115f4b4bff6cb87224f0f"}, + {file = "pydantic_core-2.16.2-cp38-none-win32.whl", 
hash = "sha256:41dac3b9fce187a25c6253ec79a3f9e2a7e761eb08690e90415069ea4a68ff7a"}, + {file = "pydantic_core-2.16.2-cp38-none-win_amd64.whl", hash = "sha256:f685dbc1fdadb1dcd5b5e51e0a378d4685a891b2ddaf8e2bba89bd3a7144e44a"}, + {file = "pydantic_core-2.16.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:55749f745ebf154c0d63d46c8c58594d8894b161928aa41adbb0709c1fe78b77"}, + {file = "pydantic_core-2.16.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b30b0dd58a4509c3bd7eefddf6338565c4905406aee0c6e4a5293841411a1286"}, + {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18de31781cdc7e7b28678df7c2d7882f9692ad060bc6ee3c94eb15a5d733f8f7"}, + {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5864b0242f74b9dd0b78fd39db1768bc3f00d1ffc14e596fd3e3f2ce43436a33"}, + {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8f9186ca45aee030dc8234118b9c0784ad91a0bb27fc4e7d9d6608a5e3d386c"}, + {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc6f6c9be0ab6da37bc77c2dda5f14b1d532d5dbef00311ee6e13357a418e646"}, + {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa057095f621dad24a1e906747179a69780ef45cc8f69e97463692adbcdae878"}, + {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6ad84731a26bcfb299f9eab56c7932d46f9cad51c52768cace09e92a19e4cf55"}, + {file = "pydantic_core-2.16.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:3b052c753c4babf2d1edc034c97851f867c87d6f3ea63a12e2700f159f5c41c3"}, + {file = "pydantic_core-2.16.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e0f686549e32ccdb02ae6f25eee40cc33900910085de6aa3790effd391ae10c2"}, + {file = "pydantic_core-2.16.2-cp39-none-win32.whl", hash = "sha256:7afb844041e707ac9ad9acad2188a90bffce2c770e6dc2318be0c9916aef1469"}, + {file = "pydantic_core-2.16.2-cp39-none-win_amd64.whl", hash = "sha256:9da90d393a8227d717c19f5397688a38635afec89f2e2d7af0df037f3249c39a"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5f60f920691a620b03082692c378661947d09415743e437a7478c309eb0e4f82"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:47924039e785a04d4a4fa49455e51b4eb3422d6eaacfde9fc9abf8fdef164e8a"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6294e76b0380bb7a61eb8a39273c40b20beb35e8c87ee101062834ced19c545"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe56851c3f1d6f5384b3051c536cc81b3a93a73faf931f404fef95217cf1e10d"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9d776d30cde7e541b8180103c3f294ef7c1862fd45d81738d156d00551005784"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:72f7919af5de5ecfaf1eba47bf9a5d8aa089a3340277276e5636d16ee97614d7"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:4bfcbde6e06c56b30668a0c872d75a7ef3025dc3c1823a13cf29a0e9b33f67e8"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ff7c97eb7a29aba230389a2661edf2e9e06ce616c7e35aa764879b6894a44b25"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", 
hash = "sha256:9b5f13857da99325dcabe1cc4e9e6a3d7b2e2c726248ba5dd4be3e8e4a0b6d0e"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a7e41e3ada4cca5f22b478c08e973c930e5e6c7ba3588fb8e35f2398cdcc1545"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60eb8ceaa40a41540b9acae6ae7c1f0a67d233c40dc4359c256ad2ad85bdf5e5"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7beec26729d496a12fd23cf8da9944ee338c8b8a17035a560b585c36fe81af20"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:22c5f022799f3cd6741e24f0443ead92ef42be93ffda0d29b2597208c94c3753"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:eca58e319f4fd6df004762419612122b2c7e7d95ffafc37e890252f869f3fb2a"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ed957db4c33bc99895f3a1672eca7e80e8cda8bd1e29a80536b4ec2153fa9804"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:459c0d338cc55d099798618f714b21b7ece17eb1a87879f2da20a3ff4c7628e2"}, + {file = "pydantic_core-2.16.2.tar.gz", hash = "sha256:0ba503850d8b8dcc18391f10de896ae51d37fe5fe43dbfb6a35c5c5cad271a06"}, +] + +[package.dependencies] +typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" + +[[package]] +name = "pytest" +version = "7.4.4" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, + {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-asyncio" +version = "0.21.1" +description = "Pytest support for asyncio" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-asyncio-0.21.1.tar.gz", hash = "sha256:40a7eae6dded22c7b604986855ea48400ab15b069ae38116e8c01238e9eeb64d"}, + {file = "pytest_asyncio-0.21.1-py3-none-any.whl", hash = "sha256:8666c1c8ac02631d7c51ba282e0c69a8a452b211ffedf2599099845da5c5c37b"}, +] + +[package.dependencies] +pytest = ">=7.0.0" + +[package.extras] +docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] +testing = ["coverage (>=6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy (>=0.931)", "pytest-trio (>=0.7.0)"] + +[[package]] +name = "pytest-mock" +version = "3.12.0" +description = "Thin-wrapper around the mock package for easier use with pytest" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-mock-3.12.0.tar.gz", hash = "sha256:31a40f038c22cad32287bb43932054451ff5583ff094bca6f675df2f8bc1a6e9"}, + {file = "pytest_mock-3.12.0-py3-none-any.whl", hash = "sha256:0972719a7263072da3a21c7f4773069bcc7486027d7e8e1f81d98a47e701bc4f"}, +] + +[package.dependencies] +pytest = ">=5.0" + +[package.extras] +dev = ["pre-commit", "pytest-asyncio", "tox"] + 
+[[package]] +name = "pytest-watcher" +version = "0.3.5" +description = "Automatically rerun your tests on file modifications" +optional = false +python-versions = ">=3.7.0,<4.0.0" +files = [ + {file = "pytest_watcher-0.3.5-py3-none-any.whl", hash = "sha256:af00ca52c7be22dc34c0fd3d7ffef99057207a73b05dc5161fe3b2fe91f58130"}, + {file = "pytest_watcher-0.3.5.tar.gz", hash = "sha256:8896152460ba2b1a8200c12117c6611008ec96c8b2d811f0a05ab8a82b043ff8"}, +] + +[package.dependencies] +tomli = {version = ">=2.0.1,<3.0.0", markers = "python_version < \"3.11\""} +watchdog = ">=2.0.0" + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = 
"PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = 
"sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] + +[[package]] +name = "requests" +version = "2.31.0" +description = "Python HTTP for Humans." +optional = false +python-versions = ">=3.7" +files = [ + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "ruff" +version = "0.1.15" +description = "An extremely fast Python linter and code formatter, written in Rust." +optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.1.15-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:5fe8d54df166ecc24106db7dd6a68d44852d14eb0729ea4672bb4d96c320b7df"}, + {file = "ruff-0.1.15-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6f0bfbb53c4b4de117ac4d6ddfd33aa5fc31beeaa21d23c45c6dd249faf9126f"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0d432aec35bfc0d800d4f70eba26e23a352386be3a6cf157083d18f6f5881c8"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9405fa9ac0e97f35aaddf185a1be194a589424b8713e3b97b762336ec79ff807"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c66ec24fe36841636e814b8f90f572a8c0cb0e54d8b5c2d0e300d28a0d7bffec"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:6f8ad828f01e8dd32cc58bc28375150171d198491fc901f6f98d2a39ba8e3ff5"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86811954eec63e9ea162af0ffa9f8d09088bab51b7438e8b6488b9401863c25e"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fd4025ac5e87d9b80e1f300207eb2fd099ff8200fa2320d7dc066a3f4622dc6b"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b17b93c02cdb6aeb696effecea1095ac93f3884a49a554a9afa76bb125c114c1"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:ddb87643be40f034e97e97f5bc2ef7ce39de20e34608f3f829db727a93fb82c5"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:abf4822129ed3a5ce54383d5f0e964e7fef74a41e48eb1dfad404151efc130a2"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_i686.whl", hash = "sha256:6c629cf64bacfd136c07c78ac10a54578ec9d1bd2a9d395efbee0935868bf852"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1bab866aafb53da39c2cadfb8e1c4550ac5340bb40300083eb8967ba25481447"}, + {file = "ruff-0.1.15-py3-none-win32.whl", hash = "sha256:2417e1cb6e2068389b07e6fa74c306b2810fe3ee3476d5b8a96616633f40d14f"}, + {file = "ruff-0.1.15-py3-none-win_amd64.whl", hash = "sha256:3837ac73d869efc4182d9036b1405ef4c73d9b1f88da2413875e34e0d6919587"}, + {file = "ruff-0.1.15-py3-none-win_arm64.whl", hash = "sha256:9a933dfb1c14ec7a33cceb1e49ec4a16b51ce3c20fd42663198746efc0427360"}, + {file = "ruff-0.1.15.tar.gz", hash = 
"sha256:f6dfa8c1b21c913c326919056c390966648b680966febcb796cc9d1aaab8564e"}, +] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "sniffio" +version = "1.3.0" +description = "Sniff out which async library your code is running under" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"}, + {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, +] + +[[package]] +name = "syrupy" +version = "4.6.1" +description = "Pytest Snapshot Test Utility" +optional = false +python-versions = ">=3.8.1,<4" +files = [ + {file = "syrupy-4.6.1-py3-none-any.whl", hash = "sha256:203e52f9cb9fa749cf683f29bd68f02c16c3bc7e7e5fe8f2fc59bdfe488ce133"}, + {file = "syrupy-4.6.1.tar.gz", hash = "sha256:37a835c9ce7857eeef86d62145885e10b3cb9615bc6abeb4ce404b3f18e1bb36"}, +] + +[package.dependencies] +pytest = ">=7.0.0,<9.0.0" + +[[package]] +name = "tenacity" +version = "8.2.3" +description = "Retry code until it succeeds" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c"}, + {file = "tenacity-8.2.3.tar.gz", hash = "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a"}, +] + +[package.extras] +doc = ["reno", "sphinx", "tornado (>=4.5)"] + +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + +[[package]] +name = "typing-extensions" +version = "4.9.0" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"}, + {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"}, +] + +[[package]] +name = "urllib3" +version = "2.2.0" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "urllib3-2.2.0-py3-none-any.whl", hash = "sha256:ce3711610ddce217e6d113a2732fafad960a03fd0318c91faa79481e35c11224"}, + {file = "urllib3-2.2.0.tar.gz", hash = "sha256:051d961ad0c62a94e50ecf1af379c3aba230c66c710493493560c0c223c49f20"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "watchdog" +version = "4.0.0" +description = "Filesystem events monitoring" +optional = false +python-versions = ">=3.8" +files = [ + {file = "watchdog-4.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:39cb34b1f1afbf23e9562501673e7146777efe95da24fab5707b88f7fb11649b"}, + {file = "watchdog-4.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c522392acc5e962bcac3b22b9592493ffd06d1fc5d755954e6be9f4990de932b"}, + {file = "watchdog-4.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6c47bdd680009b11c9ac382163e05ca43baf4127954c5f6d0250e7d772d2b80c"}, + {file = "watchdog-4.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8350d4055505412a426b6ad8c521bc7d367d1637a762c70fdd93a3a0d595990b"}, + {file = "watchdog-4.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c17d98799f32e3f55f181f19dd2021d762eb38fdd381b4a748b9f5a36738e935"}, + {file = "watchdog-4.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4986db5e8880b0e6b7cd52ba36255d4793bf5cdc95bd6264806c233173b1ec0b"}, + {file = "watchdog-4.0.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:11e12fafb13372e18ca1bbf12d50f593e7280646687463dd47730fd4f4d5d257"}, + {file = "watchdog-4.0.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5369136a6474678e02426bd984466343924d1df8e2fd94a9b443cb7e3aa20d19"}, + {file = "watchdog-4.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76ad8484379695f3fe46228962017a7e1337e9acadafed67eb20aabb175df98b"}, + {file = "watchdog-4.0.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:45cc09cc4c3b43fb10b59ef4d07318d9a3ecdbff03abd2e36e77b6dd9f9a5c85"}, + {file = "watchdog-4.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:eed82cdf79cd7f0232e2fdc1ad05b06a5e102a43e331f7d041e5f0e0a34a51c4"}, + {file = "watchdog-4.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ba30a896166f0fee83183cec913298151b73164160d965af2e93a20bbd2ab605"}, + {file = "watchdog-4.0.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d18d7f18a47de6863cd480734613502904611730f8def45fc52a5d97503e5101"}, + {file = "watchdog-4.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2895bf0518361a9728773083908801a376743bcc37dfa252b801af8fd281b1ca"}, + {file = "watchdog-4.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:87e9df830022488e235dd601478c15ad73a0389628588ba0b028cb74eb72fed8"}, + {file = "watchdog-4.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:6e949a8a94186bced05b6508faa61b7adacc911115664ccb1923b9ad1f1ccf7b"}, + {file = "watchdog-4.0.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:6a4db54edea37d1058b08947c789a2354ee02972ed5d1e0dca9b0b820f4c7f92"}, + {file = "watchdog-4.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d31481ccf4694a8416b681544c23bd271f5a123162ab603c7d7d2dd7dd901a07"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8fec441f5adcf81dd240a5fe78e3d83767999771630b5ddfc5867827a34fa3d3"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_armv7l.whl", hash = 
"sha256:6a9c71a0b02985b4b0b6d14b875a6c86ddea2fdbebd0c9a720a806a8bbffc69f"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:557ba04c816d23ce98a06e70af6abaa0485f6d94994ec78a42b05d1c03dcbd50"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:d0f9bd1fd919134d459d8abf954f63886745f4660ef66480b9d753a7c9d40927"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:f9b2fdca47dc855516b2d66eef3c39f2672cbf7e7a42e7e67ad2cbfcd6ba107d"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:73c7a935e62033bd5e8f0da33a4dcb763da2361921a69a5a95aaf6c93aa03a87"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6a80d5cae8c265842c7419c560b9961561556c4361b297b4c431903f8c33b269"}, + {file = "watchdog-4.0.0-py3-none-win32.whl", hash = "sha256:8f9a542c979df62098ae9c58b19e03ad3df1c9d8c6895d96c0d51da17b243b1c"}, + {file = "watchdog-4.0.0-py3-none-win_amd64.whl", hash = "sha256:f970663fa4f7e80401a7b0cbeec00fa801bf0287d93d48368fc3e6fa32716245"}, + {file = "watchdog-4.0.0-py3-none-win_ia64.whl", hash = "sha256:9a03e16e55465177d416699331b0f3564138f1807ecc5f2de9d55d8f188d08c7"}, + {file = "watchdog-4.0.0.tar.gz", hash = "sha256:e3e7065cbdabe6183ab82199d7a4f6b3ba0a438c5a512a68559846ccb76a78ec"}, +] + +[package.extras] +watchmedo = ["PyYAML (>=3.10)"] + +[metadata] +lock-version = "2.0" +python-versions = ">=3.8.1,<4.0" +content-hash = "b29ea73885b37acd9972fb07daa8aa13b5cf2509a11c2c6c2f9b385ed6572211" diff --git a/libs/partners/astradb/pyproject.toml b/libs/partners/astradb/pyproject.toml new file mode 100644 index 0000000000000..871ea31c67c61 --- /dev/null +++ b/libs/partners/astradb/pyproject.toml @@ -0,0 +1,90 @@ +[tool.poetry] +name = "langchain-astradb" +version = "0.0.1" +description = "An integration package connecting Astra DB and LangChain" +authors = [] +readme = "README.md" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +langchain-core = ">=0.1" +astrapy = "^0.7.5" +numpy = "^1" + +[tool.poetry.group.test] +optional = true + +[tool.poetry.group.test.dependencies] +pytest = "^7.3.0" +freezegun = "^1.2.2" +pytest-mock = "^3.10.0" +syrupy = "^4.0.2" +pytest-watcher = "^0.3.4" +pytest-asyncio = "^0.21.1" +langchain-core = {path = "../../core", develop = true} + +[tool.poetry.group.codespell] +optional = true + +[tool.poetry.group.codespell.dependencies] +codespell = "^2.2.0" + +[tool.poetry.group.test_integration] +optional = true + +[tool.poetry.group.test_integration.dependencies] + +[tool.poetry.group.lint] +optional = true + +[tool.poetry.group.lint.dependencies] +ruff = "^0.1.5" + +[tool.poetry.group.typing.dependencies] +mypy = "^0.991" +langchain-core = {path = "../../core", develop = true} + +[tool.poetry.group.dev] +optional = true + +[tool.poetry.group.dev.dependencies] +langchain-core = {path = "../../core", develop = true} + +[tool.ruff] +select = [ + "E", # pycodestyle + "F", # pyflakes + "I", # isort +] + +[tool.mypy] +disallow_untyped_defs = "True" + +[tool.coverage.run] +omit = [ + "tests/*", +] + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" + +[tool.pytest.ini_options] +# --strict-markers will raise errors on unknown marks. +# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks +# +# https://docs.pytest.org/en/7.1.x/reference/reference.html +# --strict-config any warnings encountered while parsing the `pytest` +# section of the configuration file raise errors. 
+#
+# https://github.com/tophat/syrupy
+# --snapshot-warn-unused    Prints a warning on unused snapshots rather than fail the test suite.
+addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5"
+# Registering custom markers.
+# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
+markers = [
+    "requires: mark tests as requiring a specific library",
+    "asyncio: mark tests as requiring asyncio",
+    "compile: mark placeholder test used to compile integration tests without running them",
+]
+asyncio_mode = "auto"
diff --git a/libs/partners/astradb/scripts/check_imports.py b/libs/partners/astradb/scripts/check_imports.py
new file mode 100644
index 0000000000000..fd21a4975b7f0
--- /dev/null
+++ b/libs/partners/astradb/scripts/check_imports.py
@@ -0,0 +1,17 @@
+import sys
+import traceback
+from importlib.machinery import SourceFileLoader
+
+if __name__ == "__main__":
+    files = sys.argv[1:]
+    has_failure = False
+    for file in files:
+        try:
+            SourceFileLoader("x", file).load_module()
+        except Exception:
+            has_failure = True
+            print(file)
+            traceback.print_exc()
+            print()
+
+    sys.exit(1 if has_failure else 0)
diff --git a/libs/partners/astradb/scripts/check_pydantic.sh b/libs/partners/astradb/scripts/check_pydantic.sh
new file mode 100755
index 0000000000000..06b5bb81ae236
--- /dev/null
+++ b/libs/partners/astradb/scripts/check_pydantic.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+#
+# This script searches for lines starting with "import pydantic" or "from pydantic"
+# in tracked files within a Git repository.
+#
+# Usage: ./scripts/check_pydantic.sh /path/to/repository
+
+# Check if a path argument is provided
+if [ $# -ne 1 ]; then
+  echo "Usage: $0 /path/to/repository"
+  exit 1
+fi
+
+repository_path="$1"
+
+# Search for lines matching the pattern within the specified repository
+result=$(git -C "$repository_path" grep -E '^import pydantic|^from pydantic')
+
+# Check if any matching lines were found
+if [ -n "$result" ]; then
+  echo "ERROR: The following lines need to be updated:"
+  echo "$result"
+  echo "Please replace the code with an import from langchain_core.pydantic_v1."
+  echo "For example, replace 'from pydantic import BaseModel'"
+  echo "with 'from langchain_core.pydantic_v1 import BaseModel'"
+  exit 1
+fi
diff --git a/libs/partners/astradb/scripts/lint_imports.sh b/libs/partners/astradb/scripts/lint_imports.sh
new file mode 100755
index 0000000000000..695613c7ba8fd
--- /dev/null
+++ b/libs/partners/astradb/scripts/lint_imports.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+set -eu
+
+# Initialize a variable to keep track of errors
+errors=0
+
+# make sure we are not importing from langchain or langchain_experimental
+git --no-pager grep '^from langchain\.' . && errors=$((errors+1))
+git --no-pager grep '^from langchain_experimental\.' . && errors=$((errors+1))
+
+# Decide on an exit status based on the errors
+if [ "$errors" -gt 0 ]; then
+  exit 1
+else
+  exit 0
+fi
diff --git a/libs/partners/astradb/tests/__init__.py b/libs/partners/astradb/tests/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/libs/partners/astradb/tests/integration_tests/__init__.py b/libs/partners/astradb/tests/integration_tests/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/libs/partners/astradb/tests/integration_tests/test_compile.py b/libs/partners/astradb/tests/integration_tests/test_compile.py
new file mode 100644
index 0000000000000..33ecccdfa0fbd
--- /dev/null
+++ b/libs/partners/astradb/tests/integration_tests/test_compile.py
@@ -0,0 +1,7 @@
+import pytest
+
+
+@pytest.mark.compile
+def test_placeholder() -> None:
+    """Used for compiling integration tests without running any real tests."""
+    pass
diff --git a/libs/partners/astradb/tests/integration_tests/vectorstores/test_astradb.py b/libs/partners/astradb/tests/integration_tests/vectorstores/test_astradb.py
new file mode 100644
index 0000000000000..de68c016a9809
--- /dev/null
+++ b/libs/partners/astradb/tests/integration_tests/vectorstores/test_astradb.py
@@ -0,0 +1,869 @@
+"""
+Test of the Astra DB vector store class `AstraDBVectorStore`.
+
+Required to run this test:
+    - a recent `astrapy` Python package available
+    - an Astra DB instance
+    - the two environment variables set:
+        export ASTRA_DB_API_ENDPOINT="https://<DB-ID>-us-east1.apps.astra.datastax.com"
+        export ASTRA_DB_APPLICATION_TOKEN="AstraCS:........."
+    - optionally this as well (otherwise defaults are used):
+        export ASTRA_DB_KEYSPACE="my_keyspace"
+    - optionally, to skip the slower collection deletions ("0" = full tests, default):
+        export ASTRA_DB_SKIP_COLLECTION_DELETIONS="1"
+"""
+
+import json
+import math
+import os
+from typing import Iterable, List, Optional, TypedDict
+
+import pytest
+from langchain_core.documents import Document
+from langchain_core.embeddings import Embeddings
+
+from langchain_astradb.vectorstores import AstraDBVectorStore
+
+# Faster testing (no actual collection deletions). Off by default (=full tests)
+SKIP_COLLECTION_DELETE = (
+    int(os.environ.get("ASTRA_DB_SKIP_COLLECTION_DELETIONS", "0")) != 0
+)
+
+COLLECTION_NAME_DIM2 = "lc_test_d2"
+COLLECTION_NAME_DIM2_EUCLIDEAN = "lc_test_d2_eucl"
+
+MATCH_EPSILON = 0.0001
+
+# Ad-hoc embedding classes:
+
+
+class AstraDBCredentials(TypedDict):
+    token: str
+    api_endpoint: str
+    namespace: Optional[str]
+
+
+class SomeEmbeddings(Embeddings):
+    """
+    Turn a sentence into an embedding vector in some way.
+    Not important how; all that counts is that it is deterministic.
+    """
+
+    def __init__(self, dimension: int) -> None:
+        self.dimension = dimension
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        return [self.embed_query(txt) for txt in texts]
+
+    async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
+        return self.embed_documents(texts)
+
+    def embed_query(self, text: str) -> List[float]:
+        # ord() values of the first characters, padded with a one and zeros,
+        # truncated to the requested dimension ...
+        unnormed0 = [ord(c) for c in text[: self.dimension]]
+        unnormed = (unnormed0 + [1] + [0] * (self.dimension - 1 - len(unnormed0)))[
+            : self.dimension
+        ]
+        # ... and then L2-normalized
+        norm = sum(x * x for x in unnormed) ** 0.5
+        normed = [x / norm for x in unnormed]
+        return normed
+
+    async def aembed_query(self, text: str) -> List[float]:
+        return self.embed_query(text)
+
+
+class ParserEmbeddings(Embeddings):
+    """
+    Parse input texts: when a text is valid JSON for a List[float], use that vector.
+    Otherwise, return all zeros and call it a day.
+ """ + + def __init__(self, dimension: int) -> None: + self.dimension = dimension + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + return [self.embed_query(txt) for txt in texts] + + async def aembed_documents(self, texts: List[str]) -> List[List[float]]: + return self.embed_documents(texts) + + def embed_query(self, text: str) -> List[float]: + try: + vals = json.loads(text) + assert len(vals) == self.dimension + return vals + except Exception: + print(f'[ParserEmbeddings] Returning a moot vector for "{text}"') + return [0.0] * self.dimension + + async def aembed_query(self, text: str) -> List[float]: + return self.embed_query(text) + + +def _has_env_vars() -> bool: + return all( + [ + "ASTRA_DB_APPLICATION_TOKEN" in os.environ, + "ASTRA_DB_API_ENDPOINT" in os.environ, + ] + ) + + +@pytest.fixture(scope="session") +def astradb_credentials() -> Iterable[AstraDBCredentials]: + yield { + "token": os.environ["ASTRA_DB_APPLICATION_TOKEN"], + "api_endpoint": os.environ["ASTRA_DB_API_ENDPOINT"], + "namespace": os.environ.get("ASTRA_DB_KEYSPACE"), + } + + +@pytest.fixture(scope="function") +def store_someemb( + astradb_credentials: AstraDBCredentials, +) -> Iterable[AstraDBVectorStore]: + emb = SomeEmbeddings(dimension=2) + v_store = AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + v_store.clear() + + yield v_store + + if not SKIP_COLLECTION_DELETE: + v_store.delete_collection() + else: + v_store.clear() + + +@pytest.fixture(scope="function") +def store_parseremb( + astradb_credentials: AstraDBCredentials, +) -> Iterable[AstraDBVectorStore]: + emb = ParserEmbeddings(dimension=2) + v_store = AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + v_store.clear() + + yield v_store + + if not SKIP_COLLECTION_DELETE: + v_store.delete_collection() + else: + v_store.clear() + + +@pytest.mark.requires("astrapy") +@pytest.mark.skipif(not _has_env_vars(), reason="Missing Astra DB env. 
vars") +class TestAstraDBVectorStore: + def test_astradb_vectorstore_create_delete( + self, astradb_credentials: AstraDBCredentials + ) -> None: + """Create and delete.""" + from astrapy.db import AstraDB as LibAstraDB + + emb = SomeEmbeddings(dimension=2) + # creation by passing the connection secrets + v_store = AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + v_store.add_texts("Sample 1") + if not SKIP_COLLECTION_DELETE: + v_store.delete_collection() + else: + v_store.clear() + + # Creation by passing a ready-made astrapy client: + astra_db_client = LibAstraDB( + **astradb_credentials, + ) + v_store_2 = AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + astra_db_client=astra_db_client, + ) + v_store_2.add_texts("Sample 2") + if not SKIP_COLLECTION_DELETE: + v_store_2.delete_collection() + else: + v_store_2.clear() + + async def test_astradb_vectorstore_create_delete_async( + self, astradb_credentials: AstraDBCredentials + ) -> None: + """Create and delete.""" + emb = SomeEmbeddings(dimension=2) + # creation by passing the connection secrets + v_store = AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + await v_store.adelete_collection() + # Creation by passing a ready-made astrapy client: + from astrapy.db import AsyncAstraDB + + astra_db_client = AsyncAstraDB( + **astradb_credentials, + ) + v_store_2 = AstraDBVectorStore( + embedding=emb, + collection_name="lc_test_2_async", + async_astra_db_client=astra_db_client, + ) + if not SKIP_COLLECTION_DELETE: + await v_store_2.adelete_collection() + else: + await v_store_2.aclear() + + @pytest.mark.skipif( + SKIP_COLLECTION_DELETE, + reason="Collection-deletion tests are suppressed", + ) + def test_astradb_vectorstore_pre_delete_collection( + self, astradb_credentials: AstraDBCredentials + ) -> None: + """Use of the pre_delete_collection flag.""" + emb = SomeEmbeddings(dimension=2) + v_store = AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + v_store.clear() + try: + v_store.add_texts( + texts=["aa"], + metadatas=[ + {"k": "a", "ord": 0}, + ], + ids=["a"], + ) + res1 = v_store.similarity_search("aa", k=5) + assert len(res1) == 1 + v_store = AstraDBVectorStore( + embedding=emb, + pre_delete_collection=True, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + res1 = v_store.similarity_search("aa", k=5) + assert len(res1) == 0 + finally: + v_store.delete_collection() + + @pytest.mark.skipif( + SKIP_COLLECTION_DELETE, + reason="Collection-deletion tests are suppressed", + ) + async def test_astradb_vectorstore_pre_delete_collection_async( + self, astradb_credentials: AstraDBCredentials + ) -> None: + """Use of the pre_delete_collection flag.""" + emb = SomeEmbeddings(dimension=2) + # creation by passing the connection secrets + + v_store = AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + try: + await v_store.aadd_texts( + texts=["aa"], + metadatas=[ + {"k": "a", "ord": 0}, + ], + ids=["a"], + ) + res1 = await v_store.asimilarity_search("aa", k=5) + assert len(res1) == 1 + v_store = AstraDBVectorStore( + embedding=emb, + pre_delete_collection=True, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + res1 = await v_store.asimilarity_search("aa", k=5) + assert len(res1) == 0 + finally: + await v_store.adelete_collection() + + def 
test_astradb_vectorstore_from_x( + self, astradb_credentials: AstraDBCredentials + ) -> None: + """from_texts and from_documents methods.""" + emb = SomeEmbeddings(dimension=2) + # prepare empty collection + AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ).clear() + # from_texts + v_store = AstraDBVectorStore.from_texts( + texts=["Hi", "Ho"], + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + try: + assert v_store.similarity_search("Ho", k=1)[0].page_content == "Ho" + finally: + if not SKIP_COLLECTION_DELETE: + v_store.delete_collection() + else: + v_store.clear() + + # from_documents + v_store_2 = AstraDBVectorStore.from_documents( + [ + Document(page_content="Hee"), + Document(page_content="Hoi"), + ], + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + try: + assert v_store_2.similarity_search("Hoi", k=1)[0].page_content == "Hoi" + finally: + if not SKIP_COLLECTION_DELETE: + v_store_2.delete_collection() + else: + v_store_2.clear() + + async def test_astradb_vectorstore_from_x_async( + self, astradb_credentials: AstraDBCredentials + ) -> None: + """from_texts and from_documents methods.""" + emb = SomeEmbeddings(dimension=2) + # prepare empty collection + await AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ).aclear() + # from_texts + v_store = await AstraDBVectorStore.afrom_texts( + texts=["Hi", "Ho"], + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + try: + assert (await v_store.asimilarity_search("Ho", k=1))[0].page_content == "Ho" + finally: + if not SKIP_COLLECTION_DELETE: + await v_store.adelete_collection() + else: + await v_store.aclear() + + # from_documents + v_store_2 = await AstraDBVectorStore.afrom_documents( + [ + Document(page_content="Hee"), + Document(page_content="Hoi"), + ], + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + try: + assert (await v_store_2.asimilarity_search("Hoi", k=1))[ + 0 + ].page_content == "Hoi" + finally: + if not SKIP_COLLECTION_DELETE: + await v_store_2.adelete_collection() + else: + await v_store_2.aclear() + + def test_astradb_vectorstore_crud(self, store_someemb: AstraDBVectorStore) -> None: + """Basic add/delete/update behaviour.""" + res0 = store_someemb.similarity_search("Abc", k=2) + assert res0 == [] + # write and check again + store_someemb.add_texts( + texts=["aa", "bb", "cc"], + metadatas=[ + {"k": "a", "ord": 0}, + {"k": "b", "ord": 1}, + {"k": "c", "ord": 2}, + ], + ids=["a", "b", "c"], + ) + res1 = store_someemb.similarity_search("Abc", k=5) + assert {doc.page_content for doc in res1} == {"aa", "bb", "cc"} + # partial overwrite and count total entries + store_someemb.add_texts( + texts=["cc", "dd"], + metadatas=[ + {"k": "c_new", "ord": 102}, + {"k": "d_new", "ord": 103}, + ], + ids=["c", "d"], + ) + res2 = store_someemb.similarity_search("Abc", k=10) + assert len(res2) == 4 + # pick one that was just updated and check its metadata + res3 = store_someemb.similarity_search_with_score_id( + query="cc", k=1, filter={"k": "c_new"} + ) + print(str(res3)) + doc3, score3, id3 = res3[0] + assert doc3.page_content == "cc" + assert doc3.metadata == {"k": "c_new", "ord": 102} + assert score3 > 0.999 # leaving some leeway for approximations... 
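+        # (similarity scores are normalized to the [0, 1] range, with 1.0 an
+        # exact directional match; see the similarity-scale tests further down)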
+ assert id3 == "c" + # delete and count again + del1_res = store_someemb.delete(["b"]) + assert del1_res is True + del2_res = store_someemb.delete(["a", "c", "Z!"]) + assert del2_res is True # a non-existing ID was supplied + assert len(store_someemb.similarity_search("xy", k=10)) == 1 + # clear store + store_someemb.clear() + assert store_someemb.similarity_search("Abc", k=2) == [] + # add_documents with "ids" arg passthrough + store_someemb.add_documents( + [ + Document(page_content="vv", metadata={"k": "v", "ord": 204}), + Document(page_content="ww", metadata={"k": "w", "ord": 205}), + ], + ids=["v", "w"], + ) + assert len(store_someemb.similarity_search("xy", k=10)) == 2 + res4 = store_someemb.similarity_search("ww", k=1, filter={"k": "w"}) + assert res4[0].metadata["ord"] == 205 + + async def test_astradb_vectorstore_crud_async( + self, store_someemb: AstraDBVectorStore + ) -> None: + """Basic add/delete/update behaviour.""" + res0 = await store_someemb.asimilarity_search("Abc", k=2) + assert res0 == [] + # write and check again + await store_someemb.aadd_texts( + texts=["aa", "bb", "cc"], + metadatas=[ + {"k": "a", "ord": 0}, + {"k": "b", "ord": 1}, + {"k": "c", "ord": 2}, + ], + ids=["a", "b", "c"], + ) + res1 = await store_someemb.asimilarity_search("Abc", k=5) + assert {doc.page_content for doc in res1} == {"aa", "bb", "cc"} + # partial overwrite and count total entries + await store_someemb.aadd_texts( + texts=["cc", "dd"], + metadatas=[ + {"k": "c_new", "ord": 102}, + {"k": "d_new", "ord": 103}, + ], + ids=["c", "d"], + ) + res2 = await store_someemb.asimilarity_search("Abc", k=10) + assert len(res2) == 4 + # pick one that was just updated and check its metadata + res3 = await store_someemb.asimilarity_search_with_score_id( + query="cc", k=1, filter={"k": "c_new"} + ) + print(str(res3)) + doc3, score3, id3 = res3[0] + assert doc3.page_content == "cc" + assert doc3.metadata == {"k": "c_new", "ord": 102} + assert score3 > 0.999 # leaving some leeway for approximations... + assert id3 == "c" + # delete and count again + del1_res = await store_someemb.adelete(["b"]) + assert del1_res is True + del2_res = await store_someemb.adelete(["a", "c", "Z!"]) + assert del2_res is False # a non-existing ID was supplied + assert len(await store_someemb.asimilarity_search("xy", k=10)) == 1 + # clear store + await store_someemb.aclear() + assert await store_someemb.asimilarity_search("Abc", k=2) == [] + # add_documents with "ids" arg passthrough + await store_someemb.aadd_documents( + [ + Document(page_content="vv", metadata={"k": "v", "ord": 204}), + Document(page_content="ww", metadata={"k": "w", "ord": 205}), + ], + ids=["v", "w"], + ) + assert len(await store_someemb.asimilarity_search("xy", k=10)) == 2 + res4 = await store_someemb.asimilarity_search("ww", k=1, filter={"k": "w"}) + assert res4[0].metadata["ord"] == 205 + + def test_astradb_vectorstore_mmr(self, store_parseremb: AstraDBVectorStore) -> None: + """ + MMR testing. We work on the unit circle with angle multiples + of 2*pi/20 and prepare a store with known vectors for a controlled + MMR outcome. 
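+        (With fetch_k=3 the candidates nearest to the query at angle 3 are
+        i=4, 5 and 0; MMR first picks the closest, 4, and then prefers the
+        more diverse 0 over 5, hence the expected result {0, 4}.)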
+ """ + + def _v_from_i(i: int, N: int) -> str: + angle = 2 * math.pi * i / N + vector = [math.cos(angle), math.sin(angle)] + return json.dumps(vector) + + i_vals = [0, 4, 5, 13] + N_val = 20 + store_parseremb.add_texts( + [_v_from_i(i, N_val) for i in i_vals], metadatas=[{"i": i} for i in i_vals] + ) + res1 = store_parseremb.max_marginal_relevance_search( + _v_from_i(3, N_val), + k=2, + fetch_k=3, + ) + res_i_vals = {doc.metadata["i"] for doc in res1} + assert res_i_vals == {0, 4} + + async def test_astradb_vectorstore_mmr_async( + self, store_parseremb: AstraDBVectorStore + ) -> None: + """ + MMR testing. We work on the unit circle with angle multiples + of 2*pi/20 and prepare a store with known vectors for a controlled + MMR outcome. + """ + + def _v_from_i(i: int, N: int) -> str: + angle = 2 * math.pi * i / N + vector = [math.cos(angle), math.sin(angle)] + return json.dumps(vector) + + i_vals = [0, 4, 5, 13] + N_val = 20 + await store_parseremb.aadd_texts( + [_v_from_i(i, N_val) for i in i_vals], + metadatas=[{"i": i} for i in i_vals], + ) + res1 = await store_parseremb.amax_marginal_relevance_search( + _v_from_i(3, N_val), + k=2, + fetch_k=3, + ) + res_i_vals = {doc.metadata["i"] for doc in res1} + assert res_i_vals == {0, 4} + + def test_astradb_vectorstore_metadata( + self, store_someemb: AstraDBVectorStore + ) -> None: + """Metadata filtering.""" + store_someemb.add_documents( + [ + Document( + page_content="q", + metadata={"ord": ord("q"), "group": "consonant"}, + ), + Document( + page_content="w", + metadata={"ord": ord("w"), "group": "consonant"}, + ), + Document( + page_content="r", + metadata={"ord": ord("r"), "group": "consonant"}, + ), + Document( + page_content="e", + metadata={"ord": ord("e"), "group": "vowel"}, + ), + Document( + page_content="i", + metadata={"ord": ord("i"), "group": "vowel"}, + ), + Document( + page_content="o", + metadata={"ord": ord("o"), "group": "vowel"}, + ), + ] + ) + # no filters + res0 = store_someemb.similarity_search("x", k=10) + assert {doc.page_content for doc in res0} == set("qwreio") + # single filter + res1 = store_someemb.similarity_search( + "x", + k=10, + filter={"group": "vowel"}, + ) + assert {doc.page_content for doc in res1} == set("eio") + # multiple filters + res2 = store_someemb.similarity_search( + "x", + k=10, + filter={"group": "consonant", "ord": ord("q")}, + ) + assert {doc.page_content for doc in res2} == set("q") + # excessive filters + res3 = store_someemb.similarity_search( + "x", + k=10, + filter={"group": "consonant", "ord": ord("q"), "case": "upper"}, + ) + assert res3 == [] + # filter with logical operator + res4 = store_someemb.similarity_search( + "x", + k=10, + filter={"$or": [{"ord": ord("q")}, {"ord": ord("r")}]}, + ) + assert {doc.page_content for doc in res4} == {"q", "r"} + + def test_astradb_vectorstore_similarity_scale( + self, store_parseremb: AstraDBVectorStore + ) -> None: + """Scale of the similarity scores.""" + store_parseremb.add_texts( + texts=[ + json.dumps([1, 1]), + json.dumps([-1, -1]), + ], + ids=["near", "far"], + ) + res1 = store_parseremb.similarity_search_with_score( + json.dumps([0.5, 0.5]), + k=2, + ) + scores = [sco for _, sco in res1] + sco_near, sco_far = scores + assert abs(1 - sco_near) < MATCH_EPSILON and abs(sco_far) < MATCH_EPSILON + + async def test_astradb_vectorstore_similarity_scale_async( + self, store_parseremb: AstraDBVectorStore + ) -> None: + """Scale of the similarity scores.""" + await store_parseremb.aadd_texts( + texts=[ + json.dumps([1, 1]), + json.dumps([-1, -1]), 
+ ], + ids=["near", "far"], + ) + res1 = await store_parseremb.asimilarity_search_with_score( + json.dumps([0.5, 0.5]), + k=2, + ) + scores = [sco for _, sco in res1] + sco_near, sco_far = scores + assert abs(1 - sco_near) < MATCH_EPSILON and abs(sco_far) < MATCH_EPSILON + + def test_astradb_vectorstore_massive_delete( + self, store_someemb: AstraDBVectorStore + ) -> None: + """Larger-scale bulk deletes.""" + M = 50 + texts = [str(i + 1 / 7.0) for i in range(2 * M)] + ids0 = ["doc_%i" % i for i in range(M)] + ids1 = ["doc_%i" % (i + M) for i in range(M)] + ids = ids0 + ids1 + store_someemb.add_texts(texts=texts, ids=ids) + # deleting a bunch of these + del_res0 = store_someemb.delete(ids0) + assert del_res0 is True + # deleting the rest plus a fake one + del_res1 = store_someemb.delete(ids1 + ["ghost!"]) + assert del_res1 is True # ensure no error + # nothing left + assert store_someemb.similarity_search("x", k=2 * M) == [] + + @pytest.mark.skipif( + SKIP_COLLECTION_DELETE, + reason="Collection-deletion tests are suppressed", + ) + def test_astradb_vectorstore_delete_collection( + self, astradb_credentials: AstraDBCredentials + ) -> None: + """behaviour of 'delete_collection'.""" + collection_name = COLLECTION_NAME_DIM2 + emb = SomeEmbeddings(dimension=2) + v_store = AstraDBVectorStore( + embedding=emb, + collection_name=collection_name, + **astradb_credentials, + ) + v_store.add_texts(["huh"]) + assert len(v_store.similarity_search("hah", k=10)) == 1 + # another instance pointing to the same collection on DB + v_store_kenny = AstraDBVectorStore( + embedding=emb, + collection_name=collection_name, + **astradb_credentials, + ) + v_store_kenny.delete_collection() + # dropped on DB, but 'v_store' should have no clue: + with pytest.raises(ValueError): + _ = v_store.similarity_search("hah", k=10) + + def test_astradb_vectorstore_custom_params( + self, astradb_credentials: AstraDBCredentials + ) -> None: + """Custom batch size and concurrency params.""" + emb = SomeEmbeddings(dimension=2) + # prepare empty collection + AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ).clear() + v_store = AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + batch_size=17, + bulk_insert_batch_concurrency=13, + bulk_insert_overwrite_concurrency=7, + bulk_delete_concurrency=19, + ) + try: + # add_texts + N = 50 + texts = [str(i + 1 / 7.0) for i in range(N)] + ids = ["doc_%i" % i for i in range(N)] + v_store.add_texts(texts=texts, ids=ids) + v_store.add_texts( + texts=texts, + ids=ids, + batch_size=19, + batch_concurrency=7, + overwrite_concurrency=13, + ) + # + _ = v_store.delete(ids[: N // 2]) + _ = v_store.delete(ids[N // 2 :], concurrency=23) + # + finally: + if not SKIP_COLLECTION_DELETE: + v_store.delete_collection() + else: + v_store.clear() + + async def test_astradb_vectorstore_custom_params_async( + self, astradb_credentials: AstraDBCredentials + ) -> None: + """Custom batch size and concurrency params.""" + emb = SomeEmbeddings(dimension=2) + v_store = AstraDBVectorStore( + embedding=emb, + collection_name="lc_test_c_async", + batch_size=17, + bulk_insert_batch_concurrency=13, + bulk_insert_overwrite_concurrency=7, + bulk_delete_concurrency=19, + **astradb_credentials, + ) + try: + # add_texts + N = 50 + texts = [str(i + 1 / 7.0) for i in range(N)] + ids = ["doc_%i" % i for i in range(N)] + await v_store.aadd_texts(texts=texts, ids=ids) + await v_store.aadd_texts( + texts=texts, + ids=ids, + 
batch_size=19,
+                batch_concurrency=7,
+                overwrite_concurrency=13,
+            )
+            #
+            await v_store.adelete(ids[: N // 2])
+            await v_store.adelete(ids[N // 2 :], concurrency=23)
+            #
+        finally:
+            if not SKIP_COLLECTION_DELETE:
+                await v_store.adelete_collection()
+            else:
+                await v_store.aclear()
+
+    def test_astradb_vectorstore_metrics(
+        self, astradb_credentials: AstraDBCredentials
+    ) -> None:
+        """
+        Different choices of similarity metric.
+        Both stores (with "cosine" and "euclidean" metrics) contain these two:
+            - a vector slightly rotated w.r.t. the query vector
+            - a vector which is a long multiple of the query vector
+        so, which one is "the closest one" depends on the metric.
+        """
+        emb = ParserEmbeddings(dimension=2)
+        isq2 = 0.5**0.5
+        isa = 0.7
+        isb = (1.0 - isa * isa) ** 0.5
+        texts = [
+            json.dumps([isa, isb]),
+            json.dumps([10 * isq2, 10 * isq2]),
+        ]
+        ids = [
+            "rotated",
+            "scaled",
+        ]
+        query_text = json.dumps([isq2, isq2])
+
+        # prepare empty collections
+        AstraDBVectorStore(
+            embedding=emb,
+            collection_name=COLLECTION_NAME_DIM2,
+            **astradb_credentials,
+        ).clear()
+        AstraDBVectorStore(
+            embedding=emb,
+            collection_name=COLLECTION_NAME_DIM2_EUCLIDEAN,
+            metric="euclidean",
+            **astradb_credentials,
+        ).clear()
+
+        # creation, population, query - cosine
+        vstore_cos = AstraDBVectorStore(
+            embedding=emb,
+            collection_name=COLLECTION_NAME_DIM2,
+            metric="cosine",
+            **astradb_credentials,
+        )
+        try:
+            vstore_cos.add_texts(
+                texts=texts,
+                ids=ids,
+            )
+            _, _, id_from_cos = vstore_cos.similarity_search_with_score_id(
+                query_text,
+                k=1,
+            )[0]
+            assert id_from_cos == "scaled"
+        finally:
+            if not SKIP_COLLECTION_DELETE:
+                vstore_cos.delete_collection()
+            else:
+                vstore_cos.clear()
+        # creation, population, query - euclidean
+
+        vstore_euc = AstraDBVectorStore(
+            embedding=emb,
+            collection_name=COLLECTION_NAME_DIM2_EUCLIDEAN,
+            metric="euclidean",
+            **astradb_credentials,
+        )
+        try:
+            vstore_euc.add_texts(
+                texts=texts,
+                ids=ids,
+            )
+            _, _, id_from_euc = vstore_euc.similarity_search_with_score_id(
+                query_text,
+                k=1,
+            )[0]
+            assert id_from_euc == "rotated"
+        finally:
+            if not SKIP_COLLECTION_DELETE:
+                vstore_euc.delete_collection()
+            else:
+                vstore_euc.clear()
diff --git a/libs/partners/astradb/tests/unit_tests/__init__.py b/libs/partners/astradb/tests/unit_tests/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/libs/partners/astradb/tests/unit_tests/test_imports.py b/libs/partners/astradb/tests/unit_tests/test_imports.py
new file mode 100644
index 0000000000000..2240748c70c6e
--- /dev/null
+++ b/libs/partners/astradb/tests/unit_tests/test_imports.py
@@ -0,0 +1,9 @@
+from langchain_astradb import __all__
+
+EXPECTED_ALL = [
+    "AstraDBVectorStore",
+]
+
+
+def test_all_imports() -> None:
+    assert sorted(EXPECTED_ALL) == sorted(__all__)
diff --git a/libs/partners/astradb/tests/unit_tests/test_vectorstores.py b/libs/partners/astradb/tests/unit_tests/test_vectorstores.py
new file mode 100644
index 0000000000000..ebfc6978d18c7
--- /dev/null
+++ b/libs/partners/astradb/tests/unit_tests/test_vectorstores.py
@@ -0,0 +1,45 @@
+from typing import List
+from unittest.mock import Mock
+
+from langchain_core.embeddings import Embeddings
+
+from langchain_astradb.vectorstores import AstraDBVectorStore
+
+
+class SomeEmbeddings(Embeddings):
+    """
+    Turn a sentence into an embedding vector in some way.
+    Not important how; all that counts is that it is deterministic.
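+    (Each text maps to the ord() values of its first characters, padded with
+    a one and zeros, truncated to the requested dimension and L2-normalized.)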
+ """ + + def __init__(self, dimension: int) -> None: + self.dimension = dimension + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + return [self.embed_query(txt) for txt in texts] + + async def aembed_documents(self, texts: List[str]) -> List[List[float]]: + return self.embed_documents(texts) + + def embed_query(self, text: str) -> List[float]: + unnormed0 = [ord(c) for c in text[: self.dimension]] + unnormed = (unnormed0 + [1] + [0] * (self.dimension - 1 - len(unnormed0)))[ + : self.dimension + ] + norm = sum(x * x for x in unnormed) ** 0.5 + normed = [x / norm for x in unnormed] + return normed + + async def aembed_query(self, text: str) -> List[float]: + return self.embed_query(text) + + +def test_initialization() -> None: + """Test integration vectorstore initialization.""" + mock_astra_db = Mock() + embedding = SomeEmbeddings(dimension=2) + AstraDBVectorStore( + embedding=embedding, + collection_name="mock_coll_name", + astra_db_client=mock_astra_db, + )