diff --git a/docs/docs/integrations/llms/llm_caching.ipynb b/docs/docs/integrations/llms/llm_caching.ipynb index db7b11a7f89e1..10de9275d428c 100644 --- a/docs/docs/integrations/llms/llm_caching.ipynb +++ b/docs/docs/integrations/llms/llm_caching.ipynb @@ -916,9 +916,29 @@ "source": [ "## `Cassandra` caches\n", "\n", - "You can use Cassandra / Astra DB through CQL for caching LLM responses, choosing from the exact-match `CassandraCache` or the (vector-similarity-based) `CassandraSemanticCache`.\n", + "> [Apache Cassandra®](https://cassandra.apache.org/) is a NoSQL, row-oriented, highly scalable and highly available database. Starting with version 5.0, the database ships with [vector search capabilities](https://cassandra.apache.org/doc/trunk/cassandra/vector-search/overview.html).\n", "\n", - "Let's see both in action in the following cells." + "You can use Cassandra for caching LLM responses, choosing from the exact-match `CassandraCache` or the (vector-similarity-based) `CassandraSemanticCache`.\n", + "\n", + "Let's see both in action. The next cells guide you through the (little) required setup, and the following cells showcase the two available cache classes." 
+ ] + }, + { + "cell_type": "markdown", + "id": "6cf6acb4-1bc4-4c4b-9325-2420c17e5e2b", + "metadata": {}, + "source": [ + "### Required dependency" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fe842b0d-fd3d-47dd-bc6a-975997c9707f", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet \"cassio>=0.1.4\"" ] }, { @@ -926,54 +946,125 @@ "id": "a4a6725d", "metadata": {}, "source": [ - "#### Connect to the DB\n", + "### Connect to the DB\n", + "\n", + "The Cassandra caches shown in this page can be used with Cassandra as well as other derived databases, such as Astra DB, which use the CQL (Cassandra Query Language) protocol.\n", + "\n", + "> DataStax [Astra DB](https://docs.datastax.com/en/astra-serverless/docs/vector-search/quickstart.html) is a managed serverless database built on Cassandra, offering the same interface and strengths.\n", + "\n", + "Depending on whether you connect to a Cassandra cluster or to Astra DB through CQL, you will provide different parameters when instantiating the cache (through initialization of a CassIO connection)." + ] + }, + { + "cell_type": "markdown", + "id": "15735abe-2567-43ce-aa91-f253b33b5a88", + "metadata": {}, + "source": [ + "#### Connecting to a Cassandra cluster\n", "\n", - "First you need to establish a `Session` to the DB and to specify a _keyspace_ for the cache table(s). The following gets you connected to Astra DB through CQL (see e.g. [here](https://cassio.org/start_here/#vector-database) for more backends and connection options)." + "You first need to create a `cassandra.cluster.Session` object, as described in the [Cassandra driver documentation](https://docs.datastax.com/en/developer/python-driver/latest/api/cassandra/cluster/#module-cassandra.cluster). The details vary (e.g. 
with network settings and authentication), but this might be something like:" ] }, { "cell_type": "code", "execution_count": 1, - "id": "cc53ce1b", + "id": "e4b898a5-fe0e-4f11-a87b-7979652322a7", + "metadata": {}, + "outputs": [], + "source": [ + "from cassandra.cluster import Cluster\n", + "\n", + "cluster = Cluster([\"127.0.0.1\"])\n", + "session = cluster.connect()" + ] + }, + { + "cell_type": "markdown", + "id": "6435198e-8713-4045-906b-879613bf5083", + "metadata": {}, + "source": [ + "You can now set the session, along with your desired keyspace name, as a global CassIO parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "992267dc-0d19-45e0-9a13-ccbb6348d804", "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stdin", "output_type": "stream", "text": [ - "\n", - "Keyspace name? my_keyspace\n", - "\n", - "Astra DB Token (\"AstraCS:...\") ········\n", - "Full path to your Secure Connect Bundle? /path/to/secure-connect-databasename.zip\n" + "CASSANDRA_KEYSPACE = demo_keyspace\n" + ] + } + ], + "source": [ + "import cassio\n", + "\n", + "CASSANDRA_KEYSPACE = input(\"CASSANDRA_KEYSPACE = \")\n", + "\n", + "cassio.init(session=session, keyspace=CASSANDRA_KEYSPACE)" + ] + }, + { + "cell_type": "markdown", + "id": "2cc7ba29-8f84-4fbf-aaf7-3daa1be7e7b0", + "metadata": {}, + "source": [ + "#### Connecting to Astra DB through CQL\n", + "\n", + "In this case you initialize CassIO with the following connection parameters:\n", + "\n", + "- the Database ID, e.g. `01234567-89ab-cdef-0123-456789abcdef`\n", + "- the Token, e.g. 
`AstraCS:6gBhNmsk135....` (it must be a \"Database Administrator\" token)\n", + "- Optionally a Keyspace name (if omitted, the default one for the database will be used)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "ead97077-cc79-4f5c-940c-91eb21650466", + "metadata": {}, + "outputs": [ + { + "name": "stdin", + "output_type": "stream", + "text": [ + "ASTRA_DB_ID = 01234567-89ab-cdef-0123-456789abcdef\n", + "ASTRA_DB_APPLICATION_TOKEN = ········\n", + "ASTRA_DB_KEYSPACE (optional, can be left empty) = my_keyspace\n" ] } ], "source": [ "import getpass\n", "\n", - "keyspace = input(\"\\nKeyspace name? \")\n", - "ASTRA_DB_APPLICATION_TOKEN = getpass.getpass('\\nAstra DB Token (\"AstraCS:...\") ')\n", - "ASTRA_DB_SECURE_BUNDLE_PATH = input(\"Full path to your Secure Connect Bundle? \")" + "ASTRA_DB_ID = input(\"ASTRA_DB_ID = \")\n", + "ASTRA_DB_APPLICATION_TOKEN = getpass.getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")\n", + "\n", + "desired_keyspace = input(\"ASTRA_DB_KEYSPACE (optional, can be left empty) = \")\n", + "if desired_keyspace:\n", + " ASTRA_DB_KEYSPACE = desired_keyspace\n", + "else:\n", + " ASTRA_DB_KEYSPACE = None" ] }, { "cell_type": "code", - "execution_count": 2, - "id": "4617f485", + "execution_count": 13, + "id": "cc53ce1b", "metadata": {}, "outputs": [], "source": [ - "from cassandra.auth import PlainTextAuthProvider\n", - "from cassandra.cluster import Cluster\n", + "import cassio\n", "\n", - "cluster = Cluster(\n", - " cloud={\n", - " \"secure_connect_bundle\": ASTRA_DB_SECURE_BUNDLE_PATH,\n", - " },\n", - " auth_provider=PlainTextAuthProvider(\"token\", ASTRA_DB_APPLICATION_TOKEN),\n", - ")\n", - "session = cluster.connect()" + "cassio.init(\n", + " database_id=ASTRA_DB_ID,\n", + " token=ASTRA_DB_APPLICATION_TOKEN,\n", + " keyspace=ASTRA_DB_KEYSPACE,\n", + ")" ] }, { @@ -981,27 +1072,27 @@ "id": "8665664a", "metadata": {}, "source": [ - "### Exact cache\n", + "### Cassandra: Exact cache\n", "\n", "This will avoid invoking the 
LLM when the supplied prompt is _exactly_ the same as one encountered already:" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "id": "00a5e66f", "metadata": {}, "outputs": [], "source": [ - "from langchain.cache import CassandraCache\n", - "from langchain.globals import set_llm_cache\n", + "from langchain_community.cache import CassandraCache\n", + "from langchain_core.globals import set_llm_cache\n", "\n", - "set_llm_cache(CassandraCache(session=session, keyspace=keyspace))" + "set_llm_cache(CassandraCache())" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "id": "956a5145", "metadata": {}, "outputs": [ @@ -1011,9 +1102,9 @@ "text": [ "\n", "\n", - "The Moon always shows the same side because it is tidally locked to Earth.\n", - "CPU times: user 41.7 ms, sys: 153 µs, total: 41.8 ms\n", - "Wall time: 1.96 s\n" + "The Moon is tidally locked with the Earth, which means that its rotation on its own axis is synchronized with its orbit around the Earth. This results in the Moon always showing the same side to the Earth. This is because the gravitational forces between the Earth and the Moon have caused the Moon's rotation to slow down over time, until it reached a point where it takes the same amount of time for the Moon to rotate on its axis as it does to orbit around the Earth. 
This phenomenon is common among satellites in close orbits around their parent planets and is known as tidal locking.\n", + "CPU times: user 92.5 ms, sys: 8.89 ms, total: 101 ms\n", + "Wall time: 1.98 s\n" ] } ], @@ -1025,7 +1116,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "id": "158f0151", "metadata": {}, "outputs": [ @@ -1035,9 +1126,9 @@ "text": [ "\n", "\n", - "The Moon always shows the same side because it is tidally locked to Earth.\n", - "CPU times: user 4.09 ms, sys: 0 ns, total: 4.09 ms\n", - "Wall time: 119 ms\n" + "The Moon is tidally locked with the Earth, which means that its rotation on its own axis is synchronized with its orbit around the Earth. This results in the Moon always showing the same side to the Earth. This is because the gravitational forces between the Earth and the Moon have caused the Moon's rotation to slow down over time, until it reached a point where it takes the same amount of time for the Moon to rotate on its axis as it does to orbit around the Earth. This phenomenon is common among satellites in close orbits around their parent planets and is known as tidal locking.\n", + "CPU times: user 5.51 ms, sys: 0 ns, total: 5.51 ms\n", + "Wall time: 5.78 ms\n" ] } ], @@ -1052,14 +1143,14 @@ "id": "8fc4d017", "metadata": {}, "source": [ - "### Semantic cache\n", + "### Cassandra: Semantic cache\n", "\n", "This cache will do a semantic similarity search and return a hit if it finds a cached entry that is similar enough, For this, you need to provide an `Embeddings` instance of your choice." 
] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "b9ad3f54", "metadata": {}, "outputs": [], @@ -1071,26 +1162,25 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 17, "id": "4623f95e", "metadata": {}, "outputs": [], "source": [ - "from langchain.cache import CassandraSemanticCache\n", + "from langchain_community.cache import CassandraSemanticCache\n", + "from langchain_core.globals import set_llm_cache\n", "\n", "set_llm_cache(\n", " CassandraSemanticCache(\n", - " session=session,\n", - " keyspace=keyspace,\n", " embedding=embedding,\n", - " table_name=\"cass_sem_cache\",\n", + " table_name=\"my_semantic_cache\",\n", " )\n", ")" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 19, "id": "1a8e577b", "metadata": {}, "outputs": [ @@ -1100,9 +1190,9 @@ "text": [ "\n", "\n", - "The Moon always shows the same side because it is tidally locked with Earth. This means that the same side of the Moon always faces Earth.\n", - "CPU times: user 21.3 ms, sys: 177 µs, total: 21.4 ms\n", - "Wall time: 3.09 s\n" + "The Moon is always showing the same side because of a phenomenon called synchronous rotation. This means that the Moon rotates on its axis at the same rate that it orbits around the Earth, which takes approximately 27.3 days. This results in the same side of the Moon always facing the Earth. This is due to the gravitational forces between the Earth and the Moon, which have caused the Moon's rotation to gradually slow down and become synchronized with its orbit. This is a common occurrence among many moons in our solar system.\n", + "CPU times: user 49.5 ms, sys: 7.38 ms, total: 56.9 ms\n", + "Wall time: 2.55 s\n" ] } ], @@ -1114,7 +1204,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 20, "id": "f7abddfd", "metadata": {}, "outputs": [ @@ -1124,9 +1214,9 @@ "text": [ "\n", "\n", - "The Moon always shows the same side because it is tidally locked with Earth. 
This means that the same side of the Moon always faces Earth.\n", - "CPU times: user 10.9 ms, sys: 17 µs, total: 10.9 ms\n", - "Wall time: 461 ms\n" + "The Moon is always showing the same side because of a phenomenon called synchronous rotation. This means that the Moon rotates on its axis at the same rate that it orbits around the Earth, which takes approximately 27.3 days. This results in the same side of the Moon always facing the Earth. This is due to the gravitational forces between the Earth and the Moon, which have caused the Moon's rotation to gradually slow down and become synchronized with its orbit. This is a common occurrence among many moons in our solar system.\n", + "CPU times: user 21.2 ms, sys: 3.38 ms, total: 24.6 ms\n", + "Wall time: 532 ms\n" ] } ], diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py index eaf8b2be81f97..4a09ba61969bc 100644 --- a/libs/community/langchain_community/cache.py +++ b/libs/community/langchain_community/cache.py @@ -1045,11 +1045,43 @@ class CassandraCache(BaseCache): """ Cache that uses Cassandra / Astra DB as a backend. + Example: + + .. code-block:: python + + import cassio + + from langchain_community.cache import CassandraCache + from langchain_core.globals import set_llm_cache + + cassio.init(auto=True) # Requires env. variables, see CassIO docs + + set_llm_cache(CassandraCache()) + It uses a single Cassandra table. The lookup keys (which get to form the primary key) are: - prompt, a string - llm_string, a deterministic str representation of the model parameters. - (needed to prevent collisions same-prompt-different-model collisions) + (needed to prevent same-prompt-different-model collisions) + + Args: + session: an open Cassandra session. + Leave unspecified to use the global cassio init (see below) + keyspace: the keyspace to use for storing the cache. 
+ Leave unspecified to use the global cassio init (see below) + table_name: name of the Cassandra table to use as cache + ttl_seconds: time-to-live for cache entries + (default: None, i.e. forever) + setup_mode: a value in langchain_community.utilities.cassandra.SetupMode. + Choose between SYNC, ASYNC and OFF - the latter if the Cassandra + table is guaranteed to exist already, for a faster initialization. + + Note: + The session and keyspace parameters, when left out (or passed as None), + fall back to the globally-available cassio settings if any are available. + In other words, if 'cassio.init(...)' has already been + executed anywhere in the code, Cassandra-based objects + need not specify the connection parameters at all. """ def __init__( @@ -1061,25 +1093,21 @@ def __init__( skip_provisioning: bool = False, setup_mode: CassandraSetupMode = CassandraSetupMode.SYNC, ): - """ - Initialize with a ready session and a keyspace name. - Args: - session (cassandra.cluster.Session): an open Cassandra session - keyspace (str): the keyspace to use for storing the cache - table_name (str): name of the Cassandra table to use as cache - ttl_seconds (optional int): time-to-live for cache entries - (default: None, i.e. forever) - """ if skip_provisioning: warn_deprecated( - "0.0.33", alternative="Use setup_mode=CassandraSetupMode.OFF instead." + "0.0.33", + name="skip_provisioning", + alternative=( + "setup_mode=langchain_community.utilities.cassandra.SetupMode.OFF" + ), + pending=True, ) try: from cassio.table import ElasticCassandraTable except (ImportError, ModuleNotFoundError): raise ImportError( "Could not import cassio python package. " - "Please install it with `pip install cassio`." + "Please install it with `pip install -U cassio`." 
) self.session = session @@ -1170,6 +1198,7 @@ async def aclear(self, **kwargs: Any) -> None: await self.kv_cache.aclear() +# This constant is in fact a similarity - the 'distance' name is kept for compatibility: CASSANDRA_SEMANTIC_CACHE_DEFAULT_DISTANCE_METRIC = "dot" CASSANDRA_SEMANTIC_CACHE_DEFAULT_SCORE_THRESHOLD = 0.85 CASSANDRA_SEMANTIC_CACHE_DEFAULT_TABLE_NAME = "langchain_llm_semantic_cache" @@ -1182,60 +1211,117 @@ class CassandraSemanticCache(BaseCache): Cache that uses Cassandra as a vector-store backend for semantic (i.e. similarity-based) lookup. + Example: + + .. code-block:: python + + import cassio + + from langchain_community.cache import CassandraSemanticCache + from langchain_core.globals import set_llm_cache + + cassio.init(auto=True) # Requires env. variables, see CassIO docs + + my_embedding = ... + + set_llm_cache(CassandraSemanticCache( + embedding=my_embedding, + table_name="my_semantic_cache", + )) + It uses a single (vector) Cassandra table and stores, in principle, cached values from several LLMs, so the LLM's llm_string is part of the rows' primary keys. - The similarity is based on one of several distance metrics (default: "dot"). - If choosing another metric, the default threshold is to be re-tuned accordingly. + One can choose a similarity measure (default: "dot" for dot-product). + Choosing another one ("cos", "l2") almost certainly requires threshold tuning + (which may be in order nevertheless, even if sticking to "dot"). + + Args: + session: an open Cassandra session. + Leave unspecified to use the global cassio init (see below) + keyspace: the keyspace to use for storing the cache. + Leave unspecified to use the global cassio init (see below) + embedding: Embedding provider for semantic + encoding and search. + table_name: name of the Cassandra (vector) table + to use as cache. 
There is a default for "simple" usage, but + remember to explicitly specify different tables if several embedding + models coexist in your app (they cannot share one cache table). + distance_metric: an alias for the 'similarity_measure' parameter (see below). + As the "distance" terminology is misleading, please prefer + 'similarity_measure' for clarity. + score_threshold: numeric value to use as + cutoff for the similarity searches + ttl_seconds: time-to-live for cache entries + (default: None, i.e. forever) + similarity_measure: which measure to adopt for similarity searches. + Note: this parameter is aliased by 'distance_metric' - however, + it is suggested to use the "similarity" terminology since this value + is in fact a similarity (i.e. higher means closer). + Note that if 'distance_metric' is also provided, it takes precedence + over 'similarity_measure' (and a deprecation warning is issued). + setup_mode: a value in langchain_community.utilities.cassandra.SetupMode. + Choose between SYNC, ASYNC and OFF - the latter if the Cassandra + table is guaranteed to exist already, for a faster initialization. + + Note: + The session and keyspace parameters, when left out (or passed as None), + fall back to the globally-available cassio settings if any are available. + In other words, if 'cassio.init(...)' has already been + executed anywhere in the code, Cassandra-based objects + need not specify the connection parameters at all. 
""" def __init__( self, - session: Optional[CassandraSession], - keyspace: Optional[str], - embedding: Embeddings, + session: Optional[CassandraSession] = None, + keyspace: Optional[str] = None, + embedding: Optional[Embeddings] = None, table_name: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_TABLE_NAME, - distance_metric: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_DISTANCE_METRIC, + distance_metric: Optional[str] = None, score_threshold: float = CASSANDRA_SEMANTIC_CACHE_DEFAULT_SCORE_THRESHOLD, ttl_seconds: Optional[int] = CASSANDRA_SEMANTIC_CACHE_DEFAULT_TTL_SECONDS, skip_provisioning: bool = False, + similarity_measure: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_DISTANCE_METRIC, setup_mode: CassandraSetupMode = CassandraSetupMode.SYNC, ): - """ - Initialize the cache with all relevant parameters. - Args: - session (cassandra.cluster.Session): an open Cassandra session - keyspace (str): the keyspace to use for storing the cache - embedding (Embedding): Embedding provider for semantic - encoding and search. - table_name (str): name of the Cassandra (vector) table - to use as cache - distance_metric (str, 'dot'): which measure to adopt for - similarity searches - score_threshold (optional float): numeric value to use as - cutoff for the similarity searches - ttl_seconds (optional int): time-to-live for cache entries - (default: None, i.e. forever) - The default score threshold is tuned to the default metric. - Tune it carefully yourself if switching to another distance metric. - """ if skip_provisioning: warn_deprecated( - "0.0.33", alternative="Use setup_mode=CassandraSetupMode.OFF instead." + "0.0.33", + name="skip_provisioning", + alternative=( + "setup_mode=langchain_community.utilities.cassandra.SetupMode.OFF" + ), + pending=True, ) try: from cassio.table import MetadataVectorCassandraTable except (ImportError, ModuleNotFoundError): raise ImportError( "Could not import cassio python package. " - "Please install it with `pip install cassio`." 
+ "Please install it with `pip install -U cassio`." ) + + if not embedding: + raise ValueError("Missing required parameter 'embedding'.") + + # detect if legacy 'distance_metric' parameter used + if distance_metric is not None: + # if passed, takes precedence over 'similarity_measure', but we warn: + warn_deprecated( + "0.0.33", + name="distance_metric", + alternative="similarity_measure", + pending=True, + ) + similarity_measure = distance_metric + self.session = session self.keyspace = keyspace self.embedding = embedding self.table_name = table_name - self.distance_metric = distance_metric + self.similarity_measure = similarity_measure self.score_threshold = score_threshold self.ttl_seconds = ttl_seconds @@ -1347,7 +1433,7 @@ def lookup_with_id( vector=prompt_embedding, metadata={"_llm_string_hash": _hash(llm_string)}, n=1, - metric=self.distance_metric, + metric=self.similarity_measure, metric_threshold=self.score_threshold, ) ) @@ -1378,7 +1464,7 @@ async def alookup_with_id( vector=prompt_embedding, metadata={"_llm_string_hash": _hash(llm_string)}, n=1, - metric=self.distance_metric, + metric=self.similarity_measure, metric_threshold=self.score_threshold, ) ) diff --git a/libs/community/tests/integration_tests/cache/test_cassandra.py b/libs/community/tests/integration_tests/cache/test_cassandra.py index 44308db1bf58f..93032ccc1bf23 100644 --- a/libs/community/tests/integration_tests/cache/test_cassandra.py +++ b/libs/community/tests/integration_tests/cache/test_cassandra.py @@ -21,7 +21,7 @@ def cassandra_connection() -> Iterator[Tuple[Any, str]]: keyspace = "langchain_cache_test_keyspace" # get db connection if "CASSANDRA_CONTACT_POINTS" in os.environ: - contact_points = os.environ["CONTACT_POINTS"].split(",") + contact_points = os.environ["CASSANDRA_CONTACT_POINTS"].split(",") cluster = Cluster(contact_points) else: cluster = Cluster() diff --git a/templates/cassandra-synonym-caching/cassandra_synonym_caching/__init__.py 
b/templates/cassandra-synonym-caching/cassandra_synonym_caching/__init__.py index 04ca550915fc6..bff2421a98801 100644 --- a/templates/cassandra-synonym-caching/cassandra_synonym_caching/__init__.py +++ b/templates/cassandra-synonym-caching/cassandra_synonym_caching/__init__.py @@ -2,7 +2,7 @@ import cassio import langchain -from langchain.cache import CassandraCache +from langchain_community.cache import CassandraCache from langchain_community.chat_models import ChatOpenAI from langchain_core.messages import BaseMessage from langchain_core.prompts import ChatPromptTemplate