langchain-ai · ccurme · Dec 17, 2024 · Jul 19, 2024 · Jul 19, 2024 · Jul 19, 2024
diff --git a/docs/docs/integrations/llm_caching.ipynb b/docs/docs/integrations/llm_caching.ipynb
@@ -1854,6 +1854,96 @@
     "llm.invoke(\"Tell me a joke\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "235ff73bf7143f13",
+   "metadata": {},
+   "source": [
+    "## Azure CosmosDB NoSql Semantic Cache\n",
+    "\n",
+    "You can use this integrated [vector database](https://learn.microsoft.com/en-us/azure/cosmos-db/vector-database) for caching."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "41fea5aa7b2153ca",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from typing import Any, Dict\n",
+    "\n",
+    "from azure.cosmos import CosmosClient, PartitionKey\n",
+    "from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache\n",
+    "from langchain_openai import OpenAIEmbeddings\n",
+    "\n",
+    "HOST = \"COSMOS_DB_URI\"\n",
+    "KEY = \"COSMOS_DB_KEY\"\n",
+    "\n",
+    "cosmos_client = CosmosClient(HOST, KEY)\n",
+    "\n",
+    "\n",
+    "def get_vector_indexing_policy() -> dict:\n",
+    "    return {\n",
+    "        \"indexingMode\": \"consistent\",\n",
+    "        \"includedPaths\": [{\"path\": \"/*\"}],\n",
+    "        \"excludedPaths\": [{\"path\": '/\"_etag\"/?'}],\n",
+    "        \"vectorIndexes\": [{\"path\": \"/embedding\", \"type\": \"quantized_flat\"}],\n",
+    "    }\n",
+    "\n",
+    "\n",
+    "def get_vector_embedding_policy() -> dict:\n",
+    "    return {\n",
+    "        \"vectorEmbeddings\": [\n",
+    "            {\n",
+    "                \"path\": \"/embedding\",\n",
+    "                \"dataType\": \"float32\",\n",
+    "                \"dimensions\": 1536,\n",
+    "                \"distanceFunction\": \"cosine\",\n",
+    "            }\n",
+    "        ]\n",
+    "    }\n",
+    "\n",
+    "\n",
+    "cosmos_container_properties_test = {\"partition_key\": PartitionKey(path=\"/id\")}\n",
+    "cosmos_database_properties_test: Dict[str, Any] = {}\n",
+    "\n",
+    "set_llm_cache(\n",
+    "    AzureCosmosDBNoSqlSemanticCache(\n",
+    "        cosmos_client=cosmos_client,\n",
+    "        embedding=OpenAIEmbeddings(),\n",
+    "        vector_embedding_policy=get_vector_embedding_policy(),\n",
+    "        indexing_policy=get_vector_indexing_policy(),\n",
+    "        cosmos_container_properties=cosmos_container_properties_test,\n",
+    "        cosmos_database_properties=cosmos_database_properties_test,\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1e1cd93819921bf6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "# The first time, it is not yet in cache, so it should take longer\n",
+    "llm.invoke(\"Tell me a joke\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "576ce24c1244812a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "# The second time it is, so it goes faster\n",
+    "llm.invoke(\"Tell me a joke\")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "306ff47b",

diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py
@@ -80,7 +80,10 @@
 from langchain_community.utilities.astradb import (
     _AstraDBCollectionEnvironment,
 )
-from langchain_community.vectorstores import AzureCosmosDBVectorSearch
+from langchain_community.vectorstores import (
+    AzureCosmosDBNoSqlVectorSearch,
+    AzureCosmosDBVectorSearch,
+)
 from langchain_community.vectorstores import (
     OpenSearchVectorSearch as OpenSearchVectorStore,
 )
@@ -92,6 +95,7 @@
 if TYPE_CHECKING:
     import momento
     from astrapy.db import AstraDB, AsyncAstraDB
+    from azure.cosmos.cosmos_client import CosmosClient
     from cassandra.cluster import Session as CassandraSession
 
 
@@ -2102,7 +2106,7 @@ def __init__(
         ef_construction: int = 64,
         ef_search: int = 40,
         score_threshold: Optional[float] = None,
-        application_name: str = "LANGCHAIN_CACHING_PYTHON",
+        application_name: str = "LangChain-CDBNoSQL-SemanticCache-Python",
     ):
         """
         Args:
@@ -2267,14 +2271,157 @@ def clear(self, **kwargs: Any) -> None:
         index_name = self._index_name(kwargs["llm_string"])
         if index_name in self._cache_dict:
             self._cache_dict[index_name].get_collection().delete_many({})
-            # self._cache_dict[index_name].clear_collection()
 
     @staticmethod
     def _validate_enum_value(value: Any, enum_type: Type[Enum]) -> None:
         if not isinstance(value, enum_type):
             raise ValueError(f"Invalid enum value: {value}. Expected {enum_type}.")
 
 
+class AzureCosmosDBNoSqlSemanticCache(BaseCache):
+    """Semantic LLM cache backed by Azure Cosmos DB NoSQL vector search.
+
+    Prompts are stored as texts in an ``AzureCosmosDBNoSqlVectorSearch`` store
+    and the serialized generations are kept in each document's metadata.
+
+    NOTE(review): each ``llm_string`` gets its own vector store client, but all
+    of them are built with the same ``database_name``/``container_name``, so
+    entries do not appear to be isolated per LLM in storage -- confirm intent.
+    """
+
+    def __init__(
+        self,
+        embedding: Embeddings,
+        cosmos_client: CosmosClient,
+        database_name: str = "CosmosNoSqlCacheDB",
+        container_name: str = "CosmosNoSqlCacheContainer",
+        *,
+        vector_embedding_policy: Dict[str, Any],
+        indexing_policy: Dict[str, Any],
+        cosmos_container_properties: Dict[str, Any],
+        cosmos_database_properties: Dict[str, Any],
+    ):
+        """Store the settings used to create vector store clients on demand.
+
+        Args:
+            embedding: Embeddings handed to the vector store.
+            cosmos_client: Azure Cosmos DB client handed to the vector store.
+            database_name: Database name handed to the vector store.
+            container_name: Container name handed to the vector store.
+            vector_embedding_policy: Vector embedding policy handed to the
+                vector store.
+            indexing_policy: Indexing policy handed to the vector store.
+            cosmos_container_properties: Container properties handed to the
+                vector store.
+            cosmos_database_properties: Database properties handed to the
+                vector store.
+        """
+        self.cosmos_client = cosmos_client
+        self.database_name = database_name
+        self.container_name = container_name
+        self.embedding = embedding
+        self.vector_embedding_policy = vector_embedding_policy
+        self.indexing_policy = indexing_policy
+        self.cosmos_container_properties = cosmos_container_properties
+        self.cosmos_database_properties = cosmos_database_properties
+        # One vector store client per llm_string, keyed by ``_cache_name``.
+        self._cache_dict: Dict[str, AzureCosmosDBNoSqlVectorSearch] = {}
+
+    def _cache_name(self, llm_string: str) -> str:
+        """Return the ``_cache_dict`` key derived from ``llm_string``."""
+        hashed_index = _hash(llm_string)
+        return f"cache:{hashed_index}"
+
+    def _get_llm_cache(self, llm_string: str) -> AzureCosmosDBNoSqlVectorSearch:
+        """Return the vector store client for ``llm_string``, creating it once."""
+        cache_name = self._cache_name(llm_string)
+
+        # return vectorstore client for the specific llm string
+        if cache_name in self._cache_dict:
+            return self._cache_dict[cache_name]
+
+        # create new vectorstore client to create the cache
+        if self.cosmos_client:
+            self._cache_dict[cache_name] = AzureCosmosDBNoSqlVectorSearch(
+                cosmos_client=self.cosmos_client,
+                embedding=self.embedding,
+                vector_embedding_policy=self.vector_embedding_policy,
+                indexing_policy=self.indexing_policy,
+                cosmos_container_properties=self.cosmos_container_properties,
+                cosmos_database_properties=self.cosmos_database_properties,
+                database_name=self.database_name,
+                container_name=self.container_name,
+            )
+
+        # Without a client nothing was created, so this lookup raises KeyError.
+        return self._cache_dict[cache_name]
+
+    def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
+        """Look up based on prompt.
+
+        Returns:
+            The generations cached for the nearest stored prompt, or ``None``
+            when the search yields nothing usable.
+        """
+        llm_cache = self._get_llm_cache(llm_string)
+        generations: List = []
+        # NOTE(review): only the single nearest neighbour is fetched and no
+        # score threshold is applied here -- confirm that is intended.
+        results = llm_cache.similarity_search(query=prompt, k=1)
+        for document in results:
+            try:
+                generations.extend(loads(document.metadata["return_val"]))
+            except Exception:
+                logger.warning(
+                    "Retrieving a cache value that could not be deserialized "
+                    "properly. This is likely due to the cache being in an "
+                    "older format. Please recreate your cache to avoid this "
+                    "error."
+                )
+
+                generations.extend(
+                    _load_generations_from_json(document.metadata["return_val"])
+                )
+        return generations if generations else None
+
+    def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
+        """Update cache based on prompt and llm_string.
+
+        Raises:
+            ValueError: If ``return_val`` holds anything but ``Generation``s.
+        """
+        for gen in return_val:
+            if not isinstance(gen, Generation):
+                raise ValueError(
+                    "CosmosDBNoSqlSemanticCache only supports caching of "
+                    f"normal LLM generations, got {type(gen)}"
+                )
+        llm_cache = self._get_llm_cache(llm_string)
+        metadata = {
+            "llm_string": llm_string,
+            "prompt": prompt,
+            "return_val": dumps(list(return_val)),
+        }
+        llm_cache.add_texts(texts=[prompt], metadatas=[metadata])
+
+    def clear(self, **kwargs: Any) -> None:
+        """Clear semantic cache for a given llm_string.
+
+        Args:
+            **kwargs: Must contain ``llm_string``. If a vector store client
+                already exists for it, every item that ``read_all_items``
+                returns from its container is deleted; otherwise nothing is.
+        """
+        cache_name = self._cache_name(llm_string=kwargs["llm_string"])
+        if cache_name in self._cache_dict:
+            container = self._cache_dict[cache_name].get_container()
+            for item in container.read_all_items():
+                # delete_item needs the partition key value; using the item id
+                # assumes the container is partitioned on "/id" -- TODO confirm
+                # for other ``cosmos_container_properties``.
+                container.delete_item(item, partition_key=item["id"])
+
+
 class OpenSearchSemanticCache(BaseCache):
     """Cache that uses OpenSearch vector store backend"""
 

diff --git a/libs/community/langchain_community/vectorstores/azure_cosmos_db.py b/libs/community/langchain_community/vectorstores/azure_cosmos_db.py
@@ -80,7 +80,7 @@ def __init__(
         index_name: str = "vectorSearchIndex",
         text_key: str = "textContent",
         embedding_key: str = "vectorContent",
-        application_name: str = "LANGCHAIN_PYTHON",
+        application_name: str = "LangChain-CDBMongoVCore-VectorStore-Python",
     ):
         """Constructor for AzureCosmosDBVectorSearch
 
@@ -119,7 +119,7 @@ def from_connection_string(
         connection_string: str,
         namespace: str,
         embedding: Embeddings,
-        application_name: str = "LANGCHAIN_PYTHON",
+        application_name: str = "LangChain-CDBMongoVCore-VectorStore-Python",
         **kwargs: Any,
     ) -> AzureCosmosDBVectorSearch:
         """Creates an Instance of AzureCosmosDBVectorSearch
@@ -129,6 +129,7 @@ def from_connection_string(
             connection_string: The MongoDB vCore instance connection string
             namespace: The namespace (database.collection)
             embedding: The embedding utility
+            application_name: The application name associated with this client
             **kwargs: Dynamic keyword arguments
 
         Returns: