diff --git a/docs/docs/integrations/llm_caching.ipynb b/docs/docs/integrations/llm_caching.ipynb index 4ba1901613ac2..5356f90a7537c 100644 --- a/docs/docs/integrations/llm_caching.ipynb +++ b/docs/docs/integrations/llm_caching.ipynb @@ -14,8 +14,8 @@ }, { "cell_type": "code", - "execution_count": 2, - "id": "88486f6f", + "execution_count": null, + "id": "f938e881", "metadata": {}, "outputs": [], "source": [ @@ -30,12 +30,12 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "10ad9224", "metadata": { "ExecuteTime": { - "end_time": "2024-04-12T02:05:57.319706Z", - "start_time": "2024-04-12T02:05:57.303868Z" + "end_time": "2024-12-06T00:54:06.474593Z", + "start_time": "2024-12-06T00:53:58.727138Z" } }, "outputs": [], @@ -1820,7 +1820,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": null, "id": "bc1570a2a77b58c8", "metadata": { "ExecuteTime": { @@ -1848,12 +1848,155 @@ "output_type": "execute_result" } ], + "source": [ + "%%time\n", + "# The second time it is, so it goes faster\n", + "llm.invoke(\"Tell me a joke\")" + ] + }, + { + "cell_type": "markdown", + "id": "235ff73bf7143f13", + "metadata": {}, + "source": [ + "## Azure CosmosDB NoSql Semantic Cache\n", + "\n", + "You can use this integrated [vector database](https://learn.microsoft.com/en-us/azure/cosmos-db/vector-database) for caching." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41fea5aa7b2153ca", + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-06T00:55:38.648972Z", + "start_time": "2024-12-06T00:55:38.290541Z" + } + }, + "outputs": [], + "source": [ + "from typing import Any, Dict\n", + "\n", + "from azure.cosmos import CosmosClient, PartitionKey\n", + "from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache\n", + "from langchain_openai import OpenAIEmbeddings\n", + "\n", + "HOST = \"COSMOS_DB_URI\"\n", + "KEY = \"COSMOS_DB_KEY\"\n", + "\n", + "cosmos_client = CosmosClient(HOST, KEY)\n", + "\n", + "\n", + "def get_vector_indexing_policy() -> dict:\n", + " return {\n", + " \"indexingMode\": \"consistent\",\n", + " \"includedPaths\": [{\"path\": \"/*\"}],\n", + " \"excludedPaths\": [{\"path\": '/\"_etag\"/?'}],\n", + " \"vectorIndexes\": [{\"path\": \"/embedding\", \"type\": \"diskANN\"}],\n", + " }\n", + "\n", + "\n", + "def get_vector_embedding_policy() -> dict:\n", + " return {\n", + " \"vectorEmbeddings\": [\n", + " {\n", + " \"path\": \"/embedding\",\n", + " \"dataType\": \"float32\",\n", + " \"dimensions\": 1536,\n", + " \"distanceFunction\": \"cosine\",\n", + " }\n", + " ]\n", + " }\n", + "\n", + "\n", + "cosmos_container_properties_test = {\"partition_key\": PartitionKey(path=\"/id\")}\n", + "cosmos_database_properties_test: Dict[str, Any] = {}\n", + "\n", + "set_llm_cache(\n", + " AzureCosmosDBNoSqlSemanticCache(\n", + " cosmos_client=cosmos_client,\n", + " embedding=OpenAIEmbeddings(),\n", + " vector_embedding_policy=get_vector_embedding_policy(),\n", + " indexing_policy=get_vector_indexing_policy(),\n", + " cosmos_container_properties=cosmos_container_properties_test,\n", + " cosmos_database_properties=cosmos_database_properties_test,\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "1e1cd93819921bf6", + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-06T00:55:44.513080Z", + "start_time": 
"2024-12-06T00:55:41.353843Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 374 ms, sys: 34.2 ms, total: 408 ms\n", + "Wall time: 3.15 s\n" + ] + }, + { + "data": { + "text/plain": [ + "\"\\n\\nWhy couldn't the bicycle stand up by itself? Because it was two-tired!\"" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "%%time\n", "# The first time, it is not yet in cache, so it should take longer\n", "llm.invoke(\"Tell me a joke\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "576ce24c1244812a", + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-06T00:55:50.925865Z", + "start_time": "2024-12-06T00:55:50.548520Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 17.7 ms, sys: 2.88 ms, total: 20.6 ms\n", + "Wall time: 373 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "\"\\n\\nWhy couldn't the bicycle stand up by itself? 
Because it was two-tired!\""
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "# The second time it is, so it goes faster\n",
+    "llm.invoke(\"Tell me a joke\")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "306ff47b",
diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py
index 697c26ed8725f..11a704224e506 100644
--- a/libs/community/langchain_community/cache.py
+++ b/libs/community/langchain_community/cache.py
@@ -80,7 +80,10 @@
 from langchain_community.utilities.astradb import (
     _AstraDBCollectionEnvironment,
 )
-from langchain_community.vectorstores import AzureCosmosDBVectorSearch
+from langchain_community.vectorstores import (
+    AzureCosmosDBNoSqlVectorSearch,
+    AzureCosmosDBVectorSearch,
+)
 from langchain_community.vectorstores import (
     OpenSearchVectorSearch as OpenSearchVectorStore,
 )
@@ -93,6 +96,7 @@
     import momento
     import pymemcache
     from astrapy.db import AstraDB, AsyncAstraDB
+    from azure.cosmos.cosmos_client import CosmosClient
     from cassandra.cluster import Session as CassandraSession
 
 
@@ -2103,7 +2107,7 @@ def __init__(
         ef_construction: int = 64,
         ef_search: int = 40,
         score_threshold: Optional[float] = None,
-        application_name: str = "LANGCHAIN_CACHING_PYTHON",
+        application_name: str = "LangChain-CDBMongoVCore-SemanticCache-Python",
     ):
         """
         Args:
@@ -2268,7 +2272,6 @@ def clear(self, **kwargs: Any) -> None:
         index_name = self._index_name(kwargs["llm_string"])
         if index_name in self._cache_dict:
             self._cache_dict[index_name].get_collection().delete_many({})
-            # self._cache_dict[index_name].clear_collection()
 
     @staticmethod
     def _validate_enum_value(value: Any, enum_type: Type[Enum]) -> None:
@@ -2276,6 +2279,161 @@ def _validate_enum_value(value: Any, enum_type: Type[Enum]) -> None:
             raise ValueError(f"Invalid enum value: {value}. Expected {enum_type}.")
 
 
+class AzureCosmosDBNoSqlSemanticCache(BaseCache):
+    """Semantic LLM cache stored in an Azure Cosmos DB NoSQL container.
+
+    Prompts are embedded and written through ``AzureCosmosDBNoSqlVectorSearch``;
+    ``lookup`` returns the generations stored with the nearest cached prompt.
+
+    NOTE(review): each ``llm_string`` gets its own vector store client, but all
+    of them point at the same ``database_name``/``container_name`` and
+    ``lookup`` passes neither an ``llm_string`` filter nor a score threshold to
+    the search, so entries presumably are shared across models - confirm this
+    is intended.
+    """
+
+    def __init__(
+        self,
+        embedding: Embeddings,
+        cosmos_client: CosmosClient,
+        database_name: str = "CosmosNoSqlCacheDB",
+        container_name: str = "CosmosNoSqlCacheContainer",
+        *,
+        vector_embedding_policy: Dict[str, Any],
+        indexing_policy: Dict[str, Any],
+        cosmos_container_properties: Dict[str, Any],
+        cosmos_database_properties: Dict[str, Any],
+        create_container: bool = True,
+    ):
+        """Store the settings used to build vector store clients on demand.
+
+        Nothing is created here; the arguments are forwarded unchanged to
+        ``AzureCosmosDBNoSqlVectorSearch`` the first time an ``llm_string`` is
+        used.
+
+        Args:
+            embedding: Embeddings passed to the vector store.
+            cosmos_client: Client for the Cosmos DB account.
+            database_name: Name of the database holding the cache.
+            container_name: Name of the container holding the cache.
+            vector_embedding_policy: Vector embedding policy for the container.
+            indexing_policy: Indexing policy for the container.
+            cosmos_container_properties: Container properties, e.g.
+                ``{"partition_key": PartitionKey(path="/id")}``.
+            cosmos_database_properties: Database properties.
+            create_container: Passed through as ``create_container``.
+        """
+        self.cosmos_client = cosmos_client
+        self.database_name = database_name
+        self.container_name = container_name
+        self.embedding = embedding
+        self.vector_embedding_policy = vector_embedding_policy
+        self.indexing_policy = indexing_policy
+        self.cosmos_container_properties = cosmos_container_properties
+        self.cosmos_database_properties = cosmos_database_properties
+        self.create_container = create_container
+        self._cache_dict: Dict[str, AzureCosmosDBNoSqlVectorSearch] = {}
+
+    def _cache_name(self, llm_string: str) -> str:
+        """Return the ``_cache_dict`` key for ``llm_string``."""
+        hashed_index = _hash(llm_string)
+        return f"cache:{hashed_index}"
+
+    def _get_llm_cache(self, llm_string: str) -> AzureCosmosDBNoSqlVectorSearch:
+        """Return the vector store for ``llm_string``, creating it on first use."""
+        cache_name = self._cache_name(llm_string)
+
+        # return vectorstore client for the specific llm string
+        if cache_name in self._cache_dict:
+            return self._cache_dict[cache_name]
+
+        # create new vectorstore client to create the cache
+        if self.cosmos_client:
+            self._cache_dict[cache_name] = AzureCosmosDBNoSqlVectorSearch(
+                cosmos_client=self.cosmos_client,
+                embedding=self.embedding,
+                vector_embedding_policy=self.vector_embedding_policy,
+                indexing_policy=self.indexing_policy,
+                cosmos_container_properties=self.cosmos_container_properties,
+                cosmos_database_properties=self.cosmos_database_properties,
+                database_name=self.database_name,
+                container_name=self.container_name,
+                create_container=self.create_container,
+            )
+
+        return self._cache_dict[cache_name]
+
+    def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
+        """Look up based on prompt.
+
+        Returns:
+            The generations stored with the closest cached prompt, or ``None``
+            when none were found.
+        """
+        llm_cache = self._get_llm_cache(llm_string)
+        generations: List = []
+        # Ask the vector store for the single closest cached prompt.
+        results = llm_cache.similarity_search(
+            query=prompt,
+            k=1,
+        )
+        if results:
+            for document in results:
+                try:
+                    generations.extend(loads(document.metadata["return_val"]))
+                except Exception:
+                    logger.warning(
+                        "Retrieving a cache value that could not be deserialized "
+                        "properly. This is likely due to the cache being in an "
+                        "older format. Please recreate your cache to avoid this "
+                        "error."
+                    )
+
+                    generations.extend(
+                        _load_generations_from_json(document.metadata["return_val"])
+                    )
+        return generations if generations else None
+
+    def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
+        """Update cache based on prompt and llm_string.
+
+        Raises:
+            ValueError: If ``return_val`` contains a non-``Generation`` item.
+        """
+        for gen in return_val:
+            if not isinstance(gen, Generation):
+                raise ValueError(
+                    "CosmosDBNoSqlSemanticCache only supports caching of "
+                    f"normal LLM generations, got {type(gen)}"
+                )
+        llm_cache = self._get_llm_cache(llm_string)
+        metadata = {
+            "llm_string": llm_string,
+            "prompt": prompt,
+            "return_val": dumps(list(return_val)),
+        }
+        llm_cache.add_texts(texts=[prompt], metadatas=[metadata])
+
+    def clear(self, **kwargs: Any) -> None:
+        """Clear semantic cache for a given llm_string.
+
+        Deletes every item yielded by the container's ``read_all_items()``.
+
+        Args:
+            **kwargs: Must include ``llm_string``.
+
+        Raises:
+            KeyError: If ``llm_string`` is missing from ``kwargs``.
+        """
+        cache_name = self._cache_name(llm_string=kwargs["llm_string"])
+        if cache_name in self._cache_dict:
+            container = self._cache_dict[cache_name].get_container()
+            for item in container.read_all_items():
+                # delete_item() requires the partition key value. This assumes
+                # the container is partitioned on /id - TODO confirm.
+                container.delete_item(item, partition_key=item["id"])
+
+
 class OpenSearchSemanticCache(BaseCache):
     """Cache that uses OpenSearch vector store backend"""
 
diff --git a/libs/community/langchain_community/vectorstores/azure_cosmos_db.py b/libs/community/langchain_community/vectorstores/azure_cosmos_db.py
index b96ec2055cc98..e91f6c210a315 100644
--- a/libs/community/langchain_community/vectorstores/azure_cosmos_db.py
+++
b/libs/community/langchain_community/vectorstores/azure_cosmos_db.py
@@ -82,7 +82,7 @@ def __init__(
         index_name: str = "vectorSearchIndex",
         text_key: str = "textContent",
         embedding_key: str = "vectorContent",
-        application_name: str = "LANGCHAIN_PYTHON",
+        application_name: str = "LangChain-CDBMongoVCore-VectorStore-Python",
     ):
         """Constructor for AzureCosmosDBVectorSearch
 
@@ -121,7 +121,7 @@ def from_connection_string(
         connection_string: str,
         namespace: str,
         embedding: Embeddings,
-        application_name: str = "LANGCHAIN_PYTHON",
+        application_name: str = "LangChain-CDBMongoVCore-VectorStore-Python",
         **kwargs: Any,
     ) -> AzureCosmosDBVectorSearch:
         """Creates an Instance of AzureCosmosDBVectorSearch
diff --git a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py
index 48110a182a982..8d0d90dd92e9c 100644
--- a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py
+++ b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py
@@ -14,7 +14,7 @@
 from langchain_community.vectorstores.utils import maximal_marginal_relevance
 
 if TYPE_CHECKING:
-    from azure.cosmos import CosmosClient
+    from azure.cosmos import ContainerProxy, CosmosClient
     from azure.identity import DefaultAzureCredential
 
 USER_AGENT = ("LangChain-CDBNoSql-VectorStore-Python",)
@@ -859,3 +859,7 @@ def _where_clause_operator_map(self) -> Dict[str, str]:
             "$full_text_contains_any": "FullTextContainsAny",
         }
         return operator_map
+
+    def get_container(self) -> ContainerProxy:
+        """Return the ``ContainerProxy`` held in ``self._container``."""
+        return self._container
diff --git a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py
new file mode 100644
index 0000000000000..343eb11d7c242
--- /dev/null
+++ b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py
@@ -0,0 +1,164 @@
+"""Test Azure CosmosDB NoSql cache functionality."""
+
+from typing import Any, Dict, List
+
+import pytest
+from langchain.globals import get_llm_cache, set_llm_cache
+from langchain_core.outputs import Generation
+
+from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache
+from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
+from tests.unit_tests.llms.fake_llm import FakeLLM
+
+HOST = "COSMOS_DB_URI"
+KEY = "COSMOS_DB_KEY"
+
+
+@pytest.fixture()
+def cosmos_client() -> Any:
+    """Return a ``CosmosClient`` built from ``HOST`` and ``KEY``."""
+    from azure.cosmos import CosmosClient
+
+    return CosmosClient(HOST, KEY)
+
+
+@pytest.fixture()
+def partition_key() -> Any:
+    """Return the ``/id`` partition key used for the cache container."""
+    from azure.cosmos import PartitionKey
+
+    return PartitionKey(path="/id")
+
+
+def indexing_policy(index_type: str) -> dict:
+    """Return an indexing policy with one ``index_type`` index on ``/embedding``."""
+    return {
+        "indexingMode": "consistent",
+        "includedPaths": [{"path": "/*"}],
+        "excludedPaths": [{"path": '/"_etag"/?'}],
+        "vectorIndexes": [{"path": "/embedding", "type": index_type}],
+    }
+
+
+def vector_embedding_policy(distance_function: str) -> dict:
+    """Return a float32, 1536-dimension policy using ``distance_function``."""
+    return {
+        "vectorEmbeddings": [
+            {
+                "path": "/embedding",
+                "dataType": "float32",
+                "distanceFunction": distance_function,
+                "dimensions": 1536,
+            }
+        ]
+    }
+
+
+cosmos_database_properties_test: Dict[str, Any] = {}
+
+
+def _check_cache_round_trip(
+    cosmos_client: Any,
+    partition_key: Any,
+    distance_function: str,
+    index_type: str,
+    generations: List[Generation],
+) -> None:
+    """Cache ``generations`` under "foo", read them back via "bar", then clear."""
+    set_llm_cache(
+        AzureCosmosDBNoSqlSemanticCache(
+            cosmos_client=cosmos_client,
+            embedding=FakeEmbeddings(),
+            vector_embedding_policy=vector_embedding_policy(distance_function),
+            indexing_policy=indexing_policy(index_type),
+            # Pass the resolved fixture value: a module-level dict would hold
+            # the fixture function itself instead of a PartitionKey.
+            cosmos_container_properties={"partition_key": partition_key},
+            cosmos_database_properties=cosmos_database_properties_test,
+        )
+    )
+
+    llm = FakeLLM()
+    params = llm.dict()
+    params["stop"] = None
+    llm_string = str(sorted(params.items()))
+    get_llm_cache().update("foo", llm_string, generations)
+
+    # foo and bar will have the same embedding produced by FakeEmbeddings
+    cache_output = get_llm_cache().lookup("bar", llm_string)
+    assert cache_output == generations
+
+    # clear the cache
+    get_llm_cache().clear(llm_string=llm_string)
+
+
+def test_azure_cosmos_db_nosql_semantic_cache_cosine_quantizedflat(
+    cosmos_client: Any, partition_key: Any
+) -> None:
+    _check_cache_round_trip(
+        cosmos_client,
+        partition_key,
+        "cosine",
+        "quantizedFlat",
+        [Generation(text="fizz")],
+    )
+
+
+def test_azure_cosmos_db_nosql_semantic_cache_cosine_flat(
+    cosmos_client: Any, partition_key: Any
+) -> None:
+    _check_cache_round_trip(
+        cosmos_client,
+        partition_key,
+        "cosine",
+        "flat",
+        [Generation(text="fizz")],
+    )
+
+
+def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_quantizedflat(
+    cosmos_client: Any, partition_key: Any
+) -> None:
+    _check_cache_round_trip(
+        cosmos_client,
+        partition_key,
+        "dotProduct",
+        "quantizedFlat",
+        [Generation(text="fizz"), Generation(text="Buzz")],
+    )
+
+
+def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_flat(
+    cosmos_client: Any, partition_key: Any
+) -> None:
+    _check_cache_round_trip(
+        cosmos_client,
+        partition_key,
+        "dotProduct",
+        "flat",
+        [Generation(text="fizz"), Generation(text="Buzz")],
+    )
+
+
+def test_azure_cosmos_db_nosql_semantic_cache_euclidean_quantizedflat(
+    cosmos_client: Any, partition_key: Any
+) -> None:
+    _check_cache_round_trip(
+        cosmos_client,
+        partition_key,
+        "euclidean",
+        "quantizedFlat",
+        [Generation(text="fizz")],
+    )
+
+
+def test_azure_cosmos_db_nosql_semantic_cache_euclidean_flat(
+    cosmos_client: Any, partition_key: Any
+) -> None:
+    _check_cache_round_trip(
+        cosmos_client,
+        partition_key,
+        "euclidean",
+        "flat",
+        [Generation(text="fizz")],
+    )
diff --git a/libs/community/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py b/libs/community/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py
index 63c0d0a17efa1..bbaca0775be7c 100644
--- a/libs/community/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py
+++ b/libs/community/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py
@@ -45,14 +45,6 @@ def partition_key() -> Any:
     return PartitionKey(path="/id")
 
 
-@pytest.fixture()
-def azure_openai_embeddings() -> Any:
-    openai_embeddings: OpenAIEmbeddings = OpenAIEmbeddings(
-        deployment=model_deployment, model=model_name, chunk_size=1
-    )
-    return openai_embeddings
-
-
 def safe_delete_database(cosmos_client: Any) -> None:
     cosmos_client.delete_database(database_name)
 
@@ -101,7 +93,7 @@ def test_from_documents_cosine_distance(
 
     store = AzureCosmosDBNoSqlVectorSearch.from_documents(
         documents,
-        azure_openai_embeddings,
+        embedding=azure_openai_embeddings,
         cosmos_client=cosmos_client,
         database_name=database_name,
container_name=container_name, @@ -175,7 +167,7 @@ def test_from_documents_cosine_distance_with_filtering( store = AzureCosmosDBNoSqlVectorSearch.from_documents( documents, - azure_openai_embeddings, + embedding=azure_openai_embeddings, cosmos_client=cosmos_client, database_name=database_name, container_name=container_name, @@ -195,11 +187,6 @@ def test_from_documents_cosine_distance_with_filtering( assert "Border Collies" in output[0].page_content assert output[0].metadata["a"] == 1 - # pre_filter = { - # "conditions": [ - # {"property": "metadata.a", "operator": "$eq", "value": 1}, - # ], - # } pre_filter = PreFilter( conditions=[ Condition(property="metadata.a", operator="$eq", value=1), @@ -213,11 +200,6 @@ def test_from_documents_cosine_distance_with_filtering( assert "Border Collies" in output[0].page_content assert output[0].metadata["a"] == 1 - # pre_filter = { - # "conditions": [ - # {"property": "metadata.a", "operator": "$eq", "value": 1}, - # ], - # } pre_filter = PreFilter( conditions=[ Condition(property="metadata.a", operator="$eq", value=1), @@ -262,15 +244,6 @@ def test_from_documents_full_text_and_hybrid( sleep(480) # waits for Cosmos DB to save contents to the collection # Full text search contains any - # pre_filter = { - # "conditions": [ - # { - # "property": "text", - # "operator": "$full_text_contains_any", - # "value": "intelligent herders", - # }, - # ], - # } pre_filter = PreFilter( conditions=[ Condition( @@ -292,15 +265,6 @@ def test_from_documents_full_text_and_hybrid( assert "Border Collies" in output[0].page_content # Full text search contains all - # pre_filter = { - # "conditions": [ - # { - # "property": "text", - # "operator": "$full_text_contains_all", - # "value": "intelligent herders", - # }, - # ], - # } pre_filter = PreFilter( conditions=[ Condition( @@ -332,11 +296,6 @@ def test_from_documents_full_text_and_hybrid( assert "Standard Poodles" in output[0].page_content # Full text search BM25 ranking with filtering - # 
pre_filter = { - # "conditions": [ - # {"property": "metadata.a", "operator": "$eq", "value": 1}, - # ], - # } pre_filter = PreFilter( conditions=[ Condition(property="metadata.a", operator="$eq", value=1), @@ -363,11 +322,6 @@ def test_from_documents_full_text_and_hybrid( assert "Border Collies" in output[0].page_content # Hybrid search RRF ranking with filtering - # pre_filter = { - # "conditions": [ - # {"property": "metadata.a", "operator": "$eq", "value": 1}, - # ], - # } pre_filter = PreFilter( conditions=[ Condition(property="metadata.a", operator="$eq", value=1), @@ -385,16 +339,6 @@ def test_from_documents_full_text_and_hybrid( assert "Border Collies" in output[0].page_content # Full text search BM25 ranking with full text filtering - # pre_filter = { - # "conditions": [ - # { - # "property": "text", - # "operator": "$full_text_contains", - # "value": "energetic", - # }, - # ] - # } - pre_filter = PreFilter( conditions=[ Condition( @@ -414,17 +358,6 @@ def test_from_documents_full_text_and_hybrid( assert "Border Collies" in output[0].page_content # Full text search BM25 ranking with full text filtering - # pre_filter = { - # "conditions": [ - # { - # "property": "text", - # "operator": "$full_text_contains", - # "value": "energetic", - # }, - # {"property": "metadata.a", "operator": "$eq", "value": 2}, - # ], - # "logical_operator": "$and", - # } pre_filter = PreFilter( conditions=[ Condition(