add as run param

deepset-ai · Jun 21, 2024 · d4c198a · d4c198a
1 parent 2489722
commit d4c198a
Show file tree

Hide file tree

Showing 2 changed files with 65 additions and 3 deletions.
diff --git a/...s/opensearch/src/haystack_integrations/components/retrievers/opensearch/bm25_retriever.py b/...s/opensearch/src/haystack_integrations/components/retrievers/opensearch/bm25_retriever.py
@@ -113,6 +113,7 @@ def run(
         top_k: Optional[int] = None,
         fuzziness: Optional[str] = None,
         scale_score: Optional[bool] = None,
+        custom_query: Optional[str] = None,
     ):
         """
         Retrieve documents using BM25 retrieval.
@@ -124,6 +125,31 @@ def run(
         :param fuzziness: Fuzziness parameter for full-text queries.
         :param scale_score: Whether to scale the score of retrieved documents between 0 and 1.
             This is useful when comparing documents across different indexes.
+        :param custom_query: The query string containing a mandatory `${query}` and an optional `${filters}` placeholder
+
+            **An example custom_query:**
+
+            ```python
+            {
+                "query": {
+                    "bool": {
+                        "should": [{"multi_match": {
+                            "query": ${query},                 // mandatory query placeholder
+                            "type": "most_fields",
+                            "fields": ["content", "title"]}}],
+                        "filter": ${filters}                  // optional filter placeholder
+                    }
+                }
+            }
+            ```
+
+            **For this custom_query, a sample `run()` could be:**
+
+            ```python
+            retriever.run(query="Why did the revenue increase?",
+                            filters={"years": ["2019"], "quarters": ["Q1", "Q2"]})
+            ```
+
 
         :returns:
             A dictionary containing the retrieved documents with the following structure:
@@ -140,6 +166,8 @@ def run(
             fuzziness = self._fuzziness
         if scale_score is None:
             scale_score = self._scale_score
+        if custom_query is None:
+            custom_query = self._custom_query
 
         docs = self._document_store._bm25_retrieval(
             query=query,
@@ -148,6 +176,6 @@ def run(
             top_k=top_k,
             scale_score=scale_score,
             all_terms_must_match=all_terms_must_match,
-            custom_query=self._custom_query,
+            custom_query=custom_query,
         )
         return {"documents": docs}
diff --git a/...nsearch/src/haystack_integrations/components/retrievers/opensearch/embedding_retriever.py b/...nsearch/src/haystack_integrations/components/retrievers/opensearch/embedding_retriever.py
@@ -105,13 +105,45 @@ def from_dict(cls, data: Dict[str, Any]) -> "OpenSearchEmbeddingRetriever":
         return default_from_dict(cls, data)
 
     @component.output_types(documents=List[Document])
-    def run(self, query_embedding: List[float], filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None):
+    def run(self, query_embedding: List[float], filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None, custom_query: Optional[str] = None,):
         """
         Retrieve documents using a vector similarity metric.
 
         :param query_embedding: Embedding of the query.
         :param filters: Optional filters to narrow down the search space.
         :param top_k: Maximum number of Documents to return.
+        :param custom_query: The query string containing a mandatory `${query_embedding}` and an optional `${filters}`
+            placeholder. If not provided, the `custom_query` set at initialization is used.
+
+            **An example custom_query:**
+
+            ```python
+            {
+                "query": {
+                    "bool": {
+                        "must": [
+                            {
+                                "knn": {
+                                    "embedding": {
+                                        "vector": ${query_embedding},   // mandatory query placeholder
+                                        "k": 10000,
+                                    }
+                                }
+                            }
+                        ],
+                        "filter": ${filters}                            // optional filter placeholder
+                    }
+                }
+            }
+            ```
+
+            **For this custom_query, a sample `run()` could be:**
+
+            ```python
+            retriever.run(query_embedding=embedding,
+                            filters={"years": ["2019"], "quarters": ["Q1", "Q2"]})
+            ```
+
         :returns:
             Dictionary with key "documents" containing the retrieved Documents.
             - documents: List of Document similar to `query_embedding`.
@@ -120,11 +152,13 @@ def run(self, query_embedding: List[float], filters: Optional[Dict[str, Any]] =
             filters = self._filters
         if top_k is None:
             top_k = self._top_k
+        if custom_query is None:
+            custom_query = self._custom_query
 
         docs = self._document_store._embedding_retrieval(
             query_embedding=query_embedding,
             filters=filters,
             top_k=top_k,
-            custom_query=self._custom_query,
+            custom_query=custom_query,
         )
         return {"documents": docs}