Skip to content

Commit

Permalink
add as run param
Browse files Browse the repository at this point in the history
  • Loading branch information
tstadel committed Jun 21, 2024
1 parent 2489722 commit d4c198a
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ def run(
top_k: Optional[int] = None,
fuzziness: Optional[str] = None,
scale_score: Optional[bool] = None,
custom_query: Optional[str] = None,
):
"""
Retrieve documents using BM25 retrieval.
Expand All @@ -124,6 +125,31 @@ def run(
:param fuzziness: Fuzziness parameter for full-text queries.
:param scale_score: Whether to scale the score of retrieved documents between 0 and 1.
This is useful when comparing documents across different indexes.
:param custom_query: The query string containing a mandatory `${query}` placeholder and an optional `${filters}`
placeholder. If not provided, the `custom_query` set at initialization is used.
**An example custom_query:**
```python
{
"query": {
"bool": {
"should": [{"multi_match": {
"query": ${query}, // mandatory query placeholder
"type": "most_fields",
"fields": ["content", "title"]}}],
"filter": ${filters} // optional filter placeholder
}
}
}
```
**For this custom_query, a sample `run()` could be:**
```python
retriever.run(query="Why did the revenue increase?",
filters={"years": ["2019"], "quarters": ["Q1", "Q2"]},
custom_query=custom_query)
```
:returns:
A dictionary containing the retrieved documents with the following structure:
Expand All @@ -140,6 +166,8 @@ def run(
fuzziness = self._fuzziness
if scale_score is None:
scale_score = self._scale_score
if custom_query is None:
custom_query = self._custom_query

docs = self._document_store._bm25_retrieval(
query=query,
Expand All @@ -148,6 +176,6 @@ def run(
top_k=top_k,
scale_score=scale_score,
all_terms_must_match=all_terms_must_match,
custom_query=self._custom_query,
custom_query=custom_query,
)
return {"documents": docs}
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,45 @@ def from_dict(cls, data: Dict[str, Any]) -> "OpenSearchEmbeddingRetriever":
return default_from_dict(cls, data)

@component.output_types(documents=List[Document])
def run(self, query_embedding: List[float], filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None):
def run(self, query_embedding: List[float], filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None, custom_query: Optional[str] = None,):
"""
Retrieve documents using a vector similarity metric.
:param query_embedding: Embedding of the query.
:param filters: Optional filters to narrow down the search space.
:param top_k: Maximum number of Documents to return.
:param custom_query: The query string containing a mandatory `${query_embedding}` placeholder and an optional
`${filters}` placeholder. If not provided, the `custom_query` set at initialization is used.
**An example custom_query:**
```python
{
"query": {
"bool": {
"must": [
{
"knn": {
"embedding": {
"vector": ${query_embedding}, // mandatory query embedding placeholder
"k": 10000
}
}
}
],
"filter": ${filters} // optional filter placeholder
}
}
}
```
**For this custom_query, a sample `run()` could be:**
```python
retriever.run(query_embedding=embedding,
filters={"years": ["2019"], "quarters": ["Q1", "Q2"]},
custom_query=custom_query)
```
:returns:
Dictionary with key "documents" containing the retrieved Documents.
- documents: List of Document similar to `query_embedding`.
Expand All @@ -120,11 +152,13 @@ def run(self, query_embedding: List[float], filters: Optional[Dict[str, Any]] =
filters = self._filters
if top_k is None:
top_k = self._top_k
if custom_query is None:
custom_query = self._custom_query

docs = self._document_store._embedding_retrieval(
query_embedding=query_embedding,
filters=filters,
top_k=top_k,
custom_query=self._custom_query,
custom_query=custom_query,
)
return {"documents": docs}

0 comments on commit d4c198a

Please sign in to comment.