From d3295bdf8945624ff5e24f880bb0514dd03c12ba Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Wed, 11 Dec 2024 09:21:49 +0100 Subject: [PATCH] Update docstring and type of fuzziness --- .../retrievers/opensearch/bm25_retriever.py | 14 ++++++++++---- .../document_stores/opensearch/document_store.py | 12 +++++++++--- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/bm25_retriever.py b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/bm25_retriever.py index 4a8478e2c..69288a5cf 100644 --- a/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/bm25_retriever.py +++ b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/bm25_retriever.py @@ -27,7 +27,7 @@ def __init__( *, document_store: OpenSearchDocumentStore, filters: Optional[Dict[str, Any]] = None, - fuzziness: str = "AUTO", + fuzziness: Union[int, str] = "AUTO", top_k: int = 10, scale_score: bool = False, all_terms_must_match: bool = False, @@ -40,8 +40,14 @@ def __init__( :param document_store: An instance of OpenSearchDocumentStore to use with the Retriever. :param filters: Filters to narrow down the search for documents in the Document Store. - :param fuzziness: Fuzziness parameter for full-text queries to apply approximate string matching. - For more information, see [OpenSearch fuzzy query](https://opensearch.org/docs/latest/query-dsl/term/fuzzy/). + :param fuzziness: Determines how approximate string matching is applied in full-text queries. + This parameter sets the maximum number of character edits (insertions, deletions, or substitutions) + allowed for a query term to match a term in a document. For example, the "fuzziness" between the words + "wined" and "wind" is 1 because only one edit is needed to match them. + + Use "AUTO" (the default) for automatic adjustment based on term length, which is optimal for + most scenarios. 
For detailed guidance, refer to the + [OpenSearch fuzzy query documentation](https://opensearch.org/docs/latest/query-dsl/term/fuzzy/). :param top_k: Maximum number of documents to return. :param scale_score: If `True`, scales the score of retrieved documents to a range between 0 and 1. This is useful when comparing documents across different indexes. @@ -153,7 +159,7 @@ def run( filters: Optional[Dict[str, Any]] = None, all_terms_must_match: Optional[bool] = None, top_k: Optional[int] = None, - fuzziness: Optional[str] = None, + fuzziness: Optional[Union[int, str]] = None, scale_score: Optional[bool] = None, custom_query: Optional[Dict[str, Any]] = None, ): diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py index 4ec2420b3..6cb5295f0 100644 --- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py +++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py @@ -340,7 +340,7 @@ def _bm25_retrieval( query: str, *, filters: Optional[Dict[str, Any]] = None, - fuzziness: str = "AUTO", + fuzziness: Union[int, str] = "AUTO", top_k: int = 10, scale_score: bool = False, all_terms_must_match: bool = False, @@ -357,8 +357,14 @@ def _bm25_retrieval( :param query: String to search in saved Documents' text. :param filters: Optional filters to narrow down the search space. - :param fuzziness: Fuzziness parameter passed to OpenSearch, defaults to "AUTO". see the official documentation - for valid [fuzziness values](https://www.elastic.co/guide/en/OpenSearch/reference/current/common-options.html#fuzziness) + :param fuzziness: Determines how approximate string matching is applied in full-text queries. + This parameter sets the maximum number of character edits (insertions, deletions, or substitutions) + allowed for a query term to match a term in a document. 
For example, the "fuzziness" between the words + "wined" and "wind" is 1 because only one edit is needed to match them. + + Use "AUTO" (the default) for automatic adjustment based on term length, which is optimal for + most scenarios. For detailed guidance, refer to the + [OpenSearch fuzzy query documentation](https://opensearch.org/docs/latest/query-dsl/term/fuzzy/). :param top_k: Maximum number of Documents to return, defaults to 10 :param scale_score: If `True` scales the Document`s scores between 0 and 1, defaults to False :param all_terms_must_match: If `True` all terms in `query` must be present in the Document, defaults to False