From b3570453b0f5b25f7c427e3f82a79d768e1f1000 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Mon, 13 Nov 2023 12:05:13 +0100 Subject: [PATCH 1/3] set scale_score default to False --- .../src/elasticsearch_haystack/bm25_retriever.py | 2 +- .../src/elasticsearch_haystack/document_store.py | 6 +++--- document_stores/elasticsearch/tests/test_bm25_retriever.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/document_stores/elasticsearch/src/elasticsearch_haystack/bm25_retriever.py b/document_stores/elasticsearch/src/elasticsearch_haystack/bm25_retriever.py index f48efcf3f..017860a9a 100644 --- a/document_stores/elasticsearch/src/elasticsearch_haystack/bm25_retriever.py +++ b/document_stores/elasticsearch/src/elasticsearch_haystack/bm25_retriever.py @@ -18,7 +18,7 @@ def __init__( filters: Optional[Dict[str, Any]] = None, fuzziness: str = "AUTO", top_k: int = 10, - scale_score: bool = True, + scale_score: bool = False, ): if not isinstance(document_store, ElasticsearchDocumentStore): msg = "document_store must be an instance of ElasticsearchDocumentStore" diff --git a/document_stores/elasticsearch/src/elasticsearch_haystack/document_store.py b/document_stores/elasticsearch/src/elasticsearch_haystack/document_store.py index b428b19e8..740b54180 100644 --- a/document_stores/elasticsearch/src/elasticsearch_haystack/document_store.py +++ b/document_stores/elasticsearch/src/elasticsearch_haystack/document_store.py @@ -22,7 +22,7 @@ Hosts = Union[str, List[Union[str, Mapping[str, Union[str, int]], NodeConfig]]] # document scores are essentially unbounded and will be scaled to values between 0 and 1 if scale_score is set to -# True (default). Scaling uses the expit function (inverse of the logit function) after applying a scaling factor +# True. Scaling uses the expit function (inverse of the logit function) after applying a scaling factor # (e.g., BM25_SCALING_FACTOR for the bm25_retrieval method). # Larger scaling factor decreases scaled scores. For example, an input of 10 is scaled to 0.99 with # BM25_SCALING_FACTOR=2 but to 0.78 with BM25_SCALING_FACTOR=8 (default). The defaults were chosen empirically. @@ -248,7 +248,7 @@ def _bm25_retrieval( filters: Optional[Dict[str, Any]] = None, fuzziness: str = "AUTO", top_k: int = 10, - scale_score: bool = True, + scale_score: bool = False, ) -> List[Document]: """ Elasticsearch by defaults uses BM25 search algorithm. @@ -268,7 +268,7 @@ def _bm25_retrieval( see the official documentation for valid values: https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness :param top_k: Maximum number of Documents to return, defaults to 10 - :param scale_score: If `True` scales the Document`s scores between 0 and 1, defaults to True + :param scale_score: If `True` scales the Document`s scores between 0 and 1, defaults to False :raises ValueError: If `query` is an empty string :return: List of Document that match `query` """ diff --git a/document_stores/elasticsearch/tests/test_bm25_retriever.py b/document_stores/elasticsearch/tests/test_bm25_retriever.py index b9914e69c..d07bb0709 100644 --- a/document_stores/elasticsearch/tests/test_bm25_retriever.py +++ b/document_stores/elasticsearch/tests/test_bm25_retriever.py @@ -15,7 +15,7 @@ def test_init_default(): assert retriever._document_store == mock_store assert retriever._filters == {} assert retriever._top_k == 10 - assert retriever._scale_score + assert retriever._scale_score is False @patch("elasticsearch_haystack.document_store.Elasticsearch") @@ -33,7 +33,7 @@ def test_to_dict(_mock_elasticsearch_client): "filters": {}, "fuzziness": "AUTO", "top_k": 10, - "scale_score": True, + "scale_score": False, }, } @@ -71,7 +71,7 @@ def test_run(): filters={}, fuzziness="AUTO", top_k=10, - scale_score=True, + scale_score=False, ) assert len(res) == 1 assert len(res["documents"]) == 1 From 8b47688c794ecbc7818e0dc2b58e57caef039388 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Mon, 13 Nov 2023 12:11:25 +0100 Subject: [PATCH 2/3] unrelated: replace text w content --- document_stores/elasticsearch/tests/test_document_store.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/document_stores/elasticsearch/tests/test_document_store.py b/document_stores/elasticsearch/tests/test_document_store.py index 64c154687..130da8340 100644 --- a/document_stores/elasticsearch/tests/test_document_store.py +++ b/document_stores/elasticsearch/tests/test_document_store.py @@ -132,7 +132,7 @@ def test_delete_not_empty(self, docstore: ElasticsearchDocumentStore): `DocumentStoreBaseTests` declares this test but we override it since we want `delete_documents` to be idempotent. """ - doc = Document(text="test doc") + doc = Document(content="test doc") docstore.write_documents([doc]) docstore.delete_documents([doc.id]) @@ -154,7 +154,7 @@ def test_delete_not_empty_nonexisting(self, docstore: ElasticsearchDocumentStore `DocumentStoreBaseTests` declares this test but we override it since we want `delete_documents` to be idempotent. """ - doc = Document(text="test doc") + doc = Document(content="test doc") docstore.write_documents([doc]) docstore.delete_documents(["non_existing"]) From f3b20f1533341876df0add4d03a178ea8b1ae3ca Mon Sep 17 00:00:00 2001 From: Stefano Fiorucci <44616784+anakin87@users.noreply.github.com> Date: Tue, 14 Nov 2023 10:37:06 +0100 Subject: [PATCH 3/3] Update document_stores/elasticsearch/tests/test_bm25_retriever.py Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> --- document_stores/elasticsearch/tests/test_bm25_retriever.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/document_stores/elasticsearch/tests/test_bm25_retriever.py b/document_stores/elasticsearch/tests/test_bm25_retriever.py index d07bb0709..86c5aac3a 100644 --- a/document_stores/elasticsearch/tests/test_bm25_retriever.py +++ b/document_stores/elasticsearch/tests/test_bm25_retriever.py @@ -15,7 +15,7 @@ def test_init_default(): assert retriever._document_store == mock_store assert retriever._filters == {} assert retriever._top_k == 10 - assert retriever._scale_score is False + assert not retriever._scale_score @patch("elasticsearch_haystack.document_store.Elasticsearch")