Skip to content

Commit

Permalink
Elastiscsearch BM25Retriever: set scale_score default value to False (
Browse files Browse the repository at this point in the history
#50)

* set scale_score default to False

* unrelated: replace text w content

* Update document_stores/elasticsearch/tests/test_bm25_retriever.py

Co-authored-by: Silvano Cerza <[email protected]>

---------

Co-authored-by: Silvano Cerza <[email protected]>
  • Loading branch information
anakin87 and silvanocerza authored Nov 14, 2023
1 parent 2d7dd0a commit 5d77ec4
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def __init__(
filters: Optional[Dict[str, Any]] = None,
fuzziness: str = "AUTO",
top_k: int = 10,
scale_score: bool = True,
scale_score: bool = False,
):
if not isinstance(document_store, ElasticsearchDocumentStore):
msg = "document_store must be an instance of ElasticsearchDocumentStore"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
Hosts = Union[str, List[Union[str, Mapping[str, Union[str, int]], NodeConfig]]]

# document scores are essentially unbounded and will be scaled to values between 0 and 1 if scale_score is set to
# True (default). Scaling uses the expit function (inverse of the logit function) after applying a scaling factor
# True. Scaling uses the expit function (inverse of the logit function) after applying a scaling factor
# (e.g., BM25_SCALING_FACTOR for the bm25_retrieval method).
# Larger scaling factor decreases scaled scores. For example, an input of 10 is scaled to 0.99 with
# BM25_SCALING_FACTOR=2 but to 0.78 with BM25_SCALING_FACTOR=8 (default). The defaults were chosen empirically.
Expand Down Expand Up @@ -248,7 +248,7 @@ def _bm25_retrieval(
filters: Optional[Dict[str, Any]] = None,
fuzziness: str = "AUTO",
top_k: int = 10,
scale_score: bool = True,
scale_score: bool = False,
) -> List[Document]:
"""
Elasticsearch by defaults uses BM25 search algorithm.
Expand All @@ -268,7 +268,7 @@ def _bm25_retrieval(
see the official documentation for valid values:
https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness
:param top_k: Maximum number of Documents to return, defaults to 10
:param scale_score: If `True` scales the Document`s scores between 0 and 1, defaults to True
:param scale_score: If `True` scales the Document`s scores between 0 and 1, defaults to False
:raises ValueError: If `query` is an empty string
:return: List of Document that match `query`
"""
Expand Down
6 changes: 3 additions & 3 deletions document_stores/elasticsearch/tests/test_bm25_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_init_default():
assert retriever._document_store == mock_store
assert retriever._filters == {}
assert retriever._top_k == 10
assert retriever._scale_score
assert not retriever._scale_score


@patch("elasticsearch_haystack.document_store.Elasticsearch")
Expand All @@ -33,7 +33,7 @@ def test_to_dict(_mock_elasticsearch_client):
"filters": {},
"fuzziness": "AUTO",
"top_k": 10,
"scale_score": True,
"scale_score": False,
},
}

Expand Down Expand Up @@ -71,7 +71,7 @@ def test_run():
filters={},
fuzziness="AUTO",
top_k=10,
scale_score=True,
scale_score=False,
)
assert len(res) == 1
assert len(res["documents"]) == 1
Expand Down
4 changes: 2 additions & 2 deletions document_stores/elasticsearch/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def test_delete_not_empty(self, docstore: ElasticsearchDocumentStore):
`DocumentStoreBaseTests` declares this test but we override it since we
want `delete_documents` to be idempotent.
"""
doc = Document(text="test doc")
doc = Document(content="test doc")
docstore.write_documents([doc])

docstore.delete_documents([doc.id])
Expand All @@ -154,7 +154,7 @@ def test_delete_not_empty_nonexisting(self, docstore: ElasticsearchDocumentStore
`DocumentStoreBaseTests` declares this test but we override it since we
want `delete_documents` to be idempotent.
"""
doc = Document(text="test doc")
doc = Document(content="test doc")
docstore.write_documents([doc])

docstore.delete_documents(["non_existing"])
Expand Down

0 comments on commit 5d77ec4

Please sign in to comment.