Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Elastiscsearch BM25Retriever: set scale_score default value to False #50

Merged
merged 3 commits into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def __init__(
filters: Optional[Dict[str, Any]] = None,
fuzziness: str = "AUTO",
top_k: int = 10,
scale_score: bool = True,
scale_score: bool = False,
):
if not isinstance(document_store, ElasticsearchDocumentStore):
msg = "document_store must be an instance of ElasticsearchDocumentStore"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
Hosts = Union[str, List[Union[str, Mapping[str, Union[str, int]], NodeConfig]]]

# document scores are essentially unbounded and will be scaled to values between 0 and 1 if scale_score is set to
# True (default). Scaling uses the expit function (inverse of the logit function) after applying a scaling factor
# True. Scaling uses the expit function (inverse of the logit function) after applying a scaling factor
# (e.g., BM25_SCALING_FACTOR for the bm25_retrieval method).
# Larger scaling factor decreases scaled scores. For example, an input of 10 is scaled to 0.99 with
# BM25_SCALING_FACTOR=2 but to 0.78 with BM25_SCALING_FACTOR=8 (default). The defaults were chosen empirically.
Expand Down Expand Up @@ -248,7 +248,7 @@ def _bm25_retrieval(
filters: Optional[Dict[str, Any]] = None,
fuzziness: str = "AUTO",
top_k: int = 10,
scale_score: bool = True,
scale_score: bool = False,
) -> List[Document]:
"""
Elasticsearch by defaults uses BM25 search algorithm.
Expand All @@ -268,7 +268,7 @@ def _bm25_retrieval(
see the official documentation for valid values:
https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness
:param top_k: Maximum number of Documents to return, defaults to 10
:param scale_score: If `True` scales the Document`s scores between 0 and 1, defaults to True
:param scale_score: If `True` scales the Document`s scores between 0 and 1, defaults to False
:raises ValueError: If `query` is an empty string
:return: List of Document that match `query`
"""
Expand Down
6 changes: 3 additions & 3 deletions document_stores/elasticsearch/tests/test_bm25_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_init_default():
assert retriever._document_store == mock_store
assert retriever._filters == {}
assert retriever._top_k == 10
assert retriever._scale_score
assert not retriever._scale_score


@patch("elasticsearch_haystack.document_store.Elasticsearch")
Expand All @@ -33,7 +33,7 @@ def test_to_dict(_mock_elasticsearch_client):
"filters": {},
"fuzziness": "AUTO",
"top_k": 10,
"scale_score": True,
"scale_score": False,
},
}

Expand Down Expand Up @@ -71,7 +71,7 @@ def test_run():
filters={},
fuzziness="AUTO",
top_k=10,
scale_score=True,
scale_score=False,
)
assert len(res) == 1
assert len(res["documents"]) == 1
Expand Down
4 changes: 2 additions & 2 deletions document_stores/elasticsearch/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def test_delete_not_empty(self, docstore: ElasticsearchDocumentStore):
`DocumentStoreBaseTests` declares this test but we override it since we
want `delete_documents` to be idempotent.
"""
doc = Document(text="test doc")
doc = Document(content="test doc")
docstore.write_documents([doc])

docstore.delete_documents([doc.id])
Expand All @@ -154,7 +154,7 @@ def test_delete_not_empty_nonexisting(self, docstore: ElasticsearchDocumentStore
`DocumentStoreBaseTests` declares this test but we override it since we
want `delete_documents` to be idempotent.
"""
doc = Document(text="test doc")
doc = Document(content="test doc")
docstore.write_documents([doc])

docstore.delete_documents(["non_existing"])
Expand Down