Skip to content

Commit

Permalink
chore: OpenSearch - remove legacy filter support (#1067)
Browse files: browse the repository at this point in the history
* Remove legacy filter support

* Lint

* More linting

* Small fix

* Remove outdated test

* Lint tests

* Remove outdated test

* Improve error message

* Error msg fmt

* More formatting
  • Loading branch information
vblagoje authored and Amnah199 committed Oct 2, 2024
1 parent cde2f1d commit 89008be
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 96 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,7 +9,6 @@
from haystack.dataclasses import Document
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
from haystack.document_stores.types import DuplicatePolicy
from haystack.utils.filters import convert
from opensearchpy import OpenSearch
from opensearchpy.helpers import bulk

Expand Down Expand Up @@ -238,14 +237,14 @@ def _search_documents(self, **kwargs) -> List[Document]:

def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
if filters and "operator" not in filters and "conditions" not in filters:
filters = convert(filters)
msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
raise ValueError(msg)

if filters:
query = {"bool": {"filter": normalize_filters(filters)}}
documents = self._search_documents(query=query, size=10_000)
else:
documents = self._search_documents(size=10_000)

return documents

def write_documents(self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE) -> int:
Expand Down Expand Up @@ -384,7 +383,8 @@ def _bm25_retrieval(
:returns: List of Document that match `query`
"""
if filters and "operator" not in filters and "conditions" not in filters:
filters = convert(filters)
msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
raise ValueError(msg)

if not query:
body: Dict[str, Any] = {"query": {"bool": {"must": {"match_all": {}}}}}
Expand Down Expand Up @@ -478,7 +478,8 @@ def _embedding_retrieval(
:returns: List of Document that are most similar to `query_embedding`
"""
if filters and "operator" not in filters and "conditions" not in filters:
filters = convert(filters)
msg = "Legacy filters support has been removed. Please see documentation for new filter syntax."
raise ValueError(msg)

if not query_embedding:
msg = "query_embedding must be a non-empty list of floats"
Expand Down
91 changes: 0 additions & 91 deletions integrations/opensearch/tests/test_document_store.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -574,76 +574,6 @@ def test_bm25_retrieval_with_filters(self, document_store: OpenSearchDocumentSto
retrieved_ids = sorted([doc.id for doc in res])
assert retrieved_ids == ["1", "2", "3", "4", "5"]

def test_bm25_retrieval_with_legacy_filters(self, document_store: OpenSearchDocumentStore):
document_store.write_documents(
[
Document(
content="Haskell is a functional programming language",
meta={"likes": 100000, "language_type": "functional"},
id="1",
),
Document(
content="Lisp is a functional programming language",
meta={"likes": 10000, "language_type": "functional"},
id="2",
),
Document(
content="Exilir is a functional programming language",
meta={"likes": 1000, "language_type": "functional"},
id="3",
),
Document(
content="F# is a functional programming language",
meta={"likes": 100, "language_type": "functional"},
id="4",
),
Document(
content="C# is a functional programming language",
meta={"likes": 10, "language_type": "functional"},
id="5",
),
Document(
content="C++ is an object oriented programming language",
meta={"likes": 100000, "language_type": "object_oriented"},
id="6",
),
Document(
content="Dart is an object oriented programming language",
meta={"likes": 10000, "language_type": "object_oriented"},
id="7",
),
Document(
content="Go is an object oriented programming language",
meta={"likes": 1000, "language_type": "object_oriented"},
id="8",
),
Document(
content="Python is a object oriented programming language",
meta={"likes": 100, "language_type": "object_oriented"},
id="9",
),
Document(
content="Ruby is a object oriented programming language",
meta={"likes": 10, "language_type": "object_oriented"},
id="10",
),
Document(
content="PHP is a object oriented programming language",
meta={"likes": 1, "language_type": "object_oriented"},
id="11",
),
]
)

res = document_store._bm25_retrieval(
"programming",
top_k=10,
filters={"language_type": "functional"},
)
assert len(res) == 5
retrieved_ids = sorted([doc.id for doc in res])
assert retrieved_ids == ["1", "2", "3", "4", "5"]

def test_bm25_retrieval_with_custom_query(self, document_store: OpenSearchDocumentStore):
document_store.write_documents(
[
Expand Down Expand Up @@ -760,27 +690,6 @@ def test_embedding_retrieval_with_filters(self, document_store_embedding_dim_4:
assert len(results) == 1
assert results[0].content == "Not very similar document with meta field"

def test_embedding_retrieval_with_legacy_filters(self, document_store_embedding_dim_4: OpenSearchDocumentStore):
docs = [
Document(content="Most similar document", embedding=[1.0, 1.0, 1.0, 1.0]),
Document(content="2nd best document", embedding=[0.8, 0.8, 0.8, 1.0]),
Document(
content="Not very similar document with meta field",
embedding=[0.0, 0.8, 0.3, 0.9],
meta={"meta_field": "custom_value"},
),
]
document_store_embedding_dim_4.write_documents(docs)

filters = {"meta_field": "custom_value"}
# we set top_k=3, to make the test pass as we are not sure whether efficient filtering is supported for nmslib
# TODO: remove top_k=3, when efficient filtering is supported for nmslib
results = document_store_embedding_dim_4._embedding_retrieval(
query_embedding=[0.1, 0.1, 0.1, 0.1], top_k=3, filters=filters
)
assert len(results) == 1
assert results[0].content == "Not very similar document with meta field"

def test_embedding_retrieval_pagination(self, document_store_embedding_dim_4: OpenSearchDocumentStore):
"""
Test that handling of pagination works as expected, when the matching documents are > 10.
Expand Down

0 comments on commit 89008be

Please sign in to comment.