Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: reorganize docstore test suite to isolate dataframe tests #8684

Merged
merged 3 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
249 changes: 145 additions & 104 deletions haystack/testing/document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,74 +174,86 @@ def test_delete_documents_non_existing_document(self, document_store: DocumentSt
assert document_store.count_documents() == 1


class FilterableDocsFixtureMixin:
def create_filterable_docs(include_dataframe_docs: bool = False) -> List[Document]:
"""
Mixin class that adds a filterable_docs() fixture to a test class.
Create a list of filterable documents to be used in the filterable_docs and filterable_docs_with_dataframe fixtures.
"""

@pytest.fixture
def filterable_docs(self) -> List[Document]:
"""Fixture that returns a list of Documents that can be used to test filtering."""
documents = []
for i in range(3):
documents.append(
Document(
content=f"A Foo Document {i}",
meta={
"name": f"name_{i}",
"page": "100",
"chapter": "intro",
"number": 2,
"date": "1969-07-21T20:17:40",
},
embedding=_random_embeddings(768),
)
documents = []
for i in range(3):
documents.append(
Document(
content=f"A Foo Document {i}",
meta={
"name": f"name_{i}",
"page": "100",
"chapter": "intro",
"number": 2,
"date": "1969-07-21T20:17:40",
},
embedding=_random_embeddings(768),
)
documents.append(
Document(
content=f"A Bar Document {i}",
meta={
"name": f"name_{i}",
"page": "123",
"chapter": "abstract",
"number": -2,
"date": "1972-12-11T19:54:58",
},
embedding=_random_embeddings(768),
)
)
documents.append(
Document(
content=f"A Bar Document {i}",
meta={
"name": f"name_{i}",
"page": "123",
"chapter": "abstract",
"number": -2,
"date": "1972-12-11T19:54:58",
},
embedding=_random_embeddings(768),
)
documents.append(
Document(
content=f"A Foobar Document {i}",
meta={
"name": f"name_{i}",
"page": "90",
"chapter": "conclusion",
"number": -10,
"date": "1989-11-09T17:53:00",
},
embedding=_random_embeddings(768),
)
)
documents.append(
Document(
content=f"A Foobar Document {i}",
meta={
"name": f"name_{i}",
"page": "90",
"chapter": "conclusion",
"number": -10,
"date": "1989-11-09T17:53:00",
},
embedding=_random_embeddings(768),
)
documents.append(
Document(
content=f"Document {i} without embedding",
meta={"name": f"name_{i}", "no_embedding": True, "chapter": "conclusion"},
)
)
documents.append(
Document(
content=f"Document {i} without embedding",
meta={"name": f"name_{i}", "no_embedding": True, "chapter": "conclusion"},
)
)
documents.append(
Document(content=f"Doc {i} with zeros emb", meta={"name": "zeros_doc"}, embedding=TEST_EMBEDDING_1)
)
documents.append(
Document(content=f"Doc {i} with ones emb", meta={"name": "ones_doc"}, embedding=TEST_EMBEDDING_2)
)

if include_dataframe_docs:
for i in range(3):
documents.append(Document(dataframe=pd.DataFrame([i]), meta={"name": f"table_doc_{i}"}))
documents.append(
Document(content=f"Doc {i} with zeros emb", meta={"name": "zeros_doc"}, embedding=TEST_EMBEDDING_1)
)
documents.append(
Document(content=f"Doc {i} with ones emb", meta={"name": "ones_doc"}, embedding=TEST_EMBEDDING_2)
)
return documents

return documents


class FilterableDocsFixtureMixin:
    """
    Mixin that equips a test class with the `filterable_docs` pytest fixture.

    The fixture data is built by `create_filterable_docs()` with dataframe documents excluded.
    """

    @pytest.fixture
    def filterable_docs(self) -> List[Document]:
        """Provide the list of Documents used by the filtering tests, without dataframe documents."""
        docs = create_filterable_docs(include_dataframe_docs=False)
        return docs


class FilterDocumentsTest(AssertDocumentsEqualMixin, FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using different types of filters.
Utility class to test a Document Store `filter_documents` method using different types of filters.

To use it create a custom test class and override the `document_store` fixture to return your Document Store.
Example usage:
Expand Down Expand Up @@ -270,16 +282,6 @@ def test_comparison_equal(self, document_store, filterable_docs):
result = document_store.filter_documents(filters={"field": "meta.number", "operator": "==", "value": 100})
self.assert_documents_are_equal(result, [d for d in filterable_docs if d.meta.get("number") == 100])

def test_comparison_equal_with_dataframe(self, document_store, filterable_docs):
"""Test filter_documents() with == comparator and dataframe"""
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(
filters={"field": "dataframe", "operator": "==", "value": pd.DataFrame([1])}
)
self.assert_documents_are_equal(
result, [d for d in filterable_docs if d.dataframe is not None and d.dataframe.equals(pd.DataFrame([1]))]
)

def test_comparison_equal_with_none(self, document_store, filterable_docs):
"""Test filter_documents() with == comparator and None"""
document_store.write_documents(filterable_docs)
Expand All @@ -293,16 +295,6 @@ def test_comparison_not_equal(self, document_store, filterable_docs):
result = document_store.filter_documents({"field": "meta.number", "operator": "!=", "value": 100})
self.assert_documents_are_equal(result, [d for d in filterable_docs if d.meta.get("number") != 100])

def test_comparison_not_equal_with_dataframe(self, document_store, filterable_docs):
"""Test filter_documents() with != comparator and dataframe"""
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(
filters={"field": "dataframe", "operator": "!=", "value": pd.DataFrame([1])}
)
self.assert_documents_are_equal(
result, [d for d in filterable_docs if d.dataframe is None or not d.dataframe.equals(pd.DataFrame([1]))]
)

def test_comparison_not_equal_with_none(self, document_store, filterable_docs):
"""Test filter_documents() with != comparator and None"""
document_store.write_documents(filterable_docs)
Expand Down Expand Up @@ -340,12 +332,6 @@ def test_comparison_greater_than_with_string(self, document_store, filterable_do
with pytest.raises(FilterError):
document_store.filter_documents(filters={"field": "meta.number", "operator": ">", "value": "1"})

def test_comparison_greater_than_with_dataframe(self, document_store, filterable_docs):
"""Test filter_documents() with > comparator and dataframe"""
document_store.write_documents(filterable_docs)
with pytest.raises(FilterError):
document_store.filter_documents(filters={"field": "dataframe", "operator": ">", "value": pd.DataFrame([1])})

def test_comparison_greater_than_with_list(self, document_store, filterable_docs):
"""Test filter_documents() with > comparator and list"""
document_store.write_documents(filterable_docs)
Expand Down Expand Up @@ -389,14 +375,6 @@ def test_comparison_greater_than_equal_with_string(self, document_store, filtera
with pytest.raises(FilterError):
document_store.filter_documents(filters={"field": "meta.number", "operator": ">=", "value": "1"})

def test_comparison_greater_than_equal_with_dataframe(self, document_store, filterable_docs):
"""Test filter_documents() with >= comparator and dataframe"""
document_store.write_documents(filterable_docs)
with pytest.raises(FilterError):
document_store.filter_documents(
filters={"field": "dataframe", "operator": ">=", "value": pd.DataFrame([1])}
)

def test_comparison_greater_than_equal_with_list(self, document_store, filterable_docs):
"""Test filter_documents() with >= comparator and list"""
document_store.write_documents(filterable_docs)
Expand Down Expand Up @@ -440,12 +418,6 @@ def test_comparison_less_than_with_string(self, document_store, filterable_docs)
with pytest.raises(FilterError):
document_store.filter_documents(filters={"field": "meta.number", "operator": "<", "value": "1"})

def test_comparison_less_than_with_dataframe(self, document_store, filterable_docs):
"""Test filter_documents() with < comparator and dataframe"""
document_store.write_documents(filterable_docs)
with pytest.raises(FilterError):
document_store.filter_documents(filters={"field": "dataframe", "operator": "<", "value": pd.DataFrame([1])})

def test_comparison_less_than_with_list(self, document_store, filterable_docs):
"""Test filter_documents() with < comparator and list"""
document_store.write_documents(filterable_docs)
Expand Down Expand Up @@ -489,14 +461,6 @@ def test_comparison_less_than_equal_with_string(self, document_store, filterable
with pytest.raises(FilterError):
document_store.filter_documents(filters={"field": "meta.number", "operator": "<=", "value": "1"})

def test_comparison_less_than_equal_with_dataframe(self, document_store, filterable_docs):
"""Test filter_documents() with <= comparator and dataframe"""
document_store.write_documents(filterable_docs)
with pytest.raises(FilterError):
document_store.filter_documents(
filters={"field": "dataframe", "operator": "<=", "value": pd.DataFrame([1])}
)

def test_comparison_less_than_equal_with_list(self, document_store, filterable_docs):
"""Test filter_documents() with <= comparator and list"""
document_store.write_documents(filterable_docs)
Expand Down Expand Up @@ -638,6 +602,83 @@ def test_missing_condition_value_key(self, document_store, filterable_docs):
)


class FilterableDocsFixtureMixinWithDataframe:
    """
    Mixin that equips a test class with the `filterable_docs_with_dataframe` pytest fixture.

    The fixture data is built by `create_filterable_docs()` with dataframe documents included.
    """

    @pytest.fixture
    def filterable_docs_with_dataframe(self) -> List[Document]:
        """Provide the list of filterable Documents, dataframe documents included."""
        return create_filterable_docs(include_dataframe_docs=True)


class FilterDocumentsTestWithDataframe(AssertDocumentsEqualMixin, FilterableDocsFixtureMixinWithDataframe):
    """
    Reusable tests for a Document Store's `filter_documents` method when filtering on the `dataframe` field.

    Every test requests a `document_store` fixture, which is not defined in this class.
    """

    def test_comparison_equal_with_dataframe(self, document_store, filterable_docs_with_dataframe):
        """`==` on the dataframe field returns the documents whose dataframe equals the filter value."""
        document_store.write_documents(filterable_docs_with_dataframe)
        target = pd.DataFrame([1])
        filters = {"field": "dataframe", "operator": "==", "value": target}
        result = document_store.filter_documents(filters=filters)

        expected = []
        for doc in filterable_docs_with_dataframe:
            if doc.dataframe is not None and doc.dataframe.equals(target):
                expected.append(doc)
        self.assert_documents_are_equal(result, expected)

    def test_comparison_not_equal_with_dataframe(self, document_store, filterable_docs_with_dataframe):
        """`!=` on the dataframe field returns documents with no dataframe or with a different dataframe."""
        document_store.write_documents(filterable_docs_with_dataframe)
        target = pd.DataFrame([1])
        filters = {"field": "dataframe", "operator": "!=", "value": target}
        result = document_store.filter_documents(filters=filters)

        expected = []
        for doc in filterable_docs_with_dataframe:
            # Skip only the documents that carry a dataframe equal to the target.
            if doc.dataframe is not None and doc.dataframe.equals(target):
                continue
            expected.append(doc)
        self.assert_documents_are_equal(result, expected)

    def test_comparison_greater_than_with_dataframe(self, document_store, filterable_docs_with_dataframe):
        """`>` with a dataframe value must raise FilterError."""
        document_store.write_documents(filterable_docs_with_dataframe)
        filters = {"field": "dataframe", "operator": ">", "value": pd.DataFrame([1])}
        with pytest.raises(FilterError):
            document_store.filter_documents(filters=filters)

    def test_comparison_greater_than_equal_with_dataframe(self, document_store, filterable_docs_with_dataframe):
        """`>=` with a dataframe value must raise FilterError."""
        document_store.write_documents(filterable_docs_with_dataframe)
        filters = {"field": "dataframe", "operator": ">=", "value": pd.DataFrame([1])}
        with pytest.raises(FilterError):
            document_store.filter_documents(filters=filters)

    def test_comparison_less_than_with_dataframe(self, document_store, filterable_docs_with_dataframe):
        """`<` with a dataframe value must raise FilterError."""
        document_store.write_documents(filterable_docs_with_dataframe)
        filters = {"field": "dataframe", "operator": "<", "value": pd.DataFrame([1])}
        with pytest.raises(FilterError):
            document_store.filter_documents(filters=filters)

    def test_comparison_less_than_equal_with_dataframe(self, document_store, filterable_docs_with_dataframe):
        """`<=` with a dataframe value must raise FilterError."""
        document_store.write_documents(filterable_docs_with_dataframe)
        filters = {"field": "dataframe", "operator": "<=", "value": pd.DataFrame([1])}
        with pytest.raises(FilterError):
            document_store.filter_documents(filters=filters)


class DocumentStoreBaseTests(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest, FilterDocumentsTest):
@pytest.fixture
def document_store(self) -> DocumentStore:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
enhancements:
- |
Reorganized the document store test suite to isolate dataframe filter tests.
This change prepares for potential future deprecation of the Document class's dataframe field.
4 changes: 2 additions & 2 deletions test/document_stores/test_in_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@
from haystack import Document
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.testing.document_store import DocumentStoreBaseTests
from haystack.testing.document_store import DocumentStoreBaseTests, FilterDocumentsTestWithDataframe


class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904
class TestMemoryDocumentStore(DocumentStoreBaseTests, FilterDocumentsTestWithDataframe): # pylint: disable=R0904
"""
Test InMemoryDocumentStore's specific features
"""
Expand Down
Loading