diff --git a/haystack/testing/document_store.py b/haystack/testing/document_store.py index 6d2bda2804..a86823ebf5 100644 --- a/haystack/testing/document_store.py +++ b/haystack/testing/document_store.py @@ -174,74 +174,86 @@ def test_delete_documents_non_existing_document(self, document_store: DocumentSt assert document_store.count_documents() == 1 -class FilterableDocsFixtureMixin: +def create_filterable_docs(include_dataframe_docs: bool = False) -> List[Document]: """ - Mixin class that adds a filterable_docs() fixture to a test class. + Create a list of filterable documents to be used in the filterable_docs and filterable_docs_with_dataframe fixtures. """ - @pytest.fixture - def filterable_docs(self) -> List[Document]: - """Fixture that returns a list of Documents that can be used to test filtering.""" - documents = [] - for i in range(3): - documents.append( - Document( - content=f"A Foo Document {i}", - meta={ - "name": f"name_{i}", - "page": "100", - "chapter": "intro", - "number": 2, - "date": "1969-07-21T20:17:40", - }, - embedding=_random_embeddings(768), - ) + documents = [] + for i in range(3): + documents.append( + Document( + content=f"A Foo Document {i}", + meta={ + "name": f"name_{i}", + "page": "100", + "chapter": "intro", + "number": 2, + "date": "1969-07-21T20:17:40", + }, + embedding=_random_embeddings(768), ) - documents.append( - Document( - content=f"A Bar Document {i}", - meta={ - "name": f"name_{i}", - "page": "123", - "chapter": "abstract", - "number": -2, - "date": "1972-12-11T19:54:58", - }, - embedding=_random_embeddings(768), - ) + ) + documents.append( + Document( + content=f"A Bar Document {i}", + meta={ + "name": f"name_{i}", + "page": "123", + "chapter": "abstract", + "number": -2, + "date": "1972-12-11T19:54:58", + }, + embedding=_random_embeddings(768), ) - documents.append( - Document( - content=f"A Foobar Document {i}", - meta={ - "name": f"name_{i}", - "page": "90", - "chapter": "conclusion", - "number": -10, - "date": 
"1989-11-09T17:53:00", - }, - embedding=_random_embeddings(768), - ) + ) + documents.append( + Document( + content=f"A Foobar Document {i}", + meta={ + "name": f"name_{i}", + "page": "90", + "chapter": "conclusion", + "number": -10, + "date": "1989-11-09T17:53:00", + }, + embedding=_random_embeddings(768), ) - documents.append( - Document( - content=f"Document {i} without embedding", - meta={"name": f"name_{i}", "no_embedding": True, "chapter": "conclusion"}, - ) + ) + documents.append( + Document( + content=f"Document {i} without embedding", + meta={"name": f"name_{i}", "no_embedding": True, "chapter": "conclusion"}, ) + ) + documents.append( + Document(content=f"Doc {i} with zeros emb", meta={"name": "zeros_doc"}, embedding=TEST_EMBEDDING_1) + ) + documents.append( + Document(content=f"Doc {i} with ones emb", meta={"name": "ones_doc"}, embedding=TEST_EMBEDDING_2) + ) + + if include_dataframe_docs: + for i in range(3): documents.append(Document(dataframe=pd.DataFrame([i]), meta={"name": f"table_doc_{i}"})) - documents.append( - Document(content=f"Doc {i} with zeros emb", meta={"name": "zeros_doc"}, embedding=TEST_EMBEDDING_1) - ) - documents.append( - Document(content=f"Doc {i} with ones emb", meta={"name": "ones_doc"}, embedding=TEST_EMBEDDING_2) - ) - return documents + + return documents + + +class FilterableDocsFixtureMixin: + """ + Mixin class that adds a filterable_docs() fixture to a test class. + """ + + @pytest.fixture + def filterable_docs(self) -> List[Document]: + """Fixture that returns a list of Documents that can be used to test filtering.""" + return create_filterable_docs(include_dataframe_docs=False) class FilterDocumentsTest(AssertDocumentsEqualMixin, FilterableDocsFixtureMixin): """ - Utility class to test a Document Store `filter_documents` method using different types of filters. + Utility class to test a Document Store `filter_documents` method using different types of filters. 
To use it create a custom test class and override the `document_store` fixture to return your Document Store. Example usage: @@ -270,16 +282,6 @@ def test_comparison_equal(self, document_store, filterable_docs): result = document_store.filter_documents(filters={"field": "meta.number", "operator": "==", "value": 100}) self.assert_documents_are_equal(result, [d for d in filterable_docs if d.meta.get("number") == 100]) - def test_comparison_equal_with_dataframe(self, document_store, filterable_docs): - """Test filter_documents() with == comparator and dataframe""" - document_store.write_documents(filterable_docs) - result = document_store.filter_documents( - filters={"field": "dataframe", "operator": "==", "value": pd.DataFrame([1])} - ) - self.assert_documents_are_equal( - result, [d for d in filterable_docs if d.dataframe is not None and d.dataframe.equals(pd.DataFrame([1]))] - ) - def test_comparison_equal_with_none(self, document_store, filterable_docs): """Test filter_documents() with == comparator and None""" document_store.write_documents(filterable_docs) @@ -293,16 +295,6 @@ def test_comparison_not_equal(self, document_store, filterable_docs): result = document_store.filter_documents({"field": "meta.number", "operator": "!=", "value": 100}) self.assert_documents_are_equal(result, [d for d in filterable_docs if d.meta.get("number") != 100]) - def test_comparison_not_equal_with_dataframe(self, document_store, filterable_docs): - """Test filter_documents() with != comparator and dataframe""" - document_store.write_documents(filterable_docs) - result = document_store.filter_documents( - filters={"field": "dataframe", "operator": "!=", "value": pd.DataFrame([1])} - ) - self.assert_documents_are_equal( - result, [d for d in filterable_docs if d.dataframe is None or not d.dataframe.equals(pd.DataFrame([1]))] - ) - def test_comparison_not_equal_with_none(self, document_store, filterable_docs): """Test filter_documents() with != comparator and None""" 
document_store.write_documents(filterable_docs) @@ -340,12 +332,6 @@ def test_comparison_greater_than_with_string(self, document_store, filterable_do with pytest.raises(FilterError): document_store.filter_documents(filters={"field": "meta.number", "operator": ">", "value": "1"}) - def test_comparison_greater_than_with_dataframe(self, document_store, filterable_docs): - """Test filter_documents() with > comparator and dataframe""" - document_store.write_documents(filterable_docs) - with pytest.raises(FilterError): - document_store.filter_documents(filters={"field": "dataframe", "operator": ">", "value": pd.DataFrame([1])}) - def test_comparison_greater_than_with_list(self, document_store, filterable_docs): """Test filter_documents() with > comparator and list""" document_store.write_documents(filterable_docs) @@ -389,14 +375,6 @@ def test_comparison_greater_than_equal_with_string(self, document_store, filtera with pytest.raises(FilterError): document_store.filter_documents(filters={"field": "meta.number", "operator": ">=", "value": "1"}) - def test_comparison_greater_than_equal_with_dataframe(self, document_store, filterable_docs): - """Test filter_documents() with >= comparator and dataframe""" - document_store.write_documents(filterable_docs) - with pytest.raises(FilterError): - document_store.filter_documents( - filters={"field": "dataframe", "operator": ">=", "value": pd.DataFrame([1])} - ) - def test_comparison_greater_than_equal_with_list(self, document_store, filterable_docs): """Test filter_documents() with >= comparator and list""" document_store.write_documents(filterable_docs) @@ -440,12 +418,6 @@ def test_comparison_less_than_with_string(self, document_store, filterable_docs) with pytest.raises(FilterError): document_store.filter_documents(filters={"field": "meta.number", "operator": "<", "value": "1"}) - def test_comparison_less_than_with_dataframe(self, document_store, filterable_docs): - """Test filter_documents() with < comparator and dataframe""" - 
document_store.write_documents(filterable_docs) - with pytest.raises(FilterError): - document_store.filter_documents(filters={"field": "dataframe", "operator": "<", "value": pd.DataFrame([1])}) - def test_comparison_less_than_with_list(self, document_store, filterable_docs): """Test filter_documents() with < comparator and list""" document_store.write_documents(filterable_docs) @@ -489,14 +461,6 @@ def test_comparison_less_than_equal_with_string(self, document_store, filterable with pytest.raises(FilterError): document_store.filter_documents(filters={"field": "meta.number", "operator": "<=", "value": "1"}) - def test_comparison_less_than_equal_with_dataframe(self, document_store, filterable_docs): - """Test filter_documents() with <= comparator and dataframe""" - document_store.write_documents(filterable_docs) - with pytest.raises(FilterError): - document_store.filter_documents( - filters={"field": "dataframe", "operator": "<=", "value": pd.DataFrame([1])} - ) - def test_comparison_less_than_equal_with_list(self, document_store, filterable_docs): """Test filter_documents() with <= comparator and list""" document_store.write_documents(filterable_docs) @@ -638,6 +602,83 @@ def test_missing_condition_value_key(self, document_store, filterable_docs): ) +class FilterableDocsFixtureMixinWithDataframe: + """ + Mixin class that adds a filterable_docs_with_dataframe() fixture to a test class, including dataframe documents. + """ + + @pytest.fixture + def filterable_docs_with_dataframe(self) -> List[Document]: + """Fixture that returns a list of Documents including dataframe documents.""" + documents = create_filterable_docs(include_dataframe_docs=True) + + return documents + + +class FilterDocumentsTestWithDataframe(AssertDocumentsEqualMixin, FilterableDocsFixtureMixinWithDataframe): + """ + Utility class to test a Document Store `filter_documents` method specifically for DataFrame documents. 
+ """ + + def test_comparison_equal_with_dataframe(self, document_store, filterable_docs_with_dataframe): + """Test filter_documents() with == comparator and dataframe""" + document_store.write_documents(filterable_docs_with_dataframe) + result = document_store.filter_documents( + filters={"field": "dataframe", "operator": "==", "value": pd.DataFrame([1])} + ) + self.assert_documents_are_equal( + result, + [ + d + for d in filterable_docs_with_dataframe + if d.dataframe is not None and d.dataframe.equals(pd.DataFrame([1])) + ], + ) + + def test_comparison_not_equal_with_dataframe(self, document_store, filterable_docs_with_dataframe): + """Test filter_documents() with != comparator and dataframe""" + document_store.write_documents(filterable_docs_with_dataframe) + result = document_store.filter_documents( + filters={"field": "dataframe", "operator": "!=", "value": pd.DataFrame([1])} + ) + self.assert_documents_are_equal( + result, + [ + d + for d in filterable_docs_with_dataframe + if d.dataframe is None or not d.dataframe.equals(pd.DataFrame([1])) + ], + ) + + def test_comparison_greater_than_with_dataframe(self, document_store, filterable_docs_with_dataframe): + """Test filter_documents() with > comparator and dataframe""" + document_store.write_documents(filterable_docs_with_dataframe) + with pytest.raises(FilterError): + document_store.filter_documents(filters={"field": "dataframe", "operator": ">", "value": pd.DataFrame([1])}) + + def test_comparison_greater_than_equal_with_dataframe(self, document_store, filterable_docs_with_dataframe): + """Test filter_documents() with >= comparator and dataframe""" + document_store.write_documents(filterable_docs_with_dataframe) + with pytest.raises(FilterError): + document_store.filter_documents( + filters={"field": "dataframe", "operator": ">=", "value": pd.DataFrame([1])} + ) + + def test_comparison_less_than_with_dataframe(self, document_store, filterable_docs_with_dataframe): + """Test filter_documents() with < 
comparator and dataframe""" + document_store.write_documents(filterable_docs_with_dataframe) + with pytest.raises(FilterError): + document_store.filter_documents(filters={"field": "dataframe", "operator": "<", "value": pd.DataFrame([1])}) + + def test_comparison_less_than_equal_with_dataframe(self, document_store, filterable_docs_with_dataframe): + """Test filter_documents() with <= comparator and dataframe""" + document_store.write_documents(filterable_docs_with_dataframe) + with pytest.raises(FilterError): + document_store.filter_documents( + filters={"field": "dataframe", "operator": "<=", "value": pd.DataFrame([1])} + ) + + class DocumentStoreBaseTests(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest, FilterDocumentsTest): @pytest.fixture def document_store(self) -> DocumentStore: diff --git a/releasenotes/notes/reorganize-docstore-testing-for-dataframes-3825910ade718d51.yaml b/releasenotes/notes/reorganize-docstore-testing-for-dataframes-3825910ade718d51.yaml new file mode 100644 index 0000000000..935c3cbab0 --- /dev/null +++ b/releasenotes/notes/reorganize-docstore-testing-for-dataframes-3825910ade718d51.yaml @@ -0,0 +1,5 @@ +--- +enhancements: + - | + Reorganized the document store test suite to isolate dataframe filter tests in the new `FilterDocumentsTestWithDataframe` class. `DocumentStoreBaseTests` and the `filterable_docs` fixture no longer cover dataframe documents; test classes that still need these tests must also inherit from `FilterDocumentsTestWithDataframe`. + This change prepares for potential future deprecation of the Document class's dataframe field.
diff --git a/test/document_stores/test_in_memory.py b/test/document_stores/test_in_memory.py index ba623eedb3..8c2a313d92 100644 --- a/test/document_stores/test_in_memory.py +++ b/test/document_stores/test_in_memory.py @@ -11,10 +11,10 @@ from haystack import Document from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError from haystack.document_stores.in_memory import InMemoryDocumentStore -from haystack.testing.document_store import DocumentStoreBaseTests +from haystack.testing.document_store import DocumentStoreBaseTests, FilterDocumentsTestWithDataframe -class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904 +class TestMemoryDocumentStore(DocumentStoreBaseTests, FilterDocumentsTestWithDataframe): # pylint: disable=R0904 """ Test InMemoryDocumentStore's specific features """