From 570cc745a60ed8ac85d65dc2adaaacb579ac8d06 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Mon, 1 Jul 2024 23:26:10 +0200 Subject: [PATCH 01/10] Fix a bug for checking metadata values --- .../document_stores/chroma/document_store.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py b/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py index d39158db4..5be6a7938 100644 --- a/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py +++ b/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py @@ -428,8 +428,9 @@ def _query_result_to_documents(result: QueryResult) -> List[List[Document]]: } # prepare metadata - if metadatas := result.get("metadatas"): - document_dict["meta"] = dict(metadatas[i][j]) + metadatas = result.get("metadatas") + if metadatas and metadatas[i][j] is not None: + document_dict["meta"] = metadatas[i][j].copy() if embeddings := result.get("embeddings"): document_dict["embedding"] = np.array(embeddings[i][j]) From 18d7296298a4a326a3146054abb2904ce2b4bd60 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Tue, 2 Jul 2024 00:01:41 +0200 Subject: [PATCH 02/10] Added a unit test --- integrations/chroma/tests/test_document_store.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/integrations/chroma/tests/test_document_store.py b/integrations/chroma/tests/test_document_store.py index 4e1181ae2..2a3debcb6 100644 --- a/integrations/chroma/tests/test_document_store.py +++ b/integrations/chroma/tests/test_document_store.py @@ -65,6 +65,10 @@ def assert_documents_are_equal(self, received: List[Document], expected: List[Do assert doc_received.content == doc_expected.content assert doc_received.meta == doc_expected.meta + def test_document_store_search_without_metadata(self, document_store: ChromaDocumentStore): + document_store.write_documents([Document(content=e) for e in ["First document", "Second document"]] ) + document_store.search(["First document"], top_k=1) + def test_ne_filter(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): """ We customize this test because Chroma consider "not equal" true when From 88e2e2675894adaee5f0b1fe275c6c8cfedcceae Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Tue, 2 Jul 2024 00:47:00 +0200 Subject: [PATCH 03/10] Fix linting --- integrations/chroma/tests/test_document_store.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/integrations/chroma/tests/test_document_store.py b/integrations/chroma/tests/test_document_store.py index 2a3debcb6..d65da6b3a 100644 --- a/integrations/chroma/tests/test_document_store.py +++ b/integrations/chroma/tests/test_document_store.py @@ -67,7 +67,11 @@ def assert_documents_are_equal(self, received: List[Document], expected: List[Do def test_document_store_search_without_metadata(self, document_store: ChromaDocumentStore): document_store.write_documents([Document(content=e) for e in ["First document", "Second document"]] ) - document_store.search(["First document"], top_k=1) + results = document_store.search(["First document"], top_k=1)[0] + + # Assertions to verify correctness + assert len(results) == 1 + assert results[0].content == "First document" def test_ne_filter(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): """ From 282f2142a9cd94a75eba8af764649eba18ffa22e Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Tue, 2 Jul 2024 01:35:24 +0200 Subject: [PATCH 04/10] Fixing bug --- .../document_stores/chroma/document_store.py | 2 +- integrations/chroma/tests/test_document_store.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py b/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py index 5be6a7938..8effdbd65 100644 --- a/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py +++ b/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py @@ -430,7 +430,7 @@ def _query_result_to_documents(result: QueryResult) -> List[List[Document]]: # prepare metadata metadatas = result.get("metadatas") if metadatas and metadatas[i][j] is not None: - document_dict["meta"] = metadatas[i][j].copy() + document_dict["meta"] = metadatas[i][j] if embeddings := result.get("embeddings"): document_dict["embedding"] = np.array(embeddings[i][j]) diff --git a/integrations/chroma/tests/test_document_store.py b/integrations/chroma/tests/test_document_store.py index d65da6b3a..580e17cca 100644 --- a/integrations/chroma/tests/test_document_store.py +++ b/integrations/chroma/tests/test_document_store.py @@ -67,11 +67,11 @@ def assert_documents_are_equal(self, received: List[Document], expected: List[Do def test_document_store_search_without_metadata(self, document_store: ChromaDocumentStore): document_store.write_documents([Document(content=e) for e in ["First document", "Second document"]] ) - results = document_store.search(["First document"], top_k=1)[0] + result = document_store.search(["First document"], top_k=1) # Assertions to verify correctness - assert len(results) == 1 - assert results[0].content == "First document" + assert len(result) == 1 + assert result[0][0].content == "First document" def test_ne_filter(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): """ From 32d1cb94e99f90d9232f39c683158b353a64d00a Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Tue, 2 Jul 2024 01:36:42 +0200 Subject: [PATCH 05/10] Fixing Lint --- integrations/chroma/tests/test_document_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/chroma/tests/test_document_store.py b/integrations/chroma/tests/test_document_store.py index 580e17cca..061dbd736 100644 --- a/integrations/chroma/tests/test_document_store.py +++ b/integrations/chroma/tests/test_document_store.py @@ -66,7 +66,7 @@ def assert_documents_are_equal(self, received: List[Document], expected: List[Do assert doc_received.meta == doc_expected.meta def test_document_store_search_without_metadata(self, document_store: ChromaDocumentStore): - document_store.write_documents([Document(content=e) for e in ["First document", "Second document"]] ) + document_store.write_documents([Document(content=e) for e in ["First document", "Second document"]]) result = document_store.search(["First document"], top_k=1) # Assertions to verify correctness From 52026706097a7ae0e3c525c892cf889b7b55bfd0 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Tue, 2 Jul 2024 12:21:46 +0200 Subject: [PATCH 06/10] Fix bugs in test --- .../chroma/tests/test_document_store.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/integrations/chroma/tests/test_document_store.py b/integrations/chroma/tests/test_document_store.py index 061dbd736..b9d7e728a 100644 --- a/integrations/chroma/tests/test_document_store.py +++ b/integrations/chroma/tests/test_document_store.py @@ -65,14 +65,6 @@ def assert_documents_are_equal(self, received: List[Document], expected: List[Do assert doc_received.content == doc_expected.content assert doc_received.meta == doc_expected.meta - def test_document_store_search_without_metadata(self, document_store: ChromaDocumentStore): - document_store.write_documents([Document(content=e) for e in ["First document", "Second document"]]) - result = document_store.search(["First document"], top_k=1) - - # Assertions to verify correctness - assert len(result) == 1 - assert result[0][0].content == "First document" - def test_ne_filter(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): """ We customize this test because Chroma consider "not equal" true when @@ -100,6 +92,15 @@ def test_delete_not_empty_nonexisting(self, document_store: ChromaDocumentStore) assert document_store.filter_documents(filters={"id": doc.id}) == [doc] + def test_document_store_search_without_metadata(self): + document_store = ChromaDocumentStore() + document_store.write_documents([Document(content=e) for e in ["First document", "Second document"]]) + result = document_store.search(["First document"], top_k=1) + + # Assertions to verify correctness + assert len(result) == 1 + assert result[0][0].content == "First document" + @pytest.mark.integration def test_to_json(self, request): ds = ChromaDocumentStore( From 8cd47d0cf78e6d144168e8a87e3f3d0d8fbaa356 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Mon, 8 Jul 2024 17:55:35 +0200 Subject: [PATCH 07/10] Included review comments and added a test --- .../document_stores/chroma/document_store.py | 8 +++++-- .../chroma/tests/test_document_store.py | 23 +++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py b/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py index 8effdbd65..a91ea9ae8 100644 --- a/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py +++ b/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py @@ -429,8 +429,12 @@ def _query_result_to_documents(result: QueryResult) -> List[List[Document]]: # prepare metadata metadatas = result.get("metadatas") - if metadatas and metadatas[i][j] is not None: - document_dict["meta"] = metadatas[i][j] + print (metadatas) + try: + if metadatas and metadatas[i][j] is not None: + document_dict["meta"] = metadatas[i][j] + except IndexError: + raise IndexError("No metadata found for document: " + document_dict['id']) if embeddings := result.get("embeddings"): document_dict["embedding"] = np.array(embeddings[i][j]) diff --git a/integrations/chroma/tests/test_document_store.py b/integrations/chroma/tests/test_document_store.py index b9d7e728a..9ecd3f401 100644 --- a/integrations/chroma/tests/test_document_store.py +++ b/integrations/chroma/tests/test_document_store.py @@ -101,6 +101,29 @@ def test_document_store_search_without_metadata(self): assert len(result) == 1 assert result[0][0].content == "First document" + def test_document_store_search_with_metadata(self): + document_store = ChromaDocumentStore() + + # Writing documents to the document store + documents = [ + Document(content="First document", meta={"author": "Author1"}), + Document(content="Second document"), # No metadata + Document(content="Third document", meta={"author": "Author2"}), + Document(content="Fourth document") # No metadata + ] + + document_store.write_documents(documents) + # Search for a document with metadata + result_with_metadata = document_store.search(["Author1"], top_k=1) + print(result_with_metadata[0]) + assert result_with_metadata[0][0].content == "First document" + + # Search for a document without metadata + #result_without_metadata = document_store.search(["Author2"], top_k=1) + + #assert result_without_metadata[0][0].content == "Second document" + + @pytest.mark.integration def test_to_json(self, request): ds = ChromaDocumentStore( From 489044f1d7af2f1090ef3999e4a4d1cf42b59ccb Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Mon, 8 Jul 2024 18:09:33 +0200 Subject: [PATCH 08/10] Fixed linting --- .../document_stores/chroma/document_store.py | 6 +++--- integrations/chroma/tests/test_document_store.py | 9 +-------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py b/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py index a91ea9ae8..be1c894c7 100644 --- a/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py +++ b/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py @@ -429,12 +429,12 @@ def _query_result_to_documents(result: QueryResult) -> List[List[Document]]: # prepare metadata metadatas = result.get("metadatas") - print (metadatas) try: if metadatas and metadatas[i][j] is not None: document_dict["meta"] = metadatas[i][j] - except IndexError: - raise IndexError("No metadata found for document: " + document_dict['id']) + except IndexError as e: + msg = "No metadata found for document: " + document_dict["id"] + raise IndexError(msg) from e if embeddings := result.get("embeddings"): document_dict["embedding"] = np.array(embeddings[i][j]) diff --git a/integrations/chroma/tests/test_document_store.py b/integrations/chroma/tests/test_document_store.py index 9ecd3f401..de9d7d159 100644 --- a/integrations/chroma/tests/test_document_store.py +++ b/integrations/chroma/tests/test_document_store.py @@ -109,21 +109,14 @@ def test_document_store_search_with_metadata(self): Document(content="First document", meta={"author": "Author1"}), Document(content="Second document"), # No metadata Document(content="Third document", meta={"author": "Author2"}), - Document(content="Fourth document") # No metadata + Document(content="Fourth document"), # No metadata ] document_store.write_documents(documents) # Search for a document with metadata result_with_metadata = document_store.search(["Author1"], top_k=1) - print(result_with_metadata[0]) assert result_with_metadata[0][0].content == "First document" - # Search for a document without metadata - #result_without_metadata = document_store.search(["Author2"], top_k=1) - - #assert result_without_metadata[0][0].content == "Second document" - - @pytest.mark.integration def test_to_json(self, request): ds = ChromaDocumentStore( From 21fa63e3e773043ca1451a28771872b4b6df8e78 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Mon, 8 Jul 2024 18:41:33 +0200 Subject: [PATCH 09/10] Fixed Errors --- .../document_stores/chroma/document_store.py | 5 ++--- .../chroma/tests/test_document_store.py | 22 +++++-------------- 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py b/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py index be1c894c7..49cfced2e 100644 --- a/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py +++ b/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py @@ -432,9 +432,8 @@ def _query_result_to_documents(result: QueryResult) -> List[List[Document]]: try: if metadatas and metadatas[i][j] is not None: document_dict["meta"] = metadatas[i][j] - except IndexError as e: - msg = "No metadata found for document: " + document_dict["id"] - raise IndexError(msg) from e + except IndexError: + pass if embeddings := result.get("embeddings"): document_dict["embedding"] = np.array(embeddings[i][j]) diff --git a/integrations/chroma/tests/test_document_store.py b/integrations/chroma/tests/test_document_store.py index de9d7d159..c18ee7051 100644 --- a/integrations/chroma/tests/test_document_store.py +++ b/integrations/chroma/tests/test_document_store.py @@ -92,30 +92,20 @@ def test_delete_not_empty_nonexisting(self, document_store: ChromaDocumentStore) assert document_store.filter_documents(filters={"id": doc.id}) == [doc] - def test_document_store_search_without_metadata(self): + def test_document_store_search(self): document_store = ChromaDocumentStore() - document_store.write_documents([Document(content=e) for e in ["First document", "Second document"]]) - result = document_store.search(["First document"], top_k=1) - - # Assertions to verify correctness - assert len(result) == 1 - assert result[0][0].content == "First document" - - def test_document_store_search_with_metadata(self): - document_store = ChromaDocumentStore() - - # Writing documents to the document store documents = [ Document(content="First document", meta={"author": "Author1"}), Document(content="Second document"), # No metadata Document(content="Third document", meta={"author": "Author2"}), Document(content="Fourth document"), # No metadata ] - document_store.write_documents(documents) - # Search for a document with metadata - result_with_metadata = document_store.search(["Author1"], top_k=1) - assert result_with_metadata[0][0].content == "First document" + result = document_store.search(["Third"], top_k=1) + + # Assertions to verify correctness + assert len(result) == 1 + assert result[0][0].content == "Third document" @pytest.mark.integration def test_to_json(self, request): From 0ab48ff8419691b60ff5ac2b0a04638538e94e68 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Mon, 8 Jul 2024 18:55:35 +0200 Subject: [PATCH 10/10] Update integrations/chroma/tests/test_document_store.py Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> --- integrations/chroma/tests/test_document_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/chroma/tests/test_document_store.py b/integrations/chroma/tests/test_document_store.py index c18ee7051..774096a15 100644 --- a/integrations/chroma/tests/test_document_store.py +++ b/integrations/chroma/tests/test_document_store.py @@ -92,7 +92,7 @@ def test_delete_not_empty_nonexisting(self, document_store: ChromaDocumentStore) assert document_store.filter_documents(filters={"id": doc.id}) == [doc] - def test_document_store_search(self): + def test_search(self): document_store = ChromaDocumentStore() documents = [ Document(content="First document", meta={"author": "Author1"}),