From 4c94dc7c4205ea07be5876ef5a064654ca751813 Mon Sep 17 00:00:00 2001 From: brendancicchi Date: Tue, 3 Sep 2024 09:54:07 -0400 Subject: [PATCH] #1047 Remove count_documents from delete_documents Replaced the expensive emptiness check in delete_documents, which performed a full count_documents() over the collection, with a single find_one_document lookup. This fixes issue #1047. --- .../document_stores/astra/astra_client.py | 18 ++++++++++++++++++ .../document_stores/astra/document_store.py | 4 ++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/integrations/astra/src/haystack_integrations/document_stores/astra/astra_client.py b/integrations/astra/src/haystack_integrations/document_stores/astra/astra_client.py index 5a88a0fe9..b594f87d3 100644 --- a/integrations/astra/src/haystack_integrations/document_stores/astra/astra_client.py +++ b/integrations/astra/src/haystack_integrations/document_stores/astra/astra_client.py @@ -231,6 +231,24 @@ def find_documents(self, find_query): else: logger.warning(f"No documents found: {response_dict}") + def find_one_document(self, find_query): + """ + Find one document in the Astra index. + + :param find_query: a dictionary with the query options + :returns: the document found in the index + """ + response_dict = self._astra_db_collection.find_one( + filter=find_query.get("filter"), + options=find_query.get("options"), + projection={"*": 1}, + ) + + if "data" in response_dict and "document" in response_dict["data"]: + return response_dict["data"]["document"] + else: + logger.warning(f"No document found: {response_dict}") + def get_documents(self, ids: List[str], batch_size: int = 20) -> QueryResponse: """ Get documents from the Astra index by their ids. 
diff --git a/integrations/astra/src/haystack_integrations/document_stores/astra/document_store.py b/integrations/astra/src/haystack_integrations/document_stores/astra/document_store.py index 1dea6e08b..a7a7a231c 100644 --- a/integrations/astra/src/haystack_integrations/document_stores/astra/document_store.py +++ b/integrations/astra/src/haystack_integrations/document_stores/astra/document_store.py @@ -411,8 +411,8 @@ def delete_documents(self, document_ids: Optional[List[str]] = None, delete_all: :param delete_all: if `True`, delete all documents. :raises MissingDocumentError: if no document was deleted but document IDs were provided. """ - deletion_counter = 0 - if self.index.count_documents() > 0: + if self.index.find_one_document({"filter": {}}) is not None: + deletion_counter = 0 if document_ids is not None: for batch in _batches(document_ids, MAX_BATCH_SIZE): deletion_counter += self.index.delete(ids=batch)