From 6d395a1f4c35c576b8d32accdbc08a6de33956da Mon Sep 17 00:00:00 2001 From: NolanTrem <34580718+NolanTrem@users.noreply.github.com> Date: Sat, 30 Nov 2024 14:41:16 -0600 Subject: [PATCH] remove chunk_entity --- py/core/main/services/kg_service.py | 8 ---- py/core/providers/database/graph.py | 60 ----------------------------- 2 files changed, 68 deletions(-) diff --git a/py/core/main/services/kg_service.py b/py/core/main/services/kg_service.py index 22c90a96f..6334be62f 100644 --- a/py/core/main/services/kg_service.py +++ b/py/core/main/services/kg_service.py @@ -566,14 +566,6 @@ async def kg_entity_description( return all_results - @telemetry_event("get_graph_status") - async def get_graph_status( - self, - collection_id: UUID, - **kwargs, - ): - raise NotImplementedError("Not implemented") - @telemetry_event("kg_clustering") async def kg_clustering( self, diff --git a/py/core/providers/database/graph.py b/py/core/providers/database/graph.py index 105eb409b..58673a7d6 100644 --- a/py/core/providers/database/graph.py +++ b/py/core/providers/database/graph.py @@ -2199,7 +2199,6 @@ async def delete_node_via_document_id( # Execute separate DELETE queries delete_queries = [ - f"DELETE FROM {self._get_table_name('chunk_entity')} WHERE document_id = $1", f"DELETE FROM {self._get_table_name('relationship')} WHERE document_id = $1", f"DELETE FROM {self._get_table_name('entity')} WHERE document_id = $1", ] @@ -2716,7 +2715,6 @@ async def delete_graph_for_collection( # TODO: make these queries more efficient. Pass the document_ids as params. if cascade: DELETE_QUERIES += [ - f"DELETE FROM {self._get_table_name('chunk_entity')} WHERE document_id = ANY($1::uuid[]);", f"DELETE FROM {self._get_table_name('relationship')} WHERE document_id = ANY($1::uuid[]);", f"DELETE FROM {self._get_table_name('entity')} WHERE document_id = ANY($1::uuid[]);", f"DELETE FROM {self._get_table_name('graph_entity')} WHERE collection_id = $1;", @@ -2905,64 +2903,6 @@ async def get_entity_map( return entity_map - async def get_graph_status(self, collection_id: UUID) -> dict: - # check document_info table for the documents in the collection and return the status of each document - kg_extraction_statuses = await self.connection_manager.fetch_query( - f"SELECT document_id, extraction_status FROM {self._get_table_name('document_info')} WHERE collection_id = $1", - [collection_id], - ) - - document_ids = [ - doc_id["document_id"] for doc_id in kg_extraction_statuses - ] - - graph_cluster_statuses = await self.connection_manager.fetch_query( - f"SELECT enrichment_status FROM {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} WHERE id = $1", - [collection_id], - ) - - # entity and relationship counts - chunk_entity_count = await self.connection_manager.fetch_query( - f"SELECT COUNT(*) FROM {self._get_table_name('chunk_entity')} WHERE document_id = ANY($1)", - [document_ids], - ) - - relationship_count = await self.connection_manager.fetch_query( - f"SELECT COUNT(*) FROM {self._get_table_name('relationship')} WHERE document_id = ANY($1)", - [document_ids], - ) - - entity_count = await self.connection_manager.fetch_query( - f"SELECT COUNT(*) FROM {self._get_table_name('entity')} WHERE document_id = ANY($1)", - [document_ids], - ) - - graph_entity_count = await self.connection_manager.fetch_query( - f"SELECT COUNT(*) FROM {self._get_table_name('graph_entity')} WHERE collection_id = $1", - [collection_id], - ) - - community_count = await self.connection_manager.fetch_query( - f"SELECT COUNT(*) FROM {self._get_table_name('community')} WHERE collection_id = $1", - [collection_id], - ) - - return { - "kg_extraction_statuses": kg_extraction_statuses, - "graph_cluster_status": graph_cluster_statuses[0][ - "enrichment_status" - ], - "chunk_entity_count": chunk_entity_count[0]["count"], - "relationship_count": relationship_count[0]["count"], - "entity_count": entity_count[0]["count"], - "graph_entity_count": graph_entity_count[0]["count"], - "community_count": community_count[0]["count"], - } - - ####################### ESTIMATION METHODS ####################### - - ####################### GRAPH SEARCH METHODS ####################### - def _build_filters( self, filters: dict, parameters: list[Union[str, int, bytes]] ) -> str: