From 7620d9f221655b6cb9327014e91f34b2f351f675 Mon Sep 17 00:00:00 2001 From: NolanTrem <34580718+NolanTrem@users.noreply.github.com> Date: Sat, 30 Nov 2024 14:30:38 -0600 Subject: [PATCH] More --- py/core/pipes/kg/storage.py | 1 - py/core/providers/database/graph.py | 60 +---------------------------- 2 files changed, 1 insertion(+), 60 deletions(-) diff --git a/py/core/pipes/kg/storage.py b/py/core/pipes/kg/storage.py index 53a0f132e..510cd5ca6 100644 --- a/py/core/pipes/kg/storage.py +++ b/py/core/pipes/kg/storage.py @@ -5,7 +5,6 @@ from core.base import AsyncState, KGExtraction, R2RDocumentProcessingError from core.base.pipes.base_pipe import AsyncPipe -from core.providers.database.graph import DataLevel from core.providers.database.postgres import PostgresDBProvider from core.providers.logger.r2r_logger import SqlitePersistentLoggingProvider diff --git a/py/core/providers/database/graph.py b/py/core/providers/database/graph.py index c1fa9bac4..105eb409b 100644 --- a/py/core/providers/database/graph.py +++ b/py/core/providers/database/graph.py @@ -2829,7 +2829,7 @@ async def perform_graph_clustering( # relationship_ids_cache, leiden_params, collection_id # ) # else: - num_communities = await self._cluster_and_add_community_info( + return await self._cluster_and_add_community_info( relationships=relationships, relationship_ids_cache=relationship_ids_cache, leiden_params=leiden_params, @@ -2837,10 +2837,6 @@ async def perform_graph_clustering( # graph_id=collection_id, ) - return num_communities - - ####################### MANAGEMENT METHODS ####################### - async def get_entity_map( self, offset: int, limit: int, document_id: UUID ) -> dict[str, dict[str, list[dict[str, Any]]]]: @@ -3448,8 +3444,6 @@ async def _compute_leiden_communities( except ImportError as e: raise ImportError("Please install the graspologic package.") from e - ####################### UTILITY METHODS ####################### - async def get_existing_document_entity_chunk_ids( self, document_id: UUID ) -> list[str]: @@ -3463,23 +3457,6 @@ async def get_existing_document_entity_chunk_ids( ) ] - async def create_vector_index(self): - # need to implement this. Just call vector db provider's create_vector_index method. - # this needs to be run periodically for every collection. - raise NotImplementedError - - async def structured_query(self): - raise NotImplementedError - - async def update_extraction_prompt(self): - raise NotImplementedError - - async def update_kg_search_prompt(self): - raise NotImplementedError - - async def upsert_relationships(self): - raise NotImplementedError - async def get_entity_count( self, collection_id: Optional[UUID] = None, @@ -3528,41 +3505,6 @@ async def get_entity_count( "count" ] - async def get_relationship_count( - self, - collection_id: Optional[UUID] = None, - document_id: Optional[UUID] = None, - ) -> int: - if collection_id is None and document_id is None: - raise ValueError( - "Either collection_id or document_id must be provided." - ) - - conditions = [] - params = [] - - if collection_id: - conditions.append( - f""" - document_id = ANY( - SELECT document_id FROM {self._get_table_name("document_info")} - WHERE $1 = ANY(collection_ids) - ) - """ - ) - params.append(str(collection_id)) - else: - conditions.append("document_id = $1") - params.append(str(document_id)) - - QUERY = f""" - SELECT COUNT(*) FROM {self._get_table_name("relationship")} - WHERE {" AND ".join(conditions)} - """ - return (await self.connection_manager.fetch_query(QUERY, params))[0][ - "count" - ] - async def update_entity_descriptions(self, entities: list[Entity]): query = f"""