From cfae71fe1ff40aa257135becfedd132c91922289 Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Sun, 1 Dec 2024 19:38:38 -0800 Subject: [PATCH] up --- py/core/main/api/v3/documents_router.py | 25 +- py/core/main/api/v3/graph_router.py | 365 ++++++++++++-------- py/core/main/services/management_service.py | 46 +-- 3 files changed, 266 insertions(+), 170 deletions(-) diff --git a/py/core/main/api/v3/documents_router.py b/py/core/main/api/v3/documents_router.py index d42f7fa7a..675563f76 100644 --- a/py/core/main/api/v3/documents_router.py +++ b/py/core/main/api/v3/documents_router.py @@ -1270,7 +1270,10 @@ async def extract( settings = settings.dict() if settings else None # type: ignore if not auth_user.is_superuser: - logger.warning("Implement permission checks here.") + raise R2RException( + "Only a superuser can extract entities and relationships from a document.", + 403, + ) # If no run type is provided, default to estimate if not run_type: @@ -1376,6 +1379,15 @@ async def get_entities( Results are returned in the order they were extracted from the document. """ + if ( + not auth_user.is_superuser + and id not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the specified collection.", + 403, + ) + # First check if the document exists and user has access documents_overview_response = await self.services[ "management" @@ -1470,7 +1482,7 @@ async def get_entities( }, ) @self.base_endpoint - async def list_relationships( + async def get_relationships( id: UUID = Path( ..., description="The ID of the document to retrieve relationships for.", @@ -1505,6 +1517,15 @@ async def list_relationships( Results are returned in the order they were extracted from the document. """ + if ( + not auth_user.is_superuser + and id not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the specified collection.", + 403, + ) + # First check if the document exists and user has access documents_overview_response = await self.services[ "management" diff --git a/py/core/main/api/v3/graph_router.py b/py/core/main/api/v3/graph_router.py index 69a9d6308..91a92d295 100644 --- a/py/core/main/api/v3/graph_router.py +++ b/py/core/main/api/v3/graph_router.py @@ -83,7 +83,9 @@ async def _deduplicate_entities( R2RException: If user unauthorized or deduplication fails """ if not auth_user.is_superuser: - raise R2RException("Only superusers can deduplicate entities", 403) + raise R2RException( + "Only superusers can deduplicate a graphs entities", 403 + ) server_settings = ( self.providers.database.config.kg_entity_deduplication_settings @@ -267,11 +269,12 @@ async def get_graph( Retrieves detailed information about a specific graph by ID. """ if ( - not auth_user.is_superuser - and collection_id not in auth_user.graph_ids + # not auth_user.is_superuser + collection_id + not in auth_user.graph_ids ): raise R2RException( - "The currently authenticated user does not have access to the specified collection.", + "The currently authenticated user does not have access to the specified collection associated with the given graph.", 403, ) @@ -322,12 +325,17 @@ async def build_communities( - Summary generation prompt """ print("collection_id = ", collection_id) + if not auth_user.is_superuser: + raise R2RException( + "Only superusers can build communities", 403 + ) if ( - not auth_user.is_superuser - and collection_id not in auth_user.graph_ids + # not auth_user.is_superuser + collection_id + not in auth_user.graph_ids ): raise R2RException( - "The currently authenticated user does not have access to the specified graph.", + "The currently authenticated user does not have access to the collection associated with the given graph.", 403, ) @@ -439,12 +447,16 @@ async def reset( and must be deleted separately using the /entities and /relationships endpoints. """ + if not auth_user.is_superuser: + raise R2RException("Only superusers can reset a graph", 403) + if ( - not auth_user.is_superuser - and collection_id not in auth_user.graph_ids + # not auth_user.is_superuser + collection_id + not in auth_user.graph_ids ): raise R2RException( - "The currently authenticated user does not have access to the specified collection.", + "The currently authenticated user does not have access to the collection associated with the given graph.", 403, ) @@ -519,12 +531,18 @@ async def update_graph( This endpoint allows updating the name and description of an existing collection. The user must have appropriate permissions to modify the collection. """ + if not auth_user.is_superuser: + raise R2RException( + "Only superusers can update graph details", 403 + ) + if ( - not auth_user.is_superuser - and collection_id not in auth_user.collection_ids + # not auth_user.is_superuser + collection_id + not in auth_user.collection_ids ): raise R2RException( - "The currently authenticated user does not have access to the specified collection.", + "The currently authenticated user does not have access to the collection associated with the given graph.", 403, ) @@ -593,11 +611,12 @@ async def get_entities( ) -> WrappedEntitiesResponse: """Lists all entities in the graph with pagination support.""" if ( - not auth_user.is_superuser - and collection_id not in auth_user.graph_ids + # not auth_user.is_superuser + collection_id + not in auth_user.graph_ids ): raise R2RException( - "The currently authenticated user does not have access to the specified graph.", + "The currently authenticated user does not have access to the collection associated with the given graph.", 403, ) @@ -634,11 +653,12 @@ async def create_entity( ) -> WrappedEntityResponse: """Creates a new entity in the graph.""" if ( - not auth_user.is_superuser - and collection_id not in auth_user.graph_ids + # not auth_user.is_superuser + collection_id + not in auth_user.graph_ids ): raise R2RException( - "The currently authenticated user does not have access to the specified graph.", + "The currently authenticated user does not have access to the collection associated with the given graph.", 403, ) @@ -687,12 +707,18 @@ async def create_relationship( auth_user=Depends(self.providers.auth.auth_wrapper), ) -> WrappedRelationshipResponse: """Creates a new relationship in the graph.""" + if not auth_user.is_superuser: + raise R2RException( + "Only superusers can create relationships.", 403 + ) + if ( - not auth_user.is_superuser - and collection_id not in auth_user.graph_ids + # not auth_user.is_superuser + collection_id + not in auth_user.graph_ids ): raise R2RException( - "The currently authenticated user does not have access to the specified graph.", + "The currently authenticated user does not have access to the collection associated with the given graph.", 403, ) @@ -763,11 +789,12 @@ async def get_entity( ) -> WrappedEntityResponse: """Retrieves a specific entity by its ID.""" if ( - not auth_user.is_superuser - and collection_id not in auth_user.graph_ids + # not auth_user.is_superuser + collection_id + not in auth_user.graph_ids ): raise R2RException( - "The currently authenticated user does not have access to the specified graph.", + "The currently authenticated user does not have access to the collection associated with the given graph.", 403, ) @@ -807,12 +834,17 @@ async def update_entity( auth_user=Depends(self.providers.auth.auth_wrapper), ) -> WrappedEntityResponse: """Updates an existing entity in the graph.""" + if not auth_user.is_superuser: + raise R2RException( + "Only superusers can update graph entities.", 403 + ) if ( - not auth_user.is_superuser - and collection_id not in auth_user.graph_ids + # not auth_user.is_superuser + collection_id + not in auth_user.graph_ids ): raise R2RException( - "The currently authenticated user does not have access to the specified graph.", + "The currently authenticated user does not have access to the collection associated with the given graph.", 403, ) @@ -880,12 +912,18 @@ async def delete_entity( auth_user=Depends(self.providers.auth.auth_wrapper), ) -> WrappedBooleanResponse: """Removes an entity from the graph.""" + if not auth_user.is_superuser: + raise R2RException( + "Only superusers can delete graph details.", 403 + ) + if ( - not auth_user.is_superuser - and collection_id not in auth_user.graph_ids + # not auth_user.is_superuser + collection_id + not in auth_user.graph_ids ): raise R2RException( - "The currently authenticated user does not have access to the specified graph.", + "The currently authenticated user does not have access to the collection associated with the given graph.", 403, ) @@ -958,11 +996,12 @@ async def get_relationships( Lists all relationships in the graph with pagination support. """ if ( - not auth_user.is_superuser - and collection_id not in auth_user.graph_ids + # not auth_user.is_superuser + collection_id + not in auth_user.graph_ids ): raise R2RException( - "The currently authenticated user does not have access to the specified graph.", + "The currently authenticated user does not have access to the collection associated with the given graph.", 403, ) @@ -1032,11 +1071,12 @@ async def get_relationship( ) -> WrappedRelationshipResponse: """Retrieves a specific relationship by its ID.""" if ( - not auth_user.is_superuser - and collection_id not in auth_user.graph_ids + # not auth_user.is_superuser + collection_id + not in auth_user.graph_ids ): raise R2RException( - "The currently authenticated user does not have access to the specified graph.", + "The currently authenticated user does not have access to the collection associated with the given graph.", 403, ) @@ -1093,12 +1133,18 @@ async def update_relationship( auth_user=Depends(self.providers.auth.auth_wrapper), ) -> WrappedRelationshipResponse: """Updates an existing relationship in the graph.""" + if not auth_user.is_superuser: + raise R2RException( + "Only superusers can update graph details", 403 + ) + if ( - not auth_user.is_superuser - and collection_id not in auth_user.graph_ids + # not auth_user.is_superuser + collection_id + not in auth_user.graph_ids ): raise R2RException( - "The currently authenticated user does not have access to the specified graph.", + "The currently authenticated user does not have access to the collection associated with the given graph.", 403, ) @@ -1170,12 +1216,17 @@ async def delete_relationship( auth_user=Depends(self.providers.auth.auth_wrapper), ) -> WrappedBooleanResponse: """Removes a relationship from the graph.""" + if not auth_user.is_superuser: + raise R2RException( + "Only superusers can delete a relationship.", 403 + ) + if ( not auth_user.is_superuser and collection_id not in auth_user.graph_ids ): raise R2RException( - "The currently authenticated user does not have access to the specified graph.", + "The currently authenticated user does not have access to the collection associated with the given graph.", 403, ) @@ -1271,12 +1322,17 @@ async def create_community( The created communities will be integrated with any existing automatically detected communities in the graph's community structure. """ + if not auth_user.is_superuser: + raise R2RException( + "Only superusers can create a community.", 403 + ) + if ( not auth_user.is_superuser and collection_id not in auth_user.graph_ids ): raise R2RException( - "The currently authenticated user does not have access to the specified graph.", + "The currently authenticated user does not have access to the collection associated with the given graph.", 403, ) @@ -1351,11 +1407,12 @@ async def get_communities( Lists all communities in the graph with pagination support. """ if ( - not auth_user.is_superuser - and collection_id not in auth_user.graph_ids + # not auth_user.is_superuser + collection_id + not in auth_user.graph_ids ): raise R2RException( - "The currently authenticated user does not have access to the specified graph.", + "The currently authenticated user does not have access to the collection associated with the given graph.", 403, ) @@ -1424,11 +1481,12 @@ async def get_community( Retrieves a specific community by its ID. """ if ( - not auth_user.is_superuser - and collection_id not in auth_user.graph_ids + # not auth_user.is_superuser + collection_id + not in auth_user.graph_ids ): raise R2RException( - "The currently authenticated user does not have access to the specified graph.", + "The currently authenticated user does not have access to the collection associated with the given graph.", 403, ) @@ -1501,12 +1559,18 @@ async def delete_community( ), auth_user=Depends(self.providers.auth.auth_wrapper), ): + if not auth_user.is_superuser: + raise R2RException( + "Only superusers can delete communities", 403 + ) + if ( - not auth_user.is_superuser - and collection_id not in auth_user.graph_ids + # not auth_user.is_superuser + collection_id + not in auth_user.graph_ids ): raise R2RException( - "The currently authenticated user does not have access to the specified graph.", + "The currently authenticated user does not have access to the collection associated with the given graph.", 403, ) @@ -1583,12 +1647,18 @@ async def update_community( """ Updates an existing community in the graph. """ + if not auth_user.is_superuser: + raise R2RException( + "Only superusers can update communities.", 403 + ) + if ( - not auth_user.is_superuser - and collection_id not in auth_user.graph_ids + # not auth_user.is_superuser + collection_id + not in auth_user.graph_ids ): raise R2RException( - "The currently authenticated user does not have access to the specified graph.", + "The currently authenticated user does not have access to the collection associated with the given graph.", 403, ) @@ -1676,12 +1746,16 @@ async def pull( The user must have access to both the graph and the documents being added. """ # Check user permissions for graph + if not auth_user.is_superuser: + raise R2RException("Only superusers can `pull` a graph.", 403) + if ( - not auth_user.is_superuser - and collection_id not in auth_user.graph_ids + # not auth_user.is_superuser + collection_id + not in auth_user.graph_ids ): raise R2RException( - "The currently authenticated user does not have access to the specified graph.", + "The currently authenticated user does not have access to the collection associated with the given graph.", 403, ) @@ -1712,6 +1786,7 @@ async def pull( success = False for document in documents: + # TODO - Add better checks for user permissions if ( not auth_user.is_superuser and document.id @@ -1770,91 +1845,91 @@ async def pull( return GenericBooleanResponse(success=success) # type: ignore - @self.router.delete( - "/graphs/{collection_id}/documents/{document_id}", - summary="Remove document from graph", - openapi_extra={ - "x-codeSamples": [ - { - "lang": "Python", - "source": textwrap.dedent( - """ - from r2r import R2RClient - - client = R2RClient("http://localhost:7272") - # when using auth, do client.login(...) - - result = client.graphs.remove_document( - collection_id="d09dedb1-b2ab-48a5-b950-6e1f464d83e7", - document_id="f98db41a-5555-4444-3333-222222222222" - )""" - ), - }, - { - "lang": "JavaScript", - "source": textwrap.dedent( - """ - const { r2rClient } = require("r2r-js"); - - const client = new r2rClient("http://localhost:7272"); - - async function main() { - const response = await client.graphs.removeDocument({ - collectionId: "d09dedb1-b2ab-48a5-b950-6e1f464d83e7", - documentId: "f98db41a-5555-4444-3333-222222222222" - }); - } - - main(); - """ - ), - }, - ] - }, - ) - @self.base_endpoint - async def remove_document( - collection_id: UUID = Path( - ..., - description="The ID of the graph to remove the document from.", - ), - document_id: UUID = Path( - ..., description="The ID of the document to remove." - ), - auth_user=Depends(self.providers.auth.auth_wrapper), - ) -> WrappedBooleanResponse: - """ - Removes a document from a graph and removes any associated entities - - This endpoint: - 1. Removes the document ID from the graph's document_ids array - 2. Optionally deletes the document's copied entities and relationships - - The user must have access to both the graph and the document being removed. - """ - if ( - not auth_user.is_superuser - and collection_id not in auth_user.graph_ids - ): - raise R2RException( - "The currently authenticated user does not have access to the specified graph.", - 403, - ) - - if ( - not auth_user.is_superuser - and document_id not in auth_user.document_ids - ): - raise R2RException( - "The currently authenticated user does not have access to the specified document.", - 403, - ) - - success = ( - await self.providers.database.graph_handler.remove_documents( - id=collection_id, - document_ids=[document_id], # , delete_data=delete_data - ) - ) - - return GenericBooleanResponse(success=success) # type: ignore + # @self.router.delete( + # "/graphs/{collection_id}/documents/{document_id}", + # summary="Remove document from graph", + # openapi_extra={ + # "x-codeSamples": [ + # { + # "lang": "Python", + # "source": textwrap.dedent( + # """ + # from r2r import R2RClient + + # client = R2RClient("http://localhost:7272") + # # when using auth, do client.login(...) + + # result = client.graphs.remove_document( + # collection_id="d09dedb1-b2ab-48a5-b950-6e1f464d83e7", + # document_id="f98db41a-5555-4444-3333-222222222222" + # )""" + # ), + # }, + # { + # "lang": "JavaScript", + # "source": textwrap.dedent( + # """ + # const { r2rClient } = require("r2r-js"); + + # const client = new r2rClient("http://localhost:7272"); + + # async function main() { + # const response = await client.graphs.removeDocument({ + # collectionId: "d09dedb1-b2ab-48a5-b950-6e1f464d83e7", + # documentId: "f98db41a-5555-4444-3333-222222222222" + # }); + # } + + # main(); + # """ + # ), + # }, + # ] + # }, + # ) + # @self.base_endpoint + # async def remove_document( + # collection_id: UUID = Path( + # ..., + # description="The ID of the graph to remove the document from.", + # ), + # document_id: UUID = Path( + # ..., description="The ID of the document to remove." + # ), + # auth_user=Depends(self.providers.auth.auth_wrapper), + # ) -> WrappedBooleanResponse: + # """ + # Removes a document from a graph and removes any associated entities + + # This endpoint: + # 1. Removes the document ID from the graph's document_ids array + # 2. Optionally deletes the document's copied entities and relationships + + # The user must have access to both the graph and the document being removed. + # """ + # if ( + # not auth_user.is_superuser + # and collection_id not in auth_user.graph_ids + # ): + # raise R2RException( + # "The currently authenticated user does not have access to the collection associated with the given graph.", + # 403, + # ) + + # if ( + # not auth_user.is_superuser + # and document_id not in auth_user.document_ids + # ): + # raise R2RException( + # "The currently authenticated user does not have access to the collection associated with the given graph.", + # 403, + # ) + + # success = ( + # await self.providers.database.graph_handler.remove_documents( + # id=collection_id, + # document_ids=[document_id], # , delete_data=delete_data + # ) + # ) + + # return GenericBooleanResponse(success=success) # type: ignore diff --git a/py/core/main/services/management_service.py b/py/core/main/services/management_service.py index f61989632..78da92ab9 100644 --- a/py/core/main/services/management_service.py +++ b/py/core/main/services/management_service.py @@ -356,29 +356,29 @@ def process_filter(filter_dict: dict[str, Any]): ) for document_id in document_ids_to_purge: - # remaining_chunks = await self.providers.database.list_document_chunks( # FIXME: This was using the pagination defaults from before... We need to review if this is as intended. - # document_id=document_id, - # offset=0, - # limit=1000, - # ) - # if remaining_chunks["total_entries"] == 0: - # try: - # await self.providers.database.delete_from_documents_overview( - # document_id - # ) - # logger.info( - # f"Deleted document ID {document_id} from documents_overview." - # ) - # except Exception as e: - # logger.error( - # f"Error deleting document ID {document_id} from documents_overview: {e}" - # ) - # await self.providers.database.graph_handler.entities.delete( - # parent_id=document_id, store_type="document" - # ) - # await self.providers.database.graph_handler.relationships.delete( - # parent_id=document_id, store_type="document" - # ) + remaining_chunks = await self.providers.database.list_document_chunks( # FIXME: This was using the pagination defaults from before... We need to review if this is as intended. + document_id=document_id, + offset=0, + limit=1000, + ) + if remaining_chunks["total_entries"] == 0: + try: + await self.providers.database.delete_from_documents_overview( + document_id + ) + logger.info( + f"Deleted document ID {document_id} from documents_overview." + ) + except Exception as e: + logger.error( + f"Error deleting document ID {document_id} from documents_overview: {e}" + ) + await self.providers.database.graph_handler.entities.delete( + parent_id=document_id, store_type="document" + ) + await self.providers.database.graph_handler.relationships.delete( + parent_id=document_id, store_type="document" + ) collections = ( await self.providers.database.get_collections_overview( offset=0, limit=1000, filter_document_ids=[document_id]