From 6a3d4602cc7e8e84ba5c9827e8e379fbec441ced Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty <68796651+emrgnt-cmplxty@users.noreply.github.com> Date: Wed, 16 Oct 2024 15:04:34 -0700 Subject: [PATCH 01/27] Feature/include vectors option document chunks (#1419) * Fix async JSON parsing (#1408) * Fix async JSON parsing * Remove score completion from js * clean up js * lockfile * Feature/build custom logger (#1409) * building a custom logger for r2r * fix log * maintain bkwd compat * Feature/add kg description prompt (#1411) * add kg desc prompt * add kg desc prompt * add kg desc prompt * fix prompt name * separate test run freq * task_id check fix * add ingestion docs * updatet * add * rm old prompts * rm old prompots * rm old prompts * rm old prompts * add option to include vectors in document chunks * checkin * update vector --------- Co-authored-by: Nolan Tremelling <34580718+NolanTrem@users.noreply.github.com> --- docs/api-reference/openapi.json | 2 +- docs/documentation/cli/management.mdx | 5 +++++ docs/documentation/js-sdk/ingestion.mdx | 20 ++++++++++++++++--- docs/documentation/python-sdk/ingestion.mdx | 21 ++++++++++++++++++-- py/cli/commands/management.py | 14 +++++++++++-- py/core/main/api/management_router.py | 3 ++- py/core/main/services/management_service.py | 6 +++++- py/core/providers/database/vector.py | 19 +++++++++++++++--- py/sdk/management.py | 3 +++ py/shared/api/models/management/responses.py | 1 + 10 files changed, 81 insertions(+), 13 deletions(-) diff --git a/docs/api-reference/openapi.json b/docs/api-reference/openapi.json index e07db4b35..c14c0afe0 100644 --- a/docs/api-reference/openapi.json +++ b/docs/api-reference/openapi.json @@ -1 +1 @@ -{"openapi":"3.1.0","info":{"title":"R2R Application API","version":"1.0.0"},"paths":{"/v2/ingest_files":{"post":{"summary":"Ingest Files App","description":"Ingest files into the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to ingest files and their associated metadatas into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only ingest files for their own access. More expansive collection permissioning is under development.","operationId":"ingest_files_app_v2_ingest_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_ingest_files_app_v2_ingest_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_IngestionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.ingest_files(\n file_paths=[\"pg_essay_1.html\", \"got.txt\"],\n metadatas=[{\"metadata_1\":\"some random metadata\"}, {\"metadata_2\": \"some other random metadata\"}],\n document_ids=None\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/ingest_files\" \\\n -H \"Content-Type: multipart/form-data\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1.html;type=text/html\" \\\n -F \"file=@got.txt;type=text/plain\" \\\n -F 'metadatas=[{},{}]' \\\n -F 'document_ids=null'\n"}]}},"/v2/update_files":{"post":{"summary":"Update Files App","description":"Update existing files in the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to update files and their associated metadatas into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only update their own files. More expansive collection permissioning is under development.","operationId":"update_files_app_v2_update_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_update_files_app_v2_update_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdateResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.update_files(\n file_paths=[\"pg_essay_1_v2.txt\"],\n document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/update_files\" \\\n -H \"Content-Type: multipart/form-data\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1_v2.txt;type=text/plain\" \\\n -F 'document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]'\n"}]}},"/v2/ingest_chunks":{"post":{"summary":"Ingest Chunks App","description":"Ingest text chunks into the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to ingest pre-parsed text chunks into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only ingest chunks for their own access. More expansive collection permissioning is under development.","operationId":"ingest_chunks_app_v2_ingest_chunks_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_ingest_chunks_app_v2_ingest_chunks_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_IngestionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.ingest_chunks(\n chunks=[\n {\n \"text\": \"Another chunk of text\",\n },\n {\n \"text\": \"Yet another chunk of text\",\n },\n {\n \"text\": \"A chunk of text\",\n },\n ],\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/ingest_chunks\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"chunks\": [\n {\n \"text\": \"Another chunk of text\"\n },\n {\n \"text\": \"Yet another chunk of text\"\n },\n {\n \"text\": \"A chunk of text\"\n }\n ],\n \"document_id\": \"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\",\n \"metadata\": {}\n }'\n"}]}},"/v2/create_vector_index":{"post":{"summary":"Create Vector Index App","operationId":"create_vector_index_app_v2_create_vector_index_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_vector_index_app_v2_create_vector_index_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CreateVectorIndexResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/health":{"get":{"summary":"Health Check","operationId":"health_check_v2_health_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/v2/server_stats":{"get":{"summary":"Server Stats","operationId":"server_stats_v2_server_stats_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_ServerStats_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/update_prompt":{"post":{"summary":"Update Prompt App","operationId":"update_prompt_app_v2_update_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_prompt_app_v2_update_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/add_prompt":{"post":{"summary":"Add Prompt App","operationId":"add_prompt_app_v2_add_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_prompt_app_v2_add_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_prompt/{prompt_name}":{"get":{"summary":"Get Prompt App","operationId":"get_prompt_app_v2_get_prompt__prompt_name__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"},{"name":"inputs","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"description":"JSON-encoded prompt inputs","title":"Inputs"},"description":"JSON-encoded prompt inputs"},{"name":"prompt_override","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"description":"Prompt override","title":"Prompt Override"},"description":"Prompt override"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/get_all_prompts":{"get":{"summary":"Get All Prompts App","operationId":"get_all_prompts_app_v2_get_all_prompts_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AllPromptsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_prompt/{prompt_name}":{"delete":{"summary":"Delete Prompt App","operationId":"delete_prompt_app_v2_delete_prompt__prompt_name__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/analytics":{"get":{"summary":"Get Analytics App","operationId":"get_analytics_app_v2_analytics_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filter_criteria","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Filter Criteria"}},{"name":"analysis_types","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Analysis Types"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AnalyticsResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logs":{"get":{"summary":"Logs App","operationId":"logs_app_v2_logs_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"run_type_filter","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"default":"","title":"Run Type Filter"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_LogResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/app_settings":{"get":{"summary":"App Settings","operationId":"app_settings_v2_app_settings_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AppSettingsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/users_overview":{"get":{"summary":"Users Overview App","operationId":"users_overview_app_v2_users_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"default":[],"title":"User Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_UserOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete":{"delete":{"summary":"Delete App","operationId":"delete_app_v2_delete_delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filters","in":"query","required":true,"schema":{"type":"string","description":"JSON-encoded filters","title":"Filters"},"description":"JSON-encoded filters"}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/download_file/{document_id}":{"get":{"summary":"Download File App","description":"Download a file by its document ID as a stream.","operationId":"download_file_app_v2_download_file__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"}],"responses":{"200":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/documents_overview":{"get":{"summary":"Documents Overview App","operationId":"documents_overview_app_v2_documents_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_ids","in":"query","required":false,"schema":{"type":"array","items":{"type":"string"},"default":[],"title":"Document Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","minimum":-1,"description":"Number of items to return. Use -1 to return all items.","default":100,"title":"Limit"},"description":"Number of items to return. Use -1 to return all items."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/document_chunks/{document_id}":{"get":{"summary":"Document Chunks App","operationId":"document_chunks_app_v2_document_chunks__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","title":"Document Id"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentChunkResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collections_overview":{"get":{"summary":"Collections Overview App","operationId":"collections_overview_app_v2_collections_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"title":"Collection Ids"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","maximum":1000,"minimum":1},{"type":"null"}],"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_collection":{"post":{"summary":"Create Collection App","operationId":"create_collection_app_v2_create_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_collection_app_v2_create_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_collection/{collection_id}":{"get":{"summary":"Get Collection App","operationId":"get_collection_app_v2_get_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/update_collection":{"put":{"summary":"Update Collection App","operationId":"update_collection_app_v2_update_collection_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_collection_app_v2_update_collection_put"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_collection/{collection_id}":{"delete":{"summary":"Delete Collection App","operationId":"delete_collection_app_v2_delete_collection__collection_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/list_collections":{"get":{"summary":"List Collections App","operationId":"list_collections_app_v2_list_collections_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/add_user_to_collection":{"post":{"summary":"Add User To Collection App","operationId":"add_user_to_collection_app_v2_add_user_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_user_to_collection_app_v2_add_user_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_user_from_collection":{"post":{"summary":"Remove User From Collection App","operationId":"remove_user_from_collection_app_v2_remove_user_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_users_in_collection/{collection_id}":{"get":{"summary":"Get Users In Collection App","operationId":"get_users_in_collection_app_v2_get_users_in_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_UserResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user_collections/{user_id}":{"get":{"summary":"Get Collections For User App","operationId":"get_collections_for_user_app_v2_user_collections__user_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"User ID","title":"User Id"},"description":"User ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/assign_document_to_collection":{"post":{"summary":"Assign Document To Collection App","operationId":"assign_document_to_collection_app_v2_assign_document_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_document_from_collection":{"post":{"summary":"Remove Document From Collection App","operationId":"remove_document_from_collection_app_v2_remove_document_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/document_collections/{document_id}":{"get":{"summary":"Document Collections App","operationId":"document_collections_app_v2_document_collections__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collection/{collection_id}/documents":{"get":{"summary":"Documents In Collection App","operationId":"documents_in_collection_app_v2_collection__collection_id__documents_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/conversations/{conversation_id}":{"get":{"summary":"Get Conversation","operationId":"get_conversation_v2_conversations__conversation_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"},{"name":"branch_id","in":"query","required":false,"schema":{"type":"string","description":"Branch ID","title":"Branch Id"},"description":"Branch ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/search":{"post":{"summary":"Search App","description":"Perform a search query on the vector database and knowledge graph.\n\nThis endpoint allows for complex filtering of search results using PostgreSQL-based queries.\nFilters can be applied to various fields such as document_id, and internal metadata values.\n\n\nAllowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.","operationId":"search_app_v2_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_search_app_v2_search_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_SearchResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.search(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/search\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n }'\n"}]}},"/v2/rag":{"post":{"summary":"Rag App","description":"Execute a RAG (Retrieval-Augmented Generation) query.\n\nThis endpoint combines search results with language model generation.\nIt supports the same filtering capabilities as the search endpoint,\nallowing for precise control over the retrieved context.\n\nThe generation process can be customized using the rag_generation_config parameter.","operationId":"rag_app_v2_rag_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_rag_app_v2_rag_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.rag(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True,\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/rag\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n }'\n"}]}},"/v2/agent":{"post":{"summary":"Agent App","description":"Implement an agent-based interaction for complex query processing.\n\nThis endpoint supports multi-turn conversations and can handle complex queries\nby breaking them down into sub-tasks. It uses the same filtering capabilities\nas the search and RAG endpoints for retrieving relevant information.\n\nThe agent's behavior can be customized using the rag_generation_config and\ntask_prompt_override parameters.","operationId":"agent_app_v2_agent_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_agent_app_v2_agent_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGAgentResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.agent(\n messages=[\n {\"role\": \"user\", \"content\": \"Who is the greatest philospher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philospher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n include_title_if_available=True\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/agent\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"messages\": [\n {\"role\": \"user\", \"content\": \"Who is the greatest philospher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philospher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": false # to enable this, please read the graphrag cookbook\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n \"include_title_if_available\": true\n }'\n"}]}},"/v2/completion":{"post":{"summary":"Completion","description":"Generate completions for a list of messages.\n\nThis endpoint uses the language model to generate completions for the provided messages.\nThe generation process can be customized using the generation_config parameter.","operationId":"completion_v2_completion_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_completion_v2_completion_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/register":{"post":{"summary":"Register App","description":"Register a new user with the given email and password.","operationId":"register_app_v2_register_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_register_app_v2_register_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/verify_email":{"post":{"summary":"Verify Email App","description":"Verify a user's email address.\n\nThis endpoint is used to confirm a user's email address using the verification code\nsent to their email after registration.","operationId":"verify_email_app_v2_verify_email_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_verify_email_app_v2_verify_email_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/login":{"post":{"summary":"Login App","description":"Authenticate a user and provide access tokens.\n\nThis endpoint authenticates a user using their email (username) and password,\nand returns access and refresh tokens upon successful authentication.","operationId":"login_app_v2_login_post","requestBody":{"content":{"application/x-www-form-urlencoded":{"schema":{"$ref":"#/components/schemas/Body_login_app_v2_login_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logout":{"post":{"summary":"Logout App","description":"Log out the current user.\n\nThis endpoint invalidates the user's current access token, effectively logging them out.","operationId":"logout_app_v2_logout_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}}},"security":[{"OAuth2PasswordBearer":[]},{"HTTPBearer":[]}]}},"/v2/user":{"get":{"summary":"Get User App","description":"Get the current user's profile information.\n\nThis endpoint returns the profile information of the currently authenticated user.","operationId":"get_user_app_v2_user_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}}},"security":[{"HTTPBearer":[]}]},"put":{"summary":"Put User App","description":"Update the current user's profile information.\n\nThis endpoint allows the authenticated user to update their profile information.","operationId":"put_user_app_v2_user_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_put_user_app_v2_user_put"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/refresh_access_token":{"post":{"summary":"Refresh Access Token App","description":"Refresh the access token using a refresh token.\n\nThis endpoint allows users to obtain a new access token using their refresh token.","operationId":"refresh_access_token_app_v2_refresh_access_token_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","title":"Refresh Token","description":"Refresh token"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/change_password":{"post":{"summary":"Change Password App","description":"Change the authenticated user's password.\n\nThis endpoint allows users to change their password by providing their current password\nand a new password.","operationId":"change_password_app_v2_change_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_change_password_app_v2_change_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/request_password_reset":{"post":{"summary":"Request Password Reset App","description":"Request a password reset for a user.\n\nThis endpoint initiates the password reset process by sending a reset link\nto the specified email address.","operationId":"request_password_reset_app_v2_request_password_reset_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","format":"email","title":"Email","description":"User's email address"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/reset_password":{"post":{"summary":"Reset Password App","operationId":"reset_password_app_v2_reset_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_reset_password_app_v2_reset_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user/{user_id}":{"delete":{"summary":"Delete User App","description":"Delete a user account.\n\nThis endpoint allows users to delete their own account or, for superusers,\nto delete any user account.","operationId":"delete_user_app_v2_user__user_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"ID of the user to delete","title":"User Id"},"description":"ID of the user to delete"}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_user_app_v2_user__user_id__delete"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_graph":{"post":{"summary":"Create Graph","description":"Creating a graph on your documents. This endpoint takes input a list of document ids and KGCreationSettings.\nIf document IDs are not provided, the graph will be created on all documents in the system.\nThis step extracts the relevant entities and relationships from the documents and creates a graph based on the extracted information.\nIn order to do GraphRAG, you will need to run the enrich_graph endpoint.","operationId":"create_graph_v2_create_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_graph_v2_create_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_Union_KGCreationResponse__KGCreationEstimationResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/enrich_graph":{"post":{"summary":"Enrich Graph","description":"This endpoint enriches the graph with additional information.\nIt creates communities of nodes based on their similarity and adds embeddings to the graph.\nThis step is necessary for GraphRAG to work.","operationId":"enrich_graph_v2_enrich_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_enrich_graph_v2_enrich_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_Union_KGEnrichmentResponse__KGEnrichmentEstimationResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/entities":{"get":{"summary":"Get Entities","description":"Retrieve entities from the knowledge graph.","operationId":"get_entities_v2_entities_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve entities from.","title":"Collection Id"},"description":"Collection ID to retrieve entities from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"entity_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Entity IDs to filter by.","title":"Entity Ids"},"description":"Entity IDs to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGEntitiesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/triples":{"get":{"summary":"Get Triples","description":"Retrieve triples from the knowledge graph.","operationId":"get_triples_v2_triples_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve triples from.","title":"Collection Id"},"description":"Collection ID to retrieve triples from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"entity_names","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Entity names to filter by.","title":"Entity Names"},"description":"Entity names to filter by."},{"name":"triple_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Triple IDs to filter by.","title":"Triple Ids"},"description":"Triple IDs to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGTriplesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/communities":{"get":{"summary":"Get Communities","description":"Retrieve communities from the knowledge graph.","operationId":"get_communities_v2_communities_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve communities from.","title":"Collection Id"},"description":"Collection ID to retrieve communities from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"levels","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"integer"}},{"type":"null"}],"description":"Levels to filter by.","title":"Levels"},"description":"Levels to filter by."},{"name":"community_numbers","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"integer"}},{"type":"null"}],"description":"Community numbers to filter by.","title":"Community Numbers"},"description":"Community numbers to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGCommunitiesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/openapi_spec":{"get":{"summary":"Openapi Spec","operationId":"openapi_spec_v2_openapi_spec_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AllPromptsResponse":{"properties":{"prompts":{"additionalProperties":{"$ref":"#/components/schemas/PromptResponse"},"type":"object","title":"Prompts"}},"type":"object","required":["prompts"],"title":"AllPromptsResponse"},"AnalyticsResponse":{"properties":{"analytics_data":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Analytics Data"},"filtered_logs":{"type":"object","title":"Filtered Logs"}},"type":"object","required":["filtered_logs"],"title":"AnalyticsResponse"},"AppSettingsResponse":{"properties":{"config":{"type":"object","title":"Config"},"prompts":{"type":"object","title":"Prompts"}},"type":"object","required":["config","prompts"],"title":"AppSettingsResponse"},"Body_add_prompt_app_v2_add_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"type":"string","title":"Template","description":"Prompt template"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name","template"],"title":"Body_add_prompt_app_v2_add_prompt_post"},"Body_add_user_to_collection_app_v2_add_user_to_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_add_user_to_collection_app_v2_add_user_to_collection_post"},"Body_agent_app_v2_agent_post":{"properties":{"message":{"$ref":"#/components/schemas/Message"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":true},"conversation_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Conversation Id","description":"The ID of the conversation, a new ID is assigned if not provided"}},"type":"object","required":["message"],"title":"Body_agent_app_v2_agent_post"},"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"},"Body_change_password_app_v2_change_password_post":{"properties":{"current_password":{"type":"string","title":"Current Password","description":"Current password"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["current_password","new_password"],"title":"Body_change_password_app_v2_change_password_post"},"Body_completion_v2_completion_post":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message"},"type":"array","title":"Messages","description":"The messages to complete"},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"The generation config"}},"type":"object","required":["messages"],"title":"Body_completion_v2_completion_post"},"Body_create_collection_app_v2_create_collection_post":{"properties":{"name":{"type":"string","title":"Name","description":"Collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Collection description","default":""}},"type":"object","required":["name"],"title":"Body_create_collection_app_v2_create_collection_post"},"Body_create_graph_v2_create_graph_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to create graph for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph creation process."},"kg_creation_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Creation Settings","description":"Settings for the graph creation process."}},"type":"object","title":"Body_create_graph_v2_create_graph_post"},"Body_create_vector_index_app_v2_create_vector_index_post":{"properties":{"table_name":{"anyOf":[{"$ref":"#/components/schemas/VectorTableName"},{"type":"null"}],"description":"The name of the vector table to create.","default":"CHUNKS"},"index_method":{"$ref":"#/components/schemas/IndexMethod","description":"The type of vector index to create.","default":"hnsw"},"measure":{"$ref":"#/components/schemas/IndexMeasure","description":"The measure for the index.","default":"cosine_distance"},"index_arguments":{"anyOf":[{"$ref":"#/components/schemas/IndexArgsIVFFlat"},{"$ref":"#/components/schemas/IndexArgsHNSW"},{"type":"null"}],"title":"Index Arguments","description":"The arguments for the index method."},"replace":{"type":"boolean","title":"Replace","description":"Whether to replace an existing index.","default":true},"concurrently":{"type":"boolean","title":"Concurrently","description":"Whether to create the index concurrently.","default":true}},"type":"object","title":"Body_create_vector_index_app_v2_create_vector_index_post"},"Body_delete_user_app_v2_user__user_id__delete":{"properties":{"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password","description":"User's current password"},"delete_vector_data":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Delete Vector Data","description":"Whether to delete the user's vector data","default":false}},"type":"object","title":"Body_delete_user_app_v2_user__user_id__delete"},"Body_enrich_graph_v2_enrich_graph_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to enrich graph for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph enrichment process.","default":"estimate"},"kg_enrichment_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Enrichment Settings","description":"Settings for the graph enrichment process."}},"type":"object","title":"Body_enrich_graph_v2_enrich_graph_post"},"Body_ingest_chunks_app_v2_ingest_chunks_post":{"properties":{"chunks":{"items":{"$ref":"#/components/schemas/RawChunk"},"type":"array","title":"Chunks","description":"A list of text chunks to ingest into the system.","default":{}},"document_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document Id","description":"An optional document id to associate the chunks with. If not provided, a unique document id will be generated."},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","title":"Body_ingest_chunks_app_v2_ingest_chunks_post"},"Body_ingest_files_app_v2_ingest_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to ingest into the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"ingestion_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"title":"Ingestion Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration."}},"type":"object","required":["files"],"title":"Body_ingest_files_app_v2_ingest_files_post"},"Body_login_app_v2_login_post":{"properties":{"grant_type":{"anyOf":[{"type":"string","pattern":"password"},{"type":"null"}],"title":"Grant Type"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"scope":{"type":"string","title":"Scope","default":""},"client_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Id"},"client_secret":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Secret"}},"type":"object","required":["username","password"],"title":"Body_login_app_v2_login_post"},"Body_put_user_app_v2_user_put":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id","description":"ID of the user to update"},"email":{"anyOf":[{"type":"string","format":"email"},{"type":"null"}],"title":"Email","description":"Updated email address"},"is_superuser":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Is Superuser","description":"Updated superuser status"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated user name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio","description":"Updated user bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture","description":"Updated profile picture URL"}},"type":"object","title":"Body_put_user_app_v2_user_put"},"Body_rag_app_v2_rag_post":{"properties":{"query":{"type":"string","title":"Query","description":"RAG query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":false}},"type":"object","required":["query"],"title":"Body_rag_app_v2_rag_post"},"Body_register_app_v2_register_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"password":{"type":"string","title":"Password","description":"User's password"}},"type":"object","required":["email","password"],"title":"Body_register_app_v2_register_post"},"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"},"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"},"Body_reset_password_app_v2_reset_password_post":{"properties":{"reset_token":{"type":"string","title":"Reset Token","description":"Password reset token"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["reset_token","new_password"],"title":"Body_reset_password_app_v2_reset_password_post"},"Body_search_app_v2_search_post":{"properties":{"query":{"type":"string","title":"Query","description":"Search query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"}},"type":"object","required":["query"],"title":"Body_search_app_v2_search_post"},"Body_update_collection_app_v2_update_collection_put":{"properties":{"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Updated collection description"}},"type":"object","required":["collection_id"],"title":"Body_update_collection_app_v2_update_collection_put"},"Body_update_files_app_v2_update_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to update in the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"ingestion_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"title":"Ingestion Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration."}},"type":"object","required":["files"],"title":"Body_update_files_app_v2_update_files_post"},"Body_update_prompt_app_v2_update_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Template","description":"Prompt template"},"input_types":{"anyOf":[{"additionalProperties":{"type":"string"},"type":"object"},{"type":"null"}],"title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name"],"title":"Body_update_prompt_app_v2_update_prompt_post"},"Body_verify_email_app_v2_verify_email_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"verification_code":{"type":"string","title":"Verification Code","description":"Email verification code"}},"type":"object","required":["email","verification_code"],"title":"Body_verify_email_app_v2_verify_email_post"},"CollectionOverviewResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"user_count":{"type":"integer","title":"User Count"},"document_count":{"type":"integer","title":"Document Count"}},"type":"object","required":["collection_id","name","description","created_at","updated_at","user_count","document_count"],"title":"CollectionOverviewResponse"},"CollectionResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"}},"type":"object","required":["collection_id","name","description","created_at","updated_at"],"title":"CollectionResponse"},"CommunityReport":{"properties":{"community_number":{"type":"integer","title":"Community Number"},"level":{"type":"integer","title":"Level"},"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name","default":""},"summary":{"type":"string","title":"Summary","default":""},"findings":{"items":{"type":"string"},"type":"array","title":"Findings","default":[]},"rating":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Rating"},"rating_explanation":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rating Explanation"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Embedding"},"attributes":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Attributes"}},"type":"object","required":["community_number","level","collection_id"],"title":"CommunityReport"},"CreateVectorIndexResponse":{"properties":{"message":{"type":"string","title":"Message","description":"Vector index creation queued successfully."}},"type":"object","required":["message"],"title":"CreateVectorIndexResponse"},"DocumentChunkResponse":{"properties":{"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["extraction_id","document_id","user_id","collection_ids","text","metadata"],"title":"DocumentChunkResponse"},"DocumentOverviewResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"title":{"type":"string","title":"Title"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"type":{"type":"string","title":"Type"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"ingestion_status":{"type":"string","title":"Ingestion Status"},"kg_extraction_status":{"type":"string","title":"Kg Extraction Status"},"version":{"type":"string","title":"Version"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["id","title","user_id","type","created_at","updated_at","ingestion_status","kg_extraction_status","version","collection_ids","metadata"],"title":"DocumentOverviewResponse"},"Entity":{"properties":{"name":{"type":"string","title":"Name"},"id":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Id"},"category":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Category"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"description_embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Description Embedding"},"community_numbers":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Community Numbers"},"extraction_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Extraction Ids"},"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id"},"document_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Document Id"},"attributes":{"anyOf":[{"type":"object"},{"type":"string"},{"type":"null"}],"title":"Attributes"}},"type":"object","required":["name"],"title":"Entity","description":"An entity extracted from a document."},"GenerationConfig":{"properties":{"model":{"type":"string","title":"Model"},"temperature":{"type":"number","title":"Temperature"},"top_p":{"type":"number","title":"Top P"},"max_tokens_to_sample":{"type":"integer","title":"Max Tokens To Sample"},"stream":{"type":"boolean","title":"Stream"},"functions":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Functions"},"tools":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tools"},"add_generation_kwargs":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Add Generation Kwargs"},"api_base":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api Base"}},"type":"object","title":"GenerationConfig","max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"GenericMessageResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"GenericMessageResponse"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"HybridSearchSettings":{"properties":{"full_text_weight":{"type":"number","title":"Full Text Weight","description":"Weight to apply to full text search","default":1.0},"semantic_weight":{"type":"number","title":"Semantic Weight","description":"Weight to apply to semantic search","default":5.0},"full_text_limit":{"type":"integer","title":"Full Text Limit","description":"Maximum number of results to return from full text search","default":200},"rrf_k":{"type":"integer","title":"Rrf K","description":"K-value for RRF (Rank Reciprocal Fusion)","default":50}},"type":"object","title":"HybridSearchSettings"},"IndexArgsHNSW":{"properties":{"m":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"M","default":16},"ef_construction":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Ef Construction","default":64}},"type":"object","title":"IndexArgsHNSW","description":"A class for arguments that can optionally be supplied to the index creation\nmethod when building an HNSW type index.\n\nRef: https://github.com/pgvector/pgvector#index-options\n\nBoth attributes are Optional in case the user only wants to specify one and\nleave the other as default\n\nAttributes:\n m (int): Maximum number of connections per node per layer (default: 16)\n ef_construction (int): Size of the dynamic candidate list for\n constructing the graph (default: 64)"},"IndexArgsIVFFlat":{"properties":{"n_lists":{"type":"integer","title":"N Lists"}},"type":"object","required":["n_lists"],"title":"IndexArgsIVFFlat","description":"A class for arguments that can optionally be supplied to the index creation\nmethod when building an IVFFlat type index.\n\nAttributes:\n nlist (int): The number of IVF centroids that the index should use"},"IndexMeasure":{"type":"string","enum":["cosine_distance","l2_distance","max_inner_product"],"title":"IndexMeasure","description":"An enum representing the types of distance measures available for indexing.\n\nAttributes:\n cosine_distance (str): The cosine distance measure for indexing.\n l2_distance (str): The Euclidean (L2) distance measure for indexing.\n max_inner_product (str): The maximum inner product measure for indexing."},"IndexMethod":{"type":"string","enum":["auto","ivfflat","hnsw"],"title":"IndexMethod","description":"An enum representing the index methods available.\n\nThis class currently only supports the 'ivfflat' method but may\nexpand in the future.\n\nAttributes:\n auto (str): Automatically choose the best available index method.\n ivfflat (str): The ivfflat index method.\n hnsw (str): The hnsw index method."},"IngestionResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Task Id","description":"The task ID of the ingestion request."},"document_id":{"type":"string","format":"uuid","title":"Document Id","description":"The ID of the document that was ingested."}},"type":"object","required":["message","document_id"],"title":"IngestionResponse","example":{"document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","message":"Ingestion task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGCommunitiesResponse":{"properties":{"communities":{"items":{"$ref":"#/components/schemas/CommunityReport"},"type":"array","title":"Communities","description":"The list of communities in the graph for the collection."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of communities in the graph."}},"type":"object","required":["communities","total_entries"],"title":"KGCommunitiesResponse","description":"Response for knowledge graph communities.","example":{"communities":[{"collection_ids":["122fdf6a-e116-546b-a8f6-e4cb2e2c0a09"],"community_number":1,"findings":["finding1","finding2"],"id":"1","level":0,"name":"community name","rating":"8","rating_explanation":"rating explanation","summary":"community summary"}],"total_count":1}},"KGCommunityResult":{"properties":{"name":{"type":"string","title":"Name"},"summary":{"type":"string","title":"Summary"},"rating":{"type":"number","title":"Rating"},"rating_explanation":{"type":"string","title":"Rating Explanation"},"findings":{"items":{"type":"string"},"type":"array","title":"Findings"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","summary","rating","rating_explanation","findings"],"title":"KGCommunityResult","findings":["Finding 1","Finding 2"],"metadata":{},"name":"Community Name","rating":9,"rating_explanation":"Rating Explanation","summary":"Community Summary"},"KGCreationEstimationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user.","default":""},"document_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Document Count","description":"The number of documents in the collection."},"number_of_jobs_created":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Number Of Jobs Created","description":"The number of jobs created for the graph creation process."},"total_chunks":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Chunks","description":"The estimated total number of chunks."},"estimated_entities":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Entities","description":"The estimated number of entities in the graph."},"estimated_triples":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Triples","description":"The estimated number of triples in the graph."},"estimated_llm_calls":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Llm Calls","description":"The estimated number of LLM calls in millions."},"estimated_total_in_out_tokens_in_millions":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total In Out Tokens In Millions","description":"The estimated total number of input and output tokens in millions."},"estimated_total_time_in_minutes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total Time In Minutes","description":"The estimated total time to run the graph creation process in minutes."},"estimated_cost_in_usd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Cost In Usd","description":"The estimated cost to run the graph creation process in USD."}},"type":"object","title":"KGCreationEstimationResponse","description":"Response for knowledge graph creation estimation."},"KGCreationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the KG creation request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the KG creation request."}},"type":"object","required":["message","task_id"],"title":"KGCreationResponse","example":{"message":"Graph creation queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGEnrichmentEstimationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user.","default":""},"total_entities":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Entities","description":"The total number of entities in the graph."},"total_triples":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Triples","description":"The total number of triples in the graph."},"estimated_llm_calls":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Llm Calls","description":"The estimated number of LLM calls."},"estimated_total_in_out_tokens_in_millions":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total In Out Tokens In Millions","description":"The estimated total number of input and output tokens in millions."},"estimated_cost_in_usd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Cost In Usd","description":"The estimated cost to run the graph enrichment process."},"estimated_total_time_in_minutes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total Time In Minutes","description":"The estimated total time to run the graph enrichment process."}},"type":"object","title":"KGEnrichmentEstimationResponse","description":"Response for knowledge graph enrichment estimation."},"KGEnrichmentResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the KG enrichment request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the KG enrichment request."}},"type":"object","required":["message","task_id"],"title":"KGEnrichmentResponse","example":{"message":"Graph enrichment queued successfuly.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGEntitiesResponse":{"properties":{"entities":{"items":{"$ref":"#/components/schemas/Entity"},"type":"array","title":"Entities","description":"The list of entities in the graph."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of entities in the graph for the collection or document."}},"type":"object","required":["entities","total_entries"],"title":"KGEntitiesResponse","description":"Response for knowledge graph entities.","example":{"entities":[{"description":"Description 1","id":"1","name":"Entity 1"},{"description":"Description 2","id":"2","name":"Entity 2"}],"total_entries":2}},"KGEntityResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGEntityResult","description":"Entity Description","metadata":{},"name":"Entity Name"},"KGGlobalResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGGlobalResult","description":"Global Result Description","metadata":{},"name":"Global Result Name"},"KGRelationshipResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGRelationshipResult","description":"Relationship Description","metadata":{},"name":"Relationship Name"},"KGRunType":{"type":"string","enum":["estimate","run"],"title":"KGRunType","description":"Type of KG run."},"KGSearchMethod":{"type":"string","enum":["local"],"const":"local","title":"KGSearchMethod"},"KGSearchResult":{"properties":{"method":{"$ref":"#/components/schemas/KGSearchMethod"},"content":{"anyOf":[{"$ref":"#/components/schemas/KGEntityResult"},{"$ref":"#/components/schemas/KGRelationshipResult"},{"$ref":"#/components/schemas/KGCommunityResult"},{"$ref":"#/components/schemas/KGGlobalResult"}],"title":"Content"},"result_type":{"anyOf":[{"$ref":"#/components/schemas/KGSearchResultType"},{"type":"null"}]},"extraction_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Extraction Ids"},"metadata":{"type":"object","title":"Metadata","default":{}}},"type":"object","required":["method","content"],"title":"KGSearchResult","content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"},"KGSearchResultType":{"type":"string","enum":["entity","relationship","community"],"title":"KGSearchResultType"},"KGSearchSettings":{"properties":{"filters":{"type":"object","title":"Filters","description":"Filters to apply to the vector search","deprecated":true},"search_filters":{"type":"object","title":"Search Filters","description":"Filters to apply to the vector search"},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"graphrag_map_system_prompt":{"type":"string","title":"Graphrag Map System Prompt","description":"The system prompt for the graphrag map prompt.","default":"graphrag_map_system_prompt"},"graphrag_reduce_system_prompt":{"type":"string","title":"Graphrag Reduce System Prompt","description":"The system prompt for the graphrag reduce prompt.","default":"graphrag_reduce_system_prompt"},"use_kg_search":{"type":"boolean","title":"Use Kg Search","description":"Whether to use KG search","default":false},"kg_search_type":{"type":"string","title":"Kg Search Type","description":"KG search type","default":"local"},"kg_search_level":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kg Search Level","description":"KG search level"},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"Configuration for text generation during graph search."},"max_community_description_length":{"type":"integer","title":"Max Community Description Length","default":65536},"max_llm_queries_for_global_search":{"type":"integer","title":"Max Llm Queries For Global Search","default":250},"local_search_limits":{"additionalProperties":{"type":"integer"},"type":"object","title":"Local Search Limits","default":{"__Entity__":20,"__Relationship__":20,"__Community__":20}}},"type":"object","title":"KGSearchSettings","generation_config":{"max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"kg_search_level":"0","kg_search_type":"local","local_search_limits":{"__Community__":20,"__Entity__":20,"__Relationship__":20},"max_community_description_length":65536,"max_llm_queries_for_global_search":250,"use_kg_search":true},"KGTriplesResponse":{"properties":{"triples":{"items":{"$ref":"#/components/schemas/Triple"},"type":"array","title":"Triples","description":"The list of triples in the graph."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of triples in the graph for the collection or document."}},"type":"object","required":["triples","total_entries"],"title":"KGTriplesResponse","description":"Response for knowledge graph triples.","example":{"total_entries":2,"triples":[{"description":"Paris is the capital of France","object":"France","predicate":"is capital of","subject":"Paris"}]}},"LogEntry":{"properties":{"key":{"type":"string","title":"Key"},"value":{"title":"Value"},"timestamp":{"type":"string","format":"date-time","title":"Timestamp"}},"type":"object","required":["key","value","timestamp"],"title":"LogEntry"},"LogResponse":{"properties":{"run_id":{"type":"string","format":"uuid","title":"Run Id"},"run_type":{"type":"string","title":"Run Type"},"entries":{"items":{"$ref":"#/components/schemas/LogEntry"},"type":"array","title":"Entries"},"timestamp":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Timestamp"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"}},"type":"object","required":["run_id","run_type","entries","timestamp","user_id"],"title":"LogResponse"},"Message":{"properties":{"role":{"anyOf":[{"$ref":"#/components/schemas/MessageType"},{"type":"string"}],"title":"Role"},"content":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"function_call":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Function Call"},"tool_calls":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tool Calls"}},"type":"object","required":["role"],"title":"Message","content":"This is a test message.","role":"user"},"MessageType":{"type":"string","enum":["system","user","assistant","function","tool"],"title":"MessageType"},"PaginatedResultsWrapper_list_CollectionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[CollectionResponse]]"},"PaginatedResultsWrapper_list_DocumentChunkResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentChunkResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[DocumentChunkResponse]]"},"PaginatedResultsWrapper_list_DocumentOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[DocumentOverviewResponse]]"},"PaginatedResultsWrapper_list_UserOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/UserOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[UserOverviewResponse]]"},"PaginatedResultsWrapper_list_UserResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/shared__api__models__management__responses__UserResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[UserResponse]]"},"PromptResponse":{"properties":{"name":{"type":"string","title":"Name"},"template":{"type":"string","title":"Template"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types"}},"type":"object","required":["name","template","created_at","updated_at","input_types"],"title":"PromptResponse"},"RAGAgentResponse":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message"},"type":"array","title":"Messages","description":"List of messages in the RAG agent response"}},"type":"object","required":["messages"],"title":"RAGAgentResponse","example":[{"content":"## You are a helpful assistant that can search for information.\n\nWhen asked a question, perform a search to find relevant information and provide a response.\n\nThe response should contain line-item attributions to relevent search results, and be as informative if possible.\nIf no relevant results are found, then state that no results were found.\nIf no obvious question is present, then do not carry out a search, and instead ask for clarification.","role":"system"},{"content":"You are a helpful assistant.","role":"system"},{"content":"Who is the greatest philospher of all time?","role":"user"},{"content":"Aristotle is widely considered the greatest philospher of all time.","role":"assistant"},{"content":"Can you tell me more about him?","role":"user"},{"function_call":{"arguments":"{\"query\":\"Aristotle biography\"}","name":"search"},"role":"assistant"},{"content":"1. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n2. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n3. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n4. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n5. Life\nIn general, the details of Aristotle's life are not well-established. The biographies written in ancient times are often speculative and historians only agree on a few salient points.[B]\n","name":"search","role":"function"},{"content":"Aristotle (384–322 BC) was an Ancient Greek philosopher and polymath whose contributions have had a profound impact on various fields of knowledge. Here are some key points about his life and work:\n\n1. **Early Life**: Aristotle was born in 384 BC in Stagira, Chalcidice, which is near modern-day Thessaloniki, Greece. His father, Nicomachus, was the personal physician to King Amyntas of Macedon, which exposed Aristotle to medical and biological knowledge from a young age [C].\n\n2. **Education and Career**: After the death of his parents, Aristotle was sent to Athens to study at Plato's Academy, where he remained for about 20 years. After Plato's death, Aristotle left Athens and eventually became the tutor of Alexander the Great [C].\n\n3. **Philosophical Contributions**: Aristotle founded the Lyceum in Athens, where he established the Peripatetic school of philosophy. His works cover a wide range of subjects, including metaphysics, ethics, politics, logic, biology, and aesthetics. His writings laid the groundwork for many modern scientific and philosophical inquiries [A].\n\n4. **Legacy**: Aristotle's influence extends beyond philosophy to the natural sciences, linguistics, economics, and psychology. His method of systematic observation and analysis has been foundational to the development of modern science [A].\n\nAristotle's comprehensive approach to knowledge and his systematic methodology have earned him a lasting legacy as one of the greatest philosophers of all time.\n\nSources:\n- [A] Aristotle's broad range of writings and influence on modern science.\n- [C] Details about Aristotle's early life and education.","role":"assistant"}]},"RAGResponse":{"properties":{"completion":{"title":"Completion","description":"The generated completion from the RAG process"},"search_results":{"$ref":"#/components/schemas/SearchResponse","description":"The search results used for the RAG process"}},"type":"object","required":["completion","search_results"],"title":"RAGResponse","example":{"completion":{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Paris is the capital of France.","role":"assistant"}}],"id":"chatcmpl-example123"},"search_results":{"kg_search_results":[{"content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}}},"RawChunk":{"properties":{"text":{"type":"string","title":"Text"}},"type":"object","required":["text"],"title":"RawChunk"},"ResultsWrapper_AllPromptsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AllPromptsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AllPromptsResponse]"},"ResultsWrapper_AnalyticsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AnalyticsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AnalyticsResponse]"},"ResultsWrapper_AppSettingsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AppSettingsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AppSettingsResponse]"},"ResultsWrapper_CollectionResponse_":{"properties":{"results":{"$ref":"#/components/schemas/CollectionResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[CollectionResponse]"},"ResultsWrapper_CreateVectorIndexResponse_":{"properties":{"results":{"$ref":"#/components/schemas/CreateVectorIndexResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[CreateVectorIndexResponse]"},"ResultsWrapper_GenericMessageResponse_":{"properties":{"results":{"$ref":"#/components/schemas/GenericMessageResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[GenericMessageResponse]"},"ResultsWrapper_KGCommunitiesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGCommunitiesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGCommunitiesResponse]"},"ResultsWrapper_KGEntitiesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGEntitiesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGEntitiesResponse]"},"ResultsWrapper_KGTriplesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGTriplesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGTriplesResponse]"},"ResultsWrapper_NoneType_":{"properties":{"results":{"type":"null","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[NoneType]"},"ResultsWrapper_RAGAgentResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGAgentResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGAgentResponse]"},"ResultsWrapper_RAGResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGResponse]"},"ResultsWrapper_SearchResponse_":{"properties":{"results":{"$ref":"#/components/schemas/SearchResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[SearchResponse]"},"ResultsWrapper_ServerStats_":{"properties":{"results":{"$ref":"#/components/schemas/ServerStats"}},"type":"object","required":["results"],"title":"ResultsWrapper[ServerStats]"},"ResultsWrapper_TokenResponse_":{"properties":{"results":{"$ref":"#/components/schemas/TokenResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[TokenResponse]"},"ResultsWrapper_Union_KGCreationResponse__KGCreationEstimationResponse__":{"properties":{"results":{"anyOf":[{"$ref":"#/components/schemas/KGCreationResponse"},{"$ref":"#/components/schemas/KGCreationEstimationResponse"}],"title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[Union[KGCreationResponse, KGCreationEstimationResponse]]"},"ResultsWrapper_Union_KGEnrichmentResponse__KGEnrichmentEstimationResponse__":{"properties":{"results":{"anyOf":[{"$ref":"#/components/schemas/KGEnrichmentResponse"},{"$ref":"#/components/schemas/KGEnrichmentEstimationResponse"}],"title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[Union[KGEnrichmentResponse, KGEnrichmentEstimationResponse]]"},"ResultsWrapper_UpdatePromptResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdatePromptResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdatePromptResponse]"},"ResultsWrapper_UpdateResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdateResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdateResponse]"},"ResultsWrapper_UserResponse_":{"properties":{"results":{"$ref":"#/components/schemas/shared__api__models__auth__responses__UserResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UserResponse]"},"ResultsWrapper_list_CollectionOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionOverviewResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[CollectionOverviewResponse]]"},"ResultsWrapper_list_CollectionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[CollectionResponse]]"},"ResultsWrapper_list_IngestionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/IngestionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[IngestionResponse]]"},"ResultsWrapper_list_LogResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/LogResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[LogResponse]]"},"SearchResponse":{"properties":{"vector_search_results":{"items":{"$ref":"#/components/schemas/VectorSearchResult"},"type":"array","title":"Vector Search Results","description":"List of vector search results"},"kg_search_results":{"anyOf":[{"items":{"$ref":"#/components/schemas/KGSearchResult"},"type":"array"},{"type":"null"}],"title":"Kg Search Results","description":"Knowledge graph search results, if applicable"}},"type":"object","required":["vector_search_results"],"title":"SearchResponse","example":{"kg_search_results":[{"content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}},"ServerStats":{"properties":{"start_time":{"type":"string","format":"date-time","title":"Start Time"},"uptime_seconds":{"type":"number","title":"Uptime Seconds"},"cpu_usage":{"type":"number","title":"Cpu Usage"},"memory_usage":{"type":"number","title":"Memory Usage"}},"type":"object","required":["start_time","uptime_seconds","cpu_usage","memory_usage"],"title":"ServerStats"},"Token":{"properties":{"token":{"type":"string","title":"Token"},"token_type":{"type":"string","title":"Token Type"}},"type":"object","required":["token","token_type"],"title":"Token"},"TokenResponse":{"properties":{"access_token":{"$ref":"#/components/schemas/Token"},"refresh_token":{"$ref":"#/components/schemas/Token"}},"type":"object","required":["access_token","refresh_token"],"title":"TokenResponse"},"Triple":{"properties":{"id":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Id"},"subject":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Subject"},"predicate":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Predicate"},"object":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Object"},"weight":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Weight","default":1.0},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"predicate_embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Predicate Embedding"},"extraction_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Extraction Ids","default":[]},"document_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Document Id"},"attributes":{"anyOf":[{"type":"object"},{"type":"string"}],"title":"Attributes","default":{}}},"type":"object","title":"Triple","description":"A relationship between two entities. This is a generic relationship, and can be used to represent any type of relationship between any two entities."},"UpdatePromptResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"UpdatePromptResponse"},"UpdateResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Task Id","description":"The task ID of the ingestion request."},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids","description":"The ID of the document that was ingested."}},"type":"object","required":["message","document_ids"],"title":"UpdateResponse","example":{"document_ids":["9fbe403b-c11c-5aae-8ade-ef22980c3ad1"],"message":"Update task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"UserOverviewResponse":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id"},"num_files":{"type":"integer","title":"Num Files"},"total_size_in_bytes":{"type":"integer","title":"Total Size In Bytes"},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids"}},"type":"object","required":["user_id","num_files","total_size_in_bytes","document_ids"],"title":"UserOverviewResponse"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"VectorSearchResult":{"properties":{"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"score":{"type":"number","title":"Score"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["extraction_id","document_id","user_id","collection_ids","score","text","metadata"],"title":"VectorSearchResult","description":"Result of a search operation.","collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"},"VectorSearchSettings":{"properties":{"use_vector_search":{"type":"boolean","title":"Use Vector Search","description":"Whether to use vector search","default":true},"use_hybrid_search":{"type":"boolean","title":"Use Hybrid Search","description":"Whether to perform a hybrid search (combining vector and keyword search)","default":false},"filters":{"type":"object","title":"Filters","description":"Filters to apply to the vector search","deprecated":true},"search_filters":{"type":"object","title":"Search Filters","description":"Filters to apply to the vector search"},"search_limit":{"type":"integer","maximum":1000.0,"minimum":1.0,"title":"Search Limit","description":"Maximum number of results to return","default":10},"offset":{"type":"integer","minimum":0.0,"title":"Offset","description":"Offset to paginate search results","default":0},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"index_measure":{"$ref":"#/components/schemas/IndexMeasure","description":"The distance measure to use for indexing","default":"cosine_distance"},"include_values":{"type":"boolean","title":"Include Values","description":"Whether to include search score values in the search results","default":true},"include_metadatas":{"type":"boolean","title":"Include Metadatas","description":"Whether to include element metadata in the search results","default":true},"probes":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Probes","description":"Number of ivfflat index lists to query. Higher increases accuracy but decreases speed.","default":10},"ef_search":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Ef Search","description":"Size of the dynamic candidate list for HNSW index search. Higher increases accuracy but decreases speed.","default":40},"hybrid_search_settings":{"anyOf":[{"$ref":"#/components/schemas/HybridSearchSettings"},{"type":"null"}],"description":"Settings for hybrid search","default":{"full_text_weight":1.0,"semantic_weight":5.0,"full_text_limit":200,"rrf_k":50}},"search_strategy":{"type":"string","title":"Search Strategy","description":"Search strategy to use (e.g., 'default', 'query_fusion', 'hyde')","default":"vanilla"}},"type":"object","title":"VectorSearchSettings","ef_search":40,"filters":{"category":"technology"},"hybrid_search_settings":{"full_text_limit":200,"full_text_weight":1.0,"rrf_k":50,"semantic_weight":5.0},"include_metadata":true,"index_measure":"cosine_distance","limit":20,"offset":0,"probes":10,"selected_collection_ids":["2acb499e-8428-543b-bd85-0d9098718220","3e157b3a-8469-51db-90d9-52e7d896b49b"],"use_hybrid_search":true,"use_vector_search":true},"VectorTableName":{"type":"string","enum":["CHUNKS","entity_embedding","community_report"],"title":"VectorTableName","description":"This enum represents the different tables where we store vectors.\n\n# TODO: change the table name of the chunks table. Right now it is called\n# {r2r_project_name}.{r2r_project_name} due to a bug in the vector class."},"shared__api__models__auth__responses__UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-10-11T16:33:32.173507"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-10-11T16:33:32.173517"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"},"shared__api__models__management__responses__UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-10-11T16:33:32.188961"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-10-11T16:33:32.188965"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"}},"securitySchemes":{"HTTPBearer":{"type":"http","scheme":"bearer"},"OAuth2PasswordBearer":{"type":"oauth2","flows":{"password":{"scopes":{},"tokenUrl":"token"}}}}}} +{"openapi":"3.1.0","info":{"title":"R2R Application API","version":"1.0.0"},"paths":{"/v2/ingest_files":{"post":{"summary":"Ingest Files App","description":"Ingest files into the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to ingest files and their associated metadatas into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only ingest files for their own access. More expansive collection permissioning is under development.","operationId":"ingest_files_app_v2_ingest_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_ingest_files_app_v2_ingest_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_IngestionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.ingest_files(\n file_paths=[\"pg_essay_1.html\", \"got.txt\"],\n metadatas=[{\"metadata_1\":\"some random metadata\"}, {\"metadata_2\": \"some other random metadata\"}],\n document_ids=None\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/ingest_files\" \\\n -H \"Content-Type: multipart/form-data\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1.html;type=text/html\" \\\n -F \"file=@got.txt;type=text/plain\" \\\n -F 'metadatas=[{},{}]' \\\n -F 'document_ids=null'\n"}]}},"/v2/update_files":{"post":{"summary":"Update Files App","description":"Update existing files in the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to update files and their associated metadatas into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only update their own files. More expansive collection permissioning is under development.","operationId":"update_files_app_v2_update_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_update_files_app_v2_update_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdateResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.update_files(\n file_paths=[\"pg_essay_1_v2.txt\"],\n document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/update_files\" \\\n -H \"Content-Type: multipart/form-data\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1_v2.txt;type=text/plain\" \\\n -F 'document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]'\n"}]}},"/v2/ingest_chunks":{"post":{"summary":"Ingest Chunks App","description":"Ingest text chunks into the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to ingest pre-parsed text chunks into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only ingest chunks for their own access. More expansive collection permissioning is under development.","operationId":"ingest_chunks_app_v2_ingest_chunks_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_ingest_chunks_app_v2_ingest_chunks_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_IngestionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.ingest_chunks(\n chunks=[\n {\n \"text\": \"Another chunk of text\",\n },\n {\n \"text\": \"Yet another chunk of text\",\n },\n {\n \"text\": \"A chunk of text\",\n },\n ],\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/ingest_chunks\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"chunks\": [\n {\n \"text\": \"Another chunk of text\"\n },\n {\n \"text\": \"Yet another chunk of text\"\n },\n {\n \"text\": \"A chunk of text\"\n }\n ],\n \"document_id\": \"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\",\n \"metadata\": {}\n }'\n"}]}},"/v2/create_vector_index":{"post":{"summary":"Create Vector Index App","operationId":"create_vector_index_app_v2_create_vector_index_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_vector_index_app_v2_create_vector_index_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CreateVectorIndexResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/health":{"get":{"summary":"Health Check","operationId":"health_check_v2_health_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/v2/server_stats":{"get":{"summary":"Server Stats","operationId":"server_stats_v2_server_stats_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_ServerStats_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/update_prompt":{"post":{"summary":"Update Prompt App","operationId":"update_prompt_app_v2_update_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_prompt_app_v2_update_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/add_prompt":{"post":{"summary":"Add Prompt App","operationId":"add_prompt_app_v2_add_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_prompt_app_v2_add_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_prompt/{prompt_name}":{"get":{"summary":"Get Prompt App","operationId":"get_prompt_app_v2_get_prompt__prompt_name__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"},{"name":"inputs","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"description":"JSON-encoded prompt inputs","title":"Inputs"},"description":"JSON-encoded prompt inputs"},{"name":"prompt_override","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"description":"Prompt override","title":"Prompt Override"},"description":"Prompt override"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/get_all_prompts":{"get":{"summary":"Get All Prompts App","operationId":"get_all_prompts_app_v2_get_all_prompts_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AllPromptsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_prompt/{prompt_name}":{"delete":{"summary":"Delete Prompt App","operationId":"delete_prompt_app_v2_delete_prompt__prompt_name__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/analytics":{"get":{"summary":"Get Analytics App","operationId":"get_analytics_app_v2_analytics_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filter_criteria","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Filter Criteria"}},{"name":"analysis_types","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Analysis Types"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AnalyticsResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logs":{"get":{"summary":"Logs App","operationId":"logs_app_v2_logs_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"run_type_filter","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"default":"","title":"Run Type Filter"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_LogResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/app_settings":{"get":{"summary":"App Settings","operationId":"app_settings_v2_app_settings_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AppSettingsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/users_overview":{"get":{"summary":"Users Overview App","operationId":"users_overview_app_v2_users_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"default":[],"title":"User Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_UserOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete":{"delete":{"summary":"Delete App","operationId":"delete_app_v2_delete_delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filters","in":"query","required":true,"schema":{"type":"string","description":"JSON-encoded filters","title":"Filters"},"description":"JSON-encoded filters"}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/download_file/{document_id}":{"get":{"summary":"Download File App","description":"Download a file by its document ID as a stream.","operationId":"download_file_app_v2_download_file__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"}],"responses":{"200":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/documents_overview":{"get":{"summary":"Documents Overview App","operationId":"documents_overview_app_v2_documents_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_ids","in":"query","required":false,"schema":{"type":"array","items":{"type":"string"},"default":[],"title":"Document Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","minimum":-1,"description":"Number of items to return. Use -1 to return all items.","default":100,"title":"Limit"},"description":"Number of items to return. Use -1 to return all items."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/document_chunks/{document_id}":{"get":{"summary":"Document Chunks App","operationId":"document_chunks_app_v2_document_chunks__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","title":"Document Id"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentChunkResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collections_overview":{"get":{"summary":"Collections Overview App","operationId":"collections_overview_app_v2_collections_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"title":"Collection Ids"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","maximum":1000,"minimum":1},{"type":"null"}],"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_collection":{"post":{"summary":"Create Collection App","operationId":"create_collection_app_v2_create_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_collection_app_v2_create_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_collection/{collection_id}":{"get":{"summary":"Get Collection App","operationId":"get_collection_app_v2_get_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/update_collection":{"put":{"summary":"Update Collection App","operationId":"update_collection_app_v2_update_collection_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_collection_app_v2_update_collection_put"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_collection/{collection_id}":{"delete":{"summary":"Delete Collection App","operationId":"delete_collection_app_v2_delete_collection__collection_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/list_collections":{"get":{"summary":"List Collections App","operationId":"list_collections_app_v2_list_collections_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/add_user_to_collection":{"post":{"summary":"Add User To Collection App","operationId":"add_user_to_collection_app_v2_add_user_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_user_to_collection_app_v2_add_user_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_user_from_collection":{"post":{"summary":"Remove User From Collection App","operationId":"remove_user_from_collection_app_v2_remove_user_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_users_in_collection/{collection_id}":{"get":{"summary":"Get Users In Collection App","operationId":"get_users_in_collection_app_v2_get_users_in_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_UserResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user_collections/{user_id}":{"get":{"summary":"Get Collections For User App","operationId":"get_collections_for_user_app_v2_user_collections__user_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"User ID","title":"User Id"},"description":"User ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/assign_document_to_collection":{"post":{"summary":"Assign Document To Collection App","operationId":"assign_document_to_collection_app_v2_assign_document_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_document_from_collection":{"post":{"summary":"Remove Document From Collection App","operationId":"remove_document_from_collection_app_v2_remove_document_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/document_collections/{document_id}":{"get":{"summary":"Document Collections App","operationId":"document_collections_app_v2_document_collections__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collection/{collection_id}/documents":{"get":{"summary":"Documents In Collection App","operationId":"documents_in_collection_app_v2_collection__collection_id__documents_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/conversations/{conversation_id}":{"get":{"summary":"Get Conversation","operationId":"get_conversation_v2_conversations__conversation_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"},{"name":"branch_id","in":"query","required":false,"schema":{"type":"string","description":"Branch ID","title":"Branch Id"},"description":"Branch ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_Tuple_str__Message___"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/search":{"post":{"summary":"Search App","description":"Perform a search query on the vector database and knowledge graph.\n\nThis endpoint allows for complex filtering of search results using PostgreSQL-based queries.\nFilters can be applied to various fields such as document_id, and internal metadata values.\n\n\nAllowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.","operationId":"search_app_v2_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_search_app_v2_search_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_SearchResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.search(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/search\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n }'\n"}]}},"/v2/rag":{"post":{"summary":"Rag App","description":"Execute a RAG (Retrieval-Augmented Generation) query.\n\nThis endpoint combines search results with language model generation.\nIt supports the same filtering capabilities as the search endpoint,\nallowing for precise control over the retrieved context.\n\nThe generation process can be customized using the rag_generation_config parameter.","operationId":"rag_app_v2_rag_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_rag_app_v2_rag_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.rag(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True,\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/rag\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n }'\n"}]}},"/v2/agent":{"post":{"summary":"Agent App","description":"Implement an agent-based interaction for complex query processing.\n\nThis endpoint supports multi-turn conversations and can handle complex queries\nby breaking them down into sub-tasks. It uses the same filtering capabilities\nas the search and RAG endpoints for retrieving relevant information.\n\nThe agent's behavior can be customized using the rag_generation_config and\ntask_prompt_override parameters.","operationId":"agent_app_v2_agent_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_agent_app_v2_agent_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGAgentResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.agent(\n messages=[\n {\"role\": \"user\", \"content\": \"Who is the greatest philospher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philospher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n include_title_if_available=True\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/agent\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"messages\": [\n {\"role\": \"user\", \"content\": \"Who is the greatest philospher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philospher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": false # to enable this, please read the graphrag cookbook\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n \"include_title_if_available\": true\n }'\n"}]}},"/v2/completion":{"post":{"summary":"Completion","description":"Generate completions for a list of messages.\n\nThis endpoint uses the language model to generate completions for the provided messages.\nThe generation process can be customized using the generation_config parameter.","operationId":"completion_v2_completion_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_completion_v2_completion_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/register":{"post":{"summary":"Register App","description":"Register a new user with the given email and password.","operationId":"register_app_v2_register_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_register_app_v2_register_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/verify_email":{"post":{"summary":"Verify Email App","description":"Verify a user's email address.\n\nThis endpoint is used to confirm a user's email address using the verification code\nsent to their email after registration.","operationId":"verify_email_app_v2_verify_email_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_verify_email_app_v2_verify_email_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/login":{"post":{"summary":"Login App","description":"Authenticate a user and provide access tokens.\n\nThis endpoint authenticates a user using their email (username) and password,\nand returns access and refresh tokens upon successful authentication.","operationId":"login_app_v2_login_post","requestBody":{"content":{"application/x-www-form-urlencoded":{"schema":{"$ref":"#/components/schemas/Body_login_app_v2_login_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logout":{"post":{"summary":"Logout App","description":"Log out the current user.\n\nThis endpoint invalidates the user's current access token, effectively logging them out.","operationId":"logout_app_v2_logout_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}}},"security":[{"OAuth2PasswordBearer":[]},{"HTTPBearer":[]}]}},"/v2/user":{"get":{"summary":"Get User App","description":"Get the current user's profile information.\n\nThis endpoint returns the profile information of the currently authenticated user.","operationId":"get_user_app_v2_user_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}}},"security":[{"HTTPBearer":[]}]},"put":{"summary":"Put User App","description":"Update the current user's profile information.\n\nThis endpoint allows the authenticated user to update their profile information.","operationId":"put_user_app_v2_user_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_put_user_app_v2_user_put"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/refresh_access_token":{"post":{"summary":"Refresh Access Token App","description":"Refresh the access token using a refresh token.\n\nThis endpoint allows users to obtain a new access token using their refresh token.","operationId":"refresh_access_token_app_v2_refresh_access_token_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","title":"Refresh Token","description":"Refresh token"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/change_password":{"post":{"summary":"Change Password App","description":"Change the authenticated user's password.\n\nThis endpoint allows users to change their password by providing their current password\nand a new password.","operationId":"change_password_app_v2_change_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_change_password_app_v2_change_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/request_password_reset":{"post":{"summary":"Request Password Reset App","description":"Request a password reset for a user.\n\nThis endpoint initiates the password reset process by sending a reset link\nto the specified email address.","operationId":"request_password_reset_app_v2_request_password_reset_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","format":"email","title":"Email","description":"User's email address"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/reset_password":{"post":{"summary":"Reset Password App","operationId":"reset_password_app_v2_reset_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_reset_password_app_v2_reset_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user/{user_id}":{"delete":{"summary":"Delete User App","description":"Delete a user account.\n\nThis endpoint allows users to delete their own account or, for superusers,\nto delete any user account.","operationId":"delete_user_app_v2_user__user_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"ID of the user to delete","title":"User Id"},"description":"ID of the user to delete"}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_user_app_v2_user__user_id__delete"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_graph":{"post":{"summary":"Create Graph","description":"Creating a graph on your documents. This endpoint takes input a list of document ids and KGCreationSettings.\nIf document IDs are not provided, the graph will be created on all documents in the system.\nThis step extracts the relevant entities and relationships from the documents and creates a graph based on the extracted information.\nIn order to do GraphRAG, you will need to run the enrich_graph endpoint.","operationId":"create_graph_v2_create_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_graph_v2_create_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_Union_KGCreationResponse__KGCreationEstimationResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/enrich_graph":{"post":{"summary":"Enrich Graph","description":"This endpoint enriches the graph with additional information.\nIt creates communities of nodes based on their similarity and adds embeddings to the graph.\nThis step is necessary for GraphRAG to work.","operationId":"enrich_graph_v2_enrich_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_enrich_graph_v2_enrich_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_Union_KGEnrichmentResponse__KGEnrichmentEstimationResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/entities":{"get":{"summary":"Get Entities","description":"Retrieve entities from the knowledge graph.","operationId":"get_entities_v2_entities_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve entities from.","title":"Collection Id"},"description":"Collection ID to retrieve entities from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"entity_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Entity IDs to filter by.","title":"Entity Ids"},"description":"Entity IDs to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGEntitiesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/triples":{"get":{"summary":"Get Triples","description":"Retrieve triples from the knowledge graph.","operationId":"get_triples_v2_triples_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve triples from.","title":"Collection Id"},"description":"Collection ID to retrieve triples from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"entity_names","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Entity names to filter by.","title":"Entity Names"},"description":"Entity names to filter by."},{"name":"triple_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Triple IDs to filter by.","title":"Triple Ids"},"description":"Triple IDs to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGTriplesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/communities":{"get":{"summary":"Get Communities","description":"Retrieve communities from the knowledge graph.","operationId":"get_communities_v2_communities_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve communities from.","title":"Collection Id"},"description":"Collection ID to retrieve communities from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"levels","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"integer"}},{"type":"null"}],"description":"Levels to filter by.","title":"Levels"},"description":"Levels to filter by."},{"name":"community_numbers","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"integer"}},{"type":"null"}],"description":"Community numbers to filter by.","title":"Community Numbers"},"description":"Community numbers to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGCommunitiesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/openapi_spec":{"get":{"summary":"Openapi Spec","operationId":"openapi_spec_v2_openapi_spec_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AllPromptsResponse":{"properties":{"prompts":{"additionalProperties":{"$ref":"#/components/schemas/PromptResponse"},"type":"object","title":"Prompts"}},"type":"object","required":["prompts"],"title":"AllPromptsResponse"},"AnalyticsResponse":{"properties":{"analytics_data":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Analytics Data"},"filtered_logs":{"type":"object","title":"Filtered Logs"}},"type":"object","required":["filtered_logs"],"title":"AnalyticsResponse"},"AppSettingsResponse":{"properties":{"config":{"type":"object","title":"Config"},"prompts":{"type":"object","title":"Prompts"}},"type":"object","required":["config","prompts"],"title":"AppSettingsResponse"},"Body_add_prompt_app_v2_add_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"type":"string","title":"Template","description":"Prompt template"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name","template"],"title":"Body_add_prompt_app_v2_add_prompt_post"},"Body_add_user_to_collection_app_v2_add_user_to_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_add_user_to_collection_app_v2_add_user_to_collection_post"},"Body_agent_app_v2_agent_post":{"properties":{"message":{"anyOf":[{"$ref":"#/components/schemas/Message"},{"type":"null"}],"description":"Message object"},"messages":{"anyOf":[{"items":{"$ref":"#/components/schemas/Message"},"type":"array"},{"type":"null"}],"title":"Messages","description":"List of message objects (deprecated, use message instead)","deprecated":true},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":true},"conversation_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Conversation Id","description":"The ID of the conversation, a new ID is assigned if not provided"},"branch_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Branch Id","description":"The ID of the branch"}},"type":"object","title":"Body_agent_app_v2_agent_post"},"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"},"Body_change_password_app_v2_change_password_post":{"properties":{"current_password":{"type":"string","title":"Current Password","description":"Current password"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["current_password","new_password"],"title":"Body_change_password_app_v2_change_password_post"},"Body_completion_v2_completion_post":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message"},"type":"array","title":"Messages","description":"The messages to complete"},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"The generation config"}},"type":"object","required":["messages"],"title":"Body_completion_v2_completion_post"},"Body_create_collection_app_v2_create_collection_post":{"properties":{"name":{"type":"string","title":"Name","description":"Collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Collection description","default":""}},"type":"object","required":["name"],"title":"Body_create_collection_app_v2_create_collection_post"},"Body_create_graph_v2_create_graph_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to create graph for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph creation process."},"kg_creation_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Creation Settings","description":"Settings for the graph creation process."}},"type":"object","title":"Body_create_graph_v2_create_graph_post"},"Body_create_vector_index_app_v2_create_vector_index_post":{"properties":{"table_name":{"anyOf":[{"$ref":"#/components/schemas/VectorTableName"},{"type":"null"}],"description":"The name of the vector table to create.","default":"chunks"},"index_method":{"$ref":"#/components/schemas/IndexMethod","description":"The type of vector index to create.","default":"hnsw"},"measure":{"$ref":"#/components/schemas/IndexMeasure","description":"The measure for the index.","default":"cosine_distance"},"index_arguments":{"anyOf":[{"$ref":"#/components/schemas/IndexArgsIVFFlat"},{"$ref":"#/components/schemas/IndexArgsHNSW"},{"type":"null"}],"title":"Index Arguments","description":"The arguments for the index method."},"replace":{"type":"boolean","title":"Replace","description":"Whether to replace an existing index.","default":true},"concurrently":{"type":"boolean","title":"Concurrently","description":"Whether to create the index concurrently.","default":true}},"type":"object","title":"Body_create_vector_index_app_v2_create_vector_index_post"},"Body_delete_user_app_v2_user__user_id__delete":{"properties":{"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password","description":"User's current password"},"delete_vector_data":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Delete Vector Data","description":"Whether to delete the user's vector data","default":false}},"type":"object","title":"Body_delete_user_app_v2_user__user_id__delete"},"Body_enrich_graph_v2_enrich_graph_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to enrich graph for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph enrichment process.","default":"estimate"},"kg_enrichment_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Enrichment Settings","description":"Settings for the graph enrichment process."}},"type":"object","title":"Body_enrich_graph_v2_enrich_graph_post"},"Body_ingest_chunks_app_v2_ingest_chunks_post":{"properties":{"chunks":{"items":{"$ref":"#/components/schemas/RawChunk"},"type":"array","title":"Chunks","description":"A list of text chunks to ingest into the system.","default":{}},"document_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document Id","description":"An optional document id to associate the chunks with. If not provided, a unique document id will be generated."},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","title":"Body_ingest_chunks_app_v2_ingest_chunks_post"},"Body_ingest_files_app_v2_ingest_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to ingest into the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"ingestion_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"title":"Ingestion Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration."}},"type":"object","required":["files"],"title":"Body_ingest_files_app_v2_ingest_files_post"},"Body_login_app_v2_login_post":{"properties":{"grant_type":{"anyOf":[{"type":"string","pattern":"password"},{"type":"null"}],"title":"Grant Type"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"scope":{"type":"string","title":"Scope","default":""},"client_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Id"},"client_secret":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Secret"}},"type":"object","required":["username","password"],"title":"Body_login_app_v2_login_post"},"Body_put_user_app_v2_user_put":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id","description":"ID of the user to update"},"email":{"anyOf":[{"type":"string","format":"email"},{"type":"null"}],"title":"Email","description":"Updated email address"},"is_superuser":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Is Superuser","description":"Updated superuser status"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated user name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio","description":"Updated user bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture","description":"Updated profile picture URL"}},"type":"object","title":"Body_put_user_app_v2_user_put"},"Body_rag_app_v2_rag_post":{"properties":{"query":{"type":"string","title":"Query","description":"RAG query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":false}},"type":"object","required":["query"],"title":"Body_rag_app_v2_rag_post"},"Body_register_app_v2_register_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"password":{"type":"string","title":"Password","description":"User's password"}},"type":"object","required":["email","password"],"title":"Body_register_app_v2_register_post"},"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph enrichment process.","default":"estimate"},"kg_enrichment_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Enrichment Settings","description":"Settings for the graph enrichment process."}},"type":"object","required":["document_id","collection_id"],"title":"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"},"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"},"Body_reset_password_app_v2_reset_password_post":{"properties":{"reset_token":{"type":"string","title":"Reset Token","description":"Password reset token"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["reset_token","new_password"],"title":"Body_reset_password_app_v2_reset_password_post"},"Body_search_app_v2_search_post":{"properties":{"query":{"type":"string","title":"Query","description":"Search query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"}},"type":"object","required":["query"],"title":"Body_search_app_v2_search_post"},"Body_update_collection_app_v2_update_collection_put":{"properties":{"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Updated collection description"}},"type":"object","required":["collection_id"],"title":"Body_update_collection_app_v2_update_collection_put"},"Body_update_files_app_v2_update_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to update in the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"ingestion_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"title":"Ingestion Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration."}},"type":"object","required":["files"],"title":"Body_update_files_app_v2_update_files_post"},"Body_update_prompt_app_v2_update_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Template","description":"Prompt template"},"input_types":{"anyOf":[{"additionalProperties":{"type":"string"},"type":"object"},{"type":"null"}],"title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name"],"title":"Body_update_prompt_app_v2_update_prompt_post"},"Body_verify_email_app_v2_verify_email_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"verification_code":{"type":"string","title":"Verification Code","description":"Email verification code"}},"type":"object","required":["email","verification_code"],"title":"Body_verify_email_app_v2_verify_email_post"},"CollectionOverviewResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"user_count":{"type":"integer","title":"User Count"},"document_count":{"type":"integer","title":"Document Count"}},"type":"object","required":["collection_id","name","description","created_at","updated_at","user_count","document_count"],"title":"CollectionOverviewResponse"},"CollectionResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"}},"type":"object","required":["collection_id","name","description","created_at","updated_at"],"title":"CollectionResponse"},"CommunityReport":{"properties":{"community_number":{"type":"integer","title":"Community Number"},"level":{"type":"integer","title":"Level"},"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name","default":""},"summary":{"type":"string","title":"Summary","default":""},"findings":{"items":{"type":"string"},"type":"array","title":"Findings","default":[]},"rating":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Rating"},"rating_explanation":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rating Explanation"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Embedding"},"attributes":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Attributes"}},"type":"object","required":["community_number","level","collection_id"],"title":"CommunityReport"},"CreateVectorIndexResponse":{"properties":{"message":{"type":"string","title":"Message","description":"Vector index creation queued successfully."}},"type":"object","required":["message"],"title":"CreateVectorIndexResponse"},"DocumentChunkResponse":{"properties":{"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["extraction_id","document_id","user_id","collection_ids","text","metadata"],"title":"DocumentChunkResponse"},"DocumentOverviewResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"title":{"type":"string","title":"Title"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"type":{"type":"string","title":"Type"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"ingestion_status":{"type":"string","title":"Ingestion Status"},"kg_extraction_status":{"type":"string","title":"Kg Extraction Status"},"version":{"type":"string","title":"Version"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["id","title","user_id","type","created_at","updated_at","ingestion_status","kg_extraction_status","version","collection_ids","metadata"],"title":"DocumentOverviewResponse"},"Entity":{"properties":{"name":{"type":"string","title":"Name"},"id":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Id"},"category":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Category"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"description_embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Description Embedding"},"community_numbers":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Community Numbers"},"extraction_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Extraction Ids"},"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id"},"document_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Document Id"},"attributes":{"anyOf":[{"type":"object"},{"type":"string"},{"type":"null"}],"title":"Attributes"}},"type":"object","required":["name"],"title":"Entity","description":"An entity extracted from a document."},"GenerationConfig":{"properties":{"model":{"type":"string","title":"Model"},"temperature":{"type":"number","title":"Temperature"},"top_p":{"type":"number","title":"Top P"},"max_tokens_to_sample":{"type":"integer","title":"Max Tokens To Sample"},"stream":{"type":"boolean","title":"Stream"},"functions":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Functions"},"tools":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tools"},"add_generation_kwargs":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Add Generation Kwargs"},"api_base":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api Base"}},"type":"object","title":"GenerationConfig","max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"GenericMessageResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"GenericMessageResponse"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"HybridSearchSettings":{"properties":{"full_text_weight":{"type":"number","title":"Full Text Weight","description":"Weight to apply to full text search","default":1.0},"semantic_weight":{"type":"number","title":"Semantic Weight","description":"Weight to apply to semantic search","default":5.0},"full_text_limit":{"type":"integer","title":"Full Text Limit","description":"Maximum number of results to return from full text search","default":200},"rrf_k":{"type":"integer","title":"Rrf K","description":"K-value for RRF (Rank Reciprocal Fusion)","default":50}},"type":"object","title":"HybridSearchSettings"},"IndexArgsHNSW":{"properties":{"m":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"M","default":16},"ef_construction":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Ef Construction","default":64}},"type":"object","title":"IndexArgsHNSW","description":"A class for arguments that can optionally be supplied to the index creation\nmethod when building an HNSW type index.\n\nRef: https://github.com/pgvector/pgvector#index-options\n\nBoth attributes are Optional in case the user only wants to specify one and\nleave the other as default\n\nAttributes:\n m (int): Maximum number of connections per node per layer (default: 16)\n ef_construction (int): Size of the dynamic candidate list for\n constructing the graph (default: 64)"},"IndexArgsIVFFlat":{"properties":{"n_lists":{"type":"integer","title":"N Lists"}},"type":"object","required":["n_lists"],"title":"IndexArgsIVFFlat","description":"A class for arguments that can optionally be supplied to the index creation\nmethod when building an IVFFlat type index.\n\nAttributes:\n nlist (int): The number of IVF centroids that the index should use"},"IndexMeasure":{"type":"string","enum":["cosine_distance","l2_distance","max_inner_product"],"title":"IndexMeasure","description":"An enum representing the types of distance measures available for indexing.\n\nAttributes:\n cosine_distance (str): The cosine distance measure for indexing.\n l2_distance (str): The Euclidean (L2) distance measure for indexing.\n max_inner_product (str): The maximum inner product measure for indexing."},"IndexMethod":{"type":"string","enum":["auto","ivfflat","hnsw"],"title":"IndexMethod","description":"An enum representing the index methods available.\n\nThis class currently only supports the 'ivfflat' method but may\nexpand in the future.\n\nAttributes:\n auto (str): Automatically choose the best available index method.\n ivfflat (str): The ivfflat index method.\n hnsw (str): The hnsw index method."},"IngestionResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Task Id","description":"The task ID of the ingestion request."},"document_id":{"type":"string","format":"uuid","title":"Document Id","description":"The ID of the document that was ingested."}},"type":"object","required":["message","document_id"],"title":"IngestionResponse","example":{"document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","message":"Ingestion task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGCommunitiesResponse":{"properties":{"communities":{"items":{"$ref":"#/components/schemas/CommunityReport"},"type":"array","title":"Communities","description":"The list of communities in the graph for the collection."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of communities in the graph."}},"type":"object","required":["communities","total_entries"],"title":"KGCommunitiesResponse","description":"Response for knowledge graph communities.","example":{"communities":[{"collection_ids":["122fdf6a-e116-546b-a8f6-e4cb2e2c0a09"],"community_number":1,"findings":["finding1","finding2"],"id":"1","level":0,"name":"community name","rating":"8","rating_explanation":"rating explanation","summary":"community summary"}],"total_count":1}},"KGCommunityResult":{"properties":{"name":{"type":"string","title":"Name"},"summary":{"type":"string","title":"Summary"},"rating":{"type":"number","title":"Rating"},"rating_explanation":{"type":"string","title":"Rating Explanation"},"findings":{"items":{"type":"string"},"type":"array","title":"Findings"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","summary","rating","rating_explanation","findings"],"title":"KGCommunityResult","findings":["Finding 1","Finding 2"],"metadata":{},"name":"Community Name","rating":9,"rating_explanation":"Rating Explanation","summary":"Community Summary"},"KGCreationEstimationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user.","default":""},"document_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Document Count","description":"The number of documents in the collection."},"number_of_jobs_created":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Number Of Jobs Created","description":"The number of jobs created for the graph creation process."},"total_chunks":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Chunks","description":"The estimated total number of chunks."},"estimated_entities":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Entities","description":"The estimated number of entities in the graph."},"estimated_triples":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Triples","description":"The estimated number of triples in the graph."},"estimated_llm_calls":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Llm Calls","description":"The estimated number of LLM calls in millions."},"estimated_total_in_out_tokens_in_millions":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total In Out Tokens In Millions","description":"The estimated total number of input and output tokens in millions."},"estimated_total_time_in_minutes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total Time In Minutes","description":"The estimated total time to run the graph creation process in minutes."},"estimated_cost_in_usd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Cost In Usd","description":"The estimated cost to run the graph creation process in USD."}},"type":"object","title":"KGCreationEstimationResponse","description":"Response for knowledge graph creation estimation."},"KGCreationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the KG creation request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the KG creation request."}},"type":"object","required":["message","task_id"],"title":"KGCreationResponse","example":{"message":"Graph creation queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGEnrichmentEstimationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user.","default":""},"total_entities":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Entities","description":"The total number of entities in the graph."},"total_triples":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Triples","description":"The total number of triples in the graph."},"estimated_llm_calls":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Llm Calls","description":"The estimated number of LLM calls."},"estimated_total_in_out_tokens_in_millions":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total In Out Tokens In Millions","description":"The estimated total number of input and output tokens in millions."},"estimated_cost_in_usd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Cost In Usd","description":"The estimated cost to run the graph enrichment process."},"estimated_total_time_in_minutes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total Time In Minutes","description":"The estimated total time to run the graph enrichment process."}},"type":"object","title":"KGEnrichmentEstimationResponse","description":"Response for knowledge graph enrichment estimation."},"KGEnrichmentResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the KG enrichment request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the KG enrichment request."}},"type":"object","required":["message","task_id"],"title":"KGEnrichmentResponse","example":{"message":"Graph enrichment queued successfuly.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGEntitiesResponse":{"properties":{"entities":{"items":{"$ref":"#/components/schemas/Entity"},"type":"array","title":"Entities","description":"The list of entities in the graph."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of entities in the graph for the collection or document."}},"type":"object","required":["entities","total_entries"],"title":"KGEntitiesResponse","description":"Response for knowledge graph entities.","example":{"entities":[{"description":"Description 1","id":"1","name":"Entity 1"},{"description":"Description 2","id":"2","name":"Entity 2"}],"total_entries":2}},"KGEntityResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGEntityResult","description":"Entity Description","metadata":{},"name":"Entity Name"},"KGGlobalResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGGlobalResult","description":"Global Result Description","metadata":{},"name":"Global Result Name"},"KGRelationshipResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGRelationshipResult","description":"Relationship Description","metadata":{},"name":"Relationship Name"},"KGRunType":{"type":"string","enum":["estimate","run"],"title":"KGRunType","description":"Type of KG run."},"KGSearchMethod":{"type":"string","enum":["local"],"const":"local","title":"KGSearchMethod"},"KGSearchResult":{"properties":{"method":{"$ref":"#/components/schemas/KGSearchMethod"},"content":{"anyOf":[{"$ref":"#/components/schemas/KGEntityResult"},{"$ref":"#/components/schemas/KGRelationshipResult"},{"$ref":"#/components/schemas/KGCommunityResult"},{"$ref":"#/components/schemas/KGGlobalResult"}],"title":"Content"},"result_type":{"anyOf":[{"$ref":"#/components/schemas/KGSearchResultType"},{"type":"null"}]},"extraction_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Extraction Ids"},"metadata":{"type":"object","title":"Metadata","default":{}}},"type":"object","required":["method","content"],"title":"KGSearchResult","content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"},"KGSearchResultType":{"type":"string","enum":["entity","relationship","community"],"title":"KGSearchResultType"},"KGSearchSettings":{"properties":{"filters":{"type":"object","title":"Filters","description":"Filters to apply to the vector search","deprecated":true},"search_filters":{"type":"object","title":"Search Filters","description":"Filters to apply to the vector search"},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"graphrag_map_system":{"type":"string","title":"Graphrag Map System","description":"The system prompt for the graphrag map prompt.","default":"graphrag_map_system"},"graphrag_reduce_system":{"type":"string","title":"Graphrag Reduce System","description":"The system prompt for the graphrag reduce prompt.","default":"graphrag_reduce_system"},"use_kg_search":{"type":"boolean","title":"Use Kg Search","description":"Whether to use KG search","default":false},"kg_search_type":{"type":"string","title":"Kg Search Type","description":"KG search type","default":"local"},"kg_search_level":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kg Search Level","description":"KG search level"},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"Configuration for text generation during graph search."},"max_community_description_length":{"type":"integer","title":"Max Community Description Length","default":65536},"max_llm_queries_for_global_search":{"type":"integer","title":"Max Llm Queries For Global Search","default":250},"local_search_limits":{"additionalProperties":{"type":"integer"},"type":"object","title":"Local Search Limits","default":{"__Entity__":20,"__Relationship__":20,"__Community__":20}}},"type":"object","title":"KGSearchSettings","generation_config":{"max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"kg_search_level":"0","kg_search_type":"local","local_search_limits":{"__Community__":20,"__Entity__":20,"__Relationship__":20},"max_community_description_length":65536,"max_llm_queries_for_global_search":250,"use_kg_search":true},"KGTriplesResponse":{"properties":{"triples":{"items":{"$ref":"#/components/schemas/Triple"},"type":"array","title":"Triples","description":"The list of triples in the graph."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of triples in the graph for the collection or document."}},"type":"object","required":["triples","total_entries"],"title":"KGTriplesResponse","description":"Response for knowledge graph triples.","example":{"total_entries":2,"triples":[{"description":"Paris is the capital of France","object":"France","predicate":"is capital of","subject":"Paris"}]}},"LogEntry":{"properties":{"key":{"type":"string","title":"Key"},"value":{"title":"Value"},"timestamp":{"type":"string","format":"date-time","title":"Timestamp"}},"type":"object","required":["key","value","timestamp"],"title":"LogEntry"},"LogResponse":{"properties":{"run_id":{"type":"string","format":"uuid","title":"Run Id"},"run_type":{"type":"string","title":"Run Type"},"entries":{"items":{"$ref":"#/components/schemas/LogEntry"},"type":"array","title":"Entries"},"timestamp":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Timestamp"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"}},"type":"object","required":["run_id","run_type","entries","timestamp","user_id"],"title":"LogResponse"},"Message":{"properties":{"role":{"anyOf":[{"$ref":"#/components/schemas/MessageType"},{"type":"string"}],"title":"Role"},"content":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"function_call":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Function Call"},"tool_calls":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tool Calls"}},"type":"object","required":["role"],"title":"Message","content":"This is a test message.","role":"user"},"MessageType":{"type":"string","enum":["system","user","assistant","function","tool"],"title":"MessageType"},"PaginatedResultsWrapper_list_CollectionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[CollectionResponse]]"},"PaginatedResultsWrapper_list_DocumentChunkResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentChunkResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[DocumentChunkResponse]]"},"PaginatedResultsWrapper_list_DocumentOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[DocumentOverviewResponse]]"},"PaginatedResultsWrapper_list_UserOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/UserOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[UserOverviewResponse]]"},"PaginatedResultsWrapper_list_UserResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/shared__api__models__management__responses__UserResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[UserResponse]]"},"PromptResponse":{"properties":{"name":{"type":"string","title":"Name"},"template":{"type":"string","title":"Template"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types"}},"type":"object","required":["name","template","created_at","updated_at","input_types"],"title":"PromptResponse"},"RAGAgentResponse":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message"},"type":"array","title":"Messages","description":"List of messages in the RAG agent response"},"conversation_id":{"type":"string","title":"Conversation Id","description":"The conversation ID for the RAG agent response"}},"type":"object","required":["messages","conversation_id"],"title":"RAGAgentResponse","example":[{"content":"## You are a helpful assistant that can search for information.\n\nWhen asked a question, perform a search to find relevant information and provide a response.\n\nThe response should contain line-item attributions to relevent search results, and be as informative if possible.\nIf no relevant results are found, then state that no results were found.\nIf no obvious question is present, then do not carry out a search, and instead ask for clarification.","role":"system"},{"content":"You are a helpful assistant.","role":"system"},{"content":"Who is the greatest philospher of all time?","role":"user"},{"content":"Aristotle is widely considered the greatest philospher of all time.","role":"assistant"},{"content":"Can you tell me more about him?","role":"user"},{"function_call":{"arguments":"{\"query\":\"Aristotle biography\"}","name":"search"},"role":"assistant"},{"content":"1. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n2. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n3. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n4. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n5. Life\nIn general, the details of Aristotle's life are not well-established. The biographies written in ancient times are often speculative and historians only agree on a few salient points.[B]\n","name":"search","role":"function"},{"content":"Aristotle (384–322 BC) was an Ancient Greek philosopher and polymath whose contributions have had a profound impact on various fields of knowledge. Here are some key points about his life and work:\n\n1. **Early Life**: Aristotle was born in 384 BC in Stagira, Chalcidice, which is near modern-day Thessaloniki, Greece. His father, Nicomachus, was the personal physician to King Amyntas of Macedon, which exposed Aristotle to medical and biological knowledge from a young age [C].\n\n2. **Education and Career**: After the death of his parents, Aristotle was sent to Athens to study at Plato's Academy, where he remained for about 20 years. After Plato's death, Aristotle left Athens and eventually became the tutor of Alexander the Great [C].\n\n3. **Philosophical Contributions**: Aristotle founded the Lyceum in Athens, where he established the Peripatetic school of philosophy. His works cover a wide range of subjects, including metaphysics, ethics, politics, logic, biology, and aesthetics. His writings laid the groundwork for many modern scientific and philosophical inquiries [A].\n\n4. **Legacy**: Aristotle's influence extends beyond philosophy to the natural sciences, linguistics, economics, and psychology. His method of systematic observation and analysis has been foundational to the development of modern science [A].\n\nAristotle's comprehensive approach to knowledge and his systematic methodology have earned him a lasting legacy as one of the greatest philosophers of all time.\n\nSources:\n- [A] Aristotle's broad range of writings and influence on modern science.\n- [C] Details about Aristotle's early life and education.","role":"assistant"}]},"RAGResponse":{"properties":{"completion":{"title":"Completion","description":"The generated completion from the RAG process"},"search_results":{"$ref":"#/components/schemas/SearchResponse","description":"The search results used for the RAG process"}},"type":"object","required":["completion","search_results"],"title":"RAGResponse","example":{"completion":{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Paris is the capital of France.","role":"assistant"}}],"id":"chatcmpl-example123"},"search_results":{"kg_search_results":[{"content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}}},"RawChunk":{"properties":{"text":{"type":"string","title":"Text"}},"type":"object","required":["text"],"title":"RawChunk"},"ResultsWrapper_AllPromptsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AllPromptsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AllPromptsResponse]"},"ResultsWrapper_AnalyticsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AnalyticsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AnalyticsResponse]"},"ResultsWrapper_AppSettingsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AppSettingsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AppSettingsResponse]"},"ResultsWrapper_CollectionResponse_":{"properties":{"results":{"$ref":"#/components/schemas/CollectionResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[CollectionResponse]"},"ResultsWrapper_CreateVectorIndexResponse_":{"properties":{"results":{"$ref":"#/components/schemas/CreateVectorIndexResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[CreateVectorIndexResponse]"},"ResultsWrapper_GenericMessageResponse_":{"properties":{"results":{"$ref":"#/components/schemas/GenericMessageResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[GenericMessageResponse]"},"ResultsWrapper_KGCommunitiesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGCommunitiesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGCommunitiesResponse]"},"ResultsWrapper_KGEntitiesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGEntitiesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGEntitiesResponse]"},"ResultsWrapper_KGTriplesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGTriplesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGTriplesResponse]"},"ResultsWrapper_NoneType_":{"properties":{"results":{"type":"null","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[NoneType]"},"ResultsWrapper_RAGAgentResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGAgentResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGAgentResponse]"},"ResultsWrapper_RAGResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGResponse]"},"ResultsWrapper_SearchResponse_":{"properties":{"results":{"$ref":"#/components/schemas/SearchResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[SearchResponse]"},"ResultsWrapper_ServerStats_":{"properties":{"results":{"$ref":"#/components/schemas/ServerStats"}},"type":"object","required":["results"],"title":"ResultsWrapper[ServerStats]"},"ResultsWrapper_TokenResponse_":{"properties":{"results":{"$ref":"#/components/schemas/TokenResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[TokenResponse]"},"ResultsWrapper_Union_KGCreationResponse__KGCreationEstimationResponse__":{"properties":{"results":{"anyOf":[{"$ref":"#/components/schemas/KGCreationResponse"},{"$ref":"#/components/schemas/KGCreationEstimationResponse"}],"title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[Union[KGCreationResponse, KGCreationEstimationResponse]]"},"ResultsWrapper_Union_KGEnrichmentResponse__KGEnrichmentEstimationResponse__":{"properties":{"results":{"anyOf":[{"$ref":"#/components/schemas/KGEnrichmentResponse"},{"$ref":"#/components/schemas/KGEnrichmentEstimationResponse"}],"title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[Union[KGEnrichmentResponse, KGEnrichmentEstimationResponse]]"},"ResultsWrapper_UpdatePromptResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdatePromptResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdatePromptResponse]"},"ResultsWrapper_UpdateResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdateResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdateResponse]"},"ResultsWrapper_UserResponse_":{"properties":{"results":{"$ref":"#/components/schemas/shared__api__models__auth__responses__UserResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UserResponse]"},"ResultsWrapper_list_CollectionOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionOverviewResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[CollectionOverviewResponse]]"},"ResultsWrapper_list_CollectionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[CollectionResponse]]"},"ResultsWrapper_list_IngestionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/IngestionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[IngestionResponse]]"},"ResultsWrapper_list_LogResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/LogResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[LogResponse]]"},"ResultsWrapper_list_Tuple_str__Message___":{"properties":{"results":{"items":{"prefixItems":[{"type":"string"},{"$ref":"#/components/schemas/Message"}],"type":"array","maxItems":2,"minItems":2},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[Tuple[str, Message]]]"},"SearchResponse":{"properties":{"vector_search_results":{"items":{"$ref":"#/components/schemas/VectorSearchResult"},"type":"array","title":"Vector Search Results","description":"List of vector search results"},"kg_search_results":{"anyOf":[{"items":{"$ref":"#/components/schemas/KGSearchResult"},"type":"array"},{"type":"null"}],"title":"Kg Search Results","description":"Knowledge graph search results, if applicable"}},"type":"object","required":["vector_search_results"],"title":"SearchResponse","example":{"kg_search_results":[{"content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}},"ServerStats":{"properties":{"start_time":{"type":"string","format":"date-time","title":"Start Time"},"uptime_seconds":{"type":"number","title":"Uptime Seconds"},"cpu_usage":{"type":"number","title":"Cpu Usage"},"memory_usage":{"type":"number","title":"Memory Usage"}},"type":"object","required":["start_time","uptime_seconds","cpu_usage","memory_usage"],"title":"ServerStats"},"Token":{"properties":{"token":{"type":"string","title":"Token"},"token_type":{"type":"string","title":"Token Type"}},"type":"object","required":["token","token_type"],"title":"Token"},"TokenResponse":{"properties":{"access_token":{"$ref":"#/components/schemas/Token"},"refresh_token":{"$ref":"#/components/schemas/Token"}},"type":"object","required":["access_token","refresh_token"],"title":"TokenResponse"},"Triple":{"properties":{"id":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Id"},"subject":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Subject"},"predicate":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Predicate"},"object":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Object"},"weight":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Weight","default":1.0},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"predicate_embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Predicate Embedding"},"extraction_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Extraction Ids","default":[]},"document_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Document Id"},"attributes":{"anyOf":[{"type":"object"},{"type":"string"}],"title":"Attributes","default":{}}},"type":"object","title":"Triple","description":"A relationship between two entities. This is a generic relationship, and can be used to represent any type of relationship between any two entities."},"UpdatePromptResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"UpdatePromptResponse"},"UpdateResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Task Id","description":"The task ID of the ingestion request."},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids","description":"The ID of the document that was ingested."}},"type":"object","required":["message","document_ids"],"title":"UpdateResponse","example":{"document_ids":["9fbe403b-c11c-5aae-8ade-ef22980c3ad1"],"message":"Update task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"UserOverviewResponse":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id"},"num_files":{"type":"integer","title":"Num Files"},"total_size_in_bytes":{"type":"integer","title":"Total Size In Bytes"},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids"}},"type":"object","required":["user_id","num_files","total_size_in_bytes","document_ids"],"title":"UserOverviewResponse"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"VectorSearchResult":{"properties":{"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"score":{"type":"number","title":"Score"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["extraction_id","document_id","user_id","collection_ids","score","text","metadata"],"title":"VectorSearchResult","description":"Result of a search operation.","collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"},"VectorSearchSettings":{"properties":{"use_vector_search":{"type":"boolean","title":"Use Vector Search","description":"Whether to use vector search","default":true},"use_hybrid_search":{"type":"boolean","title":"Use Hybrid Search","description":"Whether to perform a hybrid search (combining vector and keyword search)","default":false},"filters":{"type":"object","title":"Filters","description":"Filters to apply to the vector search","deprecated":true},"search_filters":{"type":"object","title":"Search Filters","description":"Filters to apply to the vector search"},"search_limit":{"type":"integer","maximum":1000.0,"minimum":1.0,"title":"Search Limit","description":"Maximum number of results to return","default":10},"offset":{"type":"integer","minimum":0.0,"title":"Offset","description":"Offset to paginate search results","default":0},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"index_measure":{"$ref":"#/components/schemas/IndexMeasure","description":"The distance measure to use for indexing","default":"cosine_distance"},"include_values":{"type":"boolean","title":"Include Values","description":"Whether to include search score values in the search results","default":true},"include_metadatas":{"type":"boolean","title":"Include Metadatas","description":"Whether to include element metadata in the search results","default":true},"probes":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Probes","description":"Number of ivfflat index lists to query. Higher increases accuracy but decreases speed.","default":10},"ef_search":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Ef Search","description":"Size of the dynamic candidate list for HNSW index search. Higher increases accuracy but decreases speed.","default":40},"hybrid_search_settings":{"anyOf":[{"$ref":"#/components/schemas/HybridSearchSettings"},{"type":"null"}],"description":"Settings for hybrid search","default":{"full_text_weight":1.0,"semantic_weight":5.0,"full_text_limit":200,"rrf_k":50}},"search_strategy":{"type":"string","title":"Search Strategy","description":"Search strategy to use (e.g., 'default', 'query_fusion', 'hyde')","default":"vanilla"}},"type":"object","title":"VectorSearchSettings","ef_search":40,"filters":{"category":"technology"},"hybrid_search_settings":{"full_text_limit":200,"full_text_weight":1.0,"rrf_k":50,"semantic_weight":5.0},"include_metadata":true,"index_measure":"cosine_distance","limit":20,"offset":0,"probes":10,"selected_collection_ids":["2acb499e-8428-543b-bd85-0d9098718220","3e157b3a-8469-51db-90d9-52e7d896b49b"],"use_hybrid_search":true,"use_vector_search":true},"VectorTableName":{"type":"string","enum":["chunks","entity_embedding","community_report"],"title":"VectorTableName","description":"This enum represents the different tables where we store vectors.\n\n# TODO: change the table name of the chunks table. Right now it is called\n# {r2r_project_name}.{r2r_project_name} due to a bug in the vector class."},"shared__api__models__auth__responses__UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-10-16T15:02:02.412347"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-10-16T15:02:02.412352"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"},"shared__api__models__management__responses__UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-10-16T15:02:02.439825"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-10-16T15:02:02.439830"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"}},"securitySchemes":{"HTTPBearer":{"type":"http","scheme":"bearer"},"OAuth2PasswordBearer":{"type":"oauth2","flows":{"password":{"scopes":{},"tokenUrl":"token"}}}}}} diff --git a/docs/documentation/cli/management.mdx b/docs/documentation/cli/management.mdx index 3ff6c68e5..be11a251f 100644 --- a/docs/documentation/cli/management.mdx +++ b/docs/documentation/cli/management.mdx @@ -119,5 +119,10 @@ r2r document-chunks --document-id doc1 --offset 0 --limit 10 The maximum number of nodes to return. Defaults to 100. + + + An optional value to return the vectors associated with each chunk, defaults to `False`. + + diff --git a/docs/documentation/js-sdk/ingestion.mdx b/docs/documentation/js-sdk/ingestion.mdx index 6450dbb08..0abe1a393 100644 --- a/docs/documentation/js-sdk/ingestion.mdx +++ b/docs/documentation/js-sdk/ingestion.mdx @@ -57,7 +57,7 @@ const ingestResponse = await client.ingestFiles(files, { - The ingestion config override parameter enables developers to customize their R2R chunking strategy at runtime. + The ingestion config override parameter enables developers to customize their R2R chunking strategy at runtime. Learn more about [configuration here](/documentation/configuration/ingestion/parsing_and_chunking). Which chunking provider to use. Options are "r2r", "unstructured_local", or "unstructured_api". @@ -273,7 +273,7 @@ const updateResponse = await client.updateFiles(files, { - The ingestion config override parameter enables developers to customize their R2R chunking strategy at runtime. + The ingestion config override parameter enables developers to customize their R2R chunking strategy at runtime. Learn more about [configuration here](/documentation/configuration/ingestion/parsing_and_chunking). Which chunking provider to use, `r2r` or `unstructured`. Selecting `unstructured` is generally recommended when parsing with `unstructured` or `unstructured_api`. @@ -335,6 +335,12 @@ const documentsOverview = await client.documentsOverview(); An optional array of document IDs to filter the overview. + + An optional value to offset the starting point of fetched results, defaults to `0`. + + + An optional value to limit the fetched results, defaults to `100`. + ### Document Chunks @@ -368,7 +374,15 @@ const chunks = await client.documentChunks(documentId); The ID of the document to retrieve chunks for. - + + An optional value to offset the starting point of fetched results, defaults to `0`. + + + An optional value to limit the fetched results, defaults to `100`. + + + An optional value to return the vectors associated with each chunk, defaults to `False`. + ### Delete Documents diff --git a/docs/documentation/python-sdk/ingestion.mdx b/docs/documentation/python-sdk/ingestion.mdx index 28050b287..fcacc026b 100644 --- a/docs/documentation/python-sdk/ingestion.mdx +++ b/docs/documentation/python-sdk/ingestion.mdx @@ -67,7 +67,7 @@ Refer to the [ingestion configuration](/documentation/configuration/ingestion/pa - The ingestion config override parameter enables developers to customize their R2R chunking strategy at runtime. + The ingestion config override parameter enables developers to customize their R2R chunking strategy at runtime. Learn more about [configuration here](/documentation/configuration/ingestion/parsing_and_chunking). Which R2R ingestion provider to use. Options are "r2r". @@ -287,7 +287,7 @@ The ingestion configuration can be customized analogously to the ingest files en - The ingestion config override parameter enables developers to customize their R2R chunking strategy at runtime. + The ingestion config override parameter enables developers to customize their R2R chunking strategy at runtime. Learn more about [configuration here](/documentation/configuration/ingestion/parsing_and_chunking). Which R2R ingestion provider to use. Options are "r2r". @@ -458,6 +458,13 @@ documents_overview = client.documents_overview() An optional list of document IDs to filter the overview. + + An optional value to offset the starting point of fetched results, defaults to `0`. + + + An optional value to limit the fetched results, defaults to `100`. + + ### Document Chunks @@ -493,6 +500,16 @@ chunks = client.document_chunks(document_id) The ID of the document to retrieve chunks for. + + An optional value to offset the starting point of fetched results, defaults to `0`. + + + An optional value to limit the fetched results, defaults to `100`. + + + An optional value to return the vectors associated with each chunk, defaults to `False`. + + ### Delete Documents diff --git a/py/cli/commands/management.py b/py/cli/commands/management.py index 2f8021ead..db79ae39b 100644 --- a/py/cli/commands/management.py +++ b/py/cli/commands/management.py @@ -126,8 +126,14 @@ def documents_overview(ctx, document_ids, offset, limit): default=None, help="The maximum number of nodes to return. Defaults to 100.", ) +@click.option( + "--include-vectors", + is_flag=True, + default=False, + help="Should the vector be included in the response chunks", +) @pass_context -def document_chunks(ctx, document_id, offset, limit): +def document_chunks(ctx, document_id, offset, limit, include_vectors): """Get chunks of a specific document.""" client = ctx.obj if not document_id: @@ -135,7 +141,9 @@ def document_chunks(ctx, document_id, offset, limit): return with timer(): - chunks_data = client.document_chunks(document_id, offset, limit) + chunks_data = client.document_chunks( + document_id, offset, limit, include_vectors + ) chunks = chunks_data["results"] if not chunks: @@ -150,5 +158,7 @@ def document_chunks(ctx, document_id, offset, limit): click.echo(f"Extraction ID: {chunk.get('id', 'N/A')}") click.echo(f"Text: {chunk.get('text', '')[:100]}...") click.echo(f"Metadata: {chunk.get('metadata', {})}") + if include_vectors: + click.echo(f"Vector: {chunk.get('vector', 'N/A')}") else: click.echo(f"Unexpected chunk format: {chunk}") diff --git a/py/core/main/api/management_router.py b/py/core/main/api/management_router.py index 478dc7627..387fe48a2 100644 --- a/py/core/main/api/management_router.py +++ b/py/core/main/api/management_router.py @@ -367,12 +367,13 @@ async def document_chunks_app( document_id: str = Path(...), offset: Optional[int] = Query(0, ge=0), limit: Optional[int] = Query(100, ge=0), + include_vectors: Optional[bool] = Query(False), auth_user=Depends(self.service.providers.auth.auth_wrapper), ) -> WrappedDocumentChunkResponse: document_uuid = UUID(document_id) document_chunks = await self.service.document_chunks( - document_uuid, offset, limit + document_uuid, offset, limit, include_vectors ) document_chunks_result = document_chunks["results"] diff --git a/py/core/main/services/management_service.py b/py/core/main/services/management_service.py index f355a1f98..2ec16e66a 100644 --- a/py/core/main/services/management_service.py +++ b/py/core/main/services/management_service.py @@ -365,11 +365,15 @@ async def document_chunks( document_id: UUID, offset: int = 0, limit: int = 100, + include_vectors: bool = False, *args, **kwargs, ): return self.providers.database.vector.get_document_chunks( - document_id, offset=offset, limit=limit + document_id, + offset=offset, + limit=limit, + include_vectors=include_vectors, ) @telemetry_event("AssignDocumentToCollection") diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index 25d0829a8..0496bd579 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -1,5 +1,6 @@ import concurrent.futures import copy +import json import logging import time from concurrent.futures import ThreadPoolExecutor @@ -490,16 +491,25 @@ def delete_collection(self, collection_id: str) -> None: raise def get_document_chunks( - self, document_id: str, offset: int = 0, limit: int = -1 + self, + document_id: str, + offset: int = 0, + limit: int = -1, + include_vectors: bool = False, ) -> dict[str, Any]: if not self.collection: raise ValueError("Collection is not initialized.") limit_clause = f"LIMIT {limit}" if limit != -1 else "" table_name = self.collection.table.name + + select_clause = "SELECT extraction_id, document_id, user_id, collection_ids, text, metadata" + if include_vectors: + select_clause += ", vec" + query = text( f""" - SELECT extraction_id, document_id, user_id, collection_ids, text, metadata, COUNT(*) OVER() AS total + {select_clause}, COUNT(*) OVER() AS total FROM {self.project_name}."{table_name}" WHERE document_id = :document_id ORDER BY CAST(metadata->>'chunk_order' AS INTEGER) @@ -518,7 +528,7 @@ def get_document_chunks( total = 0 if results: - total = results[0][6] + total = results[0][-1] # Get the total count from the last column chunks = [ { "extraction_id": result[0], @@ -527,6 +537,9 @@ def get_document_chunks( "collection_ids": result[3], "text": result[4], "metadata": result[5], + "vector": ( + json.loads(result[6]) if include_vectors else None + ), } for result in results ] diff --git a/py/sdk/management.py b/py/sdk/management.py index b18e5baaf..6d234c77a 100644 --- a/py/sdk/management.py +++ b/py/sdk/management.py @@ -248,6 +248,7 @@ async def document_chunks( document_id: str, offset: Optional[int] = None, limit: Optional[int] = None, + include_vectors: Optional[bool] = False, ) -> dict: """ Get the chunks for a document. @@ -263,6 +264,8 @@ async def document_chunks( params["offset"] = offset if limit is not None: params["limit"] = limit + if include_vectors: + params["include_vectors"] = include_vectors if not params: return await client._make_request( "GET", f"document_chunks/{document_id}" diff --git a/py/shared/api/models/management/responses.py b/py/shared/api/models/management/responses.py index 62eeadaac..ea5bf50cc 100644 --- a/py/shared/api/models/management/responses.py +++ b/py/shared/api/models/management/responses.py @@ -106,6 +106,7 @@ class DocumentChunkResponse(BaseModel): collection_ids: list[UUID] text: str metadata: dict[str, Any] + vector: Optional[list[float]] = None KnowledgeGraphResponse = str From 0840cf69ec8440241df64ff86197e38ea7c398e8 Mon Sep 17 00:00:00 2001 From: Nolan Tremelling <34580718+NolanTrem@users.noreply.github.com> Date: Wed, 16 Oct 2024 15:54:27 -0700 Subject: [PATCH 02/27] Allow env var to set the default R2R deployment for the dashboard (#1417) --- py/compose.full.yaml | 2 +- py/compose.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/py/compose.full.yaml b/py/compose.full.yaml index ee2ecb08c..7fcf0a987 100644 --- a/py/compose.full.yaml +++ b/py/compose.full.yaml @@ -372,7 +372,7 @@ services: r2r-dashboard: image: emrgntcmplxty/r2r-dashboard:latest environment: - - NEXT_PUBLIC_API_URL=http://localhost:${R2R_PORT:-7272}/r2r-api + - NEXT_PUBLIC_R2R_DEPLOYMENT_URL=${R2R_DEPLOYMENT_URL:-http://localhost:7272} networks: - r2r-network ports: diff --git a/py/compose.yaml b/py/compose.yaml index 4188e345f..b764817ed 100644 --- a/py/compose.yaml +++ b/py/compose.yaml @@ -115,7 +115,7 @@ services: r2r-dashboard: image: emrgntcmplxty/r2r-dashboard:latest environment: - - NEXT_PUBLIC_API_URL=http://localhost:${R2R_PORT:-7272}/r2r-api + - NEXT_PUBLIC_R2R_DEPLOYMENT_URL=${R2R_DEPLOYMENT_URL:-http://localhost:7272} networks: - r2r-network ports: From c5c0835b701e31734fa555acf9cc500715cd87ee Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty <68796651+emrgnt-cmplxty@users.noreply.github.com> Date: Wed, 16 Oct 2024 17:21:48 -0700 Subject: [PATCH 03/27] Feature/various documentation tweaks (#1422) * Fix async JSON parsing (#1408) * Fix async JSON parsing * Remove score completion from js * clean up js * lockfile * Feature/build custom logger (#1409) * building a custom logger for r2r * fix log * maintain bkwd compat * Feature/add kg description prompt (#1411) * add kg desc prompt * add kg desc prompt * add kg desc prompt * fix prompt name * separate test run freq * task_id check fix * add ingestion docs * updatet * add * rm old prompts * rm old prompots * rm old prompts * rm old prompts * add option to include vectors in document chunks * checkin * update vector * some various documentation tweaks * some various documentation tweaks --------- Co-authored-by: Nolan Tremelling <34580718+NolanTrem@users.noreply.github.com> --- docs/api-reference/openapi.json | 2 +- docs/cookbooks/ingestion.mdx | 21 ++- docs/documentation/cli/ingestion.mdx | 1 - .../ingestion/parsing_and_chunking.mdx | 155 ++++++++++++++++++ .../configuration/retrieval/vector-search.mdx | 3 +- docs/documentation/js-sdk/ingestion.mdx | 16 ++ docs/documentation/js-sdk/retrieval.mdx | 66 +++++++- docs/documentation/python-sdk/ingestion.mdx | 17 ++ docs/documentation/python-sdk/retrieval.mdx | 50 +++++- py/cli/commands/retrieval.py | 2 +- py/core/providers/database/document.py | 18 -- py/core/providers/kg/postgres.py | 3 +- py/shared/abstractions/search.py | 28 +++- 13 files changed, 343 insertions(+), 39 deletions(-) diff --git a/docs/api-reference/openapi.json b/docs/api-reference/openapi.json index c14c0afe0..9435d6da7 100644 --- a/docs/api-reference/openapi.json +++ b/docs/api-reference/openapi.json @@ -1 +1 @@ -{"openapi":"3.1.0","info":{"title":"R2R Application API","version":"1.0.0"},"paths":{"/v2/ingest_files":{"post":{"summary":"Ingest Files App","description":"Ingest files into the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to ingest files and their associated metadatas into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only ingest files for their own access. More expansive collection permissioning is under development.","operationId":"ingest_files_app_v2_ingest_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_ingest_files_app_v2_ingest_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_IngestionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.ingest_files(\n file_paths=[\"pg_essay_1.html\", \"got.txt\"],\n metadatas=[{\"metadata_1\":\"some random metadata\"}, {\"metadata_2\": \"some other random metadata\"}],\n document_ids=None\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/ingest_files\" \\\n -H \"Content-Type: multipart/form-data\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1.html;type=text/html\" \\\n -F \"file=@got.txt;type=text/plain\" \\\n -F 'metadatas=[{},{}]' \\\n -F 'document_ids=null'\n"}]}},"/v2/update_files":{"post":{"summary":"Update Files App","description":"Update existing files in the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to update files and their associated metadatas into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only update their own files. More expansive collection permissioning is under development.","operationId":"update_files_app_v2_update_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_update_files_app_v2_update_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdateResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.update_files(\n file_paths=[\"pg_essay_1_v2.txt\"],\n document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/update_files\" \\\n -H \"Content-Type: multipart/form-data\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1_v2.txt;type=text/plain\" \\\n -F 'document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]'\n"}]}},"/v2/ingest_chunks":{"post":{"summary":"Ingest Chunks App","description":"Ingest text chunks into the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to ingest pre-parsed text chunks into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only ingest chunks for their own access. More expansive collection permissioning is under development.","operationId":"ingest_chunks_app_v2_ingest_chunks_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_ingest_chunks_app_v2_ingest_chunks_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_IngestionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.ingest_chunks(\n chunks=[\n {\n \"text\": \"Another chunk of text\",\n },\n {\n \"text\": \"Yet another chunk of text\",\n },\n {\n \"text\": \"A chunk of text\",\n },\n ],\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/ingest_chunks\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"chunks\": [\n {\n \"text\": \"Another chunk of text\"\n },\n {\n \"text\": \"Yet another chunk of text\"\n },\n {\n \"text\": \"A chunk of text\"\n }\n ],\n \"document_id\": \"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\",\n \"metadata\": {}\n }'\n"}]}},"/v2/create_vector_index":{"post":{"summary":"Create Vector Index App","operationId":"create_vector_index_app_v2_create_vector_index_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_vector_index_app_v2_create_vector_index_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CreateVectorIndexResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/health":{"get":{"summary":"Health Check","operationId":"health_check_v2_health_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/v2/server_stats":{"get":{"summary":"Server Stats","operationId":"server_stats_v2_server_stats_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_ServerStats_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/update_prompt":{"post":{"summary":"Update Prompt App","operationId":"update_prompt_app_v2_update_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_prompt_app_v2_update_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/add_prompt":{"post":{"summary":"Add Prompt App","operationId":"add_prompt_app_v2_add_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_prompt_app_v2_add_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_prompt/{prompt_name}":{"get":{"summary":"Get Prompt App","operationId":"get_prompt_app_v2_get_prompt__prompt_name__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"},{"name":"inputs","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"description":"JSON-encoded prompt inputs","title":"Inputs"},"description":"JSON-encoded prompt inputs"},{"name":"prompt_override","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"description":"Prompt override","title":"Prompt Override"},"description":"Prompt override"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/get_all_prompts":{"get":{"summary":"Get All Prompts App","operationId":"get_all_prompts_app_v2_get_all_prompts_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AllPromptsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_prompt/{prompt_name}":{"delete":{"summary":"Delete Prompt App","operationId":"delete_prompt_app_v2_delete_prompt__prompt_name__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/analytics":{"get":{"summary":"Get Analytics App","operationId":"get_analytics_app_v2_analytics_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filter_criteria","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Filter Criteria"}},{"name":"analysis_types","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Analysis Types"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AnalyticsResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logs":{"get":{"summary":"Logs App","operationId":"logs_app_v2_logs_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"run_type_filter","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"default":"","title":"Run Type Filter"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_LogResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/app_settings":{"get":{"summary":"App Settings","operationId":"app_settings_v2_app_settings_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AppSettingsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/users_overview":{"get":{"summary":"Users Overview App","operationId":"users_overview_app_v2_users_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"default":[],"title":"User Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_UserOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete":{"delete":{"summary":"Delete App","operationId":"delete_app_v2_delete_delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filters","in":"query","required":true,"schema":{"type":"string","description":"JSON-encoded filters","title":"Filters"},"description":"JSON-encoded filters"}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/download_file/{document_id}":{"get":{"summary":"Download File App","description":"Download a file by its document ID as a stream.","operationId":"download_file_app_v2_download_file__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"}],"responses":{"200":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/documents_overview":{"get":{"summary":"Documents Overview App","operationId":"documents_overview_app_v2_documents_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_ids","in":"query","required":false,"schema":{"type":"array","items":{"type":"string"},"default":[],"title":"Document Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","minimum":-1,"description":"Number of items to return. Use -1 to return all items.","default":100,"title":"Limit"},"description":"Number of items to return. Use -1 to return all items."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/document_chunks/{document_id}":{"get":{"summary":"Document Chunks App","operationId":"document_chunks_app_v2_document_chunks__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","title":"Document Id"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentChunkResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collections_overview":{"get":{"summary":"Collections Overview App","operationId":"collections_overview_app_v2_collections_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"title":"Collection Ids"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","maximum":1000,"minimum":1},{"type":"null"}],"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_collection":{"post":{"summary":"Create Collection App","operationId":"create_collection_app_v2_create_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_collection_app_v2_create_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_collection/{collection_id}":{"get":{"summary":"Get Collection App","operationId":"get_collection_app_v2_get_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/update_collection":{"put":{"summary":"Update Collection App","operationId":"update_collection_app_v2_update_collection_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_collection_app_v2_update_collection_put"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_collection/{collection_id}":{"delete":{"summary":"Delete Collection App","operationId":"delete_collection_app_v2_delete_collection__collection_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/list_collections":{"get":{"summary":"List Collections App","operationId":"list_collections_app_v2_list_collections_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/add_user_to_collection":{"post":{"summary":"Add User To Collection App","operationId":"add_user_to_collection_app_v2_add_user_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_user_to_collection_app_v2_add_user_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_user_from_collection":{"post":{"summary":"Remove User From Collection App","operationId":"remove_user_from_collection_app_v2_remove_user_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_users_in_collection/{collection_id}":{"get":{"summary":"Get Users In Collection App","operationId":"get_users_in_collection_app_v2_get_users_in_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_UserResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user_collections/{user_id}":{"get":{"summary":"Get Collections For User App","operationId":"get_collections_for_user_app_v2_user_collections__user_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"User ID","title":"User Id"},"description":"User ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/assign_document_to_collection":{"post":{"summary":"Assign Document To Collection App","operationId":"assign_document_to_collection_app_v2_assign_document_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_document_from_collection":{"post":{"summary":"Remove Document From Collection App","operationId":"remove_document_from_collection_app_v2_remove_document_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/document_collections/{document_id}":{"get":{"summary":"Document Collections App","operationId":"document_collections_app_v2_document_collections__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collection/{collection_id}/documents":{"get":{"summary":"Documents In Collection App","operationId":"documents_in_collection_app_v2_collection__collection_id__documents_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/conversations/{conversation_id}":{"get":{"summary":"Get Conversation","operationId":"get_conversation_v2_conversations__conversation_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"},{"name":"branch_id","in":"query","required":false,"schema":{"type":"string","description":"Branch ID","title":"Branch Id"},"description":"Branch ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_Tuple_str__Message___"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/search":{"post":{"summary":"Search App","description":"Perform a search query on the vector database and knowledge graph.\n\nThis endpoint allows for complex filtering of search results using PostgreSQL-based queries.\nFilters can be applied to various fields such as document_id, and internal metadata values.\n\n\nAllowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.","operationId":"search_app_v2_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_search_app_v2_search_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_SearchResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.search(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/search\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n }'\n"}]}},"/v2/rag":{"post":{"summary":"Rag App","description":"Execute a RAG (Retrieval-Augmented Generation) query.\n\nThis endpoint combines search results with language model generation.\nIt supports the same filtering capabilities as the search endpoint,\nallowing for precise control over the retrieved context.\n\nThe generation process can be customized using the rag_generation_config parameter.","operationId":"rag_app_v2_rag_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_rag_app_v2_rag_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.rag(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True,\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/rag\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n }'\n"}]}},"/v2/agent":{"post":{"summary":"Agent App","description":"Implement an agent-based interaction for complex query processing.\n\nThis endpoint supports multi-turn conversations and can handle complex queries\nby breaking them down into sub-tasks. It uses the same filtering capabilities\nas the search and RAG endpoints for retrieving relevant information.\n\nThe agent's behavior can be customized using the rag_generation_config and\ntask_prompt_override parameters.","operationId":"agent_app_v2_agent_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_agent_app_v2_agent_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGAgentResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.agent(\n messages=[\n {\"role\": \"user\", \"content\": \"Who is the greatest philospher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philospher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n include_title_if_available=True\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/agent\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"messages\": [\n {\"role\": \"user\", \"content\": \"Who is the greatest philospher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philospher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": false # to enable this, please read the graphrag cookbook\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n \"include_title_if_available\": true\n }'\n"}]}},"/v2/completion":{"post":{"summary":"Completion","description":"Generate completions for a list of messages.\n\nThis endpoint uses the language model to generate completions for the provided messages.\nThe generation process can be customized using the generation_config parameter.","operationId":"completion_v2_completion_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_completion_v2_completion_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/register":{"post":{"summary":"Register App","description":"Register a new user with the given email and password.","operationId":"register_app_v2_register_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_register_app_v2_register_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/verify_email":{"post":{"summary":"Verify Email App","description":"Verify a user's email address.\n\nThis endpoint is used to confirm a user's email address using the verification code\nsent to their email after registration.","operationId":"verify_email_app_v2_verify_email_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_verify_email_app_v2_verify_email_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/login":{"post":{"summary":"Login App","description":"Authenticate a user and provide access tokens.\n\nThis endpoint authenticates a user using their email (username) and password,\nand returns access and refresh tokens upon successful authentication.","operationId":"login_app_v2_login_post","requestBody":{"content":{"application/x-www-form-urlencoded":{"schema":{"$ref":"#/components/schemas/Body_login_app_v2_login_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logout":{"post":{"summary":"Logout App","description":"Log out the current user.\n\nThis endpoint invalidates the user's current access token, effectively logging them out.","operationId":"logout_app_v2_logout_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}}},"security":[{"OAuth2PasswordBearer":[]},{"HTTPBearer":[]}]}},"/v2/user":{"get":{"summary":"Get User App","description":"Get the current user's profile information.\n\nThis endpoint returns the profile information of the currently authenticated user.","operationId":"get_user_app_v2_user_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}}},"security":[{"HTTPBearer":[]}]},"put":{"summary":"Put User App","description":"Update the current user's profile information.\n\nThis endpoint allows the authenticated user to update their profile information.","operationId":"put_user_app_v2_user_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_put_user_app_v2_user_put"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/refresh_access_token":{"post":{"summary":"Refresh Access Token App","description":"Refresh the access token using a refresh token.\n\nThis endpoint allows users to obtain a new access token using their refresh token.","operationId":"refresh_access_token_app_v2_refresh_access_token_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","title":"Refresh Token","description":"Refresh token"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/change_password":{"post":{"summary":"Change Password App","description":"Change the authenticated user's password.\n\nThis endpoint allows users to change their password by providing their current password\nand a new password.","operationId":"change_password_app_v2_change_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_change_password_app_v2_change_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/request_password_reset":{"post":{"summary":"Request Password Reset App","description":"Request a password reset for a user.\n\nThis endpoint initiates the password reset process by sending a reset link\nto the specified email address.","operationId":"request_password_reset_app_v2_request_password_reset_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","format":"email","title":"Email","description":"User's email address"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/reset_password":{"post":{"summary":"Reset Password App","operationId":"reset_password_app_v2_reset_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_reset_password_app_v2_reset_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user/{user_id}":{"delete":{"summary":"Delete User App","description":"Delete a user account.\n\nThis endpoint allows users to delete their own account or, for superusers,\nto delete any user account.","operationId":"delete_user_app_v2_user__user_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"ID of the user to delete","title":"User Id"},"description":"ID of the user to delete"}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_user_app_v2_user__user_id__delete"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_graph":{"post":{"summary":"Create Graph","description":"Creating a graph on your documents. This endpoint takes input a list of document ids and KGCreationSettings.\nIf document IDs are not provided, the graph will be created on all documents in the system.\nThis step extracts the relevant entities and relationships from the documents and creates a graph based on the extracted information.\nIn order to do GraphRAG, you will need to run the enrich_graph endpoint.","operationId":"create_graph_v2_create_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_graph_v2_create_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_Union_KGCreationResponse__KGCreationEstimationResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/enrich_graph":{"post":{"summary":"Enrich Graph","description":"This endpoint enriches the graph with additional information.\nIt creates communities of nodes based on their similarity and adds embeddings to the graph.\nThis step is necessary for GraphRAG to work.","operationId":"enrich_graph_v2_enrich_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_enrich_graph_v2_enrich_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_Union_KGEnrichmentResponse__KGEnrichmentEstimationResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/entities":{"get":{"summary":"Get Entities","description":"Retrieve entities from the knowledge graph.","operationId":"get_entities_v2_entities_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve entities from.","title":"Collection Id"},"description":"Collection ID to retrieve entities from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"entity_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Entity IDs to filter by.","title":"Entity Ids"},"description":"Entity IDs to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGEntitiesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/triples":{"get":{"summary":"Get Triples","description":"Retrieve triples from the knowledge graph.","operationId":"get_triples_v2_triples_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve triples from.","title":"Collection Id"},"description":"Collection ID to retrieve triples from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"entity_names","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Entity names to filter by.","title":"Entity Names"},"description":"Entity names to filter by."},{"name":"triple_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Triple IDs to filter by.","title":"Triple Ids"},"description":"Triple IDs to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGTriplesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/communities":{"get":{"summary":"Get Communities","description":"Retrieve communities from the knowledge graph.","operationId":"get_communities_v2_communities_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve communities from.","title":"Collection Id"},"description":"Collection ID to retrieve communities from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"levels","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"integer"}},{"type":"null"}],"description":"Levels to filter by.","title":"Levels"},"description":"Levels to filter by."},{"name":"community_numbers","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"integer"}},{"type":"null"}],"description":"Community numbers to filter by.","title":"Community Numbers"},"description":"Community numbers to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGCommunitiesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/openapi_spec":{"get":{"summary":"Openapi Spec","operationId":"openapi_spec_v2_openapi_spec_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AllPromptsResponse":{"properties":{"prompts":{"additionalProperties":{"$ref":"#/components/schemas/PromptResponse"},"type":"object","title":"Prompts"}},"type":"object","required":["prompts"],"title":"AllPromptsResponse"},"AnalyticsResponse":{"properties":{"analytics_data":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Analytics Data"},"filtered_logs":{"type":"object","title":"Filtered Logs"}},"type":"object","required":["filtered_logs"],"title":"AnalyticsResponse"},"AppSettingsResponse":{"properties":{"config":{"type":"object","title":"Config"},"prompts":{"type":"object","title":"Prompts"}},"type":"object","required":["config","prompts"],"title":"AppSettingsResponse"},"Body_add_prompt_app_v2_add_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"type":"string","title":"Template","description":"Prompt template"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name","template"],"title":"Body_add_prompt_app_v2_add_prompt_post"},"Body_add_user_to_collection_app_v2_add_user_to_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_add_user_to_collection_app_v2_add_user_to_collection_post"},"Body_agent_app_v2_agent_post":{"properties":{"message":{"anyOf":[{"$ref":"#/components/schemas/Message"},{"type":"null"}],"description":"Message object"},"messages":{"anyOf":[{"items":{"$ref":"#/components/schemas/Message"},"type":"array"},{"type":"null"}],"title":"Messages","description":"List of message objects (deprecated, use message instead)","deprecated":true},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":true},"conversation_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Conversation Id","description":"The ID of the conversation, a new ID is assigned if not provided"},"branch_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Branch Id","description":"The ID of the branch"}},"type":"object","title":"Body_agent_app_v2_agent_post"},"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"},"Body_change_password_app_v2_change_password_post":{"properties":{"current_password":{"type":"string","title":"Current Password","description":"Current password"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["current_password","new_password"],"title":"Body_change_password_app_v2_change_password_post"},"Body_completion_v2_completion_post":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message"},"type":"array","title":"Messages","description":"The messages to complete"},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"The generation config"}},"type":"object","required":["messages"],"title":"Body_completion_v2_completion_post"},"Body_create_collection_app_v2_create_collection_post":{"properties":{"name":{"type":"string","title":"Name","description":"Collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Collection description","default":""}},"type":"object","required":["name"],"title":"Body_create_collection_app_v2_create_collection_post"},"Body_create_graph_v2_create_graph_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to create graph for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph creation process."},"kg_creation_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Creation Settings","description":"Settings for the graph creation process."}},"type":"object","title":"Body_create_graph_v2_create_graph_post"},"Body_create_vector_index_app_v2_create_vector_index_post":{"properties":{"table_name":{"anyOf":[{"$ref":"#/components/schemas/VectorTableName"},{"type":"null"}],"description":"The name of the vector table to create.","default":"chunks"},"index_method":{"$ref":"#/components/schemas/IndexMethod","description":"The type of vector index to create.","default":"hnsw"},"measure":{"$ref":"#/components/schemas/IndexMeasure","description":"The measure for the index.","default":"cosine_distance"},"index_arguments":{"anyOf":[{"$ref":"#/components/schemas/IndexArgsIVFFlat"},{"$ref":"#/components/schemas/IndexArgsHNSW"},{"type":"null"}],"title":"Index Arguments","description":"The arguments for the index method."},"replace":{"type":"boolean","title":"Replace","description":"Whether to replace an existing index.","default":true},"concurrently":{"type":"boolean","title":"Concurrently","description":"Whether to create the index concurrently.","default":true}},"type":"object","title":"Body_create_vector_index_app_v2_create_vector_index_post"},"Body_delete_user_app_v2_user__user_id__delete":{"properties":{"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password","description":"User's current password"},"delete_vector_data":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Delete Vector Data","description":"Whether to delete the user's vector data","default":false}},"type":"object","title":"Body_delete_user_app_v2_user__user_id__delete"},"Body_enrich_graph_v2_enrich_graph_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to enrich graph for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph enrichment process.","default":"estimate"},"kg_enrichment_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Enrichment Settings","description":"Settings for the graph enrichment process."}},"type":"object","title":"Body_enrich_graph_v2_enrich_graph_post"},"Body_ingest_chunks_app_v2_ingest_chunks_post":{"properties":{"chunks":{"items":{"$ref":"#/components/schemas/RawChunk"},"type":"array","title":"Chunks","description":"A list of text chunks to ingest into the system.","default":{}},"document_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document Id","description":"An optional document id to associate the chunks with. If not provided, a unique document id will be generated."},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","title":"Body_ingest_chunks_app_v2_ingest_chunks_post"},"Body_ingest_files_app_v2_ingest_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to ingest into the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"ingestion_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"title":"Ingestion Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration."}},"type":"object","required":["files"],"title":"Body_ingest_files_app_v2_ingest_files_post"},"Body_login_app_v2_login_post":{"properties":{"grant_type":{"anyOf":[{"type":"string","pattern":"password"},{"type":"null"}],"title":"Grant Type"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"scope":{"type":"string","title":"Scope","default":""},"client_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Id"},"client_secret":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Secret"}},"type":"object","required":["username","password"],"title":"Body_login_app_v2_login_post"},"Body_put_user_app_v2_user_put":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id","description":"ID of the user to update"},"email":{"anyOf":[{"type":"string","format":"email"},{"type":"null"}],"title":"Email","description":"Updated email address"},"is_superuser":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Is Superuser","description":"Updated superuser status"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated user name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio","description":"Updated user bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture","description":"Updated profile picture URL"}},"type":"object","title":"Body_put_user_app_v2_user_put"},"Body_rag_app_v2_rag_post":{"properties":{"query":{"type":"string","title":"Query","description":"RAG query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":false}},"type":"object","required":["query"],"title":"Body_rag_app_v2_rag_post"},"Body_register_app_v2_register_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"password":{"type":"string","title":"Password","description":"User's password"}},"type":"object","required":["email","password"],"title":"Body_register_app_v2_register_post"},"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph enrichment process.","default":"estimate"},"kg_enrichment_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Enrichment Settings","description":"Settings for the graph enrichment process."}},"type":"object","required":["document_id","collection_id"],"title":"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"},"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"},"Body_reset_password_app_v2_reset_password_post":{"properties":{"reset_token":{"type":"string","title":"Reset Token","description":"Password reset token"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["reset_token","new_password"],"title":"Body_reset_password_app_v2_reset_password_post"},"Body_search_app_v2_search_post":{"properties":{"query":{"type":"string","title":"Query","description":"Search query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"}},"type":"object","required":["query"],"title":"Body_search_app_v2_search_post"},"Body_update_collection_app_v2_update_collection_put":{"properties":{"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Updated collection description"}},"type":"object","required":["collection_id"],"title":"Body_update_collection_app_v2_update_collection_put"},"Body_update_files_app_v2_update_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to update in the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"ingestion_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"title":"Ingestion Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration."}},"type":"object","required":["files"],"title":"Body_update_files_app_v2_update_files_post"},"Body_update_prompt_app_v2_update_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Template","description":"Prompt template"},"input_types":{"anyOf":[{"additionalProperties":{"type":"string"},"type":"object"},{"type":"null"}],"title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name"],"title":"Body_update_prompt_app_v2_update_prompt_post"},"Body_verify_email_app_v2_verify_email_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"verification_code":{"type":"string","title":"Verification Code","description":"Email verification code"}},"type":"object","required":["email","verification_code"],"title":"Body_verify_email_app_v2_verify_email_post"},"CollectionOverviewResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"user_count":{"type":"integer","title":"User Count"},"document_count":{"type":"integer","title":"Document Count"}},"type":"object","required":["collection_id","name","description","created_at","updated_at","user_count","document_count"],"title":"CollectionOverviewResponse"},"CollectionResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"}},"type":"object","required":["collection_id","name","description","created_at","updated_at"],"title":"CollectionResponse"},"CommunityReport":{"properties":{"community_number":{"type":"integer","title":"Community Number"},"level":{"type":"integer","title":"Level"},"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name","default":""},"summary":{"type":"string","title":"Summary","default":""},"findings":{"items":{"type":"string"},"type":"array","title":"Findings","default":[]},"rating":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Rating"},"rating_explanation":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rating Explanation"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Embedding"},"attributes":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Attributes"}},"type":"object","required":["community_number","level","collection_id"],"title":"CommunityReport"},"CreateVectorIndexResponse":{"properties":{"message":{"type":"string","title":"Message","description":"Vector index creation queued successfully."}},"type":"object","required":["message"],"title":"CreateVectorIndexResponse"},"DocumentChunkResponse":{"properties":{"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["extraction_id","document_id","user_id","collection_ids","text","metadata"],"title":"DocumentChunkResponse"},"DocumentOverviewResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"title":{"type":"string","title":"Title"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"type":{"type":"string","title":"Type"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"ingestion_status":{"type":"string","title":"Ingestion Status"},"kg_extraction_status":{"type":"string","title":"Kg Extraction Status"},"version":{"type":"string","title":"Version"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["id","title","user_id","type","created_at","updated_at","ingestion_status","kg_extraction_status","version","collection_ids","metadata"],"title":"DocumentOverviewResponse"},"Entity":{"properties":{"name":{"type":"string","title":"Name"},"id":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Id"},"category":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Category"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"description_embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Description Embedding"},"community_numbers":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Community Numbers"},"extraction_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Extraction Ids"},"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id"},"document_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Document Id"},"attributes":{"anyOf":[{"type":"object"},{"type":"string"},{"type":"null"}],"title":"Attributes"}},"type":"object","required":["name"],"title":"Entity","description":"An entity extracted from a document."},"GenerationConfig":{"properties":{"model":{"type":"string","title":"Model"},"temperature":{"type":"number","title":"Temperature"},"top_p":{"type":"number","title":"Top P"},"max_tokens_to_sample":{"type":"integer","title":"Max Tokens To Sample"},"stream":{"type":"boolean","title":"Stream"},"functions":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Functions"},"tools":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tools"},"add_generation_kwargs":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Add Generation Kwargs"},"api_base":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api Base"}},"type":"object","title":"GenerationConfig","max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"GenericMessageResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"GenericMessageResponse"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"HybridSearchSettings":{"properties":{"full_text_weight":{"type":"number","title":"Full Text Weight","description":"Weight to apply to full text search","default":1.0},"semantic_weight":{"type":"number","title":"Semantic Weight","description":"Weight to apply to semantic search","default":5.0},"full_text_limit":{"type":"integer","title":"Full Text Limit","description":"Maximum number of results to return from full text search","default":200},"rrf_k":{"type":"integer","title":"Rrf K","description":"K-value for RRF (Rank Reciprocal Fusion)","default":50}},"type":"object","title":"HybridSearchSettings"},"IndexArgsHNSW":{"properties":{"m":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"M","default":16},"ef_construction":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Ef Construction","default":64}},"type":"object","title":"IndexArgsHNSW","description":"A class for arguments that can optionally be supplied to the index creation\nmethod when building an HNSW type index.\n\nRef: https://github.com/pgvector/pgvector#index-options\n\nBoth attributes are Optional in case the user only wants to specify one and\nleave the other as default\n\nAttributes:\n m (int): Maximum number of connections per node per layer (default: 16)\n ef_construction (int): Size of the dynamic candidate list for\n constructing the graph (default: 64)"},"IndexArgsIVFFlat":{"properties":{"n_lists":{"type":"integer","title":"N Lists"}},"type":"object","required":["n_lists"],"title":"IndexArgsIVFFlat","description":"A class for arguments that can optionally be supplied to the index creation\nmethod when building an IVFFlat type index.\n\nAttributes:\n nlist (int): The number of IVF centroids that the index should use"},"IndexMeasure":{"type":"string","enum":["cosine_distance","l2_distance","max_inner_product"],"title":"IndexMeasure","description":"An enum representing the types of distance measures available for indexing.\n\nAttributes:\n cosine_distance (str): The cosine distance measure for indexing.\n l2_distance (str): The Euclidean (L2) distance measure for indexing.\n max_inner_product (str): The maximum inner product measure for indexing."},"IndexMethod":{"type":"string","enum":["auto","ivfflat","hnsw"],"title":"IndexMethod","description":"An enum representing the index methods available.\n\nThis class currently only supports the 'ivfflat' method but may\nexpand in the future.\n\nAttributes:\n auto (str): Automatically choose the best available index method.\n ivfflat (str): The ivfflat index method.\n hnsw (str): The hnsw index method."},"IngestionResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Task Id","description":"The task ID of the ingestion request."},"document_id":{"type":"string","format":"uuid","title":"Document Id","description":"The ID of the document that was ingested."}},"type":"object","required":["message","document_id"],"title":"IngestionResponse","example":{"document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","message":"Ingestion task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGCommunitiesResponse":{"properties":{"communities":{"items":{"$ref":"#/components/schemas/CommunityReport"},"type":"array","title":"Communities","description":"The list of communities in the graph for the collection."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of communities in the graph."}},"type":"object","required":["communities","total_entries"],"title":"KGCommunitiesResponse","description":"Response for knowledge graph communities.","example":{"communities":[{"collection_ids":["122fdf6a-e116-546b-a8f6-e4cb2e2c0a09"],"community_number":1,"findings":["finding1","finding2"],"id":"1","level":0,"name":"community name","rating":"8","rating_explanation":"rating explanation","summary":"community summary"}],"total_count":1}},"KGCommunityResult":{"properties":{"name":{"type":"string","title":"Name"},"summary":{"type":"string","title":"Summary"},"rating":{"type":"number","title":"Rating"},"rating_explanation":{"type":"string","title":"Rating Explanation"},"findings":{"items":{"type":"string"},"type":"array","title":"Findings"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","summary","rating","rating_explanation","findings"],"title":"KGCommunityResult","findings":["Finding 1","Finding 2"],"metadata":{},"name":"Community Name","rating":9,"rating_explanation":"Rating Explanation","summary":"Community Summary"},"KGCreationEstimationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user.","default":""},"document_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Document Count","description":"The number of documents in the collection."},"number_of_jobs_created":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Number Of Jobs Created","description":"The number of jobs created for the graph creation process."},"total_chunks":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Chunks","description":"The estimated total number of chunks."},"estimated_entities":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Entities","description":"The estimated number of entities in the graph."},"estimated_triples":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Triples","description":"The estimated number of triples in the graph."},"estimated_llm_calls":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Llm Calls","description":"The estimated number of LLM calls in millions."},"estimated_total_in_out_tokens_in_millions":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total In Out Tokens In Millions","description":"The estimated total number of input and output tokens in millions."},"estimated_total_time_in_minutes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total Time In Minutes","description":"The estimated total time to run the graph creation process in minutes."},"estimated_cost_in_usd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Cost In Usd","description":"The estimated cost to run the graph creation process in USD."}},"type":"object","title":"KGCreationEstimationResponse","description":"Response for knowledge graph creation estimation."},"KGCreationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the KG creation request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the KG creation request."}},"type":"object","required":["message","task_id"],"title":"KGCreationResponse","example":{"message":"Graph creation queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGEnrichmentEstimationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user.","default":""},"total_entities":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Entities","description":"The total number of entities in the graph."},"total_triples":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Triples","description":"The total number of triples in the graph."},"estimated_llm_calls":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Llm Calls","description":"The estimated number of LLM calls."},"estimated_total_in_out_tokens_in_millions":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total In Out Tokens In Millions","description":"The estimated total number of input and output tokens in millions."},"estimated_cost_in_usd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Cost In Usd","description":"The estimated cost to run the graph enrichment process."},"estimated_total_time_in_minutes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total Time In Minutes","description":"The estimated total time to run the graph enrichment process."}},"type":"object","title":"KGEnrichmentEstimationResponse","description":"Response for knowledge graph enrichment estimation."},"KGEnrichmentResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the KG enrichment request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the KG enrichment request."}},"type":"object","required":["message","task_id"],"title":"KGEnrichmentResponse","example":{"message":"Graph enrichment queued successfuly.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGEntitiesResponse":{"properties":{"entities":{"items":{"$ref":"#/components/schemas/Entity"},"type":"array","title":"Entities","description":"The list of entities in the graph."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of entities in the graph for the collection or document."}},"type":"object","required":["entities","total_entries"],"title":"KGEntitiesResponse","description":"Response for knowledge graph entities.","example":{"entities":[{"description":"Description 1","id":"1","name":"Entity 1"},{"description":"Description 2","id":"2","name":"Entity 2"}],"total_entries":2}},"KGEntityResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGEntityResult","description":"Entity Description","metadata":{},"name":"Entity Name"},"KGGlobalResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGGlobalResult","description":"Global Result Description","metadata":{},"name":"Global Result Name"},"KGRelationshipResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGRelationshipResult","description":"Relationship Description","metadata":{},"name":"Relationship Name"},"KGRunType":{"type":"string","enum":["estimate","run"],"title":"KGRunType","description":"Type of KG run."},"KGSearchMethod":{"type":"string","enum":["local"],"const":"local","title":"KGSearchMethod"},"KGSearchResult":{"properties":{"method":{"$ref":"#/components/schemas/KGSearchMethod"},"content":{"anyOf":[{"$ref":"#/components/schemas/KGEntityResult"},{"$ref":"#/components/schemas/KGRelationshipResult"},{"$ref":"#/components/schemas/KGCommunityResult"},{"$ref":"#/components/schemas/KGGlobalResult"}],"title":"Content"},"result_type":{"anyOf":[{"$ref":"#/components/schemas/KGSearchResultType"},{"type":"null"}]},"extraction_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Extraction Ids"},"metadata":{"type":"object","title":"Metadata","default":{}}},"type":"object","required":["method","content"],"title":"KGSearchResult","content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"},"KGSearchResultType":{"type":"string","enum":["entity","relationship","community"],"title":"KGSearchResultType"},"KGSearchSettings":{"properties":{"filters":{"type":"object","title":"Filters","description":"Filters to apply to the vector search","deprecated":true},"search_filters":{"type":"object","title":"Search Filters","description":"Filters to apply to the vector search"},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"graphrag_map_system":{"type":"string","title":"Graphrag Map System","description":"The system prompt for the graphrag map prompt.","default":"graphrag_map_system"},"graphrag_reduce_system":{"type":"string","title":"Graphrag Reduce System","description":"The system prompt for the graphrag reduce prompt.","default":"graphrag_reduce_system"},"use_kg_search":{"type":"boolean","title":"Use Kg Search","description":"Whether to use KG search","default":false},"kg_search_type":{"type":"string","title":"Kg Search Type","description":"KG search type","default":"local"},"kg_search_level":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kg Search Level","description":"KG search level"},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"Configuration for text generation during graph search."},"max_community_description_length":{"type":"integer","title":"Max Community Description Length","default":65536},"max_llm_queries_for_global_search":{"type":"integer","title":"Max Llm Queries For Global Search","default":250},"local_search_limits":{"additionalProperties":{"type":"integer"},"type":"object","title":"Local Search Limits","default":{"__Entity__":20,"__Relationship__":20,"__Community__":20}}},"type":"object","title":"KGSearchSettings","generation_config":{"max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"kg_search_level":"0","kg_search_type":"local","local_search_limits":{"__Community__":20,"__Entity__":20,"__Relationship__":20},"max_community_description_length":65536,"max_llm_queries_for_global_search":250,"use_kg_search":true},"KGTriplesResponse":{"properties":{"triples":{"items":{"$ref":"#/components/schemas/Triple"},"type":"array","title":"Triples","description":"The list of triples in the graph."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of triples in the graph for the collection or document."}},"type":"object","required":["triples","total_entries"],"title":"KGTriplesResponse","description":"Response for knowledge graph triples.","example":{"total_entries":2,"triples":[{"description":"Paris is the capital of France","object":"France","predicate":"is capital of","subject":"Paris"}]}},"LogEntry":{"properties":{"key":{"type":"string","title":"Key"},"value":{"title":"Value"},"timestamp":{"type":"string","format":"date-time","title":"Timestamp"}},"type":"object","required":["key","value","timestamp"],"title":"LogEntry"},"LogResponse":{"properties":{"run_id":{"type":"string","format":"uuid","title":"Run Id"},"run_type":{"type":"string","title":"Run Type"},"entries":{"items":{"$ref":"#/components/schemas/LogEntry"},"type":"array","title":"Entries"},"timestamp":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Timestamp"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"}},"type":"object","required":["run_id","run_type","entries","timestamp","user_id"],"title":"LogResponse"},"Message":{"properties":{"role":{"anyOf":[{"$ref":"#/components/schemas/MessageType"},{"type":"string"}],"title":"Role"},"content":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"function_call":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Function Call"},"tool_calls":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tool Calls"}},"type":"object","required":["role"],"title":"Message","content":"This is a test message.","role":"user"},"MessageType":{"type":"string","enum":["system","user","assistant","function","tool"],"title":"MessageType"},"PaginatedResultsWrapper_list_CollectionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[CollectionResponse]]"},"PaginatedResultsWrapper_list_DocumentChunkResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentChunkResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[DocumentChunkResponse]]"},"PaginatedResultsWrapper_list_DocumentOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[DocumentOverviewResponse]]"},"PaginatedResultsWrapper_list_UserOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/UserOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[UserOverviewResponse]]"},"PaginatedResultsWrapper_list_UserResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/shared__api__models__management__responses__UserResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[UserResponse]]"},"PromptResponse":{"properties":{"name":{"type":"string","title":"Name"},"template":{"type":"string","title":"Template"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types"}},"type":"object","required":["name","template","created_at","updated_at","input_types"],"title":"PromptResponse"},"RAGAgentResponse":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message"},"type":"array","title":"Messages","description":"List of messages in the RAG agent response"},"conversation_id":{"type":"string","title":"Conversation Id","description":"The conversation ID for the RAG agent response"}},"type":"object","required":["messages","conversation_id"],"title":"RAGAgentResponse","example":[{"content":"## You are a helpful assistant that can search for information.\n\nWhen asked a question, perform a search to find relevant information and provide a response.\n\nThe response should contain line-item attributions to relevent search results, and be as informative if possible.\nIf no relevant results are found, then state that no results were found.\nIf no obvious question is present, then do not carry out a search, and instead ask for clarification.","role":"system"},{"content":"You are a helpful assistant.","role":"system"},{"content":"Who is the greatest philospher of all time?","role":"user"},{"content":"Aristotle is widely considered the greatest philospher of all time.","role":"assistant"},{"content":"Can you tell me more about him?","role":"user"},{"function_call":{"arguments":"{\"query\":\"Aristotle biography\"}","name":"search"},"role":"assistant"},{"content":"1. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n2. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n3. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n4. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n5. Life\nIn general, the details of Aristotle's life are not well-established. The biographies written in ancient times are often speculative and historians only agree on a few salient points.[B]\n","name":"search","role":"function"},{"content":"Aristotle (384–322 BC) was an Ancient Greek philosopher and polymath whose contributions have had a profound impact on various fields of knowledge. Here are some key points about his life and work:\n\n1. **Early Life**: Aristotle was born in 384 BC in Stagira, Chalcidice, which is near modern-day Thessaloniki, Greece. His father, Nicomachus, was the personal physician to King Amyntas of Macedon, which exposed Aristotle to medical and biological knowledge from a young age [C].\n\n2. **Education and Career**: After the death of his parents, Aristotle was sent to Athens to study at Plato's Academy, where he remained for about 20 years. After Plato's death, Aristotle left Athens and eventually became the tutor of Alexander the Great [C].\n\n3. **Philosophical Contributions**: Aristotle founded the Lyceum in Athens, where he established the Peripatetic school of philosophy. His works cover a wide range of subjects, including metaphysics, ethics, politics, logic, biology, and aesthetics. His writings laid the groundwork for many modern scientific and philosophical inquiries [A].\n\n4. **Legacy**: Aristotle's influence extends beyond philosophy to the natural sciences, linguistics, economics, and psychology. His method of systematic observation and analysis has been foundational to the development of modern science [A].\n\nAristotle's comprehensive approach to knowledge and his systematic methodology have earned him a lasting legacy as one of the greatest philosophers of all time.\n\nSources:\n- [A] Aristotle's broad range of writings and influence on modern science.\n- [C] Details about Aristotle's early life and education.","role":"assistant"}]},"RAGResponse":{"properties":{"completion":{"title":"Completion","description":"The generated completion from the RAG process"},"search_results":{"$ref":"#/components/schemas/SearchResponse","description":"The search results used for the RAG process"}},"type":"object","required":["completion","search_results"],"title":"RAGResponse","example":{"completion":{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Paris is the capital of France.","role":"assistant"}}],"id":"chatcmpl-example123"},"search_results":{"kg_search_results":[{"content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}}},"RawChunk":{"properties":{"text":{"type":"string","title":"Text"}},"type":"object","required":["text"],"title":"RawChunk"},"ResultsWrapper_AllPromptsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AllPromptsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AllPromptsResponse]"},"ResultsWrapper_AnalyticsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AnalyticsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AnalyticsResponse]"},"ResultsWrapper_AppSettingsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AppSettingsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AppSettingsResponse]"},"ResultsWrapper_CollectionResponse_":{"properties":{"results":{"$ref":"#/components/schemas/CollectionResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[CollectionResponse]"},"ResultsWrapper_CreateVectorIndexResponse_":{"properties":{"results":{"$ref":"#/components/schemas/CreateVectorIndexResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[CreateVectorIndexResponse]"},"ResultsWrapper_GenericMessageResponse_":{"properties":{"results":{"$ref":"#/components/schemas/GenericMessageResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[GenericMessageResponse]"},"ResultsWrapper_KGCommunitiesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGCommunitiesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGCommunitiesResponse]"},"ResultsWrapper_KGEntitiesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGEntitiesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGEntitiesResponse]"},"ResultsWrapper_KGTriplesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGTriplesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGTriplesResponse]"},"ResultsWrapper_NoneType_":{"properties":{"results":{"type":"null","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[NoneType]"},"ResultsWrapper_RAGAgentResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGAgentResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGAgentResponse]"},"ResultsWrapper_RAGResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGResponse]"},"ResultsWrapper_SearchResponse_":{"properties":{"results":{"$ref":"#/components/schemas/SearchResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[SearchResponse]"},"ResultsWrapper_ServerStats_":{"properties":{"results":{"$ref":"#/components/schemas/ServerStats"}},"type":"object","required":["results"],"title":"ResultsWrapper[ServerStats]"},"ResultsWrapper_TokenResponse_":{"properties":{"results":{"$ref":"#/components/schemas/TokenResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[TokenResponse]"},"ResultsWrapper_Union_KGCreationResponse__KGCreationEstimationResponse__":{"properties":{"results":{"anyOf":[{"$ref":"#/components/schemas/KGCreationResponse"},{"$ref":"#/components/schemas/KGCreationEstimationResponse"}],"title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[Union[KGCreationResponse, KGCreationEstimationResponse]]"},"ResultsWrapper_Union_KGEnrichmentResponse__KGEnrichmentEstimationResponse__":{"properties":{"results":{"anyOf":[{"$ref":"#/components/schemas/KGEnrichmentResponse"},{"$ref":"#/components/schemas/KGEnrichmentEstimationResponse"}],"title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[Union[KGEnrichmentResponse, KGEnrichmentEstimationResponse]]"},"ResultsWrapper_UpdatePromptResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdatePromptResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdatePromptResponse]"},"ResultsWrapper_UpdateResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdateResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdateResponse]"},"ResultsWrapper_UserResponse_":{"properties":{"results":{"$ref":"#/components/schemas/shared__api__models__auth__responses__UserResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UserResponse]"},"ResultsWrapper_list_CollectionOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionOverviewResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[CollectionOverviewResponse]]"},"ResultsWrapper_list_CollectionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[CollectionResponse]]"},"ResultsWrapper_list_IngestionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/IngestionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[IngestionResponse]]"},"ResultsWrapper_list_LogResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/LogResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[LogResponse]]"},"ResultsWrapper_list_Tuple_str__Message___":{"properties":{"results":{"items":{"prefixItems":[{"type":"string"},{"$ref":"#/components/schemas/Message"}],"type":"array","maxItems":2,"minItems":2},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[Tuple[str, Message]]]"},"SearchResponse":{"properties":{"vector_search_results":{"items":{"$ref":"#/components/schemas/VectorSearchResult"},"type":"array","title":"Vector Search Results","description":"List of vector search results"},"kg_search_results":{"anyOf":[{"items":{"$ref":"#/components/schemas/KGSearchResult"},"type":"array"},{"type":"null"}],"title":"Kg Search Results","description":"Knowledge graph search results, if applicable"}},"type":"object","required":["vector_search_results"],"title":"SearchResponse","example":{"kg_search_results":[{"content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}},"ServerStats":{"properties":{"start_time":{"type":"string","format":"date-time","title":"Start Time"},"uptime_seconds":{"type":"number","title":"Uptime Seconds"},"cpu_usage":{"type":"number","title":"Cpu Usage"},"memory_usage":{"type":"number","title":"Memory Usage"}},"type":"object","required":["start_time","uptime_seconds","cpu_usage","memory_usage"],"title":"ServerStats"},"Token":{"properties":{"token":{"type":"string","title":"Token"},"token_type":{"type":"string","title":"Token Type"}},"type":"object","required":["token","token_type"],"title":"Token"},"TokenResponse":{"properties":{"access_token":{"$ref":"#/components/schemas/Token"},"refresh_token":{"$ref":"#/components/schemas/Token"}},"type":"object","required":["access_token","refresh_token"],"title":"TokenResponse"},"Triple":{"properties":{"id":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Id"},"subject":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Subject"},"predicate":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Predicate"},"object":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Object"},"weight":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Weight","default":1.0},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"predicate_embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Predicate Embedding"},"extraction_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Extraction Ids","default":[]},"document_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Document Id"},"attributes":{"anyOf":[{"type":"object"},{"type":"string"}],"title":"Attributes","default":{}}},"type":"object","title":"Triple","description":"A relationship between two entities. This is a generic relationship, and can be used to represent any type of relationship between any two entities."},"UpdatePromptResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"UpdatePromptResponse"},"UpdateResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Task Id","description":"The task ID of the ingestion request."},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids","description":"The ID of the document that was ingested."}},"type":"object","required":["message","document_ids"],"title":"UpdateResponse","example":{"document_ids":["9fbe403b-c11c-5aae-8ade-ef22980c3ad1"],"message":"Update task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"UserOverviewResponse":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id"},"num_files":{"type":"integer","title":"Num Files"},"total_size_in_bytes":{"type":"integer","title":"Total Size In Bytes"},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids"}},"type":"object","required":["user_id","num_files","total_size_in_bytes","document_ids"],"title":"UserOverviewResponse"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"VectorSearchResult":{"properties":{"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"score":{"type":"number","title":"Score"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["extraction_id","document_id","user_id","collection_ids","score","text","metadata"],"title":"VectorSearchResult","description":"Result of a search operation.","collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"},"VectorSearchSettings":{"properties":{"use_vector_search":{"type":"boolean","title":"Use Vector Search","description":"Whether to use vector search","default":true},"use_hybrid_search":{"type":"boolean","title":"Use Hybrid Search","description":"Whether to perform a hybrid search (combining vector and keyword search)","default":false},"filters":{"type":"object","title":"Filters","description":"Filters to apply to the vector search","deprecated":true},"search_filters":{"type":"object","title":"Search Filters","description":"Filters to apply to the vector search"},"search_limit":{"type":"integer","maximum":1000.0,"minimum":1.0,"title":"Search Limit","description":"Maximum number of results to return","default":10},"offset":{"type":"integer","minimum":0.0,"title":"Offset","description":"Offset to paginate search results","default":0},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"index_measure":{"$ref":"#/components/schemas/IndexMeasure","description":"The distance measure to use for indexing","default":"cosine_distance"},"include_values":{"type":"boolean","title":"Include Values","description":"Whether to include search score values in the search results","default":true},"include_metadatas":{"type":"boolean","title":"Include Metadatas","description":"Whether to include element metadata in the search results","default":true},"probes":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Probes","description":"Number of ivfflat index lists to query. Higher increases accuracy but decreases speed.","default":10},"ef_search":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Ef Search","description":"Size of the dynamic candidate list for HNSW index search. Higher increases accuracy but decreases speed.","default":40},"hybrid_search_settings":{"anyOf":[{"$ref":"#/components/schemas/HybridSearchSettings"},{"type":"null"}],"description":"Settings for hybrid search","default":{"full_text_weight":1.0,"semantic_weight":5.0,"full_text_limit":200,"rrf_k":50}},"search_strategy":{"type":"string","title":"Search Strategy","description":"Search strategy to use (e.g., 'default', 'query_fusion', 'hyde')","default":"vanilla"}},"type":"object","title":"VectorSearchSettings","ef_search":40,"filters":{"category":"technology"},"hybrid_search_settings":{"full_text_limit":200,"full_text_weight":1.0,"rrf_k":50,"semantic_weight":5.0},"include_metadata":true,"index_measure":"cosine_distance","limit":20,"offset":0,"probes":10,"selected_collection_ids":["2acb499e-8428-543b-bd85-0d9098718220","3e157b3a-8469-51db-90d9-52e7d896b49b"],"use_hybrid_search":true,"use_vector_search":true},"VectorTableName":{"type":"string","enum":["chunks","entity_embedding","community_report"],"title":"VectorTableName","description":"This enum represents the different tables where we store vectors.\n\n# TODO: change the table name of the chunks table. Right now it is called\n# {r2r_project_name}.{r2r_project_name} due to a bug in the vector class."},"shared__api__models__auth__responses__UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-10-16T15:02:02.412347"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-10-16T15:02:02.412352"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"},"shared__api__models__management__responses__UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-10-16T15:02:02.439825"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-10-16T15:02:02.439830"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"}},"securitySchemes":{"HTTPBearer":{"type":"http","scheme":"bearer"},"OAuth2PasswordBearer":{"type":"oauth2","flows":{"password":{"scopes":{},"tokenUrl":"token"}}}}}} +{"openapi":"3.1.0","info":{"title":"R2R Application API","version":"1.0.0"},"paths":{"/v2/ingest_files":{"post":{"summary":"Ingest Files App","description":"Ingest files into the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to ingest files and their associated metadatas into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only ingest files for their own access. More expansive collection permissioning is under development.","operationId":"ingest_files_app_v2_ingest_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_ingest_files_app_v2_ingest_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_IngestionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.ingest_files(\n file_paths=[\"pg_essay_1.html\", \"got.txt\"],\n metadatas=[{\"metadata_1\":\"some random metadata\"}, {\"metadata_2\": \"some other random metadata\"}],\n document_ids=None\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/ingest_files\" \\\n -H \"Content-Type: multipart/form-data\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1.html;type=text/html\" \\\n -F \"file=@got.txt;type=text/plain\" \\\n -F 'metadatas=[{},{}]' \\\n -F 'document_ids=null'\n"}]}},"/v2/update_files":{"post":{"summary":"Update Files App","description":"Update existing files in the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to update files and their associated metadatas into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only update their own files. More expansive collection permissioning is under development.","operationId":"update_files_app_v2_update_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_update_files_app_v2_update_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdateResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.update_files(\n file_paths=[\"pg_essay_1_v2.txt\"],\n document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/update_files\" \\\n -H \"Content-Type: multipart/form-data\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1_v2.txt;type=text/plain\" \\\n -F 'document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]'\n"}]}},"/v2/ingest_chunks":{"post":{"summary":"Ingest Chunks App","description":"Ingest text chunks into the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to ingest pre-parsed text chunks into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only ingest chunks for their own access. More expansive collection permissioning is under development.","operationId":"ingest_chunks_app_v2_ingest_chunks_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_ingest_chunks_app_v2_ingest_chunks_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_IngestionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.ingest_chunks(\n chunks=[\n {\n \"text\": \"Another chunk of text\",\n },\n {\n \"text\": \"Yet another chunk of text\",\n },\n {\n \"text\": \"A chunk of text\",\n },\n ],\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/ingest_chunks\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"chunks\": [\n {\n \"text\": \"Another chunk of text\"\n },\n {\n \"text\": \"Yet another chunk of text\"\n },\n {\n \"text\": \"A chunk of text\"\n }\n ],\n \"document_id\": \"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\",\n \"metadata\": {}\n }'\n"}]}},"/v2/create_vector_index":{"post":{"summary":"Create Vector Index App","operationId":"create_vector_index_app_v2_create_vector_index_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_vector_index_app_v2_create_vector_index_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CreateVectorIndexResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/health":{"get":{"summary":"Health Check","operationId":"health_check_v2_health_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/v2/server_stats":{"get":{"summary":"Server Stats","operationId":"server_stats_v2_server_stats_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_ServerStats_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/update_prompt":{"post":{"summary":"Update Prompt App","operationId":"update_prompt_app_v2_update_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_prompt_app_v2_update_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/add_prompt":{"post":{"summary":"Add Prompt App","operationId":"add_prompt_app_v2_add_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_prompt_app_v2_add_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_prompt/{prompt_name}":{"get":{"summary":"Get Prompt App","operationId":"get_prompt_app_v2_get_prompt__prompt_name__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"},{"name":"inputs","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"description":"JSON-encoded prompt inputs","title":"Inputs"},"description":"JSON-encoded prompt inputs"},{"name":"prompt_override","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"description":"Prompt override","title":"Prompt Override"},"description":"Prompt override"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/get_all_prompts":{"get":{"summary":"Get All Prompts App","operationId":"get_all_prompts_app_v2_get_all_prompts_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AllPromptsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_prompt/{prompt_name}":{"delete":{"summary":"Delete Prompt App","operationId":"delete_prompt_app_v2_delete_prompt__prompt_name__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/analytics":{"get":{"summary":"Get Analytics App","operationId":"get_analytics_app_v2_analytics_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filter_criteria","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Filter Criteria"}},{"name":"analysis_types","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Analysis Types"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AnalyticsResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logs":{"get":{"summary":"Logs App","operationId":"logs_app_v2_logs_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"run_type_filter","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"default":"","title":"Run Type Filter"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_LogResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/app_settings":{"get":{"summary":"App Settings","operationId":"app_settings_v2_app_settings_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AppSettingsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/users_overview":{"get":{"summary":"Users Overview App","operationId":"users_overview_app_v2_users_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"default":[],"title":"User Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_UserOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete":{"delete":{"summary":"Delete App","operationId":"delete_app_v2_delete_delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filters","in":"query","required":true,"schema":{"type":"string","description":"JSON-encoded filters","title":"Filters"},"description":"JSON-encoded filters"}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/download_file/{document_id}":{"get":{"summary":"Download File App","description":"Download a file by its document ID as a stream.","operationId":"download_file_app_v2_download_file__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"}],"responses":{"200":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/documents_overview":{"get":{"summary":"Documents Overview App","operationId":"documents_overview_app_v2_documents_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_ids","in":"query","required":false,"schema":{"type":"array","items":{"type":"string"},"default":[],"title":"Document Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","minimum":-1,"description":"Number of items to return. Use -1 to return all items.","default":100,"title":"Limit"},"description":"Number of items to return. Use -1 to return all items."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/document_chunks/{document_id}":{"get":{"summary":"Document Chunks App","operationId":"document_chunks_app_v2_document_chunks__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","title":"Document Id"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":100,"title":"Limit"}},{"name":"include_vectors","in":"query","required":false,"schema":{"anyOf":[{"type":"boolean"},{"type":"null"}],"default":false,"title":"Include Vectors"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentChunkResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collections_overview":{"get":{"summary":"Collections Overview App","operationId":"collections_overview_app_v2_collections_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"title":"Collection Ids"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","maximum":1000,"minimum":1},{"type":"null"}],"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_collection":{"post":{"summary":"Create Collection App","operationId":"create_collection_app_v2_create_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_collection_app_v2_create_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_collection/{collection_id}":{"get":{"summary":"Get Collection App","operationId":"get_collection_app_v2_get_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/update_collection":{"put":{"summary":"Update Collection App","operationId":"update_collection_app_v2_update_collection_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_collection_app_v2_update_collection_put"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_collection/{collection_id}":{"delete":{"summary":"Delete Collection App","operationId":"delete_collection_app_v2_delete_collection__collection_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/list_collections":{"get":{"summary":"List Collections App","operationId":"list_collections_app_v2_list_collections_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/add_user_to_collection":{"post":{"summary":"Add User To Collection App","operationId":"add_user_to_collection_app_v2_add_user_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_user_to_collection_app_v2_add_user_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_user_from_collection":{"post":{"summary":"Remove User From Collection App","operationId":"remove_user_from_collection_app_v2_remove_user_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_users_in_collection/{collection_id}":{"get":{"summary":"Get Users In Collection App","operationId":"get_users_in_collection_app_v2_get_users_in_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_UserResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user_collections/{user_id}":{"get":{"summary":"Get Collections For User App","operationId":"get_collections_for_user_app_v2_user_collections__user_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"User ID","title":"User Id"},"description":"User ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/assign_document_to_collection":{"post":{"summary":"Assign Document To Collection App","operationId":"assign_document_to_collection_app_v2_assign_document_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_document_from_collection":{"post":{"summary":"Remove Document From Collection App","operationId":"remove_document_from_collection_app_v2_remove_document_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/document_collections/{document_id}":{"get":{"summary":"Document Collections App","operationId":"document_collections_app_v2_document_collections__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collection/{collection_id}/documents":{"get":{"summary":"Documents In Collection App","operationId":"documents_in_collection_app_v2_collection__collection_id__documents_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/conversations/{conversation_id}":{"get":{"summary":"Get Conversation","operationId":"get_conversation_v2_conversations__conversation_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"},{"name":"branch_id","in":"query","required":false,"schema":{"type":"string","description":"Branch ID","title":"Branch Id"},"description":"Branch ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_Tuple_str__Message___"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/search":{"post":{"summary":"Search App","description":"Perform a search query on the vector database and knowledge graph.\n\nThis endpoint allows for complex filtering of search results using PostgreSQL-based queries.\nFilters can be applied to various fields such as document_id, and internal metadata values.\n\n\nAllowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.","operationId":"search_app_v2_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_search_app_v2_search_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_SearchResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.search(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/search\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n }'\n"}]}},"/v2/rag":{"post":{"summary":"Rag App","description":"Execute a RAG (Retrieval-Augmented Generation) query.\n\nThis endpoint combines search results with language model generation.\nIt supports the same filtering capabilities as the search endpoint,\nallowing for precise control over the retrieved context.\n\nThe generation process can be customized using the rag_generation_config parameter.","operationId":"rag_app_v2_rag_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_rag_app_v2_rag_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.rag(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True,\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/rag\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n }'\n"}]}},"/v2/agent":{"post":{"summary":"Agent App","description":"Implement an agent-based interaction for complex query processing.\n\nThis endpoint supports multi-turn conversations and can handle complex queries\nby breaking them down into sub-tasks. It uses the same filtering capabilities\nas the search and RAG endpoints for retrieving relevant information.\n\nThe agent's behavior can be customized using the rag_generation_config and\ntask_prompt_override parameters.","operationId":"agent_app_v2_agent_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_agent_app_v2_agent_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGAgentResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.agent(\n messages=[\n {\"role\": \"user\", \"content\": \"Who is the greatest philospher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philospher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n include_title_if_available=True\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/agent\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"messages\": [\n {\"role\": \"user\", \"content\": \"Who is the greatest philospher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philospher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": false # to enable this, please read the graphrag cookbook\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n \"include_title_if_available\": true\n }'\n"}]}},"/v2/completion":{"post":{"summary":"Completion","description":"Generate completions for a list of messages.\n\nThis endpoint uses the language model to generate completions for the provided messages.\nThe generation process can be customized using the generation_config parameter.","operationId":"completion_v2_completion_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_completion_v2_completion_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/register":{"post":{"summary":"Register App","description":"Register a new user with the given email and password.","operationId":"register_app_v2_register_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_register_app_v2_register_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/verify_email":{"post":{"summary":"Verify Email App","description":"Verify a user's email address.\n\nThis endpoint is used to confirm a user's email address using the verification code\nsent to their email after registration.","operationId":"verify_email_app_v2_verify_email_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_verify_email_app_v2_verify_email_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/login":{"post":{"summary":"Login App","description":"Authenticate a user and provide access tokens.\n\nThis endpoint authenticates a user using their email (username) and password,\nand returns access and refresh tokens upon successful authentication.","operationId":"login_app_v2_login_post","requestBody":{"content":{"application/x-www-form-urlencoded":{"schema":{"$ref":"#/components/schemas/Body_login_app_v2_login_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logout":{"post":{"summary":"Logout App","description":"Log out the current user.\n\nThis endpoint invalidates the user's current access token, effectively logging them out.","operationId":"logout_app_v2_logout_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}}},"security":[{"OAuth2PasswordBearer":[]},{"HTTPBearer":[]}]}},"/v2/user":{"get":{"summary":"Get User App","description":"Get the current user's profile information.\n\nThis endpoint returns the profile information of the currently authenticated user.","operationId":"get_user_app_v2_user_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}}},"security":[{"HTTPBearer":[]}]},"put":{"summary":"Put User App","description":"Update the current user's profile information.\n\nThis endpoint allows the authenticated user to update their profile information.","operationId":"put_user_app_v2_user_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_put_user_app_v2_user_put"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/refresh_access_token":{"post":{"summary":"Refresh Access Token App","description":"Refresh the access token using a refresh token.\n\nThis endpoint allows users to obtain a new access token using their refresh token.","operationId":"refresh_access_token_app_v2_refresh_access_token_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","title":"Refresh Token","description":"Refresh token"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/change_password":{"post":{"summary":"Change Password App","description":"Change the authenticated user's password.\n\nThis endpoint allows users to change their password by providing their current password\nand a new password.","operationId":"change_password_app_v2_change_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_change_password_app_v2_change_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/request_password_reset":{"post":{"summary":"Request Password Reset App","description":"Request a password reset for a user.\n\nThis endpoint initiates the password reset process by sending a reset link\nto the specified email address.","operationId":"request_password_reset_app_v2_request_password_reset_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","format":"email","title":"Email","description":"User's email address"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/reset_password":{"post":{"summary":"Reset Password App","operationId":"reset_password_app_v2_reset_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_reset_password_app_v2_reset_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user/{user_id}":{"delete":{"summary":"Delete User App","description":"Delete a user account.\n\nThis endpoint allows users to delete their own account or, for superusers,\nto delete any user account.","operationId":"delete_user_app_v2_user__user_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"ID of the user to delete","title":"User Id"},"description":"ID of the user to delete"}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_user_app_v2_user__user_id__delete"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_graph":{"post":{"summary":"Create Graph","description":"Creating a graph on your documents. This endpoint takes input a list of document ids and KGCreationSettings.\nIf document IDs are not provided, the graph will be created on all documents in the system.\nThis step extracts the relevant entities and relationships from the documents and creates a graph based on the extracted information.\nIn order to do GraphRAG, you will need to run the enrich_graph endpoint.","operationId":"create_graph_v2_create_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_graph_v2_create_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_Union_KGCreationResponse__KGCreationEstimationResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/enrich_graph":{"post":{"summary":"Enrich Graph","description":"This endpoint enriches the graph with additional information.\nIt creates communities of nodes based on their similarity and adds embeddings to the graph.\nThis step is necessary for GraphRAG to work.","operationId":"enrich_graph_v2_enrich_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_enrich_graph_v2_enrich_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_Union_KGEnrichmentResponse__KGEnrichmentEstimationResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/entities":{"get":{"summary":"Get Entities","description":"Retrieve entities from the knowledge graph.","operationId":"get_entities_v2_entities_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve entities from.","title":"Collection Id"},"description":"Collection ID to retrieve entities from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"entity_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Entity IDs to filter by.","title":"Entity Ids"},"description":"Entity IDs to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGEntitiesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/triples":{"get":{"summary":"Get Triples","description":"Retrieve triples from the knowledge graph.","operationId":"get_triples_v2_triples_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve triples from.","title":"Collection Id"},"description":"Collection ID to retrieve triples from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"entity_names","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Entity names to filter by.","title":"Entity Names"},"description":"Entity names to filter by."},{"name":"triple_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Triple IDs to filter by.","title":"Triple Ids"},"description":"Triple IDs to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGTriplesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/communities":{"get":{"summary":"Get Communities","description":"Retrieve communities from the knowledge graph.","operationId":"get_communities_v2_communities_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve communities from.","title":"Collection Id"},"description":"Collection ID to retrieve communities from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"levels","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"integer"}},{"type":"null"}],"description":"Levels to filter by.","title":"Levels"},"description":"Levels to filter by."},{"name":"community_numbers","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"integer"}},{"type":"null"}],"description":"Community numbers to filter by.","title":"Community Numbers"},"description":"Community numbers to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGCommunitiesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/openapi_spec":{"get":{"summary":"Openapi Spec","operationId":"openapi_spec_v2_openapi_spec_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AllPromptsResponse":{"properties":{"prompts":{"additionalProperties":{"$ref":"#/components/schemas/PromptResponse"},"type":"object","title":"Prompts"}},"type":"object","required":["prompts"],"title":"AllPromptsResponse"},"AnalyticsResponse":{"properties":{"analytics_data":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Analytics Data"},"filtered_logs":{"type":"object","title":"Filtered Logs"}},"type":"object","required":["filtered_logs"],"title":"AnalyticsResponse"},"AppSettingsResponse":{"properties":{"config":{"type":"object","title":"Config"},"prompts":{"type":"object","title":"Prompts"}},"type":"object","required":["config","prompts"],"title":"AppSettingsResponse"},"Body_add_prompt_app_v2_add_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"type":"string","title":"Template","description":"Prompt template"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name","template"],"title":"Body_add_prompt_app_v2_add_prompt_post"},"Body_add_user_to_collection_app_v2_add_user_to_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_add_user_to_collection_app_v2_add_user_to_collection_post"},"Body_agent_app_v2_agent_post":{"properties":{"message":{"anyOf":[{"$ref":"#/components/schemas/Message"},{"type":"null"}],"description":"Message object"},"messages":{"anyOf":[{"items":{"$ref":"#/components/schemas/Message"},"type":"array"},{"type":"null"}],"title":"Messages","description":"List of message objects (deprecated, use message instead)","deprecated":true},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":true},"conversation_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Conversation Id","description":"The ID of the conversation, a new ID is assigned if not provided"},"branch_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Branch Id","description":"The ID of the branch"}},"type":"object","title":"Body_agent_app_v2_agent_post"},"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"},"Body_change_password_app_v2_change_password_post":{"properties":{"current_password":{"type":"string","title":"Current Password","description":"Current password"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["current_password","new_password"],"title":"Body_change_password_app_v2_change_password_post"},"Body_completion_v2_completion_post":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message"},"type":"array","title":"Messages","description":"The messages to complete"},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"The generation config"}},"type":"object","required":["messages"],"title":"Body_completion_v2_completion_post"},"Body_create_collection_app_v2_create_collection_post":{"properties":{"name":{"type":"string","title":"Name","description":"Collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Collection description","default":""}},"type":"object","required":["name"],"title":"Body_create_collection_app_v2_create_collection_post"},"Body_create_graph_v2_create_graph_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to create graph for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph creation process."},"kg_creation_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Creation Settings","description":"Settings for the graph creation process."}},"type":"object","title":"Body_create_graph_v2_create_graph_post"},"Body_create_vector_index_app_v2_create_vector_index_post":{"properties":{"table_name":{"anyOf":[{"$ref":"#/components/schemas/VectorTableName"},{"type":"null"}],"description":"The name of the vector table to create.","default":"chunks"},"index_method":{"$ref":"#/components/schemas/IndexMethod","description":"The type of vector index to create.","default":"hnsw"},"measure":{"$ref":"#/components/schemas/IndexMeasure","description":"The measure for the index.","default":"cosine_distance"},"index_arguments":{"anyOf":[{"$ref":"#/components/schemas/IndexArgsIVFFlat"},{"$ref":"#/components/schemas/IndexArgsHNSW"},{"type":"null"}],"title":"Index Arguments","description":"The arguments for the index method."},"replace":{"type":"boolean","title":"Replace","description":"Whether to replace an existing index.","default":true},"concurrently":{"type":"boolean","title":"Concurrently","description":"Whether to create the index concurrently.","default":true}},"type":"object","title":"Body_create_vector_index_app_v2_create_vector_index_post"},"Body_delete_user_app_v2_user__user_id__delete":{"properties":{"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password","description":"User's current password"},"delete_vector_data":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Delete Vector Data","description":"Whether to delete the user's vector data","default":false}},"type":"object","title":"Body_delete_user_app_v2_user__user_id__delete"},"Body_enrich_graph_v2_enrich_graph_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to enrich graph for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph enrichment process.","default":"estimate"},"kg_enrichment_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Enrichment Settings","description":"Settings for the graph enrichment process."}},"type":"object","title":"Body_enrich_graph_v2_enrich_graph_post"},"Body_ingest_chunks_app_v2_ingest_chunks_post":{"properties":{"chunks":{"items":{"$ref":"#/components/schemas/RawChunk"},"type":"array","title":"Chunks","description":"A list of text chunks to ingest into the system.","default":{}},"document_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document Id","description":"An optional document id to associate the chunks with. If not provided, a unique document id will be generated."},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","title":"Body_ingest_chunks_app_v2_ingest_chunks_post"},"Body_ingest_files_app_v2_ingest_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to ingest into the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"ingestion_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"title":"Ingestion Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration."}},"type":"object","required":["files"],"title":"Body_ingest_files_app_v2_ingest_files_post"},"Body_login_app_v2_login_post":{"properties":{"grant_type":{"anyOf":[{"type":"string","pattern":"password"},{"type":"null"}],"title":"Grant Type"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"scope":{"type":"string","title":"Scope","default":""},"client_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Id"},"client_secret":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Secret"}},"type":"object","required":["username","password"],"title":"Body_login_app_v2_login_post"},"Body_put_user_app_v2_user_put":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id","description":"ID of the user to update"},"email":{"anyOf":[{"type":"string","format":"email"},{"type":"null"}],"title":"Email","description":"Updated email address"},"is_superuser":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Is Superuser","description":"Updated superuser status"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated user name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio","description":"Updated user bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture","description":"Updated profile picture URL"}},"type":"object","title":"Body_put_user_app_v2_user_put"},"Body_rag_app_v2_rag_post":{"properties":{"query":{"type":"string","title":"Query","description":"RAG query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":false}},"type":"object","required":["query"],"title":"Body_rag_app_v2_rag_post"},"Body_register_app_v2_register_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"password":{"type":"string","title":"Password","description":"User's password"}},"type":"object","required":["email","password"],"title":"Body_register_app_v2_register_post"},"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph enrichment process.","default":"estimate"},"kg_enrichment_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Enrichment Settings","description":"Settings for the graph enrichment process."}},"type":"object","required":["document_id","collection_id"],"title":"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"},"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"},"Body_reset_password_app_v2_reset_password_post":{"properties":{"reset_token":{"type":"string","title":"Reset Token","description":"Password reset token"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["reset_token","new_password"],"title":"Body_reset_password_app_v2_reset_password_post"},"Body_search_app_v2_search_post":{"properties":{"query":{"type":"string","title":"Query","description":"Search query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"}},"type":"object","required":["query"],"title":"Body_search_app_v2_search_post"},"Body_update_collection_app_v2_update_collection_put":{"properties":{"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Updated collection description"}},"type":"object","required":["collection_id"],"title":"Body_update_collection_app_v2_update_collection_put"},"Body_update_files_app_v2_update_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to update in the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"ingestion_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"title":"Ingestion Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration."}},"type":"object","required":["files"],"title":"Body_update_files_app_v2_update_files_post"},"Body_update_prompt_app_v2_update_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Template","description":"Prompt template"},"input_types":{"anyOf":[{"additionalProperties":{"type":"string"},"type":"object"},{"type":"null"}],"title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name"],"title":"Body_update_prompt_app_v2_update_prompt_post"},"Body_verify_email_app_v2_verify_email_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"verification_code":{"type":"string","title":"Verification Code","description":"Email verification code"}},"type":"object","required":["email","verification_code"],"title":"Body_verify_email_app_v2_verify_email_post"},"CollectionOverviewResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"user_count":{"type":"integer","title":"User Count"},"document_count":{"type":"integer","title":"Document Count"}},"type":"object","required":["collection_id","name","description","created_at","updated_at","user_count","document_count"],"title":"CollectionOverviewResponse"},"CollectionResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"}},"type":"object","required":["collection_id","name","description","created_at","updated_at"],"title":"CollectionResponse"},"CommunityReport":{"properties":{"community_number":{"type":"integer","title":"Community Number"},"level":{"type":"integer","title":"Level"},"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name","default":""},"summary":{"type":"string","title":"Summary","default":""},"findings":{"items":{"type":"string"},"type":"array","title":"Findings","default":[]},"rating":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Rating"},"rating_explanation":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rating Explanation"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Embedding"},"attributes":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Attributes"}},"type":"object","required":["community_number","level","collection_id"],"title":"CommunityReport"},"CreateVectorIndexResponse":{"properties":{"message":{"type":"string","title":"Message","description":"Vector index creation queued successfully."}},"type":"object","required":["message"],"title":"CreateVectorIndexResponse"},"DocumentChunkResponse":{"properties":{"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"},"vector":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Vector"}},"type":"object","required":["extraction_id","document_id","user_id","collection_ids","text","metadata"],"title":"DocumentChunkResponse"},"DocumentOverviewResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"title":{"type":"string","title":"Title"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"type":{"type":"string","title":"Type"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"ingestion_status":{"type":"string","title":"Ingestion Status"},"kg_extraction_status":{"type":"string","title":"Kg Extraction Status"},"version":{"type":"string","title":"Version"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["id","title","user_id","type","created_at","updated_at","ingestion_status","kg_extraction_status","version","collection_ids","metadata"],"title":"DocumentOverviewResponse"},"Entity":{"properties":{"name":{"type":"string","title":"Name"},"id":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Id"},"category":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Category"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"description_embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Description Embedding"},"community_numbers":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Community Numbers"},"extraction_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Extraction Ids"},"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id"},"document_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Document Id"},"attributes":{"anyOf":[{"type":"object"},{"type":"string"},{"type":"null"}],"title":"Attributes"}},"type":"object","required":["name"],"title":"Entity","description":"An entity extracted from a document."},"GenerationConfig":{"properties":{"model":{"type":"string","title":"Model"},"temperature":{"type":"number","title":"Temperature"},"top_p":{"type":"number","title":"Top P"},"max_tokens_to_sample":{"type":"integer","title":"Max Tokens To Sample"},"stream":{"type":"boolean","title":"Stream"},"functions":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Functions"},"tools":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tools"},"add_generation_kwargs":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Add Generation Kwargs"},"api_base":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api Base"}},"type":"object","title":"GenerationConfig","max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"GenericMessageResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"GenericMessageResponse"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"HybridSearchSettings":{"properties":{"full_text_weight":{"type":"number","title":"Full Text Weight","description":"Weight to apply to full text search","default":1.0},"semantic_weight":{"type":"number","title":"Semantic Weight","description":"Weight to apply to semantic search","default":5.0},"full_text_limit":{"type":"integer","title":"Full Text Limit","description":"Maximum number of results to return from full text search","default":200},"rrf_k":{"type":"integer","title":"Rrf K","description":"K-value for RRF (Rank Reciprocal Fusion)","default":50}},"type":"object","title":"HybridSearchSettings"},"IndexArgsHNSW":{"properties":{"m":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"M","default":16},"ef_construction":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Ef Construction","default":64}},"type":"object","title":"IndexArgsHNSW","description":"A class for arguments that can optionally be supplied to the index creation\nmethod when building an HNSW type index.\n\nRef: https://github.com/pgvector/pgvector#index-options\n\nBoth attributes are Optional in case the user only wants to specify one and\nleave the other as default\n\nAttributes:\n m (int): Maximum number of connections per node per layer (default: 16)\n ef_construction (int): Size of the dynamic candidate list for\n constructing the graph (default: 64)"},"IndexArgsIVFFlat":{"properties":{"n_lists":{"type":"integer","title":"N Lists"}},"type":"object","required":["n_lists"],"title":"IndexArgsIVFFlat","description":"A class for arguments that can optionally be supplied to the index creation\nmethod when building an IVFFlat type index.\n\nAttributes:\n nlist (int): The number of IVF centroids that the index should use"},"IndexMeasure":{"type":"string","enum":["cosine_distance","l2_distance","max_inner_product"],"title":"IndexMeasure","description":"An enum representing the types of distance measures available for indexing.\n\nAttributes:\n cosine_distance (str): The cosine distance measure for indexing.\n l2_distance (str): The Euclidean (L2) distance measure for indexing.\n max_inner_product (str): The maximum inner product measure for indexing."},"IndexMethod":{"type":"string","enum":["auto","ivfflat","hnsw"],"title":"IndexMethod","description":"An enum representing the index methods available.\n\nThis class currently only supports the 'ivfflat' method but may\nexpand in the future.\n\nAttributes:\n auto (str): Automatically choose the best available index method.\n ivfflat (str): The ivfflat index method.\n hnsw (str): The hnsw index method."},"IngestionResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Task Id","description":"The task ID of the ingestion request."},"document_id":{"type":"string","format":"uuid","title":"Document Id","description":"The ID of the document that was ingested."}},"type":"object","required":["message","document_id"],"title":"IngestionResponse","example":{"document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","message":"Ingestion task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGCommunitiesResponse":{"properties":{"communities":{"items":{"$ref":"#/components/schemas/CommunityReport"},"type":"array","title":"Communities","description":"The list of communities in the graph for the collection."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of communities in the graph."}},"type":"object","required":["communities","total_entries"],"title":"KGCommunitiesResponse","description":"Response for knowledge graph communities.","example":{"communities":[{"collection_ids":["122fdf6a-e116-546b-a8f6-e4cb2e2c0a09"],"community_number":1,"findings":["finding1","finding2"],"id":"1","level":0,"name":"community name","rating":"8","rating_explanation":"rating explanation","summary":"community summary"}],"total_count":1}},"KGCommunityResult":{"properties":{"name":{"type":"string","title":"Name"},"summary":{"type":"string","title":"Summary"},"rating":{"type":"number","title":"Rating"},"rating_explanation":{"type":"string","title":"Rating Explanation"},"findings":{"items":{"type":"string"},"type":"array","title":"Findings"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","summary","rating","rating_explanation","findings"],"title":"KGCommunityResult","findings":["Finding 1","Finding 2"],"metadata":{},"name":"Community Name","rating":9,"rating_explanation":"Rating Explanation","summary":"Community Summary"},"KGCreationEstimationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user.","default":""},"document_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Document Count","description":"The number of documents in the collection."},"number_of_jobs_created":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Number Of Jobs Created","description":"The number of jobs created for the graph creation process."},"total_chunks":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Chunks","description":"The estimated total number of chunks."},"estimated_entities":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Entities","description":"The estimated number of entities in the graph."},"estimated_triples":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Triples","description":"The estimated number of triples in the graph."},"estimated_llm_calls":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Llm Calls","description":"The estimated number of LLM calls in millions."},"estimated_total_in_out_tokens_in_millions":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total In Out Tokens In Millions","description":"The estimated total number of input and output tokens in millions."},"estimated_total_time_in_minutes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total Time In Minutes","description":"The estimated total time to run the graph creation process in minutes."},"estimated_cost_in_usd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Cost In Usd","description":"The estimated cost to run the graph creation process in USD."}},"type":"object","title":"KGCreationEstimationResponse","description":"Response for knowledge graph creation estimation."},"KGCreationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the KG creation request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the KG creation request."}},"type":"object","required":["message","task_id"],"title":"KGCreationResponse","example":{"message":"Graph creation queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGEnrichmentEstimationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user.","default":""},"total_entities":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Entities","description":"The total number of entities in the graph."},"total_triples":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Triples","description":"The total number of triples in the graph."},"estimated_llm_calls":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Llm Calls","description":"The estimated number of LLM calls."},"estimated_total_in_out_tokens_in_millions":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total In Out Tokens In Millions","description":"The estimated total number of input and output tokens in millions."},"estimated_cost_in_usd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Cost In Usd","description":"The estimated cost to run the graph enrichment process."},"estimated_total_time_in_minutes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total Time In Minutes","description":"The estimated total time to run the graph enrichment process."}},"type":"object","title":"KGEnrichmentEstimationResponse","description":"Response for knowledge graph enrichment estimation."},"KGEnrichmentResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the KG enrichment request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the KG enrichment request."}},"type":"object","required":["message","task_id"],"title":"KGEnrichmentResponse","example":{"message":"Graph enrichment queued successfuly.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGEntitiesResponse":{"properties":{"entities":{"items":{"$ref":"#/components/schemas/Entity"},"type":"array","title":"Entities","description":"The list of entities in the graph."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of entities in the graph for the collection or document."}},"type":"object","required":["entities","total_entries"],"title":"KGEntitiesResponse","description":"Response for knowledge graph entities.","example":{"entities":[{"description":"Description 1","id":"1","name":"Entity 1"},{"description":"Description 2","id":"2","name":"Entity 2"}],"total_entries":2}},"KGEntityResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGEntityResult","description":"Entity Description","metadata":{},"name":"Entity Name"},"KGGlobalResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGGlobalResult","description":"Global Result Description","metadata":{},"name":"Global Result Name"},"KGRelationshipResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGRelationshipResult","description":"Relationship Description","metadata":{},"name":"Relationship Name"},"KGRunType":{"type":"string","enum":["estimate","run"],"title":"KGRunType","description":"Type of KG run."},"KGSearchMethod":{"type":"string","enum":["local"],"const":"local","title":"KGSearchMethod"},"KGSearchResult":{"properties":{"method":{"$ref":"#/components/schemas/KGSearchMethod"},"content":{"anyOf":[{"$ref":"#/components/schemas/KGEntityResult"},{"$ref":"#/components/schemas/KGRelationshipResult"},{"$ref":"#/components/schemas/KGCommunityResult"},{"$ref":"#/components/schemas/KGGlobalResult"}],"title":"Content"},"result_type":{"anyOf":[{"$ref":"#/components/schemas/KGSearchResultType"},{"type":"null"}]},"extraction_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Extraction Ids"},"metadata":{"type":"object","title":"Metadata","default":{}}},"type":"object","required":["method","content"],"title":"KGSearchResult","content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"},"KGSearchResultType":{"type":"string","enum":["entity","relationship","community"],"title":"KGSearchResultType"},"KGSearchSettings":{"properties":{"filters":{"type":"object","title":"Filters","description":"Alias for search_filters","deprecated":true},"search_filters":{"type":"object","title":"Search Filters","description":"Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.\n\n Commonly seen filters include operations include the following:\n\n `{\"document_id\": {\"$eq\": \"9fbe403b-...\"}}`\n\n `{\"document_id\": {\"$in\": [\"9fbe403b-...\", \"3e157b3a-...\"]}}`\n\n `{\"collection_ids\": {\"$overlap\": [\"122fdf6a-...\", \"...\"]}}`\n\n `{\"$and\": {\"$document_id\": ..., \"collection_ids\": ...}}`"},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"graphrag_map_system":{"type":"string","title":"Graphrag Map System","description":"The system prompt for the graphrag map prompt.","default":"graphrag_map_system"},"graphrag_reduce_system":{"type":"string","title":"Graphrag Reduce System","description":"The system prompt for the graphrag reduce prompt.","default":"graphrag_reduce_system"},"use_kg_search":{"type":"boolean","title":"Use Kg Search","description":"Whether to use KG search","default":false},"kg_search_type":{"type":"string","title":"Kg Search Type","description":"KG search type","default":"local"},"kg_search_level":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kg Search Level","description":"KG search level"},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"Configuration for text generation during graph search."},"max_community_description_length":{"type":"integer","title":"Max Community Description Length","default":65536},"max_llm_queries_for_global_search":{"type":"integer","title":"Max Llm Queries For Global Search","default":250},"local_search_limits":{"additionalProperties":{"type":"integer"},"type":"object","title":"Local Search Limits","default":{"__Entity__":20,"__Relationship__":20,"__Community__":20}}},"type":"object","title":"KGSearchSettings","generation_config":{"max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"kg_search_level":"0","kg_search_type":"local","local_search_limits":{"__Community__":20,"__Entity__":20,"__Relationship__":20},"max_community_description_length":65536,"max_llm_queries_for_global_search":250,"use_kg_search":true},"KGTriplesResponse":{"properties":{"triples":{"items":{"$ref":"#/components/schemas/Triple"},"type":"array","title":"Triples","description":"The list of triples in the graph."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of triples in the graph for the collection or document."}},"type":"object","required":["triples","total_entries"],"title":"KGTriplesResponse","description":"Response for knowledge graph triples.","example":{"total_entries":2,"triples":[{"description":"Paris is the capital of France","object":"France","predicate":"is capital of","subject":"Paris"}]}},"LogEntry":{"properties":{"key":{"type":"string","title":"Key"},"value":{"title":"Value"},"timestamp":{"type":"string","format":"date-time","title":"Timestamp"}},"type":"object","required":["key","value","timestamp"],"title":"LogEntry"},"LogResponse":{"properties":{"run_id":{"type":"string","format":"uuid","title":"Run Id"},"run_type":{"type":"string","title":"Run Type"},"entries":{"items":{"$ref":"#/components/schemas/LogEntry"},"type":"array","title":"Entries"},"timestamp":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Timestamp"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"}},"type":"object","required":["run_id","run_type","entries","timestamp","user_id"],"title":"LogResponse"},"Message":{"properties":{"role":{"anyOf":[{"$ref":"#/components/schemas/MessageType"},{"type":"string"}],"title":"Role"},"content":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"function_call":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Function Call"},"tool_calls":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tool Calls"}},"type":"object","required":["role"],"title":"Message","content":"This is a test message.","role":"user"},"MessageType":{"type":"string","enum":["system","user","assistant","function","tool"],"title":"MessageType"},"PaginatedResultsWrapper_list_CollectionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[CollectionResponse]]"},"PaginatedResultsWrapper_list_DocumentChunkResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentChunkResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[DocumentChunkResponse]]"},"PaginatedResultsWrapper_list_DocumentOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[DocumentOverviewResponse]]"},"PaginatedResultsWrapper_list_UserOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/UserOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[UserOverviewResponse]]"},"PaginatedResultsWrapper_list_UserResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/shared__api__models__management__responses__UserResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[UserResponse]]"},"PromptResponse":{"properties":{"name":{"type":"string","title":"Name"},"template":{"type":"string","title":"Template"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types"}},"type":"object","required":["name","template","created_at","updated_at","input_types"],"title":"PromptResponse"},"RAGAgentResponse":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message"},"type":"array","title":"Messages","description":"List of messages in the RAG agent response"},"conversation_id":{"type":"string","title":"Conversation Id","description":"The conversation ID for the RAG agent response"}},"type":"object","required":["messages","conversation_id"],"title":"RAGAgentResponse","example":[{"content":"## You are a helpful assistant that can search for information.\n\nWhen asked a question, perform a search to find relevant information and provide a response.\n\nThe response should contain line-item attributions to relevent search results, and be as informative if possible.\nIf no relevant results are found, then state that no results were found.\nIf no obvious question is present, then do not carry out a search, and instead ask for clarification.","role":"system"},{"content":"You are a helpful assistant.","role":"system"},{"content":"Who is the greatest philospher of all time?","role":"user"},{"content":"Aristotle is widely considered the greatest philospher of all time.","role":"assistant"},{"content":"Can you tell me more about him?","role":"user"},{"function_call":{"arguments":"{\"query\":\"Aristotle biography\"}","name":"search"},"role":"assistant"},{"content":"1. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n2. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n3. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n4. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n5. Life\nIn general, the details of Aristotle's life are not well-established. The biographies written in ancient times are often speculative and historians only agree on a few salient points.[B]\n","name":"search","role":"function"},{"content":"Aristotle (384–322 BC) was an Ancient Greek philosopher and polymath whose contributions have had a profound impact on various fields of knowledge. Here are some key points about his life and work:\n\n1. **Early Life**: Aristotle was born in 384 BC in Stagira, Chalcidice, which is near modern-day Thessaloniki, Greece. His father, Nicomachus, was the personal physician to King Amyntas of Macedon, which exposed Aristotle to medical and biological knowledge from a young age [C].\n\n2. **Education and Career**: After the death of his parents, Aristotle was sent to Athens to study at Plato's Academy, where he remained for about 20 years. After Plato's death, Aristotle left Athens and eventually became the tutor of Alexander the Great [C].\n\n3. **Philosophical Contributions**: Aristotle founded the Lyceum in Athens, where he established the Peripatetic school of philosophy. His works cover a wide range of subjects, including metaphysics, ethics, politics, logic, biology, and aesthetics. His writings laid the groundwork for many modern scientific and philosophical inquiries [A].\n\n4. **Legacy**: Aristotle's influence extends beyond philosophy to the natural sciences, linguistics, economics, and psychology. His method of systematic observation and analysis has been foundational to the development of modern science [A].\n\nAristotle's comprehensive approach to knowledge and his systematic methodology have earned him a lasting legacy as one of the greatest philosophers of all time.\n\nSources:\n- [A] Aristotle's broad range of writings and influence on modern science.\n- [C] Details about Aristotle's early life and education.","role":"assistant"}]},"RAGResponse":{"properties":{"completion":{"title":"Completion","description":"The generated completion from the RAG process"},"search_results":{"$ref":"#/components/schemas/SearchResponse","description":"The search results used for the RAG process"}},"type":"object","required":["completion","search_results"],"title":"RAGResponse","example":{"completion":{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Paris is the capital of France.","role":"assistant"}}],"id":"chatcmpl-example123"},"search_results":{"kg_search_results":[{"content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}}},"RawChunk":{"properties":{"text":{"type":"string","title":"Text"}},"type":"object","required":["text"],"title":"RawChunk"},"ResultsWrapper_AllPromptsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AllPromptsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AllPromptsResponse]"},"ResultsWrapper_AnalyticsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AnalyticsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AnalyticsResponse]"},"ResultsWrapper_AppSettingsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AppSettingsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AppSettingsResponse]"},"ResultsWrapper_CollectionResponse_":{"properties":{"results":{"$ref":"#/components/schemas/CollectionResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[CollectionResponse]"},"ResultsWrapper_CreateVectorIndexResponse_":{"properties":{"results":{"$ref":"#/components/schemas/CreateVectorIndexResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[CreateVectorIndexResponse]"},"ResultsWrapper_GenericMessageResponse_":{"properties":{"results":{"$ref":"#/components/schemas/GenericMessageResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[GenericMessageResponse]"},"ResultsWrapper_KGCommunitiesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGCommunitiesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGCommunitiesResponse]"},"ResultsWrapper_KGEntitiesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGEntitiesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGEntitiesResponse]"},"ResultsWrapper_KGTriplesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGTriplesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGTriplesResponse]"},"ResultsWrapper_NoneType_":{"properties":{"results":{"type":"null","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[NoneType]"},"ResultsWrapper_RAGAgentResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGAgentResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGAgentResponse]"},"ResultsWrapper_RAGResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGResponse]"},"ResultsWrapper_SearchResponse_":{"properties":{"results":{"$ref":"#/components/schemas/SearchResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[SearchResponse]"},"ResultsWrapper_ServerStats_":{"properties":{"results":{"$ref":"#/components/schemas/ServerStats"}},"type":"object","required":["results"],"title":"ResultsWrapper[ServerStats]"},"ResultsWrapper_TokenResponse_":{"properties":{"results":{"$ref":"#/components/schemas/TokenResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[TokenResponse]"},"ResultsWrapper_Union_KGCreationResponse__KGCreationEstimationResponse__":{"properties":{"results":{"anyOf":[{"$ref":"#/components/schemas/KGCreationResponse"},{"$ref":"#/components/schemas/KGCreationEstimationResponse"}],"title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[Union[KGCreationResponse, KGCreationEstimationResponse]]"},"ResultsWrapper_Union_KGEnrichmentResponse__KGEnrichmentEstimationResponse__":{"properties":{"results":{"anyOf":[{"$ref":"#/components/schemas/KGEnrichmentResponse"},{"$ref":"#/components/schemas/KGEnrichmentEstimationResponse"}],"title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[Union[KGEnrichmentResponse, KGEnrichmentEstimationResponse]]"},"ResultsWrapper_UpdatePromptResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdatePromptResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdatePromptResponse]"},"ResultsWrapper_UpdateResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdateResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdateResponse]"},"ResultsWrapper_UserResponse_":{"properties":{"results":{"$ref":"#/components/schemas/shared__api__models__auth__responses__UserResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UserResponse]"},"ResultsWrapper_list_CollectionOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionOverviewResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[CollectionOverviewResponse]]"},"ResultsWrapper_list_CollectionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[CollectionResponse]]"},"ResultsWrapper_list_IngestionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/IngestionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[IngestionResponse]]"},"ResultsWrapper_list_LogResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/LogResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[LogResponse]]"},"ResultsWrapper_list_Tuple_str__Message___":{"properties":{"results":{"items":{"prefixItems":[{"type":"string"},{"$ref":"#/components/schemas/Message"}],"type":"array","maxItems":2,"minItems":2},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[Tuple[str, Message]]]"},"SearchResponse":{"properties":{"vector_search_results":{"items":{"$ref":"#/components/schemas/VectorSearchResult"},"type":"array","title":"Vector Search Results","description":"List of vector search results"},"kg_search_results":{"anyOf":[{"items":{"$ref":"#/components/schemas/KGSearchResult"},"type":"array"},{"type":"null"}],"title":"Kg Search Results","description":"Knowledge graph search results, if applicable"}},"type":"object","required":["vector_search_results"],"title":"SearchResponse","example":{"kg_search_results":[{"content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}},"ServerStats":{"properties":{"start_time":{"type":"string","format":"date-time","title":"Start Time"},"uptime_seconds":{"type":"number","title":"Uptime Seconds"},"cpu_usage":{"type":"number","title":"Cpu Usage"},"memory_usage":{"type":"number","title":"Memory Usage"}},"type":"object","required":["start_time","uptime_seconds","cpu_usage","memory_usage"],"title":"ServerStats"},"Token":{"properties":{"token":{"type":"string","title":"Token"},"token_type":{"type":"string","title":"Token Type"}},"type":"object","required":["token","token_type"],"title":"Token"},"TokenResponse":{"properties":{"access_token":{"$ref":"#/components/schemas/Token"},"refresh_token":{"$ref":"#/components/schemas/Token"}},"type":"object","required":["access_token","refresh_token"],"title":"TokenResponse"},"Triple":{"properties":{"id":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Id"},"subject":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Subject"},"predicate":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Predicate"},"object":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Object"},"weight":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Weight","default":1.0},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"predicate_embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Predicate Embedding"},"extraction_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Extraction Ids","default":[]},"document_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Document Id"},"attributes":{"anyOf":[{"type":"object"},{"type":"string"}],"title":"Attributes","default":{}}},"type":"object","title":"Triple","description":"A relationship between two entities. This is a generic relationship, and can be used to represent any type of relationship between any two entities."},"UpdatePromptResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"UpdatePromptResponse"},"UpdateResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Task Id","description":"The task ID of the ingestion request."},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids","description":"The ID of the document that was ingested."}},"type":"object","required":["message","document_ids"],"title":"UpdateResponse","example":{"document_ids":["9fbe403b-c11c-5aae-8ade-ef22980c3ad1"],"message":"Update task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"UserOverviewResponse":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id"},"num_files":{"type":"integer","title":"Num Files"},"total_size_in_bytes":{"type":"integer","title":"Total Size In Bytes"},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids"}},"type":"object","required":["user_id","num_files","total_size_in_bytes","document_ids"],"title":"UserOverviewResponse"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"VectorSearchResult":{"properties":{"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"score":{"type":"number","title":"Score"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["extraction_id","document_id","user_id","collection_ids","score","text","metadata"],"title":"VectorSearchResult","description":"Result of a search operation.","collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"},"VectorSearchSettings":{"properties":{"use_vector_search":{"type":"boolean","title":"Use Vector Search","description":"Whether to use vector search","default":true},"use_hybrid_search":{"type":"boolean","title":"Use Hybrid Search","description":"Whether to perform a hybrid search (combining vector and keyword search)","default":false},"filters":{"type":"object","title":"Filters","description":"Alias for search_filters","deprecated":true},"search_filters":{"type":"object","title":"Search Filters","description":"Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.\n\n Commonly seen filters include operations include the following:\n\n `{\"document_id\": {\"$eq\": \"9fbe403b-...\"}}`\n\n `{\"document_id\": {\"$in\": [\"9fbe403b-...\", \"3e157b3a-...\"]}}`\n\n `{\"collection_ids\": {\"$overlap\": [\"122fdf6a-...\", \"...\"]}}`\n\n `{\"$and\": {\"$document_id\": ..., \"collection_ids\": ...}}`"},"search_limit":{"type":"integer","maximum":1000.0,"minimum":1.0,"title":"Search Limit","description":"Maximum number of results to return","default":10},"offset":{"type":"integer","minimum":0.0,"title":"Offset","description":"Offset to paginate search results","default":0},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"index_measure":{"$ref":"#/components/schemas/IndexMeasure","description":"The distance measure to use for indexing","default":"cosine_distance"},"include_values":{"type":"boolean","title":"Include Values","description":"Whether to include search score values in the search results","default":true},"include_metadatas":{"type":"boolean","title":"Include Metadatas","description":"Whether to include element metadata in the search results","default":true},"probes":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Probes","description":"Number of ivfflat index lists to query. Higher increases accuracy but decreases speed.","default":10},"ef_search":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Ef Search","description":"Size of the dynamic candidate list for HNSW index search. Higher increases accuracy but decreases speed.","default":40},"hybrid_search_settings":{"anyOf":[{"$ref":"#/components/schemas/HybridSearchSettings"},{"type":"null"}],"description":"Settings for hybrid search","default":{"full_text_weight":1.0,"semantic_weight":5.0,"full_text_limit":200,"rrf_k":50}},"search_strategy":{"type":"string","title":"Search Strategy","description":"Search strategy to use (e.g., 'default', 'query_fusion', 'hyde')","default":"vanilla"}},"type":"object","title":"VectorSearchSettings","ef_search":40,"filters":{"category":"technology"},"hybrid_search_settings":{"full_text_limit":200,"full_text_weight":1.0,"rrf_k":50,"semantic_weight":5.0},"include_metadata":true,"index_measure":"cosine_distance","limit":20,"offset":0,"probes":10,"selected_collection_ids":["2acb499e-8428-543b-bd85-0d9098718220","3e157b3a-8469-51db-90d9-52e7d896b49b"],"use_hybrid_search":true,"use_vector_search":true},"VectorTableName":{"type":"string","enum":["chunks","entity_embedding","community_report"],"title":"VectorTableName","description":"This enum represents the different tables where we store vectors.\n\n# TODO: change the table name of the chunks table. Right now it is called\n# {r2r_project_name}.{r2r_project_name} due to a bug in the vector class."},"shared__api__models__auth__responses__UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-10-16T17:20:12.245500"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-10-16T17:20:12.245505"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"},"shared__api__models__management__responses__UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-10-16T17:20:12.257770"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-10-16T17:20:12.257773"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"}},"securitySchemes":{"HTTPBearer":{"type":"http","scheme":"bearer"},"OAuth2PasswordBearer":{"type":"oauth2","flows":{"password":{"scopes":{},"tokenUrl":"token"}}}}}} diff --git a/docs/cookbooks/ingestion.mdx b/docs/cookbooks/ingestion.mdx index c203b72fd..ab679d805 100644 --- a/docs/cookbooks/ingestion.mdx +++ b/docs/cookbooks/ingestion.mdx @@ -8,6 +8,23 @@ icon: 'file-arrow-up' R2R provides a powerful and flexible ingestion pipeline that allows you to efficiently process and manage various types of documents. This cookbook will guide you through the process of ingesting files, updating existing documents, and deleting documents using the R2R Python SDK. + + +As of version `3.2.13`, we have expanded the options for ingesting files using multimodal foundation models. In addition to using such models by default for images, R2R can now use them on PDFs by passing the following in your ingestion configuration: + +```json +{ + "parser_overrides": { + "pdf": "zerox" + } +} +``` + +We recommend this method for achieving the highest quality ingestion results. + + + + ## Ingesting Files To ingest files into your R2R system, you can use the `ingest_files` method from the Python SDK: @@ -44,8 +61,8 @@ If you have pre-processed chunks of text, you can directly ingest them using the ```python chunks = [ - {"text": "This is the first chunk.", "metadata": {"source": "document1"}}, - {"text": "This is the second chunk.", "metadata": {"source": "document2"}}, + {"text": "This is the first chunk."}, + {"text": "This is the second chunk."} ] ingest_response = client.ingest_chunks( diff --git a/docs/documentation/cli/ingestion.mdx b/docs/documentation/cli/ingestion.mdx index 0ee8e82b9..942cffb7f 100644 --- a/docs/documentation/cli/ingestion.mdx +++ b/docs/documentation/cli/ingestion.mdx @@ -3,7 +3,6 @@ title: 'Ingestion' description: 'Ingesting files with the R2R CLI.' --- - ## Document Ingestion and Management ### Ingest Files diff --git a/docs/documentation/configuration/ingestion/parsing_and_chunking.mdx b/docs/documentation/configuration/ingestion/parsing_and_chunking.mdx index 8e0766e1d..425605304 100644 --- a/docs/documentation/configuration/ingestion/parsing_and_chunking.mdx +++ b/docs/documentation/configuration/ingestion/parsing_and_chunking.mdx @@ -21,6 +21,161 @@ Some of the configurable options include: - Excluded parsers - Provider-specific settings (e.g., max characters, overlap, languages) + +An exhaustive list of runtime ingestion inputs to the `ingest-files` endpoint is shown below: + + + A list of file paths or directory paths to ingest. If a directory path is provided, all files within the directory and its subdirectories will be ingested. + + + + An optional list of metadata dictionaries corresponding to each file. If provided, the length should match the number of files being ingested. + + + + An optional list of document IDs to assign to the ingested files. If provided, the length should match the number of files being ingested. + + + + An optional list of version strings for the ingested files. If provided, the length should match the number of files being ingested. + + + + The ingestion config override parameter enables developers to customize their R2R chunking strategy at runtime. Learn more about [configuration here](/documentation/configuration/ingestion/parsing_and_chunking). + + + Which R2R ingestion provider to use. Options are "r2r". + + + Only `recursive` is currently supported. + + + The target size for output chunks. + + + The target overlap fraction for output chunks + + + Which parsers to exclude from inside R2R. + + + + + + Which unstructured ingestion provider to use. Options are "unstructured_local", or "unstructured_api". + + + + Sets a maximum size on output chunks. + + + + Combine chunks smaller than this number of characters. + + + + Maximum number of characters per chunk. + + + + Whether to include coordinates in the output. + + + + Encoding to use for text files. + + + + Types of image blocks to extract. + + + + Content type for uncompressed gzip files. + + + + Name of the high-resolution model to use. + + + + Whether to include original elements in the output. + + + + Whether to include page breaks in the output. + + + + List of languages to consider for text processing. + + + + Whether to allow sections to span multiple pages. + + + + Start a new chunk after this many characters. + + + + Languages to use for OCR. + + + + Format of the output. + + + + Number of characters to overlap between chunks. + + + + Whether to overlap all chunks. + + + + Whether to infer table structure in PDFs. + + + + Threshold for considering chunks similar. + + + + Types of tables to skip inferring. + + + + Concurrency level for splitting PDFs. + + + + Whether to split PDFs by page. + + + + Page number to start processing from. + + + + Strategy for processing. Options are "auto", "fast", or "hi_res". + + + + Strategy for chunking. Options are "by_title" or "basic". + + + + Whether to generate unique IDs for elements. + + + + Whether to keep XML tags in the output. + + + + + For a comprehensive list of available runtime configuration options and examples of how to use them, refer to the [Python SDK Ingestion Documentation](/documentation/python-sdk/ingestion). diff --git a/docs/documentation/configuration/retrieval/vector-search.mdx b/docs/documentation/configuration/retrieval/vector-search.mdx index 580a70ecf..cd633e471 100644 --- a/docs/documentation/configuration/retrieval/vector-search.mdx +++ b/docs/documentation/configuration/retrieval/vector-search.mdx @@ -25,7 +25,8 @@ response = client.search("query", vector_search_settings=vector_search_settings) 1. `use_vector_search` (bool): Whether to use vector search 2. `use_hybrid_search` (bool): Whether to perform a hybrid search (combining vector and keyword search) -3. `filters` (dict): Filters to apply to the vector search +3. `filters` (dict): Alias for filters +3. `search_filters` (dict): Filters to apply to the vector search 4. `search_limit` (int): Maximum number of results to return (1-1000) 5. `selected_collection_ids` (list[UUID]): Collection Ids to search for 6. `index_measure` (IndexMeasure): The distance measure to use for indexing (cosine_distance, l2_distance, or max_inner_product) diff --git a/docs/documentation/js-sdk/ingestion.mdx b/docs/documentation/js-sdk/ingestion.mdx index 0abe1a393..cef4d96d3 100644 --- a/docs/documentation/js-sdk/ingestion.mdx +++ b/docs/documentation/js-sdk/ingestion.mdx @@ -10,6 +10,22 @@ Occasionally this SDK documentation falls out of date, cross-check with the auto ## Document Ingestion and Management + + +As of version `3.2.13`, we have expanded the options for ingesting files using multimodal foundation models. In addition to using such models by default for images, R2R can now use them on PDFs by passing the following in your ingestion configuration: + +```json +{ + "parser_overrides": { + "pdf": "zerox" + } +} +``` + +We recommend this method for achieving the highest quality ingestion results. + + + ### Ingest Files Ingest files or directories into your R2R system: diff --git a/docs/documentation/js-sdk/retrieval.mdx b/docs/documentation/js-sdk/retrieval.mdx index 3da61b29a..f3cb50b0d 100644 --- a/docs/documentation/js-sdk/retrieval.mdx +++ b/docs/documentation/js-sdk/retrieval.mdx @@ -65,7 +65,15 @@ const searchResponse = await client.search("What was Uber's profit in 2020?"); - Filters to apply to the vector search. + Commonly seen filters include operations include the following: + + `{"document_id": {"$eq": "9fbe403b-..."}}` + + `{"document_id": {"$in": ["9fbe403b-...", "3e157b3a-..."]}}` + + `{"collection_ids": {"$overlap": ["122fdf6a-...", "..."]}}` + + `{"$and": {"$document_id": ..., "collection_ids": ...}}` @@ -101,10 +109,26 @@ const searchResponse = await client.search("What was Uber's profit in 2020?"); - - Filters to apply to the vector search. This field is deprecated in favor of `filters`. + + Alias for `search_filters`, now `deprecated`. + + + + Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. + + Commonly seen filters include operations include the following: + + `{"document_id": {"$eq": "9fbe403b-..."}}` + + `{"document_id": {"$in": ["9fbe403b-...", "3e157b3a-..."]}}` + + `{"collection_ids": {"$overlap": ["122fdf6a-...", "..."]}}` + + `{"$and": {"$document_id": ..., "collection_ids": ...}}` + + Collection IDs to search for. @@ -277,7 +301,21 @@ const ragResponse = await client.rag("What was Uber's profit in 2020?"); - Filters to apply to the vector search. + Alias for `search_filters`, now `deprecated`. + + + + Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. + + Commonly seen filters include operations include the following: + + `{"document_id": {"$eq": "9fbe403b-..."}}` + + `{"document_id": {"$in": ["9fbe403b-...", "3e157b3a-..."]}}` + + `{"collection_ids": {"$overlap": ["122fdf6a-...", "..."]}}` + + `{"$and": {"$document_id": ..., "collection_ids": ...}}` @@ -337,9 +375,23 @@ const ragResponse = await client.rag("What was Uber's profit in 2020?"); The `KGSearchSettings` class allows you to configure the knowledge graph search settings for your R2R system. Here are the available options: - - Filters to apply to the vector search. This field is deprecated in favor of `filters`. - + + Alias for `search_filters`, now `deprecated`. + + + + Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. + + Commonly seen filters include operations include the following: + + `{"document_id": {"$eq": "9fbe403b-..."}}` + + `{"document_id": {"$in": ["9fbe403b-...", "3e157b3a-..."]}}` + + `{"collection_ids": {"$overlap": ["122fdf6a-...", "..."]}}` + + `{"$and": {"$document_id": ..., "collection_ids": ...}}` + Collection IDs to search for. diff --git a/docs/documentation/python-sdk/ingestion.mdx b/docs/documentation/python-sdk/ingestion.mdx index fcacc026b..3be96e5c8 100644 --- a/docs/documentation/python-sdk/ingestion.mdx +++ b/docs/documentation/python-sdk/ingestion.mdx @@ -10,9 +10,26 @@ Occasionally this SDK documentation falls out of date, cross-check with the auto ## Document Ingestion and Management + + +As of version `3.2.13`, we have expanded the options for ingesting files using multimodal foundation models. In addition to using such models by default for images, R2R can now use them on PDFs by passing the following in your ingestion configuration: + +```json +{ + "parser_overrides": { + "pdf": "zerox" + } +} +``` + +We recommend this method for achieving the highest quality ingestion results. + + + ### Ingest Files + Ingest files or directories into your R2R system: ```python diff --git a/docs/documentation/python-sdk/retrieval.mdx b/docs/documentation/python-sdk/retrieval.mdx index bccc4f216..3bb2a6e58 100644 --- a/docs/documentation/python-sdk/retrieval.mdx +++ b/docs/documentation/python-sdk/retrieval.mdx @@ -63,8 +63,23 @@ search_response = client.search("What was Uber's profit in 2020?") Whether to perform a hybrid search (combining vector and keyword search). + - Filters to apply to the vector search. + Alias for `search_filters`, now `deprecated`. + + + + Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. + + Commonly seen filters include operations include the following: + + `{"document_id": {"$eq": "9fbe403b-..."}}` + + `{"document_id": {"$in": ["9fbe403b-...", "3e157b3a-..."]}}` + + `{"collection_ids": {"$overlap": ["122fdf6a-...", "..."]}}` + + `{"$and": {"$document_id": ..., "collection_ids": ...}}` @@ -326,8 +341,23 @@ rag_response = client.rag("What was Uber's profit in 2020?") Whether to perform a hybrid search (combining vector and keyword search). + - Filters to apply to the vector search. + Alias for `search_filters`, now `deprecated`. + + + + Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. + + Commonly seen filters include operations include the following: + + `{"document_id": {"$eq": "9fbe403b-..."}}` + + `{"document_id": {"$in": ["9fbe403b-...", "3e157b3a-..."]}}` + + `{"collection_ids": {"$overlap": ["122fdf6a-...", "..."]}}` + + `{"$and": {"$document_id": ..., "collection_ids": ...}}` @@ -637,7 +667,21 @@ Note that any of the customization seen in AI powered search and RAG documentati - Filters to apply to the vector search. + Alias for `search_filters`, now `deprecated`. + + + + Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. + + Commonly seen filters include operations include the following: + + `{"document_id": {"$eq": "9fbe403b-..."}}` + + `{"document_id": {"$in": ["9fbe403b-...", "3e157b3a-..."]}}` + + `{"collection_ids": {"$overlap": ["122fdf6a-...", "..."]}}` + + `{"$and": {"$document_id": ..., "collection_ids": ...}}` diff --git a/py/cli/commands/retrieval.py b/py/cli/commands/retrieval.py index 70be9ee32..d75e2ee9a 100644 --- a/py/cli/commands/retrieval.py +++ b/py/cli/commands/retrieval.py @@ -20,7 +20,7 @@ @click.option( "--filters", type=JSON, - help="Filters to apply to the vector search as a JSON", + help="""Filters to apply to the vector search as a JSON, e.g. --filters='{"document_id":{"$in":["9fbe403b-c11c-5aae-8ade-ef22980c3ad1", "3e157b3a-8469-51db-90d9-52e7d896b49b"]}}'""", ) @click.option( "--search-limit", default=None, help="Number of search results to return" diff --git a/py/core/providers/database/document.py b/py/core/providers/database/document.py index 5febf6f38..96eaa384c 100644 --- a/py/core/providers/database/document.py +++ b/py/core/providers/database/document.py @@ -75,24 +75,6 @@ async def create_table(self): """ await self.execute_query(query) - # TODO - Remove this after the next release - # Additional query to check and add the column if it doesn't exist - # add_column_query = f""" - # DO $$ - # BEGIN - # IF NOT EXISTS ( - # SELECT 1 - # FROM information_schema.columns - # WHERE table_name = '{self._get_table_name("document_info")}' - # AND column_name = 'ingestion_attempt_number' - # ) THEN - # ALTER TABLE {self._get_table_name("document_info")} - # ADD COLUMN ingestion_attempt_number INT DEFAULT 0; - # END IF; - # END $$; - # """ - # await self.execute_query(add_column_query) - async def upsert_documents_overview( self, documents_overview: Union[DocumentInfo, list[DocumentInfo]] ) -> None: diff --git a/py/core/providers/kg/postgres.py b/py/core/providers/kg/postgres.py index 86b5fc2d4..5b23a9a54 100644 --- a/py/core/providers/kg/postgres.py +++ b/py/core/providers/kg/postgres.py @@ -1,8 +1,8 @@ import json import logging +import time from typing import Any, Dict, List, Optional, Tuple, Union from uuid import UUID -import time import asyncpg @@ -441,6 +441,7 @@ async def vector_query(self, query: str, **kwargs: Any) -> Any: or search_type == "__Relationship__" ): filter_query = "WHERE document_id = ANY($3)" + # TODO - This seems like a hack, we will need a better way to filter by collection ids for entities and relationships query = f""" SELECT distinct document_id FROM {self._get_table_name('document_info')} WHERE $1 = ANY(collection_ids) """ diff --git a/py/shared/abstractions/search.py b/py/shared/abstractions/search.py index 3f290c8ed..8d946b115 100644 --- a/py/shared/abstractions/search.py +++ b/py/shared/abstractions/search.py @@ -205,12 +205,22 @@ class VectorSearchSettings(R2RSerializable): ) filters: dict[str, Any] = Field( default_factory=dict, - description="Filters to apply to the vector search", + description="Alias for search_filters", deprecated=True, ) search_filters: dict[str, Any] = Field( default_factory=dict, - description="Filters to apply to the vector search", + description="""Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. + + Commonly seen filters include operations include the following: + + `{"document_id": {"$eq": "9fbe403b-..."}}` + + `{"document_id": {"$in": ["9fbe403b-...", "3e157b3a-..."]}}` + + `{"collection_ids": {"$overlap": ["122fdf6a-...", "..."]}}` + + `{"$and": {"$document_id": ..., "collection_ids": ...}}`""", ) search_limit: int = Field( default=10, @@ -304,12 +314,22 @@ class KGSearchSettings(R2RSerializable): filters: dict[str, Any] = Field( default_factory=dict, - description="Filters to apply to the vector search", + description="Alias for search_filters", deprecated=True, ) search_filters: dict[str, Any] = Field( default_factory=dict, - description="Filters to apply to the vector search", + description="""Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. + + Commonly seen filters include operations include the following: + + `{"document_id": {"$eq": "9fbe403b-..."}}` + + `{"document_id": {"$in": ["9fbe403b-...", "3e157b3a-..."]}}` + + `{"collection_ids": {"$overlap": ["122fdf6a-...", "..."]}}` + + `{"$and": {"$document_id": ..., "collection_ids": ...}}`""", ) selected_collection_ids: list[UUID] = Field( From a38f9164ff88728bd2c2b6ab1ae79dd8db3bba4e Mon Sep 17 00:00:00 2001 From: Shreyas Pimpalgaonkar Date: Wed, 16 Oct 2024 17:23:07 -0700 Subject: [PATCH 04/27] Graphrag tests (#1418) * Fix async JSON parsing (#1408) * Fix async JSON parsing * Remove score completion from js * clean up js * lockfile * Feature/build custom logger (#1409) * building a custom logger for r2r * fix log * maintain bkwd compat * Feature/add kg description prompt (#1411) * add kg desc prompt * add kg desc prompt * add kg desc prompt * fix prompt name * separate test run freq (#1412) * separate test run freq * task_id check fix * add ingestion docs * updatet * add * rm old prompts * rm old prompots * rm old prompts * rm old prompts * Prod fixes + enhancements (#1407) * change default settings back to fp32 * add logging and cache triples * up * up * pre-commit and cleanups * making community summary prompt async * up * up * revert prompt changes * up * up * modify default * bump test timeout due to stricter concurrency limits * bump sleep * rm ubuntu from windows/mac workflows * up * add tests --------- Co-authored-by: Nolan Tremelling <34580718+NolanTrem@users.noreply.github.com> Co-authored-by: emrgnt-cmplxty <68796651+emrgnt-cmplxty@users.noreply.github.com> Co-authored-by: emrgnt-cmplxty --- py/core/providers/database/postgres.py | 2 +- py/core/providers/kg/postgres.py | 20 +- py/tests/conftest.py | 53 +++++ py/tests/core/providers/kg/__init__.py | 0 py/tests/core/providers/kg/test_kg_logic.py | 245 ++++++++++++++++++++ 5 files changed, 306 insertions(+), 14 deletions(-) create mode 100644 py/tests/core/providers/kg/__init__.py create mode 100644 py/tests/core/providers/kg/test_kg_logic.py diff --git a/py/core/providers/database/postgres.py b/py/core/providers/database/postgres.py index 3b7a8edef..1ab5cb7a2 100644 --- a/py/core/providers/database/postgres.py +++ b/py/core/providers/database/postgres.py @@ -49,8 +49,8 @@ def __init__( self, config: DatabaseConfig, dimension: int, - quantization_type: VectorQuantizationType, crypto_provider: CryptoProvider, + quantization_type: Optional[VectorQuantizationType] = VectorQuantizationType.FP32, *args, **kwargs, ): diff --git a/py/core/providers/kg/postgres.py b/py/core/providers/kg/postgres.py index 5b23a9a54..04fc5942f 100644 --- a/py/core/providers/kg/postgres.py +++ b/py/core/providers/kg/postgres.py @@ -218,12 +218,16 @@ async def add_entities( Returns: result: asyncpg.Record: result of the upsert operation """ + for entity in entities: + if entity.description_embedding is not None: + entity.description_embedding = str(entity.description_embedding) + return await self._add_objects(entities, table_name) async def add_triples( self, triples: list[Triple], - table_name: str = "triples", + table_name: str = "triple_raw", ) -> None: """ Upsert triples into the triple_raw table. These are raw triples extracted from the document. @@ -396,16 +400,6 @@ async def upsert_entities(self, entities: List[Entity]) -> None: query = QUERY.format(table_name) await self.execute_query(query, entities) - async def upsert_relationships(self, relationships: list[Triple]) -> None: - QUERY = """ - INSERT INTO $1.$2 (source, target, relationship) - VALUES ($1, $2, $3) - """ - - table_name = self._get_table_name("triples") - query = QUERY.format(table_name) - await self.execute_query(query, relationships) - async def vector_query(self, query: str, **kwargs: Any) -> Any: query_embedding = kwargs.get("query_embedding", None) @@ -585,7 +579,6 @@ async def perform_graph_clustering( weight_default: Union[int, float] = 1.0, check_directed: bool = True, """ - settings: Dict[str, Any] = {} start_time = time.time() triples = await self.get_all_triples(collection_id) @@ -1060,7 +1053,8 @@ async def get_entities( entities = [Entity(**entity) for entity in results] total_entries = await self.get_entity_count( - collection_id=collection_id + collection_id=collection_id, + entity_table_name=entity_table_name ) return {"entities": entities, "total_entries": total_entries} diff --git a/py/tests/conftest.py b/py/tests/conftest.py index 1ef4e002d..ec0c2d567 100644 --- a/py/tests/conftest.py +++ b/py/tests/conftest.py @@ -2,6 +2,7 @@ import os import random import uuid +from uuid import UUID import pytest @@ -12,12 +13,22 @@ CompletionConfig, DatabaseConfig, EmbeddingConfig, + KGConfig, FileConfig, LoggingConfig, SqlitePersistentLoggingProvider, Vector, VectorEntry, ) + +from core.base import ( + DocumentInfo, + DocumentType, + IngestionStatus, + KGEnrichmentStatus, + KGExtractionStatus, +) + from core.providers import ( BCryptProvider, LiteCompletionProvider, @@ -25,8 +36,10 @@ PostgresDBProvider, PostgresFileProvider, R2RAuthProvider, + PostgresKGProvider, ) +from shared.abstractions.vector import VectorQuantizationType # Vectors @pytest.fixture(scope="function") @@ -201,3 +214,43 @@ async def local_logging_provider(app_config): await provider.close() if os.path.exists(logging_path): os.remove(logging_path) + + +@pytest.fixture(scope="function") +def kg_config_temporary(app_config): + return KGConfig(provider="postgres", app=app_config) + +# KG + +@pytest.fixture(scope="function") +def embedding_dimension(): + return 128 + +@pytest.fixture(scope="function") +def vector_quantization_type(): + return VectorQuantizationType.FP32 + +@pytest.fixture(scope="function") +async def postgres_kg_provider(kg_config_temporary, temporary_postgres_db_provider, litellm_provider, embedding_dimension, vector_quantization_type): + + # upsert into documents_overview + document_info = DocumentInfo( + id=UUID("9fbe403b-c11c-5aae-8ade-ef22980c3ad1"), + collection_ids=[UUID("122fdf6a-e116-546b-a8f6-e4cb2e2c0a09")], + user_id=UUID("00000000-0000-0000-0000-000000000003"), + type=DocumentType.PDF, + metadata={}, + title="Test Document for KG", + version="1.0", + size_in_bytes=1024, + ingestion_status=IngestionStatus.PENDING, + kg_extraction_status=KGExtractionStatus.PENDING, + ) + + await temporary_postgres_db_provider.relational.upsert_documents_overview( + document_info + ) + + kg_provider = PostgresKGProvider(kg_config_temporary, temporary_postgres_db_provider, litellm_provider) + await kg_provider.create_tables(embedding_dimension, vector_quantization_type) + yield kg_provider diff --git a/py/tests/core/providers/kg/__init__.py b/py/tests/core/providers/kg/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/py/tests/core/providers/kg/test_kg_logic.py b/py/tests/core/providers/kg/test_kg_logic.py new file mode 100644 index 000000000..195599bc9 --- /dev/null +++ b/py/tests/core/providers/kg/test_kg_logic.py @@ -0,0 +1,245 @@ +# tests/core/providers/kg/test_kg_logic.py +import pytest +import random +import uuid +from core.providers.kg.postgres import PostgresKGProvider +from core.base import Entity, Triple, Community, CommunityReport, KGExtraction +from shared.abstractions.vector import VectorQuantizationType + +@pytest.fixture(scope="function") +def collection_id(): + return uuid.UUID('122fdf6a-e116-546b-a8f6-e4cb2e2c0a09') + +@pytest.fixture(scope="function") +def document_id(): + return uuid.UUID('9fbe403b-c11c-5aae-8ade-ef22980c3ad1') + +@pytest.fixture(scope="function") +def extraction_ids(): + return [uuid.UUID('32ff6daf-6e67-44fa-b2a9-19384f5d9d19'), uuid.UUID('42ff6daf-6e67-44fa-b2a9-19384f5d9d19')] + +@pytest.fixture(scope="function") +def embedding_dimension(): + return 512 + +@pytest.fixture(scope="function") +def vector_quantization_type(): + return VectorQuantizationType.FP32 + +@pytest.fixture(scope="function") +def embedding_vectors(embedding_dimension): + random.seed(42) + return [[random.random() for _ in range(embedding_dimension)] for _ in range(2)] + +@pytest.fixture(scope="function") +def entities_raw_list(document_id, extraction_ids): + return [ + Entity(name="Entity1", description="Description1", category="Category1", extraction_ids=extraction_ids, document_id=document_id, attributes={"attr1": "value1", "attr2": "value2"}), + Entity(name="Entity2", description="Description2", category="Category2", extraction_ids=extraction_ids, document_id=document_id, attributes={"attr3": "value3", "attr4": "value4"}), + ] + +@pytest.fixture(scope="function") +def entities_list(extraction_ids, document_id, embedding_vectors): + return [ + Entity(name="Entity1", description="Description1", extraction_ids=extraction_ids, document_id=document_id, description_embedding=embedding_vectors[0]), + Entity(name="Entity2", description="Description2", extraction_ids=extraction_ids, document_id=document_id, description_embedding=embedding_vectors[1]), + ] + +@pytest.fixture(scope="function") +def triples_raw_list(embedding_vectors, extraction_ids, document_id): + return [ + Triple(subject="Entity1", predicate="predicate1", object="object1", weight=1.0, description="description1", embedding=embedding_vectors[0], extraction_ids=extraction_ids, document_id=document_id, attributes={"attr1": "value1", "attr2": "value2"}), + Triple(subject="Entity2", predicate="predicate2", object="object2", weight=1.0, description="description2", embedding=embedding_vectors[1], extraction_ids=extraction_ids, document_id=document_id, attributes={"attr3": "value3", "attr4": "value4"}), + ] + +@pytest.fixture(scope="function") +def communities_list(entities_list, triples_raw_list): + return [ + Community(name="Community1", description="Description1", entities=[entities_list[0]], triples=[triples_raw_list[0]]), + Community(name="Community2", description="Description2", entities=[entities_list[1]], triples=[triples_raw_list[1]]), + ] + +@pytest.fixture(scope="function") +def community_table_info(collection_id): + return [ + ("Entity1", 1, None, 0, False, [1,2], collection_id), + ("Entity2", 2, None, 0, False, [1,2], collection_id), + ] + +@pytest.fixture(scope="function") +def kg_extractions(extraction_ids, entities_raw_list, triples_raw_list, document_id): + return [KGExtraction(extraction_ids=extraction_ids, entities=entities_raw_list, triples=triples_raw_list, document_id=document_id)] + +@pytest.fixture(scope="function") +def community_report_list(embedding_vectors, collection_id): + return [ + CommunityReport( + community_number=1, + level=0, + collection_id=collection_id, + name="Community Report 1", + summary="Summary of the community report", + rating=8.0, + rating_explanation="Rating explanation of the community report", + findings=["Findings of the community report"], + embedding=embedding_vectors[0], + ), + CommunityReport( + community_number=2, + level=0, + collection_id=collection_id, + name="Community Report", + summary="Summary of the community report", + rating=8.0, + rating_explanation="Rating explanation of the community report", + findings=["Findings of the community report"], + embedding=embedding_vectors[1], + ), + ] + + +@pytest.mark.asyncio +async def test_kg_provider_initialization(postgres_kg_provider): + assert isinstance(postgres_kg_provider, PostgresKGProvider) + +@pytest.mark.asyncio +async def test_create_tables(postgres_kg_provider, collection_id, embedding_dimension, vector_quantization_type): + assert await postgres_kg_provider.get_entities(collection_id) == {'entities': [], "total_entries": 0} + assert await postgres_kg_provider.get_triples(collection_id) == {'triples': [], "total_entries": 0} + assert await postgres_kg_provider.get_communities(collection_id) == {'communities': [], "total_entries": 0} + +@pytest.mark.asyncio +async def test_add_entities_raw(postgres_kg_provider, entities_raw_list, collection_id): + await postgres_kg_provider.add_entities(entities_raw_list, table_name='entity_raw') + entities = await postgres_kg_provider.get_entities(collection_id, entity_table_name='entity_raw') + assert entities["entities"][0].name == "Entity1" + assert entities["entities"][1].name == "Entity2" + assert len(entities["entities"]) == 2 + assert entities["total_entries"] == 2 + +@pytest.mark.asyncio +async def test_add_entities(postgres_kg_provider, entities_list, collection_id): + await postgres_kg_provider.add_entities(entities_list, table_name='entity_embedding') + entities = await postgres_kg_provider.get_entities(collection_id, entity_table_name='entity_embedding') + assert entities["entities"][0].name == "Entity1" + assert entities["entities"][1].name == "Entity2" + assert len(entities["entities"]) == 2 + assert entities["total_entries"] == 2 + +@pytest.mark.asyncio +async def test_add_triples(postgres_kg_provider, triples_raw_list, collection_id): + await postgres_kg_provider.add_triples(triples_raw_list, table_name='triple_raw') + triples = await postgres_kg_provider.get_triples(collection_id) + assert triples["triples"][0].subject == "Entity1" + assert triples["triples"][1].subject == "Entity2" + assert len(triples["triples"]) == 2 + assert triples["total_entries"] == 2 + + +@pytest.mark.asyncio +async def test_add_kg_extractions(postgres_kg_provider, kg_extractions, collection_id): + added_extractions = await postgres_kg_provider.add_kg_extractions(kg_extractions, table_suffix='_raw') + + assert added_extractions == (2, 2) + + entities = await postgres_kg_provider.get_entities(collection_id, entity_table_name='entity_raw') + assert entities["entities"][0].name == "Entity1" + assert entities["entities"][1].name == "Entity2" + assert len(entities["entities"]) == 2 + assert entities["total_entries"] == 2 + + triples = await postgres_kg_provider.get_triples(collection_id) + assert triples["triples"][0].subject == "Entity1" + assert triples["triples"][1].subject == "Entity2" + assert len(triples["triples"]) == 2 + assert triples["total_entries"] == 2 + +@pytest.mark.asyncio +async def test_get_entity_map(postgres_kg_provider, entities_raw_list, triples_raw_list, document_id): + await postgres_kg_provider.add_entities(entities_raw_list, table_name='entity_raw') + entity_map = await postgres_kg_provider.get_entity_map(0, 2, document_id) + assert entity_map['Entity1']["entities"][0].name == "Entity1" + assert entity_map['Entity2']["entities"][0].name == "Entity2" + + await postgres_kg_provider.add_triples(triples_raw_list) + entity_map = await postgres_kg_provider.get_entity_map(0, 2, document_id) + assert entity_map['Entity1']["entities"][0].name == "Entity1" + assert entity_map['Entity2']["entities"][0].name == "Entity2" + + assert entity_map['Entity1']["triples"][0].subject == "Entity1" + assert entity_map['Entity2']["triples"][0].subject == "Entity2" + + +@pytest.mark.asyncio +async def test_upsert_embeddings(postgres_kg_provider, collection_id, entities_list): + table_name = 'entity_embedding' + + entities_list_to_upsert = [ + ( + entity.name, + entity.description, + str(entity.description_embedding), + entity.extraction_ids, + entity.document_id + ) + for entity in entities_list + ] + + await postgres_kg_provider.upsert_embeddings(entities_list_to_upsert, table_name) + + entities = await postgres_kg_provider.get_entities(collection_id, entity_table_name=table_name) + assert entities["entities"][0].name == "Entity1" + assert entities["entities"][1].name == "Entity2" + +@pytest.mark.asyncio +async def test_get_all_triples(postgres_kg_provider, collection_id, triples_raw_list): + await postgres_kg_provider.add_triples(triples_raw_list) + triples = await postgres_kg_provider.get_triples(collection_id) + assert triples["triples"][0].subject == "Entity1" + assert triples["triples"][1].subject == "Entity2" + assert len(triples["triples"]) == 2 + + +@pytest.mark.asyncio +async def test_get_communities(postgres_kg_provider, collection_id, community_report_list): + await postgres_kg_provider.add_community_report(community_report_list[0]) + await postgres_kg_provider.add_community_report(community_report_list[1]) + communities = await postgres_kg_provider.get_communities(collection_id) + assert communities["communities"][0].name == "Community Report 1" + assert len(communities["communities"]) == 2 + assert communities["total_entries"] == 2 + + +@pytest.fixture(scope="function") +def leiden_params_1(): + return { + "resolution": 1.0, + 'max_cluster_size': 1000, + "random_seed": 42, + } + +@pytest.mark.asyncio +async def test_perform_graph_clustering(postgres_kg_provider, collection_id, leiden_params_1, entities_list, triples_raw_list): + + # addd entities and triples + await postgres_kg_provider.add_entities(entities_list, table_name='entity_embedding') + await postgres_kg_provider.add_triples(triples_raw_list, table_name='triple_raw') + + num_communities = await postgres_kg_provider.perform_graph_clustering(collection_id, leiden_params_1) + assert num_communities + + +@pytest.mark.asyncio +async def test_get_community_details(postgres_kg_provider, entities_list, triples_raw_list, collection_id, community_report_list, community_table_info): + + await postgres_kg_provider.add_entities(entities_list, table_name='entity_embedding') + await postgres_kg_provider.add_triples(triples_raw_list, table_name='triple_raw') + await postgres_kg_provider.add_communities(community_table_info) + await postgres_kg_provider.add_community_report(community_report_list[0]) + + community_level, entities, triples = await postgres_kg_provider.get_community_details(community_number=1) + + assert community_level == 0 + # TODO: change these to objects + assert entities[0]['name'] == "Entity1" + assert triples[0]['subject'] == "Entity1" \ No newline at end of file From 03cd2784bad19279ca3f7681b58e24d7efbb556d Mon Sep 17 00:00:00 2001 From: Shreyas Pimpalgaonkar Date: Wed, 16 Oct 2024 17:23:17 -0700 Subject: [PATCH 05/27] Modify graphrag tests timeouts (#1416) * Fix async JSON parsing (#1408) * Fix async JSON parsing * Remove score completion from js * clean up js * lockfile * Feature/build custom logger (#1409) * building a custom logger for r2r * fix log * maintain bkwd compat * Feature/add kg description prompt (#1411) * add kg desc prompt * add kg desc prompt * add kg desc prompt * fix prompt name * separate test run freq (#1412) * separate test run freq * task_id check fix * add ingestion docs * updatet * add * rm old prompts * rm old prompots * rm old prompts * rm old prompts * Prod fixes + enhancements (#1407) * change default settings back to fp32 * add logging and cache triples * up * up * pre-commit and cleanups * making community summary prompt async * up * up * revert prompt changes * up * up * modify default * bump test timeout due to stricter concurrency limits * bump sleep * rm ubuntu from windows/mac workflows * modify timeouts --------- Co-authored-by: Nolan Tremelling <34580718+NolanTrem@users.noreply.github.com> Co-authored-by: emrgnt-cmplxty <68796651+emrgnt-cmplxty@users.noreply.github.com> Co-authored-by: emrgnt-cmplxty --- py/tests/integration/runner_cli.py | 11 +++++++---- py/tests/integration/runner_sdk.py | 6 ++++-- templates/ycombinator_graphrag/python-backend/main.py | 4 ++-- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/py/tests/integration/runner_cli.py b/py/tests/integration/runner_cli.py index e8576a550..89e2fe2e3 100644 --- a/py/tests/integration/runner_cli.py +++ b/py/tests/integration/runner_cli.py @@ -251,8 +251,10 @@ def test_rag_response_stream_sample_file_cli(): def test_kg_create_graph_sample_file_cli(): print("Testing: KG create graph") print("Calling `poetry run r2r create-graph --run` ") - run_command("poetry run r2r create-graph --run") - time.sleep(120) + output = run_command("poetry run r2r create-graph --run") + + if "queued" in output: + time.sleep(60) response = requests.get( "http://localhost:7272/v2/entities", @@ -281,9 +283,10 @@ def test_kg_create_graph_sample_file_cli(): def test_kg_enrich_graph_sample_file_cli(): print("Testing: KG enrich graph") - run_command("poetry run r2r enrich-graph --run") + output = run_command("poetry run r2r enrich-graph --run") - time.sleep(120) + if "queued" in output: + time.sleep(60) response = requests.get( "http://localhost:7272/v2/communities", diff --git a/py/tests/integration/runner_sdk.py b/py/tests/integration/runner_sdk.py index 5761f09f6..80b38afb6 100644 --- a/py/tests/integration/runner_sdk.py +++ b/py/tests/integration/runner_sdk.py @@ -686,7 +686,8 @@ def test_kg_create_graph_sample_file_sdk(): collection_id="122fdf6a-e116-546b-a8f6-e4cb2e2c0a09", run_type="run" ) - time.sleep(120) + if "queued" in create_graph_result["results"]["message"]: + time.sleep(60) result = client.get_entities( collection_id="122fdf6a-e116-546b-a8f6-e4cb2e2c0a09", limit=1000 @@ -710,7 +711,8 @@ def test_kg_enrich_graph_sample_file_sdk(): collection_id="122fdf6a-e116-546b-a8f6-e4cb2e2c0a09", run_type="run" ) - time.sleep(120) + if "queued" in enrich_graph_result["results"]["message"]: + time.sleep(60) result = client.get_communities( collection_id="122fdf6a-e116-546b-a8f6-e4cb2e2c0a09" diff --git a/templates/ycombinator_graphrag/python-backend/main.py b/templates/ycombinator_graphrag/python-backend/main.py index 3850f6e80..c4e0724de 100644 --- a/templates/ycombinator_graphrag/python-backend/main.py +++ b/templates/ycombinator_graphrag/python-backend/main.py @@ -100,8 +100,8 @@ def update_prompts(): def ingest(): update_prompts() - # ingest_data() - # create_graph() + ingest_data() + create_graph() enrich_graph() def ask(): From f33fccd0fabc7a6eaba768ea9142b37e5c9ac529 Mon Sep 17 00:00:00 2001 From: Shreyas Pimpalgaonkar Date: Wed, 16 Oct 2024 17:24:42 -0700 Subject: [PATCH 06/27] feat: Make prompt provider methods asynchronous (comments below) (#1415) * Fix async JSON parsing (#1408) * Fix async JSON parsing * Remove score completion from js * clean up js * lockfile * Feature/build custom logger (#1409) * building a custom logger for r2r * fix log * maintain bkwd compat * Feature/add kg description prompt (#1411) * add kg desc prompt * add kg desc prompt * add kg desc prompt * fix prompt name * separate test run freq (#1412) * separate test run freq * task_id check fix * add ingestion docs * updatet * add * rm old prompts * rm old prompots * rm old prompts * rm old prompts * Prod fixes + enhancements (#1407) * change default settings back to fp32 * add logging and cache triples * up * up * pre-commit and cleanups * making community summary prompt async * up * up * revert prompt changes * up * up * modify default * bump test timeout due to stricter concurrency limits * bump sleep * rm ubuntu from windows/mac workflows * feat: Make prompt provider methods asynchronous --------- Co-authored-by: Nolan Tremelling <34580718+NolanTrem@users.noreply.github.com> Co-authored-by: emrgnt-cmplxty <68796651+emrgnt-cmplxty@users.noreply.github.com> Co-authored-by: emrgnt-cmplxty --- py/core/base/agent/agent.py | 6 +++-- py/core/base/providers/prompt.py | 27 ++++++++++++------- py/core/main/services/management_service.py | 6 +++-- py/core/pipes/kg/community_summary.py | 2 +- py/core/pipes/kg/entity_description.py | 2 +- py/core/pipes/kg/triples_extraction.py | 2 +- .../pipes/retrieval/query_transform_pipe.py | 2 +- py/core/pipes/retrieval/search_rag_pipe.py | 2 +- py/core/pipes/retrieval/streaming_rag_pipe.py | 2 +- py/core/providers/prompts/r2r_prompts.py | 2 +- 10 files changed, 33 insertions(+), 20 deletions(-) diff --git a/py/core/base/agent/agent.py b/py/core/base/agent/agent.py index e3254f4df..aed1b540c 100644 --- a/py/core/base/agent/agent.py +++ b/py/core/base/agent/agent.py @@ -95,8 +95,10 @@ async def _setup(self, system_instruction: Optional[str] = None): Message( role="system", content=system_instruction - or self.prompt_provider.get_prompt( - self.config.system_instruction_name + or ( + await self.prompt_provider.get_prompt( + self.config.system_instruction_name + ) ), ) ) diff --git a/py/core/base/providers/prompt.py b/py/core/base/providers/prompt.py index 75fb64cb5..838f78bb9 100644 --- a/py/core/base/providers/prompt.py +++ b/py/core/base/providers/prompt.py @@ -39,7 +39,7 @@ async def add_prompt( pass @abstractmethod - def get_prompt( + async def get_prompt( self, prompt_name: str, inputs: Optional[dict[str, Any]] = None, @@ -64,7 +64,7 @@ async def update_prompt( async def delete_prompt(self, name: str) -> None: pass - def _get_message_payload( + async def _get_message_payload( self, system_prompt_name: Optional[str] = None, system_role: str = "system", @@ -75,20 +75,29 @@ def _get_message_payload( task_inputs: dict = {}, task_prompt_override: Optional[str] = None, ) -> list[dict]: - system_prompt = system_prompt_override or self.get_prompt( - system_prompt_name or self.config.default_system_name, - system_inputs, - prompt_override=system_prompt_override, - ) - task_prompt = self.get_prompt( + + if system_prompt_override: + system_prompt = system_prompt_override + else: + system_prompt = await self.get_prompt( + system_prompt_name or self.config.default_system_name, + system_inputs, + prompt_override=system_prompt_override, + ) + + task_prompt = await self.get_prompt( task_prompt_name or self.config.default_task_name, task_inputs, prompt_override=task_prompt_override, ) + return [ { "role": system_role, "content": system_prompt, }, - {"role": task_role, "content": task_prompt}, + { + "role": task_role, + "content": task_prompt, + }, ] diff --git a/py/core/main/services/management_service.py b/py/core/main/services/management_service.py index 2ec16e66a..0110283eb 100644 --- a/py/core/main/services/management_service.py +++ b/py/core/main/services/management_service.py @@ -625,8 +625,10 @@ async def get_prompt( ) -> dict: try: return { - "message": self.providers.prompt.get_prompt( - prompt_name, inputs, prompt_override + "message": ( + await self.providers.prompt.get_prompt( + prompt_name, inputs, prompt_override + ) ) } except ValueError as e: diff --git a/py/core/pipes/kg/community_summary.py b/py/core/pipes/kg/community_summary.py index 38ccb1a8e..54e40c4bd 100644 --- a/py/core/pipes/kg/community_summary.py +++ b/py/core/pipes/kg/community_summary.py @@ -119,7 +119,7 @@ async def process_community( description = ( ( await self.llm_provider.aget_completion( - messages=self.prompt_provider._get_message_payload( + messages=await self.prompt_provider._get_message_payload( task_prompt_name=self.kg_provider.config.kg_enrichment_settings.community_reports_prompt, task_inputs={ "input_text": ( diff --git a/py/core/pipes/kg/entity_description.py b/py/core/pipes/kg/entity_description.py index 6ebe5e5cc..db5c5ccc5 100644 --- a/py/core/pipes/kg/entity_description.py +++ b/py/core/pipes/kg/entity_description.py @@ -106,7 +106,7 @@ async def process_entity( out_entity.description = ( ( await self.llm_provider.aget_completion( - messages=self.prompt_provider._get_message_payload( + messages=await self.prompt_provider._get_message_payload( task_prompt_name=self.kg_provider.config.kg_creation_settings.kg_entity_description_prompt, task_inputs={ "entity_info": truncate_info( diff --git a/py/core/pipes/kg/triples_extraction.py b/py/core/pipes/kg/triples_extraction.py index 659571f96..a24cb5787 100644 --- a/py/core/pipes/kg/triples_extraction.py +++ b/py/core/pipes/kg/triples_extraction.py @@ -93,7 +93,7 @@ async def extract_kg( # combine all extractions into a single string combined_extraction: str = " ".join([extraction.data for extraction in extractions]) # type: ignore - messages = self.prompt_provider._get_message_payload( + messages = await self.prompt_provider._get_message_payload( task_prompt_name=self.kg_provider.config.kg_creation_settings.kg_triples_extraction_prompt, task_inputs={ "input": combined_extraction, diff --git a/py/core/pipes/retrieval/query_transform_pipe.py b/py/core/pipes/retrieval/query_transform_pipe.py index f948af26f..668bd720b 100644 --- a/py/core/pipes/retrieval/query_transform_pipe.py +++ b/py/core/pipes/retrieval/query_transform_pipe.py @@ -65,7 +65,7 @@ async def _run_logic( # type: ignore ) query_transform_request = ( - self.prompt_provider._get_message_payload( + await self.prompt_provider._get_message_payload( system_prompt_name=self.config.system_prompt, task_prompt_name=self.config.task_prompt, task_inputs={ diff --git a/py/core/pipes/retrieval/search_rag_pipe.py b/py/core/pipes/retrieval/search_rag_pipe.py index f18b6fd0d..2ebcaca36 100644 --- a/py/core/pipes/retrieval/search_rag_pipe.py +++ b/py/core/pipes/retrieval/search_rag_pipe.py @@ -64,7 +64,7 @@ async def _run_logic( # type: ignore context += context_piece search_iteration += 1 - messages = self.prompt_provider._get_message_payload( + messages = await self.prompt_provider._get_message_payload( system_prompt_name=self.config.system_prompt, task_prompt_name=self.config.task_prompt, task_inputs={"query": sel_query, "context": context}, diff --git a/py/core/pipes/retrieval/streaming_rag_pipe.py b/py/core/pipes/retrieval/streaming_rag_pipe.py index a0c95dff1..1ce7b341f 100644 --- a/py/core/pipes/retrieval/streaming_rag_pipe.py +++ b/py/core/pipes/retrieval/streaming_rag_pipe.py @@ -67,7 +67,7 @@ async def _run_logic( # type: ignore gen_context = format_search_results_for_llm(search_results) context += gen_context - messages = self.prompt_provider._get_message_payload( + messages = await self.prompt_provider._get_message_payload( system_prompt_name=self.config.system_prompt, task_prompt_name=self.config.task_prompt, task_inputs={"query": query, "context": context}, diff --git a/py/core/providers/prompts/r2r_prompts.py b/py/core/providers/prompts/r2r_prompts.py index 0788abf77..68b53702d 100644 --- a/py/core/providers/prompts/r2r_prompts.py +++ b/py/core/providers/prompts/r2r_prompts.py @@ -219,7 +219,7 @@ async def add_prompt( logger.error(f"Failed to add/update prompt '{name}': {e}") raise - def get_prompt( + async def get_prompt( self, prompt_name: str, inputs: Optional[dict[str, Any]] = None, From 11ae42eec6d74afcfe0b55ac7ed722d197b8ebfc Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Wed, 16 Oct 2024 18:37:57 -0700 Subject: [PATCH 07/27] bump pyproject version --- py/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py/pyproject.toml b/py/pyproject.toml index 5ce2119d9..27abbf808 100644 --- a/py/pyproject.toml +++ b/py/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "r2r" readme = "README.md" -version = "3.2.14" +version = "3.2.15" description = "SciPhi R2R" authors = ["Owen Colegrove "] From 785c72eda546855e45faca490be115ed4db24186 Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Thu, 17 Oct 2024 11:37:05 -0700 Subject: [PATCH 08/27] first commit --- py/core/main/api/ingestion_router.py | 9 +- py/core/providers/database/postgres.py | 4 +- py/core/providers/database/vecs/__init__.py | 4 +- py/core/providers/database/vecs/client.py | 40 ++- .../{collection.py => vector_collection.py} | 126 +------- py/core/providers/database/vector.py | 8 +- py/core/providers/kg/postgres.py | 13 +- py/tests/conftest.py | 30 +- py/tests/core/providers/kg/test_kg_logic.py | 299 ++++++++++++++---- 9 files changed, 304 insertions(+), 229 deletions(-) rename py/core/providers/database/vecs/{collection.py => vector_collection.py} (90%) diff --git a/py/core/main/api/ingestion_router.py b/py/core/main/api/ingestion_router.py index 7d91ab31d..7e98dca15 100644 --- a/py/core/main/api/ingestion_router.py +++ b/py/core/main/api/ingestion_router.py @@ -353,9 +353,9 @@ async def create_vector_index_app( None, description="The arguments for the index method.", ), - replace: bool = Body( - default=True, - description="Whether to replace an existing index.", + index_name: Optional[str] = Body( + None, + description="The name of the index to create.", ), concurrently: bool = Body( default=True, @@ -365,7 +365,7 @@ async def create_vector_index_app( ) -> WrappedCreateVectorIndexResponse: logger.info( - f"Creating vector index for {table_name} with method {index_method}, measure {measure}, replace {replace}, concurrently {concurrently}" + f"Creating vector index for {table_name} with method {index_method}, measure {measure}, concurrently {concurrently}" ) raw_message = await self.orchestration_provider.run_workflow( @@ -376,7 +376,6 @@ async def create_vector_index_app( "index_method": index_method, "measure": measure, "index_arguments": index_arguments, - "replace": replace, "concurrently": concurrently, }, }, diff --git a/py/core/providers/database/postgres.py b/py/core/providers/database/postgres.py index 1ab5cb7a2..350fe0d2b 100644 --- a/py/core/providers/database/postgres.py +++ b/py/core/providers/database/postgres.py @@ -50,7 +50,9 @@ def __init__( config: DatabaseConfig, dimension: int, crypto_provider: CryptoProvider, - quantization_type: Optional[VectorQuantizationType] = VectorQuantizationType.FP32, + quantization_type: Optional[ + VectorQuantizationType + ] = VectorQuantizationType.FP32, *args, **kwargs, ): diff --git a/py/core/providers/database/vecs/__init__.py b/py/core/providers/database/vecs/__init__.py index bbcdaf3bb..c4fa0b6d3 100644 --- a/py/core/providers/database/vecs/__init__.py +++ b/py/core/providers/database/vecs/__init__.py @@ -1,13 +1,13 @@ from . import exc from .client import Client -from .collection import Collection +from .vector_collection import VectorCollection __project__ = "vecs" __version__ = "0.4.2" __all__ = [ - "Collection", + "VectorCollection", "Client", "exc", ] diff --git a/py/core/providers/database/vecs/client.py b/py/core/providers/database/vecs/client.py index 62e54ae5f..5b720f9f7 100644 --- a/py/core/providers/database/vecs/client.py +++ b/py/core/providers/database/vecs/client.py @@ -23,7 +23,7 @@ from .exc import CollectionNotFound if TYPE_CHECKING: - from core.providers.database.vecs.collection import Collection + from core.providers.database.vecs.vector_collection import VectorCollection logger = logging.getLogger() @@ -159,7 +159,7 @@ def get_or_create_vector_table( dimension: Optional[int] = None, adapter: Optional[Adapter] = None, quantization_type: Optional[VectorQuantizationType] = None, - ) -> Collection: + ) -> VectorCollection: """ Get a vector collection by name, or create it if no collection with *name* exists. @@ -177,11 +177,13 @@ def get_or_create_vector_table( Raises: CollectionAlreadyExists: If a collection with the same name already exists """ - from core.providers.database.vecs.collection import Collection + from core.providers.database.vecs.vector_collection import ( + VectorCollection, + ) adapter_dimension = adapter.exported_dimension if adapter else None - collection = Collection( + collection = VectorCollection( name=name, dimension=dimension or adapter_dimension, # type: ignore quantization_type=quantization_type, @@ -192,7 +194,7 @@ def get_or_create_vector_table( return collection._create_if_not_exists() @deprecated("use Client.get_or_create_vector_table") - def create_collection(self, name: str, dimension: int) -> Collection: + def create_collection(self, name: str, dimension: int) -> VectorCollection: """ Create a new vector collection. @@ -206,12 +208,14 @@ def create_collection(self, name: str, dimension: int) -> Collection: Raises: CollectionAlreadyExists: If a collection with the same name already exists """ - from core.providers.database.vecs.collection import Collection + from core.providers.database.vecs.vector_collection import ( + VectorCollection, + ) - return Collection(name, dimension, self)._create() + return VectorCollection(name, dimension, self)._create() @deprecated("use Client.get_or_create_vector_table") - def get_collection(self, name: str) -> Collection: + def get_collection(self, name: str) -> VectorCollection: """ Retrieve an existing vector collection. @@ -224,7 +228,9 @@ def get_collection(self, name: str) -> Collection: Raises: CollectionNotFound: If no collection with the given name exists. """ - from core.providers.database.vecs.collection import Collection + from core.providers.database.vecs.vector_collection import ( + VectorCollection, + ) query = text( f""" @@ -252,22 +258,24 @@ def get_collection(self, name: str) -> Collection: ) name, dimension = query_result - return Collection( + return VectorCollection( name, dimension, self, ) - def list_collections(self) -> List["Collection"]: + def list_collections(self) -> List["VectorCollection"]: """ List all vector collections. Returns: list[Collection]: A list of all collections. """ - from core.providers.database.vecs.collection import Collection + from core.providers.database.vecs.vector_collection import ( + VectorCollection, + ) - return Collection._list_collections(self) + return VectorCollection._list_collections(self) def delete_collection(self, name: str) -> None: """ @@ -281,9 +289,11 @@ def delete_collection(self, name: str) -> None: Returns: None """ - from core.providers.database.vecs.collection import Collection + from core.providers.database.vecs.vector_collection import ( + VectorCollection, + ) - Collection(name, -1, self)._drop() + VectorCollection(name, -1, self)._drop() return def disconnect(self) -> None: diff --git a/py/core/providers/database/vecs/collection.py b/py/core/providers/database/vecs/vector_collection.py similarity index 90% rename from py/core/providers/database/vecs/collection.py rename to py/core/providers/database/vecs/vector_collection.py index e3aa7ccaa..44dc451b3 100644 --- a/py/core/providers/database/vecs/collection.py +++ b/py/core/providers/database/vecs/vector_collection.py @@ -7,11 +7,8 @@ from __future__ import annotations -import math import time import warnings -from dataclasses import dataclass -from enum import Enum from typing import TYPE_CHECKING, Any, Iterable, Optional, Union from uuid import UUID, uuid4 @@ -120,7 +117,7 @@ def cosine_distance(self, other): return self.op("<=>", return_type=Float)(other) -class Collection: +class VectorCollection: """ The `vecs.Collection` class represents a collection of vectors within a PostgreSQL database with pgvector support. It provides methods to manage (create, delete, fetch, upsert), index, and perform similarity searches on these vector collections. @@ -180,7 +177,6 @@ def __init__( dimension, quantization_type, ) - self._index: Optional[str] = None self.adapter = adapter or Adapter(steps=[NoOp(dimension=dimension)]) reported_dimensions = set( @@ -670,7 +666,7 @@ def build_filters(self, filters: dict): raise FilterError("filters must be a dict") def parse_condition(key, value): - if key in Collection.COLUMN_VARS: + if key in VectorCollection.COLUMN_VARS: # Handle column-based filters column = getattr(self.table.c, key) if isinstance(value, dict): @@ -783,43 +779,6 @@ def parse_filter(filter_dict): return parse_filter(filters) - @classmethod - def _list_collections(cls, client: "Client") -> list["Collection"]: - """ - PRIVATE - - Retrieves all collections from the database. - - Args: - client (Client): The database client. - - Returns: - list[Collection]: A list of all existing collections. - """ - - query = text( - """ - select - relname as table_name, - atttypmod as embedding_dim - from - pg_class pc - join pg_attribute pa - on pc.oid = pa.attrelid - where - pc.relnamespace = '{client.project_name}'::regnamespace - and pc.relkind = 'r' - and pa.attname = 'vec' - and not pc.relname ^@ '_' - """ - ) - xc = [] - with client.Session() as sess: - for name, dimension in sess.execute(query): - existing_collection = cls(name, dimension, client) - xc.append(existing_collection) - return xc - @classmethod def _does_collection_exist(cls, client: "Client", name: str) -> bool: """ @@ -841,63 +800,6 @@ def _does_collection_exist(cls, client: "Client", name: str) -> bool: except CollectionNotFound: return False - @property - def index(self) -> Optional[str]: - """ - PRIVATE - - Note: - The `index` property is private and expected to undergo refactoring. - Do not rely on it's output. - - Retrieves the SQL name of the collection's vector index, if it exists. - - Returns: - Optional[str]: The name of the index, or None if no index exists. - """ - - if self._index is None: - query = text( - f""" - select - relname as table_name - from - pg_class pc - where - pc.relnamespace = '{self.client.project_name}'::regnamespace - and relname ilike 'ix_vector%' - and pc.relkind = 'i' - """ - ) - with self.client.Session() as sess: - ix_name = sess.execute(query).scalar() - self._index = ix_name - return self._index - - def is_indexed_for_measure(self, measure: IndexMeasure): - """ - Checks if the collection is indexed for a specific measure. - - Args: - measure (IndexMeasure): The measure to check for. - - Returns: - bool: True if the collection is indexed for the measure, False otherwise. - """ - - index_name = self.index - if index_name is None: - return False - - ops = index_measure_to_ops(measure, self.quantization_type) - if ops is None: - return False - - if ops in index_name: - return True - - return False - def close(self): """ Closes the database connection associated with this collection. @@ -916,9 +818,8 @@ def create_index( index_arguments: Optional[ Union[IndexArgsIVFFlat, IndexArgsHNSW] ] = None, - replace: bool = True, + index_name: Optional[str] = None, concurrently: bool = True, - quantization_type: VectorQuantizationType = VectorQuantizationType.FP32, ) -> None: """ Creates an index for the collection. @@ -1015,22 +916,9 @@ def create_index( concurrently_sql = "CONCURRENTLY" if concurrently else "" - # Drop existing index if needed (must be outside of transaction) - # Doesn't drop - if self.index is not None and replace: - drop_index_sql = f'DROP INDEX {concurrently_sql} IF EXISTS {self.client.project_name}."{self.index}";' - try: - with self.client.engine.connect() as connection: - connection = connection.execution_options( - isolation_level="AUTOCOMMIT" - ) - connection.execute(text(drop_index_sql)) - except Exception as e: - raise Exception(f"Failed to drop existing index: {e}") - self._index = None - - timestamp = time.strftime("%Y%m%d%H%M%S") - index_name = f"ix_{ops}_{method}__{timestamp}" + index_name = ( + index_name or f"ix_{ops}_{method}__{time.strftime('%Y%m%d%H%M%S')}" + ) create_index_sql = f""" CREATE INDEX {concurrently_sql} {index_name} @@ -1052,8 +940,6 @@ def create_index( except Exception as e: raise Exception(f"Failed to create index: {e}") - self._index = index_name - return None diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index 0496bd579..0e20f277b 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -25,7 +25,7 @@ VectorTableName, ) -from .vecs import Client, Collection, create_client +from .vecs import Client, VectorCollection, create_client logger = logging.getLogger() @@ -33,7 +33,7 @@ class PostgresVectorDBProvider(VectorDBProvider): def __init__(self, config: DatabaseConfig, *args, **kwargs): super().__init__(config) - self.collection: Optional[Collection] = None + self.collection: Optional[VectorCollection] = None self.project_name = kwargs.get("project_name", None) connection_string = kwargs.get("connection_string", None) if not connection_string: @@ -305,7 +305,7 @@ def create_index( index_arguments: Optional[ Union[IndexArgsHNSW, IndexArgsIVFFlat] ] = None, - replace: bool = True, + index_name: Optional[str] = None, concurrently: bool = True, ): if self.collection is None: @@ -318,7 +318,7 @@ def create_index( method=index_method, measure=measure, index_arguments=index_arguments, - replace=replace, + index_name=index_name, concurrently=concurrently, ) diff --git a/py/core/providers/kg/postgres.py b/py/core/providers/kg/postgres.py index 04fc5942f..f976f7c85 100644 --- a/py/core/providers/kg/postgres.py +++ b/py/core/providers/kg/postgres.py @@ -195,7 +195,11 @@ async def _add_objects( # Filter out null values for each object params = [ tuple( - json.dumps(v) if isinstance(v, dict) else v + ( + json.dumps(v) + if isinstance(v, dict) + else (str(v) if v is not None else None) + ) for v in obj.__dict__.values() if v is not None ) @@ -218,10 +222,6 @@ async def add_entities( Returns: result: asyncpg.Record: result of the upsert operation """ - for entity in entities: - if entity.description_embedding is not None: - entity.description_embedding = str(entity.description_embedding) - return await self._add_objects(entities, table_name) async def add_triples( @@ -1053,8 +1053,7 @@ async def get_entities( entities = [Entity(**entity) for entity in results] total_entries = await self.get_entity_count( - collection_id=collection_id, - entity_table_name=entity_table_name + collection_id=collection_id, entity_table_name=entity_table_name ) return {"entities": entities, "total_entries": total_entries} diff --git a/py/tests/conftest.py b/py/tests/conftest.py index ec0c2d567..27a76da3a 100644 --- a/py/tests/conftest.py +++ b/py/tests/conftest.py @@ -13,34 +13,32 @@ CompletionConfig, DatabaseConfig, EmbeddingConfig, - KGConfig, FileConfig, + KGConfig, LoggingConfig, SqlitePersistentLoggingProvider, Vector, VectorEntry, ) - -from core.base import ( +from core.base import ( DocumentInfo, DocumentType, IngestionStatus, KGEnrichmentStatus, KGExtractionStatus, ) - from core.providers import ( BCryptProvider, LiteCompletionProvider, LiteLLMEmbeddingProvider, PostgresDBProvider, PostgresFileProvider, - R2RAuthProvider, PostgresKGProvider, + R2RAuthProvider, ) - from shared.abstractions.vector import VectorQuantizationType + # Vectors @pytest.fixture(scope="function") def dimension(): @@ -220,18 +218,28 @@ async def local_logging_provider(app_config): def kg_config_temporary(app_config): return KGConfig(provider="postgres", app=app_config) + # KG + @pytest.fixture(scope="function") def embedding_dimension(): return 128 + @pytest.fixture(scope="function") def vector_quantization_type(): return VectorQuantizationType.FP32 + @pytest.fixture(scope="function") -async def postgres_kg_provider(kg_config_temporary, temporary_postgres_db_provider, litellm_provider, embedding_dimension, vector_quantization_type): +async def postgres_kg_provider( + kg_config_temporary, + temporary_postgres_db_provider, + litellm_provider, + embedding_dimension, + vector_quantization_type, +): # upsert into documents_overview document_info = DocumentInfo( @@ -251,6 +259,10 @@ async def postgres_kg_provider(kg_config_temporary, temporary_postgres_db_provid document_info ) - kg_provider = PostgresKGProvider(kg_config_temporary, temporary_postgres_db_provider, litellm_provider) - await kg_provider.create_tables(embedding_dimension, vector_quantization_type) + kg_provider = PostgresKGProvider( + kg_config_temporary, temporary_postgres_db_provider, litellm_provider + ) + await kg_provider.create_tables( + embedding_dimension, vector_quantization_type + ) yield kg_provider diff --git a/py/tests/core/providers/kg/test_kg_logic.py b/py/tests/core/providers/kg/test_kg_logic.py index 195599bc9..d875a0a78 100644 --- a/py/tests/core/providers/kg/test_kg_logic.py +++ b/py/tests/core/providers/kg/test_kg_logic.py @@ -1,74 +1,159 @@ # tests/core/providers/kg/test_kg_logic.py -import pytest import random import uuid + +import pytest + +from core.base import Community, CommunityReport, Entity, KGExtraction, Triple from core.providers.kg.postgres import PostgresKGProvider -from core.base import Entity, Triple, Community, CommunityReport, KGExtraction from shared.abstractions.vector import VectorQuantizationType + @pytest.fixture(scope="function") def collection_id(): - return uuid.UUID('122fdf6a-e116-546b-a8f6-e4cb2e2c0a09') + return uuid.UUID("122fdf6a-e116-546b-a8f6-e4cb2e2c0a09") + @pytest.fixture(scope="function") def document_id(): - return uuid.UUID('9fbe403b-c11c-5aae-8ade-ef22980c3ad1') + return uuid.UUID("9fbe403b-c11c-5aae-8ade-ef22980c3ad1") + @pytest.fixture(scope="function") def extraction_ids(): - return [uuid.UUID('32ff6daf-6e67-44fa-b2a9-19384f5d9d19'), uuid.UUID('42ff6daf-6e67-44fa-b2a9-19384f5d9d19')] + return [ + uuid.UUID("32ff6daf-6e67-44fa-b2a9-19384f5d9d19"), + uuid.UUID("42ff6daf-6e67-44fa-b2a9-19384f5d9d19"), + ] + @pytest.fixture(scope="function") def embedding_dimension(): return 512 + @pytest.fixture(scope="function") def vector_quantization_type(): return VectorQuantizationType.FP32 + @pytest.fixture(scope="function") def embedding_vectors(embedding_dimension): random.seed(42) - return [[random.random() for _ in range(embedding_dimension)] for _ in range(2)] + return [ + [random.random() for _ in range(embedding_dimension)] for _ in range(2) + ] + @pytest.fixture(scope="function") def entities_raw_list(document_id, extraction_ids): return [ - Entity(name="Entity1", description="Description1", category="Category1", extraction_ids=extraction_ids, document_id=document_id, attributes={"attr1": "value1", "attr2": "value2"}), - Entity(name="Entity2", description="Description2", category="Category2", extraction_ids=extraction_ids, document_id=document_id, attributes={"attr3": "value3", "attr4": "value4"}), + Entity( + name="Entity1", + description="Description1", + category="Category1", + extraction_ids=extraction_ids, + document_id=document_id, + attributes={"attr1": "value1", "attr2": "value2"}, + ), + Entity( + name="Entity2", + description="Description2", + category="Category2", + extraction_ids=extraction_ids, + document_id=document_id, + attributes={"attr3": "value3", "attr4": "value4"}, + ), ] + @pytest.fixture(scope="function") def entities_list(extraction_ids, document_id, embedding_vectors): return [ - Entity(name="Entity1", description="Description1", extraction_ids=extraction_ids, document_id=document_id, description_embedding=embedding_vectors[0]), - Entity(name="Entity2", description="Description2", extraction_ids=extraction_ids, document_id=document_id, description_embedding=embedding_vectors[1]), + Entity( + name="Entity1", + description="Description1", + extraction_ids=extraction_ids, + document_id=document_id, + description_embedding=embedding_vectors[0], + ), + Entity( + name="Entity2", + description="Description2", + extraction_ids=extraction_ids, + document_id=document_id, + description_embedding=embedding_vectors[1], + ), ] + @pytest.fixture(scope="function") def triples_raw_list(embedding_vectors, extraction_ids, document_id): return [ - Triple(subject="Entity1", predicate="predicate1", object="object1", weight=1.0, description="description1", embedding=embedding_vectors[0], extraction_ids=extraction_ids, document_id=document_id, attributes={"attr1": "value1", "attr2": "value2"}), - Triple(subject="Entity2", predicate="predicate2", object="object2", weight=1.0, description="description2", embedding=embedding_vectors[1], extraction_ids=extraction_ids, document_id=document_id, attributes={"attr3": "value3", "attr4": "value4"}), + Triple( + subject="Entity1", + predicate="predicate1", + object="object1", + weight=1.0, + description="description1", + embedding=embedding_vectors[0], + extraction_ids=extraction_ids, + document_id=document_id, + attributes={"attr1": "value1", "attr2": "value2"}, + ), + Triple( + subject="Entity2", + predicate="predicate2", + object="object2", + weight=1.0, + description="description2", + embedding=embedding_vectors[1], + extraction_ids=extraction_ids, + document_id=document_id, + attributes={"attr3": "value3", "attr4": "value4"}, + ), ] + @pytest.fixture(scope="function") def communities_list(entities_list, triples_raw_list): return [ - Community(name="Community1", description="Description1", entities=[entities_list[0]], triples=[triples_raw_list[0]]), - Community(name="Community2", description="Description2", entities=[entities_list[1]], triples=[triples_raw_list[1]]), + Community( + name="Community1", + description="Description1", + entities=[entities_list[0]], + triples=[triples_raw_list[0]], + ), + Community( + name="Community2", + description="Description2", + entities=[entities_list[1]], + triples=[triples_raw_list[1]], + ), ] + @pytest.fixture(scope="function") def community_table_info(collection_id): return [ - ("Entity1", 1, None, 0, False, [1,2], collection_id), - ("Entity2", 2, None, 0, False, [1,2], collection_id), + ("Entity1", 1, None, 0, False, [1, 2], collection_id), + ("Entity2", 2, None, 0, False, [1, 2], collection_id), ] + @pytest.fixture(scope="function") -def kg_extractions(extraction_ids, entities_raw_list, triples_raw_list, document_id): - return [KGExtraction(extraction_ids=extraction_ids, entities=entities_raw_list, triples=triples_raw_list, document_id=document_id)] +def kg_extractions( + extraction_ids, entities_raw_list, triples_raw_list, document_id +): + return [ + KGExtraction( + extraction_ids=extraction_ids, + entities=entities_raw_list, + triples=triples_raw_list, + document_id=document_id, + ) + ] + @pytest.fixture(scope="function") def community_report_list(embedding_vectors, collection_id): @@ -102,33 +187,67 @@ def community_report_list(embedding_vectors, collection_id): async def test_kg_provider_initialization(postgres_kg_provider): assert isinstance(postgres_kg_provider, PostgresKGProvider) + @pytest.mark.asyncio -async def test_create_tables(postgres_kg_provider, collection_id, embedding_dimension, vector_quantization_type): - assert await postgres_kg_provider.get_entities(collection_id) == {'entities': [], "total_entries": 0} - assert await postgres_kg_provider.get_triples(collection_id) == {'triples': [], "total_entries": 0} - assert await postgres_kg_provider.get_communities(collection_id) == {'communities': [], "total_entries": 0} +async def test_create_tables( + postgres_kg_provider, + collection_id, + embedding_dimension, + vector_quantization_type, +): + assert await postgres_kg_provider.get_entities(collection_id) == { + "entities": [], + "total_entries": 0, + } + assert await postgres_kg_provider.get_triples(collection_id) == { + "triples": [], + "total_entries": 0, + } + assert await postgres_kg_provider.get_communities(collection_id) == { + "communities": [], + "total_entries": 0, + } + @pytest.mark.asyncio -async def test_add_entities_raw(postgres_kg_provider, entities_raw_list, collection_id): - await postgres_kg_provider.add_entities(entities_raw_list, table_name='entity_raw') - entities = await postgres_kg_provider.get_entities(collection_id, entity_table_name='entity_raw') +async def test_add_entities_raw( + postgres_kg_provider, entities_raw_list, collection_id +): + await postgres_kg_provider.add_entities( + entities_raw_list, table_name="entity_raw" + ) + entities = await postgres_kg_provider.get_entities( + collection_id, entity_table_name="entity_raw" + ) assert entities["entities"][0].name == "Entity1" assert entities["entities"][1].name == "Entity2" assert len(entities["entities"]) == 2 assert entities["total_entries"] == 2 -@pytest.mark.asyncio -async def test_add_entities(postgres_kg_provider, entities_list, collection_id): - await postgres_kg_provider.add_entities(entities_list, table_name='entity_embedding') - entities = await postgres_kg_provider.get_entities(collection_id, entity_table_name='entity_embedding') + +@pytest.mark.asyncio +async def test_add_entities( + postgres_kg_provider, entities_list, collection_id +): + await postgres_kg_provider.add_entities( + entities_list, table_name="entity_embedding" + ) + entities = await postgres_kg_provider.get_entities( + collection_id, entity_table_name="entity_embedding" + ) assert entities["entities"][0].name == "Entity1" assert entities["entities"][1].name == "Entity2" assert len(entities["entities"]) == 2 assert entities["total_entries"] == 2 + @pytest.mark.asyncio -async def test_add_triples(postgres_kg_provider, triples_raw_list, collection_id): - await postgres_kg_provider.add_triples(triples_raw_list, table_name='triple_raw') +async def test_add_triples( + postgres_kg_provider, triples_raw_list, collection_id +): + await postgres_kg_provider.add_triples( + triples_raw_list, table_name="triple_raw" + ) triples = await postgres_kg_provider.get_triples(collection_id) assert triples["triples"][0].subject == "Entity1" assert triples["triples"][1].subject == "Entity2" @@ -137,42 +256,55 @@ async def test_add_triples(postgres_kg_provider, triples_raw_list, collection_id @pytest.mark.asyncio -async def test_add_kg_extractions(postgres_kg_provider, kg_extractions, collection_id): - added_extractions = await postgres_kg_provider.add_kg_extractions(kg_extractions, table_suffix='_raw') +async def test_add_kg_extractions( + postgres_kg_provider, kg_extractions, collection_id +): + added_extractions = await postgres_kg_provider.add_kg_extractions( + kg_extractions, table_suffix="_raw" + ) assert added_extractions == (2, 2) - entities = await postgres_kg_provider.get_entities(collection_id, entity_table_name='entity_raw') + entities = await postgres_kg_provider.get_entities( + collection_id, entity_table_name="entity_raw" + ) assert entities["entities"][0].name == "Entity1" assert entities["entities"][1].name == "Entity2" assert len(entities["entities"]) == 2 assert entities["total_entries"] == 2 triples = await postgres_kg_provider.get_triples(collection_id) - assert triples["triples"][0].subject == "Entity1" + assert triples["triples"][0].subject == "Entity1" assert triples["triples"][1].subject == "Entity2" assert len(triples["triples"]) == 2 assert triples["total_entries"] == 2 + @pytest.mark.asyncio -async def test_get_entity_map(postgres_kg_provider, entities_raw_list, triples_raw_list, document_id): - await postgres_kg_provider.add_entities(entities_raw_list, table_name='entity_raw') +async def test_get_entity_map( + postgres_kg_provider, entities_raw_list, triples_raw_list, document_id +): + await postgres_kg_provider.add_entities( + entities_raw_list, table_name="entity_raw" + ) entity_map = await postgres_kg_provider.get_entity_map(0, 2, document_id) - assert entity_map['Entity1']["entities"][0].name == "Entity1" - assert entity_map['Entity2']["entities"][0].name == "Entity2" + assert entity_map["Entity1"]["entities"][0].name == "Entity1" + assert entity_map["Entity2"]["entities"][0].name == "Entity2" await postgres_kg_provider.add_triples(triples_raw_list) entity_map = await postgres_kg_provider.get_entity_map(0, 2, document_id) - assert entity_map['Entity1']["entities"][0].name == "Entity1" - assert entity_map['Entity2']["entities"][0].name == "Entity2" + assert entity_map["Entity1"]["entities"][0].name == "Entity1" + assert entity_map["Entity2"]["entities"][0].name == "Entity2" - assert entity_map['Entity1']["triples"][0].subject == "Entity1" - assert entity_map['Entity2']["triples"][0].subject == "Entity2" + assert entity_map["Entity1"]["triples"][0].subject == "Entity1" + assert entity_map["Entity2"]["triples"][0].subject == "Entity2" @pytest.mark.asyncio -async def test_upsert_embeddings(postgres_kg_provider, collection_id, entities_list): - table_name = 'entity_embedding' +async def test_upsert_embeddings( + postgres_kg_provider, collection_id, entities_list +): + table_name = "entity_embedding" entities_list_to_upsert = [ ( @@ -180,20 +312,27 @@ async def test_upsert_embeddings(postgres_kg_provider, collection_id, entities_l entity.description, str(entity.description_embedding), entity.extraction_ids, - entity.document_id + entity.document_id, ) for entity in entities_list ] - await postgres_kg_provider.upsert_embeddings(entities_list_to_upsert, table_name) + await postgres_kg_provider.upsert_embeddings( + entities_list_to_upsert, table_name + ) - entities = await postgres_kg_provider.get_entities(collection_id, entity_table_name=table_name) + entities = await postgres_kg_provider.get_entities( + collection_id, entity_table_name=table_name + ) assert entities["entities"][0].name == "Entity1" assert entities["entities"][1].name == "Entity2" + @pytest.mark.asyncio -async def test_get_all_triples(postgres_kg_provider, collection_id, triples_raw_list): - await postgres_kg_provider.add_triples(triples_raw_list) +async def test_get_all_triples( + postgres_kg_provider, collection_id, triples_raw_list +): + await postgres_kg_provider.add_triples(triples_raw_list) triples = await postgres_kg_provider.get_triples(collection_id) assert triples["triples"][0].subject == "Entity1" assert triples["triples"][1].subject == "Entity2" @@ -201,7 +340,9 @@ async def test_get_all_triples(postgres_kg_provider, collection_id, triples_raw_ @pytest.mark.asyncio -async def test_get_communities(postgres_kg_provider, collection_id, community_report_list): +async def test_get_communities( + postgres_kg_provider, collection_id, community_report_list +): await postgres_kg_provider.add_community_report(community_report_list[0]) await postgres_kg_provider.add_community_report(community_report_list[1]) communities = await postgres_kg_provider.get_communities(collection_id) @@ -214,32 +355,58 @@ async def test_get_communities(postgres_kg_provider, collection_id, community_re def leiden_params_1(): return { "resolution": 1.0, - 'max_cluster_size': 1000, + "max_cluster_size": 1000, "random_seed": 42, } + @pytest.mark.asyncio -async def test_perform_graph_clustering(postgres_kg_provider, collection_id, leiden_params_1, entities_list, triples_raw_list): +async def test_perform_graph_clustering( + postgres_kg_provider, + collection_id, + leiden_params_1, + entities_list, + triples_raw_list, +): # addd entities and triples - await postgres_kg_provider.add_entities(entities_list, table_name='entity_embedding') - await postgres_kg_provider.add_triples(triples_raw_list, table_name='triple_raw') - - num_communities = await postgres_kg_provider.perform_graph_clustering(collection_id, leiden_params_1) + await postgres_kg_provider.add_entities( + entities_list, table_name="entity_embedding" + ) + await postgres_kg_provider.add_triples( + triples_raw_list, table_name="triple_raw" + ) + + num_communities = await postgres_kg_provider.perform_graph_clustering( + collection_id, leiden_params_1 + ) assert num_communities -@pytest.mark.asyncio -async def test_get_community_details(postgres_kg_provider, entities_list, triples_raw_list, collection_id, community_report_list, community_table_info): - - await postgres_kg_provider.add_entities(entities_list, table_name='entity_embedding') - await postgres_kg_provider.add_triples(triples_raw_list, table_name='triple_raw') +@pytest.mark.asyncio +async def test_get_community_details( + postgres_kg_provider, + entities_list, + triples_raw_list, + collection_id, + community_report_list, + community_table_info, +): + + await postgres_kg_provider.add_entities( + entities_list, table_name="entity_embedding" + ) + await postgres_kg_provider.add_triples( + triples_raw_list, table_name="triple_raw" + ) await postgres_kg_provider.add_communities(community_table_info) await postgres_kg_provider.add_community_report(community_report_list[0]) - community_level, entities, triples = await postgres_kg_provider.get_community_details(community_number=1) - + community_level, entities, triples = ( + await postgres_kg_provider.get_community_details(community_number=1) + ) + assert community_level == 0 # TODO: change these to objects - assert entities[0]['name'] == "Entity1" - assert triples[0]['subject'] == "Entity1" \ No newline at end of file + assert entities[0]["name"] == "Entity1" + assert triples[0]["subject"] == "Entity1" From ab14a057d293b24273c7b7b996458cfea35d1754 Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Thu, 17 Oct 2024 17:10:24 -0700 Subject: [PATCH 09/27] towards slimmer vector implementation logic --- py/core/base/providers/__init__.py | 2 + py/core/base/providers/database.py | 36 +- py/core/providers/database/base.py | 16 - .../database/{relational.py => handle.py} | 106 +- py/core/providers/database/postgres.py | 76 +- py/core/providers/database/vecs/client.py | 333 ------ .../database/vecs/vector_collection.py | 988 ------------------ py/core/providers/database/vector.py | 747 +++++++------ 8 files changed, 482 insertions(+), 1822 deletions(-) rename py/core/providers/database/{relational.py => handle.py} (55%) delete mode 100644 py/core/providers/database/vecs/client.py delete mode 100644 py/core/providers/database/vecs/vector_collection.py diff --git a/py/core/base/providers/__init__.py b/py/core/base/providers/__init__.py index 306eb0ad5..a3d8507e2 100644 --- a/py/core/base/providers/__init__.py +++ b/py/core/base/providers/__init__.py @@ -7,6 +7,7 @@ PostgresConfigurationSettings, RelationalDBProvider, VectorDBProvider, + VectorQuantizationType, ) from .embedding import EmbeddingConfig, EmbeddingProvider from .file import FileConfig, FileProvider @@ -37,6 +38,7 @@ "DatabaseProvider", "RelationalDBProvider", "VectorDBProvider", + "VectorQuantizationType", # Embedding provider "EmbeddingConfig", "EmbeddingProvider", diff --git a/py/core/base/providers/database.py b/py/core/base/providers/database.py index bccbfba68..453f8a51f 100644 --- a/py/core/base/providers/database.py +++ b/py/core/base/providers/database.py @@ -69,18 +69,16 @@ def supported_providers(self) -> list[str]: return ["postgres"] -class VectorDBProvider(Provider, ABC): - @abstractmethod - def _initialize_vector_db( - self, dimension: int, quantization_type: VectorQuantizationType - ) -> None: - pass +# class VectorDBProvider(Provider, ABC): +# @abstractmethod +# def _initialize_vector_db(self) -> None: +# pass -class RelationalDBProvider(Provider, ABC): - @abstractmethod - async def _initialize_relational_db(self) -> None: - pass +# class RelationalDBProvider(Provider, ABC): +# @abstractmethod +# async def _initialize_relational_db(self) -> None: +# pass class DatabaseProvider(Provider): @@ -92,17 +90,17 @@ def __init__(self, config: DatabaseConfig): logger.info(f"Initializing DatabaseProvider with config {config}.") super().__init__(config) - # remove later to re-introduce typing... - self.vector: Any = None - self.relational: Any = None + # # remove later to re-introduce typing... + # self.vector: Any = None + # self.relational: Any = None - @abstractmethod - def _initialize_vector_db(self) -> VectorDBProvider: - pass + # @abstractmethod + # async def _initialize_vector_db(self) -> VectorDBProvider: + # pass - @abstractmethod - async def _initialize_relational_db(self) -> RelationalDBProvider: - pass + # @abstractmethod + # async def _initialize_relational_db(self) -> RelationalDBProvider: + # pass @abstractmethod def _get_table_name(self, base_name: str) -> str: diff --git a/py/core/providers/database/base.py b/py/core/providers/database/base.py index 32ae6a334..3cf10b370 100644 --- a/py/core/providers/database/base.py +++ b/py/core/providers/database/base.py @@ -2,22 +2,6 @@ from sqlalchemy import TextClause, text -from .vecs import Client - - -# TODO: This should be defined at the mixin, not here -def execute_query( - vx: Client, - query: Union[str, TextClause], - params: Optional[dict[str, Any]] = None, -): - with vx.Session() as sess: - if isinstance(query, str): - query = text(query) - result = sess.execute(query, params or {}) - sess.commit() - return result - class QueryBuilder: def __init__(self, table_name: str): diff --git a/py/core/providers/database/relational.py b/py/core/providers/database/handle.py similarity index 55% rename from py/core/providers/database/relational.py rename to py/core/providers/database/handle.py index dc5f5cfb0..83cd93d4d 100644 --- a/py/core/providers/database/relational.py +++ b/py/core/providers/database/handle.py @@ -1,81 +1,89 @@ -import asyncio import logging -from contextlib import asynccontextmanager import asyncpg -from core.base import RelationalDBProvider +from core.base import RelationalDBProvider, CryptoProvider, DatabaseConfig +from core.providers.database.vector import VectorDBMixin from core.providers.database.base import DatabaseMixin from core.providers.database.collection import CollectionMixin from core.providers.database.document import DocumentMixin from core.providers.database.tokens import BlacklistedTokensMixin from core.providers.database.user import UserMixin +from typing import Optional + +from shared.abstractions.vector import VectorQuantizationType logger = logging.getLogger() -class PostgresRelationalDBProvider( - RelationalDBProvider, +class PostgresDBHandle( + # RelationalDBProvider, DocumentMixin, CollectionMixin, BlacklistedTokensMixin, UserMixin, + VectorDBMixin, ): def __init__( self, - config, - connection_string, - crypto_provider, - project_name, - postgres_configuration_settings, + config: DatabaseConfig, + connection_string: str, + crypto_provider: CryptoProvider, + project_name: str, + dimension: int, + quantization_type: Optional[VectorQuantizationType] = None, ): super().__init__(config) self.config = config self.connection_string = connection_string self.crypto_provider = crypto_provider self.project_name = project_name + self.dimension = dimension + self.quantization_type = quantization_type self.pool = None - self.postgres_configuration_settings = postgres_configuration_settings - self.semaphore = asyncio.Semaphore( - int(self.postgres_configuration_settings.max_connections * 0.9) + + def _get_table_name(self, base_name: str) -> str: + return f"{self.project_name}.{base_name}" + + async def initialize(self, pool: asyncpg.pool.Pool): + logger.info( + "Initializing `PostgresRelationalDBProvider` with connection pool." ) - async def initialize(self): - try: - self.pool = await asyncpg.create_pool( - self.connection_string, - max_size=self.postgres_configuration_settings.max_connections, - ) + self.pool = pool - logger.info( - "Successfully connected to Postgres database and created connection pool." - ) - except Exception as e: - raise ValueError( - f"Error {e} occurred while attempting to connect to relational database." - ) from e + async with self.pool.get_connection() as conn: + await conn.execute(f'CREATE EXTENSION IF NOT EXISTS "uuid-ossp";') - await self._initialize_relational_db() + # Call create_table for each mixin + for base_class in self.__class__.__bases__: + if issubclass(base_class, DatabaseMixin): + await base_class.create_table(self) - def _get_table_name(self, base_name: str) -> str: - return f"{self.project_name}.{base_name}" + await self.initialize_vector_db() - @asynccontextmanager - async def get_connection(self): - async with self.semaphore: - async with self.pool.acquire() as conn: - yield conn + logger.info("Successfully initialized `PostgresRelationalDBProvider`") - async def execute_query(self, query, params=None): - async with self.get_connection() as conn: - async with conn.transaction(): + async def close(self): + if self.pool: + await self.pool.close() + + async def execute_query(self, query, params=None, isolation_level=None): + async with self.pool.acquire() as conn: + if isolation_level: + async with conn.transaction(isolation=isolation_level): + if params: + return await conn.execute(query, *params) + else: + return await conn.execute(query) + else: if params: return await conn.execute(query, *params) else: return await conn.execute(query) async def execute_many(self, query, params=None, batch_size=1000): - async with self.get_connection() as conn: + async with self.pool.get_connection() as conn: async with conn.transaction(): if params: for i in range(0, len(params), batch_size): @@ -85,7 +93,7 @@ async def execute_many(self, query, params=None, batch_size=1000): await conn.executemany(query) async def fetch_query(self, query, params=None): - async with self.get_connection() as conn: + async with self.pool.get_connection() as conn: async with conn.transaction(): return ( await conn.fetch(query, *params) @@ -94,27 +102,9 @@ async def fetch_query(self, query, params=None): ) async def fetchrow_query(self, query, params=None): - async with self.get_connection() as conn: + async with self.pool.get_connection() as conn: async with conn.transaction(): if params: return await conn.fetchrow(query, *params) else: return await conn.fetchrow(query) - - # async def copy_records_to_table(self, table_name, records): - # async with self.get_connection() as conn: - # async with conn.transaction(): - # await conn.copy_records_to_table(table_name, records) - - async def _initialize_relational_db(self): - async with self.get_connection() as conn: - await conn.execute(f'CREATE EXTENSION IF NOT EXISTS "uuid-ossp";') - - # Call create_table for each mixin - for base_class in self.__class__.__bases__: - if issubclass(base_class, DatabaseMixin): - await base_class.create_table(self) - - async def close(self): - if self.pool: - await self.pool.close() diff --git a/py/core/providers/database/postgres.py b/py/core/providers/database/postgres.py index 350fe0d2b..070988f15 100644 --- a/py/core/providers/database/postgres.py +++ b/py/core/providers/database/postgres.py @@ -1,10 +1,13 @@ # TODO: Clean this up and make it more congruent across the vector database and the relational database. - +import asyncio import logging import os import warnings +from contextlib import asynccontextmanager from typing import Any, Optional +import asyncpg + from core.base import ( CryptoProvider, DatabaseConfig, @@ -12,10 +15,10 @@ PostgresConfigurationSettings, RelationalDBProvider, VectorDBProvider, + VectorQuantizationType, ) -from shared.abstractions.vector import VectorQuantizationType -from .relational import PostgresRelationalDBProvider +from .handle import PostgresDBHandle from .vector import PostgresVectorDBProvider logger = logging.getLogger() @@ -30,6 +33,37 @@ def get_env_var(new_var, old_var, config_value): return value +class SemaphoreConnectionPool(asyncpg.Pool): + def __init__(self, connection_string, postgres_configuration_settings): + self.connection_string = connection_string + self.postgres_configuration_settings = postgres_configuration_settings + + async def initialize(self): + try: + self.semaphore = asyncio.Semaphore( + int(self.postgres_configuration_settings.max_connections * 0.9) + ) + + self.pool = await asyncpg.create_pool( + self.connection_string, + max_size=self.postgres_configuration_settings.max_connections, + ) + + logger.info( + "Successfully connected to Postgres database and created connection pool." + ) + except Exception as e: + raise ValueError( + f"Error {e} occurred while attempting to connect to relational database." + ) from e + + @asynccontextmanager + async def get_connection(self): + async with self.semaphore: + async with self.pool.acquire() as conn: + yield conn + + class PostgresDBProvider(DatabaseProvider): user: str password: str @@ -115,28 +149,34 @@ def _get_table_name(self, base_name: str) -> str: return f"{self.project_name}.{base_name}" async def initialize(self): - self.vector = self._initialize_vector_db() - self.relational = await self._initialize_relational_db() + shared_pool = SemaphoreConnectionPool( + self.connection_string, self.postgres_configuration_settings + ) + await shared_pool.initialize() + + self.handle = await self._initialize_handle(shared_pool) + # self.relational = await self._initialize_relational_db(shared_pool) - def _initialize_vector_db(self) -> VectorDBProvider: - return PostgresVectorDBProvider( + async def _initialize_handle(self) -> VectorDBProvider: + handle = PostgresDBHandle( self.config, connection_string=self.connection_string, project_name=self.project_name, dimension=self.vector_db_dimension, quantization_type=self.vector_db_quantization_type, ) - - async def _initialize_relational_db(self) -> RelationalDBProvider: - relational_db = PostgresRelationalDBProvider( - self.config, - connection_string=self.connection_string, - crypto_provider=self.crypto_provider, - project_name=self.project_name, - postgres_configuration_settings=self.postgres_configuration_settings, - ) - await relational_db.initialize() - return relational_db + await handle.initialize() + return handle + + # async def _initialize_relational_db(self) -> RelationalDBProvider: + # relational_db = PostgresRelationalDBProvider( + # self.config, + # connection_string=self.connection_string, + # crypto_provider=self.crypto_provider, + # project_name=self.project_name, + # ) + # await relational_db.initialize() + # return relational_db def _get_postgres_configuration_settings( self, config: DatabaseConfig diff --git a/py/core/providers/database/vecs/client.py b/py/core/providers/database/vecs/client.py deleted file mode 100644 index 5b720f9f7..000000000 --- a/py/core/providers/database/vecs/client.py +++ /dev/null @@ -1,333 +0,0 @@ -""" -Defines the 'Client' class - -Importing from the `vecs.client` directly is not supported. -All public classes, enums, and functions are re-exported by the top level `vecs` module. -""" - -from __future__ import annotations - -import logging -import time -from typing import TYPE_CHECKING, List, Optional - -import sqlalchemy -from deprecated import deprecated -from sqlalchemy import MetaData, create_engine, text -from sqlalchemy.orm import sessionmaker -from sqlalchemy.pool import QueuePool - -from shared.abstractions.vector import VectorQuantizationType - -from .adapter import Adapter -from .exc import CollectionNotFound - -if TYPE_CHECKING: - from core.providers.database.vecs.vector_collection import VectorCollection - -logger = logging.getLogger() - - -class Client: - """ - The `vecs.Client` class serves as an interface to a PostgreSQL database with pgvector support. It facilitates - the creation, retrieval, listing and deletion of vector collections, while managing connections to the - database. - - A `Client` instance represents a connection to a PostgreSQL database. This connection can be used to create - and manipulate vector collections, where each collection is a group of vector records in a PostgreSQL table. - - The `vecs.Client` class can be also supports usage as a context manager to ensure the connection to the database - is properly closed after operations, or used directly. - - Example usage: - - DB_CONNECTION = "postgresql://:@:/" - - with vecs.create_client(DB_CONNECTION) as vx: - # do some work - pass - - # OR - - vx = vecs.create_client(DB_CONNECTION) - # do some work - vx.disconnect() - """ - - def __init__( - self, - connection_string: str, - pool_size: int = 1, - max_retries: int = 3, - retry_delay: int = 1, - project_name: str = "vecs", - ): - self.engine = create_engine( - connection_string, - pool_size=pool_size, - poolclass=QueuePool, - pool_recycle=300, # Recycle connections after 5 min - ) - self.meta = MetaData(schema=project_name) - self.Session = sessionmaker(self.engine) - self.max_retries = max_retries - self.retry_delay = retry_delay - self.project_name = project_name - self.vector_version: Optional[str] = None - self._initialize_database() - - def _initialize_database(self): - retries = 0 - error = None - while retries < self.max_retries: - try: - with self.Session() as sess: - with sess.begin(): - self._create_schema(sess) - self._create_extension(sess) - self._get_vector_version(sess) - return - except Exception as e: - logger.warning( - f"Database connection error: {str(e)}. Retrying in {self.retry_delay} seconds..." - ) - retries += 1 - time.sleep(self.retry_delay) - error = e - - error_message = f"Failed to initialize database after {self.max_retries} retries with error: {str(error)}" - logger.error(error_message) - raise RuntimeError(error_message) - - def _create_schema(self, sess): - try: - sess.execute( - text(f'CREATE SCHEMA IF NOT EXISTS "{self.project_name}";') - ) - except Exception as e: - logger.warning(f"Failed to create schema: {str(e)}") - - def _create_extension(self, sess): - try: - sess.execute(text(f"CREATE EXTENSION IF NOT EXISTS vector;")) - sess.execute(text(f"CREATE EXTENSION IF NOT EXISTS pg_trgm;")) - sess.execute( - text(f"CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;") - ) - except Exception as e: - logger.warning(f"Failed to create extension: {str(e)}") - - def _get_vector_version(self, sess): - try: - self.vector_version = sess.execute( - text( - "SELECT installed_version FROM pg_available_extensions WHERE name = 'vector' LIMIT 1;" - ) - ).scalar_one() - except sqlalchemy.exc.InternalError as e: - logger.error(f"Failed with internal alchemy error: {str(e)}") - - import psycopg2 - - if isinstance(e.orig, psycopg2.errors.InFailedSqlTransaction): - sess.rollback() - self.vector_version = sess.execute( - text( - "SELECT installed_version FROM pg_available_extensions WHERE name = 'vector' LIMIT 1;" - ) - ).scalar_one() - else: - raise e - except Exception as e: - logger.error(f"Failed to retrieve vector version: {str(e)}") - raise e - - def _supports_hnsw(self): - return ( - not self.vector_version.startswith("0.4") - and not self.vector_version.startswith("0.3") - and not self.vector_version.startswith("0.2") - and not self.vector_version.startswith("0.1") - and not self.vector_version.startswith("0.0") - ) - - def get_or_create_vector_table( - self, - name: str, - *, - dimension: Optional[int] = None, - adapter: Optional[Adapter] = None, - quantization_type: Optional[VectorQuantizationType] = None, - ) -> VectorCollection: - """ - Get a vector collection by name, or create it if no collection with - *name* exists. - - Args: - name (str): The name of the collection. - - Keyword Args: - dimension (int): The dimensionality of the vectors in the collection. - pipeline (int): The dimensionality of the vectors in the collection. - - Returns: - Collection: The created collection. - - Raises: - CollectionAlreadyExists: If a collection with the same name already exists - """ - from core.providers.database.vecs.vector_collection import ( - VectorCollection, - ) - - adapter_dimension = adapter.exported_dimension if adapter else None - - collection = VectorCollection( - name=name, - dimension=dimension or adapter_dimension, # type: ignore - quantization_type=quantization_type, - client=self, - adapter=adapter, - ) - - return collection._create_if_not_exists() - - @deprecated("use Client.get_or_create_vector_table") - def create_collection(self, name: str, dimension: int) -> VectorCollection: - """ - Create a new vector collection. - - Args: - name (str): The name of the collection. - dimension (int): The dimensionality of the vectors in the collection. - - Returns: - Collection: The created collection. - - Raises: - CollectionAlreadyExists: If a collection with the same name already exists - """ - from core.providers.database.vecs.vector_collection import ( - VectorCollection, - ) - - return VectorCollection(name, dimension, self)._create() - - @deprecated("use Client.get_or_create_vector_table") - def get_collection(self, name: str) -> VectorCollection: - """ - Retrieve an existing vector collection. - - Args: - name (str): The name of the collection. - - Returns: - Collection: The retrieved collection. - - Raises: - CollectionNotFound: If no collection with the given name exists. - """ - from core.providers.database.vecs.vector_collection import ( - VectorCollection, - ) - - query = text( - f""" - select - relname as table_name, - atttypmod as embedding_dim - from - pg_class pc - join pg_attribute pa - on pc.oid = pa.attrelid - where - pc.relnamespace = "{self.project_name}"::regnamespace - and pc.relkind = 'r' - and pa.attname = 'vec' - and not pc.relname ^@ '_' - and pc.relname = :name - """ - ).bindparams(name=name) - with self.Session() as sess: - query_result = sess.execute(query).fetchone() - - if query_result is None: - raise CollectionNotFound( - "No collection found with requested name" - ) - - name, dimension = query_result - return VectorCollection( - name, - dimension, - self, - ) - - def list_collections(self) -> List["VectorCollection"]: - """ - List all vector collections. - - Returns: - list[Collection]: A list of all collections. - """ - from core.providers.database.vecs.vector_collection import ( - VectorCollection, - ) - - return VectorCollection._list_collections(self) - - def delete_collection(self, name: str) -> None: - """ - Delete a vector collection. - - If no collection with requested name exists, does nothing. - - Args: - name (str): The name of the collection. - - Returns: - None - """ - from core.providers.database.vecs.vector_collection import ( - VectorCollection, - ) - - VectorCollection(name, -1, self)._drop() - return - - def disconnect(self) -> None: - """ - Disconnect the client from the database. - - Returns: - None - """ - self.engine.dispose() - logger.info("Disconnected from the database.") - return - - def __enter__(self) -> "Client": - """ - Enable use of the 'with' statement. - - Returns: - Client: The current instance of the Client. - """ - - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - """ - Disconnect the client on exiting the 'with' statement context. - - Args: - exc_type: The exception type, if any. - exc_val: The exception value, if any. - exc_tb: The traceback, if any. - - Returns: - None - """ - self.disconnect() - return diff --git a/py/core/providers/database/vecs/vector_collection.py b/py/core/providers/database/vecs/vector_collection.py deleted file mode 100644 index 44dc451b3..000000000 --- a/py/core/providers/database/vecs/vector_collection.py +++ /dev/null @@ -1,988 +0,0 @@ -""" -Defines the 'Collection' class - -Importing from the `vecs.collection` directly is not supported. -All public classes, enums, and functions are re-exported by the top level `vecs` module. -""" - -from __future__ import annotations - -import time -import warnings -from typing import TYPE_CHECKING, Any, Iterable, Optional, Union -from uuid import UUID, uuid4 - -from flupy import flu -from sqlalchemy import ( - Column, - Index, - MetaData, - Table, - and_, - cast, - delete, - func, - or_, - select, - text, -) -from sqlalchemy.dialects import postgresql -from sqlalchemy.types import Float, UserDefinedType - -from core.base import VectorSearchResult -from core.base.abstractions import VectorSearchSettings -from shared.abstractions.vector import ( - INDEX_MEASURE_TO_SQLA_ACC, - IndexArgsHNSW, - IndexArgsIVFFlat, - IndexMeasure, - IndexMethod, - VectorQuantizationType, - VectorTableName, -) -from shared.utils import _decorate_vector_type - -from .adapter import Adapter, AdapterContext, NoOp, Record -from .exc import ( - ArgError, - CollectionAlreadyExists, - CollectionNotFound, - FilterError, - MismatchedDimension, -) - -if TYPE_CHECKING: - from vecs.client import Client - - -def index_measure_to_ops( - measure: IndexMeasure, quantization_type: VectorQuantizationType -): - return _decorate_vector_type(measure.ops, quantization_type) - - -class Vector(UserDefinedType): - cache_ok = True - - def __init__( - self, - dim=None, - quantization_type: Optional[ - VectorQuantizationType - ] = VectorQuantizationType.FP32, - ): - super(UserDefinedType, self).__init__() - self.dim = dim - self.quantization_type = quantization_type - - def get_col_spec(self, **kw): - col_spec = "" - if self.dim is None: - col_spec = _decorate_vector_type("", self.quantization_type) - else: - col_spec = _decorate_vector_type( - f"({self.dim})", self.quantization_type - ) - return col_spec - - def bind_processor(self, dialect): - def process(value): - if value is None: - return value - if not isinstance(value, list): - raise ValueError("Expected a list") - if self.dim is not None and len(value) != self.dim: - raise ValueError( - f"Expected {self.dim} dimensions, not {len(value)}" - ) - return "[" + ",".join(str(float(v)) for v in value) + "]" - - return process - - def result_processor(self, dialect, coltype): - return lambda value: ( - value - if value is None - else [float(v) for v in value[1:-1].split(",")] - ) - - class comparator_factory(UserDefinedType.Comparator): - def l2_distance(self, other): - return self.op("<->", return_type=Float)(other) - - def max_inner_product(self, other): - return self.op("<#>", return_type=Float)(other) - - def cosine_distance(self, other): - return self.op("<=>", return_type=Float)(other) - - -class VectorCollection: - """ - The `vecs.Collection` class represents a collection of vectors within a PostgreSQL database with pgvector support. - It provides methods to manage (create, delete, fetch, upsert), index, and perform similarity searches on these vector collections. - - The collections are stored in separate tables in the database, with each vector associated with an identifier and optional metadata. - - Example usage: - - with vecs.create_client(DB_CONNECTION) as vx: - collection = vx.create_collection(name="docs", dimension=3) - collection.upsert([("id1", [1, 1, 1], {"key": "value"})]) - # Further operations on 'collection' - - Public Attributes: - name: The name of the vector collection. - dimension: The dimension of vectors in the collection. - - Note: Some methods of this class can raise exceptions from the `vecs.exc` module if errors occur. - """ - - COLUMN_VARS = [ - "extraction_id", - "document_id", - "user_id", - "collection_ids", - ] - - def __init__( - self, - name: str, - dimension: int, - quantization_type: VectorQuantizationType, - client: Client, - adapter: Optional[Adapter] = None, - ): - """ - Initializes a new instance of the `Collection` class. - - During expected use, developers initialize instances of `Collection` using the - `vecs.Client` with `vecs.Client.create_collection(...)` rather than directly. - - Args: - name (str): The name of the collection. - dimension (int): The dimension of the vectors in the collection. - client (Client): The client to use for interacting with the database. - """ - from core.providers.database.vecs.adapter import Adapter - - self.client = client - self.name = name - self.dimension = dimension - self.quantization_type = quantization_type - self.table = _build_table( - client.project_name, - name, - client.meta, - dimension, - quantization_type, - ) - self.adapter = adapter or Adapter(steps=[NoOp(dimension=dimension)]) - - reported_dimensions = set( - [ - x - for x in [ - dimension, - adapter.exported_dimension if adapter else None, - ] - if x is not None - ] - ) - - if len(reported_dimensions) == 0: - raise ArgError( - "One of dimension or adapter must provide a dimension" - ) - elif len(reported_dimensions) > 1: - raise MismatchedDimension( - "Mismatch in the reported dimensions of the selected vector collection and embedding model. Correct the selected embedding model or specify a new vector collection by modifying the `R2R_PROJECT_NAME` environment variable." - ) - - def __repr__(self): - """ - Returns a string representation of the `Collection` instance. - - Returns: - str: A string representation of the `Collection` instance. - """ - return ( - f'vecs.Collection(name="{self.name}", dimension={self.dimension})' - ) - - def __len__(self) -> int: - """ - Returns the number of vectors in the collection. - - Returns: - int: The number of vectors in the collection. - """ - with self.client.Session() as sess: - with sess.begin(): - stmt = select(func.count()).select_from(self.table) - return sess.execute(stmt).scalar() or 0 - - def _create_if_not_exists(self): - """ - PRIVATE - - Creates a new collection in the database if it doesn't already exist - - Returns: - Collection: The found or created collection. - """ - query = text( - f""" - select - relname as table_name, - atttypmod as embedding_dim - from - pg_class pc - join pg_attribute pa - on pc.oid = pa.attrelid - where - pc.relnamespace = '{self.client.project_name}'::regnamespace - and pc.relkind = 'r' - and pa.attname = 'vec' - and not pc.relname ^@ '_' - and pc.relname = :name - """ - ).bindparams(name=self.name) - with self.client.Session() as sess: - query_result = sess.execute(query).fetchone() - - if query_result: - _, collection_dimension = query_result - else: - collection_dimension = None - - reported_dimensions = set( - [ - x - for x in [self.dimension, collection_dimension] - if x is not None - ] - ) - if len(reported_dimensions) > 1: - raise MismatchedDimension( - "Mismatch in the reported dimensions of the selected vector collection and embedding model. Correct the selected embedding model or specify a new vector collection by modifying the `R2R_PROJECT_NAME` environment variable." - ) - - if not collection_dimension: - self.table.create(self.client.engine) - - return self - - def _create(self): - """ - PRIVATE - - Creates a new collection in the database. Raises a `vecs.exc.CollectionAlreadyExists` - exception if a collection with the specified name already exists. - - Returns: - Collection: The newly created collection. - """ - - collection_exists = self.__class__._does_collection_exist( - self.client, self.name - ) - if collection_exists: - raise CollectionAlreadyExists( - "Collection with requested name already exists" - ) - self.table.create(self.client.engine) - - unique_string = str(uuid4()).replace("-", "_")[0:7] - with self.client.Session() as sess: - sess.execute( - text( - f""" - create index ix_meta_{unique_string} - on {self.client.project_name}."{self.table.name}" - using gin ( metadata jsonb_path_ops ) - """ - ) - ) - - # Create trigger to update fts column - sess.execute( - text( - f""" - CREATE TRIGGER tsvector_update_{unique_string} BEFORE INSERT OR UPDATE - ON {self.client.project_name}."{self.table.name}" FOR EACH ROW EXECUTE FUNCTION - tsvector_update_trigger(fts, 'pg_catalog.english', text); - """ - ) - ) - return self - - def _drop(self): - """ - PRIVATE - - Deletes the collection from the database. Raises a `vecs.exc.CollectionNotFound` - exception if no collection with the specified name exists. - - Returns: - Collection: The deleted collection. - """ - with self.client.Session() as sess: - sess.execute(text(f"DROP TABLE IF EXISTS {self.name} CASCADE")) - sess.commit() - - return self - - def _get_index_options( - self, - method: IndexMethod, - index_arguments: Optional[Union[IndexArgsIVFFlat, IndexArgsHNSW]], - ) -> str: - if method == IndexMethod.ivfflat: - if isinstance(index_arguments, IndexArgsIVFFlat): - return f"WITH (lists={index_arguments.n_lists})" - else: - # Default value if no arguments provided - return "WITH (lists=100)" - elif method == IndexMethod.hnsw: - if isinstance(index_arguments, IndexArgsHNSW): - return f"WITH (m={index_arguments.m}, ef_construction={index_arguments.ef_construction})" - else: - # Default values if no arguments provided - return "WITH (m=16, ef_construction=64)" - else: - return "" # No options for other methods - - def upsert( - self, - records: Iterable[Record], - ) -> None: - chunk_size = 512 - - pipeline = flu(self.adapter(records, AdapterContext("upsert"))).chunk( - chunk_size - ) - - with self.client.Session() as sess: - with sess.begin(): - for chunk in pipeline: - stmt = postgresql.insert(self.table).values( - [ - { - "extraction_id": record[0], - "document_id": record[1], - "user_id": record[2], - "collection_ids": record[3], - "vec": record[4], - "text": record[5], - "metadata": record[6], - "fts": func.to_tsvector(record[5]), - } - for record in chunk - ] - ) - stmt = stmt.on_conflict_do_update( - index_elements=[self.table.c.extraction_id], - set_=dict( - document_id=stmt.excluded.document_id, - user_id=stmt.excluded.user_id, - collection_ids=stmt.excluded.collection_ids, - vec=stmt.excluded.vec, - text=stmt.excluded.text, - metadata=stmt.excluded.metadata, - fts=stmt.excluded.fts, - ), - ) - sess.execute(stmt) - return None - - def fetch(self, ids: Iterable[UUID]) -> list[Record]: - """ - Fetches vectors from the collection by their identifiers. - - Args: - ids (Iterable[UUID]): An iterable of vector identifiers. - - Returns: - list[Record]: A list of the fetched vectors. - - Raises: - ArgError: If ids is not an iterable of UUIDs. - """ - if isinstance(ids, (str, UUID)): - raise ArgError("ids must be an iterable of UUIDs") - - chunk_size = 12 - records = [] - with self.client.Session() as sess: - with sess.begin(): - for id_chunk in flu(ids).chunk(chunk_size): - stmt = select(self.table).where( - self.table.c.extraction_id.in_(id_chunk) - ) - chunk_records = sess.execute(stmt) - records.extend(chunk_records) - return records - - def delete( - self, - ids: Optional[Iterable[UUID]] = None, - filters: Optional[dict[str, Any]] = None, - ) -> dict[str, dict[str, str]]: - """ - Deletes vectors from the collection by matching filters or ids. - - Args: - extraction_ids (Optional[Iterable[UUID]], optional): An iterable of vector fragment identifiers. - filters (Optional[dict], optional): Filters to apply to the search. Defaults to None. - - Returns: - dict[str, dict[str, str]]: A dictionary of deleted records, where the key is the extraction_id - and the value is a dictionary containing 'document_id', 'extraction_id', and 'text'. - - Raises: - ArgError: If neither ids nor filters are provided, or if both are provided. - """ - if ids is None and filters is None: - raise ArgError("Either ids or filters must be provided.") - - if ids is not None and filters is not None: - raise ArgError("Either ids or filters must be provided, not both.") - - if isinstance(ids, (str, UUID)): - raise ArgError("ids must be an iterable of UUIDs") - - deleted_records = {} - - with self.client.Session() as sess: - with sess.begin(): - if ids: - for id_chunk in flu(ids).chunk(12): - delete_stmt = ( - delete(self.table) - .where(self.table.c.extraction_id.in_(id_chunk)) - .returning( - self.table.c.extraction_id, - self.table.c.document_id, - self.table.c.text, - ) - ) - result = sess.execute(delete_stmt) - for row in result: - extraction_id = str(row[0]) - deleted_records[extraction_id] = { - "extraction_id": extraction_id, - "document_id": str(row[1]), - "text": row[2], - } - - if filters: - meta_filter = self.build_filters(filters) - delete_stmt = ( - delete(self.table) - .where(meta_filter) - .returning( - self.table.c.extraction_id, - self.table.c.document_id, - self.table.c.text, - ) - ) - result = sess.execute(delete_stmt) - for row in result: - extraction_id = str(row[0]) - deleted_records[extraction_id] = { - "extraction_id": extraction_id, - "document_id": str(row[1]), - "text": row[2], - } - return deleted_records - - def __getitem__(self, items): - """ - Fetches a vector from the collection by its identifier. - - Args: - items (str): The identifier of the vector. - - Returns: - Record: The fetched vector. - """ - if not isinstance(items, str): - raise ArgError("items must be a string id") - - row = self.fetch([items]) - - if row == []: - raise KeyError("no item found with requested id") - return row[0] - - def semantic_search( - self, - vector: list[float], - search_settings: VectorSearchSettings, - ) -> Union[list[Record], list[str]]: - """ - Executes a similarity search in the collection. - - The return type is dependent on arguments *include_value* and *include_metadata* - - Args: - data (list[float]): The vector to use as the query. - search_settings (VectorSearchSettings): The search settings to use. - - Returns: - Union[list[Record], list[str]]: The result of the similarity search. - """ - - try: - imeasure_obj = IndexMeasure(search_settings.index_measure) - except ValueError: - raise ArgError("Invalid index measure") - - if not self.is_indexed_for_measure(imeasure_obj): - warnings.warn( - UserWarning( - f"Query does not have a covering index for {imeasure_obj}. See Collection.create_index" - ) - ) - - distance_lambda = INDEX_MEASURE_TO_SQLA_ACC.get(imeasure_obj) - if distance_lambda is None: - # unreachable - raise ArgError("invalid distance_measure") # pragma: no cover - - distance_clause = distance_lambda(self.table.c.vec)(vector) - - cols = [ - self.table.c.extraction_id, - self.table.c.document_id, - self.table.c.user_id, - self.table.c.collection_ids, - self.table.c.text, - ] - if search_settings.include_values: - cols.append(distance_clause) - - if search_settings.include_metadatas: - cols.append(self.table.c.metadata) - - stmt = select(*cols) - - if search_settings.filters: - stmt = stmt.filter(self.build_filters(search_settings.filters)) - - stmt = stmt.order_by(distance_clause) - stmt = stmt.offset(search_settings.offset) - stmt = stmt.limit(search_settings.search_limit) - - with self.client.Session() as sess: - with sess.begin(): - # index ignored if greater than n_lists - sess.execute( - text("set local ivfflat.probes = :probes").bindparams( - probes=search_settings.probes - ) - ) - if self.client._supports_hnsw(): - sess.execute( - text( - "set local hnsw.ef_search = :ef_search" - ).bindparams( - ef_search=max( - search_settings.ef_search, - search_settings.search_limit, - ) - ) - ) - if len(cols) == 1: - return [str(x) for x in sess.scalars(stmt).fetchall()] - result = sess.execute(stmt).fetchall() - return result or [] - - def full_text_search( - self, query_text: str, search_settings: VectorSearchSettings - ) -> list[VectorSearchResult]: - # Create a tsquery from the input query - ts_query = func.websearch_to_tsquery("english", query_text) - - # Use ts_rank for ranking - rank_function = func.ts_rank(self.table.c.fts, ts_query, 32).label( - "rank" - ) - - # Build the main query - stmt = ( - select( - self.table.c.extraction_id, - self.table.c.document_id, - self.table.c.user_id, - self.table.c.collection_ids, - self.table.c.text, - self.table.c.metadata, - rank_function, - ) - .where(self.table.c.fts.op("@@")(ts_query)) - .where(self.build_filters(search_settings.filters)) - .order_by(rank_function.desc()) - .offset(search_settings.offset) - .limit(search_settings.hybrid_search_settings.full_text_limit) - ) - - # Execute the query - with self.client.Session() as sess: - results = sess.execute(stmt).fetchall() - - # Convert the results to VectorSearchResult objects - return [ - VectorSearchResult( - extraction_id=str(r.extraction_id), - document_id=str(r.document_id), - user_id=str(r.user_id), - collection_ids=r.collection_ids, - text=r.text, - score=float(r.rank), - metadata=r.metadata, - ) - for r in results - ] - - def build_filters(self, filters: dict): - """ - PUBLIC - - Builds filters for SQL query based on provided dictionary. - - Args: - filters (dict): The dictionary specifying filter conditions. - - Raises: - FilterError: If filter conditions are not correctly formatted. - - Returns: - The filter clause for the SQL query. - """ - - if not isinstance(filters, dict): - raise FilterError("filters must be a dict") - - def parse_condition(key, value): - if key in VectorCollection.COLUMN_VARS: - # Handle column-based filters - column = getattr(self.table.c, key) - if isinstance(value, dict): - op, clause = next(iter(value.items())) - - if op == "$eq": - return column == clause - elif op == "$ne": - return column != clause - elif op == "$in": - return column.in_(clause) - elif op == "$nin": - return ~column.in_(clause) - elif op == "$overlap": - return column.overlap(clause) - elif op == "$contains": - return column.contains(clause) - elif op == "$any": - if key == "collection_ids": - # Use ANY for UUID array comparison - return func.array_to_string(column, ",").like( - f"%{clause}%" - ) - # New operator for checking if any element in the array matches - return column.any(clause) - else: - raise FilterError( - f"Unsupported operator for column {key}: {op}" - ) - else: - # Handle direct equality - if isinstance(value, str): - value = UUID(value) - return column == value - else: - # Handle JSON-based filters - json_col = self.table.c.metadata - if key.startswith("metadata."): - key = key.split("metadata.")[1] - if isinstance(value, dict): - if len(value) > 1: - raise FilterError("only one operator permitted") - operator, clause = next(iter(value.items())) - if operator not in ( - "$eq", - "$ne", - "$lt", - "$lte", - "$gt", - "$gte", - "$in", - "$contains", - ): - raise FilterError("unknown operator") - - if operator == "$eq" and not hasattr(clause, "__len__"): - contains_value = cast({key: clause}, postgresql.JSONB) - return json_col.op("@>")(contains_value) - - if operator == "$in": - if not isinstance(clause, list): - raise FilterError( - "argument to $in filter must be a list" - ) - for elem in clause: - if not isinstance(elem, (int, str, float)): - raise FilterError( - "argument to $in filter must be a list of scalars" - ) - contains_value = [ - cast(elem, postgresql.JSONB) for elem in clause - ] - return json_col.op("->")(key).in_(contains_value) - - matches_value = cast(clause, postgresql.JSONB) - - if operator == "$contains": - if not isinstance(clause, (int, str, float)): - raise FilterError( - "argument to $contains filter must be a scalar" - ) - return and_( - json_col.op("->")(key).contains(matches_value), - func.jsonb_typeof(json_col.op("->")(key)) - == "array", - ) - - return { - "$eq": json_col.op("->")(key) == matches_value, - "$ne": json_col.op("->")(key) != matches_value, - "$lt": json_col.op("->")(key) < matches_value, - "$lte": json_col.op("->")(key) <= matches_value, - "$gt": json_col.op("->")(key) > matches_value, - "$gte": json_col.op("->")(key) >= matches_value, - }[operator] - else: - contains_value = cast({key: value}, postgresql.JSONB) - return json_col.op("@>")(contains_value) - - def parse_filter(filter_dict): - conditions = [] - for key, value in filter_dict.items(): - if key == "$and": - conditions.append(and_(*[parse_filter(f) for f in value])) - elif key == "$or": - conditions.append(or_(*[parse_filter(f) for f in value])) - else: - conditions.append(parse_condition(key, value)) - return and_(*conditions) - - return parse_filter(filters) - - @classmethod - def _does_collection_exist(cls, client: "Client", name: str) -> bool: - """ - PRIVATE - - Checks if a collection with a given name exists within the database - - Args: - client (Client): The database client. - name (str): The name of the collection - - Returns: - Exists: Whether the collection exists or not - """ - - try: - client.get_collection(name) - return True - except CollectionNotFound: - return False - - def close(self): - """ - Closes the database connection associated with this collection. - - This method should be called when you are done using the collection to release - the database resources. - """ - if self.client: - self.client.close() - - def create_index( - self, - table_name: Optional[VectorTableName] = None, - measure: IndexMeasure = IndexMeasure.cosine_distance, - method: IndexMethod = IndexMethod.auto, - index_arguments: Optional[ - Union[IndexArgsIVFFlat, IndexArgsHNSW] - ] = None, - index_name: Optional[str] = None, - concurrently: bool = True, - ) -> None: - """ - Creates an index for the collection. - - Note: - When `vecs` creates an index on a pgvector column in PostgreSQL, it uses a multi-step - process that enables performant indexes to be built for large collections with low end - database hardware. - - Those steps are: - - - Creates a new table with a different name - - Randomly selects records from the existing table - - Inserts the random records from the existing table into the new table - - Creates the requested vector index on the new table - - Upserts all data from the existing table into the new table - - Drops the existing table - - Renames the new table to the existing tables name - - If you create dependencies (like views) on the table that underpins - a `vecs.Collection` the `create_index` step may require you to drop those dependencies before - it will succeed. - - Args: - measure (IndexMeasure, optional): The measure to index for. Defaults to 'cosine_distance'. - method (IndexMethod, optional): The indexing method to use. Defaults to 'auto'. - index_arguments: (IndexArgsIVFFlat | IndexArgsHNSW, optional): Index type specific arguments - replace (bool, optional): Whether to replace the existing index. Defaults to True. - concurrently (bool, optional): Whether to create the index concurrently. Defaults to True. - Raises: - ArgError: If an invalid index method is used, or if *replace* is False and an index already exists. - """ - - if table_name == VectorTableName.CHUNKS: - table_name = f"{self.client.project_name}.{self.table.name}" - col_name = "vec" - elif table_name == VectorTableName.ENTITIES: - table_name = ( - f"{self.client.project_name}.{VectorTableName.ENTITIES}" - ) - col_name = "description_embedding" - elif table_name == VectorTableName.COMMUNITIES: - table_name = ( - f"{self.client.project_name}.{VectorTableName.COMMUNITIES}" - ) - col_name = "embedding" - else: - raise ArgError("invalid table name") - if method not in ( - IndexMethod.ivfflat, - IndexMethod.hnsw, - IndexMethod.auto, - ): - raise ArgError("invalid index method") - - if index_arguments: - # Disallow case where user submits index arguments but uses the - # IndexMethod.auto index (index build arguments should only be - # used with a specific index) - if method == IndexMethod.auto: - raise ArgError( - "Index build parameters are not allowed when using the IndexMethod.auto index." - ) - # Disallow case where user specifies one index type but submits - # index build arguments for the other index type - if ( - isinstance(index_arguments, IndexArgsHNSW) - and method != IndexMethod.hnsw - ) or ( - isinstance(index_arguments, IndexArgsIVFFlat) - and method != IndexMethod.ivfflat - ): - raise ArgError( - f"{index_arguments.__class__.__name__} build parameters were supplied but {method} index was specified." - ) - - if method == IndexMethod.auto: - if self.client._supports_hnsw(): - method = IndexMethod.hnsw - else: - method = IndexMethod.ivfflat - - if method == IndexMethod.hnsw and not self.client._supports_hnsw(): - raise ArgError( - "HNSW Unavailable. Upgrade your pgvector installation to > 0.5.0 to enable HNSW support" - ) - - ops = index_measure_to_ops( - measure, quantization_type=self.quantization_type - ) - - if ops is None: - raise ArgError("Unknown index measure") - - concurrently_sql = "CONCURRENTLY" if concurrently else "" - - index_name = ( - index_name or f"ix_{ops}_{method}__{time.strftime('%Y%m%d%H%M%S')}" - ) - - create_index_sql = f""" - CREATE INDEX {concurrently_sql} {index_name} - ON {table_name} - USING {method} ({col_name} {ops}) {self._get_index_options(method, index_arguments)}; - """ - - try: - if concurrently: - with self.client.engine.connect() as connection: - connection = connection.execution_options( - isolation_level="AUTOCOMMIT" - ) - connection.execute(text(create_index_sql)) - else: - with self.client.Session() as sess: - sess.execute(text(create_index_sql)) - sess.commit() - except Exception as e: - raise Exception(f"Failed to create index: {e}") - - return None - - -def _build_table( - project_name: str, - name: str, - meta: MetaData, - dimension: int, - quantization_type: VectorQuantizationType = VectorQuantizationType.FP32, -) -> Table: - - table = Table( - name, - meta, - Column("extraction_id", postgresql.UUID, primary_key=True), - Column("document_id", postgresql.UUID, nullable=False), - Column("user_id", postgresql.UUID, nullable=False), - Column( - "collection_ids", - postgresql.ARRAY(postgresql.UUID), - server_default="{}", - ), - Column( - "vec", - Vector(dimension, quantization_type=quantization_type), - nullable=False, - ), - Column("text", postgresql.TEXT, nullable=True), - Column( - "fts", - postgresql.TSVECTOR, - nullable=False, - server_default=text("to_tsvector('english'::regconfig, '')"), - ), - Column( - "metadata", - postgresql.JSONB, - server_default=text("'{}'::jsonb"), - nullable=False, - ), - # Create a GIN index for the tsvector column - Index(f"idx_{name}_fts", "fts", postgresql_using="gin"), - extend_existing=True, - ) - - return table diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index 0e20f277b..070f835c8 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -1,162 +1,171 @@ -import concurrent.futures import copy -import json import logging import time -from concurrent.futures import ThreadPoolExecutor from typing import Any, Optional, Union from sqlalchemy import text -from sqlalchemy.exc import NoResultFound, SQLAlchemyError -from core.base import ( - DatabaseConfig, - VectorDBProvider, - VectorEntry, - VectorSearchResult, -) +from core.base import VectorEntry, VectorQuantizationType, VectorSearchResult from core.base.abstractions import VectorSearchSettings from shared.abstractions.vector import ( IndexArgsHNSW, IndexArgsIVFFlat, IndexMeasure, IndexMethod, - VectorQuantizationType, VectorTableName, ) -from .vecs import Client, VectorCollection, create_client +from .base import DatabaseMixin, QueryBuilder +from .vecs.exc import ArgError logger = logging.getLogger() - - -class PostgresVectorDBProvider(VectorDBProvider): - def __init__(self, config: DatabaseConfig, *args, **kwargs): - super().__init__(config) - self.collection: Optional[VectorCollection] = None - self.project_name = kwargs.get("project_name", None) - connection_string = kwargs.get("connection_string", None) - if not connection_string: - raise ValueError( - "Please provide a valid `connection_string` to the `PostgresVectorDBProvider`." - ) - self.vx: Client = create_client( - connection_string=connection_string, project_name=self.project_name - ) - if not self.vx: - raise ValueError( - "Error occurred while attempting to connect to the pgvector provider." - ) - self.project_name = kwargs.get("project_name", None) - if not self.project_name: - raise ValueError( - "Please provide a valid `project_name` to the `PostgresVectorDBProvider`." - ) - dimension = kwargs.get("dimension", None) - quantization_type = kwargs.get("quantization_type", None) - if not dimension: - raise ValueError( - "Please provide a valid `dimension` to the `PostgresVectorDBProvider`." - ) - - self._initialize_vector_db(dimension, quantization_type) - logger.info( - f"Successfully initialized PGVectorDB for project: {self.project_name}" - ) - - def _initialize_vector_db( - self, dimension: int, quantization_type: VectorQuantizationType - ) -> None: - # Create extension for trigram similarity - with self.vx.Session() as sess: - sess.execute(text(f"CREATE EXTENSION IF NOT EXISTS pg_trgm;")) - sess.execute(text(f"CREATE EXTENSION IF NOT EXISTS btree_gin;")) - sess.commit() - - self.collection = self.vx.get_or_create_vector_table( - name=self.project_name, - dimension=dimension, - quantization_type=quantization_type, - ) - - # NOTE: Do not create an index during initialization - # self.create_index() - - def upsert(self, entry: VectorEntry) -> None: - if self.collection is None: - raise ValueError( - "Please call `initialize_collection` before attempting to run `upsert`." - ) - - self.collection.upsert( - records=[ - ( - entry.extraction_id, - entry.document_id, - entry.user_id, - entry.collection_ids, - entry.vector.data, - entry.text, - entry.metadata, - ) - ] +from shared.utils import _decorate_vector_type + + +def index_measure_to_ops( + measure: IndexMeasure, quantization_type: VectorQuantizationType +): + return _decorate_vector_type(measure.ops, quantization_type) + + +class VectorDBMixin(DatabaseMixin): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.dimension = kwargs.get("dimension") + self.quantization_type = kwargs.get("quantization_type") + + async def initialize_vector_db(self): + # Create the vector table if it doesn't exist + query = f""" + CREATE TABLE IF NOT EXISTS {self.project_name}.vectors ( + extraction_id TEXT PRIMARY KEY, + document_id TEXT, + user_id TEXT, + collection_ids TEXT[], + vector vector({self.dimension}), + text TEXT, + metadata JSONB + ); + CREATE INDEX IF NOT EXISTS idx_vectors_document_id ON {self.project_name}.vectors (document_id); + CREATE INDEX IF NOT EXISTS idx_vectors_user_id ON {self.project_name}.vectors (user_id); + CREATE INDEX IF NOT EXISTS idx_vectors_collection_ids ON {self.project_name}.vectors USING GIN (collection_ids); + CREATE INDEX IF NOT EXISTS idx_vectors_text ON {self.project_name}.vectors USING GIN (to_tsvector('english', text)); + """ + await self.execute_query(query) + + async def upsert(self, entry: VectorEntry) -> None: + query = f""" + INSERT INTO {self.project_name}.vectors + (extraction_id, document_id, user_id, collection_ids, vector, text, metadata) + VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT (extraction_id) DO UPDATE + SET document_id = $2, user_id = $3, collection_ids = $4, vector = $5, text = $6, metadata = $7; + """ + await self.execute_query( + query, + ( + entry.extraction_id, + entry.document_id, + entry.user_id, + entry.collection_ids, + entry.vector.data, + entry.text, + entry.metadata, + ), ) - def upsert_entries(self, entries: list[VectorEntry]) -> None: - if self.collection is None: - raise ValueError( - "Please call `initialize_collection` before attempting to run `upsert_entries`." + async def upsert_entries(self, entries: list[VectorEntry]) -> None: + query = f""" + INSERT INTO {self.project_name}.vectors + (extraction_id, document_id, user_id, collection_ids, vector, text, metadata) + VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT (extraction_id) DO UPDATE + SET document_id = $2, user_id = $3, collection_ids = $4, vector = $5, text = $6, metadata = $7; + """ + params = [ + ( + entry.extraction_id, + entry.document_id, + entry.user_id, + entry.collection_ids, + entry.vector.data, + entry.text, + entry.metadata, ) - self.collection.upsert( - records=[ - ( - entry.extraction_id, - entry.document_id, - entry.user_id, - entry.collection_ids, - entry.vector.data, - entry.text, - entry.metadata, - ) - for entry in entries - ] - ) + for entry in entries + ] + await self.execute_many(query, params) - def semantic_search( + async def semantic_search( self, query_vector: list[float], search_settings: VectorSearchSettings ) -> list[VectorSearchResult]: - if self.collection is None: - raise ValueError( - "Please call `initialize_collection` before attempting to run `semantic_search`." - ) - results = self.collection.semantic_search( - vector=query_vector, search_settings=search_settings + query = f""" + SELECT extraction_id, document_id, user_id, collection_ids, text, + 1 - (vector <=> $1::vector) as similarity, metadata + FROM {self.project_name}.vectors + WHERE collection_ids && $2 + ORDER BY similarity DESC + LIMIT $3 OFFSET $4; + """ + results = await self.fetch_query( + query, + ( + query_vector, + search_settings.collection_ids, + search_settings.search_limit, + search_settings.offset, + ), ) + return [ VectorSearchResult( - extraction_id=result[0], # type: ignore - document_id=result[1], # type: ignore - user_id=result[2], # type: ignore - collection_ids=result[3], # type: ignore - text=result[4], # type: ignore - score=1 - float(result[5]), # type: ignore - metadata=result[6], # type: ignore + extraction_id=result["extraction_id"], + document_id=result["document_id"], + user_id=result["user_id"], + collection_ids=result["collection_ids"], + text=result["text"], + score=float(result["similarity"]), + metadata=result["metadata"], ) for result in results ] - def full_text_search( + async def full_text_search( self, query_text: str, search_settings: VectorSearchSettings ) -> list[VectorSearchResult]: - if self.collection is None: - raise ValueError( - "Please call `initialize_collection` before attempting to run `full_text_search`." - ) - return self.collection.full_text_search( - query_text=query_text, search_settings=search_settings + query = f""" + SELECT extraction_id, document_id, user_id, collection_ids, text, + ts_rank_cd(to_tsvector('english', text), plainto_tsquery('english', $1)) as rank, + metadata + FROM {self.project_name}.vectors + WHERE collection_ids && $2 AND to_tsvector('english', text) @@ plainto_tsquery('english', $1) + ORDER BY rank DESC + LIMIT $3 OFFSET $4; + """ + results = await self.fetch_query( + query, + ( + query_text, + search_settings.collection_ids, + search_settings.search_limit, + search_settings.offset, + ), ) - def hybrid_search( + return [ + VectorSearchResult( + extraction_id=result["extraction_id"], + document_id=result["document_id"], + user_id=result["user_id"], + collection_ids=result["collection_ids"], + text=result["text"], + score=float(result["rank"]), + metadata=result["metadata"], + ) + for result in results + ] + + async def hybrid_search( self, query_text: str, query_vector: list[float], @@ -180,46 +189,16 @@ def hybrid_search( semantic_settings.search_limit += search_settings.offset full_text_settings = copy.deepcopy(search_settings) - full_text_settings.hybrid_search_settings.full_text_limit += ( # type: ignore + full_text_settings.hybrid_search_settings.full_text_limit += ( search_settings.offset ) - # Use ThreadPoolExecutor to run searches in parallel - with ThreadPoolExecutor(max_workers=2) as executor: - semantic_future = executor.submit( - self.semantic_search, query_vector, semantic_settings - ) - full_text_future = executor.submit( - self.full_text_search, query_text, full_text_settings - ) - - # Wait for both searches to complete - concurrent.futures.wait([semantic_future, full_text_future]) - - semantic_results: list[VectorSearchResult] = semantic_future.result() - full_text_results: list[VectorSearchResult] = full_text_future.result() - - semantic_limit = search_settings.search_limit - full_text_limit = ( - search_settings.hybrid_search_settings.full_text_limit + semantic_results = await self.semantic_search( + query_vector, semantic_settings ) - semantic_weight = ( - search_settings.hybrid_search_settings.semantic_weight - ) - full_text_weight = ( - search_settings.hybrid_search_settings.full_text_weight + full_text_results = await self.full_text_search( + query_text, full_text_settings ) - rrf_k = search_settings.hybrid_search_settings.rrf_k - - # Combine results using RRF - combined_results = { - result.extraction_id: { - "semantic_rank": rank, - "full_text_rank": full_text_limit, - "data": result, - } - for rank, result in enumerate(semantic_results, 1) - } semantic_limit = search_settings.search_limit full_text_limit = ( @@ -232,7 +211,7 @@ def hybrid_search( search_settings.hybrid_search_settings.full_text_weight ) rrf_k = search_settings.hybrid_search_settings.rrf_k - # Combine results using RRF + combined_results = { result.extraction_id: { "semantic_rank": rank, @@ -252,27 +231,24 @@ def hybrid_search( "data": result, } - # Filter out non-overlapping results combined_results = { k: v for k, v in combined_results.items() - if v["semantic_rank"] <= semantic_limit * 2 # type: ignore - and v["full_text_rank"] <= full_text_limit * 2 # type: ignore + if v["semantic_rank"] <= semantic_limit * 2 + and v["full_text_rank"] <= full_text_limit * 2 } - # Calculate RRF scores - for result in combined_results.values(): # type: ignore - semantic_score = 1 / (rrf_k + result["semantic_rank"]) # type: ignore - full_text_score = 1 / (rrf_k + result["full_text_rank"]) # type: ignore - result["rrf_score"] = ( # type: ignore + for result in combined_results.values(): + semantic_score = 1 / (rrf_k + result["semantic_rank"]) + full_text_score = 1 / (rrf_k + result["full_text_rank"]) + result["rrf_score"] = ( semantic_score * semantic_weight + full_text_score * full_text_weight ) / (semantic_weight + full_text_weight) - # Sort by RRF score and apply offset and limit sorted_results = sorted( combined_results.values(), - key=lambda x: x["rrf_score"], # type: ignore + key=lambda x: x["rrf_score"], reverse=True, ) offset_results = sorted_results[ @@ -282,14 +258,14 @@ def hybrid_search( return [ VectorSearchResult( - extraction_id=result["data"].extraction_id, # type: ignore - document_id=result["data"].document_id, # type: ignore - user_id=result["data"].user_id, # type: ignore - collection_ids=result["data"].collection_ids, # type: ignore - text=result["data"].text, # type: ignore - score=result["rrf_score"], # type: ignore + extraction_id=result["data"].extraction_id, + document_id=result["data"].document_id, + user_id=result["data"].user_id, + collection_ids=result["data"].collection_ids, + text=result["data"].text, + score=result["rrf_score"], metadata={ - **result["data"].metadata, # type: ignore + **result["data"].metadata, "semantic_rank": result["semantic_rank"], "full_text_rank": result["full_text_rank"], }, @@ -297,7 +273,7 @@ def hybrid_search( for result in offset_results ] - def create_index( + async def create_index( self, table_name: Optional[VectorTableName] = None, index_method: IndexMethod = IndexMethod.hnsw, @@ -308,249 +284,240 @@ def create_index( index_name: Optional[str] = None, concurrently: bool = True, ): - if self.collection is None: - raise ValueError("Collection is not initialized.") - - start_time = time.time() - - self.collection.create_index( - table_name=table_name, - method=index_method, - measure=measure, - index_arguments=index_arguments, - index_name=index_name, - concurrently=concurrently, - ) + # This method needs to be implemented based on your specific indexing requirements + pass - end_time = time.time() - logger.info(f"Index creation took {end_time - start_time:.2f} seconds") - - def delete( - self, - filters: dict[str, Any], + async def delete( + self, filters: dict[str, Any] ) -> dict[str, dict[str, str]]: - if self.collection is None: - raise ValueError( - "Please call `initialize_collection` before attempting to run `delete`." - ) - - return self.collection.delete(filters=filters) + conditions = [] + params = [] + for key, value in filters.items(): + conditions.append(f"{key} = ${len(params) + 1}") + params.append(value) + + where_clause = " AND ".join(conditions) + query = f""" + DELETE FROM {self.project_name}.vectors + WHERE {where_clause} + RETURNING extraction_id; + """ + results = await self.fetch_query(query, params) + return { + result["extraction_id"]: {"status": "deleted"} + for result in results + } - def assign_document_to_collection( + async def assign_document_to_collection( self, document_id: str, collection_id: str ) -> None: + query = f""" + UPDATE {self.project_name}.vectors + SET collection_ids = array_append(collection_ids, $1) + WHERE document_id = $2 AND NOT ($1 = ANY(collection_ids)); """ - Assign a document to a collection in the vector database. + await self.execute_query(query, (collection_id, document_id)) - Args: - document_id (str): The ID of the document to assign. - collection_id (str): The ID of the collection to assign the document to. - - Raises: - ValueError: If the collection is not initialized. + async def remove_document_from_collection( + self, document_id: str, collection_id: str + ) -> None: + query = f""" + UPDATE {self.project_name}.vectors + SET collection_ids = array_remove(collection_ids, $1) + WHERE document_id = $2; """ - if self.collection is None: - raise ValueError( - "Please call `initialize_collection` before attempting to run `assign_document_to_collection`." - ) + await self.execute_query(query, (collection_id, document_id)) - table_name = self.collection.table.name - query = text( - f""" - UPDATE {self.project_name}."{table_name}" - SET collection_ids = array_append(collection_ids, :collection_id) - WHERE document_id = :document_id AND NOT (:collection_id = ANY(collection_ids)) - RETURNING document_id - """ - ) - - with self.vx.Session() as sess: - result = sess.execute( - query, - {"document_id": document_id, "collection_id": collection_id}, - ).fetchone() - sess.commit() - - if not result: - logger.warning( - f"Document {document_id} not found or already assigned to collection {collection_id}" - ) - - def remove_document_from_collection( - self, document_id: str, collection_id: str + async def remove_collection_from_documents( + self, collection_id: str ) -> None: + query = f""" + UPDATE {self.project_name}.vectors + SET collection_ids = array_remove(collection_ids, $1) + WHERE $1 = ANY(collection_ids); """ - Remove a document from a collection in the vector database. + await self.execute_query(query, (collection_id,)) - Args: - document_id (str): The ID of the document to remove. - collection_id (str): The ID of the collection to remove the document from. + async def delete_user(self, user_id: str) -> None: + query = f""" + DELETE FROM {self.project_name}.vectors + WHERE user_id = $1; + """ + await self.execute_query(query, (user_id,)) - Raises: - ValueError: If the collection is not initialized. + async def delete_collection(self, collection_id: str) -> None: + query = f""" + DELETE FROM {self.project_name}.vectors + WHERE $1 = ANY(collection_ids); """ - if self.collection is None: - raise ValueError( - "Please call `initialize_collection` before attempting to run `remove_document_from_collection`." - ) + await self.execute_query(query, (collection_id,)) - table_name = self.collection.table.name - query = text( - f""" - UPDATE {self.project_name}."{table_name}" - SET collection_ids = array_remove(collection_ids, :collection_id) - WHERE document_id = :document_id AND :collection_id = ANY(collection_ids) - RETURNING document_id - """ - ) + async def get_document_chunks( + self, + document_id: str, + offset: int = 0, + limit: int = -1, + include_vectors: bool = False, + ) -> dict[str, Any]: + vector_select = ", vector" if include_vectors else "" + limit_clause = f"LIMIT {limit}" if limit > -1 else "" + + query = f""" + SELECT extraction_id, document_id, user_id, collection_ids, text, metadata + {vector_select} + FROM {self.project_name}.vectors + WHERE document_id = $1 + OFFSET $2 + {limit_clause}; + """ + params = [document_id, offset] + if limit > -1: + params.append(limit) - with self.vx.Session() as sess: - result = sess.execute( - query, - {"document_id": document_id, "collection_id": collection_id}, - ).fetchone() - sess.commit() + results = await self.fetch_query(query, params) - if not result: - logger.warning( - f"Document {document_id} not found in collection {collection_id} or already removed" - ) + return { + "chunks": [ + { + "extraction_id": result["extraction_id"], + "document_id": result["document_id"], + "user_id": result["user_id"], + "collection_ids": result["collection_ids"], + "text": result["text"], + "metadata": result["metadata"], + **( + {"vector": result["vector"]} if include_vectors else {} + ), + } + for result in results + ] + } - def remove_collection_from_documents(self, collection_id: str) -> None: - if self.collection is None: - raise ValueError("Collection is not initialized.") - - table_name = self.collection.table.name - query = text( - f""" - UPDATE {self.project_name}."{table_name}" - SET collection_ids = array_remove(collection_ids, :collection_id) - WHERE :collection_id = ANY(collection_ids) - """ - ) + async def create_index( + self, + table_name: Optional[VectorTableName] = None, + measure: IndexMeasure = IndexMeasure.cosine_distance, + method: IndexMethod = IndexMethod.auto, + index_arguments: Optional[ + Union[IndexArgsIVFFlat, IndexArgsHNSW] + ] = None, + index_name: Optional[str] = None, + concurrently: bool = True, + ) -> None: + """ + Creates an index for the collection. - with self.vx.Session() as sess: - sess.execute(query, {"collection_id": collection_id}) - sess.commit() - - def delete_user(self, user_id: str) -> None: - if self.collection is None: - raise ValueError("Collection is not initialized.") - - table_name = self.collection.table.name - query = text( - f""" - UPDATE {self.project_name}."{table_name}" - SET user_id = NULL - WHERE user_id = :user_id - """ - ) + Note: + When `vecs` creates an index on a pgvector column in PostgreSQL, it uses a multi-step + process that enables performant indexes to be built for large collections with low end + database hardware. - with self.vx.Session() as sess: - sess.execute(query, {"user_id": user_id}) - sess.commit() + Those steps are: - def delete_collection(self, collection_id: str) -> None: - if self.collection is None: - raise ValueError("Collection is not initialized.") + - Creates a new table with a different name + - Randomly selects records from the existing table + - Inserts the random records from the existing table into the new table + - Creates the requested vector index on the new table + - Upserts all data from the existing table into the new table + - Drops the existing table + - Renames the new table to the existing tables name - table_name = self.collection.table.name + If you create dependencies (like views) on the table that underpins + a `vecs.Collection` the `create_index` step may require you to drop those dependencies before + it will succeed. - query = text( - f""" - WITH updated AS ( - UPDATE {self.project_name}."{table_name}" - SET collection_ids = array_remove(collection_ids, :collection_id) - WHERE :collection_id = ANY(collection_ids) - RETURNING 1 - ) - SELECT COUNT(*) AS affected_rows FROM updated - """ - ) + Args: + measure (IndexMeasure, optional): The measure to index for. Defaults to 'cosine_distance'. + method (IndexMethod, optional): The indexing method to use. Defaults to 'auto'. + index_arguments: (IndexArgsIVFFlat | IndexArgsHNSW, optional): Index type specific arguments + replace (bool, optional): Whether to replace the existing index. Defaults to True. + concurrently (bool, optional): Whether to create the index concurrently. Defaults to True. + Raises: + ArgError: If an invalid index method is used, or if *replace* is False and an index already exists. + """ - with self.vx.Session() as sess: - try: - result = sess.execute(query, {"collection_id": collection_id}) - row = result.one() - affected_rows = row.affected_rows - sess.commit() - - if affected_rows == 0: - logger.warning( - f"Collection {collection_id} not found in any documents." - ) - except NoResultFound: - raise ValueError( - f"Unexpected error: No result returned for collection {collection_id}" + if table_name == VectorTableName.CHUNKS: + table_name = f"{self.client.project_name}.{self.table.name}" + col_name = "vec" + elif table_name == VectorTableName.ENTITIES: + table_name = ( + f"{self.client.project_name}.{VectorTableName.ENTITIES}" + ) + col_name = "description_embedding" + elif table_name == VectorTableName.COMMUNITIES: + table_name = ( + f"{self.client.project_name}.{VectorTableName.COMMUNITIES}" + ) + col_name = "embedding" + else: + raise ArgError("invalid table name") + if method not in ( + IndexMethod.ivfflat, + IndexMethod.hnsw, + IndexMethod.auto, + ): + raise ArgError("invalid index method") + + if index_arguments: + # Disallow case where user submits index arguments but uses the + # IndexMethod.auto index (index build arguments should only be + # used with a specific index) + if method == IndexMethod.auto: + raise ArgError( + "Index build parameters are not allowed when using the IndexMethod.auto index." ) - except SQLAlchemyError as e: - sess.rollback() - logger.error( - f"Error deleting collection {collection_id}: {str(e)}" + # Disallow case where user specifies one index type but submits + # index build arguments for the other index type + if ( + isinstance(index_arguments, IndexArgsHNSW) + and method != IndexMethod.hnsw + ) or ( + isinstance(index_arguments, IndexArgsIVFFlat) + and method != IndexMethod.ivfflat + ): + raise ArgError( + f"{index_arguments.__class__.__name__} build parameters were supplied but {method} index was specified." ) - raise - def get_document_chunks( - self, - document_id: str, - offset: int = 0, - limit: int = -1, - include_vectors: bool = False, - ) -> dict[str, Any]: - if not self.collection: - raise ValueError("Collection is not initialized.") - - limit_clause = f"LIMIT {limit}" if limit != -1 else "" - table_name = self.collection.table.name - - select_clause = "SELECT extraction_id, document_id, user_id, collection_ids, text, metadata" - if include_vectors: - select_clause += ", vec" - - query = text( - f""" - {select_clause}, COUNT(*) OVER() AS total - FROM {self.project_name}."{table_name}" - WHERE document_id = :document_id - ORDER BY CAST(metadata->>'chunk_order' AS INTEGER) - {limit_clause} OFFSET :offset - """ - ) + if method == IndexMethod.auto: + if self.client._supports_hnsw(): + method = IndexMethod.hnsw + else: + method = IndexMethod.ivfflat - params = {"document_id": document_id, "offset": offset} - if limit != -1: - params["limit"] = limit + if method == IndexMethod.hnsw and not self.client._supports_hnsw(): + raise ArgError( + "HNSW Unavailable. Upgrade your pgvector installation to > 0.5.0 to enable HNSW support" + ) - with self.vx.Session() as sess: - results = sess.execute(query, params).fetchall() + ops = index_measure_to_ops( + measure, quantization_type=self.quantization_type + ) - chunks = [] - total = 0 + if ops is None: + raise ArgError("Unknown index measure") - if results: - total = results[0][-1] # Get the total count from the last column - chunks = [ - { - "extraction_id": result[0], - "document_id": result[1], - "user_id": result[2], - "collection_ids": result[3], - "text": result[4], - "metadata": result[5], - "vector": ( - json.loads(result[6]) if include_vectors else None - ), - } - for result in results - ] + concurrently_sql = "CONCURRENTLY" if concurrently else "" - return {"results": chunks, "total_entries": total} + index_name = ( + index_name or f"ix_{ops}_{method}__{time.strftime('%Y%m%d%H%M%S')}" + ) - def close(self) -> None: - if self.vx: - with self.vx.Session() as sess: - sess.close() - if sess.bind: - sess.bind.dispose() # type: ignore + create_index_sql = f""" + CREATE INDEX {concurrently_sql} {index_name} + ON {table_name} + USING {method} ({col_name} {ops}) {self._get_index_options(method, index_arguments)}; + """ + + try: + if concurrently: + # For concurrent index creation, we need to execute outside a transaction + await self.execute_query( + create_index_sql, isolation_level="AUTOCOMMIT" + ) + else: + await self.execute_query(create_index_sql) + except Exception as e: + raise Exception(f"Failed to create index: {e}") - logger.info("Closed PGVectorDB connection.") + return None From 58b1222cdd807b90e7c0e2f2616611c85810916b Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Thu, 17 Oct 2024 17:19:31 -0700 Subject: [PATCH 10/27] up --- py/core/__init__.py | 2 - py/core/base/__init__.py | 2 - py/core/base/providers/__init__.py | 4 - py/core/base/providers/database.py | 24 ------ .../hatchet/ingestion_workflow.py | 16 ++-- .../main/orchestration/hatchet/kg_workflow.py | 2 +- .../simple/ingestion_workflow.py | 12 +-- py/core/main/services/auth_service.py | 24 +++--- py/core/main/services/ingestion_service.py | 12 +-- py/core/main/services/kg_service.py | 16 ++-- py/core/main/services/management_service.py | 76 ++++++++----------- py/core/pipes/kg/community_summary.py | 2 +- py/core/providers/database/handle.py | 14 ++-- py/core/providers/database/postgres.py | 18 +---- 14 files changed, 82 insertions(+), 142 deletions(-) diff --git a/py/core/__init__.py b/py/core/__init__.py index f6b1e4552..36cafc02a 100644 --- a/py/core/__init__.py +++ b/py/core/__init__.py @@ -140,8 +140,6 @@ # Database providers "DatabaseConfig", "DatabaseProvider", - "RelationalDBProvider", - "VectorDBProvider", # Embedding provider "EmbeddingConfig", "EmbeddingProvider", diff --git a/py/core/base/__init__.py b/py/core/base/__init__.py index c98f2138a..aa264a735 100644 --- a/py/core/base/__init__.py +++ b/py/core/base/__init__.py @@ -113,8 +113,6 @@ # Database providers "DatabaseConfig", "DatabaseProvider", - "RelationalDBProvider", - "VectorDBProvider", "PostgresConfigurationSettings", # Embedding provider "EmbeddingConfig", diff --git a/py/core/base/providers/__init__.py b/py/core/base/providers/__init__.py index a3d8507e2..b4dd42fb7 100644 --- a/py/core/base/providers/__init__.py +++ b/py/core/base/providers/__init__.py @@ -5,8 +5,6 @@ DatabaseConfig, DatabaseProvider, PostgresConfigurationSettings, - RelationalDBProvider, - VectorDBProvider, VectorQuantizationType, ) from .embedding import EmbeddingConfig, EmbeddingProvider @@ -36,8 +34,6 @@ "DatabaseConfig", "PostgresConfigurationSettings", "DatabaseProvider", - "RelationalDBProvider", - "VectorDBProvider", "VectorQuantizationType", # Embedding provider "EmbeddingConfig", diff --git a/py/core/base/providers/database.py b/py/core/base/providers/database.py index 453f8a51f..dd500afc4 100644 --- a/py/core/base/providers/database.py +++ b/py/core/base/providers/database.py @@ -69,18 +69,6 @@ def supported_providers(self) -> list[str]: return ["postgres"] -# class VectorDBProvider(Provider, ABC): -# @abstractmethod -# def _initialize_vector_db(self) -> None: -# pass - - -# class RelationalDBProvider(Provider, ABC): -# @abstractmethod -# async def _initialize_relational_db(self) -> None: -# pass - - class DatabaseProvider(Provider): def __init__(self, config: DatabaseConfig): if not isinstance(config, DatabaseConfig): @@ -90,18 +78,6 @@ def __init__(self, config: DatabaseConfig): logger.info(f"Initializing DatabaseProvider with config {config}.") super().__init__(config) - # # remove later to re-introduce typing... - # self.vector: Any = None - # self.relational: Any = None - - # @abstractmethod - # async def _initialize_vector_db(self) -> VectorDBProvider: - # pass - - # @abstractmethod - # async def _initialize_relational_db(self) -> RelationalDBProvider: - # pass - @abstractmethod def _get_table_name(self, base_name: str) -> str: pass diff --git a/py/core/main/orchestration/hatchet/ingestion_workflow.py b/py/core/main/orchestration/hatchet/ingestion_workflow.py index 84b230581..08f6c901f 100644 --- a/py/core/main/orchestration/hatchet/ingestion_workflow.py +++ b/py/core/main/orchestration/hatchet/ingestion_workflow.py @@ -150,14 +150,14 @@ async def parse(self, context: Context) -> dict: status=IngestionStatus.SUCCESS, ) - collection_id = await service.providers.database.relational.assign_document_to_collection( + collection_id = await service.providers.database.handle.assign_document_to_collection( document_id=document_info.id, collection_id=generate_default_user_collection_id( document_info.user_id ), ) - service.providers.database.vector.assign_document_to_collection( + service.providers.database.handle.assign_document_to_collection( document_id=document_info.id, collection_id=collection_id ) @@ -189,7 +189,7 @@ async def on_failure(self, context: Context) -> None: try: documents_overview = ( - await self.ingestion_service.providers.database.relational.get_documents_overview( + await self.ingestion_service.providers.database.handle.get_documents_overview( filter_document_ids=[document_id] ) )["results"] @@ -248,7 +248,7 @@ async def update_files(self, context: Context) -> None: ) documents_overview = ( - await self.ingestion_service.providers.database.relational.get_documents_overview( + await self.ingestion_service.providers.database.handle.get_documents_overview( filter_document_ids=document_ids, filter_user_ids=None if user.is_superuser else [user.id], ) @@ -400,13 +400,13 @@ async def finalize(self, context: Context) -> dict: ) try: - collection_id = await self.ingestion_service.providers.database.relational.assign_document_to_collection( + collection_id = await self.ingestion_service.providers.database.handle.assign_document_to_collection( document_id=document_info.id, collection_id=generate_default_user_collection_id( document_info.user_id ), ) - self.ingestion_service.providers.database.vector.assign_document_to_collection( + self.ingestion_service.providers.database.handle.assign_document_to_collection( document_id=document_info.id, collection_id=collection_id ) except Exception as e: @@ -432,7 +432,7 @@ async def on_failure(self, context: Context) -> None: try: documents_overview = ( - await self.ingestion_service.providers.database.relational.get_documents_overview( + await self.ingestion_service.providers.database.handle.get_documents_overview( filter_document_ids=[document_id] ) )["results"] @@ -474,7 +474,7 @@ async def create_vector_index(self, context: Context) -> dict: ) ) - self.ingestion_service.providers.database.vector.create_index( + self.ingestion_service.providers.database.handle.create_index( **parsed_data ) diff --git a/py/core/main/orchestration/hatchet/kg_workflow.py b/py/core/main/orchestration/hatchet/kg_workflow.py index 97862c941..c98fac710 100644 --- a/py/core/main/orchestration/hatchet/kg_workflow.py +++ b/py/core/main/orchestration/hatchet/kg_workflow.py @@ -117,7 +117,7 @@ async def on_failure(self, context: Context) -> None: return try: - await self.kg_service.providers.database.relational.set_workflow_status( + await self.kg_service.providers.database.handle.set_workflow_status( id=uuid.UUID(document_id), status_type="kg_extraction_status", status=KGExtractionStatus.FAILED, diff --git a/py/core/main/orchestration/simple/ingestion_workflow.py b/py/core/main/orchestration/simple/ingestion_workflow.py index c87eb4dc3..bef1a4ca4 100644 --- a/py/core/main/orchestration/simple/ingestion_workflow.py +++ b/py/core/main/orchestration/simple/ingestion_workflow.py @@ -67,13 +67,13 @@ async def ingest_files(input_data): ) try: - collection_id = await service.providers.database.relational.assign_document_to_collection( + collection_id = await service.providers.database.handle.assign_document_to_collection( document_id=document_info.id, collection_id=generate_default_user_collection_id( str(document_info.user_id) ), ) - service.providers.database.vector.assign_document_to_collection( + service.providers.database.handle.assign_document_to_collection( document_id=document_info.id, collection_id=collection_id ) except Exception as e: @@ -125,7 +125,7 @@ async def update_files(input_data): ) documents_overview = ( - await service.providers.database.relational.get_documents_overview( + await service.providers.database.handle.get_documents_overview( filter_document_ids=document_ids, filter_user_ids=None if user.is_superuser else [user.id], ) @@ -227,13 +227,13 @@ async def ingest_chunks(input_data): ) try: - collection_id = await service.providers.database.relational.assign_document_to_collection( + collection_id = await service.providers.database.handle.assign_document_to_collection( document_id=document_info.id, collection_id=generate_default_user_collection_id( str(document_info.user_id) ), ) - service.providers.database.vector.assign_document_to_collection( + service.providers.database.handle.assign_document_to_collection( document_id=document_info.id, collection_id=collection_id ) except Exception as e: @@ -262,7 +262,7 @@ async def create_vector_index(input_data): ) ) - service.providers.database.vector.create_index(**parsed_data) + service.providers.database.handle.create_index(**parsed_data) except Exception as e: raise R2RException( diff --git a/py/core/main/services/auth_service.py b/py/core/main/services/auth_service.py index 7ff0c8c8d..8b1f723e5 100644 --- a/py/core/main/services/auth_service.py +++ b/py/core/main/services/auth_service.py @@ -45,7 +45,7 @@ async def verify_email( status_code=400, message="Email verification is not required" ) - user_id = await self.providers.database.relational.get_user_id_by_verification_code( + user_id = await self.providers.database.handle.get_user_id_by_verification_code( verification_code ) if not user_id: @@ -53,14 +53,14 @@ async def verify_email( status_code=400, message="Invalid or expired verification code" ) - user = await self.providers.database.relational.get_user_by_id(user_id) + user = await self.providers.database.handle.get_user_by_id(user_id) if not user or user.email != email: raise R2RException( status_code=400, message="Invalid or expired verification code" ) - await self.providers.database.relational.mark_user_as_verified(user_id) - await self.providers.database.relational.remove_verification_code( + await self.providers.database.handle.mark_user_as_verified(user_id) + await self.providers.database.handle.remove_verification_code( verification_code ) return {"message": f"User account {user_id} verified successfully."} @@ -72,7 +72,7 @@ async def login(self, email: str, password: str) -> dict[str, Token]: @telemetry_event("GetCurrentUser") async def user(self, token: str) -> UserResponse: token_data = await self.providers.auth.decode_token(token) - user = await self.providers.database.relational.get_user_by_email( + user = await self.providers.database.handle.get_user_by_email( token_data.email ) if user is None: @@ -124,9 +124,7 @@ async def update_user( profile_picture: Optional[str] = None, ) -> UserResponse: user: UserResponse = ( - await self.providers.database.relational.get_user_by_id( - str(user_id) - ) + await self.providers.database.handle.get_user_by_id(str(user_id)) ) if not user: raise R2RException(status_code=404, message="User not found") @@ -140,7 +138,7 @@ async def update_user( user.bio = bio if profile_picture is not None: user.profile_picture = profile_picture - return await self.providers.database.relational.update_user(user) + return await self.providers.database.handle.update_user(user) @telemetry_event("DeleteUserAccount") async def delete_user( @@ -150,7 +148,7 @@ async def delete_user( delete_vector_data: bool = False, is_superuser: bool = False, ) -> dict[str, str]: - user = await self.providers.database.relational.get_user_by_id(user_id) + user = await self.providers.database.handle.get_user_by_id(user_id) if not user: raise R2RException(status_code=404, message="User not found") if not ( @@ -160,9 +158,9 @@ async def delete_user( ) ): raise R2RException(status_code=400, message="Incorrect password") - await self.providers.database.relational.delete_user(user_id) + await self.providers.database.handle.delete_user(user_id) if delete_vector_data: - self.providers.database.vector.delete_user(user_id) + self.providers.database.handle.delete_user(user_id) return {"message": f"User account {user_id} deleted successfully."} @@ -172,6 +170,6 @@ async def clean_expired_blacklisted_tokens( max_age_hours: int = 7 * 24, current_time: Optional[datetime] = None, ): - await self.providers.database.relational.clean_expired_blacklisted_tokens( + await self.providers.database.handle.clean_expired_blacklisted_tokens( max_age_hours, current_time ) diff --git a/py/core/main/services/ingestion_service.py b/py/core/main/services/ingestion_service.py index bd93b9ddf..0e0720be1 100644 --- a/py/core/main/services/ingestion_service.py +++ b/py/core/main/services/ingestion_service.py @@ -94,7 +94,7 @@ async def ingest_file_ingress( ) existing_document_info = ( - await self.providers.database.relational.get_documents_overview( + await self.providers.database.handle.get_documents_overview( filter_user_ids=[user.id], filter_document_ids=[document_id], ) @@ -120,7 +120,7 @@ async def ingest_file_ingress( message=f"Document {document_id} was already ingested and is not in a failed state.", ) - await self.providers.database.relational.upsert_documents_overview( + await self.providers.database.handle.upsert_documents_overview( document_info ) @@ -256,7 +256,7 @@ async def finalize_ingestion( is_update: bool = False, ) -> None: if is_update: - self.providers.database.vector.delete( + self.providers.database.handle.delete( filters={ "$and": [ {"document_id": {"$eq": document_info.id}}, @@ -284,7 +284,7 @@ async def update_document_status( async def _update_document_status_in_db(self, document_info: DocumentInfo): try: - await self.providers.database.relational.upsert_documents_overview( + await self.providers.database.handle.upsert_documents_overview( document_info ) except Exception as e: @@ -325,7 +325,7 @@ async def ingest_chunks_ingress( ) existing_document_info = ( - await self.providers.database.relational.get_documents_overview( + await self.providers.database.handle.get_documents_overview( filter_user_ids=[user.id], filter_document_ids=[document_id], ) @@ -339,7 +339,7 @@ async def ingest_chunks_ingress( message=f"Document {document_id} was already ingested and is not in a failed state.", ) - await self.providers.database.relational.upsert_documents_overview( + await self.providers.database.handle.upsert_documents_overview( document_info ) diff --git a/py/core/main/services/kg_service.py b/py/core/main/services/kg_service.py index dafbe837c..50b5d4713 100644 --- a/py/core/main/services/kg_service.py +++ b/py/core/main/services/kg_service.py @@ -67,7 +67,7 @@ async def kg_triples_extraction( f"KGService: Processing document {document_id} for KG extraction" ) - await self.providers.database.relational.set_workflow_status( + await self.providers.database.handle.set_workflow_status( id=document_id, status_type="kg_extraction_status", status=KGExtractionStatus.PROCESSING, @@ -101,7 +101,7 @@ async def kg_triples_extraction( except Exception as e: logger.error(f"KGService: Error in kg_extraction: {e}") - await self.providers.database.relational.set_workflow_status( + await self.providers.database.handle.set_workflow_status( id=document_id, status_type="kg_extraction_status", status=KGExtractionStatus.FAILED, @@ -127,10 +127,12 @@ async def get_document_ids_for_create_graph( KGExtractionStatus.PROCESSING, ] - document_ids = await self.providers.database.relational.get_document_ids_by_status( - status_type="kg_extraction_status", - status=document_status_filter, - collection_id=collection_id, + document_ids = ( + await self.providers.database.handle.get_document_ids_by_status( + status_type="kg_extraction_status", + status=document_status_filter, + collection_id=collection_id, + ) ) return document_ids @@ -193,7 +195,7 @@ async def kg_entity_description( f"KGService: Completed kg_entity_description for batch {i+1}/{num_batches} for document {document_id}" ) - await self.providers.database.relational.set_workflow_status( + await self.providers.database.handle.set_workflow_status( id=document_id, status_type="kg_extraction_status", status=KGExtractionStatus.SUCCESS, diff --git a/py/core/main/services/management_service.py b/py/core/main/services/management_service.py index 0110283eb..5b402d113 100644 --- a/py/core/main/services/management_service.py +++ b/py/core/main/services/management_service.py @@ -204,7 +204,7 @@ async def users_overview( *args, **kwargs, ): - return await self.providers.database.relational.get_users_overview( + return await self.providers.database.handle.get_users_overview( [str(ele) for ele in user_ids] if user_ids else None, offset=offset, limit=limit, @@ -267,7 +267,7 @@ def validate_filters(filters: dict[str, Any]) -> None: logger.info(f"Deleting entries with filters: {filters}") try: - vector_delete_results = self.providers.database.vector.delete( + vector_delete_results = self.providers.database.handle.delete( filters ) except Exception as e: @@ -299,7 +299,7 @@ def validate_filters(filters: dict[str, Any]) -> None: try: documents_overview = ( - await self.providers.database.relational.get_documents_overview( + await self.providers.database.handle.get_documents_overview( **relational_filters ) )["results"] @@ -319,7 +319,7 @@ def validate_filters(filters: dict[str, Any]) -> None: for document_id in document_ids_to_purge: try: - await self.providers.database.relational.delete_from_documents_overview( + await self.providers.database.handle.delete_from_documents_overview( str(document_id) ) logger.info( @@ -351,7 +351,7 @@ async def documents_overview( *args: Any, **kwargs: Any, ): - return await self.providers.database.relational.get_documents_overview( + return await self.providers.database.handle.get_documents_overview( filter_document_ids=document_ids, filter_user_ids=user_ids, filter_collection_ids=collection_ids, @@ -369,7 +369,7 @@ async def document_chunks( *args, **kwargs, ): - return self.providers.database.vector.get_document_chunks( + return self.providers.database.handle.get_document_chunks( document_id, offset=offset, limit=limit, @@ -380,10 +380,10 @@ async def document_chunks( async def assign_document_to_collection( self, document_id: str, collection_id: UUID ): - await self.providers.database.relational.assign_document_to_collection( + await self.providers.database.handle.assign_document_to_collection( document_id, collection_id ) - self.providers.database.vector.assign_document_to_collection( + self.providers.database.handle.assign_document_to_collection( document_id, collection_id ) return {"message": "Document assigned to collection successfully"} @@ -392,10 +392,10 @@ async def assign_document_to_collection( async def remove_document_from_collection( self, document_id: UUID, collection_id: UUID ): - await self.providers.database.relational.remove_document_from_collection( + await self.providers.database.handle.remove_document_from_collection( document_id, collection_id ) - self.providers.database.vector.remove_document_from_collection( + self.providers.database.handle.remove_document_from_collection( document_id, collection_id ) await self.providers.kg.delete_node_via_document_id( @@ -407,7 +407,7 @@ async def remove_document_from_collection( async def document_collections( self, document_id: str, offset: int = 0, limit: int = 100 ): - return await self.providers.database.relational.document_collections( + return await self.providers.database.handle.document_collections( document_id, offset=offset, limit=limit ) @@ -502,13 +502,13 @@ def _get_central_nodes( async def create_collection( self, name: str, description: str = "" ) -> UUID: - return await self.providers.database.relational.create_collection( + return await self.providers.database.handle.create_collection( name, description ) @telemetry_event("GetCollection") async def get_collection(self, collection_id: UUID) -> Optional[dict]: - return await self.providers.database.relational.get_collection( + return await self.providers.database.handle.get_collection( collection_id ) @@ -519,23 +519,21 @@ async def update_collection( name: Optional[str] = None, description: Optional[str] = None, ) -> bool: - return await self.providers.database.relational.update_collection( + return await self.providers.database.handle.update_collection( collection_id, name, description ) @telemetry_event("DeleteCollection") async def delete_collection(self, collection_id: UUID) -> bool: - await self.providers.database.relational.delete_collection( - collection_id - ) - self.providers.database.vector.delete_collection(collection_id) + await self.providers.database.handle.delete_collection(collection_id) + self.providers.database.handle.delete_collection(collection_id) return True @telemetry_event("ListCollections") async def list_collections( self, offset: int = 0, limit: int = 100 ) -> list[dict]: - return await self.providers.database.relational.list_collections( + return await self.providers.database.handle.list_collections( offset=offset, limit=limit ) @@ -543,7 +541,7 @@ async def list_collections( async def add_user_to_collection( self, user_id: UUID, collection_id: UUID ) -> bool: - return await self.providers.database.relational.add_user_to_collection( + return await self.providers.database.handle.add_user_to_collection( user_id, collection_id ) @@ -551,28 +549,26 @@ async def add_user_to_collection( async def remove_user_from_collection( self, user_id: UUID, collection_id: UUID ) -> bool: - return await self.providers.database.relational.remove_user_from_collection( - user_id, collection_id + return ( + await self.providers.database.handle.remove_user_from_collection( + user_id, collection_id + ) ) @telemetry_event("GetUsersInCollection") async def get_users_in_collection( self, collection_id: UUID, offset: int = 0, limit: int = 100 ) -> list[dict]: - return ( - await self.providers.database.relational.get_users_in_collection( - collection_id, offset=offset, limit=limit - ) + return await self.providers.database.handle.get_users_in_collection( + collection_id, offset=offset, limit=limit ) @telemetry_event("GetCollectionsForUser") async def get_collections_for_user( self, user_id: UUID, offset: int = 0, limit: int = 100 ) -> list[dict]: - return ( - await self.providers.database.relational.get_collections_for_user( - user_id, offset, limit - ) + return await self.providers.database.handle.get_collections_for_user( + user_id, offset, limit ) @telemetry_event("CollectionsOverview") @@ -584,26 +580,18 @@ async def collections_overview( *args, **kwargs, ): - return ( - await self.providers.database.relational.get_collections_overview( - ( - [str(ele) for ele in collection_ids] - if collection_ids - else None - ), - offset=offset, - limit=limit, - ) + return await self.providers.database.handle.get_collections_overview( + ([str(ele) for ele in collection_ids] if collection_ids else None), + offset=offset, + limit=limit, ) @telemetry_event("GetDocumentsInCollection") async def documents_in_collection( self, collection_id: UUID, offset: int = 0, limit: int = 100 ) -> list[dict]: - return ( - await self.providers.database.relational.documents_in_collection( - collection_id, offset=offset, limit=limit - ) + return await self.providers.database.handle.documents_in_collection( + collection_id, offset=offset, limit=limit ) @telemetry_event("AddPrompt") diff --git a/py/core/pipes/kg/community_summary.py b/py/core/pipes/kg/community_summary.py index 54e40c4bd..19b756f14 100644 --- a/py/core/pipes/kg/community_summary.py +++ b/py/core/pipes/kg/community_summary.py @@ -1,7 +1,7 @@ -import random import asyncio import json import logging +import random import time from typing import Any, AsyncGenerator, Optional from uuid import UUID diff --git a/py/core/providers/database/handle.py b/py/core/providers/database/handle.py index 83cd93d4d..a0d929018 100644 --- a/py/core/providers/database/handle.py +++ b/py/core/providers/database/handle.py @@ -1,23 +1,21 @@ import logging +from typing import Optional import asyncpg -from core.base import RelationalDBProvider, CryptoProvider, DatabaseConfig -from core.providers.database.vector import VectorDBMixin +from core.base import CryptoProvider, DatabaseConfig from core.providers.database.base import DatabaseMixin from core.providers.database.collection import CollectionMixin from core.providers.database.document import DocumentMixin from core.providers.database.tokens import BlacklistedTokensMixin from core.providers.database.user import UserMixin -from typing import Optional - +from core.providers.database.vector import VectorDBMixin from shared.abstractions.vector import VectorQuantizationType logger = logging.getLogger() class PostgresDBHandle( - # RelationalDBProvider, DocumentMixin, CollectionMixin, BlacklistedTokensMixin, @@ -46,9 +44,7 @@ def _get_table_name(self, base_name: str) -> str: return f"{self.project_name}.{base_name}" async def initialize(self, pool: asyncpg.pool.Pool): - logger.info( - "Initializing `PostgresRelationalDBProvider` with connection pool." - ) + logger.info("Initializing `PostgresDBHandle` with connection pool.") self.pool = pool @@ -62,7 +58,7 @@ async def initialize(self, pool: asyncpg.pool.Pool): await self.initialize_vector_db() - logger.info("Successfully initialized `PostgresRelationalDBProvider`") + logger.info("Successfully initialized `PostgresDBHandle`") async def close(self): if self.pool: diff --git a/py/core/providers/database/postgres.py b/py/core/providers/database/postgres.py index 070988f15..f5c855e50 100644 --- a/py/core/providers/database/postgres.py +++ b/py/core/providers/database/postgres.py @@ -13,13 +13,10 @@ DatabaseConfig, DatabaseProvider, PostgresConfigurationSettings, - RelationalDBProvider, - VectorDBProvider, VectorQuantizationType, ) from .handle import PostgresDBHandle -from .vector import PostgresVectorDBProvider logger = logging.getLogger() @@ -145,6 +142,8 @@ def __init__( config.default_collection_description ) + self.handle: Optional[PostgresDBHandle] = None + def _get_table_name(self, base_name: str) -> str: return f"{self.project_name}.{base_name}" @@ -155,9 +154,8 @@ async def initialize(self): await shared_pool.initialize() self.handle = await self._initialize_handle(shared_pool) - # self.relational = await self._initialize_relational_db(shared_pool) - async def _initialize_handle(self) -> VectorDBProvider: + async def _initialize_handle(self) -> PostgresDBHandle: handle = PostgresDBHandle( self.config, connection_string=self.connection_string, @@ -168,16 +166,6 @@ async def _initialize_handle(self) -> VectorDBProvider: await handle.initialize() return handle - # async def _initialize_relational_db(self) -> RelationalDBProvider: - # relational_db = PostgresRelationalDBProvider( - # self.config, - # connection_string=self.connection_string, - # crypto_provider=self.crypto_provider, - # project_name=self.project_name, - # ) - # await relational_db.initialize() - # return relational_db - def _get_postgres_configuration_settings( self, config: DatabaseConfig ) -> PostgresConfigurationSettings: From c731df927758f7ce71b09bf0dcec3629c1268410 Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Thu, 17 Oct 2024 17:42:23 -0700 Subject: [PATCH 11/27] iterate --- py/core/base/providers/database.py | 6 +-- py/core/main/services/auth_service.py | 4 +- py/core/main/services/management_service.py | 12 ++--- .../pipes/ingestion/vector_storage_pipe.py | 2 +- py/core/pipes/kg/triples_extraction.py | 2 +- py/core/pipes/retrieval/vector_search_pipe.py | 4 +- py/core/providers/auth/r2r_auth.py | 44 +++++++++---------- py/core/providers/database/base.py | 1 + py/core/providers/database/collection.py | 6 +-- py/core/providers/database/postgres.py | 8 ++-- py/core/providers/database/user.py | 2 +- py/core/providers/database/vector.py | 18 ++------ py/core/providers/kg/postgres.py | 2 +- 13 files changed, 49 insertions(+), 62 deletions(-) diff --git a/py/core/base/providers/database.py b/py/core/base/providers/database.py index dd500afc4..7f9baad0e 100644 --- a/py/core/base/providers/database.py +++ b/py/core/base/providers/database.py @@ -71,11 +71,9 @@ def supported_providers(self) -> list[str]: class DatabaseProvider(Provider): def __init__(self, config: DatabaseConfig): - if not isinstance(config, DatabaseConfig): - raise ValueError( - "DatabaseProvider must be initialized with a `DatabaseConfig`." - ) logger.info(f"Initializing DatabaseProvider with config {config}.") + + self.handle: Any = None # TODO - Type this properly, we later use it as a PostgresDBHandle super().__init__(config) @abstractmethod diff --git a/py/core/main/services/auth_service.py b/py/core/main/services/auth_service.py index 8b1f723e5..01fa6863c 100644 --- a/py/core/main/services/auth_service.py +++ b/py/core/main/services/auth_service.py @@ -158,9 +158,9 @@ async def delete_user( ) ): raise R2RException(status_code=400, message="Incorrect password") - await self.providers.database.handle.delete_user(user_id) + await self.providers.database.handle.delete_user_relational(user_id) if delete_vector_data: - self.providers.database.handle.delete_user(user_id) + self.providers.database.handle.delete_user_vector(user_id) return {"message": f"User account {user_id} deleted successfully."} diff --git a/py/core/main/services/management_service.py b/py/core/main/services/management_service.py index 5b402d113..aa2758b07 100644 --- a/py/core/main/services/management_service.py +++ b/py/core/main/services/management_service.py @@ -380,10 +380,10 @@ async def document_chunks( async def assign_document_to_collection( self, document_id: str, collection_id: UUID ): - await self.providers.database.handle.assign_document_to_collection( + await self.providers.database.handle.assign_document_to_collection_vector( document_id, collection_id ) - self.providers.database.handle.assign_document_to_collection( + await self.providers.database.handle.assign_document_to_collection_relational( document_id, collection_id ) return {"message": "Document assigned to collection successfully"} @@ -392,10 +392,10 @@ async def assign_document_to_collection( async def remove_document_from_collection( self, document_id: UUID, collection_id: UUID ): - await self.providers.database.handle.remove_document_from_collection( + await self.providers.database.handle.remove_document_from_collection_relational( document_id, collection_id ) - self.providers.database.handle.remove_document_from_collection( + await self.providers.database.handle.remove_document_from_collection_vector( document_id, collection_id ) await self.providers.kg.delete_node_via_document_id( @@ -525,8 +525,8 @@ async def update_collection( @telemetry_event("DeleteCollection") async def delete_collection(self, collection_id: UUID) -> bool: - await self.providers.database.handle.delete_collection(collection_id) - self.providers.database.handle.delete_collection(collection_id) + await self.providers.database.handle.delete_collection_relational(collection_id) + await self.providers.database.handle.delete_collection_vector(collection_id) return True @telemetry_event("ListCollections") diff --git a/py/core/pipes/ingestion/vector_storage_pipe.py b/py/core/pipes/ingestion/vector_storage_pipe.py index e57698881..2a3161ab1 100644 --- a/py/core/pipes/ingestion/vector_storage_pipe.py +++ b/py/core/pipes/ingestion/vector_storage_pipe.py @@ -51,7 +51,7 @@ async def store( """ try: - self.database_provider.vector.upsert_entries(vector_entries) + self.database_provider.handle.upsert_entries(vector_entries) except Exception as e: error_message = ( f"Failed to store vector entries in the database: {e}" diff --git a/py/core/pipes/kg/triples_extraction.py b/py/core/pipes/kg/triples_extraction.py index a24cb5787..d60b0467f 100644 --- a/py/core/pipes/kg/triples_extraction.py +++ b/py/core/pipes/kg/triples_extraction.py @@ -258,7 +258,7 @@ async def _run_logic( # type: ignore data=extraction["text"], metadata=extraction["metadata"], ) - for extraction in self.database_provider.vector.get_document_chunks( + for extraction in self.database_provider.handle.get_document_chunks( document_id=document_id )[ "results" diff --git a/py/core/pipes/retrieval/vector_search_pipe.py b/py/core/pipes/retrieval/vector_search_pipe.py index f12ba2ae3..d63769bc3 100644 --- a/py/core/pipes/retrieval/vector_search_pipe.py +++ b/py/core/pipes/retrieval/vector_search_pipe.py @@ -64,13 +64,13 @@ async def search( # type: ignore ) search_results = ( - self.database_provider.vector.hybrid_search( + self.database_provider.handle.hybrid_search( query_vector=query_vector, query_text=message, search_settings=search_settings, ) if search_settings.use_hybrid_search - else self.database_provider.vector.semantic_search( + else self.database_provider.handle.semantic_search( query_vector=query_vector, search_settings=search_settings, ) diff --git a/py/core/providers/auth/r2r_auth.py b/py/core/providers/auth/r2r_auth.py index 21afba66e..083ad9b07 100644 --- a/py/core/providers/auth/r2r_auth.py +++ b/py/core/providers/auth/r2r_auth.py @@ -56,7 +56,7 @@ async def initialize(self): user = await self.register( email=self.admin_email, password=self.admin_password ) - await self.db_provider.relational.mark_user_as_superuser(user.id) + await self.db_provider.handle.mark_user_as_superuser(user.id) except R2RException: logger.info("Default admin user already exists.") @@ -85,7 +85,7 @@ def create_refresh_token(self, data: dict) -> str: async def decode_token(self, token: str) -> TokenData: try: # First, check if the token is blacklisted - if await self.db_provider.relational.is_token_blacklisted(token): + if await self.db_provider.handle.is_token_blacklisted(token): raise R2RException( status_code=401, message="Token has been invalidated" ) @@ -114,7 +114,7 @@ async def decode_token(self, token: str) -> TokenData: async def user(self, token: str = Depends(oauth2_scheme)) -> UserResponse: token_data = await self.decode_token(token) - user = await self.db_provider.relational.get_user_by_email( + user = await self.db_provider.handle.get_user_by_email( token_data.email ) if user is None: @@ -132,16 +132,16 @@ def get_current_active_user( async def register(self, email: str, password: str) -> Dict[str, str]: # Create new user and give them a default collection - new_user = await self.db_provider.relational.create_user( + new_user = await self.db_provider.handle.create_user( email, password ) default_collection = ( - await self.db_provider.relational.create_default_collection( + await self.db_provider.handle.create_default_collection( new_user.id, ) ) - await self.db_provider.relational.add_user_to_collection( + await self.db_provider.handle.add_user_to_collection( new_user.id, default_collection.collection_id ) @@ -152,7 +152,7 @@ async def register(self, email: str, password: str) -> Dict[str, str]: ) expiry = datetime.now(timezone.utc) + timedelta(hours=24) - await self.db_provider.relational.store_verification_code( + await self.db_provider.handle.store_verification_code( new_user.id, verification_code, expiry ) new_user.verification_code_expiry = expiry @@ -160,10 +160,10 @@ async def register(self, email: str, password: str) -> Dict[str, str]: # self.providers.email.send_verification_email(new_user.email, verification_code) else: # Mark user as verified - await self.db_provider.relational.store_verification_code( + await self.db_provider.handle.store_verification_code( new_user.id, None, None ) - await self.db_provider.relational.mark_user_as_verified( + await self.db_provider.handle.mark_user_as_verified( new_user.id ) @@ -173,7 +173,7 @@ async def verify_email( self, email: str, verification_code: str ) -> dict[str, str]: user_id = ( - await self.db_provider.relational.get_user_id_by_verification_code( + await self.db_provider.handle.get_user_id_by_verification_code( verification_code ) ) @@ -181,8 +181,8 @@ async def verify_email( raise R2RException( status_code=400, message="Invalid or expired verification code" ) - await self.db_provider.relational.mark_user_as_verified(user_id) - await self.db_provider.relational.remove_verification_code( + await self.db_provider.handle.mark_user_as_verified(user_id) + await self.db_provider.handle.remove_verification_code( verification_code ) return {"message": "Email verified successfully"} @@ -191,7 +191,7 @@ async def login(self, email: str, password: str) -> Dict[str, Token]: logger = logging.getLogger() logger.debug(f"Attempting login for email: {email}") - user = await self.db_provider.relational.get_user_by_email(email) + user = await self.db_provider.handle.get_user_by_email(email) if not user: logger.warning(f"No user found for email: {email}") raise R2RException( @@ -245,7 +245,7 @@ async def refresh_access_token( ) # Invalidate the old refresh token and create a new one - await self.db_provider.relational.blacklist_token(refresh_token) + await self.db_provider.handle.blacklist_token(refresh_token) new_access_token = self.create_access_token( data={"sub": token_data.email} @@ -281,13 +281,13 @@ async def change_password( hashed_new_password = self.crypto_provider.get_password_hash( new_password ) - await self.db_provider.relational.update_user_password( + await self.db_provider.handle.update_user_password( user.id, hashed_new_password ) return {"message": "Password changed successfully"} async def request_password_reset(self, email: str) -> Dict[str, str]: - user = await self.db_provider.relational.get_user_by_email(email) + user = await self.db_provider.handle.get_user_by_email(email) if not user: # To prevent email enumeration, always return a success message return { @@ -296,7 +296,7 @@ async def request_password_reset(self, email: str) -> Dict[str, str]: reset_token = self.crypto_provider.generate_verification_code() expiry = datetime.now(timezone.utc) + timedelta(hours=1) - await self.db_provider.relational.store_reset_token( + await self.db_provider.handle.store_reset_token( user.id, reset_token, expiry ) @@ -308,7 +308,7 @@ async def request_password_reset(self, email: str) -> Dict[str, str]: async def confirm_password_reset( self, reset_token: str, new_password: str ) -> Dict[str, str]: - user_id = await self.db_provider.relational.get_user_id_by_reset_token( + user_id = await self.db_provider.handle.get_user_id_by_reset_token( reset_token ) if not user_id: @@ -319,16 +319,16 @@ async def confirm_password_reset( hashed_new_password = self.crypto_provider.get_password_hash( new_password ) - await self.db_provider.relational.update_user_password( + await self.db_provider.handle.update_user_password( user_id, hashed_new_password ) - await self.db_provider.relational.remove_reset_token(user_id) + await self.db_provider.handle.remove_reset_token(user_id) return {"message": "Password reset successfully"} async def logout(self, token: str) -> Dict[str, str]: # Add the token to a blacklist - await self.db_provider.relational.blacklist_token(token) + await self.db_provider.handle.blacklist_token(token) return {"message": "Logged out successfully"} async def clean_expired_blacklisted_tokens(self): - await self.db_provider.relational.clean_expired_blacklisted_tokens() + await self.db_provider.handle.clean_expired_blacklisted_tokens() diff --git a/py/core/providers/database/base.py b/py/core/providers/database/base.py index 3cf10b370..1834d4d48 100644 --- a/py/core/providers/database/base.py +++ b/py/core/providers/database/base.py @@ -72,6 +72,7 @@ def execute_query( self, query: Union[str, TextClause], params: Optional[Union[dict[str, Any], Sequence[Any]]] = None, + isolation_level: Optional[str] = None, ): raise NotImplementedError("Subclasses must implement this method") diff --git a/py/core/providers/database/collection.py b/py/core/providers/database/collection.py index 6605a7ed2..834d45cfa 100644 --- a/py/core/providers/database/collection.py +++ b/py/core/providers/database/collection.py @@ -175,7 +175,7 @@ async def update_collection( updated_at=result["updated_at"], ) - async def delete_collection(self, collection_id: UUID) -> None: + async def delete_collection_relational(self, collection_id: UUID) -> None: async with self.pool.acquire() as conn: # type: ignore async with conn.transaction(): try: @@ -425,7 +425,7 @@ async def get_collections_for_user( return {"results": collections, "total_entries": total_entries} - async def assign_document_to_collection( + async def assign_document_to_collection_relational( self, document_id: UUID, collection_id: UUID, @@ -524,7 +524,7 @@ async def document_collections( return {"results": collections, "total_entries": total_entries} - async def remove_document_from_collection( + async def remove_document_from_collection_relational( self, document_id: UUID, collection_id: UUID ) -> None: """ diff --git a/py/core/providers/database/postgres.py b/py/core/providers/database/postgres.py index f5c855e50..d0a439a76 100644 --- a/py/core/providers/database/postgres.py +++ b/py/core/providers/database/postgres.py @@ -153,9 +153,6 @@ async def initialize(self): ) await shared_pool.initialize() - self.handle = await self._initialize_handle(shared_pool) - - async def _initialize_handle(self) -> PostgresDBHandle: handle = PostgresDBHandle( self.config, connection_string=self.connection_string, @@ -163,8 +160,9 @@ async def _initialize_handle(self) -> PostgresDBHandle: dimension=self.vector_db_dimension, quantization_type=self.vector_db_quantization_type, ) - await handle.initialize() - return handle + await handle.initialize(shared_pool) + + self.handle = handle def _get_postgres_configuration_settings( self, config: DatabaseConfig diff --git a/py/core/providers/database/user.py b/py/core/providers/database/user.py index e315e17ec..0af7c2a7c 100644 --- a/py/core/providers/database/user.py +++ b/py/core/providers/database/user.py @@ -198,7 +198,7 @@ async def update_user(self, user: UserResponse) -> UserResponse: collection_ids=result["collection_ids"], ) - async def delete_user(self, user_id: UUID) -> None: + async def delete_user_relational(self, user_id: UUID) -> None: # Get the collections the user belongs to collection_query = f""" SELECT collection_ids FROM {self._get_table_name('users')} diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index dc0c27844..472374270 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -309,7 +309,7 @@ async def delete( for result in results } - async def assign_document_to_collection( + async def assign_document_to_collection_vector( self, document_id: str, collection_id: str ) -> None: query = f""" @@ -319,7 +319,7 @@ async def assign_document_to_collection( """ await self.execute_query(query, (collection_id, document_id)) - async def remove_document_from_collection( + async def remove_document_from_collection_vector( self, document_id: str, collection_id: str ) -> None: query = f""" @@ -329,24 +329,14 @@ async def remove_document_from_collection( """ await self.execute_query(query, (collection_id, document_id)) - async def remove_collection_from_documents( - self, collection_id: str - ) -> None: - query = f""" - UPDATE {self.project_name}.vectors - SET collection_ids = array_remove(collection_ids, $1) - WHERE $1 = ANY(collection_ids); - """ - await self.execute_query(query, (collection_id,)) - - async def delete_user(self, user_id: str) -> None: + async def delete_user_vector(self, user_id: str) -> None: query = f""" DELETE FROM {self.project_name}.vectors WHERE user_id = $1; """ await self.execute_query(query, (user_id,)) - async def delete_collection(self, collection_id: str) -> None: + async def delete_collection_vector(self, collection_id: str) -> None: query = f""" DELETE FROM {self.project_name}.vectors WHERE $1 = ANY(collection_ids); diff --git a/py/core/providers/kg/postgres.py b/py/core/providers/kg/postgres.py index 48c3da91e..df1af07ac 100644 --- a/py/core/providers/kg/postgres.py +++ b/py/core/providers/kg/postgres.py @@ -40,7 +40,7 @@ def __init__( ) -> None: super().__init__(config, *args, **kwargs) - self.db_provider = db_provider.relational + self.db_provider = db_provider.handle self.embedding_provider = embedding_provider try: From a32505e4d59363cb7bdfdddf890e53d6bbb98819 Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Thu, 17 Oct 2024 17:54:03 -0700 Subject: [PATCH 12/27] up --- py/core/providers/database/vector.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index 472374270..0e8bba9ea 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -512,3 +512,23 @@ async def create_index( raise Exception(f"Failed to create index: {e}") return None + + def _get_index_options( + self, + method: IndexMethod, + index_arguments: Optional[Union[IndexArgsIVFFlat, IndexArgsHNSW]], + ) -> str: + if method == IndexMethod.ivfflat: + if isinstance(index_arguments, IndexArgsIVFFlat): + return f"WITH (lists={index_arguments.n_lists})" + else: + # Default value if no arguments provided + return "WITH (lists=100)" + elif method == IndexMethod.hnsw: + if isinstance(index_arguments, IndexArgsHNSW): + return f"WITH (m={index_arguments.m}, ef_construction={index_arguments.ef_construction})" + else: + # Default values if no arguments provided + return "WITH (m=16, ef_construction=64)" + else: + return "" # No options for other methods From f0579078e794ea00ef7b4588f5285b025181fed6 Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Fri, 18 Oct 2024 08:46:41 -0700 Subject: [PATCH 13/27] checkin --- py/core/base/providers/database.py | 6 ++- py/core/main/services/management_service.py | 8 ++- py/core/providers/auth/r2r_auth.py | 8 +-- py/core/providers/database/base.py | 3 ++ py/core/providers/database/handle.py | 6 +-- py/core/providers/database/postgres.py | 6 +-- py/core/providers/database/vector.py | 55 +++++++++------------ py/core/providers/kg/postgres.py | 6 --- 8 files changed, 44 insertions(+), 54 deletions(-) diff --git a/py/core/base/providers/database.py b/py/core/base/providers/database.py index 7f9baad0e..e071a6635 100644 --- a/py/core/base/providers/database.py +++ b/py/core/base/providers/database.py @@ -72,8 +72,10 @@ def supported_providers(self) -> list[str]: class DatabaseProvider(Provider): def __init__(self, config: DatabaseConfig): logger.info(f"Initializing DatabaseProvider with config {config}.") - - self.handle: Any = None # TODO - Type this properly, we later use it as a PostgresDBHandle + + self.handle: Any = ( + None # TODO - Type this properly, we later use it as a PostgresHandle + ) super().__init__(config) @abstractmethod diff --git a/py/core/main/services/management_service.py b/py/core/main/services/management_service.py index aa2758b07..51a707486 100644 --- a/py/core/main/services/management_service.py +++ b/py/core/main/services/management_service.py @@ -525,8 +525,12 @@ async def update_collection( @telemetry_event("DeleteCollection") async def delete_collection(self, collection_id: UUID) -> bool: - await self.providers.database.handle.delete_collection_relational(collection_id) - await self.providers.database.handle.delete_collection_vector(collection_id) + await self.providers.database.handle.delete_collection_relational( + collection_id + ) + await self.providers.database.handle.delete_collection_vector( + collection_id + ) return True @telemetry_event("ListCollections") diff --git a/py/core/providers/auth/r2r_auth.py b/py/core/providers/auth/r2r_auth.py index 083ad9b07..8f44addb0 100644 --- a/py/core/providers/auth/r2r_auth.py +++ b/py/core/providers/auth/r2r_auth.py @@ -132,9 +132,7 @@ def get_current_active_user( async def register(self, email: str, password: str) -> Dict[str, str]: # Create new user and give them a default collection - new_user = await self.db_provider.handle.create_user( - email, password - ) + new_user = await self.db_provider.handle.create_user(email, password) default_collection = ( await self.db_provider.handle.create_default_collection( new_user.id, @@ -163,9 +161,7 @@ async def register(self, email: str, password: str) -> Dict[str, str]: await self.db_provider.handle.store_verification_code( new_user.id, None, None ) - await self.db_provider.handle.mark_user_as_verified( - new_user.id - ) + await self.db_provider.handle.mark_user_as_verified(new_user.id) return new_user diff --git a/py/core/providers/database/base.py b/py/core/providers/database/base.py index 1834d4d48..70ac56a12 100644 --- a/py/core/providers/database/base.py +++ b/py/core/providers/database/base.py @@ -76,6 +76,9 @@ def execute_query( ): raise NotImplementedError("Subclasses must implement this method") + async def execute_many(self, query, params=None, batch_size=1000): + raise NotImplementedError("Subclasses must implement this method") + def fetch_query( self, query: Union[str, TextClause], diff --git a/py/core/providers/database/handle.py b/py/core/providers/database/handle.py index a0d929018..e97e47bdf 100644 --- a/py/core/providers/database/handle.py +++ b/py/core/providers/database/handle.py @@ -15,7 +15,7 @@ logger = logging.getLogger() -class PostgresDBHandle( +class PostgresHandle( DocumentMixin, CollectionMixin, BlacklistedTokensMixin, @@ -44,7 +44,7 @@ def _get_table_name(self, base_name: str) -> str: return f"{self.project_name}.{base_name}" async def initialize(self, pool: asyncpg.pool.Pool): - logger.info("Initializing `PostgresDBHandle` with connection pool.") + logger.info("Initializing `PostgresHandle` with connection pool.") self.pool = pool @@ -58,7 +58,7 @@ async def initialize(self, pool: asyncpg.pool.Pool): await self.initialize_vector_db() - logger.info("Successfully initialized `PostgresDBHandle`") + logger.info("Successfully initialized `PostgresHandle`") async def close(self): if self.pool: diff --git a/py/core/providers/database/postgres.py b/py/core/providers/database/postgres.py index d0a439a76..598f0e83a 100644 --- a/py/core/providers/database/postgres.py +++ b/py/core/providers/database/postgres.py @@ -16,7 +16,7 @@ VectorQuantizationType, ) -from .handle import PostgresDBHandle +from .handle import PostgresHandle logger = logging.getLogger() @@ -142,7 +142,7 @@ def __init__( config.default_collection_description ) - self.handle: Optional[PostgresDBHandle] = None + self.handle: Optional[PostgresHandle] = None def _get_table_name(self, base_name: str) -> str: return f"{self.project_name}.{base_name}" @@ -153,7 +153,7 @@ async def initialize(self): ) await shared_pool.initialize() - handle = PostgresDBHandle( + handle = PostgresHandle( self.config, connection_string=self.connection_string, project_name=self.project_name, diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index 0e8bba9ea..8f610318d 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -16,7 +16,7 @@ VectorTableName, ) -from .base import DatabaseMixin, QueryBuilder +from .base import DatabaseMixin from .vecs.exc import ArgError logger = logging.getLogger() @@ -30,15 +30,18 @@ def index_measure_to_ops( class VectorDBMixin(DatabaseMixin): + COLUMN_NAME = "vecs" + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + self.project_name = kwargs.get("project_name") self.dimension = kwargs.get("dimension") self.quantization_type = kwargs.get("quantization_type") async def initialize_vector_db(self): # Create the vector table if it doesn't exist query = f""" - CREATE TABLE IF NOT EXISTS {self.project_name}.vectors ( + CREATE TABLE IF NOT EXISTS {self.project_name}.{VectorDBMixin.COLUMN_NAME} ( extraction_id TEXT PRIMARY KEY, document_id TEXT, user_id TEXT, @@ -47,16 +50,16 @@ async def initialize_vector_db(self): text TEXT, metadata JSONB ); - CREATE INDEX IF NOT EXISTS idx_vectors_document_id ON {self.project_name}.vectors (document_id); - CREATE INDEX IF NOT EXISTS idx_vectors_user_id ON {self.project_name}.vectors (user_id); - CREATE INDEX IF NOT EXISTS idx_vectors_collection_ids ON {self.project_name}.vectors USING GIN (collection_ids); - CREATE INDEX IF NOT EXISTS idx_vectors_text ON {self.project_name}.vectors USING GIN (to_tsvector('english', text)); + CREATE INDEX IF NOT EXISTS idx_vectors_document_id ON {self.project_name}.{VectorDBMixin.COLUMN_NAME} (document_id); + CREATE INDEX IF NOT EXISTS idx_vectors_user_id ON {self.project_name}.{VectorDBMixin.COLUMN_NAME} (user_id); + CREATE INDEX IF NOT EXISTS idx_vectors_collection_ids ON {self.project_name}.{VectorDBMixin.COLUMN_NAME} USING GIN (collection_ids); + CREATE INDEX IF NOT EXISTS idx_vectors_text ON {self.project_name}.{VectorDBMixin.COLUMN_NAME} USING GIN (to_tsvector('english', text)); """ await self.execute_query(query) async def upsert(self, entry: VectorEntry) -> None: query = f""" - INSERT INTO {self.project_name}.vectors + INSERT INTO {self.project_name}.{VectorDBMixin.COLUMN_NAME} (extraction_id, document_id, user_id, collection_ids, vector, text, metadata) VALUES ($1, $2, $3, $4, $5, $6, $7) ON CONFLICT (extraction_id) DO UPDATE @@ -77,7 +80,7 @@ async def upsert(self, entry: VectorEntry) -> None: async def upsert_entries(self, entries: list[VectorEntry]) -> None: query = f""" - INSERT INTO {self.project_name}.vectors + INSERT INTO {self.project_name}.{VectorDBMixin.COLUMN_NAME} (extraction_id, document_id, user_id, collection_ids, vector, text, metadata) VALUES ($1, $2, $3, $4, $5, $6, $7) ON CONFLICT (extraction_id) DO UPDATE @@ -103,7 +106,7 @@ async def semantic_search( query = f""" SELECT extraction_id, document_id, user_id, collection_ids, text, 1 - (vector <=> $1::vector) as similarity, metadata - FROM {self.project_name}.vectors + FROM {self.project_name}.{VectorDBMixin.COLUMN_NAME} WHERE collection_ids && $2 ORDER BY similarity DESC LIMIT $3 OFFSET $4; @@ -138,7 +141,7 @@ async def full_text_search( SELECT extraction_id, document_id, user_id, collection_ids, text, ts_rank_cd(to_tsvector('english', text), plainto_tsquery('english', $1)) as rank, metadata - FROM {self.project_name}.vectors + FROM {self.project_name}.{VectorDBMixin.COLUMN_NAME} WHERE collection_ids && $2 AND to_tsvector('english', text) @@ plainto_tsquery('english', $1) ORDER BY rank DESC LIMIT $3 OFFSET $4; @@ -299,7 +302,7 @@ async def delete( where_clause = " AND ".join(conditions) query = f""" - DELETE FROM {self.project_name}.vectors + DELETE FROM {self.project_name}.{VectorDBMixin.COLUMN_NAME} WHERE {where_clause} RETURNING extraction_id; """ @@ -313,7 +316,7 @@ async def assign_document_to_collection_vector( self, document_id: str, collection_id: str ) -> None: query = f""" - UPDATE {self.project_name}.vectors + UPDATE {self.project_name}.{VectorDBMixin.COLUMN_NAME} SET collection_ids = array_append(collection_ids, $1) WHERE document_id = $2 AND NOT ($1 = ANY(collection_ids)); """ @@ -323,7 +326,7 @@ async def remove_document_from_collection_vector( self, document_id: str, collection_id: str ) -> None: query = f""" - UPDATE {self.project_name}.vectors + UPDATE {self.project_name}.{VectorDBMixin.COLUMN_NAME} SET collection_ids = array_remove(collection_ids, $1) WHERE document_id = $2; """ @@ -331,14 +334,14 @@ async def remove_document_from_collection_vector( async def delete_user_vector(self, user_id: str) -> None: query = f""" - DELETE FROM {self.project_name}.vectors + DELETE FROM {self.project_name}.{VectorDBMixin.COLUMN_NAME} WHERE user_id = $1; """ await self.execute_query(query, (user_id,)) async def delete_collection_vector(self, collection_id: str) -> None: query = f""" - DELETE FROM {self.project_name}.vectors + DELETE FROM {self.project_name}.{VectorDBMixin.COLUMN_NAME} WHERE $1 = ANY(collection_ids); """ await self.execute_query(query, (collection_id,)) @@ -356,7 +359,7 @@ async def get_document_chunks( query = f""" SELECT extraction_id, document_id, user_id, collection_ids, text, metadata {vector_select} - FROM {self.project_name}.vectors + FROM {self.project_name}.{VectorDBMixin.COLUMN_NAME} WHERE document_id = $1 OFFSET $2 {limit_clause}; @@ -428,17 +431,13 @@ async def create_index( """ if table_name == VectorTableName.CHUNKS: - table_name = f"{self.client.project_name}.{self.table.name}" + table_name = f"{self.project_name}.{self.table.name}" col_name = "vec" elif table_name == VectorTableName.ENTITIES: - table_name = ( - f"{self.client.project_name}.{VectorTableName.ENTITIES}" - ) + table_name = f"{self.project_name}.{VectorTableName.ENTITIES}" col_name = "description_embedding" elif table_name == VectorTableName.COMMUNITIES: - table_name = ( - f"{self.client.project_name}.{VectorTableName.COMMUNITIES}" - ) + table_name = f"{self.project_name}.{VectorTableName.COMMUNITIES}" col_name = "embedding" else: raise ArgError("invalid table name") @@ -471,15 +470,7 @@ async def create_index( ) if method == IndexMethod.auto: - if self.client._supports_hnsw(): - method = IndexMethod.hnsw - else: - method = IndexMethod.ivfflat - - if method == IndexMethod.hnsw and not self.client._supports_hnsw(): - raise ArgError( - "HNSW Unavailable. Upgrade your pgvector installation to > 0.5.0 to enable HNSW support" - ) + method = IndexMethod.hnsw ops = index_measure_to_ops( measure, quantization_type=self.quantization_type diff --git a/py/core/providers/kg/postgres.py b/py/core/providers/kg/postgres.py index df1af07ac..af8d12f32 100644 --- a/py/core/providers/kg/postgres.py +++ b/py/core/providers/kg/postgres.py @@ -222,12 +222,6 @@ async def add_entities( Returns: result: asyncpg.Record: result of the upsert operation """ - for entity in entities: - if entity.description_embedding is not None: - entity.description_embedding = str( - entity.description_embedding - ) - return await self._add_objects(entities, table_name) async def add_triples( From 681d276604122dbf4320841e17a30be90dd55903 Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Fri, 18 Oct 2024 11:11:59 -0700 Subject: [PATCH 14/27] up --- py/core/__init__.py | 1 + py/core/base/__init__.py | 1 + py/core/providers/database/base.py | 43 ++- py/core/providers/database/collection.py | 4 +- py/core/providers/database/document.py | 2 +- py/core/providers/database/handle.py | 48 ++- py/core/providers/database/postgres.py | 45 +-- py/core/providers/database/vecs/__init__.py | 13 - py/core/providers/database/vector.py | 348 ++++++++++++++++---- py/core/providers/file/postgres.py | 42 +-- py/core/providers/prompts/r2r_prompts.py | 16 +- py/r2r/__init__.py | 18 +- py/shared/abstractions/search.py | 6 +- 13 files changed, 396 insertions(+), 191 deletions(-) diff --git a/py/core/__init__.py b/py/core/__init__.py index 36cafc02a..7a6ced494 100644 --- a/py/core/__init__.py +++ b/py/core/__init__.py @@ -80,6 +80,7 @@ "KGSearchSettings", "VectorSearchResult", "VectorSearchSettings", + "HybridSearchSettings", # User abstractions "Token", "TokenData", diff --git a/py/core/base/__init__.py b/py/core/base/__init__.py index aa264a735..daa0e84de 100644 --- a/py/core/base/__init__.py +++ b/py/core/base/__init__.py @@ -48,6 +48,7 @@ "KGSearchSettings", "VectorSearchResult", "VectorSearchSettings", + "HybridSearchSettings", # KG abstractions "KGCreationSettings", "KGEnrichmentSettings", diff --git a/py/core/providers/database/base.py b/py/core/providers/database/base.py index 70ac56a12..a915741b4 100644 --- a/py/core/providers/database/base.py +++ b/py/core/providers/database/base.py @@ -1,7 +1,48 @@ -from typing import Any, Optional, Sequence, Union +import asyncio +import logging +from contextlib import asynccontextmanager +from typing import TYPE_CHECKING, Any, List, Optional, Sequence, Union +import asyncpg from sqlalchemy import TextClause, text +if TYPE_CHECKING: + from core.providers.database.handle import PostgresHandle + + +logger = logging.getLogger() + + +class SemaphoreConnectionPool: + def __init__(self, connection_string, postgres_configuration_settings): + self.connection_string = connection_string + self.postgres_configuration_settings = postgres_configuration_settings + + async def initialize(self): + try: + self.semaphore = asyncio.Semaphore( + int(self.postgres_configuration_settings.max_connections * 0.9) + ) + + self.pool = await asyncpg.create_pool( + self.connection_string, + max_size=self.postgres_configuration_settings.max_connections, + ) + + logger.info( + "Successfully connected to Postgres database and created connection pool." + ) + except Exception as e: + raise ValueError( + f"Error {e} occurred while attempting to connect to relational database." + ) from e + + @asynccontextmanager + async def get_connection(self): + async with self.semaphore: + async with self.pool.acquire() as conn: + yield conn + class QueryBuilder: def __init__(self, table_name: str): diff --git a/py/core/providers/database/collection.py b/py/core/providers/database/collection.py index 834d45cfa..6c79b6173 100644 --- a/py/core/providers/database/collection.py +++ b/py/core/providers/database/collection.py @@ -86,7 +86,7 @@ async def create_collection( ] try: - async with self.pool.acquire() as conn: # type: ignore + async with self.pool.get_connection() as conn: # type: ignore row = await conn.fetchrow(query, *params) if not row: @@ -176,7 +176,7 @@ async def update_collection( ) async def delete_collection_relational(self, collection_id: UUID) -> None: - async with self.pool.acquire() as conn: # type: ignore + async with self.pool.get_connection() as conn: # type: ignore async with conn.transaction(): try: # Remove collection_id from users diff --git a/py/core/providers/database/document.py b/py/core/providers/database/document.py index 96eaa384c..7bf917b65 100644 --- a/py/core/providers/database/document.py +++ b/py/core/providers/database/document.py @@ -87,7 +87,7 @@ async def upsert_documents_overview( retries = 0 while retries < max_retries: try: - async with self.pool.acquire() as conn: # type: ignore + async with self.pool.get_connection() as conn: # type: ignore async with conn.transaction(): # Lock the row for update check_query = f""" diff --git a/py/core/providers/database/handle.py b/py/core/providers/database/handle.py index e97e47bdf..456dc0a4e 100644 --- a/py/core/providers/database/handle.py +++ b/py/core/providers/database/handle.py @@ -1,10 +1,11 @@ -import logging from typing import Optional -import asyncpg - from core.base import CryptoProvider, DatabaseConfig -from core.providers.database.base import DatabaseMixin +from core.providers.database.base import ( + DatabaseMixin, + SemaphoreConnectionPool, + logger, +) from core.providers.database.collection import CollectionMixin from core.providers.database.document import DocumentMixin from core.providers.database.tokens import BlacklistedTokensMixin @@ -12,8 +13,6 @@ from core.providers.database.vector import VectorDBMixin from shared.abstractions.vector import VectorQuantizationType -logger = logging.getLogger() - class PostgresHandle( DocumentMixin, @@ -30,42 +29,52 @@ def __init__( project_name: str, dimension: int, quantization_type: Optional[VectorQuantizationType] = None, + pool_size: int = 10, + max_retries: int = 3, + retry_delay: int = 1, ): - super().__init__(config) self.config = config self.connection_string = connection_string self.crypto_provider = crypto_provider self.project_name = project_name self.dimension = dimension self.quantization_type = quantization_type - self.pool = None + self.pool_size = pool_size + self.max_retries = max_retries + self.retry_delay = retry_delay + self.pool: Optional[SemaphoreConnectionPool] = None def _get_table_name(self, base_name: str) -> str: return f"{self.project_name}.{base_name}" - async def initialize(self, pool: asyncpg.pool.Pool): - logger.info("Initializing `PostgresHandle` with connection pool.") - + async def initialize(self, pool: SemaphoreConnectionPool): + logger.info("Initializing `PostgresDBHandle`.") self.pool = pool async with self.pool.get_connection() as conn: - await conn.execute(f'CREATE EXTENSION IF NOT EXISTS "uuid-ossp";') + await conn.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";') + await conn.execute("CREATE EXTENSION IF NOT EXISTS vector;") + await conn.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm;") + await conn.execute("CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;") + + # Create schema if it doesn't exist + await conn.execute( + f'CREATE SCHEMA IF NOT EXISTS "{self.project_name}";' + ) # Call create_table for each mixin for base_class in self.__class__.__bases__: if issubclass(base_class, DatabaseMixin): await base_class.create_table(self) - await self.initialize_vector_db() - - logger.info("Successfully initialized `PostgresHandle`") + logger.info("Successfully initialized `PostgresDBHandle`") async def close(self): if self.pool: await self.pool.close() async def execute_query(self, query, params=None, isolation_level=None): - async with self.pool.acquire() as conn: + async with self.pool.get_connection() as conn: if isolation_level: async with conn.transaction(isolation=isolation_level): if params: @@ -104,3 +113,10 @@ async def fetchrow_query(self, query, params=None): return await conn.fetchrow(query, *params) else: return await conn.fetchrow(query) + + async def __aenter__(self): + await self.initialize() + return self + + async def __aexit__(self, exc_type, exc, tb): + await self.close() diff --git a/py/core/providers/database/postgres.py b/py/core/providers/database/postgres.py index 598f0e83a..e9443da97 100644 --- a/py/core/providers/database/postgres.py +++ b/py/core/providers/database/postgres.py @@ -1,13 +1,9 @@ # TODO: Clean this up and make it more congruent across the vector database and the relational database. -import asyncio import logging import os import warnings -from contextlib import asynccontextmanager from typing import Any, Optional -import asyncpg - from core.base import ( CryptoProvider, DatabaseConfig, @@ -16,6 +12,7 @@ VectorQuantizationType, ) +from .base import SemaphoreConnectionPool from .handle import PostgresHandle logger = logging.getLogger() @@ -30,37 +27,6 @@ def get_env_var(new_var, old_var, config_value): return value -class SemaphoreConnectionPool(asyncpg.Pool): - def __init__(self, connection_string, postgres_configuration_settings): - self.connection_string = connection_string - self.postgres_configuration_settings = postgres_configuration_settings - - async def initialize(self): - try: - self.semaphore = asyncio.Semaphore( - int(self.postgres_configuration_settings.max_connections * 0.9) - ) - - self.pool = await asyncpg.create_pool( - self.connection_string, - max_size=self.postgres_configuration_settings.max_connections, - ) - - logger.info( - "Successfully connected to Postgres database and created connection pool." - ) - except Exception as e: - raise ValueError( - f"Error {e} occurred while attempting to connect to relational database." - ) from e - - @asynccontextmanager - async def get_connection(self): - async with self.semaphore: - async with self.pool.acquire() as conn: - yield conn - - class PostgresDBProvider(DatabaseProvider): user: str password: str @@ -148,20 +114,21 @@ def _get_table_name(self, base_name: str) -> str: return f"{self.project_name}.{base_name}" async def initialize(self): - shared_pool = SemaphoreConnectionPool( + pool = SemaphoreConnectionPool( self.connection_string, self.postgres_configuration_settings ) - await shared_pool.initialize() + await pool.initialize() handle = PostgresHandle( self.config, connection_string=self.connection_string, + crypto_provider=self.crypto_provider, project_name=self.project_name, dimension=self.vector_db_dimension, quantization_type=self.vector_db_quantization_type, ) - await handle.initialize(shared_pool) - + await handle.initialize(pool) + self.pool = pool self.handle = handle def _get_postgres_configuration_settings( diff --git a/py/core/providers/database/vecs/__init__.py b/py/core/providers/database/vecs/__init__.py index c4fa0b6d3..ba49390f5 100644 --- a/py/core/providers/database/vecs/__init__.py +++ b/py/core/providers/database/vecs/__init__.py @@ -1,18 +1,5 @@ from . import exc -from .client import Client -from .vector_collection import VectorCollection - -__project__ = "vecs" -__version__ = "0.4.2" - __all__ = [ - "VectorCollection", - "Client", "exc", ] - - -def create_client(connection_string: str, *args, **kwargs) -> Client: - """Creates a client from a Postgres connection string""" - return Client(connection_string, *args, **kwargs) diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index 8f610318d..5e5b63a12 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -2,7 +2,8 @@ import json import logging import time -from typing import Any, Optional, Union +import uuid +from typing import Any, Optional, Tuple, TypedDict, Union from sqlalchemy import text @@ -17,7 +18,7 @@ ) from .base import DatabaseMixin -from .vecs.exc import ArgError +from .vecs.exc import ArgError, FilterError logger = logging.getLogger() from shared.utils import _decorate_vector_type @@ -29,8 +30,21 @@ def index_measure_to_ops( return _decorate_vector_type(measure.ops, quantization_type) +class HybridSearchIntermediateResult(TypedDict): + semantic_rank: int + full_text_rank: int + data: VectorSearchResult + rrf_score: float + + class VectorDBMixin(DatabaseMixin): COLUMN_NAME = "vecs" + COLUMN_VARS = [ + "extraction_id", + "document_id", + "user_id", + "collection_ids", + ] def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -38,10 +52,10 @@ def __init__(self, *args, **kwargs): self.dimension = kwargs.get("dimension") self.quantization_type = kwargs.get("quantization_type") - async def initialize_vector_db(self): + async def create_table(self): # Create the vector table if it doesn't exist query = f""" - CREATE TABLE IF NOT EXISTS {self.project_name}.{VectorDBMixin.COLUMN_NAME} ( + CREATE TABLE IF NOT EXISTS {self._get_table_name(VectorDBMixin.COLUMN_NAME)} ( extraction_id TEXT PRIMARY KEY, document_id TEXT, user_id TEXT, @@ -50,20 +64,25 @@ async def initialize_vector_db(self): text TEXT, metadata JSONB ); - CREATE INDEX IF NOT EXISTS idx_vectors_document_id ON {self.project_name}.{VectorDBMixin.COLUMN_NAME} (document_id); - CREATE INDEX IF NOT EXISTS idx_vectors_user_id ON {self.project_name}.{VectorDBMixin.COLUMN_NAME} (user_id); - CREATE INDEX IF NOT EXISTS idx_vectors_collection_ids ON {self.project_name}.{VectorDBMixin.COLUMN_NAME} USING GIN (collection_ids); - CREATE INDEX IF NOT EXISTS idx_vectors_text ON {self.project_name}.{VectorDBMixin.COLUMN_NAME} USING GIN (to_tsvector('english', text)); + CREATE INDEX IF NOT EXISTS idx_vectors_document_id ON {self._get_table_name(VectorDBMixin.COLUMN_NAME)} (document_id); + CREATE INDEX IF NOT EXISTS idx_vectors_user_id ON {self._get_table_name(VectorDBMixin.COLUMN_NAME)} (user_id); + CREATE INDEX IF NOT EXISTS idx_vectors_collection_ids ON {self._get_table_name(VectorDBMixin.COLUMN_NAME)} USING GIN (collection_ids); + CREATE INDEX IF NOT EXISTS idx_vectors_text ON {self._get_table_name(VectorDBMixin.COLUMN_NAME)} USING GIN (to_tsvector('english', text)); """ await self.execute_query(query) async def upsert(self, entry: VectorEntry) -> None: query = f""" - INSERT INTO {self.project_name}.{VectorDBMixin.COLUMN_NAME} - (extraction_id, document_id, user_id, collection_ids, vector, text, metadata) + INSERT INTO {self._get_table_name(VectorDBMixin.COLUMN_NAME)} + (extraction_id, document_id, user_id, collection_ids, vec, text, metadata) VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT (extraction_id) DO UPDATE - SET document_id = $2, user_id = $3, collection_ids = $4, vector = $5, text = $6, metadata = $7; + ON CONFLICT (extraction_id) DO UPDATE SET + document_id = EXCLUDED.document_id, + user_id = EXCLUDED.user_id, + collection_ids = EXCLUDED.collection_ids, + vec = EXCLUDED.vec, + text = EXCLUDED.text, + metadata = EXCLUDED.metadata; """ await self.execute_query( query, @@ -80,11 +99,16 @@ async def upsert(self, entry: VectorEntry) -> None: async def upsert_entries(self, entries: list[VectorEntry]) -> None: query = f""" - INSERT INTO {self.project_name}.{VectorDBMixin.COLUMN_NAME} - (extraction_id, document_id, user_id, collection_ids, vector, text, metadata) + INSERT INTO {self._get_table_name(VectorDBMixin.COLUMN_NAME)} + (extraction_id, document_id, user_id, collection_ids, vec, text, metadata) VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT (extraction_id) DO UPDATE - SET document_id = $2, user_id = $3, collection_ids = $4, vector = $5, text = $6, metadata = $7; + ON CONFLICT (extraction_id) DO UPDATE SET + document_id = EXCLUDED.document_id, + user_id = EXCLUDED.user_id, + collection_ids = EXCLUDED.collection_ids, + vec = EXCLUDED.vec, + text = EXCLUDED.text, + metadata = EXCLUDED.metadata; """ params = [ ( @@ -103,23 +127,59 @@ async def upsert_entries(self, entries: list[VectorEntry]) -> None: async def semantic_search( self, query_vector: list[float], search_settings: VectorSearchSettings ) -> list[VectorSearchResult]: + try: + imeasure_obj = IndexMeasure(search_settings.index_measure) + except ValueError: + raise ValueError("Invalid index measure") + + distance_func = self._get_distance_function(imeasure_obj) + + cols = [ + "extraction_id", + "document_id", + "user_id", + "collection_ids", + "text", + ] + + if search_settings.include_values: + cols.append(f"{distance_func}(vec, $1::vector) as distance") + + if search_settings.include_metadatas: + cols.append("metadata") + + select_clause = ", ".join(cols) + + where_clause = "TRUE" + if search_settings.filters: + where_clause = self._build_filters(search_settings.filters) + query = f""" - SELECT extraction_id, document_id, user_id, collection_ids, text, - 1 - (vector <=> $1::vector) as similarity, metadata - FROM {self.project_name}.{VectorDBMixin.COLUMN_NAME} - WHERE collection_ids && $2 - ORDER BY similarity DESC - LIMIT $3 OFFSET $4; + SELECT {select_clause} + FROM {self._get_table_name(VectorDBMixin.COLUMN_NAME) +} + WHERE {where_clause} + ORDER BY {distance_func}(vec, $1::vector) + OFFSET $2 + LIMIT $3 """ - results = await self.fetch_query( - query, - ( - query_vector, - search_settings.collection_ids, - search_settings.search_limit, - search_settings.offset, - ), + + params = [ + query_vector, + search_settings.offset, + search_settings.search_limit, + ] + + # Set index-specific session parameters + await self.execute_query( + "SET LOCAL ivfflat.probes = $1", [search_settings.probes] ) + await self.execute_query( + "SET LOCAL hnsw.ef_search = $1", + [max(search_settings.ef_search, search_settings.search_limit)], + ) + + results = await self.fetch_query(query, params) return [ VectorSearchResult( @@ -128,7 +188,7 @@ async def semantic_search( user_id=result["user_id"], collection_ids=result["collection_ids"], text=result["text"], - score=float(result["similarity"]), + score=float(result["rank"]), metadata=result["metadata"], ) for result in results @@ -141,7 +201,7 @@ async def full_text_search( SELECT extraction_id, document_id, user_id, collection_ids, text, ts_rank_cd(to_tsvector('english', text), plainto_tsquery('english', $1)) as rank, metadata - FROM {self.project_name}.{VectorDBMixin.COLUMN_NAME} + FROM {self._get_table_name(VectorDBMixin.COLUMN_NAME)} WHERE collection_ids && $2 AND to_tsvector('english', text) @@ plainto_tsquery('english', $1) ORDER BY rank DESC LIMIT $3 OFFSET $4; @@ -150,7 +210,7 @@ async def full_text_search( query, ( query_text, - search_settings.collection_ids, + search_settings.selected_collection_ids, search_settings.search_limit, search_settings.offset, ), @@ -197,11 +257,11 @@ async def hybrid_search( search_settings.offset ) - semantic_results = await self.semantic_search( - query_vector, semantic_settings + semantic_results: list[VectorSearchResult] = ( + await self.semantic_search(query_vector, semantic_settings) ) - full_text_results = await self.full_text_search( - query_text, full_text_settings + full_text_results: list[VectorSearchResult] = ( + await self.full_text_search(query_text, full_text_settings) ) semantic_limit = search_settings.search_limit @@ -216,14 +276,15 @@ async def hybrid_search( ) rrf_k = search_settings.hybrid_search_settings.rrf_k - combined_results = { - result.extraction_id: { + combined_results: dict[uuid.UUID, HybridSearchIntermediateResult] = {} + + for rank, result in enumerate(semantic_results, 1): + combined_results[result.extraction_id] = { "semantic_rank": rank, "full_text_rank": full_text_limit, "data": result, + "rrf_score": 0.0, # Initialize with 0, will be calculated later } - for rank, result in enumerate(semantic_results, 1) - } for rank, result in enumerate(full_text_results, 1): if result.extraction_id in combined_results: @@ -233,6 +294,7 @@ async def hybrid_search( "semantic_rank": semantic_limit, "full_text_rank": rank, "data": result, + "rrf_score": 0.0, # Initialize with 0, will be calculated later } combined_results = { @@ -242,10 +304,10 @@ async def hybrid_search( and v["full_text_rank"] <= full_text_limit * 2 } - for result in combined_results.values(): - semantic_score = 1 / (rrf_k + result["semantic_rank"]) - full_text_score = 1 / (rrf_k + result["full_text_rank"]) - result["rrf_score"] = ( + for hyb_result in combined_results.values(): + semantic_score = 1 / (rrf_k + hyb_result["semantic_rank"]) + full_text_score = 1 / (rrf_k + hyb_result["full_text_rank"]) + hyb_result["rrf_score"] = ( semantic_score * semantic_weight + full_text_score * full_text_weight ) / (semantic_weight + full_text_weight) @@ -277,20 +339,6 @@ async def hybrid_search( for result in offset_results ] - async def create_index( - self, - table_name: Optional[VectorTableName] = None, - index_method: IndexMethod = IndexMethod.hnsw, - measure: IndexMeasure = IndexMeasure.cosine_distance, - index_arguments: Optional[ - Union[IndexArgsHNSW, IndexArgsIVFFlat] - ] = None, - index_name: Optional[str] = None, - concurrently: bool = True, - ): - # This method needs to be implemented based on your specific indexing requirements - pass - async def delete( self, filters: dict[str, Any] ) -> dict[str, dict[str, str]]: @@ -302,7 +350,7 @@ async def delete( where_clause = " AND ".join(conditions) query = f""" - DELETE FROM {self.project_name}.{VectorDBMixin.COLUMN_NAME} + DELETE FROM {self._get_table_name(VectorDBMixin.COLUMN_NAME)} WHERE {where_clause} RETURNING extraction_id; """ @@ -316,7 +364,7 @@ async def assign_document_to_collection_vector( self, document_id: str, collection_id: str ) -> None: query = f""" - UPDATE {self.project_name}.{VectorDBMixin.COLUMN_NAME} + UPDATE {self._get_table_name(VectorDBMixin.COLUMN_NAME)} SET collection_ids = array_append(collection_ids, $1) WHERE document_id = $2 AND NOT ($1 = ANY(collection_ids)); """ @@ -326,7 +374,7 @@ async def remove_document_from_collection_vector( self, document_id: str, collection_id: str ) -> None: query = f""" - UPDATE {self.project_name}.{VectorDBMixin.COLUMN_NAME} + UPDATE {self._get_table_name(VectorDBMixin.COLUMN_NAME)} SET collection_ids = array_remove(collection_ids, $1) WHERE document_id = $2; """ @@ -334,14 +382,14 @@ async def remove_document_from_collection_vector( async def delete_user_vector(self, user_id: str) -> None: query = f""" - DELETE FROM {self.project_name}.{VectorDBMixin.COLUMN_NAME} + DELETE FROM {self._get_table_name(VectorDBMixin.COLUMN_NAME)} WHERE user_id = $1; """ await self.execute_query(query, (user_id,)) async def delete_collection_vector(self, collection_id: str) -> None: query = f""" - DELETE FROM {self.project_name}.{VectorDBMixin.COLUMN_NAME} + DELETE FROM {self._get_table_name(VectorDBMixin.COLUMN_NAME)} WHERE $1 = ANY(collection_ids); """ await self.execute_query(query, (collection_id,)) @@ -359,7 +407,7 @@ async def get_document_chunks( query = f""" SELECT extraction_id, document_id, user_id, collection_ids, text, metadata {vector_select} - FROM {self.project_name}.{VectorDBMixin.COLUMN_NAME} + FROM {self._get_table_name(VectorDBMixin.COLUMN_NAME)} WHERE document_id = $1 OFFSET $2 {limit_clause}; @@ -431,13 +479,15 @@ async def create_index( """ if table_name == VectorTableName.CHUNKS: - table_name = f"{self.project_name}.{self.table.name}" + table_name_str = f"{self.project_name}.{self.project_name}" # TODO - Fix bug in vector table naming convention col_name = "vec" elif table_name == VectorTableName.ENTITIES: - table_name = f"{self.project_name}.{VectorTableName.ENTITIES}" + table_name_str = f"{self.project_name}.{VectorTableName.ENTITIES}" col_name = "description_embedding" elif table_name == VectorTableName.COMMUNITIES: - table_name = f"{self.project_name}.{VectorTableName.COMMUNITIES}" + table_name_str = ( + f"{self.project_name}.{VectorTableName.COMMUNITIES}" + ) col_name = "embedding" else: raise ArgError("invalid table name") @@ -487,7 +537,7 @@ async def create_index( create_index_sql = f""" CREATE INDEX {concurrently_sql} {index_name} - ON {table_name} + ON {table_name_str} USING {method} ({col_name} {ops}) {self._get_index_options(method, index_arguments)}; """ @@ -504,6 +554,141 @@ async def create_index( return None + def build_filters(self, filters: dict) -> Tuple[str, list[Any]]: + """ + Builds filters for SQL query based on provided dictionary. + + Args: + filters (dict): The dictionary specifying filter conditions. + + Raises: + FilterError: If filter conditions are not correctly formatted. + + Returns: + A tuple containing the SQL WHERE clause string and a list of parameters. + """ + if not isinstance(filters, dict): + raise FilterError("filters must be a dict") + + conditions = [] + parameters = [] + + def parse_condition(key: str, value: Any) -> str: + nonlocal parameters + if key in self.COLUMN_VARS: + # Handle column-based filters + if isinstance(value, dict): + op, clause = next(iter(value.items())) + if op == "$eq": + parameters.append(clause) + return f"{key} = ${len(parameters)}" + elif op == "$ne": + parameters.append(clause) + return f"{key} != ${len(parameters)}" + elif op == "$in": + parameters.append(clause) + return f"{key} = ANY(${len(parameters)})" + elif op == "$nin": + parameters.append(clause) + return f"{key} != ALL(${len(parameters)})" + elif op == "$overlap": + parameters.append(clause) + return f"{key} && ${len(parameters)}" + elif op == "$contains": + parameters.append(clause) + return f"{key} @> ${len(parameters)}" + elif op == "$any": + if key == "collection_ids": + parameters.append(f"%{clause}%") + return f"array_to_string({key}, ',') LIKE ${len(parameters)}" + parameters.append(clause) + return f"${len(parameters)} = ANY({key})" + else: + raise FilterError( + f"Unsupported operator for column {key}: {op}" + ) + else: + # Handle direct equality + if isinstance(value, str): + value = uuid.UUID(value) + parameters.append(value) + return f"{key} = ${len(parameters)}" + else: + # Handle JSON-based filters + json_col = "metadata" + if key.startswith("metadata."): + key = key.split("metadata.")[1] + if isinstance(value, dict): + if len(value) > 1: + raise FilterError("only one operator permitted") + operator, clause = next(iter(value.items())) + if operator not in ( + "$eq", + "$ne", + "$lt", + "$lte", + "$gt", + "$gte", + "$in", + "$contains", + ): + raise FilterError("unknown operator") + + if operator == "$eq" and not hasattr(clause, "__len__"): + parameters.append(json.dumps({key: clause})) + return f"{json_col} @> ${len(parameters)}::jsonb" + + if operator == "$in": + if not isinstance(clause, list): + raise FilterError( + "argument to $in filter must be a list" + ) + for elem in clause: + if not isinstance(elem, (int, str, float)): + raise FilterError( + "argument to $in filter must be a list of scalars" + ) + parameters.append(clause) + return f"{json_col}->>{key} = ANY(${len(parameters)})" + + parameters.append(json.dumps(clause)) + if operator == "$contains": + if not isinstance(clause, (int, str, float)): + raise FilterError( + "argument to $contains filter must be a scalar" + ) + return f"{json_col}->{key} @> ${len(parameters)}::jsonb AND jsonb_typeof({json_col}->{key}) = 'array'" + + return { + "$eq": f"{json_col}->>{key} = ${len(parameters)}", + "$ne": f"{json_col}->>{key} != ${len(parameters)}", + "$lt": f"{json_col}->>{key} < ${len(parameters)}", + "$lte": f"{json_col}->>{key} <= ${len(parameters)}", + "$gt": f"{json_col}->>{key} > ${len(parameters)}", + "$gte": f"{json_col}->>{key} >= ${len(parameters)}", + }[operator] + else: + parameters.append(json.dumps({key: value})) + return f"{json_col} @> ${len(parameters)}::jsonb" + + def parse_filter(filter_dict: dict) -> str: + filter_conditions = [] + for key, value in filter_dict.items(): + if key == "$and": + filter_conditions.append( + f"({' AND '.join([parse_filter(f) for f in value])})" + ) + elif key == "$or": + filter_conditions.append( + f"({' OR '.join([parse_filter(f) for f in value])})" + ) + else: + filter_conditions.append(parse_condition(key, value)) + return " AND ".join(filter_conditions) + + where_clause = parse_filter(filters) + return where_clause, parameters + def _get_index_options( self, method: IndexMethod, @@ -523,3 +708,28 @@ def _get_index_options( return "WITH (m=16, ef_construction=64)" else: return "" # No options for other methods + + def _get_index_type(self, method: IndexMethod) -> str: + if method == IndexMethod.ivfflat: + return "ivfflat" + elif method == IndexMethod.hnsw: + return "hnsw" + elif method == IndexMethod.auto: + # Here you might want to implement logic to choose between ivfflat and hnsw + return "hnsw" + + def _get_index_operator(self, measure: IndexMeasure) -> str: + if measure == IndexMeasure.l2_distance: + return "vector_l2_ops" + elif measure == IndexMeasure.max_inner_product: + return "vector_ip_ops" + elif measure == IndexMeasure.cosine_distance: + return "vector_cosine_ops" + + def _get_distance_function(self, imeasure_obj: IndexMeasure) -> str: + if imeasure_obj == IndexMeasure.cosine_distance: + return "cosine_distance" + elif imeasure_obj == IndexMeasure.l2_distance: + return "l2_distance" + elif imeasure_obj == IndexMeasure.max_inner_product: + return "max_inner_product" diff --git a/py/core/providers/file/postgres.py b/py/core/providers/file/postgres.py index 56627b484..ce9a6dc3e 100644 --- a/py/core/providers/file/postgres.py +++ b/py/core/providers/file/postgres.py @@ -7,7 +7,10 @@ from core.base import FileConfig, R2RException from core.base.providers import FileProvider -from core.providers.database.postgres import PostgresDBProvider +from core.providers.database.postgres import ( + PostgresDBProvider, + SemaphoreConnectionPool, +) logger = logging.getLogger() @@ -16,30 +19,13 @@ class PostgresFileProvider(FileProvider): def __init__(self, config: FileConfig, db_provider: PostgresDBProvider): super().__init__(config) self.config: FileConfig = config - self.db_provider = db_provider + self.db_provider = db_provider.pool self.pool = None - async def __aenter__(self): - await self.initialize() - return self + async def initialize(self, pool: SemaphoreConnectionPool): + self.pool = pool - async def __aexit__(self, exc_type, exc, tb): - await self._close_connection() - - async def _close_connection(self): - if self.pool: - await self.pool.close() - self.pool = None - - async def initialize(self): - self.pool = await asyncpg.create_pool( - self.db_provider.connection_string - ) - logger.info( - "File provider successfully connected to Postgres database." - ) - - async with self.pool.acquire() as conn: + async with self.pool.get_connection() as conn: await conn.execute('CREATE EXTENSION IF NOT EXISTS "lo";') await self.create_table() @@ -59,7 +45,7 @@ async def create_table(self): updated_at TIMESTAMPTZ DEFAULT NOW() ); """ - async with self.pool.acquire() as conn: + async with self.pool.get_connection() as conn: async with conn.transaction(): await conn.execute(query) @@ -88,7 +74,7 @@ async def upsert_file( file_type = EXCLUDED.file_type, updated_at = NOW(); """ - async with self.pool.acquire() as conn: + async with self.pool.get_connection() as conn: async with conn.transaction(): await conn.execute( query, @@ -109,7 +95,7 @@ async def store_file( ) file_size = file_content.getbuffer().nbytes - async with self.pool.acquire() as conn: + async with self.pool.get_connection() as conn: async with conn.transaction(): oid = await conn.fetchval("SELECT lo_create(0)") await self._write_lobject(conn, oid, file_content) @@ -159,7 +145,7 @@ async def retrieve_file( FROM {self._get_table_name('file_storage')} WHERE document_id = $1 """ - async with self.pool.acquire() as conn: + async with self.pool.get_connection() as conn: async with conn.transaction(): result = await conn.fetchrow(query, document_id) if not result: @@ -231,7 +217,7 @@ async def delete_file(self, document_id: UUID) -> bool: SELECT file_oid FROM {self._get_table_name('file_storage')} WHERE document_id = $1 """ - async with self.pool.acquire() as conn: + async with self.pool.get_connection() as conn: async with conn.transaction(): result = await conn.fetchval(query, document_id) if not result: @@ -287,7 +273,7 @@ async def get_files_overview( query += f" ORDER BY created_at DESC OFFSET ${len(params) + 1} LIMIT ${len(params) + 2}" params.extend([offset, limit]) - async with self.pool.acquire() as conn: + async with self.pool.get_connection() as conn: async with conn.transaction(): results = await conn.fetch(query, *params) diff --git a/py/core/providers/prompts/r2r_prompts.py b/py/core/providers/prompts/r2r_prompts.py index 68b53702d..a874a5f75 100644 --- a/py/core/providers/prompts/r2r_prompts.py +++ b/py/core/providers/prompts/r2r_prompts.py @@ -9,6 +9,7 @@ from core.base import DatabaseProvider, Prompt, PromptConfig, PromptProvider from core.base.utils import generate_default_prompt_id +from core.providers.database.postgres import SemaphoreConnectionPool logger = logging.getLogger() @@ -33,16 +34,11 @@ async def _close_connection(self): await self.pool.close() self.pool = None - async def initialize(self): + async def initialize(self, pool: SemaphoreConnectionPool): try: - self.pool = await asyncpg.create_pool( - self.db_provider.connection_string - ) - logger.info( - "R2RPromptProvider successfully connected to Postgres database." - ) + self.pool = pool - async with self.pool.acquire() as conn: + async with self.pool.get_connection() as conn: await conn.execute('CREATE EXTENSION IF NOT EXISTS "lo";') await self.create_table() @@ -78,7 +74,7 @@ async def execute_query( ) -> Any: if not self.pool: raise ConnectionError("Database pool is not initialized.") - async with self.pool.acquire() as conn: + async with self.pool.get_connection() as conn: async with conn.transaction(): if params: return await conn.execute(query, *params) @@ -89,7 +85,7 @@ async def fetch_query( ) -> Any: if not self.pool: raise ConnectionError("Database pool is not initialized.") - async with self.pool.acquire() as conn: + async with self.pool.get_connection() as conn: async with conn.transaction(): return ( await conn.fetch(query, *params) diff --git a/py/r2r/__init__.py b/py/r2r/__init__.py index 5272076b7..1056bb155 100644 --- a/py/r2r/__init__.py +++ b/py/r2r/__init__.py @@ -20,15 +20,15 @@ "__version__", ] -try: - import core - from core import * - - __all__ += core.__all__ -except ImportError as e: - logger.error( - f"ImportError: `{e}`, likely due to core dependencies not being installed." - ) +# try: +import core +from core import * + +__all__ += core.__all__ +# except ImportError as e: +# logger.error( +# f"ImportError: `{e}`, likely due to core dependencies not being installed." +# ) # Add a function to get the version diff --git a/py/shared/abstractions/search.py b/py/shared/abstractions/search.py index 8d946b115..1198c0eb4 100644 --- a/py/shared/abstractions/search.py +++ b/py/shared/abstractions/search.py @@ -249,15 +249,15 @@ class VectorSearchSettings(R2RSerializable): default=True, description="Whether to include element metadata in the search results", ) - probes: Optional[int] = Field( + probes: int = Field( default=10, description="Number of ivfflat index lists to query. Higher increases accuracy but decreases speed.", ) - ef_search: Optional[int] = Field( + ef_search: int = Field( default=40, description="Size of the dynamic candidate list for HNSW index search. Higher increases accuracy but decreases speed.", ) - hybrid_search_settings: Optional[HybridSearchSettings] = Field( + hybrid_search_settings: HybridSearchSettings = Field( default=HybridSearchSettings(), description="Settings for hybrid search", ) From 8bca097c99553dc1dd983916fd190c61f214242d Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Fri, 18 Oct 2024 12:51:23 -0700 Subject: [PATCH 15/27] work doc chunks --- py/core/base/providers/database.py | 3 - py/core/main/api/management_router.py | 1 + py/core/main/assembly/factory.py | 4 +- .../hatchet/ingestion_workflow.py | 29 ++--- .../main/orchestration/hatchet/kg_workflow.py | 2 +- .../simple/ingestion_workflow.py | 26 ++-- py/core/main/services/auth_service.py | 22 ++-- py/core/main/services/ingestion_service.py | 12 +- py/core/main/services/kg_service.py | 8 +- py/core/main/services/management_service.py | 46 +++---- .../pipes/ingestion/vector_storage_pipe.py | 2 +- py/core/pipes/kg/triples_extraction.py | 2 +- py/core/pipes/retrieval/vector_search_pipe.py | 6 +- py/core/providers/auth/r2r_auth.py | 44 +++---- py/core/providers/database/base.py | 3 - py/core/providers/database/handle.py | 122 ------------------ py/core/providers/database/postgres.py | 119 ++++++++++++++--- py/core/providers/database/vector.py | 75 ++++++----- py/core/providers/file/postgres.py | 8 +- py/core/providers/kg/postgres.py | 2 +- py/core/providers/prompts/r2r_prompts.py | 6 +- 21 files changed, 253 insertions(+), 289 deletions(-) delete mode 100644 py/core/providers/database/handle.py diff --git a/py/core/base/providers/database.py b/py/core/base/providers/database.py index e071a6635..3b2504e36 100644 --- a/py/core/base/providers/database.py +++ b/py/core/base/providers/database.py @@ -73,9 +73,6 @@ class DatabaseProvider(Provider): def __init__(self, config: DatabaseConfig): logger.info(f"Initializing DatabaseProvider with config {config}.") - self.handle: Any = ( - None # TODO - Type this properly, we later use it as a PostgresHandle - ) super().__init__(config) @abstractmethod diff --git a/py/core/main/api/management_router.py b/py/core/main/api/management_router.py index 387fe48a2..3bb4d13d5 100644 --- a/py/core/main/api/management_router.py +++ b/py/core/main/api/management_router.py @@ -414,6 +414,7 @@ async def document_chunks_app( "total_entries": document_chunks["total_entries"] } + @self.router.get("/collections_overview") @self.base_endpoint async def collections_overview_app( diff --git a/py/core/main/assembly/factory.py b/py/core/main/assembly/factory.py index 0b25f74fe..7f0d69ff7 100644 --- a/py/core/main/assembly/factory.py +++ b/py/core/main/assembly/factory.py @@ -147,7 +147,7 @@ async def create_database_provider( "Embedding config must have a base dimension to initialize database." ) - vector_db_dimension = self.config.embedding.base_dimension + dimension = self.config.embedding.base_dimension quantization_type = ( self.config.embedding.quantization_settings.quantization_type ) @@ -156,7 +156,7 @@ async def create_database_provider( database_provider = PostgresDBProvider( db_config, - vector_db_dimension, + dimension, crypto_provider=crypto_provider, quantization_type=quantization_type, ) diff --git a/py/core/main/orchestration/hatchet/ingestion_workflow.py b/py/core/main/orchestration/hatchet/ingestion_workflow.py index 08f6c901f..8f1a84e0c 100644 --- a/py/core/main/orchestration/hatchet/ingestion_workflow.py +++ b/py/core/main/orchestration/hatchet/ingestion_workflow.py @@ -150,14 +150,13 @@ async def parse(self, context: Context) -> dict: status=IngestionStatus.SUCCESS, ) - collection_id = await service.providers.database.handle.assign_document_to_collection( + # TODO: Move logic onto the `management service` + collection_id=generate_default_user_collection_id(str(document_info.user_id)) + await service.providers.database.assign_document_to_collection_relational( document_id=document_info.id, - collection_id=generate_default_user_collection_id( - document_info.user_id - ), + collection_id=collection_id, ) - - service.providers.database.handle.assign_document_to_collection( + await service.providers.database.assign_document_to_collection_vector( document_id=document_info.id, collection_id=collection_id ) @@ -189,7 +188,7 @@ async def on_failure(self, context: Context) -> None: try: documents_overview = ( - await self.ingestion_service.providers.database.handle.get_documents_overview( + await self.ingestion_service.providers.database.get_documents_overview( filter_document_ids=[document_id] ) )["results"] @@ -248,7 +247,7 @@ async def update_files(self, context: Context) -> None: ) documents_overview = ( - await self.ingestion_service.providers.database.handle.get_documents_overview( + await self.ingestion_service.providers.database.get_documents_overview( filter_document_ids=document_ids, filter_user_ids=None if user.is_superuser else [user.id], ) @@ -400,13 +399,13 @@ async def finalize(self, context: Context) -> dict: ) try: - collection_id = await self.ingestion_service.providers.database.handle.assign_document_to_collection( + # TODO - Move logic onto the `management service` + collection_id = generate_default_user_collection_id(document_info.user_id) + await self.ingestion_service.providers.database.assign_document_to_collection_relational( document_id=document_info.id, - collection_id=generate_default_user_collection_id( - document_info.user_id - ), + collection_id=collection_id, ) - self.ingestion_service.providers.database.handle.assign_document_to_collection( + await self.ingestion_service.providers.database.assign_document_to_collection_vector( document_id=document_info.id, collection_id=collection_id ) except Exception as e: @@ -432,7 +431,7 @@ async def on_failure(self, context: Context) -> None: try: documents_overview = ( - await self.ingestion_service.providers.database.handle.get_documents_overview( + await self.ingestion_service.providers.database.get_documents_overview( filter_document_ids=[document_id] ) )["results"] @@ -474,7 +473,7 @@ async def create_vector_index(self, context: Context) -> dict: ) ) - self.ingestion_service.providers.database.handle.create_index( + self.ingestion_service.providers.database.create_index( **parsed_data ) diff --git a/py/core/main/orchestration/hatchet/kg_workflow.py b/py/core/main/orchestration/hatchet/kg_workflow.py index c98fac710..e0eaf5147 100644 --- a/py/core/main/orchestration/hatchet/kg_workflow.py +++ b/py/core/main/orchestration/hatchet/kg_workflow.py @@ -117,7 +117,7 @@ async def on_failure(self, context: Context) -> None: return try: - await self.kg_service.providers.database.handle.set_workflow_status( + await self.kg_service.providers.database.set_workflow_status( id=uuid.UUID(document_id), status_type="kg_extraction_status", status=KGExtractionStatus.FAILED, diff --git a/py/core/main/orchestration/simple/ingestion_workflow.py b/py/core/main/orchestration/simple/ingestion_workflow.py index bef1a4ca4..87766b8ff 100644 --- a/py/core/main/orchestration/simple/ingestion_workflow.py +++ b/py/core/main/orchestration/simple/ingestion_workflow.py @@ -67,14 +67,14 @@ async def ingest_files(input_data): ) try: - collection_id = await service.providers.database.handle.assign_document_to_collection( + # TODO - Move logic onto management service + collection_id=generate_default_user_collection_id(str(document_info.user_id)) + await service.providers.database.assign_document_to_collection_relational( document_id=document_info.id, - collection_id=generate_default_user_collection_id( - str(document_info.user_id) - ), + collection_id=collection_id, ) - service.providers.database.handle.assign_document_to_collection( - document_id=document_info.id, collection_id=collection_id + await service.providers.database.assign_document_to_collection_vector( + document_info.id, collection_id ) except Exception as e: logger.error( @@ -125,7 +125,7 @@ async def update_files(input_data): ) documents_overview = ( - await service.providers.database.handle.get_documents_overview( + await service.providers.database.get_documents_overview( filter_document_ids=document_ids, filter_user_ids=None if user.is_superuser else [user.id], ) @@ -227,13 +227,13 @@ async def ingest_chunks(input_data): ) try: - collection_id = await service.providers.database.handle.assign_document_to_collection( + # TODO - Move logic onto management service + collection_id=generate_default_user_collection_id(str(document_info.user_id)) + await service.providers.database.assign_document_to_collection_relational( document_id=document_info.id, - collection_id=generate_default_user_collection_id( - str(document_info.user_id) - ), + collection_id=collection_id, ) - service.providers.database.handle.assign_document_to_collection( + await service.providers.database.assign_document_to_collection_vector( document_id=document_info.id, collection_id=collection_id ) except Exception as e: @@ -262,7 +262,7 @@ async def create_vector_index(input_data): ) ) - service.providers.database.handle.create_index(**parsed_data) + service.providers.database.create_index(**parsed_data) except Exception as e: raise R2RException( diff --git a/py/core/main/services/auth_service.py b/py/core/main/services/auth_service.py index 01fa6863c..f085200a9 100644 --- a/py/core/main/services/auth_service.py +++ b/py/core/main/services/auth_service.py @@ -45,7 +45,7 @@ async def verify_email( status_code=400, message="Email verification is not required" ) - user_id = await self.providers.database.handle.get_user_id_by_verification_code( + user_id = await self.providers.database.get_user_id_by_verification_code( verification_code ) if not user_id: @@ -53,14 +53,14 @@ async def verify_email( status_code=400, message="Invalid or expired verification code" ) - user = await self.providers.database.handle.get_user_by_id(user_id) + user = await self.providers.database.get_user_by_id(user_id) if not user or user.email != email: raise R2RException( status_code=400, message="Invalid or expired verification code" ) - await self.providers.database.handle.mark_user_as_verified(user_id) - await self.providers.database.handle.remove_verification_code( + await self.providers.database.mark_user_as_verified(user_id) + await self.providers.database.remove_verification_code( verification_code ) return {"message": f"User account {user_id} verified successfully."} @@ -72,7 +72,7 @@ async def login(self, email: str, password: str) -> dict[str, Token]: @telemetry_event("GetCurrentUser") async def user(self, token: str) -> UserResponse: token_data = await self.providers.auth.decode_token(token) - user = await self.providers.database.handle.get_user_by_email( + user = await self.providers.database.get_user_by_email( token_data.email ) if user is None: @@ -124,7 +124,7 @@ async def update_user( profile_picture: Optional[str] = None, ) -> UserResponse: user: UserResponse = ( - await self.providers.database.handle.get_user_by_id(str(user_id)) + await self.providers.database.get_user_by_id(str(user_id)) ) if not user: raise R2RException(status_code=404, message="User not found") @@ -138,7 +138,7 @@ async def update_user( user.bio = bio if profile_picture is not None: user.profile_picture = profile_picture - return await self.providers.database.handle.update_user(user) + return await self.providers.database.update_user(user) @telemetry_event("DeleteUserAccount") async def delete_user( @@ -148,7 +148,7 @@ async def delete_user( delete_vector_data: bool = False, is_superuser: bool = False, ) -> dict[str, str]: - user = await self.providers.database.handle.get_user_by_id(user_id) + user = await self.providers.database.get_user_by_id(user_id) if not user: raise R2RException(status_code=404, message="User not found") if not ( @@ -158,9 +158,9 @@ async def delete_user( ) ): raise R2RException(status_code=400, message="Incorrect password") - await self.providers.database.handle.delete_user_relational(user_id) + await self.providers.database.delete_user_relational(user_id) if delete_vector_data: - self.providers.database.handle.delete_user_vector(user_id) + self.providers.database.delete_user_vector(user_id) return {"message": f"User account {user_id} deleted successfully."} @@ -170,6 +170,6 @@ async def clean_expired_blacklisted_tokens( max_age_hours: int = 7 * 24, current_time: Optional[datetime] = None, ): - await self.providers.database.handle.clean_expired_blacklisted_tokens( + await self.providers.database.clean_expired_blacklisted_tokens( max_age_hours, current_time ) diff --git a/py/core/main/services/ingestion_service.py b/py/core/main/services/ingestion_service.py index 0e0720be1..3e81519b5 100644 --- a/py/core/main/services/ingestion_service.py +++ b/py/core/main/services/ingestion_service.py @@ -94,7 +94,7 @@ async def ingest_file_ingress( ) existing_document_info = ( - await self.providers.database.handle.get_documents_overview( + await self.providers.database.get_documents_overview( filter_user_ids=[user.id], filter_document_ids=[document_id], ) @@ -120,7 +120,7 @@ async def ingest_file_ingress( message=f"Document {document_id} was already ingested and is not in a failed state.", ) - await self.providers.database.handle.upsert_documents_overview( + await self.providers.database.upsert_documents_overview( document_info ) @@ -256,7 +256,7 @@ async def finalize_ingestion( is_update: bool = False, ) -> None: if is_update: - self.providers.database.handle.delete( + self.providers.database.delete( filters={ "$and": [ {"document_id": {"$eq": document_info.id}}, @@ -284,7 +284,7 @@ async def update_document_status( async def _update_document_status_in_db(self, document_info: DocumentInfo): try: - await self.providers.database.handle.upsert_documents_overview( + await self.providers.database.upsert_documents_overview( document_info ) except Exception as e: @@ -325,7 +325,7 @@ async def ingest_chunks_ingress( ) existing_document_info = ( - await self.providers.database.handle.get_documents_overview( + await self.providers.database.get_documents_overview( filter_user_ids=[user.id], filter_document_ids=[document_id], ) @@ -339,7 +339,7 @@ async def ingest_chunks_ingress( message=f"Document {document_id} was already ingested and is not in a failed state.", ) - await self.providers.database.handle.upsert_documents_overview( + await self.providers.database.upsert_documents_overview( document_info ) diff --git a/py/core/main/services/kg_service.py b/py/core/main/services/kg_service.py index 50b5d4713..30b0ceac5 100644 --- a/py/core/main/services/kg_service.py +++ b/py/core/main/services/kg_service.py @@ -67,7 +67,7 @@ async def kg_triples_extraction( f"KGService: Processing document {document_id} for KG extraction" ) - await self.providers.database.handle.set_workflow_status( + await self.providers.database.set_workflow_status( id=document_id, status_type="kg_extraction_status", status=KGExtractionStatus.PROCESSING, @@ -101,7 +101,7 @@ async def kg_triples_extraction( except Exception as e: logger.error(f"KGService: Error in kg_extraction: {e}") - await self.providers.database.handle.set_workflow_status( + await self.providers.database.set_workflow_status( id=document_id, status_type="kg_extraction_status", status=KGExtractionStatus.FAILED, @@ -128,7 +128,7 @@ async def get_document_ids_for_create_graph( ] document_ids = ( - await self.providers.database.handle.get_document_ids_by_status( + await self.providers.database.get_document_ids_by_status( status_type="kg_extraction_status", status=document_status_filter, collection_id=collection_id, @@ -195,7 +195,7 @@ async def kg_entity_description( f"KGService: Completed kg_entity_description for batch {i+1}/{num_batches} for document {document_id}" ) - await self.providers.database.handle.set_workflow_status( + await self.providers.database.set_workflow_status( id=document_id, status_type="kg_extraction_status", status=KGExtractionStatus.SUCCESS, diff --git a/py/core/main/services/management_service.py b/py/core/main/services/management_service.py index 51a707486..18b4a1c4c 100644 --- a/py/core/main/services/management_service.py +++ b/py/core/main/services/management_service.py @@ -204,7 +204,7 @@ async def users_overview( *args, **kwargs, ): - return await self.providers.database.handle.get_users_overview( + return await self.providers.database.get_users_overview( [str(ele) for ele in user_ids] if user_ids else None, offset=offset, limit=limit, @@ -267,7 +267,7 @@ def validate_filters(filters: dict[str, Any]) -> None: logger.info(f"Deleting entries with filters: {filters}") try: - vector_delete_results = self.providers.database.handle.delete( + vector_delete_results = self.providers.database.delete( filters ) except Exception as e: @@ -299,7 +299,7 @@ def validate_filters(filters: dict[str, Any]) -> None: try: documents_overview = ( - await self.providers.database.handle.get_documents_overview( + await self.providers.database.get_documents_overview( **relational_filters ) )["results"] @@ -319,7 +319,7 @@ def validate_filters(filters: dict[str, Any]) -> None: for document_id in document_ids_to_purge: try: - await self.providers.database.handle.delete_from_documents_overview( + await self.providers.database.delete_from_documents_overview( str(document_id) ) logger.info( @@ -351,7 +351,7 @@ async def documents_overview( *args: Any, **kwargs: Any, ): - return await self.providers.database.handle.get_documents_overview( + return await self.providers.database.get_documents_overview( filter_document_ids=document_ids, filter_user_ids=user_ids, filter_collection_ids=collection_ids, @@ -369,7 +369,7 @@ async def document_chunks( *args, **kwargs, ): - return self.providers.database.handle.get_document_chunks( + return await self.providers.database.get_document_chunks( document_id, offset=offset, limit=limit, @@ -380,10 +380,10 @@ async def document_chunks( async def assign_document_to_collection( self, document_id: str, collection_id: UUID ): - await self.providers.database.handle.assign_document_to_collection_vector( + await self.providers.database.assign_document_to_collection_vector( document_id, collection_id ) - await self.providers.database.handle.assign_document_to_collection_relational( + await self.providers.database.assign_document_to_collection_relational( document_id, collection_id ) return {"message": "Document assigned to collection successfully"} @@ -392,10 +392,10 @@ async def assign_document_to_collection( async def remove_document_from_collection( self, document_id: UUID, collection_id: UUID ): - await self.providers.database.handle.remove_document_from_collection_relational( + await self.providers.database.remove_document_from_collection_relational( document_id, collection_id ) - await self.providers.database.handle.remove_document_from_collection_vector( + await self.providers.database.remove_document_from_collection_vector( document_id, collection_id ) await self.providers.kg.delete_node_via_document_id( @@ -407,7 +407,7 @@ async def remove_document_from_collection( async def document_collections( self, document_id: str, offset: int = 0, limit: int = 100 ): - return await self.providers.database.handle.document_collections( + return await self.providers.database.document_collections( document_id, offset=offset, limit=limit ) @@ -502,13 +502,13 @@ def _get_central_nodes( async def create_collection( self, name: str, description: str = "" ) -> UUID: - return await self.providers.database.handle.create_collection( + return await self.providers.database.create_collection( name, description ) @telemetry_event("GetCollection") async def get_collection(self, collection_id: UUID) -> Optional[dict]: - return await self.providers.database.handle.get_collection( + return await self.providers.database.get_collection( collection_id ) @@ -519,16 +519,16 @@ async def update_collection( name: Optional[str] = None, description: Optional[str] = None, ) -> bool: - return await self.providers.database.handle.update_collection( + return await self.providers.database.update_collection( collection_id, name, description ) @telemetry_event("DeleteCollection") async def delete_collection(self, collection_id: UUID) -> bool: - await self.providers.database.handle.delete_collection_relational( + await self.providers.database.delete_collection_relational( collection_id ) - await self.providers.database.handle.delete_collection_vector( + await self.providers.database.delete_collection_vector( collection_id ) return True @@ -537,7 +537,7 @@ async def delete_collection(self, collection_id: UUID) -> bool: async def list_collections( self, offset: int = 0, limit: int = 100 ) -> list[dict]: - return await self.providers.database.handle.list_collections( + return await self.providers.database.list_collections( offset=offset, limit=limit ) @@ -545,7 +545,7 @@ async def list_collections( async def add_user_to_collection( self, user_id: UUID, collection_id: UUID ) -> bool: - return await self.providers.database.handle.add_user_to_collection( + return await self.providers.database.add_user_to_collection( user_id, collection_id ) @@ -554,7 +554,7 @@ async def remove_user_from_collection( self, user_id: UUID, collection_id: UUID ) -> bool: return ( - await self.providers.database.handle.remove_user_from_collection( + await self.providers.database.remove_user_from_collection( user_id, collection_id ) ) @@ -563,7 +563,7 @@ async def remove_user_from_collection( async def get_users_in_collection( self, collection_id: UUID, offset: int = 0, limit: int = 100 ) -> list[dict]: - return await self.providers.database.handle.get_users_in_collection( + return await self.providers.database.get_users_in_collection( collection_id, offset=offset, limit=limit ) @@ -571,7 +571,7 @@ async def get_users_in_collection( async def get_collections_for_user( self, user_id: UUID, offset: int = 0, limit: int = 100 ) -> list[dict]: - return await self.providers.database.handle.get_collections_for_user( + return await self.providers.database.get_collections_for_user( user_id, offset, limit ) @@ -584,7 +584,7 @@ async def collections_overview( *args, **kwargs, ): - return await self.providers.database.handle.get_collections_overview( + return await self.providers.database.get_collections_overview( ([str(ele) for ele in collection_ids] if collection_ids else None), offset=offset, limit=limit, @@ -594,7 +594,7 @@ async def collections_overview( async def documents_in_collection( self, collection_id: UUID, offset: int = 0, limit: int = 100 ) -> list[dict]: - return await self.providers.database.handle.documents_in_collection( + return await self.providers.database.documents_in_collection( collection_id, offset=offset, limit=limit ) diff --git a/py/core/pipes/ingestion/vector_storage_pipe.py b/py/core/pipes/ingestion/vector_storage_pipe.py index 2a3161ab1..9f2c5925a 100644 --- a/py/core/pipes/ingestion/vector_storage_pipe.py +++ b/py/core/pipes/ingestion/vector_storage_pipe.py @@ -51,7 +51,7 @@ async def store( """ try: - self.database_provider.handle.upsert_entries(vector_entries) + await self.database_provider.upsert_entries(vector_entries) except Exception as e: error_message = ( f"Failed to store vector entries in the database: {e}" diff --git a/py/core/pipes/kg/triples_extraction.py b/py/core/pipes/kg/triples_extraction.py index d60b0467f..0fa3fb594 100644 --- a/py/core/pipes/kg/triples_extraction.py +++ b/py/core/pipes/kg/triples_extraction.py @@ -258,7 +258,7 @@ async def _run_logic( # type: ignore data=extraction["text"], metadata=extraction["metadata"], ) - for extraction in self.database_provider.handle.get_document_chunks( + for extraction in await self.database_provider.get_document_chunks( document_id=document_id )[ "results" diff --git a/py/core/pipes/retrieval/vector_search_pipe.py b/py/core/pipes/retrieval/vector_search_pipe.py index d63769bc3..d76babfc6 100644 --- a/py/core/pipes/retrieval/vector_search_pipe.py +++ b/py/core/pipes/retrieval/vector_search_pipe.py @@ -63,14 +63,14 @@ async def search( # type: ignore purpose=EmbeddingPurpose.QUERY, ) - search_results = ( - self.database_provider.handle.hybrid_search( + search_results = await ( + self.database_provider.hybrid_search( query_vector=query_vector, query_text=message, search_settings=search_settings, ) if search_settings.use_hybrid_search - else self.database_provider.handle.semantic_search( + else self.database_provider.semantic_search( query_vector=query_vector, search_settings=search_settings, ) diff --git a/py/core/providers/auth/r2r_auth.py b/py/core/providers/auth/r2r_auth.py index 8f44addb0..ed0c477c6 100644 --- a/py/core/providers/auth/r2r_auth.py +++ b/py/core/providers/auth/r2r_auth.py @@ -56,7 +56,7 @@ async def initialize(self): user = await self.register( email=self.admin_email, password=self.admin_password ) - await self.db_provider.handle.mark_user_as_superuser(user.id) + await self.db_provider.mark_user_as_superuser(user.id) except R2RException: logger.info("Default admin user already exists.") @@ -85,7 +85,7 @@ def create_refresh_token(self, data: dict) -> str: async def decode_token(self, token: str) -> TokenData: try: # First, check if the token is blacklisted - if await self.db_provider.handle.is_token_blacklisted(token): + if await self.db_provider.is_token_blacklisted(token): raise R2RException( status_code=401, message="Token has been invalidated" ) @@ -114,7 +114,7 @@ async def decode_token(self, token: str) -> TokenData: async def user(self, token: str = Depends(oauth2_scheme)) -> UserResponse: token_data = await self.decode_token(token) - user = await self.db_provider.handle.get_user_by_email( + user = await self.db_provider.get_user_by_email( token_data.email ) if user is None: @@ -132,14 +132,14 @@ def get_current_active_user( async def register(self, email: str, password: str) -> Dict[str, str]: # Create new user and give them a default collection - new_user = await self.db_provider.handle.create_user(email, password) + new_user = await self.db_provider.create_user(email, password) default_collection = ( - await self.db_provider.handle.create_default_collection( + await self.db_provider.create_default_collection( new_user.id, ) ) - await self.db_provider.handle.add_user_to_collection( + await self.db_provider.add_user_to_collection( new_user.id, default_collection.collection_id ) @@ -150,7 +150,7 @@ async def register(self, email: str, password: str) -> Dict[str, str]: ) expiry = datetime.now(timezone.utc) + timedelta(hours=24) - await self.db_provider.handle.store_verification_code( + await self.db_provider.store_verification_code( new_user.id, verification_code, expiry ) new_user.verification_code_expiry = expiry @@ -158,10 +158,10 @@ async def register(self, email: str, password: str) -> Dict[str, str]: # self.providers.email.send_verification_email(new_user.email, verification_code) else: # Mark user as verified - await self.db_provider.handle.store_verification_code( + await self.db_provider.store_verification_code( new_user.id, None, None ) - await self.db_provider.handle.mark_user_as_verified(new_user.id) + await self.db_provider.mark_user_as_verified(new_user.id) return new_user @@ -169,7 +169,7 @@ async def verify_email( self, email: str, verification_code: str ) -> dict[str, str]: user_id = ( - await self.db_provider.handle.get_user_id_by_verification_code( + await self.db_provider.get_user_id_by_verification_code( verification_code ) ) @@ -177,8 +177,8 @@ async def verify_email( raise R2RException( status_code=400, message="Invalid or expired verification code" ) - await self.db_provider.handle.mark_user_as_verified(user_id) - await self.db_provider.handle.remove_verification_code( + await self.db_provider.mark_user_as_verified(user_id) + await self.db_provider.remove_verification_code( verification_code ) return {"message": "Email verified successfully"} @@ -187,7 +187,7 @@ async def login(self, email: str, password: str) -> Dict[str, Token]: logger = logging.getLogger() logger.debug(f"Attempting login for email: {email}") - user = await self.db_provider.handle.get_user_by_email(email) + user = await self.db_provider.get_user_by_email(email) if not user: logger.warning(f"No user found for email: {email}") raise R2RException( @@ -241,7 +241,7 @@ async def refresh_access_token( ) # Invalidate the old refresh token and create a new one - await self.db_provider.handle.blacklist_token(refresh_token) + await self.db_provider.blacklist_token(refresh_token) new_access_token = self.create_access_token( data={"sub": token_data.email} @@ -277,13 +277,13 @@ async def change_password( hashed_new_password = self.crypto_provider.get_password_hash( new_password ) - await self.db_provider.handle.update_user_password( + await self.db_provider.update_user_password( user.id, hashed_new_password ) return {"message": "Password changed successfully"} async def request_password_reset(self, email: str) -> Dict[str, str]: - user = await self.db_provider.handle.get_user_by_email(email) + user = await self.db_provider.get_user_by_email(email) if not user: # To prevent email enumeration, always return a success message return { @@ -292,7 +292,7 @@ async def request_password_reset(self, email: str) -> Dict[str, str]: reset_token = self.crypto_provider.generate_verification_code() expiry = datetime.now(timezone.utc) + timedelta(hours=1) - await self.db_provider.handle.store_reset_token( + await self.db_provider.store_reset_token( user.id, reset_token, expiry ) @@ -304,7 +304,7 @@ async def request_password_reset(self, email: str) -> Dict[str, str]: async def confirm_password_reset( self, reset_token: str, new_password: str ) -> Dict[str, str]: - user_id = await self.db_provider.handle.get_user_id_by_reset_token( + user_id = await self.db_provider.get_user_id_by_reset_token( reset_token ) if not user_id: @@ -315,16 +315,16 @@ async def confirm_password_reset( hashed_new_password = self.crypto_provider.get_password_hash( new_password ) - await self.db_provider.handle.update_user_password( + await self.db_provider.update_user_password( user_id, hashed_new_password ) - await self.db_provider.handle.remove_reset_token(user_id) + await self.db_provider.remove_reset_token(user_id) return {"message": "Password reset successfully"} async def logout(self, token: str) -> Dict[str, str]: # Add the token to a blacklist - await self.db_provider.handle.blacklist_token(token) + await self.db_provider.blacklist_token(token) return {"message": "Logged out successfully"} async def clean_expired_blacklisted_tokens(self): - await self.db_provider.handle.clean_expired_blacklisted_tokens() + await self.db_provider.clean_expired_blacklisted_tokens() diff --git a/py/core/providers/database/base.py b/py/core/providers/database/base.py index a915741b4..aa6ea6396 100644 --- a/py/core/providers/database/base.py +++ b/py/core/providers/database/base.py @@ -6,9 +6,6 @@ import asyncpg from sqlalchemy import TextClause, text -if TYPE_CHECKING: - from core.providers.database.handle import PostgresHandle - logger = logging.getLogger() diff --git a/py/core/providers/database/handle.py b/py/core/providers/database/handle.py deleted file mode 100644 index 456dc0a4e..000000000 --- a/py/core/providers/database/handle.py +++ /dev/null @@ -1,122 +0,0 @@ -from typing import Optional - -from core.base import CryptoProvider, DatabaseConfig -from core.providers.database.base import ( - DatabaseMixin, - SemaphoreConnectionPool, - logger, -) -from core.providers.database.collection import CollectionMixin -from core.providers.database.document import DocumentMixin -from core.providers.database.tokens import BlacklistedTokensMixin -from core.providers.database.user import UserMixin -from core.providers.database.vector import VectorDBMixin -from shared.abstractions.vector import VectorQuantizationType - - -class PostgresHandle( - DocumentMixin, - CollectionMixin, - BlacklistedTokensMixin, - UserMixin, - VectorDBMixin, -): - def __init__( - self, - config: DatabaseConfig, - connection_string: str, - crypto_provider: CryptoProvider, - project_name: str, - dimension: int, - quantization_type: Optional[VectorQuantizationType] = None, - pool_size: int = 10, - max_retries: int = 3, - retry_delay: int = 1, - ): - self.config = config - self.connection_string = connection_string - self.crypto_provider = crypto_provider - self.project_name = project_name - self.dimension = dimension - self.quantization_type = quantization_type - self.pool_size = pool_size - self.max_retries = max_retries - self.retry_delay = retry_delay - self.pool: Optional[SemaphoreConnectionPool] = None - - def _get_table_name(self, base_name: str) -> str: - return f"{self.project_name}.{base_name}" - - async def initialize(self, pool: SemaphoreConnectionPool): - logger.info("Initializing `PostgresDBHandle`.") - self.pool = pool - - async with self.pool.get_connection() as conn: - await conn.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";') - await conn.execute("CREATE EXTENSION IF NOT EXISTS vector;") - await conn.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm;") - await conn.execute("CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;") - - # Create schema if it doesn't exist - await conn.execute( - f'CREATE SCHEMA IF NOT EXISTS "{self.project_name}";' - ) - - # Call create_table for each mixin - for base_class in self.__class__.__bases__: - if issubclass(base_class, DatabaseMixin): - await base_class.create_table(self) - - logger.info("Successfully initialized `PostgresDBHandle`") - - async def close(self): - if self.pool: - await self.pool.close() - - async def execute_query(self, query, params=None, isolation_level=None): - async with self.pool.get_connection() as conn: - if isolation_level: - async with conn.transaction(isolation=isolation_level): - if params: - return await conn.execute(query, *params) - else: - return await conn.execute(query) - else: - if params: - return await conn.execute(query, *params) - else: - return await conn.execute(query) - - async def execute_many(self, query, params=None, batch_size=1000): - async with self.pool.get_connection() as conn: - async with conn.transaction(): - if params: - for i in range(0, len(params), batch_size): - param_batch = params[i : i + batch_size] - await conn.executemany(query, param_batch) - else: - await conn.executemany(query) - - async def fetch_query(self, query, params=None): - async with self.pool.get_connection() as conn: - async with conn.transaction(): - return ( - await conn.fetch(query, *params) - if params - else await conn.fetch(query) - ) - - async def fetchrow_query(self, query, params=None): - async with self.pool.get_connection() as conn: - async with conn.transaction(): - if params: - return await conn.fetchrow(query, *params) - else: - return await conn.fetchrow(query) - - async def __aenter__(self): - await self.initialize() - return self - - async def __aexit__(self, exc_type, exc, tb): - await self.close() diff --git a/py/core/providers/database/postgres.py b/py/core/providers/database/postgres.py index e9443da97..292893f78 100644 --- a/py/core/providers/database/postgres.py +++ b/py/core/providers/database/postgres.py @@ -12,12 +12,20 @@ VectorQuantizationType, ) -from .base import SemaphoreConnectionPool -from .handle import PostgresHandle +from .base import SemaphoreConnectionPool, DatabaseMixin -logger = logging.getLogger() +from core.providers.database.collection import CollectionMixin +from core.providers.database.document import DocumentMixin +from core.providers.database.tokens import BlacklistedTokensMixin +from core.providers.database.user import UserMixin +from core.providers.database.vector import VectorDBMixin +from shared.abstractions.vector import VectorQuantizationType + + +logger = logging.getLogger() + def get_env_var(new_var, old_var, config_value): value = config_value or os.getenv(new_var) or os.getenv(old_var) if os.getenv(old_var) and not os.getenv(new_var): @@ -27,7 +35,15 @@ def get_env_var(new_var, old_var, config_value): return value -class PostgresDBProvider(DatabaseProvider): +class PostgresDBProvider( + DatabaseProvider, + DocumentMixin, + CollectionMixin, + BlacklistedTokensMixin, + UserMixin, + VectorDBMixin, + + ): user: str password: str host: str @@ -35,7 +51,7 @@ class PostgresDBProvider(DatabaseProvider): db_name: str project_name: str connection_string: str - vector_db_dimension: int + dimension: int conn: Optional[Any] crypto_provider: CryptoProvider postgres_configuration_settings: PostgresConfigurationSettings @@ -95,7 +111,7 @@ def __init__( self.connection_string = f"postgresql://{self.user}:{self.password}@{self.host}:{self.port}/{self.db_name}" logger.info("Connecting to Postgres via TCP/IP") - self.vector_db_dimension = dimension + self.dimension = dimension self.vector_db_quantization_type = quantization_type self.conn = None self.config: DatabaseConfig = config @@ -108,28 +124,34 @@ def __init__( config.default_collection_description ) - self.handle: Optional[PostgresHandle] = None + self.pool: Optional[SemaphoreConnectionPool] = None def _get_table_name(self, base_name: str) -> str: return f"{self.project_name}.{base_name}" async def initialize(self): - pool = SemaphoreConnectionPool( + logger.info("Initializing `PostgresDBProvider`.") + self.pool = SemaphoreConnectionPool( self.connection_string, self.postgres_configuration_settings ) - await pool.initialize() - - handle = PostgresHandle( - self.config, - connection_string=self.connection_string, - crypto_provider=self.crypto_provider, - project_name=self.project_name, - dimension=self.vector_db_dimension, - quantization_type=self.vector_db_quantization_type, - ) - await handle.initialize(pool) - self.pool = pool - self.handle = handle + await self.pool.initialize() + + async with self.pool.get_connection() as conn: + await conn.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";') + await conn.execute("CREATE EXTENSION IF NOT EXISTS vector;") + await conn.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm;") + await conn.execute("CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;") + + # Create schema if it doesn't exist + await conn.execute( + f'CREATE SCHEMA IF NOT EXISTS "{self.project_name}";' + ) + + # Call create_table for each mixin + for base_class in self.__class__.__bases__: + if issubclass(base_class, DatabaseMixin): + await base_class.create_table(self) + logger.info("Successfully initialized `PostgresDBProvider`") def _get_postgres_configuration_settings( self, config: DatabaseConfig @@ -171,3 +193,58 @@ def _get_postgres_configuration_settings( setattr(settings, setting, value) return settings + + async def close(self): + if self.pool: + await self.pool.close() + + async def execute_query(self, query, params=None, isolation_level=None): + async with self.pool.get_connection() as conn: + if isolation_level: + async with conn.transaction(isolation=isolation_level): + if params: + return await conn.execute(query, *params) + else: + return await conn.execute(query) + else: + if params: + return await conn.execute(query, *params) + else: + return await conn.execute(query) + + async def execute_many(self, query, params=None, batch_size=1000): + async with self.pool.get_connection() as conn: + async with conn.transaction(): + if params: + for i in range(0, len(params), batch_size): + param_batch = params[i : i + batch_size] + await conn.executemany(query, param_batch) + else: + await conn.executemany(query) + + async def fetch_query(self, query, params=None): + async with self.pool.get_connection() as conn: + print('query', query) + print('params', params) + + async with conn.transaction(): + return ( + await conn.fetch(query, *params) + if params + else await conn.fetch(query) + ) + + async def fetchrow_query(self, query, params=None): + async with self.pool.get_connection() as conn: + async with conn.transaction(): + if params: + return await conn.fetchrow(query, *params) + else: + return await conn.fetchrow(query) + + async def __aenter__(self): + await self.initialize() + return self + + async def __aexit__(self, exc_type, exc, tb): + await self.close() diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index 5e5b63a12..25a054627 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -53,14 +53,15 @@ def __init__(self, *args, **kwargs): self.quantization_type = kwargs.get("quantization_type") async def create_table(self): + # TODO - Move ids to `UUID` type # Create the vector table if it doesn't exist query = f""" CREATE TABLE IF NOT EXISTS {self._get_table_name(VectorDBMixin.COLUMN_NAME)} ( - extraction_id TEXT PRIMARY KEY, - document_id TEXT, - user_id TEXT, - collection_ids TEXT[], - vector vector({self.dimension}), + extraction_id UUID PRIMARY KEY, + document_id UUID, + user_id UUID, + collection_ids UUID[], + vec vector({self.dimension}), text TEXT, metadata JSONB ); @@ -91,9 +92,9 @@ async def upsert(self, entry: VectorEntry) -> None: entry.document_id, entry.user_id, entry.collection_ids, - entry.vector.data, + str(entry.vector.data), entry.text, - entry.metadata, + json.dumps(entry.metadata), ), ) @@ -116,9 +117,9 @@ async def upsert_entries(self, entries: list[VectorEntry]) -> None: entry.document_id, entry.user_id, entry.collection_ids, - entry.vector.data, + str(entry.vector.data), entry.text, - entry.metadata, + json.dumps(entry.metadata), ) for entry in entries ] @@ -154,21 +155,32 @@ async def semantic_search( if search_settings.filters: where_clause = self._build_filters(search_settings.filters) - query = f""" - SELECT {select_clause} - FROM {self._get_table_name(VectorDBMixin.COLUMN_NAME) -} - WHERE {where_clause} - ORDER BY {distance_func}(vec, $1::vector) - OFFSET $2 - LIMIT $3 - """ +# query = f""" +# SELECT {select_clause} +# FROM {self._get_table_name(VectorDBMixin.COLUMN_NAME) +# } +# WHERE {where_clause} +# ORDER BY {distance_func}(vec, $1::vector) +# OFFSET $2 +# LIMIT $3 +# """ + vector_str = f"ARRAY[{','.join(map(str, query_vector))}]::vector" + query = f""" + SELECT {select_clause} + FROM {self._get_table_name(VectorDBMixin.COLUMN_NAME)} + WHERE {where_clause} + ORDER BY {distance_func}(vec, {vector_str}) + OFFSET $1 + LIMIT $2 + """ params = [ query_vector, search_settings.offset, search_settings.search_limit, ] + print('query = ', query) + print('params = ', params) # Set index-specific session parameters await self.execute_query( @@ -178,7 +190,7 @@ async def semantic_search( "SET LOCAL hnsw.ef_search = $1", [max(search_settings.ef_search, search_settings.search_limit)], ) - + results = await self.fetch_query(query, params) return [ @@ -368,7 +380,7 @@ async def assign_document_to_collection_vector( SET collection_ids = array_append(collection_ids, $1) WHERE document_id = $2 AND NOT ($1 = ANY(collection_ids)); """ - await self.execute_query(query, (collection_id, document_id)) + await self.execute_query(query, (str(collection_id), str(document_id))) async def remove_document_from_collection_vector( self, document_id: str, collection_id: str @@ -401,39 +413,42 @@ async def get_document_chunks( limit: int = -1, include_vectors: bool = False, ) -> dict[str, Any]: - vector_select = ", vector" if include_vectors else "" + vector_select = ", vec" if include_vectors else "" limit_clause = f"LIMIT {limit}" if limit > -1 else "" query = f""" - SELECT extraction_id, document_id, user_id, collection_ids, text, metadata + SELECT extraction_id, document_id, user_id, collection_ids, text, metadata, COUNT(*) OVER() AS total {vector_select} FROM {self._get_table_name(VectorDBMixin.COLUMN_NAME)} WHERE document_id = $1 OFFSET $2 {limit_clause}; """ + params = [document_id, offset] - if limit > -1: - params.append(limit) results = await self.fetch_query(query, params) - return { - "chunks": [ + chunks = [] + total = 0 + if results: + total = results[0].get("total", 0) + chunks = [ { "extraction_id": result["extraction_id"], "document_id": result["document_id"], "user_id": result["user_id"], "collection_ids": result["collection_ids"], "text": result["text"], - "metadata": result["metadata"], - **( - {"vector": result["vector"]} if include_vectors else {} + "metadata": json.loads(result["metadata"]), + "vector": ( + json.loads(result["vec"]) if include_vectors else None ), } for result in results ] - } + + return {"results": chunks, "total_entries": total} async def create_index( self, diff --git a/py/core/providers/file/postgres.py b/py/core/providers/file/postgres.py index ce9a6dc3e..cb43cf26c 100644 --- a/py/core/providers/file/postgres.py +++ b/py/core/providers/file/postgres.py @@ -19,11 +19,11 @@ class PostgresFileProvider(FileProvider): def __init__(self, config: FileConfig, db_provider: PostgresDBProvider): super().__init__(config) self.config: FileConfig = config - self.db_provider = db_provider.pool - self.pool = None + self.db_provider = db_provider + self.pool: Optional[SemaphoreConnectionPool] = None # Initialize pool - async def initialize(self, pool: SemaphoreConnectionPool): - self.pool = pool + async def initialize(self): + self.pool = self.db_provider.pool async with self.pool.get_connection() as conn: await conn.execute('CREATE EXTENSION IF NOT EXISTS "lo";') diff --git a/py/core/providers/kg/postgres.py b/py/core/providers/kg/postgres.py index af8d12f32..3eb6398fc 100644 --- a/py/core/providers/kg/postgres.py +++ b/py/core/providers/kg/postgres.py @@ -40,7 +40,7 @@ def __init__( ) -> None: super().__init__(config, *args, **kwargs) - self.db_provider = db_provider.handle + self.db_provider = db_provider self.embedding_provider = embedding_provider try: diff --git a/py/core/providers/prompts/r2r_prompts.py b/py/core/providers/prompts/r2r_prompts.py index a874a5f75..cb379b449 100644 --- a/py/core/providers/prompts/r2r_prompts.py +++ b/py/core/providers/prompts/r2r_prompts.py @@ -20,7 +20,7 @@ def __init__(self, config: PromptConfig, db_provider: DatabaseProvider): self.prompts: dict[str, Prompt] = {} self.config: PromptConfig = config self.db_provider = db_provider - self.pool: Optional[asyncpg.pool.Pool] = None # Initialize pool + self.pool: Optional[SemaphoreConnectionPool] = None # Initialize pool async def __aenter__(self): await self.initialize() @@ -34,9 +34,9 @@ async def _close_connection(self): await self.pool.close() self.pool = None - async def initialize(self, pool: SemaphoreConnectionPool): + async def initialize(self): try: - self.pool = pool + self.pool = self.db_provider.pool async with self.pool.get_connection() as conn: await conn.execute('CREATE EXTENSION IF NOT EXISTS "lo";') From 7d86e4e1c4cf0acac120e8d4aa8b0dcba3c78adf Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Fri, 18 Oct 2024 14:03:04 -0700 Subject: [PATCH 16/27] working vector search --- py/core/pipes/retrieval/vector_search_pipe.py | 26 +- py/core/providers/database/vector.py | 229 +++++++++++++----- 2 files changed, 183 insertions(+), 72 deletions(-) diff --git a/py/core/pipes/retrieval/vector_search_pipe.py b/py/core/pipes/retrieval/vector_search_pipe.py index d76babfc6..04d772940 100644 --- a/py/core/pipes/retrieval/vector_search_pipe.py +++ b/py/core/pipes/retrieval/vector_search_pipe.py @@ -63,18 +63,22 @@ async def search( # type: ignore purpose=EmbeddingPurpose.QUERY, ) - search_results = await ( - self.database_provider.hybrid_search( - query_vector=query_vector, - query_text=message, - search_settings=search_settings, - ) - if search_settings.use_hybrid_search - else self.database_provider.semantic_search( - query_vector=query_vector, - search_settings=search_settings, - ) + search_results = await self.database_provider.semantic_search( + query_vector=query_vector, + search_settings=search_settings, ) + # search_results = await ( + # self.database_provider.hybrid_search( + # query_vector=query_vector, + # query_text=message, + # search_settings=search_settings, + # ) + # if search_settings.use_hybrid_search + # else self.database_provider.semantic_search( + # query_vector=query_vector, + # search_settings=search_settings, + # ) + # ) reranked_results = self.embedding_provider.rerank( query=message, results=search_results, diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index 25a054627..fd1b35f8b 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -56,7 +56,7 @@ async def create_table(self): # TODO - Move ids to `UUID` type # Create the vector table if it doesn't exist query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name(VectorDBMixin.COLUMN_NAME)} ( + CREATE TABLE IF NOT EXISTS {self._get_table_name(self.project_name)} ( extraction_id UUID PRIMARY KEY, document_id UUID, user_id UUID, @@ -65,16 +65,16 @@ async def create_table(self): text TEXT, metadata JSONB ); - CREATE INDEX IF NOT EXISTS idx_vectors_document_id ON {self._get_table_name(VectorDBMixin.COLUMN_NAME)} (document_id); - CREATE INDEX IF NOT EXISTS idx_vectors_user_id ON {self._get_table_name(VectorDBMixin.COLUMN_NAME)} (user_id); - CREATE INDEX IF NOT EXISTS idx_vectors_collection_ids ON {self._get_table_name(VectorDBMixin.COLUMN_NAME)} USING GIN (collection_ids); - CREATE INDEX IF NOT EXISTS idx_vectors_text ON {self._get_table_name(VectorDBMixin.COLUMN_NAME)} USING GIN (to_tsvector('english', text)); + CREATE INDEX IF NOT EXISTS idx_vectors_document_id ON {self._get_table_name(self.project_name)} (document_id); + CREATE INDEX IF NOT EXISTS idx_vectors_user_id ON {self._get_table_name(self.project_name)} (user_id); + CREATE INDEX IF NOT EXISTS idx_vectors_collection_ids ON {self._get_table_name(self.project_name)} USING GIN (collection_ids); + CREATE INDEX IF NOT EXISTS idx_vectors_text ON {self._get_table_name(self.project_name)} USING GIN (to_tsvector('english', text)); """ await self.execute_query(query) async def upsert(self, entry: VectorEntry) -> None: query = f""" - INSERT INTO {self._get_table_name(VectorDBMixin.COLUMN_NAME)} + INSERT INTO {self._get_table_name(self.project_name)} (extraction_id, document_id, user_id, collection_ids, vec, text, metadata) VALUES ($1, $2, $3, $4, $5, $6, $7) ON CONFLICT (extraction_id) DO UPDATE SET @@ -100,7 +100,7 @@ async def upsert(self, entry: VectorEntry) -> None: async def upsert_entries(self, entries: list[VectorEntry]) -> None: query = f""" - INSERT INTO {self._get_table_name(VectorDBMixin.COLUMN_NAME)} + INSERT INTO {self._get_table_name(self.project_name)} (extraction_id, document_id, user_id, collection_ids, vec, text, metadata) VALUES ($1, $2, $3, $4, $5, $6, $7) ON CONFLICT (extraction_id) DO UPDATE SET @@ -125,6 +125,44 @@ async def upsert_entries(self, entries: list[VectorEntry]) -> None: ] await self.execute_many(query, params) + # async def semantic_search( + # self, query_vector: list[float], search_settings: VectorSearchSettings + # ) -> list[VectorSearchResult]: + # try: + # imeasure_obj = IndexMeasure(search_settings.index_measure) + # except ValueError: + # raise ValueError("Invalid index measure") + + # distance_func = self._get_distance_function(imeasure_obj) + + # cols = [ + # f"{self._get_table_name(self.project_name)}.extraction_id", + # f"{self._get_table_name(self.project_name)}.document_id", + # f"{self._get_table_name(self.project_name)}.user_id", + # f"{self._get_table_name(self.project_name)}.collection_ids", + # f"{self._get_table_name(self.project_name)}.text", + # ] + + # if search_settings.include_values: + # cols.append(f"{self._get_table_name(self.project_name)}.vec {distance_func} :vec AS distance") + + # if search_settings.include_metadatas: + # cols.append(f"{self._get_table_name(self.project_name)}.metadata") + + # select_clause = ", ".join(cols) + + # where_clause = "TRUE" + # if search_settings.filters: + # where_clause = self._build_filters(search_settings.filters) + + # query = f""" + # SELECT {select_clause} + # FROM {self._get_table_name(self.project_name)} + # WHERE {where_clause} + # ORDER BY {self._get_table_name(self.project_name)}.vec {distance_func} :vec + # OFFSET :offset + # LIMIT :limit + # """ async def semantic_search( self, query_vector: list[float], search_settings: VectorSearchSettings ) -> list[VectorSearchResult]: @@ -136,62 +174,51 @@ async def semantic_search( distance_func = self._get_distance_function(imeasure_obj) cols = [ - "extraction_id", - "document_id", - "user_id", - "collection_ids", - "text", + f"{self._get_table_name(self.project_name)}.extraction_id", + f"{self._get_table_name(self.project_name)}.document_id", + f"{self._get_table_name(self.project_name)}.user_id", + f"{self._get_table_name(self.project_name)}.collection_ids", + f"{self._get_table_name(self.project_name)}.text", ] if search_settings.include_values: - cols.append(f"{distance_func}(vec, $1::vector) as distance") + cols.append(f"{self._get_table_name(self.project_name)}.vec {distance_func} $1 AS distance") if search_settings.include_metadatas: - cols.append("metadata") + cols.append(f"{self._get_table_name(self.project_name)}.metadata") select_clause = ", ".join(cols) - where_clause = "TRUE" + where_clause = "" if search_settings.filters: - where_clause = self._build_filters(search_settings.filters) - -# query = f""" -# SELECT {select_clause} -# FROM {self._get_table_name(VectorDBMixin.COLUMN_NAME) -# } -# WHERE {where_clause} -# ORDER BY {distance_func}(vec, $1::vector) -# OFFSET $2 -# LIMIT $3 -# """ - vector_str = f"ARRAY[{','.join(map(str, query_vector))}]::vector" + where_clause = f"WHERE {self._build_filters(search_settings.filters)}" query = f""" - SELECT {select_clause} - FROM {self._get_table_name(VectorDBMixin.COLUMN_NAME)} - WHERE {where_clause} - ORDER BY {distance_func}(vec, {vector_str}) - OFFSET $1 - LIMIT $2 - """ - params = [ - query_vector, - search_settings.offset, - search_settings.search_limit, - ] - print('query = ', query) - print('params = ', params) + SELECT {select_clause} + FROM {self._get_table_name(self.project_name)} + {where_clause} + ORDER BY {self._get_table_name(self.project_name)}.vec {distance_func} $1 + LIMIT $2 + OFFSET $3 + """ + + # params = { + # "vec_1": query_vector, + # "param_1": search_settings.search_limit, + # "param_2": search_settings.offset, + # } + print('raw query = ', query) # Set index-specific session parameters - await self.execute_query( - "SET LOCAL ivfflat.probes = $1", [search_settings.probes] - ) - await self.execute_query( - "SET LOCAL hnsw.ef_search = $1", - [max(search_settings.ef_search, search_settings.search_limit)], - ) + # await self.execute_query( + # "SET LOCAL ivfflat.probes = :probes", {"probes": search_settings.probes} + # ) + # await self.execute_query( + # "SET LOCAL hnsw.ef_search = :ef_search", + # {"ef_search": max(search_settings.ef_search, search_settings.search_limit)}, + # ) - results = await self.fetch_query(query, params) + results = await self.fetch_query(query, (str(query_vector), search_settings.search_limit, search_settings.offset)) # , params) return [ VectorSearchResult( @@ -200,12 +227,93 @@ async def semantic_search( user_id=result["user_id"], collection_ids=result["collection_ids"], text=result["text"], - score=float(result["rank"]), - metadata=result["metadata"], + score=(1-float(result["distance"])) if search_settings.include_values else None, + metadata=json.loads(result["metadata"]) if search_settings.include_metadatas else None, ) for result in results ] +# async def semantic_search( +# self, query_vector: list[float], search_settings: VectorSearchSettings +# ) -> list[VectorSearchResult]: +# try: +# imeasure_obj = IndexMeasure(search_settings.index_measure) +# except ValueError: +# raise ValueError("Invalid index measure") + +# distance_func = self._get_distance_function(imeasure_obj) + +# cols = [ +# "extraction_id", +# "document_id", +# "user_id", +# "collection_ids", +# "text", +# ] + +# if search_settings.include_values: +# cols.append(f"{distance_func}(vec, $1::vector) as distance") + +# if search_settings.include_metadatas: +# cols.append("metadata") + +# select_clause = ", ".join(cols) + +# where_clause = "TRUE" +# if search_settings.filters: +# where_clause = self._build_filters(search_settings.filters) + +# # query = f""" +# # SELECT {select_clause} +# # FROM {self._get_table_name(self.project_name) +# # } +# # WHERE {where_clause} +# # ORDER BY {distance_func}(vec, $1::vector) +# # OFFSET $2 +# # LIMIT $3 +# # """ +# vector_str = f"ARRAY[{','.join(map(str, query_vector))}]::vector" + +# query = f""" +# SELECT {select_clause} +# FROM {self._get_table_name(self.project_name)} +# WHERE {where_clause} +# ORDER BY {distance_func}(vec, {vector_str}) +# OFFSET $1 +# LIMIT $2 +# """ +# params = [ +# query_vector, +# search_settings.offset, +# search_settings.search_limit, +# ] +# print('query = ', query) +# print('params = ', params) + +# # Set index-specific session parameters +# await self.execute_query( +# "SET LOCAL ivfflat.probes = $1", [search_settings.probes] +# ) +# await self.execute_query( +# "SET LOCAL hnsw.ef_search = $1", +# [max(search_settings.ef_search, search_settings.search_limit)], +# ) + +# results = await self.fetch_query(query, params) + +# return [ +# VectorSearchResult( +# extraction_id=result["extraction_id"], +# document_id=result["document_id"], +# user_id=result["user_id"], +# collection_ids=result["collection_ids"], +# text=result["text"], +# score=float(result["rank"]), +# metadata=result["metadata"], +# ) +# for result in results +# ] + async def full_text_search( self, query_text: str, search_settings: VectorSearchSettings ) -> list[VectorSearchResult]: @@ -213,7 +321,7 @@ async def full_text_search( SELECT extraction_id, document_id, user_id, collection_ids, text, ts_rank_cd(to_tsvector('english', text), plainto_tsquery('english', $1)) as rank, metadata - FROM {self._get_table_name(VectorDBMixin.COLUMN_NAME)} + FROM {self._get_table_name(self.project_name)} WHERE collection_ids && $2 AND to_tsvector('english', text) @@ plainto_tsquery('english', $1) ORDER BY rank DESC LIMIT $3 OFFSET $4; @@ -362,7 +470,7 @@ async def delete( where_clause = " AND ".join(conditions) query = f""" - DELETE FROM {self._get_table_name(VectorDBMixin.COLUMN_NAME)} + DELETE FROM {self._get_table_name(self.project_name)} WHERE {where_clause} RETURNING extraction_id; """ @@ -376,7 +484,7 @@ async def assign_document_to_collection_vector( self, document_id: str, collection_id: str ) -> None: query = f""" - UPDATE {self._get_table_name(VectorDBMixin.COLUMN_NAME)} + UPDATE {self._get_table_name(self.project_name)} SET collection_ids = array_append(collection_ids, $1) WHERE document_id = $2 AND NOT ($1 = ANY(collection_ids)); """ @@ -386,7 +494,7 @@ async def remove_document_from_collection_vector( self, document_id: str, collection_id: str ) -> None: query = f""" - UPDATE {self._get_table_name(VectorDBMixin.COLUMN_NAME)} + UPDATE {self._get_table_name(self.project_name)} SET collection_ids = array_remove(collection_ids, $1) WHERE document_id = $2; """ @@ -394,14 +502,14 @@ async def remove_document_from_collection_vector( async def delete_user_vector(self, user_id: str) -> None: query = f""" - DELETE FROM {self._get_table_name(VectorDBMixin.COLUMN_NAME)} + DELETE FROM {self._get_table_name(self.project_name)} WHERE user_id = $1; """ await self.execute_query(query, (user_id,)) async def delete_collection_vector(self, collection_id: str) -> None: query = f""" - DELETE FROM {self._get_table_name(VectorDBMixin.COLUMN_NAME)} + DELETE FROM {self._get_table_name(self.project_name)} WHERE $1 = ANY(collection_ids); """ await self.execute_query(query, (collection_id,)) @@ -417,9 +525,8 @@ async def get_document_chunks( limit_clause = f"LIMIT {limit}" if limit > -1 else "" query = f""" - SELECT extraction_id, document_id, user_id, collection_ids, text, metadata, COUNT(*) OVER() AS total - {vector_select} - FROM {self._get_table_name(VectorDBMixin.COLUMN_NAME)} + SELECT extraction_id, document_id, user_id, collection_ids, text, metadata{vector_select}, COUNT(*) OVER() AS total + FROM {self._get_table_name(self.project_name)} WHERE document_id = $1 OFFSET $2 {limit_clause}; @@ -743,7 +850,7 @@ def _get_index_operator(self, measure: IndexMeasure) -> str: def _get_distance_function(self, imeasure_obj: IndexMeasure) -> str: if imeasure_obj == IndexMeasure.cosine_distance: - return "cosine_distance" + return "<=>" elif imeasure_obj == IndexMeasure.l2_distance: return "l2_distance" elif imeasure_obj == IndexMeasure.max_inner_product: From 0eb5b96ec202d516fca51b79f89e9a6007ac1ef7 Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Fri, 18 Oct 2024 14:48:22 -0700 Subject: [PATCH 17/27] working full text search --- py/core/base/providers/database.py | 1 + py/core/main/api/management_router.py | 1 - .../hatchet/ingestion_workflow.py | 8 +- .../simple/ingestion_workflow.py | 8 +- py/core/main/services/auth_service.py | 10 +- py/core/main/services/ingestion_service.py | 4 +- py/core/main/services/management_service.py | 18 +- py/core/pipes/kg/triples_extraction.py | 4 +- py/core/pipes/retrieval/vector_search_pipe.py | 26 +- py/core/providers/auth/r2r_auth.py | 24 +- py/core/providers/database/postgres.py | 19 +- py/core/providers/database/vector.py | 355 ++++++++++-------- py/r2r.toml | 1 + 13 files changed, 243 insertions(+), 236 deletions(-) diff --git a/py/core/base/providers/database.py b/py/core/base/providers/database.py index 3b2504e36..641f55a4e 100644 --- a/py/core/base/providers/database.py +++ b/py/core/base/providers/database.py @@ -53,6 +53,7 @@ class DatabaseConfig(ProviderConfig): ] = None default_collection_name: str = "Default" default_collection_description: str = "Your default collection." + enable_fts: bool = False def __post_init__(self): self.validate_config() diff --git a/py/core/main/api/management_router.py b/py/core/main/api/management_router.py index 3bb4d13d5..387fe48a2 100644 --- a/py/core/main/api/management_router.py +++ b/py/core/main/api/management_router.py @@ -414,7 +414,6 @@ async def document_chunks_app( "total_entries": document_chunks["total_entries"] } - @self.router.get("/collections_overview") @self.base_endpoint async def collections_overview_app( diff --git a/py/core/main/orchestration/hatchet/ingestion_workflow.py b/py/core/main/orchestration/hatchet/ingestion_workflow.py index 8f1a84e0c..db9dd47e8 100644 --- a/py/core/main/orchestration/hatchet/ingestion_workflow.py +++ b/py/core/main/orchestration/hatchet/ingestion_workflow.py @@ -151,7 +151,9 @@ async def parse(self, context: Context) -> dict: ) # TODO: Move logic onto the `management service` - collection_id=generate_default_user_collection_id(str(document_info.user_id)) + collection_id = generate_default_user_collection_id( + str(document_info.user_id) + ) await service.providers.database.assign_document_to_collection_relational( document_id=document_info.id, collection_id=collection_id, @@ -400,7 +402,9 @@ async def finalize(self, context: Context) -> dict: try: # TODO - Move logic onto the `management service` - collection_id = generate_default_user_collection_id(document_info.user_id) + collection_id = generate_default_user_collection_id( + document_info.user_id + ) await self.ingestion_service.providers.database.assign_document_to_collection_relational( document_id=document_info.id, collection_id=collection_id, diff --git a/py/core/main/orchestration/simple/ingestion_workflow.py b/py/core/main/orchestration/simple/ingestion_workflow.py index 87766b8ff..4ddf5522d 100644 --- a/py/core/main/orchestration/simple/ingestion_workflow.py +++ b/py/core/main/orchestration/simple/ingestion_workflow.py @@ -68,7 +68,9 @@ async def ingest_files(input_data): try: # TODO - Move logic onto management service - collection_id=generate_default_user_collection_id(str(document_info.user_id)) + collection_id = generate_default_user_collection_id( + str(document_info.user_id) + ) await service.providers.database.assign_document_to_collection_relational( document_id=document_info.id, collection_id=collection_id, @@ -228,7 +230,9 @@ async def ingest_chunks(input_data): try: # TODO - Move logic onto management service - collection_id=generate_default_user_collection_id(str(document_info.user_id)) + collection_id = generate_default_user_collection_id( + str(document_info.user_id) + ) await service.providers.database.assign_document_to_collection_relational( document_id=document_info.id, collection_id=collection_id, diff --git a/py/core/main/services/auth_service.py b/py/core/main/services/auth_service.py index f085200a9..80f4aa333 100644 --- a/py/core/main/services/auth_service.py +++ b/py/core/main/services/auth_service.py @@ -45,8 +45,10 @@ async def verify_email( status_code=400, message="Email verification is not required" ) - user_id = await self.providers.database.get_user_id_by_verification_code( - verification_code + user_id = ( + await self.providers.database.get_user_id_by_verification_code( + verification_code + ) ) if not user_id: raise R2RException( @@ -123,8 +125,8 @@ async def update_user( bio: Optional[str] = None, profile_picture: Optional[str] = None, ) -> UserResponse: - user: UserResponse = ( - await self.providers.database.get_user_by_id(str(user_id)) + user: UserResponse = await self.providers.database.get_user_by_id( + str(user_id) ) if not user: raise R2RException(status_code=404, message="User not found") diff --git a/py/core/main/services/ingestion_service.py b/py/core/main/services/ingestion_service.py index 3e81519b5..4a87b791d 100644 --- a/py/core/main/services/ingestion_service.py +++ b/py/core/main/services/ingestion_service.py @@ -339,9 +339,7 @@ async def ingest_chunks_ingress( message=f"Document {document_id} was already ingested and is not in a failed state.", ) - await self.providers.database.upsert_documents_overview( - document_info - ) + await self.providers.database.upsert_documents_overview(document_info) return document_info diff --git a/py/core/main/services/management_service.py b/py/core/main/services/management_service.py index 18b4a1c4c..1b4ba1825 100644 --- a/py/core/main/services/management_service.py +++ b/py/core/main/services/management_service.py @@ -267,9 +267,7 @@ def validate_filters(filters: dict[str, Any]) -> None: logger.info(f"Deleting entries with filters: {filters}") try: - vector_delete_results = self.providers.database.delete( - filters - ) + vector_delete_results = self.providers.database.delete(filters) except Exception as e: logger.error(f"Error deleting from vector database: {e}") vector_delete_results = {} @@ -508,9 +506,7 @@ async def create_collection( @telemetry_event("GetCollection") async def get_collection(self, collection_id: UUID) -> Optional[dict]: - return await self.providers.database.get_collection( - collection_id - ) + return await self.providers.database.get_collection(collection_id) @telemetry_event("UpdateCollection") async def update_collection( @@ -528,9 +524,7 @@ async def delete_collection(self, collection_id: UUID) -> bool: await self.providers.database.delete_collection_relational( collection_id ) - await self.providers.database.delete_collection_vector( - collection_id - ) + await self.providers.database.delete_collection_vector(collection_id) return True @telemetry_event("ListCollections") @@ -553,10 +547,8 @@ async def add_user_to_collection( async def remove_user_from_collection( self, user_id: UUID, collection_id: UUID ) -> bool: - return ( - await self.providers.database.remove_user_from_collection( - user_id, collection_id - ) + return await self.providers.database.remove_user_from_collection( + user_id, collection_id ) @telemetry_event("GetUsersInCollection") diff --git a/py/core/pipes/kg/triples_extraction.py b/py/core/pipes/kg/triples_extraction.py index 0fa3fb594..08784f75e 100644 --- a/py/core/pipes/kg/triples_extraction.py +++ b/py/core/pipes/kg/triples_extraction.py @@ -260,9 +260,7 @@ async def _run_logic( # type: ignore ) for extraction in await self.database_provider.get_document_chunks( document_id=document_id - )[ - "results" - ] + )["results"] ] logger.info( diff --git a/py/core/pipes/retrieval/vector_search_pipe.py b/py/core/pipes/retrieval/vector_search_pipe.py index 04d772940..dbeccebdd 100644 --- a/py/core/pipes/retrieval/vector_search_pipe.py +++ b/py/core/pipes/retrieval/vector_search_pipe.py @@ -63,22 +63,18 @@ async def search( # type: ignore purpose=EmbeddingPurpose.QUERY, ) - search_results = await self.database_provider.semantic_search( - query_vector=query_vector, - search_settings=search_settings, + search_results = await ( + self.database_provider.hybrid_search( + query_vector=query_vector, + query_text=message, + search_settings=search_settings, + ) + if search_settings.use_hybrid_search + else self.database_provider.semantic_search( + query_vector=query_vector, + search_settings=search_settings, + ) ) - # search_results = await ( - # self.database_provider.hybrid_search( - # query_vector=query_vector, - # query_text=message, - # search_settings=search_settings, - # ) - # if search_settings.use_hybrid_search - # else self.database_provider.semantic_search( - # query_vector=query_vector, - # search_settings=search_settings, - # ) - # ) reranked_results = self.embedding_provider.rerank( query=message, results=search_results, diff --git a/py/core/providers/auth/r2r_auth.py b/py/core/providers/auth/r2r_auth.py index ed0c477c6..3f6208718 100644 --- a/py/core/providers/auth/r2r_auth.py +++ b/py/core/providers/auth/r2r_auth.py @@ -114,9 +114,7 @@ async def decode_token(self, token: str) -> TokenData: async def user(self, token: str = Depends(oauth2_scheme)) -> UserResponse: token_data = await self.decode_token(token) - user = await self.db_provider.get_user_by_email( - token_data.email - ) + user = await self.db_provider.get_user_by_email(token_data.email) if user is None: raise R2RException( status_code=401, message="Invalid authentication credentials" @@ -133,10 +131,8 @@ def get_current_active_user( async def register(self, email: str, password: str) -> Dict[str, str]: # Create new user and give them a default collection new_user = await self.db_provider.create_user(email, password) - default_collection = ( - await self.db_provider.create_default_collection( - new_user.id, - ) + default_collection = await self.db_provider.create_default_collection( + new_user.id, ) await self.db_provider.add_user_to_collection( @@ -168,19 +164,15 @@ async def register(self, email: str, password: str) -> Dict[str, str]: async def verify_email( self, email: str, verification_code: str ) -> dict[str, str]: - user_id = ( - await self.db_provider.get_user_id_by_verification_code( - verification_code - ) + user_id = await self.db_provider.get_user_id_by_verification_code( + verification_code ) if not user_id: raise R2RException( status_code=400, message="Invalid or expired verification code" ) await self.db_provider.mark_user_as_verified(user_id) - await self.db_provider.remove_verification_code( - verification_code - ) + await self.db_provider.remove_verification_code(verification_code) return {"message": "Email verified successfully"} async def login(self, email: str, password: str) -> Dict[str, Token]: @@ -292,9 +284,7 @@ async def request_password_reset(self, email: str) -> Dict[str, str]: reset_token = self.crypto_provider.generate_verification_code() expiry = datetime.now(timezone.utc) + timedelta(hours=1) - await self.db_provider.store_reset_token( - user.id, reset_token, expiry - ) + await self.db_provider.store_reset_token(user.id, reset_token, expiry) # TODO: Integrate with email provider to send reset link # self.email_provider.send_reset_email(email, reset_token) diff --git a/py/core/providers/database/postgres.py b/py/core/providers/database/postgres.py index 292893f78..8c3884ef8 100644 --- a/py/core/providers/database/postgres.py +++ b/py/core/providers/database/postgres.py @@ -11,11 +11,6 @@ PostgresConfigurationSettings, VectorQuantizationType, ) - -from .base import SemaphoreConnectionPool, DatabaseMixin - - - from core.providers.database.collection import CollectionMixin from core.providers.database.document import DocumentMixin from core.providers.database.tokens import BlacklistedTokensMixin @@ -23,9 +18,11 @@ from core.providers.database.vector import VectorDBMixin from shared.abstractions.vector import VectorQuantizationType +from .base import DatabaseMixin, SemaphoreConnectionPool logger = logging.getLogger() + def get_env_var(new_var, old_var, config_value): value = config_value or os.getenv(new_var) or os.getenv(old_var) if os.getenv(old_var) and not os.getenv(new_var): @@ -42,8 +39,7 @@ class PostgresDBProvider( BlacklistedTokensMixin, UserMixin, VectorDBMixin, - - ): +): user: str password: str host: str @@ -112,7 +108,7 @@ def __init__( logger.info("Connecting to Postgres via TCP/IP") self.dimension = dimension - self.vector_db_quantization_type = quantization_type + self.quantization_type = quantization_type self.conn = None self.config: DatabaseConfig = config self.crypto_provider = crypto_provider @@ -123,6 +119,7 @@ def __init__( self.default_collection_description = ( config.default_collection_description ) + self.enable_fts = config.enable_fts self.pool: Optional[SemaphoreConnectionPool] = None @@ -224,9 +221,9 @@ async def execute_many(self, query, params=None, batch_size=1000): async def fetch_query(self, query, params=None): async with self.pool.get_connection() as conn: - print('query', query) - print('params', params) - + print("query", query) + print("params", params) + async with conn.transaction(): return ( await conn.fetch(query, *params) diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index fd1b35f8b..11abd4aae 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -5,7 +5,7 @@ import uuid from typing import Any, Optional, Tuple, TypedDict, Union -from sqlalchemy import text +from sqlalchemy import func, text from core.base import VectorEntry, VectorQuantizationType, VectorSearchResult from core.base.abstractions import VectorSearchSettings @@ -48,9 +48,6 @@ class VectorDBMixin(DatabaseMixin): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.project_name = kwargs.get("project_name") - self.dimension = kwargs.get("dimension") - self.quantization_type = kwargs.get("quantization_type") async def create_table(self): # TODO - Move ids to `UUID` type @@ -64,12 +61,18 @@ async def create_table(self): vec vector({self.dimension}), text TEXT, metadata JSONB + {",fts tsvector GENERATED ALWAYS AS (to_tsvector('english', text)) STORED" if self.enable_fts else ""} ); CREATE INDEX IF NOT EXISTS idx_vectors_document_id ON {self._get_table_name(self.project_name)} (document_id); CREATE INDEX IF NOT EXISTS idx_vectors_user_id ON {self._get_table_name(self.project_name)} (user_id); CREATE INDEX IF NOT EXISTS idx_vectors_collection_ids ON {self._get_table_name(self.project_name)} USING GIN (collection_ids); CREATE INDEX IF NOT EXISTS idx_vectors_text ON {self._get_table_name(self.project_name)} USING GIN (to_tsvector('english', text)); """ + if self.enable_fts: + query += f""" + CREATE INDEX IF NOT EXISTS idx_vectors_text ON {self._get_table_name(self.project_name)} USING GIN (to_tsvector('english', text)); + """ + await self.execute_query(query) async def upsert(self, entry: VectorEntry) -> None: @@ -125,44 +128,61 @@ async def upsert_entries(self, entries: list[VectorEntry]) -> None: ] await self.execute_many(query, params) - # async def semantic_search( - # self, query_vector: list[float], search_settings: VectorSearchSettings - # ) -> list[VectorSearchResult]: - # try: - # imeasure_obj = IndexMeasure(search_settings.index_measure) - # except ValueError: - # raise ValueError("Invalid index measure") - - # distance_func = self._get_distance_function(imeasure_obj) - - # cols = [ - # f"{self._get_table_name(self.project_name)}.extraction_id", - # f"{self._get_table_name(self.project_name)}.document_id", - # f"{self._get_table_name(self.project_name)}.user_id", - # f"{self._get_table_name(self.project_name)}.collection_ids", - # f"{self._get_table_name(self.project_name)}.text", - # ] - - # if search_settings.include_values: - # cols.append(f"{self._get_table_name(self.project_name)}.vec {distance_func} :vec AS distance") - - # if search_settings.include_metadatas: - # cols.append(f"{self._get_table_name(self.project_name)}.metadata") - - # select_clause = ", ".join(cols) - - # where_clause = "TRUE" - # if search_settings.filters: - # where_clause = self._build_filters(search_settings.filters) - + # async def upsert(self, entry: VectorEntry) -> None: # query = f""" - # SELECT {select_clause} - # FROM {self._get_table_name(self.project_name)} - # WHERE {where_clause} - # ORDER BY {self._get_table_name(self.project_name)}.vec {distance_func} :vec - # OFFSET :offset - # LIMIT :limit + # INSERT INTO {self._get_table_name(self.project_name)} + # {"(extraction_id, document_id, user_id, collection_ids, vec, text, metadata)" if not self.enable_fts else "(extraction_id, document_id, user_id, collection_ids, vec, text, metadata, fts)"} + # VALUES {"($1, $2, $3, $4, $5, $6, $7)" if not self.enable_fts else "($1, $2, $3, $4, $5, $6, $7, $8)"} + # ON CONFLICT (extraction_id) DO UPDATE SET + # document_id = EXCLUDED.document_id, + # user_id = EXCLUDED.user_id, + # collection_ids = EXCLUDED.collection_ids, + # vec = EXCLUDED.vec, + # text = EXCLUDED.text, + # metadata = EXCLUDED.metadata; # """ + # await self.execute_query( + # query, + # ( + # entry.extraction_id, + # entry.document_id, + # entry.user_id, + # entry.collection_ids, + # str(entry.vector.data), + # entry.text, + # json.dumps(entry.metadata), + # func.ts_vector(entry.text) if self.enable_fts else None, + # ), + # ) + + # async def upsert_entries(self, entries: list[VectorEntry]) -> None: + # query = f""" + # INSERT INTO {self._get_table_name(self.project_name)} + # {"(extraction_id, document_id, user_id, collection_ids, vec, text, metadata)" if not self.enable_fts else "(extraction_id, document_id, user_id, collection_ids, vec, text, metadata, fts)"} + # VALUES {"($1, $2, $3, $4, $5, $6, $7)" if not self.enable_fts else "($1, $2, $3, $4, $5, $6, $7, $8)"} + # ON CONFLICT (extraction_id) DO UPDATE SET + # document_id = EXCLUDED.document_id, + # user_id = EXCLUDED.user_id, + # collection_ids = EXCLUDED.collection_ids, + # vec = EXCLUDED.vec, + # text = EXCLUDED.text, + # metadata = EXCLUDED.metadata; + # """ + # params = [ + # ( + # entry.extraction_id, + # entry.document_id, + # entry.user_id, + # entry.collection_ids, + # str(entry.vector.data), + # entry.text, + # json.dumps(entry.metadata), + # func.ts_vector(entry.text) if self.enable_fts else None, + # ) + # for entry in entries + # ] + # await self.execute_many(query, params) + async def semantic_search( self, query_vector: list[float], search_settings: VectorSearchSettings ) -> list[VectorSearchResult]: @@ -182,7 +202,9 @@ async def semantic_search( ] if search_settings.include_values: - cols.append(f"{self._get_table_name(self.project_name)}.vec {distance_func} $1 AS distance") + cols.append( + f"{self._get_table_name(self.project_name)}.vec {distance_func} $1 AS distance" + ) if search_settings.include_metadatas: cols.append(f"{self._get_table_name(self.project_name)}.metadata") @@ -191,7 +213,9 @@ async def semantic_search( where_clause = "" if search_settings.filters: - where_clause = f"WHERE {self._build_filters(search_settings.filters)}" + where_clause = ( + f"WHERE {self._build_filters(search_settings.filters)}" + ) query = f""" SELECT {select_clause} @@ -202,23 +226,14 @@ async def semantic_search( OFFSET $3 """ - # params = { - # "vec_1": query_vector, - # "param_1": search_settings.search_limit, - # "param_2": search_settings.offset, - # } - print('raw query = ', query) - - # Set index-specific session parameters - # await self.execute_query( - # "SET LOCAL ivfflat.probes = :probes", {"probes": search_settings.probes} - # ) - # await self.execute_query( - # "SET LOCAL hnsw.ef_search = :ef_search", - # {"ef_search": max(search_settings.ef_search, search_settings.search_limit)}, - # ) - - results = await self.fetch_query(query, (str(query_vector), search_settings.search_limit, search_settings.offset)) # , params) + results = await self.fetch_query( + query, + ( + str(query_vector), + search_settings.search_limit, + search_settings.offset, + ), + ) # , params) return [ VectorSearchResult( @@ -227,128 +242,138 @@ async def semantic_search( user_id=result["user_id"], collection_ids=result["collection_ids"], text=result["text"], - score=(1-float(result["distance"])) if search_settings.include_values else None, - metadata=json.loads(result["metadata"]) if search_settings.include_metadatas else None, + score=( + (1 - float(result["distance"])) + if search_settings.include_values + else None + ), + metadata=( + json.loads(result["metadata"]) + if search_settings.include_metadatas + else None + ), ) for result in results ] -# async def semantic_search( -# self, query_vector: list[float], search_settings: VectorSearchSettings -# ) -> list[VectorSearchResult]: -# try: -# imeasure_obj = IndexMeasure(search_settings.index_measure) -# except ValueError: -# raise ValueError("Invalid index measure") - -# distance_func = self._get_distance_function(imeasure_obj) - -# cols = [ -# "extraction_id", -# "document_id", -# "user_id", -# "collection_ids", -# "text", -# ] - -# if search_settings.include_values: -# cols.append(f"{distance_func}(vec, $1::vector) as distance") - -# if search_settings.include_metadatas: -# cols.append("metadata") - -# select_clause = ", ".join(cols) - -# where_clause = "TRUE" -# if search_settings.filters: -# where_clause = self._build_filters(search_settings.filters) - -# # query = f""" -# # SELECT {select_clause} -# # FROM {self._get_table_name(self.project_name) -# # } -# # WHERE {where_clause} -# # ORDER BY {distance_func}(vec, $1::vector) -# # OFFSET $2 -# # LIMIT $3 -# # """ -# vector_str = f"ARRAY[{','.join(map(str, query_vector))}]::vector" - -# query = f""" -# SELECT {select_clause} -# FROM {self._get_table_name(self.project_name)} -# WHERE {where_clause} -# ORDER BY {distance_func}(vec, {vector_str}) -# OFFSET $1 -# LIMIT $2 -# """ -# params = [ -# query_vector, -# search_settings.offset, -# search_settings.search_limit, -# ] -# print('query = ', query) -# print('params = ', params) - -# # Set index-specific session parameters -# await self.execute_query( -# "SET LOCAL ivfflat.probes = $1", [search_settings.probes] -# ) -# await self.execute_query( -# "SET LOCAL hnsw.ef_search = $1", -# [max(search_settings.ef_search, search_settings.search_limit)], -# ) - -# results = await self.fetch_query(query, params) - -# return [ -# VectorSearchResult( -# extraction_id=result["extraction_id"], -# document_id=result["document_id"], -# user_id=result["user_id"], -# collection_ids=result["collection_ids"], -# text=result["text"], -# score=float(result["rank"]), -# metadata=result["metadata"], -# ) -# for result in results -# ] - async def full_text_search( self, query_text: str, search_settings: VectorSearchSettings ) -> list[VectorSearchResult]: + if not self.enable_fts: + raise ValueError( + "Full-text search is not enabled for this collection." + ) + query = f""" - SELECT extraction_id, document_id, user_id, collection_ids, text, - ts_rank_cd(to_tsvector('english', text), plainto_tsquery('english', $1)) as rank, - metadata - FROM {self._get_table_name(self.project_name)} - WHERE collection_ids && $2 AND to_tsvector('english', text) @@ plainto_tsquery('english', $1) - ORDER BY rank DESC - LIMIT $3 OFFSET $4; + SELECT + extraction_id, document_id, user_id, collection_ids, text, metadata, + ts_rank(fts, websearch_to_tsquery('english', $1), 32) as rank + FROM {self._get_table_name(self.project_name)} + WHERE fts @@ websearch_to_tsquery('english', $1) """ - results = await self.fetch_query( - query, - ( - query_text, - search_settings.selected_collection_ids, - search_settings.search_limit, + # AND collection_ids && $2 + + # if search_settings.filters: + # filter_clause, filter_params = self._build_filters(search_settings.filters) + # query += f" AND {filter_clause}" + # params = [query_text, search_settings.selected_collection_ids] + filter_params + # else: + params = [query_text] # , search_settings.selected_collection_ids] + + query += """ + ORDER BY rank DESC + OFFSET $2 LIMIT $3 + """ + params.extend( + [ search_settings.offset, - ), + search_settings.hybrid_search_settings.full_text_limit, + ] ) + results = await self.fetch_query(query, params) return [ VectorSearchResult( - extraction_id=result["extraction_id"], - document_id=result["document_id"], - user_id=result["user_id"], - collection_ids=result["collection_ids"], - text=result["text"], - score=float(result["rank"]), - metadata=result["metadata"], + extraction_id=str(r["extraction_id"]), + document_id=str(r["document_id"]), + user_id=str(r["user_id"]), + collection_ids=r["collection_ids"], + text=r["text"], + score=float(r["rank"]), + metadata=json.loads(r["metadata"]), ) - for result in results + for r in results ] + # async def full_text_search( + # self, query_text: str, search_settings: VectorSearchSettings + # ) -> list[VectorSearchResult]: + # if not self.enable_fts: + # raise ValueError("Full-text search is not enabled for this collection.") + # query = f""" + # SELECT extraction_id, document_id, user_id, collection_ids, text, + # ts_rank_cd(to_tsvector('english', text), plainto_tsquery('english', $1)) as rank, + # metadata + # FROM {self._get_table_name(self.project_name)} + # WHERE collection_ids && $2 AND to_tsvector('english', text) @@ plainto_tsquery('english', $1) + # ORDER BY rank DESC + # LIMIT $3 OFFSET $4; + # """ + # results = await self.fetch_query( + # query, + # ( + # query_text, + # search_settings.selected_collection_ids, + # search_settings.search_limit, + # search_settings.offset, + # ), + # ) + + # return [ + # VectorSearchResult( + # extraction_id=result["extraction_id"], + # document_id=result["document_id"], + # user_id=result["user_id"], + # collection_ids=result["collection_ids"], + # text=result["text"], + # score=float(result["rank"]), + # metadata=result["metadata"], + # ) + # for result in results + # ] + + # query = f""" + # SELECT extraction_id, document_id, user_id, collection_ids, text, + # ts_rank_cd(to_tsvector('english', text), plainto_tsquery('english', $1)) as rank, + # metadata + # FROM {self._get_table_name(self.project_name)} + # WHERE collection_ids && $2 AND to_tsvector('english', text) @@ plainto_tsquery('english', $1) + # ORDER BY rank DESC + # LIMIT $3 OFFSET $4; + # """ + # results = await self.fetch_query( + # query, + # ( + # query_text, + # search_settings.selected_collection_ids, + # search_settings.search_limit, + # search_settings.offset, + # ), + # ) + + # return [ + # VectorSearchResult( + # extraction_id=result["extraction_id"], + # document_id=result["document_id"], + # user_id=result["user_id"], + # collection_ids=result["collection_ids"], + # text=result["text"], + # score=float(result["rank"]), + # metadata=result["metadata"], + # ) + # for result in results + # ] + async def hybrid_search( self, query_text: str, @@ -531,7 +556,7 @@ async def get_document_chunks( OFFSET $2 {limit_clause}; """ - + params = [document_id, offset] results = await self.fetch_query(query, params) diff --git a/py/r2r.toml b/py/r2r.toml index 55e31642b..cad4c0c51 100644 --- a/py/r2r.toml +++ b/py/r2r.toml @@ -35,6 +35,7 @@ provider = "bcrypt" provider = "postgres" default_collection_name = "Default" default_collection_description = "Your default collection." +enable_fts = true # whether or not to enable full-text search, e.g `hybrid search` [embedding] provider = "litellm" From ab0558b666849e28890870d8ae98655299eee5d2 Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Fri, 18 Oct 2024 14:54:06 -0700 Subject: [PATCH 18/27] remove asyncpg --- py/core/providers/database/base.py | 10 ++++----- py/core/providers/database/document.py | 29 -------------------------- py/core/providers/database/vector.py | 2 -- 3 files changed, 4 insertions(+), 37 deletions(-) diff --git a/py/core/providers/database/base.py b/py/core/providers/database/base.py index aa6ea6396..8289fecc7 100644 --- a/py/core/providers/database/base.py +++ b/py/core/providers/database/base.py @@ -1,11 +1,9 @@ import asyncio import logging from contextlib import asynccontextmanager -from typing import TYPE_CHECKING, Any, List, Optional, Sequence, Union +from typing import Any, Optional, Sequence, Union import asyncpg -from sqlalchemy import TextClause, text - logger = logging.getLogger() @@ -108,7 +106,7 @@ def _get_table_name(self, base_name: str) -> str: def execute_query( self, - query: Union[str, TextClause], + query: str, params: Optional[Union[dict[str, Any], Sequence[Any]]] = None, isolation_level: Optional[str] = None, ): @@ -119,14 +117,14 @@ async def execute_many(self, query, params=None, batch_size=1000): def fetch_query( self, - query: Union[str, TextClause], + query: str, params: Optional[Union[dict[str, Any], Sequence[Any]]] = None, ): raise NotImplementedError("Subclasses must implement this method") def fetchrow_query( self, - query: Union[str, TextClause], + query: str, params: Optional[Union[dict[str, Any], Sequence[Any]]] = None, ): raise NotImplementedError("Subclasses must implement this method") diff --git a/py/core/providers/database/document.py b/py/core/providers/database/document.py index 7bf917b65..c2f1c2395 100644 --- a/py/core/providers/database/document.py +++ b/py/core/providers/database/document.py @@ -5,17 +5,6 @@ from uuid import UUID import asyncpg -from sqlalchemy import ( - ARRAY, - JSON, - Column, - DateTime, - Integer, - MetaData, - String, - Table, -) -from sqlalchemy.dialects.postgresql import UUID as SqlUUID from core.base import ( DocumentInfo, @@ -34,24 +23,6 @@ class DocumentMixin(DatabaseMixin): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.metadata = MetaData() - self.document_info_table = Table( - self._get_table_name("document_info"), - self.metadata, - Column("document_id", SqlUUID, primary_key=True), - Column("collection_ids", ARRAY(SqlUUID)), - Column("user_id", SqlUUID), - Column("type", String), - Column("metadata", JSON), - Column("title", String), - Column("version", String), - Column("size_in_bytes", Integer), - Column("ingestion_status", String), - Column("kg_extraction_status", String), - Column("created_at", DateTime), - Column("updated_at", DateTime), - Column("ingestion_attempt_number", Integer, default=0), - ) async def create_table(self): query = f""" diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index 11abd4aae..1edcd0223 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -5,8 +5,6 @@ import uuid from typing import Any, Optional, Tuple, TypedDict, Union -from sqlalchemy import func, text - from core.base import VectorEntry, VectorQuantizationType, VectorSearchResult from core.base.abstractions import VectorSearchSettings from shared.abstractions.vector import ( From 5ab27121404875170c9b4ffdeda53dbd5055716d Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Fri, 18 Oct 2024 16:44:27 -0700 Subject: [PATCH 19/27] passing vector tests --- .../scripts/upload_hf_textbooks_ex.py | 1 - py/core/providers/database/base.py | 7 + py/core/providers/database/collection.py | 23 +- py/core/providers/database/document.py | 16 +- py/core/providers/database/postgres.py | 3 - py/core/providers/database/tokens.py | 18 +- py/core/providers/database/user.py | 45 +-- py/core/providers/database/vector.py | 354 ++++++------------ py/tests/conftest.py | 11 +- .../pipes/test_kg_community_summary_pipe.py | 18 +- .../database/relational/test_collection_db.py | 106 +++--- .../database/relational/test_document_db.py | 56 +-- .../relational/test_relational_db_provider.py | 2 +- .../database/relational/test_user_db.py | 73 ++-- .../database/test_vector_db_provider.py | 78 ++-- 15 files changed, 334 insertions(+), 477 deletions(-) diff --git a/py/core/examples/scripts/upload_hf_textbooks_ex.py b/py/core/examples/scripts/upload_hf_textbooks_ex.py index 6ee9b724c..7f456036e 100644 --- a/py/core/examples/scripts/upload_hf_textbooks_ex.py +++ b/py/core/examples/scripts/upload_hf_textbooks_ex.py @@ -25,7 +25,6 @@ def remove_file(file_path): async def process_batch(client, batch): results = await client.ingest_files(batch) print(f"Submitted {len(results['results'])} files for processing") - print("results = ", results["results"]) # Remove the processed files for file_path in batch: remove_file(file_path) diff --git a/py/core/providers/database/base.py b/py/core/providers/database/base.py index 8289fecc7..fd87ef2f8 100644 --- a/py/core/providers/database/base.py +++ b/py/core/providers/database/base.py @@ -15,6 +15,10 @@ def __init__(self, connection_string, postgres_configuration_settings): async def initialize(self): try: + logger.info( + f"Connecting with {int(self.postgres_configuration_settings.max_connections * 0.9)} connections to `asyncpg.create_pool`." + ) + self.semaphore = asyncio.Semaphore( int(self.postgres_configuration_settings.max_connections * 0.9) ) @@ -38,6 +42,9 @@ async def get_connection(self): async with self.pool.acquire() as conn: yield conn + async def close(self): + await self.pool.close() + class QueryBuilder: def __init__(self, table_name: str): diff --git a/py/core/providers/database/collection.py b/py/core/providers/database/collection.py index 6c79b6173..c2a6dafed 100644 --- a/py/core/providers/database/collection.py +++ b/py/core/providers/database/collection.py @@ -18,9 +18,11 @@ class CollectionMixin(DatabaseMixin): + TABLE_NAME = "collections" + async def create_table(self) -> None: query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name('collections')} ( + CREATE TABLE IF NOT EXISTS {self._get_table_name(CollectionMixin.TABLE_NAME)} ( collection_id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), name TEXT NOT NULL, description TEXT, @@ -59,7 +61,7 @@ async def create_default_collection( async def collection_exists(self, collection_id: UUID) -> bool: """Check if a collection exists.""" query = f""" - SELECT 1 FROM {self._get_table_name('collections')} + SELECT 1 FROM {self._get_table_name(CollectionMixin.TABLE_NAME)} WHERE collection_id = $1 """ result = await self.fetchrow_query(query, [collection_id]) @@ -73,7 +75,7 @@ async def create_collection( ) -> CollectionResponse: current_time = datetime.utcnow() query = f""" - INSERT INTO {self._get_table_name('collections')} (collection_id, name, description, created_at, updated_at) + INSERT INTO {self._get_table_name(CollectionMixin.TABLE_NAME)} (collection_id, name, description, created_at, updated_at) VALUES ($1, $2, $3, $4, $5) RETURNING collection_id, name, description, created_at, updated_at """ @@ -114,7 +116,7 @@ async def get_collection(self, collection_id: UUID) -> CollectionResponse: query = f""" SELECT collection_id, name, description, created_at, updated_at - FROM {self._get_table_name('collections')} + FROM {self._get_table_name(CollectionMixin.TABLE_NAME)} WHERE collection_id = $1 """ result = await self.fetchrow_query(query, [collection_id]) @@ -157,7 +159,7 @@ async def update_collection( params.append(collection_id) query = f""" - UPDATE {self._get_table_name('collections')} + UPDATE {self._get_table_name(CollectionMixin.TABLE_NAME)} SET {', '.join(update_fields)} WHERE collection_id = ${len(params)} RETURNING collection_id, name, description, created_at, updated_at @@ -204,11 +206,12 @@ async def delete_collection_relational(self, collection_id: UUID) -> None: # Delete the collection delete_query = f""" - DELETE FROM {self._get_table_name('collections')} + DELETE FROM {self._get_table_name(CollectionMixin.TABLE_NAME)} WHERE collection_id = $1 RETURNING collection_id """ deleted = await conn.fetchrow(delete_query, collection_id) + print("deleted = ", deleted) if not deleted: raise R2RException( @@ -230,7 +233,7 @@ async def list_collections( """List collections with pagination.""" query = f""" SELECT collection_id, name, description, created_at, updated_at, COUNT(*) OVER() AS total_entries - FROM {self._get_table_name('collections')} + FROM {self._get_table_name(CollectionMixin.TABLE_NAME)} ORDER BY name OFFSET $1 """ @@ -346,7 +349,7 @@ async def get_collections_overview( SELECT g.collection_id, g.name, g.description, g.created_at, g.updated_at, COUNT(DISTINCT u.user_id) AS user_count, COUNT(DISTINCT d.document_id) AS document_count - FROM {self._get_table_name('collections')} g + FROM {self._get_table_name(CollectionMixin.TABLE_NAME)} g LEFT JOIN {self._get_table_name('users')} u ON g.collection_id = ANY(u.collection_ids) LEFT JOIN {self._get_table_name('document_info')} d ON g.collection_id = ANY(d.collection_ids) {' WHERE g.collection_id = ANY($1)' if collection_ids else ''} @@ -397,7 +400,7 @@ async def get_collections_for_user( ) -> dict[str, Union[list[CollectionResponse], int]]: query = f""" SELECT g.collection_id, g.name, g.description, g.created_at, g.updated_at, COUNT(*) OVER() AS total_entries - FROM {self._get_table_name('collections')} g + FROM {self._get_table_name(CollectionMixin.TABLE_NAME)} g JOIN {self._get_table_name('users')} u ON g.collection_id = ANY(u.collection_ids) WHERE u.user_id = $1 ORDER BY g.name @@ -495,7 +498,7 @@ async def document_collections( ) -> dict[str, Union[list[CollectionResponse], int]]: query = f""" SELECT g.collection_id, g.name, g.description, g.created_at, g.updated_at, COUNT(*) OVER() AS total_entries - FROM {self._get_table_name('collections')} g + FROM {self._get_table_name(CollectionMixin.TABLE_NAME)} g JOIN {self._get_table_name('document_info')} d ON g.collection_id = ANY(d.collection_ids) WHERE d.document_id = $1 ORDER BY g.name diff --git a/py/core/providers/database/document.py b/py/core/providers/database/document.py index c2f1c2395..3e0890bd9 100644 --- a/py/core/providers/database/document.py +++ b/py/core/providers/database/document.py @@ -21,12 +21,14 @@ class DocumentMixin(DatabaseMixin): + TABLE_NAME = "document_info" + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) async def create_table(self): query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name('document_info')} ( + CREATE TABLE IF NOT EXISTS {self._get_table_name(DocumentMixin.TABLE_NAME)} ( document_id UUID PRIMARY KEY, collection_ids UUID[], user_id UUID, @@ -42,7 +44,7 @@ async def create_table(self): ingestion_attempt_number INT DEFAULT 0 ); CREATE INDEX IF NOT EXISTS idx_collection_ids_{self.project_name} - ON {self._get_table_name('document_info')} USING GIN (collection_ids); + ON {self._get_table_name(DocumentMixin.TABLE_NAME)} USING GIN (collection_ids); """ await self.execute_query(query) @@ -62,7 +64,7 @@ async def upsert_documents_overview( async with conn.transaction(): # Lock the row for update check_query = f""" - SELECT ingestion_attempt_number, ingestion_status FROM {self._get_table_name('document_info')} + SELECT ingestion_attempt_number, ingestion_status FROM {self._get_table_name(DocumentMixin.TABLE_NAME)} WHERE document_id = $1 FOR UPDATE """ existing_doc = await conn.fetchrow( @@ -95,7 +97,7 @@ async def upsert_documents_overview( ) update_query = f""" - UPDATE {self._get_table_name('document_info')} + UPDATE {self._get_table_name(DocumentMixin.TABLE_NAME)} SET collection_ids = $1, user_id = $2, type = $3, metadata = $4, title = $5, version = $6, size_in_bytes = $7, ingestion_status = $8, kg_extraction_status = $9, updated_at = $10, ingestion_attempt_number = $11 @@ -118,7 +120,7 @@ async def upsert_documents_overview( ) else: insert_query = f""" - INSERT INTO {self._get_table_name('document_info')} + INSERT INTO {self._get_table_name(DocumentMixin.TABLE_NAME)} (document_id, collection_ids, user_id, type, metadata, title, version, size_in_bytes, ingestion_status, kg_extraction_status, created_at, updated_at, ingestion_attempt_number) @@ -160,7 +162,7 @@ async def delete_from_documents_overview( self, document_id: str, version: Optional[str] = None ) -> None: query = f""" - DELETE FROM {self._get_table_name('document_info')} + DELETE FROM {self._get_table_name(DocumentMixin.TABLE_NAME)} WHERE document_id = $1 """ @@ -357,7 +359,7 @@ async def get_documents_overview( param_index += 1 base_query = f""" - FROM {self._get_table_name('document_info')} + FROM {self._get_table_name(DocumentMixin.TABLE_NAME)} """ if conditions: diff --git a/py/core/providers/database/postgres.py b/py/core/providers/database/postgres.py index 8c3884ef8..c24d4cf53 100644 --- a/py/core/providers/database/postgres.py +++ b/py/core/providers/database/postgres.py @@ -221,9 +221,6 @@ async def execute_many(self, query, params=None, batch_size=1000): async def fetch_query(self, query, params=None): async with self.pool.get_connection() as conn: - print("query", query) - print("params", params) - async with conn.transaction(): return ( await conn.fetch(query, *params) diff --git a/py/core/providers/database/tokens.py b/py/core/providers/database/tokens.py index 009c6aaf9..9ae795031 100644 --- a/py/core/providers/database/tokens.py +++ b/py/core/providers/database/tokens.py @@ -5,17 +5,19 @@ class BlacklistedTokensMixin(DatabaseMixin): + TABLE_NAME = "blacklisted_tokens" + async def create_table(self): query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name('blacklisted_tokens')} ( + CREATE TABLE IF NOT EXISTS {self._get_table_name(BlacklistedTokensMixin.TABLE_NAME)} ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), token TEXT NOT NULL, blacklisted_at TIMESTAMPTZ DEFAULT NOW() ); - CREATE INDEX IF NOT EXISTS idx_blacklisted_tokens_{self.project_name}_token - ON {self._get_table_name('blacklisted_tokens')} (token); - CREATE INDEX IF NOT EXISTS idx_blacklisted_tokens_{self.project_name}_blacklisted_at - ON {self._get_table_name('blacklisted_tokens')} (blacklisted_at); + CREATE INDEX IF NOT EXISTS idx_{self.project_name}_{BlacklistedTokensMixin.TABLE_NAME}_token + ON {self._get_table_name(BlacklistedTokensMixin.TABLE_NAME)} (token); + CREATE INDEX IF NOT EXISTS idx_{self.project_name}_{BlacklistedTokensMixin.TABLE_NAME}_blacklisted_at + ON {self._get_table_name(BlacklistedTokensMixin.TABLE_NAME)} (blacklisted_at); """ await self.execute_query(query) @@ -26,14 +28,14 @@ async def blacklist_token( current_time = datetime.utcnow() query = f""" - INSERT INTO {self._get_table_name("blacklisted_tokens")} (token, blacklisted_at) + INSERT INTO {self._get_table_name(BlacklistedTokensMixin.TABLE_NAME)} (token, blacklisted_at) VALUES ($1, $2) """ await self.execute_query(query, [token, current_time]) async def is_token_blacklisted(self, token: str) -> bool: query = f""" - SELECT 1 FROM {self._get_table_name("blacklisted_tokens")} + SELECT 1 FROM {self._get_table_name(BlacklistedTokensMixin.TABLE_NAME)} WHERE token = $1 LIMIT 1 """ @@ -50,7 +52,7 @@ async def clean_expired_blacklisted_tokens( expiry_time = current_time - timedelta(hours=max_age_hours) query = f""" - DELETE FROM {self._get_table_name("blacklisted_tokens")} + DELETE FROM {self._get_table_name(BlacklistedTokensMixin.TABLE_NAME)} WHERE blacklisted_at < $1 """ await self.execute_query(query, [expiry_time]) diff --git a/py/core/providers/database/user.py b/py/core/providers/database/user.py index 0af7c2a7c..b6722926b 100644 --- a/py/core/providers/database/user.py +++ b/py/core/providers/database/user.py @@ -10,10 +10,11 @@ class UserMixin(DatabaseMixin): + TABLE_NAME = "users" async def create_table(self): query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name('users')} ( + CREATE TABLE IF NOT EXISTS {self._get_table_name(UserMixin.TABLE_NAME)} ( user_id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), email TEXT UNIQUE NOT NULL, hashed_password TEXT NOT NULL, @@ -130,7 +131,7 @@ async def create_user(self, email: str, password: str) -> UserResponse: hashed_password = self.crypto_provider.get_password_hash(password) # type: ignore query = f""" - INSERT INTO {self._get_table_name('users')} + INSERT INTO {self._get_table_name(UserMixin.TABLE_NAME)} (email, user_id, hashed_password, collection_ids) VALUES ($1, $2, $3, $4) RETURNING user_id, email, is_superuser, is_active, is_verified, created_at, updated_at, collection_ids @@ -158,7 +159,7 @@ async def create_user(self, email: str, password: str) -> UserResponse: async def update_user(self, user: UserResponse) -> UserResponse: query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(UserMixin.TABLE_NAME)} SET email = $1, is_superuser = $2, is_active = $3, is_verified = $4, updated_at = NOW(), name = $5, profile_picture = $6, bio = $7, collection_ids = $8 WHERE user_id = $9 @@ -201,7 +202,7 @@ async def update_user(self, user: UserResponse) -> UserResponse: async def delete_user_relational(self, user_id: UUID) -> None: # Get the collections the user belongs to collection_query = f""" - SELECT collection_ids FROM {self._get_table_name('users')} + SELECT collection_ids FROM {self._get_table_name(UserMixin.TABLE_NAME)} WHERE user_id = $1 """ collection_result = await self.fetchrow_query( @@ -221,7 +222,7 @@ async def delete_user_relational(self, user_id: UUID) -> None: # Delete the user delete_query = f""" - DELETE FROM {self._get_table_name('users')} + DELETE FROM {self._get_table_name(UserMixin.TABLE_NAME)} WHERE user_id = $1 RETURNING user_id """ @@ -234,7 +235,7 @@ async def update_user_password( self, user_id: UUID, new_hashed_password: str ): query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(UserMixin.TABLE_NAME)} SET hashed_password = $1, updated_at = NOW() WHERE user_id = $2 """ @@ -243,7 +244,7 @@ async def update_user_password( async def get_all_users(self) -> list[UserResponse]: query = f""" SELECT user_id, email, is_superuser, is_active, is_verified, created_at, updated_at, collection_ids - FROM {self._get_table_name('users')} + FROM {self._get_table_name(UserMixin.TABLE_NAME)} """ results = await self.fetch_query(query) @@ -266,7 +267,7 @@ async def store_verification_code( self, user_id: UUID, verification_code: str, expiry: datetime ): query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(UserMixin.TABLE_NAME)} SET verification_code = $1, verification_code_expiry = $2 WHERE user_id = $3 """ @@ -274,7 +275,7 @@ async def store_verification_code( async def verify_user(self, verification_code: str) -> None: query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(UserMixin.TABLE_NAME)} SET is_verified = TRUE, verification_code = NULL, verification_code_expiry = NULL WHERE verification_code = $1 AND verification_code_expiry > NOW() RETURNING user_id @@ -288,7 +289,7 @@ async def verify_user(self, verification_code: str) -> None: async def remove_verification_code(self, verification_code: str): query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(UserMixin.TABLE_NAME)} SET verification_code = NULL, verification_code_expiry = NULL WHERE verification_code = $1 """ @@ -296,7 +297,7 @@ async def remove_verification_code(self, verification_code: str): async def expire_verification_code(self, user_id: UUID): query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(UserMixin.TABLE_NAME)} SET verification_code_expiry = NOW() - INTERVAL '1 day' WHERE user_id = $1 """ @@ -306,7 +307,7 @@ async def store_reset_token( self, user_id: UUID, reset_token: str, expiry: datetime ): query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(UserMixin.TABLE_NAME)} SET reset_token = $1, reset_token_expiry = $2 WHERE user_id = $3 """ @@ -316,7 +317,7 @@ async def get_user_id_by_reset_token( self, reset_token: str ) -> Optional[UUID]: query = f""" - SELECT user_id FROM {self._get_table_name('users')} + SELECT user_id FROM {self._get_table_name(UserMixin.TABLE_NAME)} WHERE reset_token = $1 AND reset_token_expiry > NOW() """ result = await self.fetchrow_query(query, [reset_token]) @@ -324,7 +325,7 @@ async def get_user_id_by_reset_token( async def remove_reset_token(self, user_id: UUID): query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(UserMixin.TABLE_NAME)} SET reset_token = NULL, reset_token_expiry = NULL WHERE user_id = $1 """ @@ -332,7 +333,7 @@ async def remove_reset_token(self, user_id: UUID): async def remove_user_from_all_collections(self, user_id: UUID): query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(UserMixin.TABLE_NAME)} SET collection_ids = ARRAY[]::UUID[] WHERE user_id = $1 """ @@ -345,7 +346,7 @@ async def add_user_to_collection( raise R2RException(status_code=404, message="User not found") query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(UserMixin.TABLE_NAME)} SET collection_ids = array_append(collection_ids, $1) WHERE user_id = $2 AND NOT ($1 = ANY(collection_ids)) RETURNING user_id @@ -366,7 +367,7 @@ async def remove_user_from_collection( raise R2RException(status_code=404, message="User not found") query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(UserMixin.TABLE_NAME)} SET collection_ids = array_remove(collection_ids, $1) WHERE user_id = $2 AND $1 = ANY(collection_ids) RETURNING user_id @@ -403,7 +404,7 @@ async def get_users_in_collection( SELECT u.user_id, u.email, u.is_active, u.is_superuser, u.created_at, u.updated_at, u.is_verified, u.collection_ids, u.name, u.bio, u.profile_picture, COUNT(*) OVER() AS total_entries - FROM {self._get_table_name('users')} u + FROM {self._get_table_name(UserMixin.TABLE_NAME)} u WHERE $1 = ANY(u.collection_ids) ORDER BY u.name OFFSET $2 @@ -441,7 +442,7 @@ async def get_users_in_collection( async def mark_user_as_superuser(self, user_id: UUID): query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(UserMixin.TABLE_NAME)} SET is_superuser = TRUE, is_verified = TRUE, verification_code = NULL, verification_code_expiry = NULL WHERE user_id = $1 """ @@ -451,7 +452,7 @@ async def get_user_id_by_verification_code( self, verification_code: str ) -> Optional[UUID]: query = f""" - SELECT user_id FROM {self._get_table_name('users')} + SELECT user_id FROM {self._get_table_name(UserMixin.TABLE_NAME)} WHERE verification_code = $1 AND verification_code_expiry > NOW() """ result = await self.fetchrow_query(query, [verification_code]) @@ -465,7 +466,7 @@ async def get_user_id_by_verification_code( async def mark_user_as_verified(self, user_id: UUID): query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(UserMixin.TABLE_NAME)} SET is_verified = TRUE, verification_code = NULL, verification_code_expiry = NULL WHERE user_id = $1 """ @@ -492,7 +493,7 @@ async def get_users_overview( COALESCE(SUM(d.size_in_bytes), 0) AS total_size_in_bytes, ARRAY_AGG(d.document_id) FILTER (WHERE d.document_id IS NOT NULL) AS document_ids, COUNT(*) OVER() AS total_entries - FROM {self._get_table_name('users')} u + FROM {self._get_table_name(UserMixin.TABLE_NAME)} u LEFT JOIN {self._get_table_name('document_info')} d ON u.user_id = d.user_id {' WHERE u.user_id = ANY($3::uuid[])' if user_ids else ''} GROUP BY u.user_id, u.email, u.is_superuser, u.is_active, u.is_verified, u.created_at, u.updated_at, u.collection_ids diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index 1edcd0223..bfb7caffb 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -36,7 +36,8 @@ class HybridSearchIntermediateResult(TypedDict): class VectorDBMixin(DatabaseMixin): - COLUMN_NAME = "vecs" + TABLE_NAME = "vector" + COLUMN_VARS = [ "extraction_id", "document_id", @@ -51,7 +52,7 @@ async def create_table(self): # TODO - Move ids to `UUID` type # Create the vector table if it doesn't exist query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name(self.project_name)} ( + CREATE TABLE IF NOT EXISTS {self._get_table_name(VectorDBMixin.TABLE_NAME)} ( extraction_id UUID PRIMARY KEY, document_id UUID, user_id UUID, @@ -61,21 +62,21 @@ async def create_table(self): metadata JSONB {",fts tsvector GENERATED ALWAYS AS (to_tsvector('english', text)) STORED" if self.enable_fts else ""} ); - CREATE INDEX IF NOT EXISTS idx_vectors_document_id ON {self._get_table_name(self.project_name)} (document_id); - CREATE INDEX IF NOT EXISTS idx_vectors_user_id ON {self._get_table_name(self.project_name)} (user_id); - CREATE INDEX IF NOT EXISTS idx_vectors_collection_ids ON {self._get_table_name(self.project_name)} USING GIN (collection_ids); - CREATE INDEX IF NOT EXISTS idx_vectors_text ON {self._get_table_name(self.project_name)} USING GIN (to_tsvector('english', text)); + CREATE INDEX IF NOT EXISTS idx_vectors_document_id ON {self._get_table_name(VectorDBMixin.TABLE_NAME)} (document_id); + CREATE INDEX IF NOT EXISTS idx_vectors_user_id ON {self._get_table_name(VectorDBMixin.TABLE_NAME)} (user_id); + CREATE INDEX IF NOT EXISTS idx_vectors_collection_ids ON {self._get_table_name(VectorDBMixin.TABLE_NAME)} USING GIN (collection_ids); + CREATE INDEX IF NOT EXISTS idx_vectors_text ON {self._get_table_name(VectorDBMixin.TABLE_NAME)} USING GIN (to_tsvector('english', text)); """ if self.enable_fts: query += f""" - CREATE INDEX IF NOT EXISTS idx_vectors_text ON {self._get_table_name(self.project_name)} USING GIN (to_tsvector('english', text)); + CREATE INDEX IF NOT EXISTS idx_vectors_text ON {self._get_table_name(VectorDBMixin.TABLE_NAME)} USING GIN (to_tsvector('english', text)); """ await self.execute_query(query) async def upsert(self, entry: VectorEntry) -> None: query = f""" - INSERT INTO {self._get_table_name(self.project_name)} + INSERT INTO {self._get_table_name(VectorDBMixin.TABLE_NAME)} (extraction_id, document_id, user_id, collection_ids, vec, text, metadata) VALUES ($1, $2, $3, $4, $5, $6, $7) ON CONFLICT (extraction_id) DO UPDATE SET @@ -101,7 +102,7 @@ async def upsert(self, entry: VectorEntry) -> None: async def upsert_entries(self, entries: list[VectorEntry]) -> None: query = f""" - INSERT INTO {self._get_table_name(self.project_name)} + INSERT INTO {self._get_table_name(VectorDBMixin.TABLE_NAME)} (extraction_id, document_id, user_id, collection_ids, vec, text, metadata) VALUES ($1, $2, $3, $4, $5, $6, $7) ON CONFLICT (extraction_id) DO UPDATE SET @@ -126,61 +127,6 @@ async def upsert_entries(self, entries: list[VectorEntry]) -> None: ] await self.execute_many(query, params) - # async def upsert(self, entry: VectorEntry) -> None: - # query = f""" - # INSERT INTO {self._get_table_name(self.project_name)} - # {"(extraction_id, document_id, user_id, collection_ids, vec, text, metadata)" if not self.enable_fts else "(extraction_id, document_id, user_id, collection_ids, vec, text, metadata, fts)"} - # VALUES {"($1, $2, $3, $4, $5, $6, $7)" if not self.enable_fts else "($1, $2, $3, $4, $5, $6, $7, $8)"} - # ON CONFLICT (extraction_id) DO UPDATE SET - # document_id = EXCLUDED.document_id, - # user_id = EXCLUDED.user_id, - # collection_ids = EXCLUDED.collection_ids, - # vec = EXCLUDED.vec, - # text = EXCLUDED.text, - # metadata = EXCLUDED.metadata; - # """ - # await self.execute_query( - # query, - # ( - # entry.extraction_id, - # entry.document_id, - # entry.user_id, - # entry.collection_ids, - # str(entry.vector.data), - # entry.text, - # json.dumps(entry.metadata), - # func.ts_vector(entry.text) if self.enable_fts else None, - # ), - # ) - - # async def upsert_entries(self, entries: list[VectorEntry]) -> None: - # query = f""" - # INSERT INTO {self._get_table_name(self.project_name)} - # {"(extraction_id, document_id, user_id, collection_ids, vec, text, metadata)" if not self.enable_fts else "(extraction_id, document_id, user_id, collection_ids, vec, text, metadata, fts)"} - # VALUES {"($1, $2, $3, $4, $5, $6, $7)" if not self.enable_fts else "($1, $2, $3, $4, $5, $6, $7, $8)"} - # ON CONFLICT (extraction_id) DO UPDATE SET - # document_id = EXCLUDED.document_id, - # user_id = EXCLUDED.user_id, - # collection_ids = EXCLUDED.collection_ids, - # vec = EXCLUDED.vec, - # text = EXCLUDED.text, - # metadata = EXCLUDED.metadata; - # """ - # params = [ - # ( - # entry.extraction_id, - # entry.document_id, - # entry.user_id, - # entry.collection_ids, - # str(entry.vector.data), - # entry.text, - # json.dumps(entry.metadata), - # func.ts_vector(entry.text) if self.enable_fts else None, - # ) - # for entry in entries - # ] - # await self.execute_many(query, params) - async def semantic_search( self, query_vector: list[float], search_settings: VectorSearchSettings ) -> list[VectorSearchResult]: @@ -189,55 +135,53 @@ async def semantic_search( except ValueError: raise ValueError("Invalid index measure") - distance_func = self._get_distance_function(imeasure_obj) - + table_name = self._get_table_name(VectorDBMixin.TABLE_NAME) cols = [ - f"{self._get_table_name(self.project_name)}.extraction_id", - f"{self._get_table_name(self.project_name)}.document_id", - f"{self._get_table_name(self.project_name)}.user_id", - f"{self._get_table_name(self.project_name)}.collection_ids", - f"{self._get_table_name(self.project_name)}.text", + f"{table_name}.extraction_id", + f"{table_name}.document_id", + f"{table_name}.user_id", + f"{table_name}.collection_ids", + f"{table_name}.text", ] + # Use cosine distance calculation + distance_calc = f"{table_name}.vec <=> $1::vector" + if search_settings.include_values: - cols.append( - f"{self._get_table_name(self.project_name)}.vec {distance_func} $1 AS distance" - ) + cols.append(f"({distance_calc}) AS distance") if search_settings.include_metadatas: - cols.append(f"{self._get_table_name(self.project_name)}.metadata") + cols.append(f"{table_name}.metadata") select_clause = ", ".join(cols) where_clause = "" + params = [str(query_vector)] if search_settings.filters: - where_clause = ( - f"WHERE {self._build_filters(search_settings.filters)}" - ) + where_clause = self._build_filters(search_settings.filters, params) + where_clause = f"WHERE {where_clause}" query = f""" SELECT {select_clause} - FROM {self._get_table_name(self.project_name)} + FROM {table_name} {where_clause} - ORDER BY {self._get_table_name(self.project_name)}.vec {distance_func} $1 - LIMIT $2 - OFFSET $3 + ORDER BY {distance_calc} + LIMIT ${len(params) + 1} + OFFSET ${len(params) + 2} """ - results = await self.fetch_query( - query, - ( - str(query_vector), - search_settings.search_limit, - search_settings.offset, - ), - ) # , params) + params.extend([search_settings.search_limit, search_settings.offset]) + + print("Generated SQL query:", query) + print("Query parameters:", params) + + results = await self.fetch_query(query, params) return [ VectorSearchResult( - extraction_id=result["extraction_id"], - document_id=result["document_id"], - user_id=result["user_id"], + extraction_id=str(result["extraction_id"]), + document_id=str(result["document_id"]), + user_id=str(result["user_id"]), collection_ids=result["collection_ids"], text=result["text"], score=( @@ -262,21 +206,20 @@ async def full_text_search( "Full-text search is not enabled for this collection." ) + where_clause = "" + params = [query_text] + if search_settings.filters: + where_clause = self._build_filters(search_settings.filters, params) + where_clause = f"WHERE {where_clause}" + query = f""" SELECT extraction_id, document_id, user_id, collection_ids, text, metadata, ts_rank(fts, websearch_to_tsquery('english', $1), 32) as rank - FROM {self._get_table_name(self.project_name)} + FROM {self._get_table_name(VectorDBMixin.TABLE_NAME)} WHERE fts @@ websearch_to_tsquery('english', $1) + {where_clause} """ - # AND collection_ids && $2 - - # if search_settings.filters: - # filter_clause, filter_params = self._build_filters(search_settings.filters) - # query += f" AND {filter_clause}" - # params = [query_text, search_settings.selected_collection_ids] + filter_params - # else: - params = [query_text] # , search_settings.selected_collection_ids] query += """ ORDER BY rank DESC @@ -303,75 +246,6 @@ async def full_text_search( for r in results ] - # async def full_text_search( - # self, query_text: str, search_settings: VectorSearchSettings - # ) -> list[VectorSearchResult]: - # if not self.enable_fts: - # raise ValueError("Full-text search is not enabled for this collection.") - # query = f""" - # SELECT extraction_id, document_id, user_id, collection_ids, text, - # ts_rank_cd(to_tsvector('english', text), plainto_tsquery('english', $1)) as rank, - # metadata - # FROM {self._get_table_name(self.project_name)} - # WHERE collection_ids && $2 AND to_tsvector('english', text) @@ plainto_tsquery('english', $1) - # ORDER BY rank DESC - # LIMIT $3 OFFSET $4; - # """ - # results = await self.fetch_query( - # query, - # ( - # query_text, - # search_settings.selected_collection_ids, - # search_settings.search_limit, - # search_settings.offset, - # ), - # ) - - # return [ - # VectorSearchResult( - # extraction_id=result["extraction_id"], - # document_id=result["document_id"], - # user_id=result["user_id"], - # collection_ids=result["collection_ids"], - # text=result["text"], - # score=float(result["rank"]), - # metadata=result["metadata"], - # ) - # for result in results - # ] - - # query = f""" - # SELECT extraction_id, document_id, user_id, collection_ids, text, - # ts_rank_cd(to_tsvector('english', text), plainto_tsquery('english', $1)) as rank, - # metadata - # FROM {self._get_table_name(self.project_name)} - # WHERE collection_ids && $2 AND to_tsvector('english', text) @@ plainto_tsquery('english', $1) - # ORDER BY rank DESC - # LIMIT $3 OFFSET $4; - # """ - # results = await self.fetch_query( - # query, - # ( - # query_text, - # search_settings.selected_collection_ids, - # search_settings.search_limit, - # search_settings.offset, - # ), - # ) - - # return [ - # VectorSearchResult( - # extraction_id=result["extraction_id"], - # document_id=result["document_id"], - # user_id=result["user_id"], - # collection_ids=result["collection_ids"], - # text=result["text"], - # score=float(result["rank"]), - # metadata=result["metadata"], - # ) - # for result in results - # ] - async def hybrid_search( self, query_text: str, @@ -485,21 +359,27 @@ async def hybrid_search( async def delete( self, filters: dict[str, Any] ) -> dict[str, dict[str, str]]: - conditions = [] params = [] - for key, value in filters.items(): - conditions.append(f"{key} = ${len(params) + 1}") - params.append(value) + where_clause = self._build_filters(filters, params) - where_clause = " AND ".join(conditions) query = f""" - DELETE FROM {self._get_table_name(self.project_name)} + DELETE FROM {self._get_table_name(VectorDBMixin.TABLE_NAME)} WHERE {where_clause} - RETURNING extraction_id; + RETURNING extraction_id, document_id, text; """ + + print("Generated DELETE SQL query:", query) + print("Query parameters:", params) + results = await self.fetch_query(query, params) + return { - result["extraction_id"]: {"status": "deleted"} + str(result["extraction_id"]): { + "status": "deleted", + "extraction_id": str(result["extraction_id"]), + "document_id": str(result["document_id"]), + "text": result["text"], + } for result in results } @@ -507,7 +387,7 @@ async def assign_document_to_collection_vector( self, document_id: str, collection_id: str ) -> None: query = f""" - UPDATE {self._get_table_name(self.project_name)} + UPDATE {self._get_table_name(VectorDBMixin.TABLE_NAME)} SET collection_ids = array_append(collection_ids, $1) WHERE document_id = $2 AND NOT ($1 = ANY(collection_ids)); """ @@ -517,7 +397,7 @@ async def remove_document_from_collection_vector( self, document_id: str, collection_id: str ) -> None: query = f""" - UPDATE {self._get_table_name(self.project_name)} + UPDATE {self._get_table_name(VectorDBMixin.TABLE_NAME)} SET collection_ids = array_remove(collection_ids, $1) WHERE document_id = $2; """ @@ -525,17 +405,22 @@ async def remove_document_from_collection_vector( async def delete_user_vector(self, user_id: str) -> None: query = f""" - DELETE FROM {self._get_table_name(self.project_name)} + DELETE FROM {self._get_table_name(VectorDBMixin.TABLE_NAME)} WHERE user_id = $1; """ await self.execute_query(query, (user_id,)) async def delete_collection_vector(self, collection_id: str) -> None: query = f""" - DELETE FROM {self._get_table_name(self.project_name)} - WHERE $1 = ANY(collection_ids); - """ - await self.execute_query(query, (collection_id,)) + DELETE FROM {self._get_table_name(VectorDBMixin.TABLE_NAME)} + WHERE $1 = ANY(collection_ids) + RETURNING collection_ids + """ + results = await self.fetchrow_query(query, (collection_id,)) + print("results = ", results) + deleted_count = len(results) + print("deleted_count", deleted_count) + return deleted_count async def get_document_chunks( self, @@ -549,7 +434,7 @@ async def get_document_chunks( query = f""" SELECT extraction_id, document_id, user_id, collection_ids, text, metadata{vector_select}, COUNT(*) OVER() AS total - FROM {self._get_table_name(self.project_name)} + FROM {self._get_table_name(VectorDBMixin.TABLE_NAME)} WHERE document_id = $1 OFFSET $2 {limit_clause}; @@ -699,27 +584,12 @@ async def create_index( return None - def build_filters(self, filters: dict) -> Tuple[str, list[Any]]: - """ - Builds filters for SQL query based on provided dictionary. - - Args: - filters (dict): The dictionary specifying filter conditions. - - Raises: - FilterError: If filter conditions are not correctly formatted. - - Returns: - A tuple containing the SQL WHERE clause string and a list of parameters. - """ - if not isinstance(filters, dict): - raise FilterError("filters must be a dict") - - conditions = [] - parameters = [] + def _build_filters( + self, filters: dict, parameters: list[dict] + ) -> Tuple[str, list[Any]]: def parse_condition(key: str, value: Any) -> str: - nonlocal parameters + # nonlocal parameters if key in self.COLUMN_VARS: # Handle column-based filters if isinstance(value, dict): @@ -754,8 +624,6 @@ def parse_condition(key: str, value: Any) -> str: ) else: # Handle direct equality - if isinstance(value, str): - value = uuid.UUID(value) parameters.append(value) return f"{key} = ${len(parameters)}" else: @@ -764,10 +632,8 @@ def parse_condition(key: str, value: Any) -> str: if key.startswith("metadata."): key = key.split("metadata.")[1] if isinstance(value, dict): - if len(value) > 1: - raise FilterError("only one operator permitted") - operator, clause = next(iter(value.items())) - if operator not in ( + op, clause = next(iter(value.items())) + if op not in ( "$eq", "$ne", "$lt", @@ -779,42 +645,44 @@ def parse_condition(key: str, value: Any) -> str: ): raise FilterError("unknown operator") - if operator == "$eq" and not hasattr(clause, "__len__"): - parameters.append(json.dumps({key: clause})) - return f"{json_col} @> ${len(parameters)}::jsonb" - - if operator == "$in": + if op == "$eq": + parameters.append(json.dumps(clause)) + return ( + f"{json_col}->'{key}' = ${len(parameters)}::jsonb" + ) + elif op == "$ne": + parameters.append(json.dumps(clause)) + return ( + f"{json_col}->'{key}' != ${len(parameters)}::jsonb" + ) + elif op == "$lt": + parameters.append(json.dumps(clause)) + return f"({json_col}->'{key}')::float < (${len(parameters)}::jsonb)::float" + elif op == "$lte": + parameters.append(json.dumps(clause)) + return f"({json_col}->'{key}')::float <= (${len(parameters)}::jsonb)::float" + elif op == "$gt": + parameters.append(json.dumps(clause)) + return f"({json_col}->'{key}')::float > (${len(parameters)}::jsonb)::float" + elif op == "$gte": + parameters.append(json.dumps(clause)) + return f"({json_col}->'{key}')::float >= (${len(parameters)}::jsonb)::float" + elif op == "$in": if not isinstance(clause, list): raise FilterError( "argument to $in filter must be a list" ) - for elem in clause: - if not isinstance(elem, (int, str, float)): - raise FilterError( - "argument to $in filter must be a list of scalars" - ) - parameters.append(clause) - return f"{json_col}->>{key} = ANY(${len(parameters)})" - - parameters.append(json.dumps(clause)) - if operator == "$contains": - if not isinstance(clause, (int, str, float)): + parameters.append(json.dumps(clause)) + return f"{json_col}->'{key}' = ANY(SELECT jsonb_array_elements(${len(parameters)}::jsonb))" + elif op == "$contains": + if not isinstance(clause, (int, str, float, list)): raise FilterError( - "argument to $contains filter must be a scalar" + "argument to $contains filter must be a scalar or array" ) - return f"{json_col}->{key} @> ${len(parameters)}::jsonb AND jsonb_typeof({json_col}->{key}) = 'array'" - - return { - "$eq": f"{json_col}->>{key} = ${len(parameters)}", - "$ne": f"{json_col}->>{key} != ${len(parameters)}", - "$lt": f"{json_col}->>{key} < ${len(parameters)}", - "$lte": f"{json_col}->>{key} <= ${len(parameters)}", - "$gt": f"{json_col}->>{key} > ${len(parameters)}", - "$gte": f"{json_col}->>{key} >= ${len(parameters)}", - }[operator] - else: - parameters.append(json.dumps({key: value})) - return f"{json_col} @> ${len(parameters)}::jsonb" + parameters.append(json.dumps(clause)) + return ( + f"{json_col}->'{key}' @> ${len(parameters)}::jsonb" + ) def parse_filter(filter_dict: dict) -> str: filter_conditions = [] @@ -832,7 +700,7 @@ def parse_filter(filter_dict: dict) -> str: return " AND ".join(filter_conditions) where_clause = parse_filter(filters) - return where_clause, parameters + return where_clause def _get_index_options( self, diff --git a/py/tests/conftest.py b/py/tests/conftest.py index 6799ac51c..3eb81c6db 100644 --- a/py/tests/conftest.py +++ b/py/tests/conftest.py @@ -36,6 +36,7 @@ PostgresFileProvider, PostgresKGProvider, R2RAuthProvider, + R2RPromptProvider, ) from shared.abstractions.vector import VectorQuantizationType @@ -107,7 +108,7 @@ async def postgres_db_provider( db_config, dimension=dimension, crypto_provider=crypto_provider ) await db.initialize() - db.vector.upsert_entries(sample_entries) + await db.upsert_entries(sample_entries) yield db # Teardown # TODO - Add teardown methods @@ -134,12 +135,12 @@ async def temporary_postgres_db_provider( crypto_provider=crypto_provider, ) await db.initialize() - db.vector.upsert_entries(sample_entries) + await db.upsert_entries(sample_entries) try: yield db finally: - await db.relational.close() - db.vector.close() + await db.close() + # db.vector.close() # Auth @@ -256,7 +257,7 @@ async def postgres_kg_provider( kg_extraction_status=KGExtractionStatus.PENDING, ) - await temporary_postgres_db_provider.relational.upsert_documents_overview( + await temporary_postgres_db_provider.upsert_documents_overview( document_info ) diff --git a/py/tests/core/pipes/test_kg_community_summary_pipe.py b/py/tests/core/pipes/test_kg_community_summary_pipe.py index 75c0ab1a3..4295e6170 100644 --- a/py/tests/core/pipes/test_kg_community_summary_pipe.py +++ b/py/tests/core/pipes/test_kg_community_summary_pipe.py @@ -162,5 +162,19 @@ async def test_community_summary_prompt( summary = await kg_community_summary_pipe.community_summary_prompt( entities_list, triples_raw_list, max_summary_input_length ) - expected_summary = "\n Entity: Entity1\n Descriptions: \n 1,Description1\n Triples: \n 1,Entity1,object1,predicate1,description1\n \n Entity: Entity2\n Descriptions: \n 2,Description2\n Triples: \n 2,Entity2,object2,predicate2,description2\n " - assert summary == expected_summary + print("summary = ", summary) + expected_summary = """ + Entity: Entity1 + Descriptions: + 1,Description1 + Triples: + 1,Entity1,object1,predicate1,description1 + + Entity: Entity2 + Descriptions: + 2,Description2 + Triples: + 2,Entity2,object2,predicate2,description2 + """ + # "\n Entity: Entity1\n Descriptions: \n 1,Description1\n Triples: \n 1,Entity1,object1,predicate1,description1\n \n Entity: Entity2\n Descriptions: \n 2,Description2\n Triples: \n 2,Entity2,object2,predicate2,description2\n " + assert summary.strip() == expected_summary.strip() diff --git a/py/tests/core/providers/database/relational/test_collection_db.py b/py/tests/core/providers/database/relational/test_collection_db.py index b43cfa065..7eef24edb 100644 --- a/py/tests/core/providers/database/relational/test_collection_db.py +++ b/py/tests/core/providers/database/relational/test_collection_db.py @@ -8,10 +8,8 @@ @pytest.mark.asyncio async def test_create_collection(temporary_postgres_db_provider): - collection = ( - await temporary_postgres_db_provider.relational.create_collection( - "Test Collection", "Test Description" - ) + collection = await temporary_postgres_db_provider.create_collection( + "Test Collection", "Test Description" ) assert isinstance(collection, CollectionResponse) assert collection.name == "Test Collection" @@ -21,14 +19,12 @@ async def test_create_collection(temporary_postgres_db_provider): @pytest.mark.asyncio async def test_get_collection(temporary_postgres_db_provider): created_collection = ( - await temporary_postgres_db_provider.relational.create_collection( + await temporary_postgres_db_provider.create_collection( "Test Collection", "Test Description" ) ) - retrieved_collection = ( - await temporary_postgres_db_provider.relational.get_collection( - created_collection.collection_id - ) + retrieved_collection = await temporary_postgres_db_provider.get_collection( + created_collection.collection_id ) assert retrieved_collection == created_collection @@ -36,12 +32,12 @@ async def test_get_collection(temporary_postgres_db_provider): @pytest.mark.asyncio async def test_update_collection(temporary_postgres_db_provider): created_collection = ( - await temporary_postgres_db_provider.relational.create_collection( + await temporary_postgres_db_provider.create_collection( "Test Collection", "Test Description" ) ) updated_collection = ( - await temporary_postgres_db_provider.relational.update_collection( + await temporary_postgres_db_provider.update_collection( created_collection.collection_id, name="Updated Collection", description="Updated Description", @@ -54,50 +50,50 @@ async def test_update_collection(temporary_postgres_db_provider): @pytest.mark.asyncio async def test_delete_collection(temporary_postgres_db_provider): created_collection = ( - await temporary_postgres_db_provider.relational.create_collection( + await temporary_postgres_db_provider.create_collection( "Test Collection", "Test Description" ) ) - await temporary_postgres_db_provider.relational.delete_collection( + await temporary_postgres_db_provider.delete_collection_relational( created_collection.collection_id ) with pytest.raises(R2RException): - await temporary_postgres_db_provider.relational.get_collection( + await temporary_postgres_db_provider.delete_collection_relational( created_collection.collection_id ) + # await temporary_postgres_db_provider.delete_collection_vector( + # created_collection.collection_id + # ) + # with pytest.raises(R2RException): + # await temporary_postgres_db_provider.delete_collection_vector( + # created_collection.collection_id + # ) + @pytest.mark.asyncio async def test_list_collections(temporary_postgres_db_provider): - await temporary_postgres_db_provider.relational.create_collection( + await temporary_postgres_db_provider.create_collection( "Collection 1", "Description 1" ) - await temporary_postgres_db_provider.relational.create_collection( + await temporary_postgres_db_provider.create_collection( "Collection 2", "Description 2" ) - collections = ( - await temporary_postgres_db_provider.relational.list_collections() - ) + collections = await temporary_postgres_db_provider.list_collections() assert len(collections["results"]) >= 2 assert collections["total_entries"] >= 2 @pytest.mark.asyncio async def test_get_collections_by_ids(temporary_postgres_db_provider): - collection1 = ( - await temporary_postgres_db_provider.relational.create_collection( - "Collection 1", "Description 1" - ) + collection1 = await temporary_postgres_db_provider.create_collection( + "Collection 1", "Description 1" ) - collection2 = ( - await temporary_postgres_db_provider.relational.create_collection( - "Collection 2", "Description 2" - ) + collection2 = await temporary_postgres_db_provider.create_collection( + "Collection 2", "Description 2" ) - collections = ( - await temporary_postgres_db_provider.relational.get_collections_by_ids( - [collection1.collection_id, collection2.collection_id] - ) + collections = await temporary_postgres_db_provider.get_collections_by_ids( + [collection1.collection_id, collection2.collection_id] ) assert len(collections) == 2 assert collections[0].collection_id == collection1.collection_id @@ -108,13 +104,11 @@ async def test_get_collections_by_ids(temporary_postgres_db_provider): async def test_assign_and_remove_document_from_collection( temporary_postgres_db_provider, ): - collection = ( - await temporary_postgres_db_provider.relational.create_collection( - "Test Collection", "Test Description" - ) + collection = await temporary_postgres_db_provider.create_collection( + "Test Collection", "Test Description" ) document_id = UUID("00000000-0000-0000-0000-000000000001") - await temporary_postgres_db_provider.relational.upsert_documents_overview( + await temporary_postgres_db_provider.upsert_documents_overview( DocumentInfo( id=document_id, collection_ids=[], @@ -125,13 +119,14 @@ async def test_assign_and_remove_document_from_collection( size_in_bytes=0, ) ) - await temporary_postgres_db_provider.relational.assign_document_to_collection( + await temporary_postgres_db_provider.assign_document_to_collection_relational( + document_id, collection.collection_id + ) + await temporary_postgres_db_provider.assign_document_to_collection_vector( document_id, collection.collection_id ) document_collections = ( - await temporary_postgres_db_provider.relational.document_collections( - document_id - ) + await temporary_postgres_db_provider.document_collections(document_id) ) assert len(document_collections["results"]) == 1 assert ( @@ -139,40 +134,37 @@ async def test_assign_and_remove_document_from_collection( == collection.collection_id ) - await temporary_postgres_db_provider.relational.remove_document_from_collection( + await temporary_postgres_db_provider.remove_document_from_collection_relational( + document_id, collection.collection_id + ) + await temporary_postgres_db_provider.remove_document_from_collection_vector( document_id, collection.collection_id ) document_collections = ( - await temporary_postgres_db_provider.relational.document_collections( - document_id - ) + await temporary_postgres_db_provider.document_collections(document_id) ) assert len(document_collections["results"]) == 0 @pytest.mark.asyncio async def test_get_collections_for_user(temporary_postgres_db_provider): - user = await temporary_postgres_db_provider.relational.create_user( + user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) - collection1 = ( - await temporary_postgres_db_provider.relational.create_collection( - "Collection 1", "Description 1" - ) + collection1 = await temporary_postgres_db_provider.create_collection( + "Collection 1", "Description 1" ) - collection2 = ( - await temporary_postgres_db_provider.relational.create_collection( - "Collection 2", "Description 2" - ) + collection2 = await temporary_postgres_db_provider.create_collection( + "Collection 2", "Description 2" ) - await temporary_postgres_db_provider.relational.add_user_to_collection( + await temporary_postgres_db_provider.add_user_to_collection( user.id, collection1.collection_id ) - await temporary_postgres_db_provider.relational.add_user_to_collection( + await temporary_postgres_db_provider.add_user_to_collection( user.id, collection2.collection_id ) - user_collections = await temporary_postgres_db_provider.relational.get_collections_for_user( - user.id + user_collections = ( + await temporary_postgres_db_provider.get_collections_for_user(user.id) ) assert len(user_collections["results"]) == 2 assert user_collections["total_entries"] == 2 diff --git a/py/tests/core/providers/database/relational/test_document_db.py b/py/tests/core/providers/database/relational/test_document_db.py index 93f87e636..cd88dab09 100644 --- a/py/tests/core/providers/database/relational/test_document_db.py +++ b/py/tests/core/providers/database/relational/test_document_db.py @@ -14,7 +14,7 @@ @pytest.mark.asyncio async def test_create_table(temporary_postgres_db_provider): - await temporary_postgres_db_provider.relational.create_table() + await temporary_postgres_db_provider.create_table() # Verify that the table is created with the expected columns and constraints # You can execute a query to check the table structure or use a database inspection tool @@ -33,15 +33,13 @@ async def test_upsert_documents_overview(temporary_postgres_db_provider): ingestion_status=IngestionStatus.PENDING, kg_extraction_status=KGExtractionStatus.PENDING, ) - await temporary_postgres_db_provider.relational.upsert_documents_overview( + await temporary_postgres_db_provider.upsert_documents_overview( document_info ) # Verify that the document is inserted correctly - result = ( - await temporary_postgres_db_provider.relational.get_documents_overview( - filter_document_ids=[document_info.id] - ) + result = await temporary_postgres_db_provider.get_documents_overview( + filter_document_ids=[document_info.id] ) assert len(result["results"]) == 1 inserted_document = result["results"][0] @@ -62,14 +60,12 @@ async def test_upsert_documents_overview(temporary_postgres_db_provider): # Update the document and verify the changes document_info.title = "Updated Test Document" document_info.ingestion_status = IngestionStatus.SUCCESS - await temporary_postgres_db_provider.relational.upsert_documents_overview( + await temporary_postgres_db_provider.upsert_documents_overview( document_info ) - result = ( - await temporary_postgres_db_provider.relational.get_documents_overview( - filter_document_ids=[document_info.id] - ) + result = await temporary_postgres_db_provider.get_documents_overview( + filter_document_ids=[document_info.id] ) assert len(result["results"]) == 1 updated_document = result["results"][0] @@ -91,19 +87,17 @@ async def test_delete_from_documents_overview(temporary_postgres_db_provider): ingestion_status=IngestionStatus.PENDING, kg_extraction_status=KGExtractionStatus.PENDING, ) - await temporary_postgres_db_provider.relational.upsert_documents_overview( + await temporary_postgres_db_provider.upsert_documents_overview( document_info ) - await temporary_postgres_db_provider.relational.delete_from_documents_overview( + await temporary_postgres_db_provider.delete_from_documents_overview( document_info.id ) # Verify that the document is deleted - result = ( - await temporary_postgres_db_provider.relational.get_documents_overview( - filter_document_ids=[document_info.id] - ) + result = await temporary_postgres_db_provider.get_documents_overview( + filter_document_ids=[document_info.id] ) assert len(result["results"]) == 0 @@ -134,24 +128,20 @@ async def test_get_documents_overview(temporary_postgres_db_provider): ingestion_status=IngestionStatus.SUCCESS, kg_extraction_status=KGExtractionStatus.PENDING, ) - await temporary_postgres_db_provider.relational.upsert_documents_overview( + await temporary_postgres_db_provider.upsert_documents_overview( [document_info1, document_info2] ) # Test filtering by user ID - result = ( - await temporary_postgres_db_provider.relational.get_documents_overview( - filter_user_ids=[UUID("00000000-0000-0000-0000-000000000003")] - ) + result = await temporary_postgres_db_provider.get_documents_overview( + filter_user_ids=[UUID("00000000-0000-0000-0000-000000000003")] ) assert len(result["results"]) == 2 assert result["total_entries"] == 2 # Test filtering by document ID - result = ( - await temporary_postgres_db_provider.relational.get_documents_overview( - filter_document_ids=[UUID("00000000-0000-0000-0000-000000000001")] - ) + result = await temporary_postgres_db_provider.get_documents_overview( + filter_document_ids=[UUID("00000000-0000-0000-0000-000000000001")] ) assert len(result["results"]) == 1 assert result["results"][0].id == UUID( @@ -159,21 +149,15 @@ async def test_get_documents_overview(temporary_postgres_db_provider): ) # Test filtering by collection ID - result = ( - await temporary_postgres_db_provider.relational.get_documents_overview( - filter_collection_ids=[ - UUID("00000000-0000-0000-0000-000000000002") - ] - ) + result = await temporary_postgres_db_provider.get_documents_overview( + filter_collection_ids=[UUID("00000000-0000-0000-0000-000000000002")] ) assert len(result["results"]) == 2 assert result["total_entries"] == 2 # Test pagination - result = ( - await temporary_postgres_db_provider.relational.get_documents_overview( - offset=1, limit=1 - ) + result = await temporary_postgres_db_provider.get_documents_overview( + offset=1, limit=1 ) assert len(result["results"]) == 1 assert result["total_entries"] == 2 diff --git a/py/tests/core/providers/database/relational/test_relational_db_provider.py b/py/tests/core/providers/database/relational/test_relational_db_provider.py index 7b631826e..32a4d1f1b 100644 --- a/py/tests/core/providers/database/relational/test_relational_db_provider.py +++ b/py/tests/core/providers/database/relational/test_relational_db_provider.py @@ -7,4 +7,4 @@ @pytest.mark.asyncio async def test_relational_db_initialization(postgres_db_provider): assert isinstance(postgres_db_provider, PostgresDBProvider) - assert postgres_db_provider.relational is not None + # assert postgres_db_provider.relational is not None diff --git a/py/tests/core/providers/database/relational/test_user_db.py b/py/tests/core/providers/database/relational/test_user_db.py index 6856e2003..b27d6299c 100644 --- a/py/tests/core/providers/database/relational/test_user_db.py +++ b/py/tests/core/providers/database/relational/test_user_db.py @@ -9,7 +9,7 @@ @pytest.mark.asyncio async def test_create_user(temporary_postgres_db_provider): - user = await temporary_postgres_db_provider.relational.create_user( + user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) assert isinstance(user, UserResponse) @@ -18,43 +18,40 @@ async def test_create_user(temporary_postgres_db_provider): @pytest.mark.asyncio async def test_get_user_by_id(temporary_postgres_db_provider): - user = await temporary_postgres_db_provider.relational.create_user( + user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) - retrieved_user = ( - await temporary_postgres_db_provider.relational.get_user_by_id(user.id) + retrieved_user = await temporary_postgres_db_provider.get_user_by_id( + user.id ) assert retrieved_user == user @pytest.mark.asyncio async def test_get_user_by_email(temporary_postgres_db_provider): - user = await temporary_postgres_db_provider.relational.create_user( + user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) - retrieved_user = ( - await temporary_postgres_db_provider.relational.get_user_by_email( - "test@example.com" - ) + retrieved_user = await temporary_postgres_db_provider.get_user_by_email( + "test@example.com" ) assert retrieved_user == user @pytest.mark.asyncio async def test_delete_user(temporary_postgres_db_provider): - user = await temporary_postgres_db_provider.relational.create_user( + user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) - await temporary_postgres_db_provider.relational.delete_user(user.id) - user = await temporary_postgres_db_provider.relational.get_user_by_id( - user.id - ) + await temporary_postgres_db_provider.delete_user_relational(user.id) + await temporary_postgres_db_provider.delete_user_vector(user.id) + user = await temporary_postgres_db_provider.get_user_by_id(user.id) assert user is None @pytest.mark.asyncio async def test_update_user(temporary_postgres_db_provider): - user = await temporary_postgres_db_provider.relational.create_user( + user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) updated_user = UserResponse( @@ -68,18 +65,16 @@ async def test_update_user(temporary_postgres_db_provider): bio="Updated bio", collection_ids=[], ) - result = await temporary_postgres_db_provider.relational.update_user( - updated_user - ) + result = await temporary_postgres_db_provider.update_user(updated_user) assert result.email == updated_user.email @pytest.mark.asyncio async def test_update_user_password(temporary_postgres_db_provider): - user = await temporary_postgres_db_provider.relational.create_user( + user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) - await temporary_postgres_db_provider.relational.update_user_password( + await temporary_postgres_db_provider.update_user_password( user.id, "new_password" ) # Additional assertions can be added based on the expected behavior @@ -87,13 +82,13 @@ async def test_update_user_password(temporary_postgres_db_provider): @pytest.mark.asyncio async def test_get_all_users(temporary_postgres_db_provider): - await temporary_postgres_db_provider.relational.create_user( + await temporary_postgres_db_provider.create_user( "test1@example.com", "password" ) - await temporary_postgres_db_provider.relational.create_user( + await temporary_postgres_db_provider.create_user( "test2@example.com", "password" ) - users = await temporary_postgres_db_provider.relational.get_all_users() + users = await temporary_postgres_db_provider.get_all_users() assert len(users) >= 2 assert any(user.email == "test1@example.com" for user in users) assert any(user.email == "test2@example.com" for user in users) @@ -103,34 +98,30 @@ async def test_get_all_users(temporary_postgres_db_provider): async def test_store_and_verify_verification_code( temporary_postgres_db_provider, ): - user = await temporary_postgres_db_provider.relational.create_user( + user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) verification_code = "123456" expiry = datetime.utcnow() + timedelta(hours=1) - await temporary_postgres_db_provider.relational.store_verification_code( + await temporary_postgres_db_provider.store_verification_code( user.id, verification_code, expiry ) - await temporary_postgres_db_provider.relational.verify_user( - verification_code - ) - updated_user = ( - await temporary_postgres_db_provider.relational.get_user_by_id(user.id) - ) + await temporary_postgres_db_provider.verify_user(verification_code) + updated_user = await temporary_postgres_db_provider.get_user_by_id(user.id) assert updated_user.is_verified @pytest.mark.asyncio async def test_store_and_get_reset_token(temporary_postgres_db_provider): - user = await temporary_postgres_db_provider.relational.create_user( + user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) reset_token = "reset_token" expiry = datetime.utcnow() + timedelta(hours=1) - await temporary_postgres_db_provider.relational.store_reset_token( + await temporary_postgres_db_provider.store_reset_token( user.id, reset_token, expiry ) - user_id = await temporary_postgres_db_provider.relational.get_user_id_by_reset_token( + user_id = await temporary_postgres_db_provider.get_user_id_by_reset_token( reset_token ) assert user_id == user.id @@ -140,21 +131,17 @@ async def test_store_and_get_reset_token(temporary_postgres_db_provider): async def test_add_and_remove_user_from_collection( temporary_postgres_db_provider, ): - user = await temporary_postgres_db_provider.relational.create_user( + user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) collection_id = UUID("00000000-0000-0000-0000-000000000001") - await temporary_postgres_db_provider.relational.add_user_to_collection( + await temporary_postgres_db_provider.add_user_to_collection( user.id, collection_id ) - updated_user = ( - await temporary_postgres_db_provider.relational.get_user_by_id(user.id) - ) + updated_user = await temporary_postgres_db_provider.get_user_by_id(user.id) assert collection_id in updated_user.collection_ids - await temporary_postgres_db_provider.relational.remove_user_from_collection( + await temporary_postgres_db_provider.remove_user_from_collection( user.id, collection_id ) - updated_user = ( - await temporary_postgres_db_provider.relational.get_user_by_id(user.id) - ) + updated_user = await temporary_postgres_db_provider.get_user_by_id(user.id) assert collection_id not in updated_user.collection_ids diff --git a/py/tests/core/providers/database/test_vector_db_provider.py b/py/tests/core/providers/database/test_vector_db_provider.py index 0fe76d383..ff7a70485 100644 --- a/py/tests/core/providers/database/test_vector_db_provider.py +++ b/py/tests/core/providers/database/test_vector_db_provider.py @@ -7,14 +7,14 @@ @pytest.mark.asyncio async def test_vector_db_initialization(postgres_db_provider): assert isinstance(postgres_db_provider, PostgresDBProvider) - assert postgres_db_provider.vector is not None + # assert postgres_db_provider is not None @pytest.mark.asyncio async def test_search_equality_filter(postgres_db_provider, sample_entries): - query_vector = sample_entries[0].vector - results = postgres_db_provider.vector.semantic_search( - query_vector.data, + query_vector = sample_entries[0] + results = await postgres_db_provider.semantic_search( + query_vector.vector.data, VectorSearchSettings( search_limit=10, filters={"key": {"$eq": "value_id_0"}} ), @@ -25,9 +25,9 @@ async def test_search_equality_filter(postgres_db_provider, sample_entries): @pytest.mark.asyncio async def test_search_not_equal_filter(postgres_db_provider, sample_entries): - query_vector = sample_entries[0].vector - results = postgres_db_provider.vector.semantic_search( - query_vector.data, + query_vector = sample_entries[0] + results = await postgres_db_provider.semantic_search( + query_vector.vector.data, VectorSearchSettings( search_limit=100, filters={"key": {"$ne": "value_id_0"}} ), @@ -40,9 +40,9 @@ async def test_search_not_equal_filter(postgres_db_provider, sample_entries): async def test_search_greater_than_filter( postgres_db_provider, sample_entries ): - query_vector = sample_entries[0].vector - results = postgres_db_provider.vector.semantic_search( - query_vector.data, + query_vector = sample_entries[0] + results = await postgres_db_provider.semantic_search( + query_vector.vector.data, VectorSearchSettings( search_limit=100, filters={"raw_key": {"$gt": 50}} ), @@ -55,9 +55,9 @@ async def test_search_greater_than_filter( async def test_search_less_than_or_equal_filter( postgres_db_provider, sample_entries ): - query_vector = sample_entries[0].vector - results = postgres_db_provider.vector.semantic_search( - query_vector.data, + query_vector = sample_entries[0] + results = await postgres_db_provider.semantic_search( + query_vector.vector.data, VectorSearchSettings( search_limit=10, filters={"raw_key": {"$lte": 20}}, @@ -66,8 +66,8 @@ async def test_search_less_than_or_equal_filter( ) # TODO - Why is this number not always 10? assert len(results) == 10 - results = postgres_db_provider.vector.semantic_search( - query_vector.data, + results = await postgres_db_provider.semantic_search( + query_vector.vector.data, VectorSearchSettings( search_limit=100, filters={"raw_key": {"$lte": 20}} ), @@ -78,9 +78,9 @@ async def test_search_less_than_or_equal_filter( @pytest.mark.asyncio async def test_search_in_filter(postgres_db_provider, sample_entries): - query_vector = sample_entries[0].vector - results = postgres_db_provider.vector.semantic_search( - query_vector.data, + query_vector = sample_entries[0] + results = await postgres_db_provider.semantic_search( + query_vector.vector.data, VectorSearchSettings( search_limit=10, filters={"key": {"$in": ["value_id_0", "value_id_1"]}}, @@ -94,9 +94,9 @@ async def test_search_in_filter(postgres_db_provider, sample_entries): @pytest.mark.asyncio async def test_search_complex_and_filter(postgres_db_provider, sample_entries): - query_vector = sample_entries[0].vector - results = postgres_db_provider.vector.semantic_search( - query_vector.data, + query_vector = sample_entries[0] + results = await postgres_db_provider.semantic_search( + query_vector.vector.data, VectorSearchSettings( search_limit=10, filters={ @@ -114,9 +114,9 @@ async def test_search_complex_and_filter(postgres_db_provider, sample_entries): @pytest.mark.asyncio async def test_search_complex_or_filter(postgres_db_provider, sample_entries): - query_vector = sample_entries[0].vector - results = postgres_db_provider.vector.semantic_search( - query_vector.data, + query_vector = sample_entries[0] + results = await postgres_db_provider.semantic_search( + query_vector.vector.data, VectorSearchSettings( search_limit=11, ef_search=100, # TODO - Better understand why we need to set this to search the entire database. @@ -137,9 +137,9 @@ async def test_search_complex_or_filter(postgres_db_provider, sample_entries): async def test_search_nested_and_or_filters( postgres_db_provider, sample_entries ): - query_vector = sample_entries[0].vector - results = postgres_db_provider.vector.semantic_search( - query_vector.data, + query_vector = sample_entries[0] + results = await postgres_db_provider.semantic_search( + query_vector.vector.data, VectorSearchSettings( search_limit=10, ef_search=100, # TODO - Better understand why we need to set this to search the entire database. @@ -163,11 +163,11 @@ async def test_search_nested_and_or_filters( @pytest.mark.asyncio async def test_delete_equality(temporary_postgres_db_provider, sample_entries): - deleted_ids = temporary_postgres_db_provider.vector.delete( + deleted_ids = await temporary_postgres_db_provider.delete( {"key": {"$eq": "value_id_0"}} ) assert len(deleted_ids) == 1 - remaining = temporary_postgres_db_provider.vector.semantic_search( + remaining = await temporary_postgres_db_provider.semantic_search( sample_entries[0].vector.data, VectorSearchSettings(search_limit=100), ) @@ -179,11 +179,11 @@ async def test_delete_equality(temporary_postgres_db_provider, sample_entries): async def test_delete_greater_than( temporary_postgres_db_provider, sample_entries ): - deleted_ids = temporary_postgres_db_provider.vector.delete( + deleted_ids = await temporary_postgres_db_provider.delete( {"raw_key": {"$gt": 90}} ) assert len(deleted_ids) == 9 - remaining = temporary_postgres_db_provider.vector.semantic_search( + remaining = await temporary_postgres_db_provider.semantic_search( sample_entries[0].vector.data, VectorSearchSettings(search_limit=100), ) @@ -193,11 +193,11 @@ async def test_delete_greater_than( @pytest.mark.asyncio async def test_delete_in(temporary_postgres_db_provider, sample_entries): - deleted_ids = temporary_postgres_db_provider.vector.delete( + deleted_ids = await temporary_postgres_db_provider.delete( {"key": {"$in": ["value_id_0", "value_id_1"]}} ) assert len(deleted_ids) == 2 - remaining = temporary_postgres_db_provider.vector.semantic_search( + remaining = await temporary_postgres_db_provider.semantic_search( sample_entries[0].vector.data, VectorSearchSettings(search_limit=100), ) @@ -212,7 +212,7 @@ async def test_delete_in(temporary_postgres_db_provider, sample_entries): async def test_delete_complex_and( temporary_postgres_db_provider, sample_entries ): - deleted_ids = temporary_postgres_db_provider.vector.delete( + deleted_ids = await temporary_postgres_db_provider.delete( { "$and": [ {"key": {"$eq": "value_id_0"}}, @@ -221,7 +221,7 @@ async def test_delete_complex_and( } ) assert len(deleted_ids) == 1 - remaining = temporary_postgres_db_provider.vector.semantic_search( + remaining = await temporary_postgres_db_provider.semantic_search( sample_entries[0].vector.data, VectorSearchSettings(search_limit=100), ) @@ -236,7 +236,7 @@ async def test_delete_complex_and( async def test_delete_complex_or( temporary_postgres_db_provider, sample_entries ): - deleted_ids = temporary_postgres_db_provider.vector.delete( + deleted_ids = await temporary_postgres_db_provider.delete( { "$or": [ {"key": {"$eq": "value_id_0"}}, @@ -245,7 +245,7 @@ async def test_delete_complex_or( } ) assert len(deleted_ids) == 11 - remaining = temporary_postgres_db_provider.vector.semantic_search( + remaining = await temporary_postgres_db_provider.semantic_search( sample_entries[0].vector.data, VectorSearchSettings(search_limit=100), ) @@ -260,7 +260,7 @@ async def test_delete_complex_or( async def test_delete_nested_and_or( temporary_postgres_db_provider, sample_entries ): - deleted_ids = temporary_postgres_db_provider.vector.delete( + deleted_ids = await temporary_postgres_db_provider.delete( { "$and": [ {"key": {"$eq": "value_id_0"}}, @@ -274,7 +274,7 @@ async def test_delete_nested_and_or( } ) assert len(deleted_ids) == 1 - remaining = temporary_postgres_db_provider.vector.semantic_search( + remaining = await temporary_postgres_db_provider.semantic_search( sample_entries[0].vector.data, VectorSearchSettings(search_limit=100), ) From 551637308123c5d09ac453ca7268daf93f5ffa47 Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Sun, 20 Oct 2024 16:35:10 -0700 Subject: [PATCH 20/27] up --- py/core/providers/kg/postgres.py | 64 +++++++++++++------ py/shared/abstractions/graph.py | 2 +- py/tests/conftest.py | 2 +- .../logging/test_chat_logging_provider.py | 47 +++++++------- 4 files changed, 70 insertions(+), 45 deletions(-) diff --git a/py/core/providers/kg/postgres.py b/py/core/providers/kg/postgres.py index 9e73a90df..b769bc0ba 100644 --- a/py/core/providers/kg/postgres.py +++ b/py/core/providers/kg/postgres.py @@ -199,9 +199,7 @@ async def _add_objects( Upsert objects into the specified table. """ # Get non-null attributes from the first object - non_null_attrs = { - k: v for k, v in objects[0].__dict__.items() if v is not None - } + non_null_attrs = {k: v for k, v in objects[0].items() if v is not None} columns = ", ".join(non_null_attrs.keys()) placeholders = ", ".join(f"${i+1}" for i in range(len(non_null_attrs))) @@ -229,17 +227,12 @@ async def _add_objects( # Filter out null values for each object params = [ tuple( - ( - json.dumps(v) - if isinstance(v, dict) - else (str(v) if v is not None else None) - ) - for v in obj.__dict__.values() + (json.dumps(v) if isinstance(v, dict) else v) + for v in obj.values() if v is not None ) for obj in objects ] - logger.info(f"Upserting {len(params)} params into {table_name}") return await self.execute_many(QUERY, params) # type: ignore @@ -249,7 +242,6 @@ async def add_entities( entities: list[Entity], table_name: str, conflict_columns: list[str] = [], - embedding_col_name: str = "description_embedding", ) -> asyncpg.Record: """ Upsert entities into the entities_raw table. These are raw entities extracted from the document. @@ -261,18 +253,48 @@ async def add_entities( Returns: result: asyncpg.Record: result of the upsert operation """ + cleaned_entities = [] for entity in entities: + entity_dict = entity.to_dict() - if getattr(entity, embedding_col_name, None) is not None: - setattr( - entity, - embedding_col_name, - str(getattr(entity, embedding_col_name)), # type: ignore - ) - + entity_dict["extraction_ids"] = ( + entity_dict["extraction_ids"] + if entity_dict.get("extraction_ids") + else [] + ) + entity_dict["description_embedding"] = ( + str(entity_dict["description_embedding"]) + if entity_dict.get("description_embedding") + else None + ) + # entity_dict["extraction_ids"] = ( + # [str(ele) for ele in entity_dict["extraction_ids"]] + # if entity_dict.get("extraction_ids") + # else [] + # ) + # entity_dict["description_embedding"] = ( + # str(entity_dict["description_embedding"]) + # if entity_dict.get("description_embedding") + # else None + # ) + + # entity_dict["extraction_ids"] = ( + # json.dumps([str(ele) for ele in entity_dict["extraction_ids"]]) + # if entity_dict.get("extraction_ids") + # else json.dumps([]) + # ) + # entity_dict["description_embedding"] = ( + # str(entity_dict["description_embedding"]) + # if entity_dict.get("description_embedding") + # else None + # ) + cleaned_entities.append(entity_dict) + print("entity_dict = ", entity_dict) logger.info(f"Upserting {len(entities)} entities into {table_name}") - return await self._add_objects(entities, table_name, conflict_columns) + return await self._add_objects( + cleaned_entities, table_name, conflict_columns + ) async def add_triples( self, @@ -289,7 +311,9 @@ async def add_triples( Returns: result: asyncpg.Record: result of the upsert operation """ - return await self._add_objects(triples, table_name) + return await self._add_objects( + [ele.to_dict() for ele in triples], table_name + ) async def add_kg_extractions( self, diff --git a/py/shared/abstractions/graph.py b/py/shared/abstractions/graph.py index b8b1aeeea..5443b946c 100644 --- a/py/shared/abstractions/graph.py +++ b/py/shared/abstractions/graph.py @@ -79,7 +79,7 @@ def __init__(self, **kwargs): pass -class Triple(BaseModel): +class Triple(R2RSerializable): """A relationship between two entities. This is a generic relationship, and can be used to represent any type of relationship between any two entities.""" id: Optional[int] = None diff --git a/py/tests/conftest.py b/py/tests/conftest.py index 3eb81c6db..806396597 100644 --- a/py/tests/conftest.py +++ b/py/tests/conftest.py @@ -191,7 +191,7 @@ async def postgres_file_provider(file_config, temporary_postgres_db_provider): ) await file_provider.initialize() yield file_provider - await file_provider._close_connection() + # await file_provider._close_connection() # LLM provider diff --git a/py/tests/core/providers/logging/test_chat_logging_provider.py b/py/tests/core/providers/logging/test_chat_logging_provider.py index fefe546bb..abffbc341 100644 --- a/py/tests/core/providers/logging/test_chat_logging_provider.py +++ b/py/tests/core/providers/logging/test_chat_logging_provider.py @@ -35,8 +35,8 @@ async def test_get_conversation(local_logging_provider): ) assert len(retrieved_messages) == len(messages) for original, retrieved in zip(messages, retrieved_messages): - assert original.role == retrieved.role - assert original.content == retrieved.content + assert original.role == retrieved[1].role + assert original.content == retrieved[1].content @pytest.mark.asyncio @@ -57,21 +57,21 @@ async def test_edit_message(local_logging_provider): conversation_id, new_branch_id ) assert len(retrieved_messages) == 1 - assert retrieved_messages[0].content == "Hello, edited" + assert retrieved_messages[0][1].content == "Hello, edited" -@pytest.mark.asyncio -async def test_branches_overview(local_logging_provider): - conversation_id = await local_logging_provider.create_conversation() - message_id = await local_logging_provider.add_message( - conversation_id, Message(role="user", content="Hello") - ) - await local_logging_provider.edit_message(message_id, "Hello, edited") +# @pytest.mark.asyncio +# async def test_branches_overview(local_logging_provider): +# conversation_id = await local_logging_provider.create_conversation() +# message_id = await local_logging_provider.add_message( +# conversation_id, Message(role="user", content="Hello") +# ) +# await local_logging_provider.edit_message(message_id, "Hello, edited") - branches = await local_logging_provider.branches_overview(conversation_id) - assert len(branches) == 2 - assert branches[0]["branch_point_id"] is None - assert branches[1]["branch_point_id"] == message_id +# branches = await local_logging_provider.branches_overview(conversation_id) +# assert len(branches) == 2 +# assert branches[0]["branch_point_id"] is None +# assert branches[1]["branch_point_id"] == message_id @pytest.mark.asyncio @@ -114,7 +114,7 @@ async def test_branch_at_message(local_logging_provider): conversation_id, branch_id ) assert len(retrieved_messages) == 1 - assert retrieved_messages[0].content == "Hello" + assert retrieved_messages[0][1].content == "Hello" @pytest.mark.asyncio @@ -155,10 +155,10 @@ async def test_edit_message_in_middle(local_logging_provider): print("retrieved_messages = ", retrieved_messages) # Verify that messages after the edited message are not present assert len(retrieved_messages) == 2 - assert retrieved_messages[0].content == "Hello" - assert retrieved_messages[0].role == "user" - assert retrieved_messages[1].content == "Greetings!" - assert retrieved_messages[1].role == "assistant" + assert retrieved_messages[0][1].content == "Hello" + assert retrieved_messages[0][1].role == "user" + assert retrieved_messages[1][1].content == "Greetings!" + assert retrieved_messages[1][1].role == "assistant" @pytest.mark.asyncio @@ -203,14 +203,15 @@ async def test_multiple_branches_from_same_message(local_logging_provider): # Verify first branch messages assert len(retrieved_messages_1) == 2 - assert retrieved_messages_1[0].content == "Tell me a joke." - assert retrieved_messages_1[1].content == "Knock, knock!" + print("retrieved_messages_1[0] = ", retrieved_messages_1[0]) + assert retrieved_messages_1[0][1].content == "Tell me a joke." + assert retrieved_messages_1[1][1].content == "Knock, knock!" # Verify second branch messages assert len(retrieved_messages_2) == 2 - assert retrieved_messages_2[0].content == "Tell me a joke." + assert retrieved_messages_2[0][1].content == "Tell me a joke." assert ( - retrieved_messages_2[1].content + retrieved_messages_2[1][1].content == "What do you call a bear with no teeth? A gummy bear!" ) From 43a32ea6b4bf822a25ff72025d5dc868039f1bf0 Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Sun, 20 Oct 2024 17:57:05 -0700 Subject: [PATCH 21/27] merge --- py/core/main/api/ingestion_router.py | 2 +- py/core/main/services/ingestion_service.py | 12 +++--- py/core/providers/database/vector.py | 45 ++++++++++++++++++++-- py/core/providers/file/postgres.py | 4 +- py/shared/abstractions/vector.py | 2 +- py/tests/integration/runner_cli.py | 4 +- py/tests/integration/runner_sdk.py | 15 ++++---- 7 files changed, 62 insertions(+), 22 deletions(-) diff --git a/py/core/main/api/ingestion_router.py b/py/core/main/api/ingestion_router.py index 7e98dca15..17d889992 100644 --- a/py/core/main/api/ingestion_router.py +++ b/py/core/main/api/ingestion_router.py @@ -336,7 +336,7 @@ async def ingest_chunks_app( @self.base_endpoint async def create_vector_index_app( table_name: Optional[VectorTableName] = Body( - default=VectorTableName.CHUNKS, + default=VectorTableName.RAW_CHUNKS, description="The name of the vector table to create.", ), index_method: IndexMethod = Body( diff --git a/py/core/main/services/ingestion_service.py b/py/core/main/services/ingestion_service.py index d928d6b69..30c57e137 100644 --- a/py/core/main/services/ingestion_service.py +++ b/py/core/main/services/ingestion_service.py @@ -382,7 +382,7 @@ async def _get_enriched_chunk_text( ) elif enrichment_strategy == ChunkEnrichmentStrategy.SEMANTIC: - semantic_neighbors = self.providers.database.vector.get_semantic_neighbors( + semantic_neighbors = await self.providers.database.get_semantic_neighbors( document_id=document_id, chunk_id=chunk["extraction_id"], limit=chunk_enrichment_settings.semantic_neighbors, @@ -462,8 +462,10 @@ async def chunk_enrichment(self, document_id: UUID) -> int: self.providers.ingestion.config.chunk_enrichment_settings # type: ignore ) # get all document_chunks - document_chunks = self.providers.database.vector.get_document_chunks( - document_id=document_id, + document_chunks = ( + await self.providers.database.get_document_chunks( + document_id=document_id, + ) )["results"] new_vector_entries = [] @@ -499,14 +501,14 @@ async def chunk_enrichment(self, document_id: UUID) -> int: ) # delete old chunks from vector db - self.providers.database.vector.delete( + self.providers.database.delete( filters={ "document_id": document_id, }, ) # embed and store the enriched chunk - self.providers.database.vector.upsert_entries(new_vector_entries) + self.providers.database.upsert_entries(new_vector_entries) return len(new_vector_entries) diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index 9e6d6fc0a..fb69c85f4 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -509,8 +509,8 @@ async def create_index( ArgError: If an invalid index method is used, or if *replace* is False and an index already exists. """ - if table_name == VectorTableName.CHUNKS: - table_name_str = f"{self.project_name}.{self.project_name}" # TODO - Fix bug in vector table naming convention + if table_name == VectorTableName.RAW_CHUNKS: + table_name_str = f"{self.project_name}.{VectorTableName.RAW_CHUNKS}" # TODO - Fix bug in vector table naming convention col_name = "vec" elif table_name == VectorTableName.ENTITIES: table_name_str = f"{self.project_name}.{VectorTableName.ENTITIES}" @@ -586,7 +586,7 @@ async def create_index( return None def _build_filters( - self, filters: dict, parameters: list[dict] + self, filters: dict, parameters: list ) -> Tuple[str, list[Any]]: def parse_condition(key: str, value: Any) -> str: @@ -703,6 +703,45 @@ def parse_filter(filter_dict: dict) -> str: where_clause = parse_filter(filters) return where_clause + async def get_semantic_neighbors( + self, + document_id: UUID, + chunk_id: UUID, + limit: int = 10, + similarity_threshold: float = 0.5, + ) -> list[dict[str, Any]]: + + table_name = self._get_table_name(VectorDBMixin.TABLE_NAME) + query = f""" + WITH target_vector AS ( + SELECT vec FROM {table_name} + WHERE document_id = $1 AND extraction_id = $2 + ) + SELECT t.extraction_id, t.text, t.metadata, t.document_id, (t.vec <=> tv.vec) AS similarity + FROM {table_name} t, target_vector tv + WHERE (t.vec <=> tv.vec) >= $3 + AND t.document_id = $1 + AND t.extraction_id != $2 + ORDER BY similarity ASC + LIMIT $4 + """ + + results = await self.fetch_query( + query, + (str(document_id), str(chunk_id), similarity_threshold, limit), + ) + + return [ + { + "extraction_id": str(r["extraction_id"]), + "text": r["text"], + "metadata": json.loads(r["metadata"]), + "document_id": str(r["document_id"]), + "similarity": float(r["similarity"]), + } + for r in results + ] + def _get_index_options( self, method: IndexMethod, diff --git a/py/core/providers/file/postgres.py b/py/core/providers/file/postgres.py index cb43cf26c..6c25a734c 100644 --- a/py/core/providers/file/postgres.py +++ b/py/core/providers/file/postgres.py @@ -1,6 +1,6 @@ import io import logging -from typing import BinaryIO, Optional +from typing import BinaryIO, Optional, Union from uuid import UUID import asyncpg @@ -253,7 +253,7 @@ async def get_files_overview( ) conditions = [] - params = [] + params: list[Union[str, list[str], int]] = [] query = f""" SELECT document_id, file_name, file_oid, file_size, file_type, created_at, updated_at FROM {self._get_table_name('file_storage')} diff --git a/py/shared/abstractions/vector.py b/py/shared/abstractions/vector.py index 09c1a161b..133d6e8ee 100644 --- a/py/shared/abstractions/vector.py +++ b/py/shared/abstractions/vector.py @@ -107,7 +107,7 @@ class VectorTableName(str, Enum): # {r2r_project_name}.{r2r_project_name} due to a bug in the vector class. """ - CHUNKS = "chunks" + RAW_CHUNKS = "raw_chunks" ENTITIES = "entity_embedding" ENTITIES_DEDUPLICATED = "entity_deduplicated" # TODO: Add support for triples diff --git a/py/tests/integration/runner_cli.py b/py/tests/integration/runner_cli.py index 89e2fe2e3..bdf65aeac 100644 --- a/py/tests/integration/runner_cli.py +++ b/py/tests/integration/runner_cli.py @@ -36,7 +36,7 @@ def run_command(command): def test_ingest_sample_file_cli(): print("Testing: Ingest sample file CLI") run_command("poetry run r2r ingest-sample-file") - time.sleep(10) + time.sleep(30) print("Ingestion successful") print("~" * 100) @@ -47,7 +47,7 @@ def test_ingest_sample_file_2_cli(): """ print("Testing: Ingest sample file CLI 2") run_command("poetry run r2r ingest-sample-file --v2") - time.sleep(10) + time.sleep(30) print("Ingestion successful") print("~" * 100) diff --git a/py/tests/integration/runner_sdk.py b/py/tests/integration/runner_sdk.py index 345eb1074..6cd103c51 100644 --- a/py/tests/integration/runner_sdk.py +++ b/py/tests/integration/runner_sdk.py @@ -67,7 +67,7 @@ def test_ingest_sample_file_with_config_sdk(): ingest_response = client.ingest_files( file_paths=file_paths, ingestion_config={"chunk_size": 4_096} ) - time.sleep(10) + time.sleep(30) if not ingest_response["results"]: print("Ingestion test failed") @@ -102,7 +102,6 @@ def test_reingest_sample_file_sdk(): file_paths = ["core/examples/data/uber_2021.pdf"] try: results = client.ingest_files(file_paths=file_paths) - print("results = ", results) time.sleep(30) if "task_id" not in results["results"][0]: @@ -923,7 +922,7 @@ def test_user_collection_document_management(): # Ingest the "aristotle.txt" file ingest_result = client.ingest_files(["core/examples/data/aristotle.txt"]) - time.sleep(10) + time.sleep(30) document_id = ingest_result["results"][0]["document_id"] @@ -997,7 +996,7 @@ def test_user_removes_document_from_collection(): # Ingest the "aristotle.txt" file ingest_result = client.ingest_files(["core/examples/data/aristotle.txt"]) - time.sleep(10) + time.sleep(30) document_id = ingest_result["results"][0]["document_id"] @@ -1049,7 +1048,7 @@ def test_user_lists_documents_in_collection(): # Ingest the "aristotle.txt" file ingest_result = client.ingest_files(["core/examples/data/aristotle.txt"]) - time.sleep(10) + time.sleep(30) document_id = ingest_result["results"][0]["document_id"] @@ -1411,7 +1410,7 @@ def test_user_gets_collections_for_document(): # Ingest a document ingest_result = client.ingest_files(["core/examples/data/aristotle.txt"]) - time.sleep(10) + time.sleep(30) document_id = ingest_result["results"][0]["document_id"] @@ -1480,7 +1479,7 @@ def test_collection_user_interactions(): # Ingest a document client.login("collection_owner@example.com", "password123") ingest_result = client.ingest_files(["core/examples/data/aristotle.txt"]) - time.sleep(10) + time.sleep(30) document_id = ingest_result["results"][0]["document_id"] @@ -1533,7 +1532,7 @@ def test_collection_document_interactions(): # Ingest a document ingest_result = client.ingest_files(["core/examples/data/aristotle.txt"]) - time.sleep(10) + time.sleep(30) document_id = ingest_result["results"][0]["document_id"] From 2b24e8340bae729e3cfe23b5b905ea86880b28c5 Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Sun, 20 Oct 2024 18:03:29 -0700 Subject: [PATCH 22/27] rm pytest --- .github/workflows/r2r-full-py-integration-tests.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/r2r-full-py-integration-tests.yml b/.github/workflows/r2r-full-py-integration-tests.yml index 1afd02802..730bbb566 100644 --- a/.github/workflows/r2r-full-py-integration-tests.yml +++ b/.github/workflows/r2r-full-py-integration-tests.yml @@ -53,9 +53,6 @@ jobs: - name: Start R2R Full server uses: ./.github/actions/start-r2r-full - - name: Run Pytest Ingestion Tests - run: poetry run pytest - - name: Run CLI Ingestion Tests if: matrix.test_category == 'cli-ingestion' uses: ./.github/actions/run-cli-ingestion-tests From e368bccb708dc4e495990fc82d300750ab8ba07f Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Sun, 20 Oct 2024 18:36:16 -0700 Subject: [PATCH 23/27] up --- py/core/base/providers/database.py | 373 ++++++++++++++++++++++++++- py/core/providers/database/vector.py | 56 +++- py/core/providers/kg/postgres.py | 22 -- 3 files changed, 413 insertions(+), 38 deletions(-) diff --git a/py/core/base/providers/database.py b/py/core/base/providers/database.py index 641f55a4e..dcee345bb 100644 --- a/py/core/base/providers/database.py +++ b/py/core/base/providers/database.py @@ -1,9 +1,9 @@ import logging from abc import ABC, abstractmethod -from typing import Any, Optional +from typing import Any, Optional, Union, Sequence from pydantic import BaseModel - +from uuid import UUID from shared.abstractions.vector import VectorQuantizationType from .base import Provider, ProviderConfig @@ -79,3 +79,372 @@ def __init__(self, config: DatabaseConfig): @abstractmethod def _get_table_name(self, base_name: str) -> str: pass + + @abstractmethod + def execute_query( + self, + query: str, + params: Optional[Union[dict[str, Any], Sequence[Any]]] = None, + isolation_level: Optional[str] = None, + ): + pass + + @abstractmethod + async def execute_many(self, query, params=None, batch_size=1000): + pass + + @abstractmethod + def fetch_query( + self, + query: str, + params: Optional[Union[dict[str, Any], Sequence[Any]]] = None, + ): + pass + + @abstractmethod + def fetchrow_query( + self, + query: str, + params: Optional[Union[dict[str, Any], Sequence[Any]]] = None, + ): + pass + + # @abstractmethod + # def create_table(self): + # pass + + # # Management Methods + # @abstractmethod + # async def update_prompt( + # self, + # name: str, + # template: Optional[str] = None, + # input_types: Optional[dict[str, str]] = None, + # ) -> dict: + # pass + + # @abstractmethod + # async def add_prompt( + # self, + # name: str, + # template: str, + # input_types: dict[str, str], + # ) -> dict: + # pass + + # @abstractmethod + # async def get_prompt( + # self, + # prompt_name: str, + # inputs: Optional[dict[str, Any]] = None, + # prompt_override: Optional[str] = None, + # ) -> dict: + # pass + + # @abstractmethod + # async def get_all_prompts(self) -> dict: + # pass + + # @abstractmethod + # async def delete_prompt(self, prompt_name: str) -> dict: + # pass + + # @abstractmethod + # async def analytics( + # self, + # filter_criteria: Optional[Union[dict, str]] = None, + # analysis_types: Optional[Union[dict, str]] = None, + # ) -> dict: + # pass + + # @abstractmethod + # async def app_settings(self) -> dict: + # pass + + # @abstractmethod + # async def users_overview( + # self, + # user_ids: Optional[list[str]] = None, + # offset: Optional[int] = None, + # limit: Optional[int] = None, + # ) -> dict: + # pass + + # @abstractmethod + # async def delete( + # self, + # filters: dict, + # ) -> dict: + # pass + + # @abstractmethod + # async def download_file( + # self, + # document_id: Union[str, UUID], + # ): + # pass + + # @abstractmethod + # async def documents_overview( + # self, + # document_ids: Optional[list[Union[UUID, str]]] = None, + # offset: Optional[int] = None, + # limit: Optional[int] = None, + # ) -> dict: + # pass + + # @abstractmethod + # async def document_chunks( + # self, + # document_id: str, + # offset: Optional[int] = None, + # limit: Optional[int] = None, + # include_vectors: Optional[bool] = False, + # ) -> dict: + # pass + + # @abstractmethod + # async def collections_overview( + # self, + # collection_ids: Optional[list[str]] = None, + # offset: Optional[int] = None, + # limit: Optional[int] = None, + # ) -> dict: + # pass + + # @abstractmethod + # async def create_collection( + # self, + # name: str, + # description: Optional[str] = None, + # ) -> dict: + # pass + + # @abstractmethod + # async def get_collection( + # self, + # collection_id: Union[str, UUID], + # ) -> dict: + # pass + + # @abstractmethod + # async def update_collection( + # self, + # collection_id: Union[str, UUID], + # name: Optional[str] = None, + # description: Optional[str] = None, + # ) -> dict: + # pass + + # @abstractmethod + # async def delete_collection( + # self, + # collection_id: Union[str, UUID], + # ) -> dict: + # pass + + # @abstractmethod + # async def delete_user( + # self, + # user_id: str, + # password: Optional[str] = None, + # delete_vector_data: bool = False, + # ) -> dict: + # pass + + # @abstractmethod + # async def list_collections( + # self, + # offset: Optional[int] = None, + # limit: Optional[int] = None, + # ) -> dict: + # pass + + # @abstractmethod + # async def add_user_to_collection( + # self, + # user_id: Union[str, UUID], + # collection_id: Union[str, UUID], + # ) -> dict: + # pass + + # @abstractmethod + # async def remove_user_from_collection( + # self, + # user_id: Union[str, UUID], + # collection_id: Union[str, UUID], + # ) -> dict: + # pass + + # @abstractmethod + # async def get_users_in_collection( + # self, + # collection_id: Union[str, UUID], + # offset: Optional[int] = None, + # limit: Optional[int] = None, + # ) -> dict: + # pass + + # @abstractmethod + # async def user_collections( + # self, + # user_id: Union[str, UUID], + # offset: Optional[int] = None, + # limit: Optional[int] = None, + # ) -> dict: + # pass + + # @abstractmethod + # async def assign_document_to_collection( + # self, + # document_id: Union[str, UUID], + # collection_id: Union[str, UUID], + # ) -> dict: + # pass + + + # # TODO: Verify that this method is implemented, also, should be a PUT request + # @abstractmethod + # async def remove_document_from_collection( + # self, + # document_id: Union[str, UUID], + # collection_id: Union[str, UUID], + # ) -> dict: + # pass + + # @abstractmethod + # async def document_collections( + # self, + # document_id: Union[str, UUID], + # offset: Optional[int] = None, + # limit: Optional[int] = None, + # ) -> dict: + # pass + + # @abstractmethod + # async def documents_in_collection( + # self, + # collection_id: Union[str, UUID], + # offset: Optional[int] = None, + # limit: Optional[int] = None, + # ) -> dict: + # pass + + # @abstractmethod + # async def conversations_overview( + # self, + # conversation_ids: Optional[list[Union[UUID, str]]] = None, + # offset: Optional[int] = None, + # limit: Optional[int] = None, + # ) -> dict: + # pass + + # # async def get_conversation( + # # self, + # # conversation_id: Union[str, UUID], + # # branch_id: Optional[str] = None, + # # ) -> dict: + # # """ + # # Get a conversation by its ID. + + # # Args: + # # conversation_id (Union[str, UUID]): The ID of the conversation to retrieve. + # # branch_id (Optional[str]): The ID of a specific branch to retrieve. + + # # Returns: + # # dict: The conversation data. + # # """ + # # query_params = f"?branch_id={branch_id}" if branch_id else "" + # # return await self._make_request( # type: ignore + # # "GET", f"get_conversation/{str(conversation_id)}{query_params}" + # # ) + + # # async def create_conversation(self) -> dict: + # # """ + # # Create a new conversation. + + # # Returns: + # # dict: The response from the server. + # # """ + # # return await self._make_request("POST", "create_conversation") # type: ignore + + # # async def add_message( + # # self, + # # conversation_id: Union[str, UUID], + # # message: Message, + # # parent_id: Optional[str] = None, + # # metadata: Optional[dict[str, Any]] = None, + # # ) -> dict: + # # """ + # # Add a message to an existing conversation. + + # # Args: + # # conversation_id (Union[str, UUID]): The ID of the conversation. + # # message (Message): The message to add. + # # parent_id (Optional[str]): The ID of the parent message. + # # metadata (Optional[dict[str, Any]]): Additional metadata for the message. + + # # Returns: + # # dict: The response from the server. + # # """ + # # data: dict = {"message": message} + # # if parent_id is not None: + # # data["parent_id"] = parent_id + # # if metadata is not None: + # # data["metadata"] = metadata + # # return await self._make_request( # type: ignore + # # "POST", f"add_message/{str(conversation_id)}", data=data + # # ) + + # # async def update_message( + # # self, + # # message_id: str, + # # message: Message, + # # ) -> dict: + # # """ + # # Update a message in an existing conversation. + + # # Args: + # # message_id (str): The ID of the message to update. + # # message (Message): The updated message. + + # # Returns: + # # dict: The response from the server. + # # """ + # # return await self._make_request( # type: ignore + # # "PUT", f"update_message/{message_id}", data=message + # # ) + + # # async def branches_overview( + # # self, + # # conversation_id: Union[str, UUID], + # # ) -> dict: + # # """ + # # Get an overview of branches in a conversation. + + # # Args: + # # conversation_id (Union[str, UUID]): The ID of the conversation to get branches for. + + # # Returns: + # # dict: The response from the server. + # # """ + # # return await self._make_request( # type: ignore + # # "GET", f"branches_overview/{str(conversation_id)}" + # # ) + + # # async def delete_conversation( + # # self, + # # conversation_id: Union[str, UUID], + # # ) -> dict: + # # """ + # # Delete a conversation by its ID. + + # # Args: + # # conversation_id (Union[str, UUID]): The ID of the conversation to delete. + + # # Returns: + # # dict: The response from the server. + # # """ + # # return await self._make_request( # type: ignore + # # "DELETE", f"delete_conversation/{str(conversation_id)}" + # # ) diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index fb69c85f4..2221e48aa 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -173,8 +173,9 @@ async def semantic_search( params.extend([search_settings.search_limit, search_settings.offset]) - print("Generated SQL query:", query) - print("Query parameters:", params) + print('query = ', query) + print('params = ', params) + results = await self.fetch_query(query, params) @@ -233,6 +234,9 @@ async def full_text_search( ] ) + print('query = ', query) + print('params = ', params) + results = await self.fetch_query(query, params) return [ VectorSearchResult( @@ -369,9 +373,6 @@ async def delete( RETURNING extraction_id, document_id, text; """ - print("Generated DELETE SQL query:", query) - print("Query parameters:", params) - results = await self.fetch_query(query, params) return { @@ -586,7 +587,7 @@ async def create_index( return None def _build_filters( - self, filters: dict, parameters: list + self, filters: dict, parameters: list[dict] ) -> Tuple[str, list[Any]]: def parse_condition(key: str, value: Any) -> str: @@ -689,18 +690,45 @@ def parse_filter(filter_dict: dict) -> str: filter_conditions = [] for key, value in filter_dict.items(): if key == "$and": - filter_conditions.append( - f"({' AND '.join([parse_filter(f) for f in value])})" - ) + and_conditions = [parse_filter(f) for f in value if f] # Skip empty dictionaries + if and_conditions: + filter_conditions.append(f"({' AND '.join(and_conditions)})") elif key == "$or": - filter_conditions.append( - f"({' OR '.join([parse_filter(f) for f in value])})" - ) + or_conditions = [parse_filter(f) for f in value if f] # Skip empty dictionaries + if or_conditions: + filter_conditions.append(f"({' OR '.join(or_conditions)})") else: filter_conditions.append(parse_condition(key, value)) - return " AND ".join(filter_conditions) - + + # Check if there is only a single condition + if len(filter_conditions) == 1: + return filter_conditions[0] + else: + return " AND ".join(filter_conditions) + # def parse_filter(filter_dict: dict) -> str: + # filter_conditions = [] + # for key, value in filter_dict.items(): + # if key == "$and": + # filter_conditions.append( + # f"({' AND '.join([parse_filter(f) for f in value])})" + # ) + # elif key == "$or": + # filter_conditions.append( + # f"({' OR '.join([parse_filter(f) for f in value])})" + # ) + # else: + # filter_conditions.append(parse_condition(key, value)) + + # # Check if there is only a single condition + # if len(filter_conditions) == 1: + # return filter_conditions[0] + # else: + # return " AND ".join(filter_conditions) + + print('filters = ', filters) where_clause = parse_filter(filters) + print('where_clause = ', where_clause) + return where_clause async def get_semantic_neighbors( diff --git a/py/core/providers/kg/postgres.py b/py/core/providers/kg/postgres.py index b769bc0ba..ddbf3af08 100644 --- a/py/core/providers/kg/postgres.py +++ b/py/core/providers/kg/postgres.py @@ -256,7 +256,6 @@ async def add_entities( cleaned_entities = [] for entity in entities: entity_dict = entity.to_dict() - entity_dict["extraction_ids"] = ( entity_dict["extraction_ids"] if entity_dict.get("extraction_ids") @@ -267,27 +266,6 @@ async def add_entities( if entity_dict.get("description_embedding") else None ) - # entity_dict["extraction_ids"] = ( - # [str(ele) for ele in entity_dict["extraction_ids"]] - # if entity_dict.get("extraction_ids") - # else [] - # ) - # entity_dict["description_embedding"] = ( - # str(entity_dict["description_embedding"]) - # if entity_dict.get("description_embedding") - # else None - # ) - - # entity_dict["extraction_ids"] = ( - # json.dumps([str(ele) for ele in entity_dict["extraction_ids"]]) - # if entity_dict.get("extraction_ids") - # else json.dumps([]) - # ) - # entity_dict["description_embedding"] = ( - # str(entity_dict["description_embedding"]) - # if entity_dict.get("description_embedding") - # else None - # ) cleaned_entities.append(entity_dict) print("entity_dict = ", entity_dict) logger.info(f"Upserting {len(entities)} entities into {table_name}") From 88844056fa957128cf2cd448fbcbe5210244e344 Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Sun, 20 Oct 2024 18:40:50 -0700 Subject: [PATCH 24/27] up --- py/core/providers/kg/postgres.py | 2 +- py/tests/core/providers/kg/test_kg_logic.py | 4 ++-- py/tests/integration/runner_cli.py | 6 ++++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/py/core/providers/kg/postgres.py b/py/core/providers/kg/postgres.py index ddbf3af08..0b348ac2c 100644 --- a/py/core/providers/kg/postgres.py +++ b/py/core/providers/kg/postgres.py @@ -267,7 +267,7 @@ async def add_entities( else None ) cleaned_entities.append(entity_dict) - print("entity_dict = ", entity_dict) + logger.info(f"Upserting {len(entities)} entities into {table_name}") return await self._add_objects( diff --git a/py/tests/core/providers/kg/test_kg_logic.py b/py/tests/core/providers/kg/test_kg_logic.py index d875a0a78..b6afd2c30 100644 --- a/py/tests/core/providers/kg/test_kg_logic.py +++ b/py/tests/core/providers/kg/test_kg_logic.py @@ -408,5 +408,5 @@ async def test_get_community_details( assert community_level == 0 # TODO: change these to objects - assert entities[0]["name"] == "Entity1" - assert triples[0]["subject"] == "Entity1" + assert entities[0].name == "Entity1" + assert triples[0].subject == "Entity1" diff --git a/py/tests/integration/runner_cli.py b/py/tests/integration/runner_cli.py index bdf65aeac..24fdc0203 100644 --- a/py/tests/integration/runner_cli.py +++ b/py/tests/integration/runner_cli.py @@ -253,8 +253,8 @@ def test_kg_create_graph_sample_file_cli(): print("Calling `poetry run r2r create-graph --run` ") output = run_command("poetry run r2r create-graph --run") - if "queued" in output: - time.sleep(60) + # if "queued" in output: + time.sleep(60) response = requests.get( "http://localhost:7272/v2/entities", @@ -274,6 +274,8 @@ def test_kg_create_graph_sample_file_cli(): print(entities_list) + documents_overview = run_command("poetry run r2r documents-overview") + print(documents_overview) assert len(entities_list) >= 1 assert "ARISTOTLE" in entities_list From e279de487a4a09d1e88a6394882055867d98b981 Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Sun, 20 Oct 2024 18:46:31 -0700 Subject: [PATCH 25/27] fix delete --- py/core/base/providers/database.py | 3 +- py/core/main/services/management_service.py | 4 ++- py/core/providers/database/vector.py | 34 +++++++++++++-------- 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/py/core/base/providers/database.py b/py/core/base/providers/database.py index dcee345bb..8dadbd1c5 100644 --- a/py/core/base/providers/database.py +++ b/py/core/base/providers/database.py @@ -79,7 +79,7 @@ def __init__(self, config: DatabaseConfig): @abstractmethod def _get_table_name(self, base_name: str) -> str: pass - + @abstractmethod def execute_query( self, @@ -302,7 +302,6 @@ def fetchrow_query( # ) -> dict: # pass - # # TODO: Verify that this method is implemented, also, should be a PUT request # @abstractmethod # async def remove_document_from_collection( diff --git a/py/core/main/services/management_service.py b/py/core/main/services/management_service.py index 8f2839384..756020d60 100644 --- a/py/core/main/services/management_service.py +++ b/py/core/main/services/management_service.py @@ -267,7 +267,9 @@ def validate_filters(filters: dict[str, Any]) -> None: logger.info(f"Deleting entries with filters: {filters}") try: - vector_delete_results = self.providers.database.delete(filters) + vector_delete_results = await self.providers.database.delete( + filters + ) except Exception as e: logger.error(f"Error deleting from vector database: {e}") vector_delete_results = {} diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index 2221e48aa..b2c201bd8 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -173,9 +173,8 @@ async def semantic_search( params.extend([search_settings.search_limit, search_settings.offset]) - print('query = ', query) - print('params = ', params) - + print("query = ", query) + print("params = ", params) results = await self.fetch_query(query, params) @@ -234,8 +233,8 @@ async def full_text_search( ] ) - print('query = ', query) - print('params = ', params) + print("query = ", query) + print("params = ", params) results = await self.fetch_query(query, params) return [ @@ -690,21 +689,30 @@ def parse_filter(filter_dict: dict) -> str: filter_conditions = [] for key, value in filter_dict.items(): if key == "$and": - and_conditions = [parse_filter(f) for f in value if f] # Skip empty dictionaries + and_conditions = [ + parse_filter(f) for f in value if f + ] # Skip empty dictionaries if and_conditions: - filter_conditions.append(f"({' AND '.join(and_conditions)})") + filter_conditions.append( + f"({' AND '.join(and_conditions)})" + ) elif key == "$or": - or_conditions = [parse_filter(f) for f in value if f] # Skip empty dictionaries + or_conditions = [ + parse_filter(f) for f in value if f + ] # Skip empty dictionaries if or_conditions: - filter_conditions.append(f"({' OR '.join(or_conditions)})") + filter_conditions.append( + f"({' OR '.join(or_conditions)})" + ) else: filter_conditions.append(parse_condition(key, value)) - + # Check if there is only a single condition if len(filter_conditions) == 1: return filter_conditions[0] else: return " AND ".join(filter_conditions) + # def parse_filter(filter_dict: dict) -> str: # filter_conditions = [] # for key, value in filter_dict.items(): @@ -718,16 +726,16 @@ def parse_filter(filter_dict: dict) -> str: # ) # else: # filter_conditions.append(parse_condition(key, value)) - + # # Check if there is only a single condition # if len(filter_conditions) == 1: # return filter_conditions[0] # else: # return " AND ".join(filter_conditions) - print('filters = ', filters) + print("filters = ", filters) where_clause = parse_filter(filters) - print('where_clause = ', where_clause) + print("where_clause = ", where_clause) return where_clause From 94290d597322998e6cd9ed3dc7086fb5ca2b758c Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Sun, 20 Oct 2024 19:03:16 -0700 Subject: [PATCH 26/27] up --- py/core/base/providers/database.py | 341 +-------------------------- py/core/providers/database/vector.py | 15 +- 2 files changed, 11 insertions(+), 345 deletions(-) diff --git a/py/core/base/providers/database.py b/py/core/base/providers/database.py index 8dadbd1c5..2eee70476 100644 --- a/py/core/base/providers/database.py +++ b/py/core/base/providers/database.py @@ -107,343 +107,4 @@ def fetchrow_query( query: str, params: Optional[Union[dict[str, Any], Sequence[Any]]] = None, ): - pass - - # @abstractmethod - # def create_table(self): - # pass - - # # Management Methods - # @abstractmethod - # async def update_prompt( - # self, - # name: str, - # template: Optional[str] = None, - # input_types: Optional[dict[str, str]] = None, - # ) -> dict: - # pass - - # @abstractmethod - # async def add_prompt( - # self, - # name: str, - # template: str, - # input_types: dict[str, str], - # ) -> dict: - # pass - - # @abstractmethod - # async def get_prompt( - # self, - # prompt_name: str, - # inputs: Optional[dict[str, Any]] = None, - # prompt_override: Optional[str] = None, - # ) -> dict: - # pass - - # @abstractmethod - # async def get_all_prompts(self) -> dict: - # pass - - # @abstractmethod - # async def delete_prompt(self, prompt_name: str) -> dict: - # pass - - # @abstractmethod - # async def analytics( - # self, - # filter_criteria: Optional[Union[dict, str]] = None, - # analysis_types: Optional[Union[dict, str]] = None, - # ) -> dict: - # pass - - # @abstractmethod - # async def app_settings(self) -> dict: - # pass - - # @abstractmethod - # async def users_overview( - # self, - # user_ids: Optional[list[str]] = None, - # offset: Optional[int] = None, - # limit: Optional[int] = None, - # ) -> dict: - # pass - - # @abstractmethod - # async def delete( - # self, - # filters: dict, - # ) -> dict: - # pass - - # @abstractmethod - # async def download_file( - # self, - # document_id: Union[str, UUID], - # ): - # pass - - # @abstractmethod - # async def documents_overview( - # self, - # document_ids: Optional[list[Union[UUID, str]]] = None, - # offset: Optional[int] = None, - # limit: Optional[int] = None, - # ) -> dict: - # pass - - # @abstractmethod - # async def document_chunks( - # self, - # document_id: str, - # offset: Optional[int] = None, - # limit: Optional[int] = None, - # include_vectors: Optional[bool] = False, - # ) -> dict: - # pass - - # @abstractmethod - # async def collections_overview( - # self, - # collection_ids: Optional[list[str]] = None, - # offset: Optional[int] = None, - # limit: Optional[int] = None, - # ) -> dict: - # pass - - # @abstractmethod - # async def create_collection( - # self, - # name: str, - # description: Optional[str] = None, - # ) -> dict: - # pass - - # @abstractmethod - # async def get_collection( - # self, - # collection_id: Union[str, UUID], - # ) -> dict: - # pass - - # @abstractmethod - # async def update_collection( - # self, - # collection_id: Union[str, UUID], - # name: Optional[str] = None, - # description: Optional[str] = None, - # ) -> dict: - # pass - - # @abstractmethod - # async def delete_collection( - # self, - # collection_id: Union[str, UUID], - # ) -> dict: - # pass - - # @abstractmethod - # async def delete_user( - # self, - # user_id: str, - # password: Optional[str] = None, - # delete_vector_data: bool = False, - # ) -> dict: - # pass - - # @abstractmethod - # async def list_collections( - # self, - # offset: Optional[int] = None, - # limit: Optional[int] = None, - # ) -> dict: - # pass - - # @abstractmethod - # async def add_user_to_collection( - # self, - # user_id: Union[str, UUID], - # collection_id: Union[str, UUID], - # ) -> dict: - # pass - - # @abstractmethod - # async def remove_user_from_collection( - # self, - # user_id: Union[str, UUID], - # collection_id: Union[str, UUID], - # ) -> dict: - # pass - - # @abstractmethod - # async def get_users_in_collection( - # self, - # collection_id: Union[str, UUID], - # offset: Optional[int] = None, - # limit: Optional[int] = None, - # ) -> dict: - # pass - - # @abstractmethod - # async def user_collections( - # self, - # user_id: Union[str, UUID], - # offset: Optional[int] = None, - # limit: Optional[int] = None, - # ) -> dict: - # pass - - # @abstractmethod - # async def assign_document_to_collection( - # self, - # document_id: Union[str, UUID], - # collection_id: Union[str, UUID], - # ) -> dict: - # pass - - # # TODO: Verify that this method is implemented, also, should be a PUT request - # @abstractmethod - # async def remove_document_from_collection( - # self, - # document_id: Union[str, UUID], - # collection_id: Union[str, UUID], - # ) -> dict: - # pass - - # @abstractmethod - # async def document_collections( - # self, - # document_id: Union[str, UUID], - # offset: Optional[int] = None, - # limit: Optional[int] = None, - # ) -> dict: - # pass - - # @abstractmethod - # async def documents_in_collection( - # self, - # collection_id: Union[str, UUID], - # offset: Optional[int] = None, - # limit: Optional[int] = None, - # ) -> dict: - # pass - - # @abstractmethod - # async def conversations_overview( - # self, - # conversation_ids: Optional[list[Union[UUID, str]]] = None, - # offset: Optional[int] = None, - # limit: Optional[int] = None, - # ) -> dict: - # pass - - # # async def get_conversation( - # # self, - # # conversation_id: Union[str, UUID], - # # branch_id: Optional[str] = None, - # # ) -> dict: - # # """ - # # Get a conversation by its ID. - - # # Args: - # # conversation_id (Union[str, UUID]): The ID of the conversation to retrieve. - # # branch_id (Optional[str]): The ID of a specific branch to retrieve. - - # # Returns: - # # dict: The conversation data. - # # """ - # # query_params = f"?branch_id={branch_id}" if branch_id else "" - # # return await self._make_request( # type: ignore - # # "GET", f"get_conversation/{str(conversation_id)}{query_params}" - # # ) - - # # async def create_conversation(self) -> dict: - # # """ - # # Create a new conversation. - - # # Returns: - # # dict: The response from the server. - # # """ - # # return await self._make_request("POST", "create_conversation") # type: ignore - - # # async def add_message( - # # self, - # # conversation_id: Union[str, UUID], - # # message: Message, - # # parent_id: Optional[str] = None, - # # metadata: Optional[dict[str, Any]] = None, - # # ) -> dict: - # # """ - # # Add a message to an existing conversation. - - # # Args: - # # conversation_id (Union[str, UUID]): The ID of the conversation. - # # message (Message): The message to add. - # # parent_id (Optional[str]): The ID of the parent message. - # # metadata (Optional[dict[str, Any]]): Additional metadata for the message. - - # # Returns: - # # dict: The response from the server. - # # """ - # # data: dict = {"message": message} - # # if parent_id is not None: - # # data["parent_id"] = parent_id - # # if metadata is not None: - # # data["metadata"] = metadata - # # return await self._make_request( # type: ignore - # # "POST", f"add_message/{str(conversation_id)}", data=data - # # ) - - # # async def update_message( - # # self, - # # message_id: str, - # # message: Message, - # # ) -> dict: - # # """ - # # Update a message in an existing conversation. - - # # Args: - # # message_id (str): The ID of the message to update. - # # message (Message): The updated message. - - # # Returns: - # # dict: The response from the server. - # # """ - # # return await self._make_request( # type: ignore - # # "PUT", f"update_message/{message_id}", data=message - # # ) - - # # async def branches_overview( - # # self, - # # conversation_id: Union[str, UUID], - # # ) -> dict: - # # """ - # # Get an overview of branches in a conversation. - - # # Args: - # # conversation_id (Union[str, UUID]): The ID of the conversation to get branches for. - - # # Returns: - # # dict: The response from the server. - # # """ - # # return await self._make_request( # type: ignore - # # "GET", f"branches_overview/{str(conversation_id)}" - # # ) - - # # async def delete_conversation( - # # self, - # # conversation_id: Union[str, UUID], - # # ) -> dict: - # # """ - # # Delete a conversation by its ID. - - # # Args: - # # conversation_id (Union[str, UUID]): The ID of the conversation to delete. - - # # Returns: - # # dict: The response from the server. - # # """ - # # return await self._make_request( # type: ignore - # # "DELETE", f"delete_conversation/{str(conversation_id)}" - # # ) + pass \ No newline at end of file diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index b2c201bd8..d02787914 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -207,21 +207,26 @@ async def full_text_search( "Full-text search is not enabled for this collection." ) - where_clause = "" + where_clauses = [] params = [query_text] + if search_settings.filters: - where_clause = self._build_filters(search_settings.filters, params) - where_clause = f"WHERE {where_clause}" + filters_clause = self._build_filters(search_settings.filters, params) + where_clauses.append(filters_clause) + + if where_clauses: + where_clause = "WHERE " + " AND ".join(where_clauses) + " AND fts @@ websearch_to_tsquery('english', $1)" + else: + where_clause = "WHERE fts @@ websearch_to_tsquery('english', $1)" query = f""" SELECT extraction_id, document_id, user_id, collection_ids, text, metadata, ts_rank(fts, websearch_to_tsquery('english', $1), 32) as rank FROM {self._get_table_name(VectorDBMixin.TABLE_NAME)} - WHERE fts @@ websearch_to_tsquery('english', $1) {where_clause} """ - + query += """ ORDER BY rank DESC OFFSET $2 LIMIT $3 From 1ec7e74eab84d8af3c11a8b9572ae86d63074e3c Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Sun, 20 Oct 2024 19:10:51 -0700 Subject: [PATCH 27/27] up --- py/core/providers/database/vector.py | 36 +++------------------------- 1 file changed, 3 insertions(+), 33 deletions(-) diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index d02787914..85f062218 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -173,9 +173,6 @@ async def semantic_search( params.extend([search_settings.search_limit, search_settings.offset]) - print("query = ", query) - print("params = ", params) - results = await self.fetch_query(query, params) return [ @@ -226,10 +223,10 @@ async def full_text_search( FROM {self._get_table_name(VectorDBMixin.TABLE_NAME)} {where_clause} """ - - query += """ + + query += f""" ORDER BY rank DESC - OFFSET $2 LIMIT $3 + OFFSET ${len(params)+1} LIMIT ${len(params)+2} """ params.extend( [ @@ -238,9 +235,6 @@ async def full_text_search( ] ) - print("query = ", query) - print("params = ", params) - results = await self.fetch_query(query, params) return [ VectorSearchResult( @@ -423,9 +417,7 @@ async def delete_collection_vector(self, collection_id: str) -> None: RETURNING collection_ids """ results = await self.fetchrow_query(query, (collection_id,)) - print("results = ", results) deleted_count = len(results) - print("deleted_count", deleted_count) return deleted_count async def get_document_chunks( @@ -718,29 +710,7 @@ def parse_filter(filter_dict: dict) -> str: else: return " AND ".join(filter_conditions) - # def parse_filter(filter_dict: dict) -> str: - # filter_conditions = [] - # for key, value in filter_dict.items(): - # if key == "$and": - # filter_conditions.append( - # f"({' AND '.join([parse_filter(f) for f in value])})" - # ) - # elif key == "$or": - # filter_conditions.append( - # f"({' OR '.join([parse_filter(f) for f in value])})" - # ) - # else: - # filter_conditions.append(parse_condition(key, value)) - - # # Check if there is only a single condition - # if len(filter_conditions) == 1: - # return filter_conditions[0] - # else: - # return " AND ".join(filter_conditions) - - print("filters = ", filters) where_clause = parse_filter(filters) - print("where_clause = ", where_clause) return where_clause