diff --git a/.github/actions/run-cli-graphrag-deduplication-tests/action.yml b/.github/actions/run-cli-graphrag-deduplication-tests/action.yml new file mode 100644 index 000000000..8e38ea46c --- /dev/null +++ b/.github/actions/run-cli-graphrag-deduplication-tests/action.yml @@ -0,0 +1,39 @@ +name: 'Run CLI Graphrag Deduplication Tests' +description: 'Runs CLI graphrag deduplication tests for R2R' +runs: + using: "composite" + steps: + - name: Ingest sample file (CLI) + working-directory: ./py + shell: bash + run: poetry run python tests/integration/runner_cli.py test_ingest_sample_file_2_cli + + - name: Create the graph (CLI) + working-directory: ./py + shell: bash + run: poetry run python tests/integration/runner_cli.py test_kg_create_graph_sample_file_cli + + - name: Deduplicate entities (CLI) + working-directory: ./py + shell: bash + run: poetry run python tests/integration/runner_cli.py test_kg_deduplicate_entities_sample_file_cli + + - name: Enrich the graph (CLI) + working-directory: ./py + shell: bash + run: poetry run python tests/integration/runner_cli.py test_kg_enrich_graph_sample_file_cli + + - name: Search over the graph (CLI) + working-directory: ./py + shell: bash + run: poetry run python tests/integration/runner_cli.py test_kg_search_sample_file_cli + + - name: Delete the graph (CLI) + working-directory: ./py + shell: bash + run: poetry run python tests/integration/runner_cli.py test_kg_delete_graph_sample_file_cli + + - name: Delete the graph with cascading (CLI) + working-directory: ./py + shell: bash + run: poetry run python tests/integration/runner_cli.py test_kg_delete_graph_with_cascading_sample_file_cli diff --git a/.github/actions/run-cli-graphrag-tests/action.yml b/.github/actions/run-cli-graphrag-tests/action.yml index 892263af1..040d84948 100644 --- a/.github/actions/run-cli-graphrag-tests/action.yml +++ b/.github/actions/run-cli-graphrag-tests/action.yml @@ -3,22 +3,32 @@ description: 'Runs CLI graphrag tests for R2R' runs: using: "composite" steps: - - name: Ingest sample file (SDK) + - name: Ingest sample file (CLI) working-directory: ./py shell: bash run: poetry run python tests/integration/runner_cli.py test_ingest_sample_file_2_cli - - name: Create the graph (SDK) + - name: Create the graph (CLI) working-directory: ./py shell: bash run: poetry run python tests/integration/runner_cli.py test_kg_create_graph_sample_file_cli - - name: Enrich the graph (SDK) + - name: Enrich the graph (CLI) working-directory: ./py shell: bash run: poetry run python tests/integration/runner_cli.py test_kg_enrich_graph_sample_file_cli - - name: Search over the graph (SDK) + - name: Search over the graph (CLI) working-directory: ./py shell: bash run: poetry run python tests/integration/runner_cli.py test_kg_search_sample_file_cli + + - name: Delete the graph (CLI) + working-directory: ./py + shell: bash + run: poetry run python tests/integration/runner_cli.py test_kg_delete_graph_sample_file_cli + + - name: Delete the graph with cascading (CLI) + working-directory: ./py + shell: bash + run: poetry run python tests/integration/runner_cli.py test_kg_delete_graph_with_cascading_sample_file_cli diff --git a/.github/actions/run-sdk-graphrag-deduplication-tests/action.yml b/.github/actions/run-sdk-graphrag-deduplication-tests/action.yml new file mode 100644 index 000000000..9e2d085ba --- /dev/null +++ b/.github/actions/run-sdk-graphrag-deduplication-tests/action.yml @@ -0,0 +1,39 @@ +name: 'Run SDK Graphrag Deduplication Tests' +description: 'Runs SDK graphrag deduplication tests for R2R' +runs: + using: "composite" + steps: + - name: Ingest 
sample file (SDK) + working-directory: ./py + shell: bash + run: poetry run python tests/integration/runner_sdk.py test_ingest_sample_file_2_sdk + + - name: Create the graph (SDK) + working-directory: ./py + shell: bash + run: poetry run python tests/integration/runner_sdk.py test_kg_create_graph_sample_file_sdk + + - name: Deduplicate entities (SDK) + working-directory: ./py + shell: bash + run: poetry run python tests/integration/runner_sdk.py test_kg_deduplicate_entities_sample_file_sdk + + - name: Enrich the graph (SDK) + working-directory: ./py + shell: bash + run: poetry run python tests/integration/runner_sdk.py test_kg_enrich_graph_sample_file_sdk + + - name: Search over the graph (SDK) + working-directory: ./py + shell: bash + run: poetry run python tests/integration/runner_sdk.py test_kg_search_sample_file_sdk + + - name: Delete the graph (SDK) + working-directory: ./py + shell: bash + run: poetry run python tests/integration/runner_sdk.py test_kg_delete_graph_sample_file_sdk + + - name: Delete the graph with cascading (SDK) + working-directory: ./py + shell: bash + run: poetry run python tests/integration/runner_sdk.py test_kg_delete_graph_with_cascading_sample_file_sdk diff --git a/.github/actions/run-sdk-graphrag-tests/action.yml b/.github/actions/run-sdk-graphrag-tests/action.yml index 108511482..a3cacdd9f 100644 --- a/.github/actions/run-sdk-graphrag-tests/action.yml +++ b/.github/actions/run-sdk-graphrag-tests/action.yml @@ -22,3 +22,13 @@ runs: working-directory: ./py shell: bash run: poetry run python tests/integration/runner_sdk.py test_kg_search_sample_file_sdk + + - name: Delete the graph (SDK) + working-directory: ./py + shell: bash + run: poetry run python tests/integration/runner_sdk.py test_kg_delete_graph_sample_file_sdk + + - name: Delete the graph with cascading (SDK) + working-directory: ./py + shell: bash + run: poetry run python tests/integration/runner_sdk.py test_kg_delete_graph_with_cascading_sample_file_sdk diff --git a/.github/actions/setup-postgres-ext/action.yml b/.github/actions/setup-postgres-ext/action.yml index d67f00be7..8c6f8cdfd 100644 --- a/.github/actions/setup-postgres-ext/action.yml +++ b/.github/actions/setup-postgres-ext/action.yml @@ -32,39 +32,60 @@ runs: - name: Setup PostgreSQL on Windows if: inputs.os == 'windows-latest' - shell: pwsh + shell: cmd run: | - choco install postgresql15 --params '/Password:postgres' --force - $env:PATH += ";C:\Program Files\PostgreSQL\15\bin" - $env:PGPASSWORD = 'postgres' + echo Starting PostgreSQL setup and pgvector installation... + + echo Installing PostgreSQL... + choco install postgresql15 --params "/Password:postgres" --force + + echo Updating PATH and setting PGPASSWORD... + set PATH=%PATH%;C:\Program Files\PostgreSQL\15\bin + set PGPASSWORD=postgres + echo PATH updated and PGPASSWORD set. + + echo Altering PostgreSQL user password... psql -U postgres -c "ALTER USER postgres PASSWORD 'postgres';" + echo PostgreSQL user password altered. - # Install Visual Studio Build Tools + echo Installing Visual Studio Build Tools... choco install visualstudio2022buildtools --package-parameters "--add Microsoft.VisualStudio.Workload.VCTools --includeRecommended --passive --norestart" + echo Visual Studio Build Tools installed. 
- # Set up environment for building pgvector - $vcvars64Path = "C:\Program Files\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvars64.bat" - cmd.exe /c "call `"$vcvars64Path`" && set > %temp%\vcvars.txt" - Get-Content "$env:temp\vcvars.txt" | Foreach-Object { - if ($_ -match "^(.*?)=(.*)$") { - Set-Content "env:\$($matches[1])" $matches[2] - } - } - - # Clone and build pgvector - $env:PGROOT = "C:\Program Files\PostgreSQL\15" - Set-Location -Path $env:TEMP + echo Setting up Visual Studio environment... + call "C:\Program Files\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvars64.bat" + echo Visual Studio environment set up. + + echo Cloning and building pgvector... + set PGROOT=C:\Program Files\PostgreSQL\15 + cd /d %TEMP% git clone --branch v0.7.4 https://github.com/pgvector/pgvector.git - Set-Location -Path "$env:TEMP\pgvector" + cd pgvector + echo pgvector cloned. + + echo Building pgvector... nmake /F Makefile.win + echo pgvector built. + + echo Installing pgvector... nmake /F Makefile.win install + echo pgvector installed. + + echo Creating vector extension... + psql -U postgres -c "CREATE EXTENSION vector;" + echo Vector extension created. - psql -U postgres -c "CREATE EXTENSION vector;" + echo Setting max_connections to 1024... + echo max_connections = 1024 >> "C:\Program Files\PostgreSQL\15\data\postgresql.conf" + echo max_connections set. - # Set max_connections to 1024 - Add-Content -Path "C:\Program Files\PostgreSQL\15\data\postgresql.conf" -Value "max_connections = 1024" - Restart-Service postgresql-x64-15 + echo Restarting PostgreSQL service... + net stop postgresql-x64-15 + net start postgresql-x64-15 + echo PostgreSQL service restarted. + + echo Setup complete! - name: Setup PostgreSQL on macOS if: inputs.os == 'macos-latest' diff --git a/.github/actions/setup-python-full/action.yml b/.github/actions/setup-python-full/action.yml index c3b701538..3b5d8b381 100644 --- a/.github/actions/setup-python-full/action.yml +++ b/.github/actions/setup-python-full/action.yml @@ -26,8 +26,9 @@ runs: - name: Install Poetry and dependencies on Windows if: inputs.os == 'windows-latest' - shell: pwsh + shell: cmd run: | - (Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python - - $env:PATH += ";$env:USERPROFILE\AppData\Roaming\Python\Scripts" - cd py; poetry install -E core -E ingestion-bundle + python -c "import urllib.request; print(urllib.request.urlopen('https://install.python-poetry.org').read().decode())" > install-poetry.py + python install-poetry.py + echo %USERPROFILE%\AppData\Roaming\Python\Scripts >> %GITHUB_PATH% + cd py && poetry install -E core -E ingestion-bundle diff --git a/.github/actions/setup-python-light/action.yml b/.github/actions/setup-python-light/action.yml index 271f1a021..588912355 100644 --- a/.github/actions/setup-python-light/action.yml +++ b/.github/actions/setup-python-light/action.yml @@ -24,5 +24,7 @@ runs: shell: pwsh run: | (Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python - - $env:PATH += ";$env:USERPROFILE\AppData\Roaming\Python\Scripts" - cd py; poetry install -E core -E ingestion-bundle + $env:PATH += ";$env:APPDATA\Python\Scripts" + poetry --version + cd py + poetry install -E core -E ingestion-bundle diff --git a/.github/actions/start-r2r-full/action.yml b/.github/actions/start-r2r-full/action.yml index fb8b83579..bee3d7d17 100644 --- a/.github/actions/start-r2r-full/action.yml +++ b/.github/actions/start-r2r-full/action.yml @@ -12,4 +12,4 
@@ runs: shell: bash run: | cd py - poetry run r2r serve --docker --full --config-name=full + poetry run r2r serve --docker --full --config-name=full --build --image=r2r-local diff --git a/.github/workflows/r2r-full-py-integration-tests-graphrag.yml b/.github/workflows/r2r-full-py-integration-tests-graphrag.yml new file mode 100644 index 000000000..7cf42ab7e --- /dev/null +++ b/.github/workflows/r2r-full-py-integration-tests-graphrag.yml @@ -0,0 +1,56 @@ +name: R2R Full Python Integration Test (ubuntu) + +on: + push: + branches: + - dev + - dev-minor + pull_request: + branches: + - dev + - dev-minor + workflow_dispatch: + +jobs: + test: + runs-on: ${{ matrix.os }} + + strategy: + matrix: + os: [ubuntu-latest] + test_category: + - cli-graphrag + - sdk-graphrag + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + TELEMETRY_ENABLED: 'false' + R2R_PROJECT_NAME: r2r_default + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python and install dependencies + uses: ./.github/actions/setup-python-full + with: + os: ${{ matrix.os }} + + - name: Setup and start Docker + uses: ./.github/actions/setup-docker + + - name: Login Docker + uses: ./.github/actions/login-docker + with: + docker_username: ${{ secrets.RAGTORICHES_DOCKER_UNAME }} + docker_password: ${{ secrets.RAGTORICHES_DOCKER_TOKEN }} + + - name: Start R2R Full server + uses: ./.github/actions/start-r2r-full + + - name: Run CLI GraphRAG Tests + if: matrix.test_category == 'cli-graphrag' + uses: ./.github/actions/run-cli-graphrag-tests + + - name: Run SDK GraphRAG Tests + if: matrix.test_category == 'sdk-graphrag' + uses: ./.github/actions/run-sdk-graphrag-tests diff --git a/.github/workflows/r2r-full-py-integration-tests-mac-and-windows.yml b/.github/workflows/r2r-full-py-integration-tests-mac-and-windows.yml index 7fb4def3b..7b2a78c9f 100644 --- a/.github/workflows/r2r-full-py-integration-tests-mac-and-windows.yml +++ b/.github/workflows/r2r-full-py-integration-tests-mac-and-windows.yml @@ -79,4 +79,4 @@ jobs: - name: Run SDK GraphRAG Tests if: matrix.test_category == 'sdk-graphrag' - uses: ./.github/actions/run-cli-graphrag-tests + uses: ./.github/actions/run-sdk-graphrag-tests diff --git a/.github/workflows/r2r-full-py-integration-tests.yml b/.github/workflows/r2r-full-py-integration-tests.yml index 730bbb566..834548029 100644 --- a/.github/workflows/r2r-full-py-integration-tests.yml +++ b/.github/workflows/r2r-full-py-integration-tests.yml @@ -21,12 +21,10 @@ jobs: test_category: - cli-ingestion - cli-retrieval - - cli-graphrag - sdk-ingestion - sdk-retrieval - sdk-auth - sdk-collections - - sdk-graphrag env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} TELEMETRY_ENABLED: 'false' @@ -61,10 +59,6 @@ jobs: if: matrix.test_category == 'cli-retrieval' uses: ./.github/actions/run-cli-retrieval-tests - - name: Run CLI GraphRAG Tests - if: matrix.test_category == 'cli-graphrag' - uses: ./.github/actions/run-cli-graphrag-tests - - name: Run SDK Ingestion Tests if: matrix.test_category == 'sdk-ingestion' uses: ./.github/actions/run-sdk-ingestion-tests @@ -80,7 +74,3 @@ jobs: - name: Run SDK Collections Tests if: matrix.test_category == 'sdk-collections' uses: ./.github/actions/run-sdk-collections-tests - - - name: Run SDK GraphRAG Tests - if: matrix.test_category == 'sdk-graphrag' - uses: ./.github/actions/run-cli-graphrag-tests diff --git a/.github/workflows/r2r-light-py-integration-tests-graphrag.yml b/.github/workflows/r2r-light-py-integration-tests-graphrag.yml new file mode 100644 index 
000000000..c110185b1 --- /dev/null +++ b/.github/workflows/r2r-light-py-integration-tests-graphrag.yml @@ -0,0 +1,59 @@ +# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json + +name: R2R Light Python Integration Test (ubuntu) + +on: + push: + branches: + - dev + - dev-minor + pull_request: + branches: + - dev + - dev-minor + workflow_dispatch: + +jobs: + test: + runs-on: ${{ matrix.os }} + + strategy: + matrix: + os: [ubuntu-latest] + test_category: + - cli-graphrag + - sdk-graphrag + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + TELEMETRY_ENABLED: 'false' + R2R_POSTGRES_HOST: localhost + R2R_POSTGRES_DBNAME: postgres + R2R_POSTGRES_PORT: '5432' + R2R_POSTGRES_PASSWORD: postgres + R2R_POSTGRES_USER: postgres + R2R_PROJECT_NAME: r2r_default + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python and install dependencies + uses: ./.github/actions/setup-python-light + with: + os: ${{ matrix.os }} + + - name: Setup and start PostgreSQL + uses: ./.github/actions/setup-postgres-ext + with: + os: ${{ matrix.os }} + + - name: Start R2R Light server + uses: ./.github/actions/start-r2r-light + + - name: Run CLI GraphRAG Tests + if: matrix.test_category == 'cli-graphrag' + uses: ./.github/actions/run-cli-graphrag-tests + + - name: Run SDK GraphRAG Tests + if: matrix.test_category == 'sdk-graphrag' + uses: ./.github/actions/run-sdk-graphrag-tests diff --git a/.github/workflows/r2r-light-py-integration-tests-mac-and-windows.yml b/.github/workflows/r2r-light-py-integration-tests-mac-and-windows.yml index e683776eb..d6fea4ac3 100644 --- a/.github/workflows/r2r-light-py-integration-tests-mac-and-windows.yml +++ b/.github/workflows/r2r-light-py-integration-tests-mac-and-windows.yml @@ -82,4 +82,4 @@ jobs: - name: Run SDK GraphRAG Tests if: matrix.test_category == 'sdk-graphrag' - uses: ./.github/actions/run-cli-graphrag-tests + uses: ./.github/actions/run-sdk-graphrag-tests diff --git a/.github/workflows/r2r-light-py-integration-tests.yml b/.github/workflows/r2r-light-py-integration-tests.yml index 2d05c8abc..6a334ed45 100644 --- a/.github/workflows/r2r-light-py-integration-tests.yml +++ b/.github/workflows/r2r-light-py-integration-tests.yml @@ -19,16 +19,14 @@ jobs: strategy: matrix: - os: [ubuntu-latest, windows-latest, macos-latest] + os: [ubuntu-latest] test_category: - cli-ingestion - cli-retrieval - - cli-graphrag - sdk-ingestion - sdk-retrieval - sdk-auth - sdk-collections - - sdk-graphrag env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} TELEMETRY_ENABLED: 'false' @@ -64,10 +62,6 @@ jobs: if: matrix.test_category == 'cli-retrieval' uses: ./.github/actions/run-cli-retrieval-tests - - name: Run CLI GraphRAG Tests - if: matrix.test_category == 'cli-graphrag' - uses: ./.github/actions/run-cli-graphrag-tests - - name: Run SDK Ingestion Tests if: matrix.test_category == 'sdk-ingestion' uses: ./.github/actions/run-sdk-ingestion-tests @@ -83,7 +77,3 @@ jobs: - name: Run SDK Collections Tests if: matrix.test_category == 'sdk-collections' uses: ./.github/actions/run-sdk-collections-tests - - - name: Run SDK GraphRAG Tests - if: matrix.test_category == 'sdk-graphrag' - uses: ./.github/actions/run-cli-graphrag-tests diff --git a/docs/api-reference/endpoint/completion.mdx b/docs/api-reference/endpoint/completion.mdx new file mode 100644 index 000000000..2d1317831 --- /dev/null +++ b/docs/api-reference/endpoint/completion.mdx @@ -0,0 +1,4 @@ +--- +title: 'Completion' +openapi: 'GET /v2/completion' +--- diff --git 
a/docs/api-reference/endpoint/deduplicate_entities.mdx b/docs/api-reference/endpoint/deduplicate_entities.mdx new file mode 100644 index 000000000..556705132 --- /dev/null +++ b/docs/api-reference/endpoint/deduplicate_entities.mdx @@ -0,0 +1,4 @@ +--- +title: Deduplicate Entities +openapi: 'POST /v2/deduplicate_entities' +--- diff --git a/docs/api-reference/endpoint/delete_entities_and_triples.mdx b/docs/api-reference/endpoint/delete_entities_and_triples.mdx new file mode 100644 index 000000000..96fb9c37e --- /dev/null +++ b/docs/api-reference/endpoint/delete_entities_and_triples.mdx @@ -0,0 +1,4 @@ +--- +title: 'Delete graph for collection' +openapi: 'DELETE /v2/delete_graph_for_collection' +--- diff --git a/docs/api-reference/endpoint/delete_vector_indices.mdx b/docs/api-reference/endpoint/delete_vector_indices.mdx new file mode 100644 index 000000000..cc803de0b --- /dev/null +++ b/docs/api-reference/endpoint/delete_vector_indices.mdx @@ -0,0 +1,4 @@ +--- +title: Delete Vector Index +openapi: 'DELETE /v2/delete_vector_index' +--- diff --git a/docs/api-reference/endpoint/list_vector_index.mdx b/docs/api-reference/endpoint/list_vector_index.mdx new file mode 100644 index 000000000..f28b5eec1 --- /dev/null +++ b/docs/api-reference/endpoint/list_vector_index.mdx @@ -0,0 +1,4 @@ +--- +title: List Vector Indices +openapi: 'GET /v2/list_vector_indices' +--- diff --git a/docs/api-reference/openapi.json b/docs/api-reference/openapi.json index 9435d6da7..0511620ea 100644 --- a/docs/api-reference/openapi.json +++ b/docs/api-reference/openapi.json @@ -1 +1 @@ -{"openapi":"3.1.0","info":{"title":"R2R Application API","version":"1.0.0"},"paths":{"/v2/ingest_files":{"post":{"summary":"Ingest Files App","description":"Ingest files into the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to ingest files and their associated metadatas into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only ingest files for their own access. 
More expansive collection permissioning is under development.","operationId":"ingest_files_app_v2_ingest_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_ingest_files_app_v2_ingest_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_IngestionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.ingest_files(\n file_paths=[\"pg_essay_1.html\", \"got.txt\"],\n metadatas=[{\"metadata_1\":\"some random metadata\"}, {\"metadata_2\": \"some other random metadata\"}],\n document_ids=None\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/ingest_files\" \\\n -H \"Content-Type: multipart/form-data\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1.html;type=text/html\" \\\n -F \"file=@got.txt;type=text/plain\" \\\n -F 'metadatas=[{},{}]' \\\n -F 'document_ids=null'\n"}]}},"/v2/update_files":{"post":{"summary":"Update Files App","description":"Update existing files in the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to update files and their associated metadatas into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only update their own files. More expansive collection permissioning is under development.","operationId":"update_files_app_v2_update_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_update_files_app_v2_update_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdateResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.update_files(\n file_paths=[\"pg_essay_1_v2.txt\"],\n document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/update_files\" \\\n -H \"Content-Type: multipart/form-data\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1_v2.txt;type=text/plain\" \\\n -F 'document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]'\n"}]}},"/v2/ingest_chunks":{"post":{"summary":"Ingest Chunks App","description":"Ingest text chunks into the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to ingest pre-parsed text chunks into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only ingest chunks for their own access. 
More expansive collection permissioning is under development.","operationId":"ingest_chunks_app_v2_ingest_chunks_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_ingest_chunks_app_v2_ingest_chunks_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_IngestionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.ingest_chunks(\n chunks=[\n {\n \"text\": \"Another chunk of text\",\n },\n {\n \"text\": \"Yet another chunk of text\",\n },\n {\n \"text\": \"A chunk of text\",\n },\n ],\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/ingest_chunks\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"chunks\": [\n {\n \"text\": \"Another chunk of text\"\n },\n {\n \"text\": \"Yet another chunk of text\"\n },\n {\n \"text\": \"A chunk of text\"\n }\n ],\n \"document_id\": \"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\",\n \"metadata\": {}\n }'\n"}]}},"/v2/create_vector_index":{"post":{"summary":"Create Vector Index App","operationId":"create_vector_index_app_v2_create_vector_index_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_vector_index_app_v2_create_vector_index_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CreateVectorIndexResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/health":{"get":{"summary":"Health Check","operationId":"health_check_v2_health_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/v2/server_stats":{"get":{"summary":"Server Stats","operationId":"server_stats_v2_server_stats_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_ServerStats_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/update_prompt":{"post":{"summary":"Update Prompt App","operationId":"update_prompt_app_v2_update_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_prompt_app_v2_update_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/add_prompt":{"post":{"summary":"Add Prompt App","operationId":"add_prompt_app_v2_add_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_prompt_app_v2_add_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_prompt/{prompt_name}":{"get":{"summary":"Get Prompt App","operationId":"get_prompt_app_v2_get_prompt__prompt_name__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"},{"name":"inputs","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"description":"JSON-encoded prompt inputs","title":"Inputs"},"description":"JSON-encoded prompt inputs"},{"name":"prompt_override","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"description":"Prompt override","title":"Prompt Override"},"description":"Prompt override"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/get_all_prompts":{"get":{"summary":"Get All Prompts App","operationId":"get_all_prompts_app_v2_get_all_prompts_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AllPromptsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_prompt/{prompt_name}":{"delete":{"summary":"Delete Prompt App","operationId":"delete_prompt_app_v2_delete_prompt__prompt_name__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/analytics":{"get":{"summary":"Get Analytics App","operationId":"get_analytics_app_v2_analytics_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filter_criteria","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Filter Criteria"}},{"name":"analysis_types","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Analysis Types"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AnalyticsResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logs":{"get":{"summary":"Logs App","operationId":"logs_app_v2_logs_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"run_type_filter","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"default":"","title":"Run Type 
Filter"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_LogResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/app_settings":{"get":{"summary":"App Settings","operationId":"app_settings_v2_app_settings_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AppSettingsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/users_overview":{"get":{"summary":"Users Overview App","operationId":"users_overview_app_v2_users_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"default":[],"title":"User Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_UserOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete":{"delete":{"summary":"Delete App","operationId":"delete_app_v2_delete_delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filters","in":"query","required":true,"schema":{"type":"string","description":"JSON-encoded filters","title":"Filters"},"description":"JSON-encoded filters"}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/download_file/{document_id}":{"get":{"summary":"Download File App","description":"Download a file by its document ID as a stream.","operationId":"download_file_app_v2_download_file__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"}],"responses":{"200":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/documents_overview":{"get":{"summary":"Documents Overview App","operationId":"documents_overview_app_v2_documents_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_ids","in":"query","required":false,"schema":{"type":"array","items":{"type":"string"},"default":[],"title":"Document Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","minimum":-1,"description":"Number of items to return. Use -1 to return all items.","default":100,"title":"Limit"},"description":"Number of items to return. 
Use -1 to return all items."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/document_chunks/{document_id}":{"get":{"summary":"Document Chunks App","operationId":"document_chunks_app_v2_document_chunks__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","title":"Document Id"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":100,"title":"Limit"}},{"name":"include_vectors","in":"query","required":false,"schema":{"anyOf":[{"type":"boolean"},{"type":"null"}],"default":false,"title":"Include Vectors"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentChunkResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collections_overview":{"get":{"summary":"Collections Overview App","operationId":"collections_overview_app_v2_collections_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"title":"Collection Ids"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","maximum":1000,"minimum":1},{"type":"null"}],"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_collection":{"post":{"summary":"Create Collection App","operationId":"create_collection_app_v2_create_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_collection_app_v2_create_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_collection/{collection_id}":{"get":{"summary":"Get Collection App","operationId":"get_collection_app_v2_get_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/update_collection":{"put":{"summary":"Update Collection App","operationId":"update_collection_app_v2_update_collection_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_collection_app_v2_update_collection_put"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_collection/{collection_id}":{"delete":{"summary":"Delete Collection App","operationId":"delete_collection_app_v2_delete_collection__collection_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/list_collections":{"get":{"summary":"List Collections App","operationId":"list_collections_app_v2_list_collections_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/add_user_to_collection":{"post":{"summary":"Add User To Collection App","operationId":"add_user_to_collection_app_v2_add_user_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_user_to_collection_app_v2_add_user_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_user_from_collection":{"post":{"summary":"Remove User From Collection App","operationId":"remove_user_from_collection_app_v2_remove_user_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_users_in_collection/{collection_id}":{"get":{"summary":"Get Users In Collection App","operationId":"get_users_in_collection_app_v2_get_users_in_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_UserResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user_collections/{user_id}":{"get":{"summary":"Get Collections For User App","operationId":"get_collections_for_user_app_v2_user_collections__user_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"User ID","title":"User Id"},"description":"User ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/assign_document_to_collection":{"post":{"summary":"Assign Document To Collection App","operationId":"assign_document_to_collection_app_v2_assign_document_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_document_from_collection":{"post":{"summary":"Remove Document From Collection App","operationId":"remove_document_from_collection_app_v2_remove_document_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/document_collections/{document_id}":{"get":{"summary":"Document Collections App","operationId":"document_collections_app_v2_document_collections__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collection/{collection_id}/documents":{"get":{"summary":"Documents In Collection App","operationId":"documents_in_collection_app_v2_collection__collection_id__documents_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/conversations/{conversation_id}":{"get":{"summary":"Get Conversation","operationId":"get_conversation_v2_conversations__conversation_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"},{"name":"branch_id","in":"query","required":false,"schema":{"type":"string","description":"Branch ID","title":"Branch Id"},"description":"Branch ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_Tuple_str__Message___"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/search":{"post":{"summary":"Search App","description":"Perform a search query on the vector database and knowledge graph.\n\nThis endpoint allows for complex filtering of search results using PostgreSQL-based queries.\nFilters can be applied to various fields such as document_id, and internal metadata values.\n\n\nAllowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.","operationId":"search_app_v2_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_search_app_v2_search_post"}}},"required":true},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_SearchResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.search(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/search\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n }'\n"}]}},"/v2/rag":{"post":{"summary":"Rag App","description":"Execute a RAG (Retrieval-Augmented Generation) query.\n\nThis endpoint combines search results with language model generation.\nIt supports the same filtering capabilities as the search endpoint,\nallowing for precise control over the retrieved context.\n\nThe generation process can be customized using the rag_generation_config parameter.","operationId":"rag_app_v2_rag_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_rag_app_v2_rag_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.rag(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True,\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n 
\"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/rag\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n }'\n"}]}},"/v2/agent":{"post":{"summary":"Agent App","description":"Implement an agent-based interaction for complex query processing.\n\nThis endpoint supports multi-turn conversations and can handle complex queries\nby breaking them down into sub-tasks. It uses the same filtering capabilities\nas the search and RAG endpoints for retrieving relevant information.\n\nThe agent's behavior can be customized using the rag_generation_config and\ntask_prompt_override parameters.","operationId":"agent_app_v2_agent_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_agent_app_v2_agent_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGAgentResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.agent(\n messages=[\n {\"role\": \"user\", \"content\": \"Who is the greatest philospher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philospher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n include_title_if_available=True\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/agent\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"messages\": [\n {\"role\": \"user\", \"content\": \"Who is the greatest philospher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philospher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n 
\"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": false # to enable this, please read the graphrag cookbook\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n \"include_title_if_available\": true\n }'\n"}]}},"/v2/completion":{"post":{"summary":"Completion","description":"Generate completions for a list of messages.\n\nThis endpoint uses the language model to generate completions for the provided messages.\nThe generation process can be customized using the generation_config parameter.","operationId":"completion_v2_completion_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_completion_v2_completion_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/register":{"post":{"summary":"Register App","description":"Register a new user with the given email and password.","operationId":"register_app_v2_register_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_register_app_v2_register_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/verify_email":{"post":{"summary":"Verify Email App","description":"Verify a user's email address.\n\nThis endpoint is used to confirm a user's email address using the verification code\nsent to their email after registration.","operationId":"verify_email_app_v2_verify_email_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_verify_email_app_v2_verify_email_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/login":{"post":{"summary":"Login App","description":"Authenticate a user and provide access tokens.\n\nThis endpoint authenticates a user using their email (username) and password,\nand returns access and refresh tokens upon successful authentication.","operationId":"login_app_v2_login_post","requestBody":{"content":{"application/x-www-form-urlencoded":{"schema":{"$ref":"#/components/schemas/Body_login_app_v2_login_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logout":{"post":{"summary":"Logout App","description":"Log out the current user.\n\nThis endpoint invalidates the user's current 
access token, effectively logging them out.","operationId":"logout_app_v2_logout_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}}},"security":[{"OAuth2PasswordBearer":[]},{"HTTPBearer":[]}]}},"/v2/user":{"get":{"summary":"Get User App","description":"Get the current user's profile information.\n\nThis endpoint returns the profile information of the currently authenticated user.","operationId":"get_user_app_v2_user_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}}},"security":[{"HTTPBearer":[]}]},"put":{"summary":"Put User App","description":"Update the current user's profile information.\n\nThis endpoint allows the authenticated user to update their profile information.","operationId":"put_user_app_v2_user_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_put_user_app_v2_user_put"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/refresh_access_token":{"post":{"summary":"Refresh Access Token App","description":"Refresh the access token using a refresh token.\n\nThis endpoint allows users to obtain a new access token using their refresh token.","operationId":"refresh_access_token_app_v2_refresh_access_token_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","title":"Refresh Token","description":"Refresh token"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/change_password":{"post":{"summary":"Change Password App","description":"Change the authenticated user's password.\n\nThis endpoint allows users to change their password by providing their current password\nand a new password.","operationId":"change_password_app_v2_change_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_change_password_app_v2_change_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/request_password_reset":{"post":{"summary":"Request Password Reset App","description":"Request a password reset for a user.\n\nThis endpoint initiates the password reset process by sending a reset link\nto the specified email address.","operationId":"request_password_reset_app_v2_request_password_reset_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","format":"email","title":"Email","description":"User's email address"}}},"required":true},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/reset_password":{"post":{"summary":"Reset Password App","operationId":"reset_password_app_v2_reset_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_reset_password_app_v2_reset_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user/{user_id}":{"delete":{"summary":"Delete User App","description":"Delete a user account.\n\nThis endpoint allows users to delete their own account or, for superusers,\nto delete any user account.","operationId":"delete_user_app_v2_user__user_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"ID of the user to delete","title":"User Id"},"description":"ID of the user to delete"}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_user_app_v2_user__user_id__delete"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_graph":{"post":{"summary":"Create Graph","description":"Creating a graph on your documents. 
This endpoint takes as input a list of document IDs and KGCreationSettings.\nIf document IDs are not provided, the graph will be created on all documents in the system.\nThis step extracts the relevant entities and relationships from the documents and creates a graph based on the extracted information.\nIn order to do GraphRAG, you will need to run the enrich_graph endpoint.","operationId":"create_graph_v2_create_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_graph_v2_create_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_Union_KGCreationResponse__KGCreationEstimationResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/enrich_graph":{"post":{"summary":"Enrich Graph","description":"This endpoint enriches the graph with additional information.\nIt creates communities of nodes based on their similarity and adds embeddings to the graph.\nThis step is necessary for GraphRAG to work.","operationId":"enrich_graph_v2_enrich_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_enrich_graph_v2_enrich_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_Union_KGEnrichmentResponse__KGEnrichmentEstimationResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/entities":{"get":{"summary":"Get Entities","description":"Retrieve entities from the knowledge graph.","operationId":"get_entities_v2_entities_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve entities from.","title":"Collection Id"},"description":"Collection ID to retrieve entities from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"entity_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Entity IDs to filter by.","title":"Entity Ids"},"description":"Entity IDs to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGEntitiesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/triples":{"get":{"summary":"Get Triples","description":"Retrieve triples from the knowledge graph.","operationId":"get_triples_v2_triples_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve triples from.","title":"Collection 
Id"},"description":"Collection ID to retrieve triples from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"entity_names","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Entity names to filter by.","title":"Entity Names"},"description":"Entity names to filter by."},{"name":"triple_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Triple IDs to filter by.","title":"Triple Ids"},"description":"Triple IDs to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGTriplesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/communities":{"get":{"summary":"Get Communities","description":"Retrieve communities from the knowledge graph.","operationId":"get_communities_v2_communities_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve communities from.","title":"Collection Id"},"description":"Collection ID to retrieve communities from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"levels","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"integer"}},{"type":"null"}],"description":"Levels to filter by.","title":"Levels"},"description":"Levels to filter by."},{"name":"community_numbers","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"integer"}},{"type":"null"}],"description":"Community numbers to filter by.","title":"Community Numbers"},"description":"Community numbers to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGCommunitiesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/openapi_spec":{"get":{"summary":"Openapi Spec","operationId":"openapi_spec_v2_openapi_spec_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AllPromptsResponse":{"properties":{"prompts":{"additionalProperties":{"$ref":"#/components/schemas/PromptResponse"},"type":"object","title":"Prompts"}},"type":"object","required":["prompts"],"title":"AllPromptsResponse"},"AnalyticsResponse":{"properties":{"analytics_data":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Analytics Data"},"filtered_logs":{"type":"object","title":"Filtered 
Logs"}},"type":"object","required":["filtered_logs"],"title":"AnalyticsResponse"},"AppSettingsResponse":{"properties":{"config":{"type":"object","title":"Config"},"prompts":{"type":"object","title":"Prompts"}},"type":"object","required":["config","prompts"],"title":"AppSettingsResponse"},"Body_add_prompt_app_v2_add_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"type":"string","title":"Template","description":"Prompt template"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name","template"],"title":"Body_add_prompt_app_v2_add_prompt_post"},"Body_add_user_to_collection_app_v2_add_user_to_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_add_user_to_collection_app_v2_add_user_to_collection_post"},"Body_agent_app_v2_agent_post":{"properties":{"message":{"anyOf":[{"$ref":"#/components/schemas/Message"},{"type":"null"}],"description":"Message object"},"messages":{"anyOf":[{"items":{"$ref":"#/components/schemas/Message"},"type":"array"},{"type":"null"}],"title":"Messages","description":"List of message objects (deprecated, use message instead)","deprecated":true},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":true},"conversation_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Conversation Id","description":"The ID of the conversation, a new ID is assigned if not provided"},"branch_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Branch Id","description":"The ID of the branch"}},"type":"object","title":"Body_agent_app_v2_agent_post"},"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"},"Body_change_password_app_v2_change_password_post":{"properties":{"current_password":{"type":"string","title":"Current Password","description":"Current password"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["current_password","new_password"],"title":"Body_change_password_app_v2_change_password_post"},"Body_completion_v2_completion_post":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message"},"type":"array","title":"Messages","description":"The messages to 
complete"},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"The generation config"}},"type":"object","required":["messages"],"title":"Body_completion_v2_completion_post"},"Body_create_collection_app_v2_create_collection_post":{"properties":{"name":{"type":"string","title":"Name","description":"Collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Collection description","default":""}},"type":"object","required":["name"],"title":"Body_create_collection_app_v2_create_collection_post"},"Body_create_graph_v2_create_graph_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to create graph for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph creation process."},"kg_creation_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Creation Settings","description":"Settings for the graph creation process."}},"type":"object","title":"Body_create_graph_v2_create_graph_post"},"Body_create_vector_index_app_v2_create_vector_index_post":{"properties":{"table_name":{"anyOf":[{"$ref":"#/components/schemas/VectorTableName"},{"type":"null"}],"description":"The name of the vector table to create.","default":"chunks"},"index_method":{"$ref":"#/components/schemas/IndexMethod","description":"The type of vector index to create.","default":"hnsw"},"measure":{"$ref":"#/components/schemas/IndexMeasure","description":"The measure for the index.","default":"cosine_distance"},"index_arguments":{"anyOf":[{"$ref":"#/components/schemas/IndexArgsIVFFlat"},{"$ref":"#/components/schemas/IndexArgsHNSW"},{"type":"null"}],"title":"Index Arguments","description":"The arguments for the index method."},"replace":{"type":"boolean","title":"Replace","description":"Whether to replace an existing index.","default":true},"concurrently":{"type":"boolean","title":"Concurrently","description":"Whether to create the index concurrently.","default":true}},"type":"object","title":"Body_create_vector_index_app_v2_create_vector_index_post"},"Body_delete_user_app_v2_user__user_id__delete":{"properties":{"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password","description":"User's current password"},"delete_vector_data":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Delete Vector Data","description":"Whether to delete the user's vector data","default":false}},"type":"object","title":"Body_delete_user_app_v2_user__user_id__delete"},"Body_enrich_graph_v2_enrich_graph_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to enrich graph for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph enrichment process.","default":"estimate"},"kg_enrichment_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Enrichment Settings","description":"Settings for the graph enrichment process."}},"type":"object","title":"Body_enrich_graph_v2_enrich_graph_post"},"Body_ingest_chunks_app_v2_ingest_chunks_post":{"properties":{"chunks":{"items":{"$ref":"#/components/schemas/RawChunk"},"type":"array","title":"Chunks","description":"A list of text chunks to ingest into the system.","default":{}},"document_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document Id","description":"An 
optional document id to associate the chunks with. If not provided, a unique document id will be generated."},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","title":"Body_ingest_chunks_app_v2_ingest_chunks_post"},"Body_ingest_files_app_v2_ingest_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to ingest into the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"ingestion_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"title":"Ingestion Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration."}},"type":"object","required":["files"],"title":"Body_ingest_files_app_v2_ingest_files_post"},"Body_login_app_v2_login_post":{"properties":{"grant_type":{"anyOf":[{"type":"string","pattern":"password"},{"type":"null"}],"title":"Grant Type"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"scope":{"type":"string","title":"Scope","default":""},"client_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Id"},"client_secret":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Secret"}},"type":"object","required":["username","password"],"title":"Body_login_app_v2_login_post"},"Body_put_user_app_v2_user_put":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id","description":"ID of the user to update"},"email":{"anyOf":[{"type":"string","format":"email"},{"type":"null"}],"title":"Email","description":"Updated email address"},"is_superuser":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Is Superuser","description":"Updated superuser status"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated user name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio","description":"Updated user bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture","description":"Updated profile picture URL"}},"type":"object","title":"Body_put_user_app_v2_user_put"},"Body_rag_app_v2_rag_post":{"properties":{"query":{"type":"string","title":"Query","description":"RAG query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt 
override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":false}},"type":"object","required":["query"],"title":"Body_rag_app_v2_rag_post"},"Body_register_app_v2_register_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"password":{"type":"string","title":"Password","description":"User's password"}},"type":"object","required":["email","password"],"title":"Body_register_app_v2_register_post"},"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph enrichment process.","default":"estimate"},"kg_enrichment_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Enrichment Settings","description":"Settings for the graph enrichment process."}},"type":"object","required":["document_id","collection_id"],"title":"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"},"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"},"Body_reset_password_app_v2_reset_password_post":{"properties":{"reset_token":{"type":"string","title":"Reset Token","description":"Password reset token"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["reset_token","new_password"],"title":"Body_reset_password_app_v2_reset_password_post"},"Body_search_app_v2_search_post":{"properties":{"query":{"type":"string","title":"Query","description":"Search query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"}},"type":"object","required":["query"],"title":"Body_search_app_v2_search_post"},"Body_update_collection_app_v2_update_collection_put":{"properties":{"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Updated collection description"}},"type":"object","required":["collection_id"],"title":"Body_update_collection_app_v2_update_collection_put"},"Body_update_files_app_v2_update_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to update in the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. 
If not provided, the system will generate a unique document id via the `generate_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"ingestion_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"title":"Ingestion Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration."}},"type":"object","required":["files"],"title":"Body_update_files_app_v2_update_files_post"},"Body_update_prompt_app_v2_update_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Template","description":"Prompt template"},"input_types":{"anyOf":[{"additionalProperties":{"type":"string"},"type":"object"},{"type":"null"}],"title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name"],"title":"Body_update_prompt_app_v2_update_prompt_post"},"Body_verify_email_app_v2_verify_email_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"verification_code":{"type":"string","title":"Verification Code","description":"Email verification code"}},"type":"object","required":["email","verification_code"],"title":"Body_verify_email_app_v2_verify_email_post"},"CollectionOverviewResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"user_count":{"type":"integer","title":"User Count"},"document_count":{"type":"integer","title":"Document Count"}},"type":"object","required":["collection_id","name","description","created_at","updated_at","user_count","document_count"],"title":"CollectionOverviewResponse"},"CollectionResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"}},"type":"object","required":["collection_id","name","description","created_at","updated_at"],"title":"CollectionResponse"},"CommunityReport":{"properties":{"community_number":{"type":"integer","title":"Community Number"},"level":{"type":"integer","title":"Level"},"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name","default":""},"summary":{"type":"string","title":"Summary","default":""},"findings":{"items":{"type":"string"},"type":"array","title":"Findings","default":[]},"rating":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Rating"},"rating_explanation":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rating 
Explanation"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Embedding"},"attributes":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Attributes"}},"type":"object","required":["community_number","level","collection_id"],"title":"CommunityReport"},"CreateVectorIndexResponse":{"properties":{"message":{"type":"string","title":"Message","description":"Vector index creation queued successfully."}},"type":"object","required":["message"],"title":"CreateVectorIndexResponse"},"DocumentChunkResponse":{"properties":{"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"},"vector":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Vector"}},"type":"object","required":["extraction_id","document_id","user_id","collection_ids","text","metadata"],"title":"DocumentChunkResponse"},"DocumentOverviewResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"title":{"type":"string","title":"Title"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"type":{"type":"string","title":"Type"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"ingestion_status":{"type":"string","title":"Ingestion Status"},"kg_extraction_status":{"type":"string","title":"Kg Extraction Status"},"version":{"type":"string","title":"Version"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["id","title","user_id","type","created_at","updated_at","ingestion_status","kg_extraction_status","version","collection_ids","metadata"],"title":"DocumentOverviewResponse"},"Entity":{"properties":{"name":{"type":"string","title":"Name"},"id":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Id"},"category":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Category"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"description_embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Description Embedding"},"community_numbers":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Community Numbers"},"extraction_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Extraction Ids"},"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id"},"document_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Document Id"},"attributes":{"anyOf":[{"type":"object"},{"type":"string"},{"type":"null"}],"title":"Attributes"}},"type":"object","required":["name"],"title":"Entity","description":"An entity extracted from a document."},"GenerationConfig":{"properties":{"model":{"type":"string","title":"Model"},"temperature":{"type":"number","title":"Temperature"},"top_p":{"type":"number","title":"Top P"},"max_tokens_to_sample":{"type":"integer","title":"Max Tokens To 
Sample"},"stream":{"type":"boolean","title":"Stream"},"functions":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Functions"},"tools":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tools"},"add_generation_kwargs":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Add Generation Kwargs"},"api_base":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api Base"}},"type":"object","title":"GenerationConfig","max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"GenericMessageResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"GenericMessageResponse"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"HybridSearchSettings":{"properties":{"full_text_weight":{"type":"number","title":"Full Text Weight","description":"Weight to apply to full text search","default":1.0},"semantic_weight":{"type":"number","title":"Semantic Weight","description":"Weight to apply to semantic search","default":5.0},"full_text_limit":{"type":"integer","title":"Full Text Limit","description":"Maximum number of results to return from full text search","default":200},"rrf_k":{"type":"integer","title":"Rrf K","description":"K-value for RRF (Rank Reciprocal Fusion)","default":50}},"type":"object","title":"HybridSearchSettings"},"IndexArgsHNSW":{"properties":{"m":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"M","default":16},"ef_construction":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Ef Construction","default":64}},"type":"object","title":"IndexArgsHNSW","description":"A class for arguments that can optionally be supplied to the index creation\nmethod when building an HNSW type index.\n\nRef: https://github.com/pgvector/pgvector#index-options\n\nBoth attributes are Optional in case the user only wants to specify one and\nleave the other as default\n\nAttributes:\n m (int): Maximum number of connections per node per layer (default: 16)\n ef_construction (int): Size of the dynamic candidate list for\n constructing the graph (default: 64)"},"IndexArgsIVFFlat":{"properties":{"n_lists":{"type":"integer","title":"N Lists"}},"type":"object","required":["n_lists"],"title":"IndexArgsIVFFlat","description":"A class for arguments that can optionally be supplied to the index creation\nmethod when building an IVFFlat type index.\n\nAttributes:\n nlist (int): The number of IVF centroids that the index should use"},"IndexMeasure":{"type":"string","enum":["cosine_distance","l2_distance","max_inner_product"],"title":"IndexMeasure","description":"An enum representing the types of distance measures available for indexing.\n\nAttributes:\n cosine_distance (str): The cosine distance measure for indexing.\n l2_distance (str): The Euclidean (L2) distance measure for indexing.\n max_inner_product (str): The maximum inner product measure for indexing."},"IndexMethod":{"type":"string","enum":["auto","ivfflat","hnsw"],"title":"IndexMethod","description":"An enum representing the index methods available.\n\nThis class currently only supports the 'ivfflat' method but may\nexpand in the future.\n\nAttributes:\n auto (str): Automatically choose the best available index method.\n ivfflat (str): The ivfflat index method.\n hnsw (str): The hnsw index 
method."},"IngestionResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Task Id","description":"The task ID of the ingestion request."},"document_id":{"type":"string","format":"uuid","title":"Document Id","description":"The ID of the document that was ingested."}},"type":"object","required":["message","document_id"],"title":"IngestionResponse","example":{"document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","message":"Ingestion task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGCommunitiesResponse":{"properties":{"communities":{"items":{"$ref":"#/components/schemas/CommunityReport"},"type":"array","title":"Communities","description":"The list of communities in the graph for the collection."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of communities in the graph."}},"type":"object","required":["communities","total_entries"],"title":"KGCommunitiesResponse","description":"Response for knowledge graph communities.","example":{"communities":[{"collection_ids":["122fdf6a-e116-546b-a8f6-e4cb2e2c0a09"],"community_number":1,"findings":["finding1","finding2"],"id":"1","level":0,"name":"community name","rating":"8","rating_explanation":"rating explanation","summary":"community summary"}],"total_count":1}},"KGCommunityResult":{"properties":{"name":{"type":"string","title":"Name"},"summary":{"type":"string","title":"Summary"},"rating":{"type":"number","title":"Rating"},"rating_explanation":{"type":"string","title":"Rating Explanation"},"findings":{"items":{"type":"string"},"type":"array","title":"Findings"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","summary","rating","rating_explanation","findings"],"title":"KGCommunityResult","findings":["Finding 1","Finding 2"],"metadata":{},"name":"Community Name","rating":9,"rating_explanation":"Rating Explanation","summary":"Community Summary"},"KGCreationEstimationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user.","default":""},"document_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Document Count","description":"The number of documents in the collection."},"number_of_jobs_created":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Number Of Jobs Created","description":"The number of jobs created for the graph creation process."},"total_chunks":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Chunks","description":"The estimated total number of chunks."},"estimated_entities":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Entities","description":"The estimated number of entities in the graph."},"estimated_triples":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Triples","description":"The estimated number of triples in the graph."},"estimated_llm_calls":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Llm Calls","description":"The estimated number of LLM calls in millions."},"estimated_total_in_out_tokens_in_millions":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total In Out Tokens In Millions","description":"The estimated total number of input and output tokens in millions."},"estimated_total_time_in_minutes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total Time 
In Minutes","description":"The estimated total time to run the graph creation process in minutes."},"estimated_cost_in_usd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Cost In Usd","description":"The estimated cost to run the graph creation process in USD."}},"type":"object","title":"KGCreationEstimationResponse","description":"Response for knowledge graph creation estimation."},"KGCreationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the KG creation request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the KG creation request."}},"type":"object","required":["message","task_id"],"title":"KGCreationResponse","example":{"message":"Graph creation queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGEnrichmentEstimationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user.","default":""},"total_entities":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Entities","description":"The total number of entities in the graph."},"total_triples":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Triples","description":"The total number of triples in the graph."},"estimated_llm_calls":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Llm Calls","description":"The estimated number of LLM calls."},"estimated_total_in_out_tokens_in_millions":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total In Out Tokens In Millions","description":"The estimated total number of input and output tokens in millions."},"estimated_cost_in_usd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Cost In Usd","description":"The estimated cost to run the graph enrichment process."},"estimated_total_time_in_minutes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total Time In Minutes","description":"The estimated total time to run the graph enrichment process."}},"type":"object","title":"KGEnrichmentEstimationResponse","description":"Response for knowledge graph enrichment estimation."},"KGEnrichmentResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the KG enrichment request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the KG enrichment request."}},"type":"object","required":["message","task_id"],"title":"KGEnrichmentResponse","example":{"message":"Graph enrichment queued successfuly.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGEntitiesResponse":{"properties":{"entities":{"items":{"$ref":"#/components/schemas/Entity"},"type":"array","title":"Entities","description":"The list of entities in the graph."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of entities in the graph for the collection or document."}},"type":"object","required":["entities","total_entries"],"title":"KGEntitiesResponse","description":"Response for knowledge graph entities.","example":{"entities":[{"description":"Description 1","id":"1","name":"Entity 1"},{"description":"Description 2","id":"2","name":"Entity 
2"}],"total_entries":2}},"KGEntityResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGEntityResult","description":"Entity Description","metadata":{},"name":"Entity Name"},"KGGlobalResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGGlobalResult","description":"Global Result Description","metadata":{},"name":"Global Result Name"},"KGRelationshipResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGRelationshipResult","description":"Relationship Description","metadata":{},"name":"Relationship Name"},"KGRunType":{"type":"string","enum":["estimate","run"],"title":"KGRunType","description":"Type of KG run."},"KGSearchMethod":{"type":"string","enum":["local"],"const":"local","title":"KGSearchMethod"},"KGSearchResult":{"properties":{"method":{"$ref":"#/components/schemas/KGSearchMethod"},"content":{"anyOf":[{"$ref":"#/components/schemas/KGEntityResult"},{"$ref":"#/components/schemas/KGRelationshipResult"},{"$ref":"#/components/schemas/KGCommunityResult"},{"$ref":"#/components/schemas/KGGlobalResult"}],"title":"Content"},"result_type":{"anyOf":[{"$ref":"#/components/schemas/KGSearchResultType"},{"type":"null"}]},"extraction_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Extraction Ids"},"metadata":{"type":"object","title":"Metadata","default":{}}},"type":"object","required":["method","content"],"title":"KGSearchResult","content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"},"KGSearchResultType":{"type":"string","enum":["entity","relationship","community"],"title":"KGSearchResultType"},"KGSearchSettings":{"properties":{"filters":{"type":"object","title":"Filters","description":"Alias for search_filters","deprecated":true},"search_filters":{"type":"object","title":"Search Filters","description":"Filters to apply to the vector search. 
Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.\n\n Commonly used filters include the following:\n\n `{\"document_id\": {\"$eq\": \"9fbe403b-...\"}}`\n\n `{\"document_id\": {\"$in\": [\"9fbe403b-...\", \"3e157b3a-...\"]}}`\n\n `{\"collection_ids\": {\"$overlap\": [\"122fdf6a-...\", \"...\"]}}`\n\n `{\"$and\": {\"document_id\": ..., \"collection_ids\": ...}}`"},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"graphrag_map_system":{"type":"string","title":"Graphrag Map System","description":"The system prompt for the graphrag map prompt.","default":"graphrag_map_system"},"graphrag_reduce_system":{"type":"string","title":"Graphrag Reduce System","description":"The system prompt for the graphrag reduce prompt.","default":"graphrag_reduce_system"},"use_kg_search":{"type":"boolean","title":"Use Kg Search","description":"Whether to use KG search","default":false},"kg_search_type":{"type":"string","title":"Kg Search Type","description":"KG search type","default":"local"},"kg_search_level":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kg Search Level","description":"KG search level"},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"Configuration for text generation during graph search."},"max_community_description_length":{"type":"integer","title":"Max Community Description Length","default":65536},"max_llm_queries_for_global_search":{"type":"integer","title":"Max Llm Queries For Global Search","default":250},"local_search_limits":{"additionalProperties":{"type":"integer"},"type":"object","title":"Local Search Limits","default":{"__Entity__":20,"__Relationship__":20,"__Community__":20}}},"type":"object","title":"KGSearchSettings","generation_config":{"max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"kg_search_level":"0","kg_search_type":"local","local_search_limits":{"__Community__":20,"__Entity__":20,"__Relationship__":20},"max_community_description_length":65536,"max_llm_queries_for_global_search":250,"use_kg_search":true},"KGTriplesResponse":{"properties":{"triples":{"items":{"$ref":"#/components/schemas/Triple"},"type":"array","title":"Triples","description":"The list of triples in the graph."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of triples in the graph for the collection or document."}},"type":"object","required":["triples","total_entries"],"title":"KGTriplesResponse","description":"Response for knowledge graph triples.","example":{"total_entries":2,"triples":[{"description":"Paris is the capital of France","object":"France","predicate":"is capital of","subject":"Paris"}]}},"LogEntry":{"properties":{"key":{"type":"string","title":"Key"},"value":{"title":"Value"},"timestamp":{"type":"string","format":"date-time","title":"Timestamp"}},"type":"object","required":["key","value","timestamp"],"title":"LogEntry"},"LogResponse":{"properties":{"run_id":{"type":"string","format":"uuid","title":"Run Id"},"run_type":{"type":"string","title":"Run Type"},"entries":{"items":{"$ref":"#/components/schemas/LogEntry"},"type":"array","title":"Entries"},"timestamp":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Timestamp"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User 
Id"}},"type":"object","required":["run_id","run_type","entries","timestamp","user_id"],"title":"LogResponse"},"Message":{"properties":{"role":{"anyOf":[{"$ref":"#/components/schemas/MessageType"},{"type":"string"}],"title":"Role"},"content":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"function_call":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Function Call"},"tool_calls":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tool Calls"}},"type":"object","required":["role"],"title":"Message","content":"This is a test message.","role":"user"},"MessageType":{"type":"string","enum":["system","user","assistant","function","tool"],"title":"MessageType"},"PaginatedResultsWrapper_list_CollectionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[CollectionResponse]]"},"PaginatedResultsWrapper_list_DocumentChunkResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentChunkResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[DocumentChunkResponse]]"},"PaginatedResultsWrapper_list_DocumentOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[DocumentOverviewResponse]]"},"PaginatedResultsWrapper_list_UserOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/UserOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[UserOverviewResponse]]"},"PaginatedResultsWrapper_list_UserResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/shared__api__models__management__responses__UserResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[UserResponse]]"},"PromptResponse":{"properties":{"name":{"type":"string","title":"Name"},"template":{"type":"string","title":"Template"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types"}},"type":"object","required":["name","template","created_at","updated_at","input_types"],"title":"PromptResponse"},"RAGAgentResponse":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message"},"type":"array","title":"Messages","description":"List of messages in the RAG agent response"},"conversation_id":{"type":"string","title":"Conversation Id","description":"The conversation ID for the RAG agent response"}},"type":"object","required":["messages","conversation_id"],"title":"RAGAgentResponse","example":[{"content":"## You are a helpful assistant that can search for information.\n\nWhen asked a 
question, perform a search to find relevant information and provide a response.\n\nThe response should contain line-item attributions to relevant search results, and be as informative as possible.\nIf no relevant results are found, then state that no results were found.\nIf no obvious question is present, then do not carry out a search, and instead ask for clarification.","role":"system"},{"content":"You are a helpful assistant.","role":"system"},{"content":"Who is the greatest philosopher of all time?","role":"user"},{"content":"Aristotle is widely considered the greatest philosopher of all time.","role":"assistant"},{"content":"Can you tell me more about him?","role":"user"},{"function_call":{"arguments":"{\"query\":\"Aristotle biography\"}","name":"search"},"role":"assistant"},{"content":"1. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n2. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n3. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n4. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n5. Life\nIn general, the details of Aristotle's life are not well-established. The biographies written in ancient times are often speculative and historians only agree on a few salient points.[B]\n","name":"search","role":"function"},{"content":"Aristotle (384–322 BC) was an Ancient Greek philosopher and polymath whose contributions have had a profound impact on various fields of knowledge. Here are some key points about his life and work:\n\n1. **Early Life**: Aristotle was born in 384 BC in Stagira, Chalcidice, which is near modern-day Thessaloniki, Greece. His father, Nicomachus, was the personal physician to King Amyntas of Macedon, which exposed Aristotle to medical and biological knowledge from a young age [C].\n\n2. 
**Education and Career**: After the death of his parents, Aristotle was sent to Athens to study at Plato's Academy, where he remained for about 20 years. After Plato's death, Aristotle left Athens and eventually became the tutor of Alexander the Great [C].\n\n3. **Philosophical Contributions**: Aristotle founded the Lyceum in Athens, where he established the Peripatetic school of philosophy. His works cover a wide range of subjects, including metaphysics, ethics, politics, logic, biology, and aesthetics. His writings laid the groundwork for many modern scientific and philosophical inquiries [A].\n\n4. **Legacy**: Aristotle's influence extends beyond philosophy to the natural sciences, linguistics, economics, and psychology. His method of systematic observation and analysis has been foundational to the development of modern science [A].\n\nAristotle's comprehensive approach to knowledge and his systematic methodology have earned him a lasting legacy as one of the greatest philosophers of all time.\n\nSources:\n- [A] Aristotle's broad range of writings and influence on modern science.\n- [C] Details about Aristotle's early life and education.","role":"assistant"}]},"RAGResponse":{"properties":{"completion":{"title":"Completion","description":"The generated completion from the RAG process"},"search_results":{"$ref":"#/components/schemas/SearchResponse","description":"The search results used for the RAG process"}},"type":"object","required":["completion","search_results"],"title":"RAGResponse","example":{"completion":{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Paris is the capital of France.","role":"assistant"}}],"id":"chatcmpl-example123"},"search_results":{"kg_search_results":[{"content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the 
document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}}},"RawChunk":{"properties":{"text":{"type":"string","title":"Text"}},"type":"object","required":["text"],"title":"RawChunk"},"ResultsWrapper_AllPromptsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AllPromptsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AllPromptsResponse]"},"ResultsWrapper_AnalyticsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AnalyticsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AnalyticsResponse]"},"ResultsWrapper_AppSettingsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AppSettingsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AppSettingsResponse]"},"ResultsWrapper_CollectionResponse_":{"properties":{"results":{"$ref":"#/components/schemas/CollectionResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[CollectionResponse]"},"ResultsWrapper_CreateVectorIndexResponse_":{"properties":{"results":{"$ref":"#/components/schemas/CreateVectorIndexResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[CreateVectorIndexResponse]"},"ResultsWrapper_GenericMessageResponse_":{"properties":{"results":{"$ref":"#/components/schemas/GenericMessageResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[GenericMessageResponse]"},"ResultsWrapper_KGCommunitiesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGCommunitiesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGCommunitiesResponse]"},"ResultsWrapper_KGEntitiesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGEntitiesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGEntitiesResponse]"},"ResultsWrapper_KGTriplesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGTriplesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGTriplesResponse]"},"ResultsWrapper_NoneType_":{"properties":{"results":{"type":"null","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[NoneType]"},"ResultsWrapper_RAGAgentResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGAgentResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGAgentResponse]"},"ResultsWrapper_RAGResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGResponse]"},"ResultsWrapper_SearchResponse_":{"properties":{"results":{"$ref":"#/components/schemas/SearchResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[SearchResponse]"},"ResultsWrapper_ServerStats_":{"properties":{"results":{"$ref":"#/components/schemas/ServerStats"}},"type":"object","required":["results"],"title":"ResultsWrapper[ServerStats]"},"ResultsWrapper_TokenResponse_":{"properties":{"results":{"$ref":"#/components/schemas/TokenResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[TokenResponse]"},"ResultsWrapper_Union_KGCreationResponse__KGCreationEstimationResponse__":{"properties":{"results":{"anyOf":[{"$ref":"#/components/schemas/KGCreationResponse"},{"$ref":"#/components/schemas/KGCreationEstimationResponse"}],"title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[Union[KGCreationResponse, 
KGCreationEstimationResponse]]"},"ResultsWrapper_Union_KGEnrichmentResponse__KGEnrichmentEstimationResponse__":{"properties":{"results":{"anyOf":[{"$ref":"#/components/schemas/KGEnrichmentResponse"},{"$ref":"#/components/schemas/KGEnrichmentEstimationResponse"}],"title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[Union[KGEnrichmentResponse, KGEnrichmentEstimationResponse]]"},"ResultsWrapper_UpdatePromptResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdatePromptResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdatePromptResponse]"},"ResultsWrapper_UpdateResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdateResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdateResponse]"},"ResultsWrapper_UserResponse_":{"properties":{"results":{"$ref":"#/components/schemas/shared__api__models__auth__responses__UserResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UserResponse]"},"ResultsWrapper_list_CollectionOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionOverviewResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[CollectionOverviewResponse]]"},"ResultsWrapper_list_CollectionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[CollectionResponse]]"},"ResultsWrapper_list_IngestionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/IngestionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[IngestionResponse]]"},"ResultsWrapper_list_LogResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/LogResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[LogResponse]]"},"ResultsWrapper_list_Tuple_str__Message___":{"properties":{"results":{"items":{"prefixItems":[{"type":"string"},{"$ref":"#/components/schemas/Message"}],"type":"array","maxItems":2,"minItems":2},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[Tuple[str, Message]]]"},"SearchResponse":{"properties":{"vector_search_results":{"items":{"$ref":"#/components/schemas/VectorSearchResult"},"type":"array","title":"Vector Search Results","description":"List of vector search results"},"kg_search_results":{"anyOf":[{"items":{"$ref":"#/components/schemas/KGSearchResult"},"type":"array"},{"type":"null"}],"title":"Kg Search Results","description":"Knowledge graph search results, if applicable"}},"type":"object","required":["vector_search_results"],"title":"SearchResponse","example":{"kg_search_results":[{"content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the 
document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}},"ServerStats":{"properties":{"start_time":{"type":"string","format":"date-time","title":"Start Time"},"uptime_seconds":{"type":"number","title":"Uptime Seconds"},"cpu_usage":{"type":"number","title":"Cpu Usage"},"memory_usage":{"type":"number","title":"Memory Usage"}},"type":"object","required":["start_time","uptime_seconds","cpu_usage","memory_usage"],"title":"ServerStats"},"Token":{"properties":{"token":{"type":"string","title":"Token"},"token_type":{"type":"string","title":"Token Type"}},"type":"object","required":["token","token_type"],"title":"Token"},"TokenResponse":{"properties":{"access_token":{"$ref":"#/components/schemas/Token"},"refresh_token":{"$ref":"#/components/schemas/Token"}},"type":"object","required":["access_token","refresh_token"],"title":"TokenResponse"},"Triple":{"properties":{"id":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Id"},"subject":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Subject"},"predicate":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Predicate"},"object":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Object"},"weight":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Weight","default":1.0},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"predicate_embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Predicate Embedding"},"extraction_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Extraction Ids","default":[]},"document_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Document Id"},"attributes":{"anyOf":[{"type":"object"},{"type":"string"}],"title":"Attributes","default":{}}},"type":"object","title":"Triple","description":"A relationship between two entities. 
This is a generic relationship, and can be used to represent any type of relationship between any two entities."},"UpdatePromptResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"UpdatePromptResponse"},"UpdateResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Task Id","description":"The task ID of the ingestion request."},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids","description":"The IDs of the documents that were ingested."}},"type":"object","required":["message","document_ids"],"title":"UpdateResponse","example":{"document_ids":["9fbe403b-c11c-5aae-8ade-ef22980c3ad1"],"message":"Update task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"UserOverviewResponse":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id"},"num_files":{"type":"integer","title":"Num Files"},"total_size_in_bytes":{"type":"integer","title":"Total Size In Bytes"},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids"}},"type":"object","required":["user_id","num_files","total_size_in_bytes","document_ids"],"title":"UserOverviewResponse"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"VectorSearchResult":{"properties":{"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"score":{"type":"number","title":"Score"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["extraction_id","document_id","user_id","collection_ids","score","text","metadata"],"title":"VectorSearchResult","description":"Result of a search operation.","collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"},"VectorSearchSettings":{"properties":{"use_vector_search":{"type":"boolean","title":"Use Vector Search","description":"Whether to use vector search","default":true},"use_hybrid_search":{"type":"boolean","title":"Use Hybrid Search","description":"Whether to perform a hybrid search (combining vector and keyword search)","default":false},"filters":{"type":"object","title":"Filters","description":"Alias for search_filters","deprecated":true},"search_filters":{"type":"object","title":"Search Filters","description":"Filters to apply to the vector search. 
Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.\n\n Commonly seen filters include the following:\n\n `{\"document_id\": {\"$eq\": \"9fbe403b-...\"}}`\n\n `{\"document_id\": {\"$in\": [\"9fbe403b-...\", \"3e157b3a-...\"]}}`\n\n `{\"collection_ids\": {\"$overlap\": [\"122fdf6a-...\", \"...\"]}}`\n\n `{\"$and\": {\"document_id\": ..., \"collection_ids\": ...}}`"},"search_limit":{"type":"integer","maximum":1000.0,"minimum":1.0,"title":"Search Limit","description":"Maximum number of results to return","default":10},"offset":{"type":"integer","minimum":0.0,"title":"Offset","description":"Offset to paginate search results","default":0},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"index_measure":{"$ref":"#/components/schemas/IndexMeasure","description":"The distance measure to use for indexing","default":"cosine_distance"},"include_values":{"type":"boolean","title":"Include Values","description":"Whether to include search score values in the search results","default":true},"include_metadatas":{"type":"boolean","title":"Include Metadatas","description":"Whether to include element metadata in the search results","default":true},"probes":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Probes","description":"Number of ivfflat index lists to query. Higher increases accuracy but decreases speed.","default":10},"ef_search":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Ef Search","description":"Size of the dynamic candidate list for HNSW index search. Higher increases accuracy but decreases speed.","default":40},"hybrid_search_settings":{"anyOf":[{"$ref":"#/components/schemas/HybridSearchSettings"},{"type":"null"}],"description":"Settings for hybrid search","default":{"full_text_weight":1.0,"semantic_weight":5.0,"full_text_limit":200,"rrf_k":50}},"search_strategy":{"type":"string","title":"Search Strategy","description":"Search strategy to use (e.g., 'default', 'query_fusion', 'hyde')","default":"vanilla"}},"type":"object","title":"VectorSearchSettings","ef_search":40,"filters":{"category":"technology"},"hybrid_search_settings":{"full_text_limit":200,"full_text_weight":1.0,"rrf_k":50,"semantic_weight":5.0},"include_metadata":true,"index_measure":"cosine_distance","limit":20,"offset":0,"probes":10,"selected_collection_ids":["2acb499e-8428-543b-bd85-0d9098718220","3e157b3a-8469-51db-90d9-52e7d896b49b"],"use_hybrid_search":true,"use_vector_search":true},"VectorTableName":{"type":"string","enum":["chunks","entity_embedding","community_report"],"title":"VectorTableName","description":"This enum represents the different tables where we store vectors.\n\n# TODO: change the table name of the chunks table. 
Right now it is called\n# {r2r_project_name}.{r2r_project_name} due to a bug in the vector class."},"shared__api__models__auth__responses__UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-10-16T17:20:12.245500"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-10-16T17:20:12.245505"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"},"shared__api__models__management__responses__UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-10-16T17:20:12.257770"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-10-16T17:20:12.257773"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"}},"securitySchemes":{"HTTPBearer":{"type":"http","scheme":"bearer"},"OAuth2PasswordBearer":{"type":"oauth2","flows":{"password":{"scopes":{},"tokenUrl":"token"}}}}}} +{"openapi":"3.1.0","info":{"title":"R2R Application API","version":"1.0.0"},"paths":{"/v2/ingest_files":{"post":{"summary":"Ingest Files App","description":"Ingest files into the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to ingest files and their associated metadatas into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only ingest files for their own access. 
More expansive collection permissioning is under development.","operationId":"ingest_files_app_v2_ingest_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_ingest_files_app_v2_ingest_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_IngestionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.ingest_files(\n file_paths=[\"pg_essay_1.html\", \"got.txt\"],\n metadatas=[{\"metadata_1\":\"some random metadata\"}, {\"metadata_2\": \"some other random metadata\"}],\n document_ids=None\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/ingest_files\" \\\n -H \"Content-Type: multipart/form-data\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1.html;type=text/html\" \\\n -F \"file=@got.txt;type=text/plain\" \\\n -F 'metadatas=[{},{}]' \\\n -F 'document_ids=null'\n"}]}},"/v2/update_files":{"post":{"summary":"Update Files App","description":"Update existing files in the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to update files and their associated metadatas into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only update their own files. More expansive collection permissioning is under development.","operationId":"update_files_app_v2_update_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_update_files_app_v2_update_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdateResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.update_files(\n file_paths=[\"pg_essay_1_v2.txt\"],\n document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/update_files\" \\\n -H \"Content-Type: multipart/form-data\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1_v2.txt;type=text/plain\" \\\n -F 'document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]'\n"}]}},"/v2/ingest_chunks":{"post":{"summary":"Ingest Chunks App","description":"Ingest text chunks into the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to ingest pre-parsed text chunks into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only ingest chunks for their own access. 
More expansive collection permissioning is under development.","operationId":"ingest_chunks_app_v2_ingest_chunks_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_ingest_chunks_app_v2_ingest_chunks_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_IngestionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.ingest_chunks(\n chunks=[\n {\n \"text\": \"Another chunk of text\",\n },\n {\n \"text\": \"Yet another chunk of text\",\n },\n {\n \"text\": \"A chunk of text\",\n },\n ],\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/ingest_chunks\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"chunks\": [\n {\n \"text\": \"Another chunk of text\"\n },\n {\n \"text\": \"Yet another chunk of text\"\n },\n {\n \"text\": \"A chunk of text\"\n }\n ],\n \"document_id\": \"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\",\n \"metadata\": {}\n }'\n"}]}},"/v2/create_vector_index":{"post":{"summary":"Create Vector Index App","description":"Create a vector index for a given table.","operationId":"create_vector_index_app_v2_create_vector_index_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_vector_index_app_v2_create_vector_index_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CreateVectorIndexResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.create_vector_index(\n table_name=\"vectors\",\n index_method=\"hnsw\",\n index_measure=\"cosine_distance\",\n index_arguments={\"m\": 16, \"ef_construction\": 64},\n concurrently=True\n)\n"},{"lang":"Shell","source":"curl -X POST \"http://localhost:7276/v2/create_vector_index\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\n \"table_name\": \"vectors\",\n \"index_method\": \"hnsw\",\n \"index_measure\": \"cosine_distance\",\n \"index_arguments\": {\n \"m\": 16,\n \"ef_construction\": 64\n },\n \"concurrently\": true\n }'\n"}]}},"/v2/list_vector_indices":{"get":{"summary":"List Vector Indices App","operationId":"list_vector_indices_app_v2_list_vector_indices_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"table_name","in":"query","required":false,"schema":{"anyOf":[{"$ref":"#/components/schemas/VectorTableName"},{"type":"null"}],"description":"The table to create the index on. Default: vectors","default":"vectors","title":"Table Name"},"description":"The table to create the index on. 
Default: vectors"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_ListVectorIndicesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.list_vector_indices(table_name=\"vectors\")\n"},{"lang":"Shell","source":"curl -X GET \"http://localhost:7276/v2/list_vector_indices?table_name=vectors\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\"\n"}]}},"/v2/delete_vector_index":{"delete":{"summary":"Delete Vector Index App","operationId":"delete_vector_index_app_v2_delete_vector_index_delete","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_vector_index_app_v2_delete_vector_index_delete"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_DeleteVectorIndexResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.delete_vector_index(\n index_name=\"ix_vector_cosine_ops_hnsw__20241021211541\",\n table_name=\"vectors\",\n concurrently=True\n)\n"},{"lang":"Shell","source":"curl -X DELETE \"http://localhost:7276/v2/delete_vector_index\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\n \"index_name\": \"ix_vector_cosine_ops_hnsw__20241021211541\",\n \"table_name\": \"vectors\", \n \"concurrently\": true\n }'\n"}]}},"/v2/health":{"get":{"summary":"Health Check","operationId":"health_check_v2_health_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/v2/server_stats":{"get":{"summary":"Server Stats","operationId":"server_stats_v2_server_stats_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_ServerStats_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/update_prompt":{"post":{"summary":"Update Prompt App","operationId":"update_prompt_app_v2_update_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_prompt_app_v2_update_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/add_prompt":{"post":{"summary":"Add Prompt 
App","operationId":"add_prompt_app_v2_add_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_prompt_app_v2_add_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_prompt/{prompt_name}":{"get":{"summary":"Get Prompt App","operationId":"get_prompt_app_v2_get_prompt__prompt_name__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"},{"name":"inputs","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"description":"JSON-encoded prompt inputs","title":"Inputs"},"description":"JSON-encoded prompt inputs"},{"name":"prompt_override","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"description":"Prompt override","title":"Prompt Override"},"description":"Prompt override"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/get_all_prompts":{"get":{"summary":"Get All Prompts App","operationId":"get_all_prompts_app_v2_get_all_prompts_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AllPromptsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_prompt/{prompt_name}":{"delete":{"summary":"Delete Prompt App","operationId":"delete_prompt_app_v2_delete_prompt__prompt_name__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/analytics":{"get":{"summary":"Get Analytics App","operationId":"get_analytics_app_v2_analytics_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filter_criteria","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Filter Criteria"}},{"name":"analysis_types","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Analysis Types"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AnalyticsResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logs":{"get":{"summary":"Logs 
App","operationId":"logs_app_v2_logs_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"run_type_filter","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"default":"","title":"Run Type Filter"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_LogResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/app_settings":{"get":{"summary":"App Settings","operationId":"app_settings_v2_app_settings_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AppSettingsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/users_overview":{"get":{"summary":"Users Overview App","operationId":"users_overview_app_v2_users_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"default":[],"title":"User Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_UserOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete":{"delete":{"summary":"Delete App","operationId":"delete_app_v2_delete_delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filters","in":"query","required":true,"schema":{"type":"string","description":"JSON-encoded filters","title":"Filters"},"description":"JSON-encoded filters"}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/download_file/{document_id}":{"get":{"summary":"Download File App","description":"Download a file by its document ID as a stream.","operationId":"download_file_app_v2_download_file__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"}],"responses":{"200":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/documents_overview":{"get":{"summary":"Documents Overview App","operationId":"documents_overview_app_v2_documents_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_ids","in":"query","required":false,"schema":{"type":"array","items":{"type":"string"},"default":[],"title":"Document 
Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","minimum":-1,"description":"Number of items to return. Use -1 to return all items.","default":100,"title":"Limit"},"description":"Number of items to return. Use -1 to return all items."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/document_chunks/{document_id}":{"get":{"summary":"Document Chunks App","operationId":"document_chunks_app_v2_document_chunks__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","title":"Document Id"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":100,"title":"Limit"}},{"name":"include_vectors","in":"query","required":false,"schema":{"anyOf":[{"type":"boolean"},{"type":"null"}],"default":false,"title":"Include Vectors"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentChunkResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collections_overview":{"get":{"summary":"Collections Overview App","operationId":"collections_overview_app_v2_collections_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"title":"Collection Ids"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","maximum":1000,"minimum":1},{"type":"null"}],"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_collection":{"post":{"summary":"Create Collection App","operationId":"create_collection_app_v2_create_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_collection_app_v2_create_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_collection/{collection_id}":{"get":{"summary":"Get Collection 
App","operationId":"get_collection_app_v2_get_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/update_collection":{"put":{"summary":"Update Collection App","operationId":"update_collection_app_v2_update_collection_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_collection_app_v2_update_collection_put"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_collection/{collection_id}":{"delete":{"summary":"Delete Collection App","operationId":"delete_collection_app_v2_delete_collection__collection_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/list_collections":{"get":{"summary":"List Collections App","operationId":"list_collections_app_v2_list_collections_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/add_user_to_collection":{"post":{"summary":"Add User To Collection App","operationId":"add_user_to_collection_app_v2_add_user_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_user_to_collection_app_v2_add_user_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_user_from_collection":{"post":{"summary":"Remove User From Collection 
App","operationId":"remove_user_from_collection_app_v2_remove_user_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_users_in_collection/{collection_id}":{"get":{"summary":"Get Users In Collection App","operationId":"get_users_in_collection_app_v2_get_users_in_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_UserResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user_collections/{user_id}":{"get":{"summary":"Get Collections For User App","operationId":"get_collections_for_user_app_v2_user_collections__user_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"User ID","title":"User Id"},"description":"User ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/assign_document_to_collection":{"post":{"summary":"Assign Document To Collection App","operationId":"assign_document_to_collection_app_v2_assign_document_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_document_from_collection":{"post":{"summary":"Remove Document From Collection 
App","operationId":"remove_document_from_collection_app_v2_remove_document_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/document_collections/{document_id}":{"get":{"summary":"Document Collections App","operationId":"document_collections_app_v2_document_collections__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collection/{collection_id}/documents":{"get":{"summary":"Documents In Collection App","operationId":"documents_in_collection_app_v2_collection__collection_id__documents_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/conversations_overview":{"get":{"summary":"Conversations Overview App","operationId":"conversations_overview_app_v2_conversations_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_ids","in":"query","required":false,"schema":{"type":"array","items":{"type":"string"},"default":[],"title":"Conversation Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_ConversationOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/get_conversation/{conversation_id}":{"get":{"summary":"Get 
Conversation","operationId":"get_conversation_v2_get_conversation__conversation_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"},{"name":"branch_id","in":"query","required":false,"schema":{"type":"string","description":"Branch ID","title":"Branch Id"},"description":"Branch ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_Tuple_str__Message___"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_conversation":{"post":{"summary":"Create Conversation","operationId":"create_conversation_v2_create_conversation_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"object","title":"Response Create Conversation V2 Create Conversation Post"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/add_message/{conversation_id}":{"post":{"summary":"Add Message","operationId":"add_message_v2_add_message__conversation_id__post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_message_v2_add_message__conversation_id__post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"object","title":"Response Add Message V2 Add Message Conversation Id Post"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/update_message/{message_id}":{"put":{"summary":"Edit Message","operationId":"edit_message_v2_update_message__message_id__put","security":[{"HTTPBearer":[]}],"parameters":[{"name":"message_id","in":"path","required":true,"schema":{"type":"string","description":"Message ID","title":"Message Id"},"description":"Message ID"}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"string","description":"New content","title":"Message"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"object","title":"Response Edit Message V2 Update Message Message Id Put"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/branches_overview/{conversation_id}":{"get":{"summary":"Branches Overview","operationId":"branches_overview_v2_branches_overview__conversation_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"object","title":"Response Branches Overview V2 Branches Overview Conversation Id Get"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete_conversation/{conversation_id}":{"delete":{"summary":"Delete 
Conversation","operationId":"delete_conversation_v2_delete_conversation__conversation_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/search":{"post":{"summary":"Search App","description":"Perform a search query on the vector database and knowledge graph.\n\nThis endpoint allows for complex filtering of search results using PostgreSQL-based queries.\nFilters can be applied to various fields such as document_id, and internal metadata values.\n\n\nAllowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.","operationId":"search_app_v2_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_search_app_v2_search_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_SearchResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.search(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/search\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true,\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n }'\n"}]}},"/v2/rag":{"post":{"summary":"Rag App","description":"Execute a RAG (Retrieval-Augmented Generation) query.\n\nThis endpoint combines search results with language model generation.\nIt supports the same filtering capabilities as the search endpoint,\nallowing for 
precise control over the retrieved context.\n\nThe generation process can be customized using the rag_generation_config parameter.","operationId":"rag_app_v2_rag_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_rag_app_v2_rag_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.rag(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True,\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/rag\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true,\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n }'\n"}]}},"/v2/agent":{"post":{"summary":"Agent App","description":"Implement an agent-based interaction for complex query processing.\n\nThis endpoint supports multi-turn conversations and can handle complex queries\nby breaking them down into sub-tasks. 
It uses the same filtering capabilities\nas the search and RAG endpoints for retrieving relevant information.\n\nThe agent's behavior can be customized using the rag_generation_config and\ntask_prompt_override parameters.","operationId":"agent_app_v2_agent_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_agent_app_v2_agent_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGAgentResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.agent(\n messages=[\n {\"role\": \"user\", \"content\": \"Who is the greatest philosopher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philosopher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n include_title_if_available=True\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/agent\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"messages\": [\n {\"role\": \"user\", \"content\": \"Who is the greatest philosopher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philosopher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": false\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n \"include_title_if_available\": true\n }'\n"}]}},"/v2/completion":{"post":{"summary":"Completion","description":"Generate completions for a list of messages.\n\nThis endpoint uses the language model to generate completions for the provided messages.\nThe generation process can be customized using the generation_config parameter.","operationId":"completion_v2_completion_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_completion_v2_completion_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/register":{"post":{"summary":"Register App","description":"Register a new user with the given email and 
password.","operationId":"register_app_v2_register_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_register_app_v2_register_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/verify_email":{"post":{"summary":"Verify Email App","description":"Verify a user's email address.\n\nThis endpoint is used to confirm a user's email address using the verification code\nsent to their email after registration.","operationId":"verify_email_app_v2_verify_email_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_verify_email_app_v2_verify_email_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/login":{"post":{"summary":"Login App","description":"Authenticate a user and provide access tokens.\n\nThis endpoint authenticates a user using their email (username) and password,\nand returns access and refresh tokens upon successful authentication.","operationId":"login_app_v2_login_post","requestBody":{"content":{"application/x-www-form-urlencoded":{"schema":{"$ref":"#/components/schemas/Body_login_app_v2_login_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logout":{"post":{"summary":"Logout App","description":"Log out the current user.\n\nThis endpoint invalidates the user's current access token, effectively logging them out.","operationId":"logout_app_v2_logout_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}}},"security":[{"OAuth2PasswordBearer":[]},{"HTTPBearer":[]}]}},"/v2/user":{"get":{"summary":"Get User App","description":"Get the current user's profile information.\n\nThis endpoint returns the profile information of the currently authenticated user.","operationId":"get_user_app_v2_user_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}}},"security":[{"HTTPBearer":[]}]},"put":{"summary":"Put User App","description":"Update the current user's profile information.\n\nThis endpoint allows the authenticated user to update their profile information.","operationId":"put_user_app_v2_user_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_put_user_app_v2_user_put"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/refresh_access_token":{"post":{"summary":"Refresh Access Token App","description":"Refresh the access token using a refresh token.\n\nThis endpoint allows users to obtain a new access token using their refresh token.","operationId":"refresh_access_token_app_v2_refresh_access_token_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","title":"Refresh Token","description":"Refresh token"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/change_password":{"post":{"summary":"Change Password App","description":"Change the authenticated user's password.\n\nThis endpoint allows users to change their password by providing their current password\nand a new password.","operationId":"change_password_app_v2_change_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_change_password_app_v2_change_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/request_password_reset":{"post":{"summary":"Request Password Reset App","description":"Request a password reset for a user.\n\nThis endpoint initiates the password reset process by sending a reset link\nto the specified email address.","operationId":"request_password_reset_app_v2_request_password_reset_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","format":"email","title":"Email","description":"User's email address"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/reset_password":{"post":{"summary":"Reset Password App","operationId":"reset_password_app_v2_reset_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_reset_password_app_v2_reset_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user/{user_id}":{"delete":{"summary":"Delete User App","description":"Delete a user account.\n\nThis endpoint allows users to delete their own account or, for superusers,\nto delete any user account.","operationId":"delete_user_app_v2_user__user_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"ID of the user to delete","title":"User Id"},"description":"ID of the user to 
delete"}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_user_app_v2_user__user_id__delete"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_graph":{"post":{"summary":"Create Graph","description":"Creating a graph on your documents. This endpoint takes input a list of document ids and KGCreationSettings.\nIf document IDs are not provided, the graph will be created on all documents in the system.\nThis step extracts the relevant entities and relationships from the documents and creates a graph based on the extracted information.\nIn order to do GraphRAG, you will need to run the enrich_graph endpoint.","operationId":"create_graph_v2_create_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_graph_v2_create_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_Union_KGCreationResponse__KGCreationEstimationResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/enrich_graph":{"post":{"summary":"Enrich Graph","description":"This endpoint enriches the graph with additional information.\nIt creates communities of nodes based on their similarity and adds embeddings to the graph.\nThis step is necessary for GraphRAG to work.","operationId":"enrich_graph_v2_enrich_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_enrich_graph_v2_enrich_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_Union_KGEnrichmentResponse__KGEnrichmentEstimationResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/entities":{"get":{"summary":"Get Entities","description":"Retrieve entities from the knowledge graph.","operationId":"get_entities_v2_entities_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"entity_level","in":"query","required":false,"schema":{"anyOf":[{"$ref":"#/components/schemas/EntityLevel"},{"type":"null"}],"description":"Type of entities to retrieve. Options are: raw, dedup_document, dedup_collection.","default":"collection","title":"Entity Level"},"description":"Type of entities to retrieve. 
Options are: collection, document, chunk."},{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve entities from.","title":"Collection Id"},"description":"Collection ID to retrieve entities from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"entity_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Entity IDs to filter by.","title":"Entity Ids"},"description":"Entity IDs to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGEntitiesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/triples":{"get":{"summary":"Get Triples","description":"Retrieve triples from the knowledge graph.","operationId":"get_triples_v2_triples_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve triples from.","title":"Collection Id"},"description":"Collection ID to retrieve triples from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"entity_names","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Entity names to filter by.","title":"Entity Names"},"description":"Entity names to filter by."},{"name":"triple_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Triple IDs to filter by.","title":"Triple Ids"},"description":"Triple IDs to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGTriplesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/communities":{"get":{"summary":"Get Communities","description":"Retrieve communities from the knowledge graph.","operationId":"get_communities_v2_communities_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve communities from.","title":"Collection Id"},"description":"Collection ID to retrieve communities from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for 
pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"levels","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"integer"}},{"type":"null"}],"description":"Levels to filter by.","title":"Levels"},"description":"Levels to filter by."},{"name":"community_numbers","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"integer"}},{"type":"null"}],"description":"Community numbers to filter by.","title":"Community Numbers"},"description":"Community numbers to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGCommunitiesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/deduplicate_entities":{"post":{"summary":"Deduplicate Entities","description":"Deduplicate entities in the knowledge graph.","operationId":"deduplicate_entities_v2_deduplicate_entities_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_deduplicate_entities_v2_deduplicate_entities_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGEntityDeduplicationResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_graph_for_collection":{"delete":{"summary":"Delete Graph For Collection","description":"Delete the graph for a given collection. Note that this endpoint may delete a large amount of data created by the KG pipeline, this deletion is irreversible, and recreating the graph may be an expensive operation.\n\nNotes:\nThe endpoint deletes all communities for a given collection. If the cascade flag is set to true, the endpoint also deletes all the entities and triples associated with the collection.\n\nWARNING: Setting this flag to true will delete entities and triples for documents that are shared across multiple collections. 
Do not set this flag unless you are absolutely sure that you want to delete the entities and triples for all documents in the collection.","operationId":"delete_graph_for_collection_v2_delete_graph_for_collection_delete","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_graph_for_collection_v2_delete_graph_for_collection_delete"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/openapi_spec":{"get":{"summary":"Openapi Spec","operationId":"openapi_spec_v2_openapi_spec_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AllPromptsResponse":{"properties":{"prompts":{"additionalProperties":{"$ref":"#/components/schemas/PromptResponse"},"type":"object","title":"Prompts"}},"type":"object","required":["prompts"],"title":"AllPromptsResponse"},"AnalyticsResponse":{"properties":{"analytics_data":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Analytics Data"},"filtered_logs":{"type":"object","title":"Filtered Logs"}},"type":"object","required":["filtered_logs"],"title":"AnalyticsResponse"},"AppSettingsResponse":{"properties":{"config":{"type":"object","title":"Config"},"prompts":{"type":"object","title":"Prompts"}},"type":"object","required":["config","prompts"],"title":"AppSettingsResponse"},"Body_add_message_v2_add_message__conversation_id__post":{"properties":{"message":{"$ref":"#/components/schemas/Message","description":"Message content"},"parent_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Parent Id","description":"Parent message ID"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata","description":"Metadata"}},"type":"object","required":["message"],"title":"Body_add_message_v2_add_message__conversation_id__post"},"Body_add_prompt_app_v2_add_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"type":"string","title":"Template","description":"Prompt template"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name","template"],"title":"Body_add_prompt_app_v2_add_prompt_post"},"Body_add_user_to_collection_app_v2_add_user_to_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_add_user_to_collection_app_v2_add_user_to_collection_post"},"Body_agent_app_v2_agent_post":{"properties":{"message":{"anyOf":[{"$ref":"#/components/schemas/Message"},{"type":"null"}],"description":"Message object"},"messages":{"anyOf":[{"items":{"$ref":"#/components/schemas/Message"},"type":"array"},{"type":"null"}],"title":"Messages","description":"List of message objects (deprecated, use message instead)","deprecated":true},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search 
settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":true},"conversation_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Conversation Id","description":"The ID of the conversation, a new ID is assigned if not provided"},"branch_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Branch Id","description":"The ID of the branch"}},"type":"object","title":"Body_agent_app_v2_agent_post"},"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"},"Body_change_password_app_v2_change_password_post":{"properties":{"current_password":{"type":"string","title":"Current Password","description":"Current password"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["current_password","new_password"],"title":"Body_change_password_app_v2_change_password_post"},"Body_completion_v2_completion_post":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message"},"type":"array","title":"Messages","description":"The messages to complete"},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"The generation config"}},"type":"object","required":["messages"],"title":"Body_completion_v2_completion_post"},"Body_create_collection_app_v2_create_collection_post":{"properties":{"name":{"type":"string","title":"Name","description":"Collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Collection description","default":""}},"type":"object","required":["name"],"title":"Body_create_collection_app_v2_create_collection_post"},"Body_create_graph_v2_create_graph_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to create graph for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph creation process."},"kg_creation_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Creation Settings","description":"Settings for the graph creation process."}},"type":"object","title":"Body_create_graph_v2_create_graph_post"},"Body_create_vector_index_app_v2_create_vector_index_post":{"properties":{"table_name":{"anyOf":[{"$ref":"#/components/schemas/VectorTableName"},{"type":"null"}],"description":"The table to create the index on. Default: vectors","default":"vectors"},"index_method":{"$ref":"#/components/schemas/IndexMethod","description":"The indexing method to use. Options: hnsw, ivfflat, auto. Default: hnsw","default":"hnsw"},"index_measure":{"$ref":"#/components/schemas/IndexMeasure","description":"Distance measure for vector comparisons. Options: cosine_distance, l2_distance, max_inner_product. 
Default: cosine_distance","default":"cosine_distance"},"index_arguments":{"anyOf":[{"$ref":"#/components/schemas/IndexArgsIVFFlat"},{"$ref":"#/components/schemas/IndexArgsHNSW"},{"type":"null"}],"title":"Index Arguments","description":"Configuration parameters for the chosen index method. For HNSW: {m: int, ef_construction: int}. For IVFFlat: {n_lists: int}"},"index_name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Index Name","description":"Optional custom name for the index. If not provided, one will be auto-generated"},"concurrently":{"type":"boolean","title":"Concurrently","description":"Whether to create the index concurrently. Default: true","default":true}},"type":"object","title":"Body_create_vector_index_app_v2_create_vector_index_post"},"Body_deduplicate_entities_v2_deduplicate_entities_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to deduplicate entities for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the deduplication process."},"deduplication_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Deduplication Settings","description":"Settings for the deduplication process."}},"type":"object","title":"Body_deduplicate_entities_v2_deduplicate_entities_post"},"Body_delete_graph_for_collection_v2_delete_graph_for_collection_delete":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id","description":"Collection ID to delete graph for."},"cascade":{"type":"boolean","title":"Cascade","description":"Whether to cascade the deletion, and delete entities and triples belonging to the collection.","default":false}},"type":"object","required":["collection_id"],"title":"Body_delete_graph_for_collection_v2_delete_graph_for_collection_delete"},"Body_delete_user_app_v2_user__user_id__delete":{"properties":{"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password","description":"User's current password"},"delete_vector_data":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Delete Vector Data","description":"Whether to delete the user's vector data","default":false}},"type":"object","title":"Body_delete_user_app_v2_user__user_id__delete"},"Body_delete_vector_index_app_v2_delete_vector_index_delete":{"properties":{"index_name":{"type":"string","title":"Index Name","description":"The name of the index to delete"},"table_name":{"anyOf":[{"$ref":"#/components/schemas/VectorTableName"},{"type":"null"}],"description":"The name of the table containing the index. Default: vectors","default":"vectors"},"concurrently":{"type":"boolean","title":"Concurrently","description":"Whether to delete the index concurrently. 
Default: true","default":true}},"type":"object","required":["index_name"],"title":"Body_delete_vector_index_app_v2_delete_vector_index_delete"},"Body_enrich_graph_v2_enrich_graph_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to enrich graph for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph enrichment process.","default":"estimate"},"kg_enrichment_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Enrichment Settings","description":"Settings for the graph enrichment process."}},"type":"object","title":"Body_enrich_graph_v2_enrich_graph_post"},"Body_ingest_chunks_app_v2_ingest_chunks_post":{"properties":{"chunks":{"items":{"$ref":"#/components/schemas/RawChunk"},"type":"array","title":"Chunks","description":"A list of text chunks to ingest into the system.","default":{}},"document_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document Id","description":"An optional document id to associate the chunks with. If not provided, a unique document id will be generated."},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","title":"Body_ingest_chunks_app_v2_ingest_chunks_post"},"Body_ingest_files_app_v2_ingest_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to ingest into the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"ingestion_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"title":"Ingestion Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. 
If not provided, the system will use the default server-side chunking configuration."}},"type":"object","required":["files"],"title":"Body_ingest_files_app_v2_ingest_files_post"},"Body_login_app_v2_login_post":{"properties":{"grant_type":{"anyOf":[{"type":"string","pattern":"password"},{"type":"null"}],"title":"Grant Type"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"scope":{"type":"string","title":"Scope","default":""},"client_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Id"},"client_secret":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Secret"}},"type":"object","required":["username","password"],"title":"Body_login_app_v2_login_post"},"Body_put_user_app_v2_user_put":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id","description":"ID of the user to update"},"email":{"anyOf":[{"type":"string","format":"email"},{"type":"null"}],"title":"Email","description":"Updated email address"},"is_superuser":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Is Superuser","description":"Updated superuser status"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated user name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio","description":"Updated user bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture","description":"Updated profile picture URL"}},"type":"object","title":"Body_put_user_app_v2_user_put"},"Body_rag_app_v2_rag_post":{"properties":{"query":{"type":"string","title":"Query","description":"RAG query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":false}},"type":"object","required":["query"],"title":"Body_rag_app_v2_rag_post"},"Body_register_app_v2_register_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"password":{"type":"string","title":"Password","description":"User's password"}},"type":"object","required":["email","password"],"title":"Body_register_app_v2_register_post"},"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph enrichment process.","default":"estimate"},"kg_enrichment_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Enrichment Settings","description":"Settings for the graph enrichment 
process."}},"type":"object","required":["document_id","collection_id"],"title":"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"},"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"},"Body_reset_password_app_v2_reset_password_post":{"properties":{"reset_token":{"type":"string","title":"Reset Token","description":"Password reset token"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["reset_token","new_password"],"title":"Body_reset_password_app_v2_reset_password_post"},"Body_search_app_v2_search_post":{"properties":{"query":{"type":"string","title":"Query","description":"Search query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"}},"type":"object","required":["query"],"title":"Body_search_app_v2_search_post"},"Body_update_collection_app_v2_update_collection_put":{"properties":{"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Updated collection description"}},"type":"object","required":["collection_id"],"title":"Body_update_collection_app_v2_update_collection_put"},"Body_update_files_app_v2_update_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to update in the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"ingestion_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"title":"Ingestion Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. 
If not provided, the system will use the default server-side chunking configuration."}},"type":"object","required":["files"],"title":"Body_update_files_app_v2_update_files_post"},"Body_update_prompt_app_v2_update_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Template","description":"Prompt template"},"input_types":{"anyOf":[{"additionalProperties":{"type":"string"},"type":"object"},{"type":"null"}],"title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name"],"title":"Body_update_prompt_app_v2_update_prompt_post"},"Body_verify_email_app_v2_verify_email_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"verification_code":{"type":"string","title":"Verification Code","description":"Email verification code"}},"type":"object","required":["email","verification_code"],"title":"Body_verify_email_app_v2_verify_email_post"},"CollectionOverviewResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"user_count":{"type":"integer","title":"User Count"},"document_count":{"type":"integer","title":"Document Count"}},"type":"object","required":["collection_id","name","description","created_at","updated_at","user_count","document_count"],"title":"CollectionOverviewResponse"},"CollectionResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"}},"type":"object","required":["collection_id","name","description","created_at","updated_at"],"title":"CollectionResponse"},"CommunityReport":{"properties":{"community_number":{"type":"integer","title":"Community Number"},"level":{"type":"integer","title":"Level"},"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name","default":""},"summary":{"type":"string","title":"Summary","default":""},"findings":{"items":{"type":"string"},"type":"array","title":"Findings","default":[]},"rating":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Rating"},"rating_explanation":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rating Explanation"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Embedding"},"attributes":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Attributes"}},"type":"object","required":["community_number","level","collection_id"],"title":"CommunityReport"},"ConversationOverviewResponse":{"properties":{"conversation_id":{"type":"string","format":"uuid","title":"Conversation Id"},"created_at":{"type":"string","format":"date-time","title":"Created At"}},"type":"object","required":["conversation_id","created_at"],"title":"ConversationOverviewResponse"},"CreateVectorIndexResponse":{"properties":{"message":{"type":"string","title":"Message","description":"Vector index creation queued 
successfully."}},"type":"object","required":["message"],"title":"CreateVectorIndexResponse"},"DeleteVectorIndexResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"DeleteVectorIndexResponse"},"DocumentChunkResponse":{"properties":{"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"},"vector":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Vector"}},"type":"object","required":["extraction_id","document_id","user_id","collection_ids","text","metadata"],"title":"DocumentChunkResponse"},"DocumentOverviewResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"title":{"type":"string","title":"Title"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"type":{"type":"string","title":"Type"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"ingestion_status":{"type":"string","title":"Ingestion Status"},"kg_extraction_status":{"type":"string","title":"Kg Extraction Status"},"version":{"type":"string","title":"Version"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["id","title","user_id","type","created_at","updated_at","ingestion_status","kg_extraction_status","version","collection_ids","metadata"],"title":"DocumentOverviewResponse"},"Entity":{"properties":{"name":{"type":"string","title":"Name"},"id":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Id"},"category":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Category"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"description_embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Description Embedding"},"community_numbers":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Community Numbers"},"extraction_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Extraction Ids"},"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id"},"document_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Document Id"},"document_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Document Ids"},"attributes":{"anyOf":[{"type":"object"},{"type":"string"},{"type":"null"}],"title":"Attributes"}},"type":"object","required":["name"],"title":"Entity","description":"An entity extracted from a document."},"EntityLevel":{"type":"string","enum":["collection","document","chunk"],"title":"EntityLevel"},"GenerationConfig":{"properties":{"model":{"type":"string","title":"Model"},"temperature":{"type":"number","title":"Temperature"},"top_p":{"type":"number","title":"Top P"},"max_tokens_to_sample":{"type":"integer","title":"Max Tokens To 
Sample"},"stream":{"type":"boolean","title":"Stream"},"functions":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Functions"},"tools":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tools"},"add_generation_kwargs":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Add Generation Kwargs"},"api_base":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api Base"}},"type":"object","title":"GenerationConfig","max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"GenericMessageResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"GenericMessageResponse"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"HybridSearchSettings":{"properties":{"full_text_weight":{"type":"number","title":"Full Text Weight","description":"Weight to apply to full text search","default":1.0},"semantic_weight":{"type":"number","title":"Semantic Weight","description":"Weight to apply to semantic search","default":5.0},"full_text_limit":{"type":"integer","title":"Full Text Limit","description":"Maximum number of results to return from full text search","default":200},"rrf_k":{"type":"integer","title":"Rrf K","description":"K-value for RRF (Rank Reciprocal Fusion)","default":50}},"type":"object","title":"HybridSearchSettings"},"IndexArgsHNSW":{"properties":{"m":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"M","default":16},"ef_construction":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Ef Construction","default":64}},"type":"object","title":"IndexArgsHNSW","description":"A class for arguments that can optionally be supplied to the index creation\nmethod when building an HNSW type index.\n\nRef: https://github.com/pgvector/pgvector#index-options\n\nBoth attributes are Optional in case the user only wants to specify one and\nleave the other as default\n\nAttributes:\n m (int): Maximum number of connections per node per layer (default: 16)\n ef_construction (int): Size of the dynamic candidate list for\n constructing the graph (default: 64)"},"IndexArgsIVFFlat":{"properties":{"n_lists":{"type":"integer","title":"N Lists"}},"type":"object","required":["n_lists"],"title":"IndexArgsIVFFlat","description":"A class for arguments that can optionally be supplied to the index creation\nmethod when building an IVFFlat type index.\n\nAttributes:\n nlist (int): The number of IVF centroids that the index should use"},"IndexMeasure":{"type":"string","enum":["cosine_distance","l2_distance","max_inner_product"],"title":"IndexMeasure","description":"An enum representing the types of distance measures available for indexing.\n\nAttributes:\n cosine_distance (str): The cosine distance measure for indexing.\n l2_distance (str): The Euclidean (L2) distance measure for indexing.\n max_inner_product (str): The maximum inner product measure for indexing."},"IndexMethod":{"type":"string","enum":["auto","ivfflat","hnsw"],"title":"IndexMethod","description":"An enum representing the index methods available.\n\nThis class currently only supports the 'ivfflat' method but may\nexpand in the future.\n\nAttributes:\n auto (str): Automatically choose the best available index method.\n ivfflat (str): The ivfflat index method.\n hnsw (str): The hnsw index 
method."},"IngestionResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Task Id","description":"The task ID of the ingestion request."},"document_id":{"type":"string","format":"uuid","title":"Document Id","description":"The ID of the document that was ingested."}},"type":"object","required":["message","document_id"],"title":"IngestionResponse","example":{"document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","message":"Ingestion task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGCommunitiesResponse":{"properties":{"communities":{"items":{"$ref":"#/components/schemas/CommunityReport"},"type":"array","title":"Communities","description":"The list of communities in the graph for the collection."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of communities in the graph."}},"type":"object","required":["communities","total_entries"],"title":"KGCommunitiesResponse","description":"Response for knowledge graph communities.","example":{"communities":[{"collection_ids":["122fdf6a-e116-546b-a8f6-e4cb2e2c0a09"],"community_number":1,"findings":["finding1","finding2"],"id":"1","level":0,"name":"community name","rating":"8","rating_explanation":"rating explanation","summary":"community summary"}],"total_count":1}},"KGCommunityResult":{"properties":{"name":{"type":"string","title":"Name"},"summary":{"type":"string","title":"Summary"},"rating":{"type":"number","title":"Rating"},"rating_explanation":{"type":"string","title":"Rating Explanation"},"findings":{"items":{"type":"string"},"type":"array","title":"Findings"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","summary","rating","rating_explanation","findings"],"title":"KGCommunityResult","findings":["Finding 1","Finding 2"],"metadata":{},"name":"Community Name","rating":9,"rating_explanation":"Rating Explanation","summary":"Community Summary"},"KGCreationEstimationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user.","default":""},"document_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Document Count","description":"The number of documents in the collection."},"number_of_jobs_created":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Number Of Jobs Created","description":"The number of jobs created for the graph creation process."},"total_chunks":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Chunks","description":"The estimated total number of chunks."},"estimated_entities":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Entities","description":"The estimated number of entities in the graph."},"estimated_triples":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Triples","description":"The estimated number of triples in the graph."},"estimated_llm_calls":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Llm Calls","description":"The estimated number of LLM calls in millions."},"estimated_total_in_out_tokens_in_millions":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total In Out Tokens In Millions","description":"The estimated total number of input and output tokens in millions."},"estimated_total_time_in_minutes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total Time 
In Minutes","description":"The estimated total time to run the graph creation process in minutes."},"estimated_cost_in_usd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Cost In Usd","description":"The estimated cost to run the graph creation process in USD."}},"type":"object","title":"KGCreationEstimationResponse","description":"Response for knowledge graph creation estimation."},"KGCreationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the KG creation request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the KG creation request."}},"type":"object","required":["message","task_id"],"title":"KGCreationResponse","example":{"message":"Graph creation queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGEnrichmentEstimationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user.","default":""},"total_entities":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Entities","description":"The total number of entities in the graph."},"total_triples":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Triples","description":"The total number of triples in the graph."},"estimated_llm_calls":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Llm Calls","description":"The estimated number of LLM calls."},"estimated_total_in_out_tokens_in_millions":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total In Out Tokens In Millions","description":"The estimated total number of input and output tokens in millions."},"estimated_cost_in_usd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Cost In Usd","description":"The estimated cost to run the graph enrichment process."},"estimated_total_time_in_minutes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total Time In Minutes","description":"The estimated total time to run the graph enrichment process."}},"type":"object","title":"KGEnrichmentEstimationResponse","description":"Response for knowledge graph enrichment estimation."},"KGEnrichmentResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the KG enrichment request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the KG enrichment request."}},"type":"object","required":["message","task_id"],"title":"KGEnrichmentResponse","example":{"message":"Graph enrichment queued successfuly.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGEntitiesResponse":{"properties":{"entities":{"items":{"$ref":"#/components/schemas/Entity"},"type":"array","title":"Entities","description":"The list of entities in the graph."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of entities in the graph for the collection or document."}},"type":"object","required":["entities","total_entries"],"title":"KGEntitiesResponse","description":"Response for knowledge graph entities.","example":{"entities":[{"description":"Description 1","id":"1","name":"Entity 1"},{"description":"Description 2","id":"2","name":"Entity 2"}],"total_entries":2}},"KGEntityDeduplicationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user."}},"type":"object","required":["message"],"title":"KGEntityDeduplicationResponse","description":"Response for 
knowledge graph entity deduplication.","example":{"message":"Entity deduplication queued successfully."}},"KGEntityResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGEntityResult","description":"Entity Description","metadata":{},"name":"Entity Name"},"KGGlobalResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGGlobalResult","description":"Global Result Description","metadata":{},"name":"Global Result Name"},"KGRelationshipResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGRelationshipResult","description":"Relationship Description","metadata":{},"name":"Relationship Name"},"KGRunType":{"type":"string","enum":["estimate","run"],"title":"KGRunType","description":"Type of KG run."},"KGSearchMethod":{"type":"string","enum":["local"],"const":"local","title":"KGSearchMethod"},"KGSearchResult":{"properties":{"method":{"$ref":"#/components/schemas/KGSearchMethod"},"content":{"anyOf":[{"$ref":"#/components/schemas/KGEntityResult"},{"$ref":"#/components/schemas/KGRelationshipResult"},{"$ref":"#/components/schemas/KGCommunityResult"},{"$ref":"#/components/schemas/KGGlobalResult"}],"title":"Content"},"result_type":{"anyOf":[{"$ref":"#/components/schemas/KGSearchResultType"},{"type":"null"}]},"extraction_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Extraction Ids"},"metadata":{"type":"object","title":"Metadata","default":{}}},"type":"object","required":["method","content"],"title":"KGSearchResult","content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"},"KGSearchResultType":{"type":"string","enum":["entity","relationship","community"],"title":"KGSearchResultType"},"KGSearchSettings":{"properties":{"filters":{"type":"object","title":"Filters","description":"Alias for search_filters","deprecated":true},"search_filters":{"type":"object","title":"Search Filters","description":"Filters to apply to the vector search. 
Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.\n\n Commonly seen filters include operations include the following:\n\n `{\"document_id\": {\"$eq\": \"9fbe403b-...\"}}`\n\n `{\"document_id\": {\"$in\": [\"9fbe403b-...\", \"3e157b3a-...\"]}}`\n\n `{\"collection_ids\": {\"$overlap\": [\"122fdf6a-...\", \"...\"]}}`\n\n `{\"$and\": {\"$document_id\": ..., \"collection_ids\": ...}}`"},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"graphrag_map_system":{"type":"string","title":"Graphrag Map System","description":"The system prompt for the graphrag map prompt.","default":"graphrag_map_system"},"graphrag_reduce_system":{"type":"string","title":"Graphrag Reduce System","description":"The system prompt for the graphrag reduce prompt.","default":"graphrag_reduce_system"},"use_kg_search":{"type":"boolean","title":"Use Kg Search","description":"Whether to use KG search","default":false},"kg_search_type":{"type":"string","title":"Kg Search Type","description":"KG search type","default":"local"},"kg_search_level":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kg Search Level","description":"KG search level"},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"Configuration for text generation during graph search."},"max_community_description_length":{"type":"integer","title":"Max Community Description Length","default":65536},"max_llm_queries_for_global_search":{"type":"integer","title":"Max Llm Queries For Global Search","default":250},"local_search_limits":{"additionalProperties":{"type":"integer"},"type":"object","title":"Local Search Limits","default":{"__Entity__":20,"__Relationship__":20,"__Community__":20}}},"type":"object","title":"KGSearchSettings","generation_config":{"max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"kg_search_level":"0","kg_search_type":"local","local_search_limits":{"__Community__":20,"__Entity__":20,"__Relationship__":20},"max_community_description_length":65536,"max_llm_queries_for_global_search":250,"use_kg_search":true},"KGTriplesResponse":{"properties":{"triples":{"items":{"$ref":"#/components/schemas/Triple"},"type":"array","title":"Triples","description":"The list of triples in the graph."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of triples in the graph for the collection or document."}},"type":"object","required":["triples","total_entries"],"title":"KGTriplesResponse","description":"Response for knowledge graph triples.","example":{"total_entries":2,"triples":[{"description":"Paris is the capital of France","object":"France","predicate":"is capital of","subject":"Paris"}]}},"ListVectorIndicesResponse":{"properties":{"indices":{"items":{"type":"object"},"type":"array","title":"Indices"}},"type":"object","required":["indices"],"title":"ListVectorIndicesResponse"},"LogEntry":{"properties":{"key":{"type":"string","title":"Key"},"value":{"title":"Value"},"timestamp":{"type":"string","format":"date-time","title":"Timestamp"}},"type":"object","required":["key","value","timestamp"],"title":"LogEntry"},"LogResponse":{"properties":{"run_id":{"type":"string","format":"uuid","title":"Run Id"},"run_type":{"type":"string","title":"Run 
Type"},"entries":{"items":{"$ref":"#/components/schemas/LogEntry"},"type":"array","title":"Entries"},"timestamp":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Timestamp"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"}},"type":"object","required":["run_id","run_type","entries","timestamp","user_id"],"title":"LogResponse"},"Message":{"properties":{"role":{"anyOf":[{"$ref":"#/components/schemas/MessageType"},{"type":"string"}],"title":"Role"},"content":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"function_call":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Function Call"},"tool_calls":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tool Calls"}},"type":"object","required":["role"],"title":"Message","content":"This is a test message.","role":"user"},"MessageType":{"type":"string","enum":["system","user","assistant","function","tool"],"title":"MessageType"},"PaginatedResultsWrapper_list_CollectionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[CollectionResponse]]"},"PaginatedResultsWrapper_list_ConversationOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/ConversationOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[ConversationOverviewResponse]]"},"PaginatedResultsWrapper_list_DocumentChunkResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentChunkResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[DocumentChunkResponse]]"},"PaginatedResultsWrapper_list_DocumentOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[DocumentOverviewResponse]]"},"PaginatedResultsWrapper_list_UserOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/UserOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[UserOverviewResponse]]"},"PaginatedResultsWrapper_list_UserResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/shared__api__models__management__responses__UserResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[UserResponse]]"},"PromptResponse":{"properties":{"name":{"type":"string","title":"Name"},"template":{"type":"string","title":"Template"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated 
At"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types"}},"type":"object","required":["name","template","created_at","updated_at","input_types"],"title":"PromptResponse"},"RAGAgentResponse":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message"},"type":"array","title":"Messages","description":"List of messages in the RAG agent response"},"conversation_id":{"type":"string","title":"Conversation Id","description":"The conversation ID for the RAG agent response"}},"type":"object","required":["messages","conversation_id"],"title":"RAGAgentResponse","example":[{"content":"## You are a helpful assistant that can search for information.\n\nWhen asked a question, perform a search to find relevant information and provide a response.\n\nThe response should contain line-item attributions to relevent search results, and be as informative if possible.\nIf no relevant results are found, then state that no results were found.\nIf no obvious question is present, then do not carry out a search, and instead ask for clarification.","role":"system"},{"content":"You are a helpful assistant.","role":"system"},{"content":"Who is the greatest philospher of all time?","role":"user"},{"content":"Aristotle is widely considered the greatest philospher of all time.","role":"assistant"},{"content":"Can you tell me more about him?","role":"user"},{"function_call":{"arguments":"{\"query\":\"Aristotle biography\"}","name":"search"},"role":"assistant"},{"content":"1. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n2. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n3. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n4. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n5. 
Life\nIn general, the details of Aristotle's life are not well-established. The biographies written in ancient times are often speculative and historians only agree on a few salient points.[B]\n","name":"search","role":"function"},{"content":"Aristotle (384–322 BC) was an Ancient Greek philosopher and polymath whose contributions have had a profound impact on various fields of knowledge. Here are some key points about his life and work:\n\n1. **Early Life**: Aristotle was born in 384 BC in Stagira, Chalcidice, which is near modern-day Thessaloniki, Greece. His father, Nicomachus, was the personal physician to King Amyntas of Macedon, which exposed Aristotle to medical and biological knowledge from a young age [C].\n\n2. **Education and Career**: After the death of his parents, Aristotle was sent to Athens to study at Plato's Academy, where he remained for about 20 years. After Plato's death, Aristotle left Athens and eventually became the tutor of Alexander the Great [C].\n\n3. **Philosophical Contributions**: Aristotle founded the Lyceum in Athens, where he established the Peripatetic school of philosophy. His works cover a wide range of subjects, including metaphysics, ethics, politics, logic, biology, and aesthetics. His writings laid the groundwork for many modern scientific and philosophical inquiries [A].\n\n4. **Legacy**: Aristotle's influence extends beyond philosophy to the natural sciences, linguistics, economics, and psychology. His method of systematic observation and analysis has been foundational to the development of modern science [A].\n\nAristotle's comprehensive approach to knowledge and his systematic methodology have earned him a lasting legacy as one of the greatest philosophers of all time.\n\nSources:\n- [A] Aristotle's broad range of writings and influence on modern science.\n- [C] Details about Aristotle's early life and education.","role":"assistant"}]},"RAGResponse":{"properties":{"completion":{"title":"Completion","description":"The generated completion from the RAG process"},"search_results":{"$ref":"#/components/schemas/SearchResponse","description":"The search results used for the RAG process"}},"type":"object","required":["completion","search_results"],"title":"RAGResponse","example":{"completion":{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Paris is the capital of France.","role":"assistant"}}],"id":"chatcmpl-example123"},"search_results":{"kg_search_results":[{"content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the 
document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}}},"RawChunk":{"properties":{"text":{"type":"string","title":"Text"}},"type":"object","required":["text"],"title":"RawChunk"},"ResultsWrapper_AllPromptsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AllPromptsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AllPromptsResponse]"},"ResultsWrapper_AnalyticsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AnalyticsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AnalyticsResponse]"},"ResultsWrapper_AppSettingsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AppSettingsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AppSettingsResponse]"},"ResultsWrapper_CollectionResponse_":{"properties":{"results":{"$ref":"#/components/schemas/CollectionResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[CollectionResponse]"},"ResultsWrapper_CreateVectorIndexResponse_":{"properties":{"results":{"$ref":"#/components/schemas/CreateVectorIndexResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[CreateVectorIndexResponse]"},"ResultsWrapper_DeleteVectorIndexResponse_":{"properties":{"results":{"$ref":"#/components/schemas/DeleteVectorIndexResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[DeleteVectorIndexResponse]"},"ResultsWrapper_GenericMessageResponse_":{"properties":{"results":{"$ref":"#/components/schemas/GenericMessageResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[GenericMessageResponse]"},"ResultsWrapper_KGCommunitiesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGCommunitiesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGCommunitiesResponse]"},"ResultsWrapper_KGEntitiesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGEntitiesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGEntitiesResponse]"},"ResultsWrapper_KGEntityDeduplicationResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGEntityDeduplicationResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGEntityDeduplicationResponse]"},"ResultsWrapper_KGTriplesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGTriplesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGTriplesResponse]"},"ResultsWrapper_ListVectorIndicesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/ListVectorIndicesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[ListVectorIndicesResponse]"},"ResultsWrapper_NoneType_":{"properties":{"results":{"type":"null","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[NoneType]"},"ResultsWrapper_RAGAgentResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGAgentResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGAgentResponse]"},"ResultsWrapper_RAGResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGResponse]"},"ResultsWrapper_SearchResponse_":{"properties":{"results":{"$ref":"#/components/schemas/SearchResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[SearchResponse]"},"ResultsWrapper_ServerStats_":{"properties":{"results":{"$ref":"#/components/schemas/ServerStats"}},"type":"object","required":["results"],"title"
:"ResultsWrapper[ServerStats]"},"ResultsWrapper_TokenResponse_":{"properties":{"results":{"$ref":"#/components/schemas/TokenResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[TokenResponse]"},"ResultsWrapper_Union_KGCreationResponse__KGCreationEstimationResponse__":{"properties":{"results":{"anyOf":[{"$ref":"#/components/schemas/KGCreationResponse"},{"$ref":"#/components/schemas/KGCreationEstimationResponse"}],"title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[Union[KGCreationResponse, KGCreationEstimationResponse]]"},"ResultsWrapper_Union_KGEnrichmentResponse__KGEnrichmentEstimationResponse__":{"properties":{"results":{"anyOf":[{"$ref":"#/components/schemas/KGEnrichmentResponse"},{"$ref":"#/components/schemas/KGEnrichmentEstimationResponse"}],"title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[Union[KGEnrichmentResponse, KGEnrichmentEstimationResponse]]"},"ResultsWrapper_UpdatePromptResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdatePromptResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdatePromptResponse]"},"ResultsWrapper_UpdateResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdateResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdateResponse]"},"ResultsWrapper_UserResponse_":{"properties":{"results":{"$ref":"#/components/schemas/shared__api__models__auth__responses__UserResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UserResponse]"},"ResultsWrapper_list_CollectionOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionOverviewResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[CollectionOverviewResponse]]"},"ResultsWrapper_list_CollectionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[CollectionResponse]]"},"ResultsWrapper_list_IngestionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/IngestionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[IngestionResponse]]"},"ResultsWrapper_list_LogResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/LogResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[LogResponse]]"},"ResultsWrapper_list_Tuple_str__Message___":{"properties":{"results":{"items":{"prefixItems":[{"type":"string"},{"$ref":"#/components/schemas/Message"}],"type":"array","maxItems":2,"minItems":2},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[Tuple[str, Message]]]"},"SearchResponse":{"properties":{"vector_search_results":{"items":{"$ref":"#/components/schemas/VectorSearchResult"},"type":"array","title":"Vector Search Results","description":"List of vector search results"},"kg_search_results":{"anyOf":[{"items":{"$ref":"#/components/schemas/KGSearchResult"},"type":"array"},{"type":"null"}],"title":"Kg Search Results","description":"Knowledge graph search results, if applicable"}},"type":"object","required":["vector_search_results"],"title":"SearchResponse","example":{"kg_search_results":[{"content":{"description":"Entity Description","metadata":{},"name":"Entity 
Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}},"ServerStats":{"properties":{"start_time":{"type":"string","format":"date-time","title":"Start Time"},"uptime_seconds":{"type":"number","title":"Uptime Seconds"},"cpu_usage":{"type":"number","title":"Cpu Usage"},"memory_usage":{"type":"number","title":"Memory Usage"}},"type":"object","required":["start_time","uptime_seconds","cpu_usage","memory_usage"],"title":"ServerStats"},"Token":{"properties":{"token":{"type":"string","title":"Token"},"token_type":{"type":"string","title":"Token Type"}},"type":"object","required":["token","token_type"],"title":"Token"},"TokenResponse":{"properties":{"access_token":{"$ref":"#/components/schemas/Token"},"refresh_token":{"$ref":"#/components/schemas/Token"}},"type":"object","required":["access_token","refresh_token"],"title":"TokenResponse"},"Triple":{"properties":{"id":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Id"},"subject":{"type":"string","title":"Subject"},"predicate":{"type":"string","title":"Predicate"},"object":{"type":"string","title":"Object"},"weight":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Weight","default":1.0},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"predicate_embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Predicate Embedding"},"extraction_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Extraction Ids","default":[]},"document_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Document Id"},"attributes":{"anyOf":[{"type":"object"},{"type":"string"}],"title":"Attributes","default":{}}},"type":"object","required":["subject","predicate","object"],"title":"Triple","description":"A relationship between two entities. 
This is a generic relationship, and can be used to represent any type of relationship between any two entities."},"UpdatePromptResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"UpdatePromptResponse"},"UpdateResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Task Id","description":"The task ID of the ingestion request."},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids","description":"The ID of the document that was ingested."}},"type":"object","required":["message","document_ids"],"title":"UpdateResponse","example":{"document_ids":["9fbe403b-c11c-5aae-8ade-ef22980c3ad1"],"message":"Update task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"UserOverviewResponse":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id"},"num_files":{"type":"integer","title":"Num Files"},"total_size_in_bytes":{"type":"integer","title":"Total Size In Bytes"},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids"}},"type":"object","required":["user_id","num_files","total_size_in_bytes","document_ids"],"title":"UserOverviewResponse"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"VectorSearchResult":{"properties":{"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"score":{"type":"number","title":"Score"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["extraction_id","document_id","user_id","collection_ids","score","text","metadata"],"title":"VectorSearchResult","description":"Result of a search operation.","collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"},"VectorSearchSettings":{"properties":{"use_vector_search":{"type":"boolean","title":"Use Vector Search","description":"Whether to use vector search","default":true},"use_hybrid_search":{"type":"boolean","title":"Use Hybrid Search","description":"Whether to perform a hybrid search (combining vector and keyword search)","default":false},"filters":{"type":"object","title":"Filters","description":"Alias for search_filters","deprecated":true},"search_filters":{"type":"object","title":"Search Filters","description":"Filters to apply to the vector search. 
Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.\n\n Commonly seen filters include operations include the following:\n\n `{\"document_id\": {\"$eq\": \"9fbe403b-...\"}}`\n\n `{\"document_id\": {\"$in\": [\"9fbe403b-...\", \"3e157b3a-...\"]}}`\n\n `{\"collection_ids\": {\"$overlap\": [\"122fdf6a-...\", \"...\"]}}`\n\n `{\"$and\": {\"$document_id\": ..., \"collection_ids\": ...}}`"},"search_limit":{"type":"integer","maximum":1000.0,"minimum":1.0,"title":"Search Limit","description":"Maximum number of results to return","default":10},"offset":{"type":"integer","minimum":0.0,"title":"Offset","description":"Offset to paginate search results","default":0},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"index_measure":{"$ref":"#/components/schemas/IndexMeasure","description":"The distance measure to use for indexing","default":"cosine_distance"},"include_values":{"type":"boolean","title":"Include Values","description":"Whether to include search score values in the search results","default":true},"include_metadatas":{"type":"boolean","title":"Include Metadatas","description":"Whether to include element metadata in the search results","default":true},"probes":{"type":"integer","title":"Probes","description":"Number of ivfflat index lists to query. Higher increases accuracy but decreases speed.","default":10},"ef_search":{"type":"integer","title":"Ef Search","description":"Size of the dynamic candidate list for HNSW index search. Higher increases accuracy but decreases speed.","default":40},"hybrid_search_settings":{"$ref":"#/components/schemas/HybridSearchSettings","description":"Settings for hybrid search","default":{"full_text_weight":1.0,"semantic_weight":5.0,"full_text_limit":200,"rrf_k":50}},"search_strategy":{"type":"string","title":"Search Strategy","description":"Search strategy to use (e.g., 'default', 'query_fusion', 'hyde')","default":"vanilla"}},"type":"object","title":"VectorSearchSettings","ef_search":40,"filters":{"category":"technology"},"hybrid_search_settings":{"full_text_limit":200,"full_text_weight":1.0,"rrf_k":50,"semantic_weight":5.0},"include_metadata":true,"index_measure":"cosine_distance","limit":20,"offset":0,"probes":10,"selected_collection_ids":["2acb499e-8428-543b-bd85-0d9098718220","3e157b3a-8469-51db-90d9-52e7d896b49b"],"use_hybrid_search":true,"use_vector_search":true},"VectorTableName":{"type":"string","enum":["vectors","document_entity","collection_entity","community_report"],"title":"VectorTableName","description":"This enum represents the different tables where we store vectors.\n\n# TODO: change the table name of the chunks table. 
Right now it is called\n# {r2r_project_name}.{r2r_project_name} due to a bug in the vector class."},"shared__api__models__auth__responses__UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-10-21T21:59:22.550352"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-10-21T21:59:22.550357"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"},"shared__api__models__management__responses__UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-10-21T21:59:22.566332"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-10-21T21:59:22.566335"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"}},"securitySchemes":{"HTTPBearer":{"type":"http","scheme":"bearer"},"OAuth2PasswordBearer":{"type":"oauth2","flows":{"password":{"scopes":{},"tokenUrl":"token"}}}}}} diff --git a/docs/cookbooks/advanced-graphrag.mdx b/docs/cookbooks/advanced-graphrag.mdx new file mode 100644 index 000000000..60ed7efa1 --- /dev/null +++ b/docs/cookbooks/advanced-graphrag.mdx @@ -0,0 +1,210 @@ +--- +title: 'Advanced GraphRAG' +description: 'Advanced GraphRAG Techniques with R2R' +icon: 'chart-network' +--- + + +## Advanced GraphRAG Techniques + +R2R supports advanced GraphRAG techniques that can be easily configured at runtime. This flexibility allows you to experiment with different SoTA strategies and optimize your RAG pipeline for specific use cases. + + + +Advanced GraphRAG techniques are still a beta feature in R2R.There may be limitations in observability and analytics when implementing them. + +Are we missing an important technique? If so, then please let us know at founders@sciphi.ai. 
+
+
+
+
+### Prompt Tuning
+
+One way we can improve upon GraphRAG's already impressive capabilities is by tuning our prompts to a specific domain. When we create a knowledge graph, an LLM extracts the relationships between entities; but for very targeted domains, a general approach may fall short.
+
+To demonstrate this, we can run GraphRAG over the technical papers for the 2024 Nobel Prizes in chemistry, medicine, and physics. By tuning our prompts for GraphRAG, we first build a high-level understanding of our documents and then provide the LLM with a more pointed, domain-specific description of its task.
+
+The following script, which utilizes the Python SDK, generates the tuned prompts and calls the knowledge graph creation process with these prompts at runtime:
+
+```python
+# Step 1: Tune the prompts for knowledge graph creation
+# Tune the entity description prompt
+entity_prompt_response = client.get_tuned_prompt(
+    prompt_name="graphrag_entity_description"
+)
+tuned_entity_prompt = entity_prompt_response['results']['tuned_prompt']
+
+# Tune the triples extraction prompt
+triples_prompt_response = client.get_tuned_prompt(
+    prompt_name="graphrag_triples_extraction_few_shot"
+)
+tuned_triples_prompt = triples_prompt_response['results']['tuned_prompt']
+
+# Step 2: Create the knowledge graph
+kg_settings = {
+    "kg_entity_description_prompt": tuned_entity_prompt
+    # (tuned_triples_prompt can likewise be supplied via the creation settings)
+}
+
+# Generate the initial graph
+graph_response = client.create_graph(
+    run_type="run",
+    kg_creation_settings=kg_settings
+)
+
+# Step 3: Clean up the graph by removing duplicate entities
+client.deduplicate_entities(
+    run_type="run",
+    collection_id='122fdf6a-e116-546b-a8f6-e4cb2e2c0a09'
+)
+
+# Step 4: Tune and apply community reports prompt for graph enrichment
+community_prompt_response = client.get_tuned_prompt(
+    prompt_name="graphrag_community_reports"
+)
+tuned_community_prompt = community_prompt_response['results']['tuned_prompt']
+
+# Configure enrichment settings
+kg_enrichment_settings = {
+    "community_reports_prompt": tuned_community_prompt
+}
+
+# Enrich the graph with additional information
+client.enrich_graph(
+    run_type="run",
+    kg_enrichment_settings=kg_enrichment_settings
+)
+```
+
+For illustrative purposes, we can look at the `graphrag_entity_description` prompt before and after prompt tuning. It's clear that with prompt tuning, we are able to capture the intent of the documents, giving us a more targeted prompt overall.
+
+
+
+```yaml
+Provide a comprehensive yet concise summary of the given entity, incorporating its description and associated triples:
+
+Entity Info:
+{entity_info}
+Triples:
+{triples_txt}
+
+Your summary should:
+1. Clearly define the entity's core concept or purpose
+2. Highlight key relationships or attributes from the triples
+3. Integrate any relevant information from the existing description
+4. Maintain a neutral, factual tone
+5. Be approximately 2-3 sentences long
+
+Ensure the summary is coherent, informative, and captures the essence of the entity within the context of the provided information.
+```
+
+
+
+
+```yaml
+Provide a comprehensive yet concise summary of the given entity, focusing on its significance in the field of scientific research, while incorporating its description and associated triples:
+
+Entity Info:
+{entity_info}
+Triples:
+{triples_txt}
+
+Your summary should:
+1. Clearly define the entity's core concept or purpose within computational biology, artificial intelligence, and medicine
+2. Highlight key relationships or attributes from the triples that illustrate advancements in scientific understanding and reasoning
+3. Integrate any relevant information from the existing description, particularly breakthroughs and methodologies
+4. Maintain a neutral, factual tone
+5. Be approximately 2-3 sentences long
+
+Ensure the summary is coherent, informative, and captures the essence of the entity within the context of the provided information, emphasizing its impact on the field.
+```
+
+
+
+
+After prompt tuning, we see an increase in the number of communities, and these communities appear more focused and domain-specific, with clearer thematic boundaries.
+
+Prompt tuning produces:
+- **More precise community separation:** GraphRAG alone produced a single `MicroRNA Research` community, while GraphRAG with prompt tuning produced communities around `C. elegans MicroRNA Research`, `LET-7 MicroRNA`, and `miRNA-184 and EDICT Syndrome`.
+- **Enhanced domain focus:** Previously, we had a single community for `AI Researchers`, but with prompt tuning we create specialized communities such as `Hinton, Hopfield, and Deep Learning`, `Hochreiter and Schmidhuber`, and `Minsky and Papert's ANN Critique`.
+
+| Count       | GraphRAG | GraphRAG with Prompt Tuning |
+|-------------|----------|-----------------------------|
+| Entities    | 661      | 636                         |
+| Triples     | 509      | 503                         |
+| Communities | 29       | 41                          |
+
+Prompt tuning allows us to generate communities that better reflect the natural organization of the domain knowledge while maintaining more precise technical and thematic boundaries between related concepts.
+
+## Contextual Chunk Enrichment
+
+Contextual chunk enrichment is a technique that captures the semantic meaning of the entities and relationships in the knowledge graph, using a combination of each entity's textual description and its contextual embeddings. This enrichment process enhances the quality and depth of information in your knowledge graph by:
+
+1. Analyzing the surrounding context of each entity mention
+2. Incorporating semantic information from related passages
+3. Preserving important contextual nuances that might be lost in simple entity extraction
+
+You can learn more about contextual chunk enrichment [here](/cookbooks/contextual-enrichment).
+
+
+### Entity Deduplication
+
+When creating a knowledge graph across multiple documents, entities are initially created at the document level. This means that the same real-world entity (e.g., "Albert Einstein" or "CRISPR") might appear multiple times if it's mentioned in different documents. This duplication can lead to:
+
+- Redundant information in your knowledge graph
+- Fragmented relationships across duplicate entities
+- Increased storage and processing overhead
+- Potentially inconsistent entity descriptions
+
+The `deduplicate-entities` endpoint addresses these issues by:
+1. Identifying similar entities by name (currently exact match; other strategies coming soon)
+2. Merging their properties and relationships
+3. Maintaining the most comprehensive description
+4.
Removing the duplicate entries + + + +```bash +r2r deduplicate-entities --collection-id=122fdf6a-e116-546b-a8f6-e4cb2e2c0a09 --run + +# Example Response +[{'message': 'Deduplication task queued successfully.', 'task_id': 'd9dae1bb-5862-4a16-abaf-5297024df390'}] +``` + + + +```python +from r2r import R2RClient + +client = R2RClient("http://localhost:7272") +client.deduplicate_entities( + collection_id="122fdf6a-e116-546b-a8f6-e4cb2e2c0a09", + run_type="run" +) + +# Example Response +[{'message': 'Deduplication task queued successfully.', 'task_id': 'd9dae1bb-5862-4a16-abaf-5297024df390'}] +``` + + + +#### Monitoring Deduplication + +You can monitor the deduplication process in two ways: + +1. **Hatchet Dashboard**: Access the dashboard at http://localhost:7274 to view: + - Task status and progress + - Any errors or warnings + - Completion time estimates + +2. **API Endpoints**: Once deduplication is complete, verify the results using these endpoints with `entity_level = collection`: + - [Entities API](http://localhost:7272/v2/entities?collection_id=122fdf6a-e116-546b-a8f6-e4cb2e2c0a09&entity_level=collection) + - [Triples API](http://localhost:7272/v2/triples?collection_id=122fdf6a-e116-546b-a8f6-e4cb2e2c0a09&entity_level=collection) + +#### Best Practices + +When using entity deduplication: + +- Run deduplication after initial graph creation but before any enrichment steps +- Monitor the number of entities before and after to ensure expected reduction +- Review a sample of merged entities to verify accuracy +- For large collections, expect the process to take longer and plan accordingly diff --git a/docs/cookbooks/contextual-enrichment.mdx b/docs/cookbooks/contextual-enrichment.mdx new file mode 100644 index 000000000..dc28f1cf0 --- /dev/null +++ b/docs/cookbooks/contextual-enrichment.mdx @@ -0,0 +1,151 @@ +--- +title: 'Contextual Chunk Enrichment' +description: 'Enhance your RAG system chunks with rich contextual information' +icon: 'puzzle-piece' +--- + +# Understanding Chunk Enrichment in RAG Systems + +In modern Retrieval-Augmented Generation (RAG) systems, documents are systematically broken down into smaller, manageable pieces called chunks. While chunking is essential for efficient vector search operations, these individual chunks sometimes lack the broader context needed for comprehensive question answering or analysis tasks. + +## The Challenge of Context Loss + +Let's examine a real-world example using Lyft's 2021 annual report (Form 10-K) from their [public filing](https://github.com/SciPhi-AI/R2R/blob/main/py/core/examples/data/lyft_2021.pdf). + +During ingestion, this 200+ page document is broken into 1,223 distinct chunks. Consider this isolated chunk: + +```plaintext +storing unrented and returned vehicles. These impacts to the demand for and operations of the different rental programs have and may continue to adversely affect our business, financial condition and results of operation. +``` + +Reading this chunk in isolation raises several questions: +- What specific impacts are being discussed? +- Which rental programs are affected? +- What's the broader context of these business challenges? + +This is where contextual enrichment becomes invaluable. + +## Introducing Contextual Enrichment + +Contextual enrichment is an advanced technique that enhances chunks with relevant information from surrounding or semantically related content. Think of it as giving each chunk its own "memory" of related information. 
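+To make the idea concrete, here is a minimal sketch of the "neighborhood" flavor of this technique: collecting a window of surrounding chunks to serve as context for a target chunk. The function and chunk list are illustrative stand-ins, not R2R internals:
+
+```python
+def neighborhood_context(
+    chunks: list[str],
+    i: int,
+    backward_chunks: int = 3,
+    forward_chunks: int = 3,
+) -> str:
+    """Gather nearby chunks as enrichment context for chunks[i]."""
+    start = max(0, i - backward_chunks)
+    end = min(len(chunks), i + forward_chunks + 1)
+    # Exclude the target chunk itself from its own context
+    neighbors = chunks[start:i] + chunks[i + 1 : end]
+    return "\n".join(neighbors)
+```
+
+R2R assembles context along these lines (plus embedding-similar chunks, for the semantic strategy) and passes it to an LLM alongside the target chunk, as described below.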
+ +### Enabling Enrichment + +To activate this feature, configure your `r2r.toml` file with the following settings: + +```toml +[ingestion.chunk_enrichment_settings] + enable_chunk_enrichment = true # disabled by default + strategies = ["semantic", "neighborhood"] + forward_chunks = 3 # Look ahead 3 chunks + backward_chunks = 3 # Look behind 3 chunks + semantic_neighbors = 10 # Find 10 semantically similar chunks + semantic_similarity_threshold = 0.7 # Minimum similarity score + generation_config = { model = "openai/gpt-4o-mini" } +``` + +## Enrichment Strategies Explained + +R2R implements two sophisticated strategies for chunk enrichment: + +### 1. Neighborhood Strategy +This approach looks at the document's natural flow by examining chunks that come before and after the target chunk: +- **Forward Looking**: Captures upcoming context (configurable, default: 3 chunks) +- **Backward Looking**: Incorporates previous context (configurable, default: 3 chunks) +- **Use Case**: Particularly effective for narrative documents where context flows linearly + +### 2. Semantic Strategy +This method uses advanced embedding similarity to find related content throughout the document: +- **Vector Similarity**: Identifies chunks with similar meaning regardless of location +- **Configurable Neighbors**: Customizable number of similar chunks to consider +- **Similarity Threshold**: Set minimum similarity scores to ensure relevance +- **Use Case**: Excellent for documents with themes repeated across different sections + +## The Enrichment Process + +When enriching chunks, R2R uses a carefully crafted prompt to guide the LLM: + +```plaintext +## Task: + +Enrich and refine the given chunk of text using information from the provided context chunks. The goal is to make the chunk more precise and self-contained. + +## Context Chunks: +{context_chunks} + +## Chunk to Enrich: +{chunk} + +## Instructions: +1. Rewrite the chunk in third person. +2. Replace all common nouns with appropriate proper nouns. +3. Use information from the context chunks to enhance clarity. +4. Ensure the enriched chunk remains independent and self-contained. +5. Maintain original scope without bleeding information. +6. Focus on precision and informativeness. +7. Preserve original meaning while improving clarity. +8. Output only the enriched chunk. + +## Enriched Chunk: +``` + +## Implementation and Results + +To process your documents with enrichment: + +```bash +r2r ingest-files --file_paths path/to/lyft_2021.pdf +``` + +### Viewing Enriched Results + +Access your enriched chunks through the API: +``` +http://localhost:7272/v2/document_chunks/{document_id} +``` + +Let's compare the before and after of our example chunk: + +**Before Enrichment:** +```plaintext +storing unrented and returned vehicles. These impacts to the demand for and operations of the different rental programs have and may continue to adversely affect our business, financial condition and results of operation. +``` + +**After Enrichment:** +```plaintext +The impacts of the COVID-19 pandemic on the demand for and operations of the various vehicle rental programs, including Lyft Rentals and the Express Drive program, have resulted in challenges regarding the storage of unrented and returned vehicles. These adverse conditions are anticipated to continue affecting Lyft's overall business performance, financial condition, and operational results. 
+```
+
+Notice how the enriched version:
+- Specifies the cause (COVID-19 pandemic)
+- Names specific programs (Lyft Rentals, Express Drive)
+- Provides clearer context about the business impact
+- Maintains professional, third-person tone
+
+## Metadata and Storage
+
+The system maintains both enriched and original versions:
+
+```json
+{
+  "results": [
+    {
+      "text": "enriched_version",
+      "metadata": {
+        "original_text": "original_version",
+        "chunk_enrichment_status": "success",
+        // ... additional metadata ...
+      }
+    }
+  ]
+}
+```
+
+This dual storage ensures transparency and allows for version comparison when needed.
+
+## Best Practices
+
+1. **Tune Your Parameters**: Adjust `forward_chunks`, `backward_chunks`, and `semantic_neighbors` based on your document structure
+2. **Monitor Enrichment Quality**: Regularly review enriched chunks to ensure they maintain accuracy
+3. **Consider Document Type**: Different documents may benefit from different enrichment strategies
+4. **Balance Context Size**: More context isn't always better - find the sweet spot for your use case
diff --git a/docs/cookbooks/graphrag.mdx b/docs/cookbooks/graphrag.mdx
index 5c817d4cc..5aae6d330 100644
--- a/docs/cookbooks/graphrag.mdx
+++ b/docs/cookbooks/graphrag.mdx
@@ -290,14 +290,14 @@ If you are using R2R Full, you can log into the hatchet dashboard on http://loca
 ![Hatchet Dashboard](../images/kg_extraction_progress.png)
 
-This step will create a knowledge graph with nodes and relationships. You can get the entities and relationships in the graph using our dashboard on http://localhost:7273 or by calling the following API endpoints:
+This step will create a knowledge graph with nodes and relationships. You can get the entities and relationships in the graph using our dashboard on http://localhost:7273 or by calling the following API endpoints, which hit /v2/entities and /v2/triples respectively. By default, these use the `entity_level=document` query parameter to return entities and triples at the document level. We will use the default collection id `122fdf6a-e116-546b-a8f6-e4cb2e2c0a09` in the examples below.
 
-- Entities: [Entities](http://localhost:7272/v2/entities?collection_id=122fdf6a-e116-546b-a8f6-e4cb2e2c0a09)
-- Triples: [Triples](http://localhost:7272/v2/triples?collection_id=122fdf6a-e116-546b-a8f6-e4cb2e2c0a09)
+- Entities: [Entities](http://localhost:7272/v2/entities?collection_id=122fdf6a-e116-546b-a8f6-e4cb2e2c0a09&entity_level=document)
+- Triples: [Triples](http://localhost:7272/v2/triples?collection_id=122fdf6a-e116-546b-a8f6-e4cb2e2c0a09&entity_level=document)
 
 ## Graph Enrichment
 
-Now we have a searchable graph, but this graph is not enriched yet. We need to perform the graph enrichment step.
+Now we have a searchable graph, but it is not yet enriched: it contains no community-level information. We will now run the enrichment step.
 
 The graph enrichment step performs hierarchical Leiden clustering to create communities and embeds the descriptions. These embeddings will be used later in the local search stage of the pipeline. If you are more interested in the algorithm, please refer to the blog post [here](https://www.sciphi.ai/blog/graphrag).
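For reference, the enrichment step can also be kicked off from the Python SDK; a sketch, reusing the client shown earlier (whether `kg_enrichment_settings` may be omitted here is an assumption):

```python
# Queue graph enrichment; progress can be monitored in the Hatchet dashboard
enrich_response = client.enrich_graph(run_type="run")
print(enrich_response)
```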
@@ -422,3 +422,8 @@ In conclusion, integrating R2R with GraphRAG significantly enhances the capabili In essence, combining R2R with GraphRAG empowers your RAG applications to deliver more intelligent, context-aware, and insightful responses, making it a powerful tool for advanced information retrieval and analysis tasks. Feel free to reach out to us at founders@sciphi.ai if you have any questions or need further assistance. + + +# Advanced GraphRAG Techniques + +If you want to learn more about the advanced techniques that we use in GraphRAG, please refer to the [Advanced GraphRAG Techniques](./advanced-graphrag.mdx) page. diff --git a/docs/cookbooks/maintenance.mdx b/docs/cookbooks/maintenance.mdx new file mode 100644 index 000000000..640d667f9 --- /dev/null +++ b/docs/cookbooks/maintenance.mdx @@ -0,0 +1,185 @@ +--- +title: 'Maintenance & Scaling' +description: 'Learn how to maintain and scale your R2R system' +icon: 'paint-roller' +--- + +## Introduction + +This guide covers essential maintenance tasks for R2R deployments, with a focus on vector index management and system updates. Understanding when and how to build vector indices, as well as keeping your R2R installation current, is crucial for maintaining optimal performance at scale. + +## Vector Indices + +### Why Vector Indices Matter + +Vector indices are essential for efficient similarity search across documents. Without an index, every search would require comparing against every vector in your database - a process that becomes increasingly expensive as your dataset grows. + +Based on benchmarks from similar systems (pgvector), vector indices can provide significant performance improvements: +- Queries can be 10-100x faster with proper indexing +- High-dimensional vectors (1536d) benefit even more from indexing than lower-dimensional ones +- Index performance becomes critical at scale (>100K documents) + +### When to Build Vector Indices + +Consider building vector indices when: +- Your document collection exceeds 100K documents +- Query latency exceeds acceptable thresholds +- You're using high-dimensional vectors (e.g., 1536d from large language models) +- You need to support concurrent queries + +### Vector Index Creation + +R2R supports multiple indexing methods, with HNSW (Hierarchical Navigable Small World) being recommended for most use cases: + +```python +create_response = client.create_vector_index( + table_name="vectors", + index_method="hnsw", + index_measure="cosine_distance", + index_arguments={ + "m": 16, # Number of connections per element + "ef_construction": 64 # Size of dynamic candidate list + }, + concurrently=True +) +``` + +#### Important Considerations + +1. **Resource Usage** + - Index creation is CPU and memory intensive + - Memory usage scales with both dataset size and `m` parameter + - Consider creating indices during off-peak hours + +2. **Performance Tuning** + - HNSW Parameters: + - `m`: 16-64 (higher = better quality, more memory) + - `ef_construction`: 64-100 (higher = better quality, longer build time) + - Distance Measures: + - `cosine_distance`: Best for normalized vectors (most common) + - `l2_distance`: Better for absolute distances + - `max_inner_product`: Optimized for dot product similarity + +3. 
**Index Warming** + - New indices require warming for optimal performance + - Initial queries may be slower until index is loaded into memory + - Consider implementing explicit pre-warming in production + +### Managing Vector Indices + +List existing indices: +```bash +r2r list-vector-indices +``` + +Delete an index: +```bash +r2r delete-vector-index +``` + +For detailed information about vector index management, see the [Ingestion documentation](/documentation/cli/ingestion). + +## System Updates and Maintenance + +### Version Management + +Check your current R2R version: +```bash +r2r version +``` + +### Update Process + +1. **Prepare for Update** + ```bash + # Check current versions + r2r version + r2r db current + + # Generate system report (optional) + r2r generate-report + ``` + +2. **Stop Running Services** + ```bash + r2r docker-down + ``` + +3. **Update R2R** + ```bash + r2r update + ``` + +4. **Update Database** + ```bash + r2r db upgrade + ``` + +5. **Restart Services** + ```bash + r2r serve --docker [additional options] + ``` + +### Database Migration Management + +R2R uses database migrations to manage schema changes. Always check and update your database schema after updates: + +Check current migration: +```bash +r2r db current +``` + +Apply migrations: +```bash +r2r db upgrade +``` + +### Managing Multiple Environments + +Use different project names and schemas for different environments: + +```bash +# Development +export R2R_PROJECT_NAME=r2r_dev +r2r serve --docker --project-name r2r-dev + +# Staging +export R2R_PROJECT_NAME=r2r_staging +r2r serve --docker --project-name r2r-staging + +# Production +export R2R_PROJECT_NAME=r2r_prod +r2r serve --docker --project-name r2r-prod +``` + +## Troubleshooting + +If issues occur: + +1. Generate a system report: + ```bash + r2r generate-report + ``` + +2. Check container health: + ```bash + r2r docker-down + r2r serve --docker + ``` + +3. Review database state: + ```bash + r2r db current + r2r db history + ``` + +4. Roll back if needed: + ```bash + r2r db downgrade --revision + ``` + +## Additional Resources + +- [Python SDK Ingestion Documentation](/documentation/python-sdk/ingestion) +- [CLI Maintenance Documentation](/documentation/cli/maintenance) +- [Ingestion Configuration Documentation](/documentation/configuration/ingestion/overview) diff --git a/docs/documentation/cli/graph.mdx b/docs/documentation/cli/graph.mdx index 52bfbe21c..36aef861d 100644 --- a/docs/documentation/cli/graph.mdx +++ b/docs/documentation/cli/graph.mdx @@ -32,6 +32,16 @@ r2r create-graph --collection-id my-collection --run --kg-creation-settings '{"k +### Deduplicate Entities + +Deduplicate entities in a collection using the `deduplicate-entities` command: + +````bash +r2r deduplicate-entities --collection-id my-collection --run --deduplication-settings '{"key": "value"}' +```` + + + ### Enrich Graph Enrich an existing knowledge graph using the `enrich-graph` command: @@ -120,3 +130,26 @@ r2r get-triples --collection-id my-collection --offset 0 --limit 10 --triple-ids ```` + + +### Delete Graph + +Delete the graph for a collection using the `delete-graph` command: + +````bash +r2r delete-graph --collection-id my-collection --cascade +```` + + + + + Collection ID to delete the graph for. + + + + Whether to cascade the deletion. + + NOTE: Setting this flag to true will delete entities and triples for documents that are shared across multiple collections. 
Do not set this flag unless you are absolutely sure that you want to delete the entities and triples for all documents in the collection.
+
+
+
diff --git a/docs/documentation/cli/ingestion.mdx b/docs/documentation/cli/ingestion.mdx
index 942cffb7f..8adadf704 100644
--- a/docs/documentation/cli/ingestion.mdx
+++ b/docs/documentation/cli/ingestion.mdx
@@ -1,6 +1,6 @@
 ---
 title: 'Ingestion'
-description: 'Ingesting files with the R2R CLI.'
+description: 'Ingesting files and managing vector indices with the R2R CLI.'
 ---
 
 ## Document Ingestion and Management
 
@@ -73,9 +73,138 @@ r2r update-files path/to/file1_v2.txt \
+
+## Vector Index Management
+
+### Create Vector Index
+
+Create a new vector index for similarity search using the `create-vector-index` command:
+
+```bash
+r2r create-vector-index \
+  --table-name vectors \
+  --index-method hnsw \
+  --index-measure cosine_distance \
+  --index-arguments '{"m": 16, "ef_construction": 64}'
+```
+
+
+
+
+ Table to create index on. Options: vectors, entities_document, entities_collection, communities. Default: vectors
+
+
+
+ Indexing method to use. Options: hnsw, ivfflat, auto. Default: hnsw
+
+
+
+ Distance measure for vector comparisons. Options: cosine_distance, l2_distance, max_inner_product. Default: cosine_distance
+
+
+
+ Configuration parameters as JSON string. For HNSW: `{"m": int, "ef_construction": int}`. For IVFFlat: `{"n_lists": int}`
+
+
+
+ Optional custom name for the index. If not provided, one will be auto-generated
+
+
+
+ Disable concurrent index creation. Default: False
+
+
+
+
+#### Important Considerations
+
+Vector index creation requires careful planning and consideration of your data and performance requirements. Keep in mind:
+
+**Resource Intensive Process**
+- Index creation can be CPU and memory intensive, especially for large datasets
+- For HNSW indexes, memory usage scales with both dataset size and `m` parameter
+- Consider creating indexes during off-peak hours for production systems
+
+**Performance Tuning**
+1. **HNSW Parameters:**
+   - `m`: Higher values (16-64) improve search quality but increase memory usage and build time
+   - `ef_construction`: Higher values increase build time and quality but have diminishing returns past 100
+   - Recommended starting point: `m=16`, `ef_construction=64`
+
+```bash
+# Example balanced configuration
+r2r create-vector-index \
+  --table-name vectors \
+  --index-method hnsw \
+  --index-measure cosine_distance \
+  --index-arguments '{"m": 16, "ef_construction": 64}'
+```
+
+**Pre-warming Required**
+- **Important:** Newly created indexes require pre-warming to achieve optimal performance
+- Initial queries may be slower until the index is loaded into memory
+- The first several queries will automatically warm the index
+- For production systems, consider implementing explicit pre-warming by running representative queries after index creation
+- Without pre-warming, you may not see the expected performance improvements
+
+**Best Practices**
+1. Always use concurrent index creation (avoid `--no-concurrent`) in production to prevent blocking other operations
+2. Monitor system resources during index creation
+3. Test index performance with representative queries before deploying
+4. Consider creating indexes on smaller test datasets first to validate parameters
+5.
+5. Implement an index pre-warming strategy before handling production traffic
+
+**Distance Measures**
+Choose the appropriate measure based on your use case:
+- `cosine_distance`: Best for normalized vectors (most common)
+- `l2_distance`: Better for absolute distances
+- `max_inner_product`: Optimized for dot product similarity
+
+### List Vector Indices
+
+List existing vector indices using the `list-vector-indices` command:
+
+```bash
+r2r list-vector-indices --table-name vectors
+```
+
+
+
+
+  Table to list indices from. Options: vectors, entities_document, entities_collection, communities. Default: vectors
+
+
+
+
+### Delete Vector Index
+
+Delete a vector index using the `delete-vector-index` command:
+
+```bash
+r2r delete-vector-index my-index-name --table-name vectors
+```
+
+
+
+
+  Name of the index to delete
+
+
+
+  Table containing the index. Options: vectors, entities_document, entities_collection, communities. Default: vectors
+
+
+
+  Disable concurrent index deletion. Default: False
+
+
+
+
+## Sample File Management
+
 ### Ingest Sample Files
 
-Ingest one or more sample files from the R2R GitHub repository using the `ingest-sample-file` or `ingest-sample-files` commands:
+Ingest one or more sample files from the R2R GitHub repository:
 
 ```bash
 # Ingest a single sample file
@@ -92,7 +221,7 @@ These commands have no additional arguments. The `--v2` flag for `ingest-sample-
 
 ### Ingest Local Sample Files
 
-Ingest the local sample files in the `core/examples/data_unstructured` directory using the `ingest-sample-files-from-unstructured` command:
+Ingest the local sample files in the `core/examples/data_unstructured` directory:
 
 ```bash
 r2r ingest-sample-files-from-unstructured
diff --git a/docs/documentation/cli/introduction.mdx b/docs/documentation/cli/introduction.mdx
index c11822c4e..348bd68c9 100644
--- a/docs/documentation/cli/introduction.mdx
+++ b/docs/documentation/cli/introduction.mdx
@@ -58,5 +58,5 @@ For more detailed information on specific functionalities of the R2R CLI, please
 
 - [Document Ingestion](/documentation/cli/ingestion): Learn how to add, retrieve, and manage documents using the CLI.
 - [Search & RAG](/documentation/cli/retrieval): Explore various querying techniques and Retrieval-Augmented Generation capabilities.
-- [Knowledge Graphs](/documentation/cli/graphrag): Learn how to create and enrich knowledge graphs, and perform GraphRAG.
+- [Knowledge Graphs](/documentation/cli/graph): Learn how to create and enrich knowledge graphs, and perform GraphRAG.
 - [Server Management](/documentation/cli/server): Manage your R2R server, including health checks, logs, and updates.
diff --git a/docs/documentation/cli/maintenance.mdx b/docs/documentation/cli/maintenance.mdx
new file mode 100644
index 000000000..b393fbae1
--- /dev/null
+++ b/docs/documentation/cli/maintenance.mdx
@@ -0,0 +1,220 @@
+---
+title: 'Maintenance'
+description: 'Managing R2R versions, updates, and database migrations across environments.'
+---
+
+## Deployment Management
+
+R2R deployments consist of three main components that need to be managed:
+1. The R2R Python package
+2. The Docker images
+3. The database schema
+
+### Version Management
+
+Check your current R2R version:
+
+```bash
+r2r version
+```
+
+### Update R2R
+
+Update your R2R installation to the latest version:
+
+```bash
+r2r update
+```
+
+This command will:
+- Upgrade the R2R package to the latest version using pip
+- Display the update progress and confirmation
+- Show any errors if they occur during the update process
+
+
+When you update R2R, the Docker image used by `r2r serve` will automatically be updated to match the new version. The system will attempt to use a version-specific image (e.g., `ragtoriches/prod:1.2.3`) or fall back to `latest` if the specific version isn't available.
+
+
+### Database Management
+
+R2R uses database migrations to manage schema changes across versions. After updating R2R, you should always check and update your database schema:
+
+### Check Current Migration
+
+View the current migration state of your database:
+
+```bash
+r2r db current
+```
+
+
+
+
+  Schema name to check. Defaults to R2R_PROJECT_NAME environment variable.
+
+
+
+
+### Apply Migrations
+
+Upgrade your database to the latest version:
+
+```bash
+r2r db upgrade
+```
+
+
+
+
+  Schema name to upgrade. Defaults to R2R_PROJECT_NAME environment variable.
+
+
+  Specific revision to upgrade to. Defaults to "head" (latest version).
+
+
+
+
+## Deployment Process
+
+Here's the recommended process for updating an R2R deployment:
+
+1. **Prepare for Update**
+   ```bash
+   # Check current versions
+   r2r version
+   r2r db current
+
+   # Generate system report (optional)
+   r2r generate-report
+   ```
+
+2. **Stop Running Services**
+   ```bash
+   # Bring down existing deployment
+   r2r docker-down
+   ```
+
+3. **Update R2R**
+   ```bash
+   r2r update
+   ```
+
+4. **Update Database**
+   ```bash
+   # Check and apply any new migrations
+   r2r db upgrade
+   ```
+
+5. **Restart Services**
+   ```bash
+   # Start the server with your configuration
+   r2r serve --docker [additional options]
+   ```
+
+
+
+
+  Host to run the server on. Default is "0.0.0.0".
+
+
+  Port to run the server on. Default comes from R2R_PORT or PORT env var, or 7272.
+
+
+  Run using Docker (recommended for production).
+
+
+  Run the full R2R compose with Hatchet and Unstructured.
+
+
+  Project name for Docker deployment.
+
+
+  Specific Docker image to use (optional).
+
+
+  Exclude creating a Postgres container.
+
+
+
+
+
+  PostgreSQL host address. Default is "localhost".
+
+
+  PostgreSQL port. Default is "5432".
+
+
+  PostgreSQL database name. Default is "postgres".
+
+
+  PostgreSQL username. Default is "postgres".
+
+
+  Project name used for schema. Default is "r2r_default".
+
+
+
+
+## Managing Multiple Environments
+
+For different environments (development, staging, production), use different project names and schemas:
+
+```bash
+# Development
+export R2R_PROJECT_NAME=r2r_dev
+r2r serve --docker --project-name r2r-dev
+
+# Staging
+export R2R_PROJECT_NAME=r2r_staging
+r2r serve --docker --project-name r2r-staging
+
+# Production
+export R2R_PROJECT_NAME=r2r_prod
+r2r serve --docker --project-name r2r-prod
+```
+
+## Vector Index Management
+
+R2R uses vector indices to enable efficient similarity search across documents. For detailed information about managing vector indices, including creation, listing, and deletion, see the [Ingestion documentation](/documentation/cli/ingestion).
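+
+After creating or rebuilding an index, remember that it needs pre-warming before it reaches full query speed. A minimal sketch of an explicit pre-warming pass (the queries below are placeholders; substitute searches representative of your workload):
+
+```bash
+# Replay a few representative searches so the new index is loaded into
+# memory before production traffic arrives (placeholder queries).
+for query in "quantum computing" "knowledge graphs" "hybrid search"; do
+  r2r search --query "$query" --search-limit 5
+done
+```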
+
+Key vector index management commands:
+```bash
+# Create a new vector index
+r2r create-vector-index
+
+# List existing indices
+r2r list-vector-indices
+
+# Delete an index
+r2r delete-vector-index my-index-name
+```
+
+
+## Troubleshooting
+
+If issues occur during deployment:
+
+1. Generate a system report:
+   ```bash
+   r2r generate-report
+   ```
+
+2. Check container health:
+   ```bash
+   # Bring down existing deployment
+   r2r docker-down
+
+   # Start fresh and watch for health checks
+   r2r serve --docker
+   ```
+
+3. Review the database state:
+   ```bash
+   r2r db current
+   r2r db history
+   ```
+
+4. If needed, roll back database changes:
+   ```bash
+   r2r db downgrade --revision
+   ```
diff --git a/docs/documentation/cli/retrieval.mdx b/docs/documentation/cli/retrieval.mdx
new file mode 100644
index 000000000..d551d3239
--- /dev/null
+++ b/docs/documentation/cli/retrieval.mdx
@@ -0,0 +1,137 @@
+---
+title: 'Retrieval'
+description: 'Search and RAG capabilities using the R2R CLI.'
+---
+
+# Retrieval Operations
+
+The R2R CLI provides two main retrieval commands: `search` and `rag` (Retrieval-Augmented Generation). These commands allow you to query your document collection and generate AI-powered responses based on the retrieved content.
+
+## Search Command
+
+The `search` command performs document retrieval using vector search and/or knowledge graph search capabilities.
+
+```bash
+r2r search --query "Your search query"
+```
+
+### Vector Search Options
+
+- `--use-vector-search`: Enable vector search (default: true)
+- `--filters`: Apply JSON filters to the search results
+  ```bash
+  r2r search --filters '{"document_id":{"$in":["doc-id-1", "doc-id-2"]}}'
+  ```
+- `--search-limit`: Maximum number of search results to return
+- `--use-hybrid-search`: Enable hybrid search combining vector and keyword search
+- `--selected-collection-ids`: Specify collection IDs to search within as a JSON array
+- `--search-strategy`: Choose between "vanilla" search or advanced methods like query fusion or HyDE
+
+### Knowledge Graph Search Options
+
+- `--use-kg-search`: Enable knowledge graph search
+- `--kg-search-type`: Choose between "local" or "global" search
+- `--kg-search-level`: Specify the level for global KG search
+- `--entity-types`: Filter by entity types (as JSON)
+- `--relationships`: Filter by relationship types (as JSON)
+- `--max-community-description-length`: Set maximum length for community descriptions
+- `--local-search-limits`: Set limits for local search (as JSON)
+
+## RAG Command
+
+The `rag` command combines search capabilities with AI generation to provide contextual responses based on your document collection.
+ +```bash +r2r rag --query "Your question" +``` + +### Generation Options + +- `--stream`: Stream the response in real-time +- `--rag-model`: Specify the model to use for generation + +### Vector Search Settings + +- `--use-vector-search`: Enable vector search (default: true) +- `--filters`: Apply JSON filters to search results +- `--search-limit`: Maximum number of search results (default: 10) +- `--use-hybrid-search`: Enable hybrid search +- `--selected-collection-ids`: Specify collection IDs to search within +- `--search-strategy`: Choose search method (default: "vanilla") + +### Knowledge Graph Settings + +- `--use-kg-search`: Enable knowledge graph search +- `--kg-search-type`: Set to "local" or "global" (default: "local") +- `--kg-search-level`: Specify cluster level for Global KG search +- `--kg-search-model`: Choose the model for KG agent +- `--entity-types`: Filter by entity types (as JSON) +- `--relationships`: Filter by relationship types (as JSON) +- `--max-community-description-length`: Set maximum community description length +- `--local-search-limits`: Set limits for local search (as JSON) + +## Examples + +### Basic Search + +```bash +# Simple vector search +r2r search --query "What is quantum computing?" + +# Search with filters +r2r search --query "quantum computing" --filters '{"category": "physics"}' +``` + +### Advanced Search + +```bash +# Hybrid search with collection filtering +r2r search --query "quantum computing" \ + --use-hybrid-search \ + --selected-collection-ids '["physics-collection", "computing-collection"]' + +# Knowledge graph search +r2r search --query "quantum computing relationships" \ + --use-kg-search \ + --kg-search-type "local" \ + --entity-types '["Concept", "Technology"]' +``` + +### Basic RAG + +```bash +# Simple RAG query +r2r rag --query "Explain quantum computing" + +# Streaming RAG response +r2r rag --query "Explain quantum computing" --stream +``` + +### Advanced RAG + +```bash +# RAG with custom model and hybrid search +r2r rag --query "Explain quantum computing" \ + --rag-model "gpt-4" \ + --use-hybrid-search \ + --search-limit 20 + +# RAG with knowledge graph integration +r2r rag --query "How do quantum computers relate to cryptography?" \ + --use-kg-search \ + --kg-search-type "global" \ + --relationships '["ENABLES", "IMPACTS"]' \ + --stream +``` + +## Tips for Effective Retrieval + +1. **Refine Your Queries**: Be specific and clear in your search queries to get more relevant results. + +2. **Use Filters**: Narrow down results using filters when you know specific document characteristics. + +3. **Combine Search Types**: Use hybrid search and knowledge graph capabilities together for more comprehensive results. + +4. **Adjust Search Limits**: Modify the search limit based on your needs - higher limits for broad topics, lower limits for specific queries. + +5. **Stream Long Responses**: Use the `--stream` option with RAG for better user experience with longer generations. diff --git a/docs/documentation/python-sdk/graphrag.mdx b/docs/documentation/python-sdk/graphrag.mdx index 4ed1cb125..feadb5b95 100644 --- a/docs/documentation/python-sdk/graphrag.mdx +++ b/docs/documentation/python-sdk/graphrag.mdx @@ -425,6 +425,81 @@ client.get_communities( The list of community numbers to filter by. +## Delete Graph + +Delete the graph for a collection using the `delete_graph_for_collection` method. 
+ +```python +client.delete_graph_for_collection( + collection_id='122fdf6a-e116-546b-a8f6-e4cb2e2c0a09', + cascade=False +) +``` + + + The ID of the collection to delete the graph for. + + + + Whether to cascade the deletion. + + NOTE: Setting this flag to true will delete entities and triples for documents that are shared across multiple collections. Do not set this flag unless you are absolutely sure that you want to delete the entities and triples for all documents in the collection. + + +## Get Tuned Prompt + +```python +client.get_tuned_prompt( + prompt_name="graphrag_entity_description", + collection_id='122fdf6a-e116-546b-a8f6-e4cb2e2c0a09', + documents_offset=0, + documents_limit=100, + chunk_offset=0, + chunk_limit=100 +) +``` + + + + + The response containing the tuned prompt for GraphRAG. + ```bash + { + "results": { + "tuned_prompt": "string" + } + } + ``` + + + + + + The name of the prompt to tune. Valid values include "graphrag_entity_description", "graphrag_triples_extraction_few_shot", and "graphrag_community_reports". + + + + The ID of the collection to tune the prompt for. If not provided, the default collection will be used. + + + + The offset for pagination of documents. Defaults to 0. + + + + The limit for pagination of documents. Defaults to 100. Controls how many documents are used for tuning. + + + + The offset for pagination of chunks within each document. Defaults to 0. + + + + The limit for pagination of chunks within each document. Defaults to 100. Controls how many chunks per document are used for tuning. + + +The tuning process provides an LLM with chunks from each document in the collection. The relative sample size can therefore be controlled by adjusting the document and chunk limits. + ## Search and RAG diff --git a/docs/documentation/python-sdk/ingestion.mdx b/docs/documentation/python-sdk/ingestion.mdx index c27fdbce0..56504846f 100644 --- a/docs/documentation/python-sdk/ingestion.mdx +++ b/docs/documentation/python-sdk/ingestion.mdx @@ -568,3 +568,191 @@ delete_response = client.delete( A list of logical filters to perform over input documents fields which identifies the unique set of documents to delete (e.g., `{"document_id": {"$eq": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"}}`). Logical operations might include variables such as `"user_id"` or `"title"` and filters like `neq`, `gte`, etc. + + +## Vector Index Management + +### Create Vector Index + +Create a vector index for similarity search: + +Vector index creation can be a taxing procedure on large datasets. + +```python +create_response = client.create_vector_index( + table_name="vectors", + index_method="hnsw", + index_measure="cosine_distance", + index_arguments={"m": 16, "ef_construction": 64}, + concurrently=True +) +``` + + + + + The response from the R2R system after creating the vector index. + ```bash + { + 'message': 'Vector index creation task queued successfully.', + 'task_id': '7d38dfca-606d-422d-b73f-2d9e138661b5' + } + ``` + + + + + + The table to create the index on. Options: vectors, entities_document, entities_collection, communities. Default: vectors + + + + The indexing method to use. Options: hnsw, ivfflat, auto. Default: hnsw + + + + Distance measure for vector comparisons. Options: cosine_distance, l2_distance, max_inner_product. Default: cosine_distance + + + + Configuration parameters for the chosen index method. + + +
+    For HNSW:
+    - m (int): Number of connections per element
+    - ef_construction (int): Size of the dynamic candidate list for construction
+
+    For IVFFlat:
+    - n_lists (int): Number of clusters/inverted lists
+
+
+ + + Custom name for the index. If not provided, one will be auto-generated + + + + Whether to create the index concurrently. Default: True + + +#### Important Considerations + +Vector index creation requires careful planning and consideration of your data and performance requirements. Keep in mind: + +**Resource Intensive Process** +- Index creation can be CPU and memory intensive, especially for large datasets +- For HNSW indexes, memory usage scales with both dataset size and `m` parameter +- Consider creating indexes during off-peak hours for production systems + +**Performance Tuning** +1. **HNSW Parameters:** + - `m`: Higher values (16-64) improve search quality but increase memory usage and build time + - `ef_construction`: Higher values increase build time and quality but have diminishing returns past 100 + - Recommended starting point: `m=16`, `ef_construction=64` + +```python +# Example balanced configuration +client.create_vector_index( + table_name="vectors", + index_method="hnsw", + index_measure="cosine_distance", + index_arguments={ + "m": 16, # Moderate connectivity + "ef_construction": 64 # Balanced build time/quality + }, + concurrently=True +) +``` +**Pre-warming Required** +- **Important:** Newly created indexes require pre-warming to achieve optimal performance +- Initial queries may be slower until the index is loaded into memory +- The first several queries will automatically warm the index +- For production systems, consider implementing explicit pre-warming by running representative queries after index creation +- Without pre-warming, you may not see the expected performance improvements + +**Best Practices** +1. Always use `concurrently=True` in production to avoid blocking other operations +2. Monitor system resources during index creation +3. Test index performance with representative queries before deploying +4. Consider creating indexes on smaller test datasets first to validate parameters + +**Distance Measures** +Choose the appropriate measure based on your use case: +- `cosine_distance`: Best for normalized vectors (most common) +- `l2_distance`: Better for absolute distances +- `max_inner_product`: Optimized for dot product similarity + +### List Vector Indices + +List existing vector indices for a table: + +```python +indices = client.list_vector_indices(table_name="vectors") +``` + + + + + The response containing the list of indices. + ```bash + { + 'indices': [ + { + 'name': 'ix_vector_cosine_ops_hnsw__20241021211541', + 'table': 'vectors', + 'method': 'hnsw', + 'measure': 'cosine_distance' + }, + ... + ] + } + ``` + + + + + + The table to list indices from. Options: vectors, entities_document, entities_collection, communities. Default: vectors + + +### Delete Vector Index + +Delete a vector index from a table: + +```python +delete_response = client.delete_vector_index( + index_name="ix_vector_cosine_ops_hnsw__20241021211541", + table_name="vectors", + concurrently=True +) +``` + + + + + The response from the R2R system after deleting the vector index. + ```bash + { + 'message': 'Vector index deletion task queued successfully.', + 'task_id': '8e49efca-606d-422d-b73f-2d9e138661b6' + } + ``` + + + + + + Name of the index to delete + + + + The table containing the index. Options: vectors, entities_document, entities_collection, communities. Default: vectors + + + + Whether to delete the index concurrently. 
Default: True + diff --git a/docs/mint.json b/docs/mint.json index 268047402..fc7c913d8 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -136,8 +136,10 @@ "documentation/cli/introduction", "documentation/cli/server", "documentation/cli/ingestion", + "documentation/cli/retrieval", "documentation/cli/graph", - "documentation/cli/management" + "documentation/cli/management", + "documentation/cli/maintenance" ] }, @@ -310,17 +312,21 @@ "api-reference/endpoint/ingest_files", "api-reference/endpoint/ingest_chunks", "api-reference/endpoint/update_files", - "api-reference/endpoint/create_vector_index" + "api-reference/endpoint/create_vector_index", + "api-reference/endpoint/delete_vector_index", + "api-reference/endpoint/list_vector_index" ] }, { "group": "Knowledge Graph", "pages": [ "api-reference/endpoint/create_graph", + "api-reference/endpoint/deduplicate_entities", "api-reference/endpoint/enrich_graph", "api-reference/endpoint/entities", "api-reference/endpoint/triples", - "api-reference/endpoint/communities" + "api-reference/endpoint/communities", + "api-reference/endpoint/delete_entities_and_triples" ] }, { @@ -404,11 +410,14 @@ "pages": [ "cookbooks/walkthrough", "cookbooks/ingestion", + "cookbooks/contextual-enrichment", "cookbooks/hybrid-search", - "cookbooks/graphrag", "cookbooks/advanced-rag", + "cookbooks/graphrag", + "cookbooks/advanced-graphrag", "cookbooks/agent", "cookbooks/orchestration", + "cookbooks/maintenance", "cookbooks/web-dev" ] }, diff --git a/js/sdk/__tests__/r2rClientIntegrationSuperUser.test.ts b/js/sdk/__tests__/r2rClientIntegrationSuperUser.test.ts index f5f05f1ae..8fa29bd76 100644 --- a/js/sdk/__tests__/r2rClientIntegrationSuperUser.test.ts +++ b/js/sdk/__tests__/r2rClientIntegrationSuperUser.test.ts @@ -181,16 +181,23 @@ describe("r2rClient Integration Tests", () => { { role: "user", content: "Tell me about Raskolnikov." 
}, ]; - const stream = await client.agent(messages, undefined, undefined, { - stream: true, - }); + const stream = await client.agent(messages, { stream: true }); expect(stream).toBeDefined(); let fullResponse = ""; - for await (const chunk of stream) { - fullResponse += chunk; + if (stream && stream.getReader) { + const reader = stream.getReader(); + while (true) { + const { done, value } = await reader.read(); + if (done) { + break; + } + fullResponse += new TextDecoder().decode(value); + } + } else { + throw new Error("Stream is not a ReadableStream"); } expect(fullResponse.length).toBeGreaterThan(0); diff --git a/js/sdk/package-lock.json b/js/sdk/package-lock.json index a2889f7d3..a4ea4d7dd 100644 --- a/js/sdk/package-lock.json +++ b/js/sdk/package-lock.json @@ -1,6 +1,6 @@ { "name": "r2r-js", - "version": "0.3.8", + "version": "0.3.10", "lockfileVersion": 3, "requires": true, "packages": { diff --git a/js/sdk/package.json b/js/sdk/package.json index 2aa7432a3..2178fba3e 100644 --- a/js/sdk/package.json +++ b/js/sdk/package.json @@ -1,6 +1,6 @@ { "name": "r2r-js", - "version": "0.3.9", + "version": "0.3.10", "description": "", "main": "dist/index.js", "browser": "dist/index.browser.js", diff --git a/js/sdk/pnpm-lock.yaml b/js/sdk/pnpm-lock.yaml index f5627829b..11ab4030e 100644 --- a/js/sdk/pnpm-lock.yaml +++ b/js/sdk/pnpm-lock.yaml @@ -1,14 +1,14 @@ -lockfileVersion: '6.0' +lockfileVersion: "6.0" settings: autoInstallPeers: true excludeLinksFromLockfile: false dependencies: - '@jest/globals': + "@jest/globals": specifier: ^29.7.0 version: 29.7.0 - '@rrweb/types': + "@rrweb/types": specifier: 2.0.0-alpha.17 version: 2.0.0-alpha.17 axios: @@ -31,13 +31,13 @@ dependencies: version: 10.0.0 devDependencies: - '@types/jest': + "@types/jest": specifier: ^29.5.13 version: 29.5.13 - '@types/node': + "@types/node": specifier: ^20.16.12 version: 20.16.14 - '@types/uuid': + "@types/uuid": specifier: ^10.0.0 version: 10.0.0 jest: @@ -57,39 +57,50 @@ devDependencies: version: 5.6.3 packages: - /@ampproject/remapping@2.3.0: - resolution: {integrity: sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==} - engines: {node: '>=6.0.0'} + resolution: + { + integrity: sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==, + } + engines: { node: ">=6.0.0" } dependencies: - '@jridgewell/gen-mapping': 0.3.5 - '@jridgewell/trace-mapping': 0.3.25 + "@jridgewell/gen-mapping": 0.3.5 + "@jridgewell/trace-mapping": 0.3.25 /@babel/code-frame@7.25.9: - resolution: {integrity: sha512-z88xeGxnzehn2sqZ8UdGQEvYErF1odv2CftxInpSYJt6uHuPe9YjahKZITGs3l5LeI9d2ROG+obuDAoSlqbNfQ==} - engines: {node: '>=6.9.0'} + resolution: + { + integrity: sha512-z88xeGxnzehn2sqZ8UdGQEvYErF1odv2CftxInpSYJt6uHuPe9YjahKZITGs3l5LeI9d2ROG+obuDAoSlqbNfQ==, + } + engines: { node: ">=6.9.0" } dependencies: - '@babel/highlight': 7.25.9 + "@babel/highlight": 7.25.9 picocolors: 1.1.1 /@babel/compat-data@7.25.9: - resolution: {integrity: sha512-yD+hEuJ/+wAJ4Ox2/rpNv5HIuPG82x3ZlQvYVn8iYCprdxzE7P1udpGF1jyjQVBU4dgznN+k2h103vxZ7NdPyw==} - engines: {node: '>=6.9.0'} + resolution: + { + integrity: sha512-yD+hEuJ/+wAJ4Ox2/rpNv5HIuPG82x3ZlQvYVn8iYCprdxzE7P1udpGF1jyjQVBU4dgznN+k2h103vxZ7NdPyw==, + } + engines: { node: ">=6.9.0" } /@babel/core@7.25.9: - resolution: {integrity: sha512-WYvQviPw+Qyib0v92AwNIrdLISTp7RfDkM7bPqBvpbnhY4wq8HvHBZREVdYDXk98C8BkOIVnHAY3yvj7AVISxQ==} - engines: {node: '>=6.9.0'} - dependencies: - '@ampproject/remapping': 2.3.0 - '@babel/code-frame': 7.25.9 - 
'@babel/generator': 7.25.9 - '@babel/helper-compilation-targets': 7.25.9 - '@babel/helper-module-transforms': 7.25.9(@babel/core@7.25.9) - '@babel/helpers': 7.25.9 - '@babel/parser': 7.25.9 - '@babel/template': 7.25.9 - '@babel/traverse': 7.25.9 - '@babel/types': 7.25.9 + resolution: + { + integrity: sha512-WYvQviPw+Qyib0v92AwNIrdLISTp7RfDkM7bPqBvpbnhY4wq8HvHBZREVdYDXk98C8BkOIVnHAY3yvj7AVISxQ==, + } + engines: { node: ">=6.9.0" } + dependencies: + "@ampproject/remapping": 2.3.0 + "@babel/code-frame": 7.25.9 + "@babel/generator": 7.25.9 + "@babel/helper-compilation-targets": 7.25.9 + "@babel/helper-module-transforms": 7.25.9(@babel/core@7.25.9) + "@babel/helpers": 7.25.9 + "@babel/parser": 7.25.9 + "@babel/template": 7.25.9 + "@babel/traverse": 7.25.9 + "@babel/types": 7.25.9 convert-source-map: 2.0.0 debug: 4.3.7 gensync: 1.0.0-beta.2 @@ -99,280 +110,385 @@ packages: - supports-color /@babel/generator@7.25.9: - resolution: {integrity: sha512-omlUGkr5EaoIJrhLf9CJ0TvjBRpd9+AXRG//0GEQ9THSo8wPiTlbpy1/Ow8ZTrbXpjd9FHXfbFQx32I04ht0FA==} - engines: {node: '>=6.9.0'} - dependencies: - '@babel/types': 7.25.9 - '@jridgewell/gen-mapping': 0.3.5 - '@jridgewell/trace-mapping': 0.3.25 + resolution: + { + integrity: sha512-omlUGkr5EaoIJrhLf9CJ0TvjBRpd9+AXRG//0GEQ9THSo8wPiTlbpy1/Ow8ZTrbXpjd9FHXfbFQx32I04ht0FA==, + } + engines: { node: ">=6.9.0" } + dependencies: + "@babel/types": 7.25.9 + "@jridgewell/gen-mapping": 0.3.5 + "@jridgewell/trace-mapping": 0.3.25 jsesc: 3.0.2 /@babel/helper-compilation-targets@7.25.9: - resolution: {integrity: sha512-j9Db8Suy6yV/VHa4qzrj9yZfZxhLWQdVnRlXxmKLYlhWUVB1sB2G5sxuWYXk/whHD9iW76PmNzxZ4UCnTQTVEQ==} - engines: {node: '>=6.9.0'} - dependencies: - '@babel/compat-data': 7.25.9 - '@babel/helper-validator-option': 7.25.9 + resolution: + { + integrity: sha512-j9Db8Suy6yV/VHa4qzrj9yZfZxhLWQdVnRlXxmKLYlhWUVB1sB2G5sxuWYXk/whHD9iW76PmNzxZ4UCnTQTVEQ==, + } + engines: { node: ">=6.9.0" } + dependencies: + "@babel/compat-data": 7.25.9 + "@babel/helper-validator-option": 7.25.9 browserslist: 4.24.2 lru-cache: 5.1.1 semver: 6.3.1 /@babel/helper-module-imports@7.25.9: - resolution: {integrity: sha512-tnUA4RsrmflIM6W6RFTLFSXITtl0wKjgpnLgXyowocVPrbYrLUXSBXDgTs8BlbmIzIdlBySRQjINYs2BAkiLtw==} - engines: {node: '>=6.9.0'} - dependencies: - '@babel/traverse': 7.25.9 - '@babel/types': 7.25.9 + resolution: + { + integrity: sha512-tnUA4RsrmflIM6W6RFTLFSXITtl0wKjgpnLgXyowocVPrbYrLUXSBXDgTs8BlbmIzIdlBySRQjINYs2BAkiLtw==, + } + engines: { node: ">=6.9.0" } + dependencies: + "@babel/traverse": 7.25.9 + "@babel/types": 7.25.9 transitivePeerDependencies: - supports-color /@babel/helper-module-transforms@7.25.9(@babel/core@7.25.9): - resolution: {integrity: sha512-TvLZY/F3+GvdRYFZFyxMvnsKi+4oJdgZzU3BoGN9Uc2d9C6zfNwJcKKhjqLAhK8i46mv93jsO74fDh3ih6rpHA==} - engines: {node: '>=6.9.0'} + resolution: + { + integrity: sha512-TvLZY/F3+GvdRYFZFyxMvnsKi+4oJdgZzU3BoGN9Uc2d9C6zfNwJcKKhjqLAhK8i46mv93jsO74fDh3ih6rpHA==, + } + engines: { node: ">=6.9.0" } peerDependencies: - '@babel/core': ^7.0.0 + "@babel/core": ^7.0.0 dependencies: - '@babel/core': 7.25.9 - '@babel/helper-module-imports': 7.25.9 - '@babel/helper-simple-access': 7.25.9 - '@babel/helper-validator-identifier': 7.25.9 - '@babel/traverse': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-module-imports": 7.25.9 + "@babel/helper-simple-access": 7.25.9 + "@babel/helper-validator-identifier": 7.25.9 + "@babel/traverse": 7.25.9 transitivePeerDependencies: - supports-color /@babel/helper-plugin-utils@7.25.9: - resolution: {integrity: 
sha512-kSMlyUVdWe25rEsRGviIgOWnoT/nfABVWlqt9N19/dIPWViAOW2s9wznP5tURbs/IDuNk4gPy3YdYRgH3uxhBw==} - engines: {node: '>=6.9.0'} + resolution: + { + integrity: sha512-kSMlyUVdWe25rEsRGviIgOWnoT/nfABVWlqt9N19/dIPWViAOW2s9wznP5tURbs/IDuNk4gPy3YdYRgH3uxhBw==, + } + engines: { node: ">=6.9.0" } /@babel/helper-simple-access@7.25.9: - resolution: {integrity: sha512-c6WHXuiaRsJTyHYLJV75t9IqsmTbItYfdj99PnzYGQZkYKvan5/2jKJ7gu31J3/BJ/A18grImSPModuyG/Eo0Q==} - engines: {node: '>=6.9.0'} - dependencies: - '@babel/traverse': 7.25.9 - '@babel/types': 7.25.9 + resolution: + { + integrity: sha512-c6WHXuiaRsJTyHYLJV75t9IqsmTbItYfdj99PnzYGQZkYKvan5/2jKJ7gu31J3/BJ/A18grImSPModuyG/Eo0Q==, + } + engines: { node: ">=6.9.0" } + dependencies: + "@babel/traverse": 7.25.9 + "@babel/types": 7.25.9 transitivePeerDependencies: - supports-color /@babel/helper-string-parser@7.25.9: - resolution: {integrity: sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA==} - engines: {node: '>=6.9.0'} + resolution: + { + integrity: sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA==, + } + engines: { node: ">=6.9.0" } /@babel/helper-validator-identifier@7.25.9: - resolution: {integrity: sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==} - engines: {node: '>=6.9.0'} + resolution: + { + integrity: sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==, + } + engines: { node: ">=6.9.0" } /@babel/helper-validator-option@7.25.9: - resolution: {integrity: sha512-e/zv1co8pp55dNdEcCynfj9X7nyUKUXoUEwfXqaZt0omVOmDe9oOTdKStH4GmAw6zxMFs50ZayuMfHDKlO7Tfw==} - engines: {node: '>=6.9.0'} + resolution: + { + integrity: sha512-e/zv1co8pp55dNdEcCynfj9X7nyUKUXoUEwfXqaZt0omVOmDe9oOTdKStH4GmAw6zxMFs50ZayuMfHDKlO7Tfw==, + } + engines: { node: ">=6.9.0" } /@babel/helpers@7.25.9: - resolution: {integrity: sha512-oKWp3+usOJSzDZOucZUAMayhPz/xVjzymyDzUN8dk0Wd3RWMlGLXi07UCQ/CgQVb8LvXx3XBajJH4XGgkt7H7g==} - engines: {node: '>=6.9.0'} + resolution: + { + integrity: sha512-oKWp3+usOJSzDZOucZUAMayhPz/xVjzymyDzUN8dk0Wd3RWMlGLXi07UCQ/CgQVb8LvXx3XBajJH4XGgkt7H7g==, + } + engines: { node: ">=6.9.0" } dependencies: - '@babel/template': 7.25.9 - '@babel/types': 7.25.9 + "@babel/template": 7.25.9 + "@babel/types": 7.25.9 /@babel/highlight@7.25.9: - resolution: {integrity: sha512-llL88JShoCsth8fF8R4SJnIn+WLvR6ccFxu1H3FlMhDontdcmZWf2HgIZ7AIqV3Xcck1idlohrN4EUBQz6klbw==} - engines: {node: '>=6.9.0'} + resolution: + { + integrity: sha512-llL88JShoCsth8fF8R4SJnIn+WLvR6ccFxu1H3FlMhDontdcmZWf2HgIZ7AIqV3Xcck1idlohrN4EUBQz6klbw==, + } + engines: { node: ">=6.9.0" } dependencies: - '@babel/helper-validator-identifier': 7.25.9 + "@babel/helper-validator-identifier": 7.25.9 chalk: 2.4.2 js-tokens: 4.0.0 picocolors: 1.1.1 /@babel/parser@7.25.9: - resolution: {integrity: sha512-aI3jjAAO1fh7vY/pBGsn1i9LDbRP43+asrRlkPuTXW5yHXtd1NgTEMudbBoDDxrf1daEEfPJqR+JBMakzrR4Dg==} - engines: {node: '>=6.0.0'} + resolution: + { + integrity: sha512-aI3jjAAO1fh7vY/pBGsn1i9LDbRP43+asrRlkPuTXW5yHXtd1NgTEMudbBoDDxrf1daEEfPJqR+JBMakzrR4Dg==, + } + engines: { node: ">=6.0.0" } hasBin: true dependencies: - '@babel/types': 7.25.9 + "@babel/types": 7.25.9 /@babel/plugin-syntax-async-generators@7.8.4(@babel/core@7.25.9): - resolution: {integrity: sha512-tycmZxkGfZaxhMRbXlPXuVFpdWlXpir2W4AMhSJgRKzk/eDlIXOhb2LHWoLpDF7TEHylV5zNhykX6KAgHJmTNw==} + resolution: + { + integrity: 
sha512-tycmZxkGfZaxhMRbXlPXuVFpdWlXpir2W4AMhSJgRKzk/eDlIXOhb2LHWoLpDF7TEHylV5zNhykX6KAgHJmTNw==, + } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 /@babel/plugin-syntax-bigint@7.8.3(@babel/core@7.25.9): - resolution: {integrity: sha512-wnTnFlG+YxQm3vDxpGE57Pj0srRU4sHE/mDkt1qv2YJJSeUAec2ma4WLUnUPeKjyrfntVwe/N6dCXpU+zL3Npg==} + resolution: + { + integrity: sha512-wnTnFlG+YxQm3vDxpGE57Pj0srRU4sHE/mDkt1qv2YJJSeUAec2ma4WLUnUPeKjyrfntVwe/N6dCXpU+zL3Npg==, + } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 /@babel/plugin-syntax-class-properties@7.12.13(@babel/core@7.25.9): - resolution: {integrity: sha512-fm4idjKla0YahUNgFNLCB0qySdsoPiZP3iQE3rky0mBUtMZ23yDJ9SJdg6dXTSDnulOVqiF3Hgr9nbXvXTQZYA==} + resolution: + { + integrity: sha512-fm4idjKla0YahUNgFNLCB0qySdsoPiZP3iQE3rky0mBUtMZ23yDJ9SJdg6dXTSDnulOVqiF3Hgr9nbXvXTQZYA==, + } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 /@babel/plugin-syntax-class-static-block@7.14.5(@babel/core@7.25.9): - resolution: {integrity: sha512-b+YyPmr6ldyNnM6sqYeMWE+bgJcJpO6yS4QD7ymxgH34GBPNDM/THBh8iunyvKIZztiwLH4CJZ0RxTk9emgpjw==} - engines: {node: '>=6.9.0'} + resolution: + { + integrity: sha512-b+YyPmr6ldyNnM6sqYeMWE+bgJcJpO6yS4QD7ymxgH34GBPNDM/THBh8iunyvKIZztiwLH4CJZ0RxTk9emgpjw==, + } + engines: { node: ">=6.9.0" } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 /@babel/plugin-syntax-import-attributes@7.25.9(@babel/core@7.25.9): - resolution: {integrity: sha512-u3EN9ub8LyYvgTnrgp8gboElouayiwPdnM7x5tcnW3iSt09/lQYPwMNK40I9IUxo7QOZhAsPHCmmuO7EPdruqg==} - engines: {node: '>=6.9.0'} + resolution: + { + integrity: sha512-u3EN9ub8LyYvgTnrgp8gboElouayiwPdnM7x5tcnW3iSt09/lQYPwMNK40I9IUxo7QOZhAsPHCmmuO7EPdruqg==, + } + engines: { node: ">=6.9.0" } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 /@babel/plugin-syntax-import-meta@7.10.4(@babel/core@7.25.9): - resolution: {integrity: sha512-Yqfm+XDx0+Prh3VSeEQCPU81yC+JWZ2pDPFSS4ZdpfZhp4MkFMaDC1UqseovEKwSUpnIL7+vK+Clp7bfh0iD7g==} + resolution: + { + integrity: sha512-Yqfm+XDx0+Prh3VSeEQCPU81yC+JWZ2pDPFSS4ZdpfZhp4MkFMaDC1UqseovEKwSUpnIL7+vK+Clp7bfh0iD7g==, + } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 /@babel/plugin-syntax-json-strings@7.8.3(@babel/core@7.25.9): - resolution: {integrity: sha512-lY6kdGpWHvjoe2vk4WrAapEuBR69EMxZl+RoGRhrFGNYVK8mOPAW8VfbT/ZgrFbXlDNiiaxQnAtgVCZ6jv30EA==} + resolution: + { + integrity: sha512-lY6kdGpWHvjoe2vk4WrAapEuBR69EMxZl+RoGRhrFGNYVK8mOPAW8VfbT/ZgrFbXlDNiiaxQnAtgVCZ6jv30EA==, + } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + 
"@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 /@babel/plugin-syntax-jsx@7.25.9(@babel/core@7.25.9): - resolution: {integrity: sha512-ld6oezHQMZsZfp6pWtbjaNDF2tiiCYYDqQszHt5VV437lewP9aSi2Of99CK0D0XB21k7FLgnLcmQKyKzynfeAA==} - engines: {node: '>=6.9.0'} + resolution: + { + integrity: sha512-ld6oezHQMZsZfp6pWtbjaNDF2tiiCYYDqQszHt5VV437lewP9aSi2Of99CK0D0XB21k7FLgnLcmQKyKzynfeAA==, + } + engines: { node: ">=6.9.0" } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 /@babel/plugin-syntax-logical-assignment-operators@7.10.4(@babel/core@7.25.9): - resolution: {integrity: sha512-d8waShlpFDinQ5MtvGU9xDAOzKH47+FFoney2baFIoMr952hKOLp1HR7VszoZvOsV/4+RRszNY7D17ba0te0ig==} + resolution: + { + integrity: sha512-d8waShlpFDinQ5MtvGU9xDAOzKH47+FFoney2baFIoMr952hKOLp1HR7VszoZvOsV/4+RRszNY7D17ba0te0ig==, + } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 /@babel/plugin-syntax-nullish-coalescing-operator@7.8.3(@babel/core@7.25.9): - resolution: {integrity: sha512-aSff4zPII1u2QD7y+F8oDsz19ew4IGEJg9SVW+bqwpwtfFleiQDMdzA/R+UlWDzfnHFCxxleFT0PMIrR36XLNQ==} + resolution: + { + integrity: sha512-aSff4zPII1u2QD7y+F8oDsz19ew4IGEJg9SVW+bqwpwtfFleiQDMdzA/R+UlWDzfnHFCxxleFT0PMIrR36XLNQ==, + } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 /@babel/plugin-syntax-numeric-separator@7.10.4(@babel/core@7.25.9): - resolution: {integrity: sha512-9H6YdfkcK/uOnY/K7/aA2xpzaAgkQn37yzWUMRK7OaPOqOpGS1+n0H5hxT9AUw9EsSjPW8SVyMJwYRtWs3X3ug==} + resolution: + { + integrity: sha512-9H6YdfkcK/uOnY/K7/aA2xpzaAgkQn37yzWUMRK7OaPOqOpGS1+n0H5hxT9AUw9EsSjPW8SVyMJwYRtWs3X3ug==, + } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 /@babel/plugin-syntax-object-rest-spread@7.8.3(@babel/core@7.25.9): - resolution: {integrity: sha512-XoqMijGZb9y3y2XskN+P1wUGiVwWZ5JmoDRwx5+3GmEplNyVM2s2Dg8ILFQm8rWM48orGy5YpI5Bl8U1y7ydlA==} + resolution: + { + integrity: sha512-XoqMijGZb9y3y2XskN+P1wUGiVwWZ5JmoDRwx5+3GmEplNyVM2s2Dg8ILFQm8rWM48orGy5YpI5Bl8U1y7ydlA==, + } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 /@babel/plugin-syntax-optional-catch-binding@7.8.3(@babel/core@7.25.9): - resolution: {integrity: sha512-6VPD0Pc1lpTqw0aKoeRTMiB+kWhAoT24PA+ksWSBrFtl5SIRVpZlwN3NNPQjehA2E/91FV3RjLWoVTglWcSV3Q==} + resolution: + { + integrity: sha512-6VPD0Pc1lpTqw0aKoeRTMiB+kWhAoT24PA+ksWSBrFtl5SIRVpZlwN3NNPQjehA2E/91FV3RjLWoVTglWcSV3Q==, + } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 /@babel/plugin-syntax-optional-chaining@7.8.3(@babel/core@7.25.9): - resolution: {integrity: sha512-KoK9ErH1MBlCPxV0VANkXW2/dw4vlbGDrFgz8bmUsBGYkFRcbRwMh6cIJubdPrkxRwuGdtCk0v/wPTKbQgBjkg==} + resolution: + { + integrity: 
sha512-KoK9ErH1MBlCPxV0VANkXW2/dw4vlbGDrFgz8bmUsBGYkFRcbRwMh6cIJubdPrkxRwuGdtCk0v/wPTKbQgBjkg==, + } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 /@babel/plugin-syntax-private-property-in-object@7.14.5(@babel/core@7.25.9): - resolution: {integrity: sha512-0wVnp9dxJ72ZUJDV27ZfbSj6iHLoytYZmh3rFcxNnvsJF3ktkzLDZPy/mA17HGsaQT3/DQsWYX1f1QGWkCoVUg==} - engines: {node: '>=6.9.0'} + resolution: + { + integrity: sha512-0wVnp9dxJ72ZUJDV27ZfbSj6iHLoytYZmh3rFcxNnvsJF3ktkzLDZPy/mA17HGsaQT3/DQsWYX1f1QGWkCoVUg==, + } + engines: { node: ">=6.9.0" } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 /@babel/plugin-syntax-top-level-await@7.14.5(@babel/core@7.25.9): - resolution: {integrity: sha512-hx++upLv5U1rgYfwe1xBQUhRmU41NEvpUvrp8jkrSCdvGSnM5/qdRMtylJ6PG5OFkBaHkbTAKTnd3/YyESRHFw==} - engines: {node: '>=6.9.0'} + resolution: + { + integrity: sha512-hx++upLv5U1rgYfwe1xBQUhRmU41NEvpUvrp8jkrSCdvGSnM5/qdRMtylJ6PG5OFkBaHkbTAKTnd3/YyESRHFw==, + } + engines: { node: ">=6.9.0" } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 /@babel/plugin-syntax-typescript@7.25.9(@babel/core@7.25.9): - resolution: {integrity: sha512-hjMgRy5hb8uJJjUcdWunWVcoi9bGpJp8p5Ol1229PoN6aytsLwNMgmdftO23wnCLMfVmTwZDWMPNq/D1SY60JQ==} - engines: {node: '>=6.9.0'} + resolution: + { + integrity: sha512-hjMgRy5hb8uJJjUcdWunWVcoi9bGpJp8p5Ol1229PoN6aytsLwNMgmdftO23wnCLMfVmTwZDWMPNq/D1SY60JQ==, + } + engines: { node: ">=6.9.0" } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 /@babel/template@7.25.9: - resolution: {integrity: sha512-9DGttpmPvIxBb/2uwpVo3dqJ+O6RooAFOS+lB+xDqoE2PVCE8nfoHMdZLpfCQRLwvohzXISPZcgxt80xLfsuwg==} - engines: {node: '>=6.9.0'} + resolution: + { + integrity: sha512-9DGttpmPvIxBb/2uwpVo3dqJ+O6RooAFOS+lB+xDqoE2PVCE8nfoHMdZLpfCQRLwvohzXISPZcgxt80xLfsuwg==, + } + engines: { node: ">=6.9.0" } dependencies: - '@babel/code-frame': 7.25.9 - '@babel/parser': 7.25.9 - '@babel/types': 7.25.9 + "@babel/code-frame": 7.25.9 + "@babel/parser": 7.25.9 + "@babel/types": 7.25.9 /@babel/traverse@7.25.9: - resolution: {integrity: sha512-ZCuvfwOwlz/bawvAuvcj8rrithP2/N55Tzz342AkTvq4qaWbGfmCk/tKhNaV2cthijKrPAA8SRJV5WWe7IBMJw==} - engines: {node: '>=6.9.0'} - dependencies: - '@babel/code-frame': 7.25.9 - '@babel/generator': 7.25.9 - '@babel/parser': 7.25.9 - '@babel/template': 7.25.9 - '@babel/types': 7.25.9 + resolution: + { + integrity: sha512-ZCuvfwOwlz/bawvAuvcj8rrithP2/N55Tzz342AkTvq4qaWbGfmCk/tKhNaV2cthijKrPAA8SRJV5WWe7IBMJw==, + } + engines: { node: ">=6.9.0" } + dependencies: + "@babel/code-frame": 7.25.9 + "@babel/generator": 7.25.9 + "@babel/parser": 7.25.9 + "@babel/template": 7.25.9 + "@babel/types": 7.25.9 debug: 4.3.7 globals: 11.12.0 transitivePeerDependencies: - supports-color /@babel/types@7.25.9: - resolution: {integrity: sha512-OwS2CM5KocvQ/k7dFJa8i5bNGJP0hXWfVCfDkqRFP1IreH1JDC7wG6eCYCi0+McbfT8OR/kNqsI0UU0xP9H6PQ==} - engines: {node: '>=6.9.0'} + resolution: + { + integrity: 
sha512-OwS2CM5KocvQ/k7dFJa8i5bNGJP0hXWfVCfDkqRFP1IreH1JDC7wG6eCYCi0+McbfT8OR/kNqsI0UU0xP9H6PQ==, + } + engines: { node: ">=6.9.0" } dependencies: - '@babel/helper-string-parser': 7.25.9 - '@babel/helper-validator-identifier': 7.25.9 + "@babel/helper-string-parser": 7.25.9 + "@babel/helper-validator-identifier": 7.25.9 /@bcoe/v8-coverage@0.2.3: - resolution: {integrity: sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==} + resolution: + { + integrity: sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==, + } dev: true /@cspotcode/source-map-support@0.8.1: - resolution: {integrity: sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==} - engines: {node: '>=12'} + resolution: + { + integrity: sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==, + } + engines: { node: ">=12" } dependencies: - '@jridgewell/trace-mapping': 0.3.9 + "@jridgewell/trace-mapping": 0.3.9 dev: true /@istanbuljs/load-nyc-config@1.1.0: - resolution: {integrity: sha512-VjeHSlIzpv/NyD3N0YuHfXOPDIixcA1q2ZV98wsMqcYlPmv2n3Yb2lYP9XMElnaFVXg5A7YLTeLu6V84uQDjmQ==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-VjeHSlIzpv/NyD3N0YuHfXOPDIixcA1q2ZV98wsMqcYlPmv2n3Yb2lYP9XMElnaFVXg5A7YLTeLu6V84uQDjmQ==, + } + engines: { node: ">=8" } dependencies: camelcase: 5.3.1 find-up: 4.1.0 @@ -381,15 +497,21 @@ packages: resolve-from: 5.0.0 /@istanbuljs/schema@0.1.3: - resolution: {integrity: sha512-ZXRY4jNvVgSVQ8DL3LTcakaAtXwTVUxE81hslsyD2AtoXW/wVob10HkOJ1X/pAlcI7D+2YoZKg5do8G/w6RYgA==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-ZXRY4jNvVgSVQ8DL3LTcakaAtXwTVUxE81hslsyD2AtoXW/wVob10HkOJ1X/pAlcI7D+2YoZKg5do8G/w6RYgA==, + } + engines: { node: ">=8" } /@jest/console@29.7.0: - resolution: {integrity: sha512-5Ni4CU7XHQi32IJ398EEP4RrB8eV09sXP2ROqD4bksHrnTree52PsxvX8tpL8LvTZ3pFzXyPbNQReSN41CAhOg==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - dependencies: - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + resolution: + { + integrity: sha512-5Ni4CU7XHQi32IJ398EEP4RrB8eV09sXP2ROqD4bksHrnTree52PsxvX8tpL8LvTZ3pFzXyPbNQReSN41CAhOg==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + dependencies: + "@jest/types": 29.6.3 + "@types/node": 20.16.14 chalk: 4.1.2 jest-message-util: 29.7.0 jest-util: 29.7.0 @@ -397,20 +519,23 @@ packages: dev: true /@jest/core@29.7.0(ts-node@10.9.2): - resolution: {integrity: sha512-n7aeXWKMnGtDA48y8TLWJPJmLmmZ642Ceo78cYWEpiD7FzDgmNDV/GCVRorPABdXLJZ/9wzzgZAlHjXjxDHGsg==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-n7aeXWKMnGtDA48y8TLWJPJmLmmZ642Ceo78cYWEpiD7FzDgmNDV/GCVRorPABdXLJZ/9wzzgZAlHjXjxDHGsg==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } peerDependencies: node-notifier: ^8.0.1 || ^9.0.0 || ^10.0.0 peerDependenciesMeta: node-notifier: optional: true dependencies: - '@jest/console': 29.7.0 - '@jest/reporters': 29.7.0 - '@jest/test-result': 29.7.0 - '@jest/transform': 29.7.0 - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + "@jest/console": 29.7.0 + "@jest/reporters": 29.7.0 + "@jest/test-result": 29.7.0 + "@jest/transform": 29.7.0 + "@jest/types": 29.6.3 + "@types/node": 20.16.14 ansi-escapes: 4.3.2 chalk: 4.1.2 ci-info: 3.9.0 @@ -440,23 +565,32 @@ packages: dev: true /@jest/environment@29.7.0: - resolution: {integrity: sha512-aQIfHDq33ExsN4jP1NWGXhxgQ/wixs60gDiKO+XVMd8Mn0NWPWgc34ZQDTb2jKaUWQ7MuwoitXAsN2XVXNMpAw==} - engines: {node: 
^14.15.0 || ^16.10.0 || >=18.0.0} - dependencies: - '@jest/fake-timers': 29.7.0 - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + resolution: + { + integrity: sha512-aQIfHDq33ExsN4jP1NWGXhxgQ/wixs60gDiKO+XVMd8Mn0NWPWgc34ZQDTb2jKaUWQ7MuwoitXAsN2XVXNMpAw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + dependencies: + "@jest/fake-timers": 29.7.0 + "@jest/types": 29.6.3 + "@types/node": 20.16.14 jest-mock: 29.7.0 /@jest/expect-utils@29.7.0: - resolution: {integrity: sha512-GlsNBWiFQFCVi9QVSx7f5AgMeLxe9YCCs5PuP2O2LdjDAA8Jh9eX7lA1Jq/xdXw3Wb3hyvlFNfZIfcRetSzYcA==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-GlsNBWiFQFCVi9QVSx7f5AgMeLxe9YCCs5PuP2O2LdjDAA8Jh9eX7lA1Jq/xdXw3Wb3hyvlFNfZIfcRetSzYcA==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } dependencies: jest-get-type: 29.6.3 /@jest/expect@29.7.0: - resolution: {integrity: sha512-8uMeAMycttpva3P1lBHB8VciS9V0XAr3GymPpipdyQXbBcuhkLQOSe8E/p92RyAdToS6ZD1tFkX+CkhoECE0dQ==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-8uMeAMycttpva3P1lBHB8VciS9V0XAr3GymPpipdyQXbBcuhkLQOSe8E/p92RyAdToS6ZD1tFkX+CkhoECE0dQ==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } dependencies: expect: 29.7.0 jest-snapshot: 29.7.0 @@ -464,43 +598,52 @@ packages: - supports-color /@jest/fake-timers@29.7.0: - resolution: {integrity: sha512-q4DH1Ha4TTFPdxLsqDXK1d3+ioSL7yL5oCMJZgDYm6i+6CygW5E5xVr/D1HdsGxjt1ZWSfUAs9OxSB/BNelWrQ==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - dependencies: - '@jest/types': 29.6.3 - '@sinonjs/fake-timers': 10.3.0 - '@types/node': 20.16.14 + resolution: + { + integrity: sha512-q4DH1Ha4TTFPdxLsqDXK1d3+ioSL7yL5oCMJZgDYm6i+6CygW5E5xVr/D1HdsGxjt1ZWSfUAs9OxSB/BNelWrQ==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + dependencies: + "@jest/types": 29.6.3 + "@sinonjs/fake-timers": 10.3.0 + "@types/node": 20.16.14 jest-message-util: 29.7.0 jest-mock: 29.7.0 jest-util: 29.7.0 /@jest/globals@29.7.0: - resolution: {integrity: sha512-mpiz3dutLbkW2MNFubUGUEVLkTGiqW6yLVTA+JbP6fI6J5iL9Y0Nlg8k95pcF8ctKwCS7WVxteBs29hhfAotzQ==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - dependencies: - '@jest/environment': 29.7.0 - '@jest/expect': 29.7.0 - '@jest/types': 29.6.3 + resolution: + { + integrity: sha512-mpiz3dutLbkW2MNFubUGUEVLkTGiqW6yLVTA+JbP6fI6J5iL9Y0Nlg8k95pcF8ctKwCS7WVxteBs29hhfAotzQ==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + dependencies: + "@jest/environment": 29.7.0 + "@jest/expect": 29.7.0 + "@jest/types": 29.6.3 jest-mock: 29.7.0 transitivePeerDependencies: - supports-color /@jest/reporters@29.7.0: - resolution: {integrity: sha512-DApq0KJbJOEzAFYjHADNNxAE3KbhxQB1y5Kplb5Waqw6zVbuWatSnMjE5gs8FUgEPmNsnZA3NCWl9NG0ia04Pg==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-DApq0KJbJOEzAFYjHADNNxAE3KbhxQB1y5Kplb5Waqw6zVbuWatSnMjE5gs8FUgEPmNsnZA3NCWl9NG0ia04Pg==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } peerDependencies: node-notifier: ^8.0.1 || ^9.0.0 || ^10.0.0 peerDependenciesMeta: node-notifier: optional: true dependencies: - '@bcoe/v8-coverage': 0.2.3 - '@jest/console': 29.7.0 - '@jest/test-result': 29.7.0 - '@jest/transform': 29.7.0 - '@jest/types': 29.6.3 - '@jridgewell/trace-mapping': 0.3.25 - '@types/node': 20.16.14 + "@bcoe/v8-coverage": 0.2.3 + "@jest/console": 29.7.0 + "@jest/test-result": 29.7.0 + "@jest/transform": 29.7.0 + "@jest/types": 29.6.3 + "@jridgewell/trace-mapping": 0.3.25 + "@types/node": 20.16.14 chalk: 4.1.2 
collect-v8-coverage: 1.0.2 exit: 0.1.2 @@ -523,47 +666,62 @@ packages: dev: true /@jest/schemas@29.6.3: - resolution: {integrity: sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } dependencies: - '@sinclair/typebox': 0.27.8 + "@sinclair/typebox": 0.27.8 /@jest/source-map@29.6.3: - resolution: {integrity: sha512-MHjT95QuipcPrpLM+8JMSzFx6eHp5Bm+4XeFDJlwsvVBjmKNiIAvasGK2fxz2WbGRlnvqehFbh07MMa7n3YJnw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-MHjT95QuipcPrpLM+8JMSzFx6eHp5Bm+4XeFDJlwsvVBjmKNiIAvasGK2fxz2WbGRlnvqehFbh07MMa7n3YJnw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } dependencies: - '@jridgewell/trace-mapping': 0.3.25 + "@jridgewell/trace-mapping": 0.3.25 callsites: 3.1.0 graceful-fs: 4.2.11 dev: true /@jest/test-result@29.7.0: - resolution: {integrity: sha512-Fdx+tv6x1zlkJPcWXmMDAG2HBnaR9XPSd5aDWQVsfrZmLVT3lU1cwyxLgRmXR9yrq4NBoEm9BMsfgFzTQAbJYA==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - dependencies: - '@jest/console': 29.7.0 - '@jest/types': 29.6.3 - '@types/istanbul-lib-coverage': 2.0.6 + resolution: + { + integrity: sha512-Fdx+tv6x1zlkJPcWXmMDAG2HBnaR9XPSd5aDWQVsfrZmLVT3lU1cwyxLgRmXR9yrq4NBoEm9BMsfgFzTQAbJYA==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + dependencies: + "@jest/console": 29.7.0 + "@jest/types": 29.6.3 + "@types/istanbul-lib-coverage": 2.0.6 collect-v8-coverage: 1.0.2 dev: true /@jest/test-sequencer@29.7.0: - resolution: {integrity: sha512-GQwJ5WZVrKnOJuiYiAF52UNUJXgTZx1NHjFSEB0qEMmSZKAkdMoIzw/Cj6x6NF4AvV23AUqDpFzQkN/eYCYTxw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-GQwJ5WZVrKnOJuiYiAF52UNUJXgTZx1NHjFSEB0qEMmSZKAkdMoIzw/Cj6x6NF4AvV23AUqDpFzQkN/eYCYTxw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } dependencies: - '@jest/test-result': 29.7.0 + "@jest/test-result": 29.7.0 graceful-fs: 4.2.11 jest-haste-map: 29.7.0 slash: 3.0.0 dev: true /@jest/transform@29.7.0: - resolution: {integrity: sha512-ok/BTPFzFKVMwO5eOHRrvnBVHdRy9IrsrW1GpMaQ9MCnilNLXQKmAX8s1YXDFaai9xJpac2ySzV0YeRRECr2Vw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - dependencies: - '@babel/core': 7.25.9 - '@jest/types': 29.6.3 - '@jridgewell/trace-mapping': 0.3.25 + resolution: + { + integrity: sha512-ok/BTPFzFKVMwO5eOHRrvnBVHdRy9IrsrW1GpMaQ9MCnilNLXQKmAX8s1YXDFaai9xJpac2ySzV0YeRRECr2Vw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + dependencies: + "@babel/core": 7.25.9 + "@jest/types": 29.6.3 + "@jridgewell/trace-mapping": 0.3.25 babel-plugin-istanbul: 6.1.1 chalk: 4.1.2 convert-source-map: 2.0.0 @@ -580,224 +738,350 @@ packages: - supports-color /@jest/types@29.6.3: - resolution: {integrity: sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - dependencies: - '@jest/schemas': 29.6.3 - '@types/istanbul-lib-coverage': 2.0.6 - '@types/istanbul-reports': 3.0.4 - '@types/node': 20.16.14 - '@types/yargs': 17.0.33 + resolution: + { + integrity: sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + dependencies: + "@jest/schemas": 29.6.3 + 
"@types/istanbul-lib-coverage": 2.0.6 + "@types/istanbul-reports": 3.0.4 + "@types/node": 20.16.14 + "@types/yargs": 17.0.33 chalk: 4.1.2 /@jridgewell/gen-mapping@0.3.5: - resolution: {integrity: sha512-IzL8ZoEDIBRWEzlCcRhOaCupYyN5gdIK+Q6fbFdPDg6HqX6jpkItn7DFIpW9LQzXG6Df9sA7+OKnq0qlz/GaQg==} - engines: {node: '>=6.0.0'} + resolution: + { + integrity: sha512-IzL8ZoEDIBRWEzlCcRhOaCupYyN5gdIK+Q6fbFdPDg6HqX6jpkItn7DFIpW9LQzXG6Df9sA7+OKnq0qlz/GaQg==, + } + engines: { node: ">=6.0.0" } dependencies: - '@jridgewell/set-array': 1.2.1 - '@jridgewell/sourcemap-codec': 1.5.0 - '@jridgewell/trace-mapping': 0.3.25 + "@jridgewell/set-array": 1.2.1 + "@jridgewell/sourcemap-codec": 1.5.0 + "@jridgewell/trace-mapping": 0.3.25 /@jridgewell/resolve-uri@3.1.2: - resolution: {integrity: sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==} - engines: {node: '>=6.0.0'} + resolution: + { + integrity: sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==, + } + engines: { node: ">=6.0.0" } /@jridgewell/set-array@1.2.1: - resolution: {integrity: sha512-R8gLRTZeyp03ymzP/6Lil/28tGeGEzhx1q2k703KGWRAI1VdvPIXdG70VJc2pAMw3NA6JKL5hhFu1sJX0Mnn/A==} - engines: {node: '>=6.0.0'} + resolution: + { + integrity: sha512-R8gLRTZeyp03ymzP/6Lil/28tGeGEzhx1q2k703KGWRAI1VdvPIXdG70VJc2pAMw3NA6JKL5hhFu1sJX0Mnn/A==, + } + engines: { node: ">=6.0.0" } /@jridgewell/sourcemap-codec@1.5.0: - resolution: {integrity: sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==} + resolution: + { + integrity: sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==, + } /@jridgewell/trace-mapping@0.3.25: - resolution: {integrity: sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==} + resolution: + { + integrity: sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==, + } dependencies: - '@jridgewell/resolve-uri': 3.1.2 - '@jridgewell/sourcemap-codec': 1.5.0 + "@jridgewell/resolve-uri": 3.1.2 + "@jridgewell/sourcemap-codec": 1.5.0 /@jridgewell/trace-mapping@0.3.9: - resolution: {integrity: sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==} + resolution: + { + integrity: sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==, + } dependencies: - '@jridgewell/resolve-uri': 3.1.2 - '@jridgewell/sourcemap-codec': 1.5.0 + "@jridgewell/resolve-uri": 3.1.2 + "@jridgewell/sourcemap-codec": 1.5.0 dev: true /@rrweb/types@2.0.0-alpha.17: - resolution: {integrity: sha512-AfDTVUuCyCaIG0lTSqYtrZqJX39ZEYzs4fYKnexhQ+id+kbZIpIJtaut5cto6dWZbB3SEe4fW0o90Po3LvTmfg==} + resolution: + { + integrity: sha512-AfDTVUuCyCaIG0lTSqYtrZqJX39ZEYzs4fYKnexhQ+id+kbZIpIJtaut5cto6dWZbB3SEe4fW0o90Po3LvTmfg==, + } dependencies: rrweb-snapshot: 2.0.0-alpha.17 dev: false /@sinclair/typebox@0.27.8: - resolution: {integrity: sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==} + resolution: + { + integrity: sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==, + } /@sinonjs/commons@3.0.1: - resolution: {integrity: sha512-K3mCHKQ9sVh8o1C9cxkwxaOmXoAMlDxC1mYyHrjqOWEcBjYr76t96zL2zlj5dUGZ3HSw240X1qgH3Mjf1yJWpQ==} + resolution: + { + integrity: sha512-K3mCHKQ9sVh8o1C9cxkwxaOmXoAMlDxC1mYyHrjqOWEcBjYr76t96zL2zlj5dUGZ3HSw240X1qgH3Mjf1yJWpQ==, + } dependencies: type-detect: 4.0.8 
/@sinonjs/fake-timers@10.3.0: - resolution: {integrity: sha512-V4BG07kuYSUkTCSBHG8G8TNhM+F19jXFWnQtzj+we8DrkpSBCee9Z3Ms8yiGer/dlmhe35/Xdgyo3/0rQKg7YA==} + resolution: + { + integrity: sha512-V4BG07kuYSUkTCSBHG8G8TNhM+F19jXFWnQtzj+we8DrkpSBCee9Z3Ms8yiGer/dlmhe35/Xdgyo3/0rQKg7YA==, + } dependencies: - '@sinonjs/commons': 3.0.1 + "@sinonjs/commons": 3.0.1 /@tsconfig/node10@1.0.11: - resolution: {integrity: sha512-DcRjDCujK/kCk/cUe8Xz8ZSpm8mS3mNNpta+jGCA6USEDfktlNvm1+IuZ9eTcDbNk41BHwpHHeW+N1lKCz4zOw==} + resolution: + { + integrity: sha512-DcRjDCujK/kCk/cUe8Xz8ZSpm8mS3mNNpta+jGCA6USEDfktlNvm1+IuZ9eTcDbNk41BHwpHHeW+N1lKCz4zOw==, + } dev: true /@tsconfig/node12@1.0.11: - resolution: {integrity: sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==} + resolution: + { + integrity: sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==, + } dev: true /@tsconfig/node14@1.0.3: - resolution: {integrity: sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==} + resolution: + { + integrity: sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==, + } dev: true /@tsconfig/node16@1.0.4: - resolution: {integrity: sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==} + resolution: + { + integrity: sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==, + } dev: true /@types/babel__core@7.20.5: - resolution: {integrity: sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==} + resolution: + { + integrity: sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==, + } dependencies: - '@babel/parser': 7.25.9 - '@babel/types': 7.25.9 - '@types/babel__generator': 7.6.8 - '@types/babel__template': 7.4.4 - '@types/babel__traverse': 7.20.6 + "@babel/parser": 7.25.9 + "@babel/types": 7.25.9 + "@types/babel__generator": 7.6.8 + "@types/babel__template": 7.4.4 + "@types/babel__traverse": 7.20.6 dev: true /@types/babel__generator@7.6.8: - resolution: {integrity: sha512-ASsj+tpEDsEiFr1arWrlN6V3mdfjRMZt6LtK/Vp/kreFLnr5QH5+DhvD5nINYZXzwJvXeGq+05iUXcAzVrqWtw==} + resolution: + { + integrity: sha512-ASsj+tpEDsEiFr1arWrlN6V3mdfjRMZt6LtK/Vp/kreFLnr5QH5+DhvD5nINYZXzwJvXeGq+05iUXcAzVrqWtw==, + } dependencies: - '@babel/types': 7.25.9 + "@babel/types": 7.25.9 dev: true /@types/babel__template@7.4.4: - resolution: {integrity: sha512-h/NUaSyG5EyxBIp8YRxo4RMe2/qQgvyowRwVMzhYhBCONbW8PUsg4lkFMrhgZhUe5z3L3MiLDuvyJ/CaPa2A8A==} + resolution: + { + integrity: sha512-h/NUaSyG5EyxBIp8YRxo4RMe2/qQgvyowRwVMzhYhBCONbW8PUsg4lkFMrhgZhUe5z3L3MiLDuvyJ/CaPa2A8A==, + } dependencies: - '@babel/parser': 7.25.9 - '@babel/types': 7.25.9 + "@babel/parser": 7.25.9 + "@babel/types": 7.25.9 dev: true /@types/babel__traverse@7.20.6: - resolution: {integrity: sha512-r1bzfrm0tomOI8g1SzvCaQHo6Lcv6zu0EA+W2kHrt8dyrHQxGzBBL4kdkzIS+jBMV+EYcMAEAqXqYaLJq5rOZg==} + resolution: + { + integrity: sha512-r1bzfrm0tomOI8g1SzvCaQHo6Lcv6zu0EA+W2kHrt8dyrHQxGzBBL4kdkzIS+jBMV+EYcMAEAqXqYaLJq5rOZg==, + } dependencies: - '@babel/types': 7.25.9 + "@babel/types": 7.25.9 dev: true /@types/graceful-fs@4.1.9: - resolution: {integrity: sha512-olP3sd1qOEe5dXTSaFvQG+02VdRXcdytWLAZsAq1PecU8uqQAhkrnbli7DagjtXKW/Bl7YJbUsa8MPcuc8LHEQ==} + resolution: + { + integrity: sha512-olP3sd1qOEe5dXTSaFvQG+02VdRXcdytWLAZsAq1PecU8uqQAhkrnbli7DagjtXKW/Bl7YJbUsa8MPcuc8LHEQ==, + } 
dependencies: - '@types/node': 20.16.14 + "@types/node": 20.16.14 /@types/istanbul-lib-coverage@2.0.6: - resolution: {integrity: sha512-2QF/t/auWm0lsy8XtKVPG19v3sSOQlJe/YHZgfjb/KBBHOGSV+J2q/S671rcq9uTBrLAXmZpqJiaQbMT+zNU1w==} + resolution: + { + integrity: sha512-2QF/t/auWm0lsy8XtKVPG19v3sSOQlJe/YHZgfjb/KBBHOGSV+J2q/S671rcq9uTBrLAXmZpqJiaQbMT+zNU1w==, + } /@types/istanbul-lib-report@3.0.3: - resolution: {integrity: sha512-NQn7AHQnk/RSLOxrBbGyJM/aVQ+pjj5HCgasFxc0K/KhoATfQ/47AyUl15I2yBUpihjmas+a+VJBOqecrFH+uA==} + resolution: + { + integrity: sha512-NQn7AHQnk/RSLOxrBbGyJM/aVQ+pjj5HCgasFxc0K/KhoATfQ/47AyUl15I2yBUpihjmas+a+VJBOqecrFH+uA==, + } dependencies: - '@types/istanbul-lib-coverage': 2.0.6 + "@types/istanbul-lib-coverage": 2.0.6 /@types/istanbul-reports@3.0.4: - resolution: {integrity: sha512-pk2B1NWalF9toCRu6gjBzR69syFjP4Od8WRAX+0mmf9lAjCRicLOWc+ZrxZHx/0XRjotgkF9t6iaMJ+aXcOdZQ==} + resolution: + { + integrity: sha512-pk2B1NWalF9toCRu6gjBzR69syFjP4Od8WRAX+0mmf9lAjCRicLOWc+ZrxZHx/0XRjotgkF9t6iaMJ+aXcOdZQ==, + } dependencies: - '@types/istanbul-lib-report': 3.0.3 + "@types/istanbul-lib-report": 3.0.3 /@types/jest@29.5.13: - resolution: {integrity: sha512-wd+MVEZCHt23V0/L642O5APvspWply/rGY5BcW4SUETo2UzPU3Z26qr8jC2qxpimI2jjx9h7+2cj2FwIr01bXg==} + resolution: + { + integrity: sha512-wd+MVEZCHt23V0/L642O5APvspWply/rGY5BcW4SUETo2UzPU3Z26qr8jC2qxpimI2jjx9h7+2cj2FwIr01bXg==, + } dependencies: expect: 29.7.0 pretty-format: 29.7.0 dev: true /@types/node@20.16.14: - resolution: {integrity: sha512-vtgGzjxLF7QT88qRHtXMzCWpAAmwonE7fwgVjFtXosUva2oSpnIEc3gNO9P7uIfOxKnii2f79/xtOnfreYtDaA==} + resolution: + { + integrity: sha512-vtgGzjxLF7QT88qRHtXMzCWpAAmwonE7fwgVjFtXosUva2oSpnIEc3gNO9P7uIfOxKnii2f79/xtOnfreYtDaA==, + } dependencies: undici-types: 6.19.8 /@types/stack-utils@2.0.3: - resolution: {integrity: sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==} + resolution: + { + integrity: sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==, + } /@types/uuid@10.0.0: - resolution: {integrity: sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ==} + resolution: + { + integrity: sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ==, + } dev: true /@types/yargs-parser@21.0.3: - resolution: {integrity: sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==} + resolution: + { + integrity: sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==, + } /@types/yargs@17.0.33: - resolution: {integrity: sha512-WpxBCKWPLr4xSsHgz511rFJAM+wS28w2zEO1QDNY5zM/S8ok70NNfztH0xwhqKyaK0OHCbN98LDAZuy1ctxDkA==} + resolution: + { + integrity: sha512-WpxBCKWPLr4xSsHgz511rFJAM+wS28w2zEO1QDNY5zM/S8ok70NNfztH0xwhqKyaK0OHCbN98LDAZuy1ctxDkA==, + } dependencies: - '@types/yargs-parser': 21.0.3 + "@types/yargs-parser": 21.0.3 /acorn-walk@8.3.4: - resolution: {integrity: sha512-ueEepnujpqee2o5aIYnvHU6C0A42MNdsIDeqy5BydrkuC5R1ZuUFnm27EeFJGoEHJQgn3uleRvmTXaJgfXbt4g==} - engines: {node: '>=0.4.0'} + resolution: + { + integrity: sha512-ueEepnujpqee2o5aIYnvHU6C0A42MNdsIDeqy5BydrkuC5R1ZuUFnm27EeFJGoEHJQgn3uleRvmTXaJgfXbt4g==, + } + engines: { node: ">=0.4.0" } dependencies: acorn: 8.13.0 dev: true /acorn@8.13.0: - resolution: {integrity: sha512-8zSiw54Oxrdym50NlZ9sUusyO1Z1ZchgRLWRaK6c86XJFClyCgFKetdowBg5bKxyp/u+CDBJG4Mpp0m3HLZl9w==} - engines: {node: '>=0.4.0'} + resolution: + { + integrity: 
sha512-8zSiw54Oxrdym50NlZ9sUusyO1Z1ZchgRLWRaK6c86XJFClyCgFKetdowBg5bKxyp/u+CDBJG4Mpp0m3HLZl9w==, + } + engines: { node: ">=0.4.0" } hasBin: true dev: true /ansi-escapes@4.3.2: - resolution: {integrity: sha512-gKXj5ALrKWQLsYG9jlTRmR/xKluxHV+Z9QEwNIgCfM1/uwPMCuzVVnh5mwTd+OuBZcwSIMbqssNWRm1lE51QaQ==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-gKXj5ALrKWQLsYG9jlTRmR/xKluxHV+Z9QEwNIgCfM1/uwPMCuzVVnh5mwTd+OuBZcwSIMbqssNWRm1lE51QaQ==, + } + engines: { node: ">=8" } dependencies: type-fest: 0.21.3 dev: true /ansi-regex@5.0.1: - resolution: {integrity: sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==, + } + engines: { node: ">=8" } dev: true /ansi-styles@3.2.1: - resolution: {integrity: sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==} - engines: {node: '>=4'} + resolution: + { + integrity: sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==, + } + engines: { node: ">=4" } dependencies: color-convert: 1.9.3 /ansi-styles@4.3.0: - resolution: {integrity: sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==, + } + engines: { node: ">=8" } dependencies: color-convert: 2.0.1 /ansi-styles@5.2.0: - resolution: {integrity: sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==, + } + engines: { node: ">=10" } /anymatch@3.1.3: - resolution: {integrity: sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==} - engines: {node: '>= 8'} + resolution: + { + integrity: sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==, + } + engines: { node: ">= 8" } dependencies: normalize-path: 3.0.0 picomatch: 2.3.1 /arg@4.1.3: - resolution: {integrity: sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==} + resolution: + { + integrity: sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==, + } dev: true /argparse@1.0.10: - resolution: {integrity: sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==} + resolution: + { + integrity: sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==, + } dependencies: sprintf-js: 1.0.3 /async@3.2.6: - resolution: {integrity: sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA==} + resolution: + { + integrity: sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA==, + } dev: true /asynckit@0.4.0: - resolution: {integrity: sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==} + resolution: + { + integrity: sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==, + } dev: false /axios@1.7.7: - resolution: {integrity: 
sha512-S4kL7XrjgBmvdGut0sN3yJxqYzrDOnivkBiN0OFs6hLiUam3UPvswUo0kqGyhqUZGEOytHyumEdXsAkgCOUf3Q==} + resolution: + { + integrity: sha512-S4kL7XrjgBmvdGut0sN3yJxqYzrDOnivkBiN0OFs6hLiUam3UPvswUo0kqGyhqUZGEOytHyumEdXsAkgCOUf3Q==, + } dependencies: follow-redirects: 1.15.9 form-data: 4.0.1 @@ -807,14 +1091,17 @@ packages: dev: false /babel-jest@29.7.0(@babel/core@7.25.9): - resolution: {integrity: sha512-BrvGY3xZSwEcCzKvKsCi2GgHqDqsYkOP4/by5xCgIwGXQxIEh+8ew3gmrE1y7XRR6LHZIj6yLYnUi/mm2KXKBg==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-BrvGY3xZSwEcCzKvKsCi2GgHqDqsYkOP4/by5xCgIwGXQxIEh+8ew3gmrE1y7XRR6LHZIj6yLYnUi/mm2KXKBg==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } peerDependencies: - '@babel/core': ^7.8.0 + "@babel/core": ^7.8.0 dependencies: - '@babel/core': 7.25.9 - '@jest/transform': 29.7.0 - '@types/babel__core': 7.20.5 + "@babel/core": 7.25.9 + "@jest/transform": 29.7.0 + "@types/babel__core": 7.20.5 babel-plugin-istanbul: 6.1.1 babel-preset-jest: 29.6.3(@babel/core@7.25.9) chalk: 4.1.2 @@ -825,84 +1112,111 @@ packages: dev: true /babel-plugin-istanbul@6.1.1: - resolution: {integrity: sha512-Y1IQok9821cC9onCx5otgFfRm7Lm+I+wwxOx738M/WLPZ9Q42m4IG5W0FNX8WLL2gYMZo3JkuXIH2DOpWM+qwA==} - engines: {node: '>=8'} - dependencies: - '@babel/helper-plugin-utils': 7.25.9 - '@istanbuljs/load-nyc-config': 1.1.0 - '@istanbuljs/schema': 0.1.3 + resolution: + { + integrity: sha512-Y1IQok9821cC9onCx5otgFfRm7Lm+I+wwxOx738M/WLPZ9Q42m4IG5W0FNX8WLL2gYMZo3JkuXIH2DOpWM+qwA==, + } + engines: { node: ">=8" } + dependencies: + "@babel/helper-plugin-utils": 7.25.9 + "@istanbuljs/load-nyc-config": 1.1.0 + "@istanbuljs/schema": 0.1.3 istanbul-lib-instrument: 5.2.1 test-exclude: 6.0.0 transitivePeerDependencies: - supports-color /babel-plugin-jest-hoist@29.6.3: - resolution: {integrity: sha512-ESAc/RJvGTFEzRwOTT4+lNDk/GNHMkKbNzsvT0qKRfDyyYTskxB5rnU2njIDYVxXCBHHEI1c0YwHob3WaYujOg==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-ESAc/RJvGTFEzRwOTT4+lNDk/GNHMkKbNzsvT0qKRfDyyYTskxB5rnU2njIDYVxXCBHHEI1c0YwHob3WaYujOg==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } dependencies: - '@babel/template': 7.25.9 - '@babel/types': 7.25.9 - '@types/babel__core': 7.20.5 - '@types/babel__traverse': 7.20.6 + "@babel/template": 7.25.9 + "@babel/types": 7.25.9 + "@types/babel__core": 7.20.5 + "@types/babel__traverse": 7.20.6 dev: true /babel-preset-current-node-syntax@1.1.0(@babel/core@7.25.9): - resolution: {integrity: sha512-ldYss8SbBlWva1bs28q78Ju5Zq1F+8BrqBZZ0VFhLBvhh6lCpC2o3gDJi/5DRLs9FgYZCnmPYIVFU4lRXCkyUw==} + resolution: + { + integrity: sha512-ldYss8SbBlWva1bs28q78Ju5Zq1F+8BrqBZZ0VFhLBvhh6lCpC2o3gDJi/5DRLs9FgYZCnmPYIVFU4lRXCkyUw==, + } peerDependencies: - '@babel/core': ^7.0.0 - dependencies: - '@babel/core': 7.25.9 - '@babel/plugin-syntax-async-generators': 7.8.4(@babel/core@7.25.9) - '@babel/plugin-syntax-bigint': 7.8.3(@babel/core@7.25.9) - '@babel/plugin-syntax-class-properties': 7.12.13(@babel/core@7.25.9) - '@babel/plugin-syntax-class-static-block': 7.14.5(@babel/core@7.25.9) - '@babel/plugin-syntax-import-attributes': 7.25.9(@babel/core@7.25.9) - '@babel/plugin-syntax-import-meta': 7.10.4(@babel/core@7.25.9) - '@babel/plugin-syntax-json-strings': 7.8.3(@babel/core@7.25.9) - '@babel/plugin-syntax-logical-assignment-operators': 7.10.4(@babel/core@7.25.9) - '@babel/plugin-syntax-nullish-coalescing-operator': 7.8.3(@babel/core@7.25.9) - '@babel/plugin-syntax-numeric-separator': 7.10.4(@babel/core@7.25.9) - 
'@babel/plugin-syntax-object-rest-spread': 7.8.3(@babel/core@7.25.9) - '@babel/plugin-syntax-optional-catch-binding': 7.8.3(@babel/core@7.25.9) - '@babel/plugin-syntax-optional-chaining': 7.8.3(@babel/core@7.25.9) - '@babel/plugin-syntax-private-property-in-object': 7.14.5(@babel/core@7.25.9) - '@babel/plugin-syntax-top-level-await': 7.14.5(@babel/core@7.25.9) + "@babel/core": ^7.0.0 + dependencies: + "@babel/core": 7.25.9 + "@babel/plugin-syntax-async-generators": 7.8.4(@babel/core@7.25.9) + "@babel/plugin-syntax-bigint": 7.8.3(@babel/core@7.25.9) + "@babel/plugin-syntax-class-properties": 7.12.13(@babel/core@7.25.9) + "@babel/plugin-syntax-class-static-block": 7.14.5(@babel/core@7.25.9) + "@babel/plugin-syntax-import-attributes": 7.25.9(@babel/core@7.25.9) + "@babel/plugin-syntax-import-meta": 7.10.4(@babel/core@7.25.9) + "@babel/plugin-syntax-json-strings": 7.8.3(@babel/core@7.25.9) + "@babel/plugin-syntax-logical-assignment-operators": 7.10.4(@babel/core@7.25.9) + "@babel/plugin-syntax-nullish-coalescing-operator": 7.8.3(@babel/core@7.25.9) + "@babel/plugin-syntax-numeric-separator": 7.10.4(@babel/core@7.25.9) + "@babel/plugin-syntax-object-rest-spread": 7.8.3(@babel/core@7.25.9) + "@babel/plugin-syntax-optional-catch-binding": 7.8.3(@babel/core@7.25.9) + "@babel/plugin-syntax-optional-chaining": 7.8.3(@babel/core@7.25.9) + "@babel/plugin-syntax-private-property-in-object": 7.14.5(@babel/core@7.25.9) + "@babel/plugin-syntax-top-level-await": 7.14.5(@babel/core@7.25.9) /babel-preset-jest@29.6.3(@babel/core@7.25.9): - resolution: {integrity: sha512-0B3bhxR6snWXJZtR/RliHTDPRgn1sNHOR0yVtq/IiQFyuOVjFS+wuio/R4gSNkyYmKmJB4wGZv2NZanmKmTnNA==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-0B3bhxR6snWXJZtR/RliHTDPRgn1sNHOR0yVtq/IiQFyuOVjFS+wuio/R4gSNkyYmKmJB4wGZv2NZanmKmTnNA==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } peerDependencies: - '@babel/core': ^7.0.0 + "@babel/core": ^7.0.0 dependencies: - '@babel/core': 7.25.9 + "@babel/core": 7.25.9 babel-plugin-jest-hoist: 29.6.3 babel-preset-current-node-syntax: 1.1.0(@babel/core@7.25.9) dev: true /balanced-match@1.0.2: - resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==} + resolution: + { + integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==, + } /brace-expansion@1.1.11: - resolution: {integrity: sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==} + resolution: + { + integrity: sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==, + } dependencies: balanced-match: 1.0.2 concat-map: 0.0.1 /brace-expansion@2.0.1: - resolution: {integrity: sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==} + resolution: + { + integrity: sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==, + } dependencies: balanced-match: 1.0.2 dev: true /braces@3.0.3: - resolution: {integrity: sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==, + } + engines: { node: ">=8" } dependencies: fill-range: 7.1.1 /browserslist@4.24.2: - resolution: {integrity: 
sha512-ZIc+Q62revdMcqC6aChtW4jz3My3klmCO1fEmINZY/8J3EpBg5/A/D0AKmBveUh6pgoeycoMkVMko84tuYS+Gg==} - engines: {node: ^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7} + resolution: + { + integrity: sha512-ZIc+Q62revdMcqC6aChtW4jz3My3klmCO1fEmINZY/8J3EpBg5/A/D0AKmBveUh6pgoeycoMkVMko84tuYS+Gg==, + } + engines: { node: ^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7 } hasBin: true dependencies: caniuse-lite: 1.0.30001669 @@ -911,69 +1225,108 @@ packages: update-browserslist-db: 1.1.1(browserslist@4.24.2) /bs-logger@0.2.6: - resolution: {integrity: sha512-pd8DCoxmbgc7hyPKOvxtqNcjYoOsABPQdcCUjGp3d42VR2CX1ORhk2A87oqqu5R1kk+76nsxZupkmyd+MVtCog==} - engines: {node: '>= 6'} + resolution: + { + integrity: sha512-pd8DCoxmbgc7hyPKOvxtqNcjYoOsABPQdcCUjGp3d42VR2CX1ORhk2A87oqqu5R1kk+76nsxZupkmyd+MVtCog==, + } + engines: { node: ">= 6" } dependencies: fast-json-stable-stringify: 2.1.0 dev: true /bser@2.1.1: - resolution: {integrity: sha512-gQxTNE/GAfIIrmHLUE3oJyp5FO6HRBfhjnw4/wMmA63ZGDJnWBmgY/lyQBpnDUkGmAhbSe39tx2d/iTOAfglwQ==} + resolution: + { + integrity: sha512-gQxTNE/GAfIIrmHLUE3oJyp5FO6HRBfhjnw4/wMmA63ZGDJnWBmgY/lyQBpnDUkGmAhbSe39tx2d/iTOAfglwQ==, + } dependencies: node-int64: 0.4.0 /buffer-from@1.1.2: - resolution: {integrity: sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==} + resolution: + { + integrity: sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==, + } dev: true /callsites@3.1.0: - resolution: {integrity: sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==, + } + engines: { node: ">=6" } dev: true /camelcase@5.3.1: - resolution: {integrity: sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==, + } + engines: { node: ">=6" } /camelcase@6.3.0: - resolution: {integrity: sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==, + } + engines: { node: ">=10" } dev: true /caniuse-lite@1.0.30001669: - resolution: {integrity: sha512-DlWzFDJqstqtIVx1zeSpIMLjunf5SmwOw0N2Ck/QSQdS8PLS4+9HrLaYei4w8BIAL7IB/UEDu889d8vhCTPA0w==} + resolution: + { + integrity: sha512-DlWzFDJqstqtIVx1zeSpIMLjunf5SmwOw0N2Ck/QSQdS8PLS4+9HrLaYei4w8BIAL7IB/UEDu889d8vhCTPA0w==, + } /chalk@2.4.2: - resolution: {integrity: sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==} - engines: {node: '>=4'} + resolution: + { + integrity: sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==, + } + engines: { node: ">=4" } dependencies: ansi-styles: 3.2.1 escape-string-regexp: 1.0.5 supports-color: 5.5.0 /chalk@4.1.2: - resolution: {integrity: sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==, + } + engines: { node: ">=10" } dependencies: ansi-styles: 4.3.0 supports-color: 7.2.0 
/char-regex@1.0.2: - resolution: {integrity: sha512-kWWXztvZ5SBQV+eRgKFeh8q5sLuZY2+8WUIzlxWVTg+oGwY14qylx1KbKzHd8P6ZYkAg0xyIDU9JMHhyJMZ1jw==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-kWWXztvZ5SBQV+eRgKFeh8q5sLuZY2+8WUIzlxWVTg+oGwY14qylx1KbKzHd8P6ZYkAg0xyIDU9JMHhyJMZ1jw==, + } + engines: { node: ">=10" } dev: true /ci-info@3.9.0: - resolution: {integrity: sha512-NIxF55hv4nSqQswkAeiOi1r83xy8JldOFDTWiug55KBu9Jnblncd2U6ViHmYgHf01TPZS77NJBhBMKdWj9HQMQ==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-NIxF55hv4nSqQswkAeiOi1r83xy8JldOFDTWiug55KBu9Jnblncd2U6ViHmYgHf01TPZS77NJBhBMKdWj9HQMQ==, + } + engines: { node: ">=8" } /cjs-module-lexer@1.4.1: - resolution: {integrity: sha512-cuSVIHi9/9E/+821Qjdvngor+xpnlwnuwIyZOaLmHBVdXL+gP+I6QQB9VkO7RI77YIcTV+S1W9AreJ5eN63JBA==} + resolution: + { + integrity: sha512-cuSVIHi9/9E/+821Qjdvngor+xpnlwnuwIyZOaLmHBVdXL+gP+I6QQB9VkO7RI77YIcTV+S1W9AreJ5eN63JBA==, + } dev: true /cliui@8.0.1: - resolution: {integrity: sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==} - engines: {node: '>=12'} + resolution: + { + integrity: sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==, + } + engines: { node: ">=12" } dependencies: string-width: 4.2.3 strip-ansi: 6.0.1 @@ -981,55 +1334,88 @@ packages: dev: true /co@4.6.0: - resolution: {integrity: sha512-QVb0dM5HvG+uaxitm8wONl7jltx8dqhfU33DcqtOZcLSVIKSDDLDi7+0LbAKiyI8hD9u42m2YxXSkMGWThaecQ==} - engines: {iojs: '>= 1.0.0', node: '>= 0.12.0'} + resolution: + { + integrity: sha512-QVb0dM5HvG+uaxitm8wONl7jltx8dqhfU33DcqtOZcLSVIKSDDLDi7+0LbAKiyI8hD9u42m2YxXSkMGWThaecQ==, + } + engines: { iojs: ">= 1.0.0", node: ">= 0.12.0" } dev: true /collect-v8-coverage@1.0.2: - resolution: {integrity: sha512-lHl4d5/ONEbLlJvaJNtsF/Lz+WvB07u2ycqTYbdrq7UypDXailES4valYb2eWiJFxZlVmpGekfqoxQhzyFdT4Q==} + resolution: + { + integrity: sha512-lHl4d5/ONEbLlJvaJNtsF/Lz+WvB07u2ycqTYbdrq7UypDXailES4valYb2eWiJFxZlVmpGekfqoxQhzyFdT4Q==, + } dev: true /color-convert@1.9.3: - resolution: {integrity: sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==} + resolution: + { + integrity: sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==, + } dependencies: color-name: 1.1.3 /color-convert@2.0.1: - resolution: {integrity: sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==} - engines: {node: '>=7.0.0'} + resolution: + { + integrity: sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==, + } + engines: { node: ">=7.0.0" } dependencies: color-name: 1.1.4 /color-name@1.1.3: - resolution: {integrity: sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==} + resolution: + { + integrity: sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==, + } /color-name@1.1.4: - resolution: {integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==} + resolution: + { + integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==, + } /combined-stream@1.0.8: - resolution: {integrity: sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==} - engines: {node: '>= 0.8'} + resolution: + { + integrity: 
sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==, + } + engines: { node: ">= 0.8" } dependencies: delayed-stream: 1.0.0 dev: false /concat-map@0.0.1: - resolution: {integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==} + resolution: + { + integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==, + } /convert-source-map@2.0.0: - resolution: {integrity: sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==} + resolution: + { + integrity: sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==, + } /core-js@3.38.1: - resolution: {integrity: sha512-OP35aUorbU3Zvlx7pjsFdu1rGNnD4pgw/CWoYzRY3t2EzoVT7shKHY1dlAy3f41cGIO7ZDPQimhGFTlEYkG/Hw==} + resolution: + { + integrity: sha512-OP35aUorbU3Zvlx7pjsFdu1rGNnD4pgw/CWoYzRY3t2EzoVT7shKHY1dlAy3f41cGIO7ZDPQimhGFTlEYkG/Hw==, + } requiresBuild: true dev: false /create-jest@29.7.0(@types/node@20.16.14)(ts-node@10.9.2): - resolution: {integrity: sha512-Adz2bdH0Vq3F53KEMJOoftQFutWCukm6J24wbPWRO4k1kMY7gS7ds/uoJkNuV8wDCtWWnuwGcJwpWcih+zEW1Q==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-Adz2bdH0Vq3F53KEMJOoftQFutWCukm6J24wbPWRO4k1kMY7gS7ds/uoJkNuV8wDCtWWnuwGcJwpWcih+zEW1Q==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } hasBin: true dependencies: - '@jest/types': 29.6.3 + "@jest/types": 29.6.3 chalk: 4.1.2 exit: 0.1.2 graceful-fs: 4.2.11 @@ -1037,19 +1423,25 @@ packages: jest-util: 29.7.0 prompts: 2.4.2 transitivePeerDependencies: - - '@types/node' + - "@types/node" - babel-plugin-macros - supports-color - ts-node dev: true /create-require@1.1.1: - resolution: {integrity: sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==} + resolution: + { + integrity: sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==, + } dev: true /cross-spawn@7.0.3: - resolution: {integrity: sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==} - engines: {node: '>= 8'} + resolution: + { + integrity: sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==, + } + engines: { node: ">= 8" } dependencies: path-key: 3.1.1 shebang-command: 2.0.0 @@ -1057,10 +1449,13 @@ packages: dev: true /debug@4.3.7: - resolution: {integrity: sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==} - engines: {node: '>=6.0'} + resolution: + { + integrity: sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==, + } + engines: { node: ">=6.0" } peerDependencies: - supports-color: '*' + supports-color: "*" peerDependenciesMeta: supports-color: optional: true @@ -1068,7 +1463,10 @@ packages: ms: 2.1.3 /dedent@1.5.3: - resolution: {integrity: sha512-NHQtfOOW68WD8lgypbLA5oT+Bt0xXJhiYvoR6SmmNXZfpzOGXwdKWmcwG8N7PwVVWV3eF/68nmD9BaJSsTBhyQ==} + resolution: + { + integrity: sha512-NHQtfOOW68WD8lgypbLA5oT+Bt0xXJhiYvoR6SmmNXZfpzOGXwdKWmcwG8N7PwVVWV3eF/68nmD9BaJSsTBhyQ==, + } peerDependencies: babel-plugin-macros: ^3.1.0 peerDependenciesMeta: @@ -1077,75 +1475,120 @@ packages: dev: true /deepmerge@4.3.1: - resolution: {integrity: sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==} - engines: {node: '>=0.10.0'} + resolution: + { + integrity: 
sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==, + } + engines: { node: ">=0.10.0" } dev: true /delayed-stream@1.0.0: - resolution: {integrity: sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==} - engines: {node: '>=0.4.0'} + resolution: + { + integrity: sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==, + } + engines: { node: ">=0.4.0" } dev: false /detect-newline@3.1.0: - resolution: {integrity: sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==, + } + engines: { node: ">=8" } dev: true /diff-sequences@29.6.3: - resolution: {integrity: sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } /diff@4.0.2: - resolution: {integrity: sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==} - engines: {node: '>=0.3.1'} + resolution: + { + integrity: sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==, + } + engines: { node: ">=0.3.1" } dev: true /ejs@3.1.10: - resolution: {integrity: sha512-UeJmFfOrAQS8OJWPZ4qtgHyWExa088/MtK5UEyoJGFH67cDEXkZSviOiKRCZ4Xij0zxI3JECgYs3oKx+AizQBA==} - engines: {node: '>=0.10.0'} + resolution: + { + integrity: sha512-UeJmFfOrAQS8OJWPZ4qtgHyWExa088/MtK5UEyoJGFH67cDEXkZSviOiKRCZ4Xij0zxI3JECgYs3oKx+AizQBA==, + } + engines: { node: ">=0.10.0" } hasBin: true dependencies: jake: 10.9.2 dev: true /electron-to-chromium@1.5.42: - resolution: {integrity: sha512-gIfKavKDw1mhvic9nbzA5lZw8QSHpdMwLwXc0cWidQz9B15pDoDdDH4boIatuFfeoCatb3a/NGL6CYRVFxGZ9g==} + resolution: + { + integrity: sha512-gIfKavKDw1mhvic9nbzA5lZw8QSHpdMwLwXc0cWidQz9B15pDoDdDH4boIatuFfeoCatb3a/NGL6CYRVFxGZ9g==, + } /emittery@0.13.1: - resolution: {integrity: sha512-DeWwawk6r5yR9jFgnDKYt4sLS0LmHJJi3ZOnb5/JdbYwj3nW+FxQnHIjhBKz8YLC7oRNPVM9NQ47I3CVx34eqQ==} - engines: {node: '>=12'} + resolution: + { + integrity: sha512-DeWwawk6r5yR9jFgnDKYt4sLS0LmHJJi3ZOnb5/JdbYwj3nW+FxQnHIjhBKz8YLC7oRNPVM9NQ47I3CVx34eqQ==, + } + engines: { node: ">=12" } dev: true /emoji-regex@8.0.0: - resolution: {integrity: sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==} + resolution: + { + integrity: sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==, + } dev: true /error-ex@1.3.2: - resolution: {integrity: sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==} + resolution: + { + integrity: sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==, + } dependencies: is-arrayish: 0.2.1 dev: true /escalade@3.2.0: - resolution: {integrity: sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==, + } + engines: { node: ">=6" } /escape-string-regexp@1.0.5: - resolution: {integrity: 
sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==} - engines: {node: '>=0.8.0'} + resolution: + { + integrity: sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==, + } + engines: { node: ">=0.8.0" } /escape-string-regexp@2.0.0: - resolution: {integrity: sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w==, + } + engines: { node: ">=8" } /esprima@4.0.1: - resolution: {integrity: sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==} - engines: {node: '>=4'} + resolution: + { + integrity: sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==, + } + engines: { node: ">=4" } hasBin: true /execa@5.1.1: - resolution: {integrity: sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg==, + } + engines: { node: ">=10" } dependencies: cross-spawn: 7.0.3 get-stream: 6.0.1 @@ -1159,64 +1602,94 @@ packages: dev: true /exit@0.1.2: - resolution: {integrity: sha512-Zk/eNKV2zbjpKzrsQ+n1G6poVbErQxJ0LBOJXaKZ1EViLzH+hrLu9cdXI4zw9dBQJslwBEpbQ2P1oS7nDxs6jQ==} - engines: {node: '>= 0.8.0'} + resolution: + { + integrity: sha512-Zk/eNKV2zbjpKzrsQ+n1G6poVbErQxJ0LBOJXaKZ1EViLzH+hrLu9cdXI4zw9dBQJslwBEpbQ2P1oS7nDxs6jQ==, + } + engines: { node: ">= 0.8.0" } dev: true /expect@29.7.0: - resolution: {integrity: sha512-2Zks0hf1VLFYI1kbh0I5jP3KHHyCHpkfyHBzsSXRFgl/Bg9mWYfMW8oD+PdMPlEwy5HNsR9JutYy6pMeOh61nw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-2Zks0hf1VLFYI1kbh0I5jP3KHHyCHpkfyHBzsSXRFgl/Bg9mWYfMW8oD+PdMPlEwy5HNsR9JutYy6pMeOh61nw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } dependencies: - '@jest/expect-utils': 29.7.0 + "@jest/expect-utils": 29.7.0 jest-get-type: 29.6.3 jest-matcher-utils: 29.7.0 jest-message-util: 29.7.0 jest-util: 29.7.0 /fast-json-stable-stringify@2.1.0: - resolution: {integrity: sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==} + resolution: + { + integrity: sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==, + } /fb-watchman@2.0.2: - resolution: {integrity: sha512-p5161BqbuCaSnB8jIbzQHOlpgsPmK5rJVDfDKO91Axs5NC1uu3HRQm6wt9cd9/+GtQQIO53JdGXXoyDpTAsgYA==} + resolution: + { + integrity: sha512-p5161BqbuCaSnB8jIbzQHOlpgsPmK5rJVDfDKO91Axs5NC1uu3HRQm6wt9cd9/+GtQQIO53JdGXXoyDpTAsgYA==, + } dependencies: bser: 2.1.1 /fflate@0.4.8: - resolution: {integrity: sha512-FJqqoDBR00Mdj9ppamLa/Y7vxm+PRmNWA67N846RvsoYVMKB4q3y/de5PA7gUmRMYK/8CMz2GDZQmCRN1wBcWA==} + resolution: + { + integrity: sha512-FJqqoDBR00Mdj9ppamLa/Y7vxm+PRmNWA67N846RvsoYVMKB4q3y/de5PA7gUmRMYK/8CMz2GDZQmCRN1wBcWA==, + } dev: false /filelist@1.0.4: - resolution: {integrity: sha512-w1cEuf3S+DrLCQL7ET6kz+gmlJdbq9J7yXCSjK/OZCPA+qEN1WyF4ZAf0YYJa4/shHJra2t/d/r8SV4Ji+x+8Q==} + resolution: + { + integrity: sha512-w1cEuf3S+DrLCQL7ET6kz+gmlJdbq9J7yXCSjK/OZCPA+qEN1WyF4ZAf0YYJa4/shHJra2t/d/r8SV4Ji+x+8Q==, + } dependencies: minimatch: 5.1.6 dev: true /fill-range@7.1.1: - resolution: {integrity: 
sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==, + } + engines: { node: ">=8" } dependencies: to-regex-range: 5.0.1 /find-up@4.1.0: - resolution: {integrity: sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==, + } + engines: { node: ">=8" } dependencies: locate-path: 5.0.0 path-exists: 4.0.0 /follow-redirects@1.15.9: - resolution: {integrity: sha512-gew4GsXizNgdoRyqmyfMHyAmXsZDk6mHkSxZFCzW9gwlbtOW44CDtYavM+y+72qD/Vq2l550kMF52DT8fOLJqQ==} - engines: {node: '>=4.0'} + resolution: + { + integrity: sha512-gew4GsXizNgdoRyqmyfMHyAmXsZDk6mHkSxZFCzW9gwlbtOW44CDtYavM+y+72qD/Vq2l550kMF52DT8fOLJqQ==, + } + engines: { node: ">=4.0" } peerDependencies: - debug: '*' + debug: "*" peerDependenciesMeta: debug: optional: true dev: false /form-data@4.0.1: - resolution: {integrity: sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==} - engines: {node: '>= 6'} + resolution: + { + integrity: sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==, + } + engines: { node: ">= 6" } dependencies: asynckit: 0.4.0 combined-stream: 1.0.8 @@ -1224,39 +1697,63 @@ packages: dev: false /fs.realpath@1.0.0: - resolution: {integrity: sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==} + resolution: + { + integrity: sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==, + } /fsevents@2.3.3: - resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==} - engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} + resolution: + { + integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==, + } + engines: { node: ^8.16.0 || ^10.6.0 || >=11.0.0 } os: [darwin] requiresBuild: true optional: true /function-bind@1.1.2: - resolution: {integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==} + resolution: + { + integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==, + } dev: true /gensync@1.0.0-beta.2: - resolution: {integrity: sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==} - engines: {node: '>=6.9.0'} + resolution: + { + integrity: sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==, + } + engines: { node: ">=6.9.0" } /get-caller-file@2.0.5: - resolution: {integrity: sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==} - engines: {node: 6.* || 8.* || >= 10.*} + resolution: + { + integrity: sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==, + } + engines: { node: 6.* || 8.* || >= 10.* } dev: true /get-package-type@0.1.0: - resolution: {integrity: sha512-pjzuKtY64GYfWizNAJ0fr9VqttZkNiK2iS430LtIHzjBEr6bX8Am2zm4sW4Ro5wjWW5cAlRL1qAMTcXbjNAO2Q==} - engines: {node: '>=8.0.0'} + resolution: + { + integrity: sha512-pjzuKtY64GYfWizNAJ0fr9VqttZkNiK2iS430LtIHzjBEr6bX8Am2zm4sW4Ro5wjWW5cAlRL1qAMTcXbjNAO2Q==, + 
} + engines: { node: ">=8.0.0" } /get-stream@6.0.1: - resolution: {integrity: sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==, + } + engines: { node: ">=10" } dev: true /glob@7.2.3: - resolution: {integrity: sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==} + resolution: + { + integrity: sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==, + } deprecated: Glob versions prior to v9 are no longer supported dependencies: fs.realpath: 1.0.0 @@ -1267,39 +1764,63 @@ packages: path-is-absolute: 1.0.1 /globals@11.12.0: - resolution: {integrity: sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==} - engines: {node: '>=4'} + resolution: + { + integrity: sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==, + } + engines: { node: ">=4" } /graceful-fs@4.2.11: - resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==} + resolution: + { + integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==, + } /has-flag@3.0.0: - resolution: {integrity: sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==} - engines: {node: '>=4'} + resolution: + { + integrity: sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==, + } + engines: { node: ">=4" } /has-flag@4.0.0: - resolution: {integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==, + } + engines: { node: ">=8" } /hasown@2.0.2: - resolution: {integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==} - engines: {node: '>= 0.4'} + resolution: + { + integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==, + } + engines: { node: ">= 0.4" } dependencies: function-bind: 1.1.2 dev: true /html-escaper@2.0.2: - resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==} + resolution: + { + integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==, + } dev: true /human-signals@2.1.0: - resolution: {integrity: sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==} - engines: {node: '>=10.17.0'} + resolution: + { + integrity: sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==, + } + engines: { node: ">=10.17.0" } dev: true /import-local@3.2.0: - resolution: {integrity: sha512-2SPlun1JUPWoM6t3F0dw0FkCF/jWY8kttcY4f599GLTSjh2OCuuhdTkJQsEcZzBqbXZGKMK2OqW1oZsjtf/gQA==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-2SPlun1JUPWoM6t3F0dw0FkCF/jWY8kttcY4f599GLTSjh2OCuuhdTkJQsEcZzBqbXZGKMK2OqW1oZsjtf/gQA==, + } + engines: { node: ">=8" } hasBin: true dependencies: pkg-dir: 4.2.0 @@ -1307,76 +1828,115 @@ packages: dev: true /imurmurhash@0.1.4: - resolution: {integrity: 
sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==} - engines: {node: '>=0.8.19'} + resolution: + { + integrity: sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==, + } + engines: { node: ">=0.8.19" } /inflight@1.0.6: - resolution: {integrity: sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==} + resolution: + { + integrity: sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==, + } deprecated: This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful. dependencies: once: 1.4.0 wrappy: 1.0.2 /inherits@2.0.4: - resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==} + resolution: + { + integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==, + } /is-arrayish@0.2.1: - resolution: {integrity: sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==} + resolution: + { + integrity: sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==, + } dev: true /is-core-module@2.15.1: - resolution: {integrity: sha512-z0vtXSwucUJtANQWldhbtbt7BnL0vxiFjIdDLAatwhDYty2bad6s+rijD6Ri4YuYJubLzIJLUidCh09e1djEVQ==} - engines: {node: '>= 0.4'} + resolution: + { + integrity: sha512-z0vtXSwucUJtANQWldhbtbt7BnL0vxiFjIdDLAatwhDYty2bad6s+rijD6Ri4YuYJubLzIJLUidCh09e1djEVQ==, + } + engines: { node: ">= 0.4" } dependencies: hasown: 2.0.2 dev: true /is-fullwidth-code-point@3.0.0: - resolution: {integrity: sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==, + } + engines: { node: ">=8" } dev: true /is-generator-fn@2.1.0: - resolution: {integrity: sha512-cTIB4yPYL/Grw0EaSzASzg6bBy9gqCofvWN8okThAYIxKJZC+udlRAmGbM0XLeniEJSs8uEgHPGuHSe1XsOLSQ==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-cTIB4yPYL/Grw0EaSzASzg6bBy9gqCofvWN8okThAYIxKJZC+udlRAmGbM0XLeniEJSs8uEgHPGuHSe1XsOLSQ==, + } + engines: { node: ">=6" } dev: true /is-number@7.0.0: - resolution: {integrity: sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==} - engines: {node: '>=0.12.0'} + resolution: + { + integrity: sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==, + } + engines: { node: ">=0.12.0" } /is-stream@2.0.1: - resolution: {integrity: sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==, + } + engines: { node: ">=8" } dev: true /isexe@2.0.0: - resolution: {integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==} + resolution: + { + integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==, + } dev: true /istanbul-lib-coverage@3.2.2: - resolution: {integrity: sha512-O8dpsF+r0WV/8MNRKfnmrtCWhuKjxrq2w+jpzBL5UZKTi2LeVWnWOmWRxFlesJONmc+wLAGvKQZEOanko0LFTg==} - 
engines: {node: '>=8'} + resolution: + { + integrity: sha512-O8dpsF+r0WV/8MNRKfnmrtCWhuKjxrq2w+jpzBL5UZKTi2LeVWnWOmWRxFlesJONmc+wLAGvKQZEOanko0LFTg==, + } + engines: { node: ">=8" } /istanbul-lib-instrument@5.2.1: - resolution: {integrity: sha512-pzqtp31nLv/XFOzXGuvhCb8qhjmTVo5vjVk19XE4CRlSWz0KoeJ3bw9XsA7nOp9YBf4qHjwBxkDzKcME/J29Yg==} - engines: {node: '>=8'} - dependencies: - '@babel/core': 7.25.9 - '@babel/parser': 7.25.9 - '@istanbuljs/schema': 0.1.3 + resolution: + { + integrity: sha512-pzqtp31nLv/XFOzXGuvhCb8qhjmTVo5vjVk19XE4CRlSWz0KoeJ3bw9XsA7nOp9YBf4qHjwBxkDzKcME/J29Yg==, + } + engines: { node: ">=8" } + dependencies: + "@babel/core": 7.25.9 + "@babel/parser": 7.25.9 + "@istanbuljs/schema": 0.1.3 istanbul-lib-coverage: 3.2.2 semver: 6.3.1 transitivePeerDependencies: - supports-color /istanbul-lib-instrument@6.0.3: - resolution: {integrity: sha512-Vtgk7L/R2JHyyGW07spoFlB8/lpjiOLTjMdms6AFMraYt3BaJauod/NGrfnVG/y4Ix1JEuMRPDPEj2ua+zz1/Q==} - engines: {node: '>=10'} - dependencies: - '@babel/core': 7.25.9 - '@babel/parser': 7.25.9 - '@istanbuljs/schema': 0.1.3 + resolution: + { + integrity: sha512-Vtgk7L/R2JHyyGW07spoFlB8/lpjiOLTjMdms6AFMraYt3BaJauod/NGrfnVG/y4Ix1JEuMRPDPEj2ua+zz1/Q==, + } + engines: { node: ">=10" } + dependencies: + "@babel/core": 7.25.9 + "@babel/parser": 7.25.9 + "@istanbuljs/schema": 0.1.3 istanbul-lib-coverage: 3.2.2 semver: 7.6.3 transitivePeerDependencies: @@ -1384,8 +1944,11 @@ packages: dev: true /istanbul-lib-report@3.0.1: - resolution: {integrity: sha512-GCfE1mtsHGOELCU8e/Z7YWzpmybrx/+dSTfLrvY8qRmaY6zXTKWn6WQIjaAFw069icm6GVMNkgu0NzI4iPZUNw==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-GCfE1mtsHGOELCU8e/Z7YWzpmybrx/+dSTfLrvY8qRmaY6zXTKWn6WQIjaAFw069icm6GVMNkgu0NzI4iPZUNw==, + } + engines: { node: ">=10" } dependencies: istanbul-lib-coverage: 3.2.2 make-dir: 4.0.0 @@ -1393,8 +1956,11 @@ packages: dev: true /istanbul-lib-source-maps@4.0.1: - resolution: {integrity: sha512-n3s8EwkdFIJCG3BPKBYvskgXGoy88ARzvegkitk60NxRdwltLOTaH7CUiMRXvwYorl0Q712iEjcWB+fK/MrWVw==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-n3s8EwkdFIJCG3BPKBYvskgXGoy88ARzvegkitk60NxRdwltLOTaH7CUiMRXvwYorl0Q712iEjcWB+fK/MrWVw==, + } + engines: { node: ">=10" } dependencies: debug: 4.3.7 istanbul-lib-coverage: 3.2.2 @@ -1404,16 +1970,22 @@ packages: dev: true /istanbul-reports@3.1.7: - resolution: {integrity: sha512-BewmUXImeuRk2YY0PVbxgKAysvhRPUQE0h5QRM++nVWyubKGV0l8qQ5op8+B2DOmwSe63Jivj0BjkPQVf8fP5g==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-BewmUXImeuRk2YY0PVbxgKAysvhRPUQE0h5QRM++nVWyubKGV0l8qQ5op8+B2DOmwSe63Jivj0BjkPQVf8fP5g==, + } + engines: { node: ">=8" } dependencies: html-escaper: 2.0.2 istanbul-lib-report: 3.0.1 dev: true /jake@10.9.2: - resolution: {integrity: sha512-2P4SQ0HrLQ+fw6llpLnOaGAvN2Zu6778SJMrCUwns4fOoG9ayrTiZk3VV8sCPkVZF8ab0zksVpS8FDY5pRCNBA==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-2P4SQ0HrLQ+fw6llpLnOaGAvN2Zu6778SJMrCUwns4fOoG9ayrTiZk3VV8sCPkVZF8ab0zksVpS8FDY5pRCNBA==, + } + engines: { node: ">=10" } hasBin: true dependencies: async: 3.2.6 @@ -1423,8 +1995,11 @@ packages: dev: true /jest-changed-files@29.7.0: - resolution: {integrity: sha512-fEArFiwf1BpQ+4bXSprcDc3/x4HSzL4al2tozwVpDFpsxALjLYdyiIK4e5Vz66GQJIbXJ82+35PtysofptNX2w==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-fEArFiwf1BpQ+4bXSprcDc3/x4HSzL4al2tozwVpDFpsxALjLYdyiIK4e5Vz66GQJIbXJ82+35PtysofptNX2w==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } dependencies: execa: 5.1.1 
jest-util: 29.7.0 @@ -1432,14 +2007,17 @@ packages: dev: true /jest-circus@29.7.0: - resolution: {integrity: sha512-3E1nCMgipcTkCocFwM90XXQab9bS+GMsjdpmPrlelaxwD93Ad8iVEjX/vvHPdLPnFf+L40u+5+iutRdA1N9myw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - dependencies: - '@jest/environment': 29.7.0 - '@jest/expect': 29.7.0 - '@jest/test-result': 29.7.0 - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + resolution: + { + integrity: sha512-3E1nCMgipcTkCocFwM90XXQab9bS+GMsjdpmPrlelaxwD93Ad8iVEjX/vvHPdLPnFf+L40u+5+iutRdA1N9myw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + dependencies: + "@jest/environment": 29.7.0 + "@jest/expect": 29.7.0 + "@jest/test-result": 29.7.0 + "@jest/types": 29.6.3 + "@types/node": 20.16.14 chalk: 4.1.2 co: 4.6.0 dedent: 1.5.3 @@ -1461,8 +2039,11 @@ packages: dev: true /jest-cli@29.7.0(@types/node@20.16.14)(ts-node@10.9.2): - resolution: {integrity: sha512-OVVobw2IubN/GSYsxETi+gOe7Ka59EFMR/twOU3Jb2GnKKeMGJB5SGUUrEz3SFVmJASUdZUzy83sLNNQ2gZslg==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-OVVobw2IubN/GSYsxETi+gOe7Ka59EFMR/twOU3Jb2GnKKeMGJB5SGUUrEz3SFVmJASUdZUzy83sLNNQ2gZslg==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } hasBin: true peerDependencies: node-notifier: ^8.0.1 || ^9.0.0 || ^10.0.0 @@ -1470,9 +2051,9 @@ packages: node-notifier: optional: true dependencies: - '@jest/core': 29.7.0(ts-node@10.9.2) - '@jest/test-result': 29.7.0 - '@jest/types': 29.6.3 + "@jest/core": 29.7.0(ts-node@10.9.2) + "@jest/test-result": 29.7.0 + "@jest/types": 29.6.3 chalk: 4.1.2 create-jest: 29.7.0(@types/node@20.16.14)(ts-node@10.9.2) exit: 0.1.2 @@ -1482,28 +2063,31 @@ packages: jest-validate: 29.7.0 yargs: 17.7.2 transitivePeerDependencies: - - '@types/node' + - "@types/node" - babel-plugin-macros - supports-color - ts-node dev: true /jest-config@29.7.0(@types/node@20.16.14)(ts-node@10.9.2): - resolution: {integrity: sha512-uXbpfeQ7R6TZBqI3/TxCU4q4ttk3u0PJeC+E0zbfSoSjq6bJ7buBPxzQPL0ifrkY4DNu4JUdk0ImlBUYi840eQ==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-uXbpfeQ7R6TZBqI3/TxCU4q4ttk3u0PJeC+E0zbfSoSjq6bJ7buBPxzQPL0ifrkY4DNu4JUdk0ImlBUYi840eQ==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } peerDependencies: - '@types/node': '*' - ts-node: '>=9.0.0' + "@types/node": "*" + ts-node: ">=9.0.0" peerDependenciesMeta: - '@types/node': + "@types/node": optional: true ts-node: optional: true dependencies: - '@babel/core': 7.25.9 - '@jest/test-sequencer': 29.7.0 - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + "@babel/core": 7.25.9 + "@jest/test-sequencer": 29.7.0 + "@jest/types": 29.6.3 + "@types/node": 20.16.14 babel-jest: 29.7.0(@babel/core@7.25.9) chalk: 4.1.2 ci-info: 3.9.0 @@ -1530,8 +2114,11 @@ packages: dev: true /jest-diff@29.7.0: - resolution: {integrity: sha512-LMIgiIrhigmPrs03JHpxUh2yISK3vLFPkAodPeo0+BuF7wA2FoQbkEg1u8gBYBThncu7e1oEDUfIXVuTqLRUjw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-LMIgiIrhigmPrs03JHpxUh2yISK3vLFPkAodPeo0+BuF7wA2FoQbkEg1u8gBYBThncu7e1oEDUfIXVuTqLRUjw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } dependencies: chalk: 4.1.2 diff-sequences: 29.6.3 @@ -1539,17 +2126,23 @@ packages: pretty-format: 29.7.0 /jest-docblock@29.7.0: - resolution: {integrity: sha512-q617Auw3A612guyaFgsbFeYpNP5t2aoUNLwBUbc/0kD1R4t9ixDbyFTHd1nok4epoVFpr7PmeWHrhvuV3XaJ4g==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: 
sha512-q617Auw3A612guyaFgsbFeYpNP5t2aoUNLwBUbc/0kD1R4t9ixDbyFTHd1nok4epoVFpr7PmeWHrhvuV3XaJ4g==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } dependencies: detect-newline: 3.1.0 dev: true /jest-each@29.7.0: - resolution: {integrity: sha512-gns+Er14+ZrEoC5fhOfYCY1LOHHr0TI+rQUHZS8Ttw2l7gl+80eHc/gFf2Ktkw0+SIACDTeWvpFcv3B04VembQ==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-gns+Er14+ZrEoC5fhOfYCY1LOHHr0TI+rQUHZS8Ttw2l7gl+80eHc/gFf2Ktkw0+SIACDTeWvpFcv3B04VembQ==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } dependencies: - '@jest/types': 29.6.3 + "@jest/types": 29.6.3 chalk: 4.1.2 jest-get-type: 29.6.3 jest-util: 29.7.0 @@ -1557,28 +2150,37 @@ packages: dev: true /jest-environment-node@29.7.0: - resolution: {integrity: sha512-DOSwCRqXirTOyheM+4d5YZOrWcdu0LNZ87ewUoywbcb2XR4wKgqiG8vNeYwhjFMbEkfju7wx2GYH0P2gevGvFw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - dependencies: - '@jest/environment': 29.7.0 - '@jest/fake-timers': 29.7.0 - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + resolution: + { + integrity: sha512-DOSwCRqXirTOyheM+4d5YZOrWcdu0LNZ87ewUoywbcb2XR4wKgqiG8vNeYwhjFMbEkfju7wx2GYH0P2gevGvFw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + dependencies: + "@jest/environment": 29.7.0 + "@jest/fake-timers": 29.7.0 + "@jest/types": 29.6.3 + "@types/node": 20.16.14 jest-mock: 29.7.0 jest-util: 29.7.0 dev: true /jest-get-type@29.6.3: - resolution: {integrity: sha512-zrteXnqYxfQh7l5FHyL38jL39di8H8rHoecLH3JNxH3BwOrBsNeabdap5e0I23lD4HHI8W5VFBZqG4Eaq5LNcw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-zrteXnqYxfQh7l5FHyL38jL39di8H8rHoecLH3JNxH3BwOrBsNeabdap5e0I23lD4HHI8W5VFBZqG4Eaq5LNcw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } /jest-haste-map@29.7.0: - resolution: {integrity: sha512-fP8u2pyfqx0K1rGn1R9pyE0/KTn+G7PxktWidOBTqFPLYX0b9ksaMFkhK5vrS3DVun09pckLdlx90QthlW7AmA==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - dependencies: - '@jest/types': 29.6.3 - '@types/graceful-fs': 4.1.9 - '@types/node': 20.16.14 + resolution: + { + integrity: sha512-fP8u2pyfqx0K1rGn1R9pyE0/KTn+G7PxktWidOBTqFPLYX0b9ksaMFkhK5vrS3DVun09pckLdlx90QthlW7AmA==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + dependencies: + "@jest/types": 29.6.3 + "@types/graceful-fs": 4.1.9 + "@types/node": 20.16.14 anymatch: 3.1.3 fb-watchman: 2.0.2 graceful-fs: 4.2.11 @@ -1591,16 +2193,22 @@ packages: fsevents: 2.3.3 /jest-leak-detector@29.7.0: - resolution: {integrity: sha512-kYA8IJcSYtST2BY9I+SMC32nDpBT3J2NvWJx8+JCuCdl/CR1I4EKUJROiP8XtCcxqgTTBGJNdbB1A8XRKbTetw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-kYA8IJcSYtST2BY9I+SMC32nDpBT3J2NvWJx8+JCuCdl/CR1I4EKUJROiP8XtCcxqgTTBGJNdbB1A8XRKbTetw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } dependencies: jest-get-type: 29.6.3 pretty-format: 29.7.0 dev: true /jest-matcher-utils@29.7.0: - resolution: {integrity: sha512-sBkD+Xi9DtcChsI3L3u0+N0opgPYnCRPtGcQYrgXmR+hmt/fYfWAL0xRXYU8eWOdfuLgBe0YCW3AFtnRLagq/g==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-sBkD+Xi9DtcChsI3L3u0+N0opgPYnCRPtGcQYrgXmR+hmt/fYfWAL0xRXYU8eWOdfuLgBe0YCW3AFtnRLagq/g==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } dependencies: chalk: 4.1.2 jest-diff: 29.7.0 @@ -1608,12 +2216,15 @@ packages: pretty-format: 29.7.0 /jest-message-util@29.7.0: - resolution: {integrity: 
sha512-GBEV4GRADeP+qtB2+6u61stea8mGcOT4mCtrYISZwfu9/ISHFJ/5zOMXYbpBE9RsS5+Gb63DW4FgmnKJ79Kf6w==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - dependencies: - '@babel/code-frame': 7.25.9 - '@jest/types': 29.6.3 - '@types/stack-utils': 2.0.3 + resolution: + { + integrity: sha512-GBEV4GRADeP+qtB2+6u61stea8mGcOT4mCtrYISZwfu9/ISHFJ/5zOMXYbpBE9RsS5+Gb63DW4FgmnKJ79Kf6w==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + dependencies: + "@babel/code-frame": 7.25.9 + "@jest/types": 29.6.3 + "@types/stack-utils": 2.0.3 chalk: 4.1.2 graceful-fs: 4.2.11 micromatch: 4.0.8 @@ -1622,18 +2233,24 @@ packages: stack-utils: 2.0.6 /jest-mock@29.7.0: - resolution: {integrity: sha512-ITOMZn+UkYS4ZFh83xYAOzWStloNzJFO2s8DWrE4lhtGD+AorgnbkiKERe4wQVBydIGPx059g6riW5Btp6Llnw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - dependencies: - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + resolution: + { + integrity: sha512-ITOMZn+UkYS4ZFh83xYAOzWStloNzJFO2s8DWrE4lhtGD+AorgnbkiKERe4wQVBydIGPx059g6riW5Btp6Llnw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + dependencies: + "@jest/types": 29.6.3 + "@types/node": 20.16.14 jest-util: 29.7.0 /jest-pnp-resolver@1.2.3(jest-resolve@29.7.0): - resolution: {integrity: sha512-+3NpwQEnRoIBtx4fyhblQDPgJI0H1IEIkX7ShLUjPGA7TtUTvI1oiKi3SR4oBR0hQhQR80l4WAe5RrXBwWMA8w==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-+3NpwQEnRoIBtx4fyhblQDPgJI0H1IEIkX7ShLUjPGA7TtUTvI1oiKi3SR4oBR0hQhQR80l4WAe5RrXBwWMA8w==, + } + engines: { node: ">=6" } peerDependencies: - jest-resolve: '*' + jest-resolve: "*" peerDependenciesMeta: jest-resolve: optional: true @@ -1642,12 +2259,18 @@ packages: dev: true /jest-regex-util@29.6.3: - resolution: {integrity: sha512-KJJBsRCyyLNWCNBOvZyRDnAIfUiRJ8v+hOBQYGn8gDyF3UegwiP4gwRR3/SDa42g1YbVycTidUF3rKjyLFDWbg==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-KJJBsRCyyLNWCNBOvZyRDnAIfUiRJ8v+hOBQYGn8gDyF3UegwiP4gwRR3/SDa42g1YbVycTidUF3rKjyLFDWbg==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } /jest-resolve-dependencies@29.7.0: - resolution: {integrity: sha512-un0zD/6qxJ+S0et7WxeI3H5XSe9lTBBR7bOHCHXkKR6luG5mwDDlIzVQ0V5cZCuoTgEdcdwzTghYkTWfubi+nA==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-un0zD/6qxJ+S0et7WxeI3H5XSe9lTBBR7bOHCHXkKR6luG5mwDDlIzVQ0V5cZCuoTgEdcdwzTghYkTWfubi+nA==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } dependencies: jest-regex-util: 29.6.3 jest-snapshot: 29.7.0 @@ -1656,8 +2279,11 @@ packages: dev: true /jest-resolve@29.7.0: - resolution: {integrity: sha512-IOVhZSrg+UvVAshDSDtHyFCCBUl/Q3AAJv8iZ6ZjnZ74xzvwuzLXid9IIIPgTnY62SJjfuupMKZsZQRsCvxEgA==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-IOVhZSrg+UvVAshDSDtHyFCCBUl/Q3AAJv8iZ6ZjnZ74xzvwuzLXid9IIIPgTnY62SJjfuupMKZsZQRsCvxEgA==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } dependencies: chalk: 4.1.2 graceful-fs: 4.2.11 @@ -1671,15 +2297,18 @@ packages: dev: true /jest-runner@29.7.0: - resolution: {integrity: sha512-fsc4N6cPCAahybGBfTRcq5wFR6fpLznMg47sY5aDpsoejOcVYFb07AHuSnR0liMcPTgBsA3ZJL6kFOjPdoNipQ==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - dependencies: - '@jest/console': 29.7.0 - '@jest/environment': 29.7.0 - '@jest/test-result': 29.7.0 - '@jest/transform': 29.7.0 - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + resolution: + { + integrity: sha512-fsc4N6cPCAahybGBfTRcq5wFR6fpLznMg47sY5aDpsoejOcVYFb07AHuSnR0liMcPTgBsA3ZJL6kFOjPdoNipQ==, + } + engines: 
{ node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + dependencies: + "@jest/console": 29.7.0 + "@jest/environment": 29.7.0 + "@jest/test-result": 29.7.0 + "@jest/transform": 29.7.0 + "@jest/types": 29.6.3 + "@types/node": 20.16.14 chalk: 4.1.2 emittery: 0.13.1 graceful-fs: 4.2.11 @@ -1700,17 +2329,20 @@ packages: dev: true /jest-runtime@29.7.0: - resolution: {integrity: sha512-gUnLjgwdGqW7B4LvOIkbKs9WGbn+QLqRQQ9juC6HndeDiezIwhDP+mhMwHWCEcfQ5RUXa6OPnFF8BJh5xegwwQ==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - dependencies: - '@jest/environment': 29.7.0 - '@jest/fake-timers': 29.7.0 - '@jest/globals': 29.7.0 - '@jest/source-map': 29.6.3 - '@jest/test-result': 29.7.0 - '@jest/transform': 29.7.0 - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + resolution: + { + integrity: sha512-gUnLjgwdGqW7B4LvOIkbKs9WGbn+QLqRQQ9juC6HndeDiezIwhDP+mhMwHWCEcfQ5RUXa6OPnFF8BJh5xegwwQ==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + dependencies: + "@jest/environment": 29.7.0 + "@jest/fake-timers": 29.7.0 + "@jest/globals": 29.7.0 + "@jest/source-map": 29.6.3 + "@jest/test-result": 29.7.0 + "@jest/transform": 29.7.0 + "@jest/types": 29.6.3 + "@types/node": 20.16.14 chalk: 4.1.2 cjs-module-lexer: 1.4.1 collect-v8-coverage: 1.0.2 @@ -1730,17 +2362,20 @@ packages: dev: true /jest-snapshot@29.7.0: - resolution: {integrity: sha512-Rm0BMWtxBcioHr1/OX5YCP8Uov4riHvKPknOGs804Zg9JGZgmIBkbtlxJC/7Z4msKYVbIJtfU+tKb8xlYNfdkw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - dependencies: - '@babel/core': 7.25.9 - '@babel/generator': 7.25.9 - '@babel/plugin-syntax-jsx': 7.25.9(@babel/core@7.25.9) - '@babel/plugin-syntax-typescript': 7.25.9(@babel/core@7.25.9) - '@babel/types': 7.25.9 - '@jest/expect-utils': 29.7.0 - '@jest/transform': 29.7.0 - '@jest/types': 29.6.3 + resolution: + { + integrity: sha512-Rm0BMWtxBcioHr1/OX5YCP8Uov4riHvKPknOGs804Zg9JGZgmIBkbtlxJC/7Z4msKYVbIJtfU+tKb8xlYNfdkw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + dependencies: + "@babel/core": 7.25.9 + "@babel/generator": 7.25.9 + "@babel/plugin-syntax-jsx": 7.25.9(@babel/core@7.25.9) + "@babel/plugin-syntax-typescript": 7.25.9(@babel/core@7.25.9) + "@babel/types": 7.25.9 + "@jest/expect-utils": 29.7.0 + "@jest/transform": 29.7.0 + "@jest/types": 29.6.3 babel-preset-current-node-syntax: 1.1.0(@babel/core@7.25.9) chalk: 4.1.2 expect: 29.7.0 @@ -1757,21 +2392,27 @@ packages: - supports-color /jest-util@29.7.0: - resolution: {integrity: sha512-z6EbKajIpqGKU56y5KBUgy1dt1ihhQJgWzUlZHArA/+X2ad7Cb5iF+AK1EWVL/Bo7Rz9uurpqw6SiBCefUbCGA==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - dependencies: - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + resolution: + { + integrity: sha512-z6EbKajIpqGKU56y5KBUgy1dt1ihhQJgWzUlZHArA/+X2ad7Cb5iF+AK1EWVL/Bo7Rz9uurpqw6SiBCefUbCGA==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + dependencies: + "@jest/types": 29.6.3 + "@types/node": 20.16.14 chalk: 4.1.2 ci-info: 3.9.0 graceful-fs: 4.2.11 picomatch: 2.3.1 /jest-validate@29.7.0: - resolution: {integrity: sha512-ZB7wHqaRGVw/9hST/OuFUReG7M8vKeq0/J2egIGLdvjHCmYqGARhzXmtgi+gVeZ5uXFF219aOc3Ls2yLg27tkw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-ZB7wHqaRGVw/9hST/OuFUReG7M8vKeq0/J2egIGLdvjHCmYqGARhzXmtgi+gVeZ5uXFF219aOc3Ls2yLg27tkw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } dependencies: - '@jest/types': 29.6.3 + "@jest/types": 29.6.3 camelcase: 6.3.0 chalk: 4.1.2 jest-get-type: 29.6.3 @@ -1780,12 +2421,15 @@ packages: dev: true /jest-watcher@29.7.0: - 
resolution: {integrity: sha512-49Fg7WXkU3Vl2h6LbLtMQ/HyB6rXSIX7SqvBLQmssRBGN9I0PNvPmAmCWSOY6SOvrjhI/F7/bGAv9RtnsPA03g==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - dependencies: - '@jest/test-result': 29.7.0 - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + resolution: + { + integrity: sha512-49Fg7WXkU3Vl2h6LbLtMQ/HyB6rXSIX7SqvBLQmssRBGN9I0PNvPmAmCWSOY6SOvrjhI/F7/bGAv9RtnsPA03g==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + dependencies: + "@jest/test-result": 29.7.0 + "@jest/types": 29.6.3 + "@types/node": 20.16.14 ansi-escapes: 4.3.2 chalk: 4.1.2 emittery: 0.13.1 @@ -1794,17 +2438,23 @@ packages: dev: true /jest-worker@29.7.0: - resolution: {integrity: sha512-eIz2msL/EzL9UFTFFx7jBTkeZfku0yUAyZZZmJ93H2TYEiroIx2PQjEXcwYtYl8zXCxb+PAmA2hLIt/6ZEkPHw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-eIz2msL/EzL9UFTFFx7jBTkeZfku0yUAyZZZmJ93H2TYEiroIx2PQjEXcwYtYl8zXCxb+PAmA2hLIt/6ZEkPHw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } dependencies: - '@types/node': 20.16.14 + "@types/node": 20.16.14 jest-util: 29.7.0 merge-stream: 2.0.0 supports-color: 8.1.1 /jest@29.7.0(@types/node@20.16.14)(ts-node@10.9.2): - resolution: {integrity: sha512-NIy3oAFp9shda19hy4HK0HRTWKtPJmGdnvywu01nOqNC2vZg+Z+fvJDxpMQA88eb2I9EcafcdjYgsDthnYTvGw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-NIy3oAFp9shda19hy4HK0HRTWKtPJmGdnvywu01nOqNC2vZg+Z+fvJDxpMQA88eb2I9EcafcdjYgsDthnYTvGw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } hasBin: true peerDependencies: node-notifier: ^8.0.1 || ^9.0.0 || ^10.0.0 @@ -1812,237 +2462,369 @@ packages: node-notifier: optional: true dependencies: - '@jest/core': 29.7.0(ts-node@10.9.2) - '@jest/types': 29.6.3 + "@jest/core": 29.7.0(ts-node@10.9.2) + "@jest/types": 29.6.3 import-local: 3.2.0 jest-cli: 29.7.0(@types/node@20.16.14)(ts-node@10.9.2) transitivePeerDependencies: - - '@types/node' + - "@types/node" - babel-plugin-macros - supports-color - ts-node dev: true /js-tokens@4.0.0: - resolution: {integrity: sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==} + resolution: + { + integrity: sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==, + } /js-yaml@3.14.1: - resolution: {integrity: sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==} + resolution: + { + integrity: sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==, + } hasBin: true dependencies: argparse: 1.0.10 esprima: 4.0.1 /jsesc@3.0.2: - resolution: {integrity: sha512-xKqzzWXDttJuOcawBt4KnKHHIf5oQ/Cxax+0PWFG+DFDgHNAdi+TXECADI+RYiFUMmx8792xsMbbgXj4CwnP4g==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-xKqzzWXDttJuOcawBt4KnKHHIf5oQ/Cxax+0PWFG+DFDgHNAdi+TXECADI+RYiFUMmx8792xsMbbgXj4CwnP4g==, + } + engines: { node: ">=6" } hasBin: true /json-parse-even-better-errors@2.3.1: - resolution: {integrity: sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==} + resolution: + { + integrity: sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==, + } dev: true /json5@2.2.3: - resolution: {integrity: sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==} - engines: {node: '>=6'} + resolution: + { + integrity: 
sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==, + } + engines: { node: ">=6" } hasBin: true /kleur@3.0.3: - resolution: {integrity: sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==, + } + engines: { node: ">=6" } dev: true /leven@3.1.0: - resolution: {integrity: sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==, + } + engines: { node: ">=6" } dev: true /lines-and-columns@1.2.4: - resolution: {integrity: sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==} + resolution: + { + integrity: sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==, + } dev: true /locate-path@5.0.0: - resolution: {integrity: sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==, + } + engines: { node: ">=8" } dependencies: p-locate: 4.1.0 /lodash.memoize@4.1.2: - resolution: {integrity: sha512-t7j+NzmgnQzTAYXcsHYLgimltOV1MXHtlOWf6GjL9Kj8GK5FInw5JotxvbOs+IvV1/Dzo04/fCGfLVs7aXb4Ag==} + resolution: + { + integrity: sha512-t7j+NzmgnQzTAYXcsHYLgimltOV1MXHtlOWf6GjL9Kj8GK5FInw5JotxvbOs+IvV1/Dzo04/fCGfLVs7aXb4Ag==, + } dev: true /lru-cache@5.1.1: - resolution: {integrity: sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==} + resolution: + { + integrity: sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==, + } dependencies: yallist: 3.1.1 /make-dir@4.0.0: - resolution: {integrity: sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==, + } + engines: { node: ">=10" } dependencies: semver: 7.6.3 dev: true /make-error@1.3.6: - resolution: {integrity: sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==} + resolution: + { + integrity: sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==, + } dev: true /makeerror@1.0.12: - resolution: {integrity: sha512-JmqCvUhmt43madlpFzG4BQzG2Z3m6tvQDNKdClZnO3VbIudJYmxsT0FNJMeiB2+JTSlTQTSbU8QdesVmwJcmLg==} + resolution: + { + integrity: sha512-JmqCvUhmt43madlpFzG4BQzG2Z3m6tvQDNKdClZnO3VbIudJYmxsT0FNJMeiB2+JTSlTQTSbU8QdesVmwJcmLg==, + } dependencies: tmpl: 1.0.5 /merge-stream@2.0.0: - resolution: {integrity: sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==} + resolution: + { + integrity: sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==, + } /micromatch@4.0.8: - resolution: {integrity: sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==} - engines: {node: '>=8.6'} + resolution: + { + integrity: 
sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==, + } + engines: { node: ">=8.6" } dependencies: braces: 3.0.3 picomatch: 2.3.1 /mime-db@1.52.0: - resolution: {integrity: sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==} - engines: {node: '>= 0.6'} + resolution: + { + integrity: sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==, + } + engines: { node: ">= 0.6" } dev: false /mime-types@2.1.35: - resolution: {integrity: sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==} - engines: {node: '>= 0.6'} + resolution: + { + integrity: sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==, + } + engines: { node: ">= 0.6" } dependencies: mime-db: 1.52.0 dev: false /mimic-fn@2.1.0: - resolution: {integrity: sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==, + } + engines: { node: ">=6" } dev: true /minimatch@3.1.2: - resolution: {integrity: sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==} + resolution: + { + integrity: sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==, + } dependencies: brace-expansion: 1.1.11 /minimatch@5.1.6: - resolution: {integrity: sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==, + } + engines: { node: ">=10" } dependencies: brace-expansion: 2.0.1 dev: true /ms@2.1.3: - resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} + resolution: + { + integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==, + } /nanoid@3.3.7: - resolution: {integrity: sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==} - engines: {node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1} + resolution: + { + integrity: sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==, + } + engines: { node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1 } hasBin: true dev: false /natural-compare@1.4.0: - resolution: {integrity: sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==} + resolution: + { + integrity: sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==, + } /node-int64@0.4.0: - resolution: {integrity: sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw==} + resolution: + { + integrity: sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw==, + } /node-releases@2.0.18: - resolution: {integrity: sha512-d9VeXT4SJ7ZeOqGX6R5EM022wpL+eWPooLI+5UpWn2jCT1aosUQEhQP214x33Wkwx3JQMvIm+tIoVOdodFS40g==} + resolution: + { + integrity: sha512-d9VeXT4SJ7ZeOqGX6R5EM022wpL+eWPooLI+5UpWn2jCT1aosUQEhQP214x33Wkwx3JQMvIm+tIoVOdodFS40g==, + } /normalize-path@3.0.0: - resolution: {integrity: 
sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==} - engines: {node: '>=0.10.0'} + resolution: + { + integrity: sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==, + } + engines: { node: ">=0.10.0" } /npm-run-path@4.0.1: - resolution: {integrity: sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==, + } + engines: { node: ">=8" } dependencies: path-key: 3.1.1 dev: true /once@1.4.0: - resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==} + resolution: + { + integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==, + } dependencies: wrappy: 1.0.2 /onetime@5.1.2: - resolution: {integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==, + } + engines: { node: ">=6" } dependencies: mimic-fn: 2.1.0 dev: true /p-limit@2.3.0: - resolution: {integrity: sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==, + } + engines: { node: ">=6" } dependencies: p-try: 2.2.0 /p-limit@3.1.0: - resolution: {integrity: sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==, + } + engines: { node: ">=10" } dependencies: yocto-queue: 0.1.0 dev: true /p-locate@4.1.0: - resolution: {integrity: sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==, + } + engines: { node: ">=8" } dependencies: p-limit: 2.3.0 /p-try@2.2.0: - resolution: {integrity: sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==, + } + engines: { node: ">=6" } /parse-json@5.2.0: - resolution: {integrity: sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==, + } + engines: { node: ">=8" } dependencies: - '@babel/code-frame': 7.25.9 + "@babel/code-frame": 7.25.9 error-ex: 1.3.2 json-parse-even-better-errors: 2.3.1 lines-and-columns: 1.2.4 dev: true /path-exists@4.0.0: - resolution: {integrity: sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==, + } + engines: { node: ">=8" } /path-is-absolute@1.0.1: - 
resolution: {integrity: sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==} - engines: {node: '>=0.10.0'} + resolution: + { + integrity: sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==, + } + engines: { node: ">=0.10.0" } /path-key@3.1.1: - resolution: {integrity: sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==, + } + engines: { node: ">=8" } dev: true /path-parse@1.0.7: - resolution: {integrity: sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==} + resolution: + { + integrity: sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==, + } dev: true /picocolors@1.1.1: - resolution: {integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==} + resolution: + { + integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==, + } /picomatch@2.3.1: - resolution: {integrity: sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==} - engines: {node: '>=8.6'} + resolution: + { + integrity: sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==, + } + engines: { node: ">=8.6" } /pirates@4.0.6: - resolution: {integrity: sha512-saLsH7WeYYPiD25LDuLRRY/i+6HaPYr6G1OUlN39otzkSTxKnubR9RTxS3/Kk50s1g2JTgFwWQDQyplC5/SHZg==} - engines: {node: '>= 6'} + resolution: + { + integrity: sha512-saLsH7WeYYPiD25LDuLRRY/i+6HaPYr6G1OUlN39otzkSTxKnubR9RTxS3/Kk50s1g2JTgFwWQDQyplC5/SHZg==, + } + engines: { node: ">= 6" } /pkg-dir@4.2.0: - resolution: {integrity: sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ==, + } + engines: { node: ">=8" } dependencies: find-up: 4.1.0 dev: true /postcss@8.4.47: - resolution: {integrity: sha512-56rxCq7G/XfB4EkXq9Egn5GCqugWvDFjafDOThIdMBsI15iqPqR5r15TfSr1YPYeEI19YeaXMCbY6u88Y76GLQ==} - engines: {node: ^10 || ^12 || >=14} + resolution: + { + integrity: sha512-56rxCq7G/XfB4EkXq9Egn5GCqugWvDFjafDOThIdMBsI15iqPqR5r15TfSr1YPYeEI19YeaXMCbY6u88Y76GLQ==, + } + engines: { node: ^10 || ^12 || >=14 } dependencies: nanoid: 3.3.7 picocolors: 1.1.1 @@ -2050,7 +2832,10 @@ packages: dev: false /posthog-js@1.174.3: - resolution: {integrity: sha512-fRLncd3jkT9Y7gLiyQe8v8sJ9yuTIiQBBWcYQ8l+vv+m504LWFtxl+/JZtHXPhaG3Eyf7AzZ/Kafkw8jorWV9w==} + resolution: + { + integrity: sha512-fRLncd3jkT9Y7gLiyQe8v8sJ9yuTIiQBBWcYQ8l+vv+m504LWFtxl+/JZtHXPhaG3Eyf7AzZ/Kafkw8jorWV9w==, + } dependencies: core-js: 3.38.1 fflate: 0.4.8 @@ -2059,8 +2844,11 @@ packages: dev: false /posthog-node@4.2.1: - resolution: {integrity: sha512-l+fsjYEkTik3m/G0pE7gMr4qBJP84LhK779oQm6MBzhBGpd4By4qieTW+4FUAlNCyzQTynn3Nhsa50c0IELSxQ==} - engines: {node: '>=15.0.0'} + resolution: + { + integrity: sha512-l+fsjYEkTik3m/G0pE7gMr4qBJP84LhK779oQm6MBzhBGpd4By4qieTW+4FUAlNCyzQTynn3Nhsa50c0IELSxQ==, + } + engines: { node: ">=15.0.0" } dependencies: axios: 1.7.7 rusha: 0.8.14 @@ -2069,65 +2857,101 @@ packages: dev: false /preact@10.24.3: - resolution: {integrity: 
sha512-Z2dPnBnMUfyQfSQ+GBdsGa16hz35YmLmtTLhM169uW944hYL6xzTYkJjC07j+Wosz733pMWx0fgON3JNw1jJQA==} + resolution: + { + integrity: sha512-Z2dPnBnMUfyQfSQ+GBdsGa16hz35YmLmtTLhM169uW944hYL6xzTYkJjC07j+Wosz733pMWx0fgON3JNw1jJQA==, + } dev: false /prettier@3.3.3: - resolution: {integrity: sha512-i2tDNA0O5IrMO757lfrdQZCc2jPNDVntV0m/+4whiDfWaTKfMNgR7Qz0NAeGz/nRqF4m5/6CLzbP4/liHt12Ew==} - engines: {node: '>=14'} + resolution: + { + integrity: sha512-i2tDNA0O5IrMO757lfrdQZCc2jPNDVntV0m/+4whiDfWaTKfMNgR7Qz0NAeGz/nRqF4m5/6CLzbP4/liHt12Ew==, + } + engines: { node: ">=14" } hasBin: true dev: true /pretty-format@29.7.0: - resolution: {integrity: sha512-Pdlw/oPxN+aXdmM9R00JVC9WVFoCLTKJvDVLgmJ+qAffBMxsV85l/Lu7sNx4zSzPyoL2euImuEwHhOXdEgNFZQ==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-Pdlw/oPxN+aXdmM9R00JVC9WVFoCLTKJvDVLgmJ+qAffBMxsV85l/Lu7sNx4zSzPyoL2euImuEwHhOXdEgNFZQ==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } dependencies: - '@jest/schemas': 29.6.3 + "@jest/schemas": 29.6.3 ansi-styles: 5.2.0 react-is: 18.3.1 /prompts@2.4.2: - resolution: {integrity: sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==} - engines: {node: '>= 6'} + resolution: + { + integrity: sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==, + } + engines: { node: ">= 6" } dependencies: kleur: 3.0.3 sisteransi: 1.0.5 dev: true /proxy-from-env@1.1.0: - resolution: {integrity: sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==} + resolution: + { + integrity: sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==, + } dev: false /pure-rand@6.1.0: - resolution: {integrity: sha512-bVWawvoZoBYpp6yIoQtQXHZjmz35RSVHnUOTefl8Vcjr8snTPY1wnpSPMWekcFwbxI6gtmT7rSYPFvz71ldiOA==} + resolution: + { + integrity: sha512-bVWawvoZoBYpp6yIoQtQXHZjmz35RSVHnUOTefl8Vcjr8snTPY1wnpSPMWekcFwbxI6gtmT7rSYPFvz71ldiOA==, + } dev: true /react-is@18.3.1: - resolution: {integrity: sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==} + resolution: + { + integrity: sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==, + } /require-directory@2.1.1: - resolution: {integrity: sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==} - engines: {node: '>=0.10.0'} + resolution: + { + integrity: sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==, + } + engines: { node: ">=0.10.0" } dev: true /resolve-cwd@3.0.0: - resolution: {integrity: sha512-OrZaX2Mb+rJCpH/6CpSqt9xFVpN++x01XnN2ie9g6P5/3xelLAkXWVADpdz1IHD/KFfEXyE6V0U01OQ3UO2rEg==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-OrZaX2Mb+rJCpH/6CpSqt9xFVpN++x01XnN2ie9g6P5/3xelLAkXWVADpdz1IHD/KFfEXyE6V0U01OQ3UO2rEg==, + } + engines: { node: ">=8" } dependencies: resolve-from: 5.0.0 dev: true /resolve-from@5.0.0: - resolution: {integrity: sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==, + } + engines: { node: ">=8" } /resolve.exports@2.0.2: - resolution: {integrity: sha512-X2UW6Nw3n/aMgDVy+0rSqgHlv39WZAlZrXCdnbyEiKm17DSqHX4MmQMaST3FbeWR5FTuRcUwYAziZajji0Y7mg==} - engines: {node: '>=10'} + resolution: + { + 
integrity: sha512-X2UW6Nw3n/aMgDVy+0rSqgHlv39WZAlZrXCdnbyEiKm17DSqHX4MmQMaST3FbeWR5FTuRcUwYAziZajji0Y7mg==, + } + engines: { node: ">=10" } dev: true /resolve@1.22.8: - resolution: {integrity: sha512-oKWePCxqpd6FlLvGV1VU0x7bkPmmCNolxzjMf4NczoDnQcIWrAF+cPtZn5i6n+RfD2d9i0tzpKnG6Yk168yIyw==} + resolution: + { + integrity: sha512-oKWePCxqpd6FlLvGV1VU0x7bkPmmCNolxzjMf4NczoDnQcIWrAF+cPtZn5i6n+RfD2d9i0tzpKnG6Yk168yIyw==, + } hasBin: true dependencies: is-core-module: 2.15.1 @@ -2136,88 +2960,139 @@ packages: dev: true /rrweb-snapshot@2.0.0-alpha.17: - resolution: {integrity: sha512-GBg5pV8LHOTbeVmH2VHLEFR0mc2QpQMzAvcoxEGfPNWgWHc8UvKCyq7pqN1vA+fDZ+yXXbixeO0kB2pzVvFCBw==} + resolution: + { + integrity: sha512-GBg5pV8LHOTbeVmH2VHLEFR0mc2QpQMzAvcoxEGfPNWgWHc8UvKCyq7pqN1vA+fDZ+yXXbixeO0kB2pzVvFCBw==, + } dependencies: postcss: 8.4.47 dev: false /rrweb-snapshot@2.0.0-alpha.4: - resolution: {integrity: sha512-KQ2OtPpXO5jLYqg1OnXS/Hf+EzqnZyP5A+XPqBCjYpj3XIje/Od4gdUwjbFo3cVuWq5Cw5Y1d3/xwgIS7/XpQQ==} + resolution: + { + integrity: sha512-KQ2OtPpXO5jLYqg1OnXS/Hf+EzqnZyP5A+XPqBCjYpj3XIje/Od4gdUwjbFo3cVuWq5Cw5Y1d3/xwgIS7/XpQQ==, + } dev: false /rusha@0.8.14: - resolution: {integrity: sha512-cLgakCUf6PedEu15t8kbsjnwIFFR2D4RfL+W3iWFJ4iac7z4B0ZI8fxy4R3J956kAI68HclCFGL8MPoUVC3qVA==} + resolution: + { + integrity: sha512-cLgakCUf6PedEu15t8kbsjnwIFFR2D4RfL+W3iWFJ4iac7z4B0ZI8fxy4R3J956kAI68HclCFGL8MPoUVC3qVA==, + } dev: false /semver@6.3.1: - resolution: {integrity: sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==} + resolution: + { + integrity: sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==, + } hasBin: true /semver@7.6.3: - resolution: {integrity: sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==, + } + engines: { node: ">=10" } hasBin: true /shebang-command@2.0.0: - resolution: {integrity: sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==, + } + engines: { node: ">=8" } dependencies: shebang-regex: 3.0.0 dev: true /shebang-regex@3.0.0: - resolution: {integrity: sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==, + } + engines: { node: ">=8" } dev: true /signal-exit@3.0.7: - resolution: {integrity: sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==} + resolution: + { + integrity: sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==, + } /sisteransi@1.0.5: - resolution: {integrity: sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==} + resolution: + { + integrity: sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==, + } dev: true /slash@3.0.0: - resolution: {integrity: sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==} - engines: {node: '>=8'} + resolution: + { + integrity: 
sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==, + } + engines: { node: ">=8" } /source-map-js@1.2.1: - resolution: {integrity: sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==} - engines: {node: '>=0.10.0'} + resolution: + { + integrity: sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==, + } + engines: { node: ">=0.10.0" } dev: false /source-map-support@0.5.13: - resolution: {integrity: sha512-SHSKFHadjVA5oR4PPqhtAVdcBWwRYVd6g6cAXnIbRiIwc2EhPrTuKUBdSLvlEKyIP3GCf89fltvcZiP9MMFA1w==} + resolution: + { + integrity: sha512-SHSKFHadjVA5oR4PPqhtAVdcBWwRYVd6g6cAXnIbRiIwc2EhPrTuKUBdSLvlEKyIP3GCf89fltvcZiP9MMFA1w==, + } dependencies: buffer-from: 1.1.2 source-map: 0.6.1 dev: true /source-map@0.6.1: - resolution: {integrity: sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==} - engines: {node: '>=0.10.0'} + resolution: + { + integrity: sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==, + } + engines: { node: ">=0.10.0" } dev: true /sprintf-js@1.0.3: - resolution: {integrity: sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==} + resolution: + { + integrity: sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==, + } /stack-utils@2.0.6: - resolution: {integrity: sha512-XlkWvfIm6RmsWtNJx+uqtKLS8eqFbxUg0ZzLXqY0caEy9l7hruX8IpiDnjsLavoBgqCCR71TqWO8MaXYheJ3RQ==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-XlkWvfIm6RmsWtNJx+uqtKLS8eqFbxUg0ZzLXqY0caEy9l7hruX8IpiDnjsLavoBgqCCR71TqWO8MaXYheJ3RQ==, + } + engines: { node: ">=10" } dependencies: escape-string-regexp: 2.0.0 /string-length@4.0.2: - resolution: {integrity: sha512-+l6rNN5fYHNhZZy41RXsYptCjA2Igmq4EG7kZAYFQI1E1VTXarr6ZPXBg6eq7Y6eK4FEhY6AJlyuFIb/v/S0VQ==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-+l6rNN5fYHNhZZy41RXsYptCjA2Igmq4EG7kZAYFQI1E1VTXarr6ZPXBg6eq7Y6eK4FEhY6AJlyuFIb/v/S0VQ==, + } + engines: { node: ">=10" } dependencies: char-regex: 1.0.2 strip-ansi: 6.0.1 dev: true /string-width@4.2.3: - resolution: {integrity: sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==, + } + engines: { node: ">=8" } dependencies: emoji-regex: 8.0.0 is-fullwidth-code-point: 3.0.0 @@ -2225,92 +3100,128 @@ packages: dev: true /strip-ansi@6.0.1: - resolution: {integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==, + } + engines: { node: ">=8" } dependencies: ansi-regex: 5.0.1 dev: true /strip-bom@4.0.0: - resolution: {integrity: sha512-3xurFv5tEgii33Zi8Jtp55wEIILR9eh34FAW00PZf+JnSsTmV/ioewSgQl97JHvgjoRGwPShsWm+IdrxB35d0w==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-3xurFv5tEgii33Zi8Jtp55wEIILR9eh34FAW00PZf+JnSsTmV/ioewSgQl97JHvgjoRGwPShsWm+IdrxB35d0w==, + } + engines: { node: ">=8" } dev: true /strip-final-newline@2.0.0: - resolution: {integrity: sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA==} - engines: {node: '>=6'} + resolution: + { + integrity: 
sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA==, + } + engines: { node: ">=6" } dev: true /strip-json-comments@3.1.1: - resolution: {integrity: sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==, + } + engines: { node: ">=8" } dev: true /supports-color@5.5.0: - resolution: {integrity: sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==} - engines: {node: '>=4'} + resolution: + { + integrity: sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==, + } + engines: { node: ">=4" } dependencies: has-flag: 3.0.0 /supports-color@7.2.0: - resolution: {integrity: sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==, + } + engines: { node: ">=8" } dependencies: has-flag: 4.0.0 /supports-color@8.1.1: - resolution: {integrity: sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==, + } + engines: { node: ">=10" } dependencies: has-flag: 4.0.0 /supports-preserve-symlinks-flag@1.0.0: - resolution: {integrity: sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==} - engines: {node: '>= 0.4'} + resolution: + { + integrity: sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==, + } + engines: { node: ">= 0.4" } dev: true /test-exclude@6.0.0: - resolution: {integrity: sha512-cAGWPIyOHU6zlmg88jwm7VRyXnMN7iV68OGAbYDk/Mh/xC/pzVPlQtY6ngoIH/5/tciuhGfvESU8GrHrcxD56w==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-cAGWPIyOHU6zlmg88jwm7VRyXnMN7iV68OGAbYDk/Mh/xC/pzVPlQtY6ngoIH/5/tciuhGfvESU8GrHrcxD56w==, + } + engines: { node: ">=8" } dependencies: - '@istanbuljs/schema': 0.1.3 + "@istanbuljs/schema": 0.1.3 glob: 7.2.3 minimatch: 3.1.2 /tmpl@1.0.5: - resolution: {integrity: sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==} + resolution: + { + integrity: sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==, + } /to-regex-range@5.0.1: - resolution: {integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==} - engines: {node: '>=8.0'} + resolution: + { + integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==, + } + engines: { node: ">=8.0" } dependencies: is-number: 7.0.0 /ts-jest@29.2.5(@babel/core@7.25.9)(jest@29.7.0)(typescript@5.6.3): - resolution: {integrity: sha512-KD8zB2aAZrcKIdGk4OwpJggeLcH1FgrICqDSROWqlnJXGCXK4Mn6FcdK2B6670Xr73lHMG1kHw8R87A0ecZ+vA==} - engines: {node: ^14.15.0 || ^16.10.0 || ^18.0.0 || >=20.0.0} + resolution: + { + integrity: sha512-KD8zB2aAZrcKIdGk4OwpJggeLcH1FgrICqDSROWqlnJXGCXK4Mn6FcdK2B6670Xr73lHMG1kHw8R87A0ecZ+vA==, + } + engines: { node: ^14.15.0 || ^16.10.0 || ^18.0.0 || >=20.0.0 } hasBin: true peerDependencies: - '@babel/core': '>=7.0.0-beta.0 <8' - '@jest/transform': ^29.0.0 - 
'@jest/types': ^29.0.0 + "@babel/core": ">=7.0.0-beta.0 <8" + "@jest/transform": ^29.0.0 + "@jest/types": ^29.0.0 babel-jest: ^29.0.0 - esbuild: '*' + esbuild: "*" jest: ^29.0.0 - typescript: '>=4.3 <6' + typescript: ">=4.3 <6" peerDependenciesMeta: - '@babel/core': + "@babel/core": optional: true - '@jest/transform': + "@jest/transform": optional: true - '@jest/types': + "@jest/types": optional: true babel-jest: optional: true esbuild: optional: true dependencies: - '@babel/core': 7.25.9 + "@babel/core": 7.25.9 bs-logger: 0.2.6 ejs: 3.1.10 fast-json-stable-stringify: 2.1.0 @@ -2325,25 +3236,28 @@ packages: dev: true /ts-node@10.9.2(@types/node@20.16.14)(typescript@5.6.3): - resolution: {integrity: sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==} + resolution: + { + integrity: sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==, + } hasBin: true peerDependencies: - '@swc/core': '>=1.2.50' - '@swc/wasm': '>=1.2.50' - '@types/node': '*' - typescript: '>=2.7' + "@swc/core": ">=1.2.50" + "@swc/wasm": ">=1.2.50" + "@types/node": "*" + typescript: ">=2.7" peerDependenciesMeta: - '@swc/core': + "@swc/core": optional: true - '@swc/wasm': + "@swc/wasm": optional: true dependencies: - '@cspotcode/source-map-support': 0.8.1 - '@tsconfig/node10': 1.0.11 - '@tsconfig/node12': 1.0.11 - '@tsconfig/node14': 1.0.3 - '@tsconfig/node16': 1.0.4 - '@types/node': 20.16.14 + "@cspotcode/source-map-support": 0.8.1 + "@tsconfig/node10": 1.0.11 + "@tsconfig/node12": 1.0.11 + "@tsconfig/node14": 1.0.3 + "@tsconfig/node16": 1.0.4 + "@types/node": 20.16.14 acorn: 8.13.0 acorn-walk: 8.3.4 arg: 4.1.3 @@ -2356,71 +3270,107 @@ packages: dev: true /type-detect@4.0.8: - resolution: {integrity: sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==} - engines: {node: '>=4'} + resolution: + { + integrity: sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==, + } + engines: { node: ">=4" } /type-fest@0.21.3: - resolution: {integrity: sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==, + } + engines: { node: ">=10" } dev: true /typescript@5.6.3: - resolution: {integrity: sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw==} - engines: {node: '>=14.17'} + resolution: + { + integrity: sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw==, + } + engines: { node: ">=14.17" } hasBin: true dev: true /undici-types@6.19.8: - resolution: {integrity: sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==} + resolution: + { + integrity: sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==, + } /update-browserslist-db@1.1.1(browserslist@4.24.2): - resolution: {integrity: sha512-R8UzCaa9Az+38REPiJ1tXlImTJXlVfgHZsglwBD/k6nj76ctsH1E3q4doGrukiLQd3sGQYu56r5+lo5r94l29A==} + resolution: + { + integrity: sha512-R8UzCaa9Az+38REPiJ1tXlImTJXlVfgHZsglwBD/k6nj76ctsH1E3q4doGrukiLQd3sGQYu56r5+lo5r94l29A==, + } hasBin: true peerDependencies: - browserslist: '>= 4.21.0' + browserslist: ">= 4.21.0" dependencies: browserslist: 4.24.2 escalade: 3.2.0 picocolors: 1.1.1 /uuid@10.0.0: - resolution: {integrity: 
sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ==} + resolution: + { + integrity: sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ==, + } hasBin: true dev: false /v8-compile-cache-lib@3.0.1: - resolution: {integrity: sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==} + resolution: + { + integrity: sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==, + } dev: true /v8-to-istanbul@9.3.0: - resolution: {integrity: sha512-kiGUalWN+rgBJ/1OHZsBtU4rXZOfj/7rKQxULKlIzwzQSvMJUUNgPwJEEh7gU6xEVxC0ahoOBvN2YI8GH6FNgA==} - engines: {node: '>=10.12.0'} - dependencies: - '@jridgewell/trace-mapping': 0.3.25 - '@types/istanbul-lib-coverage': 2.0.6 + resolution: + { + integrity: sha512-kiGUalWN+rgBJ/1OHZsBtU4rXZOfj/7rKQxULKlIzwzQSvMJUUNgPwJEEh7gU6xEVxC0ahoOBvN2YI8GH6FNgA==, + } + engines: { node: ">=10.12.0" } + dependencies: + "@jridgewell/trace-mapping": 0.3.25 + "@types/istanbul-lib-coverage": 2.0.6 convert-source-map: 2.0.0 dev: true /walker@1.0.8: - resolution: {integrity: sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ==} + resolution: + { + integrity: sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ==, + } dependencies: makeerror: 1.0.12 /web-vitals@4.2.3: - resolution: {integrity: sha512-/CFAm1mNxSmOj6i0Co+iGFJ58OS4NRGVP+AWS/l509uIK5a1bSoIVaHz/ZumpHTfHSZBpgrJ+wjfpAOrTHok5Q==} + resolution: + { + integrity: sha512-/CFAm1mNxSmOj6i0Co+iGFJ58OS4NRGVP+AWS/l509uIK5a1bSoIVaHz/ZumpHTfHSZBpgrJ+wjfpAOrTHok5Q==, + } dev: false /which@2.0.2: - resolution: {integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==} - engines: {node: '>= 8'} + resolution: + { + integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==, + } + engines: { node: ">= 8" } hasBin: true dependencies: isexe: 2.0.0 dev: true /wrap-ansi@7.0.0: - resolution: {integrity: sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==, + } + engines: { node: ">=10" } dependencies: ansi-styles: 4.3.0 string-width: 4.2.3 @@ -2428,31 +3378,49 @@ packages: dev: true /wrappy@1.0.2: - resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==} + resolution: + { + integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==, + } /write-file-atomic@4.0.2: - resolution: {integrity: sha512-7KxauUdBmSdWnmpaGFg+ppNjKF8uNLry8LyzjauQDOVONfFLNKrKvQOxZ/VuTIcS/gge/YNahf5RIIQWTSarlg==} - engines: {node: ^12.13.0 || ^14.15.0 || >=16.0.0} + resolution: + { + integrity: sha512-7KxauUdBmSdWnmpaGFg+ppNjKF8uNLry8LyzjauQDOVONfFLNKrKvQOxZ/VuTIcS/gge/YNahf5RIIQWTSarlg==, + } + engines: { node: ^12.13.0 || ^14.15.0 || >=16.0.0 } dependencies: imurmurhash: 0.1.4 signal-exit: 3.0.7 /y18n@5.0.8: - resolution: {integrity: sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==, + } + engines: { node: ">=10" } dev: true /yallist@3.1.1: 
- resolution: {integrity: sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==} + resolution: + { + integrity: sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==, + } /yargs-parser@21.1.1: - resolution: {integrity: sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==} - engines: {node: '>=12'} + resolution: + { + integrity: sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==, + } + engines: { node: ">=12" } dev: true /yargs@17.7.2: - resolution: {integrity: sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==} - engines: {node: '>=12'} + resolution: + { + integrity: sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==, + } + engines: { node: ">=12" } dependencies: cliui: 8.0.1 escalade: 3.2.0 @@ -2464,11 +3432,17 @@ packages: dev: true /yn@3.1.1: - resolution: {integrity: sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==, + } + engines: { node: ">=6" } dev: true /yocto-queue@0.1.0: - resolution: {integrity: sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==, + } + engines: { node: ">=10" } dev: true diff --git a/js/sdk/src/r2rClient.ts b/js/sdk/src/r2rClient.ts index be67d7f3a..0b86dc345 100644 --- a/js/sdk/src/r2rClient.ts +++ b/js/sdk/src/r2rClient.ts @@ -545,7 +545,10 @@ export class r2rClient { ingestion_config: options.ingestion_config ? JSON.stringify(options.ingestion_config) : undefined, - run_with_orchestration: (options.run_with_orchestration != undefined) ? String(options.run_with_orchestration) : undefined, + run_with_orchestration: + options.run_with_orchestration != undefined + ? String(options.run_with_orchestration) + : undefined, }; Object.entries(data).forEach(([key, value]) => { @@ -624,7 +627,10 @@ export class r2rClient { ingestion_config: options.ingestion_config ? JSON.stringify(options.ingestion_config) : undefined, - run_with_orchestration: (options.run_with_orchestration != undefined) ? String(options.run_with_orchestration) : undefined, + run_with_orchestration: + options.run_with_orchestration != undefined + ? String(options.run_with_orchestration) + : undefined, }; Object.entries(data).forEach(([key, value]) => { @@ -661,10 +667,8 @@ export class r2rClient { chunks: chunks, document_id: documentId, metadata: metadata, + run_with_orchestration: run_with_orchestration, }; - if (run_with_orchestration != undefined) { - inputData.run_with_orchestration = run_with_orchestration; - } return await this._makeRequest("POST", "ingest_chunks", { data: inputData, @@ -1527,21 +1531,25 @@ export class r2rClient { /** * Performs a single turn in a conversation with a RAG agent. * @param messages The messages to send to the agent. + * @param rag_generation_config RAG generation configuration. * @param vector_search_settings Vector search settings. * @param kg_search_settings KG search settings. - * @param rag_generation_config RAG generation configuration. * @param task_prompt_override Task prompt override. 
* @param include_title_if_available Include title if available. + * @param conversation_id The ID of the conversation, if not a new conversation. + * @param branch_id The ID of the branch to use, if not a new branch. * @returns A promise that resolves to the response from the server. */ @feature("agent") async agent( messages: Message[], + rag_generation_config?: GenerationConfig | Record<string, any>, vector_search_settings?: VectorSearchSettings | Record<string, any>, kg_search_settings?: KGSearchSettings | Record<string, any>, - rag_generation_config?: GenerationConfig | Record<string, any>, task_prompt_override?: string, include_title_if_available?: boolean, + conversation_id?: string, + branch_id?: string, ): Promise<any | AsyncGenerator<string, void, unknown>> { this._ensureAuthenticated(); @@ -1552,6 +1560,8 @@ export class r2rClient { rag_generation_config, task_prompt_override, include_title_if_available, + conversation_id, + branch_id, }; Object.keys(json_data).forEach( diff --git a/py/cli/__init__.py b/py/cli/__init__.py index 11ca47716..bc5458115 100644 --- a/py/cli/__init__.py +++ b/py/cli/__init__.py @@ -1,5 +1,5 @@ from .command_group import cli as command_group_cli -from .commands import auth, ingestion, management, retrieval, server +from .commands import auth, database, ingestion, management, retrieval, server from .main import main __all__ = [ @@ -12,6 +12,7 @@ "ingestion", "management", "kg", + "database", "retrieval", "server", ] diff --git a/py/cli/command_group.py b/py/cli/command_group.py index 06afe0a7e..efa27d936 100644 --- a/py/cli/command_group.py +++ b/py/cli/command_group.py @@ -2,7 +2,7 @@ from asyncclick import pass_context from asyncclick.exceptions import Exit -from sdk.client import R2RClient +from r2r import R2RAsyncClient @click.group() @@ -13,7 +13,7 @@ async def cli(ctx, base_url): """R2R CLI for all core operations.""" - ctx.obj = R2RClient(base_url=base_url) + ctx.obj = R2RAsyncClient(base_url=base_url) # Override the default exit behavior def silent_exit(self, code=0): diff --git a/py/cli/commands/database.py b/py/cli/commands/database.py new file mode 100644 index 000000000..d324b2976 --- /dev/null +++ b/py/cli/commands/database.py @@ -0,0 +1,139 @@ +import sys + +import asyncclick as click + +from cli.command_group import cli + +from ..utils.database_utils import ( + check_database_connection, + get_database_url_from_env, + run_alembic_command, +) + + +@click.group() +def db(): + """Database management commands.""" + pass + + +@db.command() +@click.option( + "--schema", help="Schema name to operate on (defaults to R2R_PROJECT_NAME)" +) +async def history(schema): + """Show database migration history for a specific schema.""" + try: + db_url = get_database_url_from_env(False) + if not await check_database_connection(db_url): + click.secho( + "Database connection failed. Please check your environment variables.", + fg="red", + ) + sys.exit(1) + + result = await run_alembic_command("history", schema_name=schema) + if result != 0: + click.secho("Failed to get migration history.", fg="red") + sys.exit(1) + except Exception as e: + click.secho(f"Error getting migration history: {str(e)}", fg="red") + sys.exit(1) + + +@db.command() +@click.option( + "--schema", help="Schema name to operate on (defaults to R2R_PROJECT_NAME)" +) +async def current(schema): + """Show current database revision for a specific schema.""" + try: + db_url = get_database_url_from_env(False) + if not await check_database_connection(db_url): + click.secho( + "Database connection failed. 
Please check your environment variables.", + fg="red", + ) + sys.exit(1) + + result = await run_alembic_command("current", schema_name=schema) + if result != 0: + click.secho("Failed to get current revision.", fg="red") + sys.exit(1) + except Exception as e: + click.secho(f"Error getting current revision: {str(e)}", fg="red") + sys.exit(1) + + +@db.command() +@click.option( + "--schema", help="Schema name to operate on (defaults to R2R_PROJECT_NAME)" +) +@click.option("--revision", help="Upgrade to a specific revision") +async def upgrade(schema, revision): + """Upgrade database schema to the latest revision or a specific revision.""" + try: + db_url = get_database_url_from_env(False) + if not await check_database_connection(db_url): + click.secho( + "Database connection failed. Please check your environment variables.", + fg="red", + ) + sys.exit(1) + + click.echo( + f"Running database upgrade for schema {schema or 'default'}..." + ) + command = f"upgrade {revision}" if revision else "upgrade" + result = await run_alembic_command(command, schema_name=schema) + + if result == 0: + click.secho("Database upgrade completed successfully.", fg="green") + else: + click.secho("Database upgrade failed.", fg="red") + sys.exit(1) + + except Exception as e: + click.secho(f"Unexpected error: {str(e)}", fg="red") + sys.exit(1) + + +@db.command() +@click.option( + "--schema", help="Schema name to operate on (defaults to R2R_PROJECT_NAME)" +) +@click.option("--revision", help="Downgrade to a specific revision") +async def downgrade(schema, revision): + """Downgrade database schema to the previous revision or a specific revision.""" + if not revision: + if not click.confirm( + "No revision specified. This will downgrade the database by one revision. Continue?" + ): + return + + try: + db_url = get_database_url_from_env(log=False) + if not await check_database_connection(db_url): + click.secho( + "Database connection failed. Please check your environment variables.", + fg="red", + ) + sys.exit(1) + + click.echo( + f"Running database downgrade for schema {schema or 'default'}..." 
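+            # Editor's note: hypothetical example invocations — these assume the
+            # installed console entry point is `r2r` and that this group is
+            # mounted as `db` (neither is shown in this diff), and use a made-up
+            # Alembic revision id:
+            #   r2r db upgrade --schema my_project
+            #   r2r db downgrade --schema my_project --revision ae1027a6acf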
+ ) + command = f"downgrade {revision}" if revision else "downgrade" + result = await run_alembic_command(command, schema_name=schema) + + if result == 0: + click.secho( + "Database downgrade completed successfully.", fg="green" + ) + else: + click.secho("Database downgrade failed.", fg="red") + sys.exit(1) + + except Exception as e: + click.secho(f"Unexpected error: {str(e)}", fg="red") + sys.exit(1) diff --git a/py/cli/commands/ingestion.py b/py/cli/commands/ingestion.py index b960a4256..9810f9ed8 100644 --- a/py/cli/commands/ingestion.py +++ b/py/cli/commands/ingestion.py @@ -11,9 +11,14 @@ from cli.command_group import cli from cli.utils.param_types import JSON from cli.utils.timer import timer +from shared.abstractions.vector import ( + IndexMeasure, + IndexMethod, + VectorTableName, +) -def ingest_files_from_urls(client, urls): +async def ingest_files_from_urls(client, urls): """Download and ingest files from given URLs.""" files_to_ingest = [] metadatas = [] @@ -45,7 +50,7 @@ def ingest_files_from_urls(client, urls): # TODO: use the utils function generate_document_id document_ids.append(uuid.uuid5(uuid.NAMESPACE_DNS, url)) - response = client.ingest_files( + response = await client.ingest_files( files_to_ingest, metadatas=metadatas, document_ids=document_ids ) @@ -70,7 +75,7 @@ def ingest_files_from_urls(client, urls): "--run-without-orchestration", is_flag=True, help="Run with orchestration" ) @pass_context -def ingest_files( +async def ingest_files( ctx, file_paths, document_ids, metadatas, run_without_orchestration ): """Ingest files into R2R.""" @@ -79,7 +84,7 @@ def ingest_files( file_paths = list(file_paths) document_ids = list(document_ids) if document_ids else None run_with_orchestration = not run_without_orchestration - response = client.ingest_files( + response = await client.ingest_files( file_paths, metadatas, document_ids, @@ -104,7 +109,7 @@ def ingest_files( "--run-without-orchestration", is_flag=True, help="Run with orchestration" ) @pass_context -def update_files( +async def update_files( ctx, file_paths, document_ids, metadatas, run_without_orchestration ): """Update existing files in R2R.""" @@ -124,7 +129,7 @@ def update_files( "Metadatas must be a JSON string representing a list of dictionaries or a single dictionary" ) run_with_orchestration = not run_without_orchestration - response = client.update_files( + response = await client.update_files( file_paths, document_ids, metadatas, @@ -138,13 +143,13 @@ def update_files( "--v2", is_flag=True, help="use aristotle_v2.txt (a smaller file)" ) @pass_context -def ingest_sample_file(ctx, v2=False): +async def ingest_sample_file(ctx, v2=False): """Ingest the first sample file into R2R.""" sample_file_url = f"https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/aristotle{'_v2' if v2 else ''}.txt" client = ctx.obj with timer(): - response = ingest_files_from_urls(client, [sample_file_url]) + response = await ingest_files_from_urls(client, [sample_file_url]) click.echo( f"Sample file ingestion completed. 
Ingest files response:\n\n{response}" ) @@ -152,7 +157,7 @@ def ingest_sample_file(ctx, v2=False): @cli.command() @pass_context -def ingest_sample_files(ctx): +async def ingest_sample_files(ctx): """Ingest multiple sample files into R2R.""" client = ctx.obj urls = [ @@ -167,7 +172,7 @@ def ingest_sample_files(ctx): "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/pg_essay_2.html", ] with timer(): - response = ingest_files_from_urls(client, urls) + response = await ingest_files_from_urls(client, urls) click.echo( f"Sample files ingestion completed. Ingest files response:\n\n{response}" @@ -176,7 +181,7 @@ def ingest_sample_files(ctx): @cli.command() @pass_context -def ingest_sample_files_from_unstructured(ctx): +async def ingest_sample_files_from_unstructured(ctx): """Ingest multiple sample files from URLs into R2R.""" client = ctx.obj @@ -194,8 +199,107 @@ def ingest_sample_files_from_unstructured(ctx): file_paths = [os.path.join(folder, file) for file in os.listdir(folder)] with timer(): - response = client.ingest_files(file_paths) + response = await client.ingest_files(file_paths) click.echo( f"Sample files ingestion completed. Ingest files response:\n\n{response}" ) + + +@cli.command() +@click.option( + "--table-name", + type=click.Choice([t.value for t in VectorTableName]), + default=VectorTableName.VECTORS.value, + help="Table to create index on", +) +@click.option( + "--index-method", + type=click.Choice([m.value for m in IndexMethod]), + default=IndexMethod.hnsw.value, + help="Indexing method to use", +) +@click.option( + "--index-measure", + type=click.Choice([m.value for m in IndexMeasure]), + default=IndexMeasure.cosine_distance.value, + help="Distance measure to use", +) +@click.option( + "--index-arguments", + type=JSON, + help="Additional index arguments as JSON", +) +@click.option( + "--index-name", + help="Custom name for the index", +) +@click.option( + "--no-concurrent", + is_flag=True, + help="Disable concurrent index creation", +) +@pass_context +async def create_vector_index( + ctx, + table_name, + index_method, + index_measure, + index_arguments, + index_name, + no_concurrent, +): + """Create a vector index for similarity search.""" + client = ctx.obj + with timer(): + response = await client.create_vector_index( + table_name=table_name, + index_method=index_method, + index_measure=index_measure, + index_arguments=index_arguments, + index_name=index_name, + concurrently=not no_concurrent, + ) + click.echo(json.dumps(response, indent=2)) + + +@cli.command() +@click.option( + "--table-name", + type=click.Choice([t.value for t in VectorTableName]), + default=VectorTableName.VECTORS.value, + help="Table to list indices from", +) +@pass_context +async def list_vector_indices(ctx, table_name): + """List all vector indices for a table.""" + client = ctx.obj + with timer(): + response = await client.list_vector_indices(table_name=table_name) + click.echo(json.dumps(response, indent=2)) + + +@cli.command() +@click.argument("index-name", required=True) +@click.option( + "--table-name", + type=click.Choice([t.value for t in VectorTableName]), + default=VectorTableName.VECTORS.value, + help="Table containing the index", +) +@click.option( + "--no-concurrent", + is_flag=True, + help="Disable concurrent index deletion", +) +@pass_context +async def delete_vector_index(ctx, index_name, table_name, no_concurrent): + """Delete a vector index.""" + client = ctx.obj + with timer(): + response = await client.delete_vector_index( + index_name=index_name, + 
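+                # Editor's note: `concurrently=True` (the default here; disabled
+                # via --no-concurrent) presumably maps to Postgres
+                # DROP INDEX CONCURRENTLY on the server side, trading a slower
+                # drop for not blocking concurrent writes — an assumption about
+                # server behavior, not something verified in this diff.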
table_name=table_name, + concurrently=not no_concurrent, + ) + click.echo(json.dumps(response, indent=2)) diff --git a/py/cli/commands/kg.py b/py/cli/commands/kg.py index 0ca08827e..e0a1a2735 100644 --- a/py/cli/commands/kg.py +++ b/py/cli/commands/kg.py @@ -30,7 +30,7 @@ help="Force the graph creation process.", ) @pass_context -def create_graph( +async def create_graph( ctx, collection_id, run, kg_creation_settings, force_kg_creation ): client = ctx.obj @@ -52,7 +52,7 @@ def create_graph( kg_creation_settings = {"force_kg_creation": True} with timer(): - response = client.create_graph( + response = await client.create_graph( collection_id=collection_id, run_type=run_type, kg_creation_settings=kg_creation_settings, @@ -61,6 +61,60 @@ def create_graph( click.echo(json.dumps(response, indent=2)) +@cli.command() +@click.option( + "--collection-id", + required=False, + help="Collection ID to deduplicate entities for.", +) +@click.option( + "--run", + is_flag=True, + help="Run the deduplication process.", +) +@click.option( + "--force-deduplication", + is_flag=True, + help="Force the deduplication process.", +) +@click.option( + "--deduplication-settings", + required=False, + help="Settings for the deduplication process.", +) +@pass_context +async def deduplicate_entities( + ctx, collection_id, run, force_deduplication, deduplication_settings +): + """ + Deduplicate entities in the knowledge graph. + """ + client = ctx.obj + + if deduplication_settings: + try: + deduplication_settings = json.loads(deduplication_settings) + except json.JSONDecodeError: + click.echo( + "Error: deduplication-settings must be a valid JSON string" + ) + return + else: + deduplication_settings = {} + + run_type = "run" if run else "estimate" + + if force_deduplication: + deduplication_settings = {"force_deduplication": True} + + with timer(): + response = await client.deduplicate_entities( + collection_id, run_type, deduplication_settings + ) + + click.echo(json.dumps(response, indent=2)) + + @cli.command() @click.option( "--collection-id", @@ -84,7 +138,7 @@ def create_graph( help="Settings for the graph enrichment process.", ) @pass_context -def enrich_graph( +async def enrich_graph( ctx, collection_id, run, force_kg_enrichment, kg_enrichment_settings ): """ @@ -109,7 +163,7 @@ def enrich_graph( kg_enrichment_settings = {"force_kg_enrichment": True} with timer(): - response = client.enrich_graph( + response = await client.enrich_graph( collection_id, run_type, kg_enrichment_settings ) @@ -140,19 +194,22 @@ def enrich_graph( help="Entity IDs to filter by.", ) @click.option( - "--with-description", - is_flag=True, - help="Include entity descriptions in the response.", + "--entity-level", + default="collection", + help="Entity level to filter by.", ) @pass_context -def get_entities(ctx, collection_id, offset, limit, entity_ids): +async def get_entities( + ctx, collection_id, offset, limit, entity_ids, entity_level +): """ Retrieve entities from the knowledge graph. """ client = ctx.obj with timer(): - response = client.get_entities( + response = await client.get_entities( + entity_level, collection_id, offset, limit, @@ -191,14 +248,16 @@ def get_entities(ctx, collection_id, offset, limit, entity_ids): help="Entity names to filter by.", ) @pass_context -def get_triples(ctx, collection_id, offset, limit, triple_ids, entity_names): +async def get_triples( + ctx, collection_id, offset, limit, triple_ids, entity_names +): """ Retrieve triples from the knowledge graph. 
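+
+    Example (hypothetical invocation — assumes the installed entry point is
+    `r2r`, click's default dash-separated command naming, and a placeholder
+    collection UUID):
+
+        r2r get-triples --collection-id <collection-uuid> --limit 10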
""" client = ctx.obj with timer(): - response = client.get_triples( + response = await client.get_triples( collection_id, offset, limit, @@ -207,3 +266,31 @@ def get_triples(ctx, collection_id, offset, limit, triple_ids, entity_names): ) click.echo(json.dumps(response, indent=2)) + + +@cli.command() +@click.option( + "--collection-id", + required=True, + help="Collection ID to delete the graph for.", +) +@click.option( + "--cascade", + is_flag=True, + help="Whether to cascade the deletion.", +) +@pass_context +async def delete_graph_for_collection(ctx, collection_id, cascade): + """ + Delete the graph for a given collection. + + NOTE: Setting the cascade flag to true will delete entities and triples for documents that are shared across multiple collections. Do not set this flag unless you are absolutely sure that you want to delete the entities and triples for all documents in the collection. + """ + client = ctx.obj + + with timer(): + response = await client.delete_graph_for_collection( + collection_id, cascade + ) + + click.echo(json.dumps(response, indent=2)) diff --git a/py/cli/commands/management.py b/py/cli/commands/management.py index db79ae39b..36c512fa5 100644 --- a/py/cli/commands/management.py +++ b/py/cli/commands/management.py @@ -12,21 +12,24 @@ @click.option("--filters", type=JSON, help="Filters for analytics as JSON") @click.option("--analysis-types", type=JSON, help="Analysis types as JSON") @pass_context -def analytics(ctx, filters: Dict[str, Any], analysis_types: Dict[str, Any]): +async def analytics( + ctx, filters: Dict[str, Any], analysis_types: Dict[str, Any] +): client = ctx.obj """Retrieve analytics data.""" with timer(): - response = client.analytics(filters, analysis_types) + response = await client.analytics(filters, analysis_types) click.echo(response) @cli.command() @pass_context -def app_settings(client): +async def app_settings(ctx): """Retrieve application settings.""" + client = ctx.obj with timer(): - response = client.app_settings() + response = await client.app_settings() click.echo(response) @@ -44,13 +47,13 @@ def app_settings(client): help="The maximum number of nodes to return. Defaults to 100.", ) @pass_context -def users_overview(ctx, user_ids, offset, limit): +async def users_overview(ctx, user_ids, offset, limit): """Get an overview of users.""" client = ctx.obj user_ids = list(user_ids) if user_ids else None with timer(): - response = client.users_overview(user_ids, offset, limit) + response = await client.users_overview(user_ids, offset, limit) if "results" in response: click.echo("\nUser Overview:") @@ -73,7 +76,7 @@ def users_overview(ctx, user_ids, offset, limit): help="Filters for deletion in the format key:operator:value", ) @pass_context -def delete(ctx, filter): +async def delete(ctx, filter): """Delete documents based on filters.""" client = ctx.obj filters = {} @@ -84,7 +87,7 @@ def delete(ctx, filter): filters[key][f"${operator}"] = value with timer(): - response = client.delete(filters=filters) + response = await client.delete(filters=filters) click.echo(response) @@ -102,13 +105,13 @@ def delete(ctx, filter): help="The maximum number of nodes to return. 
Defaults to 100.", ) @pass_context -def documents_overview(ctx, document_ids, offset, limit): +async def documents_overview(ctx, document_ids, offset, limit): """Get an overview of documents.""" client = ctx.obj document_ids = list(document_ids) if document_ids else None with timer(): - response = client.documents_overview(document_ids, offset, limit) + response = await client.documents_overview(document_ids, offset, limit) for document in response["results"]: click.echo(document) @@ -133,7 +136,7 @@ def documents_overview(ctx, document_ids, offset, limit): help="Should the vector be included in the response chunks", ) @pass_context -def document_chunks(ctx, document_id, offset, limit, include_vectors): +async def document_chunks(ctx, document_id, offset, limit, include_vectors): """Get chunks of a specific document.""" client = ctx.obj if not document_id: @@ -141,7 +144,7 @@ def document_chunks(ctx, document_id, offset, limit, include_vectors): return with timer(): - chunks_data = client.document_chunks( + chunks_data = await client.document_chunks( document_id, offset, limit, include_vectors ) diff --git a/py/cli/commands/retrieval.py b/py/cli/commands/retrieval.py index d75e2ee9a..e9de58069 100644 --- a/py/cli/commands/retrieval.py +++ b/py/cli/commands/retrieval.py @@ -55,9 +55,6 @@ type=JSON, help="Max community description length", ) -@click.option( - "--max-llm-queries-for-global-search", type=JSON, help="Max community size" -) @click.option( "--search-strategy", type=str, @@ -65,7 +62,7 @@ ) @click.option("--local-search-limits", type=JSON, help="Local search limits") @pass_context -def search(ctx, query, **kwargs): +async def search(ctx, query, **kwargs): """Perform a search query.""" client = ctx.obj vector_search_settings = { @@ -95,14 +92,13 @@ def search(ctx, query, **kwargs): "entity_types", "relationships", "max_community_description_length", - "max_llm_queries_for_global_search", "local_search_limits", ] and v is not None } with timer(): - results = client.search( + results = await client.search( query, vector_search_settings, kg_search_settings, @@ -165,9 +161,6 @@ def search(ctx, query, **kwargs): type=int, help="Max community description length", ) -@click.option( - "--max-llm-queries-for-global-search", type=int, help="Max community size" -) @click.option( "--search-strategy", type=str, @@ -176,7 +169,7 @@ def search(ctx, query, **kwargs): ) @click.option("--local-search-limits", type=JSON, help="Local search limits") @pass_context -def rag(ctx, query, **kwargs): +async def rag(ctx, query, **kwargs): """Perform a RAG query.""" client = ctx.obj rag_generation_config = { @@ -212,7 +205,6 @@ def rag(ctx, query, **kwargs): "entity_types", "relationships", "max_community_description_length", - "max_llm_queries_for_global_search", "local_search_limits", ] and v is not None @@ -224,7 +216,7 @@ def rag(ctx, query, **kwargs): } with timer(): - response = client.rag( + response = await client.rag( query, rag_generation_config, vector_search_settings, @@ -232,7 +224,7 @@ def rag(ctx, query, **kwargs): ) if rag_generation_config.get("stream"): - for chunk in response: + async for chunk in response: click.echo(chunk, nl=False) click.echo() else: diff --git a/py/cli/commands/server.py b/py/cli/commands/server.py index 343e8f44c..3d3a9e52a 100644 --- a/py/cli/commands/server.py +++ b/py/cli/commands/server.py @@ -22,22 +22,22 @@ @cli.command() @pass_context -def health(ctx): +async def health(ctx): """Check the health of the server.""" client = ctx.obj with timer(): - response = 
client.health() + response = await client.health() click.echo(response) @cli.command() @pass_context -def server_stats(ctx): +async def server_stats(ctx): client = ctx.obj """Check the server stats.""" with timer(): - response = client.server_stats() + response = await client.server_stats() click.echo(response) @@ -51,11 +51,11 @@ def server_stats(ctx): ) @click.option("--run-type-filter", help="Filter for log types") @pass_context -def logs(ctx, run_type_filter, offset, limit): +async def logs(ctx, run_type_filter, offset, limit): """Retrieve logs with optional type filter.""" client = ctx.obj with timer(): - response = client.logs( + response = await client.logs( offset=offset, limit=limit, run_type_filter=run_type_filter ) @@ -203,7 +203,9 @@ def generate_report(): @cli.command() @click.option("--host", default=None, help="Host to run the server on") -@click.option("--port", default=None, help="Port to run the server on") +@click.option( + "--port", default=None, type=int, help="Port to run the server on" +) @click.option("--docker", is_flag=True, help="Run using Docker") @click.option( "--full", diff --git a/py/cli/commands/templates.py b/py/cli/commands/templates.py deleted file mode 100644 index 0ddc57510..000000000 --- a/py/cli/commands/templates.py +++ /dev/null @@ -1,176 +0,0 @@ -import os -import shutil -import subprocess - -import asyncclick as click - -from cli.command_group import cli - -REPO_URL = "https://github.com/SciPhi-AI/R2R.git" -TEMPLATES_DIR = "templates" - - -def get_templates(): - """Fetch list of available templates.""" - temp_dir = "temp_repo" - try: - _prepare_temp_directory(temp_dir) - _clone_and_checkout_templates(temp_dir) - return _get_template_list() - except subprocess.CalledProcessError as e: - raise click.ClickException( - f"Failed to fetch templates: {e.stderr}" - ) from e - except Exception as e: - raise click.ClickException( - f"An unexpected error occurred: {str(e)}" - ) from e - finally: - _cleanup(temp_dir) - - -def _prepare_temp_directory(temp_dir): - if os.path.exists(temp_dir): - shutil.rmtree(temp_dir) - os.makedirs(temp_dir, exist_ok=True) - - -def _clone_and_checkout_templates(temp_dir): - subprocess.run( - [ - "git", - "clone", - "--depth", - "1", - "--filter=blob:none", - "--sparse", - REPO_URL, - temp_dir, - ], - check=True, - capture_output=True, - text=True, - ) - os.chdir(temp_dir) - subprocess.run( - ["git", "sparse-checkout", "set", TEMPLATES_DIR], - check=True, - capture_output=True, - text=True, - ) - subprocess.run( - ["git", "checkout"], - check=True, - capture_output=True, - text=True, - ) - - -def _get_template_list(): - if not os.path.exists(TEMPLATES_DIR): - raise click.ClickException( - f"Templates directory '{TEMPLATES_DIR}' not found in the repository." - ) - - if templates := [ - d - for d in os.listdir(TEMPLATES_DIR) - if os.path.isdir(os.path.join(TEMPLATES_DIR, d)) - ]: - return templates - else: - raise click.ClickException("No templates found in the repository.") - - -def _cleanup(temp_dir): - os.chdir("..") - if os.path.exists(temp_dir): - shutil.rmtree(temp_dir) - - -def clone_template(template_name, location): - templates = get_templates() - if template_name not in templates: - raise ValueError( - f"Template '{template_name}' not found. Available templates: {', '.join(templates)}" - ) - - if not location: - location = template_name - - if os.path.exists(location) and os.listdir(location): - raise ValueError( - f"Directory '{location}' already exists and is not empty." 
- ) - - # Clone the repository - subprocess.run( - [ - "git", - "clone", - "--depth", - "1", # Shallow clone - "--filter=blob:none", # Don't download file contents initially - "--sparse", # Enable sparse checkout - REPO_URL, - location, - ], - check=True, - ) - - os.chdir(location) - subprocess.run( - [ - "git", - "sparse-checkout", - "set", - f"{TEMPLATES_DIR}/{template_name}", - ], - check=True, - ) - subprocess.run(["git", "checkout"], check=True) - - template_dir = os.path.join(TEMPLATES_DIR, template_name) - if not os.path.exists(template_dir): - raise ValueError( - f"Template directory '{template_name}' not found in the cloned repository." - ) - - for item in os.listdir(template_dir): - shutil.move(os.path.join(template_dir, item), item) - - shutil.rmtree(TEMPLATES_DIR) - shutil.rmtree(".git") - - -@cli.command() -@click.argument("template_name", required=True) -@click.argument("location", required=False) -def clone(template_name, location): - """Clones a template repository.""" - try: - clone_template(template_name, location) - click.echo( - f"Successfully cloned template '{template_name}' to {location or template_name}" - ) - except ValueError as e: - raise click.ClickException(str(e)) from e - except subprocess.CalledProcessError as e: - raise click.ClickException(f"Git operation failed: {e}") from e - except Exception as e: - raise click.ClickException( - f"An unexpected error occurred: {str(e)}" - ) from e - - -@cli.command() -def list_templates(): - """Lists all available templates.""" - try: - templates = get_templates() - click.echo("Available templates:") - for template in templates: - click.echo(f"- {template}") - except Exception as e: - click.echo(f"Error: {str(e)}", err=True) - raise click.ClickException(str(e)) from e diff --git a/py/cli/main.py b/py/cli/main.py index 271093ffa..a1ea3020f 100644 --- a/py/cli/main.py +++ b/py/cli/main.py @@ -1,12 +1,12 @@ from cli.command_group import cli from cli.commands import ( auth, + database, ingestion, kg, management, retrieval, server, - templates, ) from cli.utils.telemetry import posthog, telemetry @@ -32,9 +32,10 @@ def add_command_with_telemetry(command): add_command_with_telemetry(management.documents_overview) add_command_with_telemetry(management.document_chunks) -# Restructure +# Knowledge Graph add_command_with_telemetry(kg.create_graph) add_command_with_telemetry(kg.enrich_graph) +add_command_with_telemetry(kg.deduplicate_entities) # Retrieval add_command_with_telemetry(retrieval.search) @@ -50,13 +51,17 @@ def add_command_with_telemetry(command): add_command_with_telemetry(server.update) add_command_with_telemetry(server.version) -# Templates -add_command_with_telemetry(templates.clone) +# Database +add_command_with_telemetry(database.db) # Add the main db group +add_command_with_telemetry(database.upgrade) +add_command_with_telemetry(database.downgrade) +add_command_with_telemetry(database.current) +add_command_with_telemetry(database.history) def main(): try: - cli(_anyio_backend="asyncio") + cli() except SystemExit: # Silently exit without printing the traceback pass diff --git a/py/cli/utils/database_utils.py b/py/cli/utils/database_utils.py new file mode 100644 index 000000000..b6535078c --- /dev/null +++ b/py/cli/utils/database_utils.py @@ -0,0 +1,217 @@ +import configparser +import logging.config +import os +import sys +from pathlib import Path +from typing import Dict, Optional + +import alembic.config +import asyncclick as click +from alembic import command as alembic_command +from sqlalchemy import 
create_engine, text +from sqlalchemy.exc import OperationalError + + +def get_default_db_vars() -> Dict[str, str]: + """Get default database environment variables.""" + return { + "R2R_POSTGRES_HOST": "localhost", + "R2R_POSTGRES_PORT": "5432", + "R2R_POSTGRES_DBNAME": "postgres", + "R2R_POSTGRES_USER": "postgres", + "R2R_POSTGRES_PASSWORD": "postgres", + "R2R_PROJECT_NAME": "r2r_default", + } + + +def get_schema_version_table(schema_name: str) -> str: + """Get the schema-specific version of alembic_version table name.""" + return f"{schema_name}_alembic_version" + + +def get_database_url_from_env(log: bool = True) -> str: + """Construct database URL from environment variables.""" + env_vars = { + k: os.environ.get(k, v) for k, v in get_default_db_vars().items() + } + + if log: + for k, v in env_vars.items(): + click.secho( + f"Using value for {k}: {v}", + fg="yellow" if v == get_default_db_vars()[k] else "green", + ) + + return ( + f"postgresql://{env_vars['R2R_POSTGRES_USER']}:{env_vars['R2R_POSTGRES_PASSWORD']}" + f"@{env_vars['R2R_POSTGRES_HOST']}:{env_vars['R2R_POSTGRES_PORT']}" + f"/{env_vars['R2R_POSTGRES_DBNAME']}" + ) + + +def ensure_schema_exists(engine, schema_name: str): + """Create schema if it doesn't exist and set up schema-specific version table.""" + with engine.begin() as conn: + # Create schema if it doesn't exist + conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")) + + # Move or create alembic_version table in the specific schema + version_table = get_schema_version_table(schema_name) + conn.execute( + text( + f""" + CREATE TABLE IF NOT EXISTS {schema_name}.{version_table} ( + version_num VARCHAR(32) NOT NULL + ) + """ + ) + ) + + +def check_current_revision(engine, schema_name: str) -> Optional[str]: + """Check the current revision in the version table.""" + version_table = get_schema_version_table(schema_name) + with engine.connect() as conn: + result = conn.execute( + text(f"SELECT version_num FROM {schema_name}.{version_table}") + ).fetchone() + return result[0] if result else None + + +async def check_database_connection(db_url: str) -> bool: + """Check if we can connect to the database.""" + try: + engine = create_engine(db_url) + with engine.connect(): + return True + except OperationalError as e: + click.secho(f"Could not connect to database: {str(e)}", fg="red") + if "Connection refused" in str(e): + click.secho( + "Make sure PostgreSQL is running and accessible with the provided credentials.", + fg="yellow", + ) + return False + except Exception as e: + click.secho( + f"Unexpected error checking database connection: {str(e)}", + fg="red", + ) + return False + + +def create_schema_config( + project_root: Path, schema_name: str, db_url: str +) -> alembic.config.Config: + """Create an Alembic config for a specific schema.""" + config = alembic.config.Config() + + # Set basic options + config.set_main_option("script_location", str(project_root / "migrations")) + config.set_main_option("sqlalchemy.url", db_url) + + # Set schema-specific version table + version_table = get_schema_version_table(schema_name) + config.set_main_option("version_table", version_table) + config.set_main_option("version_table_schema", schema_name) + + return config + + +def setup_alembic_logging(): + """Set up logging configuration for Alembic.""" + # Reset existing loggers to prevent duplication + for handler in logging.root.handlers[:]: + logging.root.removeHandler(handler) + + logging_config = { + "version": 1, + "formatters": { + "generic": { + "format": "%(levelname)s 
[%(name)s] %(message)s", + "datefmt": "%H:%M:%S", + }, + }, + "handlers": { + "console": { + "class": "logging.StreamHandler", + "formatter": "generic", + "stream": sys.stderr, + }, + }, + "loggers": { + "alembic": { + "level": "INFO", + "handlers": ["console"], + "propagate": False, # Prevent propagation to root logger + }, + "sqlalchemy": { + "level": "WARN", + "handlers": ["console"], + "propagate": False, # Prevent propagation to root logger + }, + }, + "root": { + "level": "WARN", + "handlers": ["console"], + }, + } + logging.config.dictConfig(logging_config) + + +async def run_alembic_command( + command_name: str, + project_root: Optional[Path] = None, + schema_name: Optional[str] = None, +) -> int: + """Run an Alembic command with schema awareness.""" + try: + if project_root is None: + project_root = Path(__file__).parent.parent.parent + + if schema_name is None: + schema_name = os.environ.get("R2R_PROJECT_NAME", "r2r_default") + + # Set up logging + setup_alembic_logging() + + # Get database URL and create engine + db_url = get_database_url_from_env() + engine = create_engine(db_url) + + # Ensure schema exists and has version table + ensure_schema_exists(engine, schema_name) + + # Create schema-specific config + config = create_schema_config(project_root, schema_name, db_url) + + click.secho(f"\nRunning command for schema: {schema_name}", fg="blue") + + # Execute the command + if command_name == "current": + current_rev = check_current_revision(engine, schema_name) + if current_rev: + click.secho(f"Current revision: {current_rev}", fg="green") + else: + click.secho("No migrations applied yet.", fg="yellow") + alembic_command.current(config) + elif command_name == "history": + alembic_command.history(config) + elif command_name.startswith("upgrade"): + revision = "head" + if " " in command_name: + _, revision = command_name.split(" ", 1) + alembic_command.upgrade(config, revision) + elif command_name.startswith("downgrade"): + revision = "-1" + if " " in command_name: + _, revision = command_name.split(" ", 1) + alembic_command.downgrade(config, revision) + else: + raise ValueError(f"Unsupported command: {command_name}") + + return 0 + + except Exception as e: + click.secho(f"Error running migration command: {str(e)}", fg="red") + return 1 diff --git a/py/cli/utils/docker_utils.py b/py/cli/utils/docker_utils.py index 3be92cb2d..0b0c1dea6 100644 --- a/py/cli/utils/docker_utils.py +++ b/py/cli/utils/docker_utils.py @@ -88,7 +88,7 @@ async def run_local_serve( full: bool = False, ) -> None: try: - from r2r import R2RBuilder, R2RConfig + from core import R2RBuilder, R2RConfig except ImportError as e: click.echo( "Error: You must install the `r2r core` package to run the R2R server locally." 
@@ -122,7 +122,7 @@ async def run_local_serve( # "core.main.app_entry:app", host=host, port=available_port, reload=False # ) - r2r_instance.serve(host, available_port) + await r2r_instance.serve(host, available_port) def run_docker_serve( @@ -334,7 +334,7 @@ def get_compose_files(): return compose_files -def find_available_port(start_port): +def find_available_port(start_port: int): port = start_port while True: with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: diff --git a/py/compose.full.yaml b/py/compose.full.yaml index 7fcf0a987..8c7ea4a19 100644 --- a/py/compose.full.yaml +++ b/py/compose.full.yaml @@ -138,7 +138,7 @@ services: SERVER_GRPC_BIND_ADDRESS: "0.0.0.0" SERVER_GRPC_INSECURE: "t" SERVER_GRPC_BROADCAST_ADDRESS: "hatchet-engine:7077" - SERVER_GRPC_MAX_MSG_SIZE: 13421772800 + SERVER_GRPC_MAX_MSG_SIZE: 134217728 volumes: - hatchet_certs:/hatchet/certs - hatchet_config:/hatchet/config @@ -165,7 +165,7 @@ services: SERVER_GRPC_BIND_ADDRESS: "0.0.0.0" SERVER_GRPC_PORT: "7077" SERVER_GRPC_INSECURE: "t" - SERVER_GRPC_MAX_MSG_SIZE: 13421772800 + SERVER_GRPC_MAX_MSG_SIZE: 134217728 volumes: - hatchet_certs:/hatchet/certs - hatchet_config:/hatchet/config @@ -373,6 +373,7 @@ services: image: emrgntcmplxty/r2r-dashboard:latest environment: - NEXT_PUBLIC_R2R_DEPLOYMENT_URL=${R2R_DEPLOYMENT_URL:-http://localhost:7272} + - NEXT_PUBLIC_HATCHET_DASHBOARD_URL=${HATCHET_DASHBOARD_URL:-http://localhost:${R2R_HATCHET_DASHBOARD_PORT:-7274}} networks: - r2r-network ports: diff --git a/py/core/__init__.py b/py/core/__init__.py index f6b1e4552..decd1d4cf 100644 --- a/py/core/__init__.py +++ b/py/core/__init__.py @@ -46,7 +46,6 @@ "AsyncSyncMeta", "syncable", # Completion abstractions - "CompletionRecord", "MessageType", # Document abstractions "DataType", @@ -80,6 +79,7 @@ "KGSearchSettings", "VectorSearchResult", "VectorSearchSettings", + "HybridSearchSettings", # User abstractions "Token", "TokenData", @@ -140,8 +140,6 @@ # Database providers "DatabaseConfig", "DatabaseProvider", - "RelationalDBProvider", - "VectorDBProvider", # Embedding provider "EmbeddingConfig", "EmbeddingProvider", diff --git a/py/core/base/__init__.py b/py/core/base/__init__.py index c98f2138a..ee70eb0c8 100644 --- a/py/core/base/__init__.py +++ b/py/core/base/__init__.py @@ -14,7 +14,6 @@ "AsyncSyncMeta", "syncable", # Completion abstractions - "CompletionRecord", "MessageType", # Document abstractions "DataType", @@ -48,6 +47,7 @@ "KGSearchSettings", "VectorSearchResult", "VectorSearchSettings", + "HybridSearchSettings", # KG abstractions "KGCreationSettings", "KGEnrichmentSettings", @@ -113,8 +113,6 @@ # Database providers "DatabaseConfig", "DatabaseProvider", - "RelationalDBProvider", - "VectorDBProvider", "PostgresConfigurationSettings", # Embedding provider "EmbeddingConfig", @@ -150,7 +148,6 @@ "generate_default_user_collection_id", "generate_collection_id_from_name", "generate_user_id", - "generate_message_id", "increment_version", "EntityType", "RelationshipType", diff --git a/py/core/base/abstractions/__init__.py b/py/core/base/abstractions/__init__.py index 5d8c5bda5..6fd4605b8 100644 --- a/py/core/base/abstractions/__init__.py +++ b/py/core/base/abstractions/__init__.py @@ -1,5 +1,4 @@ from shared.abstractions.base import AsyncSyncMeta, R2RSerializable, syncable -from shared.abstractions.completion import CompletionRecord, MessageType from shared.abstractions.document import ( DataType, Document, @@ -31,6 +30,8 @@ from shared.abstractions.kg import ( KGCreationSettings, KGEnrichmentSettings, + 
KGEntityDeduplicationSettings, + KGEntityDeduplicationType, KGRunType, ) from shared.abstractions.llm import ( @@ -38,6 +39,7 @@ LLMChatCompletion, LLMChatCompletionChunk, Message, + MessageType, RAGCompletion, ) from shared.abstractions.prompt import Prompt @@ -60,6 +62,7 @@ StorageResult, Vector, VectorEntry, + VectorQuantizationType, VectorType, ) @@ -69,7 +72,6 @@ "AsyncSyncMeta", "syncable", # Completion abstractions - "CompletionRecord", "MessageType", # Document abstractions "DataType", @@ -116,9 +118,11 @@ "VectorSearchResult", "VectorSearchSettings", "HybridSearchSettings", - # Restructure abstractions + # KG abstractions "KGCreationSettings", "KGEnrichmentSettings", + "KGEntityDeduplicationSettings", + "KGEntityDeduplicationType", "KGRunType", # User abstractions "Token", @@ -128,5 +132,6 @@ "Vector", "VectorEntry", "VectorType", + "VectorQuantizationType", "StorageResult", ] diff --git a/py/core/base/api/models/__init__.py b/py/core/base/api/models/__init__.py index bcb3eec29..5fc4ea41d 100644 --- a/py/core/base/api/models/__init__.py +++ b/py/core/base/api/models/__init__.py @@ -10,17 +10,23 @@ CreateVectorIndexResponse, IngestionResponse, WrappedCreateVectorIndexResponse, + WrappedDeleteVectorIndexResponse, WrappedIngestionResponse, + WrappedListVectorIndicesResponse, + WrappedSelectVectorIndexResponse, WrappedUpdateResponse, ) from shared.api.models.kg.responses import ( KGCreationResponse, KGEnrichmentResponse, + KGEntityDeduplicationResponse, WrappedKGCommunitiesResponse, WrappedKGCreationResponse, WrappedKGEnrichmentResponse, WrappedKGEntitiesResponse, + WrappedKGEntityDeduplicationResponse, WrappedKGTriplesResponse, + WrappedKGTunePromptResponse, ) from shared.api.models.management.responses import ( AnalyticsResponse, @@ -78,11 +84,17 @@ "WrappedUpdateResponse", "CreateVectorIndexResponse", "WrappedCreateVectorIndexResponse", - # Restructure Responses + "WrappedListVectorIndicesResponse", + "WrappedDeleteVectorIndexResponse", + "WrappedSelectVectorIndexResponse", + # Knowledge Graph Responses "KGCreationResponse", "WrappedKGCreationResponse", "KGEnrichmentResponse", "WrappedKGEnrichmentResponse", + "KGEntityDeduplicationResponse", + "WrappedKGEntityDeduplicationResponse", + "WrappedKGTunePromptResponse", # Management Responses "PromptResponse", "ServerStats", diff --git a/py/core/base/providers/__init__.py b/py/core/base/providers/__init__.py index 306eb0ad5..e33d05e2e 100644 --- a/py/core/base/providers/__init__.py +++ b/py/core/base/providers/__init__.py @@ -2,11 +2,15 @@ from .base import AppConfig, Provider, ProviderConfig from .crypto import CryptoConfig, CryptoProvider from .database import ( + CollectionHandler, DatabaseConfig, + DatabaseConnectionManager, DatabaseProvider, + DocumentHandler, PostgresConfigurationSettings, - RelationalDBProvider, - VectorDBProvider, + TokenHandler, + UserHandler, + VectorHandler, ) from .embedding import EmbeddingConfig, EmbeddingProvider from .file import FileConfig, FileProvider @@ -32,11 +36,15 @@ "CryptoConfig", "CryptoProvider", # Database providers + "DatabaseConnectionManager", + "DocumentHandler", + "CollectionHandler", + "TokenHandler", + "UserHandler", + "VectorHandler", "DatabaseConfig", "PostgresConfigurationSettings", "DatabaseProvider", - "RelationalDBProvider", - "VectorDBProvider", # Embedding provider "EmbeddingConfig", "EmbeddingProvider", diff --git a/py/core/base/providers/auth.py b/py/core/base/providers/auth.py index f64ccd88c..37fc95b83 100644 --- a/py/core/base/providers/auth.py +++ 
b/py/core/base/providers/auth.py @@ -81,7 +81,7 @@ def get_current_active_user( pass @abstractmethod - async def register(self, email: str, password: str) -> dict[str, str]: + async def register(self, email: str, password: str) -> UserResponse: pass @abstractmethod diff --git a/py/core/base/providers/database.py b/py/core/base/providers/database.py index bccbfba68..ecf00e5d9 100644 --- a/py/core/base/providers/database.py +++ b/py/core/base/providers/database.py @@ -1,13 +1,37 @@ import logging from abc import ABC, abstractmethod -from typing import Any, Optional +from datetime import datetime +from typing import Any, Optional, Sequence, Tuple, Union +from uuid import UUID from pydantic import BaseModel -from shared.abstractions.vector import VectorQuantizationType +from core.base import VectorEntry +from core.base.abstractions import ( + DocumentInfo, + UserStats, + VectorEntry, + VectorSearchResult, + VectorSearchSettings, +) +from core.base.api.models import ( + CollectionOverviewResponse, + CollectionResponse, + UserResponse, +) +from shared.abstractions.vector import ( + IndexArgsHNSW, + IndexArgsIVFFlat, + IndexMeasure, + IndexMethod, + VectorTableName, +) from .base import Provider, ProviderConfig +logger = logging.getLogger() +from shared.utils import _decorate_vector_type + logger = logging.getLogger() @@ -53,6 +77,7 @@ class DatabaseConfig(ProviderConfig): ] = None default_collection_name: str = "Default" default_collection_description: str = "Your default collection." + enable_fts: bool = False def __post_init__(self): self.validate_config() @@ -69,41 +94,856 @@ def supported_providers(self) -> list[str]: return ["postgres"] -class VectorDBProvider(Provider, ABC): +class DatabaseConnectionManager(ABC): + @abstractmethod + def execute_query( + self, + query: str, + params: Optional[Union[dict[str, Any], Sequence[Any]]] = None, + isolation_level: Optional[str] = None, + ): + pass + + @abstractmethod + async def execute_many(self, query, params=None, batch_size=1000): + pass + + @abstractmethod + def fetch_query( + self, + query: str, + params: Optional[Union[dict[str, Any], Sequence[Any]]] = None, + ): + pass + + @abstractmethod + def fetchrow_query( + self, + query: str, + params: Optional[Union[dict[str, Any], Sequence[Any]]] = None, + ): + pass + + @abstractmethod + async def initialize(self, pool: Any): + pass + + +class Handler(ABC): + def __init__( + self, project_name: str, connection_manager: DatabaseConnectionManager + ): + self.project_name = project_name + self.connection_manager = connection_manager + + def _get_table_name(self, base_name: str) -> str: + return f"{self.project_name}.{base_name}" + + @abstractmethod + def create_table(self): + pass + + +class DocumentHandler(Handler): + + @abstractmethod + async def upsert_documents_overview( + self, documents_overview: Union[DocumentInfo, list[DocumentInfo]] + ) -> None: + pass + + @abstractmethod + async def delete_from_documents_overview( + self, document_id: UUID, version: Optional[str] = None + ) -> None: + pass + + @abstractmethod + async def get_documents_overview( + self, + filter_user_ids: Optional[list[UUID]] = None, + filter_document_ids: Optional[list[UUID]] = None, + filter_collection_ids: Optional[list[UUID]] = None, + offset: int = 0, + limit: int = -1, + ) -> dict[str, Any]: + pass + + @abstractmethod + async def get_workflow_status( + self, id: Union[UUID, list[UUID]], status_type: str + ): + pass + + @abstractmethod + async def set_workflow_status( + self, id: Union[UUID, list[UUID]], status_type: str, 
status: str + ): + pass + + @abstractmethod + async def get_document_ids_by_status( + self, + status_type: str, + status: Union[str, list[str]], + collection_id: Optional[UUID] = None, + ): + pass + + +class CollectionHandler(Handler): + @abstractmethod + async def create_default_collection( + self, user_id: Optional[UUID] = None + ) -> CollectionResponse: + pass + + @abstractmethod + async def collection_exists(self, collection_id: UUID) -> bool: + pass + + @abstractmethod + async def create_collection( + self, + name: str, + description: str = "", + collection_id: Optional[UUID] = None, + ) -> CollectionResponse: + pass + + @abstractmethod + async def get_collection(self, collection_id: UUID) -> CollectionResponse: + pass + + @abstractmethod + async def update_collection( + self, + collection_id: UUID, + name: Optional[str] = None, + description: Optional[str] = None, + ) -> CollectionResponse: + pass + + @abstractmethod + async def delete_collection_relational(self, collection_id: UUID) -> None: + pass + + @abstractmethod + async def list_collections( + self, offset: int = 0, limit: int = -1 + ) -> dict[str, Union[list[CollectionResponse], int]]: + """List collections with pagination.""" + pass + + @abstractmethod + async def get_collections_by_ids( + self, collection_ids: list[UUID] + ) -> list[CollectionResponse]: + pass + + @abstractmethod + async def documents_in_collection( + self, collection_id: UUID, offset: int = 0, limit: int = -1 + ) -> dict[str, Union[list[DocumentInfo], int]]: + pass + + @abstractmethod + async def get_collections_overview( + self, + collection_ids: Optional[list[UUID]] = None, + offset: int = 0, + limit: int = -1, + ) -> dict[str, Union[list[CollectionOverviewResponse], int]]: + pass + + @abstractmethod + async def get_collections_for_user( + self, user_id: UUID, offset: int = 0, limit: int = -1 + ) -> dict[str, Union[list[CollectionResponse], int]]: + pass + + @abstractmethod + async def assign_document_to_collection_relational( + self, + document_id: UUID, + collection_id: UUID, + ) -> UUID: + pass + + @abstractmethod + async def document_collections( + self, document_id: UUID, offset: int = 0, limit: int = -1 + ) -> dict[str, Union[list[CollectionResponse], int]]: + pass + + @abstractmethod + async def remove_document_from_collection_relational( + self, document_id: UUID, collection_id: UUID + ) -> None: + pass + + +class TokenHandler(Handler): + + @abstractmethod + async def create_table(self): + pass + + @abstractmethod + async def blacklist_token( + self, token: str, current_time: Optional[datetime] = None + ): + pass + + @abstractmethod + async def is_token_blacklisted(self, token: str) -> bool: + pass + + @abstractmethod + async def clean_expired_blacklisted_tokens( + self, + max_age_hours: int = 7 * 24, + current_time: Optional[datetime] = None, + ): + pass + + +class UserHandler(Handler): + TABLE_NAME = "users" + + @abstractmethod + async def get_user_by_id(self, user_id: UUID) -> UserResponse: + pass + + @abstractmethod + async def get_user_by_email(self, email: str) -> UserResponse: + pass + + @abstractmethod + async def create_user(self, email: str, password: str) -> UserResponse: + pass + + @abstractmethod + async def update_user(self, user: UserResponse) -> UserResponse: + pass + + @abstractmethod + async def delete_user_relational(self, user_id: UUID) -> None: + pass + + @abstractmethod + async def update_user_password( + self, user_id: UUID, new_hashed_password: str + ): + pass + + @abstractmethod + async def get_all_users(self) -> 
list[UserResponse]: + pass + + @abstractmethod + async def store_verification_code( + self, user_id: UUID, verification_code: str, expiry: datetime + ): + pass + + @abstractmethod + async def verify_user(self, verification_code: str) -> None: + pass + + @abstractmethod + async def remove_verification_code(self, verification_code: str): + pass + + @abstractmethod + async def expire_verification_code(self, user_id: UUID): + pass + + @abstractmethod + async def store_reset_token( + self, user_id: UUID, reset_token: str, expiry: datetime + ): + pass + + @abstractmethod + async def get_user_id_by_reset_token( + self, reset_token: str + ) -> Optional[UUID]: + pass + + @abstractmethod + async def remove_reset_token(self, user_id: UUID): + pass + + @abstractmethod + async def remove_user_from_all_collections(self, user_id: UUID): + pass + @abstractmethod - def _initialize_vector_db( - self, dimension: int, quantization_type: VectorQuantizationType + async def add_user_to_collection( + self, user_id: UUID, collection_id: UUID ) -> None: pass + @abstractmethod + async def remove_user_from_collection( + self, user_id: UUID, collection_id: UUID + ) -> None: + pass -class RelationalDBProvider(Provider, ABC): @abstractmethod - async def _initialize_relational_db(self) -> None: + async def get_users_in_collection( + self, collection_id: UUID, offset: int = 0, limit: int = -1 + ) -> dict[str, Union[list[UserResponse], int]]: + pass + + @abstractmethod + async def mark_user_as_superuser(self, user_id: UUID): + pass + + @abstractmethod + async def get_user_id_by_verification_code( + self, verification_code: str + ) -> Optional[UUID]: + pass + + @abstractmethod + async def mark_user_as_verified(self, user_id: UUID): + pass + + @abstractmethod + async def get_users_overview( + self, + user_ids: Optional[list[UUID]] = None, + offset: int = 0, + limit: int = -1, + ) -> dict[str, Union[list[UserStats], int]]: + pass + + +class VectorHandler(Handler): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + @abstractmethod + async def upsert(self, entry: VectorEntry) -> None: + pass + + @abstractmethod + async def upsert_entries(self, entries: list[VectorEntry]) -> None: + pass + + @abstractmethod + async def semantic_search( + self, query_vector: list[float], search_settings: VectorSearchSettings + ) -> list[VectorSearchResult]: + pass + + @abstractmethod + async def full_text_search( + self, query_text: str, search_settings: VectorSearchSettings + ) -> list[VectorSearchResult]: + pass + + @abstractmethod + async def hybrid_search( + self, + query_text: str, + query_vector: list[float], + search_settings: VectorSearchSettings, + *args, + **kwargs, + ) -> list[VectorSearchResult]: + pass + + @abstractmethod + async def delete( + self, filters: dict[str, Any] + ) -> dict[str, dict[str, str]]: + pass + + @abstractmethod + async def assign_document_to_collection_vector( + self, document_id: UUID, collection_id: UUID + ) -> None: + pass + + @abstractmethod + async def remove_document_from_collection_vector( + self, document_id: UUID, collection_id: UUID + ) -> None: + pass + + @abstractmethod + async def delete_user_vector(self, user_id: UUID) -> None: + pass + + @abstractmethod + async def delete_collection_vector(self, collection_id: UUID) -> None: + pass + + @abstractmethod + async def get_document_chunks( + self, + document_id: UUID, + offset: int = 0, + limit: int = -1, + include_vectors: bool = False, + ) -> dict[str, Any]: + pass + + @abstractmethod + async def create_index( + self, + 
table_name: Optional[VectorTableName] = None, + index_measure: IndexMeasure = IndexMeasure.cosine_distance, + index_method: IndexMethod = IndexMethod.auto, + index_arguments: Optional[ + Union[IndexArgsIVFFlat, IndexArgsHNSW] + ] = None, + index_name: Optional[str] = None, + concurrently: bool = True, + ) -> None: + pass + + @abstractmethod + async def list_indices( + self, table_name: Optional[VectorTableName] = None + ) -> list[dict]: + pass + + @abstractmethod + async def delete_index( + self, + index_name: str, + table_name: Optional[VectorTableName] = None, + concurrently: bool = True, + ) -> None: + pass + + @abstractmethod + async def get_semantic_neighbors( + self, + document_id: UUID, + chunk_id: UUID, + limit: int = 10, + similarity_threshold: float = 0.5, + ) -> list[dict[str, Any]]: pass class DatabaseProvider(Provider): + connection_manager: DatabaseConnectionManager + document_handler: DocumentHandler + collection_handler: CollectionHandler + token_handler: TokenHandler + user_handler: UserHandler + vector_handler: VectorHandler + config: DatabaseConfig + project_name: str + def __init__(self, config: DatabaseConfig): - if not isinstance(config, DatabaseConfig): - raise ValueError( - "DatabaseProvider must be initialized with a `DatabaseConfig`." - ) logger.info(f"Initializing DatabaseProvider with config {config}.") super().__init__(config) - # remove later to re-introduce typing... - self.vector: Any = None - self.relational: Any = None - @abstractmethod - def _initialize_vector_db(self) -> VectorDBProvider: + async def __aenter__(self): pass @abstractmethod - async def _initialize_relational_db(self) -> RelationalDBProvider: + async def __aexit__(self, exc_type, exc, tb): pass - @abstractmethod - def _get_table_name(self, base_name: str) -> str: - pass + # Document handler methods + async def upsert_documents_overview( + self, documents_overview: Union[DocumentInfo, list[DocumentInfo]] + ) -> None: + return await self.document_handler.upsert_documents_overview( + documents_overview + ) + + async def delete_from_documents_overview( + self, document_id: UUID, version: Optional[str] = None + ) -> None: + return await self.document_handler.delete_from_documents_overview( + document_id, version + ) + + async def get_documents_overview( + self, + filter_user_ids: Optional[list[UUID]] = None, + filter_document_ids: Optional[list[UUID]] = None, + filter_collection_ids: Optional[list[UUID]] = None, + offset: int = 0, + limit: int = -1, + ) -> dict[str, Any]: + return await self.document_handler.get_documents_overview( + filter_user_ids, + filter_document_ids, + filter_collection_ids, + offset, + limit, + ) + + async def get_workflow_status( + self, id: Union[UUID, list[UUID]], status_type: str + ): + return await self.document_handler.get_workflow_status(id, status_type) + + async def set_workflow_status( + self, id: Union[UUID, list[UUID]], status_type: str, status: str + ): + return await self.document_handler.set_workflow_status( + id, status_type, status + ) + + async def get_document_ids_by_status( + self, + status_type: str, + status: Union[str, list[str]], + collection_id: Optional[UUID] = None, + ): + return await self.document_handler.get_document_ids_by_status( + status_type, status, collection_id + ) + + # Collection handler methods + async def create_default_collection( + self, user_id: Optional[UUID] = None + ) -> CollectionResponse: + return await self.collection_handler.create_default_collection(user_id) + + async def collection_exists(self, collection_id: UUID) -> 
bool: + return await self.collection_handler.collection_exists(collection_id) + + async def create_collection( + self, + name: str, + description: str = "", + collection_id: Optional[UUID] = None, + ) -> CollectionResponse: + return await self.collection_handler.create_collection( + name, description, collection_id + ) + + async def get_collection(self, collection_id: UUID) -> CollectionResponse: + return await self.collection_handler.get_collection(collection_id) + + async def update_collection( + self, + collection_id: UUID, + name: Optional[str] = None, + description: Optional[str] = None, + ) -> CollectionResponse: + return await self.collection_handler.update_collection( + collection_id, name, description + ) + + async def delete_collection_relational(self, collection_id: UUID) -> None: + return await self.collection_handler.delete_collection_relational( + collection_id + ) + + async def list_collections( + self, offset: int = 0, limit: int = -1 + ) -> dict[str, Union[list[CollectionResponse], int]]: + return await self.collection_handler.list_collections(offset, limit) + + async def get_collections_by_ids( + self, collection_ids: list[UUID] + ) -> list[CollectionResponse]: + return await self.collection_handler.get_collections_by_ids( + collection_ids + ) + + async def documents_in_collection( + self, collection_id: UUID, offset: int = 0, limit: int = -1 + ) -> dict[str, Union[list[DocumentInfo], int]]: + return await self.collection_handler.documents_in_collection( + collection_id, offset, limit + ) + + async def get_collections_overview( + self, + collection_ids: Optional[list[UUID]] = None, + offset: int = 0, + limit: int = -1, + ) -> dict[str, Union[list[CollectionOverviewResponse], int]]: + return await self.collection_handler.get_collections_overview( + collection_ids, offset, limit + ) + + async def get_collections_for_user( + self, user_id: UUID, offset: int = 0, limit: int = -1 + ) -> dict[str, Union[list[CollectionResponse], int]]: + return await self.collection_handler.get_collections_for_user( + user_id, offset, limit + ) + + async def assign_document_to_collection_relational( + self, + document_id: UUID, + collection_id: UUID, + ) -> UUID: + return await self.collection_handler.assign_document_to_collection_relational( + document_id, collection_id + ) + + async def document_collections( + self, document_id: UUID, offset: int = 0, limit: int = -1 + ) -> dict[str, Union[list[CollectionResponse], int]]: + return await self.collection_handler.document_collections( + document_id, offset, limit + ) + + async def remove_document_from_collection_relational( + self, document_id: UUID, collection_id: UUID + ) -> None: + return await self.collection_handler.remove_document_from_collection_relational( + document_id, collection_id + ) + + # Token handler methods + async def blacklist_token( + self, token: str, current_time: Optional[datetime] = None + ): + return await self.token_handler.blacklist_token(token, current_time) + + async def is_token_blacklisted(self, token: str) -> bool: + return await self.token_handler.is_token_blacklisted(token) + + async def clean_expired_blacklisted_tokens( + self, + max_age_hours: int = 7 * 24, + current_time: Optional[datetime] = None, + ): + return await self.token_handler.clean_expired_blacklisted_tokens( + max_age_hours, current_time + ) + + # User handler methods + async def get_user_by_id(self, user_id: UUID) -> UserResponse: + return await self.user_handler.get_user_by_id(user_id) + + async def get_user_by_email(self, email: str) -> 
UserResponse: + return await self.user_handler.get_user_by_email(email) + + async def create_user(self, email: str, password: str) -> UserResponse: + return await self.user_handler.create_user(email, password) + + async def update_user(self, user: UserResponse) -> UserResponse: + return await self.user_handler.update_user(user) + + async def delete_user_relational(self, user_id: UUID) -> None: + return await self.user_handler.delete_user_relational(user_id) + + async def update_user_password( + self, user_id: UUID, new_hashed_password: str + ): + return await self.user_handler.update_user_password( + user_id, new_hashed_password + ) + + async def get_all_users(self) -> list[UserResponse]: + return await self.user_handler.get_all_users() + + async def store_verification_code( + self, user_id: UUID, verification_code: str, expiry: datetime + ): + return await self.user_handler.store_verification_code( + user_id, verification_code, expiry + ) + + async def verify_user(self, verification_code: str) -> None: + return await self.user_handler.verify_user(verification_code) + + async def remove_verification_code(self, verification_code: str): + return await self.user_handler.remove_verification_code( + verification_code + ) + + async def expire_verification_code(self, user_id: UUID): + return await self.user_handler.expire_verification_code(user_id) + + async def store_reset_token( + self, user_id: UUID, reset_token: str, expiry: datetime + ): + return await self.user_handler.store_reset_token( + user_id, reset_token, expiry + ) + + async def get_user_id_by_reset_token( + self, reset_token: str + ) -> Optional[UUID]: + return await self.user_handler.get_user_id_by_reset_token(reset_token) + + async def remove_reset_token(self, user_id: UUID): + return await self.user_handler.remove_reset_token(user_id) + + async def remove_user_from_all_collections(self, user_id: UUID): + return await self.user_handler.remove_user_from_all_collections( + user_id + ) + + async def add_user_to_collection( + self, user_id: UUID, collection_id: UUID + ) -> None: + return await self.user_handler.add_user_to_collection( + user_id, collection_id + ) + + async def remove_user_from_collection( + self, user_id: UUID, collection_id: UUID + ) -> None: + return await self.user_handler.remove_user_from_collection( + user_id, collection_id + ) + + async def get_users_in_collection( + self, collection_id: UUID, offset: int = 0, limit: int = -1 + ) -> dict[str, Union[list[UserResponse], int]]: + return await self.user_handler.get_users_in_collection( + collection_id, offset, limit + ) + + async def mark_user_as_superuser(self, user_id: UUID): + return await self.user_handler.mark_user_as_superuser(user_id) + + async def get_user_id_by_verification_code( + self, verification_code: str + ) -> Optional[UUID]: + return await self.user_handler.get_user_id_by_verification_code( + verification_code + ) + + async def mark_user_as_verified(self, user_id: UUID): + return await self.user_handler.mark_user_as_verified(user_id) + + async def get_users_overview( + self, + user_ids: Optional[list[UUID]] = None, + offset: int = 0, + limit: int = -1, + ) -> dict[str, Union[list[UserStats], int]]: + return await self.user_handler.get_users_overview( + user_ids, offset, limit + ) + + # Vector handler methods + async def upsert(self, entry: VectorEntry) -> None: + return await self.vector_handler.upsert(entry) + + async def upsert_entries(self, entries: list[VectorEntry]) -> None: + return await self.vector_handler.upsert_entries(entries) + + async 
def semantic_search( + self, query_vector: list[float], search_settings: VectorSearchSettings + ) -> list[VectorSearchResult]: + return await self.vector_handler.semantic_search( + query_vector, search_settings + ) + + async def full_text_search( + self, query_text: str, search_settings: VectorSearchSettings + ) -> list[VectorSearchResult]: + return await self.vector_handler.full_text_search( + query_text, search_settings + ) + + async def hybrid_search( + self, + query_text: str, + query_vector: list[float], + search_settings: VectorSearchSettings, + *args, + **kwargs, + ) -> list[VectorSearchResult]: + return await self.vector_handler.hybrid_search( + query_text, query_vector, search_settings, *args, **kwargs + ) + + async def delete( + self, filters: dict[str, Any] + ) -> dict[str, dict[str, str]]: + return await self.vector_handler.delete(filters) + + async def assign_document_to_collection_vector( + self, document_id: UUID, collection_id: UUID + ) -> None: + return await self.vector_handler.assign_document_to_collection_vector( + document_id, collection_id + ) + + async def remove_document_from_collection_vector( + self, document_id: UUID, collection_id: UUID + ) -> None: + return ( + await self.vector_handler.remove_document_from_collection_vector( + document_id, collection_id + ) + ) + + async def delete_user_vector(self, user_id: UUID) -> None: + return await self.vector_handler.delete_user_vector(user_id) + + async def delete_collection_vector(self, collection_id: UUID) -> None: + return await self.vector_handler.delete_collection_vector( + collection_id + ) + + async def get_document_chunks( + self, + document_id: UUID, + offset: int = 0, + limit: int = -1, + include_vectors: bool = False, + ) -> dict[str, Any]: + return await self.vector_handler.get_document_chunks( + document_id, offset, limit, include_vectors + ) + + async def create_index( + self, + table_name: Optional[VectorTableName] = None, + index_measure: IndexMeasure = IndexMeasure.cosine_distance, + index_method: IndexMethod = IndexMethod.auto, + index_arguments: Optional[ + Union[IndexArgsIVFFlat, IndexArgsHNSW] + ] = None, + index_name: Optional[str] = None, + concurrently: bool = True, + ) -> None: + return await self.vector_handler.create_index( + table_name, + index_measure, + index_method, + index_arguments, + index_name, + concurrently, + ) + + async def list_indices( + self, table_name: Optional[VectorTableName] = None + ) -> list[dict]: + return await self.vector_handler.list_indices(table_name) + + async def delete_index( + self, + index_name: str, + table_name: Optional[VectorTableName] = None, + concurrently: bool = True, + ) -> None: + return await self.vector_handler.delete_index( + index_name, table_name, concurrently + ) + + async def get_semantic_neighbors( + self, + document_id: UUID, + chunk_id: UUID, + limit: int = 10, + similarity_threshold: float = 0.5, + ) -> list[dict[str, Any]]: + return await self.vector_handler.get_semantic_neighbors( + document_id, chunk_id, limit, similarity_threshold + ) diff --git a/py/core/base/providers/ingestion.py b/py/core/base/providers/ingestion.py index 6ff0be1d4..939027d29 100644 --- a/py/core/base/providers/ingestion.py +++ b/py/core/base/providers/ingestion.py @@ -2,6 +2,8 @@ from abc import ABC from enum import Enum +from shared.abstractions.ingestion import ChunkEnrichmentSettings + from .base import Provider, ProviderConfig logger = logging.getLogger() @@ -10,6 +12,9 @@ class IngestionConfig(ProviderConfig): provider: str = "r2r" excluded_parsers: list[str] 
= ["mp4"] + chunk_enrichment_settings: ChunkEnrichmentSettings = ( + ChunkEnrichmentSettings() + ) extra_parsers: dict[str, str] = {} @property diff --git a/py/core/base/providers/kg.py b/py/core/base/providers/kg.py index c02d9f31b..8053480e4 100644 --- a/py/core/base/providers/kg.py +++ b/py/core/base/providers/kg.py @@ -10,6 +10,7 @@ Entity, KGCreationSettings, KGEnrichmentSettings, + KGEntityDeduplicationSettings, KGExtraction, KGSearchSettings, RelationshipType, @@ -34,6 +35,9 @@ class KGConfig(ProviderConfig): kg_store_path: Optional[str] = None kg_enrichment_settings: KGEnrichmentSettings = KGEnrichmentSettings() kg_creation_settings: KGCreationSettings = KGCreationSettings() + kg_entity_deduplication_settings: KGEntityDeduplicationSettings = ( + KGEntityDeduplicationSettings() + ) kg_search_settings: KGSearchSettings = KGSearchSettings() def validate_config(self) -> None: @@ -76,7 +80,7 @@ async def add_triples( @abstractmethod async def add_kg_extractions( - self, kg_extractions: list[KGExtraction], table_suffix: str = "_raw" + self, kg_extractions: list[KGExtraction], table_prefix: str = "chunk_" ) -> Tuple[int, int]: """Abstract method to add KG extractions.""" pass @@ -104,10 +108,11 @@ async def get_existing_entity_extraction_ids( async def get_entities( self, collection_id: UUID, - offset: int, - limit: int, + offset: int = 0, + limit: int = -1, entity_ids: list[str] | None = None, - entity_table_name: str = "entity_embedding", + entity_names: list[str] | None = None, + entity_table_name: str = "document_entity", ) -> dict: """Abstract method to get entities.""" pass @@ -194,7 +199,9 @@ async def get_entity_map( pass @abstractmethod - async def get_community_details(self, community_number: int): + async def get_community_details( + self, community_number: int, collection_id: UUID + ): """Abstract method to get community details.""" pass @@ -204,7 +211,7 @@ async def get_entity_count( collection_id: Optional[UUID] = None, document_id: Optional[UUID] = None, distinct: bool = False, - entity_table_name: str = "entity_embedding", + entity_table_name: str = "document_entity", ) -> int: """Abstract method to get the entity count.""" pass @@ -228,6 +235,13 @@ async def get_creation_estimate(self, *args: Any, **kwargs: Any) -> Any: """Abstract method to get the creation estimate.""" pass + @abstractmethod + async def get_deduplication_estimate( + self, *args: Any, **kwargs: Any + ) -> Any: + """Abstract method to get the deduplication estimate.""" + pass + @abstractmethod async def get_enrichment_estimate(self, *args: Any, **kwargs: Any) -> Any: """Abstract method to get the enrichment estimate.""" @@ -259,6 +273,11 @@ async def get_community_count(self, collection_id: UUID) -> int: """Abstract method to get the community count.""" pass + @abstractmethod + async def update_entity_descriptions(self, entities: list[Entity]): + """Abstract method to update entity descriptions.""" + pass + def escape_braces(s: str) -> str: """ diff --git a/py/core/base/utils/__init__.py b/py/core/base/utils/__init__.py index 15ba15949..6b6f50bb8 100644 --- a/py/core/base/utils/__init__.py +++ b/py/core/base/utils/__init__.py @@ -11,7 +11,6 @@ generate_default_user_collection_id, generate_document_id, generate_extraction_id, - generate_message_id, generate_run_id, generate_user_id, increment_version, @@ -35,7 +34,6 @@ "generate_extraction_id", "generate_user_id", "generate_collection_id_from_name", - "generate_message_id", "generate_default_prompt_id", "RecursiveCharacterTextSplitter", "TextSplitter", 
diff --git a/py/core/configs/full.toml b/py/core/configs/full.toml index 43b6641e7..3d397527e 100644 --- a/py/core/configs/full.toml +++ b/py/core/configs/full.toml @@ -6,11 +6,12 @@ new_after_n_chars = 512 max_characters = 1_024 combine_under_n_chars = 128 overlap = 256 + [ingestion.extra_parsers] - pdf = "zerox" + pdf = "zerox" [orchestration] provider = "hatchet" kg_creation_concurrency_limit = 32 ingestion_concurrency_limit = 128 kg_enrichment_concurrency_limit = 8 diff --git a/py/core/examples/data_dedup/a1.txt b/py/core/examples/data_dedup/a1.txt new file mode 100644 index 000000000..cc83645af --- /dev/null +++ b/py/core/examples/data_dedup/a1.txt @@ -0,0 +1,7 @@ +Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science. + +Little is known about Aristotle's life. He was born in the city of Stagira in northern Greece during the Classical period. His father, Nicomachus, died when Aristotle was a child, and he was brought up by a guardian. At 17 or 18, he joined Plato's Academy in Athens and remained there until the age of 37 (c. 347 BC). Shortly after Plato died, Aristotle left Athens and, at the request of Philip II of Macedon, tutored his son Alexander the Great beginning in 343 BC. He established a library in the Lyceum, which helped him to produce many of his hundreds of books on papyrus scrolls. + +Though Aristotle wrote many elegant treatises and dialogues for publication, only around a third of his original output has survived, none of it intended for publication. Aristotle provided a complex synthesis of the various philosophies existing prior to him. His teachings and methods of inquiry have had a significant impact across the world, and remain a subject of contemporary philosophical discussion. + +Aristotle's views profoundly shaped medieval scholarship. The influence of his physical science extended from late antiquity and the Early Middle Ages into the Renaissance, and was not replaced systematically until the Enlightenment and theories such as classical mechanics were developed. He influenced Judeo-Islamic philosophies during the Middle Ages, as well as Christian theology, especially the Neoplatonism of the Early Church and the scholastic tradition of the Catholic Church.
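
The data_dedup/*.txt files added here are the fixtures that the new deduplication tests ingest. The deduplicate_entities command defined earlier in py/cli/commands/kg.py calls the client with a run type of "estimate" by default and "run" when --run is passed, swapping the settings for {"force_deduplication": True} when --force-deduplication is set. A small sketch of that call flow, with a stand-in for the async client the CLI receives via ctx.obj:

    import asyncio
    import json


    class ToyClient:
        # Stand-in for the R2R client object; only the call shape from the
        # CLI command above is modeled here.
        async def deduplicate_entities(self, collection_id, run_type, settings):
            return {
                "collection_id": collection_id,
                "run_type": run_type,
                "settings": settings,
            }


    async def main() -> None:
        client = ToyClient()
        # Without --run, the command only produces an estimate...
        estimate = await client.deduplicate_entities("my-collection", "estimate", {})
        # ...and with --run plus --force-deduplication it runs with forced settings.
        forced = await client.deduplicate_entities(
            "my-collection", "run", {"force_deduplication": True}
        )
        print(json.dumps([estimate, forced], indent=2))


    asyncio.run(main())
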
diff --git a/py/core/examples/data_dedup/a10.txt b/py/core/examples/data_dedup/a10.txt new file mode 100644 index 000000000..7d028ebd4 --- /dev/null +++ b/py/core/examples/data_dedup/a10.txt @@ -0,0 +1,31 @@ +Newton's "forced" motion corresponds to Aristotle's "violent" motion with its external agent, but Aristotle's assumption that the agent's effect stops immediately it stops acting (e.g., the ball leaves the thrower's hand) has awkward consequences: he has to suppose that surrounding fluid helps to push the ball along to make it continue to rise even though the hand is no longer acting on it, resulting in the Medieval theory of impetus.[45] + +Four causes +Main article: Four causes + +Aristotle argued by analogy with woodwork that a thing takes its form from four causes: in the case of a table, the wood used (material cause), its design (formal cause), the tools and techniques used (efficient cause), and its decorative or practical purpose (final cause).[47] +Aristotle suggested that the reason for anything coming about can be attributed to four different types of simultaneously active factors. His term aitia is traditionally translated as "cause", but it does not always refer to temporal sequence; it might be better translated as "explanation", but the traditional rendering will be employed here.[48][49] + +Material cause describes the material out of which something is composed. Thus the material cause of a table is wood. It is not about action. It does not mean that one domino knocks over another domino.[48] +The formal cause is its form, i.e., the arrangement of that matter. It tells one what a thing is, that a thing is determined by the definition, form, pattern, essence, whole, synthesis or archetype. It embraces the account of causes in terms of fundamental principles or general laws, as the whole (i.e., macrostructure) is the cause of its parts, a relationship known as the whole-part causation. Plainly put, the formal cause is the idea in the mind of the sculptor that brings the sculpture into being. A simple example of the formal cause is the mental image or idea that allows an artist, architect, or engineer to create a drawing.[48] +The efficient cause is "the primary source", or that from which the change under consideration proceeds. It identifies 'what makes of what is made and what causes change of what is changed' and so suggests all sorts of agents, non-living or living, acting as the sources of change or movement or rest. Representing the current understanding of causality as the relation of cause and effect, this covers the modern definitions of "cause" as either the agent or agency or particular events or states of affairs. In the case of two dominoes, when the first is knocked over it causes the second also to fall over.[48] In the case of animals, this agency is a combination of how it develops from the egg, and how its body functions.[50] +The final cause (telos) is its purpose, the reason why a thing exists or is done, including both purposeful and instrumental actions and activities. The final cause is the purpose or function that something is supposed to serve. This covers modern ideas of motivating causes, such as volition.[48] In the case of living things, it implies adaptation to a particular way of life.[50] +Optics +Further information: History of optics +Aristotle describes experiments in optics using a camera obscura in Problems, book 15. The apparatus consisted of a dark chamber with a small aperture that let light in. 
With it, he saw that whatever shape he made the hole, the sun's image always remained circular. He also noted that increasing the distance between the aperture and the image surface magnified the image.[51] + +Chance and spontaneity +Further information: Accident (philosophy) +According to Aristotle, spontaneity and chance are causes of some things, distinguishable from other types of cause such as simple necessity. Chance as an incidental cause lies in the realm of accidental things, "from what is spontaneous". There is also a more specific kind of chance, which Aristotle names "luck", that only applies to people's moral choices.[52][53] + +Astronomy +Further information: History of astronomy +In astronomy, Aristotle refuted Democritus's claim that the Milky Way was made up of "those stars which are shaded by the earth from the sun's rays," pointing out partly correctly that if "the size of the sun is greater than that of the earth and the distance of the stars from the earth many times greater than that of the sun, then... the sun shines on all the stars and the earth screens none of them."[54] He also wrote descriptions of comets, including the Great Comet of 371 BC.[55] + +Geology and natural sciences +Further information: History of geology + +Aristotle noted that the ground level of the Aeolian islands changed before a volcanic eruption. +Aristotle was one of the first people to record any geological observations. He stated that geological change was too slow to be observed in one person's lifetime.[56][57] The geologist Charles Lyell noted that Aristotle described such change, including "lakes that had dried up" and "deserts that had become watered by rivers", giving as examples the growth of the Nile delta since the time of Homer, and "the upheaving of one of the Aeolian islands, previous to a volcanic eruption."[58] + +Meteorologica lends its name to the modern study of meteorology, but its modern usage diverges from the content of Aristotle's ancient treatise on meteors. The ancient Greeks did use the term for a range of atmospheric phenomena, but also for earthquakes and volcanic eruptions. Aristotle proposed that the cause of earthquakes was a gas or vapor (anathymiaseis) that was trapped inside the earth and trying to escape, following other Greek authors Anaxagoras, Empedocles and Democritus.[59] diff --git a/py/core/examples/data_dedup/a2.txt b/py/core/examples/data_dedup/a2.txt new file mode 100644 index 000000000..4b18e2141 --- /dev/null +++ b/py/core/examples/data_dedup/a2.txt @@ -0,0 +1,10 @@ +Aristotle was revered among medieval Muslim scholars as "The First Teacher", and among medieval Christians like Thomas Aquinas as simply "The Philosopher", while the poet Dante called him "the master of those who know". His works contain the earliest known formal study of logic, and were studied by medieval scholars such as Peter Abelard and Jean Buridan. Aristotle's influence on logic continued well into the 19th century. In addition, his ethics, although always influential, gained renewed interest with the modern advent of virtue ethics. + +Life +In general, the details of Aristotle's life are not well-established. The biographies written in ancient times are often speculative and historians only agree on a few salient points.[B] + +Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon.
While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent some time within the Macedonian palace, making his first connections with the Macedonian monarchy.[7] + + +School of Aristotle in Mieza, Macedonia, Greece. +At the age of seventeen or eighteen, Aristotle moved to Athens to continue his education at Plato's Academy.[8] He probably experienced the Eleusinian Mysteries as he wrote when describing the sights one viewed at the Eleusinian Mysteries, "to experience is to learn" [παθεῖν μαθεῖν].[9] Aristotle remained in Athens for nearly twenty years before leaving in 348/47 BC. The traditional story about his departure records that he was disappointed with the Academy's direction after control passed to Plato's nephew Speusippus, although it is possible that he feared the anti-Macedonian sentiments in Athens at that time and left before Plato died.[10] Aristotle then accompanied Xenocrates to the court of his friend Hermias of Atarneus in Asia Minor. After the death of Hermias, Aristotle travelled with his pupil Theophrastus to the island of Lesbos, where together they researched the botany and zoology of the island and its sheltered lagoon. While in Lesbos, Aristotle married Pythias, either Hermias's adoptive daughter or niece. They had a daughter, whom they also named Pythias. In 343 BC, Aristotle was invited by Philip II of Macedon to become the tutor to his son Alexander.[11][12] diff --git a/py/core/examples/data_dedup/a3.txt b/py/core/examples/data_dedup/a3.txt new file mode 100644 index 000000000..6544955d7 --- /dev/null +++ b/py/core/examples/data_dedup/a3.txt @@ -0,0 +1,6 @@ +"Aristotle tutoring Alexander" by Jean Leon Gerome Ferris. +Aristotle was appointed as the head of the royal Academy of Macedon. During Aristotle's time in the Macedonian court, he gave lessons not only to Alexander but also to two other future kings: Ptolemy and Cassander.[13] Aristotle encouraged Alexander toward eastern conquest, and Aristotle's own attitude towards Persia was unabashedly ethnocentric. In one famous example, he counsels Alexander to be "a leader to the Greeks and a despot to the barbarians, to look after the former as after friends and relatives, and to deal with the latter as with beasts or plants".[13] By 335 BC, Aristotle had returned to Athens, establishing his own school there known as the Lyceum. Aristotle conducted courses at the school for the next twelve years. While in Athens, his wife Pythias died and Aristotle became involved with Herpyllis of Stagira. They had a son whom Aristotle named after his father, Nicomachus. If the Suda – an uncritical compilation from the Middle Ages – is accurate, he may also have had an erômenos, Palaephatus of Abydus.[14] + + +Portrait bust of Aristotle; an Imperial Roman (1st or 2nd century AD) copy of a lost bronze sculpture made by Lysippos. +This period in Athens, between 335 and 323 BC, is when Aristotle is believed to have composed many of his works.[12] He wrote many dialogues, of which only fragments have survived. Those works that have survived are in treatise form and were not, for the most part, intended for widespread publication; they are generally thought to be lecture aids for his students.
His most important treatises include Physics, Metaphysics, Nicomachean Ethics, Politics, On the Soul and Poetics. Aristotle studied and made significant contributions to "logic, metaphysics, mathematics, physics, biology, botany, ethics, politics, agriculture, medicine, dance, and theatre."[15] diff --git a/py/core/examples/data_dedup/a4.txt b/py/core/examples/data_dedup/a4.txt new file mode 100644 index 000000000..b4fa4e233 --- /dev/null +++ b/py/core/examples/data_dedup/a4.txt @@ -0,0 +1,7 @@ +Near the end of his life, Alexander and Aristotle became estranged over Alexander's relationship with Persia and Persians. A widespread tradition in antiquity suspected Aristotle of playing a role in Alexander's death, but the only evidence of this is an unlikely claim made some six years after the death.[16] Following Alexander's death, anti-Macedonian sentiment in Athens was rekindled. In 322 BC, Demophilus and Eurymedon the Hierophant reportedly denounced Aristotle for impiety,[17] prompting him to flee to his mother's family estate in Chalcis, on Euboea, at which occasion he was said to have stated: "I will not allow the Athenians to sin twice against philosophy"[18][19][20] – a reference to Athens's trial and execution of Socrates. He died in Chalcis, Euboea[2][21][15] of natural causes later that same year, having named his student Antipater as his chief executor and leaving a will in which he asked to be buried next to his wife.[22] + +Theoretical philosophy +Logic +Main article: Term logic +Further information: Non-Aristotelian logic +With the Prior Analytics, Aristotle is credited with the earliest study of formal logic,[23] and his conception of it was the dominant form of Western logic until 19th-century advances in mathematical logic.[24] Kant stated in the Critique of Pure Reason that with Aristotle, logic reached its completion.[25] diff --git a/py/core/examples/data_dedup/a5.txt b/py/core/examples/data_dedup/a5.txt new file mode 100644 index 000000000..1d91e8365 --- /dev/null +++ b/py/core/examples/data_dedup/a5.txt @@ -0,0 +1,14 @@ + +Organon +Main article: Organon + +Plato (left) and Aristotle in Raphael's 1509 fresco, The School of Athens. Aristotle holds his Nicomachean Ethics and gestures to the earth, representing his view in immanent realism, whilst Plato gestures to the heavens, indicating his Theory of Forms, and holds his Timaeus.[26][27] +Most of Aristotle's work is probably not in its original form, because it was most likely edited by students and later lecturers. The logical works of Aristotle were compiled into a set of six books called the Organon around 40 BC by Andronicus of Rhodes or others among his followers.[28] The books are: + +Categories +On Interpretation +Prior Analytics +Posterior Analytics +Topics +On Sophistical Refutations +The order of the books (or the teachings from which they are composed) is not certain, but this list was derived from analysis of Aristotle's writings. It goes from the basics, the analysis of simple terms in the Categories, the analysis of propositions and their elementary relations in On Interpretation, to the study of more complex forms, namely, syllogisms (in the Analytics)[29][30] and dialectics (in the Topics and Sophistical Refutations). The first three treatises form the core of the logical theory stricto sensu: the grammar of the language of logic and the correct rules of reasoning. 
The Rhetoric is not conventionally included, but it states that it relies on the Topics.[31] diff --git a/py/core/examples/data_dedup/a6.txt b/py/core/examples/data_dedup/a6.txt new file mode 100644 index 000000000..b066488a3 --- /dev/null +++ b/py/core/examples/data_dedup/a6.txt @@ -0,0 +1,29 @@ +One of Aristotle's types of syllogism,[D] in words and in terms:[E] + All men are mortal. (M a P) + All Greeks are men. (S a M) + ∴ All Greeks are mortal. (S a P) +(The "In equations" column[F] of the original table was a diagram and is not reproduced here.) +What is today called Aristotelian logic with its types of syllogism (methods of logical argument),[32] Aristotle himself would have labelled "analytics". The term "logic" he reserved to mean dialectics. + +Metaphysics +Main article: Metaphysics (Aristotle) +The word "metaphysics" appears to have been coined by the first century AD editor who assembled various small selections of Aristotle's works into the treatise we know by the name Metaphysics.[34] Aristotle called it "first philosophy", and distinguished it from mathematics and natural science (physics) as the contemplative (theoretikē) philosophy which is "theological" and studies the divine. He wrote in his Metaphysics (1026a16): + +if there were no other independent things besides the composite natural ones, the study of nature would be the primary kind of knowledge; but if there is some motionless independent thing, the knowledge of this precedes it and is first philosophy, and it is universal in just this way, because it is first. And it belongs to this sort of philosophy to study being as being, both what it is and what belongs to it just by virtue of being.[35] + +Substance +Further information: Hylomorphism +Aristotle examines the concepts of substance (ousia) and essence (to ti ên einai, "the what it was to be") in his Metaphysics (Book VII), and he concludes that a particular substance is a combination of both matter and form, a philosophical theory called hylomorphism. In Book VIII, he distinguishes the matter of the substance as the substratum, or the stuff of which it is composed. For example, the matter of a house is the bricks, stones, timbers, etc., or whatever constitutes the potential house, while the form of the substance is the actual house, namely 'covering for bodies and chattels' or any other differentia that let us define something as a house. The formula that gives the components is the account of the matter, and the formula that gives the differentia is the account of the form.[36][34] + +Immanent realism +Main article: Aristotle's theory of universals + +Plato's forms exist as universals, like the ideal form of an apple. For Aristotle, both matter and form belong to the individual thing (hylomorphism). +Like his teacher Plato, Aristotle's philosophy aims at the universal. Aristotle's ontology places the universal (katholou) in particulars (kath' hekaston), things in the world, whereas for Plato the universal is a separately existing form which actual things imitate. For Aristotle, "form" is still what phenomena are based on, but is "instantiated" in a particular substance.[34] diff --git a/py/core/examples/data_dedup/a7.txt b/py/core/examples/data_dedup/a7.txt new file mode 100644 index 000000000..72151c91a --- /dev/null +++ b/py/core/examples/data_dedup/a7.txt @@ -0,0 +1,11 @@ +Plato argued that all things have a universal form, which could be either a property or a relation to other things. When one looks at an apple, for example, one sees an apple, and one can also analyse a form of an apple.
In this distinction, there is a particular apple and a universal form of an apple. Moreover, one can place an apple next to a book, so that one can speak of both the book and apple as being next to each other. Plato argued that there are some universal forms that are not a part of particular things. For example, it is possible that there is no particular good in existence, but "good" is still a proper universal form. Aristotle disagreed with Plato on this point, arguing that all universals are instantiated at some period of time, and that there are no universals that are unattached to existing things. In addition, Aristotle disagreed with Plato about the location of universals. Where Plato spoke of the forms as existing separately from the things that participate in them, Aristotle maintained that universals exist within each thing on which each universal is predicated. So, according to Aristotle, the form of apple exists within each apple, rather than in the world of the forms.[34][37] + +Potentiality and actuality +Concerning the nature of change (kinesis) and its causes, as he outlines in his Physics and On Generation and Corruption (319b–320a), he distinguishes coming-to-be (genesis, also translated as 'generation') from: + +growth and diminution, which is change in quantity; +locomotion, which is change in space; and +alteration, which is change in quality. + +Aristotle argued that a capability like playing the flute could be acquired – the potential made actual – by learning. +Coming-to-be is a change where the substrate of the thing that has undergone the change has itself changed. In that particular change he introduces the concept of potentiality (dynamis) and actuality (entelecheia) in association with the matter and the form. Referring to potentiality, this is what a thing is capable of doing or being acted upon if the conditions are right and it is not prevented by something else. For example, the seed of a plant in the soil is potentially (dynamei) a plant, and if it is not prevented by something, it will become a plant. Potentially, beings can either 'act' (poiein) or 'be acted upon' (paschein), which can be either innate or learned. For example, the eyes possess the potentiality of sight (innate – being acted upon), while the capability of playing the flute can be possessed by learning (exercise – acting). Actuality is the fulfilment of the end of the potentiality. Because the end (telos) is the principle of every change, and potentiality exists for the sake of the end, actuality, accordingly, is the end. Referring then to the previous example, it can be said that an actuality is when a plant does one of the activities that plants do.[34] diff --git a/py/core/examples/data_dedup/a8.txt b/py/core/examples/data_dedup/a8.txt new file mode 100644 index 000000000..3d5431ad4 --- /dev/null +++ b/py/core/examples/data_dedup/a8.txt @@ -0,0 +1,17 @@ +For that for the sake of which (to hou heneka) a thing is, is its principle, and the becoming is for the sake of the end; and the actuality is the end, and it is for the sake of this that the potentiality is acquired. For animals do not see in order that they may have sight, but they have sight that they may see.[38] + +In summary, the matter used to make a house has potentiality to be a house and both the activity of building and the form of the final house are actualities, which is also a final cause or end. Then Aristotle proceeds and concludes that the actuality is prior to potentiality in formula, in time and in substantiality. 
With this definition of the particular substance (i.e., matter and form), Aristotle tries to solve the problem of the unity of the beings, for example, "what is it that makes a man one"? Since, according to Plato there are two Ideas: animal and biped, how then is man a unity? However, according to Aristotle, the potential being (matter) and the actual one (form) are one and the same.[34][39] + +Epistemology +Aristotle's immanent realism means his epistemology is based on the study of things that exist or happen in the world, and rises to knowledge of the universal, whereas for Plato epistemology begins with knowledge of universal Forms (or ideas) and descends to knowledge of particular imitations of these.[31] Aristotle uses induction from examples alongside deduction, whereas Plato relies on deduction from a priori principles.[31] + +Natural philosophy +Aristotle's "natural philosophy" spans a wide range of natural phenomena including those now covered by physics, biology and other natural sciences.[40] In Aristotle's terminology, "natural philosophy" is a branch of philosophy examining the phenomena of the natural world, and includes fields that would be regarded today as physics, biology and other natural sciences. Aristotle's work encompassed virtually all facets of intellectual inquiry. Aristotle makes philosophy in the broad sense coextensive with reasoning, which he also would describe as "science". However, his use of the term science carries a different meaning than that covered by the term "scientific method". For Aristotle, "all science (dianoia) is either practical, poetical or theoretical" (Metaphysics 1025b25). His practical science includes ethics and politics; his poetical science means the study of fine arts including poetry; his theoretical science covers physics, mathematics and metaphysics.[40] + +Physics + +The four classical elements (fire, air, water, earth) of Empedocles and Aristotle illustrated with a burning log. The log releases all four elements as it is destroyed. +Main article: Aristotelian physics +Five elements +Main article: Classical element +In his On Generation and Corruption, Aristotle related each of the four elements proposed earlier by Empedocles, earth, water, air, and fire, to two of the four sensible qualities, hot, cold, wet, and dry. In the Empedoclean scheme, all matter was made of the four elements, in differing proportions. Aristotle's scheme added the heavenly aether, the divine substance of the heavenly spheres, stars and planets.[41] diff --git a/py/core/examples/data_dedup/a9.txt b/py/core/examples/data_dedup/a9.txt new file mode 100644 index 000000000..8c8c78983 --- /dev/null +++ b/py/core/examples/data_dedup/a9.txt @@ -0,0 +1,37 @@ +Aristotle's elements[41] +Element | Hot/Cold | Wet/Dry | Motion | Modern state of matter +Earth | Cold | Dry | Down | Solid +Water | Cold | Wet | Down | Liquid +Air | Hot | Wet | Up | Gas +Fire | Hot | Dry | Up | Plasma +Aether (divine substance) | — | — | Circular (in heavens) | Vacuum +Motion +Further information: History of classical mechanics +Aristotle describes two kinds of motion: "violent" or "unnatural motion", such as that of a thrown stone, in the Physics (254b10), and "natural motion", such as of a falling object, in On the Heavens (300a20).
In violent motion, as soon as the agent stops causing it, the motion stops also: in other words, the natural state of an object is to be at rest,[42][G] since Aristotle does not address friction.[43] With this understanding, it can be observed that, as Aristotle stated, heavy objects (on the ground, say) require more force to make them move; and objects pushed with greater force move faster.[44][H] This would imply the equation[44] + +F = mv, +incorrect in modern physics.[44] + +Natural motion depends on the element concerned: the aether naturally moves in a circle around the heavens,[I] while the 4 Empedoclean elements move vertically up (like fire, as is observed) or down (like earth) towards their natural resting places.[45][43][J] + + +Aristotle's laws of motion. In Physics he states that objects fall at a speed proportional to their weight and inversely proportional to the density of the fluid they are immersed in.[43] This is a correct approximation for objects in Earth's gravitational field moving in air or water.[45] +In the Physics (215a25), Aristotle effectively states a quantitative law, that the speed, v, of a falling body is proportional (say, with constant c) to its weight, W, and inversely proportional to the density,[K] ρ, of the fluid in which it is falling:[45][43] + +v = cW/ρ +Aristotle implies that in a vacuum the speed of fall would become infinite, and concludes from this apparent absurdity that a vacuum is not possible.[45][43] Opinions have varied on whether Aristotle intended to state quantitative laws. Henri Carteron held the "extreme view"[43] that Aristotle's concept of force was basically qualitative,[46] but other authors reject this.[43] + +Archimedes corrected Aristotle's theory that bodies move towards their natural resting places; metal boats can float if they displace enough water; floating depends in Archimedes' scheme on the mass and volume of the object, not, as Aristotle thought, its elementary composition.[45] + +Aristotle's writings on motion remained influential until the Early Modern period. John Philoponus (in Late antiquity) and Galileo (in Early modern period) are said to have shown by experiment that Aristotle's claim that a heavier object falls faster than a lighter object is incorrect.[40] A contrary opinion is given by Carlo Rovelli, who argues that Aristotle's physics of motion is correct within its domain of validity, that of objects in the Earth's gravitational field immersed in a fluid such as air.
In this system, heavy bodies in steady fall indeed travel faster than light ones (whether friction is ignored, or not[45]), and they do fall more slowly in a denser medium.[44][L] diff --git a/py/core/examples/scripts/upload_hf_textbooks_ex.py b/py/core/examples/scripts/upload_hf_textbooks_ex.py index 6ee9b724c..7f456036e 100644 --- a/py/core/examples/scripts/upload_hf_textbooks_ex.py +++ b/py/core/examples/scripts/upload_hf_textbooks_ex.py @@ -25,7 +25,6 @@ def remove_file(file_path): async def process_batch(client, batch): results = await client.ingest_files(batch) print(f"Submitted {len(results['results'])} files for processing") - print("results = ", results["results"]) # Remove the processed files for file_path in batch: remove_file(file_path) diff --git a/py/core/main/abstractions.py b/py/core/main/abstractions.py index fa93036e8..ee2996a39 100644 --- a/py/core/main/abstractions.py +++ b/py/core/main/abstractions.py @@ -39,7 +39,10 @@ class R2RPipes(BaseModel): kg_storage_pipe: AsyncPipe kg_entity_description_pipe: AsyncPipe kg_clustering_pipe: AsyncPipe + kg_entity_deduplication_pipe: AsyncPipe + kg_entity_deduplication_summary_pipe: AsyncPipe kg_community_summary_pipe: AsyncPipe + kg_prompt_tuning_pipe: AsyncPipe rag_pipe: AsyncPipe streaming_rag_pipe: AsyncPipe vector_storage_pipe: AsyncPipe diff --git a/py/core/main/api/data/ingestion_router_openapi.yml b/py/core/main/api/data/ingestion_router_openapi.yml index 9a6960202..283ad58da 100644 --- a/py/core/main/api/data/ingestion_router_openapi.yml +++ b/py/core/main/api/data/ingestion_router_openapi.yml @@ -107,4 +107,100 @@ ingest_chunks: chunks: "A list of text chunks to ingest into the system." document_id: "An optional document id to associate the chunks with. If not provided, a unique document id will be generated." metadata: "Optional JSON metadata to associate with the ingested chunks." - run_with_orchestration: "Whether or not ingestion runs with orchestration, default is `True`. When set to `False`, the ingestion process will run synchronous and directly return the result." + +list_vector_indices: + openapi_extra: + x-codeSamples: + - lang: Python + source: | + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.list_vector_indices( + table_name="vectors", + concurrently=False + ) + - lang: Shell + source: | + curl -X GET "http://localhost:7276/v2/list_vector_indices?table_name=vectors" + + input_descriptions: + table_name: "The name of the table to list indices for. Options: vectors, entities_document, entities_collection, communities" + concurrently: "Whether to perform the operation concurrently" + +create_vector_index: + openapi_extra: + x-codeSamples: + - lang: Python + source: | + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...)
+ + result = client.create_vector_index( + table_name="vectors", + index_method="hnsw", + index_measure="cosine_distance", + index_arguments={"m": 16, "ef_construction": 64}, + concurrently=True + ) + - lang: Shell + source: | + curl -X POST "http://localhost:7276/v2/create_vector_index" \ + -H "Content-Type: application/json" \ + -d '{ + "table_name": "vectors", + "index_method": "hnsw", + "index_measure": "cosine_distance", + "index_arguments": { + "m": 16, + "ef_construction": 64 + }, + "concurrently": true + }' + + input_descriptions: + table_name: "The table to create the index on. Default: vectors" + index_method: "The indexing method to use. Options: hnsw, ivfflat, auto. Default: hnsw" + index_measure: "Distance measure for vector comparisons. Options: cosine_distance, l2_distance, max_inner_product. Default: cosine_distance" + index_name: "Optional custom name for the index. If not provided, one will be auto-generated" + index_arguments: "Configuration parameters for the chosen index method. For HNSW: {m: int, ef_construction: int}. For IVFFlat: {n_lists: int}" + concurrently: "Whether to create the index concurrently. Default: true" + +delete_vector_index: + openapi_extra: + x-codeSamples: + - lang: Python + source: | + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.delete_vector_index( + index_name="ix_vector_cosine_ops_hnsw__20241021211541", + table_name="vectors", + concurrently=True + ) + - lang: Shell + source: | + curl -X DELETE "http://localhost:7276/v2/delete_vector_index" \ + -H "Content-Type: application/json" \ + -d '{ + "index_name": "ix_vector_cosine_ops_hnsw__20241021211541", + "table_name": "vectors", + "concurrently": true + }' + + input_descriptions: + index_name: "The name of the index to delete" + table_name: "The name of the table containing the index. Default: vectors" + concurrently: "Whether to delete the index concurrently. Default: true" diff --git a/py/core/main/api/ingestion_router.py b/py/core/main/api/ingestion_router.py index 21d50bd8c..2b2f546a3 100644 --- a/py/core/main/api/ingestion_router.py +++ b/py/core/main/api/ingestion_router.py @@ -6,14 +6,17 @@ from uuid import UUID import yaml -from fastapi import Body, Depends, File, Form, UploadFile +from fastapi import Body, Depends, File, Form, Query, UploadFile from pydantic import Json from core.base import R2RException, RawChunk, generate_document_id from core.base.api.models import ( CreateVectorIndexResponse, WrappedCreateVectorIndexResponse, + WrappedDeleteVectorIndexResponse, WrappedIngestionResponse, + WrappedListVectorIndicesResponse, + WrappedSelectVectorIndexResponse, WrappedUpdateResponse, ) from core.base.providers import OrchestrationProvider, Workflow @@ -66,6 +69,16 @@ def _register_workflows(self): if self.orchestration_provider.config.provider != "simple" else "Vector index creation task completed successfully." ), + "delete-vector-index": ( + "Vector index deletion task queued successfully." + if self.orchestration_provider.config.provider != "simple" + else "Vector index deletion task completed successfully." + ), + "select-vector-index": ( + "Vector index selection task queued successfully." + if self.orchestration_provider.config.provider != "simple" + else "Vector index selection task completed successfully." 
+ ), }, ) @@ -333,7 +346,7 @@ async def ingest_chunks_app( metadata: Optional[dict] = Body( None, description=ingest_files_descriptions.get("metadata") ), - run_with_orchestration: Optional[bool] = Form( + run_with_orchestration: Optional[bool] = Body( True, description=ingest_files_descriptions.get( "run_with_orchestration" @@ -393,40 +406,54 @@ async def ingest_chunks_app( "task_id": None, } - @self.router.post("/create_vector_index") + create_vector_index_extras = self.openapi_extras.get( + "create_vector_index", {} + ) + create_vector_descriptions = create_vector_index_extras.get( + "input_descriptions", {} + ) + + @self.router.post( + "/create_vector_index", + openapi_extra=create_vector_index_extras.get("openapi_extra"), + ) @self.base_endpoint async def create_vector_index_app( table_name: Optional[VectorTableName] = Body( - default=VectorTableName.CHUNKS, - description="The name of the vector table to create.", + default=VectorTableName.VECTORS, + description=create_vector_descriptions.get("table_name"), ), index_method: IndexMethod = Body( default=IndexMethod.hnsw, - description="The type of vector index to create.", + description=create_vector_descriptions.get("index_method"), ), - measure: IndexMeasure = Body( + index_measure: IndexMeasure = Body( default=IndexMeasure.cosine_distance, - description="The measure for the index.", + description=create_vector_descriptions.get("index_measure"), ), index_arguments: Optional[ Union[IndexArgsIVFFlat, IndexArgsHNSW] ] = Body( None, - description="The arguments for the index method.", + description=create_vector_descriptions.get("index_arguments"), ), - replace: bool = Body( - default=True, - description="Whether to replace an existing index.", + index_name: Optional[str] = Body( + None, + description=create_vector_descriptions.get("index_name"), ), concurrently: bool = Body( default=True, - description="Whether to create the index concurrently.", + description=create_vector_descriptions.get("concurrently"), ), auth_user=Depends(self.service.providers.auth.auth_wrapper), ) -> WrappedCreateVectorIndexResponse: + """ + Create a vector index for a given table. 
+ + """ logger.info( - f"Creating vector index for {table_name} with method {index_method}, measure {measure}, replace {replace}, concurrently {concurrently}" + f"Creating vector index for {table_name} with method {index_method}, measure {index_measure}, concurrently {concurrently}" ) raw_message = await self.orchestration_provider.run_workflow( @@ -435,9 +462,82 @@ async def create_vector_index_app( "request": { "table_name": table_name, "index_method": index_method, - "measure": measure, + "index_measure": index_measure, + "index_name": index_name, "index_arguments": index_arguments, - "replace": replace, + "concurrently": concurrently, + }, + }, + options={ + "additional_metadata": {}, + }, + ) + + return raw_message # type: ignore + + list_vector_indices_extras = self.openapi_extras.get( + "list_vector_indices", {} + ) + list_vector_indices_descriptions = list_vector_indices_extras.get( + "input_descriptions", {} + ) + + @self.router.get( + "/list_vector_indices", + openapi_extra=list_vector_indices_extras.get("openapi_extra"), + ) + @self.base_endpoint + async def list_vector_indices_app( + table_name: Optional[VectorTableName] = Query( + default=VectorTableName.VECTORS, + description=list_vector_indices_descriptions.get("table_name"), + ), + auth_user=Depends(self.service.providers.auth.auth_wrapper), + ) -> WrappedListVectorIndicesResponse: + indices = await self.service.providers.database.list_indices( + table_name=table_name + ) + return {"indices": indices} # type: ignore + + delete_vector_index_extras = self.openapi_extras.get( + "delete_vector_index", {} + ) + delete_vector_index_descriptions = delete_vector_index_extras.get( + "input_descriptions", {} + ) + + @self.router.delete( + "/delete_vector_index", + openapi_extra=delete_vector_index_extras.get("openapi_extra"), + ) + @self.base_endpoint + async def delete_vector_index_app( + index_name: str = Body( + ..., + description=delete_vector_index_descriptions.get("index_name"), + ), + table_name: Optional[VectorTableName] = Body( + default=VectorTableName.VECTORS, + description=delete_vector_index_descriptions.get("table_name"), + ), + concurrently: bool = Body( + default=True, + description=delete_vector_index_descriptions.get( + "concurrently" + ), + ), + auth_user=Depends(self.service.providers.auth.auth_wrapper), + ) -> WrappedDeleteVectorIndexResponse: + logger.info( + f"Deleting vector index {index_name} from table {table_name}" + ) + + raw_message = await self.orchestration_provider.run_workflow( + "delete-vector-index", + { + "request": { + "index_name": index_name, + "table_name": table_name, "concurrently": concurrently, }, }, diff --git a/py/core/main/api/kg_router.py b/py/core/main/api/kg_router.py index 761c68104..8dbb8d56c 100644 --- a/py/core/main/api/kg_router.py +++ b/py/core/main/api/kg_router.py @@ -12,10 +12,13 @@ WrappedKGCreationResponse, WrappedKGEnrichmentResponse, WrappedKGEntitiesResponse, + WrappedKGEntityDeduplicationResponse, WrappedKGTriplesResponse, + WrappedKGTunePromptResponse, ) from core.base.providers import OrchestrationProvider, Workflow from core.utils import generate_default_user_collection_id +from shared.abstractions.graph import EntityLevel from shared.abstractions.kg import KGRunType from shared.utils.base_utils import update_settings_from_dict @@ -53,11 +56,19 @@ def _register_workflows(self): workflow_messages["enrich-graph"] = ( "Graph enrichment task queued successfully." ) + workflow_messages["entity-deduplication"] = ( + "KG Entity Deduplication task queued successfully."
+ ) else: workflow_messages["create-graph"] = ( "Graph created successfully, please run enrich-graph to enrich the graph for GraphRAG." ) - workflow_messages["enrich-graph"] = "Graph enriched successfully." + workflow_messages["enrich-graph"] = ( + "Graph enriched successfully. You can view the communities at http://localhost:7272/v2/communities" + ) + workflow_messages["entity-deduplication"] = ( + "KG Entity Deduplication completed successfully." + ) self.orchestration_provider.register_workflows( Workflow.KG, @@ -201,6 +212,10 @@ async def enrich_graph( @self.router.get("/entities") @self.base_endpoint async def get_entities( + entity_level: Optional[EntityLevel] = Query( + default=EntityLevel.DOCUMENT, + description="Level of entities to retrieve. Options are: chunk, document, collection.", + ), collection_id: Optional[UUID] = Query( None, description="Collection ID to retrieve entities from." ), @@ -224,11 +239,19 @@ async def get_entities( auth_user.id ) + if entity_level == EntityLevel.CHUNK: + entity_table_name = "chunk_entity" + elif entity_level == EntityLevel.DOCUMENT: + entity_table_name = "document_entity" + else: + entity_table_name = "collection_entity" + return await self.service.get_entities( collection_id, offset, limit, entity_ids, + entity_table_name, ) @self.router.get("/triples") @@ -304,3 +327,139 @@ async def get_communities( levels, community_numbers, ) + + @self.router.post("/deduplicate_entities") + @self.base_endpoint + async def deduplicate_entities( + collection_id: Optional[UUID] = Body( + None, description="Collection ID to deduplicate entities for." + ), + run_type: Optional[KGRunType] = Body( + None, description="Run type for the deduplication process." + ), + deduplication_settings: Optional[dict] = Body( + None, description="Settings for the deduplication process." + ), + auth_user=Depends(self.service.providers.auth.auth_wrapper), + ) -> WrappedKGEntityDeduplicationResponse: + """ + Deduplicate entities in the knowledge graph. + """ + if not auth_user.is_superuser: + logger.warning("Implement permission checks here.") + + if not collection_id: + collection_id = generate_default_user_collection_id( + auth_user.id + ) + + if not run_type: + run_type = KGRunType.ESTIMATE + + server_deduplication_settings = ( + self.service.providers.kg.config.kg_entity_deduplication_settings + ) + + logger.info( + f"Server deduplication settings: {server_deduplication_settings}" + ) + + if deduplication_settings: + server_deduplication_settings = update_settings_from_dict( + server_deduplication_settings, deduplication_settings + ) + + logger.info( + f"Running deduplicate_entities on collection {collection_id}" + ) + logger.info(f"Effective deduplication settings: {server_deduplication_settings}") + + if run_type == KGRunType.ESTIMATE: + return await self.service.get_deduplication_estimate( + collection_id, server_deduplication_settings + ) + + workflow_input = { + "collection_id": str(collection_id), + "run_type": run_type, + "kg_entity_deduplication_settings": server_deduplication_settings.model_dump_json(), + "user": auth_user.json(), + } + + return await self.orchestration_provider.run_workflow( # type: ignore + "entity-deduplication", {"request": workflow_input}, {} + ) + + @self.router.get("/tuned_prompt") + @self.base_endpoint + async def get_tuned_prompt( + prompt_name: str = Query( + ..., + description="The name of the prompt to tune. Valid options are 'kg_triples_extraction_prompt', 'kg_entity_description_prompt' and 'community_reports_prompt'.", + ), + collection_id: Optional[UUID] = Query( + None, description="Collection ID to tune the prompt for." + ), + documents_offset: Optional[int] = Query( + 0, description="Offset for document pagination." + ), + documents_limit: Optional[int] = Query( + 100, description="Limit for document pagination." + ), + chunks_offset: Optional[int] = Query( + 0, description="Offset for chunk pagination." + ), + chunks_limit: Optional[int] = Query( + 100, description="Limit for chunk pagination." + ), + auth_user=Depends(self.service.providers.auth.auth_wrapper), + ) -> WrappedKGTunePromptResponse: + """ + Auto-tune the prompt for a specific collection. + """ + if not auth_user.is_superuser: + logger.warning("Implement permission checks here.") + + if not collection_id: + collection_id = generate_default_user_collection_id( + auth_user.id + ) + + return await self.service.tune_prompt( + prompt_name=prompt_name, + collection_id=collection_id, + documents_offset=documents_offset, + documents_limit=documents_limit, + chunks_offset=chunks_offset, + chunks_limit=chunks_limit, + ) + + @self.router.delete("/delete_graph_for_collection") + @self.base_endpoint + async def delete_graph_for_collection( + collection_id: UUID = Body( + ..., description="Collection ID to delete graph for." + ), + cascade: bool = Body( + default=False, + description="Whether to cascade the deletion, and delete entities and triples belonging to the collection.", + ), + auth_user=Depends(self.service.providers.auth.auth_wrapper), + ): + """ + Delete the graph for a given collection. Note that this endpoint may delete a large amount of data created by the KG pipeline; this deletion is irreversible, and recreating the graph may be an expensive operation. + + Notes: + The endpoint deletes all communities for a given collection. If the cascade flag is set to true, the endpoint also deletes all the entities and triples associated with the collection. + + WARNING: Setting this flag to true will delete entities and triples for documents that are shared across multiple collections. Do not set this flag unless you are absolutely sure that you want to delete the entities and triples for all documents in the collection.
+ + """ + if not auth_user.is_superuser: + logger.warning("Implement permission checks here.") + + await self.service.delete_graph_for_collection( + collection_id, cascade + ) + + return {"message": "Graph deleted successfully."} diff --git a/py/core/main/app.py b/py/core/main/app.py index 7ad558069..ab03ff41c 100644 --- a/py/core/main/app.py +++ b/py/core/main/app.py @@ -60,9 +60,11 @@ def _apply_cors(self): allow_headers=["*"], ) - def serve(self, host: str = "0.0.0.0", port: int = 7272): + async def serve(self, host: str = "0.0.0.0", port: int = 7272): # Start the Hatchet worker in a separate thread import uvicorn # Run the FastAPI app - uvicorn.run(self.app, host=host, port=port) + config = uvicorn.Config(self.app, host=host, port=port) + server = uvicorn.Server(config) + await server.serve() diff --git a/py/core/main/app_entry.py b/py/core/main/app_entry.py index 51bb3408c..cfc50af76 100644 --- a/py/core/main/app_entry.py +++ b/py/core/main/app_entry.py @@ -100,6 +100,16 @@ async def create_r2r_app( logger.info( f"Environment R2R_CONFIG_PATH: {'None' if config_path is None else config_path}" ) +logger.info(f"Environment R2R_PROJECT_NAME: {os.getenv('R2R_PROJECT_NAME')}") + +logger.info(f"Environment R2R_POSTGRES_HOST: {os.getenv('R2R_POSTGRES_HOST')}") +logger.info( + f"Environment R2R_POSTGRES_DBNAME: {os.getenv('R2R_POSTGRES_DBNAME')}" +) +logger.info(f"Environment R2R_POSTGRES_PORT: {os.getenv('R2R_POSTGRES_PORT')}") +logger.info( + f"Environment R2R_POSTGRES_PASSWORD: {os.getenv('R2R_POSTGRES_PASSWORD')}" +) # Create the FastAPI app app = FastAPI(lifespan=lifespan) diff --git a/py/core/main/assembly/factory.py b/py/core/main/assembly/factory.py index 0b25f74fe..d47404a52 100644 --- a/py/core/main/assembly/factory.py +++ b/py/core/main/assembly/factory.py @@ -147,7 +147,7 @@ async def create_database_provider( "Embedding config must have a base dimension to initialize database."
) - vector_db_dimension = self.config.embedding.base_dimension + dimension = self.config.embedding.base_dimension quantization_type = ( self.config.embedding.quantization_settings.quantization_type ) @@ -156,7 +156,7 @@ async def create_database_provider( database_provider = PostgresDBProvider( db_config, - vector_db_dimension, + dimension, crypto_provider=crypto_provider, quantization_type=quantization_type, ) @@ -392,7 +392,10 @@ def create_pipes( streaming_rag_pipe_override: Optional[AsyncPipe] = None, kg_entity_description_pipe: Optional[AsyncPipe] = None, kg_clustering_pipe: Optional[AsyncPipe] = None, + kg_entity_deduplication_pipe: Optional[AsyncPipe] = None, + kg_entity_deduplication_summary_pipe: Optional[AsyncPipe] = None, kg_community_summary_pipe: Optional[AsyncPipe] = None, + kg_prompt_tuning_pipe: Optional[AsyncPipe] = None, *args, **kwargs, ) -> R2RPipes: @@ -423,8 +426,16 @@ def create_pipes( or self.create_kg_entity_description_pipe(*args, **kwargs), kg_clustering_pipe=kg_clustering_pipe or self.create_kg_clustering_pipe(*args, **kwargs), + kg_entity_deduplication_pipe=kg_entity_deduplication_pipe + or self.create_kg_entity_deduplication_pipe(*args, **kwargs), + kg_entity_deduplication_summary_pipe=kg_entity_deduplication_summary_pipe + or self.create_kg_entity_deduplication_summary_pipe( + *args, **kwargs + ), kg_community_summary_pipe=kg_community_summary_pipe or self.create_kg_community_summary_pipe(*args, **kwargs), + kg_prompt_tuning_pipe=kg_prompt_tuning_pipe + or self.create_kg_prompt_tuning_pipe(*args, **kwargs), ) def create_parsing_pipe(self, *args, **kwargs) -> Any: @@ -621,6 +632,17 @@ def create_kg_clustering_pipe(self, *args, **kwargs) -> Any: config=AsyncPipe.PipeConfig(name="kg_clustering_pipe"), ) + def create_kg_deduplication_summary_pipe(self, *args, **kwargs) -> Any: + from core.pipes import KGEntityDeduplicationSummaryPipe + + return KGEntityDeduplicationSummaryPipe( + kg_provider=self.providers.kg, + prompt_provider=self.providers.prompt, + llm_provider=self.providers.llm, + embedding_provider=self.providers.embedding, + config=AsyncPipe.PipeConfig(name="kg_deduplication_summary_pipe"), + ) + def create_kg_community_summary_pipe(self, *args, **kwargs) -> Any: from core.pipes import KGCommunitySummaryPipe @@ -632,6 +654,42 @@ def create_kg_community_summary_pipe(self, *args, **kwargs) -> Any: config=AsyncPipe.PipeConfig(name="kg_community_summary_pipe"), ) + def create_kg_entity_deduplication_pipe(self, *args, **kwargs) -> Any: + from core.pipes import KGEntityDeduplicationPipe + + return KGEntityDeduplicationPipe( + kg_provider=self.providers.kg, + llm_provider=self.providers.llm, + prompt_provider=self.providers.prompt, + embedding_provider=self.providers.embedding, + config=AsyncPipe.PipeConfig(name="kg_entity_deduplication_pipe"), + ) + + def create_kg_entity_deduplication_summary_pipe( + self, *args, **kwargs + ) -> Any: + from core.pipes import KGEntityDeduplicationSummaryPipe + + return KGEntityDeduplicationSummaryPipe( + kg_provider=self.providers.kg, + prompt_provider=self.providers.prompt, + llm_provider=self.providers.llm, + embedding_provider=self.providers.embedding, + config=AsyncPipe.PipeConfig( + name="kg_entity_deduplication_summary_pipe" + ), + ) + + def create_kg_prompt_tuning_pipe(self, *args, **kwargs) -> Any: + from core.pipes import KGPromptTuningPipe + + return KGPromptTuningPipe( + kg_provider=self.providers.kg, + llm_provider=self.providers.llm, + prompt_provider=self.providers.prompt, + 
config=AsyncPipe.PipeConfig(name="kg_prompt_tuning_pipe"), + ) + class R2RPipelineFactory: def __init__(self, config: R2RConfig, pipes: R2RPipes): diff --git a/py/core/main/orchestration/hatchet/ingestion_workflow.py b/py/core/main/orchestration/hatchet/ingestion_workflow.py index 84b230581..4798a4178 100644 --- a/py/core/main/orchestration/hatchet/ingestion_workflow.py +++ b/py/core/main/orchestration/hatchet/ingestion_workflow.py @@ -150,21 +150,59 @@ async def parse(self, context: Context) -> dict: status=IngestionStatus.SUCCESS, ) - collection_id = await service.providers.database.relational.assign_document_to_collection( + # TODO: Move logic onto the `management service` + collection_id = generate_default_user_collection_id( + document_info.user_id + ) + await service.providers.database.assign_document_to_collection_relational( document_id=document_info.id, - collection_id=generate_default_user_collection_id( - document_info.user_id - ), + collection_id=collection_id, ) - - service.providers.database.vector.assign_document_to_collection( + await service.providers.database.assign_document_to_collection_vector( document_id=document_info.id, collection_id=collection_id ) + chunk_enrichment_settings = getattr( + service.providers.ingestion.config, + "chunk_enrichment_settings", + None, + ) + + if chunk_enrichment_settings and getattr( + chunk_enrichment_settings, "enable_chunk_enrichment", False + ): + + logger.info("Enriching document with contextual chunks") + + # TODO: the status updating doesn't work because document_info doesn't contain information about collection IDs + # we don't update the document_info when we assign document_to_collection_relational and document_to_collection_vector + # hack: get document_info again from DB + document_info = ( + await self.ingestion_service.providers.database.get_documents_overview( + filter_user_ids=[document_info.user_id], + filter_document_ids=[document_info.id], + ) + )["results"][0] + + await self.ingestion_service.update_document_status( + document_info, + status=IngestionStatus.ENRICHING, + ) + + await self.ingestion_service.chunk_enrichment( + document_id=document_info.id, + ) + + await self.ingestion_service.update_document_status( + document_info, + status=IngestionStatus.ENRICHED, + ) + return { "status": "Successfully finalized ingestion", "document_info": document_info.to_dict(), } + except AuthenticationError as e: raise R2RException( status_code=401, @@ -189,7 +227,7 @@ async def on_failure(self, context: Context) -> None: try: documents_overview = ( - await self.ingestion_service.providers.database.relational.get_documents_overview( + await self.ingestion_service.providers.database.get_documents_overview( filter_document_ids=[document_id] ) )["results"] @@ -248,7 +286,7 @@ async def update_files(self, context: Context) -> None: ) documents_overview = ( - await self.ingestion_service.providers.database.relational.get_documents_overview( + await self.ingestion_service.providers.database.get_documents_overview( filter_document_ids=document_ids, filter_user_ids=None if user.is_superuser else [user.id], ) @@ -400,13 +438,15 @@ async def finalize(self, context: Context) -> dict: ) try: - collection_id = await self.ingestion_service.providers.database.relational.assign_document_to_collection( + # TODO - Move logic onto the `management service` + collection_id = generate_default_user_collection_id( + document_info.user_id + ) + await self.ingestion_service.providers.database.assign_document_to_collection_relational( 
document_id=document_info.id, - collection_id=generate_default_user_collection_id( - document_info.user_id - ), + collection_id=collection_id, ) - self.ingestion_service.providers.database.vector.assign_document_to_collection( + await self.ingestion_service.providers.database.assign_document_to_collection_vector( document_id=document_info.id, collection_id=collection_id ) except Exception as e: @@ -432,7 +472,7 @@ async def on_failure(self, context: Context) -> None: try: documents_overview = ( - await self.ingestion_service.providers.database.relational.get_documents_overview( + await self.ingestion_service.providers.database.get_documents_overview( filter_document_ids=[document_id] ) )["results"] @@ -474,7 +514,7 @@ async def create_vector_index(self, context: Context) -> dict: ) ) - self.ingestion_service.providers.database.vector.create_index( + await self.ingestion_service.providers.database.create_index( **parsed_data ) @@ -482,14 +522,36 @@ async def create_vector_index(self, context: Context) -> dict: "status": "Vector index creation queued successfully.", } + @orchestration_provider.workflow(name="delete-vector-index", timeout="30m") + class HatchetDeleteVectorIndexWorkflow: + def __init__(self, ingestion_service: IngestionService): + self.ingestion_service = ingestion_service + + @orchestration_provider.step(timeout="10m") + async def delete_vector_index(self, context: Context) -> dict: + input_data = context.workflow_input()["request"] + parsed_data = ( + IngestionServiceAdapter.parse_delete_vector_index_input( + input_data + ) + ) + + await self.ingestion_service.providers.database.delete_index( + **parsed_data + ) + + return {"status": "Vector index deleted successfully."} + ingest_files_workflow = HatchetIngestFilesWorkflow(service) update_files_workflow = HatchetUpdateFilesWorkflow(service) ingest_chunks_workflow = HatchetIngestChunksWorkflow(service) create_vector_index_workflow = HatchetCreateVectorIndexWorkflow(service) + delete_vector_index_workflow = HatchetDeleteVectorIndexWorkflow(service) return { "ingest_files": ingest_files_workflow, "update_files": update_files_workflow, "ingest_chunks": ingest_chunks_workflow, "create_vector_index": create_vector_index_workflow, + "delete_vector_index": delete_vector_index_workflow, } diff --git a/py/core/main/orchestration/hatchet/kg_workflow.py b/py/core/main/orchestration/hatchet/kg_workflow.py index 97862c941..83d75014a 100644 --- a/py/core/main/orchestration/hatchet/kg_workflow.py +++ b/py/core/main/orchestration/hatchet/kg_workflow.py @@ -34,6 +34,22 @@ def get_input_data_dict(input_data): if key == "kg_enrichment_settings": input_data[key] = json.loads(value) + if key == "kg_entity_deduplication_settings": + input_data[key] = json.loads(value) + + if isinstance(input_data[key]["generation_config"], str): + input_data[key]["generation_config"] = json.loads( + input_data[key]["generation_config"] + ) + + input_data[key]["generation_config"] = GenerationConfig( + **input_data[key]["generation_config"] + ) + + logger.info( + f"KG Entity Deduplication Settings: {input_data[key]}" + ) + if key == "generation_config": input_data[key] = GenerationConfig(**input_data[key]) return input_data @@ -117,7 +133,7 @@ async def on_failure(self, context: Context) -> None: return try: - await self.kg_service.providers.database.relational.set_workflow_status( + await self.kg_service.providers.database.set_workflow_status( id=uuid.UUID(document_id), status_type="kg_extraction_status", status=KGExtractionStatus.FAILED, @@ -213,6 +229,106 @@ 
async def kg_extraction_ingress(self, context: Context) -> dict: "result": f"successfully ran graph creation workflows for {len(results)} documents" } + @orchestration_provider.workflow( + name="entity-deduplication", timeout="360m" + ) + class EntityDeduplicationWorkflow: + def __init__(self, kg_service: KgService): + self.kg_service = kg_service + + @orchestration_provider.step(retries=0, timeout="360m") + async def kg_entity_deduplication_setup( + self, context: Context + ) -> dict: + + input_data = get_input_data_dict( + context.workflow_input()["request"] + ) + + collection_id = input_data["collection_id"] + + logger.info( + f"Running KG Entity Deduplication for collection {collection_id}" + ) + logger.info(f"Input data: {input_data}") + logger.info( + f"KG Entity Deduplication Settings: {input_data['kg_entity_deduplication_settings']}" + ) + + number_of_distinct_entities = ( + await self.kg_service.kg_entity_deduplication( + collection_id=collection_id, + **input_data["kg_entity_deduplication_settings"], + ) + )[0]["num_entities"] + + input_data["kg_entity_deduplication_settings"][ + "generation_config" + ] = input_data["kg_entity_deduplication_settings"][ + "generation_config" + ].model_dump_json() + + # run 100 entities in one workflow + total_workflows = math.ceil(number_of_distinct_entities / 100) + workflows = [] + for i in range(total_workflows): + offset = i * 100 + workflows.append( + context.aio.spawn_workflow( + "kg-entity-deduplication-summary", + { + "request": { + "collection_id": collection_id, + "offset": offset, + "limit": 100, + "kg_entity_deduplication_settings": json.dumps( + input_data[ + "kg_entity_deduplication_settings" + ] + ), + } + }, + key=f"{i}/{total_workflows}_entity_deduplication_part", + ) + ) + await asyncio.gather(*workflows) + + return { + "result": f"successfully queued kg entity deduplication for collection {collection_id} with {number_of_distinct_entities} distinct entities" + } + + @orchestration_provider.workflow( + name="kg-entity-deduplication-summary", timeout="360m" + ) + class EntityDeduplicationSummaryWorkflow: + def __init__(self, kg_service: KgService): + self.kg_service = kg_service + + @orchestration_provider.step(retries=0, timeout="360m") + async def kg_entity_deduplication_summary( + self, context: Context + ) -> dict: + + logger.info( + f"Running KG Entity Deduplication Summary for input data: {context.workflow_input()['request']}" + ) + + input_data = get_input_data_dict( + context.workflow_input()["request"] + ) + collection_id = input_data["collection_id"] + + await self.kg_service.kg_entity_deduplication_summary( + collection_id=collection_id, + offset=input_data["offset"], + limit=input_data["limit"], + **input_data["kg_entity_deduplication_settings"], + ) + + return { + "result": f"successfully queued kg entity deduplication summary for collection {collection_id}" + } + @orchestration_provider.workflow(name="enrich-graph", timeout="360m") class EnrichGraphWorkflow: def __init__(self, kg_service: KgService): @@ -333,4 +449,8 @@ async def kg_community_summary(self, context: Context) -> dict: "create-graph": CreateGraphWorkflow(service), "enrich-graph": EnrichGraphWorkflow(service), "kg-community-summary": KGCommunitySummaryWorkflow(service), + "kg-entity-deduplication": EntityDeduplicationWorkflow(service), + "kg-entity-deduplication-summary": EntityDeduplicationSummaryWorkflow( + service + ), } diff --git a/py/core/main/orchestration/simple/ingestion_workflow.py b/py/core/main/orchestration/simple/ingestion_workflow.py index 
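The `kg_entity_deduplication_setup` step above fans work out in fixed windows: it counts the distinct entities in the collection, then spawns one `kg-entity-deduplication-summary` workflow per 100 entities, each addressed by an offset/limit pair and gathered concurrently. A minimal runnable sketch of that windowing arithmetic, with `spawn_summary` as a hypothetical stand-in for `context.aio.spawn_workflow`:

    import asyncio
    import math


    async def spawn_summary(offset: int, limit: int) -> str:
        # Hypothetical stand-in for spawning one summary workflow
        # over the entity window [offset, offset + limit).
        return f"summary({offset}, {limit})"


    async def fan_out(num_entities: int, batch_size: int = 100) -> list[str]:
        # One workflow per `batch_size` entities, as in the setup step above.
        total_workflows = math.ceil(num_entities / batch_size)
        return await asyncio.gather(
            *(
                spawn_summary(i * batch_size, batch_size)
                for i in range(total_workflows)
            )
        )


    print(asyncio.run(fan_out(250)))  # windows at offsets 0, 100, 200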
c87eb4dc3..47d04f803 100644 --- a/py/core/main/orchestration/simple/ingestion_workflow.py +++ b/py/core/main/orchestration/simple/ingestion_workflow.py @@ -67,14 +67,16 @@ async def ingest_files(input_data): ) try: - collection_id = await service.providers.database.relational.assign_document_to_collection( + # TODO - Move logic onto management service + collection_id = generate_default_user_collection_id( + str(document_info.user_id) + ) + await service.providers.database.assign_document_to_collection_relational( document_id=document_info.id, - collection_id=generate_default_user_collection_id( - str(document_info.user_id) - ), + collection_id=collection_id, ) - service.providers.database.vector.assign_document_to_collection( - document_id=document_info.id, collection_id=collection_id + await service.providers.database.assign_document_to_collection_vector( + document_info.id, collection_id ) except Exception as e: logger.error( @@ -125,7 +127,7 @@ async def update_files(input_data): ) documents_overview = ( - await service.providers.database.relational.get_documents_overview( + await service.providers.database.get_documents_overview( filter_document_ids=document_ids, filter_user_ids=None if user.is_superuser else [user.id], ) @@ -227,13 +229,15 @@ async def ingest_chunks(input_data): ) try: - collection_id = await service.providers.database.relational.assign_document_to_collection( + # TODO - Move logic onto management service + collection_id = generate_default_user_collection_id( + str(document_info.user_id) + ) + await service.providers.database.assign_document_to_collection_relational( document_id=document_info.id, - collection_id=generate_default_user_collection_id( - str(document_info.user_id) - ), + collection_id=collection_id, ) - service.providers.database.vector.assign_document_to_collection( + await service.providers.database.assign_document_to_collection_vector( document_id=document_info.id, collection_id=collection_id ) except Exception as e: @@ -262,7 +266,7 @@ async def create_vector_index(input_data): ) ) - service.providers.database.vector.create_index(**parsed_data) + await service.providers.database.create_index(**parsed_data) except Exception as e: raise R2RException( @@ -270,9 +274,30 @@ async def create_vector_index(input_data): message=f"Error during vector index creation: {str(e)}", ) + async def delete_vector_index(input_data): + try: + from core.main import IngestionServiceAdapter + + parsed_data = ( + IngestionServiceAdapter.parse_delete_vector_index_input( + input_data + ) + ) + + await service.providers.database.delete_index(**parsed_data) + + return {"status": "Vector index deleted successfully."} + + except Exception as e: + raise R2RException( + status_code=500, + message=f"Error during vector index deletion: {str(e)}", + ) + return { "ingest-files": ingest_files, "update-files": update_files, "ingest-chunks": ingest_chunks, "create-vector-index": create_vector_index, + "delete-vector-index": delete_vector_index, } diff --git a/py/core/main/orchestration/simple/kg_workflow.py b/py/core/main/orchestration/simple/kg_workflow.py index c7b5d4d34..88805c02a 100644 --- a/py/core/main/orchestration/simple/kg_workflow.py +++ b/py/core/main/orchestration/simple/kg_workflow.py @@ -101,8 +101,27 @@ async def kg_community_summary(input_data): **input_data["kg_enrichment_settings"], ) + async def entity_deduplication_workflow(input_data): + + collection_id = input_data["collection_id"] + + number_of_distinct_entities = ( + await service.kg_entity_deduplication( + 
collection_id=collection_id, + **input_data["kg_entity_deduplication_settings"], + ) + )[0]["num_entities"] + + await service.kg_entity_deduplication_summary( + collection_id=collection_id, + offset=0, + limit=number_of_distinct_entities, + **input_data["kg_entity_deduplication_settings"], + ) + return { "create-graph": create_graph, "enrich-graph": enrich_graph, "kg-community-summary": kg_community_summary, + "entity-deduplication": entity_deduplication_workflow, } diff --git a/py/core/main/services/auth_service.py b/py/core/main/services/auth_service.py index 7ff0c8c8d..4923f7649 100644 --- a/py/core/main/services/auth_service.py +++ b/py/core/main/services/auth_service.py @@ -33,7 +33,7 @@ def __init__( ) @telemetry_event("RegisterUser") - async def register(self, email: str, password: str) -> dict[str, str]: + async def register(self, email: str, password: str) -> UserResponse: return await self.providers.auth.register(email, password) @telemetry_event("VerifyEmail") @@ -45,22 +45,24 @@ async def verify_email( status_code=400, message="Email verification is not required" ) - user_id = await self.providers.database.relational.get_user_id_by_verification_code( - verification_code + user_id = ( + await self.providers.database.get_user_id_by_verification_code( + verification_code + ) ) if not user_id: raise R2RException( status_code=400, message="Invalid or expired verification code" ) - user = await self.providers.database.relational.get_user_by_id(user_id) + user = await self.providers.database.get_user_by_id(user_id) if not user or user.email != email: raise R2RException( status_code=400, message="Invalid or expired verification code" ) - await self.providers.database.relational.mark_user_as_verified(user_id) - await self.providers.database.relational.remove_verification_code( + await self.providers.database.mark_user_as_verified(user_id) + await self.providers.database.remove_verification_code( verification_code ) return {"message": f"User account {user_id} verified successfully."} @@ -72,7 +74,11 @@ async def login(self, email: str, password: str) -> dict[str, Token]: @telemetry_event("GetCurrentUser") async def user(self, token: str) -> UserResponse: token_data = await self.providers.auth.decode_token(token) - user = await self.providers.database.relational.get_user_by_email( + if not token_data.email: + raise R2RException( + status_code=401, message="Invalid authentication credentials" + ) + user = await self.providers.database.get_user_by_email( token_data.email ) if user is None: @@ -123,10 +129,8 @@ async def update_user( bio: Optional[str] = None, profile_picture: Optional[str] = None, ) -> UserResponse: - user: UserResponse = ( - await self.providers.database.relational.get_user_by_id( - str(user_id) - ) + user: UserResponse = await self.providers.database.get_user_by_id( + user_id ) if not user: raise R2RException(status_code=404, message="User not found") @@ -140,7 +144,7 @@ async def update_user( user.bio = bio if profile_picture is not None: user.profile_picture = profile_picture - return await self.providers.database.relational.update_user(user) + return await self.providers.database.update_user(user) @telemetry_event("DeleteUserAccount") async def delete_user( @@ -150,19 +154,22 @@ async def delete_user( delete_vector_data: bool = False, is_superuser: bool = False, ) -> dict[str, str]: - user = await self.providers.database.relational.get_user_by_id(user_id) + user = await self.providers.database.get_user_by_id(user_id) if not user: raise R2RException(status_code=404, 
message="User not found") if not ( is_superuser - or self.providers.auth.crypto_provider.verify_password( # type: ignore - password, user.hashed_password + or ( + user.hashed_password is not None + and self.providers.auth.crypto_provider.verify_password( # type: ignore + password, user.hashed_password + ) ) ): raise R2RException(status_code=400, message="Incorrect password") - await self.providers.database.relational.delete_user(user_id) + await self.providers.database.delete_user_relational(user_id) if delete_vector_data: - self.providers.database.vector.delete_user(user_id) + await self.providers.database.delete_user_vector(user_id) return {"message": f"User account {user_id} deleted successfully."} @@ -172,6 +179,6 @@ async def clean_expired_blacklisted_tokens( max_age_hours: int = 7 * 24, current_time: Optional[datetime] = None, ): - await self.providers.database.relational.clean_expired_blacklisted_tokens( + await self.providers.database.clean_expired_blacklisted_tokens( max_age_hours, current_time ) diff --git a/py/core/main/services/ingestion_service.py b/py/core/main/services/ingestion_service.py index bd93b9ddf..478361c85 100644 --- a/py/core/main/services/ingestion_service.py +++ b/py/core/main/services/ingestion_service.py @@ -1,5 +1,7 @@ +import asyncio import json import logging +import uuid from datetime import datetime from typing import Any, AsyncGenerator, Optional, Sequence, Union from uuid import UUID @@ -9,16 +11,23 @@ DocumentExtraction, DocumentInfo, DocumentType, + IngestionConfig, IngestionStatus, R2RException, R2RLoggingProvider, RawChunk, RunManager, + Vector, VectorEntry, + VectorType, decrement_version, ) from core.base.api.models import UserResponse from core.telemetry.telemetry_decorator import telemetry_event +from shared.abstractions.ingestion import ( + ChunkEnrichmentSettings, + ChunkEnrichmentStrategy, +) from shared.abstractions.vector import ( IndexMeasure, IndexMethod, @@ -94,7 +103,7 @@ async def ingest_file_ingress( ) existing_document_info = ( - await self.providers.database.relational.get_documents_overview( + await self.providers.database.get_documents_overview( filter_user_ids=[user.id], filter_document_ids=[document_id], ) @@ -120,7 +129,7 @@ async def ingest_file_ingress( message=f"Document {document_id} was already ingested and is not in a failed state.", ) - await self.providers.database.relational.upsert_documents_overview( + await self.providers.database.upsert_documents_overview( document_info ) @@ -256,7 +265,7 @@ async def finalize_ingestion( is_update: bool = False, ) -> None: if is_update: - self.providers.database.vector.delete( + await self.providers.database.delete( filters={ "$and": [ {"document_id": {"$eq": document_info.id}}, @@ -284,7 +293,7 @@ async def update_document_status( async def _update_document_status_in_db(self, document_info: DocumentInfo): try: - await self.providers.database.relational.upsert_documents_overview( + await self.providers.database.upsert_documents_overview( document_info ) except Exception as e: @@ -325,7 +334,7 @@ async def ingest_chunks_ingress( ) existing_document_info = ( - await self.providers.database.relational.get_documents_overview( + await self.providers.database.get_documents_overview( filter_user_ids=[user.id], filter_document_ids=[document_id], ) @@ -339,12 +348,183 @@ async def ingest_chunks_ingress( message=f"Document {document_id} was already ingested and is not in a failed state.", ) - await self.providers.database.relational.upsert_documents_overview( - document_info - ) + await 
self.providers.database.upsert_documents_overview(document_info)

        return document_info

+    async def _get_enriched_chunk_text(
+        self,
+        chunk_idx: int,
+        chunk: dict,
+        document_id: UUID,
+        chunk_enrichment_settings: ChunkEnrichmentSettings,
+        document_chunks: list[dict],
+        document_chunks_dict: dict,
+    ) -> VectorEntry:
+        # get chunks in context
+
+        context_chunk_ids = []
+        for enrichment_strategy in chunk_enrichment_settings.strategies:
+            if enrichment_strategy == ChunkEnrichmentStrategy.NEIGHBORHOOD:
+                for prev in range(
+                    1, chunk_enrichment_settings.backward_chunks + 1
+                ):
+                    if chunk_idx - prev >= 0:
+                        context_chunk_ids.append(
+                            document_chunks[chunk_idx - prev]["extraction_id"]
+                        )
+                for nxt in range(
+                    1, chunk_enrichment_settings.forward_chunks + 1
+                ):
+                    if chunk_idx + nxt < len(document_chunks):
+                        context_chunk_ids.append(
+                            document_chunks[chunk_idx + nxt]["extraction_id"]
+                        )
+
+            elif enrichment_strategy == ChunkEnrichmentStrategy.SEMANTIC:
+                semantic_neighbors = await self.providers.database.get_semantic_neighbors(
+                    document_id=document_id,
+                    chunk_id=chunk["extraction_id"],
+                    limit=chunk_enrichment_settings.semantic_neighbors,
+                    similarity_threshold=chunk_enrichment_settings.semantic_similarity_threshold,
+                )
+
+                for neighbor in semantic_neighbors:
+                    context_chunk_ids.append(neighbor["extraction_id"])
+
+        context_chunk_ids = list(set(context_chunk_ids))
+
+        context_chunk_texts = []
+        for context_chunk_id in context_chunk_ids:
+            context_chunk_texts.append(
+                (
+                    document_chunks_dict[context_chunk_id]["text"],
+                    document_chunks_dict[context_chunk_id]["metadata"][
+                        "chunk_order"
+                    ],
+                )
+            )
+
+        # sort by chunk_order
+        context_chunk_texts.sort(key=lambda x: x[1])
+
+        # Enrich the chunk: build the prompt and call the LLM directly
+        # (no pipe involved), then embed and store the result below.
+        try:
+            updated_chunk_text = (
+                (
+                    await self.providers.llm.aget_completion(
+                        messages=await self.providers.prompt._get_message_payload(
+                            task_prompt_name="chunk_enrichment",
+                            task_inputs={
+                                "context_chunks": (
+                                    "\n".join(
+                                        [
+                                            text
+                                            for text, _ in context_chunk_texts
+                                        ]
+                                    )
+                                ),
+                                "chunk": chunk["text"],
+                            },
+                        ),
+                        generation_config=chunk_enrichment_settings.generation_config,
+                    )
+                )
+                .choices[0]
+                .message.content
+            )
+
+        except Exception:
+            updated_chunk_text = chunk["text"]
+            chunk["metadata"]["chunk_enrichment_status"] = "failed"
+        else:
+            if not updated_chunk_text:
+                updated_chunk_text = chunk["text"]
+                chunk["metadata"]["chunk_enrichment_status"] = "failed"
+            else:
+                chunk["metadata"]["chunk_enrichment_status"] = "success"
+
+        data = await self.providers.embedding.async_get_embedding(
+            updated_chunk_text or chunk["text"]
+        )
+
+        chunk["metadata"]["original_text"] = chunk["text"]
+
+        vector_entry_new = VectorEntry(
+            extraction_id=uuid.uuid5(
+                uuid.NAMESPACE_DNS, str(chunk["extraction_id"])
+            ),
+            vector=Vector(data=data, type=VectorType.FIXED, length=len(data)),
+            document_id=document_id,
+            user_id=chunk["user_id"],
+            collection_ids=chunk["collection_ids"],
+            text=updated_chunk_text or chunk["text"],
+            metadata=chunk["metadata"],
+        )
+
+        return vector_entry_new
+
+    async def chunk_enrichment(self, document_id: UUID) -> int:
+        # enrich every chunk of the document by calling the LLM directly
+
+        # TODO: Why is the config not recognized as an IngestionConfig but as a ProviderConfig?
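The NEIGHBORHOOD branch of `_get_enriched_chunk_text` widens each chunk's context with a fixed window of surrounding chunks before prompting the LLM. A minimal sketch of just that index arithmetic, assuming list positions stand in for extraction IDs and the window sizes mirror the `backward_chunks`/`forward_chunks` settings:

    def neighborhood_ids(
        chunk_idx: int,
        num_chunks: int,
        backward_chunks: int = 2,
        forward_chunks: int = 2,
    ) -> list[int]:
        # Up to `backward_chunks` predecessors and `forward_chunks`
        # successors, clamped to the bounds of the document.
        prev_ids = [
            chunk_idx - p
            for p in range(1, backward_chunks + 1)
            if chunk_idx - p >= 0
        ]
        next_ids = [
            chunk_idx + n
            for n in range(1, forward_chunks + 1)
            if chunk_idx + n < num_chunks
        ]
        return prev_ids + next_ids


    print(neighborhood_ids(1, 5))  # [0, 2, 3]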
+ chunk_enrichment_settings = ( + self.providers.ingestion.config.chunk_enrichment_settings # type: ignore + ) + # get all document_chunks + document_chunks = ( + await self.providers.database.get_document_chunks( + document_id=document_id, + ) + )["results"] + + new_vector_entries = [] + document_chunks_dict = { + chunk["extraction_id"]: chunk for chunk in document_chunks + } + + tasks = [] + total_completed = 0 + for chunk_idx, chunk in enumerate(document_chunks): + tasks.append( + self._get_enriched_chunk_text( + chunk_idx, + chunk, + document_id, + chunk_enrichment_settings, + document_chunks, + document_chunks_dict, + ) + ) + + if len(tasks) == 128: + new_vector_entries.extend(await asyncio.gather(*tasks)) + total_completed += 128 + logger.info( + f"Completed {total_completed} out of {len(document_chunks)} chunks for document {document_id}" + ) + tasks = [] + + new_vector_entries.extend(await asyncio.gather(*tasks)) + logger.info( + f"Completed enrichment of {len(document_chunks)} chunks for document {document_id}" + ) + + # delete old chunks from vector db + await self.providers.database.delete( + filters={ + "document_id": document_id, + }, + ) + + # embed and store the enriched chunk + await self.providers.database.upsert_entries(new_vector_entries) + + return len(new_vector_entries) + class IngestionServiceAdapter: @staticmethod @@ -358,6 +538,21 @@ def _parse_user_data(user_data) -> UserResponse: ) from e return UserResponse.from_dict(user_data) + @staticmethod + def _parse_chunk_enrichment_settings( + chunk_enrichment_settings: dict, + ) -> ChunkEnrichmentSettings: + if isinstance(chunk_enrichment_settings, str): + try: + chunk_enrichment_settings = json.loads( + chunk_enrichment_settings + ) + except json.JSONDecodeError as e: + raise ValueError( + f"Invalid chunk enrichment settings format: {chunk_enrichment_settings}" + ) from e + return ChunkEnrichmentSettings.from_dict(chunk_enrichment_settings) + @staticmethod def parse_ingest_file_input(data: dict) -> dict: return { @@ -398,8 +593,27 @@ def parse_create_vector_index_input(data: dict) -> dict: return { "table_name": VectorTableName(data["table_name"]), "index_method": IndexMethod(data["index_method"]), - "measure": IndexMeasure(data["measure"]), + "index_measure": IndexMeasure(data["index_measure"]), + "index_name": data["index_name"], "index_arguments": data["index_arguments"], - "replace": data["replace"], "concurrently": data["concurrently"], } + + @staticmethod + def parse_list_vector_indices_input(input_data: dict) -> dict: + return {"table_name": input_data["table_name"]} + + @staticmethod + def parse_delete_vector_index_input(input_data: dict) -> dict: + return { + "index_name": input_data["index_name"], + "table_name": input_data.get("table_name"), + "concurrently": input_data.get("concurrently", True), + } + + @staticmethod + def parse_select_vector_index_input(input_data: dict) -> dict: + return { + "index_name": input_data["index_name"], + "table_name": input_data.get("table_name"), + } diff --git a/py/core/main/services/kg_service.py b/py/core/main/services/kg_service.py index dafbe837c..c99964a50 100644 --- a/py/core/main/services/kg_service.py +++ b/py/core/main/services/kg_service.py @@ -1,8 +1,7 @@ import logging import math import time -from time import strftime -from typing import Any, AsyncGenerator, Optional, Union +from typing import AsyncGenerator, Optional from uuid import UUID from core.base import KGExtractionStatus, R2RLoggingProvider, RunManager @@ -10,6 +9,9 @@ GenerationConfig, 
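`chunk_enrichment` above caps concurrency by flushing the pending task list every 128 coroutines rather than gathering all chunks at once; the trailing `gather` drains whatever remains. The same pattern in isolation, as a small runnable sketch (`work` is a hypothetical stand-in for `_get_enriched_chunk_text`):

    import asyncio


    async def work(i: int) -> int:
        return i * i  # placeholder per-chunk task


    async def gather_in_batches(
        n_items: int, batch_size: int = 128
    ) -> list[int]:
        results: list[int] = []
        tasks = []
        for i in range(n_items):
            tasks.append(work(i))
            if len(tasks) == batch_size:
                # Flush a full batch so at most `batch_size` tasks run together.
                results.extend(await asyncio.gather(*tasks))
                tasks = []
        # Drain the remainder; gather() of an empty list returns [].
        results.extend(await asyncio.gather(*tasks))
        return results


    print(len(asyncio.run(gather_in_batches(300))))  # 300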
KGCreationSettings, KGEnrichmentSettings, + KGEntityDeduplicationSettings, + KGEntityDeduplicationType, + R2RException, ) from core.telemetry.telemetry_decorator import telemetry_event @@ -67,7 +69,7 @@ async def kg_triples_extraction( f"KGService: Processing document {document_id} for KG extraction" ) - await self.providers.database.relational.set_workflow_status( + await self.providers.database.set_workflow_status( id=document_id, status_type="kg_extraction_status", status=KGExtractionStatus.PROCESSING, @@ -101,7 +103,7 @@ async def kg_triples_extraction( except Exception as e: logger.error(f"KGService: Error in kg_extraction: {e}") - await self.providers.database.relational.set_workflow_status( + await self.providers.database.set_workflow_status( id=document_id, status_type="kg_extraction_status", status=KGExtractionStatus.FAILED, @@ -127,10 +129,12 @@ async def get_document_ids_for_create_graph( KGExtractionStatus.PROCESSING, ] - document_ids = await self.providers.database.relational.get_document_ids_by_status( - status_type="kg_extraction_status", - status=document_status_filter, - collection_id=collection_id, + document_ids = ( + await self.providers.database.get_document_ids_by_status( + status_type="kg_extraction_status", + status=[str(ele) for ele in document_status_filter], + collection_id=collection_id, + ) ) return document_ids @@ -152,7 +156,7 @@ async def kg_entity_description( entity_count = await self.providers.kg.get_entity_count( document_id=document_id, distinct=True, - entity_table_name="entity_raw", + entity_table_name="chunk_entity", ) logger.info( @@ -193,7 +197,7 @@ async def kg_entity_description( f"KGService: Completed kg_entity_description for batch {i+1}/{num_batches} for document {document_id}" ) - await self.providers.database.relational.set_workflow_status( + await self.providers.database.set_workflow_status( id=document_id, status_type="kg_extraction_status", status=KGExtractionStatus.SUCCESS, @@ -318,7 +322,7 @@ async def get_entities( offset: int = 0, limit: int = 100, entity_ids: Optional[list[str]] = None, - entity_table_name: str = "entity_embedding", + entity_table_name: str = "document_entity", **kwargs, ): return await self.providers.kg.get_entities( @@ -364,3 +368,152 @@ async def get_communities( levels, community_numbers, ) + + @telemetry_event("get_deduplication_estimate") + async def get_deduplication_estimate( + self, + collection_id: UUID, + kg_deduplication_settings: KGEntityDeduplicationSettings, + **kwargs, + ): + return await self.providers.kg.get_deduplication_estimate( + collection_id, kg_deduplication_settings + ) + + @telemetry_event("kg_entity_deduplication") + async def kg_entity_deduplication( + self, + collection_id: UUID, + kg_entity_deduplication_type: KGEntityDeduplicationType, + kg_entity_deduplication_prompt: str, + generation_config: GenerationConfig, + **kwargs, + ): + deduplication_results = await self.pipes.kg_entity_deduplication_pipe.run( + input=self.pipes.kg_entity_deduplication_pipe.Input( + message={ + "collection_id": collection_id, + "kg_entity_deduplication_type": kg_entity_deduplication_type, + "kg_entity_deduplication_prompt": kg_entity_deduplication_prompt, + "generation_config": generation_config, + **kwargs, + } + ), + state=None, + run_manager=self.run_manager, + ) + return await _collect_results(deduplication_results) + + @telemetry_event("kg_entity_deduplication_summary") + async def kg_entity_deduplication_summary( + self, + collection_id: UUID, + offset: int, + limit: int, + 
kg_entity_deduplication_type: KGEntityDeduplicationType, + kg_entity_deduplication_prompt: str, + generation_config: GenerationConfig, + **kwargs, + ): + + logger.info( + f"Running kg_entity_deduplication_summary for collection {collection_id} with settings {kwargs}" + ) + deduplication_summary_results = await self.pipes.kg_entity_deduplication_summary_pipe.run( + input=self.pipes.kg_entity_deduplication_summary_pipe.Input( + message={ + "collection_id": collection_id, + "offset": offset, + "limit": limit, + "kg_entity_deduplication_type": kg_entity_deduplication_type, + "kg_entity_deduplication_prompt": kg_entity_deduplication_prompt, + "generation_config": generation_config, + } + ), + state=None, + run_manager=self.run_manager, + ) + + return await _collect_results(deduplication_summary_results) + + @telemetry_event("tune_prompt") + async def tune_prompt( + self, + prompt_name: str, + collection_id: UUID, + documents_offset: int = 0, + documents_limit: int = 100, + chunks_offset: int = 0, + chunks_limit: int = 100, + **kwargs, + ): + + document_response = ( + await self.providers.database.documents_in_collection( + collection_id, offset=documents_offset, limit=documents_limit + ) + ) + results = document_response["results"] + + if isinstance(results, int): + raise TypeError("Expected list of documents, got count instead") + + documents = results + + if not documents: + raise R2RException( + message="No documents found in collection", + status_code=404, + ) + + all_chunks = [] + + for document in documents: + chunks_response = ( + await self.providers.database.get_document_chunks( + document.id, + offset=chunks_offset, + limit=chunks_limit, + ) + ) + + if chunks := chunks_response.get("results", []): + all_chunks.extend(chunks) + else: + logger.warning(f"No chunks found for document {document.id}") + + if not all_chunks: + raise R2RException( + message="No chunks found in documents", + status_code=404, + ) + + chunk_texts = [ + chunk["text"] for chunk in all_chunks if chunk.get("text") + ] + + # Pass chunks to the tuning pipe + tune_prompt_results = await self.pipes.kg_prompt_tuning_pipe.run( + input=self.pipes.kg_prompt_tuning_pipe.Input( + message={ + "collection_id": collection_id, + "prompt_name": prompt_name, + "chunks": chunk_texts, # Pass just the text content + **kwargs, + } + ), + state=None, + run_manager=self.run_manager, + ) + + results = [] + async for result in tune_prompt_results: + results.append(result) + + if not results: + raise R2RException( + message="No results generated from prompt tuning", + status_code=500, + ) + + return results[0] diff --git a/py/core/main/services/management_service.py b/py/core/main/services/management_service.py index ecaebcbd0..c4297cdff 100644 --- a/py/core/main/services/management_service.py +++ b/py/core/main/services/management_service.py @@ -1,12 +1,14 @@ import logging from collections import defaultdict -from typing import Any, BinaryIO, Dict, Optional, Tuple +from typing import Any, BinaryIO, Dict, Optional, Tuple, Union from uuid import UUID import toml from core.base import ( AnalysisTypes, + CollectionResponse, + DocumentInfo, LogFilterCriteria, LogProcessor, Message, @@ -15,6 +17,7 @@ R2RLoggingProvider, RunManager, RunType, + UserResponse, ) from core.base.utils import validate_uuid from core.telemetry.telemetry_decorator import telemetry_event @@ -204,8 +207,8 @@ async def users_overview( *args, **kwargs, ): - return await self.providers.database.relational.get_users_overview( - [str(ele) for ele in user_ids] if user_ids else 
None,
+        return await self.providers.database.get_users_overview(
+            user_ids,
            offset=offset,
            limit=limit,
        )
@@ -267,7 +270,7 @@ def validate_filters(filters: dict[str, Any]) -> None:
        logger.info(f"Deleting entries with filters: {filters}")

        try:
-            vector_delete_results = self.providers.database.vector.delete(
+            vector_delete_results = await self.providers.database.delete(
                filters
            )
        except Exception as e:
@@ -277,7 +280,7 @@ def validate_filters(filters: dict[str, Any]) -> None:
        document_ids_to_purge: set[UUID] = set()
        if vector_delete_results:
            document_ids_to_purge.update(
-                doc_id
+                UUID(doc_id)
                for doc_id in (
                    result.get("document_id")
                    for result in vector_delete_results.values()
@@ -299,8 +302,8 @@ def validate_filters(filters: dict[str, Any]) -> None:
        try:
            documents_overview = (
-                await self.providers.database.relational.get_documents_overview(
-                    **relational_filters
+                await self.providers.database.get_documents_overview(
+                    **relational_filters  # type: ignore
                )
            )["results"]
        except Exception as e:
@@ -319,8 +322,8 @@ def validate_filters(filters: dict[str, Any]) -> None:
        for document_id in document_ids_to_purge:
            try:
-                await self.providers.database.relational.delete_from_documents_overview(
-                    str(document_id)
+                await self.providers.database.delete_from_documents_overview(
+                    document_id
                )
                logger.info(
                    f"Deleted document ID {document_id} from documents_overview."
@@ -351,12 +354,12 @@ async def documents_overview(
        *args: Any,
        **kwargs: Any,
    ):
-        return await self.providers.database.relational.get_documents_overview(
+        return await self.providers.database.get_documents_overview(
            filter_document_ids=document_ids,
            filter_user_ids=user_ids,
            filter_collection_ids=collection_ids,
-            offset=offset,
-            limit=limit,
+            offset=offset or 0,
+            limit=limit or -1,
        )

    @telemetry_event("DocumentChunks")
    async def document_chunks(
        self,
        document_id: UUID,
        offset: int = 0,
        limit: int = 100,
        *args,
        **kwargs,
    ):
-        return self.providers.database.vector.get_document_chunks(
+        return await self.providers.database.get_document_chunks(
            document_id,
            offset=offset,
            limit=limit,
        )

    @telemetry_event("AssignDocumentToCollection")
    async def assign_document_to_collection(
-        self, document_id: str, collection_id: UUID
+        self, document_id: UUID, collection_id: UUID
    ):
-        await self.providers.database.relational.assign_document_to_collection(
+        await self.providers.database.assign_document_to_collection_relational(
            document_id, collection_id
        )
-        self.providers.database.vector.assign_document_to_collection(
+        await self.providers.database.assign_document_to_collection_vector(
            document_id, collection_id
        )
        return {"message": "Document assigned to collection successfully"}
@@ -392,10 +395,10 @@ async def remove_document_from_collection(
    async def remove_document_from_collection(
        self, document_id: UUID, collection_id: UUID
    ):
-        await self.providers.database.relational.remove_document_from_collection(
+        await self.providers.database.remove_document_from_collection_relational(
            document_id, collection_id
        )
-        self.providers.database.vector.remove_document_from_collection(
+        await self.providers.database.remove_document_from_collection_vector(
            document_id, collection_id
        )
        await self.providers.kg.delete_node_via_document_id(
@@ -405,9 +408,9 @@
    @telemetry_event("DocumentCollections")
    async def document_collections(
-        self, document_id: str, offset: int = 0, limit: int = 100
+        self, document_id: UUID, offset: int = 0, limit: int = 100
    ):
-        return await
self.providers.database.relational.document_collections( + return await self.providers.database.document_collections( document_id, offset=offset, limit=limit ) @@ -501,16 +504,14 @@ def _get_central_nodes( @telemetry_event("CreateCollection") async def create_collection( self, name: str, description: str = "" - ) -> UUID: - return await self.providers.database.relational.create_collection( + ) -> CollectionResponse: + return await self.providers.database.create_collection( name, description ) @telemetry_event("GetCollection") - async def get_collection(self, collection_id: UUID) -> Optional[dict]: - return await self.providers.database.relational.get_collection( - collection_id - ) + async def get_collection(self, collection_id: UUID) -> CollectionResponse: + return await self.providers.database.get_collection(collection_id) @telemetry_event("UpdateCollection") async def update_collection( @@ -518,61 +519,57 @@ async def update_collection( collection_id: UUID, name: Optional[str] = None, description: Optional[str] = None, - ) -> bool: - return await self.providers.database.relational.update_collection( + ) -> CollectionResponse: + return await self.providers.database.update_collection( collection_id, name, description ) @telemetry_event("DeleteCollection") async def delete_collection(self, collection_id: UUID) -> bool: - await self.providers.database.relational.delete_collection( + await self.providers.database.delete_collection_relational( collection_id ) - self.providers.database.vector.delete_collection(collection_id) + await self.providers.database.delete_collection_vector(collection_id) return True @telemetry_event("ListCollections") async def list_collections( self, offset: int = 0, limit: int = 100 - ) -> list[dict]: - return await self.providers.database.relational.list_collections( + ) -> dict[str, list[CollectionResponse] | int]: + return await self.providers.database.list_collections( offset=offset, limit=limit ) @telemetry_event("AddUserToCollection") async def add_user_to_collection( self, user_id: UUID, collection_id: UUID - ) -> bool: - return await self.providers.database.relational.add_user_to_collection( + ) -> None: + return await self.providers.database.add_user_to_collection( user_id, collection_id ) @telemetry_event("RemoveUserFromCollection") async def remove_user_from_collection( self, user_id: UUID, collection_id: UUID - ) -> bool: - return await self.providers.database.relational.remove_user_from_collection( + ) -> None: + return await self.providers.database.remove_user_from_collection( user_id, collection_id ) @telemetry_event("GetUsersInCollection") async def get_users_in_collection( self, collection_id: UUID, offset: int = 0, limit: int = 100 - ) -> list[dict]: - return ( - await self.providers.database.relational.get_users_in_collection( - collection_id, offset=offset, limit=limit - ) + ) -> dict[str, list[UserResponse] | int]: + return await self.providers.database.get_users_in_collection( + collection_id, offset=offset, limit=limit ) @telemetry_event("GetCollectionsForUser") async def get_collections_for_user( self, user_id: UUID, offset: int = 0, limit: int = 100 - ) -> list[dict]: - return ( - await self.providers.database.relational.get_collections_for_user( - user_id, offset, limit - ) + ) -> dict[str, list[CollectionResponse] | int]: + return await self.providers.database.get_collections_for_user( + user_id, offset, limit ) @telemetry_event("CollectionsOverview") @@ -584,26 +581,18 @@ async def collections_overview( *args, **kwargs, ): - return ( - 
await self.providers.database.relational.get_collections_overview( - ( - [str(ele) for ele in collection_ids] - if collection_ids - else None - ), - offset=offset, - limit=limit, - ) + return await self.providers.database.get_collections_overview( + collection_ids, + offset=offset, + limit=limit, ) @telemetry_event("GetDocumentsInCollection") async def documents_in_collection( self, collection_id: UUID, offset: int = 0, limit: int = 100 - ) -> list[dict]: - return ( - await self.providers.database.relational.documents_in_collection( - collection_id, offset=offset, limit=limit - ) + ) -> dict[str, Union[list[DocumentInfo], int]]: + return await self.providers.database.documents_in_collection( + collection_id, offset=offset, limit=limit ) @telemetry_event("AddPrompt") diff --git a/py/core/main/services/retrieval_service.py b/py/core/main/services/retrieval_service.py index 4912f1628..36e7c1029 100644 --- a/py/core/main/services/retrieval_service.py +++ b/py/core/main/services/retrieval_service.py @@ -1,17 +1,14 @@ import json import logging import time -from datetime import datetime from typing import Optional from uuid import UUID from core import R2RStreamingRAGAgent from core.base import ( - CompletionRecord, GenerationConfig, KGSearchSettings, Message, - MessageType, R2RException, R2RLoggingProvider, RunManager, @@ -22,7 +19,6 @@ ) from core.base.api.models import RAGResponse, SearchResponse, UserResponse from core.telemetry.telemetry_decorator import telemetry_event -from core.utils import generate_message_id from ..abstractions import R2RAgents, R2RPipelines, R2RPipes, R2RProviders from ..config import R2RConfig @@ -279,6 +275,14 @@ async def agent( conversation_id, branch_id ) ) + if not conversation: + logger.error( + f"No conversation found for ID: {conversation_id}" + ) + raise R2RException( + status_code=404, + message=f"Conversation not found: {conversation_id}", + ) messages = [conv[1] for conv in conversation] + [ message ] @@ -302,12 +306,13 @@ async def agent( conversation_id, inner_message, parent_id ) ) - message = messages[-1] + + current_message = messages[-1] # Save the new message to the conversation message_id = await self.logging_connection.add_message( conversation_id, # type: ignore - message, # type: ignore + current_message, # type: ignore parent_id=str(ids[-2]) if (ids and len(ids) > 1) else None, # type: ignore ) diff --git a/py/core/pipes/__init__.py b/py/core/pipes/__init__.py index 510879e05..cef5778d3 100644 --- a/py/core/pipes/__init__.py +++ b/py/core/pipes/__init__.py @@ -5,9 +5,12 @@ from .ingestion.vector_storage_pipe import VectorStoragePipe from .kg.clustering import KGClusteringPipe from .kg.community_summary import KGCommunitySummaryPipe +from .kg.deduplication import KGEntityDeduplicationPipe +from .kg.deduplication_summary import KGEntityDeduplicationSummaryPipe from .kg.entity_description import KGEntityDescriptionPipe from .kg.storage import KGStoragePipe from .kg.triples_extraction import KGTriplesExtractionPipe +from .kg.prompt_tuning import KGPromptTuningPipe from .retrieval.kg_search_pipe import KGSearchSearchPipe from .retrieval.multi_search import MultiSearchPipe from .retrieval.query_transform_pipe import QueryTransformPipe @@ -34,4 +37,7 @@ "MultiSearchPipe", "KGCommunitySummaryPipe", "RoutingSearchPipe", + "KGEntityDeduplicationPipe", + "KGEntityDeduplicationSummaryPipe", + "KGPromptTuningPipe", ] diff --git a/py/core/pipes/abstractions/generator_pipe.py b/py/core/pipes/abstractions/generator_pipe.py index 03485e6c2..ab6c9661d 100644 
--- a/py/core/pipes/abstractions/generator_pipe.py +++ b/py/core/pipes/abstractions/generator_pipe.py @@ -5,7 +5,6 @@ from core.base import ( AsyncState, CompletionProvider, - CompletionRecord, PipeType, PromptProvider, R2RLoggingProvider, diff --git a/py/core/pipes/ingestion/vector_storage_pipe.py b/py/core/pipes/ingestion/vector_storage_pipe.py index e57698881..9f2c5925a 100644 --- a/py/core/pipes/ingestion/vector_storage_pipe.py +++ b/py/core/pipes/ingestion/vector_storage_pipe.py @@ -51,7 +51,7 @@ async def store( """ try: - self.database_provider.vector.upsert_entries(vector_entries) + await self.database_provider.upsert_entries(vector_entries) except Exception as e: error_message = ( f"Failed to store vector entries in the database: {e}" diff --git a/py/core/pipes/kg/community_summary.py b/py/core/pipes/kg/community_summary.py index 928373458..84b6ee400 100644 --- a/py/core/pipes/kg/community_summary.py +++ b/py/core/pipes/kg/community_summary.py @@ -145,7 +145,8 @@ async def process_community( community_level, entities, triples = ( await self.kg_provider.get_community_details( - community_number=community_number + community_number=community_number, + collection_id=collection_id, ) ) diff --git a/py/core/pipes/kg/deduplication.py b/py/core/pipes/kg/deduplication.py new file mode 100644 index 000000000..a8dc7c872 --- /dev/null +++ b/py/core/pipes/kg/deduplication.py @@ -0,0 +1,155 @@ +import logging +from typing import Any, AsyncGenerator, Optional +from uuid import UUID + +from core.base import AsyncState +from core.base.logging import R2RLoggingProvider +from core.base.pipes import AsyncPipe, PipeType +from core.base.providers import ( + CompletionProvider, + EmbeddingProvider, + KGProvider, + PromptProvider, +) +from shared.abstractions.graph import Entity +from shared.abstractions.kg import KGEntityDeduplicationType + +logger = logging.getLogger() + + +class KGEntityDeduplicationPipe(AsyncPipe): + def __init__( + self, + config: AsyncPipe.PipeConfig, + kg_provider: KGProvider, + llm_provider: CompletionProvider, + prompt_provider: PromptProvider, + embedding_provider: EmbeddingProvider, + type: PipeType = PipeType.OTHER, + pipe_logger: Optional[R2RLoggingProvider] = None, + **kwargs, + ): + super().__init__( + pipe_logger=pipe_logger, + type=type, + config=config + or AsyncPipe.PipeConfig(name="kg_entity_deduplication_pipe"), + ) + self.kg_provider = kg_provider + self.llm_provider = llm_provider + self.prompt_provider = prompt_provider + self.embedding_provider = embedding_provider + + async def kg_named_entity_deduplication( + self, collection_id: UUID, **kwargs + ): + + entity_count = await self.kg_provider.get_entity_count( + collection_id=collection_id, distinct=True + ) + + logger.info( + f"KGEntityDeduplicationPipe: Getting entities for collection {collection_id}" + ) + logger.info(f"KGEntityDeduplicationPipe: Entity count: {entity_count}") + + entities = ( + await self.kg_provider.get_entities( + collection_id=collection_id, offset=0, limit=-1 + ) + )["entities"] + + logger.info( + f"KGEntityDeduplicationPipe: Got {len(entities)} entities for collection {collection_id}" + ) + + # deduplicate entities by name + deduplicated_entities: dict[str, dict[str, list[str]]] = {} + deduplication_source_keys = [ + "extraction_ids", + "document_id", + "attributes", + ] + deduplication_target_keys = [ + "extraction_ids", + "document_ids", + "attributes", + ] + deduplication_keys = list( + zip(deduplication_source_keys, deduplication_target_keys) + ) + for entity in entities: + if not 
entity.name in deduplicated_entities:
+                deduplicated_entities[entity.name] = {
+                    target_key: [] for _, target_key in deduplication_keys
+                }
+            for source_key, target_key in deduplication_keys:
+                value = getattr(entity, source_key)
+                if isinstance(value, list):
+                    deduplicated_entities[entity.name][target_key].extend(
+                        value
+                    )
+                else:
+                    deduplicated_entities[entity.name][target_key].append(
+                        value
+                    )
+
+        logger.info(
+            f"KGEntityDeduplicationPipe: Deduplicated {len(deduplicated_entities)} entities"
+        )
+
+        # upsert deduplicated entities in the collection_entity table
+        deduplicated_entities_list = [
+            Entity(
+                name=name,
+                collection_id=collection_id,
+                extraction_ids=entity["extraction_ids"],
+                document_ids=entity["document_ids"],
+                attributes={},
+            )
+            for name, entity in deduplicated_entities.items()
+        ]
+
+        logger.info(
+            f"KGEntityDeduplicationPipe: Upserting {len(deduplicated_entities_list)} deduplicated entities for collection {collection_id}"
+        )
+        await self.kg_provider.add_entities(
+            deduplicated_entities_list,
+            table_name="collection_entity",
+            conflict_columns=["name", "collection_id", "attributes"],
+        )
+
+        yield {
+            "result": f"successfully deduplicated {len(entities)} entities to {len(deduplicated_entities)} entities for collection {collection_id}",
+            "num_entities": len(deduplicated_entities),
+        }
+
+    async def _run_logic(
+        self,
+        input: AsyncPipe.Input,
+        state: AsyncState,
+        run_id: UUID,
+        *args: Any,
+        **kwargs: Any,
+    ):
+        # TODO: figure out why the return type AsyncGenerator[dict, None] is not working
+
+        collection_id = input.message["collection_id"]
+
+        kg_entity_deduplication_type = input.message[
+            "kg_entity_deduplication_type"
+        ]
+        kg_entity_deduplication_prompt = input.message[
+            "kg_entity_deduplication_prompt"
+        ]
+        generation_config = input.message["generation_config"]
+
+        if kg_entity_deduplication_type == KGEntityDeduplicationType.BY_NAME:
+            async for result in self.kg_named_entity_deduplication(
+                collection_id, **kwargs
+            ):
+                yield result
+        else:
+            raise NotImplementedError(
+                f"KGEntityDeduplicationPipe: Deduplication type {kg_entity_deduplication_type} not implemented"
+            )
diff --git a/py/core/pipes/kg/deduplication_summary.py b/py/core/pipes/kg/deduplication_summary.py
new file mode 100644
index 000000000..edccf0cf9
--- /dev/null
+++ b/py/core/pipes/kg/deduplication_summary.py
@@ -0,0 +1,219 @@
+import asyncio
+import logging
+from typing import Any, AsyncGenerator, Optional
+from uuid import UUID
+
+from core.base import AsyncState
+from core.base.logging import R2RLoggingProvider
+from core.base.pipes import AsyncPipe, PipeType
+from core.base.providers import (
+    CompletionProvider,
+    EmbeddingProvider,
+    KGProvider,
+    PromptProvider,
+)
+from shared.abstractions import Entity, GenerationConfig
+
+logger = logging.getLogger()
+
+
+class KGEntityDeduplicationSummaryPipe(AsyncPipe[Any]):
+
+    class Input(AsyncPipe.Input):
+        message: dict
+
+    def __init__(
+        self,
+        kg_provider: KGProvider,
+        prompt_provider: PromptProvider,
+        llm_provider: CompletionProvider,
+        embedding_provider: EmbeddingProvider,
+        config: AsyncPipe.PipeConfig,
+        pipe_logger: Optional[R2RLoggingProvider] = None,
+        type: PipeType = PipeType.OTHER,
+        **kwargs,
+    ):
+        super().__init__(
+            pipe_logger=pipe_logger, type=type, config=config, **kwargs
+        )
+        self.kg_provider = kg_provider
+        self.prompt_provider = prompt_provider
+        self.llm_provider = llm_provider
+        self.embedding_provider = embedding_provider
+
+    async def _merge_entity_descriptions_llm_prompt(
+        self,
+
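The name-keyed merge in `kg_named_entity_deduplication` folds every occurrence of an entity into a single record, extending list-valued fields and appending scalar ones. A stripped-down sketch of the same fold over plain dicts (field names mirror the pipe's deduplication keys; the data is illustrative):

    from collections import defaultdict


    def dedup_by_name(entities: list[dict]) -> dict[str, dict]:
        merged: dict[str, dict] = defaultdict(
            lambda: {"extraction_ids": [], "document_ids": []}
        )
        for ent in entities:
            record = merged[ent["name"]]
            record["extraction_ids"].extend(ent["extraction_ids"])  # list field
            record["document_ids"].append(ent["document_id"])  # scalar field
        return dict(merged)


    ents = [
        {"name": "R2R", "extraction_ids": [1], "document_id": "a"},
        {"name": "R2R", "extraction_ids": [2, 3], "document_id": "b"},
    ]
    print(dedup_by_name(ents)["R2R"])
    # {'extraction_ids': [1, 2, 3], 'document_ids': ['a', 'b']}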
entity_name: str,
+        entity_descriptions: list[str],
+        generation_config: GenerationConfig,
+    ) -> Entity:
+
+        # take whole descriptions, in order, until adding the next one
+        # would exceed max_description_input_length
+        index = 0
+        description_length = 0
+        while index < len(entity_descriptions):
+            if (
+                len(entity_descriptions[index]) + description_length
+                > self.kg_provider.config.kg_entity_deduplication_settings.max_description_input_length
+            ):
+                break
+            description_length += len(entity_descriptions[index])
+            index += 1
+
+        completion = await self.llm_provider.aget_completion(
+            messages=await self.prompt_provider._get_message_payload(
+                task_prompt_name=self.kg_provider.config.kg_entity_deduplication_settings.kg_entity_deduplication_prompt,
+                task_inputs={
+                    "entity_name": entity_name,
+                    "entity_descriptions": "\n".join(
+                        entity_descriptions[:index]
+                    ),
+                },
+            ),
+            generation_config=GenerationConfig(**generation_config),  # type: ignore
+        )
+
+        # extract the description between the $$ delimiters
+        description = ""
+        try:
+            description = completion.choices[0].message.content or ""
+            description = description.split("$$")[1]
+        except Exception:
+            logger.error(
+                f"Failed to generate a summary for entity {entity_name}."
+            )
+
+        return Entity(name=entity_name, description=description)
+
+    async def _merge_entity_descriptions(
+        self,
+        entity_name: str,
+        entity_descriptions: list[str],
+        generation_config: GenerationConfig,
+    ) -> Entity:
+
+        # TODO: Expose this as a hyperparameter
+        if len(entity_descriptions) <= 5:
+            return Entity(
+                name=entity_name, description="\n".join(entity_descriptions)
+            )
+        else:
+            return await self._merge_entity_descriptions_llm_prompt(
+                entity_name, entity_descriptions, generation_config
+            )
+
+    async def _prepare_and_upsert_entities(
+        self, entities_batch: list[Entity], collection_id: UUID
+    ) -> Any:
+
+        embeddings = await self.embedding_provider.async_get_embeddings(
+            [entity.description or "" for entity in entities_batch]
+        )
+
+        for i, entity in enumerate(entities_batch):
+            entity.description_embedding = str(embeddings[i])  # type: ignore
+            entity.collection_id = collection_id
+
+        logger.info(
+            f"Upserting {len(entities_batch)} entities for collection {collection_id}"
+        )
+
+        await self.kg_provider.update_entity_descriptions(
+            entities_batch,
+        )
+
+        logger.info(
+            f"Upserted {len(entities_batch)} entities for collection {collection_id}"
+        )
+
+        for entity in entities_batch:
+            yield entity
+
+    async def _run_logic(
+        self,
+        input: AsyncPipe.Input,
+        state: AsyncState,
+        run_id: UUID,
+        *args: Any,
+        **kwargs: Any,
+    ):
+        # TODO: figure out why the return type AsyncGenerator[dict, None] is not working
+
+        collection_id = input.message["collection_id"]
+        offset = input.message["offset"]
+        limit = input.message["limit"]
+        kg_entity_deduplication_type = input.message[
+            "kg_entity_deduplication_type"
+        ]
+        kg_entity_deduplication_prompt = input.message[
+            "kg_entity_deduplication_prompt"
+        ]
+        generation_config = input.message["generation_config"]
+
+        logger.info(
+            f"Running kg_entity_deduplication_summary for collection {collection_id} with settings kg_entity_deduplication_type: {kg_entity_deduplication_type}, kg_entity_deduplication_prompt: {kg_entity_deduplication_prompt}, generation_config: {generation_config}"
+        )
+
+        entities = (
+            await self.kg_provider.get_entities(
+                collection_id,
+                offset,
+                limit,
+                entity_table_name="collection_entity",
+            )
+        )["entities"]
+
+        entity_names = [entity.name for entity in entities]
+
+        entity_descriptions = (
+            await self.kg_provider.get_entities(
+                collection_id,
+                entity_names=entity_names,
+
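The while-scan in `_merge_entity_descriptions_llm_prompt` keeps whole descriptions, in order, while their combined length still fits under `max_description_input_length`; everything past that point is dropped from the prompt. The same truncation logic in isolation, as a minimal sketch (the 1024-character budget is only an illustrative default):

    def cap_descriptions(
        descriptions: list[str], max_chars: int = 1024
    ) -> list[str]:
        # Keep whole descriptions, in order, while the running total fits.
        index = 0
        total = 0
        while index < len(descriptions):
            if total + len(descriptions[index]) > max_chars:
                break
            total += len(descriptions[index])
            index += 1
        return descriptions[:index]


    print(len(cap_descriptions(["a" * 600, "b" * 500, "c" * 100])))  # 1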
entity_table_name="document_entity",
+            )
+        )["entities"]
+
+        entity_descriptions_dict: dict[str, list[str]] = {}
+        for entity_description in entity_descriptions:
+            if entity_description.name not in entity_descriptions_dict:
+                entity_descriptions_dict[entity_description.name] = []
+            entity_descriptions_dict[entity_description.name].append(
+                entity_description.description
+            )
+
+        logger.info(
+            f"Retrieved {len(entity_descriptions)} entity descriptions for collection {collection_id}"
+        )
+
+        tasks = []
+        entities_batch = []
+        for entity in entities:
+            tasks.append(
+                self._merge_entity_descriptions(
+                    entity.name,
+                    entity_descriptions_dict[entity.name],
+                    generation_config,
+                )
+            )
+
+            if len(tasks) == 32:
+                entities_batch = await asyncio.gather(*tasks)
+
+                # prepare and upsert entities
+                async for result in self._prepare_and_upsert_entities(
+                    entities_batch, collection_id
+                ):
+                    yield result
+
+                tasks = []
+
+        if tasks:
+            entities_batch = await asyncio.gather(*tasks)
+
+            # prepare and upsert entities
+            async for result in self._prepare_and_upsert_entities(
+                entities_batch, collection_id
+            ):
+                yield result
diff --git a/py/core/pipes/kg/entity_description.py b/py/core/pipes/kg/entity_description.py
index db5c5ccc5..7c5bee14d 100644
--- a/py/core/pipes/kg/entity_description.py
+++ b/py/core/pipes/kg/entity_description.py
@@ -144,7 +144,7 @@ async def process_entity(
                    document_id,
                )
            ],
-            "entity_embedding",
+            "document_entity",
        )

        return out_entity.name
diff --git a/py/core/pipes/kg/prompt_tuning.py b/py/core/pipes/kg/prompt_tuning.py
new file mode 100644
index 000000000..7734da78a
--- /dev/null
+++ b/py/core/pipes/kg/prompt_tuning.py
@@ -0,0 +1,95 @@
+"""
+Pipe to tune the prompt for the KG model.
+"""
+
+import logging
+from typing import Any, Optional
+from uuid import UUID
+
+from core.base import (
+    AsyncState,
+    CompletionProvider,
+    KGProvider,
+    PipeType,
+    PromptProvider,
+    R2RException,
+    R2RLoggingProvider,
+)
+from core.base.pipes.base_pipe import AsyncPipe
+
+logger = logging.getLogger()
+
+
+class KGPromptTuningPipe(AsyncPipe):
+    """
+    A pipe to tune a prompt for a specific domain.
+ """ + + def __init__( + self, + kg_provider: KGProvider, + llm_provider: CompletionProvider, + prompt_provider: PromptProvider, + config: AsyncPipe.PipeConfig, + pipe_logger: Optional[R2RLoggingProvider] = None, + type: PipeType = PipeType.OTHER, + *args, + **kwargs, + ): + super().__init__( + pipe_logger=pipe_logger, + type=type, + config=config, + ) + self.kg_provider = kg_provider + self.llm_provider = llm_provider + self.prompt_provider = prompt_provider + + async def _run_logic( + self, + input: AsyncPipe.Input, + state: AsyncState, + run_id: UUID, + *args: Any, + **kwargs: Any, + ): + try: + prompt_name = input.message["prompt_name"] + current_prompt = self.prompt_provider.get_all_prompts().get( + prompt_name + ) + + if not current_prompt: + raise R2RException( + message=f"Prompt {prompt_name} not found.", status_code=404 + ) + + chunks = input.message["chunks"] + + # Add logging + logger.info(f"Starting prompt tuning for {prompt_name}") + + tuned_prompt = await self.llm_provider.aget_completion( + messages=await self.prompt_provider._get_message_payload( + task_prompt_name="prompt_tuning_task", + task_inputs={ + "prompt_template": current_prompt.template, + "input_types": str(current_prompt.input_types), + "sample_data": chunks, + }, + ), + generation_config=self.kg_provider.config.kg_creation_settings.generation_config, + ) + + if not tuned_prompt: + raise R2RException( + message="Failed to generate tuned prompt", status_code=500 + ) + + yield {"tuned_prompt": tuned_prompt.choices[0].message.content} + + except Exception as e: + logger.error(f"Error in prompt tuning: {str(e)}") + raise R2RException( + message=f"Error tuning prompt: {str(e)}", status_code=500 + ) diff --git a/py/core/pipes/kg/triples_extraction.py b/py/core/pipes/kg/triples_extraction.py index a24cb5787..6de094d14 100644 --- a/py/core/pipes/kg/triples_extraction.py +++ b/py/core/pipes/kg/triples_extraction.py @@ -249,6 +249,12 @@ async def _run_logic( # type: ignore f"KGTriplesExtractionPipe: Processing document {document_id} for KG extraction", ) + # First get the chunks response + chunks_response = await self.database_provider.get_document_chunks( + document_id=document_id + ) + + # Then create the extractions from the results extractions = [ DocumentExtraction( id=extraction["extraction_id"], @@ -258,11 +264,11 @@ async def _run_logic( # type: ignore data=extraction["text"], metadata=extraction["metadata"], ) - for extraction in self.database_provider.vector.get_document_chunks( - document_id=document_id - )[ - "results" - ] + for extraction in ( + await self.database_provider.get_document_chunks( + document_id=document_id + ) + )["results"] ] logger.info( diff --git a/py/core/pipes/retrieval/kg_search_pipe.py b/py/core/pipes/retrieval/kg_search_pipe.py index 5e19131af..4de857704 100644 --- a/py/core/pipes/retrieval/kg_search_pipe.py +++ b/py/core/pipes/retrieval/kg_search_pipe.py @@ -133,6 +133,7 @@ async def local_search( "extraction_ids", ], filters=kg_search_settings.filters, + entities_level=kg_search_settings.entities_level, ): yield KGSearchResult( content=KGEntityResult( diff --git a/py/core/pipes/retrieval/search_rag_pipe.py b/py/core/pipes/retrieval/search_rag_pipe.py index 2ebcaca36..d14a92b98 100644 --- a/py/core/pipes/retrieval/search_rag_pipe.py +++ b/py/core/pipes/retrieval/search_rag_pipe.py @@ -6,7 +6,6 @@ AsyncPipe, AsyncState, CompletionProvider, - CompletionRecord, PipeType, PromptProvider, ) diff --git a/py/core/pipes/retrieval/streaming_rag_pipe.py 
b/py/core/pipes/retrieval/streaming_rag_pipe.py index 1ce7b341f..cd6cacb01 100644 --- a/py/core/pipes/retrieval/streaming_rag_pipe.py +++ b/py/core/pipes/retrieval/streaming_rag_pipe.py @@ -6,7 +6,6 @@ from core.base import ( AsyncState, CompletionProvider, - CompletionRecord, LLMChatCompletionChunk, PipeType, PromptProvider, diff --git a/py/core/pipes/retrieval/vector_search_pipe.py b/py/core/pipes/retrieval/vector_search_pipe.py index f12ba2ae3..dbeccebdd 100644 --- a/py/core/pipes/retrieval/vector_search_pipe.py +++ b/py/core/pipes/retrieval/vector_search_pipe.py @@ -63,14 +63,14 @@ async def search( # type: ignore purpose=EmbeddingPurpose.QUERY, ) - search_results = ( - self.database_provider.vector.hybrid_search( + search_results = await ( + self.database_provider.hybrid_search( query_vector=query_vector, query_text=message, search_settings=search_settings, ) if search_settings.use_hybrid_search - else self.database_provider.vector.semantic_search( + else self.database_provider.semantic_search( query_vector=query_vector, search_settings=search_settings, ) diff --git a/py/core/providers/auth/r2r_auth.py b/py/core/providers/auth/r2r_auth.py index 21afba66e..d337d9098 100644 --- a/py/core/providers/auth/r2r_auth.py +++ b/py/core/providers/auth/r2r_auth.py @@ -56,7 +56,7 @@ async def initialize(self): user = await self.register( email=self.admin_email, password=self.admin_password ) - await self.db_provider.relational.mark_user_as_superuser(user.id) + await self.db_provider.mark_user_as_superuser(user.id) except R2RException: logger.info("Default admin user already exists.") @@ -85,7 +85,7 @@ def create_refresh_token(self, data: dict) -> str: async def decode_token(self, token: str) -> TokenData: try: # First, check if the token is blacklisted - if await self.db_provider.relational.is_token_blacklisted(token): + if await self.db_provider.is_token_blacklisted(token): raise R2RException( status_code=401, message="Token has been invalidated" ) @@ -114,9 +114,11 @@ async def decode_token(self, token: str) -> TokenData: async def user(self, token: str = Depends(oauth2_scheme)) -> UserResponse: token_data = await self.decode_token(token) - user = await self.db_provider.relational.get_user_by_email( - token_data.email - ) + if not token_data.email: + raise R2RException( + status_code=401, message="Could not validate credentials" + ) + user = await self.db_provider.get_user_by_email(token_data.email) if user is None: raise R2RException( status_code=401, message="Invalid authentication credentials" @@ -130,18 +132,14 @@ def get_current_active_user( raise R2RException(status_code=400, message="Inactive user") return current_user - async def register(self, email: str, password: str) -> Dict[str, str]: + async def register(self, email: str, password: str) -> UserResponse: # Create new user and give them a default collection - new_user = await self.db_provider.relational.create_user( - email, password - ) - default_collection = ( - await self.db_provider.relational.create_default_collection( - new_user.id, - ) + new_user = await self.db_provider.create_user(email, password) + default_collection = await self.db_provider.create_default_collection( + new_user.id, ) - await self.db_provider.relational.add_user_to_collection( + await self.db_provider.add_user_to_collection( new_user.id, default_collection.collection_id ) @@ -152,46 +150,42 @@ async def register(self, email: str, password: str) -> Dict[str, str]: ) expiry = datetime.now(timezone.utc) + timedelta(hours=24) - await 
self.db_provider.relational.store_verification_code( + await self.db_provider.store_verification_code( new_user.id, verification_code, expiry ) new_user.verification_code_expiry = expiry # TODO - Integrate email provider(s) # self.providers.email.send_verification_email(new_user.email, verification_code) else: + expiry = datetime.now(timezone.utc) + timedelta(days=366 * 10) + # Mark user as verified - await self.db_provider.relational.store_verification_code( - new_user.id, None, None - ) - await self.db_provider.relational.mark_user_as_verified( - new_user.id + await self.db_provider.store_verification_code( + new_user.id, str(-1), expiry ) + await self.db_provider.mark_user_as_verified(new_user.id) return new_user async def verify_email( self, email: str, verification_code: str ) -> dict[str, str]: - user_id = ( - await self.db_provider.relational.get_user_id_by_verification_code( - verification_code - ) + user_id = await self.db_provider.get_user_id_by_verification_code( + verification_code ) if not user_id: raise R2RException( status_code=400, message="Invalid or expired verification code" ) - await self.db_provider.relational.mark_user_as_verified(user_id) - await self.db_provider.relational.remove_verification_code( - verification_code - ) + await self.db_provider.mark_user_as_verified(user_id) + await self.db_provider.remove_verification_code(verification_code) return {"message": "Email verified successfully"} async def login(self, email: str, password: str) -> Dict[str, Token]: logger = logging.getLogger() logger.debug(f"Attempting login for email: {email}") - user = await self.db_provider.relational.get_user_by_email(email) + user = await self.db_provider.get_user_by_email(email) if not user: logger.warning(f"No user found for email: {email}") raise R2RException( @@ -245,7 +239,7 @@ async def refresh_access_token( ) # Invalidate the old refresh token and create a new one - await self.db_provider.relational.blacklist_token(refresh_token) + await self.db_provider.blacklist_token(refresh_token) new_access_token = self.create_access_token( data={"sub": token_data.email} @@ -281,13 +275,13 @@ async def change_password( hashed_new_password = self.crypto_provider.get_password_hash( new_password ) - await self.db_provider.relational.update_user_password( + await self.db_provider.update_user_password( user.id, hashed_new_password ) return {"message": "Password changed successfully"} async def request_password_reset(self, email: str) -> Dict[str, str]: - user = await self.db_provider.relational.get_user_by_email(email) + user = await self.db_provider.get_user_by_email(email) if not user: # To prevent email enumeration, always return a success message return { @@ -296,9 +290,7 @@ async def request_password_reset(self, email: str) -> Dict[str, str]: reset_token = self.crypto_provider.generate_verification_code() expiry = datetime.now(timezone.utc) + timedelta(hours=1) - await self.db_provider.relational.store_reset_token( - user.id, reset_token, expiry - ) + await self.db_provider.store_reset_token(user.id, reset_token, expiry) # TODO: Integrate with email provider to send reset link # self.email_provider.send_reset_email(email, reset_token) @@ -308,7 +300,7 @@ async def confirm_password_reset( self, reset_token: str, new_password: str ) -> Dict[str, str]: - user_id = await self.db_provider.relational.get_user_id_by_reset_token( + user_id = await self.db_provider.get_user_id_by_reset_token( reset_token ) if not user_id: raise R2RException( @@
-319,16 +311,16 @@ async def confirm_password_reset( hashed_new_password = self.crypto_provider.get_password_hash( new_password ) - await self.db_provider.relational.update_user_password( + await self.db_provider.update_user_password( user_id, hashed_new_password ) - await self.db_provider.relational.remove_reset_token(user_id) + await self.db_provider.remove_reset_token(user_id) return {"message": "Password reset successfully"} async def logout(self, token: str) -> Dict[str, str]: # Add the token to a blacklist - await self.db_provider.relational.blacklist_token(token) + await self.db_provider.blacklist_token(token) return {"message": "Logged out successfully"} async def clean_expired_blacklisted_tokens(self): - await self.db_provider.relational.clean_expired_blacklisted_tokens() + await self.db_provider.clean_expired_blacklisted_tokens() diff --git a/py/core/providers/auth/supabase.py b/py/core/providers/auth/supabase.py index a42b7c11f..6ee4215b3 100644 --- a/py/core/providers/auth/supabase.py +++ b/py/core/providers/auth/supabase.py @@ -65,7 +65,7 @@ async def decode_token(self, token: str) -> TokenData: "decode_token is not used with Supabase authentication" ) - async def register(self, email: str, password: str) -> dict[str, str]: + async def register(self, email: str, password: str) -> dict[str, str]: # type: ignore # Use Supabase client to create a new user user = self.supabase.auth.sign_up(email=email, password=password) diff --git a/py/core/providers/database/base.py b/py/core/providers/database/base.py index 32ae6a334..71e530047 100644 --- a/py/core/providers/database/base.py +++ b/py/core/providers/database/base.py @@ -1,22 +1,51 @@ +import asyncio +import logging +from contextlib import asynccontextmanager from typing import Any, Optional, Sequence, Union -from sqlalchemy import TextClause, text +import asyncpg -from .vecs import Client +from core.base import DatabaseConnectionManager +logger = logging.getLogger() -# TODO: This should be defined at the mixin, not here -def execute_query( - vx: Client, - query: Union[str, TextClause], - params: Optional[dict[str, Any]] = None, -): - with vx.Session() as sess: - if isinstance(query, str): - query = text(query) - result = sess.execute(query, params or {}) - sess.commit() - return result + +class SemaphoreConnectionPool: + def __init__(self, connection_string, postgres_configuration_settings): + self.connection_string = connection_string + self.postgres_configuration_settings = postgres_configuration_settings + + async def initialize(self): + try: + logger.info( + f"Connecting with {int(self.postgres_configuration_settings.max_connections * 0.9)} connections to `asyncpg.create_pool`." + ) + + self.semaphore = asyncio.Semaphore( + int(self.postgres_configuration_settings.max_connections * 0.9) + ) + + self.pool = await asyncpg.create_pool( + self.connection_string, + max_size=self.postgres_configuration_settings.max_connections, + ) + + logger.info( + "Successfully connected to Postgres database and created connection pool." + ) + except Exception as e: + raise ValueError( + f"Error {e} occurred while attempting to connect to relational database." 
+ ) from e + + @asynccontextmanager + async def get_connection(self): + async with self.semaphore: + async with self.pool.acquire() as conn: + yield conn + + async def close(self): + await self.pool.close() class QueryBuilder: @@ -75,35 +104,59 @@ def build(self): return query, self.params -class DatabaseMixin: - def get_config(self): - if hasattr(self, "config"): - return self.config - raise AttributeError("Config not set in the provider class") - - def _get_table_name(self, base_name: str) -> str: - raise NotImplementedError("Subclasses must implement this method") - - def execute_query( - self, - query: Union[str, TextClause], - params: Optional[Union[dict[str, Any], Sequence[Any]]] = None, - ): - raise NotImplementedError("Subclasses must implement this method") - - def fetch_query( - self, - query: Union[str, TextClause], - params: Optional[Union[dict[str, Any], Sequence[Any]]] = None, - ): - raise NotImplementedError("Subclasses must implement this method") - - def fetchrow_query( - self, - query: Union[str, TextClause], - params: Optional[Union[dict[str, Any], Sequence[Any]]] = None, - ): - raise NotImplementedError("Subclasses must implement this method") - - def create_table(self): - raise NotImplementedError("Subclasses must implement this method") +class PostgresConnectionManager(DatabaseConnectionManager): + + def __init__(self): + self.pool: Optional[SemaphoreConnectionPool] = None + + async def initialize(self, pool: SemaphoreConnectionPool): + self.pool = pool + + async def execute_query(self, query, params=None, isolation_level=None): + if not self.pool: + raise ValueError("PostgresConnectionManager is not initialized.") + async with self.pool.get_connection() as conn: + if isolation_level: + async with conn.transaction(isolation=isolation_level): + if params: + return await conn.execute(query, *params) + else: + return await conn.execute(query) + else: + if params: + return await conn.execute(query, *params) + else: + return await conn.execute(query) + + async def execute_many(self, query, params=None, batch_size=1000): + if not self.pool: + raise ValueError("PostgresConnectionManager is not initialized.") + async with self.pool.get_connection() as conn: + async with conn.transaction(): + if params: + for i in range(0, len(params), batch_size): + param_batch = params[i : i + batch_size] + await conn.executemany(query, param_batch) + else: + await conn.executemany(query) + + async def fetch_query(self, query, params=None): + if not self.pool: + raise ValueError("PostgresConnectionManager is not initialized.") + async with self.pool.get_connection() as conn: + async with conn.transaction(): + return ( + await conn.fetch(query, *params) + if params + else await conn.fetch(query) + ) + + async def fetchrow_query(self, query, params=None): + if not self.pool: + raise ValueError("PostgresConnectionManager is not initialized.") + async with self.pool.get_connection() as conn: + async with conn.transaction(): + if params: + return await conn.fetchrow(query, *params) + else: + return await conn.fetchrow(query) diff --git a/py/core/providers/database/collection.py b/py/core/providers/database/collection.py index 6605a7ed2..0a7fbd4d6 100644 --- a/py/core/providers/database/collection.py +++ b/py/core/providers/database/collection.py @@ -4,7 +4,12 @@ from typing import Optional, Union from uuid import UUID, uuid4 -from core.base import R2RException, generate_default_user_collection_id +from core.base import ( + CollectionHandler, + DatabaseConfig, + R2RException, + 
generate_default_user_collection_id, +) from core.base.abstractions import DocumentInfo, DocumentType, IngestionStatus from core.base.api.models import CollectionOverviewResponse, CollectionResponse from core.utils import ( @@ -12,15 +17,26 @@ generate_default_user_collection_id, ) -from .base import DatabaseMixin +from .base import PostgresConnectionManager logger = logging.getLogger() -class CollectionMixin(DatabaseMixin): +class PostgresCollectionHandler(CollectionHandler): + TABLE_NAME = "collections" + + def __init__( + self, + project_name: str, + connection_manager: PostgresConnectionManager, + config: DatabaseConfig, + ): + self.config = config + super().__init__(project_name, connection_manager) + async def create_table(self) -> None: query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name('collections')} ( + CREATE TABLE IF NOT EXISTS {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} ( collection_id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), name TEXT NOT NULL, description TEXT, @@ -29,13 +45,12 @@ async def create_table(self) -> None: updated_at TIMESTAMPTZ DEFAULT NOW() ); """ - await self.execute_query(query) + await self.connection_manager.execute_query(query) async def create_default_collection( self, user_id: Optional[UUID] = None ) -> CollectionResponse: """Create a default collection if it doesn't exist.""" - config = self.get_config() if user_id: default_collection_uuid = generate_default_user_collection_id( @@ -43,14 +58,14 @@ async def create_default_collection( ) else: default_collection_uuid = generate_collection_id_from_name( - config.default_collection_name + self.config.default_collection_name ) if not await self.collection_exists(default_collection_uuid): logger.info("Initializing a new default collection...") return await self.create_collection( - name=config.default_collection_name, - description=config.default_collection_description, + name=self.config.default_collection_name, + description=self.config.default_collection_description, collection_id=default_collection_uuid, ) @@ -59,10 +74,12 @@ async def create_default_collection( async def collection_exists(self, collection_id: UUID) -> bool: """Check if a collection exists.""" query = f""" - SELECT 1 FROM {self._get_table_name('collections')} + SELECT 1 FROM {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} WHERE collection_id = $1 """ - result = await self.fetchrow_query(query, [collection_id]) + result = await self.connection_manager.fetchrow_query( + query, [collection_id] + ) return result is not None async def create_collection( @@ -73,7 +90,7 @@ async def create_collection( ) -> CollectionResponse: current_time = datetime.utcnow() query = f""" - INSERT INTO {self._get_table_name('collections')} (collection_id, name, description, created_at, updated_at) + INSERT INTO {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} (collection_id, name, description, created_at, updated_at) VALUES ($1, $2, $3, $4, $5) RETURNING collection_id, name, description, created_at, updated_at """ @@ -85,27 +102,17 @@ async def create_collection( current_time, ] - try: - async with self.pool.acquire() as conn: # type: ignore - row = await conn.fetchrow(query, *params) - - if not row: - raise R2RException( - status_code=500, message="Failed to create collection" - ) + result = await self.connection_manager.fetchrow_query(query, params) + if not result: + raise R2RException(status_code=500, message="Failed to create collection") - return CollectionResponse( - collection_id=row["collection_id"], -
name=row["name"], - description=row["description"], - created_at=row["created_at"], - updated_at=row["updated_at"], - ) - except Exception as e: - raise R2RException( - status_code=500, - message=f"An error occurred while creating the collection: {str(e)}", - ) + return CollectionResponse( + collection_id=result["collection_id"], + name=result["name"], + description=result["description"], + created_at=result["created_at"], + updated_at=result["updated_at"], + ) async def get_collection(self, collection_id: UUID) -> CollectionResponse: """Get a collection by its ID.""" @@ -114,10 +121,12 @@ async def get_collection(self, collection_id: UUID) -> CollectionResponse: query = f""" SELECT collection_id, name, description, created_at, updated_at - FROM {self._get_table_name('collections')} + FROM {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} WHERE collection_id = $1 """ - result = await self.fetchrow_query(query, [collection_id]) + result = await self.connection_manager.fetchrow_query( + query, [collection_id] + ) if not result: raise R2RException(status_code=404, message="Collection not found") @@ -157,13 +166,13 @@ async def update_collection( params.append(collection_id) query = f""" - UPDATE {self._get_table_name('collections')} + UPDATE {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} SET {', '.join(update_fields)} WHERE collection_id = ${len(params)} RETURNING collection_id, name, description, created_at, updated_at """ - result = await self.fetchrow_query(query, params) + result = await self.connection_manager.fetchrow_query(query, params) if not result: raise R2RException(status_code=404, message="Collection not found") @@ -175,54 +184,46 @@ async def update_collection( updated_at=result["updated_at"], ) - async def delete_collection(self, collection_id: UUID) -> None: - async with self.pool.acquire() as conn: # type: ignore - async with conn.transaction(): - try: - # Remove collection_id from users - user_update_query = f""" - UPDATE {self._get_table_name('users')} - SET collection_ids = array_remove(collection_ids, $1) - WHERE $1 = ANY(collection_ids) - """ - await conn.execute(user_update_query, collection_id) - - # Remove collection_id from documents - document_update_query = f""" - WITH updated AS ( - UPDATE {self._get_table_name('document_info')} - SET collection_ids = array_remove(collection_ids, $1) - WHERE $1 = ANY(collection_ids) - RETURNING 1 - ) - SELECT COUNT(*) AS affected_rows FROM updated - """ - result = await conn.fetchrow( - document_update_query, collection_id - ) - affected_rows = result["affected_rows"] - - # Delete the collection - delete_query = f""" - DELETE FROM {self._get_table_name('collections')} - WHERE collection_id = $1 - RETURNING collection_id - """ - deleted = await conn.fetchrow(delete_query, collection_id) - - if not deleted: - raise R2RException( - status_code=404, message="Collection not found" - ) - - except Exception as e: - logger.error( - f"Error deleting collection {collection_id}: {str(e)}" - ) - raise R2RException( - status_code=500, - message=f"An error occurred while deleting the collection: {str(e)}", - ) + async def delete_collection_relational(self, collection_id: UUID) -> None: + # async with self.connection_manager.pool.get_connection() as conn: # type: ignore + # async with conn.transaction(): + # try: + # Remove collection_id from users + user_update_query = f""" + UPDATE {self._get_table_name('users')} + SET collection_ids = array_remove(collection_ids, $1) + WHERE $1 = ANY(collection_ids) + """ + await 
self.connection_manager.execute_query( + user_update_query, [collection_id] + ) + + # Remove collection_id from documents + document_update_query = f""" + WITH updated AS ( + UPDATE {self._get_table_name('document_info')} + SET collection_ids = array_remove(collection_ids, $1) + WHERE $1 = ANY(collection_ids) + RETURNING 1 + ) + SELECT COUNT(*) AS affected_rows FROM updated + """ + await self.connection_manager.fetchrow_query( + document_update_query, [collection_id] + ) + + # Delete the collection + delete_query = f""" + DELETE FROM {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} + WHERE collection_id = $1 + RETURNING collection_id + """ + deleted = await self.connection_manager.fetchrow_query( + delete_query, [collection_id] + ) + + if not deleted: + raise R2RException(status_code=404, message="Collection not found") async def list_collections( self, offset: int = 0, limit: int = -1 @@ -230,7 +231,7 @@ async def list_collections( """List collections with pagination.""" query = f""" SELECT collection_id, name, description, created_at, updated_at, COUNT(*) OVER() AS total_entries - FROM {self._get_table_name('collections')} + FROM {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} ORDER BY name OFFSET $1 """ @@ -240,7 +241,7 @@ async def list_collections( query += " LIMIT $2" conditions.append(limit) - results = await self.fetch_query(query, conditions) + results = await self.connection_manager.fetch_query(query, conditions) if not results: logger.info("No collections found.") return {"results": [], "total_entries": 0} @@ -267,7 +268,9 @@ async def get_collections_by_ids( FROM {self._get_table_name("collections")} WHERE collection_id = ANY($1) """ - results = await self.fetch_query(query, [collection_ids]) + results = await self.connection_manager.fetch_query( + query, [collection_ids] + ) if len(results) != len(collection_ids): raise R2RException( status_code=404, @@ -313,7 +316,7 @@ async def documents_in_collection( query += " LIMIT $3" conditions.append(limit) - results = await self.fetch_query(query, conditions) + results = await self.connection_manager.fetch_query(query, conditions) documents = [ DocumentInfo( id=row["document_id"], @@ -346,7 +349,7 @@ async def get_collections_overview( SELECT g.collection_id, g.name, g.description, g.created_at, g.updated_at, COUNT(DISTINCT u.user_id) AS user_count, COUNT(DISTINCT d.document_id) AS document_count - FROM {self._get_table_name('collections')} g + FROM {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} g LEFT JOIN {self._get_table_name('users')} u ON g.collection_id = ANY(u.collection_ids) LEFT JOIN {self._get_table_name('document_info')} d ON g.collection_id = ANY(d.collection_ids) {' WHERE g.collection_id = ANY($1)' if collection_ids else ''} @@ -369,7 +372,7 @@ async def get_collections_overview( if limit != -1: params.append(limit) - results = await self.fetch_query(query, params) + results = await self.connection_manager.fetch_query(query, params) if not results: logger.info("No collections found.") @@ -397,7 +400,7 @@ async def get_collections_for_user( ) -> dict[str, Union[list[CollectionResponse], int]]: query = f""" SELECT g.collection_id, g.name, g.description, g.created_at, g.updated_at, COUNT(*) OVER() AS total_entries - FROM {self._get_table_name('collections')} g + FROM {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} g JOIN {self._get_table_name('users')} u ON g.collection_id = ANY(u.collection_ids) WHERE u.user_id = $1 ORDER BY g.name @@ -409,7 +412,7 @@ async def 
get_collections_for_user( query += " LIMIT $3" params.append(limit) - results = await self.fetch_query(query, params) + results = await self.connection_manager.fetch_query(query, params) collections = [ CollectionResponse( @@ -425,7 +428,7 @@ async def get_collections_for_user( return {"results": collections, "total_entries": total_entries} - async def assign_document_to_collection( + async def assign_document_to_collection_relational( self, document_id: UUID, collection_id: UUID, @@ -452,7 +455,7 @@ async def assign_document_to_collection( SELECT 1 FROM {self._get_table_name('document_info')} WHERE document_id = $1 """ - document_exists = await self.fetchrow_query( + document_exists = await self.connection_manager.fetchrow_query( document_check_query, [document_id] ) @@ -468,7 +471,7 @@ async def assign_document_to_collection( WHERE document_id = $2 AND NOT ($1 = ANY(collection_ids)) RETURNING document_id """ - result = await self.fetchrow_query( + result = await self.connection_manager.fetchrow_query( assign_query, [collection_id, document_id] ) @@ -495,7 +498,7 @@ async def document_collections( ) -> dict[str, Union[list[CollectionResponse], int]]: query = f""" SELECT g.collection_id, g.name, g.description, g.created_at, g.updated_at, COUNT(*) OVER() AS total_entries - FROM {self._get_table_name('collections')} g + FROM {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} g JOIN {self._get_table_name('document_info')} d ON g.collection_id = ANY(d.collection_ids) WHERE d.document_id = $1 ORDER BY g.name @@ -507,7 +510,7 @@ async def document_collections( query += " LIMIT $3" conditions.append(limit) - results = await self.fetch_query(query, conditions) + results = await self.connection_manager.fetch_query(query, conditions) collections = [ CollectionResponse( @@ -524,7 +527,7 @@ async def document_collections( return {"results": collections, "total_entries": total_entries} - async def remove_document_from_collection( + async def remove_document_from_collection_relational( self, document_id: UUID, collection_id: UUID ) -> None: """ @@ -546,7 +549,9 @@ async def remove_document_from_collection( WHERE document_id = $2 AND $1 = ANY(collection_ids) RETURNING document_id """ - result = await self.fetchrow_query(query, [collection_id, document_id]) + result = await self.connection_manager.fetchrow_query( + query, [collection_id, document_id] + ) if not result: raise R2RException( diff --git a/py/core/providers/database/document.py b/py/core/providers/database/document.py index 96eaa384c..7bd51d552 100644 --- a/py/core/providers/database/document.py +++ b/py/core/providers/database/document.py @@ -5,19 +5,9 @@ from uuid import UUID import asyncpg -from sqlalchemy import ( - ARRAY, - JSON, - Column, - DateTime, - Integer, - MetaData, - String, - Table, -) -from sqlalchemy.dialects.postgresql import UUID as SqlUUID from core.base import ( + DocumentHandler, DocumentInfo, DocumentType, IngestionStatus, @@ -26,36 +16,25 @@ R2RException, ) -from .base import DatabaseMixin +from .base import PostgresConnectionManager logger = logging.getLogger() -class DocumentMixin(DatabaseMixin): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.metadata = MetaData() - self.document_info_table = Table( - self._get_table_name("document_info"), - self.metadata, - Column("document_id", SqlUUID, primary_key=True), - Column("collection_ids", ARRAY(SqlUUID)), - Column("user_id", SqlUUID), - Column("type", String), - Column("metadata", JSON), - Column("title", String), - 
Column("version", String), - Column("size_in_bytes", Integer), - Column("ingestion_status", String), - Column("kg_extraction_status", String), - Column("created_at", DateTime), - Column("updated_at", DateTime), - Column("ingestion_attempt_number", Integer, default=0), - ) +class PostgresDocumentHandler(DocumentHandler): + TABLE_NAME = "document_info" + + def __init__( + self, project_name: str, connection_manager: PostgresConnectionManager + ): + super().__init__(project_name, connection_manager) async def create_table(self): + logger.info( + f"Creating table, if not exists: {self._get_table_name(PostgresDocumentHandler.TABLE_NAME)}" + ) query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name('document_info')} ( + CREATE TABLE IF NOT EXISTS {self._get_table_name(PostgresDocumentHandler.TABLE_NAME)} ( document_id UUID PRIMARY KEY, collection_ids UUID[], user_id UUID, @@ -71,9 +50,9 @@ async def create_table(self): ingestion_attempt_number INT DEFAULT 0 ); CREATE INDEX IF NOT EXISTS idx_collection_ids_{self.project_name} - ON {self._get_table_name('document_info')} USING GIN (collection_ids); + ON {self._get_table_name(PostgresDocumentHandler.TABLE_NAME)} USING GIN (collection_ids); """ - await self.execute_query(query) + await self.connection_manager.execute_query(query) async def upsert_documents_overview( self, documents_overview: Union[DocumentInfo, list[DocumentInfo]] @@ -87,11 +66,11 @@ async def upsert_documents_overview( retries = 0 while retries < max_retries: try: - async with self.pool.acquire() as conn: # type: ignore + async with self.connection_manager.pool.get_connection() as conn: # type: ignore async with conn.transaction(): # Lock the row for update check_query = f""" - SELECT ingestion_attempt_number, ingestion_status FROM {self._get_table_name('document_info')} + SELECT ingestion_attempt_number, ingestion_status FROM {self._get_table_name(PostgresDocumentHandler.TABLE_NAME)} WHERE document_id = $1 FOR UPDATE """ existing_doc = await conn.fetchrow( @@ -124,7 +103,7 @@ async def upsert_documents_overview( ) update_query = f""" - UPDATE {self._get_table_name('document_info')} + UPDATE {self._get_table_name(PostgresDocumentHandler.TABLE_NAME)} SET collection_ids = $1, user_id = $2, type = $3, metadata = $4, title = $5, version = $6, size_in_bytes = $7, ingestion_status = $8, kg_extraction_status = $9, updated_at = $10, ingestion_attempt_number = $11 @@ -147,7 +126,7 @@ async def upsert_documents_overview( ) else: insert_query = f""" - INSERT INTO {self._get_table_name('document_info')} + INSERT INTO {self._get_table_name(PostgresDocumentHandler.TABLE_NAME)} (document_id, collection_ids, user_id, type, metadata, title, version, size_in_bytes, ingestion_status, kg_extraction_status, created_at, updated_at, ingestion_attempt_number) @@ -186,20 +165,20 @@ async def upsert_documents_overview( await asyncio.sleep(wait_time) async def delete_from_documents_overview( - self, document_id: str, version: Optional[str] = None + self, document_id: UUID, version: Optional[str] = None ) -> None: query = f""" - DELETE FROM {self._get_table_name('document_info')} + DELETE FROM {self._get_table_name(PostgresDocumentHandler.TABLE_NAME)} WHERE document_id = $1 """ - params = [document_id] + params = [str(document_id)] if version: query += " AND version = $2" - params = [document_id, version] + params = [str(document_id), version] - await self.execute_query(query, params) + await self.connection_manager.execute_query(query, params) async def _get_status_from_table( self, ids: list[UUID], 
table_name: str, status_type: str @@ -219,7 +198,7 @@ async def _get_status_from_table( SELECT {status_type} FROM {self._get_table_name(table_name)} WHERE document_id = ANY($1) """ - return await self.fetch_query(query, [ids]) + return await self.connection_manager.fetch_query(query, [ids]) async def _get_ids_from_table( self, @@ -240,7 +219,9 @@ async def _get_ids_from_table( SELECT document_id FROM {self._get_table_name(table_name)} WHERE {status_type} = ANY($1) and $2 = ANY(collection_ids) """ - records = await self.fetch_query(query, [status, collection_id]) + records = await self.connection_manager.fetch_query( + query, [status, collection_id] + ) document_ids = [record["document_id"] for record in records] return document_ids @@ -261,7 +242,7 @@ async def _set_status_in_table( SET {status_type} = $1 WHERE document_id = Any($2) """ - await self.execute_query(query, [status, ids]) + await self.connection_manager.execute_query(query, [status, ids]) def _get_status_model_and_table_name(self, status_type: str): """ @@ -386,7 +367,7 @@ async def get_documents_overview( param_index += 1 base_query = f""" - FROM {self._get_table_name('document_info')} + FROM {self._get_table_name(PostgresDocumentHandler.TABLE_NAME)} """ if conditions: @@ -409,7 +390,7 @@ async def get_documents_overview( param_index += 1 try: - results = await self.fetch_query(query, params) + results = await self.connection_manager.fetch_query(query, params) total_entries = results[0]["total_entries"] if results else 0 documents = [ diff --git a/py/core/providers/database/postgres.py b/py/core/providers/database/postgres.py index 350fe0d2b..70b02fade 100644 --- a/py/core/providers/database/postgres.py +++ b/py/core/providers/database/postgres.py @@ -1,5 +1,4 @@ # TODO: Clean this up and make it more congruent across the vector database and the relational database. 
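The upsert_documents_overview flow above combines a SELECT ... FOR UPDATE row lock inside a transaction, an update-or-insert decision on the locked row, and a bounded retry loop. A minimal sketch of that shape, assuming an initialized asyncpg pool; the table and columns here are placeholders, not the real document_info schema:

import asyncio

import asyncpg


async def upsert_doc(pool: asyncpg.Pool, doc_id, title, max_retries: int = 3):
    for attempt in range(max_retries):
        try:
            async with pool.acquire() as conn:
                async with conn.transaction():
                    # Lock the row (if present) so concurrent upserts serialize here.
                    row = await conn.fetchrow(
                        "SELECT document_id FROM document_info WHERE document_id = $1 FOR UPDATE",
                        doc_id,
                    )
                    if row:
                        await conn.execute(
                            "UPDATE document_info SET title = $1 WHERE document_id = $2",
                            title,
                            doc_id,
                        )
                    else:
                        await conn.execute(
                            "INSERT INTO document_info (document_id, title) VALUES ($1, $2)",
                            doc_id,
                            title,
                        )
            return
        except asyncpg.PostgresError:
            # Linear backoff before retrying; real code should narrow this
            # to transient conflict errors rather than all Postgres errors.
            await asyncio.sleep(0.5 * (attempt + 1))
    raise RuntimeError(f"Upsert failed after {max_retries} attempts")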
- import logging import os import warnings @@ -8,15 +7,20 @@ from core.base import ( CryptoProvider, DatabaseConfig, + DatabaseConnectionManager, DatabaseProvider, PostgresConfigurationSettings, - RelationalDBProvider, - VectorDBProvider, + VectorQuantizationType, ) +from core.providers.database.base import PostgresConnectionManager +from core.providers.database.collection import PostgresCollectionHandler +from core.providers.database.document import PostgresDocumentHandler +from core.providers.database.tokens import PostgresTokenHandler +from core.providers.database.user import PostgresUserHandler +from core.providers.database.vector import PostgresVectorHandler from shared.abstractions.vector import VectorQuantizationType -from .relational import PostgresRelationalDBProvider -from .vector import PostgresVectorDBProvider +from .base import SemaphoreConnectionPool logger = logging.getLogger() @@ -38,7 +42,7 @@ class PostgresDBProvider(DatabaseProvider): db_name: str project_name: str connection_string: str - vector_db_dimension: int + dimension: int conn: Optional[Any] crypto_provider: CryptoProvider postgres_configuration_settings: PostgresConfigurationSettings @@ -98,8 +102,8 @@ def __init__( self.connection_string = f"postgresql://{self.user}:{self.password}@{self.host}:{self.port}/{self.db_name}" logger.info("Connecting to Postgres via TCP/IP") - self.vector_db_dimension = dimension - self.vector_db_quantization_type = quantization_type + self.dimension = dimension + self.quantization_type = quantization_type self.conn = None self.config: DatabaseConfig = config self.crypto_provider = crypto_provider @@ -110,33 +114,54 @@ def __init__( self.default_collection_description = ( config.default_collection_description ) + self.enable_fts = config.enable_fts - def _get_table_name(self, base_name: str) -> str: - return f"{self.project_name}.{base_name}" + self.connection_manager: DatabaseConnectionManager = ( + PostgresConnectionManager() + ) + self.document_handler = PostgresDocumentHandler( + self.project_name, self.connection_manager + ) + self.token_handler = PostgresTokenHandler( + self.project_name, self.connection_manager + ) + self.collection_handler = PostgresCollectionHandler( + self.project_name, self.connection_manager, self.config + ) + self.user_handler = PostgresUserHandler( + self.project_name, self.connection_manager, self.crypto_provider + ) + self.vector_handler = PostgresVectorHandler( + self.project_name, + self.connection_manager, + self.dimension, + self.enable_fts, + ) async def initialize(self): - self.vector = self._initialize_vector_db() - self.relational = await self._initialize_relational_db() - - def _initialize_vector_db(self) -> VectorDBProvider: - return PostgresVectorDBProvider( - self.config, - connection_string=self.connection_string, - project_name=self.project_name, - dimension=self.vector_db_dimension, - quantization_type=self.vector_db_quantization_type, + logger.info("Initializing `PostgresDBProvider`.") + self.pool = SemaphoreConnectionPool( + self.connection_string, self.postgres_configuration_settings ) + await self.pool.initialize() + await self.connection_manager.initialize(self.pool) + + async with self.pool.get_connection() as conn: + await conn.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";') + await conn.execute("CREATE EXTENSION IF NOT EXISTS vector;") + await conn.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm;") + await conn.execute("CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;") + + # Create schema if it doesn't exist + await 
conn.execute( + f'CREATE SCHEMA IF NOT EXISTS "{self.project_name}";' + ) - async def _initialize_relational_db(self) -> RelationalDBProvider: - relational_db = PostgresRelationalDBProvider( - self.config, - connection_string=self.connection_string, - crypto_provider=self.crypto_provider, - project_name=self.project_name, - postgres_configuration_settings=self.postgres_configuration_settings, - ) - await relational_db.initialize() - return relational_db + await self.document_handler.create_table() + await self.collection_handler.create_table() + await self.token_handler.create_table() + await self.user_handler.create_table() + await self.vector_handler.create_table() def _get_postgres_configuration_settings( self, config: DatabaseConfig @@ -178,3 +203,14 @@ def _get_postgres_configuration_settings( setattr(settings, setting, value) return settings + + async def close(self): + if self.pool: + await self.pool.close() + + async def __aenter__(self): + await self.initialize() + return self + + async def __aexit__(self, exc_type, exc, tb): + await self.close() diff --git a/py/core/providers/database/relational.py b/py/core/providers/database/relational.py deleted file mode 100644 index dc5f5cfb0..000000000 --- a/py/core/providers/database/relational.py +++ /dev/null @@ -1,120 +0,0 @@ -import asyncio -import logging -from contextlib import asynccontextmanager - -import asyncpg - -from core.base import RelationalDBProvider -from core.providers.database.base import DatabaseMixin -from core.providers.database.collection import CollectionMixin -from core.providers.database.document import DocumentMixin -from core.providers.database.tokens import BlacklistedTokensMixin -from core.providers.database.user import UserMixin - -logger = logging.getLogger() - - -class PostgresRelationalDBProvider( - RelationalDBProvider, - DocumentMixin, - CollectionMixin, - BlacklistedTokensMixin, - UserMixin, -): - def __init__( - self, - config, - connection_string, - crypto_provider, - project_name, - postgres_configuration_settings, - ): - super().__init__(config) - self.config = config - self.connection_string = connection_string - self.crypto_provider = crypto_provider - self.project_name = project_name - self.pool = None - self.postgres_configuration_settings = postgres_configuration_settings - self.semaphore = asyncio.Semaphore( - int(self.postgres_configuration_settings.max_connections * 0.9) - ) - - async def initialize(self): - try: - self.pool = await asyncpg.create_pool( - self.connection_string, - max_size=self.postgres_configuration_settings.max_connections, - ) - - logger.info( - "Successfully connected to Postgres database and created connection pool." - ) - except Exception as e: - raise ValueError( - f"Error {e} occurred while attempting to connect to relational database." 
- ) from e - - await self._initialize_relational_db() - - def _get_table_name(self, base_name: str) -> str: - return f"{self.project_name}.{base_name}" - - @asynccontextmanager - async def get_connection(self): - async with self.semaphore: - async with self.pool.acquire() as conn: - yield conn - - async def execute_query(self, query, params=None): - async with self.get_connection() as conn: - async with conn.transaction(): - if params: - return await conn.execute(query, *params) - else: - return await conn.execute(query) - - async def execute_many(self, query, params=None, batch_size=1000): - async with self.get_connection() as conn: - async with conn.transaction(): - if params: - for i in range(0, len(params), batch_size): - param_batch = params[i : i + batch_size] - await conn.executemany(query, param_batch) - else: - await conn.executemany(query) - - async def fetch_query(self, query, params=None): - async with self.get_connection() as conn: - async with conn.transaction(): - return ( - await conn.fetch(query, *params) - if params - else await conn.fetch(query) - ) - - async def fetchrow_query(self, query, params=None): - async with self.get_connection() as conn: - async with conn.transaction(): - if params: - return await conn.fetchrow(query, *params) - else: - return await conn.fetchrow(query) - - # async def copy_records_to_table(self, table_name, records): - # async with self.get_connection() as conn: - # async with conn.transaction(): - # await conn.copy_records_to_table(table_name, records) - - async def _initialize_relational_db(self): - async with self.get_connection() as conn: - await conn.execute(f'CREATE EXTENSION IF NOT EXISTS "uuid-ossp";') - - # Call create_table for each mixin - for base_class in self.__class__.__bases__: - if issubclass(base_class, DatabaseMixin): - await base_class.create_table(self) - - async def close(self): - if self.pool: - await self.pool.close() diff --git a/py/core/providers/database/tokens.py b/py/core/providers/database/tokens.py index 009c6aaf9..1f6831dbf 100644 --- a/py/core/providers/database/tokens.py +++ b/py/core/providers/database/tokens.py @@ -1,23 +1,32 @@ from datetime import datetime, timedelta from typing import Optional -from .base import DatabaseMixin +from core.base import TokenHandler +from .base import PostgresConnectionManager + + +class PostgresTokenHandler(TokenHandler): + TABLE_NAME = "blacklisted_tokens" + + def __init__( + self, project_name: str, connection_manager: PostgresConnectionManager + ): + super().__init__(project_name, connection_manager) -class BlacklistedTokensMixin(DatabaseMixin): async def create_table(self): query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name('blacklisted_tokens')} ( + CREATE TABLE IF NOT EXISTS {self._get_table_name(PostgresTokenHandler.TABLE_NAME)} ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), token TEXT NOT NULL, blacklisted_at TIMESTAMPTZ DEFAULT NOW() ); - CREATE INDEX IF NOT EXISTS idx_blacklisted_tokens_{self.project_name}_token - ON {self._get_table_name('blacklisted_tokens')} (token); - CREATE INDEX IF NOT EXISTS idx_blacklisted_tokens_{self.project_name}_blacklisted_at - ON {self._get_table_name('blacklisted_tokens')} (blacklisted_at); + CREATE INDEX IF NOT EXISTS idx_{self.project_name}_{PostgresTokenHandler.TABLE_NAME}_token + ON {self._get_table_name(PostgresTokenHandler.TABLE_NAME)} (token); + CREATE INDEX IF NOT EXISTS idx_{self.project_name}_{PostgresTokenHandler.TABLE_NAME}_blacklisted_at + ON {self._get_table_name(PostgresTokenHandler.TABLE_NAME)} 
(blacklisted_at); """ - await self.execute_query(query) + await self.connection_manager.execute_query(query) async def blacklist_token( self, token: str, current_time: Optional[datetime] = None @@ -26,18 +35,20 @@ async def blacklist_token( current_time = datetime.utcnow() query = f""" - INSERT INTO {self._get_table_name("blacklisted_tokens")} (token, blacklisted_at) + INSERT INTO {self._get_table_name(PostgresTokenHandler.TABLE_NAME)} (token, blacklisted_at) VALUES ($1, $2) """ - await self.execute_query(query, [token, current_time]) + await self.connection_manager.execute_query( + query, [token, current_time] + ) async def is_token_blacklisted(self, token: str) -> bool: query = f""" - SELECT 1 FROM {self._get_table_name("blacklisted_tokens")} + SELECT 1 FROM {self._get_table_name(PostgresTokenHandler.TABLE_NAME)} WHERE token = $1 LIMIT 1 """ - result = await self.fetchrow_query(query, [token]) + result = await self.connection_manager.fetchrow_query(query, [token]) return bool(result) async def clean_expired_blacklisted_tokens( @@ -50,7 +61,7 @@ async def clean_expired_blacklisted_tokens( expiry_time = current_time - timedelta(hours=max_age_hours) query = f""" - DELETE FROM {self._get_table_name("blacklisted_tokens")} + DELETE FROM {self._get_table_name(PostgresTokenHandler.TABLE_NAME)} WHERE blacklisted_at < $1 """ - await self.execute_query(query, [expiry_time]) + await self.connection_manager.execute_query(query, [expiry_time]) diff --git a/py/core/providers/database/user.py b/py/core/providers/database/user.py index e315e17ec..bf0af6867 100644 --- a/py/core/providers/database/user.py +++ b/py/core/providers/database/user.py @@ -2,18 +2,30 @@ from typing import Optional, Union from uuid import UUID +from core.base import CryptoProvider, UserHandler from core.base.abstractions import R2RException, UserStats from core.base.api.models import UserResponse from core.utils import generate_user_id -from .base import DatabaseMixin, QueryBuilder +from .base import PostgresConnectionManager, QueryBuilder +from .collection import PostgresCollectionHandler -class UserMixin(DatabaseMixin): +class PostgresUserHandler(UserHandler): + TABLE_NAME = "users" + + def __init__( + self, + project_name: str, + connection_manager: PostgresConnectionManager, + crypto_provider: CryptoProvider, + ): + super().__init__(project_name, connection_manager) + self.crypto_provider = crypto_provider async def create_table(self): query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name('users')} ( + CREATE TABLE IF NOT EXISTS {self._get_table_name(PostgresUserHandler.TABLE_NAME)} ( user_id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), email TEXT UNIQUE NOT NULL, hashed_password TEXT NOT NULL, @@ -32,9 +44,9 @@ async def create_table(self): updated_at TIMESTAMPTZ DEFAULT NOW() ); """ - await self.execute_query(query) + await self.connection_manager.execute_query(query) - async def get_user_by_id(self, user_id: UUID) -> Optional[UserResponse]: + async def get_user_by_id(self, user_id: UUID) -> UserResponse: query, _ = ( QueryBuilder(self._get_table_name("users")) .select( @@ -56,10 +68,10 @@ async def get_user_by_id(self, user_id: UUID) -> Optional[UserResponse]: .where("user_id = $1") .build() ) - result = await self.fetchrow_query(query, [user_id]) + result = await self.connection_manager.fetchrow_query(query, [user_id]) if not result: - return None + raise R2RException(status_code=404, message="User not found") return UserResponse( id=result["user_id"], @@ -98,7 +110,7 @@ async def get_user_by_email(self, email: 
str) -> UserResponse: .where("email = $1") .build() ) - result = await self.fetchrow_query(query, [email]) + result = await self.connection_manager.fetchrow_query(query, [email]) if not result: raise R2RException(status_code=404, message="User not found") @@ -130,12 +142,12 @@ async def create_user(self, email: str, password: str) -> UserResponse: hashed_password = self.crypto_provider.get_password_hash(password) # type: ignore query = f""" - INSERT INTO {self._get_table_name('users')} + INSERT INTO {self._get_table_name(PostgresUserHandler.TABLE_NAME)} (email, user_id, hashed_password, collection_ids) VALUES ($1, $2, $3, $4) RETURNING user_id, email, is_superuser, is_active, is_verified, created_at, updated_at, collection_ids """ - result = await self.fetchrow_query( + result = await self.connection_manager.fetchrow_query( query, [email, generate_user_id(email), hashed_password, []] ) @@ -158,13 +170,13 @@ async def create_user(self, email: str, password: str) -> UserResponse: async def update_user(self, user: UserResponse) -> UserResponse: query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET email = $1, is_superuser = $2, is_active = $3, is_verified = $4, updated_at = NOW(), name = $5, profile_picture = $6, bio = $7, collection_ids = $8 WHERE user_id = $9 RETURNING user_id, email, is_superuser, is_active, is_verified, created_at, updated_at, name, profile_picture, bio, collection_ids """ - result = await self.fetchrow_query( + result = await self.connection_manager.fetchrow_query( query, [ user.email, @@ -198,13 +210,13 @@ async def update_user(self, user: UserResponse) -> UserResponse: collection_ids=result["collection_ids"], ) - async def delete_user(self, user_id: UUID) -> None: + async def delete_user_relational(self, user_id: UUID) -> None: # Get the collections the user belongs to collection_query = f""" - SELECT collection_ids FROM {self._get_table_name('users')} + SELECT collection_ids FROM {self._get_table_name(PostgresUserHandler.TABLE_NAME)} WHERE user_id = $1 """ - collection_result = await self.fetchrow_query( + collection_result = await self.connection_manager.fetchrow_query( collection_query, [user_id] ) @@ -217,15 +229,19 @@ async def delete_user(self, user_id: UUID) -> None: SET user_id = NULL WHERE user_id = $1 """ - await self.execute_query(doc_update_query, [user_id]) + await self.connection_manager.execute_query( + doc_update_query, [user_id] + ) # Delete the user delete_query = f""" - DELETE FROM {self._get_table_name('users')} + DELETE FROM {self._get_table_name(PostgresUserHandler.TABLE_NAME)} WHERE user_id = $1 RETURNING user_id """ - result = await self.fetchrow_query(delete_query, [user_id]) + result = await self.connection_manager.fetchrow_query( + delete_query, [user_id] + ) if not result: raise R2RException(status_code=404, message="User not found") @@ -234,18 +250,20 @@ async def update_user_password( self, user_id: UUID, new_hashed_password: str ): query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET hashed_password = $1, updated_at = NOW() WHERE user_id = $2 """ - await self.execute_query(query, [new_hashed_password, user_id]) + await self.connection_manager.execute_query( + query, [new_hashed_password, user_id] + ) async def get_all_users(self) -> list[UserResponse]: query = f""" SELECT user_id, email, is_superuser, is_active, is_verified, created_at, updated_at, collection_ids - FROM {self._get_table_name('users')} + 
FROM {self._get_table_name(PostgresUserHandler.TABLE_NAME)} """ - results = await self.fetch_query(query) + results = await self.connection_manager.fetch_query(query) return [ UserResponse( @@ -266,20 +284,24 @@ async def store_verification_code( self, user_id: UUID, verification_code: str, expiry: datetime ): query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET verification_code = $1, verification_code_expiry = $2 WHERE user_id = $3 """ - await self.execute_query(query, [verification_code, expiry, user_id]) + await self.connection_manager.execute_query( + query, [verification_code, expiry, user_id] + ) async def verify_user(self, verification_code: str) -> None: query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET is_verified = TRUE, verification_code = NULL, verification_code_expiry = NULL WHERE verification_code = $1 AND verification_code_expiry > NOW() RETURNING user_id """ - result = await self.fetchrow_query(query, [verification_code]) + result = await self.connection_manager.fetchrow_query( + query, [verification_code] + ) if not result: raise R2RException( @@ -288,55 +310,59 @@ async def verify_user(self, verification_code: str) -> None: async def remove_verification_code(self, verification_code: str): query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET verification_code = NULL, verification_code_expiry = NULL WHERE verification_code = $1 """ - await self.execute_query(query, [verification_code]) + await self.connection_manager.execute_query(query, [verification_code]) async def expire_verification_code(self, user_id: UUID): query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET verification_code_expiry = NOW() - INTERVAL '1 day' WHERE user_id = $1 """ - await self.execute_query(query, [user_id]) + await self.connection_manager.execute_query(query, [user_id]) async def store_reset_token( self, user_id: UUID, reset_token: str, expiry: datetime ): query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET reset_token = $1, reset_token_expiry = $2 WHERE user_id = $3 """ - await self.execute_query(query, [reset_token, expiry, user_id]) + await self.connection_manager.execute_query( + query, [reset_token, expiry, user_id] + ) async def get_user_id_by_reset_token( self, reset_token: str ) -> Optional[UUID]: query = f""" - SELECT user_id FROM {self._get_table_name('users')} + SELECT user_id FROM {self._get_table_name(PostgresUserHandler.TABLE_NAME)} WHERE reset_token = $1 AND reset_token_expiry > NOW() """ - result = await self.fetchrow_query(query, [reset_token]) + result = await self.connection_manager.fetchrow_query( + query, [reset_token] + ) return result["user_id"] if result else None async def remove_reset_token(self, user_id: UUID): query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET reset_token = NULL, reset_token_expiry = NULL WHERE user_id = $1 """ - await self.execute_query(query, [user_id]) + await self.connection_manager.execute_query(query, [user_id]) async def remove_user_from_all_collections(self, user_id: UUID): query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET collection_ids = ARRAY[]::UUID[] WHERE 
user_id = $1 """ - await self.execute_query(query, [user_id]) + await self.connection_manager.execute_query(query, [user_id]) async def add_user_to_collection( self, user_id: UUID, collection_id: UUID @@ -345,12 +371,12 @@ async def add_user_to_collection( raise R2RException(status_code=404, message="User not found") query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET collection_ids = array_append(collection_ids, $1) WHERE user_id = $2 AND NOT ($1 = ANY(collection_ids)) RETURNING user_id """ - result = await self.fetchrow_query( + result = await self.connection_manager.fetchrow_query( query, [collection_id, user_id] ) # fetchrow instead of execute_query if not result: @@ -366,12 +392,14 @@ async def remove_user_from_collection( raise R2RException(status_code=404, message="User not found") query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET collection_ids = array_remove(collection_ids, $1) WHERE user_id = $2 AND $1 = ANY(collection_ids) RETURNING user_id """ - result = await self.fetchrow_query(query, [collection_id, user_id]) + result = await self.connection_manager.fetchrow_query( + query, [collection_id, user_id] + ) if not result: raise R2RException( status_code=400, @@ -396,14 +424,14 @@ async def get_users_in_collection( Raises: R2RException: If the collection doesn't exist. """ - if not await self.collection_exists(collection_id): # type: ignore + if not await self._collection_exists(collection_id): # type: ignore raise R2RException(status_code=404, message="Collection not found") query = f""" SELECT u.user_id, u.email, u.is_active, u.is_superuser, u.created_at, u.updated_at, u.is_verified, u.collection_ids, u.name, u.bio, u.profile_picture, COUNT(*) OVER() AS total_entries - FROM {self._get_table_name('users')} u + FROM {self._get_table_name(PostgresUserHandler.TABLE_NAME)} u WHERE $1 = ANY(u.collection_ids) ORDER BY u.name OFFSET $2 @@ -414,7 +442,7 @@ async def get_users_in_collection( query += " LIMIT $3" conditions.append(limit) - results = await self.fetch_query(query, conditions) + results = await self.connection_manager.fetch_query(query, conditions) users = [ UserResponse( @@ -441,20 +469,22 @@ async def get_users_in_collection( async def mark_user_as_superuser(self, user_id: UUID): query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET is_superuser = TRUE, is_verified = TRUE, verification_code = NULL, verification_code_expiry = NULL WHERE user_id = $1 """ - await self.execute_query(query, [user_id]) + await self.connection_manager.execute_query(query, [user_id]) async def get_user_id_by_verification_code( self, verification_code: str ) -> Optional[UUID]: query = f""" - SELECT user_id FROM {self._get_table_name('users')} + SELECT user_id FROM {self._get_table_name(PostgresUserHandler.TABLE_NAME)} WHERE verification_code = $1 AND verification_code_expiry > NOW() """ - result = await self.fetchrow_query(query, [verification_code]) + result = await self.connection_manager.fetchrow_query( + query, [verification_code] + ) if not result: raise R2RException( @@ -465,11 +495,11 @@ async def get_user_id_by_verification_code( async def mark_user_as_verified(self, user_id: UUID): query = f""" - UPDATE {self._get_table_name('users')} + UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET is_verified = TRUE, verification_code = NULL, verification_code_expiry = NULL WHERE 
user_id = $1 """ - await self.execute_query(query, [user_id]) + await self.connection_manager.execute_query(query, [user_id]) async def get_users_overview( self, @@ -492,7 +522,7 @@ async def get_users_overview( COALESCE(SUM(d.size_in_bytes), 0) AS total_size_in_bytes, ARRAY_AGG(d.document_id) FILTER (WHERE d.document_id IS NOT NULL) AS document_ids, COUNT(*) OVER() AS total_entries - FROM {self._get_table_name('users')} u + FROM {self._get_table_name(PostgresUserHandler.TABLE_NAME)} u LEFT JOIN {self._get_table_name('document_info')} d ON u.user_id = d.user_id {' WHERE u.user_id = ANY($3::uuid[])' if user_ids else ''} GROUP BY u.user_id, u.email, u.is_superuser, u.is_active, u.is_verified, u.created_at, u.updated_at, u.collection_ids @@ -512,7 +542,7 @@ async def get_users_overview( if user_ids: params.append(user_ids) - results = await self.fetch_query(query, params) + results = await self.connection_manager.fetch_query(query, params) users = [ UserStats( @@ -534,3 +564,14 @@ async def get_users_overview( total_entries = results[0]["total_entries"] return {"results": users, "total_entries": total_entries} + + async def _collection_exists(self, collection_id: UUID) -> bool: + """Check if a collection exists.""" + query = f""" + SELECT 1 FROM {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} + WHERE collection_id = $1 + """ + result = await self.connection_manager.fetchrow_query( + query, [collection_id] + ) + return result is not None diff --git a/py/core/providers/database/vecs/__init__.py b/py/core/providers/database/vecs/__init__.py index bbcdaf3bb..ba49390f5 100644 --- a/py/core/providers/database/vecs/__init__.py +++ b/py/core/providers/database/vecs/__init__.py @@ -1,18 +1,5 @@ from . import exc -from .client import Client -from .collection import Collection - -__project__ = "vecs" -__version__ = "0.4.2" - __all__ = [ - "Collection", - "Client", "exc", ] - - -def create_client(connection_string: str, *args, **kwargs) -> Client: - """Creates a client from a Postgres connection string""" - return Client(connection_string, *args, **kwargs) diff --git a/py/core/providers/database/vecs/client.py b/py/core/providers/database/vecs/client.py deleted file mode 100644 index 62e54ae5f..000000000 --- a/py/core/providers/database/vecs/client.py +++ /dev/null @@ -1,323 +0,0 @@ -""" -Defines the 'Client' class - -Importing from the `vecs.client` directly is not supported. -All public classes, enums, and functions are re-exported by the top level `vecs` module. -""" - -from __future__ import annotations - -import logging -import time -from typing import TYPE_CHECKING, List, Optional - -import sqlalchemy -from deprecated import deprecated -from sqlalchemy import MetaData, create_engine, text -from sqlalchemy.orm import sessionmaker -from sqlalchemy.pool import QueuePool - -from shared.abstractions.vector import VectorQuantizationType - -from .adapter import Adapter -from .exc import CollectionNotFound - -if TYPE_CHECKING: - from core.providers.database.vecs.collection import Collection - -logger = logging.getLogger() - - -class Client: - """ - The `vecs.Client` class serves as an interface to a PostgreSQL database with pgvector support. It facilitates - the creation, retrieval, listing and deletion of vector collections, while managing connections to the - database. - - A `Client` instance represents a connection to a PostgreSQL database. This connection can be used to create - and manipulate vector collections, where each collection is a group of vector records in a PostgreSQL table. 
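Several overview queries in this patch (list_collections, get_collections_overview, get_users_overview) fetch a page of rows and the total match count in one round trip by selecting COUNT(*) OVER() as a window column. A stripped-down sketch of that idiom; the schema-qualified table name is a placeholder and conn is an already-acquired asyncpg connection:

async def fetch_user_page(conn, offset: int, limit: int):
    rows = await conn.fetch(
        """
        SELECT user_id, email, COUNT(*) OVER() AS total_entries
        FROM my_project.users
        ORDER BY email
        OFFSET $1 LIMIT $2
        """,
        offset,
        limit,
    )
    # Every row carries the same window total, so read it off the first one.
    total_entries = rows[0]["total_entries"] if rows else 0
    return [dict(row) for row in rows], total_entries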
-
-    The `vecs.Client` class also supports usage as a context manager to ensure the connection to the database
-    is properly closed after operations, or it can be used directly.
-
-    Example usage:
-
-        DB_CONNECTION = "postgresql://:@:/"
-
-        with vecs.create_client(DB_CONNECTION) as vx:
-            # do some work
-            pass
-
-        # OR
-
-        vx = vecs.create_client(DB_CONNECTION)
-        # do some work
-        vx.disconnect()
-    """
-
-    def __init__(
-        self,
-        connection_string: str,
-        pool_size: int = 1,
-        max_retries: int = 3,
-        retry_delay: int = 1,
-        project_name: str = "vecs",
-    ):
-        self.engine = create_engine(
-            connection_string,
-            pool_size=pool_size,
-            poolclass=QueuePool,
-            pool_recycle=300,  # Recycle connections after 5 min
-        )
-        self.meta = MetaData(schema=project_name)
-        self.Session = sessionmaker(self.engine)
-        self.max_retries = max_retries
-        self.retry_delay = retry_delay
-        self.project_name = project_name
-        self.vector_version: Optional[str] = None
-        self._initialize_database()
-
-    def _initialize_database(self):
-        retries = 0
-        error = None
-        while retries < self.max_retries:
-            try:
-                with self.Session() as sess:
-                    with sess.begin():
-                        self._create_schema(sess)
-                        self._create_extension(sess)
-                        self._get_vector_version(sess)
-                return
-            except Exception as e:
-                logger.warning(
-                    f"Database connection error: {str(e)}. Retrying in {self.retry_delay} seconds..."
-                )
-                retries += 1
-                time.sleep(self.retry_delay)
-                error = e
-
-        error_message = f"Failed to initialize database after {self.max_retries} retries with error: {str(error)}"
-        logger.error(error_message)
-        raise RuntimeError(error_message)
-
-    def _create_schema(self, sess):
-        try:
-            sess.execute(
-                text(f'CREATE SCHEMA IF NOT EXISTS "{self.project_name}";')
-            )
-        except Exception as e:
-            logger.warning(f"Failed to create schema: {str(e)}")
-
-    def _create_extension(self, sess):
-        try:
-            sess.execute(text(f"CREATE EXTENSION IF NOT EXISTS vector;"))
-            sess.execute(text(f"CREATE EXTENSION IF NOT EXISTS pg_trgm;"))
-            sess.execute(
-                text(f"CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;")
-            )
-        except Exception as e:
-            logger.warning(f"Failed to create extension: {str(e)}")
-
-    def _get_vector_version(self, sess):
-        try:
-            self.vector_version = sess.execute(
-                text(
-                    "SELECT installed_version FROM pg_available_extensions WHERE name = 'vector' LIMIT 1;"
-                )
-            ).scalar_one()
-        except sqlalchemy.exc.InternalError as e:
-            logger.error(f"Failed with internal SQLAlchemy error: {str(e)}")
-
-            import psycopg2
-
-            if isinstance(e.orig, psycopg2.errors.InFailedSqlTransaction):
-                sess.rollback()
-                self.vector_version = sess.execute(
-                    text(
-                        "SELECT installed_version FROM pg_available_extensions WHERE name = 'vector' LIMIT 1;"
-                    )
-                ).scalar_one()
-            else:
-                raise e
-        except Exception as e:
-            logger.error(f"Failed to retrieve vector version: {str(e)}")
-            raise e
-
-    def _supports_hnsw(self):
-        return (
-            not self.vector_version.startswith("0.4")
-            and not self.vector_version.startswith("0.3")
-            and not self.vector_version.startswith("0.2")
-            and not self.vector_version.startswith("0.1")
-            and not self.vector_version.startswith("0.0")
-        )
-
-    def get_or_create_vector_table(
-        self,
-        name: str,
-        *,
-        dimension: Optional[int] = None,
-        adapter: Optional[Adapter] = None,
-        quantization_type: Optional[VectorQuantizationType] = None,
-    ) -> Collection:
-        """
-        Get a vector collection by name, or create it if no collection with
-        *name* exists.
-
-        Args:
-            name (str): The name of the collection.
-
-        Keyword Args:
-            dimension (int): The dimensionality of the vectors in the collection.
-            adapter (Adapter): An optional adapter used to transform records before they are stored.
-            quantization_type (VectorQuantizationType): An optional quantization type for the vector column.
-
-        Returns:
-            Collection: The created collection.
-
-        Raises:
-            MismatchedDimension: If the requested dimension conflicts with the
-                dimension of an existing collection with the same name.
-        """
-        from core.providers.database.vecs.collection import Collection
-
-        adapter_dimension = adapter.exported_dimension if adapter else None
-
-        collection = Collection(
-            name=name,
-            dimension=dimension or adapter_dimension,  # type: ignore
-            quantization_type=quantization_type,
-            client=self,
-            adapter=adapter,
-        )
-
-        return collection._create_if_not_exists()
-
-    @deprecated("use Client.get_or_create_vector_table")
-    def create_collection(self, name: str, dimension: int) -> Collection:
-        """
-        Create a new vector collection.
-
-        Args:
-            name (str): The name of the collection.
-            dimension (int): The dimensionality of the vectors in the collection.
-
-        Returns:
-            Collection: The created collection.
-
-        Raises:
-            CollectionAlreadyExists: If a collection with the same name already exists.
-        """
-        from core.providers.database.vecs.collection import Collection
-
-        return Collection(name, dimension, self)._create()
-
-    @deprecated("use Client.get_or_create_vector_table")
-    def get_collection(self, name: str) -> Collection:
-        """
-        Retrieve an existing vector collection.
-
-        Args:
-            name (str): The name of the collection.
-
-        Returns:
-            Collection: The retrieved collection.
-
-        Raises:
-            CollectionNotFound: If no collection with the given name exists.
-        """
-        from core.providers.database.vecs.collection import Collection
-
-        query = text(
-            f"""
-        select
-            relname as table_name,
-            atttypmod as embedding_dim
-        from
-            pg_class pc
-            join pg_attribute pa
-                on pc.oid = pa.attrelid
-        where
-            pc.relnamespace = '{self.project_name}'::regnamespace
-            and pc.relkind = 'r'
-            and pa.attname = 'vec'
-            and not pc.relname ^@ '_'
-            and pc.relname = :name
-        """
-        ).bindparams(name=name)
-        with self.Session() as sess:
-            query_result = sess.execute(query).fetchone()
-
-        if query_result is None:
-            raise CollectionNotFound(
-                "No collection found with requested name"
-            )
-
-        name, dimension = query_result
-        return Collection(
-            name,
-            dimension,
-            self,
-        )
-
-    def list_collections(self) -> List["Collection"]:
-        """
-        List all vector collections.
-
-        Returns:
-            list[Collection]: A list of all collections.
-        """
-        from core.providers.database.vecs.collection import Collection
-
-        return Collection._list_collections(self)
-
-    def delete_collection(self, name: str) -> None:
-        """
-        Delete a vector collection.
-
-        If no collection with requested name exists, does nothing.
-
-        Args:
-            name (str): The name of the collection.
-
-        Returns:
-            None
-        """
-        from core.providers.database.vecs.collection import Collection
-
-        Collection(name, -1, self)._drop()
-        return
-
-    def disconnect(self) -> None:
-        """
-        Disconnect the client from the database.
-
-        Returns:
-            None
-        """
-        self.engine.dispose()
-        logger.info("Disconnected from the database.")
-        return
-
-    def __enter__(self) -> "Client":
-        """
-        Enable use of the 'with' statement.
-
-        Returns:
-            Client: The current instance of the Client.
-        """
-
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        """
-        Disconnect the client on exiting the 'with' statement context.
-
-        Args:
-            exc_type: The exception type, if any.
-            exc_val: The exception value, if any.
-            exc_tb: The traceback, if any.
- - Returns: - None - """ - self.disconnect() - return diff --git a/py/core/providers/database/vecs/collection.py b/py/core/providers/database/vecs/collection.py deleted file mode 100644 index e3aa7ccaa..000000000 --- a/py/core/providers/database/vecs/collection.py +++ /dev/null @@ -1,1102 +0,0 @@ -""" -Defines the 'Collection' class - -Importing from the `vecs.collection` directly is not supported. -All public classes, enums, and functions are re-exported by the top level `vecs` module. -""" - -from __future__ import annotations - -import math -import time -import warnings -from dataclasses import dataclass -from enum import Enum -from typing import TYPE_CHECKING, Any, Iterable, Optional, Union -from uuid import UUID, uuid4 - -from flupy import flu -from sqlalchemy import ( - Column, - Index, - MetaData, - Table, - and_, - cast, - delete, - func, - or_, - select, - text, -) -from sqlalchemy.dialects import postgresql -from sqlalchemy.types import Float, UserDefinedType - -from core.base import VectorSearchResult -from core.base.abstractions import VectorSearchSettings -from shared.abstractions.vector import ( - INDEX_MEASURE_TO_SQLA_ACC, - IndexArgsHNSW, - IndexArgsIVFFlat, - IndexMeasure, - IndexMethod, - VectorQuantizationType, - VectorTableName, -) -from shared.utils import _decorate_vector_type - -from .adapter import Adapter, AdapterContext, NoOp, Record -from .exc import ( - ArgError, - CollectionAlreadyExists, - CollectionNotFound, - FilterError, - MismatchedDimension, -) - -if TYPE_CHECKING: - from vecs.client import Client - - -def index_measure_to_ops( - measure: IndexMeasure, quantization_type: VectorQuantizationType -): - return _decorate_vector_type(measure.ops, quantization_type) - - -class Vector(UserDefinedType): - cache_ok = True - - def __init__( - self, - dim=None, - quantization_type: Optional[ - VectorQuantizationType - ] = VectorQuantizationType.FP32, - ): - super(UserDefinedType, self).__init__() - self.dim = dim - self.quantization_type = quantization_type - - def get_col_spec(self, **kw): - col_spec = "" - if self.dim is None: - col_spec = _decorate_vector_type("", self.quantization_type) - else: - col_spec = _decorate_vector_type( - f"({self.dim})", self.quantization_type - ) - return col_spec - - def bind_processor(self, dialect): - def process(value): - if value is None: - return value - if not isinstance(value, list): - raise ValueError("Expected a list") - if self.dim is not None and len(value) != self.dim: - raise ValueError( - f"Expected {self.dim} dimensions, not {len(value)}" - ) - return "[" + ",".join(str(float(v)) for v in value) + "]" - - return process - - def result_processor(self, dialect, coltype): - return lambda value: ( - value - if value is None - else [float(v) for v in value[1:-1].split(",")] - ) - - class comparator_factory(UserDefinedType.Comparator): - def l2_distance(self, other): - return self.op("<->", return_type=Float)(other) - - def max_inner_product(self, other): - return self.op("<#>", return_type=Float)(other) - - def cosine_distance(self, other): - return self.op("<=>", return_type=Float)(other) - - -class Collection: - """ - The `vecs.Collection` class represents a collection of vectors within a PostgreSQL database with pgvector support. - It provides methods to manage (create, delete, fetch, upsert), index, and perform similarity searches on these vector collections. - - The collections are stored in separate tables in the database, with each vector associated with an identifier and optional metadata. 
-
-    Example usage:
-
-        with vecs.create_client(DB_CONNECTION) as vx:
-            collection = vx.create_collection(name="docs", dimension=3)
-            collection.upsert([("id1", [1, 1, 1], {"key": "value"})])
-            # Further operations on 'collection'
-
-    Public Attributes:
-        name: The name of the vector collection.
-        dimension: The dimension of vectors in the collection.
-
-    Note: Some methods of this class can raise exceptions from the `vecs.exc` module if errors occur.
-    """
-
-    COLUMN_VARS = [
-        "extraction_id",
-        "document_id",
-        "user_id",
-        "collection_ids",
-    ]
-
-    def __init__(
-        self,
-        name: str,
-        dimension: int,
-        quantization_type: VectorQuantizationType,
-        client: Client,
-        adapter: Optional[Adapter] = None,
-    ):
-        """
-        Initializes a new instance of the `Collection` class.
-
-        During expected use, developers initialize instances of `Collection` using the
-        `vecs.Client` with `vecs.Client.create_collection(...)` rather than directly.
-
-        Args:
-            name (str): The name of the collection.
-            dimension (int): The dimension of the vectors in the collection.
-            quantization_type (VectorQuantizationType): The quantization type for the vector column.
-            client (Client): The client to use for interacting with the database.
-            adapter (Adapter, optional): An adapter used to transform records before they are stored.
-        """
-        from core.providers.database.vecs.adapter import Adapter
-
-        self.client = client
-        self.name = name
-        self.dimension = dimension
-        self.quantization_type = quantization_type
-        self.table = _build_table(
-            client.project_name,
-            name,
-            client.meta,
-            dimension,
-            quantization_type,
-        )
-        self._index: Optional[str] = None
-        self.adapter = adapter or Adapter(steps=[NoOp(dimension=dimension)])
-
-        reported_dimensions = set(
-            [
-                x
-                for x in [
-                    dimension,
-                    adapter.exported_dimension if adapter else None,
-                ]
-                if x is not None
-            ]
-        )
-
-        if len(reported_dimensions) == 0:
-            raise ArgError(
-                "One of dimension or adapter must provide a dimension"
-            )
-        elif len(reported_dimensions) > 1:
-            raise MismatchedDimension(
-                "Mismatch in the reported dimensions of the selected vector collection and embedding model. Correct the selected embedding model or specify a new vector collection by modifying the `R2R_PROJECT_NAME` environment variable."
-            )
-
-    def __repr__(self):
-        """
-        Returns a string representation of the `Collection` instance.
-
-        Returns:
-            str: A string representation of the `Collection` instance.
-        """
-        return (
-            f'vecs.Collection(name="{self.name}", dimension={self.dimension})'
-        )
-
-    def __len__(self) -> int:
-        """
-        Returns the number of vectors in the collection.
-
-        Returns:
-            int: The number of vectors in the collection.
-        """
-        with self.client.Session() as sess:
-            with sess.begin():
-                stmt = select(func.count()).select_from(self.table)
-                return sess.execute(stmt).scalar() or 0
-
-    def _create_if_not_exists(self):
-        """
-        PRIVATE
-
-        Creates a new collection in the database if it doesn't already exist.
-
-        Returns:
-            Collection: The found or created collection.
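-
-        Example (an illustrative sketch; the name, dimension, and client
-        variable `vx` are hypothetical). The call is idempotent, so repeating
-        it returns the same collection:
-
-            collection = Collection(
-                name="docs",
-                dimension=3,
-                quantization_type=VectorQuantizationType.FP32,
-                client=vx,
-            )._create_if_not_exists()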
- """ - query = text( - f""" - select - relname as table_name, - atttypmod as embedding_dim - from - pg_class pc - join pg_attribute pa - on pc.oid = pa.attrelid - where - pc.relnamespace = '{self.client.project_name}'::regnamespace - and pc.relkind = 'r' - and pa.attname = 'vec' - and not pc.relname ^@ '_' - and pc.relname = :name - """ - ).bindparams(name=self.name) - with self.client.Session() as sess: - query_result = sess.execute(query).fetchone() - - if query_result: - _, collection_dimension = query_result - else: - collection_dimension = None - - reported_dimensions = set( - [ - x - for x in [self.dimension, collection_dimension] - if x is not None - ] - ) - if len(reported_dimensions) > 1: - raise MismatchedDimension( - "Mismatch in the reported dimensions of the selected vector collection and embedding model. Correct the selected embedding model or specify a new vector collection by modifying the `R2R_PROJECT_NAME` environment variable." - ) - - if not collection_dimension: - self.table.create(self.client.engine) - - return self - - def _create(self): - """ - PRIVATE - - Creates a new collection in the database. Raises a `vecs.exc.CollectionAlreadyExists` - exception if a collection with the specified name already exists. - - Returns: - Collection: The newly created collection. - """ - - collection_exists = self.__class__._does_collection_exist( - self.client, self.name - ) - if collection_exists: - raise CollectionAlreadyExists( - "Collection with requested name already exists" - ) - self.table.create(self.client.engine) - - unique_string = str(uuid4()).replace("-", "_")[0:7] - with self.client.Session() as sess: - sess.execute( - text( - f""" - create index ix_meta_{unique_string} - on {self.client.project_name}."{self.table.name}" - using gin ( metadata jsonb_path_ops ) - """ - ) - ) - - # Create trigger to update fts column - sess.execute( - text( - f""" - CREATE TRIGGER tsvector_update_{unique_string} BEFORE INSERT OR UPDATE - ON {self.client.project_name}."{self.table.name}" FOR EACH ROW EXECUTE FUNCTION - tsvector_update_trigger(fts, 'pg_catalog.english', text); - """ - ) - ) - return self - - def _drop(self): - """ - PRIVATE - - Deletes the collection from the database. Raises a `vecs.exc.CollectionNotFound` - exception if no collection with the specified name exists. - - Returns: - Collection: The deleted collection. 
- """ - with self.client.Session() as sess: - sess.execute(text(f"DROP TABLE IF EXISTS {self.name} CASCADE")) - sess.commit() - - return self - - def _get_index_options( - self, - method: IndexMethod, - index_arguments: Optional[Union[IndexArgsIVFFlat, IndexArgsHNSW]], - ) -> str: - if method == IndexMethod.ivfflat: - if isinstance(index_arguments, IndexArgsIVFFlat): - return f"WITH (lists={index_arguments.n_lists})" - else: - # Default value if no arguments provided - return "WITH (lists=100)" - elif method == IndexMethod.hnsw: - if isinstance(index_arguments, IndexArgsHNSW): - return f"WITH (m={index_arguments.m}, ef_construction={index_arguments.ef_construction})" - else: - # Default values if no arguments provided - return "WITH (m=16, ef_construction=64)" - else: - return "" # No options for other methods - - def upsert( - self, - records: Iterable[Record], - ) -> None: - chunk_size = 512 - - pipeline = flu(self.adapter(records, AdapterContext("upsert"))).chunk( - chunk_size - ) - - with self.client.Session() as sess: - with sess.begin(): - for chunk in pipeline: - stmt = postgresql.insert(self.table).values( - [ - { - "extraction_id": record[0], - "document_id": record[1], - "user_id": record[2], - "collection_ids": record[3], - "vec": record[4], - "text": record[5], - "metadata": record[6], - "fts": func.to_tsvector(record[5]), - } - for record in chunk - ] - ) - stmt = stmt.on_conflict_do_update( - index_elements=[self.table.c.extraction_id], - set_=dict( - document_id=stmt.excluded.document_id, - user_id=stmt.excluded.user_id, - collection_ids=stmt.excluded.collection_ids, - vec=stmt.excluded.vec, - text=stmt.excluded.text, - metadata=stmt.excluded.metadata, - fts=stmt.excluded.fts, - ), - ) - sess.execute(stmt) - return None - - def fetch(self, ids: Iterable[UUID]) -> list[Record]: - """ - Fetches vectors from the collection by their identifiers. - - Args: - ids (Iterable[UUID]): An iterable of vector identifiers. - - Returns: - list[Record]: A list of the fetched vectors. - - Raises: - ArgError: If ids is not an iterable of UUIDs. - """ - if isinstance(ids, (str, UUID)): - raise ArgError("ids must be an iterable of UUIDs") - - chunk_size = 12 - records = [] - with self.client.Session() as sess: - with sess.begin(): - for id_chunk in flu(ids).chunk(chunk_size): - stmt = select(self.table).where( - self.table.c.extraction_id.in_(id_chunk) - ) - chunk_records = sess.execute(stmt) - records.extend(chunk_records) - return records - - def delete( - self, - ids: Optional[Iterable[UUID]] = None, - filters: Optional[dict[str, Any]] = None, - ) -> dict[str, dict[str, str]]: - """ - Deletes vectors from the collection by matching filters or ids. - - Args: - extraction_ids (Optional[Iterable[UUID]], optional): An iterable of vector fragment identifiers. - filters (Optional[dict], optional): Filters to apply to the search. Defaults to None. - - Returns: - dict[str, dict[str, str]]: A dictionary of deleted records, where the key is the extraction_id - and the value is a dictionary containing 'document_id', 'extraction_id', and 'text'. - - Raises: - ArgError: If neither ids nor filters are provided, or if both are provided. 
- """ - if ids is None and filters is None: - raise ArgError("Either ids or filters must be provided.") - - if ids is not None and filters is not None: - raise ArgError("Either ids or filters must be provided, not both.") - - if isinstance(ids, (str, UUID)): - raise ArgError("ids must be an iterable of UUIDs") - - deleted_records = {} - - with self.client.Session() as sess: - with sess.begin(): - if ids: - for id_chunk in flu(ids).chunk(12): - delete_stmt = ( - delete(self.table) - .where(self.table.c.extraction_id.in_(id_chunk)) - .returning( - self.table.c.extraction_id, - self.table.c.document_id, - self.table.c.text, - ) - ) - result = sess.execute(delete_stmt) - for row in result: - extraction_id = str(row[0]) - deleted_records[extraction_id] = { - "extraction_id": extraction_id, - "document_id": str(row[1]), - "text": row[2], - } - - if filters: - meta_filter = self.build_filters(filters) - delete_stmt = ( - delete(self.table) - .where(meta_filter) - .returning( - self.table.c.extraction_id, - self.table.c.document_id, - self.table.c.text, - ) - ) - result = sess.execute(delete_stmt) - for row in result: - extraction_id = str(row[0]) - deleted_records[extraction_id] = { - "extraction_id": extraction_id, - "document_id": str(row[1]), - "text": row[2], - } - return deleted_records - - def __getitem__(self, items): - """ - Fetches a vector from the collection by its identifier. - - Args: - items (str): The identifier of the vector. - - Returns: - Record: The fetched vector. - """ - if not isinstance(items, str): - raise ArgError("items must be a string id") - - row = self.fetch([items]) - - if row == []: - raise KeyError("no item found with requested id") - return row[0] - - def semantic_search( - self, - vector: list[float], - search_settings: VectorSearchSettings, - ) -> Union[list[Record], list[str]]: - """ - Executes a similarity search in the collection. - - The return type is dependent on arguments *include_value* and *include_metadata* - - Args: - data (list[float]): The vector to use as the query. - search_settings (VectorSearchSettings): The search settings to use. - - Returns: - Union[list[Record], list[str]]: The result of the similarity search. - """ - - try: - imeasure_obj = IndexMeasure(search_settings.index_measure) - except ValueError: - raise ArgError("Invalid index measure") - - if not self.is_indexed_for_measure(imeasure_obj): - warnings.warn( - UserWarning( - f"Query does not have a covering index for {imeasure_obj}. 
See Collection.create_index" - ) - ) - - distance_lambda = INDEX_MEASURE_TO_SQLA_ACC.get(imeasure_obj) - if distance_lambda is None: - # unreachable - raise ArgError("invalid distance_measure") # pragma: no cover - - distance_clause = distance_lambda(self.table.c.vec)(vector) - - cols = [ - self.table.c.extraction_id, - self.table.c.document_id, - self.table.c.user_id, - self.table.c.collection_ids, - self.table.c.text, - ] - if search_settings.include_values: - cols.append(distance_clause) - - if search_settings.include_metadatas: - cols.append(self.table.c.metadata) - - stmt = select(*cols) - - if search_settings.filters: - stmt = stmt.filter(self.build_filters(search_settings.filters)) - - stmt = stmt.order_by(distance_clause) - stmt = stmt.offset(search_settings.offset) - stmt = stmt.limit(search_settings.search_limit) - - with self.client.Session() as sess: - with sess.begin(): - # index ignored if greater than n_lists - sess.execute( - text("set local ivfflat.probes = :probes").bindparams( - probes=search_settings.probes - ) - ) - if self.client._supports_hnsw(): - sess.execute( - text( - "set local hnsw.ef_search = :ef_search" - ).bindparams( - ef_search=max( - search_settings.ef_search, - search_settings.search_limit, - ) - ) - ) - if len(cols) == 1: - return [str(x) for x in sess.scalars(stmt).fetchall()] - result = sess.execute(stmt).fetchall() - return result or [] - - def full_text_search( - self, query_text: str, search_settings: VectorSearchSettings - ) -> list[VectorSearchResult]: - # Create a tsquery from the input query - ts_query = func.websearch_to_tsquery("english", query_text) - - # Use ts_rank for ranking - rank_function = func.ts_rank(self.table.c.fts, ts_query, 32).label( - "rank" - ) - - # Build the main query - stmt = ( - select( - self.table.c.extraction_id, - self.table.c.document_id, - self.table.c.user_id, - self.table.c.collection_ids, - self.table.c.text, - self.table.c.metadata, - rank_function, - ) - .where(self.table.c.fts.op("@@")(ts_query)) - .where(self.build_filters(search_settings.filters)) - .order_by(rank_function.desc()) - .offset(search_settings.offset) - .limit(search_settings.hybrid_search_settings.full_text_limit) - ) - - # Execute the query - with self.client.Session() as sess: - results = sess.execute(stmt).fetchall() - - # Convert the results to VectorSearchResult objects - return [ - VectorSearchResult( - extraction_id=str(r.extraction_id), - document_id=str(r.document_id), - user_id=str(r.user_id), - collection_ids=r.collection_ids, - text=r.text, - score=float(r.rank), - metadata=r.metadata, - ) - for r in results - ] - - def build_filters(self, filters: dict): - """ - PUBLIC - - Builds filters for SQL query based on provided dictionary. - - Args: - filters (dict): The dictionary specifying filter conditions. - - Raises: - FilterError: If filter conditions are not correctly formatted. - - Returns: - The filter clause for the SQL query. 
- """ - - if not isinstance(filters, dict): - raise FilterError("filters must be a dict") - - def parse_condition(key, value): - if key in Collection.COLUMN_VARS: - # Handle column-based filters - column = getattr(self.table.c, key) - if isinstance(value, dict): - op, clause = next(iter(value.items())) - - if op == "$eq": - return column == clause - elif op == "$ne": - return column != clause - elif op == "$in": - return column.in_(clause) - elif op == "$nin": - return ~column.in_(clause) - elif op == "$overlap": - return column.overlap(clause) - elif op == "$contains": - return column.contains(clause) - elif op == "$any": - if key == "collection_ids": - # Use ANY for UUID array comparison - return func.array_to_string(column, ",").like( - f"%{clause}%" - ) - # New operator for checking if any element in the array matches - return column.any(clause) - else: - raise FilterError( - f"Unsupported operator for column {key}: {op}" - ) - else: - # Handle direct equality - if isinstance(value, str): - value = UUID(value) - return column == value - else: - # Handle JSON-based filters - json_col = self.table.c.metadata - if key.startswith("metadata."): - key = key.split("metadata.")[1] - if isinstance(value, dict): - if len(value) > 1: - raise FilterError("only one operator permitted") - operator, clause = next(iter(value.items())) - if operator not in ( - "$eq", - "$ne", - "$lt", - "$lte", - "$gt", - "$gte", - "$in", - "$contains", - ): - raise FilterError("unknown operator") - - if operator == "$eq" and not hasattr(clause, "__len__"): - contains_value = cast({key: clause}, postgresql.JSONB) - return json_col.op("@>")(contains_value) - - if operator == "$in": - if not isinstance(clause, list): - raise FilterError( - "argument to $in filter must be a list" - ) - for elem in clause: - if not isinstance(elem, (int, str, float)): - raise FilterError( - "argument to $in filter must be a list of scalars" - ) - contains_value = [ - cast(elem, postgresql.JSONB) for elem in clause - ] - return json_col.op("->")(key).in_(contains_value) - - matches_value = cast(clause, postgresql.JSONB) - - if operator == "$contains": - if not isinstance(clause, (int, str, float)): - raise FilterError( - "argument to $contains filter must be a scalar" - ) - return and_( - json_col.op("->")(key).contains(matches_value), - func.jsonb_typeof(json_col.op("->")(key)) - == "array", - ) - - return { - "$eq": json_col.op("->")(key) == matches_value, - "$ne": json_col.op("->")(key) != matches_value, - "$lt": json_col.op("->")(key) < matches_value, - "$lte": json_col.op("->")(key) <= matches_value, - "$gt": json_col.op("->")(key) > matches_value, - "$gte": json_col.op("->")(key) >= matches_value, - }[operator] - else: - contains_value = cast({key: value}, postgresql.JSONB) - return json_col.op("@>")(contains_value) - - def parse_filter(filter_dict): - conditions = [] - for key, value in filter_dict.items(): - if key == "$and": - conditions.append(and_(*[parse_filter(f) for f in value])) - elif key == "$or": - conditions.append(or_(*[parse_filter(f) for f in value])) - else: - conditions.append(parse_condition(key, value)) - return and_(*conditions) - - return parse_filter(filters) - - @classmethod - def _list_collections(cls, client: "Client") -> list["Collection"]: - """ - PRIVATE - - Retrieves all collections from the database. - - Args: - client (Client): The database client. - - Returns: - list[Collection]: A list of all existing collections. 
- """ - - query = text( - """ - select - relname as table_name, - atttypmod as embedding_dim - from - pg_class pc - join pg_attribute pa - on pc.oid = pa.attrelid - where - pc.relnamespace = '{client.project_name}'::regnamespace - and pc.relkind = 'r' - and pa.attname = 'vec' - and not pc.relname ^@ '_' - """ - ) - xc = [] - with client.Session() as sess: - for name, dimension in sess.execute(query): - existing_collection = cls(name, dimension, client) - xc.append(existing_collection) - return xc - - @classmethod - def _does_collection_exist(cls, client: "Client", name: str) -> bool: - """ - PRIVATE - - Checks if a collection with a given name exists within the database - - Args: - client (Client): The database client. - name (str): The name of the collection - - Returns: - Exists: Whether the collection exists or not - """ - - try: - client.get_collection(name) - return True - except CollectionNotFound: - return False - - @property - def index(self) -> Optional[str]: - """ - PRIVATE - - Note: - The `index` property is private and expected to undergo refactoring. - Do not rely on it's output. - - Retrieves the SQL name of the collection's vector index, if it exists. - - Returns: - Optional[str]: The name of the index, or None if no index exists. - """ - - if self._index is None: - query = text( - f""" - select - relname as table_name - from - pg_class pc - where - pc.relnamespace = '{self.client.project_name}'::regnamespace - and relname ilike 'ix_vector%' - and pc.relkind = 'i' - """ - ) - with self.client.Session() as sess: - ix_name = sess.execute(query).scalar() - self._index = ix_name - return self._index - - def is_indexed_for_measure(self, measure: IndexMeasure): - """ - Checks if the collection is indexed for a specific measure. - - Args: - measure (IndexMeasure): The measure to check for. - - Returns: - bool: True if the collection is indexed for the measure, False otherwise. - """ - - index_name = self.index - if index_name is None: - return False - - ops = index_measure_to_ops(measure, self.quantization_type) - if ops is None: - return False - - if ops in index_name: - return True - - return False - - def close(self): - """ - Closes the database connection associated with this collection. - - This method should be called when you are done using the collection to release - the database resources. - """ - if self.client: - self.client.close() - - def create_index( - self, - table_name: Optional[VectorTableName] = None, - measure: IndexMeasure = IndexMeasure.cosine_distance, - method: IndexMethod = IndexMethod.auto, - index_arguments: Optional[ - Union[IndexArgsIVFFlat, IndexArgsHNSW] - ] = None, - replace: bool = True, - concurrently: bool = True, - quantization_type: VectorQuantizationType = VectorQuantizationType.FP32, - ) -> None: - """ - Creates an index for the collection. - - Note: - When `vecs` creates an index on a pgvector column in PostgreSQL, it uses a multi-step - process that enables performant indexes to be built for large collections with low end - database hardware. 
-
-        Those steps are:
-
-            - Creates a new table with a different name
-            - Randomly selects records from the existing table
-            - Inserts the random records from the existing table into the new table
-            - Creates the requested vector index on the new table
-            - Upserts all data from the existing table into the new table
-            - Drops the existing table
-            - Renames the new table to the existing table's name
-
-        If you create dependencies (like views) on the table that underpins
-        a `vecs.Collection`, the `create_index` step may require you to drop those dependencies before
-        it will succeed.
-
-        Args:
-            measure (IndexMeasure, optional): The measure to index for. Defaults to 'cosine_distance'.
-            method (IndexMethod, optional): The indexing method to use. Defaults to 'auto'.
-            index_arguments: (IndexArgsIVFFlat | IndexArgsHNSW, optional): Index type specific arguments.
-            replace (bool, optional): Whether to replace the existing index. Defaults to True.
-            concurrently (bool, optional): Whether to create the index concurrently. Defaults to True.
-
-        Raises:
-            ArgError: If an invalid index method is used, or if *replace* is False and an index already exists.
-        """
-
-        if table_name == VectorTableName.CHUNKS:
-            table_name = f"{self.client.project_name}.{self.table.name}"
-            col_name = "vec"
-        elif table_name == VectorTableName.ENTITIES:
-            table_name = (
-                f"{self.client.project_name}.{VectorTableName.ENTITIES}"
-            )
-            col_name = "description_embedding"
-        elif table_name == VectorTableName.COMMUNITIES:
-            table_name = (
-                f"{self.client.project_name}.{VectorTableName.COMMUNITIES}"
-            )
-            col_name = "embedding"
-        else:
-            raise ArgError("invalid table name")
-        if method not in (
-            IndexMethod.ivfflat,
-            IndexMethod.hnsw,
-            IndexMethod.auto,
-        ):
-            raise ArgError("invalid index method")
-
-        if index_arguments:
-            # Disallow case where user submits index arguments but uses the
-            # IndexMethod.auto index (index build arguments should only be
-            # used with a specific index)
-            if method == IndexMethod.auto:
-                raise ArgError(
-                    "Index build parameters are not allowed when using the IndexMethod.auto index."
-                )
-            # Disallow case where user specifies one index type but submits
-            # index build arguments for the other index type
-            if (
-                isinstance(index_arguments, IndexArgsHNSW)
-                and method != IndexMethod.hnsw
-            ) or (
-                isinstance(index_arguments, IndexArgsIVFFlat)
-                and method != IndexMethod.ivfflat
-            ):
-                raise ArgError(
-                    f"{index_arguments.__class__.__name__} build parameters were supplied but {method} index was specified."
-                )
-
-        if method == IndexMethod.auto:
-            if self.client._supports_hnsw():
-                method = IndexMethod.hnsw
-            else:
-                method = IndexMethod.ivfflat
-
-        if method == IndexMethod.hnsw and not self.client._supports_hnsw():
-            raise ArgError(
-                "HNSW Unavailable. 
Upgrade your pgvector installation to > 0.5.0 to enable HNSW support" - ) - - ops = index_measure_to_ops( - measure, quantization_type=self.quantization_type - ) - - if ops is None: - raise ArgError("Unknown index measure") - - concurrently_sql = "CONCURRENTLY" if concurrently else "" - - # Drop existing index if needed (must be outside of transaction) - # Doesn't drop - if self.index is not None and replace: - drop_index_sql = f'DROP INDEX {concurrently_sql} IF EXISTS {self.client.project_name}."{self.index}";' - try: - with self.client.engine.connect() as connection: - connection = connection.execution_options( - isolation_level="AUTOCOMMIT" - ) - connection.execute(text(drop_index_sql)) - except Exception as e: - raise Exception(f"Failed to drop existing index: {e}") - self._index = None - - timestamp = time.strftime("%Y%m%d%H%M%S") - index_name = f"ix_{ops}_{method}__{timestamp}" - - create_index_sql = f""" - CREATE INDEX {concurrently_sql} {index_name} - ON {table_name} - USING {method} ({col_name} {ops}) {self._get_index_options(method, index_arguments)}; - """ - - try: - if concurrently: - with self.client.engine.connect() as connection: - connection = connection.execution_options( - isolation_level="AUTOCOMMIT" - ) - connection.execute(text(create_index_sql)) - else: - with self.client.Session() as sess: - sess.execute(text(create_index_sql)) - sess.commit() - except Exception as e: - raise Exception(f"Failed to create index: {e}") - - self._index = index_name - - return None - - -def _build_table( - project_name: str, - name: str, - meta: MetaData, - dimension: int, - quantization_type: VectorQuantizationType = VectorQuantizationType.FP32, -) -> Table: - - table = Table( - name, - meta, - Column("extraction_id", postgresql.UUID, primary_key=True), - Column("document_id", postgresql.UUID, nullable=False), - Column("user_id", postgresql.UUID, nullable=False), - Column( - "collection_ids", - postgresql.ARRAY(postgresql.UUID), - server_default="{}", - ), - Column( - "vec", - Vector(dimension, quantization_type=quantization_type), - nullable=False, - ), - Column("text", postgresql.TEXT, nullable=True), - Column( - "fts", - postgresql.TSVECTOR, - nullable=False, - server_default=text("to_tsvector('english'::regconfig, '')"), - ), - Column( - "metadata", - postgresql.JSONB, - server_default=text("'{}'::jsonb"), - nullable=False, - ), - # Create a GIN index for the tsvector column - Index(f"idx_{name}_fts", "fts", postgresql_using="gin"), - extend_existing=True, - ) - - return table diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index 0496bd579..354da033c 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -1,18 +1,15 @@ -import concurrent.futures import copy import json import logging import time -from concurrent.futures import ThreadPoolExecutor -from typing import Any, Optional, Union - -from sqlalchemy import text -from sqlalchemy.exc import NoResultFound, SQLAlchemyError +import uuid +from typing import Any, Optional, Tuple, TypedDict, Union +from uuid import UUID from core.base import ( - DatabaseConfig, - VectorDBProvider, VectorEntry, + VectorHandler, + VectorQuantizationType, VectorSearchResult, ) from core.base.abstractions import VectorSearchSettings @@ -21,142 +18,278 @@ IndexArgsIVFFlat, IndexMeasure, IndexMethod, - VectorQuantizationType, VectorTableName, ) -from .vecs import Client, Collection, create_client +from .base import PostgresConnectionManager +from .vecs.exc import ArgError, 
FilterError logger = logging.getLogger() +from shared.utils import _decorate_vector_type -class PostgresVectorDBProvider(VectorDBProvider): - def __init__(self, config: DatabaseConfig, *args, **kwargs): - super().__init__(config) - self.collection: Optional[Collection] = None - self.project_name = kwargs.get("project_name", None) - connection_string = kwargs.get("connection_string", None) - if not connection_string: - raise ValueError( - "Please provide a valid `connection_string` to the `PostgresVectorDBProvider`." - ) - self.vx: Client = create_client( - connection_string=connection_string, project_name=self.project_name - ) - if not self.vx: - raise ValueError( - "Error occurred while attempting to connect to the pgvector provider." - ) - self.project_name = kwargs.get("project_name", None) - if not self.project_name: - raise ValueError( - "Please provide a valid `project_name` to the `PostgresVectorDBProvider`." - ) - dimension = kwargs.get("dimension", None) - quantization_type = kwargs.get("quantization_type", None) - if not dimension: - raise ValueError( - "Please provide a valid `dimension` to the `PostgresVectorDBProvider`." - ) +def index_measure_to_ops( + measure: IndexMeasure, + quantization_type: VectorQuantizationType = VectorQuantizationType.FP32, +): + return _decorate_vector_type(measure.ops, quantization_type) - self._initialize_vector_db(dimension, quantization_type) - logger.info( - f"Successfully initialized PGVectorDB for project: {self.project_name}" - ) - def _initialize_vector_db( - self, dimension: int, quantization_type: VectorQuantizationType - ) -> None: - # Create extension for trigram similarity - with self.vx.Session() as sess: - sess.execute(text(f"CREATE EXTENSION IF NOT EXISTS pg_trgm;")) - sess.execute(text(f"CREATE EXTENSION IF NOT EXISTS btree_gin;")) - sess.commit() - - self.collection = self.vx.get_or_create_vector_table( - name=self.project_name, - dimension=dimension, - quantization_type=quantization_type, - ) +class HybridSearchIntermediateResult(TypedDict): + semantic_rank: int + full_text_rank: int + data: VectorSearchResult + rrf_score: float - # NOTE: Do not create an index during initialization - # self.create_index() - def upsert(self, entry: VectorEntry) -> None: - if self.collection is None: - raise ValueError( - "Please call `initialize_collection` before attempting to run `upsert`." 
- ) +class PostgresVectorHandler(VectorHandler): + TABLE_NAME = VectorTableName.VECTORS - self.collection.upsert( - records=[ - ( - entry.extraction_id, - entry.document_id, - entry.user_id, - entry.collection_ids, - entry.vector.data, - entry.text, - entry.metadata, - ) - ] + COLUMN_VARS = [ + "extraction_id", + "document_id", + "user_id", + "collection_ids", + ] + + def __init__( + self, + project_name: str, + connection_manager: PostgresConnectionManager, + dimension: int, + enable_fts: bool = False, + ): + super().__init__(project_name, connection_manager) + self.dimension = dimension + self.enable_fts = enable_fts + + async def create_table(self): + # Check for old table name first + check_query = f""" + SELECT EXISTS ( + SELECT FROM pg_tables + WHERE schemaname = $1 + AND tablename = $2 + ); + """ + old_table_exists = await self.connection_manager.fetch_query( + check_query, (self.project_name, self.project_name) ) - def upsert_entries(self, entries: list[VectorEntry]) -> None: - if self.collection is None: + if len(old_table_exists) > 0 and old_table_exists[0]["exists"]: raise ValueError( - "Please call `initialize_collection` before attempting to run `upsert_entries`." + f"Found old vector table '{self.project_name}.{self.project_name}'. " + "Please run `r2r db upgrade` with the CLI, or to run manually, " + "run in R2R/py/migrations with 'alembic upgrade head' to update " + "your database schema to the new version." ) - self.collection.upsert( - records=[ - ( - entry.extraction_id, - entry.document_id, - entry.user_id, - entry.collection_ids, - entry.vector.data, - entry.text, - entry.metadata, - ) - for entry in entries - ] + + # TODO - Move ids to `UUID` type + # Create the vector table if it doesn't exist + query = f""" + CREATE TABLE IF NOT EXISTS {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} ( + extraction_id UUID PRIMARY KEY, + document_id UUID, + user_id UUID, + collection_ids UUID[], + vec vector({self.dimension}), + text TEXT, + metadata JSONB + {",fts tsvector GENERATED ALWAYS AS (to_tsvector('english', text)) STORED" if self.enable_fts else ""} + ); + CREATE INDEX IF NOT EXISTS idx_vectors_document_id ON {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} (document_id); + CREATE INDEX IF NOT EXISTS idx_vectors_user_id ON {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} (user_id); + CREATE INDEX IF NOT EXISTS idx_vectors_collection_ids ON {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} USING GIN (collection_ids); + CREATE INDEX IF NOT EXISTS idx_vectors_text ON {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} USING GIN (to_tsvector('english', text)); + """ + if self.enable_fts: + query += f""" + CREATE INDEX IF NOT EXISTS idx_vectors_text ON {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} USING GIN (to_tsvector('english', text)); + """ + + await self.connection_manager.execute_query(query) + + async def upsert(self, entry: VectorEntry) -> None: + query = f""" + INSERT INTO {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} + (extraction_id, document_id, user_id, collection_ids, vec, text, metadata) + VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT (extraction_id) DO UPDATE SET + document_id = EXCLUDED.document_id, + user_id = EXCLUDED.user_id, + collection_ids = EXCLUDED.collection_ids, + vec = EXCLUDED.vec, + text = EXCLUDED.text, + metadata = EXCLUDED.metadata; + """ + await self.connection_manager.execute_query( + query, + ( + entry.extraction_id, + entry.document_id, + entry.user_id, + entry.collection_ids, + 
str(entry.vector.data), + entry.text, + json.dumps(entry.metadata), + ), ) - def semantic_search( + async def upsert_entries(self, entries: list[VectorEntry]) -> None: + query = f""" + INSERT INTO {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} + (extraction_id, document_id, user_id, collection_ids, vec, text, metadata) + VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT (extraction_id) DO UPDATE SET + document_id = EXCLUDED.document_id, + user_id = EXCLUDED.user_id, + collection_ids = EXCLUDED.collection_ids, + vec = EXCLUDED.vec, + text = EXCLUDED.text, + metadata = EXCLUDED.metadata; + """ + params = [ + ( + entry.extraction_id, + entry.document_id, + entry.user_id, + entry.collection_ids, + str(entry.vector.data), + entry.text, + json.dumps(entry.metadata), + ) + for entry in entries + ] + await self.connection_manager.execute_many(query, params) + + async def semantic_search( self, query_vector: list[float], search_settings: VectorSearchSettings ) -> list[VectorSearchResult]: - if self.collection is None: - raise ValueError( - "Please call `initialize_collection` before attempting to run `semantic_search`." - ) - results = self.collection.semantic_search( - vector=query_vector, search_settings=search_settings - ) + try: + imeasure_obj = IndexMeasure(search_settings.index_measure) + except ValueError: + raise ValueError("Invalid index measure") + + table_name = self._get_table_name(PostgresVectorHandler.TABLE_NAME) + cols = [ + f"{table_name}.extraction_id", + f"{table_name}.document_id", + f"{table_name}.user_id", + f"{table_name}.collection_ids", + f"{table_name}.text", + ] + + # Use cosine distance calculation + distance_calc = f"{table_name}.vec <=> $1::vector" + + if search_settings.include_values: + cols.append(f"({distance_calc}) AS distance") + + if search_settings.include_metadatas: + cols.append(f"{table_name}.metadata") + + select_clause = ", ".join(cols) + + where_clause = "" + params: list[Union[str, int]] = [str(query_vector)] + if search_settings.filters: + where_clause = self._build_filters(search_settings.filters, params) + where_clause = f"WHERE {where_clause}" + + query = f""" + SELECT {select_clause} + FROM {table_name} + {where_clause} + ORDER BY {distance_calc} + LIMIT ${len(params) + 1} + OFFSET ${len(params) + 2} + """ + + params.extend([search_settings.search_limit, search_settings.offset]) + + results = await self.connection_manager.fetch_query(query, params) + return [ VectorSearchResult( - extraction_id=result[0], # type: ignore - document_id=result[1], # type: ignore - user_id=result[2], # type: ignore - collection_ids=result[3], # type: ignore - text=result[4], # type: ignore - score=1 - float(result[5]), # type: ignore - metadata=result[6], # type: ignore + extraction_id=UUID(str(result["extraction_id"])), + document_id=UUID(str(result["document_id"])), + user_id=UUID(str(result["user_id"])), + collection_ids=result["collection_ids"], + text=result["text"], + score=( + (1 - float(result["distance"])) + if search_settings.include_values + else -1 + ), + metadata=( + json.loads(result["metadata"]) + if search_settings.include_metadatas + else {} + ), ) for result in results ] - def full_text_search( + async def full_text_search( self, query_text: str, search_settings: VectorSearchSettings ) -> list[VectorSearchResult]: - if self.collection is None: + if not self.enable_fts: raise ValueError( - "Please call `initialize_collection` before attempting to run `full_text_search`." + "Full-text search is not enabled for this collection." 
+ ) + + where_clauses = [] + params: list[Union[str, int]] = [query_text] + + if search_settings.filters: + filters_clause = self._build_filters( + search_settings.filters, params + ) + where_clauses.append(filters_clause) + + if where_clauses: + where_clause = ( + "WHERE " + + " AND ".join(where_clauses) + + " AND fts @@ websearch_to_tsquery('english', $1)" ) - return self.collection.full_text_search( - query_text=query_text, search_settings=search_settings + else: + where_clause = "WHERE fts @@ websearch_to_tsquery('english', $1)" + + query = f""" + SELECT + extraction_id, document_id, user_id, collection_ids, text, metadata, + ts_rank(fts, websearch_to_tsquery('english', $1), 32) as rank + FROM {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} + {where_clause} + """ + + query += f""" + ORDER BY rank DESC + OFFSET ${len(params)+1} LIMIT ${len(params)+2} + """ + params.extend( + [ + search_settings.offset, + search_settings.hybrid_search_settings.full_text_limit, + ] ) - def hybrid_search( + results = await self.connection_manager.fetch_query(query, params) + return [ + VectorSearchResult( + extraction_id=UUID(str(r["extraction_id"])), + document_id=UUID(str(r["document_id"])), + user_id=UUID(str(r["user_id"])), + collection_ids=r["collection_ids"], + text=r["text"], + score=float(r["rank"]), + metadata=json.loads(r["metadata"]), + ) + for r in results + ] + + async def hybrid_search( self, query_text: str, query_vector: list[float], @@ -180,46 +313,16 @@ def hybrid_search( semantic_settings.search_limit += search_settings.offset full_text_settings = copy.deepcopy(search_settings) - full_text_settings.hybrid_search_settings.full_text_limit += ( # type: ignore + full_text_settings.hybrid_search_settings.full_text_limit += ( search_settings.offset ) - # Use ThreadPoolExecutor to run searches in parallel - with ThreadPoolExecutor(max_workers=2) as executor: - semantic_future = executor.submit( - self.semantic_search, query_vector, semantic_settings - ) - full_text_future = executor.submit( - self.full_text_search, query_text, full_text_settings - ) - - # Wait for both searches to complete - concurrent.futures.wait([semantic_future, full_text_future]) - - semantic_results: list[VectorSearchResult] = semantic_future.result() - full_text_results: list[VectorSearchResult] = full_text_future.result() - - semantic_limit = search_settings.search_limit - full_text_limit = ( - search_settings.hybrid_search_settings.full_text_limit + semantic_results: list[VectorSearchResult] = ( + await self.semantic_search(query_vector, semantic_settings) ) - semantic_weight = ( - search_settings.hybrid_search_settings.semantic_weight + full_text_results: list[VectorSearchResult] = ( + await self.full_text_search(query_text, full_text_settings) ) - full_text_weight = ( - search_settings.hybrid_search_settings.full_text_weight - ) - rrf_k = search_settings.hybrid_search_settings.rrf_k - - # Combine results using RRF - combined_results = { - result.extraction_id: { - "semantic_rank": rank, - "full_text_rank": full_text_limit, - "data": result, - } - for rank, result in enumerate(semantic_results, 1) - } semantic_limit = search_settings.search_limit full_text_limit = ( @@ -232,15 +335,16 @@ def hybrid_search( search_settings.hybrid_search_settings.full_text_weight ) rrf_k = search_settings.hybrid_search_settings.rrf_k - # Combine results using RRF - combined_results = { - result.extraction_id: { + + combined_results: dict[uuid.UUID, HybridSearchIntermediateResult] = {} + + for rank, result in 
enumerate(semantic_results, 1): + combined_results[result.extraction_id] = { "semantic_rank": rank, "full_text_rank": full_text_limit, "data": result, + "rrf_score": 0.0, # Initialize with 0, will be calculated later } - for rank, result in enumerate(semantic_results, 1) - } for rank, result in enumerate(full_text_results, 1): if result.extraction_id in combined_results: @@ -250,29 +354,27 @@ def hybrid_search( "semantic_rank": semantic_limit, "full_text_rank": rank, "data": result, + "rrf_score": 0.0, # Initialize with 0, will be calculated later } - # Filter out non-overlapping results combined_results = { k: v for k, v in combined_results.items() - if v["semantic_rank"] <= semantic_limit * 2 # type: ignore - and v["full_text_rank"] <= full_text_limit * 2 # type: ignore + if v["semantic_rank"] <= semantic_limit * 2 + and v["full_text_rank"] <= full_text_limit * 2 } - # Calculate RRF scores - for result in combined_results.values(): # type: ignore - semantic_score = 1 / (rrf_k + result["semantic_rank"]) # type: ignore - full_text_score = 1 / (rrf_k + result["full_text_rank"]) # type: ignore - result["rrf_score"] = ( # type: ignore + for hyb_result in combined_results.values(): + semantic_score = 1 / (rrf_k + hyb_result["semantic_rank"]) + full_text_score = 1 / (rrf_k + hyb_result["full_text_rank"]) + hyb_result["rrf_score"] = ( semantic_score * semantic_weight + full_text_score * full_text_weight ) / (semantic_weight + full_text_weight) - # Sort by RRF score and apply offset and limit sorted_results = sorted( combined_results.values(), - key=lambda x: x["rrf_score"], # type: ignore + key=lambda x: x["rrf_score"], reverse=True, ) offset_results = sorted_results[ @@ -282,14 +384,14 @@ def hybrid_search( return [ VectorSearchResult( - extraction_id=result["data"].extraction_id, # type: ignore - document_id=result["data"].document_id, # type: ignore - user_id=result["data"].user_id, # type: ignore - collection_ids=result["data"].collection_ids, # type: ignore - text=result["data"].text, # type: ignore - score=result["rrf_score"], # type: ignore + extraction_id=result["data"].extraction_id, + document_id=result["data"].document_id, + user_id=result["data"].user_id, + collection_ids=result["data"].collection_ids, + text=result["data"].text, + score=result["rrf_score"], metadata={ - **result["data"].metadata, # type: ignore + **result["data"].metadata, "semantic_rank": result["semantic_rank"], "full_text_rank": result["full_text_rank"], }, @@ -297,260 +399,611 @@ def hybrid_search( for result in offset_results ] - def create_index( - self, - table_name: Optional[VectorTableName] = None, - index_method: IndexMethod = IndexMethod.hnsw, - measure: IndexMeasure = IndexMeasure.cosine_distance, - index_arguments: Optional[ - Union[IndexArgsHNSW, IndexArgsIVFFlat] - ] = None, - replace: bool = True, - concurrently: bool = True, - ): - if self.collection is None: - raise ValueError("Collection is not initialized.") - - start_time = time.time() - - self.collection.create_index( - table_name=table_name, - method=index_method, - measure=measure, - index_arguments=index_arguments, - replace=replace, - concurrently=concurrently, - ) + async def delete( + self, filters: dict[str, Any] + ) -> dict[str, dict[str, str]]: + params: list[Union[str, int]] = [] + where_clause = self._build_filters(filters, params) - end_time = time.time() - logger.info(f"Index creation took {end_time - start_time:.2f} seconds") + query = f""" + DELETE FROM {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} + WHERE 
{where_clause} + RETURNING extraction_id, document_id, text; + """ - def delete( - self, - filters: dict[str, Any], - ) -> dict[str, dict[str, str]]: - if self.collection is None: - raise ValueError( - "Please call `initialize_collection` before attempting to run `delete`." - ) + results = await self.connection_manager.fetch_query(query, params) - return self.collection.delete(filters=filters) + return { + str(result["extraction_id"]): { + "status": "deleted", + "extraction_id": str(result["extraction_id"]), + "document_id": str(result["document_id"]), + "text": result["text"], + } + for result in results + } - def assign_document_to_collection( - self, document_id: str, collection_id: str + async def assign_document_to_collection_vector( + self, document_id: UUID, collection_id: UUID ) -> None: + query = f""" + UPDATE {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} + SET collection_ids = array_append(collection_ids, $1) + WHERE document_id = $2 AND NOT ($1 = ANY(collection_ids)); """ - Assign a document to a collection in the vector database. - - Args: - document_id (str): The ID of the document to assign. - collection_id (str): The ID of the collection to assign the document to. + await self.connection_manager.execute_query( + query, (str(collection_id), str(document_id)) + ) - Raises: - ValueError: If the collection is not initialized. + async def remove_document_from_collection_vector( + self, document_id: UUID, collection_id: UUID + ) -> None: + query = f""" + UPDATE {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} + SET collection_ids = array_remove(collection_ids, $1) + WHERE document_id = $2; """ - if self.collection is None: - raise ValueError( - "Please call `initialize_collection` before attempting to run `assign_document_to_collection`." 
- ) + await self.connection_manager.execute_query( + query, (collection_id, document_id) + ) - table_name = self.collection.table.name - query = text( - f""" - UPDATE {self.project_name}."{table_name}" - SET collection_ids = array_append(collection_ids, :collection_id) - WHERE document_id = :document_id AND NOT (:collection_id = ANY(collection_ids)) - RETURNING document_id - """ + async def delete_user_vector(self, user_id: UUID) -> None: + query = f""" + DELETE FROM {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} + WHERE user_id = $1; + """ + await self.connection_manager.execute_query(query, (user_id,)) + + async def delete_collection_vector(self, collection_id: UUID) -> None: + query = f""" + DELETE FROM {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} + WHERE $1 = ANY(collection_ids) + RETURNING collection_ids + """ + results = await self.connection_manager.fetchrow_query( + query, (collection_id,) ) + return None - with self.vx.Session() as sess: - result = sess.execute( - query, - {"document_id": document_id, "collection_id": collection_id}, - ).fetchone() - sess.commit() + async def get_document_chunks( + self, + document_id: UUID, + offset: int = 0, + limit: int = -1, + include_vectors: bool = False, + ) -> dict[str, Any]: + vector_select = ", vec" if include_vectors else "" + limit_clause = f"LIMIT {limit}" if limit > -1 else "" + + query = f""" + SELECT extraction_id, document_id, user_id, collection_ids, text, metadata{vector_select}, COUNT(*) OVER() AS total + FROM {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} + WHERE document_id = $1 + OFFSET $2 + {limit_clause}; + """ - if not result: - logger.warning( - f"Document {document_id} not found or already assigned to collection {collection_id}" - ) + params = [document_id, offset] + + results = await self.connection_manager.fetch_query(query, params) - def remove_document_from_collection( - self, document_id: str, collection_id: str + chunks = [] + total = 0 + if results: + total = results[0].get("total", 0) + chunks = [ + { + "extraction_id": result["extraction_id"], + "document_id": result["document_id"], + "user_id": result["user_id"], + "collection_ids": result["collection_ids"], + "text": result["text"], + "metadata": json.loads(result["metadata"]), + "vector": ( + json.loads(result["vec"]) if include_vectors else None + ), + } + for result in results + ] + + return {"results": chunks, "total_entries": total} + + async def create_index( + self, + table_name: Optional[VectorTableName] = None, + index_measure: IndexMeasure = IndexMeasure.cosine_distance, + index_method: IndexMethod = IndexMethod.auto, + index_arguments: Optional[ + Union[IndexArgsIVFFlat, IndexArgsHNSW] + ] = None, + index_name: Optional[str] = None, + concurrently: bool = True, ) -> None: """ - Remove a document from a collection in the vector database. + Creates an index for the collection. - Args: - document_id (str): The ID of the document to remove. - collection_id (str): The ID of the collection to remove the document from. + Note: + When `vecs` creates an index on a pgvector column in PostgreSQL, it uses a multi-step + process that enables performant indexes to be built for large collections with low end + database hardware. 
+ + Those steps are: + + - Creates a new table with a different name + - Randomly selects records from the existing table + - Inserts the random records from the existing table into the new table + - Creates the requested vector index on the new table + - Upserts all data from the existing table into the new table + - Drops the existing table + - Renames the new table to the existing table's name + If you create dependencies (like views) on the table that underpins + a `vecs.Collection`, the `create_index` step may require you to drop those dependencies before + it will succeed. + + Args: + index_measure (IndexMeasure, optional): The measure to index for. Defaults to 'cosine_distance'. + index_method (IndexMethod, optional): The indexing method to use. Defaults to 'auto'. + index_arguments: (IndexArgsIVFFlat | IndexArgsHNSW, optional): Index-type-specific arguments + index_name (str, optional): The name of the index to create. Defaults to None. + concurrently (bool, optional): Whether to create the index concurrently. Defaults to True. Raises: - ValueError: If the collection is not initialized. + ArgError: If an invalid table name or index method is used, or if the supplied index arguments do not match the chosen index method. """ - if self.collection is None: - raise ValueError( - "Please call `initialize_collection` before attempting to run `remove_document_from_collection`." + + if table_name == VectorTableName.VECTORS: + table_name_str = f"{self.project_name}.{VectorTableName.VECTORS}" # TODO - Fix bug in vector table naming convention + col_name = "vec" + elif table_name == VectorTableName.ENTITIES_DOCUMENT: + table_name_str = ( + f"{self.project_name}.{VectorTableName.ENTITIES_DOCUMENT}" ) + col_name = "description_embedding" + elif table_name == VectorTableName.ENTITIES_COLLECTION: + table_name_str = ( + f"{self.project_name}.{VectorTableName.ENTITIES_COLLECTION}" + ) + col_name = "description_embedding" + elif table_name == VectorTableName.COMMUNITIES: + table_name_str = ( + f"{self.project_name}.{VectorTableName.COMMUNITIES}" + ) + col_name = "embedding" + else: + raise ArgError("invalid table name") + if index_method not in ( + IndexMethod.ivfflat, + IndexMethod.hnsw, + IndexMethod.auto, + ): + raise ArgError("invalid index method") + + if index_arguments: + # Disallow case where user submits index arguments but uses the + # IndexMethod.auto index (index build arguments should only be + # used with a specific index) + if index_method == IndexMethod.auto: + raise ArgError( + "Index build parameters are not allowed when using the IndexMethod.auto index." + ) + # Disallow case where user specifies one index type but submits + # index build arguments for the other index type + if ( + isinstance(index_arguments, IndexArgsHNSW) + and index_method != IndexMethod.hnsw + ) or ( + isinstance(index_arguments, IndexArgsIVFFlat) + and index_method != IndexMethod.ivfflat + ): + raise ArgError( + f"{index_arguments.__class__.__name__} build parameters were supplied but {index_method} index was specified."
+ ) - table_name = self.collection.table.name - query = text( - f""" - UPDATE {self.project_name}."{table_name}" - SET collection_ids = array_remove(collection_ids, :collection_id) - WHERE document_id = :document_id AND :collection_id = ANY(collection_ids) - RETURNING document_id - """ - ) + if index_method == IndexMethod.auto: + index_method = IndexMethod.hnsw - with self.vx.Session() as sess: - result = sess.execute( - query, - {"document_id": document_id, "collection_id": collection_id}, - ).fetchone() - sess.commit() + ops = index_measure_to_ops( + index_measure # , quantization_type=self.quantization_type + ) - if not result: - logger.warning( - f"Document {document_id} not found in collection {collection_id} or already removed" - ) + if ops is None: + raise ArgError("Unknown index measure") - def remove_collection_from_documents(self, collection_id: str) -> None: - if self.collection is None: - raise ValueError("Collection is not initialized.") + concurrently_sql = "CONCURRENTLY" if concurrently else "" - table_name = self.collection.table.name - query = text( - f""" - UPDATE {self.project_name}."{table_name}" - SET collection_ids = array_remove(collection_ids, :collection_id) - WHERE :collection_id = ANY(collection_ids) - """ + index_name = ( + index_name + or f"ix_{ops}_{index_method}__{time.strftime('%Y%m%d%H%M%S')}" ) - with self.vx.Session() as sess: - sess.execute(query, {"collection_id": collection_id}) - sess.commit() + create_index_sql = f""" + CREATE INDEX {concurrently_sql} {index_name} + ON {table_name_str} + USING {index_method} ({col_name} {ops}) {self._get_index_options(index_method, index_arguments)}; + """ - def delete_user(self, user_id: str) -> None: - if self.collection is None: - raise ValueError("Collection is not initialized.") + try: + if concurrently: + async with ( + self.connection_manager.pool.get_connection() as conn # type: ignore + ): + # Disable automatic transaction management + await conn.execute( + "SET SESSION CHARACTERISTICS AS TRANSACTION ISOLATION LEVEL READ COMMITTED" + ) + await conn.execute(create_index_sql) + else: + # Non-concurrent index creation can use normal query execution + await self.connection_manager.execute_query(create_index_sql) + except Exception as e: + raise Exception(f"Failed to create index: {e}") + return None + + def _build_filters( + self, filters: dict, parameters: list[Union[str, int]] + ) -> str: + + def parse_condition(key: str, value: Any) -> str: # type: ignore + # nonlocal parameters + if key in self.COLUMN_VARS: + # Handle column-based filters + if isinstance(value, dict): + op, clause = next(iter(value.items())) + if op == "$eq": + parameters.append(clause) + return f"{key} = ${len(parameters)}" + elif op == "$ne": + parameters.append(clause) + return f"{key} != ${len(parameters)}" + elif op == "$in": + parameters.append(clause) + return f"{key} = ANY(${len(parameters)})" + elif op == "$nin": + parameters.append(clause) + return f"{key} != ALL(${len(parameters)})" + elif op == "$overlap": + parameters.append(clause) + return f"{key} && ${len(parameters)}" + elif op == "$contains": + parameters.append(clause) + return f"{key} @> ${len(parameters)}" + elif op == "$any": + if key == "collection_ids": + parameters.append(f"%{clause}%") + return f"array_to_string({key}, ',') LIKE ${len(parameters)}" + parameters.append(clause) + return f"${len(parameters)} = ANY({key})" + else: + raise FilterError( + f"Unsupported operator for column {key}: {op}" + ) + else: + # Handle direct equality + parameters.append(value) + 
return f"{key} = ${len(parameters)}" + else: + # Handle JSON-based filters + json_col = "metadata" + if key.startswith("metadata."): + key = key.split("metadata.")[1] + if isinstance(value, dict): + op, clause = next(iter(value.items())) + if op not in ( + "$eq", + "$ne", + "$lt", + "$lte", + "$gt", + "$gte", + "$in", + "$contains", + ): + raise FilterError("unknown operator") + + if op == "$eq": + parameters.append(json.dumps(clause)) + return ( + f"{json_col}->'{key}' = ${len(parameters)}::jsonb" + ) + elif op == "$ne": + parameters.append(json.dumps(clause)) + return ( + f"{json_col}->'{key}' != ${len(parameters)}::jsonb" + ) + elif op == "$lt": + parameters.append(json.dumps(clause)) + return f"({json_col}->'{key}')::float < (${len(parameters)}::jsonb)::float" + elif op == "$lte": + parameters.append(json.dumps(clause)) + return f"({json_col}->'{key}')::float <= (${len(parameters)}::jsonb)::float" + elif op == "$gt": + parameters.append(json.dumps(clause)) + return f"({json_col}->'{key}')::float > (${len(parameters)}::jsonb)::float" + elif op == "$gte": + parameters.append(json.dumps(clause)) + return f"({json_col}->'{key}')::float >= (${len(parameters)}::jsonb)::float" + elif op == "$in": + if not isinstance(clause, list): + raise FilterError( + "argument to $in filter must be a list" + ) + parameters.append(json.dumps(clause)) + return f"{json_col}->'{key}' = ANY(SELECT jsonb_array_elements(${len(parameters)}::jsonb))" + elif op == "$contains": + if not isinstance(clause, (int, str, float, list)): + raise FilterError( + "argument to $contains filter must be a scalar or array" + ) + parameters.append(json.dumps(clause)) + return ( + f"{json_col}->'{key}' @> ${len(parameters)}::jsonb" + ) + + def parse_filter(filter_dict: dict) -> str: + filter_conditions = [] + for key, value in filter_dict.items(): + if key == "$and": + and_conditions = [ + parse_filter(f) for f in value if f + ] # Skip empty dictionaries + if and_conditions: + filter_conditions.append( + f"({' AND '.join(and_conditions)})" + ) + elif key == "$or": + or_conditions = [ + parse_filter(f) for f in value if f + ] # Skip empty dictionaries + if or_conditions: + filter_conditions.append( + f"({' OR '.join(or_conditions)})" + ) + else: + filter_conditions.append(parse_condition(key, value)) + + # Check if there is only a single condition + if len(filter_conditions) == 1: + return filter_conditions[0] + else: + return " AND ".join(filter_conditions) - table_name = self.collection.table.name - query = text( - f""" - UPDATE {self.project_name}."{table_name}" - SET user_id = NULL - WHERE user_id = :user_id - """ - ) + where_clause = parse_filter(filters) + + return where_clause - with self.vx.Session() as sess: - sess.execute(query, {"user_id": user_id}) - sess.commit() + async def list_indices( + self, table_name: Optional[VectorTableName] = None + ) -> list[dict[str, Any]]: + """ + Lists all vector indices for the specified table. - def delete_collection(self, collection_id: str) -> None: - if self.collection is None: - raise ValueError("Collection is not initialized.") + Args: + table_name (VectorTableName, optional): The table to list indices for. + If None, defaults to VECTORS table. 
- table_name = self.collection.table.name + Returns: + List[dict]: List of indices with their properties - query = text( - f""" - WITH updated AS ( - UPDATE {self.project_name}."{table_name}" - SET collection_ids = array_remove(collection_ids, :collection_id) - WHERE :collection_id = ANY(collection_ids) - RETURNING 1 + Raises: + ArgError: If an invalid table name is provided + """ + if table_name == VectorTableName.VECTORS: + table_name_str = f"{self.project_name}.{VectorTableName.VECTORS}" + col_name = "vec" + elif table_name == VectorTableName.ENTITIES_DOCUMENT: + table_name_str = ( + f"{self.project_name}.{VectorTableName.ENTITIES_DOCUMENT}" ) - SELECT COUNT(*) AS affected_rows FROM updated - """ - ) + col_name = "description_embedding" + elif table_name == VectorTableName.ENTITIES_COLLECTION: + table_name_str = ( + f"{self.project_name}.{VectorTableName.ENTITIES_COLLECTION}" + ) + col_name = "description_embedding" + elif table_name == VectorTableName.COMMUNITIES: + table_name_str = ( + f"{self.project_name}.{VectorTableName.COMMUNITIES}" + ) + col_name = "embedding" + else: + raise ArgError("invalid table name") + + query = """ + SELECT + i.indexname as name, + i.indexdef as definition, + am.amname as method, + pg_relation_size(c.oid) as size_in_bytes, + COALESCE(psat.idx_scan, 0) as number_of_scans, + COALESCE(psat.idx_tup_read, 0) as tuples_read, + COALESCE(psat.idx_tup_fetch, 0) as tuples_fetched + FROM pg_indexes i + JOIN pg_class c ON c.relname = i.indexname + JOIN pg_am am ON c.relam = am.oid + LEFT JOIN pg_stat_user_indexes psat ON psat.indexrelname = i.indexname + AND psat.schemaname = i.schemaname + WHERE i.schemaname || '.' || i.tablename = $1 + AND i.indexdef LIKE $2; + """ - with self.vx.Session() as sess: - try: - result = sess.execute(query, {"collection_id": collection_id}) - row = result.one() - affected_rows = row.affected_rows - sess.commit() + results = await self.connection_manager.fetch_query( + query, (table_name_str, f"%({col_name}%") + ) - if affected_rows == 0: - logger.warning( - f"Collection {collection_id} not found in any documents." - ) - except NoResultFound: - raise ValueError( - f"Unexpected error: No result returned for collection {collection_id}" - ) - except SQLAlchemyError as e: - sess.rollback() - logger.error( - f"Error deleting collection {collection_id}: {str(e)}" - ) - raise + return [ + { + "name": result["name"], + "definition": result["definition"], + "method": result["method"], + "size_in_bytes": result["size_in_bytes"], + "number_of_scans": result["number_of_scans"], + "tuples_read": result["tuples_read"], + "tuples_fetched": result["tuples_fetched"], + } + for result in results + ] - def get_document_chunks( + async def delete_index( self, - document_id: str, - offset: int = 0, - limit: int = -1, - include_vectors: bool = False, - ) -> dict[str, Any]: - if not self.collection: - raise ValueError("Collection is not initialized.") - - limit_clause = f"LIMIT {limit}" if limit != -1 else "" - table_name = self.collection.table.name - - select_clause = "SELECT extraction_id, document_id, user_id, collection_ids, text, metadata" - if include_vectors: - select_clause += ", vec" - - query = text( - f""" - {select_clause}, COUNT(*) OVER() AS total - FROM {self.project_name}."{table_name}" - WHERE document_id = :document_id - ORDER BY CAST(metadata->>'chunk_order' AS INTEGER) - {limit_clause} OFFSET :offset + index_name: str, + table_name: Optional[VectorTableName] = None, + concurrently: bool = True, + ) -> None: """ - ) + Deletes a vector index.
- params = {"document_id": document_id, "offset": offset} - if limit != -1: - params["limit"] = limit + Args: + index_name (str): Name of the index to delete + table_name (VectorTableName, optional): Table the index belongs to + concurrently (bool): Whether to drop the index concurrently - with self.vx.Session() as sess: - results = sess.execute(query, params).fetchall() + Raises: + ArgError: If table name is invalid or index doesn't exist + Exception: If index deletion fails + """ + # Validate table name and get column name + if table_name == VectorTableName.VECTORS: + table_name_str = f"{self.project_name}.{VectorTableName.VECTORS}" + col_name = "vec" + elif table_name == VectorTableName.ENTITIES_DOCUMENT: + table_name_str = ( + f"{self.project_name}.{VectorTableName.ENTITIES_DOCUMENT}" + ) + col_name = "description_embedding" + elif table_name == VectorTableName.ENTITIES_COLLECTION: + table_name_str = ( + f"{self.project_name}.{VectorTableName.ENTITIES_COLLECTION}" + ) + col_name = "description_embedding" + elif table_name == VectorTableName.COMMUNITIES: + table_name_str = ( + f"{self.project_name}.{VectorTableName.COMMUNITIES}" + ) + col_name = "embedding" + else: + raise ArgError("invalid table name") + + # Extract schema and base table name + schema_name, base_table_name = table_name_str.split(".") + + # Verify index exists and is a vector index + query = """ + SELECT indexdef + FROM pg_indexes + WHERE indexname = $1 + AND schemaname = $2 + AND tablename = $3 + AND indexdef LIKE $4 + """ - chunks = [] - total = 0 + result = await self.connection_manager.fetchrow_query( + query, (index_name, schema_name, base_table_name, f"%({col_name}%") + ) - if results: - total = results[0][-1] # Get the total count from the last column - chunks = [ - { - "extraction_id": result[0], - "document_id": result[1], - "user_id": result[2], - "collection_ids": result[3], - "text": result[4], - "metadata": result[5], - "vector": ( - json.loads(result[6]) if include_vectors else None - ), - } - for result in results - ] + if not result: + raise ArgError( + f"Vector index '{index_name}' does not exist on table {table_name_str}" + ) - return {"results": chunks, "total_entries": total} + # Drop the index + concurrently_sql = "CONCURRENTLY" if concurrently else "" + drop_query = ( + f"DROP INDEX {concurrently_sql} {schema_name}.{index_name}" + ) - def close(self) -> None: - if self.vx: - with self.vx.Session() as sess: - sess.close() - if sess.bind: - sess.bind.dispose() # type: ignore + try: + if concurrently: + async with ( + self.connection_manager.pool.get_connection() as conn # type: ignore + ): + # Disable automatic transaction management + await conn.execute( + "SET SESSION CHARACTERISTICS AS TRANSACTION ISOLATION LEVEL READ COMMITTED" + ) + await conn.execute(drop_query) + else: + await self.connection_manager.execute_query(drop_query) + except Exception as e: + raise Exception(f"Failed to delete index: {e}") + + async def get_semantic_neighbors( + self, + document_id: UUID, + chunk_id: UUID, + limit: int = 10, + similarity_threshold: float = 0.5, + ) -> list[dict[str, Any]]: + + table_name = self._get_table_name(PostgresVectorHandler.TABLE_NAME) + query = f""" + WITH target_vector AS ( + SELECT vec FROM {table_name} + WHERE document_id = $1 AND extraction_id = $2 + ) + SELECT t.extraction_id, t.text, t.metadata, t.document_id, (t.vec <=> tv.vec) AS similarity + FROM {table_name} t, target_vector tv + WHERE (t.vec <=> tv.vec) >= $3 + AND t.document_id = $1 + AND t.extraction_id != $2 + ORDER BY 
similarity ASC + LIMIT $4 + """ + results = await self.connection_manager.fetch_query( + query, + (str(document_id), str(chunk_id), similarity_threshold, limit), + ) - logger.info("Closed PGVectorDB connection.") + return [ + { + "extraction_id": str(r["extraction_id"]), + "text": r["text"], + "metadata": json.loads(r["metadata"]), + "document_id": str(r["document_id"]), + "similarity": float(r["similarity"]), + } + for r in results + ] + + def _get_index_options( + self, + method: IndexMethod, + index_arguments: Optional[Union[IndexArgsIVFFlat, IndexArgsHNSW]], + ) -> str: + if method == IndexMethod.ivfflat: + if isinstance(index_arguments, IndexArgsIVFFlat): + return f"WITH (lists={index_arguments.n_lists})" + else: + # Default value if no arguments provided + return "WITH (lists=100)" + elif method == IndexMethod.hnsw: + if isinstance(index_arguments, IndexArgsHNSW): + return f"WITH (m={index_arguments.m}, ef_construction={index_arguments.ef_construction})" + else: + # Default values if no arguments provided + return "WITH (m=16, ef_construction=64)" + else: + return "" # No options for other methods + + def _get_index_type(self, method: IndexMethod) -> str: + if method == IndexMethod.ivfflat: + return "ivfflat" + elif method == IndexMethod.hnsw: + return "hnsw" + elif method == IndexMethod.auto: + # Here you might want to implement logic to choose between ivfflat and hnsw + return "hnsw" + + def _get_index_operator(self, measure: IndexMeasure) -> str: + if measure == IndexMeasure.l2_distance: + return "vector_l2_ops" + elif measure == IndexMeasure.max_inner_product: + return "vector_ip_ops" + elif measure == IndexMeasure.cosine_distance: + return "vector_cosine_ops" + + def _get_distance_function(self, imeasure_obj: IndexMeasure) -> str: + if imeasure_obj == IndexMeasure.cosine_distance: + return "<=>" + elif imeasure_obj == IndexMeasure.l2_distance: + return "l2_distance" + elif imeasure_obj == IndexMeasure.max_inner_product: + return "max_inner_product" diff --git a/py/core/providers/file/postgres.py b/py/core/providers/file/postgres.py index 56627b484..9858132ee 100644 --- a/py/core/providers/file/postgres.py +++ b/py/core/providers/file/postgres.py @@ -1,51 +1,38 @@ import io import logging -from typing import BinaryIO, Optional +from typing import BinaryIO, Optional, Union from uuid import UUID import asyncpg from core.base import FileConfig, R2RException from core.base.providers import FileProvider -from core.providers.database.postgres import PostgresDBProvider +from core.providers.database.postgres import ( + PostgresDBProvider, + SemaphoreConnectionPool, +) logger = logging.getLogger() +# Refactor this to be a `PostgresFileHandler` class PostgresFileProvider(FileProvider): def __init__(self, config: FileConfig, db_provider: PostgresDBProvider): super().__init__(config) self.config: FileConfig = config self.db_provider = db_provider - self.pool = None - - async def __aenter__(self): - await self.initialize() - return self - - async def __aexit__(self, exc_type, exc, tb): - await self._close_connection() - - async def _close_connection(self): - if self.pool: - await self.pool.close() - self.pool = None + self.pool: Optional[SemaphoreConnectionPool] = None # Initialize pool async def initialize(self): - self.pool = await asyncpg.create_pool( - self.db_provider.connection_string - ) - logger.info( - "File provider successfully connected to Postgres database." 
- ) - - async with self.pool.acquire() as conn: + self.pool = self.db_provider.pool + + async with self.pool.get_connection() as conn: await conn.execute('CREATE EXTENSION IF NOT EXISTS "lo";') await self.create_table() def _get_table_name(self, base_name: str) -> str: - return self.db_provider._get_table_name(base_name) + return f"{self.db_provider.project_name}.{base_name}" async def create_table(self): query = f""" @@ -59,7 +46,7 @@ async def create_table(self): updated_at TIMESTAMPTZ DEFAULT NOW() ); """ - async with self.pool.acquire() as conn: + async with self.pool.get_connection() as conn: async with conn.transaction(): await conn.execute(query) @@ -88,7 +75,7 @@ async def upsert_file( file_type = EXCLUDED.file_type, updated_at = NOW(); """ - async with self.pool.acquire() as conn: + async with self.pool.get_connection() as conn: async with conn.transaction(): await conn.execute( query, @@ -109,7 +96,7 @@ async def store_file( ) file_size = file_content.getbuffer().nbytes - async with self.pool.acquire() as conn: + async with self.pool.get_connection() as conn: async with conn.transaction(): oid = await conn.fetchval("SELECT lo_create(0)") await self._write_lobject(conn, oid, file_content) @@ -159,7 +146,7 @@ async def retrieve_file( FROM {self._get_table_name('file_storage')} WHERE document_id = $1 """ - async with self.pool.acquire() as conn: + async with self.pool.get_connection() as conn: async with conn.transaction(): result = await conn.fetchrow(query, document_id) if not result: @@ -231,7 +218,7 @@ async def delete_file(self, document_id: UUID) -> bool: SELECT file_oid FROM {self._get_table_name('file_storage')} WHERE document_id = $1 """ - async with self.pool.acquire() as conn: + async with self.pool.get_connection() as conn: async with conn.transaction(): result = await conn.fetchval(query, document_id) if not result: @@ -267,7 +254,7 @@ async def get_files_overview( ) conditions = [] - params = [] + params: list[Union[str, list[str], int]] = [] query = f""" SELECT document_id, file_name, file_oid, file_size, file_type, created_at, updated_at FROM {self._get_table_name('file_storage')} @@ -287,7 +274,7 @@ async def get_files_overview( query += f" ORDER BY created_at DESC OFFSET ${len(params) + 1} LIMIT ${len(params) + 2}" params.extend([offset, limit]) - async with self.pool.acquire() as conn: + async with self.pool.get_connection() as conn: async with conn.transaction(): results = await conn.fetch(query, *params) diff --git a/py/core/providers/kg/postgres.py b/py/core/providers/kg/postgres.py index fe0ea3366..e97f7e5e7 100644 --- a/py/core/providers/kg/postgres.py +++ b/py/core/providers/kg/postgres.py @@ -1,7 +1,7 @@ import json import logging import time -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple from uuid import UUID import asyncpg @@ -17,10 +17,16 @@ KGProvider, Triple, ) -from shared.abstractions import KGCreationSettings, KGEnrichmentSettings +from shared.abstractions import ( + KGCreationSettings, + KGEnrichmentSettings, + KGEntityDeduplicationSettings, +) +from shared.abstractions.graph import EntityLevel from shared.abstractions.vector import VectorQuantizationType from shared.api.models.kg.responses import ( KGCreationEstimationResponse, + KGDeduplicationEstimationResponse, KGEnrichmentEstimationResponse, ) from shared.utils import _decorate_vector_type, llm_cost_per_million_tokens @@ -28,6 +34,7 @@ logger = logging.getLogger() +# TODO - Refactor this to `PostgresKGHandler` class 
PostgresKGProvider(KGProvider): def __init__( @@ -40,7 +47,7 @@ def __init__( ) -> None: super().__init__(config, *args, **kwargs) - self.db_provider = db_provider.relational + self.db_provider = db_provider self.embedding_provider = embedding_provider try: @@ -52,6 +59,9 @@ def __init__( "NetworkX is not installed. Please install it to use this module." ) from exc + def _get_table_name(self, base_name: str) -> str: + return f"{self.db_provider.project_name}.{base_name}" + async def initialize(self): logger.info( f"Initializing PostgresKGProvider for project {self.db_provider.project_name}" @@ -64,7 +74,9 @@ async def initialize(self): async def execute_query( self, query: str, params: Optional[list[Any]] = None ) -> Any: - return await self.db_provider.execute_query(query, params) + return await self.db_provider.connection_manager.execute_query( + query, params + ) async def execute_many( self, @@ -72,17 +84,18 @@ async def execute_many( params: Optional[list[tuple[Any]]] = None, batch_size: int = 1000, ) -> Any: - return await self.db_provider.execute_many(query, params, batch_size) + return await self.db_provider.connection_manager.execute_many( + query, params, batch_size + ) async def fetch_query( self, query: str, params: Optional[Any] = None, # TODO: make this strongly typed ) -> Any: - return await self.db_provider.fetch_query(query, params) - - def _get_table_name(self, base_name: str) -> str: - return self.db_provider._get_table_name(base_name) + return await self.db_provider.connection_manager.fetch_query( + query, params + ) async def create_tables( self, embedding_dim: int, quantization_type: VectorQuantizationType @@ -95,7 +108,7 @@ async def create_tables( ) query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name("entity_raw")} ( + CREATE TABLE IF NOT EXISTS {self._get_table_name("chunk_entity")} ( id SERIAL PRIMARY KEY, category TEXT NOT NULL, name TEXT NOT NULL, @@ -109,7 +122,7 @@ async def create_tables( # raw triples table, also the final table. this will have embeddings. 
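# Illustrative sketch (not R2R code) of the handler pattern the hunks above
# converge on: table names are qualified with the project schema and all SQL
# is funneled through one shared connection manager. A `connection_manager`
# exposing an asyncpg-style `fetch_query` is an assumption for illustration.
from typing import Any


class SchemaQualifiedHandler:
    def __init__(self, project_name: str, connection_manager: Any) -> None:
        self.project_name = project_name
        self.connection_manager = connection_manager

    def _get_table_name(self, base_name: str) -> str:
        # e.g. "r2r_default.chunk_entity"
        return f"{self.project_name}.{base_name}"

    async def count_rows(self, base_name: str) -> int:
        # Every query goes through the shared manager rather than a local pool.
        query = f"SELECT COUNT(*) AS count FROM {self._get_table_name(base_name)}"
        rows = await self.connection_manager.fetch_query(query, None)
        return rows[0]["count"]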
query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name("triple_raw")} ( + CREATE TABLE IF NOT EXISTS {self._get_table_name("chunk_triple")} ( id SERIAL PRIMARY KEY, subject TEXT NOT NULL, predicate TEXT NOT NULL, @@ -126,7 +139,7 @@ async def create_tables( # embeddings tables query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name("entity_embedding")} ( + CREATE TABLE IF NOT EXISTS {self._get_table_name("document_entity")} ( id SERIAL PRIMARY KEY, name TEXT NOT NULL, description TEXT NOT NULL, @@ -139,9 +152,25 @@ async def create_tables( await self.execute_query(query) + # deduplicated entities table + query = f""" + CREATE TABLE IF NOT EXISTS {self._get_table_name("collection_entity")} ( + id SERIAL PRIMARY KEY, + name TEXT NOT NULL, + description TEXT, + extraction_ids UUID[] NOT NULL, + document_ids UUID[] NOT NULL, + collection_id UUID NOT NULL, + description_embedding {vector_column_str}, + attributes JSONB, + UNIQUE (name, collection_id, attributes) + );""" + + await self.execute_query(query) + # communities table, result of the Leiden algorithm query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name("community")} ( + CREATE TABLE IF NOT EXISTS {self._get_table_name("community_info")} ( id SERIAL PRIMARY KEY, node TEXT NOT NULL, cluster INT NOT NULL, @@ -174,39 +203,53 @@ async def create_tables( await self.execute_query(query) async def _add_objects( - self, objects: list[Any], table_name: str + self, + objects: list[Any], + table_name: str, + conflict_columns: list[str] = [], ) -> asyncpg.Record: """ Upsert objects into the specified table. """ # Get non-null attributes from the first object - non_null_attrs = { - k: v for k, v in objects[0].__dict__.items() if v is not None - } + non_null_attrs = {k: v for k, v in objects[0].items() if v is not None} columns = ", ".join(non_null_attrs.keys()) placeholders = ", ".join(f"${i+1}" for i in range(len(non_null_attrs))) + if conflict_columns: + conflict_columns_str = ", ".join(conflict_columns) + replace_columns_str = ", ".join( + f"{column} = EXCLUDED.{column}" + for column in non_null_attrs.keys() + ) + on_conflict_query = f"ON CONFLICT ({conflict_columns_str}) DO UPDATE SET {replace_columns_str}" + else: + on_conflict_query = "" + QUERY = f""" INSERT INTO {self._get_table_name(table_name)} ({columns}) VALUES ({placeholders}) + {on_conflict_query} """ # Filter out null values for each object params = [ tuple( - json.dumps(v) if isinstance(v, dict) else v - for v in obj.__dict__.values() + (json.dumps(v) if isinstance(v, dict) else v) + for v in obj.values() if v is not None ) for obj in objects ] + return await self.execute_many(QUERY, params) # type: ignore async def add_entities( self, entities: list[Entity], table_name: str, + conflict_columns: list[str] = [], ) -> asyncpg.Record: """ Upsert entities into the entities_raw table. These are raw entities extracted from the document. 
@@ -218,21 +261,32 @@ async def add_entities( Returns: result: asyncpg.Record: result of the upsert operation """ + cleaned_entities = [] for entity in entities: - if entity.description_embedding is not None: - entity.description_embedding = str( # type: ignore - entity.description_embedding - ) + entity_dict = entity.to_dict() + entity_dict["extraction_ids"] = ( + entity_dict["extraction_ids"] + if entity_dict.get("extraction_ids") + else [] + ) + entity_dict["description_embedding"] = ( + str(entity_dict["description_embedding"]) + if entity_dict.get("description_embedding") + else None + ) + cleaned_entities.append(entity_dict) - return await self._add_objects(entities, table_name) + return await self._add_objects( + cleaned_entities, table_name, conflict_columns + ) async def add_triples( self, triples: list[Triple], - table_name: str = "triple_raw", + table_name: str = "chunk_triple", ) -> None: """ - Upsert triples into the triple_raw table. These are raw triples extracted from the document. + Upsert triples into the chunk_triple table. These are raw triples extracted from the document. Args: triples: list[Triple]: list of triples to upsert @@ -241,19 +295,21 @@ async def add_triples( Returns: result: asyncpg.Record: result of the upsert operation """ - return await self._add_objects(triples, table_name) + return await self._add_objects( + [ele.to_dict() for ele in triples], table_name + ) async def add_kg_extractions( self, kg_extractions: list[KGExtraction], - table_suffix: str = "_raw", + table_prefix: str = "chunk_", ) -> Tuple[int, int]: """ Upsert entities and triples into the database. These are raw entities and triples extracted from the document fragments. Args: kg_extractions: list[KGExtraction]: list of KG extractions to upsert - table_suffix: str: suffix to add to the table names + table_prefix: str: prefix to add to the table names Returns: total_entities: int: total number of entities upserted @@ -281,7 +337,7 @@ async def add_kg_extractions( await self.add_entities( extraction.entities, - table_name="entity" + table_suffix, + table_name=table_prefix + "entity", ) if extraction.triples: @@ -294,7 +350,7 @@ async def add_kg_extractions( await self.add_triples( extraction.triples, - table_name="triple" + table_suffix, + table_name=table_prefix + "triple", ) return (total_entities, total_relationships) @@ -306,7 +362,7 @@ async def get_entity_map( QUERY1 = f""" WITH entities_list AS ( SELECT DISTINCT name - FROM {self._get_table_name("entity_raw")} + FROM {self._get_table_name("chunk_entity")} WHERE document_id = $1 ORDER BY name ASC LIMIT {limit} OFFSET {offset} @@ -314,7 +370,7 @@ async def get_entity_map( SELECT e.name, e.description, e.category, (SELECT array_agg(DISTINCT x) FROM unnest(e.extraction_ids) x) AS extraction_ids, e.document_id - FROM {self._get_table_name("entity_raw")} e + FROM {self._get_table_name("chunk_entity")} e JOIN entities_list el ON e.name = el.name GROUP BY e.name, e.description, e.category, e.extraction_ids, e.document_id ORDER BY e.name;""" @@ -335,7 +391,7 @@ async def get_entity_map( WITH entities_list AS ( SELECT DISTINCT name - FROM {self._get_table_name("entity_raw")} + FROM {self._get_table_name("chunk_entity")} WHERE document_id = $1 ORDER BY name ASC LIMIT {limit} OFFSET {offset} @@ -343,7 +399,7 @@ async def get_entity_map( SELECT DISTINCT t.subject, t.predicate, t.object, t.weight, t.description, (SELECT array_agg(DISTINCT x) FROM unnest(t.extraction_ids) x) AS extraction_ids, t.document_id - FROM 
{self._get_table_name("triple_raw")} t + FROM {self._get_table_name("chunk_triple")} t JOIN entities_list el ON t.subject = el.name ORDER BY t.subject, t.predicate, t.object; """ @@ -409,13 +465,18 @@ async def vector_query(self, query: str, **kwargs: Any) -> Any: embedding_type = kwargs.get("embedding_type", "description_embedding") property_names = kwargs.get("property_names", ["name", "description"]) filters = kwargs.get("filters", {}) + entities_level = kwargs.get("entities_level", EntityLevel.DOCUMENT) limit = kwargs.get("limit", 10) table_name = "" if search_type == "__Entity__": - table_name = "entity_embedding" + table_name = ( + "collection_entity" + if entities_level == EntityLevel.COLLECTION + else "document_entity" + ) elif search_type == "__Relationship__": - table_name = "triple_raw" + table_name = "chunk_triple" elif search_type == "__Community__": table_name = "community_report" else: @@ -476,14 +537,14 @@ async def get_all_triples(self, collection_id: UUID) -> List[Triple]: document_ids = [doc_id["document_id"] for doc_id in document_ids] QUERY = f""" - SELECT id, subject, predicate, weight, object FROM {self._get_table_name("triple_raw")} WHERE document_id = ANY($1) + SELECT id, subject, predicate, weight, object FROM {self._get_table_name("chunk_triple")} WHERE document_id = ANY($1) """ triples = await self.fetch_query(QUERY, [document_ids]) return [Triple(**triple) for triple in triples] async def add_communities(self, communities: List[Any]) -> None: QUERY = f""" - INSERT INTO {self._get_table_name("community")} (node, cluster, parent_cluster, level, is_final_cluster, triple_ids, collection_id) + INSERT INTO {self._get_table_name("community_info")} (node, cluster, parent_cluster, level, is_final_cluster, triple_ids, collection_id) VALUES ($1, $2, $3, $4, $5, $6, $7) """ await self.execute_many(QUERY, communities) @@ -683,45 +744,61 @@ async def _compute_leiden_communities( raise ImportError("Please install the graspologic package.") from e async def get_community_details( - self, community_number: int + self, community_number: int, collection_id: UUID ) -> Tuple[int, List[Dict[str, Any]], List[Dict[str, Any]]]: QUERY = f""" - SELECT level FROM {self._get_table_name("community")} WHERE cluster = $1 + SELECT level FROM {self._get_table_name("community_info")} WHERE cluster = $1 AND collection_id = $2 LIMIT 1 """ - level = (await self.fetch_query(QUERY, [community_number]))[0]["level"] + level = ( + await self.fetch_query(QUERY, [community_number, collection_id]) + )[0]["level"] + + # selecting table name based on entity level + # check if there are any entities in the community that are not in the entity_embedding table + query = f""" + SELECT COUNT(*) FROM {self._get_table_name("collection_entity")} WHERE collection_id = $1 + """ + entity_count = (await self.fetch_query(query, [collection_id]))[0][ + "count" + ] + table_name = ( + "collection_entity" if entity_count > 0 else "document_entity" + ) QUERY = f""" WITH node_triple_ids AS ( - SELECT node, triple_ids - FROM {self._get_table_name("community")} - WHERE cluster = $1 + FROM {self._get_table_name("community_info")} + WHERE cluster = $1 AND collection_id = $2 ) SELECT DISTINCT e.id AS id, e.name AS name, e.description AS description FROM node_triple_ids nti - JOIN {self._get_table_name("entity_embedding")} e ON e.name = nti.node; + JOIN {self._get_table_name(table_name)} e ON e.name = nti.node; """ - entities = await self.fetch_query(QUERY, [community_number]) + entities = await self.fetch_query( + QUERY, 
[community_number, collection_id] + ) entities = [Entity(**entity) for entity in entities] QUERY = f""" WITH node_triple_ids AS ( - SELECT node, triple_ids - FROM {self._get_table_name("community")} - WHERE cluster = $1 + FROM {self._get_table_name("community_info")} + WHERE cluster = $1 and collection_id = $2 ) SELECT DISTINCT t.id, t.subject, t.predicate, t.object, t.weight, t.description FROM node_triple_ids nti - JOIN {self._get_table_name("triple_raw")} t ON t.id = ANY(nti.triple_ids); + JOIN {self._get_table_name("chunk_triple")} t ON t.id = ANY(nti.triple_ids); """ - triples = await self.fetch_query(QUERY, [community_number]) + triples = await self.fetch_query( + QUERY, [community_number, collection_id] + ) triples = [Triple(**triple) for triple in triples] return level, entities, triples @@ -763,23 +840,40 @@ async def delete_graph_for_collection( return # remove all triples for these documents. - QUERY = f""" - DELETE FROM {self._get_table_name("community")} WHERE collection_id = $1; - DELETE FROM {self._get_table_name("community_report")} WHERE collection_id = $1; - """ + DELETE_QUERIES = [ + f"DELETE FROM {self._get_table_name('community_info')} WHERE collection_id = $1;", + f"DELETE FROM {self._get_table_name('community_report')} WHERE collection_id = $1;", + ] - document_ids = await self.db_provider.documents_in_collection( + document_ids_response = await self.db_provider.documents_in_collection( collection_id ) + # This type ignore is due to insufficient typing of the documents_in_collection method + document_ids = [doc.id for doc in document_ids_response["results"]] # type: ignore + + # TODO: make these queries more efficient. Pass the document_ids as params. if cascade: - QUERY += f""" - DELETE FROM {self._get_table_name("entity_raw")} WHERE document_id = ANY($1); - DELETE FROM {self._get_table_name("triple_raw")} WHERE document_id = ANY($1); - DELETE FROM {self._get_table_name("entity_embedding")} WHERE document_id = ANY($1); + DELETE_QUERIES += [ + f"DELETE FROM {self._get_table_name('chunk_entity')} WHERE document_id = ANY($1::uuid[]);", + f"DELETE FROM {self._get_table_name('chunk_triple')} WHERE document_id = ANY($1::uuid[]);", + f"DELETE FROM {self._get_table_name('document_entity')} WHERE document_id = ANY($1::uuid[]);", + f"DELETE FROM {self._get_table_name('collection_entity')} WHERE collection_id = $1;", + ] + + # setting the kg_creation_status to PENDING for this collection. + QUERY = f""" + UPDATE {self._get_table_name("document_info")} SET kg_extraction_status = $1 WHERE $2::uuid = ANY(collection_ids) """ + await self.execute_query( + QUERY, [KGExtractionStatus.PENDING, collection_id] + ) - await self.execute_query(QUERY, [document_ids]) + for query in DELETE_QUERIES: + if "community" in query or "collection_entity" in query: + await self.execute_query(query, [collection_id]) + else: + await self.execute_query(query, [document_ids]) # set status to PENDING for this collection. 
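# Sketch of the parameter routing used by delete_graph_for_collection above:
# collection-scoped tables (community_info, community_report,
# collection_entity) are keyed by the collection id, while chunk- and
# document-level tables take the list of document ids. `execute_query` stands
# in for the provider's async executor; this is illustrative, not the R2R API.
from typing import Any, Awaitable, Callable
from uuid import UUID


async def run_graph_deletes(
    execute_query: Callable[[str, list[Any]], Awaitable[Any]],
    delete_queries: list[str],
    collection_id: UUID,
    document_ids: list[UUID],
) -> None:
    for query in delete_queries:
        # Collection-level tables are filtered on collection_id ...
        if "community" in query or "collection_entity" in query:
            await execute_query(query, [collection_id])
        # ... everything else on the documents in that collection.
        else:
            await execute_query(query, [document_ids])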
QUERY = f""" @@ -804,9 +898,9 @@ async def delete_node_via_document_id( # Execute separate DELETE queries delete_queries = [ - f"DELETE FROM {self._get_table_name('entity_raw')} WHERE document_id = $1", - f"DELETE FROM {self._get_table_name('triple_raw')} WHERE document_id = $1", - f"DELETE FROM {self._get_table_name('entity_embedding')} WHERE document_id = $1", + f"DELETE FROM {self._get_table_name('chunk_entity')} WHERE document_id = $1", + f"DELETE FROM {self._get_table_name('chunk_triple')} WHERE document_id = $1", + f"DELETE FROM {self._get_table_name('document_entity')} WHERE document_id = $1", ] for query in delete_queries: @@ -821,7 +915,7 @@ async def delete_node_via_document_id( if count == 0: # If it's the last document, delete collection-related data collection_queries = [ - f"DELETE FROM {self._get_table_name('community')} WHERE collection_id = $1", + f"DELETE FROM {self._get_table_name('community_info')} WHERE collection_id = $1", f"DELETE FROM {self._get_table_name('community_report')} WHERE collection_id = $1", ] for query in collection_queries: @@ -849,7 +943,7 @@ async def get_existing_entity_extraction_ids( self, document_id: UUID ) -> list[str]: QUERY = f""" - SELECT DISTINCT unnest(extraction_ids) AS extraction_id FROM {self._get_table_name("entity_raw")} WHERE document_id = $1 + SELECT DISTINCT unnest(extraction_ids) AS extraction_id FROM {self._get_table_name("chunk_entity")} WHERE document_id = $1 """ extraction_ids = [ item["extraction_id"] @@ -865,16 +959,13 @@ async def get_creation_estimate( document_ids = [ doc.id for doc in ( - await self.db_provider.documents_in_collection(collection_id) + await self.db_provider.documents_in_collection(collection_id) # type: ignore )["results"] ] - # TODO: Vecs schema naming got messed up somewhere. 
- schema_name = self._get_table_name("document_chunks").split(".")[0] - query = f""" SELECT document_id, COUNT(*) as chunk_count - FROM {schema_name}.{schema_name} + FROM {self._get_table_name("vectors")} WHERE document_id = ANY($1) GROUP BY document_id """ @@ -951,12 +1042,12 @@ async def get_enrichment_estimate( document_ids = [ doc.id for doc in ( - await self.db_provider.documents_in_collection(collection_id) + await self.db_provider.documents_in_collection(collection_id) # type: ignore )["results"] ] QUERY = f""" - SELECT COUNT(*) FROM {self._get_table_name("entity_embedding")} WHERE document_id = ANY($1); + SELECT COUNT(*) FROM {self._get_table_name("document_entity")} WHERE document_id = ANY($1); """ entity_count = (await self.fetch_query(QUERY, [document_ids]))[0][ "count" @@ -968,7 +1059,7 @@ async def get_enrichment_estimate( ) QUERY = f""" - SELECT COUNT(*) FROM {self._get_table_name("triple_raw")} WHERE document_id = ANY($1); + SELECT COUNT(*) FROM {self._get_table_name("chunk_triple")} WHERE document_id = ANY($1); """ triple_count = (await self.fetch_query(QUERY, [document_ids]))[0][ "count" @@ -1028,9 +1119,10 @@ async def get_entities( self, collection_id: UUID, offset: int = 0, - limit: int = 100, + limit: int = -1, entity_ids: Optional[List[str]] = None, - entity_table_name: str = "entity_embedding", + entity_names: Optional[List[str]] = None, + entity_table_name: str = "document_entity", ) -> dict: conditions = [] params: list = [collection_id] @@ -1039,9 +1131,32 @@ async def get_entities( conditions.append(f"id = ANY(${len(params) + 1})") params.append(entity_ids) - params.extend([offset, limit]) + if entity_names: + conditions.append(f"name = ANY(${len(params) + 1})") + params.append(entity_names) - query = f""" + if limit != -1: + params.extend([offset, limit]) + offset_limit_clause = ( + f"OFFSET ${len(params) - 1} LIMIT ${len(params)}" + ) + else: + params.append(offset) + offset_limit_clause = f"OFFSET ${len(params)}" + + if entity_table_name == "collection_entity": + # entity deduplicated table has document_ids, not document_id. + # we directly use the collection_id to get the entities list. 
+ query = f""" + SELECT id, name, description, extraction_ids, document_ids + FROM {self._get_table_name(entity_table_name)} + WHERE collection_id = $1 + {" AND " + " AND ".join(conditions) if conditions else ""} + ORDER BY id + {offset_limit_clause} + """ + else: + query = f""" SELECT id, name, description, extraction_ids, document_id FROM {self._get_table_name(entity_table_name)} WHERE document_id = ANY( @@ -1050,8 +1165,9 @@ async def get_entities( ) {" AND " + " AND ".join(conditions) if conditions else ""} ORDER BY id - OFFSET ${len(params) - 1} LIMIT ${len(params)} + {offset_limit_clause} """ + results = await self.fetch_query(query, params) entities = [Entity(**entity) for entity in results] @@ -1085,7 +1201,7 @@ async def get_triples( query = f""" SELECT id, subject, predicate, object, description - FROM {self._get_table_name("triple_raw")} + FROM {self._get_table_name("chunk_triple")} WHERE document_id = ANY( SELECT document_id FROM {self._get_table_name("document_info")} WHERE $1 = ANY(collection_ids) @@ -1121,7 +1237,7 @@ async def get_entity_count( collection_id: Optional[UUID] = None, document_id: Optional[UUID] = None, distinct: bool = False, - entity_table_name: str = "entity_embedding", + entity_table_name: str = "document_entity", ) -> int: if collection_id is None and document_id is None: raise ValueError( @@ -1131,19 +1247,31 @@ async def get_entity_count( conditions = [] params = [] - if collection_id: - conditions.append( - f""" - document_id = ANY( - SELECT document_id FROM {self._get_table_name("document_info")} - WHERE $1 = ANY(collection_ids) + if entity_table_name == "collection_entity": + + if document_id: + raise ValueError( + "document_id is not supported for collection_entity table" ) - """ - ) - params.append(str(collection_id)) + + if collection_id: + conditions.append("collection_id = $1") + params.append(str(collection_id)) + else: - conditions.append("document_id = $1") - params.append(str(document_id)) + if collection_id: + conditions.append( + f""" + document_id = ANY( + SELECT document_id FROM {self._get_table_name("document_info")} + WHERE $1 = ANY(collection_ids) + ) + """ + ) + params.append(str(collection_id)) + else: + conditions.append("document_id = $1") + params.append(str(document_id)) if distinct: count_value = "DISTINCT name" @@ -1184,7 +1312,84 @@ async def get_triple_count( params.append(str(document_id)) QUERY = f""" - SELECT COUNT(*) FROM {self._get_table_name("triple_raw")} + SELECT COUNT(*) FROM {self._get_table_name("chunk_triple")} WHERE {" AND ".join(conditions)} """ return (await self.fetch_query(QUERY, params))[0]["count"] + + async def update_entity_descriptions(self, entities: list[Entity]): + + query = f""" + UPDATE {self._get_table_name("collection_entity")} + SET description = $3, description_embedding = $4 + WHERE name = $1 AND collection_id = $2 + """ + + inputs = [ + ( + entity.name, + entity.collection_id, + entity.description, + entity.description_embedding, + ) + for entity in entities + ] + + await self.execute_many(query, inputs) # type: ignore + + async def get_deduplication_estimate( + self, + collection_id: UUID, + kg_deduplication_settings: KGEntityDeduplicationSettings, + ): + # number of documents in collection + query = f""" + SELECT name, count(name) + FROM {self._get_table_name("entity_embedding")} + WHERE document_id = ANY( + SELECT document_id FROM {self._get_table_name("document_info")} + WHERE $1 = ANY(collection_ids) + ) + GROUP BY name + HAVING count(name) >= 5 + """ + entities = await 
self.fetch_query(query, [collection_id]) + num_entities = len(entities) + + estimated_llm_calls = (num_entities, num_entities) + estimated_total_in_out_tokens_in_millions = ( + estimated_llm_calls[0] * 1000 / 1000000, + estimated_llm_calls[1] * 5000 / 1000000, + ) + estimated_cost_in_usd = ( + estimated_total_in_out_tokens_in_millions[0] + * llm_cost_per_million_tokens( + kg_deduplication_settings.generation_config.model + ), + estimated_total_in_out_tokens_in_millions[1] + * llm_cost_per_million_tokens( + kg_deduplication_settings.generation_config.model + ), + ) + + estimated_total_time_in_minutes = ( + estimated_total_in_out_tokens_in_millions[0] * 10 / 60, + estimated_total_in_out_tokens_in_millions[1] * 10 / 60, + ) + + return KGDeduplicationEstimationResponse( + message='Ran Deduplication Estimate (not the actual run). Note that these are estimated ranges, actual values may vary. To run the Deduplication process, run `deduplicate-entities` with `--run` in the cli, or `run_type="run"` in the client.', + num_entities=num_entities, + estimated_llm_calls=self._get_str_estimation_output( + estimated_llm_calls + ), + estimated_total_in_out_tokens_in_millions=self._get_str_estimation_output( + estimated_total_in_out_tokens_in_millions + ), + estimated_cost_in_usd=self._get_str_estimation_output( + estimated_cost_in_usd + ), + estimated_total_time_in_minutes=self._get_str_estimation_output( + estimated_total_time_in_minutes + ), + ) diff --git a/py/core/providers/prompts/defaults/chunk_enrichment.yaml b/py/core/providers/prompts/defaults/chunk_enrichment.yaml new file mode 100644 index 000000000..54c72807c --- /dev/null +++ b/py/core/providers/prompts/defaults/chunk_enrichment.yaml @@ -0,0 +1,27 @@ +chunk_enrichment: + template: > + ## Task: + + Enrich and refine the given chunk of text using information from the provided context chunks. The goal is to make the chunk more precise and self-contained. + + ## Context Chunks: + {context_chunks} + + ## Chunk to Enrich: + {chunk} + + ## Instructions: + 1. Rewrite the chunk in third person. + 2. Replace all common nouns with appropriate proper nouns. Use specific names, titles, or identifiers instead of general terms. + 3. Use information from the context chunks to enhance the clarity and precision of the given chunk. + 4. Ensure the enriched chunk remains independent and self-contained. + 5. Do not incorporate specific information or details from other chunks into this one. + 6. Focus on making the chunk more informative and precise within its own scope. + 7. Maintain the original meaning and intent of the chunk while improving its clarity and usefulness. + 8. Just output the enriched chunk. Do not include any other text. + + ## Enriched Chunk: + + input_types: + chunk: str + context_chunks: str diff --git a/py/core/providers/prompts/defaults/graphrag_entity_deduplication.yaml b/py/core/providers/prompts/defaults/graphrag_entity_deduplication.yaml new file mode 100644 index 000000000..41412c2dc --- /dev/null +++ b/py/core/providers/prompts/defaults/graphrag_entity_deduplication.yaml @@ -0,0 +1,24 @@ +graphrag_entity_deduplication: + template: | + You are an expert at deduplicating entity descriptions. You are given a list of entity descriptions and you need to merge them into a single description. + + Entity Name: + {entity_name} + + Entity Descriptions: + {entity_descriptions} + + Your summary should: + 1. Clearly define the entity's core concept or purpose. + 2. Integrate any relevant information from the existing description. + 3. 
Maintain a neutral, factual tone. + 4. Make sure that all information from the original descriptions is included, but not repeated. + 5. Do not hallucinate any information; use only the information provided. + + Return the summary in the following format. Do not output anything else. + + $$$$ + + input_types: + entity_name: str + entity_descriptions: str diff --git a/py/core/providers/prompts/defaults/prompt_tuning.yaml b/py/core/providers/prompts/defaults/prompt_tuning.yaml new file mode 100644 index 000000000..7f2f3d687 --- /dev/null +++ b/py/core/providers/prompts/defaults/prompt_tuning.yaml @@ -0,0 +1,25 @@ +prompt_tuning_task: + template: | + You are an expert prompt tuner whose task is to adjust the current prompt template to better suit the specified domain and specialization. + Current prompt template to be tuned: + ``` + {prompt_template} + ``` + + The current prompt has these input types: + {input_types} + + And here is a sample of the data that will be used with this prompt: + {sample_data} + + Please generate a new version of this prompt that: + 1. Maintains the same basic structure and required input types + 2. Is specialized for the given domain and specialization + 3. Emphasizes the specified relationships + 4. Pays special attention to the specific fields + 5. Maintains clarity and conciseness + + Return only the new prompt template, maintaining the exact format required for the input types. + input_types: + prompt_template: str + input_types: str + sample_data: str diff --git a/py/core/providers/prompts/r2r_prompts.py b/py/core/providers/prompts/r2r_prompts.py index 68b53702d..564d18fa4 100644 --- a/py/core/providers/prompts/r2r_prompts.py +++ b/py/core/providers/prompts/r2r_prompts.py @@ -9,6 +9,7 @@ from core.base import DatabaseProvider, Prompt, PromptConfig, PromptProvider from core.base.utils import generate_default_prompt_id +from core.providers.database.postgres import SemaphoreConnectionPool logger = logging.getLogger() @@ -19,7 +20,7 @@ def __init__(self, config: PromptConfig, db_provider: DatabaseProvider): self.prompts: dict[str, Prompt] = {} self.config: PromptConfig = config self.db_provider = db_provider - self.pool: Optional[asyncpg.pool.Pool] = None # Initialize pool + self.pool: Optional[SemaphoreConnectionPool] = None # Initialize pool async def __aenter__(self): await self.initialize() @@ -35,14 +36,9 @@ async def _close_connection(self): async def initialize(self): try: - self.pool = await asyncpg.create_pool( - self.db_provider.connection_string - ) - logger.info( - "R2RPromptProvider successfully connected to Postgres database."
- ) + self.pool = self.db_provider.pool - async with self.pool.acquire() as conn: + async with self.pool.get_connection() as conn: await conn.execute('CREATE EXTENSION IF NOT EXISTS "lo";') await self.create_table() @@ -53,7 +49,7 @@ async def initialize(self): raise def _get_table_name(self, base_name: str) -> str: - return self.db_provider._get_table_name(base_name) + return f"{self.db_provider.project_name}.{base_name}" async def create_table(self): query = f""" @@ -78,7 +74,7 @@ async def execute_query( ) -> Any: if not self.pool: raise ConnectionError("Database pool is not initialized.") - async with self.pool.acquire() as conn: + async with self.pool.get_connection() as conn: async with conn.transaction(): if params: return await conn.execute(query, *params) @@ -89,7 +85,7 @@ async def fetch_query( ) -> Any: if not self.pool: raise ConnectionError("Database pool is not initialized.") - async with self.pool.acquire() as conn: + async with self.pool.get_connection() as conn: async with conn.transaction(): return ( await conn.fetch(query, *params) diff --git a/py/core/utils/__init__.py b/py/core/utils/__init__.py index 4a98fc4b4..2c7b88bcf 100644 --- a/py/core/utils/__init__.py +++ b/py/core/utils/__init__.py @@ -8,7 +8,6 @@ generate_default_user_collection_id, generate_document_id, generate_extraction_id, - generate_message_id, generate_run_id, generate_user_id, increment_version, @@ -29,7 +28,6 @@ "generate_run_id", "generate_document_id", "generate_extraction_id", - "generate_message_id", "generate_collection_id_from_name", "generate_user_id", "increment_version", diff --git a/py/migrations/README b/py/migrations/README new file mode 100644 index 000000000..2500aa1bc --- /dev/null +++ b/py/migrations/README @@ -0,0 +1 @@ +Generic single-database configuration. diff --git a/py/migrations/env.py b/py/migrations/env.py new file mode 100644 index 000000000..47e0d1559 --- /dev/null +++ b/py/migrations/env.py @@ -0,0 +1,89 @@ +import os +from logging.config import fileConfig + +from alembic import context +from sqlalchemy import engine_from_config, pool, text + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. 
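# Usage sketch for the schema-aware Alembic setup below (the exact alembic.ini
# location is an assumption; adjust to your checkout). The target schema comes
# from R2R_PROJECT_NAME, defaulting to "r2r_default":
#
#   R2R_PROJECT_NAME=my_project alembic upgrade head
#
# With that setting, get_schema_name() returns "my_project", the version table
# is created as my_project.my_project_alembic_version, and include_object()
# filters out objects from any other schema.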
+if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +target_metadata = None + + +def get_schema_name(): + """Get the schema name from environment or config.""" + return os.environ.get("R2R_PROJECT_NAME", "r2r_default") + + +def include_object(object, name, type_, reflected, compare_to): + """Filter objects based on schema.""" + # Include only objects in our schema + if hasattr(object, "schema"): + return object.schema == get_schema_name() + return True + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode.""" + url = config.get_main_option("sqlalchemy.url") + schema_name = get_schema_name() + + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + include_schemas=True, + include_object=include_object, + version_table_schema=schema_name, + version_table=f"{schema_name}_alembic_version", + ) + + with context.begin_transaction(): + # Ensure schema exists + context.execute(text(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")) + context.run_migrations() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode.""" + schema_name = get_schema_name() + + connectable = engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + # Ensure schema exists + connection.execute(text(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")) + connection.commit() + + context.configure( + connection=connection, + target_metadata=target_metadata, + include_schemas=True, + include_object=include_object, + version_table_schema=schema_name, + version_table=f"{schema_name}_alembic_version", + ) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/py/migrations/script.py.mako b/py/migrations/script.py.mako new file mode 100644 index 000000000..d6bdcd467 --- /dev/null +++ b/py/migrations/script.py.mako @@ -0,0 +1,54 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} +Schema: %(schema)s +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. 
+revision: str = ${repr(up_revision)}
+down_revision: Union[str, None] = ${repr(down_revision)}
+branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
+depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
+
+def upgrade() -> None:
+    # Get the schema name from the migration context
+    schema = op.get_context().version_table_schema
+
+    """
+    ### Schema-aware migration
+    All table operations should include the schema name, for example:
+
+    op.create_table(
+        'my_table',
+        sa.Column('id', sa.Integer(), nullable=False),
+        sa.Column('name', sa.String(), nullable=True),
+        schema=schema
+    )
+
+    op.create_index(
+        'idx_my_table_name',
+        'my_table',
+        ['name'],
+        schema=schema
+    )
+    """
+    ${upgrades if upgrades else "pass"}
+
+def downgrade() -> None:
+    # Get the schema name from the migration context
+    schema = op.get_context().version_table_schema
+
+    """
+    ### Schema-aware downgrade
+    Remember to include schema in all operations, for example:
+
+    op.drop_table('my_table', schema=schema)
+    """
+    ${downgrades if downgrades else "pass"}
diff --git a/py/migrations/versions/d342e632358a_migrate_to_asyncpg.py b/py/migrations/versions/d342e632358a_migrate_to_asyncpg.py
new file mode 100644
index 000000000..e6dca0c4d
--- /dev/null
+++ b/py/migrations/versions/d342e632358a_migrate_to_asyncpg.py
@@ -0,0 +1,168 @@
+"""migrate_to_asyncpg
+
+Revision ID: d342e632358a
+Revises:
+Create Date: 2024-10-22 11:55:49.461015
+
+"""
+
+import os
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects import postgresql
+from sqlalchemy.types import UserDefinedType
+
+# revision identifiers, used by Alembic.
+revision: str = "d342e632358a"
+down_revision: Union[str, None] = None
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+project_name = os.getenv("R2R_PROJECT_NAME") or "r2r_default"
+
+new_vector_table_name = "vectors"
+old_vector_table_name = project_name
+
+
+class Vector(UserDefinedType):
+    def get_col_spec(self, **kw):
+        return "vector"
+
+
+def upgrade() -> None:
+    # Create required extensions
+    op.execute("CREATE EXTENSION IF NOT EXISTS vector")
+    op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm")
+    op.execute("CREATE EXTENSION IF NOT EXISTS btree_gin")
+
+    # KG table migrations
+    op.execute(
+        f"ALTER TABLE IF EXISTS {project_name}.entity_raw RENAME TO chunk_entity"
+    )
+    op.execute(
+        f"ALTER TABLE IF EXISTS {project_name}.triple_raw RENAME TO chunk_triple"
+    )
+    op.execute(
+        f"ALTER TABLE IF EXISTS {project_name}.entity_embedding RENAME TO document_entity"
+    )
+    op.execute(
+        f"ALTER TABLE IF EXISTS {project_name}.community RENAME TO community_info"
+    )
+
+    # Create the new table
+    op.create_table(
+        new_vector_table_name,
+        sa.Column("extraction_id", postgresql.UUID(), nullable=False),
+        sa.Column("document_id", postgresql.UUID(), nullable=False),
+        sa.Column("user_id", postgresql.UUID(), nullable=False),
+        sa.Column(
+            "collection_ids",
+            postgresql.ARRAY(postgresql.UUID()),
+            server_default="{}",
+        ),
+        sa.Column("vec", Vector),  # This will be handled as a vector type
+        sa.Column("text", sa.Text(), nullable=True),
+        sa.Column(
+            "fts",
+            postgresql.TSVECTOR,
+            nullable=False,
+            server_default=sa.text("to_tsvector('english'::regconfig, '')"),
+        ),
+        sa.Column(
+            "metadata", postgresql.JSONB(), server_default="{}", nullable=False
+        ),
+        sa.PrimaryKeyConstraint("extraction_id"),
+        schema=project_name,
+    )
+
+    # Create indices
+    op.create_index(
+        "idx_vectors_document_id",
+        new_vector_table_name,
+        ["document_id"],
+        schema=project_name,
+    )
+
+    op.create_index(
+        "idx_vectors_user_id",
+        new_vector_table_name,
+        ["user_id"],
+        schema=project_name,
+    )
+
+    op.create_index(
+        "idx_vectors_collection_ids",
+        new_vector_table_name,
+        ["collection_ids"],
+        schema=project_name,
+        postgresql_using="gin",
+    )
+
+    op.create_index(
+        "idx_vectors_fts",
+        new_vector_table_name,
+        ["fts"],
+        schema=project_name,
+        postgresql_using="gin",
+    )
+
+    # Migrate data from the old vector table, which the previous layout
+    # named after the project itself
+    op.execute(
+        f"""
+        INSERT INTO {project_name}.{new_vector_table_name}
+        (extraction_id, document_id, user_id, collection_ids, vec, text, metadata)
+        SELECT
+            extraction_id,
+            document_id,
+            user_id,
+            collection_ids,
+            vec,
+            text,
+            metadata
+        FROM {project_name}.{old_vector_table_name}
+        """
+    )
+
+    # Verify the migration by comparing row counts between the two tables
+    bind = op.get_bind()
+    old_count = bind.execute(
+        sa.text(f"SELECT COUNT(*) FROM {project_name}.{old_vector_table_name}")
+    ).scalar()
+    new_count = bind.execute(
+        sa.text(f"SELECT COUNT(*) FROM {project_name}.{new_vector_table_name}")
+    ).scalar()
+    if old_count != new_count:
+        raise RuntimeError(
+            f"Vector migration failed: copied {new_count} of {old_count} rows"
+        )
+
+    # Counts match, so it is safe to drop the old table
+    op.execute(
+        f"""
+        DROP TABLE IF EXISTS {project_name}.{old_vector_table_name};
+        """
+    )
+
+
+def downgrade() -> None:
+    # Drop all indices
+    op.drop_index("idx_vectors_fts", schema=project_name)
+    op.drop_index("idx_vectors_collection_ids", schema=project_name)
+    op.drop_index("idx_vectors_user_id", schema=project_name)
+    op.drop_index("idx_vectors_document_id", schema=project_name)
+
+    # Drop the new table
+    op.drop_table(new_vector_table_name, schema=project_name)
+
+    # Revert KG table migrations
+    op.execute(
+        f"ALTER TABLE IF EXISTS {project_name}.chunk_entity RENAME TO entity_raw"
+    )
+    op.execute(
+        f"ALTER TABLE IF EXISTS {project_name}.chunk_triple RENAME TO triple_raw"
+    )
+    op.execute(
+        f"ALTER TABLE IF EXISTS {project_name}.document_entity RENAME TO entity_embedding"
+    )
+    op.execute(
+        f"ALTER TABLE IF EXISTS {project_name}.community_info RENAME TO community"
+    )
+optional = false +python-versions = ">=3.8" +files = [ + {file = "alembic-1.13.3-py3-none-any.whl", hash = "sha256:908e905976d15235fae59c9ac42c4c5b75cfcefe3d27c0fbf7ae15a37715d80e"}, + {file = "alembic-1.13.3.tar.gz", hash = "sha256:203503117415561e203aa14541740643a611f641517f0209fcae63e9fa09f1a2"}, +] + +[package.dependencies] +Mako = "*" +SQLAlchemy = ">=1.3.0" +typing-extensions = ">=4" + +[package.extras] +tz = ["backports.zoneinfo"] + [[package]] name = "annotated-types" version = "0.7.0" @@ -1573,7 +1592,7 @@ files = [ name = "greenlet" version = "3.1.1" description = "Lightweight in-process concurrent programming" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "greenlet-3.1.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:0bbae94a29c9e5c7e4a2b7f0aae5c17e8e90acbfd3bf6270eeba60c39fce3563"}, @@ -2576,6 +2595,25 @@ html5 = ["html5lib"] htmlsoup = ["BeautifulSoup4"] source = ["Cython (>=3.0.11)"] +[[package]] +name = "mako" +version = "1.3.6" +description = "A super-fast templating language that borrows the best ideas from the existing templating languages." +optional = false +python-versions = ">=3.8" +files = [ + {file = "Mako-1.3.6-py3-none-any.whl", hash = "sha256:a91198468092a2f1a0de86ca92690fb0cfc43ca90ee17e15d93662b4c04b241a"}, + {file = "mako-1.3.6.tar.gz", hash = "sha256:9ec3a1583713479fae654f83ed9fa8c9a4c16b7bb0daba0e6bbebff50c0d983d"}, +] + +[package.dependencies] +MarkupSafe = ">=0.9.2" + +[package.extras] +babel = ["Babel"] +lingua = ["lingua"] +testing = ["pytest"] + [[package]] name = "markdown" version = "3.7" @@ -2595,7 +2633,7 @@ testing = ["coverage", "pyyaml"] name = "markupsafe" version = "3.0.1" description = "Safely add untrusted strings to HTML/XML markup." -optional = true +optional = false python-versions = ">=3.9" files = [ {file = "MarkupSafe-3.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:db842712984e91707437461930e6011e60b39136c7331e971952bb30465bc1a1"}, @@ -4730,7 +4768,7 @@ files = [ name = "sqlalchemy" version = "2.0.35" description = "Database Abstraction Library" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "SQLAlchemy-2.0.35-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:67219632be22f14750f0d1c70e62f204ba69d28f62fd6432ba05ab295853de9b"}, @@ -5728,10 +5766,10 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", type = ["pytest-mypy"] [extras] -core = ["aiosqlite", "apscheduler", "asyncpg", "bcrypt", "boto3", "deepdiff", "fire", "fsspec", "future", "graspologic", "gunicorn", "hatchet-sdk", "litellm", "networkx", "ollama", "passlib", "psutil", "pydantic", "pyjwt", "python-multipart", "pyyaml", "sqlalchemy", "supabase", "tokenizers", "unstructured-client", "uvicorn", "vecs"] +core = ["aiosqlite", "alembic", "apscheduler", "asyncpg", "bcrypt", "boto3", "deepdiff", "fire", "fsspec", "future", "graspologic", "gunicorn", "hatchet-sdk", "litellm", "networkx", "ollama", "passlib", "psutil", "pydantic", "pyjwt", "python-multipart", "pyyaml", "sqlalchemy", "supabase", "tokenizers", "unstructured-client", "uvicorn", "vecs"] ingestion-bundle = ["aiofiles", "aioshutil", "beautifulsoup4", "bs4", "markdown", "numpy", "openpyxl", "pdf2image", "pypdf", "pypdf2", "python-docx", "python-pptx"] [metadata] lock-version = "2.0" python-versions = ">=3.12,<3.13" -content-hash = "d647a5f3523f38b18090aada3c6fad4b3ba01b42503901a056c7098534498694" +content-hash = "7148049b2c8d505ca2179d25072e32601694caedd629588f9416b60dd1459bb0" diff --git a/py/pyproject.toml 
b/py/pyproject.toml index 9ddd00205..79cc1913c 100644 --- a/py/pyproject.toml +++ b/py/pyproject.toml @@ -5,24 +5,25 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "r2r" readme = "README.md" -version = "3.2.17" +version = "3.2.30" description = "SciPhi R2R" authors = ["Owen Colegrove "] license = "MIT" include = ["r2r.toml", "compose.yaml", "compose.full.yaml", "pyproject.toml"] packages = [ - { include = "r2r", from = "." }, - { include = "core", from = "." }, - { include = "cli", from = "." }, + { include = "r2r" }, { include = "sdk", from = "." }, { include = "shared", from = "." }, + { include = "core", from = "." }, + { include = "cli", from = "." }, ] [tool.poetry.dependencies] # Python Versions python = ">=3.12,<3.13" +alembic = "^1.13.3" asyncclick = "^8.1.7.2" click = "^8.0.0" fastapi = "^0.114.0" @@ -80,6 +81,7 @@ python-docx = { version = "^1.1.0", optional = true } [tool.poetry.extras] core = [ + "alembic", "aiosqlite", "apscheduler", "asyncpg", diff --git a/py/r2r.toml b/py/r2r.toml index 55e31642b..f18711159 100644 --- a/py/r2r.toml +++ b/py/r2r.toml @@ -35,6 +35,7 @@ provider = "bcrypt" provider = "postgres" default_collection_name = "Default" default_collection_description = "Your default collection." +enable_fts = true # whether or not to enable full-text search, e.g `hybrid search` [embedding] provider = "litellm" @@ -55,6 +56,16 @@ chunking_strategy = "recursive" chunk_size = 1_024 chunk_overlap = 512 excluded_parsers = ["mp4"] + + [ingestion.chunk_enrichment_settings] + enable_chunk_enrichment = false # disabled by default + strategies = ["semantic", "neighborhood"] + forward_chunks = 3 + backward_chunks = 3 + semantic_neighbors = 10 + semantic_similarity_threshold = 0.7 + generation_config = { model = "openai/gpt-4o-mini" } + [ingestion.extra_parsers] pdf = "zerox" @@ -69,15 +80,23 @@ batch_size = 256 relation_types = [] # if empty, all relations are extracted fragment_merge_count = 4 # number of fragments to merge into a single extraction max_knowledge_triples = 100 - max_description_input_length = 1024 + max_description_input_length = 65536 generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for triplet extraction + [kg.kg_entity_deduplication_settings] + kg_entity_deduplication_type = "by_name" + kg_entity_deduplication_prompt = "graphrag_entity_deduplication" + max_description_input_length = 65536 + generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for deduplication + [kg.kg_enrichment_settings] community_reports_prompt = "graphrag_community_reports" + max_summary_input_length = 65536 generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for node description and graph clustering leiden_params = {} [kg.kg_search_settings] + entities_level = "document" # set to collection if you've run deduplication map_system_prompt = "graphrag_map_system" reduce_system_prompt = "graphrag_reduce_system" generation_config = { model = "openai/gpt-4o-mini" } diff --git a/py/r2r/__init__.py b/py/r2r/__init__.py index 5272076b7..15ab4e234 100644 --- a/py/r2r/__init__.py +++ b/py/r2r/__init__.py @@ -3,8 +3,9 @@ import toml -from sdk import * -from shared import * +from sdk.async_client import R2RAsyncClient +from sdk.models import R2RException +from sdk.sync_client import R2RClient logger = logging.getLogger() @@ -12,12 +13,11 @@ pyproject_data = toml.load(pyproject_path) __version__ = pyproject_data["tool"]["poetry"]["version"] - __all__ = [ - # R2R SDK "R2RAsyncClient", 
"R2RClient", "__version__", + "R2RException", ] try: diff --git a/py/sdk/__init__.py b/py/sdk/__init__.py index df1bc2d75..092eba630 100644 --- a/py/sdk/__init__.py +++ b/py/sdk/__init__.py @@ -1,6 +1,4 @@ -from .client import R2RAsyncClient, R2RClient +from .async_client import R2RAsyncClient +from .sync_client import R2RClient -__all__ = [ - "R2RAsyncClient", - "R2RClient", -] +__all__ = ["R2RAsyncClient", "R2RClient"] diff --git a/py/sdk/async_client.py b/py/sdk/async_client.py new file mode 100644 index 000000000..df48089d0 --- /dev/null +++ b/py/sdk/async_client.py @@ -0,0 +1,106 @@ +import json +from typing import Any, AsyncGenerator + +import httpx + +from shared.abstractions import R2RException + +from .base.base_client import BaseClient +from .mixins import ( + AuthMixins, + IngestionMixins, + KGMixins, + ManagementMixins, + RetrievalMixins, + ServerMixins, +) + + +class R2RAsyncClient( + BaseClient, + AuthMixins, + IngestionMixins, + KGMixins, + ManagementMixins, + RetrievalMixins, + ServerMixins, +): + """ + Asynchronous client for interacting with the R2R API. + + Args: + base_url (str, optional): The base URL of the R2R API. Defaults to "http://localhost:7272". + prefix (str, optional): The prefix for the API. Defaults to "/v2". + custom_client (httpx.AsyncClient, optional): A custom HTTP client. Defaults to None. + timeout (float, optional): The timeout for requests. Defaults to 300.0. + """ + + def __init__( + self, + base_url: str = "http://localhost:7272", + prefix: str = "/v2", + custom_client=None, + timeout: float = 300.0, + ): + super().__init__(base_url, prefix, timeout) + self.client = custom_client or httpx.AsyncClient(timeout=timeout) + + async def _make_request(self, method: str, endpoint: str, **kwargs): + url = self._get_full_url(endpoint) + request_args = self._prepare_request_args(endpoint, **kwargs) + + try: + async with httpx.AsyncClient(timeout=self.timeout) as client: + response = await client.request(method, url, **request_args) + await self._handle_response(response) + return response.json() if response.content else None + except httpx.RequestError as e: + raise R2RException( + status_code=500, message=f"Request failed: {str(e)}" + ) from e + + async def _make_streaming_request( + self, method: str, endpoint: str, **kwargs + ) -> AsyncGenerator[Any, None]: + url = self._get_full_url(endpoint) + request_args = self._prepare_request_args(endpoint, **kwargs) + + async with httpx.AsyncClient(timeout=self.timeout) as client: + async with client.stream(method, url, **request_args) as response: + await self._handle_response(response) + async for line in response.aiter_lines(): + if line.strip(): # Ignore empty lines + try: + yield json.loads(line) + except json.JSONDecodeError: + yield line + + async def _handle_response(self, response): + if response.status_code >= 400: + try: + error_content = response.json() + if isinstance(error_content, dict): + message = ( + error_content.get("detail", {}).get( + "message", str(error_content) + ) + if isinstance(error_content.get("detail"), dict) + else error_content.get("detail", str(error_content)) + ) + else: + message = str(error_content) + except json.JSONDecodeError: + message = response.text + + raise R2RException( + status_code=response.status_code, message=message + ) + + async def close(self): + await self.client.aclose() + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + await self.close() diff --git a/py/sdk/base/__init_.py b/py/sdk/base/__init_.py new 
file mode 100644 index 000000000..e69de29bb diff --git a/py/sdk/base/base_client.py b/py/sdk/base/base_client.py new file mode 100644 index 000000000..096cd69aa --- /dev/null +++ b/py/sdk/base/base_client.py @@ -0,0 +1,49 @@ +from typing import Optional + +from shared.abstractions import R2RException + + +class BaseClient: + def __init__( + self, + base_url: str = "http://localhost:7272", + prefix: str = "/v2", + timeout: float = 300.0, + ): + self.base_url = base_url + self.prefix = prefix + self.timeout = timeout + self.access_token: Optional[str] = None + self._refresh_token: Optional[str] = None + + def _get_auth_header(self) -> dict[str, str]: + if not self.access_token: + return {} + return {"Authorization": f"Bearer {self.access_token}"} + + def _ensure_authenticated(self): + if not self.access_token: + raise R2RException( + status_code=401, + message="Not authenticated. Please login first.", + ) + + def _get_full_url(self, endpoint: str) -> str: + return f"{self.base_url}{self.prefix}/{endpoint}" + + def _prepare_request_args(self, endpoint: str, **kwargs) -> dict: + headers = kwargs.pop("headers", {}) + if self.access_token and endpoint not in [ + "register", + "login", + "verify_email", + ]: + headers.update(self._get_auth_header()) + + if ( + kwargs.get("params", None) == {} + or kwargs.get("params", None) is None + ): + kwargs.pop("params", None) + + return {"headers": headers, **kwargs} diff --git a/py/sdk/client.py b/py/sdk/client.py deleted file mode 100644 index 8fb11be8a..000000000 --- a/py/sdk/client.py +++ /dev/null @@ -1,240 +0,0 @@ -import asyncio -import inspect -import json -from typing import AsyncGenerator, Generator - -import httpx -import nest_asyncio -from fastapi.testclient import TestClient - -from .auth import AuthMethods -from .ingestion import IngestionMethods -from .kg import KGMethods -from .management import ManagementMethods -from .models import R2RException -from .retrieval import RetrievalMethods -from .server import ServerMethods - -nest_asyncio.apply() - -# The empty args become necessary after a recent modification to `base_endpoint` -# TODO - Remove the explicitly empty args -EMPTY_ARGS = {"args": "", "kwargs": "{}"} - - -def handle_request_error(response): - if response.status_code < 400: - return - - try: - error_content = response.json() - if isinstance(error_content, dict) and "detail" in error_content: - detail = error_content["detail"] - if isinstance(detail, dict): - message = detail.get("message", str(response.text)) - else: - message = str(detail) - else: - message = str(error_content) - except json.JSONDecodeError: - message = response.text - - raise R2RException( - status_code=response.status_code, - message=message, - ) - - -async def handle_request_error_async(response): - if response.status_code < 400: - return - - try: - if response.headers.get("content-type") == "application/json": - error_content = await response.json() - else: - error_content = await response.text - - if isinstance(error_content, dict) and "detail" in error_content: - detail = error_content["detail"] - if isinstance(detail, dict): - message = detail.get("message", str(error_content)) - else: - message = str(detail) - else: - message = str(error_content) - except Exception: - message = response.text - - raise R2RException( - status_code=response.status_code, - message=message, - ) - - -class R2RAsyncClient: - def __init__( - self, - base_url: str = "http://localhost:7272", - prefix: str = "/v2", - custom_client=None, - timeout: float = 300.0, - ): - self.base_url 
= base_url - self.prefix = prefix - self.access_token = None - self._refresh_token = None - self.client = custom_client or httpx.AsyncClient(timeout=timeout) - self.timeout = timeout - - # Initialize methods grouop - self._auth = AuthMethods - self._ingestion = IngestionMethods - self._management = ManagementMethods - self._kg = KGMethods - self._retrieval = RetrievalMethods - self._server = ServerMethods - - # Collect all methods from the methods group - self._methods = {} - for collection in [ - self._auth, - self._ingestion, - self._management, - self._kg, - self._retrieval, - self._server, - ]: - for name, method in inspect.getmembers( - collection, predicate=inspect.isfunction - ): - if not name.startswith("_"): - self._methods[name] = method - - async def _make_request(self, method, endpoint, **kwargs): - url = f"{self.base_url}{self.prefix}/{endpoint}" - headers = kwargs.pop("headers", {}) - if self.access_token and endpoint not in [ - "register", - "login", - "verify_email", - ]: - headers.update(self._get_auth_header()) - if ( - kwargs.get("params", None) == {} - or kwargs.get("params", None) is None - ): - if "params" in kwargs: - kwargs.pop("params") - - if isinstance(self.client, TestClient): - # Weird mocking fix... - params = kwargs.pop("params", {}) - params = {**params, **EMPTY_ARGS} - response = getattr(self.client, method.lower())( - url, headers=headers, params=params, **kwargs - ) - return response.json() if response.content else None - else: - try: - response = await self.client.request( - method, url, headers=headers, **kwargs - ) - await handle_request_error_async(response) - return response.json() if response.content else None - except httpx.RequestError as e: - raise R2RException( - status_code=500, message=f"Request failed: {str(e)}" - ) from e - - async def _make_streaming_request( - self, method: str, endpoint: str, **kwargs - ) -> AsyncGenerator[str, None]: - url = f"{self.base_url}{self.prefix}/{endpoint}" - headers = kwargs.pop("headers", {}) - if self.access_token and endpoint not in [ - "register", - "login", - "verify_email", - ]: - headers.update(self._get_auth_header()) - - async with httpx.AsyncClient() as client: - async with client.stream( - method, url, headers=headers, timeout=self.timeout, **kwargs - ) as response: - handle_request_error(response) - async for chunk in response.aiter_text(): - yield chunk - - def _get_auth_header(self) -> dict: - if not self.access_token: - return {} - return {"Authorization": f"Bearer {self.access_token}"} - - def _ensure_authenticated(self): - if not self.access_token: - raise R2RException( - status_code=401, - message="Not authenticated. 
Please login first.", - ) - - async def close(self): - await self.client.aclose() - - async def __aenter__(self): - return self - - async def __aexit__(self, exc_type, exc_val, exc_tb): - await self.close() - - def __getattr__(self, name): - if name in self._methods: - return lambda *args, **kwargs: self._methods[name]( - self, *args, **kwargs - ) - raise AttributeError(f"'R2RClient' object has no attribute '{name}'") - - def __dir__(self): - return list(set(super().__dir__() + list(self._methods.keys()))) - - -class R2RClient: - def __init__(self, *args, **kwargs): - self.async_client = R2RAsyncClient(*args, **kwargs) - - def _sync_generator(self, async_gen: AsyncGenerator) -> Generator: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - try: - while True: - yield loop.run_until_complete(async_gen.__anext__()) - except StopAsyncIteration: - pass - finally: - loop.close() - - def __getattr__(self, name): - async_attr = getattr(self.async_client, name) - if callable(async_attr): - - def sync_wrapper(*args, **kwargs): - result = asyncio.get_event_loop().run_until_complete( - async_attr(*args, **kwargs) - ) - if isinstance(result, AsyncGenerator): - return self._sync_generator(result) - return result - - return sync_wrapper - return async_attr - - def __dir__(self): - return dir(self.async_client) - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - asyncio.get_event_loop().run_until_complete(self.async_client.close()) diff --git a/py/sdk/mixins/__init__.py b/py/sdk/mixins/__init__.py new file mode 100644 index 000000000..43ef9d2c0 --- /dev/null +++ b/py/sdk/mixins/__init__.py @@ -0,0 +1,15 @@ +from .auth import AuthMixins +from .ingestion import IngestionMixins +from .kg import KGMixins +from .management import ManagementMixins +from .retrieval import RetrievalMixins +from .server import ServerMixins + +__all__ = [ + "AuthMixins", + "IngestionMixins", + "KGMixins", + "ManagementMixins", + "RetrievalMixins", + "ServerMixins", +] diff --git a/py/sdk/auth.py b/py/sdk/mixins/auth.py similarity index 70% rename from py/sdk/auth.py rename to py/sdk/mixins/auth.py index fc2716649..03dc5db5c 100644 --- a/py/sdk/auth.py +++ b/py/sdk/mixins/auth.py @@ -1,12 +1,11 @@ from typing import Optional, Union from uuid import UUID -from .models import Token, UserResponse +from ..models import Token, UserResponse -class AuthMethods: - @staticmethod - async def register(client, email: str, password: str) -> UserResponse: +class AuthMixins: + async def register(self, email: str, password: str) -> UserResponse: """ Registers a new user with the given email and password. @@ -18,10 +17,9 @@ async def register(client, email: str, password: str) -> UserResponse: UserResponse: The response from the server. """ data = {"email": email, "password": password} - return await client._make_request("POST", "register", json=data) + return await self._make_request("POST", "register", json=data) # type: ignore - @staticmethod - async def verify_email(client, verification_code: str) -> dict: + async def verify_email(self, verification_code: str) -> dict: """ Verifies the email of a user with the given verification code. @@ -29,14 +27,13 @@ async def verify_email(client, verification_code: str) -> dict: verification_code (str): The verification code to verify the email with. 
""" - return await client._make_request( + return await self._make_request( # type: ignore "POST", "verify_email", json=verification_code, ) - @staticmethod - async def login(client, email: str, password: str) -> dict[str, Token]: + async def login(self, email: str, password: str) -> dict[str, Token]: """ Attempts to log in a user with the given email and password. @@ -48,37 +45,34 @@ async def login(client, email: str, password: str) -> dict[str, Token]: dict[str, Token]: The access and refresh tokens from the server. """ data = {"username": email, "password": password} - response = await client._make_request("POST", "login", data=data) - client.access_token = response["results"]["access_token"]["token"] - client._refresh_token = response["results"]["refresh_token"]["token"] + response = await self._make_request("POST", "login", data=data) # type: ignore + self.access_token = response["results"]["access_token"]["token"] + self._refresh_token = response["results"]["refresh_token"]["token"] return response - @staticmethod - async def logout(client) -> dict: + async def logout(self) -> dict: """ Logs out the currently authenticated user. Returns: dict: The response from the server. """ - response = await client._make_request("POST", "logout") - client.access_token = None - client._refresh_token = None + response = await self._make_request("POST", "logout") # type: ignore + self.access_token = None + self._refresh_token = None return response - @staticmethod - async def user(client) -> UserResponse: + async def user(self) -> UserResponse: """ Retrieves the user information for the currently authenticated user. Returns: UserResponse: The response from the server. """ - return await client._make_request("GET", "user") + return await self._make_request("GET", "user") # type: ignore - @staticmethod async def update_user( - client, + self, user_id: Union[str, UUID], email: Optional[str] = None, is_superuser: Optional[bool] = None, @@ -109,26 +103,24 @@ async def update_user( "profile_picture": profile_picture, } data = {k: v for k, v in data.items() if v is not None} - return await client._make_request("PUT", "user", json=data) + return await self._make_request("PUT", "user", json=data) # type: ignore - @staticmethod - async def refresh_access_token(client) -> dict[str, Token]: + async def refresh_access_token(self) -> dict[str, Token]: """ Refreshes the access token for the currently authenticated user. Returns: dict[str, Token]: The access and refresh tokens from the server. """ - response = await client._make_request( - "POST", "refresh_access_token", json=client._refresh_token + response = await self._make_request( # type: ignore + "POST", "refresh_access_token", json=self._refresh_token ) - client.access_token = response["results"]["access_token"]["token"] - client._refresh_token = response["results"]["refresh_token"]["token"] + self.access_token = response["results"]["access_token"]["token"] + self._refresh_token = response["results"]["refresh_token"]["token"] return response - @staticmethod async def change_password( - client, current_password: str, new_password: str + self, current_password: str, new_password: str ) -> dict: """ Changes the password of the currently authenticated user. 
@@ -144,10 +136,9 @@ async def change_password( "current_password": current_password, "new_password": new_password, } - return await client._make_request("POST", "change_password", json=data) + return await self._make_request("POST", "change_password", json=data) # type: ignore - @staticmethod - async def request_password_reset(client, email: str) -> dict: + async def request_password_reset(self, email: str) -> dict: """ Requests a password reset for the user with the given email. @@ -157,13 +148,12 @@ async def request_password_reset(client, email: str) -> dict: Returns: dict: The response from the server. """ - return await client._make_request( + return await self._make_request( # type: ignore "POST", "request_password_reset", json=email ) - @staticmethod async def confirm_password_reset( - client, reset_token: str, new_password: str + self, reset_token: str, new_password: str ) -> dict: """ Confirms a password reset for the user with the given reset token. @@ -176,11 +166,10 @@ async def confirm_password_reset( dict: The response from the server. """ data = {"reset_token": reset_token, "new_password": new_password} - return await client._make_request("POST", "reset_password", json=data) + return await self._make_request("POST", "reset_password", json=data) # type: ignore - @staticmethod async def login_with_token( - client, + self, access_token: str, ) -> dict[str, Token]: """ @@ -193,10 +182,10 @@ async def login_with_token( Returns: dict[str, Token]: The access and refresh tokens from the server. """ - client.access_token = access_token + self.access_token = access_token # Verify the tokens by making a request to the user endpoint try: - await client._make_request("GET", "user") + await self._make_request("GET", "user") # type: ignore return { "access_token": Token( token=access_token, token_type="access_token" @@ -204,6 +193,6 @@ async def login_with_token( } except Exception: # If the request fails, clear the tokens and raise an exception - client.access_token = None - client._refresh_token = None + self.access_token = None + self._refresh_token = None raise ValueError("Invalid tokens provided") diff --git a/py/sdk/ingestion.py b/py/sdk/mixins/ingestion.py similarity index 64% rename from py/sdk/ingestion.py rename to py/sdk/mixins/ingestion.py index 7100cbba6..10c4cb9d3 100644 --- a/py/sdk/ingestion.py +++ b/py/sdk/mixins/ingestion.py @@ -4,11 +4,16 @@ from typing import Optional, Union from uuid import UUID +from shared.abstractions.vector import ( + IndexMeasure, + IndexMethod, + VectorTableName, +) -class IngestionMethods: - @staticmethod + +class IngestionMixins: async def ingest_files( - client, + self, file_paths: list[str], document_ids: Optional[list[Union[str, UUID]]] = None, metadatas: Optional[list[dict]] = None, @@ -73,13 +78,12 @@ async def ingest_files( if run_with_orchestration is not None: data["run_with_orchestration"] = str(run_with_orchestration) - return await client._make_request( + return await self._make_request( # type: ignore "POST", "ingest_files", data=data, files=files_tuples ) - @staticmethod async def update_files( - client, + self, file_paths: list[str], document_ids: Optional[list[Union[str, UUID]]] = None, metadatas: Optional[list[dict]] = None, @@ -133,13 +137,12 @@ async def update_files( if run_with_orchestration is not None: data["run_with_orchestration"] = str(run_with_orchestration) - return await client._make_request( + return await self._make_request( # type: ignore "POST", "update_files", data=data, files=files ) - @staticmethod async def 
ingest_chunks( - client, + self, chunks: list[dict], document_id: Optional[UUID] = None, metadata: Optional[dict] = None, @@ -164,4 +167,83 @@ async def ingest_chunks( } if run_with_orchestration is not None: data["run_with_orchestration"] = str(run_with_orchestration) # type: ignore - return await client._make_request("POST", "ingest_chunks", json=data) + return await self._make_request("POST", "ingest_chunks", json=data) # type: ignore + + async def create_vector_index( + self, + table_name: VectorTableName = VectorTableName.VECTORS, + index_method: IndexMethod = IndexMethod.hnsw, + index_measure: IndexMeasure = IndexMeasure.cosine_distance, + index_arguments: Optional[dict] = None, + index_name: Optional[str] = None, + concurrently: bool = True, + ) -> dict: + """ + Create a vector index for a given table. + + Args: + table_name (VectorTableName): Name of the table to create index on + index_method (IndexMethod): Method to use for indexing (hnsw or ivf_flat) + index_measure (IndexMeasure): Distance measure to use + index_arguments (Optional[dict]): Additional arguments for the index + index_name (Optional[str]): Custom name for the index + concurrently (bool): Whether to create the index concurrently + + Returns: + dict: Response containing the creation status + """ + data = { + "table_name": table_name, + "index_method": index_method, + "index_measure": index_measure, + "index_arguments": index_arguments, + "index_name": index_name, + "concurrently": concurrently, + } + return await self._make_request( # type: ignore + "POST", "create_vector_index", json=data + ) + + async def list_vector_indices( + self, + table_name: VectorTableName = VectorTableName.VECTORS, + ) -> dict: + """ + List all vector indices for a given table. + + Args: + table_name (VectorTableName): Name of the table to list indices from + + Returns: + dict: Response containing the list of indices + """ + params = {"table_name": table_name} + return await self._make_request( # type: ignore + "GET", "list_vector_indices", params=params + ) + + async def delete_vector_index( + self, + index_name: str, + table_name: VectorTableName = VectorTableName.VECTORS, + concurrently: bool = True, + ) -> dict: + """ + Delete a vector index from a given table. 
+ + Args: + index_name (str): Name of the index to delete + table_name (VectorTableName): Name of the table containing the index + concurrently (bool): Whether to delete the index concurrently + + Returns: + dict: Response containing the deletion status + """ + data = { + "index_name": index_name, + "table_name": table_name, + "concurrently": concurrently, + } + return await self._make_request( # type: ignore + "DELETE", "delete_vector_index", json=data + ) diff --git a/py/sdk/kg.py b/py/sdk/mixins/kg.py similarity index 53% rename from py/sdk/kg.py rename to py/sdk/mixins/kg.py index d9c4dd0ab..a718c390d 100644 --- a/py/sdk/kg.py +++ b/py/sdk/mixins/kg.py @@ -1,24 +1,22 @@ from typing import Optional, Union from uuid import UUID -from .models import ( - KGCreationResponse, +from ..models import ( KGCreationSettings, - KGEnrichmentResponse, KGEnrichmentSettings, + KGEntityDeduplicationResponse, + KGEntityDeduplicationSettings, KGRunType, ) -class KGMethods: - - @staticmethod +class KGMixins: async def create_graph( - client, + self, collection_id: Optional[Union[UUID, str]] = None, run_type: Optional[Union[str, KGRunType]] = None, kg_creation_settings: Optional[Union[dict, KGCreationSettings]] = None, - ) -> KGCreationResponse: + ) -> dict: """ Create a graph from the given settings. @@ -36,17 +34,16 @@ async def create_graph( "kg_creation_settings": kg_creation_settings or {}, } - return await client._make_request("POST", "create_graph", json=data) + return await self._make_request("POST", "create_graph", json=data) # type: ignore - @staticmethod async def enrich_graph( - client, + self, collection_id: Optional[Union[UUID, str]] = None, run_type: Optional[Union[str, KGRunType]] = None, kg_enrichment_settings: Optional[ Union[dict, KGEnrichmentSettings] ] = None, - ) -> KGEnrichmentResponse: + ) -> dict: """ Perform graph enrichment over the entire graph. @@ -66,14 +63,14 @@ async def enrich_graph( "kg_enrichment_settings": kg_enrichment_settings or {}, } - return await client._make_request("POST", "enrich_graph", json=data) + return await self._make_request("POST", "enrich_graph", json=data) # type: ignore - @staticmethod async def get_entities( - client, + self, collection_id: str, offset: int = 0, limit: int = 100, + entity_level: Optional[str] = "collection", entity_ids: Optional[list[str]] = None, ) -> dict: """ @@ -83,12 +80,14 @@ async def get_entities( collection_id (str): The ID of the collection to retrieve entities from. offset (int): The offset for pagination. limit (int): The limit for pagination. + entity_level (Optional[str]): The level of entity to filter by. entity_ids (Optional[List[str]]): Optional list of entity IDs to filter by. Returns: dict: A dictionary containing the retrieved entities and total count. 
""" params = { + "entity_level": entity_level, "collection_id": collection_id, "offset": offset, "limit": limit, @@ -96,11 +95,10 @@ async def get_entities( if entity_ids: params["entity_ids"] = ",".join(entity_ids) - return await client._make_request("GET", "entities", params=params) + return await self._make_request("GET", "entities", params=params) # type: ignore - @staticmethod async def get_triples( - client, + self, collection_id: str, offset: int = 0, limit: int = 100, @@ -132,11 +130,10 @@ async def get_triples( if triple_ids: params["triple_ids"] = ",".join(triple_ids) - return await client._make_request("GET", "triples", params=params) + return await self._make_request("GET", "triples", params=params) # type: ignore - @staticmethod async def get_communities( - client, + self, collection_id: str, offset: int = 0, limit: int = 100, @@ -167,4 +164,90 @@ async def get_communities( if community_numbers: params["community_numbers"] = community_numbers - return await client._make_request("GET", "communities", params=params) + return await self._make_request("GET", "communities", params=params) # type: ignore + + async def get_tuned_prompt( + self, + prompt_name: str, + collection_id: Optional[str] = None, + documents_offset: Optional[int] = 0, + documents_limit: Optional[int] = 100, + chunk_offset: Optional[int] = 0, + chunk_limit: Optional[int] = 100, + ) -> dict: + """ + Tune the GraphRAG prompt for a given collection. + + The tuning process provides an LLM with chunks from each document in the collection. The relative sample size can therefore be controlled by adjusting the document and chunk limits. + + Args: + prompt_name (str): The name of the prompt to tune. + collection_id (str): The ID of the collection to tune the prompt for. + documents_offset (Optional[int]): The offset for pagination of documents. + documents_limit (Optional[int]): The limit for pagination of documents. + chunk_offset (Optional[int]): The offset for pagination of chunks. + chunk_limit (Optional[int]): The limit for pagination of chunks. + + Returns: + dict: A dictionary containing the tuned prompt. + """ + params = { + "prompt_name": prompt_name, + "collection_id": collection_id, + "documents_offset": documents_offset, + "documents_limit": documents_limit, + "chunk_offset": chunk_offset, + "chunk_limit": chunk_limit, + } + + params = {k: v for k, v in params.items() if v is not None} + + return await self._make_request("GET", "tuned_prompt", params=params) # type: ignore + + async def deduplicate_entities( + self, + collection_id: Optional[Union[UUID, str]] = None, + run_type: Optional[Union[str, KGRunType]] = None, + deduplication_settings: Optional[ + Union[dict, KGEntityDeduplicationSettings] + ] = None, + ) -> KGEntityDeduplicationResponse: + """ + Deduplicate entities in the knowledge graph. + Args: + collection_id (Optional[Union[UUID, str]]): The ID of the collection to deduplicate entities for. + run_type (Optional[Union[str, KGRunType]]): The type of run to perform. + deduplication_settings (Optional[Union[dict, KGEntityDeduplicationSettings]]): Settings for the deduplication process. 
+ """ + if isinstance(deduplication_settings, KGEntityDeduplicationSettings): + deduplication_settings = deduplication_settings.model_dump() + + data = { + "collection_id": str(collection_id) if collection_id else None, + "run_type": str(run_type) if run_type else None, + "deduplication_settings": deduplication_settings or {}, + } + + return await self._make_request( # type: ignore + "POST", "deduplicate_entities", json=data + ) + + async def delete_graph_for_collection( + self, collection_id: Union[UUID, str], cascade: bool = False + ) -> dict: + """ + Delete the graph for a given collection. + + Args: + collection_id (Union[UUID, str]): The ID of the collection to delete the graph for. + cascade (bool): Whether to cascade the deletion, and delete entities and triples belonging to the collection. + + NOTE: Setting this flag to true will delete entities and triples for documents that are shared across multiple collections. Do not set this flag unless you are absolutely sure that you want to delete the entities and triples for all documents in the collection. + """ + + data = { + "collection_id": str(collection_id), + "cascade": cascade, + } + + return await self._make_request("DELETE", "delete_graph_for_collection", json=data) # type: ignore diff --git a/py/sdk/management.py b/py/sdk/mixins/management.py similarity index 85% rename from py/sdk/management.py rename to py/sdk/mixins/management.py index 0d4d92b64..10d63b4c1 100644 --- a/py/sdk/management.py +++ b/py/sdk/mixins/management.py @@ -2,13 +2,12 @@ from typing import Any, Optional, Union from uuid import UUID -from shared.abstractions.llm import Message +from ..models import Message -class ManagementMethods: - @staticmethod +class ManagementMixins: async def update_prompt( - client, + self, name: str, template: Optional[str] = None, input_types: Optional[dict[str, str]] = None, @@ -30,11 +29,10 @@ async def update_prompt( if input_types is not None: data["input_types"] = input_types - return await client._make_request("POST", "update_prompt", json=data) + return await self._make_request("POST", "update_prompt", json=data) # type: ignore - @staticmethod async def add_prompt( - client, + self, name: str, template: str, input_types: dict[str, str], @@ -55,11 +53,10 @@ async def add_prompt( "template": template, "input_types": input_types, } - return await client._make_request("POST", "add_prompt", json=data) + return await self._make_request("POST", "add_prompt", json=data) # type: ignore - @staticmethod async def get_prompt( - client, + self, prompt_name: str, inputs: Optional[dict[str, Any]] = None, prompt_override: Optional[str] = None, @@ -80,22 +77,20 @@ async def get_prompt( params["inputs"] = json.dumps(inputs) if prompt_override: params["prompt_override"] = prompt_override - return await client._make_request( + return await self._make_request( # type: ignore "GET", f"get_prompt/{prompt_name}", params=params ) - @staticmethod - async def get_all_prompts(client) -> dict: + async def get_all_prompts(self) -> dict: """ Get all prompts from the system. Returns: dict: The response from the server containing all prompts. """ - return await client._make_request("GET", "get_all_prompts") + return await self._make_request("GET", "get_all_prompts") # type: ignore - @staticmethod - async def delete_prompt(client, prompt_name: str) -> dict: + async def delete_prompt(self, prompt_name: str) -> dict: """ Delete a prompt from the system. 
@@ -105,13 +100,12 @@ async def delete_prompt(client, prompt_name: str) -> dict: Returns: dict: The response from the server. """ - return await client._make_request( + return await self._make_request( # type: ignore "DELETE", f"delete_prompt/{prompt_name}" ) - @staticmethod async def analytics( - client, + self, filter_criteria: Optional[Union[dict, str]] = None, analysis_types: Optional[Union[dict, str]] = None, ) -> dict: @@ -137,21 +131,19 @@ async def analytics( else: params["analysis_types"] = analysis_types - return await client._make_request("GET", "analytics", params=params) + return await self._make_request("GET", "analytics", params=params) # type: ignore - @staticmethod - async def app_settings(client) -> dict: + async def app_settings(self) -> dict: """ Get the configuration settings for the app. Returns: dict: The app settings. """ - return await client._make_request("GET", "app_settings") + return await self._make_request("GET", "app_settings") # type: ignore - @staticmethod async def users_overview( - client, + self, user_ids: Optional[list[str]] = None, offset: Optional[int] = None, limit: Optional[int] = None, @@ -172,13 +164,12 @@ async def users_overview( params["offset"] = offset if limit is not None: params["limit"] = limit - return await client._make_request( + return await self._make_request( # type: ignore "GET", "users_overview", params=params ) - @staticmethod async def delete( - client, + self, filters: dict, ) -> dict: """ @@ -192,13 +183,12 @@ async def delete( """ filters_json = json.dumps(filters) - return await client._make_request( + return await self._make_request( # type: ignore "DELETE", "delete", params={"filters": filters_json} ) or {"results": {}} - @staticmethod async def download_file( - client, + self, document_id: Union[str, UUID], ): """ @@ -210,13 +200,12 @@ async def download_file( Returns: dict: The response from the server. 
""" - return await client._make_request( + return await self._make_request( # type: ignore "GET", f"download_file/{str(document_id)}" ) - @staticmethod async def documents_overview( - client, + self, document_ids: Optional[list[Union[UUID, str]]] = None, offset: Optional[int] = None, limit: Optional[int] = None, @@ -240,13 +229,12 @@ async def documents_overview( params["offset"] = offset if limit is not None: params["limit"] = limit - return await client._make_request( + return await self._make_request( # type: ignore "GET", "documents_overview", params=params ) - @staticmethod async def document_chunks( - client, + self, document_id: str, offset: Optional[int] = None, limit: Optional[int] = None, @@ -269,17 +257,16 @@ async def document_chunks( if include_vectors: params["include_vectors"] = include_vectors if not params: - return await client._make_request( + return await self._make_request( # type: ignore "GET", f"document_chunks/{document_id}" ) else: - return await client._make_request( + return await self._make_request( # type: ignore "GET", f"document_chunks/{document_id}", params=params ) - @staticmethod async def collections_overview( - client, + self, collection_ids: Optional[list[str]] = None, offset: Optional[int] = None, limit: Optional[int] = None, @@ -302,13 +289,12 @@ async def collections_overview( params["offset"] = offset if limit: params["limit"] = limit - return await client._make_request( + return await self._make_request( # type: ignore "GET", "collections_overview", params=params ) - @staticmethod async def create_collection( - client, + self, name: str, description: Optional[str] = None, ) -> dict: @@ -326,13 +312,12 @@ async def create_collection( if description is not None: data["description"] = description - return await client._make_request( + return await self._make_request( # type: ignore "POST", "create_collection", json=data ) - @staticmethod async def get_collection( - client, + self, collection_id: Union[str, UUID], ) -> dict: """ @@ -344,13 +329,12 @@ async def get_collection( Returns: dict: The collection data. """ - return await client._make_request( + return await self._make_request( # type: ignore "GET", f"get_collection/{str(collection_id)}" ) - @staticmethod async def update_collection( - client, + self, collection_id: Union[str, UUID], name: Optional[str] = None, description: Optional[str] = None, @@ -372,13 +356,12 @@ async def update_collection( if description is not None: data["description"] = description - return await client._make_request( + return await self._make_request( # type: ignore "PUT", "update_collection", json=data ) - @staticmethod async def delete_collection( - client, + self, collection_id: Union[str, UUID], ) -> dict: """ @@ -390,13 +373,12 @@ async def delete_collection( Returns: dict: The response from the server. 
""" - return await client._make_request( + return await self._make_request( # type: ignore "DELETE", f"delete_collection/{str(collection_id)}" ) - @staticmethod async def delete_user( - client, + self, user_id: str, password: Optional[str] = None, delete_vector_data: bool = False, @@ -415,16 +397,15 @@ async def delete_user( params["password"] = password if delete_vector_data: params["delete_vector_data"] = delete_vector_data - if params == {}: - return await client._make_request("DELETE", f"user/{user_id}") + if not params: + return await self._make_request("DELETE", f"user/{user_id}") # type: ignore else: - return await client._make_request( + return await self._make_request( # type: ignore "DELETE", f"user/{user_id}", json=params ) - @staticmethod async def list_collections( - client, + self, offset: Optional[int] = None, limit: Optional[int] = None, ) -> dict: @@ -443,13 +424,12 @@ async def list_collections( params["offset"] = offset if limit is not None: params["limit"] = limit - return await client._make_request( + return await self._make_request( # type: ignore "GET", "list_collections", params=params ) - @staticmethod async def add_user_to_collection( - client, + self, user_id: Union[str, UUID], collection_id: Union[str, UUID], ) -> dict: @@ -467,13 +447,12 @@ async def add_user_to_collection( "user_id": str(user_id), "collection_id": str(collection_id), } - return await client._make_request( + return await self._make_request( # type: ignore "POST", "add_user_to_collection", json=data ) - @staticmethod async def remove_user_from_collection( - client, + self, user_id: Union[str, UUID], collection_id: Union[str, UUID], ) -> dict: @@ -491,13 +470,12 @@ async def remove_user_from_collection( "user_id": str(user_id), "collection_id": str(collection_id), } - return await client._make_request( + return await self._make_request( # type: ignore "POST", "remove_user_from_collection", json=data ) - @staticmethod async def get_users_in_collection( - client, + self, collection_id: Union[str, UUID], offset: Optional[int] = None, limit: Optional[int] = None, @@ -518,15 +496,14 @@ async def get_users_in_collection( params["offset"] = offset if limit is not None: params["limit"] = limit - return await client._make_request( + return await self._make_request( # type: ignore "GET", f"get_users_in_collection/{str(collection_id)}", params=params, ) - @staticmethod async def user_collections( - client, + self, user_id: Union[str, UUID], offset: Optional[int] = None, limit: Optional[int] = None, @@ -546,17 +523,16 @@ async def user_collections( if limit is not None: params["limit"] = limit if not params: - return await client._make_request( + return await self._make_request( # type: ignore "GET", f"user_collections/{str(user_id)}" ) else: - return await client._make_request( + return await self._make_request( # type: ignore "GET", f"user_collections/{str(user_id)}", params=params ) - @staticmethod async def assign_document_to_collection( - client, + self, document_id: Union[str, UUID], collection_id: Union[str, UUID], ) -> dict: @@ -574,14 +550,13 @@ async def assign_document_to_collection( "document_id": str(document_id), "collection_id": str(collection_id), } - return await client._make_request( + return await self._make_request( # type: ignore "POST", "assign_document_to_collection", json=data ) # TODO: Verify that this method is implemented, also, should be a PUT request - @staticmethod async def remove_document_from_collection( - client, + self, document_id: Union[str, UUID], collection_id: 
Union[str, UUID], ) -> dict: @@ -599,13 +574,12 @@ async def remove_document_from_collection( "document_id": str(document_id), "collection_id": str(collection_id), } - return await client._make_request( + return await self._make_request( # type: ignore "POST", "remove_document_from_collection", json=data ) - @staticmethod async def document_collections( - client, + self, document_id: Union[str, UUID], offset: Optional[int] = None, limit: Optional[int] = None, @@ -625,19 +599,18 @@ async def document_collections( if limit is not None: params["limit"] = limit if not params: - return await client._make_request( + return await self._make_request( # type: ignore "GET", f"document_collections/{str(document_id)}", params=params, ) else: - return await client._make_request( + return await self._make_request( # type: ignore "GET", f"document_collections/{str(document_id)}" ) - @staticmethod async def documents_in_collection( - client, + self, collection_id: Union[str, UUID], offset: Optional[int] = None, limit: Optional[int] = None, @@ -658,13 +631,12 @@ async def documents_in_collection( params["offset"] = offset if limit is not None: params["limit"] = limit - return await client._make_request( + return await self._make_request( # type: ignore "GET", f"collection/{str(collection_id)}/documents", params=params ) - @staticmethod async def conversations_overview( - client, + self, conversation_ids: Optional[list[Union[UUID, str]]] = None, offset: Optional[int] = None, limit: Optional[int] = None, @@ -687,13 +659,12 @@ async def conversations_overview( params["offset"] = offset if limit is not None: params["limit"] = limit - return await client._make_request( + return await self._make_request( # type: ignore "GET", "conversations_overview", params=params ) - @staticmethod async def get_conversation( - client, + self, conversation_id: Union[str, UUID], branch_id: Optional[str] = None, ) -> dict: @@ -708,23 +679,21 @@ async def get_conversation( dict: The conversation data. """ query_params = f"?branch_id={branch_id}" if branch_id else "" - return await client._make_request( + return await self._make_request( # type: ignore "GET", f"get_conversation/{str(conversation_id)}{query_params}" ) - @staticmethod - async def create_conversation(client) -> dict: + async def create_conversation(self) -> dict: """ Create a new conversation. Returns: dict: The response from the server. """ - return await client._make_request("POST", "create_conversation") + return await self._make_request("POST", "create_conversation") # type: ignore - @staticmethod async def add_message( - client, + self, conversation_id: Union[str, UUID], message: Message, parent_id: Optional[str] = None, @@ -747,13 +716,12 @@ async def add_message( data["parent_id"] = parent_id if metadata is not None: data["metadata"] = metadata - return await client._make_request( + return await self._make_request( # type: ignore "POST", f"add_message/{str(conversation_id)}", data=data ) - @staticmethod async def update_message( - client, + self, message_id: str, message: Message, ) -> dict: @@ -767,13 +735,12 @@ async def update_message( Returns: dict: The response from the server. """ - return await client._make_request( + return await self._make_request( # type: ignore "PUT", f"update_message/{message_id}", data=message ) - @staticmethod async def branches_overview( - client, + self, conversation_id: Union[str, UUID], ) -> dict: """ @@ -785,13 +752,12 @@ async def branches_overview( Returns: dict: The response from the server. 
""" - return await client._make_request( + return await self._make_request( # type: ignore "GET", f"branches_overview/{str(conversation_id)}" ) # TODO: Publish these methods once more testing is done - # @staticmethod - # async def get_next_branch(client, branch_id: str) -> dict: + # async def get_next_branch(self, branch_id: str) -> dict: # """ # Get the next branch in a conversation. # @@ -801,10 +767,9 @@ async def branches_overview( # Returns: # dict: The response from the server. # """ - # return await client._make_request("GET", f"get_next_branch/{branch_id}") + # return await self._make_request("GET", f"get_next_branch/{branch_id}") # type: ignore # - # @staticmethod - # async def get_previous_branch(client, branch_id: str) -> dict: + # async def get_previous_branch(self, branch_id: str) -> dict: # """ # Get the previous branch in a conversation. # @@ -814,11 +779,10 @@ async def branches_overview( # Returns: # dict: The response from the server. # """ - # return await client._make_request("GET", f"get_previous_branch/{branch_id}") + # return await self._make_request("GET", f"get_previous_branch/{branch_id}") # type: ignore # - # @staticmethod # async def branch_at_message( - # client, + # self, # conversation_id: Union[str, UUID], # message_id: str, # ) -> dict: @@ -832,11 +796,10 @@ async def branches_overview( # Returns: # dict: The response from the server. # """ - # return await client._make_request("POST", f"branch_at_message/{str(conversation_id)}/{message_id}") + # return await self._make_request("POST", f"branch_at_message/{str(conversation_id)}/{message_id}") # type: ignore - @staticmethod async def delete_conversation( - client, + self, conversation_id: Union[str, UUID], ) -> dict: """ @@ -848,6 +811,6 @@ async def delete_conversation( Returns: dict: The response from the server. 
""" - return await client._make_request( + return await self._make_request( # type: ignore "DELETE", f"delete_conversation/{str(conversation_id)}" ) diff --git a/py/sdk/retrieval.py b/py/sdk/mixins/retrieval.py similarity index 92% rename from py/sdk/retrieval.py rename to py/sdk/mixins/retrieval.py index 1bd2c92f7..8c94a1fa1 100644 --- a/py/sdk/retrieval.py +++ b/py/sdk/mixins/retrieval.py @@ -1,7 +1,7 @@ import logging from typing import AsyncGenerator, Optional, Union -from .models import ( +from ..models import ( GenerationConfig, KGSearchSettings, Message, @@ -13,10 +13,9 @@ logger = logging.getLogger() -class RetrievalMethods: - @staticmethod +class RetrievalMixins: async def search( - client, + self, query: str, vector_search_settings: Optional[ Union[dict, VectorSearchSettings] @@ -46,11 +45,10 @@ async def search( "vector_search_settings": vector_search_settings, "kg_search_settings": kg_search_settings, } - return await client._make_request("POST", "search", json=data) + return await self._make_request("POST", "search", json=data) # type: ignore - @staticmethod async def completion( - client, + self, messages: list[Union[dict, Message]], generation_config: Optional[Union[dict, GenerationConfig]] = None, ): @@ -67,11 +65,10 @@ async def completion( "generation_config": generation_config, } - return await client._make_request("POST", "completion", json=data) + return await self._make_request("POST", "completion", json=data) # type: ignore - @staticmethod async def rag( - client, + self, query: str, rag_generation_config: Optional[Union[dict, GenerationConfig]] = None, vector_search_settings: Optional[ @@ -118,13 +115,12 @@ async def rag( if rag_generation_config and rag_generation_config.get( # type: ignore "stream", False ): - return client._make_streaming_request("POST", "rag", json=data) + return self._make_streaming_request("POST", "rag", json=data) # type: ignore else: - return await client._make_request("POST", "rag", json=data) + return await self._make_request("POST", "rag", json=data) # type: ignore - @staticmethod async def agent( - client, + self, message: Optional[Union[dict, Message]] = None, rag_generation_config: Optional[Union[dict, GenerationConfig]] = None, vector_search_settings: Optional[ @@ -196,6 +192,6 @@ async def agent( if rag_generation_config and rag_generation_config.get( # type: ignore "stream", False ): - return client._make_streaming_request("POST", "agent", json=data) + return self._make_streaming_request("POST", "agent", json=data) # type: ignore else: - return await client._make_request("POST", "agent", json=data) + return await self._make_request("POST", "agent", json=data) # type: ignore diff --git a/py/sdk/server.py b/py/sdk/mixins/server.py similarity index 75% rename from py/sdk/server.py rename to py/sdk/mixins/server.py index 0554ae6cd..f85afd1c2 100644 --- a/py/sdk/server.py +++ b/py/sdk/mixins/server.py @@ -1,24 +1,21 @@ from typing import Optional -class ServerMethods: - @staticmethod +class ServerMixins: async def health(self) -> dict: - return await self._make_request("GET", "health") + return await self._make_request("GET", "health") # type: ignore - @staticmethod - async def server_stats(client) -> dict: + async def server_stats(self) -> dict: """ Get statistics about the server, including the start time, uptime, CPU usage, and memory usage. Returns: dict: The server statistics. 
""" - return await client._make_request("GET", "server_stats") + return await self._make_request("GET", "server_stats") # type: ignore - @staticmethod async def logs( - client, + self, offset: Optional[int] = None, limit: Optional[int] = None, run_type_filter: Optional[str] = None, @@ -43,4 +40,4 @@ async def logs( }.items() if value is not None } - return await client._make_request("GET", "logs", params=params) + return await self._make_request("GET", "logs", params=params) # type: ignore diff --git a/py/sdk/models.py b/py/sdk/models.py index 0b40bd0a7..8c975ed53 100644 --- a/py/sdk/models.py +++ b/py/sdk/models.py @@ -4,6 +4,7 @@ KGCommunityResult, KGCreationSettings, KGEnrichmentSettings, + KGEntityDeduplicationSettings, KGEntityResult, KGGlobalResult, KGRelationshipResult, @@ -23,6 +24,7 @@ from shared.api.models import ( KGCreationResponse, KGEnrichmentResponse, + KGEntityDeduplicationResponse, RAGResponse, SearchResponse, UserResponse, @@ -37,6 +39,7 @@ "KGEntityResult", "KGGlobalResult", "KGRelationshipResult", + "KGRunType", "KGSearchMethod", "KGSearchResult", "KGSearchResultType", @@ -48,6 +51,8 @@ "Token", "VectorSearchResult", "VectorSearchSettings", + "KGEntityDeduplicationSettings", + "KGEntityDeduplicationResponse", "KGCreationResponse", "KGEnrichmentResponse", "RAGResponse", diff --git a/py/sdk/sync_client.py b/py/sdk/sync_client.py new file mode 100644 index 000000000..b5371ebb2 --- /dev/null +++ b/py/sdk/sync_client.py @@ -0,0 +1,38 @@ +import asyncio + +from .async_client import R2RAsyncClient +from .utils import SyncClientMetaclass + + +class R2RClient(R2RAsyncClient, metaclass=SyncClientMetaclass): + """ + Synchronous client for the R2R API. + + Args: + base_url (str, optional): The base URL of the R2R API. Defaults to "http://localhost:7272". + prefix (str, optional): The prefix for the API. Defaults to "/v2". + custom_client (httpx.AsyncClient, optional): A custom HTTP client. Defaults to None. + timeout (float, optional): The timeout for requests. Defaults to 300.0. 
+ """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def _make_streaming_request(self, method: str, endpoint: str, **kwargs): + async_gen = super()._make_streaming_request(method, endpoint, **kwargs) + return self._sync_generator(async_gen) + + def _sync_generator(self, async_gen): + loop = asyncio.get_event_loop() + try: + while True: + yield loop.run_until_complete(async_gen.__anext__()) + except StopAsyncIteration: + pass + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + loop = asyncio.get_event_loop() + loop.run_until_complete(self.close()) diff --git a/py/sdk/utils.py b/py/sdk/utils.py new file mode 100644 index 000000000..4a64ed171 --- /dev/null +++ b/py/sdk/utils.py @@ -0,0 +1,68 @@ +import asyncio +import functools +import inspect +from typing import Any, Callable + + +def sync_wrapper(async_func: Callable) -> Callable: + if inspect.isasyncgenfunction(async_func): + + @functools.wraps(async_func) + def generator_wrapper(*args: Any, **kwargs: Any) -> Any: + async_gen = async_func(*args, **kwargs) + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + def sync_gen(): + try: + while True: + yield loop.run_until_complete(async_gen.__anext__()) + except StopAsyncIteration: + pass + finally: + loop.close() + + return sync_gen() + + return generator_wrapper + else: + + @functools.wraps(async_func) + def wrapper(*args: Any, **kwargs: Any) -> Any: + try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + return loop.run_until_complete(async_func(*args, **kwargs)) + else: + import threading + + def run_in_new_loop(loop, coro): + asyncio.set_event_loop(loop) + return loop.run_until_complete(coro) + + new_loop = asyncio.new_event_loop() + return loop.run_in_executor( + None, + run_in_new_loop, + new_loop, + async_func(*args, **kwargs), + ) + + return wrapper + + +class SyncClientMetaclass(type): + def __new__(cls, name, bases, dct): + for attr_name, attr_value in dct.items(): + if asyncio.iscoroutinefunction(attr_value): + dct[attr_name] = sync_wrapper(attr_value) + + for base in bases: + for attr_name in dir(base): + attr_value = getattr(base, attr_name) + if asyncio.iscoroutinefunction(attr_value): + dct[attr_name] = sync_wrapper(attr_value) + + return super().__new__(cls, name, bases, dct) diff --git a/py/shared/abstractions/__init__.py b/py/shared/abstractions/__init__.py index 485789b07..225308646 100644 --- a/py/shared/abstractions/__init__.py +++ b/py/shared/abstractions/__init__.py @@ -1,5 +1,4 @@ from .base import AsyncSyncMeta, R2RSerializable, syncable -from .completion import CompletionRecord, MessageType from .document import ( DataType, Document, @@ -22,12 +21,18 @@ RelationshipType, Triple, ) -from .kg import KGCreationSettings, KGEnrichmentSettings, KGRunType +from .kg import ( + KGCreationSettings, + KGEnrichmentSettings, + KGEntityDeduplicationSettings, + KGRunType, +) from .llm import ( GenerationConfig, LLMChatCompletion, LLMChatCompletionChunk, Message, + MessageType, RAGCompletion, ) from .prompt import Prompt @@ -46,7 +51,13 @@ VectorSearchSettings, ) from .user import Token, TokenData, UserStats -from .vector import StorageResult, Vector, VectorEntry, VectorType +from .vector import ( + StorageResult, + Vector, + VectorEntry, + VectorQuantizationType, + VectorType, +) __all__ = [ # Base abstractions @@ -54,7 +65,6 @@ "AsyncSyncMeta", "syncable", # Completion abstractions - "CompletionRecord", "MessageType", 
# Document abstractions "DataType", @@ -114,5 +124,6 @@ "Vector", "VectorEntry", "VectorType", + "VectorQuantizationType", "StorageResult", ] diff --git a/py/shared/abstractions/completion.py b/py/shared/abstractions/completion.py deleted file mode 100644 index 3222fd1f8..000000000 --- a/py/shared/abstractions/completion.py +++ /dev/null @@ -1,64 +0,0 @@ -""" -Abstractions for LLM completions. -""" - -import json -from datetime import datetime -from enum import Enum -from typing import List, Optional -from uuid import UUID - -from pydantic import BaseModel - -from .llm import MessageType -from .search import AggregateSearchResult - - -class CompletionRecord(BaseModel): - message_id: UUID - message_type: MessageType - timestamp: datetime = datetime.now() - feedback: Optional[List[str]] = None - score: Optional[List[float]] = None - completion_start_time: Optional[datetime] = None - completion_end_time: Optional[datetime] = None - search_query: Optional[str] = None - search_results: Optional[AggregateSearchResult] = None - llm_response: Optional[str] = None - - class Config: - arbitrary_types_allowed = True - - def to_dict(self): - def serialize(obj): - if isinstance(obj, UUID): - return str(obj) - elif isinstance(obj, datetime): - return obj.isoformat() - elif isinstance(obj, Enum): - return str(obj) - elif isinstance(obj, dict): - return {k: serialize(v) for k, v in obj.items()} - elif isinstance(obj, list): - return [serialize(v) for v in obj] - elif hasattr(obj, "dict"): - return serialize(obj.dict()) - return obj - - return serialize( - { - "message_id": self.message_id, - "message_type": self.message_type, - "timestamp": self.timestamp, - "feedback": self.feedback, - "score": self.score, - "completion_start_time": self.completion_start_time, - "completion_end_time": self.completion_end_time, - "search_query": self.search_query, - "search_results": self.search_results, - "llm_response": self.llm_response, - } - ) - - def to_json(self): - return json.dumps(self.to_dict()) diff --git a/py/shared/abstractions/document.py b/py/shared/abstractions/document.py index 2828183bb..2494d04f5 100644 --- a/py/shared/abstractions/document.py +++ b/py/shared/abstractions/document.py @@ -115,6 +115,8 @@ class IngestionStatus(str, Enum): CHUNKING = "chunking" EMBEDDING = "embedding" STORING = "storing" + ENRICHING = "enriching" + ENRICHED = "enriched" FAILED = "failed" SUCCESS = "success" @@ -128,6 +130,9 @@ class KGExtractionStatus(str, Enum): SUCCESS = "success" FAILED = "failed" + def __str__(self): + return self.value + class KGEnrichmentStatus(str, Enum): """Status of KG Enrichment per collection.""" @@ -137,6 +142,9 @@ class KGEnrichmentStatus(str, Enum): SUCCESS = "success" FAILED = "failed" + def __str__(self): + return self.value + class DocumentInfo(R2RSerializable): """Base class for document information handling.""" diff --git a/py/shared/abstractions/graph.py b/py/shared/abstractions/graph.py index 8821c4837..979cd5340 100644 --- a/py/shared/abstractions/graph.py +++ b/py/shared/abstractions/graph.py @@ -2,6 +2,7 @@ import logging import uuid from dataclasses import dataclass +from enum import Enum from typing import Any, Optional, Union from uuid import UUID @@ -43,6 +44,15 @@ class RelationshipType(R2RSerializable): description: str | None = None +class EntityLevel(str, Enum): + COLLECTION = "collection" + DOCUMENT = "document" + CHUNK = "chunk" + + def __str__(self): + return self.value + + class Entity(R2RSerializable): """An entity extracted from a document.""" @@ -55,6 +65,7 @@ class 
Entity(R2RSerializable): extraction_ids: Optional[list[UUID]] = None collection_id: Optional[UUID] = None document_id: Optional[UUID] = None + document_ids: Optional[list[UUID]] = None # we don't use these yet # name_embedding: Optional[list[float]] = None # graph_embedding: Optional[list[float]] = None @@ -78,7 +89,7 @@ def __init__(self, **kwargs): pass -class Triple(BaseModel): +class Triple(R2RSerializable): """A relationship between two entities. This is a generic relationship, and can be used to represent any type of relationship between any two entities.""" id: Optional[int] = None @@ -119,7 +130,7 @@ def __init__(self, **kwargs): self.attributes = self.attributes @classmethod - def from_dict( + def from_dict( # type: ignore cls, d: dict[str, Any], id_key: str = "id", diff --git a/py/shared/abstractions/ingestion.py b/py/shared/abstractions/ingestion.py new file mode 100644 index 000000000..df0b75400 --- /dev/null +++ b/py/shared/abstractions/ingestion.py @@ -0,0 +1,50 @@ +# Abstractions for ingestion + +from enum import Enum + +from pydantic import Field + +from .base import R2RSerializable +from .llm import GenerationConfig + + +class ChunkEnrichmentStrategy(str, Enum): + SEMANTIC = "semantic" + NEIGHBORHOOD = "neighborhood" + + def __str__(self) -> str: + return self.value + + +class ChunkEnrichmentSettings(R2RSerializable): + """ + Settings for chunk enrichment. + """ + + enable_chunk_enrichment: bool = Field( + default=False, + description="Whether to enable chunk enrichment or not", + ) + strategies: list[ChunkEnrichmentStrategy] = Field( + default=[], + description="The strategies to use for chunk enrichment. Union of chunks obtained from each strategy is used as context.", + ) + forward_chunks: int = Field( + default=3, + description="The number after the current chunk to include in the LLM context while enriching", + ) + backward_chunks: int = Field( + default=3, + description="The number of chunks before the current chunk in the LLM context while enriching", + ) + semantic_neighbors: int = Field( + default=10, description="The number of semantic neighbors to include" + ) + semantic_similarity_threshold: float = Field( + default=0.7, + description="The similarity threshold for semantic neighbors", + ) + generation_config: GenerationConfig = Field( + default=GenerationConfig(), + description="The generation config to use for chunk enrichment", + ) diff --git a/py/shared/abstractions/kg.py b/py/shared/abstractions/kg.py index 62b680d82..e50ae95f0 100644 --- a/py/shared/abstractions/kg.py +++ b/py/shared/abstractions/kg.py @@ -12,6 +12,19 @@ class KGRunType(str, Enum): ESTIMATE = "estimate" RUN = "run" + def __str__(self): + return self.value + + +class KGEntityDeduplicationType(str, Enum): + """Type of KG entity deduplication.""" + + BY_NAME = "by_name" + BY_DESCRIPTION = "by_description" + + def __str__(self): + return self.value + class KGCreationSettings(R2RSerializable): """Settings for knowledge graph creation.""" @@ -64,6 +77,30 @@ class KGCreationSettings(R2RSerializable): ) +class KGEntityDeduplicationSettings(R2RSerializable): + """Settings for knowledge graph entity deduplication.""" + + kg_entity_deduplication_type: KGEntityDeduplicationType = Field( + default=KGEntityDeduplicationType.BY_NAME, + description="The type of entity deduplication to use.", + ) + + max_description_input_length: int = Field( + default=65536, + description="The maximum length of the description for a node in the graph.", + ) + + kg_entity_deduplication_prompt: str = Field( + 
default="graphrag_entity_deduplication", + description="The prompt to use for knowledge graph entity deduplication.", + ) + + generation_config: GenerationConfig = Field( + default_factory=GenerationConfig, + description="Configuration for text generation during graph entity deduplication.", + ) + + class KGEnrichmentSettings(R2RSerializable): """Settings for knowledge graph enrichment.""" diff --git a/py/shared/abstractions/search.py b/py/shared/abstractions/search.py index 8d946b115..9b616a3bb 100644 --- a/py/shared/abstractions/search.py +++ b/py/shared/abstractions/search.py @@ -9,6 +9,8 @@ from .base import R2RSerializable from .llm import GenerationConfig +from shared.abstractions.graph import EntityLevel + class VectorSearchResult(R2RSerializable): """Result of a search operation.""" @@ -249,15 +251,15 @@ class VectorSearchSettings(R2RSerializable): default=True, description="Whether to include element metadata in the search results", ) - probes: Optional[int] = Field( + probes: int = Field( default=10, description="Number of ivfflat index lists to query. Higher increases accuracy but decreases speed.", ) - ef_search: Optional[int] = Field( + ef_search: int = Field( default=40, description="Size of the dynamic candidate list for HNSW index search. Higher increases accuracy but decreases speed.", ) - hybrid_search_settings: Optional[HybridSearchSettings] = Field( + hybrid_search_settings: HybridSearchSettings = Field( default=HybridSearchSettings(), description="Settings for hybrid search", ) @@ -312,11 +314,17 @@ def __init__(self, **data): class KGSearchSettings(R2RSerializable): + entities_level: EntityLevel = Field( + default=EntityLevel.DOCUMENT, + description="The level of entities to search for", + ) + filters: dict[str, Any] = Field( default_factory=dict, description="Alias for search_filters", deprecated=True, ) + search_filters: dict[str, Any] = Field( default_factory=dict, description="""Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. diff --git a/py/shared/abstractions/vector.py b/py/shared/abstractions/vector.py index 19294f15d..b367edd03 100644 --- a/py/shared/abstractions/vector.py +++ b/py/shared/abstractions/vector.py @@ -102,15 +102,13 @@ class IndexArgsHNSW(R2RSerializable): class VectorTableName(str, Enum): """ This enum represents the different tables where we store vectors. - - # TODO: change the table name of the chunks table. Right now it is called - # {r2r_project_name}.{r2r_project_name} due to a bug in the vector class. 
""" - CHUNKS = "chunks" - ENTITIES = "entity_embedding" + VECTORS = "vectors" + ENTITIES_DOCUMENT = "document_entity" + ENTITIES_COLLECTION = "collection_entity" # TODO: Add support for triples - # TRIPLES = "triple_raw" + # TRIPLES = "chunk_triple" COMMUNITIES = "community_report" def __str__(self) -> str: diff --git a/py/shared/api/models/__init__.py b/py/shared/api/models/__init__.py index be242493d..9c3fc06d3 100644 --- a/py/shared/api/models/__init__.py +++ b/py/shared/api/models/__init__.py @@ -14,8 +14,10 @@ from shared.api.models.kg.responses import ( KGCreationResponse, KGEnrichmentResponse, + KGEntityDeduplicationResponse, WrappedKGCreationResponse, WrappedKGEnrichmentResponse, + WrappedKGEntityDeduplicationResponse, ) from shared.api.models.management.responses import ( AnalyticsResponse, diff --git a/py/shared/api/models/ingestion/responses.py b/py/shared/api/models/ingestion/responses.py index f8a899389..58e187d1a 100644 --- a/py/shared/api/models/ingestion/responses.py +++ b/py/shared/api/models/ingestion/responses.py @@ -1,4 +1,4 @@ -from typing import Optional, TypeVar +from typing import Any, Optional, TypeVar from uuid import UUID from pydantic import BaseModel, Field @@ -63,6 +63,21 @@ class CreateVectorIndexResponse(BaseModel): ) +class ListVectorIndicesResponse(BaseModel): + indices: list[dict[str, Any]] + + +class DeleteVectorIndexResponse(BaseModel): + message: str + + +class SelectVectorIndexResponse(BaseModel): + status: str + + WrappedIngestionResponse = ResultsWrapper[list[IngestionResponse]] WrappedUpdateResponse = ResultsWrapper[UpdateResponse] WrappedCreateVectorIndexResponse = ResultsWrapper[CreateVectorIndexResponse] +WrappedListVectorIndicesResponse = ResultsWrapper[ListVectorIndicesResponse] +WrappedDeleteVectorIndexResponse = ResultsWrapper[DeleteVectorIndexResponse] +WrappedSelectVectorIndexResponse = ResultsWrapper[SelectVectorIndexResponse] diff --git a/py/shared/api/models/kg/responses.py b/py/shared/api/models/kg/responses.py index e9e5a1fda..3438b26e1 100644 --- a/py/shared/api/models/kg/responses.py +++ b/py/shared/api/models/kg/responses.py @@ -18,13 +18,14 @@ class KGCreationResponse(BaseModel): description="The task ID of the KG creation request.", ) - class Config: - json_schema_extra = { - "example": { - "message": "Graph creation queued successfully.", - "task_id": "c68dc72e-fc23-5452-8f49-d7bd46088a96", - } + +class Config: + json_schema_extra = { + "example": { + "message": "Graph creation queued successfully.", + "task_id": "c68dc72e-fc23-5452-8f49-d7bd46088a96", } + } class KGEnrichmentResponse(BaseModel): @@ -100,6 +101,40 @@ class KGCreationEstimationResponse(R2RSerializable): ) +class KGDeduplicationEstimationResponse(R2RSerializable): + """Response for knowledge graph deduplication estimation.""" + + message: str = Field( + default="", + description="The message to display to the user.", + ) + + num_entities: Optional[int] = Field( + default=None, + description="The number of entities in the collection.", + ) + + estimated_llm_calls: Optional[str] = Field( + default=None, + description="The estimated number of LLM calls.", + ) + + estimated_total_in_out_tokens_in_millions: Optional[str] = Field( + default=None, + description="The estimated total number of input and output tokens in millions.", + ) + + estimated_cost_in_usd: Optional[str] = Field( + default=None, + description="The estimated cost in USD.", + ) + + estimated_total_time_in_minutes: Optional[str] = Field( + default=None, + description="The estimated time in minutes.", + ) 
+ + class KGEnrichmentEstimationResponse(R2RSerializable): """Response for knowledge graph enrichment estimation.""" @@ -237,6 +272,34 @@ class Config: } +class KGEntityDeduplicationResponse(BaseModel): + """Response for knowledge graph entity deduplication.""" + + message: str = Field( + ..., + description="The message to display to the user.", + ) + + class Config: + json_schema_extra = { + "example": { + "message": "Entity deduplication queued successfully.", + } + } + + +class KGTunePromptResponse(R2RSerializable): + """Response containing just the tuned prompt string.""" + + tuned_prompt: str = Field( + ..., + description="The updated prompt.", + ) + + class Config: + json_schema_extra = {"example": {"tuned_prompt": "The updated prompt"}} + + WrappedKGCreationResponse = ResultsWrapper[ Union[KGCreationResponse, KGCreationEstimationResponse] ] @@ -245,4 +308,8 @@ class Config: ] WrappedKGEntitiesResponse = ResultsWrapper[KGEntitiesResponse] WrappedKGTriplesResponse = ResultsWrapper[KGTriplesResponse] +WrappedKGTunePromptResponse = ResultsWrapper[KGTunePromptResponse] WrappedKGCommunitiesResponse = ResultsWrapper[KGCommunitiesResponse] +WrappedKGEntityDeduplicationResponse = ResultsWrapper[ + Union[KGEntityDeduplicationResponse, KGDeduplicationEstimationResponse] +] diff --git a/py/shared/utils/__init__.py b/py/shared/utils/__init__.py index 1ee107c3d..4a0362cbc 100644 --- a/py/shared/utils/__init__.py +++ b/py/shared/utils/__init__.py @@ -10,7 +10,6 @@ generate_default_user_collection_id, generate_document_id, generate_extraction_id, - generate_message_id, generate_run_id, generate_user_id, increment_version, @@ -33,7 +32,6 @@ "generate_default_user_collection_id", "generate_user_id", "generate_collection_id_from_name", - "generate_message_id", "generate_default_prompt_id", # Other "increment_version", diff --git a/py/shared/utils/base_utils.py b/py/shared/utils/base_utils.py index a1f868ae5..961c6ce6e 100644 --- a/py/shared/utils/base_utils.py +++ b/py/shared/utils/base_utils.py @@ -2,8 +2,7 @@ import json import logging from copy import deepcopy -from datetime import datetime -from typing import TYPE_CHECKING, Any, AsyncGenerator, Iterable, Optional +from typing import TYPE_CHECKING, Any, AsyncGenerator, Iterable from uuid import NAMESPACE_DNS, UUID, uuid4, uuid5 from ..abstractions.graph import EntityType, RelationshipType @@ -153,15 +152,6 @@ def generate_user_id(email: str) -> UUID: return _generate_id_from_label(email) -def generate_message_id(query: str, completion_start_time: datetime) -> UUID: - """ - Generates a unique message id from a given query and completion start time - """ - return _generate_id_from_label( - f"{query}-{completion_start_time.isoformat()}" - ) - - def generate_default_prompt_id(prompt_name: str) -> UUID: """ Generates a unique prompt id diff --git a/py/tests/conftest.py b/py/tests/conftest.py index 0aed6ec7e..68f9c5fd7 100644 --- a/py/tests/conftest.py +++ b/py/tests/conftest.py @@ -27,6 +27,7 @@ IngestionStatus, KGEnrichmentStatus, KGExtractionStatus, + OrchestrationConfig, ) from core.providers import ( BCryptProvider, @@ -75,12 +76,16 @@ def generate_random_vector_entry( @pytest.fixture(scope="function") -def app_config(): +def project_name(): collection_id = uuid.uuid4() - random_project_name = f"a_{collection_id.hex}_test_project" + return f"test_collection_{collection_id.hex}" - return AppConfig(project_name=random_project_name) + +@pytest.fixture(scope="function") +def app_config(project_name): + + return AppConfig(project_name=project_name) # 
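The `Wrapped*` aliases above all follow one envelope convention. A toy re-declaration (not the shared module) showing how a `{"results": ...}` payload parses through a parameterized wrapper, assuming pydantic v2:

```python
from typing import Generic, TypeVar

from pydantic import BaseModel

T = TypeVar("T")


class ResultsWrapper(BaseModel, Generic[T]):
    results: T


class KGTunePromptResponse(BaseModel):
    tuned_prompt: str


payload = {"results": {"tuned_prompt": "The updated prompt"}}
parsed = ResultsWrapper[KGTunePromptResponse].model_validate(payload)
print(parsed.results.tuned_prompt)  # "The updated prompt"
```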
Crypto @@ -108,7 +113,7 @@ async def postgres_db_provider( db_config, dimension=dimension, crypto_provider=crypto_provider ) await db.initialize() - db.vector.upsert_entries(sample_entries) + await db.upsert_entries(sample_entries) yield db # Teardown # TODO - Add teardown methods @@ -116,12 +121,9 @@ async def postgres_db_provider( @pytest.fixture(scope="function") -def db_config_temporary(app_config): - collection_id = uuid.uuid4() - - random_project_name = f"test_collection_{collection_id.hex}" +def db_config_temporary(project_name, app_config): return DatabaseConfig.create( - provider="postgres", project_name=random_project_name, app=app_config + provider="postgres", project_name=project_name, app=app_config ) @@ -135,12 +137,12 @@ async def temporary_postgres_db_provider( crypto_provider=crypto_provider, ) await db.initialize() - db.vector.upsert_entries(sample_entries) + await db.upsert_entries(sample_entries) try: yield db finally: - await db.relational.close() - db.vector.close() + await db.close() + # db.vector.close() # Auth @@ -178,6 +180,18 @@ def litellm_provider(app_config): return LiteLLMEmbeddingProvider(config) +# Embeddings +@pytest.fixture +def litellm_provider_128(app_config): + config = EmbeddingConfig( + provider="litellm", + base_model="text-embedding-3-small", + base_dimension=128, + app=app_config, + ) + return LiteLLMEmbeddingProvider(config) + + # File Provider @pytest.fixture(scope="function") def file_config(app_config): @@ -191,7 +205,7 @@ async def postgres_file_provider(file_config, temporary_postgres_db_provider): ) await file_provider.initialize() yield file_provider - await file_provider._close_connection() + # await file_provider._close_connection() # LLM provider @@ -257,10 +271,10 @@ async def postgres_kg_provider( kg_extraction_status=KGExtractionStatus.PENDING, ) - await temporary_postgres_db_provider.relational.upsert_documents_overview( + await temporary_postgres_db_provider.upsert_documents_overview( document_info ) - + print("config = ", temporary_postgres_db_provider.config) kg_provider = PostgresKGProvider( kg_config_temporary, temporary_postgres_db_provider, litellm_provider ) @@ -275,6 +289,11 @@ def prompt_config(app_config): return PromptConfig(provider="r2r", app=app_config) +@pytest.fixture(scope="function") +def orchestration_config(app_config): + return OrchestrationConfig(provider="simple", app=app_config) + + @pytest.fixture(scope="function") async def r2r_prompt_provider(prompt_config, temporary_postgres_db_provider): prompt_provider = R2RPromptProvider( diff --git a/py/tests/core/pipes/test_kg_community_summary_pipe.py b/py/tests/core/pipes/test_kg_community_summary_pipe.py index 75c0ab1a3..9b1883bbf 100644 --- a/py/tests/core/pipes/test_kg_community_summary_pipe.py +++ b/py/tests/core/pipes/test_kg_community_summary_pipe.py @@ -162,5 +162,18 @@ async def test_community_summary_prompt( summary = await kg_community_summary_pipe.community_summary_prompt( entities_list, triples_raw_list, max_summary_input_length ) - expected_summary = "\n Entity: Entity1\n Descriptions: \n 1,Description1\n Triples: \n 1,Entity1,object1,predicate1,description1\n \n Entity: Entity2\n Descriptions: \n 2,Description2\n Triples: \n 2,Entity2,object2,predicate2,description2\n " - assert summary == expected_summary + expected_summary = """ + Entity: Entity1 + Descriptions: + 1,Description1 + Triples: + 1,Entity1,object1,predicate1,description1 + + Entity: Entity2 + Descriptions: + 2,Description2 + Triples: + 2,Entity2,object2,predicate2,description2 + """ + # 
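The rewritten assertion in this hunk replaces an opaque single-line expected string with a triple-quoted block compared after `.strip()`. In miniature, with `textwrap.dedent` as an optional extra for normalizing indentation:

```python
import textwrap

summary = "Entity: Entity1\nDescriptions:\n1,Description1"
expected = """
    Entity: Entity1
    Descriptions:
    1,Description1
"""
assert summary.strip() == textwrap.dedent(expected).strip()
```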
"\n Entity: Entity1\n Descriptions: \n 1,Description1\n Triples: \n 1,Entity1,object1,predicate1,description1\n \n Entity: Entity2\n Descriptions: \n 2,Description2\n Triples: \n 2,Entity2,object2,predicate2,description2\n " + assert summary.strip() == expected_summary.strip() diff --git a/py/tests/core/providers/database/relational/test_collection_db.py b/py/tests/core/providers/database/relational/test_collection_db.py index b43cfa065..7eef24edb 100644 --- a/py/tests/core/providers/database/relational/test_collection_db.py +++ b/py/tests/core/providers/database/relational/test_collection_db.py @@ -8,10 +8,8 @@ @pytest.mark.asyncio async def test_create_collection(temporary_postgres_db_provider): - collection = ( - await temporary_postgres_db_provider.relational.create_collection( - "Test Collection", "Test Description" - ) + collection = await temporary_postgres_db_provider.create_collection( + "Test Collection", "Test Description" ) assert isinstance(collection, CollectionResponse) assert collection.name == "Test Collection" @@ -21,14 +19,12 @@ async def test_create_collection(temporary_postgres_db_provider): @pytest.mark.asyncio async def test_get_collection(temporary_postgres_db_provider): created_collection = ( - await temporary_postgres_db_provider.relational.create_collection( + await temporary_postgres_db_provider.create_collection( "Test Collection", "Test Description" ) ) - retrieved_collection = ( - await temporary_postgres_db_provider.relational.get_collection( - created_collection.collection_id - ) + retrieved_collection = await temporary_postgres_db_provider.get_collection( + created_collection.collection_id ) assert retrieved_collection == created_collection @@ -36,12 +32,12 @@ async def test_get_collection(temporary_postgres_db_provider): @pytest.mark.asyncio async def test_update_collection(temporary_postgres_db_provider): created_collection = ( - await temporary_postgres_db_provider.relational.create_collection( + await temporary_postgres_db_provider.create_collection( "Test Collection", "Test Description" ) ) updated_collection = ( - await temporary_postgres_db_provider.relational.update_collection( + await temporary_postgres_db_provider.update_collection( created_collection.collection_id, name="Updated Collection", description="Updated Description", @@ -54,50 +50,50 @@ async def test_update_collection(temporary_postgres_db_provider): @pytest.mark.asyncio async def test_delete_collection(temporary_postgres_db_provider): created_collection = ( - await temporary_postgres_db_provider.relational.create_collection( + await temporary_postgres_db_provider.create_collection( "Test Collection", "Test Description" ) ) - await temporary_postgres_db_provider.relational.delete_collection( + await temporary_postgres_db_provider.delete_collection_relational( created_collection.collection_id ) with pytest.raises(R2RException): - await temporary_postgres_db_provider.relational.get_collection( + await temporary_postgres_db_provider.delete_collection_relational( created_collection.collection_id ) + # await temporary_postgres_db_provider.delete_collection_vector( + # created_collection.collection_id + # ) + # with pytest.raises(R2RException): + # await temporary_postgres_db_provider.delete_collection_vector( + # created_collection.collection_id + # ) + @pytest.mark.asyncio async def test_list_collections(temporary_postgres_db_provider): - await temporary_postgres_db_provider.relational.create_collection( + await temporary_postgres_db_provider.create_collection( "Collection 1", "Description 
1" ) - await temporary_postgres_db_provider.relational.create_collection( + await temporary_postgres_db_provider.create_collection( "Collection 2", "Description 2" ) - collections = ( - await temporary_postgres_db_provider.relational.list_collections() - ) + collections = await temporary_postgres_db_provider.list_collections() assert len(collections["results"]) >= 2 assert collections["total_entries"] >= 2 @pytest.mark.asyncio async def test_get_collections_by_ids(temporary_postgres_db_provider): - collection1 = ( - await temporary_postgres_db_provider.relational.create_collection( - "Collection 1", "Description 1" - ) + collection1 = await temporary_postgres_db_provider.create_collection( + "Collection 1", "Description 1" ) - collection2 = ( - await temporary_postgres_db_provider.relational.create_collection( - "Collection 2", "Description 2" - ) + collection2 = await temporary_postgres_db_provider.create_collection( + "Collection 2", "Description 2" ) - collections = ( - await temporary_postgres_db_provider.relational.get_collections_by_ids( - [collection1.collection_id, collection2.collection_id] - ) + collections = await temporary_postgres_db_provider.get_collections_by_ids( + [collection1.collection_id, collection2.collection_id] ) assert len(collections) == 2 assert collections[0].collection_id == collection1.collection_id @@ -108,13 +104,11 @@ async def test_get_collections_by_ids(temporary_postgres_db_provider): async def test_assign_and_remove_document_from_collection( temporary_postgres_db_provider, ): - collection = ( - await temporary_postgres_db_provider.relational.create_collection( - "Test Collection", "Test Description" - ) + collection = await temporary_postgres_db_provider.create_collection( + "Test Collection", "Test Description" ) document_id = UUID("00000000-0000-0000-0000-000000000001") - await temporary_postgres_db_provider.relational.upsert_documents_overview( + await temporary_postgres_db_provider.upsert_documents_overview( DocumentInfo( id=document_id, collection_ids=[], @@ -125,13 +119,14 @@ async def test_assign_and_remove_document_from_collection( size_in_bytes=0, ) ) - await temporary_postgres_db_provider.relational.assign_document_to_collection( + await temporary_postgres_db_provider.assign_document_to_collection_relational( + document_id, collection.collection_id + ) + await temporary_postgres_db_provider.assign_document_to_collection_vector( document_id, collection.collection_id ) document_collections = ( - await temporary_postgres_db_provider.relational.document_collections( - document_id - ) + await temporary_postgres_db_provider.document_collections(document_id) ) assert len(document_collections["results"]) == 1 assert ( @@ -139,40 +134,37 @@ async def test_assign_and_remove_document_from_collection( == collection.collection_id ) - await temporary_postgres_db_provider.relational.remove_document_from_collection( + await temporary_postgres_db_provider.remove_document_from_collection_relational( + document_id, collection.collection_id + ) + await temporary_postgres_db_provider.remove_document_from_collection_vector( document_id, collection.collection_id ) document_collections = ( - await temporary_postgres_db_provider.relational.document_collections( - document_id - ) + await temporary_postgres_db_provider.document_collections(document_id) ) assert len(document_collections["results"]) == 0 @pytest.mark.asyncio async def test_get_collections_for_user(temporary_postgres_db_provider): - user = await temporary_postgres_db_provider.relational.create_user( + user 
= await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) - collection1 = ( - await temporary_postgres_db_provider.relational.create_collection( - "Collection 1", "Description 1" - ) + collection1 = await temporary_postgres_db_provider.create_collection( + "Collection 1", "Description 1" ) - collection2 = ( - await temporary_postgres_db_provider.relational.create_collection( - "Collection 2", "Description 2" - ) + collection2 = await temporary_postgres_db_provider.create_collection( + "Collection 2", "Description 2" ) - await temporary_postgres_db_provider.relational.add_user_to_collection( + await temporary_postgres_db_provider.add_user_to_collection( user.id, collection1.collection_id ) - await temporary_postgres_db_provider.relational.add_user_to_collection( + await temporary_postgres_db_provider.add_user_to_collection( user.id, collection2.collection_id ) - user_collections = await temporary_postgres_db_provider.relational.get_collections_for_user( - user.id + user_collections = ( + await temporary_postgres_db_provider.get_collections_for_user(user.id) ) assert len(user_collections["results"]) == 2 assert user_collections["total_entries"] == 2 diff --git a/py/tests/core/providers/database/relational/test_document_db.py b/py/tests/core/providers/database/relational/test_document_db.py index 93f87e636..256f5d188 100644 --- a/py/tests/core/providers/database/relational/test_document_db.py +++ b/py/tests/core/providers/database/relational/test_document_db.py @@ -11,12 +11,11 @@ KGExtractionStatus, ) - -@pytest.mark.asyncio -async def test_create_table(temporary_postgres_db_provider): - await temporary_postgres_db_provider.relational.create_table() - # Verify that the table is created with the expected columns and constraints - # You can execute a query to check the table structure or use a database inspection tool +# @pytest.mark.asyncio +# async def test_create_table(temporary_postgres_db_provider): +# await temporary_postgres_db_provider.create_table() +# # Verify that the table is created with the expected columns and constraints +# # You can execute a query to check the table structure or use a database inspection tool @pytest.mark.asyncio @@ -33,15 +32,13 @@ async def test_upsert_documents_overview(temporary_postgres_db_provider): ingestion_status=IngestionStatus.PENDING, kg_extraction_status=KGExtractionStatus.PENDING, ) - await temporary_postgres_db_provider.relational.upsert_documents_overview( + await temporary_postgres_db_provider.upsert_documents_overview( document_info ) # Verify that the document is inserted correctly - result = ( - await temporary_postgres_db_provider.relational.get_documents_overview( - filter_document_ids=[document_info.id] - ) + result = await temporary_postgres_db_provider.get_documents_overview( + filter_document_ids=[document_info.id] ) assert len(result["results"]) == 1 inserted_document = result["results"][0] @@ -62,14 +59,12 @@ async def test_upsert_documents_overview(temporary_postgres_db_provider): # Update the document and verify the changes document_info.title = "Updated Test Document" document_info.ingestion_status = IngestionStatus.SUCCESS - await temporary_postgres_db_provider.relational.upsert_documents_overview( + await temporary_postgres_db_provider.upsert_documents_overview( document_info ) - result = ( - await temporary_postgres_db_provider.relational.get_documents_overview( - filter_document_ids=[document_info.id] - ) + result = await temporary_postgres_db_provider.get_documents_overview( + 
filter_document_ids=[document_info.id] ) assert len(result["results"]) == 1 updated_document = result["results"][0] @@ -91,19 +86,17 @@ async def test_delete_from_documents_overview(temporary_postgres_db_provider): ingestion_status=IngestionStatus.PENDING, kg_extraction_status=KGExtractionStatus.PENDING, ) - await temporary_postgres_db_provider.relational.upsert_documents_overview( + await temporary_postgres_db_provider.upsert_documents_overview( document_info ) - await temporary_postgres_db_provider.relational.delete_from_documents_overview( + await temporary_postgres_db_provider.delete_from_documents_overview( document_info.id ) # Verify that the document is deleted - result = ( - await temporary_postgres_db_provider.relational.get_documents_overview( - filter_document_ids=[document_info.id] - ) + result = await temporary_postgres_db_provider.get_documents_overview( + filter_document_ids=[document_info.id] ) assert len(result["results"]) == 0 @@ -134,24 +127,20 @@ async def test_get_documents_overview(temporary_postgres_db_provider): ingestion_status=IngestionStatus.SUCCESS, kg_extraction_status=KGExtractionStatus.PENDING, ) - await temporary_postgres_db_provider.relational.upsert_documents_overview( + await temporary_postgres_db_provider.upsert_documents_overview( [document_info1, document_info2] ) # Test filtering by user ID - result = ( - await temporary_postgres_db_provider.relational.get_documents_overview( - filter_user_ids=[UUID("00000000-0000-0000-0000-000000000003")] - ) + result = await temporary_postgres_db_provider.get_documents_overview( + filter_user_ids=[UUID("00000000-0000-0000-0000-000000000003")] ) assert len(result["results"]) == 2 assert result["total_entries"] == 2 # Test filtering by document ID - result = ( - await temporary_postgres_db_provider.relational.get_documents_overview( - filter_document_ids=[UUID("00000000-0000-0000-0000-000000000001")] - ) + result = await temporary_postgres_db_provider.get_documents_overview( + filter_document_ids=[UUID("00000000-0000-0000-0000-000000000001")] ) assert len(result["results"]) == 1 assert result["results"][0].id == UUID( @@ -159,21 +148,15 @@ async def test_get_documents_overview(temporary_postgres_db_provider): ) # Test filtering by collection ID - result = ( - await temporary_postgres_db_provider.relational.get_documents_overview( - filter_collection_ids=[ - UUID("00000000-0000-0000-0000-000000000002") - ] - ) + result = await temporary_postgres_db_provider.get_documents_overview( + filter_collection_ids=[UUID("00000000-0000-0000-0000-000000000002")] ) assert len(result["results"]) == 2 assert result["total_entries"] == 2 # Test pagination - result = ( - await temporary_postgres_db_provider.relational.get_documents_overview( - offset=1, limit=1 - ) + result = await temporary_postgres_db_provider.get_documents_overview( + offset=1, limit=1 ) assert len(result["results"]) == 1 assert result["total_entries"] == 2 diff --git a/py/tests/core/providers/database/relational/test_relational_db_provider.py b/py/tests/core/providers/database/relational/test_relational_db_provider.py index 7b631826e..32a4d1f1b 100644 --- a/py/tests/core/providers/database/relational/test_relational_db_provider.py +++ b/py/tests/core/providers/database/relational/test_relational_db_provider.py @@ -7,4 +7,4 @@ @pytest.mark.asyncio async def test_relational_db_initialization(postgres_db_provider): assert isinstance(postgres_db_provider, PostgresDBProvider) - assert postgres_db_provider.relational is not None + # assert postgres_db_provider.relational 
is not None diff --git a/py/tests/core/providers/database/relational/test_user_db.py b/py/tests/core/providers/database/relational/test_user_db.py index 6856e2003..2388eef90 100644 --- a/py/tests/core/providers/database/relational/test_user_db.py +++ b/py/tests/core/providers/database/relational/test_user_db.py @@ -9,7 +9,7 @@ @pytest.mark.asyncio async def test_create_user(temporary_postgres_db_provider): - user = await temporary_postgres_db_provider.relational.create_user( + user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) assert isinstance(user, UserResponse) @@ -18,43 +18,40 @@ async def test_get_user_by_id(temporary_postgres_db_provider): - user = await temporary_postgres_db_provider.relational.create_user( + user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) - retrieved_user = ( - await temporary_postgres_db_provider.relational.get_user_by_id(user.id) + retrieved_user = await temporary_postgres_db_provider.get_user_by_id( + user.id ) assert retrieved_user == user @pytest.mark.asyncio async def test_get_user_by_email(temporary_postgres_db_provider): - user = await temporary_postgres_db_provider.relational.create_user( + user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) - retrieved_user = ( - await temporary_postgres_db_provider.relational.get_user_by_email( - "test@example.com" - ) + retrieved_user = await temporary_postgres_db_provider.get_user_by_email( + "test@example.com" ) assert retrieved_user == user @pytest.mark.asyncio async def test_delete_user(temporary_postgres_db_provider): - user = await temporary_postgres_db_provider.relational.create_user( + user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) - await temporary_postgres_db_provider.relational.delete_user(user.id) - user = await temporary_postgres_db_provider.relational.get_user_by_id( - user.id - ) - assert user is None + await temporary_postgres_db_provider.delete_user_relational(user.id) + await temporary_postgres_db_provider.delete_user_vector(user.id) + with pytest.raises(Exception): + await temporary_postgres_db_provider.get_user_by_id(user.id) @pytest.mark.asyncio async def test_update_user(temporary_postgres_db_provider): - user = await temporary_postgres_db_provider.relational.create_user( + user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) updated_user = UserResponse( @@ -68,18 +68,16 @@ bio="Updated bio", collection_ids=[], ) - result = await temporary_postgres_db_provider.relational.update_user( - updated_user - ) + result = await temporary_postgres_db_provider.update_user(updated_user) assert result.email == updated_user.email @pytest.mark.asyncio async def test_update_user_password(temporary_postgres_db_provider): - user = await temporary_postgres_db_provider.relational.create_user( + user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) - await temporary_postgres_db_provider.relational.update_user_password( + await temporary_postgres_db_provider.update_user_password( user.id, "new_password" ) # Additional assertions can be added based on the expected behavior @@ -87,13 +85,13 @@ @pytest.mark.asyncio async def
test_get_all_users(temporary_postgres_db_provider): - await temporary_postgres_db_provider.relational.create_user( + await temporary_postgres_db_provider.create_user( "test1@example.com", "password" ) - await temporary_postgres_db_provider.relational.create_user( + await temporary_postgres_db_provider.create_user( "test2@example.com", "password" ) - users = await temporary_postgres_db_provider.relational.get_all_users() + users = await temporary_postgres_db_provider.get_all_users() assert len(users) >= 2 assert any(user.email == "test1@example.com" for user in users) assert any(user.email == "test2@example.com" for user in users) @@ -103,34 +101,30 @@ async def test_get_all_users(temporary_postgres_db_provider): async def test_store_and_verify_verification_code( temporary_postgres_db_provider, ): - user = await temporary_postgres_db_provider.relational.create_user( + user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) verification_code = "123456" expiry = datetime.utcnow() + timedelta(hours=1) - await temporary_postgres_db_provider.relational.store_verification_code( + await temporary_postgres_db_provider.store_verification_code( user.id, verification_code, expiry ) - await temporary_postgres_db_provider.relational.verify_user( - verification_code - ) - updated_user = ( - await temporary_postgres_db_provider.relational.get_user_by_id(user.id) - ) + await temporary_postgres_db_provider.verify_user(verification_code) + updated_user = await temporary_postgres_db_provider.get_user_by_id(user.id) assert updated_user.is_verified @pytest.mark.asyncio async def test_store_and_get_reset_token(temporary_postgres_db_provider): - user = await temporary_postgres_db_provider.relational.create_user( + user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) reset_token = "reset_token" expiry = datetime.utcnow() + timedelta(hours=1) - await temporary_postgres_db_provider.relational.store_reset_token( + await temporary_postgres_db_provider.store_reset_token( user.id, reset_token, expiry ) - user_id = await temporary_postgres_db_provider.relational.get_user_id_by_reset_token( + user_id = await temporary_postgres_db_provider.get_user_id_by_reset_token( reset_token ) assert user_id == user.id @@ -140,21 +134,17 @@ async def test_store_and_get_reset_token(temporary_postgres_db_provider): async def test_add_and_remove_user_from_collection( temporary_postgres_db_provider, ): - user = await temporary_postgres_db_provider.relational.create_user( + user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) collection_id = UUID("00000000-0000-0000-0000-000000000001") - await temporary_postgres_db_provider.relational.add_user_to_collection( + await temporary_postgres_db_provider.add_user_to_collection( user.id, collection_id ) - updated_user = ( - await temporary_postgres_db_provider.relational.get_user_by_id(user.id) - ) + updated_user = await temporary_postgres_db_provider.get_user_by_id(user.id) assert collection_id in updated_user.collection_ids - await temporary_postgres_db_provider.relational.remove_user_from_collection( + await temporary_postgres_db_provider.remove_user_from_collection( user.id, collection_id ) - updated_user = ( - await temporary_postgres_db_provider.relational.get_user_by_id(user.id) - ) + updated_user = await temporary_postgres_db_provider.get_user_by_id(user.id) assert collection_id not in updated_user.collection_ids diff --git a/py/tests/core/providers/database/test_vector_db_provider.py 
b/py/tests/core/providers/database/test_vector_db_provider.py index 0fe76d383..ff7a70485 100644 --- a/py/tests/core/providers/database/test_vector_db_provider.py +++ b/py/tests/core/providers/database/test_vector_db_provider.py @@ -7,14 +7,14 @@ @pytest.mark.asyncio async def test_vector_db_initialization(postgres_db_provider): assert isinstance(postgres_db_provider, PostgresDBProvider) - assert postgres_db_provider.vector is not None + # assert postgres_db_provider is not None @pytest.mark.asyncio async def test_search_equality_filter(postgres_db_provider, sample_entries): - query_vector = sample_entries[0].vector - results = postgres_db_provider.vector.semantic_search( - query_vector.data, + query_vector = sample_entries[0] + results = await postgres_db_provider.semantic_search( + query_vector.vector.data, VectorSearchSettings( search_limit=10, filters={"key": {"$eq": "value_id_0"}} ), @@ -25,9 +25,9 @@ async def test_search_equality_filter(postgres_db_provider, sample_entries): @pytest.mark.asyncio async def test_search_not_equal_filter(postgres_db_provider, sample_entries): - query_vector = sample_entries[0].vector - results = postgres_db_provider.vector.semantic_search( - query_vector.data, + query_vector = sample_entries[0] + results = await postgres_db_provider.semantic_search( + query_vector.vector.data, VectorSearchSettings( search_limit=100, filters={"key": {"$ne": "value_id_0"}} ), @@ -40,9 +40,9 @@ async def test_search_not_equal_filter(postgres_db_provider, sample_entries): async def test_search_greater_than_filter( postgres_db_provider, sample_entries ): - query_vector = sample_entries[0].vector - results = postgres_db_provider.vector.semantic_search( - query_vector.data, + query_vector = sample_entries[0] + results = await postgres_db_provider.semantic_search( + query_vector.vector.data, VectorSearchSettings( search_limit=100, filters={"raw_key": {"$gt": 50}} ), @@ -55,9 +55,9 @@ async def test_search_greater_than_filter( async def test_search_less_than_or_equal_filter( postgres_db_provider, sample_entries ): - query_vector = sample_entries[0].vector - results = postgres_db_provider.vector.semantic_search( - query_vector.data, + query_vector = sample_entries[0] + results = await postgres_db_provider.semantic_search( + query_vector.vector.data, VectorSearchSettings( search_limit=10, filters={"raw_key": {"$lte": 20}}, @@ -66,8 +66,8 @@ async def test_search_less_than_or_equal_filter( ) # TODO - Why is this number not always 10? 
assert len(results) == 10 - results = postgres_db_provider.vector.semantic_search( - query_vector.data, + results = await postgres_db_provider.semantic_search( + query_vector.vector.data, VectorSearchSettings( search_limit=100, filters={"raw_key": {"$lte": 20}} ), @@ -78,9 +78,9 @@ async def test_search_less_than_or_equal_filter( @pytest.mark.asyncio async def test_search_in_filter(postgres_db_provider, sample_entries): - query_vector = sample_entries[0].vector - results = postgres_db_provider.vector.semantic_search( - query_vector.data, + query_vector = sample_entries[0] + results = await postgres_db_provider.semantic_search( + query_vector.vector.data, VectorSearchSettings( search_limit=10, filters={"key": {"$in": ["value_id_0", "value_id_1"]}}, @@ -94,9 +94,9 @@ async def test_search_in_filter(postgres_db_provider, sample_entries): @pytest.mark.asyncio async def test_search_complex_and_filter(postgres_db_provider, sample_entries): - query_vector = sample_entries[0].vector - results = postgres_db_provider.vector.semantic_search( - query_vector.data, + query_vector = sample_entries[0] + results = await postgres_db_provider.semantic_search( + query_vector.vector.data, VectorSearchSettings( search_limit=10, filters={ @@ -114,9 +114,9 @@ async def test_search_complex_and_filter(postgres_db_provider, sample_entries): @pytest.mark.asyncio async def test_search_complex_or_filter(postgres_db_provider, sample_entries): - query_vector = sample_entries[0].vector - results = postgres_db_provider.vector.semantic_search( - query_vector.data, + query_vector = sample_entries[0] + results = await postgres_db_provider.semantic_search( + query_vector.vector.data, VectorSearchSettings( search_limit=11, ef_search=100, # TODO - Better understand why we need to set this to search the entire database. @@ -137,9 +137,9 @@ async def test_search_complex_or_filter(postgres_db_provider, sample_entries): async def test_search_nested_and_or_filters( postgres_db_provider, sample_entries ): - query_vector = sample_entries[0].vector - results = postgres_db_provider.vector.semantic_search( - query_vector.data, + query_vector = sample_entries[0] + results = await postgres_db_provider.semantic_search( + query_vector.vector.data, VectorSearchSettings( search_limit=10, ef_search=100, # TODO - Better understand why we need to set this to search the entire database. 
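A sketch in the style of the updated tests: `semantic_search` is now awaited directly on the db provider, and filters compose with `$and`/`$or` over the operator grammar exercised above. The fixtures come from this diff; the combined filter and the assertion are illustrative:

```python
import pytest

from shared.abstractions.search import VectorSearchSettings


@pytest.mark.asyncio
async def test_search_eq_and_range(postgres_db_provider, sample_entries):
    # $and combines an equality match with a numeric bound, mirroring the
    # supported operators ($eq, $ne, $gt, $lte, $in, $and, $or, ...).
    results = await postgres_db_provider.semantic_search(
        sample_entries[0].vector.data,
        VectorSearchSettings(
            search_limit=10,
            filters={
                "$and": [
                    {"key": {"$eq": "value_id_0"}},
                    {"raw_key": {"$lte": 50}},
                ]
            },
        ),
    )
    assert len(results) <= 10
```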
@@ -163,11 +163,11 @@ async def test_search_nested_and_or_filters( @pytest.mark.asyncio async def test_delete_equality(temporary_postgres_db_provider, sample_entries): - deleted_ids = temporary_postgres_db_provider.vector.delete( + deleted_ids = await temporary_postgres_db_provider.delete( {"key": {"$eq": "value_id_0"}} ) assert len(deleted_ids) == 1 - remaining = temporary_postgres_db_provider.vector.semantic_search( + remaining = await temporary_postgres_db_provider.semantic_search( sample_entries[0].vector.data, VectorSearchSettings(search_limit=100), ) @@ -179,11 +179,11 @@ async def test_delete_equality(temporary_postgres_db_provider, sample_entries): async def test_delete_greater_than( temporary_postgres_db_provider, sample_entries ): - deleted_ids = temporary_postgres_db_provider.vector.delete( + deleted_ids = await temporary_postgres_db_provider.delete( {"raw_key": {"$gt": 90}} ) assert len(deleted_ids) == 9 - remaining = temporary_postgres_db_provider.vector.semantic_search( + remaining = await temporary_postgres_db_provider.semantic_search( sample_entries[0].vector.data, VectorSearchSettings(search_limit=100), ) @@ -193,11 +193,11 @@ async def test_delete_greater_than( @pytest.mark.asyncio async def test_delete_in(temporary_postgres_db_provider, sample_entries): - deleted_ids = temporary_postgres_db_provider.vector.delete( + deleted_ids = await temporary_postgres_db_provider.delete( {"key": {"$in": ["value_id_0", "value_id_1"]}} ) assert len(deleted_ids) == 2 - remaining = temporary_postgres_db_provider.vector.semantic_search( + remaining = await temporary_postgres_db_provider.semantic_search( sample_entries[0].vector.data, VectorSearchSettings(search_limit=100), ) @@ -212,7 +212,7 @@ async def test_delete_in(temporary_postgres_db_provider, sample_entries): async def test_delete_complex_and( temporary_postgres_db_provider, sample_entries ): - deleted_ids = temporary_postgres_db_provider.vector.delete( + deleted_ids = await temporary_postgres_db_provider.delete( { "$and": [ {"key": {"$eq": "value_id_0"}}, @@ -221,7 +221,7 @@ async def test_delete_complex_and( } ) assert len(deleted_ids) == 1 - remaining = temporary_postgres_db_provider.vector.semantic_search( + remaining = await temporary_postgres_db_provider.semantic_search( sample_entries[0].vector.data, VectorSearchSettings(search_limit=100), ) @@ -236,7 +236,7 @@ async def test_delete_complex_and( async def test_delete_complex_or( temporary_postgres_db_provider, sample_entries ): - deleted_ids = temporary_postgres_db_provider.vector.delete( + deleted_ids = await temporary_postgres_db_provider.delete( { "$or": [ {"key": {"$eq": "value_id_0"}}, @@ -245,7 +245,7 @@ async def test_delete_complex_or( } ) assert len(deleted_ids) == 11 - remaining = temporary_postgres_db_provider.vector.semantic_search( + remaining = await temporary_postgres_db_provider.semantic_search( sample_entries[0].vector.data, VectorSearchSettings(search_limit=100), ) @@ -260,7 +260,7 @@ async def test_delete_complex_or( async def test_delete_nested_and_or( temporary_postgres_db_provider, sample_entries ): - deleted_ids = temporary_postgres_db_provider.vector.delete( + deleted_ids = await temporary_postgres_db_provider.delete( { "$and": [ {"key": {"$eq": "value_id_0"}}, @@ -274,7 +274,7 @@ async def test_delete_nested_and_or( } ) assert len(deleted_ids) == 1 - remaining = temporary_postgres_db_provider.vector.semantic_search( + remaining = await temporary_postgres_db_provider.semantic_search( sample_entries[0].vector.data, 
diff --git a/py/tests/core/providers/database/test_vector_index_logic.py b/py/tests/core/providers/database/test_vector_index_logic.py
new file mode 100644
index 000000000..5c2f5827f
--- /dev/null
+++ b/py/tests/core/providers/database/test_vector_index_logic.py
@@ -0,0 +1,139 @@
+from uuid import uuid4
+
+import pytest
+
+from shared.abstractions.vector import (
+    IndexArgsHNSW,
+    IndexArgsIVFFlat,
+    IndexMeasure,
+    IndexMethod,
+    VectorTableName,
+)
+
+
+@pytest.mark.asyncio
+async def test_index_lifecycle(postgres_db_provider):
+    """Test the full lifecycle of index operations"""
+
+    # Create an index
+    index_name = f"test_index_{uuid4().hex[:8]}"
+    await postgres_db_provider.create_index(
+        table_name=VectorTableName.VECTORS,
+        index_measure=IndexMeasure.cosine_distance,
+        index_method=IndexMethod.hnsw,
+        index_name=index_name,
+        concurrently=False,  # Changed to avoid isolation level issues
+    )
+
+    # List indices and verify our index exists
+    indices = await postgres_db_provider.list_indices(VectorTableName.VECTORS)
+    print("indices = ", indices)
+    assert indices, "No indices returned"
+    assert any(index["name"] == index_name for index in indices)
+
+    # # Select the index for use
+    # await postgres_db_provider.select_index(
+    #     index_name, VectorTableName.VECTORS
+    # )
+
+    # Delete the index
+    await postgres_db_provider.delete_index(
+        index_name,
+        table_name=VectorTableName.VECTORS,
+        concurrently=False,  # Consistent with creation
+    )
+
+    # Verify index was deleted
+    indices_after = await postgres_db_provider.list_indices(
+        VectorTableName.VECTORS
+    )
+    assert not any(index["name"] == index_name for index in indices_after)
+
+
+@pytest.mark.asyncio
+async def test_multiple_index_types(postgres_db_provider):
+    """Test creating and managing multiple types of indices"""
+
+    # Create HNSW index
+    hnsw_name = f"hnsw_index_{uuid4().hex[:8]}"
+    await postgres_db_provider.create_index(
+        table_name=VectorTableName.VECTORS,
+        index_measure=IndexMeasure.cosine_distance,
+        index_method=IndexMethod.hnsw,
+        index_name=hnsw_name,
+        index_arguments=IndexArgsHNSW(m=16, ef_construction=64),
+        concurrently=False,  # Changed to avoid isolation level issues
+    )
+
+    # Create IVF-Flat index
+    ivf_name = f"ivf_index_{uuid4().hex[:8]}"
+    await postgres_db_provider.create_index(
+        table_name=VectorTableName.VECTORS,
+        index_measure=IndexMeasure.cosine_distance,
+        index_method=IndexMethod.ivfflat,
+        index_name=ivf_name,
+        index_arguments=IndexArgsIVFFlat(n_lists=100),
+        concurrently=False,  # Changed to avoid isolation level issues
+    )
+
+    # List indices and verify both exist
+    indices = await postgres_db_provider.list_indices(VectorTableName.VECTORS)
+    assert any(index["name"] == hnsw_name for index in indices)
+    assert any(index["name"] == ivf_name for index in indices)
+
+    # Clean up
+    await postgres_db_provider.delete_index(
+        hnsw_name, table_name=VectorTableName.VECTORS, concurrently=False
+    )
+    await postgres_db_provider.delete_index(
+        ivf_name, table_name=VectorTableName.VECTORS, concurrently=False
+    )
+
+
+@pytest.mark.asyncio
+async def test_index_operations_invalid_inputs(postgres_db_provider):
+    """Test error handling for invalid index operations"""
+
+    # Try to list indices for invalid table
+    with pytest.raises(Exception):
+        await postgres_db_provider.list_indices("invalid_table")
+
+    # Try to delete non-existent index
+    with pytest.raises(Exception):
+        await postgres_db_provider.delete_index(
+            "nonexistent_index", VectorTableName.VECTORS
+        )
+
+    # Try to select non-existent index
+    # with pytest.raises(Exception):
+    #     await postgres_db_provider.select_index(
+    #         "nonexistent_index", VectorTableName.VECTORS
+    #     )
+
+
+@pytest.mark.asyncio
+async def test_index_persistence(
+    postgres_db_provider, temporary_postgres_db_provider
+):
+    """Test that indices persist and are usable between connections"""
+
+    # Create index using first connection
+    index_name = f"persist_test_{uuid4().hex[:8]}"
+    await postgres_db_provider.create_index(
+        table_name=VectorTableName.VECTORS,
+        index_measure=IndexMeasure.cosine_distance,
+        index_method=IndexMethod.hnsw,
+        index_name=index_name,
+        concurrently=False,  # Changed to avoid isolation level issues
+    )
+
+    # Verify index exists using second connection
+    indices = await temporary_postgres_db_provider.list_indices(
+        VectorTableName.VECTORS
+    )
+    assert any(index["name"] == index_name for index in indices)
+
+    # Clean up
+    await postgres_db_provider.delete_index(
+        index_name, table_name=VectorTableName.VECTORS, concurrently=False
+    )
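Every `create_index`/`delete_index` call above passes `concurrently=False` with a comment about isolation levels. The likely root cause is a Postgres rule: `CREATE INDEX CONCURRENTLY` (and `DROP INDEX CONCURRENTLY`) cannot run inside a transaction block, so a provider that wraps statements in a transaction must fall back to a blocking build. A minimal sketch of the distinction, assuming an `asyncpg` connection (the provider's actual driver, table, and column names may differ):

```python
import asyncpg  # assumption: any async Postgres driver would do


async def build_index(conn: asyncpg.Connection, concurrently: bool) -> None:
    modifier = "CONCURRENTLY " if concurrently else ""
    # Illustrative pgvector HNSW index; table/column names are made up.
    sql = (
        f"CREATE INDEX {modifier}ix_demo "
        "ON demo_vectors USING hnsw (vec vector_cosine_ops)"
    )
    if concurrently:
        # Must execute outside any transaction block, or Postgres raises
        # "CREATE INDEX CONCURRENTLY cannot run inside a transaction block".
        await conn.execute(sql)
    else:
        async with conn.transaction():  # a plain CREATE INDEX is fine here
            await conn.execute(sql)
```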
diff --git a/py/tests/core/providers/ingestion/test_contextual_embedding.py b/py/tests/core/providers/ingestion/test_contextual_embedding.py
new file mode 100644
index 000000000..713679654
--- /dev/null
+++ b/py/tests/core/providers/ingestion/test_contextual_embedding.py
@@ -0,0 +1,230 @@
+import pytest
+from uuid import UUID
+from datetime import datetime
+from shared.api.models.auth.responses import UserResponse
+from core.base import RawChunk, DocumentType, IngestionStatus, VectorEntry
+from shared.abstractions.ingestion import (
+    ChunkEnrichmentStrategy,
+    ChunkEnrichmentSettings,
+)
+import subprocess
+from core.main.services.ingestion_service import (
+    IngestionService,
+    IngestionConfig,
+)
+from core.main.abstractions import R2RProviders
+from core.providers.orchestration import SimpleOrchestrationProvider
+from core.providers.ingestion import R2RIngestionConfig, R2RIngestionProvider
+
+from core.base import Vector, VectorType
+import random
+
+
+@pytest.fixture
+def sample_document_id():
+    return UUID("12345678-1234-5678-1234-567812345678")
+
+
+@pytest.fixture
+def sample_user():
+    return UserResponse(
+        id=UUID("87654321-8765-4321-8765-432187654321"),
+        email="test@example.com",
+        is_superuser=True,
+    )
+
+
+@pytest.fixture
+def collection_ids():
+    return [UUID("12345678-1234-5678-1234-567812345678")]
+
+
+@pytest.fixture
+def extraction_ids():
+    return [
+        UUID("fce959df-46a2-4983-aa8b-dd1f93777e02"),
+        UUID("9a85269c-84cd-4dff-bf21-7bd09974f668"),
+        UUID("4b1199b2-2b96-4198-9ded-954c900a23dd"),
+    ]
+
+
+@pytest.fixture
+def sample_chunks(
+    sample_document_id, sample_user, collection_ids, extraction_ids
+):
+    return [
+        VectorEntry(
+            extraction_id=extraction_ids[0],
+            document_id=sample_document_id,
+            user_id=sample_user.id,
+            collection_ids=collection_ids,
+            vector=Vector(
+                data=[random.random() for _ in range(128)],
+                type=VectorType.FIXED,
+                length=128,
+            ),
+            text="This is the first chunk of text.",
+            metadata={"chunk_order": 0},
+        ),
+        VectorEntry(
+            extraction_id=extraction_ids[1],
+            document_id=sample_document_id,
+            user_id=sample_user.id,
+            collection_ids=collection_ids,
+            vector=Vector(
+                data=[random.random() for _ in range(128)],
+                type=VectorType.FIXED,
+                length=128,
+            ),
+            text="This is the second chunk with different content.",
+            metadata={"chunk_order": 1},
+        ),
+        VectorEntry(
+            extraction_id=extraction_ids[2],
+            document_id=sample_document_id,
+            user_id=sample_user.id,
+            collection_ids=collection_ids,
+            vector=Vector(
+                data=[random.random() for _ in range(128)],
+                type=VectorType.FIXED,
+                length=128,
+            ),
+            text="And this is the third chunk with more information.",
+            metadata={"chunk_order": 2},
+        ),
+    ]
+
+
+@pytest.fixture
+def enrichment_settings():
+    return ChunkEnrichmentSettings(
+        enable_chunk_enrichment=True,
+        strategies=[
+            ChunkEnrichmentStrategy.NEIGHBORHOOD,
+            ChunkEnrichmentStrategy.SEMANTIC,
+        ],
+        backward_chunks=1,
+        forward_chunks=1,
+        semantic_neighbors=2,
+        semantic_similarity_threshold=0.7,
+    )
+
+
+@pytest.fixture
+def r2r_ingestion_provider(app_config):
+    return R2RIngestionProvider(R2RIngestionConfig(app=app_config))
+
+
+@pytest.fixture
+def orchestration_provider(orchestration_config):
+    return SimpleOrchestrationProvider(orchestration_config)
+
+
+@pytest.fixture
+def r2r_providers(
+    r2r_ingestion_provider,
+    r2r_prompt_provider,
+    postgres_kg_provider,
+    postgres_db_provider,
+    litellm_provider_128,
+    postgres_file_provider,
+    r2r_auth_provider,
+    litellm_completion_provider,
+    orchestration_provider,
+):
+    return R2RProviders(
+        ingestion=r2r_ingestion_provider,
+        prompt=r2r_prompt_provider,
+        kg=postgres_kg_provider,
+        database=postgres_db_provider,
+        embedding=litellm_provider_128,
+        file=postgres_file_provider,
+        auth=r2r_auth_provider,
+        llm=litellm_completion_provider,
+        orchestration=orchestration_provider,
+    )
+
+
+@pytest.fixture
+def ingestion_config(app_config, enrichment_settings):
+    return IngestionConfig(
+        app=app_config, chunk_enrichment_settings=enrichment_settings
+    )
+
+
+@pytest.fixture
+async def ingestion_service(r2r_providers, ingestion_config):
+    # You'll need to mock your dependencies here
+    service = IngestionService(
+        providers=r2r_providers,
+        config=ingestion_config,
+        pipes=[],
+        pipelines=[],
+        agents=[],
+        run_manager=None,
+        logging_connection=None,
+    )
+    return service
+
+
+async def test_chunk_enrichment_basic(
+    sample_chunks, ingestion_service, sample_document_id, sample_user
+):
+    # Test basic chunk enrichment functionality
+
+    # ingest chunks ingress. Just add document info to the table
+    await ingestion_service.ingest_chunks_ingress(
+        document_id=sample_document_id,
+        chunks=sample_chunks,
+        metadata={},
+        user=sample_user,
+    )
+
+    # upsert entries
+    await ingestion_service.providers.database.upsert_entries(sample_chunks)
+
+    # enrich chunks
+    await ingestion_service.chunk_enrichment(sample_document_id)
+
+    # document chunks
+    document_chunks = (
+        await ingestion_service.providers.database.get_document_chunks(
+            sample_document_id
+        )
+    )
+
+    assert len(document_chunks["results"]) == len(sample_chunks)
+
+    for document_chunk in document_chunks["results"]:
+        assert (
+            document_chunk["metadata"]["chunk_enrichment_status"] == "success"
+        )
+        assert (
+            document_chunk["metadata"]["original_text"]
+            == sample_chunks[document_chunk["metadata"]["chunk_order"]].text
+        )
+
+
+# Other tests
+# TODO: Implement in services/test_ingestion_service.py
+
+# test_enriched_chunk_content:
+# Ingests chunks, enriches them, then verifies each chunk in DB has metadata containing both 'original_text' and 'chunk_enrichment_status' (success/failed)
+
+# test_neighborhood_strategy:
+# Tests _get_enriched_chunk_text() on middle chunk (idx 1) with NEIGHBORHOOD strategy to verify it incorporates text from chunks before/after it
+
+# test_semantic_strategy:
+# Sets ChunkEnrichmentStrategy.SEMANTIC, ingests chunks, then enriches them using semantic similarity to find and incorporate related chunks' content
+
+# test_error_handling:
+# Attempts chunk_enrichment() with non-existent UUID('00000000-0000-0000-0000-000000000000') to verify proper exception handling
+
+# test_empty_chunks:
+# Attempts to ingest_chunks_ingress() with empty chunks list to verify it raises appropriate exception rather than processing empty data
+
+# test_concurrent_processing:
+# Creates 200 RawChunks ("Chunk number {0-199}"), ingests and enriches them all to verify concurrent processing handles large batch correctly
+
+# test_vector_storage:
+# Ingests chunks, enriches them, then verifies get_document_vectors() returns vectors with correct structure including vector data and extraction_id fields
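The TODO list above describes the planned tests only in prose. As one concrete illustration, here is a hedged sketch of how `test_error_handling` might look against the fixtures in this file; the broad `Exception` is an assumption, since the exact error type raised for an unknown document is not pinned down by this PR:

```python
import pytest
from uuid import UUID


@pytest.mark.asyncio
async def test_error_handling(ingestion_service):
    # Enriching a document that was never ingested should fail; the
    # specific exception type is unknown here, hence the broad match.
    with pytest.raises(Exception):
        await ingestion_service.chunk_enrichment(
            UUID("00000000-0000-0000-0000-000000000000")
        )
```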
table_name="triple_raw" + triples_raw_list, table_name="chunk_triple" ) triples = await postgres_kg_provider.get_triples(collection_id) assert triples["triples"][0].subject == "Entity1" @@ -260,13 +260,13 @@ async def test_add_kg_extractions( postgres_kg_provider, kg_extractions, collection_id ): added_extractions = await postgres_kg_provider.add_kg_extractions( - kg_extractions, table_suffix="_raw" + kg_extractions, table_prefix="chunk_" ) assert added_extractions == (2, 2) entities = await postgres_kg_provider.get_entities( - collection_id, entity_table_name="entity_raw" + collection_id, entity_table_name="chunk_entity" ) assert entities["entities"][0].name == "Entity1" assert entities["entities"][1].name == "Entity2" @@ -285,7 +285,7 @@ async def test_get_entity_map( postgres_kg_provider, entities_raw_list, triples_raw_list, document_id ): await postgres_kg_provider.add_entities( - entities_raw_list, table_name="entity_raw" + entities_raw_list, table_name="chunk_entity" ) entity_map = await postgres_kg_provider.get_entity_map(0, 2, document_id) assert entity_map["Entity1"]["entities"][0].name == "Entity1" @@ -304,7 +304,7 @@ async def test_get_entity_map( async def test_upsert_embeddings( postgres_kg_provider, collection_id, entities_list ): - table_name = "entity_embedding" + table_name = "document_entity" entities_list_to_upsert = [ ( @@ -371,10 +371,10 @@ async def test_perform_graph_clustering( # addd entities and triples await postgres_kg_provider.add_entities( - entities_list, table_name="entity_embedding" + entities_list, table_name="document_entity" ) await postgres_kg_provider.add_triples( - triples_raw_list, table_name="triple_raw" + triples_raw_list, table_name="chunk_triple" ) num_communities = await postgres_kg_provider.perform_graph_clustering( @@ -394,19 +394,21 @@ async def test_get_community_details( ): await postgres_kg_provider.add_entities( - entities_list, table_name="entity_embedding" + entities_list, table_name="document_entity" ) await postgres_kg_provider.add_triples( - triples_raw_list, table_name="triple_raw" + triples_raw_list, table_name="chunk_triple" ) await postgres_kg_provider.add_communities(community_table_info) await postgres_kg_provider.add_community_report(community_report_list[0]) community_level, entities, triples = ( - await postgres_kg_provider.get_community_details(community_number=1) + await postgres_kg_provider.get_community_details( + community_number=1, collection_id=collection_id + ) ) assert community_level == 0 # TODO: change these to objects - assert entities[0]["name"] == "Entity1" - assert triples[0]["subject"] == "Entity1" + assert entities[0].name == "Entity1" + assert triples[0].subject == "Entity1" diff --git a/py/tests/core/providers/logging/test_chat_logging_provider.py b/py/tests/core/providers/logging/test_chat_logging_provider.py index fefe546bb..abffbc341 100644 --- a/py/tests/core/providers/logging/test_chat_logging_provider.py +++ b/py/tests/core/providers/logging/test_chat_logging_provider.py @@ -35,8 +35,8 @@ async def test_get_conversation(local_logging_provider): ) assert len(retrieved_messages) == len(messages) for original, retrieved in zip(messages, retrieved_messages): - assert original.role == retrieved.role - assert original.content == retrieved.content + assert original.role == retrieved[1].role + assert original.content == retrieved[1].content @pytest.mark.asyncio @@ -57,21 +57,21 @@ async def test_edit_message(local_logging_provider): conversation_id, new_branch_id ) assert len(retrieved_messages) == 1 - 
diff --git a/py/tests/core/providers/logging/test_chat_logging_provider.py b/py/tests/core/providers/logging/test_chat_logging_provider.py
index fefe546bb..abffbc341 100644
--- a/py/tests/core/providers/logging/test_chat_logging_provider.py
+++ b/py/tests/core/providers/logging/test_chat_logging_provider.py
@@ -35,8 +35,8 @@ async def test_get_conversation(local_logging_provider):
     )
     assert len(retrieved_messages) == len(messages)
     for original, retrieved in zip(messages, retrieved_messages):
-        assert original.role == retrieved.role
-        assert original.content == retrieved.content
+        assert original.role == retrieved[1].role
+        assert original.content == retrieved[1].content


 @pytest.mark.asyncio
@@ -57,21 +57,21 @@ async def test_edit_message(local_logging_provider):
         conversation_id, new_branch_id
     )
     assert len(retrieved_messages) == 1
-    assert retrieved_messages[0].content == "Hello, edited"
+    assert retrieved_messages[0][1].content == "Hello, edited"


-@pytest.mark.asyncio
-async def test_branches_overview(local_logging_provider):
-    conversation_id = await local_logging_provider.create_conversation()
-    message_id = await local_logging_provider.add_message(
-        conversation_id, Message(role="user", content="Hello")
-    )
-    await local_logging_provider.edit_message(message_id, "Hello, edited")
+# @pytest.mark.asyncio
+# async def test_branches_overview(local_logging_provider):
+#     conversation_id = await local_logging_provider.create_conversation()
+#     message_id = await local_logging_provider.add_message(
+#         conversation_id, Message(role="user", content="Hello")
+#     )
+#     await local_logging_provider.edit_message(message_id, "Hello, edited")

-    branches = await local_logging_provider.branches_overview(conversation_id)
-    assert len(branches) == 2
-    assert branches[0]["branch_point_id"] is None
-    assert branches[1]["branch_point_id"] == message_id
+#     branches = await local_logging_provider.branches_overview(conversation_id)
+#     assert len(branches) == 2
+#     assert branches[0]["branch_point_id"] is None
+#     assert branches[1]["branch_point_id"] == message_id


 @pytest.mark.asyncio
@@ -114,7 +114,7 @@ async def test_branch_at_message(local_logging_provider):
         conversation_id, branch_id
     )
     assert len(retrieved_messages) == 1
-    assert retrieved_messages[0].content == "Hello"
+    assert retrieved_messages[0][1].content == "Hello"


 @pytest.mark.asyncio
@@ -155,10 +155,10 @@ async def test_edit_message_in_middle(local_logging_provider):
     print("retrieved_messages = ", retrieved_messages)
     # Verify that messages after the edited message are not present
     assert len(retrieved_messages) == 2
-    assert retrieved_messages[0].content == "Hello"
-    assert retrieved_messages[0].role == "user"
-    assert retrieved_messages[1].content == "Greetings!"
-    assert retrieved_messages[1].role == "assistant"
+    assert retrieved_messages[0][1].content == "Hello"
+    assert retrieved_messages[0][1].role == "user"
+    assert retrieved_messages[1][1].content == "Greetings!"
+    assert retrieved_messages[1][1].role == "assistant"


 @pytest.mark.asyncio
@@ -203,14 +203,15 @@ async def test_multiple_branches_from_same_message(local_logging_provider):

     # Verify first branch messages
     assert len(retrieved_messages_1) == 2
-    assert retrieved_messages_1[0].content == "Tell me a joke."
-    assert retrieved_messages_1[1].content == "Knock, knock!"
+    print("retrieved_messages_1[0] = ", retrieved_messages_1[0])
+    assert retrieved_messages_1[0][1].content == "Tell me a joke."
+    assert retrieved_messages_1[1][1].content == "Knock, knock!"

     # Verify second branch messages
     assert len(retrieved_messages_2) == 2
-    assert retrieved_messages_2[0].content == "Tell me a joke."
+    assert retrieved_messages_2[0][1].content == "Tell me a joke."
     assert (
-        retrieved_messages_2[1].content
+        retrieved_messages_2[1][1].content
         == "What do you call a bear with no teeth? A gummy bear!"
     )
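The mechanical `[0].content` to `[0][1].content` rewrites throughout this file all encode one behavioral change: `get_conversation` now appears to return `(message_id, Message)` pairs instead of bare `Message` objects. A hedged helper showing how callers might consume the new shape (the tuple layout is inferred from the updated assertions, not from the provider's declared signature):

```python
from typing import Any, Sequence, Tuple


def roles_and_contents(
    retrieved: Sequence[Tuple[Any, Any]],
) -> list[tuple[str, str]]:
    # Assumed shape: (message_id, Message); unpacking is clearer than
    # indexing with [1] the way the updated assertions do.
    return [(message.role, message.content) for _, message in retrieved]
```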
diff --git a/py/tests/core/services/test_ingestion_service.py b/py/tests/core/services/test_ingestion_service.py
new file mode 100644
index 000000000..b6be7e360
--- /dev/null
+++ b/py/tests/core/services/test_ingestion_service.py
@@ -0,0 +1,184 @@
+from uuid import UUID
+
+import pytest
+
+from core.base import RawChunk
+from core.main.services.ingestion_service import IngestionService
+
+
+@pytest.fixture
+def sample_document_id():
+    return UUID("12345678-1234-5678-1234-567812345678")
+
+
+@pytest.fixture
+def sample_chunks():
+    return [
+        RawChunk(
+            text="This is the first chunk of text.",
+            metadata={"chunk_order": 1},
+        ),
+        RawChunk(
+            text="This is the second chunk with different content.",
+            metadata={"chunk_order": 2},
+        ),
+        RawChunk(
+            text="And this is the third chunk with more information.",
+            metadata={"chunk_order": 3},
+        ),
+    ]
+
+
+async def test_ingest_chunks_ingress_success(
+    ingestion_service, sample_document_id, sample_chunks
+):
+    """Test successful ingestion of chunks"""
+    result = await ingestion_service.ingest_chunks_ingress(
+        document_id=sample_document_id,
+        chunks=sample_chunks,
+        metadata={"title": "Test Document"},
+        user_id="test_user",
+    )
+
+    assert result is not None
+    # Add assertions based on your expected return type
+
+
+async def test_ingest_chunks_ingress_empty_chunks(
+    ingestion_service, sample_document_id
+):
+    """Test handling of empty chunks list"""
+    with pytest.raises(ValueError):
+        await ingestion_service.ingest_chunks_ingress(
+            document_id=sample_document_id,
+            chunks=[],
+            metadata={},
+            user_id="test_user",
+        )
+
+
+async def test_ingest_chunks_ingress_invalid_metadata(
+    ingestion_service, sample_document_id, sample_chunks
+):
+    """Test handling of invalid metadata"""
+    with pytest.raises(TypeError):
+        await ingestion_service.ingest_chunks_ingress(
+            document_id=sample_document_id,
+            chunks=sample_chunks,
+            metadata=None,  # Invalid metadata
+            user_id="test_user",
+        )
+
+
+async def test_ingest_chunks_ingress_large_document(
+    ingestion_service, sample_document_id
+):
+    """Test ingestion of a large number of chunks"""
+    large_chunks = [
+        RawChunk(text=f"Chunk number {i}", metadata={"chunk_order": i})
+        for i in range(1000)
+    ]
+
+    result = await ingestion_service.ingest_chunks_ingress(
+        document_id=sample_document_id,
+        chunks=large_chunks,
+        metadata={"title": "Large Document"},
+        user_id="test_user",
+    )
+
+    assert result is not None
+    # Add assertions for large document handling
+
+
+async def test_ingest_chunks_ingress_duplicate_chunk_orders(
+    ingestion_service, sample_document_id
+):
+    """Test handling of chunks with duplicate chunk orders"""
+    duplicate_chunks = [
+        RawChunk(text="First chunk", metadata={"chunk_order": 1}),
+        RawChunk(
+            text="Second chunk",
+            metadata={"chunk_order": 1},  # Duplicate chunk_order
+        ),
+    ]
+
+    with pytest.raises(ValueError):
+        await ingestion_service.ingest_chunks_ingress(
+            document_id=sample_document_id,
+            chunks=duplicate_chunks,
+            metadata={},
+            user_id="test_user",
+        )
+
+
+async def test_ingest_chunks_ingress_invalid_user(
+    ingestion_service, sample_document_id, sample_chunks
+):
+    """Test handling of invalid user ID"""
+    with pytest.raises(ValueError):
+        await ingestion_service.ingest_chunks_ingress(
+            document_id=sample_document_id,
+            chunks=sample_chunks,
+            metadata={},
+            user_id="",  # Invalid user ID
+        )
+
+
+async def test_ingest_chunks_ingress_metadata_validation(
+    ingestion_service, sample_document_id, sample_chunks
+):
+    """Test metadata validation"""
+    test_cases = [
({"title": "Valid title"}, True), + ({"title": ""}, False), + ({"invalid_key": "value"}, False), + ( + {}, + True, + ), # Empty metadata might be valid depending on your requirements + ] + + for metadata, should_succeed in test_cases: + if should_succeed: + result = await ingestion_service.ingest_chunks_ingress( + document_id=sample_document_id, + chunks=sample_chunks, + metadata=metadata, + user_id="test_user", + ) + assert result is not None + else: + with pytest.raises((ValueError, TypeError)): + await ingestion_service.ingest_chunks_ingress( + document_id=sample_document_id, + chunks=sample_chunks, + metadata=metadata, + user_id="test_user", + ) + + +async def test_ingest_chunks_ingress_concurrent_requests( + ingestion_service, sample_chunks +): + """Test handling of concurrent ingestion requests""" + import asyncio + + document_ids = [ + UUID("12345678-1234-5678-1234-56781234567" + str(i)) for i in range(5) + ] + + async def ingest_document(doc_id): + return await ingestion_service.ingest_chunks_ingress( + document_id=doc_id, + chunks=sample_chunks, + metadata={"title": f"Document {doc_id}"}, + user_id="test_user", + ) + + results = await asyncio.gather( + *[ingest_document(doc_id) for doc_id in document_ids] + ) + + assert len(results) == len(document_ids) + for result in results: + assert result is not None diff --git a/py/tests/integration/runner_cli.py b/py/tests/integration/runner_cli.py index 89e2fe2e3..3059c7001 100644 --- a/py/tests/integration/runner_cli.py +++ b/py/tests/integration/runner_cli.py @@ -36,7 +36,7 @@ def run_command(command): def test_ingest_sample_file_cli(): print("Testing: Ingest sample file CLI") run_command("poetry run r2r ingest-sample-file") - time.sleep(10) + time.sleep(30) print("Ingestion successful") print("~" * 100) @@ -47,7 +47,7 @@ def test_ingest_sample_file_2_cli(): """ print("Testing: Ingest sample file CLI 2") run_command("poetry run r2r ingest-sample-file --v2") - time.sleep(10) + time.sleep(30) print("Ingestion successful") print("~" * 100) @@ -250,17 +250,18 @@ def test_rag_response_stream_sample_file_cli(): def test_kg_create_graph_sample_file_cli(): print("Testing: KG create graph") - print("Calling `poetry run r2r create-graph --run` ") + print("Calling `poetry run r2r create-graph --run`") output = run_command("poetry run r2r create-graph --run") if "queued" in output: time.sleep(60) response = requests.get( - "http://localhost:7272/v2/entities", + "http://localhost:7272/v2/entities/", params={ "collection_id": "122fdf6a-e116-546b-a8f6-e4cb2e2c0a09", "limit": 1000, + "entity_level": "document", }, ) @@ -274,6 +275,8 @@ def test_kg_create_graph_sample_file_cli(): print(entities_list) + documents_overview = run_command("poetry run r2r documents-overview") + print(documents_overview) assert len(entities_list) >= 1 assert "ARISTOTLE" in entities_list @@ -281,6 +284,37 @@ def test_kg_create_graph_sample_file_cli(): print("~" * 100) +def test_kg_deduplicate_entities_sample_file_cli(): + print("Testing: KG deduplicate entities") + output = run_command("poetry run r2r deduplicate-entities --run") + + print(output) + + if "queued" in output: + time.sleep(45) + + response = requests.get( + "http://localhost:7272/v2/entities", + params={ + "collection_id": "122fdf6a-e116-546b-a8f6-e4cb2e2c0a09", + "entity_level": "collection", + }, + ) + + if response.status_code != 200: + print("KG deduplicate entities test failed: Communities not created") + sys.exit(1) + + entities = response.json()["results"]["entities"] + assert len(entities) >= 1 + + 
entities_list = [ele["name"] for ele in entities] + assert "ARISTOTLE" in entities_list + + print("KG deduplicate entities test passed") + print("~" * 100) + + def test_kg_enrich_graph_sample_file_cli(): print("Testing: KG enrich graph") output = run_command("poetry run r2r enrich-graph --run") @@ -355,6 +389,56 @@ def test_kg_search_sample_file_cli(): print("~" * 100) +def test_kg_delete_graph_sample_file_cli(): + print("Testing: KG delete graph") + output = run_command( + "poetry run r2r delete-graph-for-collection --collection-id=122fdf6a-e116-546b-a8f6-e4cb2e2c0a09" + ) + print(output) + + response = requests.get( + "http://localhost:7272/v2/communities", + params={"collection_id": "122fdf6a-e116-546b-a8f6-e4cb2e2c0a09"}, + ) + + assert response.json()["results"]["communities"] == [] + + response = requests.get( + "http://localhost:7272/v2/entities", + params={"collection_id": "122fdf6a-e116-546b-a8f6-e4cb2e2c0a09"}, + ) + + assert response.json()["results"]["entities"] != [] + + print("KG delete graph test passed") + print("~" * 100) + + +def test_kg_delete_graph_with_cascading_sample_file_cli(): + print("Testing: KG delete graph with cascading") + output = run_command( + "poetry run r2r delete-graph-for-collection --collection-id=122fdf6a-e116-546b-a8f6-e4cb2e2c0a09 --cascade" + ) + print(output) + + response = requests.get( + "http://localhost:7272/v2/entities", + params={"collection_id": "122fdf6a-e116-546b-a8f6-e4cb2e2c0a09"}, + ) + + assert response.json()["results"]["entities"] == [] + + response = requests.get( + "http://localhost:7272/v2/triples", + params={"collection_id": "122fdf6a-e116-546b-a8f6-e4cb2e2c0a09"}, + ) + + assert response.json()["results"]["triples"] == [] + + print("KG delete graph with cascading test passed") + print("~" * 100) + + if __name__ == "__main__": if len(sys.argv) < 2: print("Please specify a test function to run") diff --git a/py/tests/integration/runner_sdk.py b/py/tests/integration/runner_sdk.py index 80b38afb6..dc50a2ba7 100644 --- a/py/tests/integration/runner_sdk.py +++ b/py/tests/integration/runner_sdk.py @@ -2,7 +2,7 @@ import sys import time -from r2r import Message, R2RClient, R2RException +from r2r import R2RClient, R2RException def compare_result_fields(result, expected_fields): @@ -36,12 +36,13 @@ def test_ingest_sample_file_sdk(): def test_ingest_sample_file_2_sdk(): print("Testing: Ingest sample file SDK 2") - file_paths = ["core/examples/data/aristotle_v2.txt"] + file_paths = [f"core/examples/data_dedup/a{i}.txt" for i in range(1, 11)] ingest_response = client.ingest_files(file_paths=file_paths) if not ingest_response["results"]: print("Ingestion test failed") sys.exit(1) + time.sleep(60) print("Ingestion successful") print("~" * 100) @@ -67,7 +68,7 @@ def test_ingest_sample_file_with_config_sdk(): ingest_response = client.ingest_files( file_paths=file_paths, ingestion_config={"chunk_size": 4_096} ) - time.sleep(10) + time.sleep(30) if not ingest_response["results"]: print("Ingestion test failed") @@ -102,7 +103,6 @@ def test_reingest_sample_file_sdk(): file_paths = ["core/examples/data/uber_2021.pdf"] try: results = client.ingest_files(file_paths=file_paths) - print("results = ", results) time.sleep(30) if "task_id" not in results["results"][0]: @@ -686,11 +686,15 @@ def test_kg_create_graph_sample_file_sdk(): collection_id="122fdf6a-e116-546b-a8f6-e4cb2e2c0a09", run_type="run" ) + print(create_graph_result) + if "queued" in create_graph_result["results"]["message"]: time.sleep(60) result = client.get_entities( - 
collection_id="122fdf6a-e116-546b-a8f6-e4cb2e2c0a09", limit=1000 + collection_id="122fdf6a-e116-546b-a8f6-e4cb2e2c0a09", + limit=1000, + entity_level="document", ) entities_list = [ele["name"] for ele in result["results"]["entities"]] @@ -704,6 +708,35 @@ def test_kg_create_graph_sample_file_sdk(): print("~" * 100) +def test_kg_deduplicate_entities_sample_file_sdk(): + print("Testing: KG deduplicate entities") + + entities_deduplication_result = client.deduplicate_entities( + collection_id="122fdf6a-e116-546b-a8f6-e4cb2e2c0a09", + run_type="run", + ) + + if "queued" in entities_deduplication_result["results"]["message"]: + time.sleep(45) + + response = client.get_entities( + collection_id="122fdf6a-e116-546b-a8f6-e4cb2e2c0a09", + limit=1000, + entity_level="collection", + ) + + entities_list = [ele["name"] for ele in response["results"]["entities"]] + + assert len(entities_list) >= 1 + assert "ARISTOTLE" in entities_list + + # Check that there are no duplicates + assert sorted(entities_list) == sorted(list(set(entities_list))) + + print("KG deduplicate entities test passed") + print("~" * 100) + + def test_kg_enrich_graph_sample_file_sdk(): print("Testing: KG enrich graph") @@ -760,6 +793,58 @@ def test_kg_search_sample_file_sdk(): print("~" * 100) +def test_kg_delete_graph_sample_file_sdk(): + print("Testing: KG delete graph") + + response = client.get_communities( + collection_id="122fdf6a-e116-546b-a8f6-e4cb2e2c0a09" + ) + + assert response["results"]["communities"] != [] + client.delete_graph_for_collection( + collection_id="122fdf6a-e116-546b-a8f6-e4cb2e2c0a09" + ) + + response = client.get_communities( + collection_id="122fdf6a-e116-546b-a8f6-e4cb2e2c0a09" + ) + + assert response["results"]["communities"] == [] + + response = client.get_entities( + collection_id="122fdf6a-e116-546b-a8f6-e4cb2e2c0a09", + entity_level="document", + ) + + assert response["results"]["entities"] != [] + + print("KG delete graph test passed") + print("~" * 100) + + +def test_kg_delete_graph_with_cascading_sample_file_sdk(): + print("Testing: KG delete graph with cascading") + + client.delete_graph_for_collection( + collection_id="122fdf6a-e116-546b-a8f6-e4cb2e2c0a09", cascade=True + ) + + response = client.get_entities( + collection_id="122fdf6a-e116-546b-a8f6-e4cb2e2c0a09" + ) + + assert response["results"]["entities"] == [] + + response = client.get_triples( + collection_id="122fdf6a-e116-546b-a8f6-e4cb2e2c0a09" + ) + + assert response["results"]["triples"] == [] + + print("KG delete graph with cascading test passed") + print("~" * 100) + + def test_user_creates_collection(): print("Testing: User creates a collection") @@ -923,7 +1008,7 @@ def test_user_collection_document_management(): # Ingest the "aristotle.txt" file ingest_result = client.ingest_files(["core/examples/data/aristotle.txt"]) - time.sleep(10) + time.sleep(15) document_id = ingest_result["results"][0]["document_id"] @@ -997,7 +1082,7 @@ def test_user_removes_document_from_collection(): # Ingest the "aristotle.txt" file ingest_result = client.ingest_files(["core/examples/data/aristotle.txt"]) - time.sleep(10) + time.sleep(30) document_id = ingest_result["results"][0]["document_id"] @@ -1049,7 +1134,7 @@ def test_user_lists_documents_in_collection(): # Ingest the "aristotle.txt" file ingest_result = client.ingest_files(["core/examples/data/aristotle.txt"]) - time.sleep(10) + time.sleep(30) document_id = ingest_result["results"][0]["document_id"] @@ -1411,7 +1496,7 @@ def test_user_gets_collections_for_document(): # Ingest a document 
     ingest_result = client.ingest_files(["core/examples/data/aristotle.txt"])
-    time.sleep(10)
+    time.sleep(30)

     document_id = ingest_result["results"][0]["document_id"]

@@ -1480,7 +1565,7 @@ def test_collection_user_interactions():
     # Ingest a document
     client.login("collection_owner@example.com", "password123")
     ingest_result = client.ingest_files(["core/examples/data/aristotle.txt"])
-    time.sleep(10)
+    time.sleep(30)

     document_id = ingest_result["results"][0]["document_id"]

@@ -1533,7 +1618,7 @@ def test_collection_document_interactions():

     # Ingest a document
     ingest_result = client.ingest_files(["core/examples/data/aristotle.txt"])
-    time.sleep(10)
+    time.sleep(30)

     document_id = ingest_result["results"][0]["document_id"]

diff --git a/templates/README.md b/templates/README.md
index 4f01d26db..1d89fef3b 100644
--- a/templates/README.md
+++ b/templates/README.md
@@ -2,19 +2,7 @@

 ### R2R templates are in beta! We value your feedback and contributions to make them more widely accessible.

-A collection of templates curated by the SciPhi team and our community to make it easy for you to get started with your RAG application. These templates are ready to deploy in the [SciPhi Cloud](https://app.sciphi.ai/) or locally.
-
-Run any template from the R2R CLI using the following commands:
-```bash
-# Install the CLI
-pip install r2r
-
-# List all availible templates
-r2r list-templates
-
-# Clone your template
-r2r clone < your-template >
-```
+A collection of templates curated by the SciPhi team and our community to make it easy for you to get started with your RAG application. [Search for templates and preview live deployments.](https://app.sciphi.ai/templates)