From 1088e57e11f6aaa509f7c7aa541c4d7d2953c7f9 Mon Sep 17 00:00:00 2001 From: Remi Dettai Date: Thu, 18 Jan 2024 10:21:27 +0100 Subject: [PATCH] Create lambda infrastructure (#3830) * Create test Lambda calling into QW to get its version * Switch to provided AL image * Revert Dockerfile changes * Add querying and indexing * Add querying and indexing * Rename querier to searcher to align with existing terminology * Fix failing CI tests * Use S3 store source instead of bundled * Refactor binaries to seprate bin dir * Fork the CLI local search and index methods * Create index if not found * Add flexible indexing and query inputs * Add instance of Quickwit lambda with mock data generation * Log end of indexing * Add benchmarck commands * wip: trying to setup tracing * Add root trace * Try fix trace flushing * Fix merge disabling * Cache and better tracing * Add pyaload to context span * Use API Gateway events in search * Log as json and extract logs from cloudwatch * Add histogram oneshot search example * Fix errors due to rebase * Improve example queries * Expose config to disable partial_request_cache_capacity * Improve benchmark script * Document the setup of an API Gateway * Add partial request cache to bench * Add packaging workflow * Address review regarding hdfs index config * Fix rebase errors * Fix CI errors * Add mypy linter * Improve release tag name * Try using package pip install in CI * Update lambda package version * Enable download using uploaded artifact * Add API Gateway construct and refactor cdk code * Add staging lifecycle rule * Fix SpawnPipeline field after rebase * Fix unused rust-toolchain.toml * Add tutorial * Final cleanup * Fix after rebase * Fix fmt in quickwit-lambda * Handle gzip file source. * Fix clippy. * Update github action. * Add telemetry. * Apply new versioning and skip hdfs decompression * Add comment in file source. * Add test on skip reader. * Take review comments into account. * Fix rebase. 
--------- Co-authored-by: fmassot --- .github/workflows/publish_lambda_packages.yml | 52 + distribution/lambda/.gitignore | 18 + distribution/lambda/Makefile | 134 +++ distribution/lambda/README.md | 120 ++ distribution/lambda/cdk/__init__.py | 0 distribution/lambda/cdk/app.py | 55 + distribution/lambda/cdk/cdk.json | 57 + distribution/lambda/cdk/cli.py | 357 ++++++ distribution/lambda/cdk/stacks/__init__.py | 0 .../lambda/cdk/stacks/examples/__init__.py | 0 .../lambda/cdk/stacks/examples/hdfs_stack.py | 73 ++ .../cdk/stacks/examples/mock_data_stack.py | 172 +++ .../lambda/cdk/stacks/services/__init__.py | 0 .../cdk/stacks/services/indexer_service.py | 46 + .../cdk/stacks/services/quickwit_service.py | 65 ++ .../cdk/stacks/services/searcher_service.py | 37 + distribution/lambda/poetry.lock | 1029 +++++++++++++++++ distribution/lambda/pyproject.toml | 27 + .../lambda/resources/data-generator.py | 30 + distribution/lambda/resources/hdfs-logs.yaml | 38 + distribution/lambda/resources/mock-sales.yaml | 38 + .../images/quickwit-lambda-tutorial.svg | 4 + .../tutorials/tutorial-aws-lambda.md | 170 +++ quickwit/Cargo.lock | 177 +++ quickwit/Cargo.toml | 8 + quickwit/quickwit-indexing/Cargo.toml | 1 + .../data/test_corpus.json.gz | Bin 0 -> 228 bytes .../src/source/file_source.rs | 223 +++- quickwit/quickwit-lambda/Cargo.toml | 59 + quickwit/quickwit-lambda/src/bin/indexer.rs | 31 + quickwit/quickwit-lambda/src/bin/searcher.rs | 29 + .../src/indexer/environment.rs | 39 + .../quickwit-lambda/src/indexer/handler.rs | 78 ++ .../quickwit-lambda/src/indexer/ingest.rs | 251 ++++ quickwit/quickwit-lambda/src/indexer/mod.rs | 25 + quickwit/quickwit-lambda/src/indexer/model.rs | 112 ++ quickwit/quickwit-lambda/src/lib.rs | 23 + quickwit/quickwit-lambda/src/logger.rs | 153 +++ .../src/searcher/environment.rs | 37 + .../quickwit-lambda/src/searcher/handler.rs | 84 ++ quickwit/quickwit-lambda/src/searcher/mod.rs | 24 + .../quickwit-lambda/src/searcher/search.rs | 127 ++ 
quickwit/quickwit-lambda/src/utils.rs | 67 ++ quickwit/quickwit-telemetry/src/payload.rs | 1 + 44 files changed, 4036 insertions(+), 35 deletions(-) create mode 100644 .github/workflows/publish_lambda_packages.yml create mode 100644 distribution/lambda/.gitignore create mode 100644 distribution/lambda/Makefile create mode 100644 distribution/lambda/README.md create mode 100644 distribution/lambda/cdk/__init__.py create mode 100755 distribution/lambda/cdk/app.py create mode 100644 distribution/lambda/cdk/cdk.json create mode 100644 distribution/lambda/cdk/cli.py create mode 100644 distribution/lambda/cdk/stacks/__init__.py create mode 100644 distribution/lambda/cdk/stacks/examples/__init__.py create mode 100644 distribution/lambda/cdk/stacks/examples/hdfs_stack.py create mode 100644 distribution/lambda/cdk/stacks/examples/mock_data_stack.py create mode 100644 distribution/lambda/cdk/stacks/services/__init__.py create mode 100644 distribution/lambda/cdk/stacks/services/indexer_service.py create mode 100644 distribution/lambda/cdk/stacks/services/quickwit_service.py create mode 100644 distribution/lambda/cdk/stacks/services/searcher_service.py create mode 100644 distribution/lambda/poetry.lock create mode 100644 distribution/lambda/pyproject.toml create mode 100644 distribution/lambda/resources/data-generator.py create mode 100644 distribution/lambda/resources/hdfs-logs.yaml create mode 100644 distribution/lambda/resources/mock-sales.yaml create mode 100644 docs/assets/images/quickwit-lambda-tutorial.svg create mode 100644 docs/get-started/tutorials/tutorial-aws-lambda.md create mode 100644 quickwit/quickwit-indexing/data/test_corpus.json.gz create mode 100644 quickwit/quickwit-lambda/Cargo.toml create mode 100644 quickwit/quickwit-lambda/src/bin/indexer.rs create mode 100644 quickwit/quickwit-lambda/src/bin/searcher.rs create mode 100644 quickwit/quickwit-lambda/src/indexer/environment.rs create mode 100644 quickwit/quickwit-lambda/src/indexer/handler.rs create mode 
100644 quickwit/quickwit-lambda/src/indexer/ingest.rs create mode 100644 quickwit/quickwit-lambda/src/indexer/mod.rs create mode 100644 quickwit/quickwit-lambda/src/indexer/model.rs create mode 100644 quickwit/quickwit-lambda/src/lib.rs create mode 100644 quickwit/quickwit-lambda/src/logger.rs create mode 100644 quickwit/quickwit-lambda/src/searcher/environment.rs create mode 100644 quickwit/quickwit-lambda/src/searcher/handler.rs create mode 100644 quickwit/quickwit-lambda/src/searcher/mod.rs create mode 100644 quickwit/quickwit-lambda/src/searcher/search.rs create mode 100644 quickwit/quickwit-lambda/src/utils.rs diff --git a/.github/workflows/publish_lambda_packages.yml b/.github/workflows/publish_lambda_packages.yml new file mode 100644 index 00000000000..6b511183dac --- /dev/null +++ b/.github/workflows/publish_lambda_packages.yml @@ -0,0 +1,52 @@ +name: Build and publish AWS Lambda packages + +on: + push: + tags: + - "lambda-beta-*" + +jobs: + build-lambdas: + name: Build Quickwit Lambdas + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Ubuntu packages + run: sudo apt-get -y install protobuf-compiler python3 python3-pip + - name: Install rustup + run: curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain none -y + - name: Install python dependencies + run: pip install ./distribution/lambda + - name: Mypy lint + run: mypy distribution/lambda/ + + - name: Extract asset version of release + run: echo "QW_LAMBDA_VERSION=${GITHUB_REF/refs\/tags\//}" >> $GITHUB_ENV + if: ${{ github.event_name == 'push' }} + - name: Retrieve and export commit date, hash, and tags + run: | + echo "QW_COMMIT_DATE=$(TZ=UTC0 git log -1 --format=%cd --date=format-local:%Y-%m-%dT%H:%M:%SZ)" >> $GITHUB_ENV + echo "QW_COMMIT_HASH=$(git rev-parse HEAD)" >> $GITHUB_ENV + echo "QW_COMMIT_TAGS=$(git tag --points-at HEAD | tr '\n' ',')" >> $GITHUB_ENV + - name: Build Quickwit Lambdas + run: make package + env: + QW_COMMIT_DATE: ${{ env.QW_COMMIT_DATE }} + 
QW_COMMIT_HASH: ${{ env.QW_COMMIT_HASH }} + QW_COMMIT_TAGS: ${{ env.QW_COMMIT_TAGS }} + QW_LAMBDA_BUILD: 1 + working-directory: ./distribution/lambda + - name: Extract package locations + run: | + echo "SEARCHER_PACKAGE_LOCATION=./distribution/lambda/$(make searcher-package-path)" >> $GITHUB_ENV + echo "INDEXER_PACKAGE_LOCATION=./distribution/lambda/$(make indexer-package-path)" >> $GITHUB_ENV + working-directory: ./distribution/lambda + - name: Upload Lambda archives + uses: quickwit-inc/upload-to-github-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + file: ${{ env.SEARCHER_PACKAGE_LOCATION }};${{ env.INDEXER_PACKAGE_LOCATION }} + overwrite: true + draft: true + tag_name: aws-${{ env.QW_LAMBDA_VERSION }} diff --git a/distribution/lambda/.gitignore b/distribution/lambda/.gitignore new file mode 100644 index 00000000000..85cfc7bfdef --- /dev/null +++ b/distribution/lambda/.gitignore @@ -0,0 +1,18 @@ +*.swp +package-lock.json +__pycache__ +.pytest_cache +.venv +*.egg-info +build/ +.mypy_cache + +# CDK asset staging directory +.cdk.staging +cdk.out + +# AWS SAM build directory +.aws-sam + +# Benchmark output files +*.log diff --git a/distribution/lambda/Makefile b/distribution/lambda/Makefile new file mode 100644 index 00000000000..4dfd1d1b076 --- /dev/null +++ b/distribution/lambda/Makefile @@ -0,0 +1,134 @@ +.SILENT: +.ONESHELL: +SHELL := bash +.SHELLFLAGS := -eu -o pipefail -c + +# Update this when cutting a new release +QW_LAMBDA_VERSION?=beta-01 +PACKAGE_BASE_URL=https://github.com/quickwit-oss/quickwit/releases/download/aws-lambda-$(QW_LAMBDA_VERSION)/ +SEARCHER_PACKAGE_FILE=quickwit-lambda-searcher-$(QW_LAMBDA_VERSION)-x86_64.zip +INDEXER_PACKAGE_FILE=quickwit-lambda-indexer-$(QW_LAMBDA_VERSION)-x86_64.zip +export SEARCHER_PACKAGE_PATH=cdk.out/$(SEARCHER_PACKAGE_FILE) +export INDEXER_PACKAGE_PATH=cdk.out/$(INDEXER_PACKAGE_FILE) + +check-env: +ifndef CDK_ACCOUNT + $(error CDK_ACCOUNT is undefined) +endif +ifndef CDK_REGION + $(error 
CDK_REGION is undefined) +endif + +# Build or download the packages from the release page +# - Download by default, the version can be set with QW_LAMBDA_VERSION +# - To build locally, set QW_LAMBDA_BUILD=1 +package: + mkdir -p cdk.out + if [ "$${QW_LAMBDA_BUILD:-0}" = "1" ] + then + pushd ../../quickwit/ + cargo lambda build \ + -p quickwit-lambda \ + --release \ + --output-format zip \ + --target x86_64-unknown-linux-gnu + popd + cp -u ../../quickwit/target/lambda/searcher/bootstrap.zip $(SEARCHER_PACKAGE_PATH) + cp -u ../../quickwit/target/lambda/indexer/bootstrap.zip $(INDEXER_PACKAGE_PATH) + else + if ! [ -f $(SEARCHER_PACKAGE_PATH) ]; then + echo "Downloading package $(PACKAGE_BASE_URL)$(SEARCHER_PACKAGE_FILE)" + curl -C - -Ls -o $(SEARCHER_PACKAGE_PATH) $(PACKAGE_BASE_URL)$(SEARCHER_PACKAGE_FILE) + else + echo "Using cached package $(SEARCHER_PACKAGE_PATH)" + fi + if ! [ -f $(INDEXER_PACKAGE_PATH) ]; then + echo "Downloading package $(PACKAGE_BASE_URL)$(INDEXER_PACKAGE_FILE)" + curl -C - -Ls -o $(INDEXER_PACKAGE_PATH) $(PACKAGE_BASE_URL)$(INDEXER_PACKAGE_FILE) + else + echo "Using cached package $(INDEXER_PACKAGE_PATH)" + fi + fi + +indexer-package-path: + echo -n $(INDEXER_PACKAGE_PATH) + +searcher-package-path: + echo -n $(SEARCHER_PACKAGE_PATH) + +bootstrap: package check-env + cdk bootstrap aws://$$CDK_ACCOUNT/$$CDK_REGION + +deploy-hdfs: package check-env + cdk deploy -a cdk/app.py HdfsStack + +deploy-mock-data: package check-env + cdk deploy -a cdk/app.py MockDataStack + +destroy-hdfs: + cdk destroy -a cdk/app.py HdfsStack + +destroy-mock-data: + cdk destroy -a cdk/app.py MockDataStack + +clean: + rm -rf cdk.out + +## Invocation examples + +invoke-mock-data-searcher: check-env + python -c 'from cdk import cli; cli.invoke_mock_data_searcher()' + +invoke-hdfs-indexer: check-env + python -c 'from cdk import cli; cli.upload_hdfs_src_file()' + python -c 'from cdk import cli; cli.invoke_hdfs_indexer()' + +invoke-hdfs-searcher-term: check-env + python -c 
'from cdk import cli; cli.invoke_hdfs_searcher("""{"query": "severity_text:ERROR", "max_hits": 10}""")' + +invoke-hdfs-searcher-histogram: check-env + python -c 'from cdk import cli; cli.invoke_hdfs_searcher("""{ "query": "*", "max_hits": 0, "aggs": { "events": { "date_histogram": { "field": "timestamp", "fixed_interval": "1d" }, "aggs": { "log_level": { "terms": { "size": 10, "field": "severity_text", "order": { "_count": "desc" } } } } } } }""")' + +bench-index: + mem_sizes=( 10240 8192 6144 4096 3072 2048 ) + export QW_LAMBDA_DISABLE_MERGE=true + for mem_size in "$${mem_sizes[@]}" + do + export INDEXER_MEMORY_SIZE=$${mem_size} + $(MAKE) deploy-hdfs + python -c 'from cdk import cli; cli.benchmark_hdfs_indexing()' + done + +bench-search-term: + mem_sizes=( 1024 2048 4096 8192 ) + for mem_size in "$${mem_sizes[@]}" + do + export SEARCHER_MEMORY_SIZE=$${mem_size} + $(MAKE) deploy-hdfs + python -c 'from cdk import cli; cli.benchmark_hdfs_search("""{"query": "severity_text:ERROR", "max_hits": 10}""")' + done + +bench-search-histogram: + mem_sizes=( 1024 2048 4096 8192 ) + for mem_size in "$${mem_sizes[@]}" + do + export SEARCHER_MEMORY_SIZE=$${mem_size} + $(MAKE) deploy-hdfs + python -c 'from cdk import cli; cli.benchmark_hdfs_search("""{ "query": "*", "max_hits": 0, "aggs": { "events": { "date_histogram": { "field": "timestamp", "fixed_interval": "1d" }, "aggs": { "log_level": { "terms": { "size": 10, "field": "severity_text", "order": { "_count": "desc" } } } } } } }""")' + done + +bench-search: + for run in {1..30} + do + export QW_LAMBDA_DISABLE_SEARCH_CACHE=true + $(MAKE) bench-search-term + $(MAKE) bench-search-histogram + export QW_LAMBDA_DISABLE_SEARCH_CACHE=false + export QW_LAMBDA_PARTIAL_REQUEST_CACHE_CAPACITY=0 + $(MAKE) bench-search-term + $(MAKE) bench-search-histogram + export QW_LAMBDA_DISABLE_SEARCH_CACHE=false + export QW_LAMBDA_PARTIAL_REQUEST_CACHE_CAPACITY=64MB + $(MAKE) bench-search-term + $(MAKE) bench-search-histogram + done diff --git 
a/distribution/lambda/README.md b/distribution/lambda/README.md new file mode 100644 index 00000000000..6fd65615b78 --- /dev/null +++ b/distribution/lambda/README.md @@ -0,0 +1,120 @@ + +# CDK template for running Quickwit on AWS Lambda + +## Prerequisites + +- Install AWS CDK Toolkit (cdk command) + - `npm install -g aws-cdk ` +- Ensure `curl` and `make` are installed +- To run the invocation example `make` commands, you will also need Python 3.10 + or later and `pip` installed (see [Python venv](#python-venv) below). + +## AWS Lambda service quotas + +For newly created AWS accounts, a conservative quota of 10 concurrent executions +is applied to Lambda in each individual region. If that's the case, CDK won't be +able to apply the reserved concurrency of the indexing Quickwit lambda. You can +increase the quota without charge using the [Service Quotas +console](https://console.aws.amazon.com/servicequotas/home/services/lambda/quotas). + +> **Note:** The request can take hours or even days to be processed. + +## Python venv + +This project is set up like a standard Python project. The initialization +process also creates a virtualenv within this project, stored under the `.venv` +directory. To create the virtualenv it assumes that there is a `python3` +executable in your path with access to the `venv` package. If for any reason the +automatic creation of the virtualenv fails, you can create the virtualenv +manually. + +To manually create a virtualenv on MacOS and Linux: + +```bash +python3 -m venv .venv +``` + +After the init process completes and the virtualenv is created, you can use the following +step to activate your virtualenv. + +```bash +source .venv/bin/activate +``` + +Once the virtualenv is activated, you can install the required dependencies. + +```bash +pip install . 
+``` + +If you prefer using Poetry, achieve the same by running: +```bash +poetry shell +poetry install +``` + +## Example stacks + +Provided demonstration setups: +- HDFS example data: index the [HDFS + dataset](https://quickwit-datasets-public.s3.amazonaws.com/hdfs-logs-multitenants-10000.json) + by triggering the Quickwit lambda manually. +- Mock Data generator: start a mock data generator lambda that pushes mock JSON + data every X minutes to S3. Those files trigger the Quickwit indexer lambda + automatically. + +## Deploy and run + +The Makefile is a useful entrypoint to show how the Lambda deployment can be used. + +Configure your shell and AWS account: +```bash +# replace with your AWS account ID and preferred region +export CDK_ACCOUNT=123456789 +export CDK_REGION=us-east-1 +make bootstrap +``` + +Deploy, index and query the HDFS dataset: +```bash +make deploy-hdfs +make invoke-hdfs-indexer +make invoke-hdfs-searcher-term +``` + +Deploy the mock data generator and query the indexed data: +```bash +make deploy-mock-data +# wait a few minutes... +make invoke-mock-data-searcher +``` + +## Set up a search API + +You can configure an HTTP API endpoint around the Quickwit Searcher Lambda. The +mock data example stack shows such a configuration. The API Gateway is enabled +when the `SEARCHER_API_KEY` environment variable is set: + +```bash +SEARCHER_API_KEY=my-at-least-20-char-long-key make deploy-mock-data +``` + +> [!WARNING] +> The API key is stored in plain text in the CDK stack. For a real-world +> deployment, the key should be fetched from something like [AWS Secrets +> Manager](https://docs.aws.amazon.com/cdk/v2/guide/get_secrets_manager_value.html). 
+ +Note that the response is always gzip-compressed, regardless of the +`Accept-Encoding` request header: + +```bash +curl -d '{"query":"quantity:>5", "max_hits": 10}' -H "Content-Type: application/json" -H "x-api-key: my-at-least-20-char-long-key" -X POST https://{api_id}.execute-api.{region}.amazonaws.com/api/v1/mock-sales/search --compressed +``` + +## Useful CDK commands + + * `cdk ls` list all stacks in the app + * `cdk synth` emits the synthesized CloudFormation template + * `cdk deploy` deploy this stack to your default AWS account/region + * `cdk diff` compare deployed stack with current state + * `cdk docs` open CDK documentation diff --git a/distribution/lambda/cdk/__init__.py b/distribution/lambda/cdk/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/distribution/lambda/cdk/app.py b/distribution/lambda/cdk/app.py new file mode 100755 index 00000000000..91df71cd775 --- /dev/null +++ b/distribution/lambda/cdk/app.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +import os +from typing import Literal + +import aws_cdk as cdk + +from cdk.stacks.services.quickwit_service import DEFAULT_LAMBDA_MEMORY_SIZE +from cdk.stacks.examples.hdfs_stack import HdfsStack +from cdk.stacks.examples.mock_data_stack import MockDataStack + +HDFS_STACK_NAME = "HdfsStack" +MOCK_DATA_STACK_NAME = "MockDataStack" + + +def package_location_from_env(type: Literal["searcher"] | Literal["indexer"]) -> str: + path_var = f"{type.upper()}_PACKAGE_PATH" + if path_var in os.environ: + return os.environ[path_var] + else: + print( + f"Could not infer the {type} package location. 
Configure it using the {path_var} environment variable" + ) + exit(1) + + +app = cdk.App() + +HdfsStack( + app, + HDFS_STACK_NAME, + env=cdk.Environment( + account=os.getenv("CDK_ACCOUNT"), region=os.getenv("CDK_REGION") + ), + indexer_memory_size=int( + os.environ.get("INDEXER_MEMORY_SIZE", DEFAULT_LAMBDA_MEMORY_SIZE) + ), + searcher_memory_size=int( + os.environ.get("SEARCHER_MEMORY_SIZE", DEFAULT_LAMBDA_MEMORY_SIZE) + ), + indexer_package_location=package_location_from_env("indexer"), + searcher_package_location=package_location_from_env("searcher"), +) + +MockDataStack( + app, + MOCK_DATA_STACK_NAME, + env=cdk.Environment( + account=os.getenv("CDK_ACCOUNT"), region=os.getenv("CDK_REGION") + ), + indexer_package_location=package_location_from_env("indexer"), + searcher_package_location=package_location_from_env("searcher"), + search_api_key=os.getenv("SEARCHER_API_KEY", None), +) + +app.synth() diff --git a/distribution/lambda/cdk/cdk.json b/distribution/lambda/cdk/cdk.json new file mode 100644 index 00000000000..ed7ea2f776b --- /dev/null +++ b/distribution/lambda/cdk/cdk.json @@ -0,0 +1,57 @@ +{ + "app": "python3 app.py", + "watch": { + "include": [ + "**" + ], + "exclude": [ + "README.md", + "cdk*.json", + "requirements*.txt", + "**/__init__.py", + "python/__pycache__", + "tests" + ] + }, + "context": { + "@aws-cdk/aws-lambda:recognizeLayerVersion": true, + "@aws-cdk/core:checkSecretUsage": true, + "@aws-cdk/core:target-partitions": [ + "aws", + "aws-cn" + ], + "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, + "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, + "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true, + "@aws-cdk/aws-iam:minimizePolicies": true, + "@aws-cdk/core:validateSnapshotRemovalPolicy": true, + "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, + "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, + "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true, + 
"@aws-cdk/aws-apigateway:disableCloudWatchRole": true, + "@aws-cdk/core:enablePartitionLiterals": true, + "@aws-cdk/aws-events:eventsTargetQueueSameAccount": true, + "@aws-cdk/aws-iam:standardizedServicePrincipals": true, + "@aws-cdk/aws-ecs:disableExplicitDeploymentControllerForCircuitBreaker": true, + "@aws-cdk/aws-iam:importedRoleStackSafeDefaultPolicyName": true, + "@aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy": true, + "@aws-cdk/aws-route53-patters:useCertificate": true, + "@aws-cdk/customresources:installLatestAwsSdkDefault": false, + "@aws-cdk/aws-rds:databaseProxyUniqueResourceName": true, + "@aws-cdk/aws-codedeploy:removeAlarmsFromDeploymentGroup": true, + "@aws-cdk/aws-apigateway:authorizerChangeDeploymentLogicalId": true, + "@aws-cdk/aws-ec2:launchTemplateDefaultUserData": true, + "@aws-cdk/aws-secretsmanager:useAttachedSecretResourcePolicyForSecretTargetAttachments": true, + "@aws-cdk/aws-redshift:columnId": true, + "@aws-cdk/aws-stepfunctions-tasks:enableEmrServicePolicyV2": true, + "@aws-cdk/aws-ec2:restrictDefaultSecurityGroup": true, + "@aws-cdk/aws-apigateway:requestValidatorUniqueId": true, + "@aws-cdk/aws-kms:aliasNameRef": true, + "@aws-cdk/aws-autoscaling:generateLaunchTemplateInsteadOfLaunchConfig": true, + "@aws-cdk/core:includePrefixInUniqueNameGeneration": true, + "@aws-cdk/aws-efs:denyAnonymousAccess": true, + "@aws-cdk/aws-opensearchservice:enableOpensearchMultiAzWithStandby": true, + "@aws-cdk/aws-lambda-nodejs:useLatestRuntimeVersion": true, + "@aws-cdk/aws-efs:mountTargetOrderInsensitiveLogicalId": true + } +} diff --git a/distribution/lambda/cdk/cli.py b/distribution/lambda/cdk/cli.py new file mode 100644 index 00000000000..ce2fe4cf75d --- /dev/null +++ b/distribution/lambda/cdk/cli.py @@ -0,0 +1,357 @@ +"""Helper scripts to test and explore the deployed infrastructure. 
+ +These functions are wrapped by the Makefile for convenience.""" + +import base64 +import gzip +import http.client +import json +import os +import re +import subprocess +import tempfile +import time +from dataclasses import dataclass +from functools import cache +from io import BytesIO + +import boto3 +import botocore.config +import botocore.exceptions +from cdk import app +from cdk.stacks.examples import hdfs_stack, mock_data_stack + +region = os.environ["CDK_REGION"] + +example_host = "quickwit-datasets-public.s3.amazonaws.com" +example_hdfs_file = "hdfs-logs-multitenants.json.gz" +INDEXING_BOTO_CONFIG = botocore.config.Config( + retries={"max_attempts": 0}, read_timeout=60 * 15 +) +session = boto3.Session(region_name=region) + + +@cache +def _get_cloudformation_output_value(stack_name: str, export_name: str) -> str: + client = session.client("cloudformation") + stacks = client.describe_stacks(StackName=stack_name)["Stacks"] + if len(stacks) != 1: + print(f"Stack {stack_name} not identified uniquely, found {stacks}") + outputs = stacks[0]["Outputs"] + for output in outputs: + if output["ExportName"] == export_name: + return output["OutputValue"] + else: + print(f"Export name {export_name} not found in stack {stack_name}") + exit(1) + + +@dataclass +class LambdaResult: + function_error: str + log_tail: str + payload: str + raw_size_bytes: int + + @staticmethod + def from_lambda_response(lambda_resp: dict) -> "LambdaResult": + payload = lambda_resp["Payload"].read().decode() + return LambdaResult( + function_error=lambda_resp.get("FunctionError", ""), + log_tail=base64.b64decode(lambda_resp["LogResult"]).decode(), + payload=payload, + raw_size_bytes=len(payload), + ) + + @staticmethod + def from_lambda_gateway_response(lambda_resp: dict) -> "LambdaResult": + gw_str = lambda_resp["Payload"].read().decode() + gw_obj = json.loads(gw_str) + payload = gw_obj["body"] + if gw_obj["isBase64Encoded"]: + dec_payload = base64.b64decode(payload) + if gw_obj.get("headers", 
{}).get("content-encoding", "") == "gzip": + payload = ( + gzip.GzipFile(mode="rb", fileobj=BytesIO(dec_payload)) + .read() + .decode() + ) + else: + payload = dec_payload.decode() + return LambdaResult( + function_error=lambda_resp.get("FunctionError", ""), + log_tail=base64.b64decode(lambda_resp["LogResult"]).decode(), + payload=payload, + raw_size_bytes=len(gw_str), + ) + + def extract_report(self) -> str: + """Expect "REPORT RequestId: xxx Duration: yyy..." to be in log tail""" + for line in reversed(self.log_tail.strip().splitlines()): + if line.startswith("REPORT"): + return line + else: + raise ValueError(f"Could not find report in log tail") + + def request_id(self) -> str: + report = self.extract_report() + match = re.search(r"RequestId: ([0-9a-z\-]+)", report) + if match: + return match.group(1) + else: + raise ValueError(f"Could not find RequestId in report: {report}") + + +def _format_lambda_output( + lambda_result: LambdaResult, duration=None, max_resp_size=10 * 1000 +): + if lambda_result.function_error != "": + print("\n## FUNCTION ERROR:") + print(lambda_result.function_error) + print("\n## LOG TAIL:") + print(lambda_result.log_tail) + print("\n## RAW RESPONSE SIZE (BYTES):") + ratio = lambda_result.raw_size_bytes / len(lambda_result.payload) + print(f"{lambda_result.raw_size_bytes} ({ratio:.1f}x the final payload)") + print("\n## RESPONSE:") + payload_size = len(lambda_result.payload) + print(lambda_result.payload[:max_resp_size]) + if payload_size > max_resp_size: + print(f"Response too long ({payload_size}), truncated to {max_resp_size} bytes") + + if duration is not None: + print("\n## TOTAL INVOCATION DURATION:") + print(duration) + + +def upload_hdfs_src_file(): + bucket_name = _get_cloudformation_output_value( + app.HDFS_STACK_NAME, hdfs_stack.INDEX_STORE_BUCKET_NAME_EXPORT_NAME + ) + uri = f"s3://{bucket_name}/{example_hdfs_file}" + try: + resp = session.client("s3").head_object( + Bucket=bucket_name, Key=example_hdfs_file + ) + 
print(f"{uri} already exists ({resp['ContentLength']} bytes), skipping upload") + return + except botocore.exceptions.ClientError as e: + if e.response["Error"]["Code"] != "404": + raise e + print(f"download dataset https://{example_host}/{example_hdfs_file}") + conn = http.client.HTTPSConnection(example_host) + conn.request("GET", f"/{example_hdfs_file}") + response = conn.getresponse() + if response.status != 200: + print(f"Failed to fetch dataset") + exit(1) + with tempfile.NamedTemporaryFile() as tmp: + while True: + chunk = response.read(1024 * 1024) + if len(chunk) == 0: + break + tmp.write(chunk) + tmp.flush() + print(f"downloaded {tmp.tell()} bytes") + print(f"upload dataset to {uri}") + session.client("s3").upload_file( + Bucket=bucket_name, Filename=tmp.name, Key=example_hdfs_file + ) + + +def invoke_hdfs_indexer() -> LambdaResult: + function_name = _get_cloudformation_output_value( + app.HDFS_STACK_NAME, hdfs_stack.INDEXER_FUNCTION_NAME_EXPORT_NAME + ) + print(f"indexer function name: {function_name}") + bucket_name = _get_cloudformation_output_value( + app.HDFS_STACK_NAME, hdfs_stack.INDEX_STORE_BUCKET_NAME_EXPORT_NAME + ) + source_uri = f"s3://{bucket_name}/{example_hdfs_file}" + print(f"src_file: {source_uri}") + invoke_start = time.time() + resp = session.client("lambda", config=INDEXING_BOTO_CONFIG).invoke( + FunctionName=function_name, + InvocationType="RequestResponse", + LogType="Tail", + Payload=f"""{{ "source_uri": "{source_uri}" }}""", + ) + invoke_duration = time.time() - invoke_start + lambda_result = LambdaResult.from_lambda_response(resp) + _format_lambda_output(lambda_result, invoke_duration) + return lambda_result + + +def _invoke_searcher( + stack_name: str, + function_export_name: str, + payload: str, + download_logs: bool, +) -> LambdaResult: + function_name = _get_cloudformation_output_value(stack_name, function_export_name) + client = session.client("lambda") + print(f"searcher function name: {function_name}") + invoke_start = 
time.time() + resp = client.invoke( + FunctionName=function_name, + InvocationType="RequestResponse", + LogType="Tail", + Payload=json.dumps( + { + "headers": {"Content-Type": "application/json"}, + "requestContext": { + "http": {"method": "POST"}, + }, + "body": payload, + "isBase64Encoded": False, + } + ), + ) + invoke_duration = time.time() - invoke_start + lambda_result = LambdaResult.from_lambda_gateway_response(resp) + _format_lambda_output(lambda_result, invoke_duration) + if download_logs: + download_logs_to_file(lambda_result.request_id(), function_name, invoke_start) + return lambda_result + + +def invoke_hdfs_searcher(payload: str, download_logs: bool = True) -> LambdaResult: + return _invoke_searcher( + app.HDFS_STACK_NAME, + hdfs_stack.SEARCHER_FUNCTION_NAME_EXPORT_NAME, + payload, + download_logs, + ) + + +def get_logs( + function_name: str, request_id: str, timestamp_unix_ms: int, timeout: float = 60 +): + print(f"Getting logs for requestId: {request_id}...") + client = session.client("logs") + log_group_name = f"/aws/lambda/{function_name}" + paginator = client.get_paginator("filter_log_events") + lower_time_bound = timestamp_unix_ms - 1000 * 3600 + upper_time_bound = timestamp_unix_ms + 1000 * 3600 + last_event_id = "" + last_event_found = True + start_time = time.time() + while time.time() - start_time < timeout: + for page in paginator.paginate( + logGroupName=log_group_name, + filterPattern=f"%{request_id}%", + startTime=lower_time_bound, + endTime=upper_time_bound, + ): + for event in page["events"]: + if last_event_found or event["eventId"] == last_event_id: + last_event_found = True + last_event_id = event["eventId"] + yield event["message"] + if event["message"].startswith("REPORT"): + print(event["message"]) + lower_time_bound = int(event["timestamp"]) + last_event_id = "REPORT" + break + if last_event_id == "REPORT": + break + if last_event_id == "REPORT": + break + elif last_event_id == "": + print(f"no event found, retrying...") + else: 
def benchmark_hdfs_search(payload: str):
    """Invoke the HDFS searcher Lambda twice and log one benchmark record per run.

    Each record is appended as a single JSON line to `lambda-bench.log` in the
    current working directory.

    Args:
        payload: JSON-encoded search request. It is forwarded unchanged to
            `invoke_hdfs_searcher` and stored decoded in the record.

    Raises:
        KeyError: if the `SEARCHER_MEMORY_SIZE` environment variable is unset.
        json.JSONDecodeError: if `payload` is not valid JSON.

    Failures of the invocation itself are not raised; their `repr` is stored
    under the `invokation_error` key of the record instead.
    """
    memory_size = os.environ["SEARCHER_MEMORY_SIZE"]
    # Record only the Quickwit Lambda settings, like `benchmark_hdfs_indexing`
    # does. Without the prefix filter the whole process environment (AWS
    # credentials included) would be written to the log file.
    recorded_env = {
        name: value
        for name, value in os.environ.items()
        if name.startswith("QW_LAMBDA_")
        and name != "QW_LAMBDA_OPENTELEMETRY_AUTHORIZATION"
    }
    # NOTE(review): two runs, presumably to compare a cold and a warm
    # invocation - confirm.
    for _ in range(2):
        bench_result = {
            "run": "benchmark_hdfs_search",
            "ts": time.time(),
            "commit": _git_commit(),
            "memory_size": memory_size,
            "payload": json.loads(payload),
            "env": recorded_env,
        }
        try:
            searcher_result = invoke_hdfs_searcher(payload, download_logs=False)
            bench_result["lambda_report"] = searcher_result.extract_report()
        except Exception as e:
            # Best effort: a failed run is still logged so that it shows up
            # in the benchmark history.
            bench_result["invokation_error"] = repr(e)
            print("Failed to invoke searcher")

        with open("lambda-bench.log", "a+") as f:
            f.write(json.dumps(bench_result))
            f.write("\n")
class Source(Construct):
    """A synthetic data source that generates mock data and pushes it to the
    indexer through a staging S3 bucket.

    A scheduled Lambda function writes to the staging bucket, and every object
    created in that bucket triggers the indexer Lambda function of `qw_svc`.
    """

    def __init__(
        self,
        scope: Construct,
        construct_id: str,
        index_id: str,
        qw_svc: QuickwitService,
        **kwargs,
    ) -> None:
        """Create the staging bucket, the generator function and its wiring.

        `index_id` is handed to the generator as its `PREFIX` environment
        variable. `qw_svc` provides the indexer function that is notified of
        new objects.
        """
        super().__init__(scope, construct_id, **kwargs)
        # Staging bucket: destroyed with the stack, and its objects expire
        # after one day.
        mock_data_bucket = aws_s3.Bucket(
            self,
            "mock-data",
            removal_policy=aws_cdk.RemovalPolicy.DESTROY,
            lifecycle_rules=[
                aws_s3.LifecycleRule(enabled=True, expiration=aws_cdk.Duration.days(1))
            ],
        )

        # The generator source is read at synthesis time and embedded inline
        # in the function definition. The path is relative to the working
        # directory from which the CDK app is run.
        with open("resources/data-generator.py") as f:
            lambda_code = f.read()
        generator_lambda = aws_lambda.Function(
            self,
            id="MockDataGenerator",
            code=aws_lambda.Code.from_inline(lambda_code),
            runtime=aws_lambda.Runtime.PYTHON_3_10,
            handler="index.lambda_handler",
            environment={
                "BUCKET_NAME": mock_data_bucket.bucket_name,
                # NOTE(review): presumably used by the generator as the object
                # key prefix - confirm in resources/data-generator.py.
                "PREFIX": index_id,
            },
            timeout=aws_cdk.Duration.seconds(30),
            memory_size=1024,
        )
        mock_data_bucket.grant_read_write(generator_lambda)
        # Run the generator every 5 minutes.
        rule = aws_events.Rule(
            self,
            "ScheduledRule",
            schedule=aws_events.Schedule.rate(aws_cdk.Duration.minutes(5)),
        )
        rule.add_target(aws_events_targets.LambdaFunction(generator_lambda))

        # The indexer may read the staged objects and is invoked for each
        # object created in the bucket.
        mock_data_bucket.grant_read(qw_svc.indexer.lambda_function)
        mock_data_bucket.add_object_created_notification(
            aws_s3_notifications.LambdaDestination(qw_svc.indexer.lambda_function)
        )
class MockDataStack(Stack):
    """Example stack: a Quickwit Lambda service fed by a synthetic data source,
    optionally exposed through an API Gateway search endpoint."""

    def __init__(
        self,
        scope: Construct,
        construct_id: str,
        indexer_package_location: str,
        searcher_package_location: str,
        search_api_key: str | None = None,
        **kwargs,
    ) -> None:
        """If `search_api_key` is not set, the search API is not deployed."""
        super().__init__(scope, construct_id, **kwargs)

        # The index id is read from the local index config at synthesis time.
        # The path is relative to the working directory of the CDK app.
        index_config_local_path = "resources/mock-sales.yaml"
        with open(index_config_local_path) as f:
            index_config_dict = yaml.safe_load(f)
            index_id = index_config_dict["index_id"]

        # Register the index config file as an S3 asset; its bucket and key
        # are handed to the Quickwit service below.
        index_config = aws_s3_assets.Asset(
            self,
            "mock-data-index-config",
            path=index_config_local_path,
        )
        qw_svc = QuickwitService(
            self,
            "Quickwit",
            index_id=index_id,
            index_config_bucket=index_config.s3_bucket_name,
            index_config_key=index_config.s3_object_key,
            indexer_package_location=indexer_package_location,
            searcher_package_location=searcher_package_location,
        )

        Source(self, "Source", index_id=index_id, qw_svc=qw_svc)

        if search_api_key is not None:
            SearchAPI(
                self,
                "SearchAPI",
                index_id=index_id,
                qw_svc=qw_svc,
                api_key=search_api_key,
            )

        # Export the searcher function name under a fixed export name.
        aws_cdk.CfnOutput(
            self,
            "searcher-function-name",
            value=qw_svc.searcher.lambda_function.function_name,
            export_name=SEARCHER_FUNCTION_NAME_EXPORT_NAME,
        )
class IndexerService(Construct):
    """Construct wrapping the Quickwit indexer Lambda function.

    The function is exposed as `self.lambda_function`.
    """

    def __init__(
        self,
        scope: Construct,
        construct_id: str,
        store_bucket: aws_s3.Bucket,
        index_id: str,
        index_config_bucket: str,
        index_config_key: str,
        memory_size: int,
        environment: dict[str, str],
        asset_path: str,
        **kwargs,
    ) -> None:
        """Create the indexer function and grant it its S3 permissions.

        `store_bucket` is used for both the index and the metastore.
        `index_config_bucket`/`index_config_key` locate the index config
        object. Entries of `environment` override the `QW_LAMBDA_*` variables
        set here. `asset_path` is the local path of the function's code asset.
        """
        super().__init__(scope, construct_id, **kwargs)

        self.lambda_function = aws_lambda.Function(
            self,
            id="Lambda",
            code=aws_lambda.Code.from_asset(asset_path),
            runtime=aws_lambda.Runtime.PROVIDED_AL2,
            # NOTE(review): placeholder value; presumably ignored because the
            # provided runtime ships its own entry point - confirm.
            handler="N/A",
            environment={
                "QW_LAMBDA_INDEX_BUCKET": store_bucket.bucket_name,
                "QW_LAMBDA_METASTORE_BUCKET": store_bucket.bucket_name,
                "QW_LAMBDA_INDEX_ID": index_id,
                "QW_LAMBDA_INDEX_CONFIG_URI": f"s3://{index_config_bucket}/{index_config_key}",
                # Spread last so that caller-provided values take precedence.
                **environment,
            },
            timeout=aws_cdk.Duration.minutes(15),
            # NOTE(review): at most one concurrent execution, presumably to
            # avoid concurrent writers on the same index - confirm.
            reserved_concurrent_executions=1,
            memory_size=memory_size,
            ephemeral_storage_size=aws_cdk.Size.gibibytes(10),
        )
        # Read access limited to the single index config object.
        self.lambda_function.add_to_role_policy(
            aws_iam.PolicyStatement(
                actions=["s3:GetObject"],
                resources=[f"arn:aws:s3:::{index_config_bucket}/{index_config_key}"],
            )
        )
        store_bucket.grant_read_write(self.lambda_function)
class QuickwitService(Construct):
    """A Quickwit Lambda service: one index store bucket shared by an indexer
    and a searcher Lambda function.

    Exposes `self.bucket`, `self.indexer` and `self.searcher`.
    """

    def __init__(
        self,
        scope: Construct,
        construct_id: str,
        index_config_bucket: str,
        index_config_key: str,
        index_id: str,
        searcher_package_location: str,
        indexer_package_location: str,
        indexer_memory_size: int = DEFAULT_LAMBDA_MEMORY_SIZE,
        indexer_environment: dict[str, str] | None = None,
        searcher_memory_size: int = DEFAULT_LAMBDA_MEMORY_SIZE,
        searcher_environment: dict[str, str] | None = None,
        **kwargs,
    ) -> None:
        """Create a new Quickwit Lambda service construct node.

        `{indexer|searcher}_package_location` is the path of the `zip` asset for
        the Lambda function.

        `{indexer|searcher}_environment` are extra environment variables for
        the corresponding function; omitting one (or passing `None`) means no
        extra variables.
        """
        super().__init__(scope, construct_id, **kwargs)
        # `None` sentinels instead of `{}` defaults: a dict default is created
        # once at definition time and would be shared by every instantiation.
        if indexer_environment is None:
            indexer_environment = {}
        if searcher_environment is None:
            searcher_environment = {}

        # The index store is destroyed together with the stack.
        self.bucket = aws_s3.Bucket(
            self,
            "IndexStore",
            removal_policy=aws_cdk.RemovalPolicy.DESTROY,
        )
        self.indexer = indexer_service.IndexerService(
            self,
            "Indexer",
            store_bucket=self.bucket,
            index_id=index_id,
            index_config_bucket=index_config_bucket,
            index_config_key=index_config_key,
            memory_size=indexer_memory_size,
            environment=indexer_environment,
            asset_path=indexer_package_location,
        )
        self.searcher = searcher_service.SearcherService(
            self,
            "Searcher",
            store_bucket=self.bucket,
            index_id=index_id,
            memory_size=searcher_memory_size,
            environment=searcher_environment,
            asset_path=searcher_package_location,
        )
b/distribution/lambda/poetry.lock new file mode 100644 index 00000000000..6ddc73a2b70 --- /dev/null +++ b/distribution/lambda/poetry.lock @@ -0,0 +1,1029 @@ +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. + +[[package]] +name = "attrs" +version = "23.1.0" +description = "Classes Without Boilerplate" +optional = false +python-versions = ">=3.7" +files = [ + {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, + {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, +] + +[package.extras] +cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] +dev = ["attrs[docs,tests]", "pre-commit"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] +tests = ["attrs[tests-no-zope]", "zope-interface"] +tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] + +[[package]] +name = "aws-cdk-asset-awscli-v1" +version = "2.2.201" +description = "A library that contains the AWS CLI for use in Lambda Layers" +optional = false +python-versions = "~=3.7" +files = [ + {file = "aws-cdk.asset-awscli-v1-2.2.201.tar.gz", hash = "sha256:88d1c269fd5cf8c9f6e0464ed22e2d4f269dfd5b36b8c4d37687bdba9c269839"}, + {file = "aws_cdk.asset_awscli_v1-2.2.201-py3-none-any.whl", hash = "sha256:56fe2ef91d3c8d33559aa32d2130e5f35f23af1fb82f06648ebbc82ffe0a5879"}, +] + +[package.dependencies] +jsii = ">=1.91.0,<2.0.0" +publication = ">=0.0.3" +typeguard = ">=2.13.3,<2.14.0" + +[[package]] +name = "aws-cdk-asset-kubectl-v20" +version = "2.1.2" +description = "A library that contains kubectl for use in Lambda Layers" +optional = false +python-versions = "~=3.7" +files = [ + {file = "aws-cdk.asset-kubectl-v20-2.1.2.tar.gz", hash = 
"sha256:346283e43018a43e3b3ca571de3f44e85d49c038dc20851894cb8f9b2052b164"}, + {file = "aws_cdk.asset_kubectl_v20-2.1.2-py3-none-any.whl", hash = "sha256:7f0617ab6cb942b066bd7174bf3e1f377e57878c3e1cddc21d6b2d13c92d0cc1"}, +] + +[package.dependencies] +jsii = ">=1.70.0,<2.0.0" +publication = ">=0.0.3" +typeguard = ">=2.13.3,<2.14.0" + +[[package]] +name = "aws-cdk-asset-node-proxy-agent-v6" +version = "2.0.1" +description = "@aws-cdk/asset-node-proxy-agent-v6" +optional = false +python-versions = "~=3.7" +files = [ + {file = "aws-cdk.asset-node-proxy-agent-v6-2.0.1.tar.gz", hash = "sha256:42cdbc1de2ed3f845e3eb883a72f58fc7e5554c2e0b6fcdb366c159778dce74d"}, + {file = "aws_cdk.asset_node_proxy_agent_v6-2.0.1-py3-none-any.whl", hash = "sha256:e442673d4f93137ab165b75386761b1d46eea25fc5015e5145ae3afa9da06b6e"}, +] + +[package.dependencies] +jsii = ">=1.86.1,<2.0.0" +publication = ">=0.0.3" +typeguard = ">=2.13.3,<2.14.0" + +[[package]] +name = "aws-cdk-lib" +version = "2.110.1" +description = "Version 2 of the AWS Cloud Development Kit library" +optional = false +python-versions = "~=3.7" +files = [ + {file = "aws-cdk-lib-2.110.1.tar.gz", hash = "sha256:f9780664b70e11aa886ef42fdb4e45dab180721e42eb8a4575617573a8e46ed0"}, + {file = "aws_cdk_lib-2.110.1-py3-none-any.whl", hash = "sha256:63f234360832f08ae7a767fa1e3f6775ceeef0b8f9a75aa9ec7b79642c1fee21"}, +] + +[package.dependencies] +"aws-cdk.asset-awscli-v1" = ">=2.2.201,<3.0.0" +"aws-cdk.asset-kubectl-v20" = ">=2.1.2,<3.0.0" +"aws-cdk.asset-node-proxy-agent-v6" = ">=2.0.1,<3.0.0" +constructs = ">=10.0.0,<11.0.0" +jsii = ">=1.91.0,<2.0.0" +publication = ">=0.0.3" +typeguard = ">=2.13.3,<2.14.0" + +[[package]] +name = "black" +version = "23.11.0" +description = "The uncompromising code formatter." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "black-23.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dbea0bb8575c6b6303cc65017b46351dc5953eea5c0a59d7b7e3a2d2f433a911"}, + {file = "black-23.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:412f56bab20ac85927f3a959230331de5614aecda1ede14b373083f62ec24e6f"}, + {file = "black-23.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d136ef5b418c81660ad847efe0e55c58c8208b77a57a28a503a5f345ccf01394"}, + {file = "black-23.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:6c1cac07e64433f646a9a838cdc00c9768b3c362805afc3fce341af0e6a9ae9f"}, + {file = "black-23.11.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cf57719e581cfd48c4efe28543fea3d139c6b6f1238b3f0102a9c73992cbb479"}, + {file = "black-23.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:698c1e0d5c43354ec5d6f4d914d0d553a9ada56c85415700b81dc90125aac244"}, + {file = "black-23.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:760415ccc20f9e8747084169110ef75d545f3b0932ee21368f63ac0fee86b221"}, + {file = "black-23.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:58e5f4d08a205b11800332920e285bd25e1a75c54953e05502052738fe16b3b5"}, + {file = "black-23.11.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:45aa1d4675964946e53ab81aeec7a37613c1cb71647b5394779e6efb79d6d187"}, + {file = "black-23.11.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4c44b7211a3a0570cc097e81135faa5f261264f4dfaa22bd5ee2875a4e773bd6"}, + {file = "black-23.11.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a9acad1451632021ee0d146c8765782a0c3846e0e0ea46659d7c4f89d9b212b"}, + {file = "black-23.11.0-cp38-cp38-win_amd64.whl", hash = "sha256:fc7f6a44d52747e65a02558e1d807c82df1d66ffa80a601862040a43ec2e3142"}, + {file = "black-23.11.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7f622b6822f02bfaf2a5cd31fdb7cd86fcf33dab6ced5185c35f5db98260b055"}, + {file = 
"black-23.11.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:250d7e60f323fcfc8ea6c800d5eba12f7967400eb6c2d21ae85ad31c204fb1f4"}, + {file = "black-23.11.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5133f5507007ba08d8b7b263c7aa0f931af5ba88a29beacc4b2dc23fcefe9c06"}, + {file = "black-23.11.0-cp39-cp39-win_amd64.whl", hash = "sha256:421f3e44aa67138ab1b9bfbc22ee3780b22fa5b291e4db8ab7eee95200726b07"}, + {file = "black-23.11.0-py3-none-any.whl", hash = "sha256:54caaa703227c6e0c87b76326d0862184729a69b73d3b7305b6288e1d830067e"}, + {file = "black-23.11.0.tar.gz", hash = "sha256:4c68855825ff432d197229846f971bc4d6666ce90492e5b02013bcaca4d9ab05"}, +] + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = ">=0.4.3" +packaging = ">=22.0" +pathspec = ">=0.9.0" +platformdirs = ">=2" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.7.4)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + +[[package]] +name = "boto3" +version = "1.29.5" +description = "The AWS SDK for Python" +optional = false +python-versions = ">= 3.7" +files = [ + {file = "boto3-1.29.5-py3-none-any.whl", hash = "sha256:030b0f0faf8d44f97e67a5411644243482f33ebf1c45338bb40662239a16dda4"}, + {file = "boto3-1.29.5.tar.gz", hash = "sha256:76fc6a17781c27558c526e899579ccf530df10eb279261fe7800540f0043917e"}, +] + +[package.dependencies] +botocore = ">=1.32.5,<1.33.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.7.0,<0.8.0" + +[package.extras] +crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "boto3-stubs" +version = "1.29.5" +description = "Type annotations for boto3 1.29.5 generated with mypy-boto3-builder 7.20.3" +optional = false +python-versions = ">=3.7" +files = [ + {file = "boto3-stubs-1.29.5.tar.gz", hash = 
"sha256:62d7e2f6dbadb9d5900f661b87a69c1be39acf14326908ad207f199f8dd00fd2"}, + {file = "boto3_stubs-1.29.5-py3-none-any.whl", hash = "sha256:28076d8fceaa0c40de9c10408e941eacf80363702d6f6a087981e611aa2abfc1"}, +] + +[package.dependencies] +botocore-stubs = "*" +types-s3transfer = "*" +typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} + +[package.extras] +accessanalyzer = ["mypy-boto3-accessanalyzer (>=1.29.0,<1.30.0)"] +account = ["mypy-boto3-account (>=1.29.0,<1.30.0)"] +acm = ["mypy-boto3-acm (>=1.29.0,<1.30.0)"] +acm-pca = ["mypy-boto3-acm-pca (>=1.29.0,<1.30.0)"] +alexaforbusiness = ["mypy-boto3-alexaforbusiness (>=1.29.0,<1.30.0)"] +all = ["mypy-boto3-accessanalyzer (>=1.29.0,<1.30.0)", "mypy-boto3-account (>=1.29.0,<1.30.0)", "mypy-boto3-acm (>=1.29.0,<1.30.0)", "mypy-boto3-acm-pca (>=1.29.0,<1.30.0)", "mypy-boto3-alexaforbusiness (>=1.29.0,<1.30.0)", "mypy-boto3-amp (>=1.29.0,<1.30.0)", "mypy-boto3-amplify (>=1.29.0,<1.30.0)", "mypy-boto3-amplifybackend (>=1.29.0,<1.30.0)", "mypy-boto3-amplifyuibuilder (>=1.29.0,<1.30.0)", "mypy-boto3-apigateway (>=1.29.0,<1.30.0)", "mypy-boto3-apigatewaymanagementapi (>=1.29.0,<1.30.0)", "mypy-boto3-apigatewayv2 (>=1.29.0,<1.30.0)", "mypy-boto3-appconfig (>=1.29.0,<1.30.0)", "mypy-boto3-appconfigdata (>=1.29.0,<1.30.0)", "mypy-boto3-appfabric (>=1.29.0,<1.30.0)", "mypy-boto3-appflow (>=1.29.0,<1.30.0)", "mypy-boto3-appintegrations (>=1.29.0,<1.30.0)", "mypy-boto3-application-autoscaling (>=1.29.0,<1.30.0)", "mypy-boto3-application-insights (>=1.29.0,<1.30.0)", "mypy-boto3-applicationcostprofiler (>=1.29.0,<1.30.0)", "mypy-boto3-appmesh (>=1.29.0,<1.30.0)", "mypy-boto3-apprunner (>=1.29.0,<1.30.0)", "mypy-boto3-appstream (>=1.29.0,<1.30.0)", "mypy-boto3-appsync (>=1.29.0,<1.30.0)", "mypy-boto3-arc-zonal-shift (>=1.29.0,<1.30.0)", "mypy-boto3-athena (>=1.29.0,<1.30.0)", "mypy-boto3-auditmanager (>=1.29.0,<1.30.0)", "mypy-boto3-autoscaling (>=1.29.0,<1.30.0)", "mypy-boto3-autoscaling-plans 
(>=1.29.0,<1.30.0)", "mypy-boto3-backup (>=1.29.0,<1.30.0)", "mypy-boto3-backup-gateway (>=1.29.0,<1.30.0)", "mypy-boto3-backupstorage (>=1.29.0,<1.30.0)", "mypy-boto3-batch (>=1.29.0,<1.30.0)", "mypy-boto3-bedrock (>=1.29.0,<1.30.0)", "mypy-boto3-bedrock-runtime (>=1.29.0,<1.30.0)", "mypy-boto3-billingconductor (>=1.29.0,<1.30.0)", "mypy-boto3-braket (>=1.29.0,<1.30.0)", "mypy-boto3-budgets (>=1.29.0,<1.30.0)", "mypy-boto3-ce (>=1.29.0,<1.30.0)", "mypy-boto3-chime (>=1.29.0,<1.30.0)", "mypy-boto3-chime-sdk-identity (>=1.29.0,<1.30.0)", "mypy-boto3-chime-sdk-media-pipelines (>=1.29.0,<1.30.0)", "mypy-boto3-chime-sdk-meetings (>=1.29.0,<1.30.0)", "mypy-boto3-chime-sdk-messaging (>=1.29.0,<1.30.0)", "mypy-boto3-chime-sdk-voice (>=1.29.0,<1.30.0)", "mypy-boto3-cleanrooms (>=1.29.0,<1.30.0)", "mypy-boto3-cloud9 (>=1.29.0,<1.30.0)", "mypy-boto3-cloudcontrol (>=1.29.0,<1.30.0)", "mypy-boto3-clouddirectory (>=1.29.0,<1.30.0)", "mypy-boto3-cloudformation (>=1.29.0,<1.30.0)", "mypy-boto3-cloudfront (>=1.29.0,<1.30.0)", "mypy-boto3-cloudfront-keyvaluestore (>=1.29.0,<1.30.0)", "mypy-boto3-cloudhsm (>=1.29.0,<1.30.0)", "mypy-boto3-cloudhsmv2 (>=1.29.0,<1.30.0)", "mypy-boto3-cloudsearch (>=1.29.0,<1.30.0)", "mypy-boto3-cloudsearchdomain (>=1.29.0,<1.30.0)", "mypy-boto3-cloudtrail (>=1.29.0,<1.30.0)", "mypy-boto3-cloudtrail-data (>=1.29.0,<1.30.0)", "mypy-boto3-cloudwatch (>=1.29.0,<1.30.0)", "mypy-boto3-codeartifact (>=1.29.0,<1.30.0)", "mypy-boto3-codebuild (>=1.29.0,<1.30.0)", "mypy-boto3-codecatalyst (>=1.29.0,<1.30.0)", "mypy-boto3-codecommit (>=1.29.0,<1.30.0)", "mypy-boto3-codedeploy (>=1.29.0,<1.30.0)", "mypy-boto3-codeguru-reviewer (>=1.29.0,<1.30.0)", "mypy-boto3-codeguru-security (>=1.29.0,<1.30.0)", "mypy-boto3-codeguruprofiler (>=1.29.0,<1.30.0)", "mypy-boto3-codepipeline (>=1.29.0,<1.30.0)", "mypy-boto3-codestar (>=1.29.0,<1.30.0)", "mypy-boto3-codestar-connections (>=1.29.0,<1.30.0)", "mypy-boto3-codestar-notifications (>=1.29.0,<1.30.0)", 
"mypy-boto3-cognito-identity (>=1.29.0,<1.30.0)", "mypy-boto3-cognito-idp (>=1.29.0,<1.30.0)", "mypy-boto3-cognito-sync (>=1.29.0,<1.30.0)", "mypy-boto3-comprehend (>=1.29.0,<1.30.0)", "mypy-boto3-comprehendmedical (>=1.29.0,<1.30.0)", "mypy-boto3-compute-optimizer (>=1.29.0,<1.30.0)", "mypy-boto3-config (>=1.29.0,<1.30.0)", "mypy-boto3-connect (>=1.29.0,<1.30.0)", "mypy-boto3-connect-contact-lens (>=1.29.0,<1.30.0)", "mypy-boto3-connectcampaigns (>=1.29.0,<1.30.0)", "mypy-boto3-connectcases (>=1.29.0,<1.30.0)", "mypy-boto3-connectparticipant (>=1.29.0,<1.30.0)", "mypy-boto3-controltower (>=1.29.0,<1.30.0)", "mypy-boto3-cur (>=1.29.0,<1.30.0)", "mypy-boto3-customer-profiles (>=1.29.0,<1.30.0)", "mypy-boto3-databrew (>=1.29.0,<1.30.0)", "mypy-boto3-dataexchange (>=1.29.0,<1.30.0)", "mypy-boto3-datapipeline (>=1.29.0,<1.30.0)", "mypy-boto3-datasync (>=1.29.0,<1.30.0)", "mypy-boto3-datazone (>=1.29.0,<1.30.0)", "mypy-boto3-dax (>=1.29.0,<1.30.0)", "mypy-boto3-detective (>=1.29.0,<1.30.0)", "mypy-boto3-devicefarm (>=1.29.0,<1.30.0)", "mypy-boto3-devops-guru (>=1.29.0,<1.30.0)", "mypy-boto3-directconnect (>=1.29.0,<1.30.0)", "mypy-boto3-discovery (>=1.29.0,<1.30.0)", "mypy-boto3-dlm (>=1.29.0,<1.30.0)", "mypy-boto3-dms (>=1.29.0,<1.30.0)", "mypy-boto3-docdb (>=1.29.0,<1.30.0)", "mypy-boto3-docdb-elastic (>=1.29.0,<1.30.0)", "mypy-boto3-drs (>=1.29.0,<1.30.0)", "mypy-boto3-ds (>=1.29.0,<1.30.0)", "mypy-boto3-dynamodb (>=1.29.0,<1.30.0)", "mypy-boto3-dynamodbstreams (>=1.29.0,<1.30.0)", "mypy-boto3-ebs (>=1.29.0,<1.30.0)", "mypy-boto3-ec2 (>=1.29.0,<1.30.0)", "mypy-boto3-ec2-instance-connect (>=1.29.0,<1.30.0)", "mypy-boto3-ecr (>=1.29.0,<1.30.0)", "mypy-boto3-ecr-public (>=1.29.0,<1.30.0)", "mypy-boto3-ecs (>=1.29.0,<1.30.0)", "mypy-boto3-efs (>=1.29.0,<1.30.0)", "mypy-boto3-eks (>=1.29.0,<1.30.0)", "mypy-boto3-elastic-inference (>=1.29.0,<1.30.0)", "mypy-boto3-elasticache (>=1.29.0,<1.30.0)", "mypy-boto3-elasticbeanstalk (>=1.29.0,<1.30.0)", 
"mypy-boto3-elastictranscoder (>=1.29.0,<1.30.0)", "mypy-boto3-elb (>=1.29.0,<1.30.0)", "mypy-boto3-elbv2 (>=1.29.0,<1.30.0)", "mypy-boto3-emr (>=1.29.0,<1.30.0)", "mypy-boto3-emr-containers (>=1.29.0,<1.30.0)", "mypy-boto3-emr-serverless (>=1.29.0,<1.30.0)", "mypy-boto3-entityresolution (>=1.29.0,<1.30.0)", "mypy-boto3-es (>=1.29.0,<1.30.0)", "mypy-boto3-events (>=1.29.0,<1.30.0)", "mypy-boto3-evidently (>=1.29.0,<1.30.0)", "mypy-boto3-finspace (>=1.29.0,<1.30.0)", "mypy-boto3-finspace-data (>=1.29.0,<1.30.0)", "mypy-boto3-firehose (>=1.29.0,<1.30.0)", "mypy-boto3-fis (>=1.29.0,<1.30.0)", "mypy-boto3-fms (>=1.29.0,<1.30.0)", "mypy-boto3-forecast (>=1.29.0,<1.30.0)", "mypy-boto3-forecastquery (>=1.29.0,<1.30.0)", "mypy-boto3-frauddetector (>=1.29.0,<1.30.0)", "mypy-boto3-fsx (>=1.29.0,<1.30.0)", "mypy-boto3-gamelift (>=1.29.0,<1.30.0)", "mypy-boto3-glacier (>=1.29.0,<1.30.0)", "mypy-boto3-globalaccelerator (>=1.29.0,<1.30.0)", "mypy-boto3-glue (>=1.29.0,<1.30.0)", "mypy-boto3-grafana (>=1.29.0,<1.30.0)", "mypy-boto3-greengrass (>=1.29.0,<1.30.0)", "mypy-boto3-greengrassv2 (>=1.29.0,<1.30.0)", "mypy-boto3-groundstation (>=1.29.0,<1.30.0)", "mypy-boto3-guardduty (>=1.29.0,<1.30.0)", "mypy-boto3-health (>=1.29.0,<1.30.0)", "mypy-boto3-healthlake (>=1.29.0,<1.30.0)", "mypy-boto3-honeycode (>=1.29.0,<1.30.0)", "mypy-boto3-iam (>=1.29.0,<1.30.0)", "mypy-boto3-identitystore (>=1.29.0,<1.30.0)", "mypy-boto3-imagebuilder (>=1.29.0,<1.30.0)", "mypy-boto3-importexport (>=1.29.0,<1.30.0)", "mypy-boto3-inspector (>=1.29.0,<1.30.0)", "mypy-boto3-inspector-scan (>=1.29.0,<1.30.0)", "mypy-boto3-inspector2 (>=1.29.0,<1.30.0)", "mypy-boto3-internetmonitor (>=1.29.0,<1.30.0)", "mypy-boto3-iot (>=1.29.0,<1.30.0)", "mypy-boto3-iot-data (>=1.29.0,<1.30.0)", "mypy-boto3-iot-jobs-data (>=1.29.0,<1.30.0)", "mypy-boto3-iot-roborunner (>=1.29.0,<1.30.0)", "mypy-boto3-iot1click-devices (>=1.29.0,<1.30.0)", "mypy-boto3-iot1click-projects (>=1.29.0,<1.30.0)", "mypy-boto3-iotanalytics 
(>=1.29.0,<1.30.0)", "mypy-boto3-iotdeviceadvisor (>=1.29.0,<1.30.0)", "mypy-boto3-iotevents (>=1.29.0,<1.30.0)", "mypy-boto3-iotevents-data (>=1.29.0,<1.30.0)", "mypy-boto3-iotfleethub (>=1.29.0,<1.30.0)", "mypy-boto3-iotfleetwise (>=1.29.0,<1.30.0)", "mypy-boto3-iotsecuretunneling (>=1.29.0,<1.30.0)", "mypy-boto3-iotsitewise (>=1.29.0,<1.30.0)", "mypy-boto3-iotthingsgraph (>=1.29.0,<1.30.0)", "mypy-boto3-iottwinmaker (>=1.29.0,<1.30.0)", "mypy-boto3-iotwireless (>=1.29.0,<1.30.0)", "mypy-boto3-ivs (>=1.29.0,<1.30.0)", "mypy-boto3-ivs-realtime (>=1.29.0,<1.30.0)", "mypy-boto3-ivschat (>=1.29.0,<1.30.0)", "mypy-boto3-kafka (>=1.29.0,<1.30.0)", "mypy-boto3-kafkaconnect (>=1.29.0,<1.30.0)", "mypy-boto3-kendra (>=1.29.0,<1.30.0)", "mypy-boto3-kendra-ranking (>=1.29.0,<1.30.0)", "mypy-boto3-keyspaces (>=1.29.0,<1.30.0)", "mypy-boto3-kinesis (>=1.29.0,<1.30.0)", "mypy-boto3-kinesis-video-archived-media (>=1.29.0,<1.30.0)", "mypy-boto3-kinesis-video-media (>=1.29.0,<1.30.0)", "mypy-boto3-kinesis-video-signaling (>=1.29.0,<1.30.0)", "mypy-boto3-kinesis-video-webrtc-storage (>=1.29.0,<1.30.0)", "mypy-boto3-kinesisanalytics (>=1.29.0,<1.30.0)", "mypy-boto3-kinesisanalyticsv2 (>=1.29.0,<1.30.0)", "mypy-boto3-kinesisvideo (>=1.29.0,<1.30.0)", "mypy-boto3-kms (>=1.29.0,<1.30.0)", "mypy-boto3-lakeformation (>=1.29.0,<1.30.0)", "mypy-boto3-lambda (>=1.29.0,<1.30.0)", "mypy-boto3-launch-wizard (>=1.29.0,<1.30.0)", "mypy-boto3-lex-models (>=1.29.0,<1.30.0)", "mypy-boto3-lex-runtime (>=1.29.0,<1.30.0)", "mypy-boto3-lexv2-models (>=1.29.0,<1.30.0)", "mypy-boto3-lexv2-runtime (>=1.29.0,<1.30.0)", "mypy-boto3-license-manager (>=1.29.0,<1.30.0)", "mypy-boto3-license-manager-linux-subscriptions (>=1.29.0,<1.30.0)", "mypy-boto3-license-manager-user-subscriptions (>=1.29.0,<1.30.0)", "mypy-boto3-lightsail (>=1.29.0,<1.30.0)", "mypy-boto3-location (>=1.29.0,<1.30.0)", "mypy-boto3-logs (>=1.29.0,<1.30.0)", "mypy-boto3-lookoutequipment (>=1.29.0,<1.30.0)", "mypy-boto3-lookoutmetrics 
(>=1.29.0,<1.30.0)", "mypy-boto3-lookoutvision (>=1.29.0,<1.30.0)", "mypy-boto3-m2 (>=1.29.0,<1.30.0)", "mypy-boto3-machinelearning (>=1.29.0,<1.30.0)", "mypy-boto3-macie2 (>=1.29.0,<1.30.0)", "mypy-boto3-managedblockchain (>=1.29.0,<1.30.0)", "mypy-boto3-managedblockchain-query (>=1.29.0,<1.30.0)", "mypy-boto3-marketplace-catalog (>=1.29.0,<1.30.0)", "mypy-boto3-marketplace-entitlement (>=1.29.0,<1.30.0)", "mypy-boto3-marketplacecommerceanalytics (>=1.29.0,<1.30.0)", "mypy-boto3-mediaconnect (>=1.29.0,<1.30.0)", "mypy-boto3-mediaconvert (>=1.29.0,<1.30.0)", "mypy-boto3-medialive (>=1.29.0,<1.30.0)", "mypy-boto3-mediapackage (>=1.29.0,<1.30.0)", "mypy-boto3-mediapackage-vod (>=1.29.0,<1.30.0)", "mypy-boto3-mediapackagev2 (>=1.29.0,<1.30.0)", "mypy-boto3-mediastore (>=1.29.0,<1.30.0)", "mypy-boto3-mediastore-data (>=1.29.0,<1.30.0)", "mypy-boto3-mediatailor (>=1.29.0,<1.30.0)", "mypy-boto3-medical-imaging (>=1.29.0,<1.30.0)", "mypy-boto3-memorydb (>=1.29.0,<1.30.0)", "mypy-boto3-meteringmarketplace (>=1.29.0,<1.30.0)", "mypy-boto3-mgh (>=1.29.0,<1.30.0)", "mypy-boto3-mgn (>=1.29.0,<1.30.0)", "mypy-boto3-migration-hub-refactor-spaces (>=1.29.0,<1.30.0)", "mypy-boto3-migrationhub-config (>=1.29.0,<1.30.0)", "mypy-boto3-migrationhuborchestrator (>=1.29.0,<1.30.0)", "mypy-boto3-migrationhubstrategy (>=1.29.0,<1.30.0)", "mypy-boto3-mobile (>=1.29.0,<1.30.0)", "mypy-boto3-mq (>=1.29.0,<1.30.0)", "mypy-boto3-mturk (>=1.29.0,<1.30.0)", "mypy-boto3-mwaa (>=1.29.0,<1.30.0)", "mypy-boto3-neptune (>=1.29.0,<1.30.0)", "mypy-boto3-neptunedata (>=1.29.0,<1.30.0)", "mypy-boto3-network-firewall (>=1.29.0,<1.30.0)", "mypy-boto3-networkmanager (>=1.29.0,<1.30.0)", "mypy-boto3-nimble (>=1.29.0,<1.30.0)", "mypy-boto3-oam (>=1.29.0,<1.30.0)", "mypy-boto3-omics (>=1.29.0,<1.30.0)", "mypy-boto3-opensearch (>=1.29.0,<1.30.0)", "mypy-boto3-opensearchserverless (>=1.29.0,<1.30.0)", "mypy-boto3-opsworks (>=1.29.0,<1.30.0)", "mypy-boto3-opsworkscm (>=1.29.0,<1.30.0)", "mypy-boto3-organizations 
(>=1.29.0,<1.30.0)", "mypy-boto3-osis (>=1.29.0,<1.30.0)", "mypy-boto3-outposts (>=1.29.0,<1.30.0)", "mypy-boto3-panorama (>=1.29.0,<1.30.0)", "mypy-boto3-payment-cryptography (>=1.29.0,<1.30.0)", "mypy-boto3-payment-cryptography-data (>=1.29.0,<1.30.0)", "mypy-boto3-pca-connector-ad (>=1.29.0,<1.30.0)", "mypy-boto3-personalize (>=1.29.0,<1.30.0)", "mypy-boto3-personalize-events (>=1.29.0,<1.30.0)", "mypy-boto3-personalize-runtime (>=1.29.0,<1.30.0)", "mypy-boto3-pi (>=1.29.0,<1.30.0)", "mypy-boto3-pinpoint (>=1.29.0,<1.30.0)", "mypy-boto3-pinpoint-email (>=1.29.0,<1.30.0)", "mypy-boto3-pinpoint-sms-voice (>=1.29.0,<1.30.0)", "mypy-boto3-pinpoint-sms-voice-v2 (>=1.29.0,<1.30.0)", "mypy-boto3-pipes (>=1.29.0,<1.30.0)", "mypy-boto3-polly (>=1.29.0,<1.30.0)", "mypy-boto3-pricing (>=1.29.0,<1.30.0)", "mypy-boto3-privatenetworks (>=1.29.0,<1.30.0)", "mypy-boto3-proton (>=1.29.0,<1.30.0)", "mypy-boto3-qldb (>=1.29.0,<1.30.0)", "mypy-boto3-qldb-session (>=1.29.0,<1.30.0)", "mypy-boto3-quicksight (>=1.29.0,<1.30.0)", "mypy-boto3-ram (>=1.29.0,<1.30.0)", "mypy-boto3-rbin (>=1.29.0,<1.30.0)", "mypy-boto3-rds (>=1.29.0,<1.30.0)", "mypy-boto3-rds-data (>=1.29.0,<1.30.0)", "mypy-boto3-redshift (>=1.29.0,<1.30.0)", "mypy-boto3-redshift-data (>=1.29.0,<1.30.0)", "mypy-boto3-redshift-serverless (>=1.29.0,<1.30.0)", "mypy-boto3-rekognition (>=1.29.0,<1.30.0)", "mypy-boto3-resiliencehub (>=1.29.0,<1.30.0)", "mypy-boto3-resource-explorer-2 (>=1.29.0,<1.30.0)", "mypy-boto3-resource-groups (>=1.29.0,<1.30.0)", "mypy-boto3-resourcegroupstaggingapi (>=1.29.0,<1.30.0)", "mypy-boto3-robomaker (>=1.29.0,<1.30.0)", "mypy-boto3-rolesanywhere (>=1.29.0,<1.30.0)", "mypy-boto3-route53 (>=1.29.0,<1.30.0)", "mypy-boto3-route53-recovery-cluster (>=1.29.0,<1.30.0)", "mypy-boto3-route53-recovery-control-config (>=1.29.0,<1.30.0)", "mypy-boto3-route53-recovery-readiness (>=1.29.0,<1.30.0)", "mypy-boto3-route53domains (>=1.29.0,<1.30.0)", "mypy-boto3-route53resolver (>=1.29.0,<1.30.0)", "mypy-boto3-rum 
(>=1.29.0,<1.30.0)", "mypy-boto3-s3 (>=1.29.0,<1.30.0)", "mypy-boto3-s3control (>=1.29.0,<1.30.0)", "mypy-boto3-s3outposts (>=1.29.0,<1.30.0)", "mypy-boto3-sagemaker (>=1.29.0,<1.30.0)", "mypy-boto3-sagemaker-a2i-runtime (>=1.29.0,<1.30.0)", "mypy-boto3-sagemaker-edge (>=1.29.0,<1.30.0)", "mypy-boto3-sagemaker-featurestore-runtime (>=1.29.0,<1.30.0)", "mypy-boto3-sagemaker-geospatial (>=1.29.0,<1.30.0)", "mypy-boto3-sagemaker-metrics (>=1.29.0,<1.30.0)", "mypy-boto3-sagemaker-runtime (>=1.29.0,<1.30.0)", "mypy-boto3-savingsplans (>=1.29.0,<1.30.0)", "mypy-boto3-scheduler (>=1.29.0,<1.30.0)", "mypy-boto3-schemas (>=1.29.0,<1.30.0)", "mypy-boto3-sdb (>=1.29.0,<1.30.0)", "mypy-boto3-secretsmanager (>=1.29.0,<1.30.0)", "mypy-boto3-securityhub (>=1.29.0,<1.30.0)", "mypy-boto3-securitylake (>=1.29.0,<1.30.0)", "mypy-boto3-serverlessrepo (>=1.29.0,<1.30.0)", "mypy-boto3-service-quotas (>=1.29.0,<1.30.0)", "mypy-boto3-servicecatalog (>=1.29.0,<1.30.0)", "mypy-boto3-servicecatalog-appregistry (>=1.29.0,<1.30.0)", "mypy-boto3-servicediscovery (>=1.29.0,<1.30.0)", "mypy-boto3-ses (>=1.29.0,<1.30.0)", "mypy-boto3-sesv2 (>=1.29.0,<1.30.0)", "mypy-boto3-shield (>=1.29.0,<1.30.0)", "mypy-boto3-signer (>=1.29.0,<1.30.0)", "mypy-boto3-simspaceweaver (>=1.29.0,<1.30.0)", "mypy-boto3-sms (>=1.29.0,<1.30.0)", "mypy-boto3-sms-voice (>=1.29.0,<1.30.0)", "mypy-boto3-snow-device-management (>=1.29.0,<1.30.0)", "mypy-boto3-snowball (>=1.29.0,<1.30.0)", "mypy-boto3-sns (>=1.29.0,<1.30.0)", "mypy-boto3-sqs (>=1.29.0,<1.30.0)", "mypy-boto3-ssm (>=1.29.0,<1.30.0)", "mypy-boto3-ssm-contacts (>=1.29.0,<1.30.0)", "mypy-boto3-ssm-incidents (>=1.29.0,<1.30.0)", "mypy-boto3-ssm-sap (>=1.29.0,<1.30.0)", "mypy-boto3-sso (>=1.29.0,<1.30.0)", "mypy-boto3-sso-admin (>=1.29.0,<1.30.0)", "mypy-boto3-sso-oidc (>=1.29.0,<1.30.0)", "mypy-boto3-stepfunctions (>=1.29.0,<1.30.0)", "mypy-boto3-storagegateway (>=1.29.0,<1.30.0)", "mypy-boto3-sts (>=1.29.0,<1.30.0)", "mypy-boto3-support (>=1.29.0,<1.30.0)", 
"mypy-boto3-support-app (>=1.29.0,<1.30.0)", "mypy-boto3-swf (>=1.29.0,<1.30.0)", "mypy-boto3-synthetics (>=1.29.0,<1.30.0)", "mypy-boto3-textract (>=1.29.0,<1.30.0)", "mypy-boto3-timestream-query (>=1.29.0,<1.30.0)", "mypy-boto3-timestream-write (>=1.29.0,<1.30.0)", "mypy-boto3-tnb (>=1.29.0,<1.30.0)", "mypy-boto3-transcribe (>=1.29.0,<1.30.0)", "mypy-boto3-transfer (>=1.29.0,<1.30.0)", "mypy-boto3-translate (>=1.29.0,<1.30.0)", "mypy-boto3-trustedadvisor (>=1.29.0,<1.30.0)", "mypy-boto3-verifiedpermissions (>=1.29.0,<1.30.0)", "mypy-boto3-voice-id (>=1.29.0,<1.30.0)", "mypy-boto3-vpc-lattice (>=1.29.0,<1.30.0)", "mypy-boto3-waf (>=1.29.0,<1.30.0)", "mypy-boto3-waf-regional (>=1.29.0,<1.30.0)", "mypy-boto3-wafv2 (>=1.29.0,<1.30.0)", "mypy-boto3-wellarchitected (>=1.29.0,<1.30.0)", "mypy-boto3-wisdom (>=1.29.0,<1.30.0)", "mypy-boto3-workdocs (>=1.29.0,<1.30.0)", "mypy-boto3-worklink (>=1.29.0,<1.30.0)", "mypy-boto3-workmail (>=1.29.0,<1.30.0)", "mypy-boto3-workmailmessageflow (>=1.29.0,<1.30.0)", "mypy-boto3-workspaces (>=1.29.0,<1.30.0)", "mypy-boto3-workspaces-web (>=1.29.0,<1.30.0)", "mypy-boto3-xray (>=1.29.0,<1.30.0)"] +amp = ["mypy-boto3-amp (>=1.29.0,<1.30.0)"] +amplify = ["mypy-boto3-amplify (>=1.29.0,<1.30.0)"] +amplifybackend = ["mypy-boto3-amplifybackend (>=1.29.0,<1.30.0)"] +amplifyuibuilder = ["mypy-boto3-amplifyuibuilder (>=1.29.0,<1.30.0)"] +apigateway = ["mypy-boto3-apigateway (>=1.29.0,<1.30.0)"] +apigatewaymanagementapi = ["mypy-boto3-apigatewaymanagementapi (>=1.29.0,<1.30.0)"] +apigatewayv2 = ["mypy-boto3-apigatewayv2 (>=1.29.0,<1.30.0)"] +appconfig = ["mypy-boto3-appconfig (>=1.29.0,<1.30.0)"] +appconfigdata = ["mypy-boto3-appconfigdata (>=1.29.0,<1.30.0)"] +appfabric = ["mypy-boto3-appfabric (>=1.29.0,<1.30.0)"] +appflow = ["mypy-boto3-appflow (>=1.29.0,<1.30.0)"] +appintegrations = ["mypy-boto3-appintegrations (>=1.29.0,<1.30.0)"] +application-autoscaling = ["mypy-boto3-application-autoscaling (>=1.29.0,<1.30.0)"] +application-insights = 
["mypy-boto3-application-insights (>=1.29.0,<1.30.0)"] +applicationcostprofiler = ["mypy-boto3-applicationcostprofiler (>=1.29.0,<1.30.0)"] +appmesh = ["mypy-boto3-appmesh (>=1.29.0,<1.30.0)"] +apprunner = ["mypy-boto3-apprunner (>=1.29.0,<1.30.0)"] +appstream = ["mypy-boto3-appstream (>=1.29.0,<1.30.0)"] +appsync = ["mypy-boto3-appsync (>=1.29.0,<1.30.0)"] +arc-zonal-shift = ["mypy-boto3-arc-zonal-shift (>=1.29.0,<1.30.0)"] +athena = ["mypy-boto3-athena (>=1.29.0,<1.30.0)"] +auditmanager = ["mypy-boto3-auditmanager (>=1.29.0,<1.30.0)"] +autoscaling = ["mypy-boto3-autoscaling (>=1.29.0,<1.30.0)"] +autoscaling-plans = ["mypy-boto3-autoscaling-plans (>=1.29.0,<1.30.0)"] +backup = ["mypy-boto3-backup (>=1.29.0,<1.30.0)"] +backup-gateway = ["mypy-boto3-backup-gateway (>=1.29.0,<1.30.0)"] +backupstorage = ["mypy-boto3-backupstorage (>=1.29.0,<1.30.0)"] +batch = ["mypy-boto3-batch (>=1.29.0,<1.30.0)"] +bedrock = ["mypy-boto3-bedrock (>=1.29.0,<1.30.0)"] +bedrock-runtime = ["mypy-boto3-bedrock-runtime (>=1.29.0,<1.30.0)"] +billingconductor = ["mypy-boto3-billingconductor (>=1.29.0,<1.30.0)"] +boto3 = ["boto3 (==1.29.5)", "botocore (==1.32.5)"] +braket = ["mypy-boto3-braket (>=1.29.0,<1.30.0)"] +budgets = ["mypy-boto3-budgets (>=1.29.0,<1.30.0)"] +ce = ["mypy-boto3-ce (>=1.29.0,<1.30.0)"] +chime = ["mypy-boto3-chime (>=1.29.0,<1.30.0)"] +chime-sdk-identity = ["mypy-boto3-chime-sdk-identity (>=1.29.0,<1.30.0)"] +chime-sdk-media-pipelines = ["mypy-boto3-chime-sdk-media-pipelines (>=1.29.0,<1.30.0)"] +chime-sdk-meetings = ["mypy-boto3-chime-sdk-meetings (>=1.29.0,<1.30.0)"] +chime-sdk-messaging = ["mypy-boto3-chime-sdk-messaging (>=1.29.0,<1.30.0)"] +chime-sdk-voice = ["mypy-boto3-chime-sdk-voice (>=1.29.0,<1.30.0)"] +cleanrooms = ["mypy-boto3-cleanrooms (>=1.29.0,<1.30.0)"] +cloud9 = ["mypy-boto3-cloud9 (>=1.29.0,<1.30.0)"] +cloudcontrol = ["mypy-boto3-cloudcontrol (>=1.29.0,<1.30.0)"] +clouddirectory = ["mypy-boto3-clouddirectory (>=1.29.0,<1.30.0)"] +cloudformation = 
["mypy-boto3-cloudformation (>=1.29.0,<1.30.0)"] +cloudfront = ["mypy-boto3-cloudfront (>=1.29.0,<1.30.0)"] +cloudfront-keyvaluestore = ["mypy-boto3-cloudfront-keyvaluestore (>=1.29.0,<1.30.0)"] +cloudhsm = ["mypy-boto3-cloudhsm (>=1.29.0,<1.30.0)"] +cloudhsmv2 = ["mypy-boto3-cloudhsmv2 (>=1.29.0,<1.30.0)"] +cloudsearch = ["mypy-boto3-cloudsearch (>=1.29.0,<1.30.0)"] +cloudsearchdomain = ["mypy-boto3-cloudsearchdomain (>=1.29.0,<1.30.0)"] +cloudtrail = ["mypy-boto3-cloudtrail (>=1.29.0,<1.30.0)"] +cloudtrail-data = ["mypy-boto3-cloudtrail-data (>=1.29.0,<1.30.0)"] +cloudwatch = ["mypy-boto3-cloudwatch (>=1.29.0,<1.30.0)"] +codeartifact = ["mypy-boto3-codeartifact (>=1.29.0,<1.30.0)"] +codebuild = ["mypy-boto3-codebuild (>=1.29.0,<1.30.0)"] +codecatalyst = ["mypy-boto3-codecatalyst (>=1.29.0,<1.30.0)"] +codecommit = ["mypy-boto3-codecommit (>=1.29.0,<1.30.0)"] +codedeploy = ["mypy-boto3-codedeploy (>=1.29.0,<1.30.0)"] +codeguru-reviewer = ["mypy-boto3-codeguru-reviewer (>=1.29.0,<1.30.0)"] +codeguru-security = ["mypy-boto3-codeguru-security (>=1.29.0,<1.30.0)"] +codeguruprofiler = ["mypy-boto3-codeguruprofiler (>=1.29.0,<1.30.0)"] +codepipeline = ["mypy-boto3-codepipeline (>=1.29.0,<1.30.0)"] +codestar = ["mypy-boto3-codestar (>=1.29.0,<1.30.0)"] +codestar-connections = ["mypy-boto3-codestar-connections (>=1.29.0,<1.30.0)"] +codestar-notifications = ["mypy-boto3-codestar-notifications (>=1.29.0,<1.30.0)"] +cognito-identity = ["mypy-boto3-cognito-identity (>=1.29.0,<1.30.0)"] +cognito-idp = ["mypy-boto3-cognito-idp (>=1.29.0,<1.30.0)"] +cognito-sync = ["mypy-boto3-cognito-sync (>=1.29.0,<1.30.0)"] +comprehend = ["mypy-boto3-comprehend (>=1.29.0,<1.30.0)"] +comprehendmedical = ["mypy-boto3-comprehendmedical (>=1.29.0,<1.30.0)"] +compute-optimizer = ["mypy-boto3-compute-optimizer (>=1.29.0,<1.30.0)"] +config = ["mypy-boto3-config (>=1.29.0,<1.30.0)"] +connect = ["mypy-boto3-connect (>=1.29.0,<1.30.0)"] +connect-contact-lens = ["mypy-boto3-connect-contact-lens 
(>=1.29.0,<1.30.0)"] +connectcampaigns = ["mypy-boto3-connectcampaigns (>=1.29.0,<1.30.0)"] +connectcases = ["mypy-boto3-connectcases (>=1.29.0,<1.30.0)"] +connectparticipant = ["mypy-boto3-connectparticipant (>=1.29.0,<1.30.0)"] +controltower = ["mypy-boto3-controltower (>=1.29.0,<1.30.0)"] +cur = ["mypy-boto3-cur (>=1.29.0,<1.30.0)"] +customer-profiles = ["mypy-boto3-customer-profiles (>=1.29.0,<1.30.0)"] +databrew = ["mypy-boto3-databrew (>=1.29.0,<1.30.0)"] +dataexchange = ["mypy-boto3-dataexchange (>=1.29.0,<1.30.0)"] +datapipeline = ["mypy-boto3-datapipeline (>=1.29.0,<1.30.0)"] +datasync = ["mypy-boto3-datasync (>=1.29.0,<1.30.0)"] +datazone = ["mypy-boto3-datazone (>=1.29.0,<1.30.0)"] +dax = ["mypy-boto3-dax (>=1.29.0,<1.30.0)"] +detective = ["mypy-boto3-detective (>=1.29.0,<1.30.0)"] +devicefarm = ["mypy-boto3-devicefarm (>=1.29.0,<1.30.0)"] +devops-guru = ["mypy-boto3-devops-guru (>=1.29.0,<1.30.0)"] +directconnect = ["mypy-boto3-directconnect (>=1.29.0,<1.30.0)"] +discovery = ["mypy-boto3-discovery (>=1.29.0,<1.30.0)"] +dlm = ["mypy-boto3-dlm (>=1.29.0,<1.30.0)"] +dms = ["mypy-boto3-dms (>=1.29.0,<1.30.0)"] +docdb = ["mypy-boto3-docdb (>=1.29.0,<1.30.0)"] +docdb-elastic = ["mypy-boto3-docdb-elastic (>=1.29.0,<1.30.0)"] +drs = ["mypy-boto3-drs (>=1.29.0,<1.30.0)"] +ds = ["mypy-boto3-ds (>=1.29.0,<1.30.0)"] +dynamodb = ["mypy-boto3-dynamodb (>=1.29.0,<1.30.0)"] +dynamodbstreams = ["mypy-boto3-dynamodbstreams (>=1.29.0,<1.30.0)"] +ebs = ["mypy-boto3-ebs (>=1.29.0,<1.30.0)"] +ec2 = ["mypy-boto3-ec2 (>=1.29.0,<1.30.0)"] +ec2-instance-connect = ["mypy-boto3-ec2-instance-connect (>=1.29.0,<1.30.0)"] +ecr = ["mypy-boto3-ecr (>=1.29.0,<1.30.0)"] +ecr-public = ["mypy-boto3-ecr-public (>=1.29.0,<1.30.0)"] +ecs = ["mypy-boto3-ecs (>=1.29.0,<1.30.0)"] +efs = ["mypy-boto3-efs (>=1.29.0,<1.30.0)"] +eks = ["mypy-boto3-eks (>=1.29.0,<1.30.0)"] +elastic-inference = ["mypy-boto3-elastic-inference (>=1.29.0,<1.30.0)"] +elasticache = ["mypy-boto3-elasticache 
(>=1.29.0,<1.30.0)"] +elasticbeanstalk = ["mypy-boto3-elasticbeanstalk (>=1.29.0,<1.30.0)"] +elastictranscoder = ["mypy-boto3-elastictranscoder (>=1.29.0,<1.30.0)"] +elb = ["mypy-boto3-elb (>=1.29.0,<1.30.0)"] +elbv2 = ["mypy-boto3-elbv2 (>=1.29.0,<1.30.0)"] +emr = ["mypy-boto3-emr (>=1.29.0,<1.30.0)"] +emr-containers = ["mypy-boto3-emr-containers (>=1.29.0,<1.30.0)"] +emr-serverless = ["mypy-boto3-emr-serverless (>=1.29.0,<1.30.0)"] +entityresolution = ["mypy-boto3-entityresolution (>=1.29.0,<1.30.0)"] +es = ["mypy-boto3-es (>=1.29.0,<1.30.0)"] +essential = ["mypy-boto3-cloudformation (>=1.29.0,<1.30.0)", "mypy-boto3-dynamodb (>=1.29.0,<1.30.0)", "mypy-boto3-ec2 (>=1.29.0,<1.30.0)", "mypy-boto3-lambda (>=1.29.0,<1.30.0)", "mypy-boto3-rds (>=1.29.0,<1.30.0)", "mypy-boto3-s3 (>=1.29.0,<1.30.0)", "mypy-boto3-sqs (>=1.29.0,<1.30.0)"] +events = ["mypy-boto3-events (>=1.29.0,<1.30.0)"] +evidently = ["mypy-boto3-evidently (>=1.29.0,<1.30.0)"] +finspace = ["mypy-boto3-finspace (>=1.29.0,<1.30.0)"] +finspace-data = ["mypy-boto3-finspace-data (>=1.29.0,<1.30.0)"] +firehose = ["mypy-boto3-firehose (>=1.29.0,<1.30.0)"] +fis = ["mypy-boto3-fis (>=1.29.0,<1.30.0)"] +fms = ["mypy-boto3-fms (>=1.29.0,<1.30.0)"] +forecast = ["mypy-boto3-forecast (>=1.29.0,<1.30.0)"] +forecastquery = ["mypy-boto3-forecastquery (>=1.29.0,<1.30.0)"] +frauddetector = ["mypy-boto3-frauddetector (>=1.29.0,<1.30.0)"] +fsx = ["mypy-boto3-fsx (>=1.29.0,<1.30.0)"] +gamelift = ["mypy-boto3-gamelift (>=1.29.0,<1.30.0)"] +glacier = ["mypy-boto3-glacier (>=1.29.0,<1.30.0)"] +globalaccelerator = ["mypy-boto3-globalaccelerator (>=1.29.0,<1.30.0)"] +glue = ["mypy-boto3-glue (>=1.29.0,<1.30.0)"] +grafana = ["mypy-boto3-grafana (>=1.29.0,<1.30.0)"] +greengrass = ["mypy-boto3-greengrass (>=1.29.0,<1.30.0)"] +greengrassv2 = ["mypy-boto3-greengrassv2 (>=1.29.0,<1.30.0)"] +groundstation = ["mypy-boto3-groundstation (>=1.29.0,<1.30.0)"] +guardduty = ["mypy-boto3-guardduty (>=1.29.0,<1.30.0)"] +health = 
["mypy-boto3-health (>=1.29.0,<1.30.0)"] +healthlake = ["mypy-boto3-healthlake (>=1.29.0,<1.30.0)"] +honeycode = ["mypy-boto3-honeycode (>=1.29.0,<1.30.0)"] +iam = ["mypy-boto3-iam (>=1.29.0,<1.30.0)"] +identitystore = ["mypy-boto3-identitystore (>=1.29.0,<1.30.0)"] +imagebuilder = ["mypy-boto3-imagebuilder (>=1.29.0,<1.30.0)"] +importexport = ["mypy-boto3-importexport (>=1.29.0,<1.30.0)"] +inspector = ["mypy-boto3-inspector (>=1.29.0,<1.30.0)"] +inspector-scan = ["mypy-boto3-inspector-scan (>=1.29.0,<1.30.0)"] +inspector2 = ["mypy-boto3-inspector2 (>=1.29.0,<1.30.0)"] +internetmonitor = ["mypy-boto3-internetmonitor (>=1.29.0,<1.30.0)"] +iot = ["mypy-boto3-iot (>=1.29.0,<1.30.0)"] +iot-data = ["mypy-boto3-iot-data (>=1.29.0,<1.30.0)"] +iot-jobs-data = ["mypy-boto3-iot-jobs-data (>=1.29.0,<1.30.0)"] +iot-roborunner = ["mypy-boto3-iot-roborunner (>=1.29.0,<1.30.0)"] +iot1click-devices = ["mypy-boto3-iot1click-devices (>=1.29.0,<1.30.0)"] +iot1click-projects = ["mypy-boto3-iot1click-projects (>=1.29.0,<1.30.0)"] +iotanalytics = ["mypy-boto3-iotanalytics (>=1.29.0,<1.30.0)"] +iotdeviceadvisor = ["mypy-boto3-iotdeviceadvisor (>=1.29.0,<1.30.0)"] +iotevents = ["mypy-boto3-iotevents (>=1.29.0,<1.30.0)"] +iotevents-data = ["mypy-boto3-iotevents-data (>=1.29.0,<1.30.0)"] +iotfleethub = ["mypy-boto3-iotfleethub (>=1.29.0,<1.30.0)"] +iotfleetwise = ["mypy-boto3-iotfleetwise (>=1.29.0,<1.30.0)"] +iotsecuretunneling = ["mypy-boto3-iotsecuretunneling (>=1.29.0,<1.30.0)"] +iotsitewise = ["mypy-boto3-iotsitewise (>=1.29.0,<1.30.0)"] +iotthingsgraph = ["mypy-boto3-iotthingsgraph (>=1.29.0,<1.30.0)"] +iottwinmaker = ["mypy-boto3-iottwinmaker (>=1.29.0,<1.30.0)"] +iotwireless = ["mypy-boto3-iotwireless (>=1.29.0,<1.30.0)"] +ivs = ["mypy-boto3-ivs (>=1.29.0,<1.30.0)"] +ivs-realtime = ["mypy-boto3-ivs-realtime (>=1.29.0,<1.30.0)"] +ivschat = ["mypy-boto3-ivschat (>=1.29.0,<1.30.0)"] +kafka = ["mypy-boto3-kafka (>=1.29.0,<1.30.0)"] +kafkaconnect = ["mypy-boto3-kafkaconnect 
(>=1.29.0,<1.30.0)"] +kendra = ["mypy-boto3-kendra (>=1.29.0,<1.30.0)"] +kendra-ranking = ["mypy-boto3-kendra-ranking (>=1.29.0,<1.30.0)"] +keyspaces = ["mypy-boto3-keyspaces (>=1.29.0,<1.30.0)"] +kinesis = ["mypy-boto3-kinesis (>=1.29.0,<1.30.0)"] +kinesis-video-archived-media = ["mypy-boto3-kinesis-video-archived-media (>=1.29.0,<1.30.0)"] +kinesis-video-media = ["mypy-boto3-kinesis-video-media (>=1.29.0,<1.30.0)"] +kinesis-video-signaling = ["mypy-boto3-kinesis-video-signaling (>=1.29.0,<1.30.0)"] +kinesis-video-webrtc-storage = ["mypy-boto3-kinesis-video-webrtc-storage (>=1.29.0,<1.30.0)"] +kinesisanalytics = ["mypy-boto3-kinesisanalytics (>=1.29.0,<1.30.0)"] +kinesisanalyticsv2 = ["mypy-boto3-kinesisanalyticsv2 (>=1.29.0,<1.30.0)"] +kinesisvideo = ["mypy-boto3-kinesisvideo (>=1.29.0,<1.30.0)"] +kms = ["mypy-boto3-kms (>=1.29.0,<1.30.0)"] +lakeformation = ["mypy-boto3-lakeformation (>=1.29.0,<1.30.0)"] +lambda = ["mypy-boto3-lambda (>=1.29.0,<1.30.0)"] +launch-wizard = ["mypy-boto3-launch-wizard (>=1.29.0,<1.30.0)"] +lex-models = ["mypy-boto3-lex-models (>=1.29.0,<1.30.0)"] +lex-runtime = ["mypy-boto3-lex-runtime (>=1.29.0,<1.30.0)"] +lexv2-models = ["mypy-boto3-lexv2-models (>=1.29.0,<1.30.0)"] +lexv2-runtime = ["mypy-boto3-lexv2-runtime (>=1.29.0,<1.30.0)"] +license-manager = ["mypy-boto3-license-manager (>=1.29.0,<1.30.0)"] +license-manager-linux-subscriptions = ["mypy-boto3-license-manager-linux-subscriptions (>=1.29.0,<1.30.0)"] +license-manager-user-subscriptions = ["mypy-boto3-license-manager-user-subscriptions (>=1.29.0,<1.30.0)"] +lightsail = ["mypy-boto3-lightsail (>=1.29.0,<1.30.0)"] +location = ["mypy-boto3-location (>=1.29.0,<1.30.0)"] +logs = ["mypy-boto3-logs (>=1.29.0,<1.30.0)"] +lookoutequipment = ["mypy-boto3-lookoutequipment (>=1.29.0,<1.30.0)"] +lookoutmetrics = ["mypy-boto3-lookoutmetrics (>=1.29.0,<1.30.0)"] +lookoutvision = ["mypy-boto3-lookoutvision (>=1.29.0,<1.30.0)"] +m2 = ["mypy-boto3-m2 (>=1.29.0,<1.30.0)"] +machinelearning = 
["mypy-boto3-machinelearning (>=1.29.0,<1.30.0)"] +macie2 = ["mypy-boto3-macie2 (>=1.29.0,<1.30.0)"] +managedblockchain = ["mypy-boto3-managedblockchain (>=1.29.0,<1.30.0)"] +managedblockchain-query = ["mypy-boto3-managedblockchain-query (>=1.29.0,<1.30.0)"] +marketplace-catalog = ["mypy-boto3-marketplace-catalog (>=1.29.0,<1.30.0)"] +marketplace-entitlement = ["mypy-boto3-marketplace-entitlement (>=1.29.0,<1.30.0)"] +marketplacecommerceanalytics = ["mypy-boto3-marketplacecommerceanalytics (>=1.29.0,<1.30.0)"] +mediaconnect = ["mypy-boto3-mediaconnect (>=1.29.0,<1.30.0)"] +mediaconvert = ["mypy-boto3-mediaconvert (>=1.29.0,<1.30.0)"] +medialive = ["mypy-boto3-medialive (>=1.29.0,<1.30.0)"] +mediapackage = ["mypy-boto3-mediapackage (>=1.29.0,<1.30.0)"] +mediapackage-vod = ["mypy-boto3-mediapackage-vod (>=1.29.0,<1.30.0)"] +mediapackagev2 = ["mypy-boto3-mediapackagev2 (>=1.29.0,<1.30.0)"] +mediastore = ["mypy-boto3-mediastore (>=1.29.0,<1.30.0)"] +mediastore-data = ["mypy-boto3-mediastore-data (>=1.29.0,<1.30.0)"] +mediatailor = ["mypy-boto3-mediatailor (>=1.29.0,<1.30.0)"] +medical-imaging = ["mypy-boto3-medical-imaging (>=1.29.0,<1.30.0)"] +memorydb = ["mypy-boto3-memorydb (>=1.29.0,<1.30.0)"] +meteringmarketplace = ["mypy-boto3-meteringmarketplace (>=1.29.0,<1.30.0)"] +mgh = ["mypy-boto3-mgh (>=1.29.0,<1.30.0)"] +mgn = ["mypy-boto3-mgn (>=1.29.0,<1.30.0)"] +migration-hub-refactor-spaces = ["mypy-boto3-migration-hub-refactor-spaces (>=1.29.0,<1.30.0)"] +migrationhub-config = ["mypy-boto3-migrationhub-config (>=1.29.0,<1.30.0)"] +migrationhuborchestrator = ["mypy-boto3-migrationhuborchestrator (>=1.29.0,<1.30.0)"] +migrationhubstrategy = ["mypy-boto3-migrationhubstrategy (>=1.29.0,<1.30.0)"] +mobile = ["mypy-boto3-mobile (>=1.29.0,<1.30.0)"] +mq = ["mypy-boto3-mq (>=1.29.0,<1.30.0)"] +mturk = ["mypy-boto3-mturk (>=1.29.0,<1.30.0)"] +mwaa = ["mypy-boto3-mwaa (>=1.29.0,<1.30.0)"] +neptune = ["mypy-boto3-neptune (>=1.29.0,<1.30.0)"] +neptunedata = 
["mypy-boto3-neptunedata (>=1.29.0,<1.30.0)"] +network-firewall = ["mypy-boto3-network-firewall (>=1.29.0,<1.30.0)"] +networkmanager = ["mypy-boto3-networkmanager (>=1.29.0,<1.30.0)"] +nimble = ["mypy-boto3-nimble (>=1.29.0,<1.30.0)"] +oam = ["mypy-boto3-oam (>=1.29.0,<1.30.0)"] +omics = ["mypy-boto3-omics (>=1.29.0,<1.30.0)"] +opensearch = ["mypy-boto3-opensearch (>=1.29.0,<1.30.0)"] +opensearchserverless = ["mypy-boto3-opensearchserverless (>=1.29.0,<1.30.0)"] +opsworks = ["mypy-boto3-opsworks (>=1.29.0,<1.30.0)"] +opsworkscm = ["mypy-boto3-opsworkscm (>=1.29.0,<1.30.0)"] +organizations = ["mypy-boto3-organizations (>=1.29.0,<1.30.0)"] +osis = ["mypy-boto3-osis (>=1.29.0,<1.30.0)"] +outposts = ["mypy-boto3-outposts (>=1.29.0,<1.30.0)"] +panorama = ["mypy-boto3-panorama (>=1.29.0,<1.30.0)"] +payment-cryptography = ["mypy-boto3-payment-cryptography (>=1.29.0,<1.30.0)"] +payment-cryptography-data = ["mypy-boto3-payment-cryptography-data (>=1.29.0,<1.30.0)"] +pca-connector-ad = ["mypy-boto3-pca-connector-ad (>=1.29.0,<1.30.0)"] +personalize = ["mypy-boto3-personalize (>=1.29.0,<1.30.0)"] +personalize-events = ["mypy-boto3-personalize-events (>=1.29.0,<1.30.0)"] +personalize-runtime = ["mypy-boto3-personalize-runtime (>=1.29.0,<1.30.0)"] +pi = ["mypy-boto3-pi (>=1.29.0,<1.30.0)"] +pinpoint = ["mypy-boto3-pinpoint (>=1.29.0,<1.30.0)"] +pinpoint-email = ["mypy-boto3-pinpoint-email (>=1.29.0,<1.30.0)"] +pinpoint-sms-voice = ["mypy-boto3-pinpoint-sms-voice (>=1.29.0,<1.30.0)"] +pinpoint-sms-voice-v2 = ["mypy-boto3-pinpoint-sms-voice-v2 (>=1.29.0,<1.30.0)"] +pipes = ["mypy-boto3-pipes (>=1.29.0,<1.30.0)"] +polly = ["mypy-boto3-polly (>=1.29.0,<1.30.0)"] +pricing = ["mypy-boto3-pricing (>=1.29.0,<1.30.0)"] +privatenetworks = ["mypy-boto3-privatenetworks (>=1.29.0,<1.30.0)"] +proton = ["mypy-boto3-proton (>=1.29.0,<1.30.0)"] +qldb = ["mypy-boto3-qldb (>=1.29.0,<1.30.0)"] +qldb-session = ["mypy-boto3-qldb-session (>=1.29.0,<1.30.0)"] +quicksight = ["mypy-boto3-quicksight 
(>=1.29.0,<1.30.0)"] +ram = ["mypy-boto3-ram (>=1.29.0,<1.30.0)"] +rbin = ["mypy-boto3-rbin (>=1.29.0,<1.30.0)"] +rds = ["mypy-boto3-rds (>=1.29.0,<1.30.0)"] +rds-data = ["mypy-boto3-rds-data (>=1.29.0,<1.30.0)"] +redshift = ["mypy-boto3-redshift (>=1.29.0,<1.30.0)"] +redshift-data = ["mypy-boto3-redshift-data (>=1.29.0,<1.30.0)"] +redshift-serverless = ["mypy-boto3-redshift-serverless (>=1.29.0,<1.30.0)"] +rekognition = ["mypy-boto3-rekognition (>=1.29.0,<1.30.0)"] +resiliencehub = ["mypy-boto3-resiliencehub (>=1.29.0,<1.30.0)"] +resource-explorer-2 = ["mypy-boto3-resource-explorer-2 (>=1.29.0,<1.30.0)"] +resource-groups = ["mypy-boto3-resource-groups (>=1.29.0,<1.30.0)"] +resourcegroupstaggingapi = ["mypy-boto3-resourcegroupstaggingapi (>=1.29.0,<1.30.0)"] +robomaker = ["mypy-boto3-robomaker (>=1.29.0,<1.30.0)"] +rolesanywhere = ["mypy-boto3-rolesanywhere (>=1.29.0,<1.30.0)"] +route53 = ["mypy-boto3-route53 (>=1.29.0,<1.30.0)"] +route53-recovery-cluster = ["mypy-boto3-route53-recovery-cluster (>=1.29.0,<1.30.0)"] +route53-recovery-control-config = ["mypy-boto3-route53-recovery-control-config (>=1.29.0,<1.30.0)"] +route53-recovery-readiness = ["mypy-boto3-route53-recovery-readiness (>=1.29.0,<1.30.0)"] +route53domains = ["mypy-boto3-route53domains (>=1.29.0,<1.30.0)"] +route53resolver = ["mypy-boto3-route53resolver (>=1.29.0,<1.30.0)"] +rum = ["mypy-boto3-rum (>=1.29.0,<1.30.0)"] +s3 = ["mypy-boto3-s3 (>=1.29.0,<1.30.0)"] +s3control = ["mypy-boto3-s3control (>=1.29.0,<1.30.0)"] +s3outposts = ["mypy-boto3-s3outposts (>=1.29.0,<1.30.0)"] +sagemaker = ["mypy-boto3-sagemaker (>=1.29.0,<1.30.0)"] +sagemaker-a2i-runtime = ["mypy-boto3-sagemaker-a2i-runtime (>=1.29.0,<1.30.0)"] +sagemaker-edge = ["mypy-boto3-sagemaker-edge (>=1.29.0,<1.30.0)"] +sagemaker-featurestore-runtime = ["mypy-boto3-sagemaker-featurestore-runtime (>=1.29.0,<1.30.0)"] +sagemaker-geospatial = ["mypy-boto3-sagemaker-geospatial (>=1.29.0,<1.30.0)"] +sagemaker-metrics = ["mypy-boto3-sagemaker-metrics 
(>=1.29.0,<1.30.0)"] +sagemaker-runtime = ["mypy-boto3-sagemaker-runtime (>=1.29.0,<1.30.0)"] +savingsplans = ["mypy-boto3-savingsplans (>=1.29.0,<1.30.0)"] +scheduler = ["mypy-boto3-scheduler (>=1.29.0,<1.30.0)"] +schemas = ["mypy-boto3-schemas (>=1.29.0,<1.30.0)"] +sdb = ["mypy-boto3-sdb (>=1.29.0,<1.30.0)"] +secretsmanager = ["mypy-boto3-secretsmanager (>=1.29.0,<1.30.0)"] +securityhub = ["mypy-boto3-securityhub (>=1.29.0,<1.30.0)"] +securitylake = ["mypy-boto3-securitylake (>=1.29.0,<1.30.0)"] +serverlessrepo = ["mypy-boto3-serverlessrepo (>=1.29.0,<1.30.0)"] +service-quotas = ["mypy-boto3-service-quotas (>=1.29.0,<1.30.0)"] +servicecatalog = ["mypy-boto3-servicecatalog (>=1.29.0,<1.30.0)"] +servicecatalog-appregistry = ["mypy-boto3-servicecatalog-appregistry (>=1.29.0,<1.30.0)"] +servicediscovery = ["mypy-boto3-servicediscovery (>=1.29.0,<1.30.0)"] +ses = ["mypy-boto3-ses (>=1.29.0,<1.30.0)"] +sesv2 = ["mypy-boto3-sesv2 (>=1.29.0,<1.30.0)"] +shield = ["mypy-boto3-shield (>=1.29.0,<1.30.0)"] +signer = ["mypy-boto3-signer (>=1.29.0,<1.30.0)"] +simspaceweaver = ["mypy-boto3-simspaceweaver (>=1.29.0,<1.30.0)"] +sms = ["mypy-boto3-sms (>=1.29.0,<1.30.0)"] +sms-voice = ["mypy-boto3-sms-voice (>=1.29.0,<1.30.0)"] +snow-device-management = ["mypy-boto3-snow-device-management (>=1.29.0,<1.30.0)"] +snowball = ["mypy-boto3-snowball (>=1.29.0,<1.30.0)"] +sns = ["mypy-boto3-sns (>=1.29.0,<1.30.0)"] +sqs = ["mypy-boto3-sqs (>=1.29.0,<1.30.0)"] +ssm = ["mypy-boto3-ssm (>=1.29.0,<1.30.0)"] +ssm-contacts = ["mypy-boto3-ssm-contacts (>=1.29.0,<1.30.0)"] +ssm-incidents = ["mypy-boto3-ssm-incidents (>=1.29.0,<1.30.0)"] +ssm-sap = ["mypy-boto3-ssm-sap (>=1.29.0,<1.30.0)"] +sso = ["mypy-boto3-sso (>=1.29.0,<1.30.0)"] +sso-admin = ["mypy-boto3-sso-admin (>=1.29.0,<1.30.0)"] +sso-oidc = ["mypy-boto3-sso-oidc (>=1.29.0,<1.30.0)"] +stepfunctions = ["mypy-boto3-stepfunctions (>=1.29.0,<1.30.0)"] +storagegateway = ["mypy-boto3-storagegateway (>=1.29.0,<1.30.0)"] +sts = ["mypy-boto3-sts 
(>=1.29.0,<1.30.0)"] +support = ["mypy-boto3-support (>=1.29.0,<1.30.0)"] +support-app = ["mypy-boto3-support-app (>=1.29.0,<1.30.0)"] +swf = ["mypy-boto3-swf (>=1.29.0,<1.30.0)"] +synthetics = ["mypy-boto3-synthetics (>=1.29.0,<1.30.0)"] +textract = ["mypy-boto3-textract (>=1.29.0,<1.30.0)"] +timestream-query = ["mypy-boto3-timestream-query (>=1.29.0,<1.30.0)"] +timestream-write = ["mypy-boto3-timestream-write (>=1.29.0,<1.30.0)"] +tnb = ["mypy-boto3-tnb (>=1.29.0,<1.30.0)"] +transcribe = ["mypy-boto3-transcribe (>=1.29.0,<1.30.0)"] +transfer = ["mypy-boto3-transfer (>=1.29.0,<1.30.0)"] +translate = ["mypy-boto3-translate (>=1.29.0,<1.30.0)"] +trustedadvisor = ["mypy-boto3-trustedadvisor (>=1.29.0,<1.30.0)"] +verifiedpermissions = ["mypy-boto3-verifiedpermissions (>=1.29.0,<1.30.0)"] +voice-id = ["mypy-boto3-voice-id (>=1.29.0,<1.30.0)"] +vpc-lattice = ["mypy-boto3-vpc-lattice (>=1.29.0,<1.30.0)"] +waf = ["mypy-boto3-waf (>=1.29.0,<1.30.0)"] +waf-regional = ["mypy-boto3-waf-regional (>=1.29.0,<1.30.0)"] +wafv2 = ["mypy-boto3-wafv2 (>=1.29.0,<1.30.0)"] +wellarchitected = ["mypy-boto3-wellarchitected (>=1.29.0,<1.30.0)"] +wisdom = ["mypy-boto3-wisdom (>=1.29.0,<1.30.0)"] +workdocs = ["mypy-boto3-workdocs (>=1.29.0,<1.30.0)"] +worklink = ["mypy-boto3-worklink (>=1.29.0,<1.30.0)"] +workmail = ["mypy-boto3-workmail (>=1.29.0,<1.30.0)"] +workmailmessageflow = ["mypy-boto3-workmailmessageflow (>=1.29.0,<1.30.0)"] +workspaces = ["mypy-boto3-workspaces (>=1.29.0,<1.30.0)"] +workspaces-web = ["mypy-boto3-workspaces-web (>=1.29.0,<1.30.0)"] +xray = ["mypy-boto3-xray (>=1.29.0,<1.30.0)"] + +[[package]] +name = "botocore" +version = "1.32.5" +description = "Low-level, data-driven core of boto 3." 
+optional = false +python-versions = ">= 3.7" +files = [ + {file = "botocore-1.32.5-py3-none-any.whl", hash = "sha256:b8960c955ba275915bf022c54c896c2dac1038289d8a5ace92d1431257c0a439"}, + {file = "botocore-1.32.5.tar.gz", hash = "sha256:75a68f942cd87baff83b3a20dfda11b3aeda48aad32e4dcd6fe8992c0cb0e7db"}, +] + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = {version = ">=1.25.4,<2.1", markers = "python_version >= \"3.10\""} + +[package.extras] +crt = ["awscrt (==0.19.12)"] + +[[package]] +name = "botocore-stubs" +version = "1.32.5" +description = "Type annotations and code completion for botocore" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "botocore_stubs-1.32.5-py3-none-any.whl", hash = "sha256:474fce0a2c5b7831cfbe58e381f5fb1493829fb718e1a338babf7ccc9f48ef23"}, + {file = "botocore_stubs-1.32.5.tar.gz", hash = "sha256:1fe2df7f517a9d1a30f8e2e62c7a08f81d4b1b2e8adfa6111a866145c30c26e5"}, +] + +[package.dependencies] +types-awscrt = "*" + +[package.extras] +botocore = ["botocore"] + +[[package]] +name = "cargo-lambda" +version = "0.21.1" +description = "Cargo subcommand to work with AWS Lambda" +optional = false +python-versions = "*" +files = [ + {file = "cargo_lambda-0.21.1-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:f64038d84feaceb94285cd7c64696410de75a4540f30fff4c48c6993f16dec22"}, + {file = "cargo_lambda-0.21.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b56c148dc145566e92bb9b65e8f42372597b3dc296a71cde708034ef525a288e"}, + {file = "cargo_lambda-0.21.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1f38c477c258838707894ff787d0e6e12d643d40801c7876a43a84bff0961b87"}, + {file = "cargo_lambda-0.21.1-py3-none-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:964b85c88c962aea3c3906a32d64fb7cd4ca49e1a53f25b0551fdd76e3465df7"}, + {file = 
"cargo_lambda-0.21.1-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:229dc24baaefe2fe4c5faa4f259f8fc419a683844031dc5535407a872604e29e"}, + {file = "cargo_lambda-0.21.1-py3-none-win32.whl", hash = "sha256:19f7a54b50d58fb25baa1377bf34df2be886c300966e49835606e2ac6ff04e4d"}, + {file = "cargo_lambda-0.21.1-py3-none-win_amd64.whl", hash = "sha256:8a785dd3e2c03631bc3063ca51993d3d2d56b0d623e6992d7c992312a84742e2"}, +] + +[package.dependencies] +ziglang = ">=0.10.0" + +[[package]] +name = "cattrs" +version = "23.1.2" +description = "Composable complex class support for attrs and dataclasses." +optional = false +python-versions = ">=3.7" +files = [ + {file = "cattrs-23.1.2-py3-none-any.whl", hash = "sha256:b2bb14311ac17bed0d58785e5a60f022e5431aca3932e3fc5cc8ed8639de50a4"}, + {file = "cattrs-23.1.2.tar.gz", hash = "sha256:db1c821b8c537382b2c7c66678c3790091ca0275ac486c76f3c8f3920e83c657"}, +] + +[package.dependencies] +attrs = ">=20" +exceptiongroup = {version = "*", markers = "python_version < \"3.11\""} +typing_extensions = {version = ">=4.1.0", markers = "python_version < \"3.11\""} + +[package.extras] +bson = ["pymongo (>=4.2.0,<5.0.0)"] +cbor2 = ["cbor2 (>=5.4.6,<6.0.0)"] +msgpack = ["msgpack (>=1.0.2,<2.0.0)"] +orjson = ["orjson (>=3.5.2,<4.0.0)"] +pyyaml = ["PyYAML (>=6.0,<7.0)"] +tomlkit = ["tomlkit (>=0.11.4,<0.12.0)"] +ujson = ["ujson (>=5.4.0,<6.0.0)"] + +[[package]] +name = "click" +version = "8.1.7" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.7" +files = [ + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored 
terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "constructs" +version = "10.3.0" +description = "A programming model for software-defined state" +optional = false +python-versions = "~=3.7" +files = [ + {file = "constructs-10.3.0-py3-none-any.whl", hash = "sha256:2972f514837565ff5b09171cfba50c0159dfa75ee86a42921ea8c86f2941b3d2"}, + {file = "constructs-10.3.0.tar.gz", hash = "sha256:518551135ec236f9cc6b86500f4fbbe83b803ccdc6c2cb7684e0b7c4d234e7b1"}, +] + +[package.dependencies] +jsii = ">=1.90.0,<2.0.0" +publication = ">=0.0.3" +typeguard = ">=2.13.3,<2.14.0" + +[[package]] +name = "exceptiongroup" +version = "1.2.0" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"}, + {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "importlib-resources" +version = "6.1.1" +description = "Read resources from Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "importlib_resources-6.1.1-py3-none-any.whl", hash = "sha256:e8bf90d8213b486f428c9c39714b920041cb02c184686a3dee24905aaa8105d6"}, + {file = "importlib_resources-6.1.1.tar.gz", hash = "sha256:3893a00122eafde6894c59914446a512f728a0c1a45f9bb9b63721b6bacf0b4a"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", 
"sphinx-lint"] +testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff", "zipp (>=3.17)"] + +[[package]] +name = "jmespath" +version = "1.0.1" +description = "JSON Matching Expressions" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, + {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, +] + +[[package]] +name = "jsii" +version = "1.92.0" +description = "Python client for jsii runtime" +optional = false +python-versions = "~=3.8" +files = [ + {file = "jsii-1.92.0-py3-none-any.whl", hash = "sha256:30deaea011e146e1d4c0dbb35bd7effd4d292cef676052e5672a825fc1aaaebf"}, + {file = "jsii-1.92.0.tar.gz", hash = "sha256:2b5205c0fec87e1a9a9f283f60577ad172d7124bb614b8cdadc963306e1ac75f"}, +] + +[package.dependencies] +attrs = ">=21.2,<24.0" +cattrs = ">=1.8,<23.2" +importlib-resources = ">=5.2.0" +publication = ">=0.0.3" +python-dateutil = "*" +typeguard = ">=2.13.3,<2.14.0" +typing-extensions = ">=3.8,<5.0" + +[[package]] +name = "mypy" +version = "1.7.0" +description = "Optional static typing for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "mypy-1.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5da84d7bf257fd8f66b4f759a904fd2c5a765f70d8b52dde62b521972a0a2357"}, + {file = "mypy-1.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a3637c03f4025f6405737570d6cbfa4f1400eb3c649317634d273687a09ffc2f"}, + {file = "mypy-1.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b633f188fc5ae1b6edca39dae566974d7ef4e9aaaae00bc36efe1f855e5173ac"}, + {file = "mypy-1.7.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d6ed9a3997b90c6f891138e3f83fb8f475c74db4ccaa942a1c7bf99e83a989a1"}, + {file = 
"mypy-1.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:1fe46e96ae319df21359c8db77e1aecac8e5949da4773c0274c0ef3d8d1268a9"}, + {file = "mypy-1.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:df67fbeb666ee8828f675fee724cc2cbd2e4828cc3df56703e02fe6a421b7401"}, + {file = "mypy-1.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a79cdc12a02eb526d808a32a934c6fe6df07b05f3573d210e41808020aed8b5d"}, + {file = "mypy-1.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f65f385a6f43211effe8c682e8ec3f55d79391f70a201575def73d08db68ead1"}, + {file = "mypy-1.7.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0e81ffd120ee24959b449b647c4b2fbfcf8acf3465e082b8d58fd6c4c2b27e46"}, + {file = "mypy-1.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:f29386804c3577c83d76520abf18cfcd7d68264c7e431c5907d250ab502658ee"}, + {file = "mypy-1.7.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:87c076c174e2c7ef8ab416c4e252d94c08cd4980a10967754f91571070bf5fbe"}, + {file = "mypy-1.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6cb8d5f6d0fcd9e708bb190b224089e45902cacef6f6915481806b0c77f7786d"}, + {file = "mypy-1.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d93e76c2256aa50d9c82a88e2f569232e9862c9982095f6d54e13509f01222fc"}, + {file = "mypy-1.7.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cddee95dea7990e2215576fae95f6b78a8c12f4c089d7e4367564704e99118d3"}, + {file = "mypy-1.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:d01921dbd691c4061a3e2ecdbfbfad029410c5c2b1ee88946bf45c62c6c91210"}, + {file = "mypy-1.7.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:185cff9b9a7fec1f9f7d8352dff8a4c713b2e3eea9c6c4b5ff7f0edf46b91e41"}, + {file = "mypy-1.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7a7b1e399c47b18feb6f8ad4a3eef3813e28c1e871ea7d4ea5d444b2ac03c418"}, + {file = "mypy-1.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:fc9fe455ad58a20ec68599139ed1113b21f977b536a91b42bef3ffed5cce7391"}, + {file = "mypy-1.7.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d0fa29919d2e720c8dbaf07d5578f93d7b313c3e9954c8ec05b6d83da592e5d9"}, + {file = "mypy-1.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:2b53655a295c1ed1af9e96b462a736bf083adba7b314ae775563e3fb4e6795f5"}, + {file = "mypy-1.7.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c1b06b4b109e342f7dccc9efda965fc3970a604db70f8560ddfdee7ef19afb05"}, + {file = "mypy-1.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:bf7a2f0a6907f231d5e41adba1a82d7d88cf1f61a70335889412dec99feeb0f8"}, + {file = "mypy-1.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:551d4a0cdcbd1d2cccdcc7cb516bb4ae888794929f5b040bb51aae1846062901"}, + {file = "mypy-1.7.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:55d28d7963bef00c330cb6461db80b0b72afe2f3c4e2963c99517cf06454e665"}, + {file = "mypy-1.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:870bd1ffc8a5862e593185a4c169804f2744112b4a7c55b93eb50f48e7a77010"}, + {file = "mypy-1.7.0-py3-none-any.whl", hash = "sha256:96650d9a4c651bc2a4991cf46f100973f656d69edc7faf91844e87fe627f7e96"}, + {file = "mypy-1.7.0.tar.gz", hash = "sha256:1e280b5697202efa698372d2f39e9a6713a0395a756b1c6bd48995f8d72690dc"}, +] + +[package.dependencies] +mypy-extensions = ">=1.0.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = ">=4.1.0" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +install-types = ["pip"] +mypyc = ["setuptools (>=50)"] +reports = ["lxml"] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." 
+optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + +[[package]] +name = "packaging" +version = "23.2" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, + {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, +] + +[[package]] +name = "pathspec" +version = "0.11.2" +description = "Utility library for gitignore style pattern matching of file paths." +optional = false +python-versions = ">=3.7" +files = [ + {file = "pathspec-0.11.2-py3-none-any.whl", hash = "sha256:1d6ed233af05e679efb96b1851550ea95bbb64b7c490b0f5aa52996c11e92a20"}, + {file = "pathspec-0.11.2.tar.gz", hash = "sha256:e0d8d0ac2f12da61956eb2306b69f9469b42f4deb0f3cb6ed47b9cce9996ced3"}, +] + +[[package]] +name = "platformdirs" +version = "4.0.0" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "platformdirs-4.0.0-py3-none-any.whl", hash = "sha256:118c954d7e949b35437270383a3f2531e99dd93cf7ce4dc8340d3356d30f173b"}, + {file = "platformdirs-4.0.0.tar.gz", hash = "sha256:cb633b2bcf10c51af60beb0ab06d2f1d69064b43abf4c185ca6b28865f3f9731"}, +] + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"] + +[[package]] +name = "publication" +version = "0.0.3" +description = "Publication helps you maintain public-api-friendly modules by preventing unintentional access to private implementation details via introspection." +optional = false +python-versions = "*" +files = [ + {file = "publication-0.0.3-py2.py3-none-any.whl", hash = "sha256:0248885351febc11d8a1098d5c8e3ab2dabcf3e8c0c96db1e17ecd12b53afbe6"}, + {file = "publication-0.0.3.tar.gz", hash = "sha256:68416a0de76dddcdd2930d1c8ef853a743cc96c82416c4e4d3b5d901c6276dc4"}, +] + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = 
"sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] + +[[package]] +name = "s3transfer" +version = "0.7.0" +description = "An Amazon S3 Transfer Manager" +optional = false +python-versions = ">= 3.7" +files = [ + {file = 
"s3transfer-0.7.0-py3-none-any.whl", hash = "sha256:10d6923c6359175f264811ef4bf6161a3156ce8e350e705396a7557d6293c33a"}, + {file = "s3transfer-0.7.0.tar.gz", hash = "sha256:fd3889a66f5fe17299fe75b82eae6cf722554edca744ca5d5fe308b104883d2e"}, +] + +[package.dependencies] +botocore = ">=1.12.36,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + +[[package]] +name = "typeguard" +version = "2.13.3" +description = "Run-time type checker for Python" +optional = false +python-versions = ">=3.5.3" +files = [ + {file = "typeguard-2.13.3-py3-none-any.whl", hash = "sha256:5e3e3be01e887e7eafae5af63d1f36c849aaa94e3a0112097312aabfa16284f1"}, + {file = "typeguard-2.13.3.tar.gz", hash = "sha256:00edaa8da3a133674796cf5ea87d9f4b4c367d77476e185e80251cc13dfbb8c4"}, +] + +[package.extras] +doc = ["sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] +test = ["mypy", "pytest", "typing-extensions"] + +[[package]] +name = "types-awscrt" +version = "0.19.13" +description = "Type annotations and code completion for awscrt" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "types_awscrt-0.19.13-py3-none-any.whl", hash = 
"sha256:3ab5f3636e72318683b2026aadd0a751f673efbafea707940ee2e968e7bafcc7"}, + {file = "types_awscrt-0.19.13.tar.gz", hash = "sha256:3747ab27193414de3b202952b746224981f9f23f72b1016c7124a137bac00f7b"}, +] + +[[package]] +name = "types-pyyaml" +version = "6.0.12.12" +description = "Typing stubs for PyYAML" +optional = false +python-versions = "*" +files = [ + {file = "types-PyYAML-6.0.12.12.tar.gz", hash = "sha256:334373d392fde0fdf95af5c3f1661885fa10c52167b14593eb856289e1855062"}, + {file = "types_PyYAML-6.0.12.12-py3-none-any.whl", hash = "sha256:c05bc6c158facb0676674b7f11fe3960db4f389718e19e62bd2b84d6205cfd24"}, +] + +[[package]] +name = "types-requests" +version = "2.31.0.10" +description = "Typing stubs for requests" +optional = false +python-versions = ">=3.7" +files = [ + {file = "types-requests-2.31.0.10.tar.gz", hash = "sha256:dc5852a76f1eaf60eafa81a2e50aefa3d1f015c34cf0cba130930866b1b22a92"}, + {file = "types_requests-2.31.0.10-py3-none-any.whl", hash = "sha256:b32b9a86beffa876c0c3ac99a4cd3b8b51e973fb8e3bd4e0a6bb32c7efad80fc"}, +] + +[package.dependencies] +urllib3 = ">=2" + +[[package]] +name = "types-s3transfer" +version = "0.7.0" +description = "Type annotations and code completion for s3transfer" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "types_s3transfer-0.7.0-py3-none-any.whl", hash = "sha256:ae9ed9273465d9f43da8b96307383da410c6b59c3b2464c88d20b578768e97c6"}, + {file = "types_s3transfer-0.7.0.tar.gz", hash = "sha256:aca0f2486d0a3a5037cd5b8f3e20a4522a29579a8dd183281ff0aa1c4e2c8aa7"}, +] + +[[package]] +name = "typing-extensions" +version = "4.8.0" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.8.0-py3-none-any.whl", hash = "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0"}, + {file = "typing_extensions-4.8.0.tar.gz", hash = 
"sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"}, +] + +[[package]] +name = "urllib3" +version = "2.0.7" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=3.7" +files = [ + {file = "urllib3-2.0.7-py3-none-any.whl", hash = "sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e"}, + {file = "urllib3-2.0.7.tar.gz", hash = "sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "ziglang" +version = "0.11.0" +description = "Zig is a general-purpose programming language and toolchain for maintaining robust, optimal, and reusable software." +optional = false +python-versions = "~=3.5" +files = [ + {file = "ziglang-0.11.0-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:bd046eeab97ad51048575768f6dae10468b3a4449f4467ed61dae621faf6ee55"}, + {file = "ziglang-0.11.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:038b95cac9adef0c6dce9b72bdad895a0e4e0654c77c4a8f84fe79d2909a366e"}, + {file = "ziglang-0.11.0-py3-none-manylinux_2_12_i686.manylinux2010_i686.musllinux_1_1_i686.whl", hash = "sha256:4f848c8cca520cb12357cfa3d303bf1149a30566f4c1e5999284dbdf921cc2b8"}, + {file = "ziglang-0.11.0-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.musllinux_1_1_x86_64.whl", hash = "sha256:45e8116428267e20529b9ee43a7e7364791c1a092845d2143b248a1dbf6760b0"}, + {file = "ziglang-0.11.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:d6372bada34714a5395539cc4d76e9cc6062739cee5ce9949a250f7c525ceb94"}, + {file = "ziglang-0.11.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.musllinux_1_1_armv7l.whl", hash = 
"sha256:5fe81f91fd872fc32ed0f82807df6c680a82cbea56a9f24f818e9da299049022"}, + {file = "ziglang-0.11.0-py3-none-win_amd64.whl", hash = "sha256:a7edc7020e7ffbbb3af3a40c17a9bda65d5a65132ff933e153ffa80d8f5ad731"}, +] + +[metadata] +lock-version = "2.0" +python-versions = ">=3.10,<4.0" +content-hash = "e2b699d102ffef77e5dfc7849c0a6c3e639bd12510a180d37d39e56b32292204" diff --git a/distribution/lambda/pyproject.toml b/distribution/lambda/pyproject.toml new file mode 100644 index 00000000000..c45b9121fcd --- /dev/null +++ b/distribution/lambda/pyproject.toml @@ -0,0 +1,27 @@ +[tool.poetry] +name = "quickwit-lambda" +version = "0.1.0" +description = "Deploy Quickwit on AWS Lambda" +authors = ["Quickwit, Inc. "] +license = "AGPL-3.0" +readme = "README.md" +packages = [{include = "cdk"}] + +[tool.poetry.dependencies] +python = ">=3.10,<4.0" +aws-cdk-lib = "^2.95.1" +cargo-lambda = "^0.21.0" +constructs = ">=10.0.0,<11.0.0" +pyyaml = "^6.0.1" +black = "^23.9.1" +boto3 = "^1.28.59" +mypy = "^1.7.0" + +# types +boto3-stubs = "^1.28.39" +types-requests = "^2.31.0.2" +types-pyyaml = "^6.0.12.11" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/distribution/lambda/resources/data-generator.py b/distribution/lambda/resources/data-generator.py new file mode 100644 index 00000000000..15c9e93d605 --- /dev/null +++ b/distribution/lambda/resources/data-generator.py @@ -0,0 +1,30 @@ +import json +import os +import random +import time + +import boto3 + +s3 = boto3.client("s3") + + +def lambda_handler(event, context): + data = [] + base_time = time.time() + for i in range(100000): + item = { + "ts": int(base_time * 1000) + i, + "id": i, + "name": f"Item {i}", + "price": round(random.uniform(1, 100), 2), + "quantity": random.randint(1, 10), + "description": f"This is a description for Item {i}.", + } + data.append(item) + json_data = "\n".join([json.dumps(d) for d in data]) + key = int(base_time) + s3.put_object( + 
Bucket=os.environ["BUCKET_NAME"], + Key=f"{os.environ['PREFIX']}/{key}", + Body=json_data, + ) diff --git a/distribution/lambda/resources/hdfs-logs.yaml b/distribution/lambda/resources/hdfs-logs.yaml new file mode 100644 index 00000000000..ceccba394b5 --- /dev/null +++ b/distribution/lambda/resources/hdfs-logs.yaml @@ -0,0 +1,38 @@ +# +# Index config file for hdfs-logs dataset. +# + +version: 0.6 + +index_id: hdfs-logs + +doc_mapping: + field_mappings: + - name: timestamp + type: datetime + input_formats: + - unix_timestamp + output_format: unix_timestamp_secs + fast_precision: seconds + fast: true + - name: tenant_id + type: u64 + - name: severity_text + type: text + tokenizer: raw + fast: true + - name: body + type: text + tokenizer: default + record: position + - name: resource + type: json + tokenizer: raw + tag_fields: [tenant_id] + timestamp_field: timestamp + +search_settings: + default_search_fields: [severity_text, body] + +indexing_settings: + split_num_docs_target: 2000000 diff --git a/distribution/lambda/resources/mock-sales.yaml b/distribution/lambda/resources/mock-sales.yaml new file mode 100644 index 00000000000..46d7405c055 --- /dev/null +++ b/distribution/lambda/resources/mock-sales.yaml @@ -0,0 +1,38 @@ +# +# Index config file for mock-sales data generator. 
+# + +version: 0.6 + +index_id: mock-sales + +doc_mapping: + field_mappings: + - name: ts + type: datetime + input_formats: + - unix_timestamp + output_format: unix_timestamp_millis + precision: milliseconds + fast: true + - name: id + type: u64 + - name: name + type: text + tokenizer: raw + - name: price + type: f64 + fast: true + - name: quantity + type: u64 + fast: true + - name: description + type: text + tokenizer: default + timestamp_field: ts + +search_settings: + default_search_fields: [name, description] + +indexing_settings: + split_num_docs_target: 2000000 diff --git a/docs/assets/images/quickwit-lambda-tutorial.svg b/docs/assets/images/quickwit-lambda-tutorial.svg new file mode 100644 index 00000000000..62ad5f2e5b9 --- /dev/null +++ b/docs/assets/images/quickwit-lambda-tutorial.svg @@ -0,0 +1,4 @@ + + + +

Sales Data Generator

Sales Data G...

CRON trigger

CRON trigger

Staging

Staging

Indexer

Indexer

Searcher

Searcher

Sales Data
Search Endpoint

Sales Data Search...

Index 

Index 
Searcher API
Searcher API
Quickwit Service
Quickwit Service
Source
Source
Text is not SVG - cannot display
\ No newline at end of file diff --git a/docs/get-started/tutorials/tutorial-aws-lambda.md b/docs/get-started/tutorials/tutorial-aws-lambda.md new file mode 100644 index 00000000000..d6fadb1f61b --- /dev/null +++ b/docs/get-started/tutorials/tutorial-aws-lambda.md @@ -0,0 +1,170 @@ +--- +title: Serverless Search on AWS Lambda +description: Index and search using AWS Lambda based on an end-to-end use case. +tags: [aws, integration] +icon_url: /img/tutorials/aws-logo.png +sidebar_position: 3 +--- + +In this tutorial, we’ll show you how to run Quickwit on Lambda through a complete use case. We’ll present the associated cloud resources, a cost estimate and how to deploy the whole stack using AWS CDK. + +## The use case +Let’s start with the functional description of what our stack will be doing. + +The current setup to which we want to add search is composed of the following pieces: +- An application that generates sales data and uploads it in batches to S3. To avoid dragging around a third party application in this tutorial stack, we have hacked together a small Lambda function to simulate the application by generating random data. +- Our users want to search the ingested data through an HTTP API authenticated with an API Key. For them, the search is just a service that can be called from their application backends. We expect a relatively low volume of queries on average, a few thousand per day. But we know that client applications are likely to have usage spikes that will focus queries to our search API on short time intervals, e.g. the start of business hours or the end of a sales cycle… + +## The cloud resources +Here is a simplified drawing of our example stack: + +![Quickwit Architecture](../../assets/images/quickwit-lambda-tutorial.svg) + +Let’s break it down piece by piece. + +### The Source + +For the sake of the example, the source is based on a data generator that also runs on Lambda.
It is triggered every 5 minutes by an [AWS EventBridge Scheduled Rule](https://aws.amazon.com/blogs/compute/introducing-amazon-eventbridge-scheduler/) and pushes its output to a Staging bucket. The data itself has a very simple structure that can be summarized by the following [doc mapping](https://quickwit.io/docs/configuration/index-config): +``` +index_id: mock-sales + +doc_mapping: + field_mappings: + - name: ts + type: datetime + input_formats: + - unix_timestamp + output_format: unix_timestamp_millis + precision: milliseconds + fast: true + - name: id + type: u64 + - name: name + type: text + tokenizer: raw + - name: price + type: f64 + - name: quantity + type: u64 + - name: description + type: text + tokenizer: default + timestamp_field: ts +``` + +The Staging bucket is configured to send a notification that triggers the Indexer Lambda each time an object is created by the Sales Data Generator. + +### The Quickwit Service + +The Quickwit Service in the middle is the base building block. It contains the necessary infrastructure to build and query a Quickwit index. It is composed of three main resources: the Indexer Lambda, the Index Bucket and the Searcher Lambda: +- The Indexer is an AWS Lambda function that can be invoked to load a line-delimited JSON file from S3 and index it. It writes the generated index splits and associated metadata to the Index Bucket. Due to [current limitations with the file-based metastore](https://quickwit.io/docs/configuration/metastore-config), its reserved concurrency configuration is set to 1 to guarantee that only 1 indexer will write to the metastore file at any given time. +- The Searcher Lambda does not have this limitation and multiple queries can be run in parallel. Note, however, that the Searcher can leverage a cache across consecutive invocations, whereas parallel queries end up being executed on different Lambda containers that do not share that cache.
So running queries sequentially will likely reduce their aggregated running duration as well as the total number of reads from S3. + +### The Searcher API + +The Searcher API uses AWS API Gateway [REST API](https://docs.aws.amazon.com/apigateway/latest/developerguide/http-api-vs-rest.html), a managed service with [usage-based pricing](https://aws.amazon.com/api-gateway/pricing/). This is perfect for our use case with a low volume of requests. + +Our example contains a simplified configuration that mimics the search endpoint of the [Quickwit REST API](https://quickwit.io/docs/reference/rest-api). The generated URL follows the pattern: +``` +https://{api_id}.execute-api.{region}.amazonaws.com/api/v1/mock-sales/search +``` + +The endpoint is protected by an API key that you configure when deploying the stack. + +## Cost estimates + +Before running a system in the Cloud, it is always a good idea to get a rough estimate of the associated costs. This example stack is generating, indexing and storing a hundred thousand events every few minutes. We consider the [current pricing](https://aws.amazon.com/lambda/pricing/) in the region us-east-1 of $0.00005 per second for our 3GB RAM Lambda functions. With a few back-of-the-envelope calculations, we get the following estimates: +- The leading cost is the Indexer Lambda running duration. You might expect an associated cost on the order of $0.1 per day (300 executions of 5 seconds each). +- Objects in the staging area expire after 1 day, so you will never have more than 10GB stored there ($0.2 per month). +- Approximately 1GB of index is created per day. As data accumulates, storage becomes more and more expensive. A month’s worth of historical data (30GB) costs around $0.7 per month. +- The cost of the searches will likely remain negligible. Usually search queries take less than 1 second, so you can run on the order of 10k of them with a budget of $1.
A client that has an auto refresh mechanism enabled might quickly reach this number of queries, but in that case the results will likely be served from the Searcher Lambda cache, which means the query should be resolved in less than 100ms. In practice, for systems that perform mostly identical queries, we expect the number of queries that can be performed with $1 to be 100k or more. More on this in our upcoming blog post about search performance. + +## Deployment + +### Prerequisites + +We use [AWS CDK](https://aws.amazon.com/cdk/) for our infrastructure automation script. Install it using [npm](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm): +```bash +npm install -g aws-cdk +``` +We also use the `curl` and `make` commands. For instance, on Debian-based distributions: +```bash +sudo apt update && sudo apt install curl make +``` + +You also need AWS credentials to be properly configured in your shell. One way is using the [credentials file](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html). + +Finally, clone the Quickwit repository: +```bash +git clone https://github.com/quickwit-oss/quickwit.git +cd quickwit/distribution/lambda +``` +### Deploy +Configure the AWS region and [account id](https://docs.aws.amazon.com/IAM/latest/UserGuide/console_account-alias.html) where you want to deploy the example stack: +```bash +export CDK_ACCOUNT=123456789 +export CDK_REGION=us-east-1 +``` + +If this region/account pair was not bootstrapped by CDK yet, run: +```bash +make bootstrap +``` + +This initializes some basic resources to host artifacts such as Lambda packages. + +Everything is ready! You can finally deploy the stack: +```bash +export SEARCHER_API_KEY=my-at-least-20-char-long-key +make deploy-mock-data +``` + +If you don’t set `SEARCHER_API_KEY`, the Searcher API deployment is skipped. + +:::warning +The API key is stored in plain text in the CDK stack.
For a real world deployment, the key should be fetched from something like [AWS Secrets Manager](https://docs.aws.amazon.com/cdk/v2/guide/get_secrets_manager_value.html). +::: + +### Query + +Once the CDK deployment is completed, your example stack is up and running. The Sales Data Generator Lambda is going to be triggered every 5 minutes, which in turn will trigger the Indexer Lambda. + +Around the end of the deployment logs, you’ll see a list of outputs. One of them is the URL of the search endpoint. Here is an example request using `curl`: +```bash +curl -d '{"query":"quantity:>5", "max_hits": 10}' \ + -H "Content-Type: application/json" \ + -H "x-api-key: my-at-least-20-char-long-key" \ + -X POST \ + https://{api_id}.execute-api.{region}.amazonaws.com/api/v1/mock-sales/search \ + --compressed +``` + +The index is not created until the first run of the Indexer, so you might need a few minutes before your first search request succeeds. The API Gateway key configuration also takes a minute or two to propagate, so the first requests might receive an authorization error response. + +Because the JSON query responses are often quite verbose, the Searcher Lambda always compresses them before sending them on the wire. It is crucial to keep this size low, both to avoid hitting the Lambda payload size limit of 6MB and to avoid egress costs at around $0.10/GB. We do this regardless of the `accept-encoding` request header, this is why the `--compressed` flag needs to be set to `curl`. + +### Cleaning up + +Once you're done playing with the example stack, it is strongly recommended to delete the associated resources. In the shell where `CDK_ACCOUNT`, `CDK_REGION` and your AWS credentials are configured, run: +```bash +make destroy-mock-data +``` + +If you don’t want to tear down the infrastructure but want to make the costs associated with the stack negligible, you can just stop the source data generator. 
To do so, open the AWS Console, find the Sales Data Generator Lambda (it should be called something like `MockDataStack-SourceMockDataGenerator{some_random_id}`), and disable its EventBridge scheduled trigger. Without any data generated, the Indexer Lambda is not triggered either. You only pay a small fee for the S3 storage and any queries you make on the dataset (both might even stay within your [free tier](https://aws.amazon.com/free/) if it isn’t already consumed by another application). + +## Alternative use cases + +### Firehose as a source + +A very common way to land data on S3 is using AWS Firehose. It serves as a buffer between data sources that emit one or a few events at a time and S3, where manipulating small objects is often inefficient. + +### Querying without the API Gateway + +API Gateway has the benefit of exposing the Lambda function as an HTTP Endpoint with custom authentication. When calling the Searcher directly from an AWS resource, such as another Lambda function or an EC2 instance, it might actually be simpler to directly call the AWS Lambda [invoke API](https://docs.aws.amazon.com/lambda/latest/dg/API_Invoke.html) using an AWS SDK (e.g. boto3 for Python). This leverages the AWS IAM roles for authentication and avoids the intermediate API Gateway layer. + +## Possible improvements +Quickwit Lambda is still in beta and some features might still be added to improve it: +- The current indexer does not clean up the splits that are marked for deletion after a merge. +- The file source does not currently support reading compressed files from the source bucket. More generally, the file source could be made more flexible to support more data formats natively. + +If you are interested in any of these features or other ones, join us on [Discord](https://discord.com/channels/908281611840282624/) and share your use cases with us!
diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index b4565da6ff5..46298cf4235 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -758,6 +758,26 @@ dependencies = [ "tracing", ] +[[package]] +name = "aws_lambda_events" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c2981b7ab5d084f06dfa6764e2b51347e836fe9559b549b332e451e6a8287ae" +dependencies = [ + "base64 0.21.7", + "bytes", + "chrono", + "flate2", + "http", + "http-body", + "http-serde", + "query_map", + "serde", + "serde_dynamo", + "serde_json", + "serde_with 3.4.0", +] + [[package]] name = "axum" version = "0.6.20" @@ -926,6 +946,12 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" +[[package]] +name = "base64" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea22880d78093b0cbe17c89f64a7d457941e65759157ec6cb31a31d652b05e5" + [[package]] name = "base64" version = "0.21.7" @@ -3292,6 +3318,66 @@ version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f35c735096c0293d313e8f2a641627472b83d01b937177fe76e5e2708d31e0d" +[[package]] +name = "lambda_http" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2505c4a24f5a8d8ac66a87691215ec1f79736c5bc6e62bb921788dca9753f650" +dependencies = [ + "aws_lambda_events", + "base64 0.21.7", + "bytes", + "encoding_rs", + "futures", + "http", + "http-body", + "hyper", + "lambda_runtime", + "mime", + "percent-encoding", + "serde", + "serde_json", + "serde_urlencoded", + "tokio-stream", + "url", +] + +[[package]] +name = "lambda_runtime" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "deca8f65d7ce9a8bfddebb49d7d91b22e788a59ca0c5190f26794ab80ed7a702" +dependencies = [ + "async-stream", + "base64 0.20.0", + "bytes", + 
"futures", + "http", + "http-body", + "http-serde", + "hyper", + "lambda_runtime_api_client", + "serde", + "serde_json", + "serde_path_to_error", + "tokio", + "tokio-stream", + "tower", + "tracing", +] + +[[package]] +name = "lambda_runtime_api_client" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "690c5ae01f3acac8c9c3348b556fc443054e9b7f1deaf53e9ebab716282bf0ed" +dependencies = [ + "http", + "hyper", + "tokio", + "tower-service", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -4277,6 +4363,19 @@ dependencies = [ "opentelemetry_sdk", ] +[[package]] +name = "opentelemetry-http" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7594ec0e11d8e33faf03530a4c49af7064ebba81c1480e01be67d90b356508b" +dependencies = [ + "async-trait", + "bytes", + "http", + "opentelemetry_api", + "reqwest", +] + [[package]] name = "opentelemetry-otlp" version = "0.13.0" @@ -4286,11 +4385,13 @@ dependencies = [ "async-trait", "futures-core", "http", + "opentelemetry-http", "opentelemetry-proto", "opentelemetry-semantic-conventions", "opentelemetry_api", "opentelemetry_sdk", "prost", + "reqwest", "thiserror", "tokio", "tonic", @@ -5191,6 +5292,17 @@ dependencies = [ "zstd 0.11.2+zstd.1.5.2", ] +[[package]] +name = "query_map" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eab6b8b1074ef3359a863758dae650c7c0c6027927a085b7af911c8e0bf3a15" +dependencies = [ + "form_urlencoded", + "serde", + "serde_derive", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -5603,6 +5715,7 @@ version = "0.7.0" dependencies = [ "anyhow", "arc-swap", + "async-compression", "async-trait", "aws-config", "aws-sdk-kinesis", @@ -5805,6 +5918,46 @@ dependencies = [ "utoipa", ] +[[package]] +name = "quickwit-lambda" +version = "0.7.0" +dependencies = [ + "anyhow", + "aws_lambda_events", + "chitchat", + "flate2", + "lambda_http", + "lambda_runtime", + 
"once_cell", + "opentelemetry", + "opentelemetry-otlp", + "quickwit-actors", + "quickwit-cli", + "quickwit-cluster", + "quickwit-common", + "quickwit-config", + "quickwit-directories", + "quickwit-doc-mapper", + "quickwit-index-management", + "quickwit-indexing", + "quickwit-ingest", + "quickwit-metastore", + "quickwit-proto", + "quickwit-rest-client", + "quickwit-search", + "quickwit-serve", + "quickwit-storage", + "quickwit-telemetry", + "rand 0.8.5", + "serde", + "serde_json", + "time", + "tokio", + "tracing", + "tracing-opentelemetry", + "tracing-subscriber", +] + [[package]] name = "quickwit-macros" version = "0.7.0" @@ -6462,6 +6615,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "rustls 0.21.10", + "rustls-native-certs", "rustls-pemfile", "serde", "serde_json", @@ -6939,6 +7093,16 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "serde_dynamo" +version = "4.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c64307a9d3b5af5237b1a95c0b63fbeb45134d7d7c372c284847fa37a6ddee44" +dependencies = [ + "base64 0.21.7", + "serde", +] + [[package]] name = "serde_json" version = "1.0.111" @@ -8328,6 +8492,16 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "tracing-serde" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc6b213177105856957181934e4920de57730fc69bf42c37ee5bb664d406d9e1" +dependencies = [ + "serde", + "tracing-core", +] + [[package]] name = "tracing-subscriber" version = "0.3.18" @@ -8338,6 +8512,8 @@ dependencies = [ "nu-ansi-term", "once_cell", "regex", + "serde", + "serde_json", "sharded-slab", "smallvec", "thread_local", @@ -8345,6 +8521,7 @@ dependencies = [ "tracing", "tracing-core", "tracing-log 0.2.0", + "tracing-serde", ] [[package]] diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index 60b5f7a5760..e55019cb304 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -19,6 +19,7 @@ members = [ 
"quickwit-integration-tests", "quickwit-jaeger", "quickwit-janitor", + "quickwit-lambda", "quickwit-macros", "quickwit-macros/impl", "quickwit-metastore", @@ -34,6 +35,10 @@ members = [ "quickwit-storage", "quickwit-telemetry", ] +default-members = [ + "quickwit-cli", + "quickwit-metastore" +] [profile.dev] debug = false @@ -42,6 +47,7 @@ debug = false anyhow = "1" arc-swap = "1.6" assert-json-diff = "2" +async-compression = { version = "0.4", features = ["tokio", "gzip"] } async-speed-limit = "0.4" async-trait = "0.1" backoff = { version = "0.4", features = ["tokio"] } @@ -67,6 +73,7 @@ env_logger = "0.10" fail = "0.5" flume = "0.11" fnv = "1" +flate2 = "1.0" futures = "0.3" futures-util = { version = "0.3.25", default-features = false } google-cloud-auth = "0.12.0" @@ -252,6 +259,7 @@ reqsign = { version = "0.14", default-features = false } quickwit-actors = { version = "0.7.0", path = "./quickwit-actors" } quickwit-aws = { version = "0.7.0", path = "./quickwit-aws" } +quickwit-cli = { version = "0.7.0", path = "./quickwit-cli" } quickwit-cluster = { version = "0.7.0", path = "./quickwit-cluster" } quickwit-codegen = { version = "0.7.0", path = "./quickwit-codegen" } quickwit-codegen-example = { version = "0.7.0", path = "./quickwit-codegen/example" } diff --git a/quickwit/quickwit-indexing/Cargo.toml b/quickwit/quickwit-indexing/Cargo.toml index 65de559f8a0..932d317197a 100644 --- a/quickwit/quickwit-indexing/Cargo.toml +++ b/quickwit/quickwit-indexing/Cargo.toml @@ -16,6 +16,7 @@ aws-smithy-client = { workspace = true, optional = true } anyhow = { workspace = true } arc-swap = { workspace = true } +async-compression = { workspace = true } async-trait = { workspace = true } backoff = { workspace = true, optional = true } bytes = { workspace = true } diff --git a/quickwit/quickwit-indexing/data/test_corpus.json.gz b/quickwit/quickwit-indexing/data/test_corpus.json.gz new file mode 100644 index 
0000000000000000000000000000000000000000..b3f20ad0c9216c219e7fec82da489ecbdc63599a GIT binary patch literal 228 zcmV~YnItTle zA(GkX26HWKrjD_HH=7_Lf{%(qp}kykNe(&!H%AJLM+Zfm=9$iNISMLk1}LDWwzZ(Z zQ*gEmv^B^inL5_#I!@9DW)^zQI2>AwM&zU#Ma`Z&@&e`QymjQcp#a4=DcIFbYdm#O zlp6)e?3=3Rg$2fg+-01oR0|o%pB9. -use std::fmt; -use std::ops::Range; +use std::ffi::OsStr; use std::path::Path; use std::sync::Arc; use std::time::Duration; +use std::{fmt, io}; use anyhow::Context; +use async_compression::tokio::bufread::GzipDecoder; use async_trait::async_trait; use bytes::Bytes; use quickwit_actors::{ActorExitStatus, Mailbox}; @@ -32,7 +33,7 @@ use quickwit_config::FileSourceParams; use quickwit_metastore::checkpoint::{PartitionId, SourceCheckpoint}; use quickwit_proto::types::Position; use serde::Serialize; -use tokio::io::{AsyncBufReadExt, AsyncRead, BufReader}; +use tokio::io::{AsyncBufReadExt, AsyncRead, AsyncReadExt, BufReader}; use tracing::info; use crate::actors::DocProcessor; @@ -53,7 +54,7 @@ pub struct FileSource { source_id: String, params: FileSourceParams, counters: FileSourceCounters, - reader: BufReader>, + reader: FileSourceReader, } impl fmt::Debug for FileSource { @@ -139,7 +140,7 @@ impl TypedSourceFactory for FileSourceFactory { checkpoint: SourceCheckpoint, ) -> anyhow::Result { let mut offset = 0; - let reader: Box = if let Some(filepath) = &params.filepath { + let reader: FileSourceReader = if let Some(filepath) = &params.filepath { let partition_id = PartitionId::from(filepath.to_string_lossy().to_string()); offset = checkpoint .position_for_partition(&partition_id) @@ -152,18 +153,27 @@ impl TypedSourceFactory for FileSourceFactory { let (dir_uri, file_name) = dir_and_filename(filepath)?; let storage = ctx.storage_resolver.resolve(&dir_uri).await?; let file_size = storage.file_num_bytes(file_name).await?.try_into().unwrap(); - storage - .get_slice_stream( - file_name, - Range { - start: offset, - end: file_size, - }, - ) - .await?
+ if offset > file_size { + return Err(anyhow::anyhow!( + "offset {} can't be greater than the file size {}", + offset, + file_size + )); + } + // If it's a gzip file, we can't seek to a specific offset, we need to start from the + // beginning of the file, decompress and skip the first `offset` bytes. + if filepath.extension() == Some(OsStr::new("gz")) { + let stream = storage.get_slice_stream(file_name, 0..file_size).await?; + FileSourceReader::new(Box::new(GzipDecoder::new(BufReader::new(stream))), offset) + } else { + let stream = storage + .get_slice_stream(file_name, offset..file_size) + .await?; + FileSourceReader::new(stream, 0) + } } else { // We cannot use the checkpoint. - Box::new(tokio::io::stdin()) + FileSourceReader::new(Box::new(tokio::io::stdin()), 0) }; let file_source = FileSource { source_id: ctx.source_id().to_string(), @@ -172,13 +182,50 @@ impl TypedSourceFactory for FileSourceFactory { current_offset: offset as u64, num_lines_processed: 0, }, - reader: BufReader::new(reader), + reader, params, }; Ok(file_source) } } +struct FileSourceReader { + reader: BufReader>, + num_bytes_to_skip: usize, +} + +impl FileSourceReader { + fn new(reader: Box, num_bytes_to_skip: usize) -> Self { + Self { + reader: BufReader::new(reader), + num_bytes_to_skip, + } + } + + // This function is only called for GZIP files. + // Because they cannot be seeked into, we have to scan them to the right initial position. + async fn skip(&mut self) -> io::Result<()> { + // Allocate once a 64kb buffer.
+ let mut buf = [0u8; 64000]; + while self.num_bytes_to_skip > 0 { + let num_bytes_to_read = self.num_bytes_to_skip.min(buf.len()); + let num_bytes_read = self + .reader + .read_exact(&mut buf[..num_bytes_to_read]) + .await?; + self.num_bytes_to_skip -= num_bytes_read; + } + Ok(()) + } + + async fn read_line<'a>(&mut self, buf: &'a mut String) -> io::Result { + if self.num_bytes_to_skip > 0 { + self.skip().await?; + } + self.reader.read_line(buf).await + } +} + pub(crate) fn dir_and_filename(filepath: &Path) -> anyhow::Result<(Uri, &Path)> { let dir_uri: Uri = filepath .parent() @@ -194,10 +241,11 @@ pub(crate) fn dir_and_filename(filepath: &Path) -> anyhow::Result<(Uri, &Path)> #[cfg(test)] mod tests { - use std::io::Write; + use std::io::{Cursor, Write}; use std::num::NonZeroUsize; use std::path::PathBuf; + use async_compression::tokio::write::GzipEncoder; use quickwit_actors::{Command, Universe}; use quickwit_config::{SourceConfig, SourceInputFormat, SourceParams}; use quickwit_metastore::checkpoint::{SourceCheckpoint, SourceCheckpointDelta}; @@ -208,10 +256,19 @@ mod tests { use crate::source::SourceActor; #[tokio::test] - async fn test_file_source() -> anyhow::Result<()> { + async fn test_file_source() { + aux_test_file_source(false).await; + aux_test_file_source(true).await; + } + + async fn aux_test_file_source(gzip: bool) { let universe = Universe::with_accelerated_time(); let (doc_processor_mailbox, indexer_inbox) = universe.create_test_mailbox(); - let params = FileSourceParams::file("data/test_corpus.json"); + let params = if gzip { + FileSourceParams::file("data/test_corpus.json.gz") + } else { + FileSourceParams::file("data/test_corpus.json") + }; let source_config = SourceConfig { source_id: "test-file-source".to_string(), desired_num_pipelines: NonZeroUsize::new(1).unwrap(), @@ -232,7 +289,8 @@ mod tests { params, SourceCheckpoint::default(), ) - .await?; + .await + .unwrap(); let file_source_actor = SourceActor { source: Box::new(file_source), 
doc_processor_mailbox, @@ -255,23 +313,38 @@ mod tests { batch[1].downcast_ref::().unwrap(), Command::ExitWithSuccess )); - Ok(()) } #[tokio::test] - async fn test_file_source_several_batch() -> anyhow::Result<()> { + async fn test_file_source_several_batch() { + aux_test_file_source_several_batch(false).await; + aux_test_file_source_several_batch(true).await; + } + + async fn aux_test_file_source_several_batch(gzip: bool) { quickwit_common::setup_logging_for_tests(); let universe = Universe::with_accelerated_time(); let (doc_processor_mailbox, doc_processor_inbox) = universe.create_test_mailbox(); - use tempfile::NamedTempFile; - let mut temp_file = NamedTempFile::new()?; - let temp_path = temp_file.path().to_path_buf(); + let mut documents_bytes = Vec::new(); for _ in 0..20_000 { - temp_file.write_all(r#"{"body": "hello happy tax payer!"}"#.as_bytes())?; - temp_file.write_all("\n".as_bytes())?; + documents_bytes + .write_all(r#"{"body": "hello happy tax payer!"}"#.as_bytes()) + .unwrap(); + documents_bytes.write_all("\n".as_bytes()).unwrap(); } - temp_file.flush()?; - let params = FileSourceParams::file(temp_path); + let mut temp_file: tempfile::NamedTempFile = if gzip { + tempfile::Builder::new().suffix(".gz").tempfile().unwrap() + } else { + tempfile::NamedTempFile::new().unwrap() + }; + if gzip { + let gzip_documents = gzip_bytes(&documents_bytes).await; + temp_file.write_all(&gzip_documents).unwrap(); + } else { + temp_file.write_all(&documents_bytes).unwrap(); + } + temp_file.flush().unwrap(); + let params = FileSourceParams::file(temp_file.path()); let filepath = params .filepath .as_ref() @@ -299,7 +372,8 @@ mod tests { params, SourceCheckpoint::default(), ) - .await?; + .await + .unwrap(); let file_source_actor = SourceActor { source: Box::new(source), doc_processor_mailbox, @@ -337,7 +411,6 @@ mod tests { "00000000000000500010..00000000000000700000" ); assert!(matches!(command, &Command::ExitWithSuccess)); - Ok(()) } fn 
extract_position_delta(checkpoint_delta: &SourceCheckpointDelta) -> Option { @@ -349,16 +422,34 @@ mod tests { #[tokio::test] async fn test_file_source_resume_from_checkpoint() { + aux_test_file_source_resume_from_checkpoint(false).await; + aux_test_file_source_resume_from_checkpoint(true).await; + } + + async fn aux_test_file_source_resume_from_checkpoint(gzip: bool) { quickwit_common::setup_logging_for_tests(); let universe = Universe::with_accelerated_time(); let (doc_processor_mailbox, doc_processor_inbox) = universe.create_test_mailbox(); - use tempfile::NamedTempFile; - let mut temp_file = NamedTempFile::new().unwrap(); + let mut documents_bytes = Vec::new(); for i in 0..100 { - temp_file.write_all(format!("{i}\n").as_bytes()).unwrap(); + documents_bytes + .write_all(format!("{i}\n").as_bytes()) + .unwrap(); } - temp_file.flush().unwrap(); + let mut temp_file: tempfile::NamedTempFile = if gzip { + tempfile::Builder::new().suffix(".gz").tempfile().unwrap() + } else { + tempfile::NamedTempFile::new().unwrap() + }; let temp_file_path = temp_file.path().canonicalize().unwrap(); + if gzip { + let gzipped_documents = gzip_bytes(&documents_bytes).await; + temp_file.write_all(&gzipped_documents).unwrap(); + } else { + temp_file.write_all(&documents_bytes).unwrap(); + } + temp_file.flush().unwrap(); + let params = FileSourceParams::file(&temp_file_path); let mut checkpoint = SourceCheckpoint::default(); let partition_id = PartitionId::from(temp_file_path.to_string_lossy().to_string()); @@ -411,4 +502,66 @@ mod tests { let indexer_messages: Vec = doc_processor_inbox.drain_for_test_typed(); assert!(&indexer_messages[0].docs[0].starts_with(b"2\n")); } + + async fn gzip_bytes(bytes: &[u8]) -> Vec { + let mut gzip_documents = Vec::new(); + let mut encoder = GzipEncoder::new(&mut gzip_documents); + tokio::io::AsyncWriteExt::write_all(&mut encoder, bytes) + .await + .unwrap(); + // flush is not sufficient here and reading the file will raise an unexpected end of file + //
error. + tokio::io::AsyncWriteExt::shutdown(&mut encoder) + .await + .unwrap(); + gzip_documents + } + + #[tokio::test] + async fn test_skip_reader() { + { + // Skip 0 bytes. + let mut reader = FileSourceReader::new(Box::new("hello".as_bytes()), 0); + let mut buf = String::new(); + reader.read_line(&mut buf).await.unwrap(); + assert_eq!(buf, "hello"); + } + { + // Skip 2 bytes. + let mut reader = FileSourceReader::new(Box::new("hello".as_bytes()), 2); + let mut buf = String::new(); + reader.read_line(&mut buf).await.unwrap(); + assert_eq!(buf, "llo"); + } + { + let input = "hello"; + let cursor = Cursor::new(input.clone()); + let mut reader = FileSourceReader::new(Box::new(cursor), 5); + let mut buf = String::new(); + assert!(reader.read_line(&mut buf).await.is_ok()); + } + { + let input = "hello"; + let cursor = Cursor::new(input.clone()); + let mut reader = FileSourceReader::new(Box::new(cursor), 10); + let mut buf = String::new(); + assert!(reader.read_line(&mut buf).await.is_err()); + } + { + let input = "hello world".repeat(10000); + let cursor = Cursor::new(input.clone()); + let mut reader = FileSourceReader::new(Box::new(cursor), 64000); + let mut buf = String::new(); + reader.read_line(&mut buf).await.unwrap(); + assert_eq!(buf, input[64000..]); + } + { + let input = "hello world".repeat(10000); + let cursor = Cursor::new(input.clone()); + let mut reader = FileSourceReader::new(Box::new(cursor), 64001); + let mut buf = String::new(); + reader.read_line(&mut buf).await.unwrap(); + assert_eq!(buf, input[64001..]); + } + } } diff --git a/quickwit/quickwit-lambda/Cargo.toml b/quickwit/quickwit-lambda/Cargo.toml new file mode 100644 index 00000000000..e8300e9d20c --- /dev/null +++ b/quickwit/quickwit-lambda/Cargo.toml @@ -0,0 +1,59 @@ +[package] +name = "quickwit-lambda" +version = "0.7.0" +authors = ["Quickwit, Inc. 
"] +edition = "2021" +license = "AGPL-3.0-or-later" # For a commercial, license, contact hello@quickwit.io +description = "Quickwit is a cost-efficient search engine." +repository = "https://github.com/quickwit-oss/quickwit" +homepage = "https://quickwit.io/" +documentation = "https://quickwit.io/docs/" + +[[bin]] +name = "indexer" +path = "src/bin/indexer.rs" + +[[bin]] +name = "searcher" +path = "src/bin/searcher.rs" + +[dependencies] +anyhow = { workspace = true } +aws_lambda_events = "0.12.0" +chitchat = { workspace = true } +flate2 = { workspace = true } +lambda_http = "0.8.3" +lambda_runtime = "0.8.3" +once_cell = { workspace = true } +opentelemetry = { workspace = true } +opentelemetry-otlp = { workspace = true, features = [ + "reqwest-client", + "reqwest-rustls", + "http-proto", +] } +rand = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +time = { workspace = true } +tokio = { workspace = true } +tracing = { workspace = true } +tracing-opentelemetry = { workspace = true } +tracing-subscriber = { workspace = true, features = [ "json" ] } + +quickwit-actors = { workspace = true } +quickwit-cluster = { workspace = true } +quickwit-cli = { workspace = true } +quickwit-common = { workspace = true } +quickwit-config = { workspace = true } +quickwit-index-management = { workspace = true } +quickwit-directories = { workspace = true } +quickwit-doc-mapper = { workspace = true } +quickwit-indexing = { workspace = true } +quickwit-ingest = { workspace = true } +quickwit-metastore = { workspace = true } +quickwit-proto = { workspace = true } +quickwit-rest-client = { workspace = true } +quickwit-search = { workspace = true } +quickwit-serve = { workspace = true } +quickwit-storage = { workspace = true } +quickwit-telemetry = { workspace = true } diff --git a/quickwit/quickwit-lambda/src/bin/indexer.rs b/quickwit/quickwit-lambda/src/bin/indexer.rs new file mode 100644 index 00000000000..4b8aea034e0 --- /dev/null +++ 
b/quickwit/quickwit-lambda/src/bin/indexer.rs @@ -0,0 +1,31 @@ +// Copyright (C) 2024 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +use lambda_runtime::service_fn; +use quickwit_lambda::indexer::handler; +use quickwit_lambda::logger; + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + logger::setup_lambda_tracer()?; + let func = service_fn(handler); + lambda_runtime::run(func) + .await + .map_err(|e| anyhow::anyhow!(e)) +} diff --git a/quickwit/quickwit-lambda/src/bin/searcher.rs b/quickwit/quickwit-lambda/src/bin/searcher.rs new file mode 100644 index 00000000000..eeef43d00bc --- /dev/null +++ b/quickwit/quickwit-lambda/src/bin/searcher.rs @@ -0,0 +1,29 @@ +// Copyright (C) 2024 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +use lambda_http::{run, service_fn}; +use quickwit_lambda::logger; +use quickwit_lambda::searcher::handler; + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + logger::setup_lambda_tracer()?; + let func = service_fn(handler); + run(func).await.map_err(|e| anyhow::anyhow!(e)) +} diff --git a/quickwit/quickwit-lambda/src/indexer/environment.rs b/quickwit/quickwit-lambda/src/indexer/environment.rs new file mode 100644 index 00000000000..92f264a268c --- /dev/null +++ b/quickwit/quickwit-lambda/src/indexer/environment.rs @@ -0,0 +1,39 @@ +// Copyright (C) 2024 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ +use std::env::var; + +use once_cell::sync::Lazy; + +pub const CONFIGURATION_TEMPLATE: &str = "version: 0.6 +node_id: lambda-indexer +metastore_uri: s3://${QW_LAMBDA_METASTORE_BUCKET}/index +default_index_root_uri: s3://${QW_LAMBDA_INDEX_BUCKET}/index +data_dir: /tmp +"; + +pub static INDEX_CONFIG_URI: Lazy = Lazy::new(|| { + var("QW_LAMBDA_INDEX_CONFIG_URI").expect("QW_LAMBDA_INDEX_CONFIG_URI must be set") +}); + +pub static INDEX_ID: Lazy = + Lazy::new(|| var("QW_LAMBDA_INDEX_ID").expect("QW_LAMBDA_INDEX_ID must be set")); + +pub static DISABLE_MERGE: Lazy = + Lazy::new(|| var("QW_LAMBDA_DISABLE_MERGE").is_ok_and(|v| v.as_str() == "true")); diff --git a/quickwit/quickwit-lambda/src/indexer/handler.rs b/quickwit/quickwit-lambda/src/indexer/handler.rs new file mode 100644 index 00000000000..9160b10b9b8 --- /dev/null +++ b/quickwit/quickwit-lambda/src/indexer/handler.rs @@ -0,0 +1,78 @@ +// Copyright (C) 2024 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ +use lambda_runtime::{Error, LambdaEvent}; +use serde_json::Value; +use tracing::{debug_span, error, info, info_span, Instrument}; + +use super::environment::{DISABLE_MERGE, INDEX_CONFIG_URI, INDEX_ID}; +use super::ingest::{ingest, IngestArgs}; +use super::model::IndexerEvent; +use crate::logger; +use crate::utils::LambdaContainerContext; + +async fn indexer_handler(event: LambdaEvent) -> Result { + let container_ctx = LambdaContainerContext::load(); + let memory = event.context.env_config.memory; + let payload = serde_json::from_value::(event.payload)?; + + let ingest_res = ingest(IngestArgs { + input_path: payload.uri(), + input_format: quickwit_config::SourceInputFormat::Json, + overwrite: false, + vrl_script: None, + clear_cache: true, + }) + .instrument(debug_span!( + "ingest", + memory, + env.INDEX_CONFIG_URI = *INDEX_CONFIG_URI, + env.INDEX_ID = *INDEX_ID, + env.DISABLE_MERGE = *DISABLE_MERGE, + cold = container_ctx.cold, + container_id = container_ctx.container_id, + )) + .await; + + match ingest_res { + Ok(stats) => { + info!(stats=?stats, "Indexing succeeded"); + Ok(serde_json::to_value(stats)?) + } + Err(e) => { + error!(err=?e, "Indexing failed"); + Err(anyhow::anyhow!("Indexing failed").into()) + } + } +} + +pub async fn handler(event: LambdaEvent) -> Result { + let request_id = event.context.request_id.clone(); + let mut response = indexer_handler(event) + .instrument(info_span!("indexer_handler", request_id)) + .await; + if let Err(e) = &response { + error!(err=?e, "Handler failed"); + } + if let Ok(Value::Object(ref mut map)) = response { + map.insert("request_id".to_string(), Value::String(request_id)); + } + logger::flush_tracer(); + response +} diff --git a/quickwit/quickwit-lambda/src/indexer/ingest.rs b/quickwit/quickwit-lambda/src/indexer/ingest.rs new file mode 100644 index 00000000000..e1a37bec90f --- /dev/null +++ b/quickwit/quickwit-lambda/src/indexer/ingest.rs @@ -0,0 +1,251 @@ +// Copyright (C) 2024 Quickwit, Inc. 
+// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +use std::collections::HashSet; +use std::num::NonZeroUsize; +use std::path::{Path, PathBuf}; + +use anyhow::{bail, Context}; +use chitchat::transport::ChannelTransport; +use chitchat::FailureDetectorConfig; +use quickwit_actors::Universe; +use quickwit_cli::tool::start_statistics_reporting_loop; +use quickwit_cli::{run_index_checklist, start_actor_runtimes}; +use quickwit_cluster::{Cluster, ClusterMember}; +use quickwit_common::pubsub::EventBroker; +use quickwit_common::runtimes::RuntimesConfig; +use quickwit_common::uri::Uri; +use quickwit_config::merge_policy_config::MergePolicyConfig; +use quickwit_config::service::QuickwitService; +use quickwit_config::{ + load_index_config_from_user_config, ConfigFormat, IndexConfig, IndexerConfig, NodeConfig, + SourceConfig, SourceInputFormat, SourceParams, TransformConfig, CLI_INGEST_SOURCE_ID, +}; +use quickwit_index_management::{clear_cache_directory, IndexService}; +use quickwit_indexing::actors::{IndexingService, MergePipelineId}; +use quickwit_indexing::models::{ + DetachIndexingPipeline, DetachMergePipeline, IndexingStatistics, SpawnPipeline, +}; +use quickwit_ingest::IngesterPool; +use quickwit_metastore::CreateIndexRequestExt; 
+use quickwit_proto::indexing::CpuCapacity; +use quickwit_proto::metastore::{CreateIndexRequest, MetastoreError, MetastoreService}; +use quickwit_proto::types::{NodeId, PipelineUid}; +use quickwit_storage::StorageResolver; +use quickwit_telemetry::payload::{QuickwitFeature, QuickwitTelemetryInfo, TelemetryEvent}; +use tracing::{debug, info, instrument}; + +use super::environment::{CONFIGURATION_TEMPLATE, DISABLE_MERGE, INDEX_CONFIG_URI, INDEX_ID}; +use crate::utils::load_node_config; + +#[derive(Debug, Eq, PartialEq)] +pub struct IngestArgs { + pub input_path: PathBuf, + pub input_format: SourceInputFormat, + pub overwrite: bool, + pub vrl_script: Option, + pub clear_cache: bool, +} + +async fn create_empty_cluster(config: &NodeConfig) -> anyhow::Result { + let self_node = ClusterMember { + node_id: NodeId::new(config.node_id.clone()), + generation_id: quickwit_cluster::GenerationId::now(), + is_ready: false, + enabled_services: HashSet::new(), + gossip_advertise_addr: config.gossip_advertise_addr, + grpc_advertise_addr: config.grpc_advertise_addr, + indexing_tasks: Vec::new(), + indexing_cpu_capacity: CpuCapacity::zero(), + }; + let cluster = Cluster::join( + config.cluster_id.clone(), + self_node, + config.gossip_advertise_addr, + Vec::new(), + FailureDetectorConfig::default(), + &ChannelTransport::default(), + ) + .await?; + Ok(cluster) +} + +/// TODO refactor with `dir_and_filename` in file source +pub fn dir_and_filename(filepath: &Path) -> anyhow::Result<(Uri, &Path)> { + let dir_uri: Uri = filepath + .parent() + .context("Parent directory could not be resolved")? + .to_str() + .context("Path cannot be turned to string")? 
+ .parse()?; + let file_name = filepath + .file_name() + .context("Path does not appear to be a file")?; + Ok((dir_uri, file_name.as_ref())) +} + +#[instrument(level = "debug", skip(resolver))] +async fn load_index_config( + resolver: &StorageResolver, + default_index_root_uri: &Uri, +) -> anyhow::Result { + let (dir, file) = dir_and_filename(Path::new(&*INDEX_CONFIG_URI))?; + let index_config_storage = resolver.resolve(&dir).await?; + let bytes = index_config_storage.get_all(file).await?; + let mut index_config = load_index_config_from_user_config( + ConfigFormat::Yaml, + bytes.as_slice(), + default_index_root_uri, + )?; + if *DISABLE_MERGE { + debug!("force disable merges"); + index_config.indexing_settings.merge_policy = MergePolicyConfig::Nop; + } + Ok(index_config) +} + +pub async fn ingest(args: IngestArgs) -> anyhow::Result { + debug!(args=?args, "lambda-ingest"); + let (config, storage_resolver, mut metastore) = + load_node_config(CONFIGURATION_TEMPLATE).await?; + + let source_params = SourceParams::file(args.input_path); + let transform_config = args + .vrl_script + .map(|vrl_script| TransformConfig::new(vrl_script, None)); + let source_config = SourceConfig { + source_id: CLI_INGEST_SOURCE_ID.to_string(), + max_num_pipelines_per_indexer: NonZeroUsize::new(1).expect("1 is always non-zero."), + desired_num_pipelines: NonZeroUsize::new(1).expect("1 is always non-zero."), + enabled: true, + source_params, + transform_config, + input_format: args.input_format, + }; + + let checklist_result = run_index_checklist( + &mut metastore, + &storage_resolver, + &INDEX_ID, + Some(&source_config), + ) + .await; + if let Err(e) = checklist_result { + let is_not_found = e + .downcast_ref() + .is_some_and(|meta_error| matches!(meta_error, MetastoreError::NotFound(_))); + if !is_not_found { + bail!(e); + } + info!( + index_id = *INDEX_ID, + index_config_uri = *INDEX_CONFIG_URI, + "Index not found, creating it" + ); + let index_config = + load_index_config(&storage_resolver, 
&config.default_index_root_uri).await?; + if index_config.index_id != *INDEX_ID { + bail!( + "Expected index ID was {} but config file had {}", + *INDEX_ID, + index_config.index_id, + ); + } + metastore + .create_index(CreateIndexRequest::try_from_index_config(index_config)?) + .await?; + debug!("index created"); + } else if args.overwrite { + info!( + index_id = *INDEX_ID, + "Overwrite enabled, clearing existing index", + ); + let mut index_service = IndexService::new(metastore.clone(), storage_resolver.clone()); + index_service.clear_index(&INDEX_ID).await?; + } + // The indexing service needs to update its cluster chitchat state so that the control plane is + // aware of the running tasks. We thus create a fake cluster to instantiate the indexing service + // and avoid impacting potential control plane running on the cluster. + let cluster = create_empty_cluster(&config).await?; + let indexer_config = IndexerConfig { + ..Default::default() + }; + let runtimes_config = RuntimesConfig::default(); + let services: HashSet = + HashSet::from_iter([QuickwitService::Indexer.as_str().to_string()]); + let telemetry_info = + QuickwitTelemetryInfo::new(services, HashSet::from_iter([QuickwitFeature::AwsLambda])); + let _telemetry_handle_opt = quickwit_telemetry::start_telemetry_loop(telemetry_info); + quickwit_telemetry::send_telemetry_event(TelemetryEvent::RunCommand).await; + start_actor_runtimes( + runtimes_config, + &HashSet::from_iter([QuickwitService::Indexer]), + )?; + let indexing_server = IndexingService::new( + config.node_id.clone(), + config.data_dir_path.clone(), + indexer_config, + runtimes_config.num_threads_blocking, + cluster, + metastore, + None, + IngesterPool::default(), + storage_resolver, + EventBroker::default(), + ) + .await?; + let universe = Universe::new(); + let (indexing_server_mailbox, indexing_server_handle) = + universe.spawn_builder().spawn(indexing_server); + let pipeline_id = indexing_server_mailbox + .ask_for_res(SpawnPipeline { + 
index_id: INDEX_ID.clone(), + source_config, + pipeline_uid: PipelineUid::default(), + }) + .await?; + let merge_pipeline_handle = indexing_server_mailbox + .ask_for_res(DetachMergePipeline { + pipeline_id: MergePipelineId::from(&pipeline_id), + }) + .await?; + let indexing_pipeline_handle = indexing_server_mailbox + .ask_for_res(DetachIndexingPipeline { pipeline_id }) + .await?; + debug!("wait for indexing statistics"); + let statistics = start_statistics_reporting_loop(indexing_pipeline_handle, false).await?; + debug!("indexing completed, tear down actors"); + merge_pipeline_handle.quit().await; + universe + .send_exit_with_success(&indexing_server_mailbox) + .await?; + indexing_server_handle.join().await; + universe.quit().await; + + if args.clear_cache { + info!("clearing local cache directory"); + clear_cache_directory(&config.data_dir_path).await?; + info!("local cache directory cleared"); + } + + if statistics.num_invalid_docs > 0 { + bail!("Failed to ingest {} documents", statistics.num_invalid_docs) + } + Ok(statistics) +} diff --git a/quickwit/quickwit-lambda/src/indexer/mod.rs b/quickwit/quickwit-lambda/src/indexer/mod.rs new file mode 100644 index 00000000000..a77e8485865 --- /dev/null +++ b/quickwit/quickwit-lambda/src/indexer/mod.rs @@ -0,0 +1,25 @@ +// Copyright (C) 2024 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. 
+// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +mod environment; +mod handler; +mod ingest; +mod model; + +pub use handler::handler; diff --git a/quickwit/quickwit-lambda/src/indexer/model.rs b/quickwit/quickwit-lambda/src/indexer/model.rs new file mode 100644 index 00000000000..fe6ae14aea4 --- /dev/null +++ b/quickwit/quickwit-lambda/src/indexer/model.rs @@ -0,0 +1,112 @@ +// Copyright (C) 2024 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +use std::path::PathBuf; + +use aws_lambda_events::event::s3::S3Event; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Deserialize, Serialize)] +#[serde(untagged)] +/// Event types that can be used to invoke the indexer Lambda. 
+pub enum IndexerEvent { + Custom { source_uri: String }, + S3(S3Event), +} + +impl IndexerEvent { + pub fn uri(&self) -> PathBuf { + match &self { + IndexerEvent::Custom { source_uri } => PathBuf::from(source_uri), + IndexerEvent::S3(event) => [ + "s3://", + event.records[0].s3.bucket.name.as_ref().unwrap(), + event.records[0].s3.object.key.as_ref().unwrap(), + ] + .iter() + .collect(), + } + } +} + +#[cfg(test)] +mod tests { + use serde_json::json; + + use super::*; + + #[test] + fn test_custom_event_uri() { + let cust_event = json!({ + "source_uri": "s3://quickwit-test/test.json" + }); + let parsed_cust_event: IndexerEvent = serde_json::from_value(cust_event).unwrap(); + assert_eq!( + parsed_cust_event.uri(), + PathBuf::from("s3://quickwit-test/test.json"), + ); + } + + #[test] + fn test_s3_event_uri() { + let cust_event = json!({ + "Records": [ + { + "eventVersion": "2.0", + "eventSource": "aws:s3", + "awsRegion": "us-east-1", + "eventTime": "1970-01-01T00:00:00.000Z", + "eventName": "ObjectCreated:Put", + "userIdentity": { + "principalId": "EXAMPLE" + }, + "requestParameters": { + "sourceIPAddress": "127.0.0.1" + }, + "responseElements": { + "x-amz-request-id": "EXAMPLE123456789", + "x-amz-id-2": "EXAMPLE123/5678abcdefghijklambdaisawesome/mnopqrstuvwxyzABCDEFGH" + }, + "s3": { + "s3SchemaVersion": "1.0", + "configurationId": "testConfigRule", + "bucket": { + "name": "quickwit-test", + "ownerIdentity": { + "principalId": "EXAMPLE" + }, + "arn": "arn:aws:s3:::quickwit-test" + }, + "object": { + "key": "test.json", + "size": 1024, + "eTag": "0123456789abcdef0123456789abcdef", + "sequencer": "0A1B2C3D4E5F678901" + } + } + } + ] + }); + let parsed_cust_event: IndexerEvent = serde_json::from_value(cust_event).unwrap(); + assert_eq!( + parsed_cust_event.uri(), + PathBuf::from("s3://quickwit-test/test.json"), + ); + } +} diff --git a/quickwit/quickwit-lambda/src/lib.rs b/quickwit/quickwit-lambda/src/lib.rs new file mode 100644 index 00000000000..0bb10d0cd83 --- 
/dev/null +++ b/quickwit/quickwit-lambda/src/lib.rs @@ -0,0 +1,23 @@ +// Copyright (C) 2024 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +pub mod indexer; +pub mod logger; +pub mod searcher; +mod utils; diff --git a/quickwit/quickwit-lambda/src/logger.rs b/quickwit/quickwit-lambda/src/logger.rs new file mode 100644 index 00000000000..2a10c3ab6ed --- /dev/null +++ b/quickwit/quickwit-lambda/src/logger.rs @@ -0,0 +1,153 @@ +// Copyright (C) 2024 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. 
+// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +use anyhow::Context; +use once_cell::sync::OnceCell; +use opentelemetry::sdk::propagation::TraceContextPropagator; +use opentelemetry::sdk::trace::{BatchConfig, TracerProvider}; +use opentelemetry::sdk::{trace, Resource}; +use opentelemetry::{global, KeyValue}; +use opentelemetry_otlp::WithExportConfig; +use quickwit_serve::BuildInfo; +use tracing::{debug, Level}; +use tracing_subscriber::fmt::format::{FmtSpan, JsonFields}; +use tracing_subscriber::fmt::time::UtcTime; +use tracing_subscriber::prelude::*; +use tracing_subscriber::registry::LookupSpan; +use tracing_subscriber::{EnvFilter, Layer}; + +static TRACER_PROVIDER: OnceCell> = OnceCell::new(); +pub(crate) const RUNTIME_CONTEXT_SPAN: &str = "runtime_context"; + +fn fmt_layer(level: Level, ansi: bool) -> impl Layer +where + S: for<'a> LookupSpan<'a>, + S: tracing::Subscriber, +{ + let default_filter = format!("quickwit={level}") + .parse() + .expect("Invalid default filter"); + let env_filter = EnvFilter::builder() + .with_default_directive(default_filter) + .from_env_lossy(); + let event_format = tracing_subscriber::fmt::format() + .with_target(true) + .with_timer( + // We do not rely on the Rfc3339 implementation, because it has a nanosecond precision. 
+ // See discussion here: https://github.com/time-rs/time/discussions/418 + UtcTime::new( + time::format_description::parse( + "[year]-[month]-[day]T[hour]:[minute]:[second].[subsecond digits:3]Z", + ) + .expect("Time format invalid."), + ), + ) + .json(); + tracing_subscriber::fmt::layer::() + .with_span_events(FmtSpan::NEW | FmtSpan::CLOSE) + .event_format(event_format) + .fmt_fields(JsonFields::default()) + .with_ansi(ansi) + .with_filter(env_filter) +} + +fn otlp_layer( + ot_url: String, + ot_auth: String, + level: Level, + build_info: &BuildInfo, +) -> impl Layer +where + S: for<'a> LookupSpan<'a>, + S: tracing::Subscriber, +{ + let headers = std::collections::HashMap::from([("Authorization".into(), ot_auth)]); + let otlp_exporter = opentelemetry_otlp::new_exporter() + .http() + .with_endpoint(ot_url) + .with_headers(headers); + // In debug mode, Quickwit can generate a lot of spans, and the default queue size of 2048 + // is too small. + let batch_config = BatchConfig::default().with_max_queue_size(32768); + let trace_config = trace::config().with_resource(Resource::new([ + KeyValue::new("service.name", "quickwit"), + KeyValue::new("service.version", build_info.version.clone()), + ])); + let env_filter = std::env::var(EnvFilter::DEFAULT_ENV) + .map(|_| EnvFilter::from_default_env()) + .or_else(|_| { + // record the runtime context span for trace querying + EnvFilter::try_new(format!( + "quickwit={level},quickwit[{RUNTIME_CONTEXT_SPAN}]=trace" + )) + }) + .expect("Failed to set up OTLP tracing filter."); + let tracer = opentelemetry_otlp::new_pipeline() + .tracing() + .with_exporter(otlp_exporter) + .with_trace_config(trace_config) + .with_batch_config(batch_config) + .install_batch(opentelemetry::runtime::Tokio) + .expect("Failed to initialize OpenTelemetry OTLP exporter."); + TRACER_PROVIDER.set(tracer.provider()).unwrap(); + tracing_opentelemetry::layer() + .with_tracer(tracer) + .with_filter(env_filter) +} + +fn setup_logging_and_tracing( + level: Level, + 
ansi: bool, + build_info: &BuildInfo, +) -> anyhow::Result<()> { + global::set_text_map_propagator(TraceContextPropagator::new()); + let registry = tracing_subscriber::registry(); + let otlp_config = ( + std::env::var("QW_LAMBDA_OPENTELEMETRY_URL"), + std::env::var("QW_LAMBDA_OPENTELEMETRY_AUTHORIZATION"), + ); + if let (Ok(ot_url), Ok(ot_auth)) = otlp_config { + registry + .with(fmt_layer(level, ansi)) + .with(otlp_layer(ot_url, ot_auth, level, build_info)) + .try_init() + .context("Failed to set up tracing.")?; + } else { + registry + .with(fmt_layer(level, ansi)) + .try_init() + .context("Failed to set up tracing.")?; + } + Ok(()) +} + +pub fn setup_lambda_tracer() -> anyhow::Result<()> { + setup_logging_and_tracing(Level::INFO, false, BuildInfo::get()) +} + +pub fn flush_tracer() { + if let Some(Some(tracer_provider)) = TRACER_PROVIDER.get() { + debug!("flush tracers"); + for res in tracer_provider.force_flush() { + if let Err(err) = res { + debug!(err=?err, "Failed to flush tracer"); + } + } + } +} diff --git a/quickwit/quickwit-lambda/src/searcher/environment.rs b/quickwit/quickwit-lambda/src/searcher/environment.rs new file mode 100644 index 00000000000..b995949dff9 --- /dev/null +++ b/quickwit/quickwit-lambda/src/searcher/environment.rs @@ -0,0 +1,37 @@ +// Copyright (C) 2024 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. 
+// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +use std::env::var; + +use once_cell::sync::Lazy; + +pub(crate) const CONFIGURATION_TEMPLATE: &str = "version: 0.6 +node_id: lambda-searcher +metastore_uri: s3://${QW_LAMBDA_METASTORE_BUCKET}/index +default_index_root_uri: s3://${QW_LAMBDA_INDEX_BUCKET}/index +data_dir: /tmp +searcher: + partial_request_cache_capacity: ${QW_LAMBDA_PARTIAL_REQUEST_CACHE_CAPACITY:-64M} +"; + +pub(crate) static INDEX_ID: Lazy = + Lazy::new(|| var("QW_LAMBDA_INDEX_ID").expect("QW_LAMBDA_INDEX_ID must be set")); + +pub(crate) static DISABLE_SEARCH_CACHE: Lazy = + Lazy::new(|| var("QW_LAMBDA_DISABLE_SEARCH_CACHE").is_ok_and(|v| v.as_str() == "true")); diff --git a/quickwit/quickwit-lambda/src/searcher/handler.rs b/quickwit/quickwit-lambda/src/searcher/handler.rs new file mode 100644 index 00000000000..e57de93bdac --- /dev/null +++ b/quickwit/quickwit-lambda/src/searcher/handler.rs @@ -0,0 +1,84 @@ +// Copyright (C) 2024 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ +use anyhow::Context; +use flate2::write::GzEncoder; +use flate2::Compression; +use lambda_http::http::header::{CONTENT_ENCODING, CONTENT_TYPE}; +use lambda_http::{Body, Error, IntoResponse, Request, RequestExt, RequestPayloadExt, Response}; +use quickwit_search::SearchResponseRest; +use quickwit_serve::SearchRequestQueryString; +use tracing::{debug_span, error, info_span, instrument, Instrument}; + +use super::environment::{DISABLE_SEARCH_CACHE, INDEX_ID}; +use super::search::{search, SearchArgs}; +use crate::logger; +use crate::utils::LambdaContainerContext; + +#[instrument(skip_all)] +fn deflate_serialize(resp: SearchResponseRest) -> anyhow::Result> { + let value = serde_json::to_value(resp)?; + let mut buffer = Vec::new(); + let mut gz = GzEncoder::new(&mut buffer, Compression::default()); + serde_json::to_writer(&mut gz, &value)?; + gz.finish()?; + Ok(buffer) +} + +pub async fn searcher_handler(request: Request) -> Result { + let container_ctx = LambdaContainerContext::load(); + let memory = request.lambda_context().env_config.memory; + let payload = request + .payload::()? 
+ .context("Empty payload")?; + + let search_res = search(SearchArgs { query: payload }) + .instrument(debug_span!( + "search", + memory, + env.INDEX_ID = *INDEX_ID, + env.DISABLE_SEARCH_CACHE = *DISABLE_SEARCH_CACHE, + cold = container_ctx.cold, + container_id = container_ctx.container_id, + )) + .await?; + + let response_body = deflate_serialize(search_res)?; + + let response = Response::builder() + .header(CONTENT_ENCODING, "gzip") + .header(CONTENT_TYPE, "application/json") + .header("x-lambda-request-id", request.lambda_context().request_id) + .body(Body::Binary(response_body)) + .context("Could not build response")?; + Ok(response) +} + +pub async fn handler(request: Request) -> Result { + let request_id = request.lambda_context().request_id.clone(); + let response = searcher_handler(request) + .instrument(info_span!("searcher_handler", request_id)) + .await; + + if let Err(e) = &response { + error!(err=?e, "Handler failed"); + } + logger::flush_tracer(); + response +} diff --git a/quickwit/quickwit-lambda/src/searcher/mod.rs b/quickwit/quickwit-lambda/src/searcher/mod.rs new file mode 100644 index 00000000000..ed17d810d9d --- /dev/null +++ b/quickwit/quickwit-lambda/src/searcher/mod.rs @@ -0,0 +1,24 @@ +// Copyright (C) 2024 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. 
+// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +mod environment; +mod handler; +mod search; + +pub use handler::handler; diff --git a/quickwit/quickwit-lambda/src/searcher/search.rs b/quickwit/quickwit-lambda/src/searcher/search.rs new file mode 100644 index 00000000000..3d0e0d249f9 --- /dev/null +++ b/quickwit/quickwit-lambda/src/searcher/search.rs @@ -0,0 +1,127 @@ +// Copyright (C) 2024 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ +use std::collections::HashSet; +use std::net::{Ipv4Addr, SocketAddr}; +use std::sync::Arc; + +use quickwit_config::service::QuickwitService; +use quickwit_config::SearcherConfig; +use quickwit_proto::metastore::MetastoreServiceClient; +use quickwit_proto::search::{SearchRequest, SearchResponse}; +use quickwit_search::{ + root_search, ClusterClient, Result as SearchResult, SearchJobPlacer, SearchResponseRest, + SearchServiceClient, SearchServiceImpl, SearcherContext, SearcherPool, +}; +use quickwit_serve::{search_request_from_api_request, SearchRequestQueryString}; +use quickwit_storage::StorageResolver; +use quickwit_telemetry::payload::{QuickwitFeature, QuickwitTelemetryInfo, TelemetryEvent}; +use tokio::sync::OnceCell; +use tracing::debug; + +use super::environment::{CONFIGURATION_TEMPLATE, DISABLE_SEARCH_CACHE, INDEX_ID}; +use crate::utils::load_node_config; + +static LAMBDA_SEARCH_CACHE: OnceCell = OnceCell::const_new(); + +#[derive(Clone)] +struct LambdaSearchCtx { + pub searcher_context: Arc, + pub cluster_client: ClusterClient, +} + +impl LambdaSearchCtx { + async fn instantiate( + searcher_config: SearcherConfig, + metastore: MetastoreServiceClient, + storage_resolver: StorageResolver, + ) -> Self { + let socket_addr = SocketAddr::new(Ipv4Addr::new(127, 0, 0, 1).into(), 7280u16); + let searcher_pool = SearcherPool::default(); + let search_job_placer = SearchJobPlacer::new(searcher_pool.clone()); + let cluster_client = ClusterClient::new(search_job_placer); + let searcher_context = Arc::new(SearcherContext::new(searcher_config, None)); + let search_service = Arc::new(SearchServiceImpl::new( + metastore, + storage_resolver, + cluster_client.clone(), + searcher_context.clone(), + )); + let search_service_client = + SearchServiceClient::from_service(search_service.clone(), socket_addr); + searcher_pool.insert(socket_addr, search_service_client); + Self { + searcher_context, + cluster_client, + } + } +} + +async fn single_node_search( + search_config: 
SearcherConfig, + search_request: SearchRequest, + metastore: MetastoreServiceClient, + storage_resolver: StorageResolver, +) -> SearchResult { + let lambda_search_ctx = if *DISABLE_SEARCH_CACHE { + LambdaSearchCtx::instantiate(search_config, metastore.clone(), storage_resolver).await + } else { + let cached_ctx = LAMBDA_SEARCH_CACHE + .get_or_init(|| { + LambdaSearchCtx::instantiate(search_config, metastore.clone(), storage_resolver) + }) + .await; + LambdaSearchCtx::clone(cached_ctx) + }; + root_search( + &lambda_search_ctx.searcher_context, + search_request, + metastore, + &lambda_search_ctx.cluster_client, + ) + .await +} + +#[derive(Debug, Eq, PartialEq)] +pub struct SearchArgs { + pub query: SearchRequestQueryString, +} + +pub async fn search(args: SearchArgs) -> anyhow::Result { + debug!(args=?args, "lambda-search"); + let (node_config, storage_resolver, metastore) = + load_node_config(CONFIGURATION_TEMPLATE).await?; + let services: HashSet = + HashSet::from_iter([QuickwitService::Searcher.as_str().to_string()]); + let telemetry_info = + QuickwitTelemetryInfo::new(services, HashSet::from_iter([QuickwitFeature::AwsLambda])); + let _telemetry_handle_opt = quickwit_telemetry::start_telemetry_loop(telemetry_info); + quickwit_telemetry::send_telemetry_event(TelemetryEvent::RunCommand).await; + let search_request = search_request_from_api_request(vec![INDEX_ID.clone()], args.query)?; + debug!(search_request=?search_request, "search-request"); + let search_response: SearchResponse = single_node_search( + node_config.searcher_config, + search_request, + metastore, + storage_resolver, + ) + .await?; + let search_response_rest = SearchResponseRest::try_from(search_response)?; + Ok(search_response_rest) +} diff --git a/quickwit/quickwit-lambda/src/utils.rs b/quickwit/quickwit-lambda/src/utils.rs new file mode 100644 index 00000000000..81781ff3aac --- /dev/null +++ b/quickwit/quickwit-lambda/src/utils.rs @@ -0,0 +1,67 @@ +// Copyright (C) 2024 Quickwit, Inc. 
+// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +use std::sync::atomic::AtomicU32; +use std::sync::atomic::Ordering::SeqCst; + +use anyhow::Context; +use quickwit_config::{ConfigFormat, NodeConfig}; +use quickwit_metastore::MetastoreResolver; +use quickwit_proto::metastore::MetastoreServiceClient; +use quickwit_storage::StorageResolver; +use tracing::info; + +pub(crate) async fn load_node_config( + config_template: &str, +) -> anyhow::Result<(NodeConfig, StorageResolver, MetastoreServiceClient)> { + let config = NodeConfig::load(ConfigFormat::Yaml, config_template.as_bytes()) + .await + .with_context(|| format!("Failed to parse node config `{config_template}`."))?; + info!(config=?config, "loaded node config"); + let storage_resolver = StorageResolver::configured(&config.storage_configs); + let metastore_resolver = + MetastoreResolver::configured(storage_resolver.clone(), &config.metastore_configs); + let metastore: MetastoreServiceClient = + metastore_resolver.resolve(&config.metastore_uri).await?; + Ok((config, storage_resolver, metastore)) +} + +static CONTAINER_ID: AtomicU32 = AtomicU32::new(0); + +pub struct LambdaContainerContext { + pub container_id: u32, + pub cold: bool, +} + +impl LambdaContainerContext { + /// 
Configure and return the Lambda container context. + /// + /// The `cold` field returned will be `true` only the first time this + /// function is called. + pub fn load() -> Self { + let mut container_id = CONTAINER_ID.load(SeqCst); + let mut cold = false; + if container_id == 0 { + container_id = rand::random(); + CONTAINER_ID.store(container_id, SeqCst); + cold = true; + } + Self { container_id, cold } + } +} diff --git a/quickwit/quickwit-telemetry/src/payload.rs b/quickwit/quickwit-telemetry/src/payload.rs index 4d2f193e460..17082f5bea9 100644 --- a/quickwit/quickwit-telemetry/src/payload.rs +++ b/quickwit/quickwit-telemetry/src/payload.rs @@ -124,6 +124,7 @@ pub enum QuickwitFeature { Jaeger, Otlp, PostgresqMetastore, + AwsLambda, } fn hashed_host_username() -> String {