diff --git a/.github/workflows/cbench.yml b/.github/workflows/cbench.yml
index c7fa506725c..ad4b6385363 100644
--- a/.github/workflows/cbench.yml
+++ b/.github/workflows/cbench.yml
@@ -9,10 +9,15 @@ on:
       - "quickwit/**"
       - "!quickwit/quickwit-ui/**"
   # For security reasons (to make sure the list of allowed users is
-  # trusted), make sure we run the workflow definition the base of the
-  # pull request.
+  # trusted), make sure we run the workflow definition from the base
+  # commit of the pull request.
   pull_request_target:
 
+# This is required for github.rest.issues.createComment.
+permissions:
+  issues: write
+  pull-requests: write
+
 env:
   RUSTFLAGS: --cfg tokio_unstable
 
@@ -26,7 +31,7 @@ jobs:
     # The self-hosted runner must have the system deps installed for QW and
     # the benchmark, because we don't have root access.
     runs-on: self-hosted
-    timeout-minutes: 40
+    timeout-minutes: 60
    steps:
      - name: Set authorized users
        id: authorized-users
@@ -79,19 +84,54 @@ jobs:
       - name: Run Benchmark on SSD
         if: contains(fromJSON(steps.authorized-users.outputs.users), github.actor)
         id: bench-run-ssd
-        run: python3 ./run.py --search-only --storage pd-ssd --engine quickwit --track generated-logs --tags "${{ github.event_name }}_${{ github.ref_name }}" --manage-engine --source github_workflow --binary-path ../quickwit/quickwit/target/release/quickwit --instance "{autodetect_gcp}" --export-to-endpoint=https://qw-benchmarks.104.155.161.122.nip.io --engine-data-dir "{qwdata_local}" --write-exported-run-url-to-file $GITHUB_OUTPUT
+        run: python3 ./run.py --search-only --storage pd-ssd --engine quickwit --track generated-logs --tags "${{ github.event_name }}_${{ github.ref_name }}" --manage-engine --source github_workflow --binary-path ../quickwit/quickwit/target/release/quickwit --instance "{autodetect_gcp}" --export-to-endpoint=https://qw-benchmarks.104.155.161.122.nip.io --engine-data-dir "{qwdata_local}" --github-workflow-user "${{ github.actor }}" --github-workflow-run-id "${{ github.run_id }}" --comparison-reference-tag="push_main" --github-pr "${{ github.event_name == 'pull_request_target' && github.event.number || 0 }}" --comparison-reference-commit "${{ github.event_name == 'pull_request_target' && github.sha || github.event.before }}" --write-exported-run-url-to-file $GITHUB_OUTPUT
         working-directory: ./benchmarks
       - name: Run Benchmark on cloud storage
         if: contains(fromJSON(steps.authorized-users.outputs.users), github.actor)
         id: bench-run-cloud-storage
-        run: python3 ./run.py --search-only --storage gcs --engine quickwit --track generated-logs --tags "${{ github.event_name }}_${{ github.ref_name }}" --manage-engine --source github_workflow --binary-path ../quickwit/quickwit/target/release/quickwit --instance "{autodetect_gcp}" --export-to-endpoint=https://qw-benchmarks.104.155.161.122.nip.io --engine-data-dir "{qwdata_gcs}" --write-exported-run-url-to-file $GITHUB_OUTPUT
+        run: python3 ./run.py --search-only --storage gcs --engine quickwit --track generated-logs --tags "${{ github.event_name }}_${{ github.ref_name }}" --manage-engine --source github_workflow --binary-path ../quickwit/quickwit/target/release/quickwit --instance "{autodetect_gcp}" --export-to-endpoint=https://qw-benchmarks.104.155.161.122.nip.io --engine-data-dir "{qwdata_gcs}" --engine-config-file engines/quickwit/configs/cbench_quickwit_gcs.yaml --github-workflow-user "${{ github.actor }}" --github-workflow-run-id "${{ github.run_id }}" --comparison-reference-tag="push_main" --github-pr "${{ github.event_name == 'pull_request_target' && github.event.number || 0 }}" --comparison-reference-commit "${{ github.event_name == 'pull_request_target' && github.sha || github.event.before }}" --write-exported-run-url-to-file $GITHUB_OUTPUT
         working-directory: ./benchmarks
       - name: Show results links
         if: contains(fromJSON(steps.authorized-users.outputs.users), github.actor)
         run: |
           echo "::notice title=Benchmark Results on SSD::${{ steps.bench-run-ssd.outputs.url }}"
+          echo "::notice title=Comparison of results on SSD::${{ steps.bench-run-ssd.outputs.comparison_text }}"
           echo "::notice title=Benchmark Results on Cloud Storage::${{ steps.bench-run-cloud-storage.outputs.url }}"
+          echo "::notice title=Comparison of results on Cloud Storage::${{ steps.bench-run-cloud-storage.outputs.comparison_text }}"
       - name: In case of auth error
         if: ${{ ! contains(fromJSON(steps.authorized-users.outputs.users), github.actor) }}
         run: |
           echo "::error title=User not allowed to run the benchmark::User must be in list ${{ steps.authorized-users.outputs.users }}"
+      - name: Add a PR comment with comparison results
+        uses: actions/github-script@v7
+        if: contains(fromJSON(steps.authorized-users.outputs.users), github.actor) && github.event_name == 'pull_request_target'
+        # Inspired by: https://github.com/actions/github-script/blob/60a0d83039c74a4aee543508d2ffcb1c3799cdea/.github/workflows/pull-request-test.yml
+        with:
+          script: |
+            // Get the existing comments.
+            const {data: comments} = await github.rest.issues.listComments({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.payload.number,
+            })
+
+            // Find any comment already made by the bot to update it.
+            const botComment = comments.find(comment => comment.user.id === 41898282)
+            const commentBody = "### On SSD:\n${{ steps.bench-run-ssd.outputs.comparison_text }}\n### On GCS:\n${{ steps.bench-run-cloud-storage.outputs.comparison_text }}\n"
+            if (botComment) {
+              // Update existing comment.
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: botComment.id,
+                body: commentBody
+              })
+            } else {
+              // New comment.
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.payload.number,
+                body: commentBody
+              })
+            }
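The `github-script` step above implements a small upsert: it looks for an existing comment from the GitHub Actions bot (user id 41898282) and updates it rather than posting a new comment on every benchmark run. Below is a minimal standalone sketch of the same find-or-update pattern in Python with `requests`, hitting the same public REST endpoints that `github.rest.issues.*` wraps; `owner`, `repo`, `pr_number`, and the `GITHUB_TOKEN` environment variable are placeholders, not values defined by this workflow.

```python
# Sketch of the comment-upsert pattern used by the workflow step above.
# Assumes a token with `issues: write` / `pull-requests: write` permissions.
import os
import requests

GITHUB_API = "https://api.github.com"
BOT_USER_ID = 41898282  # github-actions[bot], as hard-coded in the workflow script

def upsert_pr_comment(owner: str, repo: str, pr_number: int, body: str) -> None:
    headers = {
        "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",
        "Accept": "application/vnd.github+json",
    }
    # Pull requests share the issue comments API.
    comments_url = f"{GITHUB_API}/repos/{owner}/{repo}/issues/{pr_number}/comments"
    comments = requests.get(comments_url, headers=headers).json()
    bot_comment = next((c for c in comments if c["user"]["id"] == BOT_USER_ID), None)
    if bot_comment:
        # Update the existing bot comment instead of piling up a new one per run.
        url = f"{GITHUB_API}/repos/{owner}/{repo}/issues/comments/{bot_comment['id']}"
        requests.patch(url, headers=headers, json={"body": body}).raise_for_status()
    else:
        requests.post(comments_url, headers=headers, json={"body": body}).raise_for_status()
```

Updating in place keeps the PR thread readable when the benchmark reruns on every push.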
diff --git a/distribution/ecs/README.md b/distribution/ecs/README.md
index e201ec04e14..a1e20c1acad 100644
--- a/distribution/ecs/README.md
+++ b/distribution/ecs/README.md
@@ -33,8 +33,8 @@ Metastore database backups are disabled as restoring one would lead to
 inconsistencies with the index store on S3. To ensure high availability, you
 should enable `rds_config.multi_az` instead. To use your own Postgres database
 instead of creating a new RDS instance, configure the
-`external_postgres_uri_ssm_parameter_arn` variable (e.g
-`postgres://user:password@domain:port/db`).
+`external_postgres_uri_secret_arn` variable (e.g. the ARN of an SSM parameter
+with the value `postgres://user:password@domain:port/db`).
 
 Using NAT Gateways for the image registry is quite costly (approx.
 $0.05/hour/AZ). If you are not already using NAT Gateways in the AZs where Quickwit will be
@@ -64,7 +64,7 @@ IAM policies to indexers.
 We provide an example of self contained deployment with an ad-hoc VPC.
 
 > [!IMPORTANT]
-> This stack costs ~$150/month to run (Fargate tasks, NAT Gateways
+> This stack costs ~$200/month to run (Fargate tasks, NAT Gateways
 > and RDS)
 
 ### Deploy the Quickwit module and connect through a bastion
diff --git a/distribution/ecs/example/terraform.tf b/distribution/ecs/example/terraform.tf
index 2bdc9de2825..1479bdf0bfd 100644
--- a/distribution/ecs/example/terraform.tf
+++ b/distribution/ecs/example/terraform.tf
@@ -72,7 +72,7 @@ module "quickwit" {
   #   multi_az      = false
   # }
 
-  # external_postgres_uri_ssm_parameter_arn = aws_ssm_parameter.postgres_uri.arn
+  # external_postgres_uri_secret_arn = aws_ssm_parameter.postgres_uri.arn
 
   ## Example logging configuration
   # sidecar_container_definitions = {
diff --git a/distribution/ecs/quickwit/configs.tf b/distribution/ecs/quickwit/configs.tf
index 399826e5e03..e1bdd42f6fb 100644
--- a/distribution/ecs/quickwit/configs.tf
+++ b/distribution/ecs/quickwit/configs.tf
@@ -13,8 +13,8 @@ locals {
 
   quickwit_index_s3_prefix = var.quickwit_index_s3_prefix == "" ? aws_s3_bucket.index[0].id : var.quickwit_index_s3_prefix
 
-  use_external_rds           = var.external_postgres_uri_ssm_parameter_arn != ""
-  postgres_uri_parameter_arn = var.external_postgres_uri_ssm_parameter_arn != "" ? var.external_postgres_uri_ssm_parameter_arn : aws_ssm_parameter.postgres_credential[0].arn
+  use_external_rds        = var.external_postgres_uri_secret_arn != ""
+  postgres_uri_secret_arn = var.external_postgres_uri_secret_arn != "" ? var.external_postgres_uri_secret_arn : aws_ssm_parameter.postgres_credential[0].arn
 }
 
 resource "random_id" "module" {
diff --git a/distribution/ecs/quickwit/iam.tf b/distribution/ecs/quickwit/iam.tf
index 6536239738e..698fa584b46 100644
--- a/distribution/ecs/quickwit/iam.tf
+++ b/distribution/ecs/quickwit/iam.tf
@@ -46,7 +46,7 @@ data "aws_iam_policy_document" "quickwit_task_execution_permission" {
 
   statement {
     actions   = ["ssm:GetParameters"]
-    resources = [local.postgres_uri_parameter_arn]
+    resources = [local.postgres_uri_secret_arn]
   }
 
   statement {
diff --git a/distribution/ecs/quickwit/outputs.tf b/distribution/ecs/quickwit/outputs.tf
index 3aa73c8432f..91128e9cd76 100644
--- a/distribution/ecs/quickwit/outputs.tf
+++ b/distribution/ecs/quickwit/outputs.tf
@@ -5,3 +5,15 @@ output "indexer_service_name" {
 output "searcher_service_name" {
   value = "${aws_service_discovery_service.searcher.name}.${aws_service_discovery_private_dns_namespace.quickwit_internal.name}"
 }
+
+output "janitor_service_name" {
+  value = "${aws_service_discovery_service.janitor.name}.${aws_service_discovery_private_dns_namespace.quickwit_internal.name}"
+}
+
+output "control_plane_service_name" {
+  value = "${aws_service_discovery_service.control_plane.name}.${aws_service_discovery_private_dns_namespace.quickwit_internal.name}"
+}
+
+output "metastore_service_name" {
+  value = "${aws_service_discovery_service.metastore.name}.${aws_service_discovery_private_dns_namespace.quickwit_internal.name}"
+}
diff --git a/distribution/ecs/quickwit/quickwit-control-plane.tf b/distribution/ecs/quickwit/quickwit-control-plane.tf
index 3ab957b27f3..6d29e865b99 100644
--- a/distribution/ecs/quickwit/quickwit-control-plane.tf
+++ b/distribution/ecs/quickwit/quickwit-control-plane.tf
@@ -3,7 +3,7 @@ module "quickwit_control_plane" {
   service_name                   = "control_plane"
   service_discovery_registry_arn = aws_service_discovery_service.control_plane.arn
   cluster_arn                    = module.ecs_cluster.arn
-  postgres_credential_arn        = local.postgres_uri_parameter_arn
+  postgres_uri_secret_arn        = local.postgres_uri_secret_arn
   quickwit_peer_list             = local.quickwit_peer_list
   s3_access_policy_arn           = aws_iam_policy.quickwit_task_permission.arn
   task_execution_policy_arn      = aws_iam_policy.quickwit_task_execution_permission.arn
diff --git a/distribution/ecs/quickwit/quickwit-indexer.tf b/distribution/ecs/quickwit/quickwit-indexer.tf
index d4725f5d01b..441a1c7a8f8 100644
--- a/distribution/ecs/quickwit/quickwit-indexer.tf
+++ b/distribution/ecs/quickwit/quickwit-indexer.tf
@@ -3,7 +3,7 @@ module "quickwit_indexer" {
   service_name                   = "indexer"
   service_discovery_registry_arn = aws_service_discovery_service.indexer.arn
   cluster_arn                    = module.ecs_cluster.arn
-  postgres_credential_arn        = local.postgres_uri_parameter_arn
+  postgres_uri_secret_arn        = local.postgres_uri_secret_arn
   quickwit_peer_list             = local.quickwit_peer_list
   s3_access_policy_arn           = aws_iam_policy.quickwit_task_permission.arn
   task_execution_policy_arn      = aws_iam_policy.quickwit_task_execution_permission.arn
diff --git a/distribution/ecs/quickwit/quickwit-janitor.tf b/distribution/ecs/quickwit/quickwit-janitor.tf
index 884bdf52a6e..c1f3e39d041 100644
--- a/distribution/ecs/quickwit/quickwit-janitor.tf
+++ b/distribution/ecs/quickwit/quickwit-janitor.tf
@@ -3,7 +3,7 @@ module "quickwit_janitor" {
   service_name                   = "janitor"
   service_discovery_registry_arn = aws_service_discovery_service.janitor.arn
   cluster_arn                    = module.ecs_cluster.arn
-  postgres_credential_arn        = local.postgres_uri_parameter_arn
+  postgres_uri_secret_arn        = local.postgres_uri_secret_arn
   quickwit_peer_list             = local.quickwit_peer_list
   s3_access_policy_arn           = aws_iam_policy.quickwit_task_permission.arn
   task_execution_policy_arn      = aws_iam_policy.quickwit_task_execution_permission.arn
diff --git a/distribution/ecs/quickwit/quickwit-metastore.tf b/distribution/ecs/quickwit/quickwit-metastore.tf
index 248c5987db0..571db9c9d10 100644
--- a/distribution/ecs/quickwit/quickwit-metastore.tf
+++ b/distribution/ecs/quickwit/quickwit-metastore.tf
@@ -3,7 +3,7 @@ module "quickwit_metastore" {
   service_name                   = "metastore"
   service_discovery_registry_arn = aws_service_discovery_service.metastore.arn
   cluster_arn                    = module.ecs_cluster.arn
-  postgres_credential_arn        = local.postgres_uri_parameter_arn
+  postgres_uri_secret_arn        = local.postgres_uri_secret_arn
   quickwit_peer_list             = local.quickwit_peer_list
   s3_access_policy_arn           = aws_iam_policy.quickwit_task_permission.arn
   task_execution_policy_arn      = aws_iam_policy.quickwit_task_execution_permission.arn
diff --git a/distribution/ecs/quickwit/quickwit-searcher.tf b/distribution/ecs/quickwit/quickwit-searcher.tf
index 735d8aee308..26c71dc3685 100644
--- a/distribution/ecs/quickwit/quickwit-searcher.tf
+++ b/distribution/ecs/quickwit/quickwit-searcher.tf
@@ -3,7 +3,7 @@ module "quickwit_searcher" {
   service_name                   = "searcher"
   service_discovery_registry_arn = aws_service_discovery_service.searcher.arn
   cluster_arn                    = module.ecs_cluster.arn
-  postgres_credential_arn        = local.postgres_uri_parameter_arn
+  postgres_uri_secret_arn        = local.postgres_uri_secret_arn
   quickwit_peer_list             = local.quickwit_peer_list
   s3_access_policy_arn           = aws_iam_policy.quickwit_task_permission.arn
   task_execution_policy_arn      = aws_iam_policy.quickwit_task_execution_permission.arn
diff --git a/distribution/ecs/quickwit/service/ecs.tf b/distribution/ecs/quickwit/service/ecs.tf
index 301a3bb7499..5b862271f77 100644
--- a/distribution/ecs/quickwit/service/ecs.tf
+++ b/distribution/ecs/quickwit/service/ecs.tf
@@ -27,7 +27,7 @@ module "quickwit_service" {
   secrets = [
     {
       name      = "QW_METASTORE_URI"
-      valueFrom = var.postgres_credential_arn
+      valueFrom = var.postgres_uri_secret_arn
     }
   ]
 
@@ -119,10 +119,6 @@ module "quickwit_service" {
     }
   ]
 
-  task_exec_ssm_param_arns = [
-    var.postgres_credential_arn
-  ]
-
   tasks_iam_role_policies = local.tasks_iam_role_policies
 
   task_exec_iam_role_policies = {
diff --git a/distribution/ecs/quickwit/service/variables.tf b/distribution/ecs/quickwit/service/variables.tf
index 2e256945721..09de61ff3ee 100644
--- a/distribution/ecs/quickwit/service/variables.tf
+++ b/distribution/ecs/quickwit/service/variables.tf
@@ -32,7 +32,9 @@ variable "subnet_ids" {
   type = list(string)
 }
 
-variable "postgres_credential_arn" {}
+variable "postgres_uri_secret_arn" {
+  description = "ARN of the SSM parameter or Secrets Manager secret containing the URI of a Postgres instance"
+}
 
 variable "quickwit_image" {}
diff --git a/distribution/ecs/quickwit/variables.tf b/distribution/ecs/quickwit/variables.tf
index 0a343a50d01..ef3be5e1467 100644
--- a/distribution/ecs/quickwit/variables.tf
+++ b/distribution/ecs/quickwit/variables.tf
@@ -73,8 +73,8 @@ variable "quickwit_indexer" {
   description = "Indexer service sizing configurations"
   type = object({
     desired_count          = optional(number, 1)
-    memory                 = optional(number, 4096)
-    cpu                    = optional(number, 1024)
+    memory                 = optional(number, 8192)
+    cpu                    = optional(number, 2048)
     ephemeral_storage_gib  = optional(number, 21)
     extra_task_policy_arns = optional(list(string), [])
   })
@@ -95,7 +95,7 @@ variable "quickwit_searcher" {
   description = "Searcher service sizing configurations"
   type = object({
     desired_count         = optional(number, 1)
-    memory                = optional(number, 2048)
+    memory                = optional(number, 4096)
     cpu                   = optional(number, 1024)
     ephemeral_storage_gib = optional(number, 21)
   })
@@ -131,7 +131,7 @@ variable "rds_config" {
   default = {}
 }
 
-variable "external_postgres_uri_ssm_parameter_arn" {
-  description = "ARN of the SSM parameter containing the URI of a Postgres instance (postgres://{user}:{password}@{address}:{port}/{db_instance_name}). The Postgres instance should allow indbound connections from the subnets specified in `variable.subnet_ids`. If provided, the internal RDS will not be created and `var.rds_config` is ignored."
+variable "external_postgres_uri_secret_arn" {
+  description = "ARN of the SSM parameter or Secrets Manager secret containing the URI of a Postgres instance (postgres://{user}:{password}@{address}:{port}/{db_instance_name}). The Postgres instance should allow inbound connections from the subnets specified in `variable.subnet_ids`. If provided, the internal RDS will not be created and `var.rds_config` is ignored."
   default     = ""
 }
diff --git a/distribution/lambda/Makefile b/distribution/lambda/Makefile
index d6accd9334c..0c4decfce57 100644
--- a/distribution/lambda/Makefile
+++ b/distribution/lambda/Makefile
@@ -28,8 +28,11 @@ package:
 	then
 		pushd ../../quickwit/
 		rustc --version
+		# TODO: remove --disable-optimizations when upgrading to a release containing
+		# https://github.com/cargo-lambda/cargo-lambda/issues/649 (> 1.2.1)
 		cargo lambda build \
 			-p quickwit-lambda \
+			--disable-optimizations \
 			--release \
 			--output-format zip \
 			--target x86_64-unknown-linux-gnu
diff --git a/distribution/lambda/cdk/cli.py b/distribution/lambda/cdk/cli.py
index d4264df28fc..f7374296d58 100644
--- a/distribution/lambda/cdk/cli.py
+++ b/distribution/lambda/cdk/cli.py
@@ -255,6 +255,13 @@ def get_logs(
     last_event_id = ""
     last_event_found = True
     start_time = time.time()
+    while time.time() - start_time < timeout:
+        describe_resp = client.describe_log_groups(logGroupNamePrefix=log_group_name)
+        group_names = [group["logGroupName"] for group in describe_resp["logGroups"]]
+        if log_group_name in group_names:
+            break
+        print("log group not found, retrying...")
+        time.sleep(3)
     while time.time() - start_time < timeout:
         for page in paginator.paginate(
             logGroupName=log_group_name,
@@ -268,7 +275,6 @@
                     last_event_id = event["eventId"]
                     yield event["message"]
                     if event["message"].startswith("REPORT"):
-                        lower_time_bound = int(event["timestamp"])
                         last_event_id = "REPORT"
                         break
         if last_event_id == "REPORT":
@@ -454,3 +460,4 @@ def req(method, path, body=None, expected_status=200):
         expected_status=400,
     )
     req("GET", f"/api/v1/_elastic/_search?q=animal", expected_status=501)
+    req("GET", f"/api/v1/indexes/{mock_sales_index_id}")
diff --git a/docs/configuration/metastore-config.md b/docs/configuration/metastore-config.md
index a5e7919e03f..ac418187327 100644
--- a/docs/configuration/metastore-config.md
+++ b/docs/configuration/metastore-config.md
@@ -65,7 +65,11 @@ By default, the File-Backed Metastore is only read once when you start a Quickwi
 You can also configure it to poll the File-Backed Metastore periodically to keep a fresh view of it.
 This is useful for a Searcher instance that needs to be aware of new splits published by an Indexer running in parallel.
 
-To configure the polling interval (in seconds only), add a URI fragment to the storage URI like this: `s3://quickwit/my-indexes#polling_interval=30s`
+To configure the polling interval (in seconds), add a URI fragment to the storage URI as follows: `s3://quickwit/my-indexes#polling_interval=30s`
+
+:::note
+The polling interval can be configured in seconds only; other units, such as minutes or hours, are not supported.
+:::
 
 :::tip
 Amazon S3 charges $0.0004 per 1000 GET requests. Polling a metastore every 30 seconds costs $0.04 per month and index.
diff --git a/docs/configuration/node-config.md b/docs/configuration/node-config.md
index 1c2126f6aa1..e51313e9eda 100644
--- a/docs/configuration/node-config.md
+++ b/docs/configuration/node-config.md
@@ -123,17 +123,7 @@ This section may contain one configuration subsection per available metastore im
 
 ### File-backed metastore configuration
 
-| Property | Description | Default value |
-| --- | --- | --- |
-| `polling_interval` | Time interval between successive polling attempts to detect metastore changes. | `30s` |
-
-Example of a metastore configuration for a file-backed implementation in YAML format:
-
-```yaml
-metastore:
-  file:
-    polling_interval: 1m
-```
+The file-backed metastore doesn't have any node-level configuration. You can configure the polling interval [at the index level](./metastore-config.md#polling-configuration).
 
 ### PostgreSQL metastore configuration
@@ -163,8 +153,8 @@ This section contains the configuration options for an indexer. The split store
 
 | Property | Description | Default value |
 | --- | --- | --- |
-| `split_store_max_num_bytes` | Maximum size in bytes allowed in the split store for each index-source pair. | `100G` |
-| `split_store_max_num_splits` | Maximum number of files allowed in the split store for each index-source pair. | `1000` |
+| `split_store_max_num_bytes` | Maximum size in bytes allowed in the split store. | `100G` |
+| `split_store_max_num_splits` | Maximum number of files allowed in the split store. | `1000` |
 | `max_concurrent_split_uploads` | Maximum number of concurrent split uploads allowed on the node. | `12` |
 | `merge_concurrency` | Maximum number of merge operations that can be executed on the node at one point in time. | `(2 x num threads available) / 3` |
 | `enable_otlp_endpoint` | If true, enables the OpenTelemetry exporter endpoint to ingest logs and traces via the OpenTelemetry Protocol (OTLP). | `false` |
diff --git a/docs/configuration/source-config.md b/docs/configuration/source-config.md
index b588c4315fe..479c97e2365 100644
--- a/docs/configuration/source-config.md
+++ b/docs/configuration/source-config.md
@@ -68,7 +68,7 @@ Comma-separated list of host and port pairs that are the addresses of a subset o
 Defines the behavior of the source when consuming a partition for which there is no initial offset saved in the checkpoint. `earliest` consumes from the beginning of the partition, whereas `latest` (default) consumes from the end.
 
 - `enable.auto.commit`
-The Kafka source manages commit offsets manually using the [checkpoint API](../overview/concepts/indexing.md#checkpoint) and disables auto-commit.
+This setting is ignored because the Kafka source manages commit offsets internally using the [checkpoint API](../overview/concepts/indexing.md#checkpoint) and forces auto-commit to be disabled.
 
 - `group.id`
 Kafka-based distributed indexing relies on consumer groups. Unless overridden in the client parameters, the default group ID assigned to each consumer managed by the source is `quickwit-{index_uid}-{source_id}`.
@@ -165,11 +165,24 @@ EOF
 
 ## Number of pipelines
 
-`num_pipelines` parameter is only available for sources that can be distributed: Kafka, GCP PubSub and Pulsar (coming soon).
+The `num_pipelines` parameter is only available for distributed sources like Kafka, GCP PubSub, and Pulsar.
 
 It defines the number of pipelines to run on a cluster for the source. The actual placement of these pipelines on the different indexer
-will be decided by the control plane. Note that distributions of a source like Kafka is done by assigning a set of partitions to different pipelines.
-As a result, it is recommended to make sure the number of partitions is a multiple of the number of `num_pipelines`.
+will be decided by the control plane.
+
+:::info
+
+Note that distributing the indexing load of partitioned sources like Kafka is done by assigning the different partitions to different pipelines. As a result, it is important to ensure that the number of partitions is a multiple of `num_pipelines`.
+
+Also, assuming you are only indexing a single Kafka source in your Quickwit cluster, you should set the number of pipelines to a multiple of the number of indexers. Finally, if your indexing throughput is high, you should provision between 2 and 4 vCPUs per pipeline.
+
+For instance, assume you want to index a 60-partition topic, with each partition receiving a throughput of 10 MB/s. If you measured that Quickwit can index your data at a pace of 40 MB/s per pipeline, a possible setting could be:
+- 5 indexers with 8 vCPUs each
+- 15 pipelines
+
+Each indexer will then be in charge of 3 pipelines, and each pipeline will cover 4 partitions.
+:::
+
 ## Transform parameters
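The worked example in the info block above follows mechanically from the stated numbers. A small sketch of that computation; `plan_pipelines` is an illustrative helper, not part of Quickwit:

```python
import math

def plan_pipelines(num_partitions: int, partition_mb_s: float,
                   pipeline_capacity_mb_s: float, num_indexers: int) -> int:
    """Pick a `num_pipelines` value following the guidance above."""
    total_mb_s = num_partitions * partition_mb_s
    num_pipelines = math.ceil(total_mb_s / pipeline_capacity_mb_s)
    # Partitions are spread over pipelines, and pipelines over indexers,
    # so both counts should divide evenly.
    assert num_partitions % num_pipelines == 0, "partitions should be a multiple of num_pipelines"
    assert num_pipelines % num_indexers == 0, "pipelines should be a multiple of the indexer count"
    return num_pipelines

# The docs' example: a 60-partition topic at 10 MB/s per partition,
# indexed at 40 MB/s per pipeline on 5 indexers.
pipelines = plan_pipelines(60, 10.0, 40.0, 5)
print(pipelines, pipelines // 5, 60 // pipelines)  # 15 pipelines, 3 per indexer, 4 partitions each
```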
diff --git a/docs/deployment/cluster-sizing.md b/docs/deployment/cluster-sizing.md
new file mode 100644
index 00000000000..e6d66e61709
--- /dev/null
+++ b/docs/deployment/cluster-sizing.md
@@ -0,0 +1,102 @@
+---
+title: Cluster sizing
+sidebar_position: 2
+---
+
+In this guide, we discuss how to size your Quickwit cluster and nodes. As shown
+in the [architecture section](../overview/architecture.md), a Quickwit cluster
+has 5 main components: the Indexers, Searchers, Control Plane,
+Metastore, and Janitor. Each component has different resource requirements
+and can be scaled independently. We will also discuss how to size the metastore
+PostgreSQL database.
+
+:::note
+
+This guide provides general guidelines. The actual resource requirements depend
+strongly on your workload. We recommend monitoring the resource usage and
+adjusting the cluster size accordingly.
+
+:::
+
+## Quickwit services
+
+### Indexers
+
+Here are some high-level guidelines to size your Indexer nodes:
+- Quickwit can index at around **7.5MB per second per core**
+- For the general use case, configure 4GB of RAM per core
+  - Workloads with a large number of indexes or data sources consume more RAM
+  - Don't use instances with less than 8GB of RAM
+- Mount the data directory to a volume of at least 110GB to store the [split
+  cache](../configuration/node-config.md#Indexer-configuration) and the [ingest
+  queue](../configuration/node-config.md#ingest-api-configuration).
+- Local SSDs are preferred for deploying Indexers since they generally provide
+  the best performance per dollar and save some network bandwidth. However,
+  remote disks can also work if they provide roughly 20 MB/s of write throughput
+  per core when using the ingest API, or 10 MB/s when relying on other sources.
+  For Amazon EBS volumes, this is equivalent to 320 or 160 IOPS per core
+  (assuming 64 KB I/O operations).
+
+:::note
+
+To utilize all CPUs on Indexer nodes that have more than 4 cores, your indexing
+workload needs to be broken down into multiple indexing pipelines. This can be
+achieved by creating multiple indexes or by using a [partitioned data
+source](../configuration/source-config.md#number-of-pipelines) such as
+[Kafka](../configuration/source-config.md#kafka-source).
+
+:::
+
+### Searchers
+
+Search performance is highly dependent on the workload. For example, term queries
+are usually cheaper than aggregations. A good starting point for dimensioning
+Searcher nodes:
+- Configure 8GB of RAM per core when using a high latency / low bandwidth object
+  store like AWS S3
+- Decrease the RAM / CPU ratio (e.g. 4GB/core) when using a faster object store
+- Provision more RAM if you expect many concurrent aggregation requests. By
+  default, each request can use up to 500MB of RAM on each node.
+- Avoid instances with less than 4GB of RAM
+- Searcher nodes don't use disk unless the [split
+  cache](../configuration/node-config.md#Searcher-split-cache-configuration) is
+  explicitly enabled
+
+One strength of Quickwit is that its Searchers are stateless, which makes it
+easy to scale them up and down based on the workload. Scale the number of
+Searcher nodes based on:
+- the number of concurrent requests expected
+- aggregations that run on large amounts of data (without
+  [time](../overview/concepts/querying.md#time-sharding) or
+  [tag](../overview/concepts/querying.md#tag-pruning) pruning)
+
+### Other services
+
+The Control Plane, Metastore, and Janitor are lightweight components.
+Each of these services requires a single replica.
+
+The Control Plane needs a single core and 2GB of RAM. It doesn't require any disk.
+
+The Metastore also requires a single core and 2GB of RAM. For clusters handling
+hundreds of indexes, you may increase the size to 2 cores and 4GB of RAM. It
+doesn't write to disk.
+
+In general, the Janitor requires 1 core and 2GB of RAM and doesn't use the disk.
+If you use the [delete API](https://quickwit.io/docs/overview/concepts/deletes),
+the Janitor should be dimensioned like an indexer.
+
+### Single node deployments
+
+For experimentation and small-scale POCs, it is possible to deploy all the
+services on a single node (see
+[tutorial](../get-started/tutorials/tutorial-hdfs-logs.md)). We recommend at
+least 2 cores and 8GB of RAM.
+
+## Postgres Metastore backend
+
+For most use cases, a PostgreSQL instance with 4GB of RAM and 1 core is
+sufficient:
+- with the AWS RDS managed service, use the t4g.medium instance type. Enable
+  multi-AZ with one standby for high availability.
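The indexer guidelines in the new sizing page translate directly into a rough capacity estimate. A minimal sketch assuming the page's 7.5 MB/s-per-core and 4 GB-per-core figures; `size_indexers` is an illustrative helper, not an official sizing tool:

```python
import math

MB_S_PER_CORE = 7.5   # indexing throughput rule of thumb from the guide
RAM_GB_PER_CORE = 4   # general-use RAM ratio from the guide

def size_indexers(total_throughput_mb_s: float, cores_per_node: int) -> dict:
    """Estimate the indexer fleet needed for a given aggregate throughput."""
    cores_needed = math.ceil(total_throughput_mb_s / MB_S_PER_CORE)
    nodes = math.ceil(cores_needed / cores_per_node)
    return {
        "nodes": nodes,
        "ram_gb_per_node": cores_per_node * RAM_GB_PER_CORE,
        "min_data_volume_gb": 110,  # split cache + ingest queue, per the guide
    }

# Example: 100 MB/s of incoming data on 8-core nodes.
print(size_indexers(100.0, 8))  # {'nodes': 2, 'ram_gb_per_node': 32, 'min_data_volume_gb': 110}
```

As with the guide itself, these are starting points; measure and adjust for your workload.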
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64" +dependencies = [ + "cc", + "lazy_static", + "libc", + "winapi 0.3.9", +] + [[package]] name = "finl_unicode" version = "1.2.0" @@ -2575,7 +2613,21 @@ dependencies = [ "libc", "log", "rustversion", - "windows", + "windows 0.48.0", +] + +[[package]] +name = "generator" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "186014d53bc231d0090ef8d6f03e0920c54d85a5ed22f4f2f74315ec56cf83fb" +dependencies = [ + "cc", + "cfg-if", + "libc", + "log", + "rustversion", + "windows 0.54.0", ] [[package]] @@ -3129,7 +3181,7 @@ dependencies = [ "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "windows-core", + "windows-core 0.52.0", ] [[package]] @@ -3204,6 +3256,24 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac" +[[package]] +name = "inferno" +version = "0.11.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "321f0f839cd44a4686e9504b0a62b4d69a50b62072144c71c68f5873c167b8d9" +dependencies = [ + "ahash 0.8.11", + "indexmap 2.1.0", + "is-terminal", + "itoa", + "log", + "num-format", + "once_cell", + "quick-xml 0.26.0", + "rgb", + "str_stack", +] + [[package]] name = "inherent" version = "1.0.11" @@ -3851,7 +3921,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff50ecb28bb86013e935fb6683ab1f6d3a20016f123c76fd4c27470076ac30f5" dependencies = [ "cfg-if", - "generator", + "generator 0.7.5", + "pin-utils", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "loom" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" +dependencies = [ + "cfg-if", + "generator 0.8.1", "pin-utils", "scoped-tls", "tracing", @@ -4116,6 +4200,17 @@ dependencies = [ "regex", ] +[[package]] +name = "nix" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "libc", +] + [[package]] name = "no-std-net" version = "0.6.0" @@ -4182,6 +4277,16 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +[[package]] +name = "num-format" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3" +dependencies = [ + "arrayvec 0.7.4", + "itoa", +] + [[package]] name = "num-integer" version = "0.1.46" @@ -4336,18 +4441,18 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "oneshot" version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f6640c6bda7731b1fdbab747981a0f896dd1fedaf9f4a53fa237a04a84431f4" +source = "git+https://github.com/fulmicoton/oneshot.git?rev=c10a3ba#c10a3ba32adc189acf68acd579ba9755075ecb4d" dependencies = [ - "loom", + "loom 0.5.6", ] [[package]] name = "oneshot" -version = "0.1.6" -source = "git+https://github.com/fulmicoton/oneshot.git?rev=c10a3ba#c10a3ba32adc189acf68acd579ba9755075ecb4d" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "071d1cf3298ad8e543dca18217d198cb6a3884443d204757b9624b935ef09fa0" dependencies = [ - "loom", + "loom 0.7.2", ] [[package]] @@ -5162,6 +5267,27 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "pprof" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef5c97c51bd34c7e742402e216abdeb44d415fbe6ae41d56b114723e953711cb" +dependencies = [ + "backtrace", + "cfg-if", + "findshlibs", + "inferno", + "libc", + "log", + "nix", + "once_cell", + "parking_lot", + "smallvec", + "symbolic-demangle", + "tempfile", + "thiserror", +] + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -5498,6 +5624,15 @@ version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" +[[package]] +name = "quick-xml" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd" +dependencies = [ + "memchr", +] + [[package]] name = "quick-xml" version = "0.29.0" @@ -5732,7 +5867,6 @@ dependencies = [ "once_cell", "quickwit-common", "quickwit-doc-mapper", - "quickwit-macros", "quickwit-proto", "regex", "serde", @@ -5884,7 +6018,7 @@ dependencies = [ "libz-sys", "mockall", "once_cell", - "oneshot 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "oneshot 0.1.7", "openssl", "proptest", "prost", @@ -6315,6 +6449,7 @@ dependencies = [ "once_cell", "opentelemetry", "percent-encoding", + "pprof", "prost", "prost-types", "quickwit-actors", @@ -6757,6 +6892,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rgb" +version = "0.8.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05aaa8004b64fd573fc9d002f4e632d51ad4f026c2b5ba95fcb6c2f32c2c47d8" +dependencies = [ + "bytemuck", +] + [[package]] name = "ring" version = "0.16.20" @@ -6965,9 +7109,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.21.10" +version = "0.21.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" dependencies = [ "log", "ring 0.17.8", @@ -7858,6 +8002,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "str_stack" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb" + [[package]] name = "string_cache" version = "0.8.7" @@ -7909,6 +8059,29 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" +[[package]] +name = "symbolic-common" +version = "12.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89d2aef0f60f62e38c472334148758afbd570ed78d20be622692e5ebfec3734f" +dependencies = [ + "debugid", + "memmap2", + "stable_deref_trait", + "uuid", +] + +[[package]] +name = "symbolic-demangle" +version = "12.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1719d1292eac816cdd3fdad12b22315624b7ce6a7bacb267a3a27fccfd286b48" 
+dependencies = [ + "cpp_demangle", + "rustc-demangle", + "symbolic-common", +] + [[package]] name = "syn" version = "1.0.109" @@ -8033,7 +8206,7 @@ dependencies = [ "measure_time", "memmap2", "once_cell", - "oneshot 0.1.6 (git+https://github.com/fulmicoton/oneshot.git?rev=c10a3ba)", + "oneshot 0.1.6", "rayon", "regex", "rust-stemmers", @@ -8342,9 +8515,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.37.0" +version = "1.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" +checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" dependencies = [ "backtrace", "bytes", @@ -8372,9 +8545,9 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" +checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", @@ -9395,6 +9568,16 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "windows" +version = "0.54.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9252e5725dbed82865af151df558e754e4a3c2c30818359eb17465f1346a1b49" +dependencies = [ + "windows-core 0.54.0", + "windows-targets 0.52.5", +] + [[package]] name = "windows-core" version = "0.52.0" @@ -9404,6 +9587,25 @@ dependencies = [ "windows-targets 0.52.5", ] +[[package]] +name = "windows-core" +version = "0.54.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12661b9c89351d684a50a8a643ce5f608e20243b9fb84687800163429f161d65" +dependencies = [ + "windows-result", + "windows-targets 0.52.5", +] + +[[package]] +name = "windows-result" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "749f0da9cc72d82e600d8d2e44cadd0b9eedb9038f71a1c58556ac1c5791813b" +dependencies = [ + "windows-targets 0.52.5", +] + [[package]] name = "windows-sys" version = "0.45.0" diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index a207e2a7521..84919ccb453 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -157,7 +157,7 @@ new_string_template = "1.5.1" nom = "7.1.3" numfmt = "1.1.1" once_cell = "1" -oneshot = "0.1.5" +oneshot = "0.1.7" openssl = { version = "0.10.60", default-features = false } openssl-probe = "0.1.5" opentelemetry = { version = "0.20", features = ["rt-tokio"] } @@ -171,6 +171,7 @@ postcard = { version = "1.0.4", features = [ ], default-features = false } predicates = "3" prettyplease = "0.2.0" +pprof = { version = "0.13", features = ["flamegraph"] } proc-macro2 = "1.0.50" prometheus = { version = "0.13", features = ["process"] } proptest = "1" @@ -230,7 +231,7 @@ thousands = "0.2.0" tikv-jemalloc-ctl = "0.5" tikv-jemallocator = "0.5" time = { version = "0.3", features = ["std", "formatting", "macros"] } -tokio = { version = "1.37", features = ["full"] } +tokio = { version = "1.38", features = ["full"] } tokio-metrics = { version = "0.3.1", features = ["rt"] } tokio-stream = { version = "0.1", features = ["sync"] } tokio-util = { version = "0.7", features = ["full"] } @@ -244,7 +245,11 @@ tower = { version = "0.4.13", features = [ "retry", "util", ] } -tower-http = { version = "0.4.0", features = ["compression-gzip", "cors"] } +tower-http = { version = "0.4.0", features = [ + 
"compression-zstd", + "compression-gzip", + "cors", +] } tracing = "0.1.37" tracing-opentelemetry = "0.20.0" tracing-subscriber = { version = "0.3.16", features = [ @@ -332,6 +337,9 @@ encoding_rs = "=0.8.32" [patch.crates-io] sasl2-sys = { git = "https://github.com/quickwit-oss/rust-sasl/", rev = "daca921" } +[patch."https://github.com/fulmicoton/oneshot.git"] +oneshot = "0.1.7" + ## this patched version of tracing helps better understand what happens inside futures (when are ## they polled, how long does poll take...) #tracing = { git = "https://github.com/trinity-1686a/tracing.git", rev = "6806cac3" } diff --git a/quickwit/quickwit-cli/Cargo.toml b/quickwit/quickwit-cli/Cargo.toml index d5c2b3f6acc..aeb14866f85 100644 --- a/quickwit/quickwit-cli/Cargo.toml +++ b/quickwit/quickwit-cli/Cargo.toml @@ -81,6 +81,7 @@ quickwit-storage = { workspace = true, features = ["testsuite"] } [features] jemalloc = ["dep:tikv-jemalloc-ctl", "dep:tikv-jemallocator"] ci-test = [] +pprof = ["quickwit-serve/pprof"] openssl-support = ["openssl-probe"] # Requires to enable tokio unstable via RUSTFLAGS="--cfg tokio_unstable" tokio-console = ["console-subscriber", "quickwit-common/named_tasks"] diff --git a/quickwit/quickwit-config/Cargo.toml b/quickwit/quickwit-config/Cargo.toml index f32393cd39f..b488ead891b 100644 --- a/quickwit/quickwit-config/Cargo.toml +++ b/quickwit/quickwit-config/Cargo.toml @@ -36,7 +36,6 @@ vrl = { workspace = true, optional = true } quickwit-common = { workspace = true } quickwit-doc-mapper = { workspace = true } -quickwit-macros = { workspace = true } quickwit-proto = { workspace = true } [dev-dependencies] diff --git a/quickwit/quickwit-config/src/index_config/mod.rs b/quickwit/quickwit-config/src/index_config/mod.rs index bdb99d59bef..574c68dea45 100644 --- a/quickwit/quickwit-config/src/index_config/mod.rs +++ b/quickwit/quickwit-config/src/index_config/mod.rs @@ -31,10 +31,7 @@ use chrono::Utc; use cron::Schedule; use humantime::parse_duration; use quickwit_common::uri::Uri; -use quickwit_doc_mapper::{ - DefaultDocMapper, DefaultDocMapperBuilder, DocMapper, FieldMappingEntry, Mode, ModeType, - QuickwitJsonOptions, TokenizerEntry, -}; +use quickwit_doc_mapper::{DefaultDocMapperBuilder, DocMapper, DocMapping, Mode}; use quickwit_proto::types::IndexId; use serde::{Deserialize, Serialize}; pub use serialize::load_index_config_from_user_config; @@ -44,57 +41,6 @@ use crate::index_config::serialize::VersionedIndexConfig; use crate::merge_policy_config::{MergePolicyConfig, StableLogMergePolicyConfig}; use crate::TestableForRegression; -// Note(fmassot): `DocMapping` is a struct only used for -// serialization/deserialization of `DocMapper` parameters. -// This is partly a duplicate of the `DefaultDocMapper` and -// can be viewed as a temporary hack for 0.2 release before -// refactoring. -#[quickwit_macros::serde_multikey] -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, utoipa::ToSchema)] -#[serde(deny_unknown_fields)] -pub struct DocMapping { - #[serde(default)] - #[schema(value_type = Vec)] - /// The mapping of the index schema fields. - /// - /// This defines the name, type and other information about the field(s). 
diff --git a/quickwit/quickwit-config/src/index_config/mod.rs b/quickwit/quickwit-config/src/index_config/mod.rs
index bdb99d59bef..574c68dea45 100644
--- a/quickwit/quickwit-config/src/index_config/mod.rs
+++ b/quickwit/quickwit-config/src/index_config/mod.rs
@@ -31,10 +31,7 @@ use chrono::Utc;
 use cron::Schedule;
 use humantime::parse_duration;
 use quickwit_common::uri::Uri;
-use quickwit_doc_mapper::{
-    DefaultDocMapper, DefaultDocMapperBuilder, DocMapper, FieldMappingEntry, Mode, ModeType,
-    QuickwitJsonOptions, TokenizerEntry,
-};
+use quickwit_doc_mapper::{DefaultDocMapperBuilder, DocMapper, DocMapping, Mode};
 use quickwit_proto::types::IndexId;
 use serde::{Deserialize, Serialize};
 pub use serialize::load_index_config_from_user_config;
@@ -44,57 +41,6 @@ use crate::index_config::serialize::VersionedIndexConfig;
 use crate::merge_policy_config::{MergePolicyConfig, StableLogMergePolicyConfig};
 use crate::TestableForRegression;
 
-// Note(fmassot): `DocMapping` is a struct only used for
-// serialization/deserialization of `DocMapper` parameters.
-// This is partly a duplicate of the `DefaultDocMapper` and
-// can be viewed as a temporary hack for 0.2 release before
-// refactoring.
-#[quickwit_macros::serde_multikey]
-#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, utoipa::ToSchema)]
-#[serde(deny_unknown_fields)]
-pub struct DocMapping {
-    #[serde(default)]
-    #[schema(value_type = Vec<FieldMappingEntry>)]
-    /// The mapping of the index schema fields.
-    ///
-    /// This defines the name, type and other information about the field(s).
-    ///
-    /// Properties are determined by the specified type, for more information
-    /// please see: <https://quickwit.io/docs/configuration/index-config#field-types>
-    pub field_mappings: Vec<FieldMappingEntry>,
-    #[schema(value_type = Vec<String>)]
-    #[serde(default)]
-    pub tag_fields: BTreeSet<String>,
-    #[serde(default)]
-    pub store_source: bool,
-    #[serde(default)]
-    pub index_field_presence: bool,
-    #[serde(default)]
-    pub timestamp_field: Option<String>,
-    #[serde_multikey(
-        deserializer = Mode::from_parts,
-        serializer = Mode::into_parts,
-        fields = (
-            #[serde(default)]
-            mode: ModeType,
-            #[serde(skip_serializing_if = "Option::is_none")]
-            dynamic_mapping: Option<QuickwitJsonOptions>
-        ),
-    )]
-    pub mode: Mode,
-    #[serde(default)]
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub partition_key: Option<String>,
-    #[schema(value_type = u32)]
-    #[serde(default = "DefaultDocMapper::default_max_num_partitions")]
-    pub max_num_partitions: NonZeroU32,
-    #[serde(default)]
-    pub tokenizers: Vec<TokenizerEntry>,
-    /// Record document length
-    #[serde(default)]
-    pub document_length: bool,
-}
-
 #[derive(Clone, Debug, Serialize, Deserialize, utoipa::ToSchema)]
 #[serde(deny_unknown_fields)]
 pub struct IndexingResources {
@@ -440,24 +386,21 @@ impl TestableForRegression for IndexConfig {
         )
         .unwrap();
         let doc_mapping = DocMapping {
-            index_field_presence: true,
+            mode: Mode::default(),
             field_mappings: vec![
                 tenant_id_mapping,
                 timestamp_mapping,
                 log_level_mapping,
                 message_mapping,
             ],
-            tag_fields: ["tenant_id", "log_level"]
-                .into_iter()
-                .map(|tag_field| tag_field.to_string())
-                .collect::<BTreeSet<String>>(),
-            store_source: true,
-            mode: Mode::default(),
+            timestamp_field: Some("timestamp".to_string()),
+            tag_fields: BTreeSet::from_iter(["tenant_id".to_string(), "log_level".to_string()]),
             partition_key: Some("tenant_id".to_string()),
             max_num_partitions: NonZeroU32::new(100).unwrap(),
-            timestamp_field: Some("timestamp".to_string()),
+            index_field_presence: true,
+            store_document_size: false,
+            store_source: true,
             tokenizers: vec![tokenizer],
-            document_length: false,
         };
         let retention_policy = Some(RetentionPolicy {
             retention_period: "90 days".to_string(),
@@ -496,46 +439,20 @@ impl TestableForRegression for IndexConfig {
     fn assert_equality(&self, other: &Self) {
         assert_eq!(self.index_id, other.index_id);
         assert_eq!(self.index_uri, other.index_uri);
-        assert_eq!(
-            self.doc_mapping
-                .field_mappings
-                .iter()
-                .map(|field_mapping| &field_mapping.name)
-                .collect::<Vec<_>>(),
-            other
-                .doc_mapping
-                .field_mappings
-                .iter()
-                .map(|field_mapping| &field_mapping.name)
-                .collect::<Vec<_>>(),
-        );
-        assert_eq!(self.doc_mapping.tag_fields, other.doc_mapping.tag_fields,);
-        assert_eq!(
-            self.doc_mapping.store_source,
-            other.doc_mapping.store_source,
-        );
+        assert_eq!(self.doc_mapping, other.doc_mapping);
         assert_eq!(self.indexing_settings, other.indexing_settings);
         assert_eq!(self.search_settings, other.search_settings);
     }
 }
 
-/// Builds and returns the doc mapper associated with index.
+/// Builds and returns the doc mapper associated with an index.
 pub fn build_doc_mapper(
     doc_mapping: &DocMapping,
     search_settings: &SearchSettings,
 ) -> anyhow::Result<Arc<dyn DocMapper>> {
     let builder = DefaultDocMapperBuilder {
-        store_source: doc_mapping.store_source,
-        index_field_presence: doc_mapping.index_field_presence,
+        doc_mapping: doc_mapping.clone(),
         default_search_fields: search_settings.default_search_fields.clone(),
-        timestamp_field: doc_mapping.timestamp_field.clone(),
-        field_mappings: doc_mapping.field_mappings.clone(),
-        tag_fields: doc_mapping.tag_fields.iter().cloned().collect(),
-        mode: doc_mapping.mode.clone(),
-        partition_key: doc_mapping.partition_key.clone(),
-        max_num_partitions: doc_mapping.max_num_partitions,
-        tokenizers: doc_mapping.tokenizers.clone(),
-        document_length: doc_mapping.document_length,
     };
     Ok(Arc::new(builder.try_build()?))
 }
@@ -571,6 +488,7 @@ pub(super) fn validate_index_config(
 mod tests {
     use cron::TimeUnitSpec;
 
+    use quickwit_doc_mapper::ModeType;
     use super::*;
     use crate::merge_policy_config::MergePolicyConfig;
diff --git a/quickwit/quickwit-config/src/index_config/serialize.rs b/quickwit/quickwit-config/src/index_config/serialize.rs
index a3ab40cd722..07885867962 100644
--- a/quickwit/quickwit-config/src/index_config/serialize.rs
+++ b/quickwit/quickwit-config/src/index_config/serialize.rs
@@ -37,9 +37,6 @@ type IndexConfigForSerialization = IndexConfigV0_8;
 pub(crate) enum VersionedIndexConfig {
     #[serde(rename = "0.8")]
     // Retro compatibility
-    #[serde(alias = "0.4")]
-    #[serde(alias = "0.5")]
-    #[serde(alias = "0.6")]
     #[serde(alias = "0.7")]
     V0_8(IndexConfigV0_8),
 }
diff --git a/quickwit/quickwit-config/src/lib.rs b/quickwit/quickwit-config/src/lib.rs
index bd58a139b6c..82ee8f7b0c5 100644
--- a/quickwit/quickwit-config/src/lib.rs
+++ b/quickwit/quickwit-config/src/lib.rs
@@ -48,9 +48,10 @@ pub use cluster_config::ClusterConfig;
 // See #2048
 use index_config::serialize::{IndexConfigV0_8, VersionedIndexConfig};
 pub use index_config::{
-    build_doc_mapper, load_index_config_from_user_config, DocMapping, IndexConfig,
-    IndexingResources, IndexingSettings, RetentionPolicy, SearchSettings,
+    build_doc_mapper, load_index_config_from_user_config, IndexConfig, IndexingResources,
+    IndexingSettings, RetentionPolicy, SearchSettings,
 };
+pub use quickwit_doc_mapper::DocMapping;
 use serde::de::DeserializeOwned;
 use serde::Serialize;
 use serde_json::Value as JsonValue;
@@ -225,7 +226,7 @@ impl ConfigFormat {
             serde_json::from_reader(StripComments::new(payload))?;
         let version_value = json_value.get_mut("version").context("missing version")?;
         if let Some(version_number) = version_value.as_u64() {
-            warn!(version_value=?version_value, "`version` is supposed to be a string");
+            warn!(version_value=?version_value, "`version` should be a string");
             *version_value = JsonValue::String(version_number.to_string());
         }
         serde_json::from_value(json_value).context("failed to parse JSON file")
@@ -237,7 +238,7 @@ impl ConfigFormat {
             toml::from_str(payload_str).context("failed to parse TOML file")?;
         let version_value = toml_value.get_mut("version").context("missing version")?;
         if let Some(version_number) = version_value.as_integer() {
-            warn!(version_value=?version_value, "`version` is supposed to be a string");
+            warn!(version_value=?version_value, "`version` should be a string");
             *version_value = toml::Value::String(version_number.to_string());
             let reserialized = toml::to_string(version_value)
                 .context("failed to reserialize toml config")?;
b/quickwit/quickwit-config/src/node_config/serialize.rs index 10d50159d93..208a929badc 100644 --- a/quickwit/quickwit-config/src/node_config/serialize.rs +++ b/quickwit/quickwit-config/src/node_config/serialize.rs @@ -153,9 +153,6 @@ enum VersionedNodeConfig { #[serde(rename = "0.8")] // Retro compatibility. #[serde(alias = "0.7")] - #[serde(alias = "0.6")] - #[serde(alias = "0.5")] - #[serde(alias = "0.4")] V0_8(NodeConfigBuilder), } diff --git a/quickwit/quickwit-config/src/source_config/serialize.rs b/quickwit/quickwit-config/src/source_config/serialize.rs index 68e6858f068..0877138c712 100644 --- a/quickwit/quickwit-config/src/source_config/serialize.rs +++ b/quickwit/quickwit-config/src/source_config/serialize.rs @@ -32,14 +32,11 @@ type SourceConfigForSerialization = SourceConfigV0_8; #[serde(deny_unknown_fields)] #[serde(tag = "version")] pub enum VersionedSourceConfig { - #[serde(rename = "0.7")] - // Retro compatibility. - #[serde(alias = "0.6")] - #[serde(alias = "0.5")] - #[serde(alias = "0.4")] - V0_7(SourceConfigV0_7), #[serde(rename = "0.8")] V0_8(SourceConfigV0_8), + // Retro compatibility. + #[serde(rename = "0.7")] + V0_7(SourceConfigV0_7), } impl From for SourceConfigForSerialization { diff --git a/quickwit/quickwit-control-plane/src/control_plane.rs b/quickwit/quickwit-control-plane/src/control_plane.rs index 6e577159a7e..33c9aa0dfcd 100644 --- a/quickwit/quickwit-control-plane/src/control_plane.rs +++ b/quickwit/quickwit-control-plane/src/control_plane.rs @@ -239,7 +239,7 @@ impl ControlPlane { &mut self, subrequests: &[GetOrCreateOpenShardsSubrequest], progress: &Progress, - ) -> ControlPlaneResult<()> { + ) -> MetastoreResult<()> { if !self.cluster_config.auto_create_indexes { return Ok(()); } @@ -425,7 +425,7 @@ impl Handler for ControlPlane { for (shard_id, position) in shard_positions_update.updated_shard_positions { if let Some(shard) = shard_entries.get_mut(&shard_id) { shard.publish_position_inclusive = - Some(shard.publish_position_inclusive().max(&position).clone()); + Some(shard.publish_position_inclusive().max(position.clone())); if position.is_eof() { // identify shards that have reached EOF but have not yet been removed. info!(shard_id=%shard_id, position=?position, "received eof shard via gossip"); @@ -459,9 +459,13 @@ impl Handler for ControlPlane { if self.disable_control_loop { return Ok(()); } - self.ingest_controller + if let Err(metastore_error) = self + .ingest_controller .rebalance_shards(&mut self.model, ctx.mailbox(), ctx.progress()) - .await; + .await + { + return convert_metastore_error::<()>(metastore_error).map(|_| ()); + } self.indexing_scheduler.control_running_plan(&self.model); ctx.schedule_self_msg(CONTROL_PLAN_LOOP_INTERVAL, ControlPlanLoop); Ok(()) @@ -482,23 +486,7 @@ fn convert_metastore_error( ) -> Result, ActorExitStatus> { // If true, we know that the transactions has not been recorded in the Metastore. // If false, we simply are not sure whether the transaction has been recorded or not. - let is_transaction_certainly_aborted = match &metastore_error { - MetastoreError::AlreadyExists(_) - | MetastoreError::FailedPrecondition { .. } - | MetastoreError::Forbidden { .. } - | MetastoreError::InvalidArgument { .. } - | MetastoreError::JsonDeserializeError { .. } - | MetastoreError::JsonSerializeError { .. } - | MetastoreError::NotFound(_) - | MetastoreError::TooManyRequests => true, - - MetastoreError::Connection { .. } - | MetastoreError::Db { .. } - | MetastoreError::Internal { .. } - | MetastoreError::Io { .. 
} - | MetastoreError::Timeout { .. } - | MetastoreError::Unavailable(_) => false, - }; + let is_transaction_certainly_aborted = metastore_error.is_transaction_certainly_aborted(); if is_transaction_certainly_aborted { // If the metastore transaction is certain to have been aborted, // this is actually a good thing. @@ -777,27 +765,23 @@ impl Handler for ControlPlane { request: GetOrCreateOpenShardsRequest, ctx: &ActorContext, ) -> Result { - if let Err(control_plane_error) = self + if let Err(metastore_error) = self .auto_create_indexes(&request.subrequests, ctx.progress()) .await { - return Ok(Err(control_plane_error)); + return convert_metastore_error(metastore_error); } - let response = match self + match self .ingest_controller .get_or_create_open_shards(request, &mut self.model, ctx.progress()) .await { - Ok(response) => response, - Err(ControlPlaneError::Metastore(metastore_error)) => { - return convert_metastore_error(metastore_error); - } - Err(control_plane_error) => { - return Ok(Err(control_plane_error)); + Ok(resp) => { + let _rebuild_plan_waiter = self.rebuild_plan_debounced(ctx); + Ok(Ok(resp)) } - }; - let _rebuild_plan_waiter = self.rebuild_plan_debounced(ctx); - Ok(Ok(response)) + Err(metastore_error) => convert_metastore_error(metastore_error), + } } } @@ -827,9 +811,13 @@ impl Handler for ControlPlane { local_shards_update: LocalShardsUpdate, ctx: &ActorContext, ) -> Result { - self.ingest_controller + if let Err(metastore_error) = self + .ingest_controller .handle_local_shards_update(local_shards_update, &mut self.model, ctx.progress()) - .await; + .await + { + return convert_metastore_error(metastore_error); + } let _rebuild_plan_waiter = self.rebuild_plan_debounced(ctx); Ok(Ok(())) } @@ -921,9 +909,13 @@ impl Handler for ControlPlane { message.0.node_id() ); // TODO: Update shard table. - self.ingest_controller + if let Err(metastore_error) = self + .ingest_controller .rebalance_shards(&mut self.model, ctx.mailbox(), ctx.progress()) - .await; + .await + { + return convert_metastore_error::<()>(metastore_error).map(|_| ()); + } self.indexing_scheduler.rebuild_plan(&self.model); Ok(()) } @@ -947,9 +939,13 @@ impl Handler for ControlPlane { message.0.node_id() ); // TODO: Update shard table. 
- self.ingest_controller + if let Err(metastore_error) = self + .ingest_controller .rebalance_shards(&mut self.model, ctx.mailbox(), ctx.progress()) - .await; + .await + { + return convert_metastore_error::<()>(metastore_error).map(|_| ()); + } self.indexing_scheduler.rebuild_plan(&self.model); Ok(()) } @@ -2118,7 +2114,7 @@ mod tests { assert_eq!(source_configs[0].source_id, INGEST_V2_SOURCE_ID); assert_eq!(source_configs[1].source_id, CLI_SOURCE_ID); - let index_uid = IndexUid::from_parts("test-index-foo", 0); + let index_uid = IndexUid::for_test("test-index-foo", 0); let mut index_metadata = IndexMetadata::new_with_index_uid(index_uid, index_config); for source_config in source_configs { diff --git a/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs b/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs index 80dc39886ea..3fca19ab936 100644 --- a/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs +++ b/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs @@ -806,11 +806,11 @@ mod tests { #[test] fn test_build_physical_indexing_plan_simple() { let source_1 = SourceUid { - index_uid: IndexUid::from_parts("index-1", 0), + index_uid: IndexUid::for_test("index-1", 0), source_id: "source1".to_string(), }; let source_2 = SourceUid { - index_uid: IndexUid::from_parts("index-2", 0), + index_uid: IndexUid::for_test("index-2", 0), source_id: "source2".to_string(), }; let sources = vec![ @@ -887,7 +887,7 @@ mod tests { prop_compose! { fn gen_kafka_source() (index_idx in 0usize..100usize, num_pipelines in 1usize..51usize) -> (IndexUid, SourceConfig) { - let index_uid = IndexUid::from_parts(&format!("index-id-{index_idx}"), 0 /* this is the index uid */); + let index_uid = IndexUid::for_test(&format!("index-id-{index_idx}"), 0 /* this is the index uid */); let source_id = quickwit_common::rand::append_random_suffix("kafka-source"); (index_uid, SourceConfig { source_id, diff --git a/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/mod.rs b/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/mod.rs index 7dc9419339f..5600ce6f6dc 100644 --- a/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/mod.rs +++ b/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/mod.rs @@ -727,7 +727,7 @@ mod tests { fn source_id() -> SourceUid { static COUNTER: AtomicUsize = AtomicUsize::new(0); - let index = IndexUid::from_parts("test_index", 0); + let index = IndexUid::for_test("test_index", 0); let source_id = COUNTER.fetch_add(1, Ordering::SeqCst); SourceUid { index_uid: index, diff --git a/quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs b/quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs index 2566721e438..4c7fe5522bb 100644 --- a/quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs +++ b/quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs @@ -18,11 +18,10 @@ // along with this program. If not, see . 
use std::collections::{BTreeSet, HashMap}; +use std::fmt; use std::future::Future; -use std::iter::zip; use std::sync::Arc; use std::time::Duration; -use std::{cmp, fmt}; use fnv::FnvHashSet; use futures::stream::FuturesUnordered; @@ -33,18 +32,22 @@ use quickwit_common::pretty::PrettySample; use quickwit_common::Progress; use quickwit_ingest::{IngesterPool, LeaderId, LocalShardsUpdate}; use quickwit_proto::control_plane::{ - AdviseResetShardsRequest, AdviseResetShardsResponse, ControlPlaneResult, - GetOrCreateOpenShardsFailure, GetOrCreateOpenShardsFailureReason, GetOrCreateOpenShardsRequest, - GetOrCreateOpenShardsResponse, GetOrCreateOpenShardsSuccess, + AdviseResetShardsRequest, AdviseResetShardsResponse, GetOrCreateOpenShardsFailureReason, + GetOrCreateOpenShardsRequest, GetOrCreateOpenShardsResponse, GetOrCreateOpenShardsSubrequest, + GetOrCreateOpenShardsSuccess, }; use quickwit_proto::ingest::ingester::{ CloseShardsRequest, CloseShardsResponse, IngesterService, InitShardFailure, InitShardSubrequest, InitShardsRequest, InitShardsResponse, RetainShardsForSource, RetainShardsRequest, }; -use quickwit_proto::ingest::{Shard, ShardIdPosition, ShardIdPositions, ShardIds, ShardPKey}; -use quickwit_proto::metastore; -use quickwit_proto::metastore::{MetastoreService, MetastoreServiceClient}; +use quickwit_proto::ingest::{ + Shard, ShardIdPosition, ShardIdPositions, ShardIds, ShardPKey, ShardState, +}; +use quickwit_proto::metastore::{ + MetastoreResult, MetastoreService, MetastoreServiceClient, OpenShardSubrequest, + OpenShardsRequest, OpenShardsResponse, +}; use quickwit_proto::types::{IndexUid, NodeId, Position, ShardId, SourceUid}; use serde::{Deserialize, Serialize}; use tokio::sync::{Mutex, OwnedMutexGuard}; @@ -121,6 +124,67 @@ impl fmt::Debug for IngestController { } } +/// Updates both the metastore and the control plane. +/// If successful, the control plane is guaranteed to be in sync with the metastore. +/// If an error is returned, the control plane might be out of sync with the metastore. +/// It is up to the client to check the error type and see if the control plane actor should be +/// restarted. 
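The guarantee spelled out in the doc comment above boils down to an ordering rule: write to the metastore first, mirror into the in-memory model second. A schematic illustration with hypothetical stand-in types (not the actual Quickwit API):

```rust
// Hypothetical stand-ins, for illustration only.
struct Metastore;
struct Model {
    shard_ids: Vec<String>,
}

#[derive(Debug)]
struct MetastoreError;

impl Metastore {
    fn open_shard(&mut self, shard_id: &str) -> Result<(), MetastoreError> {
        // Stands in for a remote call that may fail before or after commit.
        let _ = shard_id;
        Ok(())
    }
}

// Metastore first, model second: on `Ok`, both were updated; on `Err`,
// the model was never touched, but the metastore outcome is unknown,
// which is exactly why the caller must inspect the error type.
fn open_shard_on_metastore_and_model(
    shard_id: &str,
    metastore: &mut Metastore,
    model: &mut Model,
) -> Result<(), MetastoreError> {
    metastore.open_shard(shard_id)?;
    model.shard_ids.push(shard_id.to_string());
    Ok(())
}
```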
+async fn open_shards_on_metastore_and_model( + open_shards_subrequests: Vec, + metastore: &mut MetastoreServiceClient, + model: &mut ControlPlaneModel, +) -> MetastoreResult { + if open_shards_subrequests.is_empty() { + return Ok(OpenShardsResponse { + subresponses: Vec::new(), + }); + } + let open_shards_request = OpenShardsRequest { + subrequests: open_shards_subrequests, + }; + let open_shards_response = metastore.open_shards(open_shards_request).await?; + for open_shard_subresponse in &open_shards_response.subresponses { + if let Some(shard) = &open_shard_subresponse.open_shard { + let shard = shard.clone(); + let index_uid = shard.index_uid().clone(); + let source_id = shard.source_id.clone(); + model.insert_shards(&index_uid, &source_id, vec![shard]); + } + } + Ok(open_shards_response) +} + +fn get_open_shard_from_model( + get_open_shards_subrequest: &GetOrCreateOpenShardsSubrequest, + model: &ControlPlaneModel, + unavailable_leaders: &FnvHashSet, +) -> Result, GetOrCreateOpenShardsFailureReason> { + let Some(index_uid) = model.index_uid(&get_open_shards_subrequest.index_id) else { + return Err(GetOrCreateOpenShardsFailureReason::IndexNotFound); + }; + let Some(open_shard_entries) = model.find_open_shards( + &index_uid, + &get_open_shards_subrequest.source_id, + unavailable_leaders, + ) else { + return Err(GetOrCreateOpenShardsFailureReason::SourceNotFound); + }; + if open_shard_entries.is_empty() { + return Ok(None); + } + // We already have open shards. Let's return them. + let open_shards: Vec = open_shard_entries + .into_iter() + .map(|shard_entry| shard_entry.shard) + .collect(); + Ok(Some(GetOrCreateOpenShardsSuccess { + subrequest_id: get_open_shards_subrequest.subrequest_id, + index_uid: index_uid.into(), + source_id: get_open_shards_subrequest.source_id.clone(), + open_shards, + })) +} + impl IngestController { pub fn new( metastore: MetastoreServiceClient, @@ -224,14 +288,14 @@ impl IngestController { local_shards_update: LocalShardsUpdate, model: &mut ControlPlaneModel, progress: &Progress, - ) { + ) -> MetastoreResult<()> { let shard_stats = model.update_shards( &local_shards_update.source_uid, &local_shards_update.shard_infos, ); if shard_stats.avg_ingestion_rate >= SCALE_UP_SHARDS_THRESHOLD_MIB_PER_SEC { self.try_scale_up_shards(local_shards_update.source_uid, shard_stats, model, progress) - .await; + .await?; } else if shard_stats.avg_ingestion_rate <= SCALE_DOWN_SHARDS_THRESHOLD_MIB_PER_SEC && shard_stats.num_open_shards > 1 { @@ -241,8 +305,9 @@ impl IngestController { model, progress, ) - .await; + .await?; } + Ok(()) } /// Finds the open shards that satisfies the [`GetOrCreateOpenShardsRequest`] request sent by an @@ -254,132 +319,95 @@ impl IngestController { get_open_shards_request: GetOrCreateOpenShardsRequest, model: &mut ControlPlaneModel, progress: &Progress, - ) -> ControlPlaneResult { + ) -> MetastoreResult { + // Closing shards is an operation performed by ingesters, + // so the control plane is not necessarily aware that they are closed. + // + // Routers can report closed shards so that we can update our + // internal state. 
self.handle_closed_shards(get_open_shards_request.closed_shards, model); + let num_subrequests = get_open_shards_request.subrequests.len(); + let mut get_or_create_open_shards_successes = Vec::with_capacity(num_subrequests); + let mut get_or_create_open_shards_failures = Vec::new(); + + let mut num_missing_shards_per_source_uids = HashMap::new(); + let unavailable_leaders: FnvHashSet = get_open_shards_request .unavailable_leaders .into_iter() - .map(|ingester_id| ingester_id.into()) + .map(Into::into) .collect(); - let num_subrequests = get_open_shards_request.subrequests.len(); - let mut get_or_create_open_shards_successes = Vec::with_capacity(num_subrequests); - let mut get_or_create_open_shards_failures = Vec::new(); - let mut open_shards_subrequests = Vec::new(); - - for get_open_shards_subrequest in get_open_shards_request.subrequests { - let Some(index_uid) = model.index_uid(&get_open_shards_subrequest.index_id) else { - let get_or_create_open_shards_failure = GetOrCreateOpenShardsFailure { - subrequest_id: get_open_shards_subrequest.subrequest_id, - index_id: get_open_shards_subrequest.index_id, - source_id: get_open_shards_subrequest.source_id, - reason: GetOrCreateOpenShardsFailureReason::IndexNotFound as i32, + // We do a first pass to identify the shards that are missing from the model and need to be + // created. + for get_open_shards_subrequest in &get_open_shards_request.subrequests { + if let Ok(None) = + get_open_shard_from_model(get_open_shards_subrequest, model, &unavailable_leaders) + { + // We did not find any open shard in the model, so we will have to create one. + // Let's keep track of all of the sources that require new shards, so we can batch + // create them after this loop. + let index_uid = model + .index_uid(&get_open_shards_subrequest.index_id) + .unwrap(); + let source_uid = SourceUid { + index_uid, + source_id: get_open_shards_subrequest.source_id.clone(), }; - get_or_create_open_shards_failures.push(get_or_create_open_shards_failure); - continue; - }; - let Some(open_shard_entries) = model.find_open_shards( - &index_uid, - &get_open_shards_subrequest.source_id, + *num_missing_shards_per_source_uids + .entry(source_uid) + .or_default() += 1; + } + } + + if let Err(metastore_error) = self + .try_open_shards( + num_missing_shards_per_source_uids, + model, &unavailable_leaders, - ) else { - let get_or_create_open_shards_failure = GetOrCreateOpenShardsFailure { - subrequest_id: get_open_shards_subrequest.subrequest_id, - index_id: get_open_shards_subrequest.index_id, - source_id: get_open_shards_subrequest.source_id, - reason: GetOrCreateOpenShardsFailureReason::SourceNotFound as i32, - }; - get_or_create_open_shards_failures.push(get_or_create_open_shards_failure); - continue; - }; - if !open_shard_entries.is_empty() { - let open_shards: Vec = open_shard_entries - .into_iter() - .map(|shard_entry| shard_entry.shard) - .collect(); - let get_or_create_open_shards_success = GetOrCreateOpenShardsSuccess { - subrequest_id: get_open_shards_subrequest.subrequest_id, - index_uid: index_uid.into(), - source_id: get_open_shards_subrequest.source_id, - open_shards, - }; - get_or_create_open_shards_successes.push(get_or_create_open_shards_success); + progress, + ) + .await + { + // We experienced a metastore error. If the transaction was not certainly aborted, we need + // to restart the control plane to make sure it is not out of sync with the metastore.
+ // + if !metastore_error.is_transaction_certainly_aborted() { + return Err(metastore_error); } else { - let shard_id = ShardId::from(Ulid::new()); - let open_shard_subrequest = metastore::OpenShardSubrequest { - subrequest_id: get_open_shards_subrequest.subrequest_id, - index_uid: index_uid.into(), - source_id: get_open_shards_subrequest.source_id, - shard_id: Some(shard_id), - // These attributes will be overwritten in the next stage. - leader_id: "".to_string(), - follower_id: None, - }; - open_shards_subrequests.push(open_shard_subrequest); + // Otherwise, let's just log something. + // This is not critical. We will just end up returning some failures in the response. + error!(error=?metastore_error, "failed to open shards on the metastore"); } } - if !open_shards_subrequests.is_empty() { - if let Some(leader_follower_pairs) = - self.allocate_shards(open_shards_subrequests.len(), &unavailable_leaders, model) - { - for (open_shards_subrequest, (leader_id, follower_opt)) in open_shards_subrequests - .iter_mut() - .zip(leader_follower_pairs) - { - open_shards_subrequest.leader_id = leader_id.into(); - open_shards_subrequest.follower_id = follower_opt.map(Into::into); + + for get_open_shards_subrequest in get_open_shards_request.subrequests { + match get_open_shard_from_model( + &get_open_shards_subrequest, + model, + &unavailable_leaders, + ) { + Ok(Some(success)) => { + get_or_create_open_shards_successes.push(success); } - let open_shards_request = metastore::OpenShardsRequest { - subrequests: open_shards_subrequests, - }; - let open_shards_response = progress - .protect_future(self.metastore.open_shards(open_shards_request)) - .await?; - - let init_shards_response = self - .init_shards(&open_shards_response.subresponses, progress) - .await; - - for init_shard_success in init_shards_response.successes { - let shard = init_shard_success.shard().clone(); - let index_uid = shard.index_uid().clone(); - let source_id = shard.source_id.clone(); - model.insert_shards(&index_uid, &source_id, vec![shard]); - - if let Some(open_shard_entries) = - model.find_open_shards(&index_uid, &source_id, &unavailable_leaders) - { - let open_shards = open_shard_entries - .into_iter() - .map(|shard_entry| shard_entry.shard) - .collect(); - let get_or_create_open_shards_success = GetOrCreateOpenShardsSuccess { - subrequest_id: init_shard_success.subrequest_id, - index_uid: Some(index_uid), - source_id, - open_shards, - }; - get_or_create_open_shards_successes.push(get_or_create_open_shards_success); + Ok(None) => { + get_or_create_open_shards_failures.push( + GetOrCreateOpenShardsFailureReason::NoIngestersAvailable + .create_failure(get_open_shards_subrequest), + ); } - } else { - for open_shards_subrequest in open_shards_subrequests { - let get_or_create_open_shards_failure = GetOrCreateOpenShardsFailure { - subrequest_id: open_shards_subrequest.subrequest_id, - index_id: open_shards_subrequest.index_uid().index_id.clone(), - source_id: open_shards_subrequest.source_id, - reason: GetOrCreateOpenShardsFailureReason::NoIngestersAvailable as i32, - }; - get_or_create_open_shards_failures.push(get_or_create_open_shards_failure); + Err(failure_reason) => { + get_or_create_open_shards_failures + .push(failure_reason.create_failure(get_open_shards_subrequest)); } } } - let response = GetOrCreateOpenShardsResponse { + + Ok(GetOrCreateOpenShardsResponse { successes: get_or_create_open_shards_successes, failures: get_or_create_open_shards_failures, - }; - Ok(response) + }) } /// Allocates and assigns new shards to
ingesters. @@ -475,25 +503,21 @@ impl IngestController { /// Calls init shards on the leaders hosting newly opened shards. async fn init_shards( &self, - open_shards_subresponses: &[metastore::OpenShardSubresponse], + init_shard_subrequests: Vec, progress: &Progress, ) -> InitShardsResponse { - let mut successes = Vec::with_capacity(open_shards_subresponses.len()); + let mut successes = Vec::with_capacity(init_shard_subrequests.len()); let mut failures = Vec::new(); - let mut per_leader_shards_to_init: HashMap<&String, Vec> = + let mut per_leader_shards_to_init: HashMap> = HashMap::default(); - for subresponse in open_shards_subresponses { - let shard = subresponse.open_shard(); - let init_shards_subrequest = InitShardSubrequest { - subrequest_id: subresponse.subrequest_id, - shard: Some(shard.clone()), - }; + for init_shard_subrequest in init_shard_subrequests { + let leader_id = init_shard_subrequest.shard().leader_id.clone(); per_leader_shards_to_init - .entry(&shard.leader_id) + .entry(leader_id) .or_default() - .push(init_shards_subrequest); + .push(init_shard_subrequest); } let mut init_shards_futures = FuturesUnordered::new(); @@ -511,7 +535,7 @@ impl IngestController { } }) .collect(); - let Some(mut leader) = self.ingester_pool.get(leader_id) else { + let Some(mut leader) = self.ingester_pool.get(&leader_id) else { warn!("failed to init shards: ingester `{leader_id}` is unavailable"); failures.extend(init_shard_failures); continue; @@ -560,71 +584,157 @@ impl IngestController { shard_stats: ShardStats, model: &mut ControlPlaneModel, progress: &Progress, - ) { + ) -> MetastoreResult<()> { const NUM_PERMITS: u64 = 1; if !model .acquire_scaling_permits(&source_uid, ScalingMode::Up, NUM_PERMITS) .unwrap_or(false) { - return; + return Ok(()); } - let new_num_open_shards = shard_stats.num_open_shards + 1; - info!( - index_id=%source_uid.index_uid.index_id, - source_id=%source_uid.source_id, - "scaling up number of shards to {new_num_open_shards}" - ); - let unavailable_leaders: FnvHashSet = FnvHashSet::default(); + let new_num_open_shards = shard_stats.num_open_shards + 1; - let Some((leader_id, follower_id)) = self - .allocate_shards(1, &unavailable_leaders, model) - .and_then(|pairs| pairs.into_iter().next()) - else { - warn!("failed to scale up number of shards: no ingesters available"); - model.release_scaling_permits(&source_uid, ScalingMode::Up, NUM_PERMITS); - return; - }; - let shard_id = ShardId::from(Ulid::new()); - let open_shard_subrequest = metastore::OpenShardSubrequest { - subrequest_id: 0, - index_uid: source_uid.index_uid.clone().into(), - source_id: source_uid.source_id.clone(), - shard_id: Some(shard_id), - leader_id: leader_id.into(), - follower_id: follower_id.map(Into::into), - }; - let open_shards_request = metastore::OpenShardsRequest { - subrequests: vec![open_shard_subrequest], - }; - let open_shards_response = match progress - .protect_future(self.metastore.open_shards(open_shards_request)) - .await - { - Ok(open_shards_response) => open_shards_response, - Err(error) => { - warn!("failed to scale up number of shards: {error}"); + let new_shard_source_uids: HashMap = + std::iter::once((source_uid.clone(), 1)).collect(); + let successful_source_uids_res = self + .try_open_shards(new_shard_source_uids, model, &Default::default(), progress) + .await; + match successful_source_uids_res { + Ok(successful_source_uids) => { + assert!(successful_source_uids.len() <= 1); + if successful_source_uids.is_empty() { + // We did not manage to create the shard. 
+ // We can release our permit. + model.release_scaling_permits(&source_uid, ScalingMode::Up, NUM_PERMITS); + warn!( + index_uid=%source_uid.index_uid, + source_id=%source_uid.source_id, + "scaling up number of shards to {new_num_open_shards} failed: shard initialization failure" + ); + } else { + info!( + index_id=%source_uid.index_uid.index_id, + source_id=%source_uid.source_id, + "successfully scaled up number of shards to {new_num_open_shards}" + ); + } + Ok(()) + } + Err(metastore_error) => { + // We did not manage to create the shard. + // We can release our permit, but we also need to return the error to the caller, in + // order to restart the control plane actor if necessary. + warn!( + index_id=%source_uid.index_uid.index_id, + source_id=%source_uid.source_id, + "scaling up number of shards to {new_num_open_shards} failed: {metastore_error:?}" + ); model.release_scaling_permits(&source_uid, ScalingMode::Up, NUM_PERMITS); - return; + Err(metastore_error) } + } + } + + /// Attempts to open shards for different sources. + /// `source_uids` may contain the same source multiple times. + /// + /// This function returns the sources for which opening shards succeeded. + /// + /// As long as no metastore error is returned, this function leaves the control plane model + /// in sync with the metastore. + /// + /// Also, this function only updates the control plane model and the metastore after + /// having successfully initialized a shard (and possibly its replica) on the ingester. + /// + /// This function can be partially successful: if `init_shards` was unsuccessful for some shards, + /// the successfully initialized shards will still be recorded in the metastore/control + /// plane model. + /// + /// The number of successfully opened shards is returned for each source.
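To make the multiplicity contract above concrete, here is a small sketch (hypothetical types, not the real `SourceUid`) of how a caller can diff the per-source counts it requested against the per-source counts that actually got opened, since partial success is expected:

```rust
use std::collections::HashMap;

// Stand-in for Quickwit's `SourceUid` (index UID + source ID).
type SourceUid = String;

// Sources absent from `opened` did not get any new shard at all;
// shards that failed to initialize were never written to the metastore
// or the model, so only the gap needs reporting.
fn log_partial_failures(
    requested: &HashMap<SourceUid, usize>,
    opened: &HashMap<SourceUid, usize>,
) {
    for (source_uid, num_requested) in requested {
        let num_opened = opened.get(source_uid).copied().unwrap_or(0);
        if num_opened < *num_requested {
            eprintln!("{source_uid}: opened {num_opened}/{num_requested} shards");
        }
    }
}
```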
+ async fn try_open_shards( + &mut self, + source_uids: HashMap, + model: &mut ControlPlaneModel, + unavailable_leaders: &FnvHashSet, + progress: &Progress, + ) -> MetastoreResult> { + let num_shards = source_uids.values().sum::(); + + if num_shards == 0 { + return Ok(HashMap::default()); + } + + // TODO unavailable leaders + let Some(leader_follower_pairs) = + self.allocate_shards(num_shards, unavailable_leaders, model) + else { + return Ok(HashMap::default()); }; - let init_shards_response = self - .init_shards(&open_shards_response.subresponses, progress) - .await; - if init_shards_response.successes.is_empty() { - warn!("failed to scale up number of shards"); - model.release_scaling_permits(&source_uid, ScalingMode::Up, NUM_PERMITS); - return; + let source_uids_with_multiplicity = source_uids + .iter() + .flat_map(|(source_uid, count)| std::iter::repeat(source_uid).take(*count)); + + let mut init_shard_subrequests: Vec = Vec::new(); + for (subrequest_id, (source_uid, (leader_id, follower_id_opt))) in + source_uids_with_multiplicity + .zip(leader_follower_pairs) + .enumerate() + { + let shard = Shard { + index_uid: Some(source_uid.index_uid.clone()), + source_id: source_uid.source_id.clone(), + shard_id: Some(ShardId::from(Ulid::new())), + leader_id: leader_id.to_string(), + follower_id: follower_id_opt.as_ref().map(ToString::to_string), + shard_state: ShardState::Open as i32, + publish_position_inclusive: Some(quickwit_proto::types::Position::default()), + publish_token: None, + }; + let init_shard_subrequest = InitShardSubrequest { + subrequest_id: subrequest_id as u32, + shard: Some(shard), + }; + init_shard_subrequests.push(init_shard_subrequest); } - for init_shard_success in init_shards_response.successes { - let open_shard = init_shard_success.shard().clone(); - let index_uid = open_shard.index_uid().clone(); - let source_id = open_shard.source_id.clone(); - let open_shards = vec![open_shard]; - model.insert_shards(&index_uid, &source_id, open_shards); + + // Let's first attempt to initialize these shards. + let init_shards_response = self.init_shards(init_shard_subrequests, progress).await; + + let open_shards_subrequests = init_shards_response + .successes + .into_iter() + .enumerate() + .map(|(subrequest_id, init_shard_success)| { + let shard = init_shard_success.shard(); + OpenShardSubrequest { + subrequest_id: subrequest_id as u32, + index_uid: shard.index_uid.clone(), + source_id: shard.source_id.clone(), + shard_id: shard.shard_id.clone(), + leader_id: shard.leader_id.clone(), + follower_id: shard.follower_id.clone(), + } + }) + .collect(); + + let OpenShardsResponse { subresponses } = progress + .protect_future(open_shards_on_metastore_and_model( + open_shards_subrequests, + &mut self.metastore, + model, + )) + .await?; + + let mut open_shards_count = HashMap::default(); + for open_shard_subresponse in subresponses { + let source_uid = open_shard_subresponse.open_shard().source_uid(); + *open_shards_count.entry(source_uid).or_default() += 1; } + + Ok(open_shards_count) } /// Attempts to decrease the number of shards. 
This operation is rate limited to avoid closing @@ -635,14 +745,14 @@ impl IngestController { shard_stats: ShardStats, model: &mut ControlPlaneModel, progress: &Progress, - ) { + ) -> MetastoreResult<()> { const NUM_PERMITS: u64 = 1; if !model .acquire_scaling_permits(&source_uid, ScalingMode::Down, NUM_PERMITS) .unwrap_or(false) { - return; + return Ok(()); } let new_num_open_shards = shard_stats.num_open_shards - 1; @@ -653,11 +763,11 @@ impl IngestController { ); let Some((leader_id, shard_id)) = find_scale_down_candidate(&source_uid, model) else { model.release_scaling_permits(&source_uid, ScalingMode::Down, NUM_PERMITS); - return; + return Ok(()); }; let Some(mut ingester) = self.ingester_pool.get(&leader_id) else { model.release_scaling_permits(&source_uid, ScalingMode::Down, NUM_PERMITS); - return; + return Ok(()); }; let shard_pkeys = vec![ShardPKey { index_uid: source_uid.index_uid.clone().into(), @@ -671,9 +781,10 @@ impl IngestController { { warn!("failed to scale down number of shards: {error}"); model.release_scaling_permits(&source_uid, ScalingMode::Down, NUM_PERMITS); - return; + return Ok(()); } model.close_shards(&source_uid, &[shard_id]); + Ok(()) } pub(crate) fn advise_reset_shards( @@ -705,8 +816,7 @@ impl IngestController { for shard_id in shard_ids.shard_ids { if let Some(shard_entry) = shard_entries.get(&shard_id) { - let publish_position_inclusive = - shard_entry.publish_position_inclusive().clone(); + let publish_position_inclusive = shard_entry.publish_position_inclusive(); shard_positions_to_truncate.push(ShardIdPosition { shard_id: Some(shard_id), @@ -731,9 +841,8 @@ impl IngestController { }); } } - if enabled!(Level::DEBUG) { - let shards_to_truncate: Vec<(&str, &Position)> = shards_to_truncate + let shards_to_truncate: Vec<(&str, Position)> = shards_to_truncate .iter() .flat_map(|shard_positions| { shard_positions @@ -756,127 +865,130 @@ impl IngestController { } } - /// Moves shards from ingesters with too many shards to ingesters with too few shards. Moving a - /// shard consists of closing the shard on the source ingester and opening a new one on the - /// target ingester. + /// This method just "computes" the number of shards to move for rebalance. + /// It does not perform any side effects except logging. /// - /// This method is guarded by a lock to ensure that only one rebalance operation is performed at - /// a time. - pub(crate) async fn rebalance_shards( - &mut self, - model: &mut ControlPlaneModel, - mailbox: &Mailbox, - progress: &Progress, - ) -> Option> { - let Ok(rebalance_guard) = self.rebalance_lock.clone().try_lock_owned() else { - return None; - }; - self.stats.num_rebalance_shards_ops += 1; - + /// TODO we consider the number of alive ingesters for this computation, + /// but deal with the entire number of shards here. + /// This could cause problems when dealing with a lot of unavailable ingesters. + /// + /// On the other hand, it biases things the "right way": + /// If we are missing some ingesters, their shards should still be in the model, but they should + /// be missing from the ingester pool. + /// + /// As a result `num_open_shards_per_leader_threshold` should be inflated. + /// + /// TODO this implementation does not consider replicas.
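The 10% tolerance mentioned above reduces to a couple of lines of integer arithmetic. A standalone sketch of the threshold (assumed numbers only; the method that follows walks per-leader `ShardEntry` lists rather than raw counts):

```rust
// With `num_open_shards` spread over `num_ingesters`, a leader may keep
// up to ceil(1.1 * target) shards; anything beyond that is a candidate
// to move.
fn num_shards_to_move(
    num_open_shards: usize,
    num_ingesters: usize,
    num_on_this_leader: usize,
) -> usize {
    assert!(num_ingesters > 0, "the caller bails out on an empty pool");
    let target = num_open_shards / num_ingesters;
    // `div_ceil` rounds up: a target of 6 yields a threshold of 7.
    let threshold = (target * 11).div_ceil(10);
    num_on_this_leader.saturating_sub(threshold)
}

fn main() {
    // 30 shards over 3 ingesters: target 10, threshold 11.
    assert_eq!(num_shards_to_move(30, 3, 14), 3);
    // 20 shards over 3 ingesters: target 6, threshold 7.
    assert_eq!(num_shards_to_move(20, 3, 7), 0);
}
```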
+ fn rebalance_compute_shards_to_move(&self, model: &ControlPlaneModel) -> Vec { let num_ingesters = self.ingester_pool.len(); let mut num_open_shards: usize = 0; if num_ingesters == 0 { - return None; + debug!("no ingester available"); + return Vec::new(); } - let mut per_leader_open_shards: HashMap<&str, Vec<&ShardEntry>> = - HashMap::with_capacity(num_ingesters); + + let mut per_leader_open_shards: HashMap<&str, Vec<&ShardEntry>> = HashMap::default(); for shard in model.all_shards() { if shard.is_open() { num_open_shards += 1; - per_leader_open_shards .entry(&shard.leader_id) .or_default() .push(shard); } } + let num_open_shards_per_leader_target = num_open_shards / num_ingesters; - let num_open_shards_per_leader_threshold = cmp::max( - num_open_shards_per_leader_target * 11 / 10, - num_open_shards_per_leader_target + 1, - ); - let mut shards_to_move: Vec<&ShardEntry> = Vec::new(); + + // We tolerate an ingester with 10% more shards than the average. + + // Let's first identify the list of shards we want to "move". + let num_open_shards_per_leader_threshold = + (num_open_shards_per_leader_target * 11).div_ceil(10); + let mut shards_to_move: Vec = Vec::new(); for open_shards in per_leader_open_shards.values() { - if open_shards.len() > num_open_shards_per_leader_threshold { - shards_to_move.extend(&open_shards[num_open_shards_per_leader_threshold..]); + if open_shards.len() <= num_open_shards_per_leader_threshold { + continue; } + shards_to_move.extend( + open_shards[num_open_shards_per_leader_threshold..] + .iter() + .map(|shard_entry| shard_entry.shard.clone()), + ); } + + shards_to_move + } + + /// Moves shards from ingesters with too many shards to ingesters with too few shards. Moving a + /// shard consists of closing the shard on the source ingester and opening a new one on the + /// target ingester. + /// + /// This method is guarded by a lock to ensure that only one rebalance operation is performed at + /// a time. 
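The lock mentioned in this comment relies on `tokio::sync::Mutex::try_lock_owned`, which hands back an `OwnedMutexGuard` that is independent of `&self` and can move into a spawned task. A reduced sketch of the idea (assuming a tokio runtime; not the actual controller code):

```rust
use std::sync::Arc;
use tokio::sync::Mutex;

// Returns `false` when a rebalance is already in flight. Because the
// owned guard moves into the background task, the lock is held until
// the deferred close-shards work completes, not merely until this
// function returns.
async fn rebalance_once(rebalance_lock: Arc<Mutex<()>>) -> bool {
    let Ok(rebalance_guard) = rebalance_lock.try_lock_owned() else {
        return false;
    };
    let _task = tokio::spawn(async move {
        // ... wait for gossip to propagate, then close the moved shards ...
        drop(rebalance_guard); // releases the lock once the work is done
    });
    true
}
```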
+ pub(crate) async fn rebalance_shards( + &mut self, + model: &mut ControlPlaneModel, + mailbox: &Mailbox, + progress: &Progress, + ) -> MetastoreResult>> { + let Ok(rebalance_guard) = self.rebalance_lock.clone().try_lock_owned() else { + return Ok(None); + }; + self.stats.num_rebalance_shards_ops += 1; + + let shards_to_move: Vec = self.rebalance_compute_shards_to_move(model); if shards_to_move.is_empty() { - return None; + return Ok(None); } - info!("rebalancing {} shards", shards_to_move.len()); + let num_shards_to_move = shards_to_move.len(); - let unavailable_leaders: FnvHashSet = FnvHashSet::default(); + info!("rebalancing {} shards", num_shards_to_move); - let leader_follower_pairs = - self.allocate_shards(num_shards_to_move, &unavailable_leaders, model)?; - let mut open_shards_subrequests = Vec::with_capacity(num_shards_to_move); - let mut shards_to_close: HashMap = - HashMap::with_capacity(num_shards_to_move); + let mut new_shards_source_uids: HashMap = HashMap::default(); + for shard in &shards_to_move { + *new_shards_source_uids + .entry(shard.source_uid()) + .or_default() += 1; + } - for (subrequest_id, (shard_to_move, (leader_id, follower_id_opt))) in - zip(&shards_to_move, leader_follower_pairs).enumerate() - { - let shard_id = ShardId::from(Ulid::new()); - let open_shard_subrequest = metastore::OpenShardSubrequest { - subrequest_id: subrequest_id as u32, - index_uid: shard_to_move.index_uid.clone(), - source_id: shard_to_move.source_id.clone(), - shard_id: Some(shard_id.clone()), - leader_id: leader_id.into(), - follower_id: follower_id_opt.map(Into::into), - }; - open_shards_subrequests.push(open_shard_subrequest); + let mut successfully_source_uids: HashMap = self + .try_open_shards(new_shards_source_uids, model, &Default::default(), progress) + .await?; - let leader_id = NodeId::from(shard_to_move.leader_id.clone()); - let shard_pkey = ShardPKey { - index_uid: shard_to_move.index_uid.clone(), - source_id: shard_to_move.source_id.clone(), - shard_id: shard_to_move.shard_id.clone(), - }; - shards_to_close.insert(shard_id, (leader_id, shard_pkey)); + for source_uid in successfully_source_uids.keys() { + // We temporarily disable the ability to scale down the number of shards for + // the source to avoid closing the shards we just opened. + model.drain_scaling_permits(source_uid, ScalingMode::Down); } - let open_shards_request = metastore::OpenShardsRequest { - subrequests: open_shards_subrequests, - }; - let open_shards_response = match progress - .protect_future(self.metastore.open_shards(open_shards_request)) - .await - { - Ok(open_shards_response) => open_shards_response, - Err(error) => { - error!(%error, "failed to rebalance shards"); - return None; - } - }; - let init_shards_response = self - .init_shards(&open_shards_response.subresponses, progress) - .await; - - for init_shard_success in init_shards_response.successes { - let shard = init_shard_success.shard().clone(); - let index_uid = shard.index_uid().clone(); - let source_id = shard.source_id.clone(); - model.insert_shards(&index_uid, &source_id, vec![shard]); - let source_uid = SourceUid { - index_uid, - source_id, + // Let's close one of the shards to move for every successfully opened new shard.
+ let mut shards_to_close = Vec::new(); + for shard in shards_to_move { + let source_uid = shard.source_uid(); + let Some(count) = successfully_source_uids.get_mut(&source_uid) else { + continue; }; - // We temporarily disable the ability the scale down the number of shards for the source - // to avoid closing the shards we just opened. - model.drain_scaling_permits(&source_uid, ScalingMode::Down); - } - for init_shard_failure in init_shards_response.failures { - let shard_id = init_shard_failure.shard_id(); - shards_to_close.remove(shard_id); + if *count == 0 { + continue; + }; + *count -= 1; + let leader_id = NodeId::from(shard.leader_id.clone()); + let shard_pkey = ShardPKey { + index_uid: shard.index_uid.clone(), + source_id: shard.source_id.clone(), + shard_id: shard.shard_id.clone(), + }; + shards_to_close.push((leader_id, shard_pkey)); } - let close_shards_fut = self.close_shards(shards_to_close.into_values()); + let mailbox_clone = mailbox.clone(); + let close_shards_fut = self.close_shards(shards_to_close); + let close_shards_and_send_callback_fut = async move { // We wait for a few seconds before closing the shards to give the ingesters some time // to learn about the ones we just opened via gossip. @@ -893,12 +1005,13 @@ impl IngestController { }; let _ = mailbox_clone.send_message(callback).await; }; - Some(tokio::spawn(close_shards_and_send_callback_fut)) + + Ok(Some(tokio::spawn(close_shards_and_send_callback_fut))) } fn close_shards( &self, - shards_to_close: impl Iterator, + shards_to_close: Vec<(LeaderId, ShardPKey)>, ) -> impl Future> + Send + 'static { let mut per_leader_shards_to_close: HashMap> = HashMap::new(); @@ -1007,7 +1120,6 @@ fn find_scale_down_candidate( mod tests { use std::collections::BTreeSet; - use std::iter::empty; use std::str::FromStr; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; @@ -1024,7 +1136,9 @@ mod tests { MockIngesterService, RetainShardsResponse, }; use quickwit_proto::ingest::{IngestV2Error, Shard, ShardState}; - use quickwit_proto::metastore::{MetastoreError, MockMetastoreService}; + use quickwit_proto::metastore::{ + self, MetastoreError, MockMetastoreService, OpenShardSubresponse, + }; use quickwit_proto::types::{Position, SourceId}; use super::*; @@ -1084,12 -1198,10 @@ mod tests { assert_eq!(request.subrequests.len(), 1); let subrequest = &request.subrequests[0]; - assert_eq!(subrequest.subrequest_id, 1); let shard = subrequest.shard(); assert_eq!(shard.index_uid(), &index_uid_1_clone); assert_eq!(shard.source_id, "test-source"); - assert_eq!(shard.shard_id(), ShardId::from(1)); assert_eq!(shard.leader_id, "test-ingester-2"); let successes = vec![InitShardSuccess { @@ -1229,6 +1341,92 @@ mod tests { assert_eq!(model.num_shards(), 3); } + #[tokio::test] + async fn test_ingest_controller_get_or_create_open_shards_metastore_failure() { + let source_id: &'static str = "test-source"; + + let index_id_0 = "test-index-0"; + let index_metadata_0 = IndexMetadata::for_test(index_id_0, "ram://indexes/test-index-0"); + let index_uid_0 = index_metadata_0.index_uid.clone(); + let index_uid_0_clone = index_uid_0.clone(); + + let progress = Progress::default(); + + let mut mock_metastore = MockMetastoreService::new(); + mock_metastore + .expect_open_shards() + .once() + .returning(move |_| { + Err(MetastoreError::Internal { + message: "this error could mean anything: transaction success or failure!"
+ .to_string(), + cause: "".to_string(), + }) + }); + let metastore = MetastoreServiceClient::from_mock(mock_metastore); + + let mut mock_ingester = MockIngesterService::new(); + mock_ingester + .expect_init_shards() + .once() + .returning(move |request| { + assert_eq!(request.subrequests.len(), 1); + + let subrequest = &request.subrequests[0]; + + let shard = subrequest.shard(); + assert_eq!(shard.index_uid(), &index_uid_0); + assert_eq!(shard.source_id, "test-source"); + assert_eq!(shard.leader_id, "test-ingester-1"); + + let successes = vec![InitShardSuccess { + subrequest_id: request.subrequests[0].subrequest_id, + shard: Some(shard.clone()), + }]; + let response = InitShardsResponse { + successes, + failures: Vec::new(), + }; + Ok(response) + }); + let ingester = IngesterServiceClient::from_mock(mock_ingester); + + let ingester_pool = IngesterPool::default(); + ingester_pool.insert("test-ingester-1".into(), ingester.clone()); + + let replication_factor = 1; + let mut ingest_controller = + IngestController::new(metastore, ingester_pool, replication_factor); + + let mut model = ControlPlaneModel::default(); + model.add_index(index_metadata_0.clone()); + + let mut source_config = SourceConfig::ingest_v2(); + source_config.source_id = source_id.to_string(); + + model + .add_source(&index_uid_0_clone, source_config.clone()) + .unwrap(); + + let subrequests = vec![GetOrCreateOpenShardsSubrequest { + subrequest_id: 0, + index_id: "test-index-0".to_string(), + source_id: source_id.to_string(), + }]; + let request = GetOrCreateOpenShardsRequest { + subrequests, + closed_shards: Vec::new(), + unavailable_leaders: Vec::new(), + }; + + let metastore_error = ingest_controller + .get_or_create_open_shards(request, &mut model, &progress) + .await + .unwrap_err(); + + assert!(!metastore_error.is_transaction_certainly_aborted()); + } + #[tokio::test] async fn test_ingest_controller_get_open_shards_handles_closed_shards() { let metastore = MetastoreServiceClient::mocked(); @@ -1545,7 +1743,7 @@ mod tests { ingester_pool.insert(ingester_id_2, ingester_2); let init_shards_response = ingest_controller - .init_shards(&[], &Progress::default()) + .init_shards(Vec::new(), &Progress::default()) .await; assert_eq!(init_shards_response.successes.len(), 0); assert_eq!(init_shards_response.failures.len(), 0); @@ -1556,10 +1754,10 @@ mod tests { // - ingester 2 will time out; // - ingester 3 will be unavailable. 
- let open_shards_subresponses = [ - metastore::OpenShardSubresponse { + let init_shard_sub_requests: Vec = vec![ + InitShardSubrequest { subrequest_id: 0, - open_shard: Some(Shard { + shard: Some(Shard { index_uid: IndexUid::for_test("test-index", 0).into(), source_id: "test-source".to_string(), shard_id: Some(ShardId::from(0)), @@ -1568,9 +1766,9 @@ mod tests { ..Default::default() }), }, - metastore::OpenShardSubresponse { + InitShardSubrequest { subrequest_id: 1, - open_shard: Some(Shard { + shard: Some(Shard { index_uid: IndexUid::for_test("test-index", 0).into(), source_id: "test-source".to_string(), shard_id: Some(ShardId::from(1)), @@ -1579,9 +1777,9 @@ mod tests { ..Default::default() }), }, - metastore::OpenShardSubresponse { + InitShardSubrequest { subrequest_id: 2, - open_shard: Some(Shard { + shard: Some(Shard { index_uid: IndexUid::for_test("test-index", 0).into(), source_id: "test-source".to_string(), shard_id: Some(ShardId::from(2)), @@ -1590,9 +1788,9 @@ mod tests { ..Default::default() }), }, - metastore::OpenShardSubresponse { + InitShardSubrequest { subrequest_id: 3, - open_shard: Some(Shard { + shard: Some(Shard { index_uid: IndexUid::for_test("test-index", 0).into(), source_id: "test-source".to_string(), shard_id: Some(ShardId::from(3)), @@ -1601,9 +1799,9 @@ mod tests { ..Default::default() }), }, - metastore::OpenShardSubresponse { + InitShardSubrequest { subrequest_id: 4, - open_shard: Some(Shard { + shard: Some(Shard { index_uid: IndexUid::for_test("test-index", 0).into(), source_id: "test-source".to_string(), shard_id: Some(ShardId::from(4)), @@ -1614,7 +1812,7 @@ mod tests { }, ]; let init_shards_response = ingest_controller - .init_shards(&open_shards_subresponses, &Progress::default()) + .init_shards(init_shard_sub_requests, &Progress::default()) .await; assert_eq!(init_shards_response.successes.len(), 1); assert_eq!(init_shards_response.failures.len(), 4); @@ -1649,6 +1847,35 @@ mod tests { message: "failed to open shards".to_string(), }) }); + mock_metastore + .expect_open_shards() + .once() + .returning(|request| { + assert_eq!(request.subrequests.len(), 1); + let subrequest: &OpenShardSubrequest = &request.subrequests[0]; + + assert_eq!(subrequest.index_uid(), &IndexUid::for_test("test-index", 0)); + assert_eq!(subrequest.source_id, "test-source"); + assert_eq!(subrequest.leader_id, "test-ingester"); + + let shard = Shard { + index_uid: subrequest.index_uid.clone(), + source_id: subrequest.source_id.clone(), + shard_id: subrequest.shard_id.clone(), + leader_id: subrequest.leader_id.clone(), + follower_id: subrequest.follower_id.clone(), + shard_state: ShardState::Open as i32, + publish_position_inclusive: Some(Position::Beginning), + publish_token: None, + }; + let resp = OpenShardsResponse { + subresponses: vec![OpenShardSubresponse { + subrequest_id: subrequest.subrequest_id, + open_shard: Some(shard), + }], + }; + Ok(resp) + }); let metastore = MetastoreServiceClient::from_mock(mock_metastore); let ingester_pool = IngesterPool::default(); let replication_factor = 1; @@ -1691,9 +1918,11 @@ mod tests { source_uid: source_uid.clone(), shard_infos, }; + ingest_controller .handle_local_shards_update(local_shards_update, &mut model, &progress) - .await; + .await + .unwrap(); let shard_entries: Vec = model.all_shards().cloned().collect(); assert_eq!(shard_entries.len(), 1); @@ -1716,6 +1945,20 @@ mod tests { let mut mock_ingester = MockIngesterService::new(); let index_uid_clone = index_uid.clone(); + mock_ingester.expect_init_shards().returning( + move 
|init_shard_request: InitShardsRequest| { + assert_eq!(init_shard_request.subrequests.len(), 1); + let init_shard_subrequest: &InitShardSubrequest = + &init_shard_request.subrequests[0]; + Ok(InitShardsResponse { + successes: vec![InitShardSuccess { + subrequest_id: init_shard_subrequest.subrequest_id, + shard: init_shard_subrequest.shard.clone(), + }], + failures: Vec::new(), + }) + }, + ); mock_ingester .expect_close_shards() .returning(move |request| { @@ -1750,7 +1993,8 @@ mod tests { }; ingest_controller .handle_local_shards_update(local_shards_update, &mut model, &progress) - .await; + .await + .unwrap(); // Test update shard ingestion rate with failing scale up. let shard_infos = BTreeSet::from_iter([ @@ -1770,9 +2014,21 @@ mod tests { source_uid: source_uid.clone(), shard_infos, }; + + // The first request fails due to an error on the metastore. + let MetastoreError::InvalidArgument { .. } = ingest_controller + .handle_local_shards_update(local_shards_update.clone(), &mut model, &progress) + .await + .unwrap_err() + else { + panic!(); + }; + + // The second request works! ingest_controller .handle_local_shards_update(local_shards_update, &mut model, &progress) - .await; + .await + .unwrap(); } #[tokio::test] @@ -1849,7 +2105,8 @@ mod tests { // Test could not find leader. ingest_controller .try_scale_up_shards(source_uid.clone(), shard_stats, &mut model, &progress) - .await; + .await + .unwrap(); let mut mock_ingester = MockIngesterService::new(); @@ -1866,7 +2123,6 @@ mod tests { let shard = request.subrequests[0].shard(); assert_eq!(shard.index_uid(), &index_uid_clone); assert_eq!(shard.source_id, INGEST_V2_SOURCE_ID); - assert_eq!(shard.shard_id(), ShardId::from(1)); assert_eq!(shard.leader_id, "test-ingester"); Err(IngestV2Error::Internal("failed to init shards".to_string())) @@ -1883,7 +2139,6 @@ mod tests { let shard = subrequest.shard(); assert_eq!(shard.index_uid(), &index_uid_clone); assert_eq!(shard.source_id, INGEST_V2_SOURCE_ID); - assert_eq!(shard.shard_id(), ShardId::from(1)); assert_eq!(shard.leader_id, "test-ingester"); let successes = vec![InitShardSuccess { @@ -1902,19 +2157,22 @@ mod tests { // Test failed to open shards. ingest_controller .try_scale_up_shards(source_uid.clone(), shard_stats, &mut model, &progress) - .await; + .await + .unwrap(); assert_eq!(model.all_shards().count(), 0); // Test failed to init shards. ingest_controller .try_scale_up_shards(source_uid.clone(), shard_stats, &mut model, &progress) - .await; + .await + .unwrap_err(); assert_eq!(model.all_shards().count(), 0); // Test successfully opened shard. ingest_controller .try_scale_up_shards(source_uid.clone(), shard_stats, &mut model, &progress) - .await; + .await + .unwrap(); assert_eq!( model.all_shards().filter(|shard| shard.is_open()).count(), 1 @@ -1947,7 +2205,8 @@ mod tests { // Test could not find a scale down candidate. ingest_controller .try_scale_down_shards(source_uid.clone(), shard_stats, &mut model, &progress) - .await; + .await + .unwrap(); let shards = vec![Shard { shard_id: Some(ShardId::from(1)), @@ -1962,7 +2221,8 @@ mod tests { // Test ingester is unavailable. ingest_controller .try_scale_down_shards(source_uid.clone(), shard_stats, &mut model, &progress) - .await; + .await + .unwrap(); let mut mock_ingester = MockIngesterService::new(); @@ -2001,13 +2261,15 @@ mod tests { // Test failed to close shard. 
ingest_controller .try_scale_down_shards(source_uid.clone(), shard_stats, &mut model, &progress) - .await; + .await + .unwrap(); assert!(model.all_shards().all(|shard| shard.is_open())); // Test successfully closed shard. ingest_controller .try_scale_down_shards(source_uid.clone(), shard_stats, &mut model, &progress) - .await; + .await + .unwrap(); assert!(model.all_shards().all(|shard| shard.is_closed())); let shards = vec![Shard { @@ -2023,7 +2285,8 @@ mod tests { // Test rate limited. ingest_controller .try_scale_down_shards(source_uid.clone(), shard_stats, &mut model, &progress) - .await; + .await + .unwrap(); assert!(model.all_shards().any(|shard| shard.is_open())); } @@ -2289,7 +2552,7 @@ mod tests { let ingest_controller = IngestController::new(metastore, ingester_pool.clone(), replication_factor); - let closed_shards = ingest_controller.close_shards(empty()).await; + let closed_shards = ingest_controller.close_shards(Vec::new()).await; assert_eq!(closed_shards.len(), 0); let ingester_id_0 = NodeId::from("test-ingester-0"); @@ -2397,9 +2660,7 @@ mod tests { }, ), ]; - let closed_shards = ingest_controller - .close_shards(shards_to_close.into_iter()) - .await; + let closed_shards = ingest_controller.close_shards(shards_to_close).await; assert_eq!(closed_shards.len(), 1); let closed_shard = &closed_shards[0]; @@ -2414,7 +2675,7 @@ mod tests { let mut mock_metastore = MockMetastoreService::new(); mock_metastore.expect_open_shards().return_once(|request| { - assert_eq!(request.subrequests.len(), 2); + assert_eq!(request.subrequests.len(), 1); let subrequest_0 = &request.subrequests[0]; assert_eq!(subrequest_0.subrequest_id, 0); @@ -2423,37 +2684,17 @@ mod tests { assert_eq!(subrequest_0.leader_id, "test-ingester-1"); assert!(subrequest_0.follower_id.is_none()); - let subrequest_1 = &request.subrequests[1]; - assert_eq!(subrequest_1.subrequest_id, 1); - assert_eq!(subrequest_1.index_uid(), &("test-index", 0)); - assert_eq!(subrequest_1.source_id, INGEST_V2_SOURCE_ID.to_string()); - assert_eq!(subrequest_1.leader_id, "test-ingester-1"); - assert!(subrequest_1.follower_id.is_none()); - - let subresponses = vec![ - metastore::OpenShardSubresponse { - subrequest_id: 0, - open_shard: Some(Shard { - index_uid: Some(IndexUid::for_test("test-index", 0)), - source_id: INGEST_V2_SOURCE_ID.to_string(), - shard_id: subrequest_0.shard_id.clone(), - leader_id: "test-ingester-1".to_string(), - shard_state: ShardState::Open as i32, - ..Default::default() - }), - }, - metastore::OpenShardSubresponse { - subrequest_id: 1, - open_shard: Some(Shard { - index_uid: Some(IndexUid::for_test("test-index", 0)), - source_id: INGEST_V2_SOURCE_ID.to_string(), - shard_id: subrequest_1.shard_id.clone(), - leader_id: "test-ingester-1".to_string(), - shard_state: ShardState::Open as i32, - ..Default::default() - }), - }, - ]; + let subresponses = vec![metastore::OpenShardSubresponse { + subrequest_id: 0, + open_shard: Some(Shard { + index_uid: Some(IndexUid::for_test("test-index", 0)), + source_id: INGEST_V2_SOURCE_ID.to_string(), + shard_id: subrequest_0.shard_id.clone(), + leader_id: "test-ingester-1".to_string(), + shard_state: ShardState::Open as i32, + ..Default::default() + }), + }]; let response = metastore::OpenShardsResponse { subresponses }; Ok(response) }); @@ -2471,7 +2712,8 @@ mod tests { let close_shards_task_opt = ingest_controller .rebalance_shards(&mut model, &control_plane_mailbox, &progress) - .await; + .await + .unwrap(); assert!(close_shards_task_opt.is_none()); let index_metadata = 
IndexMetadata::for_test("test-index", "ram://indexes/test-index"); @@ -2595,6 +2837,7 @@ mod tests { let close_shards_task = ingest_controller .rebalance_shards(&mut model, &control_plane_mailbox, &progress) .await + .unwrap() .unwrap(); tokio::time::timeout(CLOSE_SHARDS_REQUEST_TIMEOUT * 2, close_shards_task) diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/date_time_type.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/date_time_type.rs index b7ce4bfc429..0c45044ebe6 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/date_time_type.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/date_time_type.rs @@ -18,7 +18,7 @@ // along with this program. If not, see . use indexmap::IndexSet; -use quickwit_datetime::{DateTimeInputFormat, DateTimeOutputFormat}; +use quickwit_datetime::{DateTimeInputFormat, DateTimeOutputFormat, TantivyDateTime}; use serde::{Deserialize, Deserializer, Serialize}; use serde_json::Value as JsonValue; use tantivy::schema::{DateTimePrecision, OwnedValue as TantivyValue}; @@ -98,6 +98,29 @@ impl QuickwitDateTimeOptions { }; Ok(TantivyValue::Date(date_time)) } + + pub(crate) fn reparse_tantivy_value( + &self, + tantivy_value: &TantivyValue, + ) -> Option { + match tantivy_value { + TantivyValue::Date(date) => Some(*date), + TantivyValue::Str(date_time_str) => { + quickwit_datetime::parse_date_time_str(date_time_str, &self.input_formats.0).ok() + } + TantivyValue::U64(timestamp_u64) => { + let timestamp_i64 = (*timestamp_u64).try_into().ok()?; + quickwit_datetime::parse_timestamp_int(timestamp_i64, &self.input_formats.0).ok() + } + TantivyValue::I64(timestamp_i64) => { + quickwit_datetime::parse_timestamp_int(*timestamp_i64, &self.input_formats.0).ok() + } + TantivyValue::F64(timestamp_f64) => { + quickwit_datetime::parse_timestamp_float(*timestamp_f64, &self.input_formats.0).ok() + } + _ => None, + } + } } #[derive(Clone, Debug, Eq, PartialEq, Serialize)] @@ -156,7 +179,7 @@ mod tests { assert_eq!(field_mapping_entry.name, "updated_at"); let date_time_options = match field_mapping_entry.mapping_type { - FieldMappingType::DateTime(date_time_options, Cardinality::SingleValue) => { + FieldMappingType::DateTime(date_time_options, Cardinality::SingleValued) => { date_time_options } _ => panic!("Expected a date time field mapping"), @@ -226,7 +249,7 @@ mod tests { assert_eq!(field_mapping_entry.name, "updated_at"); let date_time_options = match field_mapping_entry.mapping_type { - FieldMappingType::DateTime(date_time_options, Cardinality::MultiValues) => { + FieldMappingType::DateTime(date_time_options, Cardinality::MultiValued) => { date_time_options } _ => panic!("Expected a date time field mapping."), diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs index 3daf65462d0..537ba1f460e 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs @@ -45,8 +45,9 @@ use crate::doc_mapper::{JsonObject, Partition}; use crate::query_builder::build_query; use crate::routing_expression::RoutingExpr; use crate::{ - Cardinality, DocMapper, DocParsingError, Mode, QueryParserError, TokenizerEntry, WarmupInfo, - DOCUMENT_LEN_FIELD_NAME, DYNAMIC_FIELD_NAME, FIELD_PRESENCE_FIELD_NAME, SOURCE_FIELD_NAME, + Cardinality, DocMapper, DocMapping, DocParsingError, Mode, QueryParserError, TokenizerEntry, + WarmupInfo, DOCUMENT_SIZE_FIELD_NAME, 
DYNAMIC_FIELD_NAME, FIELD_PRESENCE_FIELD_NAME, + SOURCE_FIELD_NAME, }; const FIELD_PRESENCE_FIELD: Field = Field::from_field_id(0u32); @@ -55,8 +56,8 @@ const FIELD_PRESENCE_FIELD: Field = Field::from_field_id(0u32); /// to tantivy index fields. /// /// The mains rules are defined by the field mappings. -#[derive(Serialize, Deserialize, Clone)] -#[serde(try_from = "DefaultDocMapperBuilder", into = "DefaultDocMapperBuilder")] +#[derive(Clone, Serialize, Deserialize)] +#[serde(into = "DefaultDocMapperBuilder", try_from = "DefaultDocMapperBuilder")] pub struct DefaultDocMapper { /// Field in which the source should be stored. /// This field is only valid when using the schema associated with the default @@ -70,7 +71,7 @@ pub struct DefaultDocMapper { /// doc mapper, and therefore cannot be used in the `query` method. dynamic_field: Option, /// Field in which the len of the source document is stored as a fast field. - document_len_field: Option, + document_size_field: Option, /// Default list of field names used for search. default_search_field_names: Vec, /// Timestamp field name. @@ -89,8 +90,6 @@ pub struct DefaultDocMapper { partition_key: RoutingExpr, /// Maximum number of partitions max_num_partitions: NonZeroU32, - /// List of required fields. Right now this is unused. - required_fields: Vec, /// Defines how unmapped fields should be handle. mode: Mode, /// User-defined tokenizers. @@ -100,21 +99,9 @@ pub struct DefaultDocMapper { } impl DefaultDocMapper { - fn check_missing_required_fields(&self, doc: &Document) -> Result<(), DocParsingError> { - for &required_field in &self.required_fields { - if doc.get_first(required_field).is_none() { - let missing_field_name = self.schema.get_field_name(required_field); - return Err(DocParsingError::RequiredField( - missing_field_name.to_string(), - )); - } - } - Ok(()) - } - /// Default maximum number of partitions. 
pub fn default_max_num_partitions() -> NonZeroU32 { - NonZeroU32::new(200).unwrap() + DocMapping::default_max_num_partitions() } } @@ -134,7 +121,7 @@ fn validate_timestamp_field( bail!("could not find timestamp field `{timestamp_field_path}` in field mappings"); }; if let FieldMappingType::DateTime(date_time_option, cardinality) = ×tamp_field_type { - if cardinality != &Cardinality::SingleValue { + if cardinality != &Cardinality::SingleValued { bail!("timestamp field `{timestamp_field_path}` should be single-valued"); } if !date_time_option.fast { @@ -146,50 +133,79 @@ fn validate_timestamp_field( Ok(()) } +impl From for DefaultDocMapperBuilder { + fn from(default_doc_mapper: DefaultDocMapper) -> Self { + let partition_key_str = default_doc_mapper.partition_key.to_string(); + let partition_key_opt: Option = if !partition_key_str.is_empty() { + Some(partition_key_str) + } else { + None + }; + let doc_mapping = DocMapping { + mode: default_doc_mapper.mode, + field_mappings: default_doc_mapper.field_mappings.into(), + timestamp_field: default_doc_mapper.timestamp_field_name, + tag_fields: default_doc_mapper.tag_field_names, + partition_key: partition_key_opt, + max_num_partitions: default_doc_mapper.max_num_partitions, + index_field_presence: default_doc_mapper.index_field_presence, + store_document_size: default_doc_mapper.document_size_field.is_some(), + store_source: default_doc_mapper.source_field.is_some(), + tokenizers: default_doc_mapper.tokenizer_entries, + }; + Self { + doc_mapping, + default_search_fields: default_doc_mapper.default_search_field_names, + } + } +} + impl TryFrom for DefaultDocMapper { type Error = anyhow::Error; fn try_from(builder: DefaultDocMapperBuilder) -> anyhow::Result { let mut schema_builder = Schema::builder(); + + // We want the field ID of the field presence field to be 0, so we add it to the schema + // first. let field_presence_field = schema_builder.add_u64_field(FIELD_PRESENCE_FIELD_NAME, INDEXED); assert_eq!(field_presence_field, FIELD_PRESENCE_FIELD); - let dynamic_field = if let Mode::Dynamic(json_options) = &builder.mode { + let doc_mapping = builder.doc_mapping; + + let dynamic_field = if let Mode::Dynamic(json_options) = &doc_mapping.mode { Some(schema_builder.add_json_field(DYNAMIC_FIELD_NAME, json_options.clone())) } else { None }; - - let document_len_field = if builder.document_length { - let document_len_field_options = tantivy::schema::NumericOptions::default().set_fast(); - Some(schema_builder.add_u64_field(DOCUMENT_LEN_FIELD_NAME, document_len_field_options)) + let document_size_field = if doc_mapping.store_document_size { + let document_size_field_options = tantivy::schema::NumericOptions::default().set_fast(); + Some( + schema_builder.add_u64_field(DOCUMENT_SIZE_FIELD_NAME, document_size_field_options), + ) + } else { + None + }; + let source_field = if doc_mapping.store_source { + Some(schema_builder.add_json_field(SOURCE_FIELD_NAME, STORED)) } else { None }; - - // Adding regular fields. 
let MappingNodeRoot { field_mappings, concatenate_dynamic_fields, - } = build_mapping_tree(&builder.field_mappings, &mut schema_builder)?; + } = build_mapping_tree(&doc_mapping.field_mappings, &mut schema_builder)?; if !concatenate_dynamic_fields.is_empty() && dynamic_field.is_none() { bail!("concatenate field has `include_dynamic_fields` set, but index isn't dynamic"); } - let source_field = if builder.store_source { - Some(schema_builder.add_json_field(SOURCE_FIELD_NAME, STORED)) - } else { - None - }; - - if let Some(timestamp_field_path) = builder.timestamp_field.as_ref() { + if let Some(timestamp_field_path) = &doc_mapping.timestamp_field { validate_timestamp_field(timestamp_field_path, &field_mappings)?; }; - let schema = schema_builder.build(); let tokenizer_manager = create_default_quickwit_tokenizer_manager(); let mut custom_tokenizer_names = HashSet::new(); - for tokenizer_config_entry in builder.tokenizers.iter() { + for tokenizer_config_entry in &doc_mapping.tokenizers { if custom_tokenizer_names.contains(&tokenizer_config_entry.name) { bail!( "duplicated custom tokenizer: `{}`", @@ -247,40 +263,38 @@ impl TryFrom for DefaultDocMapper { } // Resolve tag fields - let mut tag_field_names: BTreeSet = builder.tag_fields.iter().cloned().collect(); - for tag_field_name in &builder.tag_fields { + for tag_field_name in &doc_mapping.tag_fields { validate_tag(tag_field_name, &schema)?; } - let partition_key_expr: &str = builder.partition_key.as_deref().unwrap_or(""); + let partition_key_expr: &str = doc_mapping.partition_key.as_deref().unwrap_or(""); let partition_key = RoutingExpr::new(partition_key_expr).with_context(|| { format!("failed to interpret the partition key: `{partition_key_expr}`") })?; // If valid, partition key fields should be considered as tags. 
+ let mut tag_field_names = doc_mapping.tag_fields; + for partition_key in partition_key.field_names() { if validate_tag(&partition_key, &schema).is_ok() { tag_field_names.insert(partition_key); } } - - let required_fields = Vec::new(); Ok(DefaultDocMapper { schema, - index_field_presence: builder.index_field_presence, + index_field_presence: doc_mapping.index_field_presence, source_field, dynamic_field, - document_len_field, + document_size_field, default_search_field_names, - timestamp_field_name: builder.timestamp_field, + timestamp_field_name: doc_mapping.timestamp_field, field_mappings, concatenate_dynamic_fields, tag_field_names, - required_fields, partition_key, - max_num_partitions: builder.max_num_partitions, - mode: builder.mode, - tokenizer_entries: builder.tokenizers, + max_num_partitions: doc_mapping.max_num_partitions, + mode: doc_mapping.mode, + tokenizer_entries: doc_mapping.tokenizers, tokenizer_manager, }) } @@ -366,32 +380,6 @@ fn validate_fields_tokenizers( Ok(()) } -impl From for DefaultDocMapperBuilder { - fn from(default_doc_mapper: DefaultDocMapper) -> Self { - let partition_key_str = default_doc_mapper.partition_key.to_string(); - let partition_key_opt: Option = if partition_key_str.is_empty() { - None - } else { - Some(partition_key_str) - }; - Self { - store_source: default_doc_mapper.source_field.is_some(), - index_field_presence: default_doc_mapper.index_field_presence, - timestamp_field: default_doc_mapper - .timestamp_field_name() - .map(ToString::to_string), - field_mappings: default_doc_mapper.field_mappings.into(), - tag_fields: default_doc_mapper.tag_field_names.into_iter().collect(), - default_search_fields: default_doc_mapper.default_search_field_names, - mode: default_doc_mapper.mode, - partition_key: partition_key_opt, - max_num_partitions: default_doc_mapper.max_num_partitions, - tokenizers: default_doc_mapper.tokenizer_entries, - document_length: false, - } - } -} - impl std::fmt::Debug for DefaultDocMapper { fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { formatter @@ -624,8 +612,8 @@ impl DocMapper for DefaultDocMapper { } } - if let Some(document_len_field) = self.document_len_field { - document.add_u64(document_len_field, document_len); + if let Some(document_size_field) = self.document_size_field { + document.add_u64(document_size_field, document_len); } // The capacity is inexact here. 
@@ -664,8 +652,6 @@ impl DocMapper for DefaultDocMapper { document.add_field_value(FIELD_PRESENCE_FIELD, field_presence_hash); } } - - self.check_missing_required_fields(&document)?; Ok((partition, document)) } @@ -732,6 +718,7 @@ impl DocMapper for DefaultDocMapper { #[cfg(test)] mod tests { use std::collections::{HashMap, HashSet}; + use std::iter::zip; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; @@ -745,7 +732,7 @@ mod tests { use crate::default_doc_mapper::field_mapping_entry::DEFAULT_TOKENIZER_NAME; use crate::default_doc_mapper::mapping_tree::value_to_pretokenized; use crate::{ - DefaultDocMapperBuilder, DocMapper, DocParsingError, DOCUMENT_LEN_FIELD_NAME, + DefaultDocMapperBuilder, DocMapper, DocParsingError, DOCUMENT_SIZE_FIELD_NAME, DYNAMIC_FIELD_NAME, FIELD_PRESENCE_FIELD_NAME, SOURCE_FIELD_NAME, }; @@ -1611,20 +1598,23 @@ mod tests { assert_eq!(doc.len(), 0); } + #[track_caller] fn test_doc_from_json_test_aux( doc_mapper_json: &str, field: &str, document_json: &str, - expected: Vec<TantivyValue>, + expected_values: Vec<TantivyValue>, ) { let default_doc_mapper: DefaultDocMapper = serde_json::from_str(doc_mapper_json).unwrap(); let schema = default_doc_mapper.schema(); let field = schema.get_field(field).unwrap(); let (_, doc) = default_doc_mapper.doc_from_json_str(document_json).unwrap(); - let vals: Vec<&TantivyValue> = doc.get_all(field).collect(); - assert_eq!(vals.len(), expected.len()); - for (val, exp) in vals.into_iter().zip(expected.iter()) { - assert_eq!(val, exp); + + let values: Vec<&TantivyValue> = doc.get_all(field).collect(); + assert_eq!(values.len(), expected_values.len()); + + for (value, expected_value) in zip(values, expected_values) { + assert_eq!(*value, expected_value); } } @@ -2048,7 +2038,7 @@ mod tests { "document_length": true, "mode": "dynamic" }"#, - DOCUMENT_LEN_FIELD_NAME, + DOCUMENT_SIZE_FIELD_NAME, raw_doc, vec![(raw_doc.len() as u64).into()], ); diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper_builder.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper_builder.rs index 2c60d137384..f2778c730f9 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper_builder.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper_builder.rs @@ -17,14 +17,9 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see <http://www.gnu.org/licenses/>. -use std::num::NonZeroU32; - use serde::{Deserialize, Serialize}; -use super::tokenizer_entry::TokenizerEntry; -use super::FieldMappingEntry; -use crate::default_doc_mapper::QuickwitJsonOptions; -use crate::DefaultDocMapper; +use crate::{DefaultDocMapper, DocMapping}; /// DefaultDocMapperBuilder is here /// to create a valid DocMapper. @@ -32,126 +27,15 @@ use crate::DefaultDocMapper; /// It is also used to serialize/deserialize a DocMapper. /// Note that this is not how the DocMapping is deserialized /// from the configuration. -#[quickwit_macros::serde_multikey] -#[derive(Serialize, Deserialize, Clone)] +#[derive(Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct DefaultDocMapperBuilder { - /// Stores the original source document when set to true. - #[serde(default)] - pub store_source: bool, - /// Indexes field presence. - #[serde(default)] - pub index_field_presence: bool, - /// Name of the fields that are searched by default, unless overridden. + /// Doc mapping. + #[serde(flatten)] + pub doc_mapping: DocMapping, + /// Default search field names.
#[serde(default)] pub default_search_fields: Vec, - /// Name of the field storing the timestamp of the event for time series data. - #[serde(default)] - #[serde(skip_serializing_if = "Option::is_none")] - pub timestamp_field: Option, - /// Describes which fields are indexed and how. - #[serde(default)] - pub field_mappings: Vec, - /// Name of the fields that are tagged. - #[serde(default)] - pub tag_fields: Vec, - /// The partition key is a DSL used to route documents - /// into specific splits. - #[serde(default)] - pub partition_key: Option, - /// Maximum number of partitions. - #[serde(default = "DefaultDocMapper::default_max_num_partitions")] - pub max_num_partitions: NonZeroU32, - #[serde_multikey( - deserializer = Mode::from_parts, - serializer = Mode::into_parts, - fields = ( - /// Defines the indexing mode. - #[serde(default)] - mode: ModeType, - /// If mode is set to dynamic, `dynamic_mapping` defines - /// how the unmapped fields should be handled. - #[serde(default)] - dynamic_mapping: Option, - ), - )] - /// Defines how the unmapped fields should be handled. - pub mode: Mode, - /// User-defined tokenizers. - #[serde(default)] - pub tokenizers: Vec, - /// Record document length - #[serde(default)] - pub document_length: bool, -} - -/// Defines how an unmapped field should be handled. -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] -pub enum Mode { - /// Lenient mode: unmapped fields are just ignored. - Lenient, - /// Strict mode: when parsing a document with an unmapped field, an error is yielded. - Strict, - /// Dynamic mode: unmapped fields are captured and handled according to the provided - /// configuration. - Dynamic(QuickwitJsonOptions), -} - -impl Mode { - /// Extact the `ModeType` of this `Mode` - pub fn mode_type(&self) -> ModeType { - match self { - Mode::Lenient => ModeType::Lenient, - Mode::Strict => ModeType::Strict, - Mode::Dynamic(_) => ModeType::Dynamic, - } - } - - /// Build a Mode from its type and optional dynamic mapping options - pub fn from_parts( - mode: ModeType, - dynamic_mapping: Option, - ) -> anyhow::Result { - Ok(match (mode, dynamic_mapping) { - (ModeType::Lenient, None) => Mode::Lenient, - (ModeType::Strict, None) => Mode::Strict, - (ModeType::Dynamic, Some(dynamic_mapping)) => Mode::Dynamic(dynamic_mapping), - (ModeType::Dynamic, None) => Mode::default(), // Dynamic with default options - (_, Some(_)) => anyhow::bail!( - "`dynamic_mapping` is only allowed with mode=dynamic. (here mode=`{:?}`)", - mode - ), - }) - } - - /// Obtain the mode type and dynamic options from a Mode - pub fn into_parts(self) -> (ModeType, Option) { - match self { - Mode::Lenient => (ModeType::Lenient, None), - Mode::Strict => (ModeType::Strict, None), - Mode::Dynamic(json_options) => (ModeType::Dynamic, Some(json_options)), - } - } -} - -impl Default for Mode { - fn default() -> Self { - Mode::Dynamic(QuickwitJsonOptions::default_dynamic()) - } -} - -/// `Mode` describing how the unmapped field should be handled. -#[derive(Clone, Copy, Default, Debug, Eq, PartialEq, Serialize, Deserialize, utoipa::ToSchema)] -#[serde(rename_all = "lowercase")] -pub enum ModeType { - /// Lenient mode: unmapped fields are just ignored. - Lenient, - /// Strict mode: when parsing a document with an unmapped field, an error is yielded. - Strict, - /// Dynamic mode: unmapped fields are captured and handled according to the - /// `dynamic_mapping` configuration. 
- #[default] - Dynamic, } #[cfg(test)] @@ -172,17 +56,27 @@ impl DefaultDocMapperBuilder { #[cfg(test)] mod tests { use super::*; + use crate::ModeType; #[test] fn test_default_mapper_builder_deserialize_from_empty_object() { - let default_mapper_builder: DefaultDocMapperBuilder = - serde_json::from_str::("{}").unwrap(); - assert!(default_mapper_builder.default_search_fields.is_empty()); - assert!(default_mapper_builder.field_mappings.is_empty()); - assert!(default_mapper_builder.tag_fields.is_empty()); - assert_eq!(default_mapper_builder.mode.mode_type(), ModeType::Dynamic); - assert_eq!(default_mapper_builder.store_source, false); - assert!(default_mapper_builder.timestamp_field.is_none()); + let default_doc_mapper_builder: DefaultDocMapperBuilder = + serde_json::from_str("{}").unwrap(); + assert_eq!( + default_doc_mapper_builder.doc_mapping.mode.mode_type(), + ModeType::Dynamic + ); + assert!(default_doc_mapper_builder + .doc_mapping + .field_mappings + .is_empty()); + assert!(default_doc_mapper_builder + .doc_mapping + .timestamp_field + .is_none()); + assert!(default_doc_mapper_builder.doc_mapping.tag_fields.is_empty()); + assert_eq!(default_doc_mapper_builder.doc_mapping.store_source, false); + assert!(default_doc_mapper_builder.default_search_fields.is_empty()); } #[test] diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/field_mapping_entry.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/field_mapping_entry.rs index 3f556629858..450cd02f0cb 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/field_mapping_entry.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/field_mapping_entry.rs @@ -720,8 +720,8 @@ fn deserialize_mapping_type( json: JsonValue, ) -> anyhow::Result { let (typ, cardinality) = match quickwit_field_type { - QuickwitFieldType::Simple(typ) => (typ, Cardinality::SingleValue), - QuickwitFieldType::Array(typ) => (typ, Cardinality::MultiValues), + QuickwitFieldType::Simple(typ) => (typ, Cardinality::SingleValued), + QuickwitFieldType::Array(typ) => (typ, Cardinality::MultiValued), QuickwitFieldType::Object => { let object_options: QuickwitObjectOptions = serde_json::from_value(json)?; if object_options.field_mappings.is_empty() { @@ -771,7 +771,7 @@ fn deserialize_mapping_type( Type::Facet => unimplemented!("Facet are not supported in quickwit yet."), Type::Bytes => { let numeric_options: QuickwitBytesOptions = serde_json::from_value(json)?; - if numeric_options.fast && cardinality == Cardinality::MultiValues { + if numeric_options.fast && cardinality == Cardinality::MultiValued { bail!("fast field is not allowed for array"); } Ok(FieldMappingType::Bytes(numeric_options, cardinality)) @@ -1238,7 +1238,7 @@ mod tests { assert_eq!(options.indexed, true); // default assert_eq!(options.fast, false); // default assert_eq!(options.stored, true); // default - assert_eq!(cardinality, Cardinality::MultiValues); + assert_eq!(cardinality, Cardinality::MultiValued); } _ => bail!("Wrong type"), } @@ -1262,7 +1262,7 @@ mod tests { assert_eq!(options.indexed, true); // default assert_eq!(options.fast, false); // default assert_eq!(options.stored, true); // default - assert_eq!(cardinality, Cardinality::SingleValue); + assert_eq!(cardinality, Cardinality::SingleValued); } _ => bail!("Wrong type"), } @@ -1330,7 +1330,7 @@ mod tests { assert_eq!(options.indexed, true); // default assert_eq!(options.fast, false); // default assert_eq!(options.stored, true); // default - assert_eq!(cardinality, Cardinality::MultiValues); + 
assert_eq!(cardinality, Cardinality::MultiValued); } else { panic!("Wrong type"); } @@ -1351,7 +1351,7 @@ mod tests { assert_eq!(options.indexed, true); // default assert_eq!(options.fast, false); // default assert_eq!(options.stored, true); // default - assert_eq!(cardinality, Cardinality::SingleValue); + assert_eq!(cardinality, Cardinality::SingleValued); } else { panic!("Wrong type"); } @@ -1691,7 +1691,7 @@ mod tests { assert_eq!(&field_mapping_entry.name, "my_json_field"); assert!( matches!(field_mapping_entry.mapping_type, FieldMappingType::Json(json_config, - Cardinality::SingleValue) if json_config == expected_json_options) + Cardinality::SingleValued) if json_config == expected_json_options) ); } @@ -1738,7 +1738,7 @@ mod tests { assert_eq!(&field_mapping_entry.name, "my_json_field_multi"); assert!( matches!(field_mapping_entry.mapping_type, FieldMappingType::Json(json_config, - Cardinality::MultiValues) if json_config == expected_json_options) + Cardinality::MultiValued) if json_config == expected_json_options) ); } diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/field_mapping_type.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/field_mapping_type.rs index a32bb962938..c339dd44b0f 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/field_mapping_type.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/field_mapping_type.rs @@ -74,8 +74,8 @@ impl FieldMappingType { FieldMappingType::Concatenate(_) => return QuickwitFieldType::Concatenate, }; match cardinality { - Cardinality::SingleValue => QuickwitFieldType::Simple(primitive_type), - Cardinality::MultiValues => QuickwitFieldType::Array(primitive_type), + Cardinality::SingleValued => QuickwitFieldType::Simple(primitive_type), + Cardinality::MultiValued => QuickwitFieldType::Array(primitive_type), } } } diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs index 4fdf1af030c..702ece804cd 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs @@ -31,7 +31,6 @@ use tantivy::schema::{ }; use tantivy::tokenizer::{PreTokenizedString, Token}; use tantivy::TantivyDocument as Document; -use tracing::warn; use super::date_time_type::QuickwitDateTimeOptions; use super::field_mapping_entry::{NumericOutputFormat, QuickwitBoolOptions}; @@ -303,7 +302,7 @@ impl MappingLeaf { return Ok(()); } if let JsonValue::Array(els) = json_val { - if self.cardinality == Cardinality::SingleValue { + if self.cardinality == Cardinality::SingleValued { return Err(DocParsingError::MultiValuesNotSupported(path.join("."))); } for el_json_val in els { @@ -380,8 +379,174 @@ fn extract_json_val( .into_iter() .flat_map(|value| value_to_json(value, leaf_type)); match cardinality { - Cardinality::SingleValue => vals_with_correct_type_it.next(), - Cardinality::MultiValues => Some(JsonValue::Array(vals_with_correct_type_it.collect())), + Cardinality::SingleValued => vals_with_correct_type_it.next(), + Cardinality::MultiValued => Some(JsonValue::Array(vals_with_correct_type_it.collect())), + } +} + +fn value_to_string(value: TantivyValue) -> Result<JsonValue, TantivyValue> { + match value { + TantivyValue::Str(s) => return Ok(JsonValue::String(s)), + TantivyValue::U64(number) => Some(number.to_string()), + TantivyValue::I64(number) => Some(number.to_string()), + TantivyValue::F64(number) => Some(number.to_string()), + TantivyValue::Bool(b) => Some(b.to_string()), + TantivyValue::Date(date) => { + return quickwit_datetime::DateTimeOutputFormat::default() + .format_to_json(date) + .map_err(|_| value); + } + TantivyValue::IpAddr(ip) => Some(ip.to_string()), + _ => None, + } + .map(JsonValue::String) + .ok_or(value) +} + +fn value_to_bool(value: TantivyValue) -> Result<JsonValue, TantivyValue> { + match &value { + TantivyValue::Str(s) => s.parse().ok(), + TantivyValue::U64(number) => match number { + 0 => Some(false), + 1 => Some(true), + _ => None, + }, + TantivyValue::I64(number) => match number { + 0 => Some(false), + 1 => Some(true), + _ => None, + }, + TantivyValue::Bool(b) => Some(*b), + _ => None, + } + .map(JsonValue::Bool) + .ok_or(value) +} + +fn value_to_ip(value: TantivyValue) -> Result<JsonValue, TantivyValue> { + match &value { + TantivyValue::Str(s) => s + .parse::<Ipv6Addr>() + .or_else(|_| { + s.parse::<Ipv4Addr>() + .map(|ip| ip.to_ipv6_mapped()) + }) + .ok(), + TantivyValue::IpAddr(ip) => Some(*ip), + _ => None, + } + .map(|ip| { + serde_json::to_value(TantivyValue::IpAddr(ip)) + .expect("Json serialization should never fail.") + }) + .ok_or(value) +} + +fn value_to_float( + value: TantivyValue, + numeric_options: &QuickwitNumericOptions, +) -> Result<JsonValue, TantivyValue> { + match &value { + TantivyValue::Str(s) => s.parse().ok(), + TantivyValue::U64(number) => Some(*number as f64), + TantivyValue::I64(number) => Some(*number as f64), + TantivyValue::F64(number) => Some(*number), + TantivyValue::Bool(b) => Some(if *b { 1.0 } else { 0.0 }), + _ => None, + } + .and_then(|f64_val| f64_val.to_json(numeric_options.output_format)) + .ok_or(value) +} + +fn value_to_u64( + value: TantivyValue, + numeric_options: &QuickwitNumericOptions, +) -> Result<JsonValue, TantivyValue> { + match &value { + TantivyValue::Str(s) => s.parse().ok(), + TantivyValue::U64(number) => Some(*number), + TantivyValue::I64(number) => (*number).try_into().ok(), + TantivyValue::F64(number) => { + if (0.0..=(u64::MAX as f64)).contains(number) { + Some(*number as u64) + } else { + None + } + } + TantivyValue::Bool(b) => Some(*b as u64), + _ => None, + } + .and_then(|u64_val| u64_val.to_json(numeric_options.output_format)) + .ok_or(value) +} + +fn value_to_i64( + value: TantivyValue, + numeric_options: &QuickwitNumericOptions, +) -> Result<JsonValue, TantivyValue> { + match &value { + TantivyValue::Str(s) => s.parse().ok(), + TantivyValue::U64(number) => (*number).try_into().ok(), + TantivyValue::I64(number) => Some(*number), + TantivyValue::F64(number) => { + if ((i64::MIN as f64)..=(i64::MAX as f64)).contains(number) { + Some(*number as i64) + } else { + None + } + } + TantivyValue::Bool(b) => Some(*b as i64), + _ => None, + } + .and_then(|i64_val| i64_val.to_json(numeric_options.output_format)) + .ok_or(value) +} + +/// Transforms a tantivy object into a serde_json one, without cloning strings. +/// It still allocates maps.
+// TODO we should probably move this to tantivy, it has the opposite conversion already +fn tantivy_object_to_json_value_nocopy(object: Vec<(String, TantivyValue)>) -> JsonValue { + JsonValue::Object( + object + .into_iter() + .map(|(key, value)| (key, tantivy_value_to_json_value_nocopy(value))) + .collect(), + ) +} + +fn tantivy_value_to_json_value_nocopy(value: TantivyValue) -> JsonValue { + match value { + TantivyValue::Null => JsonValue::Null, + TantivyValue::Str(s) => JsonValue::String(s), + TantivyValue::U64(number) => JsonValue::Number(number.into()), + TantivyValue::I64(number) => JsonValue::Number(number.into()), + TantivyValue::F64(f) => { + JsonValue::Number(serde_json::Number::from_f64(f).expect("expected finite f64")) + } + TantivyValue::Bool(b) => JsonValue::Bool(b), + TantivyValue::Array(array) => JsonValue::Array( + array + .into_iter() + .map(tantivy_value_to_json_value_nocopy) + .collect(), + ), + TantivyValue::Object(object) => tantivy_object_to_json_value_nocopy(object), + // we shouldn't have these types inside a json field in quickwit + TantivyValue::PreTokStr(pretok) => JsonValue::String(pretok.text), + TantivyValue::Date(date) => quickwit_datetime::DateTimeOutputFormat::Rfc3339 + .format_to_json(date) + .expect("Invalid datetime is not allowed."), + TantivyValue::Facet(facet) => JsonValue::String(facet.to_string()), + // TantivyValue::Bytes(Vec) => (), // tantivy would do b64 here + TantivyValue::IpAddr(ip_v6) => { + let ip_str = if let Some(ip_v4) = ip_v6.to_ipv4_mapped() { + ip_v4.to_string() + } else { + ip_v6.to_string() + }; + JsonValue::String(ip_str) + } + value => unimplemented!("got unexpected type {value:?} inside json field"), } } @@ -390,39 +555,48 @@ fn extract_json_val( /// Makes sure the type and value are consistent before converting. /// For certain LeafType, we use the type options to format the output. fn value_to_json(value: TantivyValue, leaf_type: &LeafType) -> Option { - match (&value, leaf_type) { - (TantivyValue::Str(_), LeafType::Text(_)) - | (TantivyValue::Bool(_), LeafType::Bool(_)) - | (TantivyValue::IpAddr(_), LeafType::IpAddr(_)) - | (TantivyValue::Object(_), LeafType::Json(_)) => { - let json_value = - serde_json::to_value(&value).expect("Json serialization should never fail."); - Some(json_value) - } - (TantivyValue::Bytes(bytes), LeafType::Bytes(bytes_options)) => { - let json_value = bytes_options.output_format.format_to_json(bytes); - Some(json_value) - } - (TantivyValue::Date(date_time), LeafType::DateTime(date_time_options)) => { - let json_value = date_time_options - .output_format - .format_to_json(*date_time) - .expect("Invalid datetime is not allowed."); - Some(json_value) - } - (TantivyValue::F64(f64_val), LeafType::F64(numeric_options)) => { - f64_val.to_json(numeric_options.output_format) - } - (TantivyValue::I64(i64_val), LeafType::I64(numeric_options)) => { - i64_val.to_json(numeric_options.output_format) + let res = match leaf_type { + LeafType::Text(_) => value_to_string(value), + LeafType::Bool(_) => value_to_bool(value), + LeafType::IpAddr(_) => value_to_ip(value), + LeafType::F64(numeric_options) => value_to_float(value, numeric_options), + LeafType::U64(numeric_options) => value_to_u64(value, numeric_options), + LeafType::I64(numeric_options) => value_to_i64(value, numeric_options), + LeafType::Json(_) => { + if let TantivyValue::Object(obj) = value { + // TODO do we want to allow almost everything here? 
+ return Some(tantivy_object_to_json_value_nocopy(obj)); + } else { + Err(value) + } } - (TantivyValue::U64(u64_val), LeafType::U64(numeric_options)) => { - u64_val.to_json(numeric_options.output_format) + LeafType::Bytes(bytes_options) => { + if let TantivyValue::Bytes(ref bytes) = value { + // TODO we could cast str to bytes + let json_value = bytes_options.output_format.format_to_json(bytes); + Ok(json_value) + } else { + Err(value) + } } - _ => { - warn!( - "The value type `{:?}` doesn't match the requested type `{:?}`", - value, leaf_type + LeafType::DateTime(date_time_options) => date_time_options + .reparse_tantivy_value(&value) + .map(|date_time| { + date_time_options + .output_format + .format_to_json(date_time) + .expect("Invalid datetime is not allowed.") + }) + .ok_or(value), + }; + match res { + Ok(res) => Some(res), + Err(value) => { + quickwit_common::rate_limited_warn!( + limit_per_min = 2, + "the value type `{:?}` doesn't match the requested type `{:?}`", + value, + leaf_type ); None } @@ -1282,7 +1456,7 @@ mod tests { let leaf_entry = MappingLeaf { field, typ, - cardinality: Cardinality::MultiValues, + cardinality: Cardinality::MultiValued, concatenate: Vec::new(), }; let mut document = Document::default(); @@ -1334,7 +1508,7 @@ mod tests { let leaf_entry = MappingLeaf { field, typ, - cardinality: Cardinality::MultiValues, + cardinality: Cardinality::MultiValued, concatenate: Vec::new(), }; let mut document = Document::default(); @@ -1357,7 +1531,7 @@ mod tests { let leaf_entry = MappingLeaf { field, typ, - cardinality: Cardinality::MultiValues, + cardinality: Cardinality::MultiValued, concatenate: Vec::new(), }; let mut document = Document::default(); @@ -1375,7 +1549,7 @@ mod tests { let leaf_entry = MappingLeaf { field, typ, - cardinality: Cardinality::MultiValues, + cardinality: Cardinality::MultiValued, concatenate: Vec::new(), }; let mut document = Document::default(); @@ -1394,7 +1568,7 @@ mod tests { let leaf_entry = MappingLeaf { field, typ, - cardinality: Cardinality::MultiValues, + cardinality: Cardinality::MultiValued, concatenate: Vec::new(), }; let mut document = Document::default(); @@ -1540,7 +1714,7 @@ mod tests { let leaf_entry = MappingLeaf { field, typ, - cardinality: Cardinality::MultiValues, + cardinality: Cardinality::MultiValued, concatenate: Vec::new(), }; let mut document = Document::default(); @@ -1601,7 +1775,23 @@ mod tests { assert_eq!( value_to_json( TantivyValue::F64(0.1), - &LeafType::F64(numeric_options_number) + &LeafType::F64(numeric_options_number.clone()) + ) + .unwrap(), + serde_json::json!(0.1) + ); + assert_eq!( + value_to_json( + TantivyValue::U64(1), + &LeafType::F64(numeric_options_number.clone()) + ) + .unwrap(), + serde_json::json!(1.0) + ); + assert_eq!( + value_to_json( + TantivyValue::Str("0.1".to_string()), + &LeafType::F64(numeric_options_number.clone()) ) .unwrap(), serde_json::json!(0.1) @@ -1623,11 +1813,15 @@ mod tests { assert_eq!( value_to_json( TantivyValue::I64(-1), - &LeafType::I64(numeric_options_number) + &LeafType::I64(numeric_options_number.clone()) ) .unwrap(), serde_json::json!(-1) ); + assert_eq!( + value_to_json(TantivyValue::I64(1), &LeafType::I64(numeric_options_number)).unwrap(), + serde_json::json!(1) + ); let numeric_options_str = QuickwitNumericOptions { output_format: NumericOutputFormat::String, @@ -1643,7 +1837,15 @@ mod tests { fn test_tantivy_value_to_json_value_u64() { let numeric_options_number = QuickwitNumericOptions::default(); assert_eq!( - value_to_json(TantivyValue::U64(1), 
&LeafType::U64(numeric_options_number)).unwrap(), + value_to_json( + TantivyValue::U64(1), + &LeafType::U64(numeric_options_number.clone()) + ) + .unwrap(), + serde_json::json!(1u64) + ); + assert_eq!( + value_to_json(TantivyValue::I64(1), &LeafType::U64(numeric_options_number)).unwrap(), serde_json::json!(1u64) ); diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mod.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mod.rs index d36acff16fc..63d0921bcf3 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mod.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mod.rs @@ -26,25 +26,27 @@ mod mapping_tree; mod tokenizer_entry; use anyhow::bail; -use once_cell::sync::Lazy; -use regex::Regex; - -pub use self::default_mapper::DefaultDocMapper; -pub use self::default_mapper_builder::{DefaultDocMapperBuilder, Mode, ModeType}; -pub use self::field_mapping_entry::{ +pub use default_mapper::DefaultDocMapper; +pub use default_mapper_builder::DefaultDocMapperBuilder; +#[cfg(all(test, feature = "multilang"))] +pub(crate) use field_mapping_entry::TextIndexingOptions; +pub use field_mapping_entry::{ BinaryFormat, FastFieldOptions, FieldMappingEntry, QuickwitBytesOptions, QuickwitJsonOptions, QuickwitTextNormalizer, }; -pub(crate) use self::field_mapping_entry::{ +pub(crate) use field_mapping_entry::{ FieldMappingEntryForSerialization, IndexRecordOptionSchema, QuickwitTextTokenizer, }; -#[cfg(all(test, feature = "multilang"))] -pub(crate) use self::field_mapping_entry::{QuickwitTextOptions, TextIndexingOptions}; -pub use self::field_mapping_type::FieldMappingType; -pub use self::tokenizer_entry::{analyze_text, TokenizerConfig, TokenizerEntry}; -pub(crate) use self::tokenizer_entry::{ +#[cfg(test)] +pub(crate) use field_mapping_entry::{QuickwitNumericOptions, QuickwitTextOptions}; +pub use field_mapping_type::FieldMappingType; +use once_cell::sync::Lazy; +use regex::Regex; +pub use tokenizer_entry::{analyze_text, TokenizerConfig, TokenizerEntry}; +pub(crate) use tokenizer_entry::{ NgramTokenizerOption, RegexTokenizerOption, TokenFilterType, TokenizerType, }; + use crate::QW_RESERVED_FIELD_NAMES; /// Regular expression validating a field mapping name. 
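Stepping back to the `value_to_*` helpers added in `mapping_tree.rs` above: they all follow one convention, coercing a stored value leniently toward the leaf's JSON type and returning the original value as the `Err` variant on failure so the caller (`value_to_json`) can emit a rate-limited warning instead of silently dropping it. A self-contained sketch of that convention, with a simplified `StoredValue` enum standing in for `TantivyValue` (only `serde_json` is assumed):

```rust
use serde_json::Value as JsonValue;

#[derive(Debug, PartialEq)]
enum StoredValue {
    Str(String),
    U64(u64),
    I64(i64),
    Bool(bool),
}

// On success, the coerced JSON value; on failure, the untouched input so the
// caller can log it.
fn coerce_to_bool(value: StoredValue) -> Result<JsonValue, StoredValue> {
    match &value {
        StoredValue::Str(s) => s.parse::<bool>().ok(),
        StoredValue::U64(0) | StoredValue::I64(0) => Some(false),
        StoredValue::U64(1) | StoredValue::I64(1) => Some(true),
        StoredValue::Bool(b) => Some(*b),
        _ => None,
    }
    .map(JsonValue::Bool)
    .ok_or(value)
}

fn main() {
    assert_eq!(coerce_to_bool(StoredValue::U64(1)), Ok(JsonValue::Bool(true)));
    assert_eq!(coerce_to_bool(StoredValue::Str("true".into())), Ok(JsonValue::Bool(true)));
    // Unconvertible values come back unchanged for the warning path.
    assert_eq!(coerce_to_bool(StoredValue::U64(2)), Err(StoredValue::U64(2)));
}
```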
diff --git a/quickwit/quickwit-doc-mapper/src/doc_mapper.rs b/quickwit/quickwit-doc-mapper/src/doc_mapper.rs index 04860c47c97..37fcc385fa1 100644 --- a/quickwit/quickwit-doc-mapper/src/doc_mapper.rs +++ b/quickwit/quickwit-doc-mapper/src/doc_mapper.rs @@ -249,8 +249,8 @@ mod tests { use crate::default_doc_mapper::{FieldMappingType, QuickwitJsonOptions}; use crate::{ - Cardinality, DefaultDocMapper, DefaultDocMapperBuilder, DocMapper, DocParsingError, - FieldMappingEntry, Mode, TermRange, WarmupInfo, DYNAMIC_FIELD_NAME, + Cardinality, DefaultDocMapperBuilder, DocMapper, DocParsingError, FieldMappingEntry, + TermRange, WarmupInfo, DYNAMIC_FIELD_NAME, }; const JSON_DEFAULT_DOC_MAPPER: &str = r#" @@ -332,36 +332,19 @@ mod tests { } } - #[test] - fn test_serdeserialize_doc_mapper() -> anyhow::Result<()> { - let deserialized_default_doc_mapper = - serde_json::from_str::>(JSON_DEFAULT_DOC_MAPPER)?; - let expected_default_doc_mapper = DefaultDocMapperBuilder::default().try_build()?; - assert_eq!( - format!("{deserialized_default_doc_mapper:?}"), - format!("{expected_default_doc_mapper:?}"), - ); - - let serialized_doc_mapper = serde_json::to_string(&deserialized_default_doc_mapper)?; - let deserialized_default_doc_mapper = - serde_json::from_str::>(&serialized_doc_mapper)?; - let serialized_doc_mapper_2 = serde_json::to_string(&deserialized_default_doc_mapper)?; - - assert_eq!(serialized_doc_mapper, serialized_doc_mapper_2); - - Ok(()) - } - #[test] fn test_doc_mapper_query_with_json_field() { let mut doc_mapper_builder = DefaultDocMapperBuilder::default(); - doc_mapper_builder.field_mappings.push(FieldMappingEntry { - name: "json_field".to_string(), - mapping_type: FieldMappingType::Json( - QuickwitJsonOptions::default(), - Cardinality::SingleValue, - ), - }); + doc_mapper_builder + .doc_mapping + .field_mappings + .push(FieldMappingEntry { + name: "json_field".to_string(), + mapping_type: FieldMappingType::Json( + QuickwitJsonOptions::default(), + Cardinality::SingleValued, + ), + }); let doc_mapper = doc_mapper_builder.try_build().unwrap(); let schema = doc_mapper.schema(); let query_ast = UserInputQuery { @@ -380,12 +363,7 @@ mod tests { #[test] fn test_doc_mapper_query_with_json_field_default_search_fields() { - let doc_mapper: DefaultDocMapper = DefaultDocMapperBuilder { - mode: Mode::default(), - ..Default::default() - } - .try_build() - .unwrap(); + let doc_mapper = DefaultDocMapperBuilder::default().try_build().unwrap(); let schema = doc_mapper.schema(); let query_ast = query_ast_from_user_text("toto.titi:hello", None) .parse_user_query(doc_mapper.default_search_fields()) @@ -399,12 +377,7 @@ mod tests { #[test] fn test_doc_mapper_query_with_json_field_ambiguous_term() { - let doc_mapper: DefaultDocMapper = DefaultDocMapperBuilder { - mode: Mode::default(), - ..Default::default() - } - .try_build() - .unwrap(); + let doc_mapper = DefaultDocMapperBuilder::default().try_build().unwrap(); let schema = doc_mapper.schema(); let query_ast = query_ast_from_user_text("toto:5", None) .parse_user_query(&[]) @@ -582,27 +555,33 @@ mod tests { }; use crate::{TokenizerConfig, TokenizerEntry}; let mut doc_mapper_builder = DefaultDocMapperBuilder::default(); - doc_mapper_builder.field_mappings.push(FieldMappingEntry { - name: "multilang".to_string(), - mapping_type: FieldMappingType::Text( - QuickwitTextOptions { - indexing_options: Some(TextIndexingOptions { - tokenizer: QuickwitTextTokenizer::from_static("multilang"), - record: IndexRecordOption::Basic, - fieldnorms: false, - }), - 
..Default::default() + doc_mapper_builder + .doc_mapping + .field_mappings + .push(FieldMappingEntry { + name: "multilang".to_string(), + mapping_type: FieldMappingType::Text( + QuickwitTextOptions { + indexing_options: Some(TextIndexingOptions { + tokenizer: QuickwitTextTokenizer::from_static("multilang"), + record: IndexRecordOption::Basic, + fieldnorms: false, + }), + ..Default::default() + }, + Cardinality::SingleValued, + ), + }); + doc_mapper_builder + .doc_mapping + .tokenizers + .push(TokenizerEntry { + name: "multilang".to_string(), + config: TokenizerConfig { + tokenizer_type: TokenizerType::Multilang, + filters: Vec::new(), }, - Cardinality::SingleValue, - ), - }); - doc_mapper_builder.tokenizers.push(TokenizerEntry { - name: "multilang".to_string(), - config: TokenizerConfig { - tokenizer_type: TokenizerType::Multilang, - filters: Vec::new(), - }, - }); + }); let doc_mapper = doc_mapper_builder.try_build().unwrap(); let schema = doc_mapper.schema(); let query_ast = quickwit_query::query_ast::QueryAst::Term(TermQuery { diff --git a/quickwit/quickwit-doc-mapper/src/doc_mapping.rs b/quickwit/quickwit-doc-mapper/src/doc_mapping.rs new file mode 100644 index 00000000000..75400c7a206 --- /dev/null +++ b/quickwit/quickwit-doc-mapper/src/doc_mapping.rs @@ -0,0 +1,238 @@ +// Copyright (C) 2024 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +use std::collections::BTreeSet; +use std::num::NonZeroU32; + +use serde::{Deserialize, Serialize}; + +use crate::{FieldMappingEntry, QuickwitJsonOptions, TokenizerEntry}; + +/// Defines how unmapped fields should be handled. +#[derive(Clone, Copy, Default, Debug, Eq, PartialEq, Serialize, Deserialize, utoipa::ToSchema)] +#[serde(rename_all = "lowercase")] +pub enum ModeType { + /// Lenient mode: ignores unmapped fields. + Lenient, + /// Strict mode: returns an error when an unmapped field is encountered. + Strict, + /// Dynamic mode: captures and handles unmapped fields according to the dynamic field + /// configuration. + #[default] + Dynamic, +} + +/// Defines how unmapped fields should be handled. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] +pub enum Mode { + /// Lenient mode: ignores unmapped fields. + Lenient, + /// Strict mode: returns an error when an unmapped field is encountered. + Strict, + /// Dynamic mode: captures and handles unmapped fields according to the dynamic field + /// configuration. + Dynamic(QuickwitJsonOptions), +} + +impl Mode { + /// Extracts the [`ModeType`] of this [`Mode`] + pub fn mode_type(&self) -> ModeType { + match self { + Self::Lenient => ModeType::Lenient, + Self::Strict => ModeType::Strict, + Self::Dynamic(_) => ModeType::Dynamic, + } + } + + /// Builds a [`Mode`] from its type and optional dynamic mapping options. 
+ pub fn from_parts( + mode: ModeType, + dynamic_mapping: Option<QuickwitJsonOptions>, + ) -> anyhow::Result<Mode> { + Ok(match (mode, dynamic_mapping) { + (ModeType::Lenient, None) => Self::Lenient, + (ModeType::Strict, None) => Self::Strict, + (ModeType::Dynamic, Some(dynamic_mapping)) => Self::Dynamic(dynamic_mapping), + (ModeType::Dynamic, None) => Self::default(), // Dynamic with default options + (_, Some(_)) => anyhow::bail!( + "`dynamic_mapping` is only allowed with mode=dynamic. (here mode=`{:?}`)", + mode + ), + }) + } + + /// Obtains the mode type and dynamic options from a [`Mode`]. + pub fn into_parts(self) -> (ModeType, Option<QuickwitJsonOptions>) { + match self { + Self::Lenient => (ModeType::Lenient, None), + Self::Strict => (ModeType::Strict, None), + Self::Dynamic(json_options) => (ModeType::Dynamic, Some(json_options)), + } + } +} + +impl Default for Mode { + fn default() -> Self { + Self::Dynamic(QuickwitJsonOptions::default_dynamic()) + } +} + +/// Defines how the documents of an index should be parsed, tokenized, partitioned, indexed, and +/// stored. +#[quickwit_macros::serde_multikey] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, utoipa::ToSchema)] +#[serde(deny_unknown_fields)] +pub struct DocMapping { + /// Defines how unmapped fields should be handled. + #[serde_multikey( + deserializer = Mode::from_parts, + serializer = Mode::into_parts, + fields = ( + #[serde(default)] + mode: ModeType, + #[serde(skip_serializing_if = "Option::is_none")] + dynamic_mapping: Option<QuickwitJsonOptions> + ), + )] + pub mode: Mode, + + /// Defines the schema of ingested documents and describes how each field value should be + /// parsed, tokenized, indexed, and stored. + #[serde(default)] + #[schema(value_type = Vec<FieldMappingEntryForSerialization>)] + pub field_mappings: Vec<FieldMappingEntry>, + + /// Declares the field which contains the date or timestamp at which the document + /// was emitted. + #[serde(default)] + pub timestamp_field: Option<String>, + + /// Declares the low cardinality fields for which the values are recorded directly in the + /// split metadata. + #[schema(value_type = Vec<String>)] + #[serde(default)] + pub tag_fields: BTreeSet<String>, + + /// Expresses via a "mini-DSL" how to route documents to split partitions. + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub partition_key: Option<String>, + + /// The maximum number of partitions that an indexer can generate. + #[schema(value_type = u32)] + #[serde(default = "DocMapping::default_max_num_partitions")] + pub max_num_partitions: NonZeroU32, + + /// Whether to record the presence of the fields of each indexed document to allow `exists` + /// queries. + #[serde(default)] + pub index_field_presence: bool, + + /// Whether to record and store the size (bytes) of each ingested document in a fast field. + #[serde(alias = "document_length")] + #[serde(default)] + pub store_document_size: bool, + + /// Whether to store the original source documents in the doc store. + #[serde(default)] + pub store_source: bool, + + /// A set of additional user-defined tokenizers to be used during indexing. + #[serde(default)] + pub tokenizers: Vec<TokenizerEntry>, +} + +impl DocMapping { + /// Returns the default value for `max_num_partitions`.
+ pub fn default_max_num_partitions() -> NonZeroU32 { + NonZeroU32::new(200).unwrap() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::default_doc_mapper::{QuickwitNumericOptions, QuickwitTextOptions}; + use crate::{ + Cardinality, FieldMappingType, RegexTokenizerOption, TokenFilterType, TokenizerConfig, + TokenizerType, + }; + + #[test] + fn test_doc_mapping_serde_roundtrip() { + let doc_mapping = DocMapping { + mode: Mode::Strict, + field_mappings: vec![ + FieldMappingEntry { + name: "timestamp".to_string(), + mapping_type: FieldMappingType::U64( + QuickwitNumericOptions::default(), + Cardinality::SingleValued, + ), + }, + FieldMappingEntry { + name: "message".to_string(), + mapping_type: FieldMappingType::Text( + QuickwitTextOptions::default(), + Cardinality::SingleValued, + ), + }, + ], + timestamp_field: Some("timestamp".to_string()), + tag_fields: BTreeSet::from_iter(["level".to_string()]), + partition_key: Some("tenant_id".to_string()), + max_num_partitions: NonZeroU32::new(100).unwrap(), + index_field_presence: true, + store_document_size: true, + store_source: true, + tokenizers: vec![TokenizerEntry { + name: "whitespace".to_string(), + config: TokenizerConfig { + tokenizer_type: TokenizerType::Regex(RegexTokenizerOption { + pattern: r"\s+".to_string(), + }), + filters: vec![TokenFilterType::LowerCaser], + }, + }], + }; + let serialized = serde_json::to_string(&doc_mapping).unwrap(); + let deserialized: DocMapping = serde_json::from_str(&serialized).unwrap(); + assert_eq!(deserialized, doc_mapping); + } + + #[test] + fn test_doc_mapping_serde_default_values() { + let doc_mapping: DocMapping = serde_json::from_str("{}").unwrap(); + assert_eq!( + doc_mapping.mode, + Mode::Dynamic(QuickwitJsonOptions::default_dynamic()) + ); + assert!(doc_mapping.field_mappings.is_empty()); + assert_eq!(doc_mapping.timestamp_field, None); + assert!(doc_mapping.tag_fields.is_empty()); + assert_eq!(doc_mapping.partition_key, None); + assert_eq!( + doc_mapping.max_num_partitions, + NonZeroU32::new(200).unwrap() + ); + assert_eq!(doc_mapping.index_field_presence, false); + assert_eq!(doc_mapping.store_document_size, false); + assert_eq!(doc_mapping.store_source, false); + } +} diff --git a/quickwit/quickwit-doc-mapper/src/lib.rs b/quickwit/quickwit-doc-mapper/src/lib.rs index 161732b28f5..9fdc35fbfb2 100644 --- a/quickwit/quickwit-doc-mapper/src/lib.rs +++ b/quickwit/quickwit-doc-mapper/src/lib.rs @@ -27,6 +27,7 @@ mod default_doc_mapper; mod doc_mapper; +mod doc_mapping; mod error; mod query_builder; mod routing_expression; @@ -36,8 +37,7 @@ pub mod tag_pruning; pub use default_doc_mapper::{ analyze_text, BinaryFormat, DefaultDocMapper, DefaultDocMapperBuilder, FieldMappingEntry, - FieldMappingType, Mode, ModeType, QuickwitBytesOptions, QuickwitJsonOptions, TokenizerConfig, - TokenizerEntry, + FieldMappingType, QuickwitBytesOptions, QuickwitJsonOptions, TokenizerConfig, TokenizerEntry, }; use default_doc_mapper::{ FastFieldOptions, FieldMappingEntryForSerialization, IndexRecordOptionSchema, @@ -45,6 +45,7 @@ use default_doc_mapper::{ TokenFilterType, TokenizerType, }; pub use doc_mapper::{DocMapper, JsonObject, NamedField, TermRange, WarmupInfo}; +pub use doc_mapping::{DocMapping, Mode, ModeType}; pub use error::{DocParsingError, QueryParserError}; use quickwit_common::shared_consts::FIELD_PRESENCE_FIELD_NAME; pub use routing_expression::RoutingExpr; @@ -56,23 +57,23 @@ pub const SOURCE_FIELD_NAME: &str = "_source"; pub const DYNAMIC_FIELD_NAME: &str = "_dynamic"; /// Field name reserved 
for storing the length of source document. -pub const DOCUMENT_LEN_FIELD_NAME: &str = "_doc_length"; +pub const DOCUMENT_SIZE_FIELD_NAME: &str = "_doc_length"; /// Quickwit reserved field names. const QW_RESERVED_FIELD_NAMES: &[&str] = &[ - SOURCE_FIELD_NAME, + DOCUMENT_SIZE_FIELD_NAME, DYNAMIC_FIELD_NAME, FIELD_PRESENCE_FIELD_NAME, - DOCUMENT_LEN_FIELD_NAME, + SOURCE_FIELD_NAME, ]; /// Cardinality of a field. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Cardinality { /// Single-valued field. - SingleValue, + SingleValued, /// Multivalued field. - MultiValues, + MultiValued, } #[derive(utoipa::OpenApi)] diff --git a/quickwit/quickwit-indexing/src/actors/doc_processor.rs b/quickwit/quickwit-indexing/src/actors/doc_processor.rs index 26b11155d8f..3fe17d7106a 100644 --- a/quickwit/quickwit-indexing/src/actors/doc_processor.rs +++ b/quickwit/quickwit-indexing/src/actors/doc_processor.rs @@ -82,6 +82,7 @@ impl JsonDoc { } } +#[allow(clippy::enum_variant_names)] #[derive(Error, Debug)] pub enum DocProcessorError { #[error("doc mapper parse error: {0}")] diff --git a/quickwit/quickwit-indexing/src/source/ingest/mod.rs b/quickwit/quickwit-indexing/src/source/ingest/mod.rs index 9eb5d6d5b69..692cc0a02d0 100644 --- a/quickwit/quickwit-indexing/src/source/ingest/mod.rs +++ b/quickwit/quickwit-indexing/src/source/ingest/mod.rs @@ -218,8 +218,8 @@ impl IngestSource { assigned_shard.status = IndexingStatus::Active; let partition_id = assigned_shard.partition_id.clone(); - let from_position_exclusive = fetch_payload.from_position_exclusive().clone(); - let to_position_inclusive = fetch_payload.to_position_inclusive().clone(); + let from_position_exclusive = fetch_payload.from_position_exclusive(); + let to_position_inclusive = fetch_payload.to_position_inclusive(); for mrecord in decoded_mrecords(mrecord_batch) { match mrecord { @@ -257,7 +257,7 @@ impl IngestSource { let partition_id = assigned_shard.partition_id.clone(); let from_position_exclusive = assigned_shard.current_position_inclusive.clone(); - let to_position_inclusive = fetch_eof.eof_position().clone(); + let to_position_inclusive = fetch_eof.eof_position(); batch_builder .checkpoint_delta @@ -575,8 +575,7 @@ impl Source for IngestSource { for acquired_shard in acquire_shards_response.acquired_shards { let index_uid = acquired_shard.index_uid().clone(); let shard_id = acquired_shard.shard_id().clone(); - let mut current_position_inclusive = - acquired_shard.publish_position_inclusive().clone(); + let mut current_position_inclusive = acquired_shard.publish_position_inclusive(); let leader_id: NodeId = acquired_shard.leader_id.into(); let follower_id_opt: Option = acquired_shard.follower_id.map(Into::into); let source_id: SourceId = acquired_shard.source_id; diff --git a/quickwit/quickwit-indexing/src/source/kinesis/helpers.rs b/quickwit/quickwit-indexing/src/source/kinesis/helpers.rs index faaac674dc1..18ee020b944 100644 --- a/quickwit/quickwit-indexing/src/source/kinesis/helpers.rs +++ b/quickwit/quickwit-indexing/src/source/kinesis/helpers.rs @@ -17,7 +17,7 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . 
-use aws_sdk_kinesis::config::{Region, SharedAsyncSleep}; +use aws_sdk_kinesis::config::{BehaviorVersion, Region, SharedAsyncSleep}; use aws_sdk_kinesis::{Client, Config}; use quickwit_aws::{get_aws_config, DEFAULT_AWS_REGION}; use quickwit_config::RegionOrEndpoint; @@ -25,7 +25,7 @@ use quickwit_config::RegionOrEndpoint; pub async fn get_kinesis_client(region_or_endpoint: RegionOrEndpoint) -> anyhow::Result { let aws_config = get_aws_config().await; - let mut kinesis_config = Config::builder(); + let mut kinesis_config = Config::builder().behavior_version(BehaviorVersion::v2024_03_28()); kinesis_config.set_retry_config(aws_config.retry_config().cloned()); kinesis_config.set_credentials_provider(aws_config.credentials_provider()); kinesis_config.set_http_client(aws_config.http_client()); diff --git a/quickwit/quickwit-indexing/src/source/mod.rs b/quickwit/quickwit-indexing/src/source/mod.rs index dae5d9f5053..db2583d0e95 100644 --- a/quickwit/quickwit-indexing/src/source/mod.rs +++ b/quickwit/quickwit-indexing/src/source/mod.rs @@ -421,7 +421,7 @@ pub async fn check_source_connectivity( #[allow(unused_variables)] SourceParams::Kafka(params) => { #[cfg(not(feature = "kafka"))] - anyhow::bail!("Quickwit binary was not compiled with the `kafka` feature"); + anyhow::bail!("Quickwit was compiled without the `kafka` feature"); #[cfg(feature = "kafka")] { @@ -432,7 +432,7 @@ pub async fn check_source_connectivity( #[allow(unused_variables)] SourceParams::Kinesis(params) => { #[cfg(not(feature = "kinesis"))] - anyhow::bail!("Quickwit binary was not compiled with the `kinesis` feature"); + anyhow::bail!("Quickwit was compiled without the `kinesis` feature"); #[cfg(feature = "kinesis")] { @@ -443,7 +443,7 @@ pub async fn check_source_connectivity( #[allow(unused_variables)] SourceParams::Pulsar(params) => { #[cfg(not(feature = "pulsar"))] - anyhow::bail!("Quickwit binary was not compiled with the `pulsar` feature"); + anyhow::bail!("Quickwit was compiled without the `pulsar` feature"); #[cfg(feature = "pulsar")] { diff --git a/quickwit/quickwit-indexing/src/test_utils.rs b/quickwit/quickwit-indexing/src/test_utils.rs index 4c320544bd2..bda258689d9 100644 --- a/quickwit/quickwit-indexing/src/test_utils.rs +++ b/quickwit/quickwit-indexing/src/test_utils.rs @@ -256,7 +256,7 @@ pub struct MockSplitBuilder { impl MockSplitBuilder { pub fn new(split_id: &str) -> Self { Self { - split_metadata: mock_split_meta(split_id, &IndexUid::from_parts("test-index", 0)), + split_metadata: mock_split_meta(split_id, &IndexUid::for_test("test-index", 0)), } } diff --git a/quickwit/quickwit-ingest/src/error.rs b/quickwit/quickwit-ingest/src/error.rs index 52a1f9b6fdd..a919a46f666 100644 --- a/quickwit/quickwit-ingest/src/error.rs +++ b/quickwit/quickwit-ingest/src/error.rs @@ -44,15 +44,17 @@ pub enum IngestServiceError { IoError(String), #[error("rate limited")] RateLimited, - #[error("ingest service is unavailable")] - Unavailable, + #[error("ingest service is unavailable ({0})")] + Unavailable(String), } impl From> for IngestServiceError { fn from(error: AskError) -> Self { match error { AskError::ErrorReply(error) => error, - AskError::MessageNotDelivered => IngestServiceError::Unavailable, + AskError::MessageNotDelivered => { + IngestServiceError::Unavailable("actor not running".to_string()) + } AskError::ProcessMessageError => IngestServiceError::Internal(error.to_string()), } } @@ -61,7 +63,7 @@ impl From> for IngestServiceError { impl From for IngestServiceError { fn from(error: BufferError) -> Self { match error 
{ - BufferError::Closed => IngestServiceError::Unavailable, + BufferError::Closed => IngestServiceError::Unavailable(error.to_string()), BufferError::Unknown => IngestServiceError::Internal(error.to_string()), } } @@ -76,8 +78,11 @@ impl From for IngestServiceError { impl From for IngestServiceError { fn from(error: IngestV2Error) -> Self { match error { - IngestV2Error::Timeout(_) | IngestV2Error::Unavailable(_) => { - IngestServiceError::Unavailable + IngestV2Error::Timeout(error_msg) => { + IngestServiceError::Unavailable(format!("timeout {error_msg}")) + } + IngestV2Error::Unavailable(error_msg) => { + IngestServiceError::Unavailable(format!("unavailable: {error_msg}")) } IngestV2Error::Internal(message) => IngestServiceError::Internal(message), IngestV2Error::ShardNotFound { .. } => { @@ -98,7 +103,7 @@ impl ServiceError for IngestServiceError { Self::InvalidPosition(_) => ServiceErrorCode::BadRequest, Self::IoError { .. } => ServiceErrorCode::Internal, Self::RateLimited => ServiceErrorCode::TooManyRequests, - Self::Unavailable => ServiceErrorCode::Unavailable, + Self::Unavailable(_) => ServiceErrorCode::Unavailable, } } } @@ -116,8 +121,8 @@ impl GrpcServiceError for IngestServiceError { Self::RateLimited } - fn new_unavailable(_: String) -> Self { - Self::Unavailable + fn new_unavailable(error_msg: String) -> Self { + Self::Unavailable(error_msg) } } @@ -141,7 +146,7 @@ impl From for tonic::Status { IngestServiceError::InvalidPosition(_) => tonic::Code::InvalidArgument, IngestServiceError::IoError { .. } => tonic::Code::Internal, IngestServiceError::RateLimited => tonic::Code::ResourceExhausted, - IngestServiceError::Unavailable => tonic::Code::Unavailable, + IngestServiceError::Unavailable(_) => tonic::Code::Unavailable, }; let message = error.to_string(); tonic::Status::new(code, message) diff --git a/quickwit/quickwit-ingest/src/ingest_v2/broadcast.rs b/quickwit/quickwit-ingest/src/ingest_v2/broadcast.rs index 8feaaa7aa3d..9fa8d8d930a 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/broadcast.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/broadcast.rs @@ -171,7 +171,7 @@ impl BroadcastLocalShardsTask { .shards .iter() .filter_map(|(queue_id, shard)| { - if !shard.is_replica() { + if shard.is_advertisable && !shard.is_replica() { Some((queue_id.clone(), shard.shard_state)) } else { None @@ -479,22 +479,44 @@ mod tests { let mut state_guard = state.lock_partially().await.unwrap(); let index_uid: IndexUid = IndexUid::for_test("test-index", 0); + let queue_id_00 = queue_id(&index_uid, "test-source", &ShardId::from(0)); + let shard_00 = IngesterShard::new_solo( + ShardState::Open, + Position::Beginning, + Position::Beginning, + Instant::now(), + ); + state_guard.shards.insert(queue_id_00.clone(), shard_00); + let queue_id_01 = queue_id(&index_uid, "test-source", &ShardId::from(1)); - let shard = IngesterShard::new_solo( + let mut shard_01 = IngesterShard::new_solo( ShardState::Open, Position::Beginning, Position::Beginning, Instant::now(), ); - state_guard.shards.insert(queue_id_01.clone(), shard); + shard_01.is_advertisable = true; + state_guard.shards.insert(queue_id_01.clone(), shard_01); - let rate_limiter = RateLimiter::from_settings(RateLimiterSettings::default()); - let rate_meter = RateMeter::default(); + let queue_id_02 = queue_id(&index_uid, "test-source", &ShardId::from(2)); + let mut shard_02 = IngesterShard::new_replica( + NodeId::from("test-leader"), + ShardState::Open, + Position::Beginning, + Position::Beginning, + Instant::now(), + ); + shard_02.is_advertisable = 
true; + state_guard.shards.insert(queue_id_02.clone(), shard_02); - state_guard - .rate_trackers - .insert(queue_id_01.clone(), (rate_limiter, rate_meter)); + for queue_id in [queue_id_00, queue_id_01, queue_id_02] { + let rate_limiter = RateLimiter::from_settings(RateLimiterSettings::default()); + let rate_meter = RateMeter::default(); + state_guard + .rate_trackers + .insert(queue_id, (rate_limiter, rate_meter)); + } drop(state_guard); let new_snapshot = task.snapshot_local_shards().await.unwrap(); diff --git a/quickwit/quickwit-ingest/src/ingest_v2/fetch.rs b/quickwit/quickwit-ingest/src/ingest_v2/fetch.rs index 4ee586be4df..79dac00bb2a 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/fetch.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/fetch.rs @@ -171,7 +171,7 @@ impl FetchStreamTask { }; let batch_size = mrecord_batch.estimate_size(); let fetch_payload = FetchPayload { - index_uid: self.index_uid.clone().into(), + index_uid: Some(self.index_uid.clone()), source_id: self.source_id.clone(), shard_id: Some(self.shard_id.clone()), mrecord_batch: Some(mrecord_batch), @@ -209,7 +209,7 @@ impl FetchStreamTask { let eof_position = to_position_inclusive.as_eof(); let fetch_eof = FetchEof { - index_uid: self.index_uid.clone().into(), + index_uid: Some(self.index_uid.clone()), source_id: self.source_id.clone(), shard_id: Some(self.shard_id.clone()), eof_position: Some(eof_position), @@ -553,7 +553,7 @@ async fn fault_tolerant_fetch_stream( Ok(fetch_message) => match &fetch_message.message { Some(fetch_message::Message::Payload(fetch_payload)) => { let batch_size = fetch_payload.estimate_size(); - let to_position_inclusive = fetch_payload.to_position_inclusive().clone(); + let to_position_inclusive = fetch_payload.to_position_inclusive(); let in_flight_value = InFlightValue::new( fetch_message, batch_size, @@ -566,7 +566,7 @@ async fn fault_tolerant_fetch_stream( *from_position_exclusive = to_position_inclusive; } Some(fetch_message::Message::Eof(fetch_eof)) => { - let eof_position = fetch_eof.eof_position().clone(); + let eof_position = fetch_eof.eof_position(); let in_flight_value = InFlightValue::new( fetch_message, ByteSize(0), diff --git a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs index f7fc40a3177..a5ec1cb717e 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs @@ -94,7 +94,7 @@ const MIN_RESET_SHARDS_INTERVAL: Duration = if cfg!(any(test, feature = "testsui /// Duration after which persist requests time out with /// [`quickwit_proto::ingest::IngestV2Error::Timeout`]. pub(super) const PERSIST_REQUEST_TIMEOUT: Duration = if cfg!(any(test, feature = "testsuite")) { - Duration::from_millis(100) + Duration::from_millis(500) } else { Duration::from_secs(6) }; @@ -472,7 +472,6 @@ impl Ingester { }; return Ok(persist_response); } - // first verify if we would locally accept each subrequest { let mut total_requested_capacity = bytesize::ByteSize::b(0); @@ -491,6 +490,11 @@ impl Ingester { persist_failures.push(persist_failure); continue; }; + // A router can only know about a newly opened shard if it has been informed by the + // control plane, which confirms that the shard was correctly opened in the + // metastore. 
+ shard.is_advertisable = true; + if shard.is_closed() { let persist_failure = PersistFailure { subrequest_id: subrequest.subrequest_id, @@ -597,7 +601,6 @@ impl Ingester { } } } - // replicate to the follower { let mut replicate_futures = FuturesUnordered::new(); @@ -846,17 +849,22 @@ impl Ingester { open_fetch_stream_request: OpenFetchStreamRequest, ) -> IngestV2Result>> { let queue_id = open_fetch_stream_request.queue_id(); - let shard_status_rx = self - .state - .lock_partially() - .await? - .shards - .get(&queue_id) - .ok_or_else(|| IngestV2Error::ShardNotFound { - shard_id: open_fetch_stream_request.shard_id().clone(), - })? - .shard_status_rx - .clone(); + + let mut state_guard = self.state.lock_partially().await?; + + let shard = + state_guard + .shards + .get_mut(&queue_id) + .ok_or_else(|| IngestV2Error::ShardNotFound { + shard_id: open_fetch_stream_request.shard_id().clone(), + })?; + // An indexer can only know about a newly opened shard if it has been scheduled by the + // control plane, which confirms that the shard was correctly opened in the + // metastore. + shard.is_advertisable = true; + + let shard_status_rx = shard.shard_status_rx.clone(); let mrecordlog = self.state.mrecordlog(); let (service_stream, _fetch_task_handle) = FetchStreamTask::spawn( open_fetch_stream_request, @@ -1184,7 +1192,7 @@ impl EventSubscriber for WeakIngesterState { if shard_position.is_eof() { state_guard.delete_shard(&queue_id).await; } else if !shard_position.is_beginning() { - state_guard.truncate_shard(&queue_id, &shard_position).await; + state_guard.truncate_shard(&queue_id, shard_position).await; } } } @@ -1478,6 +1486,7 @@ mod tests { solo_shard_02.assert_is_closed(); solo_shard_02.assert_replication_position(Position::offset(1u64)); solo_shard_02.assert_truncation_position(Position::offset(0u64)); + assert!(solo_shard_02.is_advertisable); state_guard .mrecordlog @@ -1495,21 +1504,32 @@ mod tests { let mut state_guard = ingester.state.lock_fully().await.unwrap(); let index_uid: IndexUid = IndexUid::for_test("test-index", 0); - let queue_id_01 = queue_id(&index_uid, "test-source", &ShardId::from(1)); - let shard = IngesterShard::new_solo( + let queue_id_00 = queue_id(&index_uid, "test-source", &ShardId::from(0)); + let shard_00 = IngesterShard::new_solo( ShardState::Open, Position::Beginning, Position::Beginning, Instant::now(), ); - state_guard.shards.insert(queue_id_01.clone(), shard); - - let rate_limiter = RateLimiter::from_settings(RateLimiterSettings::default()); - let rate_meter = RateMeter::default(); - state_guard - .rate_trackers - .insert(queue_id_01.clone(), (rate_limiter, rate_meter)); + state_guard.shards.insert(queue_id_00.clone(), shard_00); + let queue_id_01 = queue_id(&index_uid, "test-source", &ShardId::from(1)); + let mut shard_01 = IngesterShard::new_solo( + ShardState::Open, + Position::Beginning, + Position::Beginning, + Instant::now(), + ); + shard_01.is_advertisable = true; + state_guard.shards.insert(queue_id_01.clone(), shard_01); + + for queue_id in [&queue_id_00, &queue_id_01] { + let rate_limiter = RateLimiter::from_settings(RateLimiterSettings::default()); + let rate_meter = RateMeter::default(); + state_guard + .rate_trackers + .insert(queue_id.clone(), (rate_limiter, rate_meter)); + } drop(state_guard); tokio::time::sleep(Duration::from_millis(100)).await; @@ -2548,12 +2568,9 @@ mod tests { .await .unwrap(); - state_guard - .shards - .get(&queue_id) - .unwrap() - .notify_shard_status(); - + let shard = state_guard.shards.get(&queue_id).unwrap(); + 
assert!(shard.is_advertisable); + shard.notify_shard_status(); drop(state_guard); let fetch_response = fetch_stream.next().await.unwrap().unwrap(); diff --git a/quickwit/quickwit-ingest/src/ingest_v2/models.rs b/quickwit/quickwit-ingest/src/ingest_v2/models.rs index 2b9f203880a..aee61f2130e 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/models.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/models.rs @@ -44,6 +44,13 @@ pub(super) struct IngesterShard { pub replication_position_inclusive: Position, /// Position up to which the shard has been truncated. pub truncation_position_inclusive: Position, + /// Whether the shard should be advertised to other nodes (routers) via gossip. + /// + /// Because shards are created in multiple steps (e.g., init shard on leader, create shard in + /// the metastore), we must receive a "signal" from the control plane confirming that a shard + /// was successfully opened before advertising it. Currently, this confirmation comes in the + /// form of `PersistRequest` or `FetchRequest`. + pub is_advertisable: bool, pub shard_status_tx: watch::Sender, pub shard_status_rx: watch::Receiver, /// Instant at which the shard was last written to. @@ -65,6 +72,7 @@ impl IngesterShard { shard_state, replication_position_inclusive, truncation_position_inclusive, + is_advertisable: false, shard_status_tx, shard_status_rx, last_write_instant: now, @@ -85,6 +93,9 @@ impl IngesterShard { shard_state, replication_position_inclusive, truncation_position_inclusive, + // This is irrelevant for replica shards since they are not advertised via gossip + // anyway. + is_advertisable: false, shard_status_tx, shard_status_rx, last_write_instant: now, @@ -104,6 +115,7 @@ impl IngesterShard { shard_state, replication_position_inclusive, truncation_position_inclusive, + is_advertisable: false, shard_status_tx, shard_status_rx, last_write_instant: now, @@ -240,6 +252,7 @@ mod tests { primary_shard.truncation_position_inclusive, Position::Beginning ); + assert!(!primary_shard.is_advertisable); } #[test] @@ -265,6 +278,7 @@ mod tests { replica_shard.truncation_position_inclusive, Position::Beginning ); + assert!(!replica_shard.is_advertisable); } #[test] @@ -286,5 +300,6 @@ mod tests { solo_shard.truncation_position_inclusive, Position::Beginning ); + assert!(!solo_shard.is_advertisable); } } diff --git a/quickwit/quickwit-ingest/src/ingest_v2/mrecordlog_utils.rs b/quickwit/quickwit-ingest/src/ingest_v2/mrecordlog_utils.rs index 1b40e016186..8ca72a420a0 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/mrecordlog_utils.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/mrecordlog_utils.rs @@ -177,6 +177,7 @@ pub(super) fn queue_position_range( mod tests { use super::*; + #[cfg(not(feature = "failpoints"))] #[tokio::test] async fn test_append_non_empty_doc_batch() { let tempdir = tempfile::tempdir().unwrap(); diff --git a/quickwit/quickwit-ingest/src/ingest_v2/replication.rs b/quickwit/quickwit-ingest/src/ingest_v2/replication.rs index 9254d5d8b12..06c6a5b688b 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/replication.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/replication.rs @@ -50,7 +50,7 @@ pub(super) const SYN_REPLICATION_STREAM_CAPACITY: usize = 5; /// Duration after which replication requests time out with [`ReplicationError::Timeout`]. 
const REPLICATION_REQUEST_TIMEOUT: Duration = if cfg!(any(test, feature = "testsuite")) { - Duration::from_millis(100) + Duration::from_millis(250) } else { Duration::from_secs(3) }; @@ -555,7 +555,7 @@ impl ReplicationTask { for subrequest in replicate_request.subrequests { let queue_id = subrequest.queue_id(); - let from_position_exclusive = subrequest.from_position_exclusive().clone(); + let from_position_exclusive = subrequest.from_position_exclusive(); let Some(shard) = state_guard.shards.get(&queue_id) else { let replicate_failure = ReplicateFailure { @@ -1376,6 +1376,7 @@ mod tests { ); } + #[cfg(not(feature = "failpoints"))] #[tokio::test] async fn test_replication_task_deletes_dangling_shard() { let leader_id: NodeId = "test-leader".into(); diff --git a/quickwit/quickwit-ingest/src/ingest_v2/routing_table.rs b/quickwit/quickwit-ingest/src/ingest_v2/routing_table.rs index b2095b6647f..a0351bac59d 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/routing_table.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/routing_table.rs @@ -493,7 +493,7 @@ mod tests { #[test] fn test_routing_table_entry_new() { let self_node_id: NodeId = "test-node-0".into(); - let index_uid: IndexUid = IndexUid::from_parts("test-index", 0); + let index_uid = IndexUid::for_test("test-index", 0); let source_id: SourceId = "test-source".into(); let table_entry = RoutingTableEntry::new( &self_node_id, @@ -557,7 +557,7 @@ mod tests { #[test] fn test_routing_table_entry_has_open_shards() { - let index_uid: IndexUid = IndexUid::from_parts("test-index", 0); + let index_uid = IndexUid::for_test("test-index", 0); let source_id: SourceId = "test-source".into(); let table_entry = RoutingTableEntry::empty(index_uid.clone(), source_id.clone()); @@ -653,7 +653,7 @@ mod tests { #[test] fn test_routing_table_entry_next_open_shard_round_robin() { - let index_uid: IndexUid = IndexUid::from_parts("test-index", 0); + let index_uid = IndexUid::for_test("test-index", 0); let source_id: SourceId = "test-source".into(); let table_entry = RoutingTableEntry::empty(index_uid.clone(), source_id.clone()); let ingester_pool = IngesterPool::default(); @@ -770,7 +770,7 @@ mod tests { #[test] fn test_routing_table_entry_insert_open_shards() { - let index_uid_0: IndexUid = IndexUid::from_parts("test-index", 0); + let index_uid_0 = IndexUid::for_test("test-index", 0); let source_id: SourceId = "test-source".into(); let mut table_entry = RoutingTableEntry::empty(index_uid_0.clone(), source_id.clone()); @@ -847,7 +847,7 @@ mod tests { assert_eq!(table_entry.remote_shards[1].shard_state, ShardState::Closed); // Update index incarnation. 
- let index_uid_1: IndexUid = IndexUid::from_parts("test-index", 1); + let index_uid_1 = IndexUid::for_test("test-index", 1); table_entry.insert_open_shards( &local_node_id, &local_node_id, @@ -879,7 +879,7 @@ mod tests { #[test] fn test_routing_table_entry_close_shards() { - let index_uid: IndexUid = IndexUid::from_parts("test-index", 0); + let index_uid = IndexUid::for_test("test-index", 0); let source_id: SourceId = "test-source".into(); let mut table_entry = RoutingTableEntry::empty(index_uid.clone(), source_id.clone()); @@ -960,7 +960,7 @@ mod tests { #[test] fn test_routing_table_entry_delete_shards() { - let index_uid: IndexUid = IndexUid::from_parts("test-index", 0); + let index_uid = IndexUid::for_test("test-index", 0); let source_id: SourceId = "test-source".into(); let mut table_entry = RoutingTableEntry::empty(index_uid.clone(), source_id.clone()); diff --git a/quickwit/quickwit-ingest/src/ingest_v2/state.rs b/quickwit/quickwit-ingest/src/ingest_v2/state.rs index df165077069..8cbfe81f5f4 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/state.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/state.rs @@ -181,12 +181,14 @@ impl IngesterState { } else { Position::offset(*position_range.start() - 1) }; - let solo_shard = IngesterShard::new_solo( + let mut solo_shard = IngesterShard::new_solo( ShardState::Closed, replication_position_inclusive, truncation_position_inclusive, now, ); + // We want to advertise the shard as read-only right away. + solo_shard.is_advertisable = true; inner_guard.shards.insert(queue_id.clone(), solo_shard); let rate_limiter = RateLimiter::from_settings(rate_limiter_settings); @@ -355,7 +357,7 @@ impl FullyLockedIngesterState<'_> { pub async fn truncate_shard( &mut self, queue_id: &QueueId, - truncate_up_to_position_inclusive: &Position, + truncate_up_to_position_inclusive: Position, ) { // TODO: Replace with if-let-chains when stabilized. 
let Some(truncate_up_to_offset_inclusive) = truncate_up_to_position_inclusive.as_u64() @@ -365,7 +367,7 @@ impl FullyLockedIngesterState<'_> { let Some(shard) = self.inner.shards.get_mut(queue_id) else { return; }; - if shard.truncation_position_inclusive >= *truncate_up_to_position_inclusive { + if shard.truncation_position_inclusive >= truncate_up_to_position_inclusive { return; } match self @@ -374,8 +376,8 @@ .await { Ok(_) => { - shard.truncation_position_inclusive = truncate_up_to_position_inclusive.clone(); info!("truncated shard `{queue_id}` at {truncate_up_to_position_inclusive}"); + shard.truncation_position_inclusive = truncate_up_to_position_inclusive; } Err(TruncateError::MissingQueue(_)) => { error!("failed to truncate shard `{queue_id}`: WAL queue not found"); diff --git a/quickwit/quickwit-ingest/src/queue.rs b/quickwit/quickwit-ingest/src/queue.rs index 234073f8c9d..63a6300f010 100644 --- a/quickwit/quickwit-ingest/src/queue.rs +++ b/quickwit/quickwit-ingest/src/queue.rs @@ -41,7 +41,15 @@ pub struct Queues { impl Queues { pub async fn open(queues_dir_path: &Path) -> crate::Result { - tokio::fs::create_dir_all(queues_dir_path).await.unwrap(); + tokio::fs::create_dir_all(queues_dir_path) + .await + .map_err(|error| { + IngestServiceError::IoError(format!( + "failed to create WAL directory `{}`: {}", + queues_dir_path.display(), + error + )) + })?; let record_log = MultiRecordLogAsync::open(queues_dir_path).await?; Ok(Queues { record_log }) } diff --git a/quickwit/quickwit-integration-tests/src/tests/index_tests.rs b/quickwit/quickwit-integration-tests/src/tests/index_tests.rs index 781ce6bae3c..b805f0f8157 100644 --- a/quickwit/quickwit-integration-tests/src/tests/index_tests.rs +++ b/quickwit/quickwit-integration-tests/src/tests/index_tests.rs @@ -20,6 +20,7 @@ use std::collections::HashSet; use std::time::Duration; +use hyper::StatusCode; use quickwit_config::service::QuickwitService; use quickwit_config::ConfigFormat; use quickwit_metastore::SplitState; @@ -299,17 +300,39 @@ async fn test_ingest_v2_happy_path() { .toggle("_ingest-source", true) .await .unwrap(); - sandbox - .indexer_rest_client - .ingest( - "test_index", - ingest_json!({"body": "doc1"}), - None, - None, - CommitType::WaitFor, - ) - .await - .unwrap(); + + // The servers have been detected as ready. Unfortunately, they may not have been added + // to the ingester pool yet. + // + // If we get an unavailable error, we retry up to 10 times. 
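+ // With a 200ms pause between attempts, we wait at most ~2 seconds in total.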
+ // See #4213 + const MAX_NUM_RETRIES: usize = 10; + for i in 1..=MAX_NUM_RETRIES { + let ingest_res = sandbox + .indexer_rest_client + .ingest( + "test_index", + ingest_json!({"body": "doc1"}), + None, + None, + CommitType::WaitFor, + ) + .await; + let Some(ingest_error) = ingest_res.err() else { + // Success + break; + }; + assert_eq!( + ingest_error.status_code(), + Some(StatusCode::SERVICE_UNAVAILABLE) + ); + assert!( + i < MAX_NUM_RETRIES, + "service not available after {MAX_NUM_RETRIES} tries" + ); + tokio::time::sleep(Duration::from_millis(200)).await; + } + sandbox .wait_for_splits("test_index", Some(vec![SplitState::Published]), 1) .await diff --git a/quickwit/quickwit-lambda/src/indexer/ingest/helpers.rs b/quickwit/quickwit-lambda/src/indexer/ingest/helpers.rs index f35931af591..098cc1841ff 100644 --- a/quickwit/quickwit-lambda/src/indexer/ingest/helpers.rs +++ b/quickwit/quickwit-lambda/src/indexer/ingest/helpers.rs @@ -42,11 +42,12 @@ use quickwit_indexing::IndexingPipeline; use quickwit_ingest::IngesterPool; use quickwit_janitor::{start_janitor_service, JanitorService}; use quickwit_metastore::{ - CreateIndexRequestExt, CreateIndexResponseExt, IndexMetadata, IndexMetadataResponseExt, + AddSourceRequestExt, CreateIndexRequestExt, CreateIndexResponseExt, IndexMetadata, + IndexMetadataResponseExt, }; use quickwit_proto::indexing::CpuCapacity; use quickwit_proto::metastore::{ - CreateIndexRequest, IndexMetadataRequest, MetastoreError, MetastoreService, + AddSourceRequest, CreateIndexRequest, IndexMetadataRequest, MetastoreError, MetastoreService, MetastoreServiceClient, ResetSourceCheckpointRequest, }; use quickwit_proto::types::PipelineUid; @@ -60,7 +61,7 @@ use crate::indexer::environment::{ DISABLE_JANITOR, DISABLE_MERGE, INDEX_CONFIG_URI, MAX_CHECKPOINTS, }; -const LAMBDA_SOURCE_ID: &str = "_ingest-lambda-source"; +const LAMBDA_SOURCE_ID: &str = "ingest-lambda-source"; /// The indexing service needs to update its cluster chitchat state so that the control plane is /// aware of the running tasks. We thus create a fake cluster to instantiate the indexing service @@ -154,29 +155,47 @@ pub(super) async fn configure_source( } /// Check if the index exists, creating or overwriting it if necessary +/// +/// If the index exists but does not have the Lambda source ([`LAMBDA_SOURCE_ID`]), +/// the source is added. pub(super) async fn init_index_if_necessary( metastore: &mut MetastoreServiceClient, storage_resolver: &StorageResolver, default_index_root_uri: &Uri, overwrite: bool, + source_config: &SourceConfig, ) -> anyhow::Result { let metadata_result = metastore .index_metadata(IndexMetadataRequest::for_index_id(INDEX_ID.clone())) .await; let metadata = match metadata_result { - Ok(_) if overwrite => { - info!( - index_id = *INDEX_ID, - "Overwrite enabled, clearing existing index", - ); - let mut index_service = IndexService::new(metastore.clone(), storage_resolver.clone()); - index_service.clear_index(&INDEX_ID).await?; - metastore - .index_metadata(IndexMetadataRequest::for_index_id(INDEX_ID.clone())) - .await? - .deserialize_index_metadata()? 
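+ // The index already exists: clear it if `overwrite` is set, then add the Lambda + // source if it is missing.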
+ Ok(metadata_resp) => { + let current_metadata = metadata_resp.deserialize_index_metadata()?; + let mut metadata_changed = false; + if overwrite { + info!(index_uid = %current_metadata.index_uid, "overwrite enabled, clearing existing index"); + let mut index_service = + IndexService::new(metastore.clone(), storage_resolver.clone()); + index_service.clear_index(&INDEX_ID).await?; + metadata_changed = true; + } + if !current_metadata.sources.contains_key(LAMBDA_SOURCE_ID) { + let add_source_request = AddSourceRequest::try_from_source_config( + current_metadata.index_uid.clone(), + source_config, + )?; + metastore.add_source(add_source_request).await?; + metadata_changed = true; + } + if metadata_changed { + metastore + .index_metadata(IndexMetadataRequest::for_index_id(INDEX_ID.clone())) + .await? + .deserialize_index_metadata()? + } else { + current_metadata + } } - Ok(metadata_resp) => metadata_resp.deserialize_index_metadata()?, Err(MetastoreError::NotFound(_)) => { info!( index_id = *INDEX_ID, @@ -191,10 +210,13 @@ pub(super) async fn init_index_if_necessary( index_config.index_id, ); } - let create_resp = metastore - .create_index(CreateIndexRequest::try_from_index_config(&index_config)?) - .await?; - info!("index created"); + let create_index_request = CreateIndexRequest::try_from_index_and_source_configs( + &index_config, + std::slice::from_ref(source_config), + )?; + let create_resp = metastore.create_index(create_index_request).await?; + + info!(index_uid = %create_resp.index_uid(), "index created"); create_resp.deserialize_index_metadata()? } Err(e) => bail!(e), diff --git a/quickwit/quickwit-lambda/src/indexer/ingest/mod.rs b/quickwit/quickwit-lambda/src/indexer/ingest/mod.rs index 31c98ab73f9..6faf495b85f 100644 --- a/quickwit/quickwit-lambda/src/indexer/ingest/mod.rs +++ b/quickwit/quickwit-lambda/src/indexer/ingest/mod.rs @@ -58,17 +58,18 @@ pub async fn ingest(args: IngestArgs) -> anyhow::Result { let (config, storage_resolver, mut metastore) = load_node_config(CONFIGURATION_TEMPLATE).await?; + let source_config = + configure_source(args.input_path, args.input_format, args.vrl_script).await?; + let index_metadata = init_index_if_necessary( &mut metastore, &storage_resolver, &config.default_index_root_uri, args.overwrite, + &source_config, ) .await?; - let source_config = - configure_source(args.input_path, args.input_format, args.vrl_script).await?; - let mut services = vec![QuickwitService::Indexer]; if !*DISABLE_JANITOR { services.push(QuickwitService::Janitor); diff --git a/quickwit/quickwit-lambda/src/searcher/api.rs b/quickwit/quickwit-lambda/src/searcher/api.rs index 44c73d54c12..14a6ee46d36 100644 --- a/quickwit/quickwit-lambda/src/searcher/api.rs +++ b/quickwit/quickwit-lambda/src/searcher/api.rs @@ -88,12 +88,22 @@ fn es_compat_api( .or(es_compat_cat_indices_handler(metastore.clone())) } +fn index_api( + metastore: MetastoreServiceClient, +) -> impl Filter + Clone { + get_index_metadata_handler(metastore) +} + fn v1_searcher_api( search_service: Arc, metastore: MetastoreServiceClient, ) -> impl Filter + Clone { warp::path!("api" / "v1" / ..) 
- .and(native_api(search_service.clone()).or(es_compat_api(search_service, metastore))) + .and( + native_api(search_service.clone()) + .or(es_compat_api(search_service, metastore.clone())) + .or(index_api(metastore)), + ) .with(warp::filters::compression::gzip()) .recover(|rejection| { error!(?rejection, "request rejected"); diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs index 567ae67918f..2a480e3d716 100644 --- a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs +++ b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs @@ -725,7 +725,6 @@ mod tests { use std::collections::BTreeSet; use quickwit_doc_mapper::tag_pruning::TagFilterAst; - use quickwit_doc_mapper::{BinaryFormat, FieldMappingType}; use quickwit_proto::ingest::Shard; use quickwit_proto::metastore::ListShardsSubrequest; use quickwit_proto::types::{IndexUid, SourceId}; @@ -897,106 +896,4 @@ mod tests { assert!(!split_query_predicate(&&split_2, &query)); assert!(!split_query_predicate(&&split_3, &query)); } - - #[test] - fn test_index_otel_bytes_fields_format_conversion() { - // TODO: remove after 0.8 release. - let index_json_str = r#" - { - "version": "0.6", - "splits": [], - "index": { - "version": "0.6", - "sources": [], - "index_uid": "otel-traces-v0_6:00000000000000000000000000", - "checkpoint": { - "kafka-source": { - "00000000000000000000": "00000000000000000042" - } - }, - "create_timestamp": 1789, - "index_config": { - "version": "0.6", - "index_id": "otel-traces-v0_6", - "index_uri": "s3://otel-traces-v0_6", - "doc_mapping": { - "field_mappings": [ - { - "name": "timestamp", - "type": "datetime", - "fast": true - }, - { - "name": "tenant_id", - "type": "bytes", - "fast": true, - "input_format": "base64", - "output_format": "base64" - }, - { - "name": "trace_id", - "type": "bytes", - "fast": true, - "input_format": "base64", - "output_format": "base64" - }, - { - "name": "span_id", - "type": "bytes", - "fast": true, - "input_format": "base64", - "output_format": "base64" - } - ], - "tag_fields": [], - "timestamp_field": "timestamp", - "store_source": false - } - } - } - } - "#; - - let file_backed_index: FileBackedIndex = serde_json::from_str(index_json_str).unwrap(); - let field_mapping = file_backed_index - .metadata - .index_config - .doc_mapping - .field_mappings; - assert_eq!( - field_mapping - .iter() - .filter(|field_mapping| field_mapping.name == "tenant_id") - .count(), - 1 - ); - assert_eq!( - field_mapping - .iter() - .filter(|field_mapping| field_mapping.name == "trace_id") - .count(), - 1 - ); - assert_eq!( - field_mapping - .iter() - .filter(|field_mapping| field_mapping.name == "span_id") - .count(), - 1 - ); - for field_mapping in &field_mapping { - if field_mapping.name == "tenant_id" { - if let FieldMappingType::Bytes(bytes_options, _) = &field_mapping.mapping_type { - assert_eq!(bytes_options.input_format, BinaryFormat::Base64); - assert_eq!(bytes_options.output_format, BinaryFormat::Base64); - } - } - if field_mapping.name == "trace_id" || field_mapping.name == "span_id" { - if let FieldMappingType::Bytes(bytes_options, _) = &field_mapping.mapping_type { - assert_eq!(bytes_options.input_format, BinaryFormat::Hex); - assert_eq!(bytes_options.output_format, BinaryFormat::Hex); - } - } - } - } } diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/serialize.rs 
b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/serialize.rs index f8ecf803200..afceae2ebf5 100644 --- a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/serialize.rs +++ b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/serialize.rs @@ -20,7 +20,6 @@ use std::collections::HashMap; use itertools::Itertools; -use quickwit_doc_mapper::{BinaryFormat, FieldMappingType}; use quickwit_proto::ingest::Shard; use quickwit_proto::metastore::SourceType; use quickwit_proto::types::SourceId; @@ -37,9 +36,6 @@ pub(crate) enum VersionedFileBackedIndex { #[serde(rename = "0.8")] // Retro compatibility. #[serde(alias = "0.7")] - #[serde(alias = "0.6")] - #[serde(alias = "0.5")] - #[serde(alias = "0.4")] V0_8(FileBackedIndexV0_8), } @@ -106,35 +102,7 @@ impl From for FileBackedIndexV0_8 { } impl From for FileBackedIndex { - fn from(mut index: FileBackedIndexV0_8) -> Self { - // if the index is otel-traces-v0_6, convert set bytes fields input and output format to hex - // to be compatible with the v0_6 version. - // TODO: remove after 0.8 release. - if index.metadata.index_id() == "otel-traces-v0_6" { - index - .metadata - .index_config - .doc_mapping - .field_mappings - .iter_mut() - .filter(|field_mapping| { - field_mapping.name == "trace_id" || field_mapping.name == "span_id" - }) - .for_each(|field_mapping| { - if let FieldMappingType::Bytes(bytes_options, _) = - &mut field_mapping.mapping_type - { - bytes_options.input_format = BinaryFormat::Hex; - bytes_options.output_format = BinaryFormat::Hex; - } - }); - } - // Override split index_id to support old SplitMetadata format. - for split in index.splits.iter_mut() { - if split.split_metadata.index_uid.is_empty() { - split.split_metadata.index_uid = index.metadata.index_uid.clone(); - } - } + fn from(index: FileBackedIndexV0_8) -> Self { let mut per_source_shards: HashMap = index .shards .into_iter() diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/shards.rs b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/shards.rs index ba43cbc995d..3f88cc382a7 100644 --- a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/shards.rs +++ b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/shards.rs @@ -76,7 +76,7 @@ impl Shards { for shard in shards_vec { let shard_id = shard.shard_id().clone(); let partition_id = PartitionId::from(shard_id.as_str()); - let position = shard.publish_position_inclusive().clone(); + let position = shard.publish_position_inclusive(); checkpoint.add_partition(partition_id, position); shards.insert(shard_id, shard); } diff --git a/quickwit/quickwit-metastore/src/metastore/index_metadata/mod.rs b/quickwit/quickwit-metastore/src/metastore/index_metadata/mod.rs index 36db32f24e7..f3e59fe90b6 100644 --- a/quickwit/quickwit-metastore/src/metastore/index_metadata/mod.rs +++ b/quickwit/quickwit-metastore/src/metastore/index_metadata/mod.rs @@ -20,20 +20,17 @@ pub(crate) mod serialize; use std::collections::hash_map::Entry; -use std::collections::{BTreeMap, HashMap}; +use std::collections::HashMap; use quickwit_common::uri::Uri; -use quickwit_config::{ - IndexConfig, RetentionPolicy, SearchSettings, SourceConfig, TestableForRegression, -}; +use quickwit_config::{IndexConfig, RetentionPolicy, SearchSettings, SourceConfig}; use quickwit_proto::metastore::{EntityKind, MetastoreError, MetastoreResult}; -use quickwit_proto::types::{IndexUid, Position, SourceId}; +use 
quickwit_proto::types::{IndexUid, SourceId}; use serde::{Deserialize, Serialize}; use serialize::VersionedIndexMetadata; use time::OffsetDateTime; -use ulid::Ulid; -use crate::checkpoint::{IndexCheckpoint, PartitionId, SourceCheckpoint, SourceCheckpointDelta}; +use crate::checkpoint::IndexCheckpoint; /// An index metadata carries all meta data about an index. #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] @@ -75,7 +72,7 @@ impl IndexMetadata { /// An incarnation id of `0` will be used to complete the index id into a index uuid. #[cfg(any(test, feature = "testsuite"))] pub fn for_test(index_id: &str, index_uri: &str) -> Self { - let index_uid = IndexUid::from_parts(index_id, 0); + let index_uid = IndexUid::for_test(index_id, 0); let mut index_metadata = IndexMetadata::new(IndexConfig::for_test(index_id, index_uri)); index_metadata.index_uid = index_uid; index_metadata @@ -161,8 +158,15 @@ impl IndexMetadata { } } -impl TestableForRegression for IndexMetadata { +#[cfg(any(test, feature = "testsuite"))] +impl quickwit_config::TestableForRegression for IndexMetadata { fn sample_for_regression() -> IndexMetadata { + use std::collections::BTreeMap; + + use quickwit_proto::types::Position; + + use crate::checkpoint::{PartitionId, SourceCheckpoint, SourceCheckpointDelta}; + let mut source_checkpoint = SourceCheckpoint::default(); let delta = SourceCheckpointDelta::from_partition_delta( PartitionId::from(0i64), @@ -176,7 +180,7 @@ impl TestableForRegression for IndexMetadata { let checkpoint = IndexCheckpoint::from(per_source_checkpoint); let index_config = IndexConfig::sample_for_regression(); let mut index_metadata = IndexMetadata { - index_uid: IndexUid::from_parts(&index_config.index_id, Ulid::nil()), + index_uid: IndexUid::for_test(&index_config.index_id, 0), index_config, checkpoint, create_timestamp: 1789, diff --git a/quickwit/quickwit-metastore/src/metastore/index_metadata/serialize.rs b/quickwit/quickwit-metastore/src/metastore/index_metadata/serialize.rs index 88c89a85c39..2834c719e33 100644 --- a/quickwit/quickwit-metastore/src/metastore/index_metadata/serialize.rs +++ b/quickwit/quickwit-metastore/src/metastore/index_metadata/serialize.rs @@ -33,9 +33,6 @@ pub(crate) enum VersionedIndexMetadata { #[serde(rename = "0.8")] // Retro compatibility. #[serde(alias = "0.7")] - #[serde(alias = "0.6")] - #[serde(alias = "0.5")] - #[serde(alias = "0.4")] V0_8(IndexMetadataV0_8), } @@ -73,8 +70,6 @@ impl From for IndexMetadataV0_8 { #[derive(Clone, Debug, Serialize, Deserialize, utoipa::ToSchema)] pub(crate) struct IndexMetadataV0_8 { #[schema(value_type = String)] - // Defaults to nil for backward compatibility. 
- #[serde(default, alias = "index_id")] pub index_uid: IndexUid, #[schema(value_type = VersionedIndexConfig)] pub index_config: IndexConfig, @@ -98,11 +93,7 @@ impl TryFrom for IndexMetadata { sources.insert(source.source_id.clone(), source); } Ok(Self { - index_uid: if v0_8.index_uid.is_empty() { - IndexUid::from_parts(&v0_8.index_config.index_id, 0) - } else { - v0_8.index_uid - }, + index_uid: v0_8.index_uid, index_config: v0_8.index_config, checkpoint: v0_8.checkpoint, create_timestamp: v0_8.create_timestamp, diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/metastore.rs b/quickwit/quickwit-metastore/src/metastore/postgres/metastore.rs index 89e887c5e65..c6fcfb45ada 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgres/metastore.rs +++ b/quickwit/quickwit-metastore/src/metastore/postgres/metastore.rs @@ -416,7 +416,7 @@ impl MetastoreService for PostgresqlMetastore { &mut self, request: IndexMetadataRequest, ) -> MetastoreResult { - let response = if let Some(index_uid) = &request.index_uid { + let pg_index_opt = if let Some(index_uid) = &request.index_uid { index_opt_for_uid(&self.connection_pool, index_uid.clone()).await? } else if let Some(index_id) = &request.index_id { index_opt(&self.connection_pool, index_id).await? @@ -427,9 +427,11 @@ impl MetastoreService for PostgresqlMetastore { cause: "".to_string(), }); }; - let index_metadata = response + let index_metadata = pg_index_opt .ok_or(MetastoreError::NotFound(EntityKind::Index { - index_id: request.index_id.expect("`index_id` should be set"), + index_id: request + .into_index_id() + .expect("`index_id` or `index_uid` should be set"), }))? .index_metadata()?; let response = IndexMetadataResponse::try_from_index_metadata(&index_metadata)?; diff --git a/quickwit/quickwit-metastore/src/split_metadata.rs b/quickwit/quickwit-metastore/src/split_metadata.rs index c67fec6f68e..8b2db8be2be 100644 --- a/quickwit/quickwit-metastore/src/split_metadata.rs +++ b/quickwit/quickwit-metastore/src/split_metadata.rs @@ -264,11 +264,9 @@ pub struct SplitInfo { #[cfg(any(test, feature = "testsuite"))] impl quickwit_config::TestableForRegression for SplitMetadata { fn sample_for_regression() -> Self { - use ulid::Ulid; - SplitMetadata { split_id: "split".to_string(), - index_uid: IndexUid::from_parts("my-index", Ulid::nil()), + index_uid: IndexUid::for_test("my-index", 0), source_id: "source".to_string(), node_id: "node".to_string(), delete_opstamp: 10, @@ -401,10 +399,7 @@ mod tests { fn test_split_metadata_debug() { let split_metadata = SplitMetadata { split_id: "split-1".to_string(), - index_uid: IndexUid::from_parts( - "00000000-0000-0000-0000-000000000000", - ulid::Ulid::nil(), - ), + index_uid: IndexUid::for_test("00000000-0000-0000-0000-000000000000", 0), partition_id: 0, source_id: "source-1".to_string(), node_id: "node-1".to_string(), diff --git a/quickwit/quickwit-metastore/src/split_metadata_version.rs b/quickwit/quickwit-metastore/src/split_metadata_version.rs index f7ebbfdf530..5b98cdb9e4b 100644 --- a/quickwit/quickwit-metastore/src/split_metadata_version.rs +++ b/quickwit/quickwit-metastore/src/split_metadata_version.rs @@ -158,9 +158,6 @@ pub(crate) enum VersionedSplitMetadata { #[serde(rename = "0.8")] // Retro compatibility. 
#[serde(alias = "0.7")] - #[serde(alias = "0.6")] - #[serde(alias = "0.5")] - #[serde(alias = "0.4")] V0_8(SplitMetadataV0_8), } diff --git a/quickwit/quickwit-metastore/src/tests/delete_task.rs b/quickwit/quickwit-metastore/src/tests/delete_task.rs index 86bfed90e28..1b4091a3dfb 100644 --- a/quickwit/quickwit-metastore/src/tests/delete_task.rs +++ b/quickwit/quickwit-metastore/src/tests/delete_task.rs @@ -67,7 +67,7 @@ pub async fn test_metastore_create_delete_task< // Create a delete task on an index with wrong incarnation_id let error = metastore .create_delete_task(DeleteQuery { - index_uid: Some(IndexUid::from_parts(&index_id, 12345)), + index_uid: Some(IndexUid::for_test(&index_id, 12345)), ..delete_query.clone() }) .await diff --git a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.expected.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.expected.json deleted file mode 100644 index f8f5b4e6f01..00000000000 --- a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.expected.json +++ /dev/null @@ -1,180 +0,0 @@ -{ - "delete_tasks": [ - { - "create_timestamp": 0, - "delete_query": { - "index_uid": "my-index:00000000000000000000000000", - "query_ast": "{\"type\":\"bool\",\"must\":[{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Harry\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}}},{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Potter\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}}}]}" - }, - "opstamp": 10 - } - ], - "index": { - "checkpoint": { - "kafka-source": { - "00000000000000000000": "00000000000000000042" - } - }, - "create_timestamp": 1789, - "index_config": { - "doc_mapping": { - "document_length": false, - "dynamic_mapping": { - "expand_dots": true, - "fast": { - "normalizer": "raw" - }, - "indexed": true, - "record": "basic", - "stored": true, - "tokenizer": "raw" - }, - "field_mappings": [ - { - "coerce": true, - "fast": true, - "indexed": true, - "name": "tenant_id", - "output_format": "number", - "stored": true, - "type": "u64" - }, - { - "fast": true, - "fast_precision": "seconds", - "indexed": true, - "input_formats": [ - "rfc3339", - "unix_timestamp" - ], - "name": "timestamp", - "output_format": "rfc3339", - "stored": true, - "type": "datetime" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "log_level", - "record": "basic", - "stored": true, - "tokenizer": "raw", - "type": "text" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "message", - "record": "position", - "stored": true, - "tokenizer": "default", - "type": "text" - } - ], - "index_field_presence": true, - "max_num_partitions": 100, - "mode": "dynamic", - "partition_key": "tenant_id", - "store_source": true, - "tag_fields": [ - "log_level", - "tenant_id" - ], - "timestamp_field": "timestamp", - "tokenizers": [ - { - "filters": [], - "name": "custom_tokenizer", - "pattern": "[^\\p{L}\\p{N}]+", - "type": "regex" - } - ] - }, - "index_id": "my-index", - "index_uri": "s3://quickwit-indexes/my-index", - "indexing_settings": { - "commit_timeout_secs": 301, - "docstore_blocksize": 1000000, - "docstore_compression_level": 8, - "merge_policy": { - "maturation_period": "2days", - "max_merge_factor": 11, - "merge_factor": 9, - "min_level_num_docs": 100000, - "type": "stable_log" - }, - "resources": { - "heap_size": "50.0 MB" - }, - "split_num_docs_target": 10000001 - }, - "retention": { - "period": "90 days", - "schedule": "daily" - }, - "search_settings": { - 
"default_search_fields": [ - "message" - ] - }, - "version": "0.8" - }, - "index_uid": "my-index:00000000000000000000000000", - "sources": [ - { - "enabled": true, - "input_format": "json", - "num_pipelines": 2, - "params": { - "client_params": {}, - "topic": "kafka-topic" - }, - "source_id": "kafka-source", - "source_type": "kafka", - "transform": { - "script": ".message = downcase(string!(.message))", - "timezone": "UTC" - }, - "version": "0.8" - } - ], - "version": "0.8" - }, - "splits": [ - { - "create_timestamp": 3, - "delete_opstamp": 10, - "footer_offsets": { - "end": 2000, - "start": 1000 - }, - "index_uid": "my-index:00000000000000000000000000", - "maturity": { - "maturation_period_millis": 4000, - "type": "immature" - }, - "node_id": "node", - "num_docs": 12303, - "num_merge_ops": 3, - "partition_id": 7, - "publish_timestamp": 1789, - "source_id": "source", - "split_id": "split", - "split_state": "Published", - "tags": [ - "234", - "aaa" - ], - "time_range": { - "end": 130198, - "start": 121000 - }, - "uncompressed_docs_size_in_bytes": 234234, - "update_timestamp": 1789, - "version": "0.8" - } - ], - "version": "0.8" -} diff --git a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.json deleted file mode 100644 index 51cb6ecd3e8..00000000000 --- a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.json +++ /dev/null @@ -1,180 +0,0 @@ -{ - "delete_tasks": [ - { - "create_timestamp": 0, - "delete_query": { - "index_uid": "my-index:00000000000000000000000000", - "query_ast": "{\"type\":\"bool\",\"must\":[{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Harry\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}}},{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Potter\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}}}]}" - }, - "opstamp": 10 - } - ], - "index": { - "checkpoint": { - "kafka-source": { - "00000000000000000000": "00000000000000000042" - } - }, - "create_timestamp": 1789, - "index_config": { - "doc_mapping": { - "dynamic_mapping": { - "expand_dots": true, - "fast": { - "normalizer": "raw" - }, - "indexed": true, - "record": "basic", - "stored": true, - "tokenizer": "raw" - }, - "field_mappings": [ - { - "coerce": true, - "fast": true, - "indexed": true, - "name": "tenant_id", - "output_format": "number", - "stored": true, - "type": "u64" - }, - { - "fast": true, - "fast_precision": "seconds", - "indexed": true, - "input_formats": [ - "rfc3339", - "unix_timestamp" - ], - "name": "timestamp", - "output_format": "rfc3339", - "stored": true, - "type": "datetime" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "log_level", - "record": "basic", - "stored": true, - "tokenizer": "raw", - "type": "text" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "message", - "record": "position", - "stored": true, - "tokenizer": "default", - "type": "text" - } - ], - "index_field_presence": true, - "max_num_partitions": 100, - "mode": "dynamic", - "partition_key": "tenant_id", - "store_source": true, - "tag_fields": [ - "log_level", - "tenant_id" - ], - "timestamp_field": "timestamp", - "tokenizers": [ - { - "filters": [], - "name": "custom_tokenizer", - "pattern": "[^\\p{L}\\p{N}]+", - "type": "regex" - } - ] - }, - "index_id": "my-index", - "index_uri": "s3://quickwit-indexes/my-index", - "indexing_settings": { - "commit_timeout_secs": 301, - "docstore_blocksize": 1000000, - 
"docstore_compression_level": 8, - "merge_policy": { - "maturation_period": "2days", - "max_merge_factor": 11, - "merge_factor": 9, - "min_level_num_docs": 100000, - "type": "stable_log" - }, - "resources": { - "heap_size": "50.0 MB" - }, - "split_num_docs_target": 10000001 - }, - "retention": { - "period": "90 days", - "schedule": "daily" - }, - "search_settings": { - "default_search_fields": [ - "message" - ] - }, - "version": "0.6" - }, - "index_uid": "my-index:00000000000000000000000000", - "sources": [ - { - "desired_num_pipelines": 2, - "enabled": true, - "input_format": "json", - "max_num_pipelines_per_indexer": 2, - "params": { - "client_params": {}, - "topic": "kafka-topic" - }, - "source_id": "kafka-source", - "source_type": "kafka", - "transform": { - "script": ".message = downcase(string!(.message))", - "timezone": "UTC" - }, - "version": "0.6" - } - ], - "version": "0.6" - }, - "splits": [ - { - "create_timestamp": 3, - "delete_opstamp": 10, - "footer_offsets": { - "end": 2000, - "start": 1000 - }, - "index_uid": "my-index:00000000000000000000000000", - "maturity": { - "maturation_period_millis": 4000, - "type": "immature" - }, - "node_id": "node", - "num_docs": 12303, - "num_merge_ops": 3, - "partition_id": 7, - "publish_timestamp": 1789, - "source_id": "source", - "split_id": "split", - "split_state": "Published", - "tags": [ - "234", - "aaa" - ], - "time_range": { - "end": 130198, - "start": 121000 - }, - "uncompressed_docs_size_in_bytes": 234234, - "update_timestamp": 1789, - "version": "0.6" - } - ], - "version": "0.6" -} diff --git a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.7.expected.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.7.expected.json index 643b3b8e4fe..147abe615a6 100644 --- a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.7.expected.json +++ b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.7.expected.json @@ -18,7 +18,6 @@ "create_timestamp": 1789, "index_config": { "doc_mapping": { - "document_length": false, "dynamic_mapping": { "expand_dots": true, "fast": { @@ -77,6 +76,7 @@ "max_num_partitions": 100, "mode": "dynamic", "partition_key": "tenant_id", + "store_document_size": false, "store_source": true, "tag_fields": [ "log_level", diff --git a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.8.expected.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.8.expected.json index 643b3b8e4fe..147abe615a6 100644 --- a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.8.expected.json +++ b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.8.expected.json @@ -18,7 +18,6 @@ "create_timestamp": 1789, "index_config": { "doc_mapping": { - "document_length": false, "dynamic_mapping": { "expand_dots": true, "fast": { @@ -77,6 +76,7 @@ "max_num_partitions": 100, "mode": "dynamic", "partition_key": "tenant_id", + "store_document_size": false, "store_source": true, "tag_fields": [ "log_level", diff --git a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.8.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.8.json index 643b3b8e4fe..147abe615a6 100644 --- a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.8.json +++ b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.8.json @@ -18,7 +18,6 @@ "create_timestamp": 1789, "index_config": { "doc_mapping": { - "document_length": false, "dynamic_mapping": { "expand_dots": true, "fast": { @@ -77,6 +76,7 @@ "max_num_partitions": 100, "mode": "dynamic", "partition_key": "tenant_id", + 
"store_document_size": false, "store_source": true, "tag_fields": [ "log_level", diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.4.expected.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.4.expected.json deleted file mode 100644 index 574dc5d0324..00000000000 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.4.expected.json +++ /dev/null @@ -1,126 +0,0 @@ -{ - "checkpoint": { - "kafka-source": { - "00000000000000000000": "00000000000000000042" - } - }, - "create_timestamp": 1789, - "index_config": { - "doc_mapping": { - "document_length": false, - "dynamic_mapping": { - "expand_dots": true, - "fast": { - "normalizer": "raw" - }, - "indexed": true, - "record": "basic", - "stored": true, - "tokenizer": "raw" - }, - "field_mappings": [ - { - "coerce": true, - "fast": true, - "indexed": true, - "name": "tenant_id", - "output_format": "number", - "stored": true, - "type": "u64" - }, - { - "fast": true, - "fast_precision": "seconds", - "indexed": true, - "input_formats": [ - "rfc3339", - "unix_timestamp" - ], - "name": "timestamp", - "output_format": "rfc3339", - "stored": true, - "type": "datetime" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "log_level", - "record": "basic", - "stored": true, - "tokenizer": "raw", - "type": "text" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "message", - "record": "position", - "stored": true, - "tokenizer": "default", - "type": "text" - } - ], - "index_field_presence": false, - "max_num_partitions": 100, - "mode": "dynamic", - "partition_key": "tenant", - "store_source": true, - "tag_fields": [ - "log_level", - "tenant_id" - ], - "timestamp_field": "timestamp", - "tokenizers": [] - }, - "index_id": "my-index", - "index_uri": "s3://quickwit-indexes/my-index", - "indexing_settings": { - "commit_timeout_secs": 301, - "docstore_blocksize": 1000000, - "docstore_compression_level": 8, - "merge_policy": { - "maturation_period": "2days", - "max_merge_factor": 11, - "merge_factor": 9, - "min_level_num_docs": 100000, - "type": "stable_log" - }, - "resources": { - "heap_size": "50.0 MB" - }, - "split_num_docs_target": 10000001 - }, - "retention": { - "period": "90 days", - "schedule": "daily" - }, - "search_settings": { - "default_search_fields": [ - "message" - ] - }, - "version": "0.8" - }, - "index_uid": "my-index:00000000000000000000000000", - "sources": [ - { - "enabled": true, - "input_format": "json", - "num_pipelines": 2, - "params": { - "client_params": {}, - "topic": "kafka-topic" - }, - "source_id": "kafka-source", - "source_type": "kafka", - "transform": { - "script": ".message = downcase(string!(.message))", - "timezone": "UTC" - }, - "version": "0.8" - } - ], - "version": "0.8" -} diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.4.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.4.json deleted file mode 100644 index a6316834104..00000000000 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.4.json +++ /dev/null @@ -1,108 +0,0 @@ -{ - "checkpoint": { - "kafka-source": { - "00000000000000000000": "00000000000000000042" - } - }, - "create_timestamp": 1789, - "index_config": { - "doc_mapping": { - "field_mappings": [ - { - "fast": true, - "indexed": true, - "name": "tenant_id", - "stored": true, - "type": "u64" - }, - { - "fast": true, - "indexed": true, - "input_formats": [ - "rfc3339", - "unix_timestamp" - ], - "name": "timestamp", - "output_format": "rfc3339", - "fast_precision": "seconds", - 
"stored": true, - "type": "datetime" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "log_level", - "stored": true, - "tokenizer": "raw", - "type": "text" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "message", - "record": "position", - "stored": true, - "tokenizer": "default", - "type": "text" - } - ], - "max_num_partitions": 100, - "mode": "dynamic", - "partition_key": "tenant", - "store_source": true, - "tag_fields": [ - "log_level", - "tenant_id" - ], - "timestamp_field": "timestamp" - }, - "index_id": "my-index", - "index_uri": "s3://quickwit-indexes/my-index", - "indexing_settings": { - "commit_timeout_secs": 301, - "docstore_blocksize": 1000000, - "docstore_compression_level": 8, - "merge_policy": { - "maturation_period": "2days", - "max_merge_factor": 11, - "merge_factor": 9, - "min_level_num_docs": 100000, - "type": "stable_log" - }, - "resources": { - "heap_size": 50000000 - }, - "split_num_docs_target": 10000001 - }, - "retention": { - "period": "90 days", - "schedule": "daily" - }, - "search_settings": { - "default_search_fields": [ - "message" - ] - }, - "version": "0.4" - }, - "sources": [ - { - "desired_num_pipelines": 2, - "enabled": true, - "max_num_pipelines_per_indexer": 2, - "params": { - "client_params": {}, - "topic": "kafka-topic" - }, - "source_id": "kafka-source", - "source_type": "kafka", - "transform": { - "script": ".message = downcase(string!(.message))" - }, - "version": "0.4" - } - ], - "version": "0.4" -} diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.5.expected.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.5.expected.json deleted file mode 100644 index 574dc5d0324..00000000000 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.5.expected.json +++ /dev/null @@ -1,126 +0,0 @@ -{ - "checkpoint": { - "kafka-source": { - "00000000000000000000": "00000000000000000042" - } - }, - "create_timestamp": 1789, - "index_config": { - "doc_mapping": { - "document_length": false, - "dynamic_mapping": { - "expand_dots": true, - "fast": { - "normalizer": "raw" - }, - "indexed": true, - "record": "basic", - "stored": true, - "tokenizer": "raw" - }, - "field_mappings": [ - { - "coerce": true, - "fast": true, - "indexed": true, - "name": "tenant_id", - "output_format": "number", - "stored": true, - "type": "u64" - }, - { - "fast": true, - "fast_precision": "seconds", - "indexed": true, - "input_formats": [ - "rfc3339", - "unix_timestamp" - ], - "name": "timestamp", - "output_format": "rfc3339", - "stored": true, - "type": "datetime" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "log_level", - "record": "basic", - "stored": true, - "tokenizer": "raw", - "type": "text" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "message", - "record": "position", - "stored": true, - "tokenizer": "default", - "type": "text" - } - ], - "index_field_presence": false, - "max_num_partitions": 100, - "mode": "dynamic", - "partition_key": "tenant", - "store_source": true, - "tag_fields": [ - "log_level", - "tenant_id" - ], - "timestamp_field": "timestamp", - "tokenizers": [] - }, - "index_id": "my-index", - "index_uri": "s3://quickwit-indexes/my-index", - "indexing_settings": { - "commit_timeout_secs": 301, - "docstore_blocksize": 1000000, - "docstore_compression_level": 8, - "merge_policy": { - "maturation_period": "2days", - "max_merge_factor": 11, - "merge_factor": 9, - "min_level_num_docs": 100000, - "type": "stable_log" - 
}, - "resources": { - "heap_size": "50.0 MB" - }, - "split_num_docs_target": 10000001 - }, - "retention": { - "period": "90 days", - "schedule": "daily" - }, - "search_settings": { - "default_search_fields": [ - "message" - ] - }, - "version": "0.8" - }, - "index_uid": "my-index:00000000000000000000000000", - "sources": [ - { - "enabled": true, - "input_format": "json", - "num_pipelines": 2, - "params": { - "client_params": {}, - "topic": "kafka-topic" - }, - "source_id": "kafka-source", - "source_type": "kafka", - "transform": { - "script": ".message = downcase(string!(.message))", - "timezone": "UTC" - }, - "version": "0.8" - } - ], - "version": "0.8" -} diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.5.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.5.json deleted file mode 100644 index 66233c904d7..00000000000 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.5.json +++ /dev/null @@ -1,108 +0,0 @@ -{ - "checkpoint": { - "kafka-source": { - "00000000000000000000": "00000000000000000042" - } - }, - "create_timestamp": 1789, - "index_config": { - "doc_mapping": { - "field_mappings": [ - { - "fast": true, - "indexed": true, - "name": "tenant_id", - "stored": true, - "type": "u64" - }, - { - "fast": true, - "indexed": true, - "input_formats": [ - "rfc3339", - "unix_timestamp" - ], - "name": "timestamp", - "output_format": "rfc3339", - "fast_precision": "seconds", - "stored": true, - "type": "datetime" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "log_level", - "stored": true, - "tokenizer": "raw", - "type": "text" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "message", - "record": "position", - "stored": true, - "tokenizer": "default", - "type": "text" - } - ], - "max_num_partitions": 100, - "mode": "dynamic", - "partition_key": "tenant", - "store_source": true, - "tag_fields": [ - "log_level", - "tenant_id" - ], - "timestamp_field": "timestamp" - }, - "index_id": "my-index", - "index_uri": "s3://quickwit-indexes/my-index", - "indexing_settings": { - "commit_timeout_secs": 301, - "docstore_blocksize": 1000000, - "docstore_compression_level": 8, - "merge_policy": { - "maturation_period": "2days", - "max_merge_factor": 11, - "merge_factor": 9, - "min_level_num_docs": 100000, - "type": "stable_log" - }, - "resources": { - "heap_size": 50000000 - }, - "split_num_docs_target": 10000001 - }, - "retention": { - "period": "90 days", - "schedule": "daily" - }, - "search_settings": { - "default_search_fields": [ - "message" - ] - }, - "version": "0.5" - }, - "sources": [ - { - "desired_num_pipelines": 2, - "enabled": true, - "max_num_pipelines_per_indexer": 2, - "params": { - "client_params": {}, - "topic": "kafka-topic" - }, - "source_id": "kafka-source", - "source_type": "kafka", - "transform": { - "script": ".message = downcase(string!(.message))" - }, - "version": "0.5" - } - ], - "version": "0.5" -} diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.expected.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.expected.json deleted file mode 100644 index 52865f45aa2..00000000000 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.expected.json +++ /dev/null @@ -1,133 +0,0 @@ -{ - "checkpoint": { - "kafka-source": { - "00000000000000000000": "00000000000000000042" - } - }, - "create_timestamp": 1789, - "index_config": { - "doc_mapping": { - "document_length": false, - "dynamic_mapping": { - "expand_dots": true, - "fast": { - "normalizer": 
"raw" - }, - "indexed": true, - "record": "basic", - "stored": true, - "tokenizer": "raw" - }, - "field_mappings": [ - { - "coerce": true, - "fast": true, - "indexed": true, - "name": "tenant_id", - "output_format": "number", - "stored": true, - "type": "u64" - }, - { - "fast": true, - "fast_precision": "seconds", - "indexed": true, - "input_formats": [ - "rfc3339", - "unix_timestamp" - ], - "name": "timestamp", - "output_format": "rfc3339", - "stored": true, - "type": "datetime" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "log_level", - "record": "basic", - "stored": true, - "tokenizer": "raw", - "type": "text" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "message", - "record": "position", - "stored": true, - "tokenizer": "default", - "type": "text" - } - ], - "index_field_presence": true, - "max_num_partitions": 100, - "mode": "dynamic", - "partition_key": "tenant_id", - "store_source": true, - "tag_fields": [ - "log_level", - "tenant_id" - ], - "timestamp_field": "timestamp", - "tokenizers": [ - { - "filters": [], - "name": "custom_tokenizer", - "pattern": "[^\\p{L}\\p{N}]+", - "type": "regex" - } - ] - }, - "index_id": "my-index", - "index_uri": "s3://quickwit-indexes/my-index", - "indexing_settings": { - "commit_timeout_secs": 301, - "docstore_blocksize": 1000000, - "docstore_compression_level": 8, - "merge_policy": { - "maturation_period": "2days", - "max_merge_factor": 11, - "merge_factor": 9, - "min_level_num_docs": 100000, - "type": "stable_log" - }, - "resources": { - "heap_size": "50.0 MB" - }, - "split_num_docs_target": 10000001 - }, - "retention": { - "period": "90 days", - "schedule": "daily" - }, - "search_settings": { - "default_search_fields": [ - "message" - ] - }, - "version": "0.8" - }, - "index_uid": "my-index:00000000000000000000000000", - "sources": [ - { - "enabled": true, - "input_format": "json", - "num_pipelines": 2, - "params": { - "client_params": {}, - "topic": "kafka-topic" - }, - "source_id": "kafka-source", - "source_type": "kafka", - "transform": { - "script": ".message = downcase(string!(.message))", - "timezone": "UTC" - }, - "version": "0.8" - } - ], - "version": "0.8" -} diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.json deleted file mode 100644 index f6522a1ba38..00000000000 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.json +++ /dev/null @@ -1,133 +0,0 @@ -{ - "checkpoint": { - "kafka-source": { - "00000000000000000000": "00000000000000000042" - } - }, - "create_timestamp": 1789, - "index_config": { - "doc_mapping": { - "dynamic_mapping": { - "expand_dots": true, - "fast": { - "normalizer": "raw" - }, - "indexed": true, - "record": "basic", - "stored": true, - "tokenizer": "raw" - }, - "field_mappings": [ - { - "coerce": true, - "fast": true, - "indexed": true, - "name": "tenant_id", - "output_format": "number", - "stored": true, - "type": "u64" - }, - { - "fast": true, - "fast_precision": "seconds", - "indexed": true, - "input_formats": [ - "rfc3339", - "unix_timestamp" - ], - "name": "timestamp", - "output_format": "rfc3339", - "stored": true, - "type": "datetime" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "log_level", - "record": "basic", - "stored": true, - "tokenizer": "raw", - "type": "text" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "message", - "record": "position", - "stored": true, - "tokenizer": 
"default", - "type": "text" - } - ], - "index_field_presence": true, - "max_num_partitions": 100, - "mode": "dynamic", - "partition_key": "tenant_id", - "store_source": true, - "tag_fields": [ - "log_level", - "tenant_id" - ], - "timestamp_field": "timestamp", - "tokenizers": [ - { - "filters": [], - "name": "custom_tokenizer", - "pattern": "[^\\p{L}\\p{N}]+", - "type": "regex" - } - ] - }, - "index_id": "my-index", - "index_uri": "s3://quickwit-indexes/my-index", - "indexing_settings": { - "commit_timeout_secs": 301, - "docstore_blocksize": 1000000, - "docstore_compression_level": 8, - "merge_policy": { - "maturation_period": "2days", - "max_merge_factor": 11, - "merge_factor": 9, - "min_level_num_docs": 100000, - "type": "stable_log" - }, - "resources": { - "heap_size": "50.0 MB" - }, - "split_num_docs_target": 10000001 - }, - "retention": { - "period": "90 days", - "schedule": "daily" - }, - "search_settings": { - "default_search_fields": [ - "message" - ] - }, - "version": "0.6" - }, - "index_uid": "my-index:00000000000000000000000000", - "sources": [ - { - "desired_num_pipelines": 2, - "enabled": true, - "input_format": "json", - "max_num_pipelines_per_indexer": 2, - "params": { - "client_params": {}, - "topic": "kafka-topic" - }, - "source_id": "kafka-source", - "source_type": "kafka", - "transform": { - "script": ".message = downcase(string!(.message))", - "timezone": "UTC" - }, - "version": "0.6" - } - ], - "version": "0.6" -} diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.7.expected.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.7.expected.json index 52865f45aa2..e0c572da32f 100644 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.7.expected.json +++ b/quickwit/quickwit-metastore/test-data/index-metadata/v0.7.expected.json @@ -7,7 +7,6 @@ "create_timestamp": 1789, "index_config": { "doc_mapping": { - "document_length": false, "dynamic_mapping": { "expand_dots": true, "fast": { @@ -66,6 +65,7 @@ "max_num_partitions": 100, "mode": "dynamic", "partition_key": "tenant_id", + "store_document_size": false, "store_source": true, "tag_fields": [ "log_level", diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.8.expected.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.8.expected.json index 52865f45aa2..e0c572da32f 100644 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.8.expected.json +++ b/quickwit/quickwit-metastore/test-data/index-metadata/v0.8.expected.json @@ -7,7 +7,6 @@ "create_timestamp": 1789, "index_config": { "doc_mapping": { - "document_length": false, "dynamic_mapping": { "expand_dots": true, "fast": { @@ -66,6 +65,7 @@ "max_num_partitions": 100, "mode": "dynamic", "partition_key": "tenant_id", + "store_document_size": false, "store_source": true, "tag_fields": [ "log_level", diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.8.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.8.json index 52865f45aa2..e0c572da32f 100644 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.8.json +++ b/quickwit/quickwit-metastore/test-data/index-metadata/v0.8.json @@ -7,7 +7,6 @@ "create_timestamp": 1789, "index_config": { "doc_mapping": { - "document_length": false, "dynamic_mapping": { "expand_dots": true, "fast": { @@ -66,6 +65,7 @@ "max_num_partitions": 100, "mode": "dynamic", "partition_key": "tenant_id", + "store_document_size": false, "store_source": true, "tag_fields": [ "log_level", diff --git 
a/quickwit/quickwit-metastore/test-data/manifest/v0.7.expected.json b/quickwit/quickwit-metastore/test-data/manifest/v0.7.expected.json index d47e828440d..a951508ba44 100644 --- a/quickwit/quickwit-metastore/test-data/manifest/v0.7.expected.json +++ b/quickwit/quickwit-metastore/test-data/manifest/v0.7.expected.json @@ -8,7 +8,6 @@ { "description": "Test description.", "doc_mapping": { - "document_length": false, "dynamic_mapping": { "expand_dots": true, "fast": { @@ -47,6 +46,7 @@ "index_field_presence": false, "max_num_partitions": 200, "mode": "dynamic", + "store_document_size": false, "store_source": false, "tag_fields": [], "timestamp_field": "ts", diff --git a/quickwit/quickwit-metastore/test-data/manifest/v0.8.expected.json b/quickwit/quickwit-metastore/test-data/manifest/v0.8.expected.json index d47e828440d..a951508ba44 100644 --- a/quickwit/quickwit-metastore/test-data/manifest/v0.8.expected.json +++ b/quickwit/quickwit-metastore/test-data/manifest/v0.8.expected.json @@ -8,7 +8,6 @@ { "description": "Test description.", "doc_mapping": { - "document_length": false, "dynamic_mapping": { "expand_dots": true, "fast": { @@ -47,6 +46,7 @@ "index_field_presence": false, "max_num_partitions": 200, "mode": "dynamic", + "store_document_size": false, "store_source": false, "tag_fields": [], "timestamp_field": "ts", diff --git a/quickwit/quickwit-metastore/test-data/manifest/v0.8.json b/quickwit/quickwit-metastore/test-data/manifest/v0.8.json index d47e828440d..a951508ba44 100644 --- a/quickwit/quickwit-metastore/test-data/manifest/v0.8.json +++ b/quickwit/quickwit-metastore/test-data/manifest/v0.8.json @@ -8,7 +8,6 @@ { "description": "Test description.", "doc_mapping": { - "document_length": false, "dynamic_mapping": { "expand_dots": true, "fast": { @@ -47,6 +46,7 @@ "index_field_presence": false, "max_num_partitions": 200, "mode": "dynamic", + "store_document_size": false, "store_source": false, "tag_fields": [], "timestamp_field": "ts", diff --git a/quickwit/quickwit-metastore/test-data/split-metadata/v0.6.expected.json b/quickwit/quickwit-metastore/test-data/split-metadata/v0.6.expected.json deleted file mode 100644 index d6475bd3525..00000000000 --- a/quickwit/quickwit-metastore/test-data/split-metadata/v0.6.expected.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "create_timestamp": 3, - "delete_opstamp": 10, - "footer_offsets": { - "end": 2000, - "start": 1000 - }, - "index_uid": "my-index:00000000000000000000000000", - "maturity": { - "maturation_period_millis": 4000, - "type": "immature" - }, - "node_id": "node", - "num_docs": 12303, - "num_merge_ops": 3, - "partition_id": 7, - "source_id": "source", - "split_id": "split", - "tags": [ - "234", - "aaa" - ], - "time_range": { - "end": 130198, - "start": 121000 - }, - "uncompressed_docs_size_in_bytes": 234234, - "version": "0.8" -} diff --git a/quickwit/quickwit-metastore/test-data/split-metadata/v0.6.json b/quickwit/quickwit-metastore/test-data/split-metadata/v0.6.json deleted file mode 100644 index 9c1f6311c86..00000000000 --- a/quickwit/quickwit-metastore/test-data/split-metadata/v0.6.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "create_timestamp": 3, - "delete_opstamp": 10, - "footer_offsets": { - "end": 2000, - "start": 1000 - }, - "index_uid": "my-index:00000000000000000000000000", - "maturity": { - "maturation_period_millis": 4000, - "type": "immature" - }, - "node_id": "node", - "num_docs": 12303, - "num_merge_ops": 3, - "partition_id": 7, - "source_id": "source", - "split_id": "split", - "tags": [ - "234", - "aaa" - ], - "time_range": { 
- "end": 130198, - "start": 121000 - }, - "uncompressed_docs_size_in_bytes": 234234, - "version": "0.6" -} diff --git a/quickwit/quickwit-proto/src/control_plane/mod.rs b/quickwit/quickwit-proto/src/control_plane/mod.rs index 7a90f3d991d..b2aae066164 100644 --- a/quickwit/quickwit-proto/src/control_plane/mod.rs +++ b/quickwit/quickwit-proto/src/control_plane/mod.rs @@ -21,7 +21,7 @@ use quickwit_actors::AskError; use quickwit_common::tower::{MakeLoadShedError, RpcName}; use thiserror; -use crate::metastore::MetastoreError; +use crate::metastore::{MetastoreError, OpenShardSubrequest}; use crate::{GrpcServiceError, ServiceError, ServiceErrorCode}; include!("../codegen/quickwit/quickwit.control_plane.rs"); @@ -125,3 +125,31 @@ impl RpcName for AdviseResetShardsRequest { "advise_reset_shards" } } + +impl GetOrCreateOpenShardsFailureReason { + pub fn create_failure( + &self, + subrequest: impl Into, + ) -> GetOrCreateOpenShardsFailure { + let sub_request = subrequest.into(); + GetOrCreateOpenShardsFailure { + subrequest_id: sub_request.subrequest_id, + index_id: sub_request.index_id, + source_id: sub_request.source_id, + reason: *self as i32, + } + } +} + +impl From for GetOrCreateOpenShardsSubrequest { + fn from(metastore_open_shard_sub_request: OpenShardSubrequest) -> Self { + GetOrCreateOpenShardsSubrequest { + subrequest_id: metastore_open_shard_sub_request.subrequest_id, + index_id: metastore_open_shard_sub_request + .index_uid() + .index_id + .clone(), + source_id: metastore_open_shard_sub_request.source_id, + } + } +} diff --git a/quickwit/quickwit-proto/src/getters.rs b/quickwit/quickwit-proto/src/getters.rs index 0a2c3d88217..1cf2a06a3ac 100644 --- a/quickwit/quickwit-proto/src/getters.rs +++ b/quickwit/quickwit-proto/src/getters.rs @@ -6,13 +6,13 @@ use crate::ingest::*; use crate::metastore::*; use crate::types::*; -macro_rules! generate_getters{ +macro_rules! generate_getters { (impl fn $field:ident() -> $type:ty {} for $($struct:ty),+) => { $( impl $struct { // we track caller so the reported line isn't the macro invocation below #[track_caller] - pub fn $field(&self) -> &$type { + pub fn $field(&self) -> $type { self.$field .as_ref() .expect(concat!("`", @@ -23,8 +23,42 @@ macro_rules! generate_getters{ } } +macro_rules! generate_clone_getters { + (impl fn $field:ident() -> $type:ty {} for $($struct:ty),+) => { + $( + impl $struct { + // we track caller so the reported line isn't the macro invocation below + #[track_caller] + pub fn $field(&self) -> $type { + self.$field + .clone() + .expect(concat!("`", + stringify!($field), "` should be a required field")) + } + } + )* + } +} + +macro_rules! generate_copy_getters { + (impl fn $field:ident() -> $type:ty {} for $($struct:ty),+) => { + $( + impl $struct { + // we track caller so the reported line isn't the macro invocation below + #[track_caller] + pub fn $field(&self) -> $type { + self.$field + .expect(concat!("`", + stringify!($field), "` should be a required field")) + } + } + )* + } +} + +// [`IndexUid`] getters generate_getters! { - impl fn index_uid() -> IndexUid {} for + impl fn index_uid() -> &IndexUid {} for // Control Plane API GetOrCreateOpenShardsSuccess, @@ -44,6 +78,7 @@ generate_getters! { ReplicateSuccess, RetainShardsForSource, Shard, + ShardIdPositions, ShardIds, ShardPKey, TruncateShardsSubrequest, @@ -73,23 +108,84 @@ generate_getters! { UpdateSplitsDeleteOpstampRequest } -generate_getters! { - impl fn shard() -> Shard {} for +// [`PipelineUid`] getters +generate_copy_getters! 
{ + impl fn pipeline_uid() -> PipelineUid {} for - InitShardSubrequest, - InitShardSuccess + IndexingTask } +// [`Position`] getters. We use `clone` because `Position` is an `Arc` under the hood. +generate_clone_getters! { + impl fn eof_position() -> Position {} for + + FetchEof +} + +generate_clone_getters! { + impl fn from_position_exclusive() -> Position {} for + + FetchPayload, + OpenFetchStreamRequest, + ReplicateSubrequest +} + +generate_clone_getters! { + impl fn to_position_inclusive() -> Position {} for + + FetchPayload +} + +generate_clone_getters! { + impl fn publish_position_inclusive() -> Position {} for + + Shard, + ShardIdPosition +} + +generate_clone_getters! { + impl fn replication_position_inclusive() -> Position {} for + + ReplicateSuccess +} + +generate_clone_getters! { + impl fn truncate_up_to_position_inclusive() -> Position {} for + + TruncateShardsSubrequest +} + +// [`Shard`] getters generate_getters! { - impl fn open_shard() -> Shard {} for + impl fn open_shard() -> &Shard {} for OpenShardSubresponse } generate_getters! { - impl fn shard_id() -> ShardId {} for + impl fn shard() -> &Shard {} for + InitShardSubrequest, + InitShardSuccess +} + +// [`ShardId`] getters +generate_getters! { + impl fn shard_id() -> &ShardId {} for + + FetchEof, + FetchPayload, InitShardFailure, + OpenFetchStreamRequest, OpenShardSubrequest, - ShardPKey + PersistFailure, + PersistSubrequest, + PersistSuccess, + ReplicateFailure, + ReplicateSubrequest, + ReplicateSuccess, + Shard, + ShardIdPosition, + ShardPKey, + TruncateShardsSubrequest } diff --git a/quickwit/quickwit-proto/src/indexing/mod.rs b/quickwit/quickwit-proto/src/indexing/mod.rs index 12aa94f16f6..b79311f21fd 100644 --- a/quickwit/quickwit-proto/src/indexing/mod.rs +++ b/quickwit/quickwit-proto/src/indexing/mod.rs @@ -320,13 +320,6 @@ pub struct ShardPositionsUpdate { impl Event for ShardPositionsUpdate {} -impl IndexingTask { - pub fn pipeline_uid(&self) -> PipelineUid { - self.pipeline_uid - .expect("`pipeline_uid` should be a required field") - } -} - impl RpcName for ApplyIndexingPlanRequest { fn rpc_name() -> &'static str { "apply_indexing_plan" diff --git a/quickwit/quickwit-proto/src/ingest/ingester.rs b/quickwit/quickwit-proto/src/ingest/ingester.rs index e9d1e10ee63..469a2588883 100644 --- a/quickwit/quickwit-proto/src/ingest/ingester.rs +++ b/quickwit/quickwit-proto/src/ingest/ingester.rs @@ -19,20 +19,12 @@ use bytesize::ByteSize; -use crate::types::{queue_id, Position, QueueId, ShardId}; +use crate::types::{queue_id, Position, QueueId}; include!("../codegen/quickwit/quickwit.ingest.ingester.rs"); pub use ingester_service_grpc_server::IngesterServiceGrpcServer; -impl FetchEof { - pub fn shard_id(&self) -> &ShardId { - self.shard_id - .as_ref() - .expect("`shard_id` should be a required field") - } -} - impl FetchMessage { pub fn new_payload(payload: FetchPayload) -> Self { assert!( @@ -58,12 +50,6 @@ impl FetchMessage { } impl FetchPayload { - pub fn shard_id(&self) -> &ShardId { - self.shard_id - .as_ref() - .expect("`shard_id` should be a required field") - } - pub fn queue_id(&self) -> QueueId { queue_id(self.index_uid(), &self.source_id, self.shard_id()) } @@ -83,26 +69,6 @@ impl FetchPayload { ByteSize(0) } } - - pub fn from_position_exclusive(&self) -> &Position { - self.from_position_exclusive - .as_ref() - .expect("`from_position_exclusive` should be a required field") - } - - pub fn to_position_inclusive(&self) -> &Position { - self.to_position_inclusive - .as_ref() - .expect("`to_position_inclusive` should 
be a required field") - } -} - -impl FetchEof { - pub fn eof_position(&self) -> &Position { - self.eof_position - .as_ref() - .expect("`eof_position` should be a required field") - } } impl IngesterStatus { @@ -119,55 +85,23 @@ impl IngesterStatus { } impl OpenFetchStreamRequest { - pub fn shard_id(&self) -> &ShardId { - self.shard_id - .as_ref() - .expect("`shard_id` should be a required field") - } - pub fn queue_id(&self) -> QueueId { queue_id(self.index_uid(), &self.source_id, self.shard_id()) } - - pub fn from_position_exclusive(&self) -> &Position { - self.from_position_exclusive - .as_ref() - .expect("`from_position_exclusive` should be a required field") - } } impl PersistSubrequest { - pub fn shard_id(&self) -> &ShardId { - self.shard_id - .as_ref() - .expect("`shard_id` should be a required field") - } - pub fn queue_id(&self) -> QueueId { queue_id(self.index_uid(), &self.source_id, self.shard_id()) } } impl PersistSuccess { - pub fn shard_id(&self) -> &ShardId { - self.shard_id - .as_ref() - .expect("`shard_id` should be a required field") - } - pub fn queue_id(&self) -> QueueId { queue_id(self.index_uid(), &self.source_id, self.shard_id()) } } -impl PersistFailure { - pub fn shard_id(&self) -> &ShardId { - self.shard_id - .as_ref() - .expect("`shard_id` should be a required field") - } -} - impl SynReplicationMessage { pub fn into_open_request(self) -> Option { match self.message { @@ -245,59 +179,13 @@ impl ReplicateRequest { } impl ReplicateSubrequest { - pub fn shard_id(&self) -> &ShardId { - self.shard_id - .as_ref() - .expect("`shard_id` should be a required field") - } - pub fn queue_id(&self) -> QueueId { queue_id(self.index_uid(), &self.source_id, self.shard_id()) } - - pub fn from_position_exclusive(&self) -> &Position { - self.from_position_exclusive - .as_ref() - .expect("`from_position_exclusive` should be a required field") - } -} - -impl ReplicateSuccess { - pub fn shard_id(&self) -> &ShardId { - self.shard_id - .as_ref() - .expect("`shard_id` should be a required field") - } - - pub fn replication_position_inclusive(&self) -> &Position { - self.replication_position_inclusive - .as_ref() - .expect("`replication_position_inclusive` should be a required field") - } -} - -impl ReplicateFailure { - pub fn shard_id(&self) -> &ShardId { - self.shard_id - .as_ref() - .expect("`shard_id` should be a required field") - } } impl TruncateShardsSubrequest { - pub fn shard_id(&self) -> &ShardId { - self.shard_id - .as_ref() - .expect("`shard_id` should be a required field") - } - pub fn queue_id(&self) -> QueueId { queue_id(self.index_uid(), &self.source_id, self.shard_id()) } - - pub fn truncate_up_to_position_inclusive(&self) -> &Position { - self.truncate_up_to_position_inclusive - .as_ref() - .expect("`truncate_up_to_position_inclusive` should be a required field") - } } diff --git a/quickwit/quickwit-proto/src/ingest/mod.rs b/quickwit/quickwit-proto/src/ingest/mod.rs index 5feba8eedad..0369c207ab0 100644 --- a/quickwit/quickwit-proto/src/ingest/mod.rs +++ b/quickwit/quickwit-proto/src/ingest/mod.rs @@ -25,7 +25,7 @@ use self::ingester::{PersistFailureReason, ReplicateFailureReason}; use self::router::IngestFailureReason; use super::types::NodeId; use super::GrpcServiceError; -use crate::types::{queue_id, IndexUid, Position, QueueId, ShardId}; +use crate::types::{queue_id, Position, QueueId, ShardId, SourceUid}; use crate::{ServiceError, ServiceErrorCode}; pub mod ingester; @@ -100,6 +100,13 @@ impl Shard { .flatten() .map(|node_id| NodeId::new(node_id.clone())) } + + pub 
diff --git a/quickwit/quickwit-proto/src/ingest/mod.rs b/quickwit/quickwit-proto/src/ingest/mod.rs index 5feba8eedad..0369c207ab0 100644 --- a/quickwit/quickwit-proto/src/ingest/mod.rs +++ b/quickwit/quickwit-proto/src/ingest/mod.rs @@ -25,7 +25,7 @@ use self::ingester::{PersistFailureReason, ReplicateFailureReason}; use self::router::IngestFailureReason; use super::types::NodeId; use super::GrpcServiceError; -use crate::types::{queue_id, IndexUid, Position, QueueId, ShardId}; +use crate::types::{queue_id, Position, QueueId, ShardId, SourceUid}; use crate::{ServiceError, ServiceErrorCode}; pub mod ingester; @@ -100,6 +100,13 @@ impl Shard { .flatten() .map(|node_id| NodeId::new(node_id.clone())) } + + pub fn source_uid(&self) -> SourceUid { + SourceUid { + index_uid: self.index_uid().clone(), + source_id: self.source_id.clone(), + } + } } impl ShardPKey { @@ -193,12 +200,6 @@ impl MRecordBatch { } impl Shard { - pub fn shard_id(&self) -> &ShardId { - self.shard_id - .as_ref() - .expect("`shard_id` should be a required field") - } - pub fn is_open(&self) -> bool { self.shard_state().is_open() } @@ -214,12 +215,6 @@ impl Shard { pub fn queue_id(&self) -> super::types::QueueId { queue_id(self.index_uid(), &self.source_id, self.shard_id()) } - - pub fn publish_position_inclusive(&self) -> &Position { - self.publish_position_inclusive - .as_ref() - .expect("`publish_position_inclusive` should be a required field") - } } impl ShardState { @@ -272,13 +267,7 @@ impl ShardIds { } impl ShardIdPositions { - pub fn index_uid(&self) -> &IndexUid { - self.index_uid - .as_ref() - .expect("`index_uid` should be a required field") - } - - pub fn queue_id_positions(&self) -> impl Iterator<Item = (QueueId, &Position)> + '_ { + pub fn queue_id_positions(&self) -> impl Iterator<Item = (QueueId, Position)> + '_ { self.shard_positions.iter().map(|shard_position| { let queue_id = queue_id(self.index_uid(), &self.source_id, shard_position.shard_id()); (queue_id, shard_position.publish_position_inclusive()) @@ -286,20 +275,6 @@ } } -impl ShardIdPosition { - pub fn shard_id(&self) -> &ShardId { - self.shard_id - .as_ref() - .expect("`shard_id` should be a required field") - } - - pub fn publish_position_inclusive(&self) -> &Position { - self.publish_position_inclusive - .as_ref() - .expect("`publish_position_inclusive` should be a required field") - } -} - impl From<PersistFailureReason> for IngestFailureReason { fn from(reason: PersistFailureReason) -> Self { match reason { diff --git a/quickwit/quickwit-proto/src/metastore/mod.rs b/quickwit/quickwit-proto/src/metastore/mod.rs index d065791ef17..a1032788067 100644 --- a/quickwit/quickwit-proto/src/metastore/mod.rs +++ b/quickwit/quickwit-proto/src/metastore/mod.rs @@ -156,6 +156,29 @@ pub enum MetastoreError { Unavailable(String), } +impl MetastoreError { + /// Returns `true` if the transaction that emitted this error was certainly aborted. + /// Returns `false` if we cannot know whether the transaction was successful or not. + pub fn is_transaction_certainly_aborted(&self) -> bool { + match self { + MetastoreError::AlreadyExists(_) + | MetastoreError::FailedPrecondition { .. } + | MetastoreError::Forbidden { .. } + | MetastoreError::InvalidArgument { .. } + | MetastoreError::JsonDeserializeError { .. } + | MetastoreError::JsonSerializeError { .. } + | MetastoreError::NotFound(_) + | MetastoreError::TooManyRequests => true, + MetastoreError::Connection { .. } + | MetastoreError::Db { .. } + | MetastoreError::Internal { .. } + | MetastoreError::Io { .. } + | MetastoreError::Timeout { .. } + | MetastoreError::Unavailable(_) => false, } } } +
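Editor's note: the split between the two match arms is what makes this method useful for retry logic. The first group of errors guarantees the metastore rejected the write, while the second (connection drops, timeouts, etc.) leaves the outcome unknown. A hypothetical caller-side sketch, with `create_index` standing in for any metastore write; the helper name is illustrative, not from the diff:

```rust
// Retry once, but only when the first attempt certainly aborted. An ambiguous
// failure (e.g. a timeout) may have already committed, so we surface it to
// the caller instead of blindly retrying.
async fn create_index_with_retry(
    metastore: &mut MetastoreServiceClient,
    request: CreateIndexRequest,
) -> Result<CreateIndexResponse, MetastoreError> {
    match metastore.create_index(request.clone()).await {
        Err(error) if error.is_transaction_certainly_aborted() => {
            metastore.create_index(request).await
        }
        result => result,
    }
}
```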
#[cfg(feature = "postgres")] impl From<sqlx::Error> for MetastoreError { fn from(error: sqlx::Error) -> Self { @@ -262,6 +285,12 @@ impl fmt::Display for SourceType { } impl IndexMetadataRequest { + pub fn into_index_id(self) -> Option<IndexId> { + self.index_uid + .map(|index_uid| index_uid.index_id) + .or(self.index_id) + } + pub fn for_index_id(index_id: IndexId) -> Self { Self { index_uid: None, diff --git a/quickwit/quickwit-proto/src/types/index_uid.rs b/quickwit/quickwit-proto/src/types/index_uid.rs index 9382261227d..2d964c739bf 100644 --- a/quickwit/quickwit-proto/src/types/index_uid.rs +++ b/quickwit/quickwit-proto/src/types/index_uid.rs @@ -17,6 +17,7 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see <http://www.gnu.org/licenses/>. +use std::borrow::Cow; use std::fmt; use std::str::FromStr; @@ -37,31 +38,61 @@ pub struct IndexUid { pub incarnation_id: Ulid, } +impl fmt::Display for IndexUid { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}:{}", self.index_id, self.incarnation_id) + } +} + +impl IndexUid { + /// Creates a new index UID from an index ID using a random ULID as incarnation ID. + pub fn new_with_random_ulid(index_id: &str) -> Self { + Self::new(index_id, Ulid::new()) + } + + fn new(index_id: &str, incarnation_id: impl Into<Ulid>) -> Self { + assert!(!index_id.contains(':'), "index ID may not contain `:`"); + + Self { + index_id: index_id.to_string(), + incarnation_id: incarnation_id.into(), + } + } + + #[cfg(any(test, feature = "testsuite"))] + pub fn for_test(index_id: &str, incarnation_id: u128) -> Self { + Self { + index_id: index_id.to_string(), + incarnation_id: incarnation_id.into(), + } + } +} + +#[derive(Error, Debug)] +#[error("invalid index UID `{0}`")] +pub struct InvalidIndexUid(String); + impl FromStr for IndexUid { type Err = InvalidIndexUid; fn from_str(index_uid_str: &str) -> Result<Self, Self::Err> { - let Some((index_id, ulid)) = index_uid_str.split_once(':') else { - return Err(InvalidIndexUid { - invalid_index_uid_str: index_uid_str.to_string(), - }); + let Some((index_id, incarnation_id_str)) = index_uid_str.split_once(':') else { + return Err(InvalidIndexUid(index_uid_str.to_string())); }; - let incarnation_id = Ulid::from_string(ulid).map_err(|_| InvalidIndexUid { - invalid_index_uid_str: index_uid_str.to_string(), - })?; - Ok(IndexUid { + let incarnation_id = Ulid::from_string(incarnation_id_str) + .map_err(|_| InvalidIndexUid(index_uid_str.to_string()))?; + let index_uid = IndexUid { index_id: index_id.to_string(), incarnation_id, - }) + }; + Ok(index_uid) } } -// It is super lame, but for backward compatibility reasons we accept having a missing ulid part. -// TODO DEPRECATED ME and remove impl<'de> Deserialize<'de> for IndexUid { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where D: Deserializer<'de> { - let index_uid_str: String = String::deserialize(deserializer)?; + let index_uid_str: Cow<'de, str> = Cow::deserialize(deserializer)?; let index_uid = IndexUid::from_str(&index_uid_str).map_err(D::Error::custom)?; Ok(index_uid) }
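Editor's note: with `Display` and `FromStr` now defined side by side, the wire format is easy to read off: `index_id:ulid`, with the colon reserved as separator (hence the assert in `new`). A round-trip sketch, assuming the `PartialEq` derive on `IndexUid`:

```rust
use std::str::FromStr;

fn main() {
    // Serializes as e.g. "my-index:01HV3ZJ0Q0X5YZJ0Q0X5YZJ0Q0".
    let index_uid = IndexUid::new_with_random_ulid("my-index");
    let serialized = index_uid.to_string();
    let parsed = IndexUid::from_str(&serialized).expect("round-trip should not fail");
    assert_eq!(parsed, index_uid);
}
```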
@@ -147,53 +178,6 @@ impl prost::Message for IndexUid { } } -impl fmt::Display for IndexUid { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}:{}", self.index_id, self.incarnation_id) - } -} - -impl IndexUid { - /// Creates a new index uid from index_id. - /// A random ULID will be used as incarnation - pub fn new_with_random_ulid(index_id: &str) -> Self { - Self::from_parts(index_id, Ulid::new()) - } - - pub fn from_parts(index_id: &str, incarnation_id: impl Into<Ulid>) -> Self { - assert!(!index_id.contains(':'), "index ID may not contain `:`"); - let incarnation_id = incarnation_id.into(); - IndexUid { - index_id: index_id.to_string(), - incarnation_id, - } - } - - pub fn is_empty(&self) -> bool { - self.index_id.is_empty() - } - - #[cfg(any(test, feature = "testsuite"))] - pub fn for_test(index_id: &str, ulid: u128) -> Self { - IndexUid { - index_id: index_id.to_string(), - incarnation_id: Ulid(ulid), - } - } -} - -impl From<IndexUid> for String { - fn from(val: IndexUid) -> Self { - val.to_string() - } -} - -#[derive(Error, Debug)] -#[error("invalid index uid `{invalid_index_uid_str}`")] -pub struct InvalidIndexUid { - pub invalid_index_uid_str: String, -} - #[cfg(feature = "postgres")] impl TryFrom<String> for IndexUid { type Error = InvalidIndexUid; diff --git a/quickwit/quickwit-proto/src/types/pipeline_uid.rs b/quickwit/quickwit-proto/src/types/pipeline_uid.rs index 114125901c2..67ff395d13c 100644 --- a/quickwit/quickwit-proto/src/types/pipeline_uid.rs +++ b/quickwit/quickwit-proto/src/types/pipeline_uid.rs @@ -60,7 +60,7 @@ impl FromStr for PipelineUid { fn from_str(pipeline_uid_str: &str) -> Result<Self, Self::Err> { let pipeline_ulid = - Ulid::from_string(pipeline_uid_str).map_err(|_| "invalid pipeline uid")?; + Ulid::from_string(pipeline_uid_str).map_err(|_| "invalid pipeline UID")?; Ok(PipelineUid(pipeline_ulid)) } } diff --git a/quickwit/quickwit-query/src/query_ast/user_input_query.rs b/quickwit/quickwit-query/src/query_ast/user_input_query.rs index d0069e44615..5f02f7553cb 100644 --- a/quickwit/quickwit-query/src/query_ast/user_input_query.rs +++ b/quickwit/quickwit-query/src/query_ast/user_input_query.rs @@ -20,7 +20,7 @@ use std::collections::{BTreeSet, HashMap}; use std::ops::Bound; -use anyhow::Context; +use anyhow::bail; use serde::{Deserialize, Serialize}; use tantivy::query_grammar::{ Delimiter, Occur, UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral, @@ -129,7 +129,15 @@ fn convert_user_input_ast_to_query_ast( lower, upper, } => { - let field: String = field.context("range query without field is not supported")?; + let field = if let Some(field) = field { + field + } else if default_search_fields.len() == 1 { + default_search_fields[0].clone() + } else if default_search_fields.is_empty() { + bail!("range query without field is not supported"); + } else { + bail!("range query with multiple fields is not supported"); + }; let convert_bound = |user_input_bound: UserInputBound| match user_input_bound { UserInputBound::Inclusive(user_text) => { Bound::Included(JsonLiteral::String(user_text)) }
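Editor's note: the net effect of this change is that an unqualified range query such as `>=10791466` is now accepted whenever exactly one default search field is configured; the ES-compat tests added at the end of this diff exercise exactly that. A condensed restatement of the fallback, as a standalone function with hypothetical names:

```rust
use anyhow::bail;

// Mirrors the match added above: an explicit field wins, a single default
// search field is used as fallback, anything else is rejected.
fn resolve_range_field(
    field: Option<String>,
    default_search_fields: &[String],
) -> anyhow::Result<String> {
    match (field, default_search_fields) {
        (Some(field), _) => Ok(field),
        (None, [single_field]) => Ok(single_field.clone()),
        (None, []) => bail!("range query without field is not supported"),
        (None, _) => bail!("range query with multiple fields is not supported"),
    }
}
```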
diff --git a/quickwit/quickwit-search/src/collector.rs b/quickwit/quickwit-search/src/collector.rs index 928020c84bb..97a4a3c75d4 100644 --- a/quickwit/quickwit-search/src/collector.rs +++ b/quickwit/quickwit-search/src/collector.rs @@ -722,6 +722,18 @@ pub(crate) struct QuickwitCollector { } impl QuickwitCollector { + pub fn is_count_only(&self) -> bool { + self.max_hits == 0 && self.aggregation.is_none() + } + /// Updates search parameters affecting the returned documents. + /// Does not update aggregations. + pub fn update_search_param(&mut self, search_request: &SearchRequest) { + let sort_by = sort_by_from_request(search_request); + self.sort_by = sort_by; + self.max_hits = search_request.max_hits as usize; + self.start_offset = search_request.start_offset as usize; + self.search_after.clone_from(&search_request.search_after); + } pub fn fast_field_names(&self) -> HashSet<String> { let mut fast_field_names = HashSet::default(); self.sort_by.first.add_fast_field(&mut fast_field_names); diff --git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs index d8ff592338d..1e66d70d5bb 100644 --- a/quickwit/quickwit-search/src/leaf.rs +++ b/quickwit/quickwit-search/src/leaf.rs @@ -21,7 +21,7 @@ use std::collections::{HashMap, HashSet}; use std::ops::Bound; use std::path::PathBuf; use std::str::FromStr; -use std::sync::{Arc, Mutex}; +use std::sync::{Arc, Mutex, RwLock}; use anyhow::Context; use futures::future::try_join_all; @@ -47,6 +47,7 @@ use tantivy::{DateTime, Index, ReloadPolicy, Searcher, Term}; use tracing::*; use crate::collector::{make_collector_for_split, make_merge_collector, IncrementalCollector}; +use crate::root::is_metadata_count_request_with_ast; use crate::service::{deserialize_doc_mapper, SearcherContext}; use crate::{QuickwitAggregations, SearchError}; @@ -330,6 +331,16 @@ async fn warm_up_fieldnorms(searcher: &Searcher, requires_scoring: bool) -> anyhow::Result<()> { Ok(()) } +fn get_leaf_resp_from_count(count: u64) -> LeafSearchResponse { + LeafSearchResponse { + num_hits: count, + partial_hits: Vec::new(), + failed_splits: Vec::new(), + num_attempted_splits: 1, + intermediate_aggregation_result: None, + } +} + /// Apply a leaf search on a single split. async fn leaf_search_single_split( searcher_context: &SearcherContext, @@ -337,6 +348,7 @@ async fn leaf_search_single_split( storage: Arc<dyn Storage>, split: SplitIdAndFooterOffsets, doc_mapper: Arc<dyn DocMapper>, + split_filter: Arc<RwLock<CanSplitDoBetter>>, aggregations_limits: AggregationLimits, ) -> crate::Result<LeafSearchResponse> { rewrite_request( @@ -362,32 +374,67 @@ async fn leaf_search_single_split( .await?; let split_schema = index.schema(); - let quickwit_collector = - make_collector_for_split(split_id.clone(), &search_request, aggregations_limits)?; - let query_ast: QueryAst = serde_json::from_str(search_request.query_ast.as_str()) - .map_err(|err| SearchError::InvalidQuery(err.to_string()))?; - let (query, mut warmup_info) = doc_mapper.query(split_schema, &query_ast, false)?; let reader = index .reader_builder() .reload_policy(ReloadPolicy::Manual) .try_into()?; let searcher = reader.searcher(); - let collector_warmup_info = quickwit_collector.warmup_info(); + let mut collector = + make_collector_for_split(split_id.clone(), &search_request, aggregations_limits)?; + let query_ast: QueryAst = serde_json::from_str(search_request.query_ast.as_str()) + .map_err(|err| SearchError::InvalidQuery(err.to_string()))?; + + // CanSplitDoBetter or rewrite_request may have changed the request to be a count-only request. + // This may be the case for AllQuery with a sort by date, where the current split can't have + // better results. + // + // TODO: SplitIdAndFooterOffsets could carry the number of docs in a split, so we could save + // opening the index and execute this earlier. Opening splits is typically served from the + // cache, so there may be no gain adding that info to SplitIdAndFooterOffsets.
+ if is_metadata_count_request_with_ast(&query_ast, &search_request) { + return Ok(get_leaf_resp_from_count(searcher.num_docs() as u64)); + } + + let (query, mut warmup_info) = doc_mapper.query(split_schema.clone(), &query_ast, false)?; + + let collector_warmup_info = collector.warmup_info(); warmup_info.merge(collector_warmup_info); warmup_info.simplify(); warmup(&searcher, &warmup_info).await?; let span = info_span!("tantivy_search"); - let leaf_search_response = crate::search_thread_pool() - .run_cpu_intensive(move || { - let _span_guard = span.enter(); - searcher.search(&query, &quickwit_collector) - }) - .await - .map_err(|_| { - crate::SearchError::Internal(format!("leaf search panicked. split={split_id}")) - })??; + + let (search_request, leaf_search_response) = { + let split = split.clone(); + + crate::search_thread_pool() + .run_cpu_intensive(move || { + let _span_guard = span.enter(); + // Our search execution has been scheduled, so let's check if we can improve the + // request based on the results of the preceding searches. + check_optimize_search_request(&mut search_request, &split, &split_filter); + collector.update_search_param(&search_request); + if is_metadata_count_request_with_ast(&query_ast, &search_request) { + return Ok(( + search_request, + get_leaf_resp_from_count(searcher.num_docs() as u64), + )); + } + if collector.is_count_only() { + let count = query.count(&searcher)? as u64; + Ok((search_request, get_leaf_resp_from_count(count))) + } else { + searcher + .search(&query, &collector) + .map(|resp| (search_request, resp)) + } + }) + .await + .map_err(|_| { + crate::SearchError::Internal(format!("leaf search panicked. split={split_id}")) + })?? + }; searcher_context .leaf_search_cache @@ -850,7 +897,7 @@ impl CanSplitDoBetter { /// Record the new worst-of-the-top document, that is, the document which would first be /// evicted from the list of best documents, if a better document was found. Only call this - /// funciton if you have at least max_hits documents already. + /// function if you have at least max_hits documents already. fn record_new_worst_hit(&mut self, hit: &PartialHit) { match self { CanSplitDoBetter::Uninformative => (), @@ -990,6 +1037,29 @@ async fn resolve_storage_and_leaf_search( .await } +/// Optimizes the `search_request` based on `CanSplitDoBetter`. +/// Returns `true` if the split can return better results. +fn check_optimize_search_request( + search_request: &mut SearchRequest, + split: &SplitIdAndFooterOffsets, + split_filter: &Arc<RwLock<CanSplitDoBetter>>, + ) -> bool { + let can_be_better = split_filter.read().unwrap().can_be_better(split); + if !can_be_better { + disable_search_request_hits(search_request); + } + can_be_better +} + +/// Alters the search request so that it does not return any docs. +/// +/// This is usually done when the split cannot provide better hits than the ones already fetched. +fn disable_search_request_hits(search_request: &mut SearchRequest) { + search_request.max_hits = 0; + search_request.start_offset = 0; + search_request.sort_fields.clear(); +} +
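Editor's note: to make the demotion concrete, a split that provably cannot beat the current worst top hit is not skipped (its documents still count toward `num_hits`); it is merely stripped of everything that costs time. A hedged illustration, assuming the `Default` impl prost derives for `SearchRequest`:

```rust
#[test]
fn demoted_request_becomes_count_only() {
    // Illustrative: a request that wanted 20 hits with an offset...
    let mut request = SearchRequest {
        max_hits: 20,
        start_offset: 10,
        ..Default::default()
    };
    // ...is reduced to a pure count, which `is_count_only` then detects.
    disable_search_request_hits(&mut request);
    assert_eq!((request.max_hits, request.start_offset), (0, 0));
    assert!(request.sort_fields.is_empty());
}
```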
/// `leaf` step of search. /// /// The leaf search collects all kind of information, and returns a set of @@ -1016,7 +1086,7 @@ pub async fn leaf_search( || (request.aggregation_request.is_some() && !matches!(split_filter, CanSplitDoBetter::FindTraceIdsAggregation(_))); - let split_filter = Arc::new(Mutex::new(split_filter)); + let split_filter = Arc::new(RwLock::new(split_filter)); let mut leaf_search_single_split_futures: Vec<_> = Vec::with_capacity(splits.len()); @@ -1034,13 +1104,9 @@ pub async fn leaf_search( let mut request = (*request).clone(); - if !split_filter.lock().unwrap().can_be_better(&split) { - if !run_all_splits { - continue; - } - request.max_hits = 0; - request.start_offset = 0; - request.sort_fields.clear(); + let can_be_better = check_optimize_search_request(&mut request, &split, &split_filter); + if !can_be_better && !run_all_splits { + continue; } leaf_search_single_split_futures.push(tokio::spawn( @@ -1100,7 +1166,7 @@ async fn leaf_search_single_split_wrapper( index_storage: Arc<dyn Storage>, doc_mapper: Arc<dyn DocMapper>, split: SplitIdAndFooterOffsets, - split_filter: Arc<Mutex<CanSplitDoBetter>>, + split_filter: Arc<RwLock<CanSplitDoBetter>>, incremental_merge_collector: Arc<Mutex<IncrementalCollector>>, leaf_split_search_permit: tokio::sync::OwnedSemaphorePermit, aggregations_limits: AggregationLimits, @@ -1115,6 +1181,7 @@ async fn leaf_search_single_split_wrapper( index_storage, split.clone(), doc_mapper, + split_filter.clone(), aggregations_limits, ) .await; @@ -1144,10 +1211,10 @@ async fn leaf_search_single_split_wrapper( }), } if let Some(last_hit) = locked_incremental_merge_collector.peek_worst_hit() { - // TODO: we could use a RWLock instead and read the value instead of updateing it + // TODO: we could use the RwLock to read the value instead of updating it // unconditionally. split_filter - .lock() + .write() .unwrap() .record_new_worst_hit(last_hit.as_ref()); }
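Editor's note: the `Mutex` to `RwLock` switch matches the access pattern made explicit above: every spawned split search takes a brief read lock for the pre-flight `can_be_better` check, and only the merge path takes the write lock to record a new worst hit. A minimal sketch of that discipline (types are the ones from this diff; not compilable in isolation):

```rust
use std::sync::{Arc, RwLock};

// Hot path, taken once per split: shared read access never blocks other readers.
fn split_may_improve(
    filter: &Arc<RwLock<CanSplitDoBetter>>,
    split: &SplitIdAndFooterOffsets,
) -> bool {
    filter.read().unwrap().can_be_better(split)
}

// Cold path, taken only when the merged top-k changed: a short exclusive write.
fn record_worst_hit(filter: &Arc<RwLock<CanSplitDoBetter>>, hit: &PartialHit) {
    filter.write().unwrap().record_new_worst_hit(hit);
}
```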
-fn is_metadata_count_request(request: &SearchRequest) -> bool { +pub fn is_metadata_count_request(request: &SearchRequest) -> bool { let query_ast: QueryAst = serde_json::from_str(&request.query_ast).unwrap(); - if query_ast != QueryAst::MatchAll { + is_metadata_count_request_with_ast(&query_ast, request) +} + +/// Check if the request is a count request without any filters, so we can just return the split +/// metadata count. +/// +/// This is done by exclusion, so we will need to keep it up to date if fields are added. +/// +/// The passed query_ast should match the serialized on in request. +pub fn is_metadata_count_request_with_ast(query_ast: &QueryAst, request: &SearchRequest) -> bool { + if query_ast != &QueryAst::MatchAll { return false; } if request.max_hits != 0 { return false; } - // TODO: if the start and end timestamp encompass the whole split, it is still a count query - // So some could be checked on metadata + // If the start and end timestamp encompass the whole split, it is still a count query. + // We remove this currently on the leaf level, but not yet on the root level. + // There's a small advantage when we would do this on the root level, since we have the + // counts available on the split. On the leaf it is currently required to open the split + // to get the count. if request.start_timestamp.is_some() || request.end_timestamp.is_some() { return false; } @@ -611,7 +624,7 @@ fn is_metadata_count_request(request: &SearchRequest) -> bool { } /// Get a leaf search response that returns the num_docs of the split -fn get_count_from_metadata(split_metadatas: &[SplitMetadata]) -> Vec { +pub fn get_count_from_metadata(split_metadatas: &[SplitMetadata]) -> Vec { split_metadatas .iter() .map(|metadata| LeafSearchResponse { diff --git a/quickwit/quickwit-search/src/search_job_placer.rs b/quickwit/quickwit-search/src/search_job_placer.rs index 24c6677d0ee..dc708763ff1 100644 --- a/quickwit/quickwit-search/src/search_job_placer.rs +++ b/quickwit/quickwit-search/src/search_job_placer.rs @@ -29,7 +29,7 @@ use quickwit_common::pubsub::EventSubscriber; use quickwit_common::rendezvous_hasher::{node_affinity, sort_by_rendez_vous_hash}; use quickwit_proto::search::{ReportSplit, ReportSplitsRequest}; -use crate::{SearchJob, SearchServiceClient, SearcherPool}; +use crate::{SearchJob, SearchServiceClient, SearcherPool, SEARCH_METRICS}; /// Job. /// The unit in which distributed search is performed. 
@@ -185,6 +185,16 @@ impl SearchJobPlacer { } else { 0 }; + let metric_node_idx = match chosen_node_idx { + 0 => "0", + 1 => "1", + _ => "> 1", + }; + SEARCH_METRICS + .job_assigned_total + .with_label_values([metric_node_idx]) + .inc(); + let chosen_node = &mut candidate_nodes[chosen_node_idx]; chosen_node.load += job.cost(); diff --git a/quickwit/quickwit-serve/Cargo.toml b/quickwit/quickwit-serve/Cargo.toml index dafb56ca6ae..22db2523d05 100644 --- a/quickwit/quickwit-serve/Cargo.toml +++ b/quickwit/quickwit-serve/Cargo.toml @@ -30,6 +30,7 @@ mime_guess = { workspace = true } once_cell = { workspace = true } opentelemetry = { workspace = true } percent-encoding = { workspace = true } +pprof = { workspace = true, optional = true } prost = { workspace = true } prost-types = { workspace = true } regex = { workspace = true } @@ -91,3 +92,9 @@ quickwit-opentelemetry = { workspace = true, features = ["testsuite"] } quickwit-proto = { workspace = true, features = ["testsuite"] } quickwit-search = { workspace = true, features = ["testsuite"] } quickwit-storage = { workspace = true, features = ["testsuite"] } + +[features] +pprof = [ + "dep:pprof" +] + diff --git a/quickwit/quickwit-serve/src/developer_api/mod.rs b/quickwit/quickwit-serve/src/developer_api/mod.rs index c731d705616..491f57fe663 100644 --- a/quickwit/quickwit-serve/src/developer_api/mod.rs +++ b/quickwit/quickwit-serve/src/developer_api/mod.rs @@ -19,10 +19,12 @@ mod debug; mod log_level; +mod pprof; mod server; use debug::debug_handler; use log_level::log_level_handler; +use pprof::pprof_handlers; use quickwit_cluster::Cluster; pub(crate) use server::DeveloperApiServer; use warp::{Filter, Rejection}; @@ -37,6 +39,9 @@ pub(crate) fn developer_api_routes( cluster: Cluster, env_filter_reload_fn: EnvFilterReloadFn, ) -> impl Filter<Extract = (impl warp::Reply,), Error = Rejection> + Clone { - warp::path!("api" / "developer" / ..) - .and(debug_handler(cluster.clone()).or(log_level_handler(env_filter_reload_fn.clone()))) + warp::path!("api" / "developer" / ..).and( + debug_handler(cluster.clone()) + .or(log_level_handler(env_filter_reload_fn.clone())) + .or(pprof_handlers()), + ) } diff --git a/quickwit/quickwit-serve/src/developer_api/pprof.rs b/quickwit/quickwit-serve/src/developer_api/pprof.rs new file mode 100644 index 00000000000..12083a7ec6f --- /dev/null +++ b/quickwit/quickwit-serve/src/developer_api/pprof.rs @@ -0,0 +1,162 @@ +// Copyright (C) 2024 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>.
+ +use warp::Filter; + +/// pprof/start disabled +/// pprof/flamegraph disabled +#[cfg(not(feature = "pprof"))] +pub fn pprof_handlers() -> impl Filter<Extract = (impl warp::Reply,), Error = warp::Rejection> + Clone +{ + let start_profiler = { + warp::path!("pprof" / "start").map(move || { + warp::reply::with_status( + "Quickwit was compiled without the `pprof` feature", + warp::http::StatusCode::NOT_IMPLEMENTED, + ) + }) + }; + let stop_profiler = { + warp::path!("pprof" / "flamegraph").map(move || { + warp::reply::with_status( + "Quickwit was compiled without the `pprof` feature", + warp::http::StatusCode::NOT_IMPLEMENTED, + ) + }) + }; + start_profiler.or(stop_profiler) +} + +/// pprof/start to start CPU profiling. +/// pprof/start?max_duration=5&sampling=1000 to start a short, high-frequency CPU profiling run. +/// pprof/flamegraph to stop the current CPU profiling and return a flamegraph, or to return the +/// last flamegraph. +/// +/// Query parameters: +/// - max_duration: max duration of the profiling in seconds, default is 30 seconds, max value is +/// 300 +/// - sampling: the sampling rate, default is 100, max value is 1000 +#[cfg(feature = "pprof")] +pub fn pprof_handlers() -> impl Filter<Extract = (impl warp::Reply,), Error = warp::Rejection> + Clone +{ + use std::sync::{Arc, Mutex}; + + use pprof::ProfilerGuard; + use serde::Deserialize; + use tokio::task::spawn_blocking; + use tokio::time::{self, Duration}; + use warp::reply::Reply; + + struct ProfilerState { + profiler_guard: Option<ProfilerGuard<'static>>, + // We keep the latest flamegraph and return it at the flamegraph endpoint. + // A new run will overwrite the flamegraph_data. + flamegraph_data: Option<Vec<u8>>, + } + + let profiler_state = Arc::new(Mutex::new(ProfilerState { + profiler_guard: None, + flamegraph_data: None, + })); + + #[derive(Deserialize)] + struct ProfilerQueryParams { + max_duration: Option<u64>, // max allowed value is 300 seconds, default is 30 seconds + sampling: Option<i32>, // max value is 1000, default is 100 + } + + let start_profiler = { + let profiler_state = Arc::clone(&profiler_state); + warp::path!("pprof" / "start") + .and(warp::query::<ProfilerQueryParams>()) + .and_then(move |params: ProfilerQueryParams| { + start_profiler_handler(profiler_state.clone(), params) + }) + }; + + let stop_profiler = { + let profiler_state = Arc::clone(&profiler_state); + warp::path!("pprof" / "flamegraph") + .and_then(move || stop_profiler_handler(Arc::clone(&profiler_state))) + }; + + async fn start_profiler_handler( + profiler_state: Arc<Mutex<ProfilerState>>, + params: ProfilerQueryParams, + ) -> Result<impl warp::Reply, warp::Rejection> { + let mut state = profiler_state.lock().unwrap(); + + if state.profiler_guard.is_none() { + let max_duration = params.max_duration.unwrap_or(30).min(300); + let sampling = params.sampling.unwrap_or(100).min(1000); + state.profiler_guard = Some(pprof::ProfilerGuard::new(sampling).unwrap()); + let profiler_state = Arc::clone(&profiler_state); + tokio::spawn(async move { + time::sleep(Duration::from_secs(max_duration)).await; + save_flamegraph(profiler_state).await; + }); + Ok(warp::reply::with_status( + "CPU profiling started", + warp::http::StatusCode::OK, + )) + } else { + Ok(warp::reply::with_status( + "CPU profiling is already running", + warp::http::StatusCode::BAD_REQUEST, + )) + } + } + + async fn stop_profiler_handler( + profiler_state: Arc<Mutex<ProfilerState>>, + ) -> Result<impl warp::Reply, warp::Rejection> { + save_flamegraph(profiler_state.clone()).await; + + let state = profiler_state.lock().unwrap(); + + if let Some(data) = state.flamegraph_data.clone() { + Ok(warp::reply::with_header(data, "Content-Type", "image/svg+xml").into_response()) + } else { + Ok(warp::reply::with_status( + "flamegraph is not available",
warp::http::StatusCode::BAD_REQUEST, ) .into_response()) } } + + async fn save_flamegraph( + profiler_state: Arc<Mutex<ProfilerState>>, + ) -> tokio::task::JoinHandle<()> { + spawn_blocking(move || { + let mut state = profiler_state.lock().unwrap(); + + if let Some(profiler) = state.profiler_guard.take() { + if let Ok(report) = profiler.report().build() { + let mut buffer = Vec::new(); + if report.flamegraph(&mut buffer).is_ok() { + state.flamegraph_data = Some(buffer); + } + } + } + }) + } + + start_profiler.or(stop_profiler) +}
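Editor's note: a quick way to exercise both compile-time variants is warp's test module. The sketch below probes the fallback build (no `pprof` feature), where both routes answer 501; that `warp::test` accepts this exact filter type is an assumption, not something this diff shows:

```rust
#[tokio::test]
async fn pprof_routes_answer_501_without_the_feature() {
    let routes = pprof_handlers();
    let response = warp::test::request()
        .path("/pprof/start")
        .reply(&routes)
        .await;
    assert_eq!(response.status(), warp::http::StatusCode::NOT_IMPLEMENTED);
}
```

In a `--features pprof` build, the same harness can drive `/pprof/start?max_duration=5&sampling=1000` followed by `/pprof/flamegraph` to retrieve the SVG.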
diff --git a/quickwit/quickwit-serve/src/index_api/mod.rs b/quickwit/quickwit-serve/src/index_api/mod.rs index ab831526c81..64d58cddb94 100644 --- a/quickwit/quickwit-serve/src/index_api/mod.rs +++ b/quickwit/quickwit-serve/src/index_api/mod.rs @@ -20,5 +20,6 @@ mod rest_handler; pub use self::rest_handler::{ - index_management_handlers, IndexApi, IndexUpdates, ListSplitsQueryParams, ListSplitsResponse, + get_index_metadata_handler, index_management_handlers, IndexApi, IndexUpdates, + ListSplitsQueryParams, ListSplitsResponse, }; diff --git a/quickwit/quickwit-serve/src/index_api/rest_handler.rs b/quickwit/quickwit-serve/src/index_api/rest_handler.rs index b132a00d2e4..f5a783901d4 100644 --- a/quickwit/quickwit-serve/src/index_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/index_api/rest_handler.rs @@ -114,7 +114,7 @@ fn json_body( warp::body::content_length_limit(1024 * 1024).and(warp::body::json()) } -fn get_index_metadata_handler( +pub fn get_index_metadata_handler( metastore: MetastoreServiceClient, ) -> impl Filter<Extract = impl warp::Reply, Error = Rejection> + Clone { warp::path!("indexes" / String) diff --git a/quickwit/quickwit-serve/src/ingest_api/rest_handler.rs b/quickwit/quickwit-serve/src/ingest_api/rest_handler.rs index 582218fe472..df89b5f0e4d 100644 --- a/quickwit/quickwit-serve/src/ingest_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/ingest_api/rest_handler.rs @@ -177,7 +177,9 @@ fn convert_ingest_response_v2( ingest_failure.index_id )), IngestFailureReason::Internal => IngestServiceError::Internal("internal error".to_string()), - IngestFailureReason::NoShardsAvailable => IngestServiceError::Unavailable, + IngestFailureReason::NoShardsAvailable => { + IngestServiceError::Unavailable("no shards available".to_string()) + } IngestFailureReason::RateLimited => IngestServiceError::RateLimited, IngestFailureReason::ResourceExhausted => IngestServiceError::RateLimited, IngestFailureReason::Timeout => { diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs index c59a870ca43..22c6730fc43 100644 --- a/quickwit/quickwit-serve/src/lib.rs +++ b/quickwit/quickwit-serve/src/lib.rs @@ -73,8 +73,8 @@ use quickwit_common::runtimes::RuntimesConfig; use quickwit_common::spawn_named_task; use quickwit_common::tower::{ BalanceChannel, BoxFutureInfaillible, BufferLayer, Change, ConstantRate, EstimateRateLayer, - EventListenerLayer, GrpcMetricsLayer, LoadShedLayer, OneTaskPerCallLayer, RateLimitLayer, - RetryLayer, RetryPolicy, SmaRateEstimator, + EventListenerLayer, GrpcMetricsLayer, LoadShedLayer, RateLimitLayer, RetryLayer, RetryPolicy, + SmaRateEstimator, }; use quickwit_common::uri::Uri; use quickwit_config::service::QuickwitService; @@ -331,20 +331,9 @@ async fn start_control_plane_if_needed( .await?; let control_plane_server_opt = Some(control_plane_mailbox.clone()); - - // These layers apply to all the RPCs of the control plane. - let shared_layers = ServiceBuilder::new() - .layer(CP_GRPC_SERVER_METRICS_LAYER.clone()) - .layer(LoadShedLayer::new(100)) - .into_inner(); let control_plane_client = ControlPlaneServiceClient::tower() - .stack_layer(shared_layers) - .stack_create_index_layer(OneTaskPerCallLayer) - .stack_delete_index_layer(OneTaskPerCallLayer) - .stack_add_source_layer(OneTaskPerCallLayer) - .stack_toggle_source_layer(OneTaskPerCallLayer) - .stack_delete_source_layer(OneTaskPerCallLayer) - .stack_get_or_create_open_shards_layer(OneTaskPerCallLayer) + .stack_layer(CP_GRPC_SERVER_METRICS_LAYER.clone()) + .stack_layer(LoadShedLayer::new(100)) .build_from_mailbox(control_plane_mailbox); Ok((control_plane_server_opt, control_plane_client)) } else { @@ -1259,6 +1248,7 @@ pub mod lambda_search_api { es_compat_index_stats_handler, es_compat_scroll_handler, es_compat_search_handler, es_compat_stats_handler, }; + pub use crate::index_api::get_index_metadata_handler; pub use crate::rest::recover_fn; pub use crate::search_api::{search_get_handler, search_post_handler}; } diff --git a/quickwit/quickwit-serve/src/rest.rs b/quickwit/quickwit-serve/src/rest.rs index 85f80eb9325..8945faf783f 100644 --- a/quickwit/quickwit-serve/src/rest.rs +++ b/quickwit/quickwit-serve/src/rest.rs @@ -26,7 +26,7 @@ use hyper::{http, Method, StatusCode}; use quickwit_common::tower::BoxFutureInfaillible; use tower::make::Shared; use tower::ServiceBuilder; -use tower_http::compression::predicate::{DefaultPredicate, Predicate, SizeAbove}; +use tower_http::compression::predicate::{NotForContentType, Predicate, SizeAbove}; use tower_http::compression::CompressionLayer; use tower_http::cors::CorsLayer; use tracing::{error, info}; @@ -52,10 +52,6 @@ use crate::template_api::index_template_api_handlers; use crate::ui_handler::ui_handler; use crate::{BodyFormat, BuildInfo, QuickwitServices, RuntimeInfo}; -/// The minimum size a response body must be in order to -/// be automatically compressed with gzip. -const MINIMUM_RESPONSE_COMPRESSION_SIZE: u16 = 10 << 10; - #[derive(Debug)] pub(crate) struct InvalidJsonRequest(pub serde_json::Error); @@ -88,6 +84,39 @@ impl std::fmt::Display for InternalError { } } +/// Env variable key to define the minimum size above which a response should be compressed. +/// If unset, no compression is applied. +const QW_MINIMUM_COMPRESSION_SIZE_KEY: &str = "QW_MINIMUM_COMPRESSION_SIZE"; + +#[derive(Clone, Copy)] +struct CompressionPredicate { + size_above_opt: Option<SizeAbove>, +} + +impl CompressionPredicate { + fn from_env() -> CompressionPredicate { + let minimum_compression_size_opt: Option<u16> = quickwit_common::get_from_env_opt::<usize>( + QW_MINIMUM_COMPRESSION_SIZE_KEY, + ) + .map(|minimum_compression_size: usize| { + u16::try_from(minimum_compression_size).unwrap_or(u16::MAX) + }); + let size_above_opt = minimum_compression_size_opt.map(SizeAbove::new); + CompressionPredicate { size_above_opt } + } +} + +impl Predicate for CompressionPredicate { + fn should_compress<B>(&self, response: &http::Response<B>) -> bool + where B: hyper::body::HttpBody { + if let Some(size_above) = self.size_above_opt { + size_above.should_compress(response) + } else { + false + } + } +}
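Editor's note: the predicate makes response compression strictly opt-in: with `QW_MINIMUM_COMPRESSION_SIZE` unset, `should_compress` always returns `false` and the compression layer stays dormant. A hedged usage sketch:

```rust
fn main() {
    // Illustrative: opt in to compressing responses of 1 KiB and larger.
    // Must be set before the REST routes are built.
    std::env::set_var("QW_MINIMUM_COMPRESSION_SIZE", "1024");
    let predicate = CompressionPredicate::from_env();
    // `predicate` now wraps SizeAbove::new(1024); it is later combined with
    // NotForContentType::IMAGES below, so image responses are excluded.
    let _ = predicate;
}
```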
+ /// Starts REST services. pub(crate) async fn start_rest_server( rest_listen_addr: SocketAddr, @@ -128,7 +157,6 @@ quickwit_services.cluster.clone(), quickwit_services.env_filter_reload_fn.clone(), ); - // `/api/v1/*` routes. let api_v1_root_route = api_v1_routes(quickwit_services.clone()); @@ -158,14 +186,15 @@ .boxed(); let warp_service = warp::service(rest_routes); - let compression_predicate = - DefaultPredicate::new().and(SizeAbove::new(MINIMUM_RESPONSE_COMPRESSION_SIZE)); + let compression_predicate = CompressionPredicate::from_env().and(NotForContentType::IMAGES); let cors = build_cors(&quickwit_services.node_config.rest_config.cors_allow_origins); let service = ServiceBuilder::new() .layer( CompressionLayer::new() + .zstd(true) .gzip(true) + .quality(tower_http::CompressionLevel::Fastest) .compress_when(compression_predicate), ) .layer(cors) diff --git a/quickwit/rest-api-tests/docker-compose.yaml b/quickwit/rest-api-tests/docker-compose.yaml index 46889df11c3..0612b8d92da 100644 --- a/quickwit/rest-api-tests/docker-compose.yaml +++ b/quickwit/rest-api-tests/docker-compose.yaml @@ -1,7 +1,7 @@ # This docker-compose file is useful to start up # a single node elasticsearch to test our rest api tests # against. -version: '3.7' +version: "3.7" services: elasticsearch: @@ -14,3 +14,7 @@ services: ports: - 9200:9200 - 9300:9300 + # If you see Elasticsearch lacking disk space, you can mount a local directory + # as follows. + #volumes: + # - /Users/fulmicoton/git/quickwit/quickwit/rest-api-tests/esdata:/usr/share/elasticsearch/data diff --git a/quickwit/rest-api-tests/scenarii/aggregations/_setup.quickwit.yaml b/quickwit/rest-api-tests/scenarii/aggregations/_setup.quickwit.yaml index 28cd5a2f6d2..11ee82ec67f 100644 --- a/quickwit/rest-api-tests/scenarii/aggregations/_setup.quickwit.yaml +++ b/quickwit/rest-api-tests/scenarii/aggregations/_setup.quickwit.yaml @@ -11,14 +11,13 @@ status_code: null method: POST endpoint: indexes/ json: - version: "0.7" + version: "0.8" index_id: aggregations doc_mapping: mode: dynamic dynamic_mapping: tokenizer: default fast: true - document_length: true field_mappings: - name: date type: datetime input_formats: - rfc3339 fast_precision: seconds fast: true + store_document_size: true --- # Create empty index method: POST endpoint: indexes/ json: - version: "0.7" + version: "0.8" index_id: empty_aggregations doc_mapping: mode: dynamic diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0005-query_string_query.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/0005-query_string_query.yaml index 099ee2bc5d2..d341b8c6b97 100644 --- a/quickwit/rest-api-tests/scenarii/es_compatibility/0005-query_string_query.yaml +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/0005-query_string_query.yaml @@ -199,3 +199,25 @@ expected: hits: total: value: 1 +--- +json: + query: + query_string: + default_field: actor.id + query: ">=10791466" + lenient: true +expected: + hits: + total: + value: 2 +--- +json: + query: + query_string: + default_field: actor.id + query: ">10791466" + lenient: true +expected: + hits: + total: + value: 1