From 0052b381130cceec0a710d71ca10d2174ec3545b Mon Sep 17 00:00:00 2001 From: Remi Dettai Date: Thu, 6 Jun 2024 15:48:31 +0200 Subject: [PATCH 1/3] Support secret manager for the URI (#5088) --- distribution/ecs/README.md | 4 ++-- distribution/ecs/example/terraform.tf | 2 +- distribution/ecs/quickwit/configs.tf | 4 ++-- distribution/ecs/quickwit/iam.tf | 2 +- distribution/ecs/quickwit/quickwit-control-plane.tf | 2 +- distribution/ecs/quickwit/quickwit-indexer.tf | 2 +- distribution/ecs/quickwit/quickwit-janitor.tf | 2 +- distribution/ecs/quickwit/quickwit-metastore.tf | 2 +- distribution/ecs/quickwit/quickwit-searcher.tf | 2 +- distribution/ecs/quickwit/service/ecs.tf | 6 +----- distribution/ecs/quickwit/service/variables.tf | 4 +++- distribution/ecs/quickwit/variables.tf | 4 ++-- 12 files changed, 17 insertions(+), 19 deletions(-) diff --git a/distribution/ecs/README.md b/distribution/ecs/README.md index bb6415f482c..a1e20c1acad 100644 --- a/distribution/ecs/README.md +++ b/distribution/ecs/README.md @@ -33,8 +33,8 @@ Metastore database backups are disabled as restoring one would lead to inconsistencies with the index store on S3. To ensure high availability, you should enable `rds_config.multi_az` instead. To use your own Postgres database instead of creating a new RDS instance, configure the -`external_postgres_uri_ssm_parameter_arn` variable (e.g -`postgres://user:password@domain:port/db`). +`external_postgres_uri_secret_arn` variable (e.g ARN of an SSM parameter with +the value `postgres://user:password@domain:port/db`). Using NAT Gateways for the image registry is quite costly (approx. $0.05/hour/AZ). 
If you are not already using NAT Gateways in the AZs where Quickwit will be diff --git a/distribution/ecs/example/terraform.tf b/distribution/ecs/example/terraform.tf index 2bdc9de2825..1479bdf0bfd 100644 --- a/distribution/ecs/example/terraform.tf +++ b/distribution/ecs/example/terraform.tf @@ -72,7 +72,7 @@ module "quickwit" { # multi_az = false # } - # external_postgres_uri_ssm_parameter_arn = aws_ssm_parameter.postgres_uri.arn + # external_postgres_uri_secret_arn = aws_ssm_parameter.postgres_uri.arn ## Example logging configuration # sidecar_container_definitions = { diff --git a/distribution/ecs/quickwit/configs.tf b/distribution/ecs/quickwit/configs.tf index 399826e5e03..e1bdd42f6fb 100644 --- a/distribution/ecs/quickwit/configs.tf +++ b/distribution/ecs/quickwit/configs.tf @@ -13,8 +13,8 @@ locals { quickwit_index_s3_prefix = var.quickwit_index_s3_prefix == "" ? aws_s3_bucket.index[0].id : var.quickwit_index_s3_prefix - use_external_rds = var.external_postgres_uri_ssm_parameter_arn != "" - postgres_uri_parameter_arn = var.external_postgres_uri_ssm_parameter_arn != "" ? var.external_postgres_uri_ssm_parameter_arn : aws_ssm_parameter.postgres_credential[0].arn + use_external_rds = var.external_postgres_uri_secret_arn != "" + postgres_uri_secret_arn = var.external_postgres_uri_secret_arn != "" ? 
var.external_postgres_uri_secret_arn : aws_ssm_parameter.postgres_credential[0].arn } resource "random_id" "module" { diff --git a/distribution/ecs/quickwit/iam.tf b/distribution/ecs/quickwit/iam.tf index 6536239738e..698fa584b46 100644 --- a/distribution/ecs/quickwit/iam.tf +++ b/distribution/ecs/quickwit/iam.tf @@ -46,7 +46,7 @@ data "aws_iam_policy_document" "quickwit_task_execution_permission" { statement { actions = ["ssm:GetParameters"] - resources = [local.postgres_uri_parameter_arn] + resources = [local.postgres_uri_secret_arn] } statement { diff --git a/distribution/ecs/quickwit/quickwit-control-plane.tf b/distribution/ecs/quickwit/quickwit-control-plane.tf index 3ab957b27f3..6d29e865b99 100644 --- a/distribution/ecs/quickwit/quickwit-control-plane.tf +++ b/distribution/ecs/quickwit/quickwit-control-plane.tf @@ -3,7 +3,7 @@ module "quickwit_control_plane" { service_name = "control_plane" service_discovery_registry_arn = aws_service_discovery_service.control_plane.arn cluster_arn = module.ecs_cluster.arn - postgres_credential_arn = local.postgres_uri_parameter_arn + postgres_uri_secret_arn = local.postgres_uri_secret_arn quickwit_peer_list = local.quickwit_peer_list s3_access_policy_arn = aws_iam_policy.quickwit_task_permission.arn task_execution_policy_arn = aws_iam_policy.quickwit_task_execution_permission.arn diff --git a/distribution/ecs/quickwit/quickwit-indexer.tf b/distribution/ecs/quickwit/quickwit-indexer.tf index d4725f5d01b..441a1c7a8f8 100644 --- a/distribution/ecs/quickwit/quickwit-indexer.tf +++ b/distribution/ecs/quickwit/quickwit-indexer.tf @@ -3,7 +3,7 @@ module "quickwit_indexer" { service_name = "indexer" service_discovery_registry_arn = aws_service_discovery_service.indexer.arn cluster_arn = module.ecs_cluster.arn - postgres_credential_arn = local.postgres_uri_parameter_arn + postgres_uri_secret_arn = local.postgres_uri_secret_arn quickwit_peer_list = local.quickwit_peer_list s3_access_policy_arn = 
aws_iam_policy.quickwit_task_permission.arn task_execution_policy_arn = aws_iam_policy.quickwit_task_execution_permission.arn diff --git a/distribution/ecs/quickwit/quickwit-janitor.tf b/distribution/ecs/quickwit/quickwit-janitor.tf index 884bdf52a6e..c1f3e39d041 100644 --- a/distribution/ecs/quickwit/quickwit-janitor.tf +++ b/distribution/ecs/quickwit/quickwit-janitor.tf @@ -3,7 +3,7 @@ module "quickwit_janitor" { service_name = "janitor" service_discovery_registry_arn = aws_service_discovery_service.janitor.arn cluster_arn = module.ecs_cluster.arn - postgres_credential_arn = local.postgres_uri_parameter_arn + postgres_uri_secret_arn = local.postgres_uri_secret_arn quickwit_peer_list = local.quickwit_peer_list s3_access_policy_arn = aws_iam_policy.quickwit_task_permission.arn task_execution_policy_arn = aws_iam_policy.quickwit_task_execution_permission.arn diff --git a/distribution/ecs/quickwit/quickwit-metastore.tf b/distribution/ecs/quickwit/quickwit-metastore.tf index 248c5987db0..571db9c9d10 100644 --- a/distribution/ecs/quickwit/quickwit-metastore.tf +++ b/distribution/ecs/quickwit/quickwit-metastore.tf @@ -3,7 +3,7 @@ module "quickwit_metastore" { service_name = "metastore" service_discovery_registry_arn = aws_service_discovery_service.metastore.arn cluster_arn = module.ecs_cluster.arn - postgres_credential_arn = local.postgres_uri_parameter_arn + postgres_uri_secret_arn = local.postgres_uri_secret_arn quickwit_peer_list = local.quickwit_peer_list s3_access_policy_arn = aws_iam_policy.quickwit_task_permission.arn task_execution_policy_arn = aws_iam_policy.quickwit_task_execution_permission.arn diff --git a/distribution/ecs/quickwit/quickwit-searcher.tf b/distribution/ecs/quickwit/quickwit-searcher.tf index 735d8aee308..26c71dc3685 100644 --- a/distribution/ecs/quickwit/quickwit-searcher.tf +++ b/distribution/ecs/quickwit/quickwit-searcher.tf @@ -3,7 +3,7 @@ module "quickwit_searcher" { service_name = "searcher" service_discovery_registry_arn = 
aws_service_discovery_service.searcher.arn cluster_arn = module.ecs_cluster.arn - postgres_credential_arn = local.postgres_uri_parameter_arn + postgres_uri_secret_arn = local.postgres_uri_secret_arn quickwit_peer_list = local.quickwit_peer_list s3_access_policy_arn = aws_iam_policy.quickwit_task_permission.arn task_execution_policy_arn = aws_iam_policy.quickwit_task_execution_permission.arn diff --git a/distribution/ecs/quickwit/service/ecs.tf b/distribution/ecs/quickwit/service/ecs.tf index 301a3bb7499..5b862271f77 100644 --- a/distribution/ecs/quickwit/service/ecs.tf +++ b/distribution/ecs/quickwit/service/ecs.tf @@ -27,7 +27,7 @@ module "quickwit_service" { secrets = [ { name = "QW_METASTORE_URI" - valueFrom = var.postgres_credential_arn + valueFrom = var.postgres_uri_secret_arn } ] @@ -119,10 +119,6 @@ module "quickwit_service" { } ] - task_exec_ssm_param_arns = [ - var.postgres_credential_arn - ] - tasks_iam_role_policies = local.tasks_iam_role_policies task_exec_iam_role_policies = { diff --git a/distribution/ecs/quickwit/service/variables.tf b/distribution/ecs/quickwit/service/variables.tf index 2e256945721..09de61ff3ee 100644 --- a/distribution/ecs/quickwit/service/variables.tf +++ b/distribution/ecs/quickwit/service/variables.tf @@ -32,7 +32,9 @@ variable "subnet_ids" { type = list(string) } -variable "postgres_credential_arn" {} +variable "postgres_uri_secret_arn" { + description = "ARN of the SSM parameter or Secret Manager secret containing the URI of a Postgres instance" +} variable "quickwit_image" {} diff --git a/distribution/ecs/quickwit/variables.tf b/distribution/ecs/quickwit/variables.tf index 6ff953c8a01..ef3be5e1467 100644 --- a/distribution/ecs/quickwit/variables.tf +++ b/distribution/ecs/quickwit/variables.tf @@ -131,7 +131,7 @@ variable "rds_config" { default = {} } -variable "external_postgres_uri_ssm_parameter_arn" { - description = "ARN of the SSM parameter containing the URI of a Postgres instance 
(postgres://{user}:{password}@{address}:{port}/{db_instance_name}). The Postgres instance should allow indbound connections from the subnets specified in `variable.subnet_ids`. If provided, the internal RDS will not be created and `var.rds_config` is ignored." +variable "external_postgres_uri_secret_arn" { + description = "ARN of the SSM parameter or Secret Manager secret containing the URI of a Postgres instance (postgres://{user}:{password}@{address}:{port}/{db_instance_name}). The Postgres instance should allow inbound connections from the subnets specified in `variable.subnet_ids`. If provided, the internal RDS will not be created and `var.rds_config` is ignored." default = "" } From c2049ee26e71e94c1eb1bc9e123ef2f5558b25d2 Mon Sep 17 00:00:00 2001 From: Adrien Guillo Date: Thu, 6 Jun 2024 15:01:55 -0400 Subject: [PATCH 2/3] Remove `v0.{4,5,6}` backward compatibility files --- .../src/index_config/serialize.rs | 3 - .../src/node_config/serialize.rs | 3 - .../src/source_config/serialize.rs | 9 +- .../src/control_plane.rs | 2 +- .../src/indexing_scheduler/mod.rs | 6 +- .../src/indexing_scheduler/scheduling/mod.rs | 2 +- quickwit/quickwit-indexing/src/test_utils.rs | 2 +- .../src/ingest_v2/routing_table.rs | 14 +- .../file_backed/file_backed_index/mod.rs | 103 ---------- .../file_backed_index/serialize.rs | 34 +--- .../src/metastore/index_metadata/mod.rs | 6 +- .../src/metastore/index_metadata/serialize.rs | 11 +- .../quickwit-metastore/src/split_metadata.rs | 9 +- .../src/split_metadata_version.rs | 3 - .../src/tests/delete_task.rs | 2 +- .../file-backed-index/v0.6.expected.json | 180 ------------------ .../test-data/file-backed-index/v0.6.json | 180 ------------------ .../index-metadata/v0.4.expected.json | 126 ------------ .../test-data/index-metadata/v0.4.json | 108 ----------- .../index-metadata/v0.5.expected.json | 126 ------------ .../test-data/index-metadata/v0.5.json | 108 ----------- .../index-metadata/v0.6.expected.json | 133 ------------- 
.../test-data/index-metadata/v0.6.json | 133 ------------- .../split-metadata/v0.6.expected.json | 29 --- .../test-data/split-metadata/v0.6.json | 29 --- .../quickwit-proto/src/types/index_uid.rs | 102 +++++----- .../quickwit-proto/src/types/pipeline_uid.rs | 2 +- 27 files changed, 68 insertions(+), 1397 deletions(-) delete mode 100644 quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.expected.json delete mode 100644 quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.json delete mode 100644 quickwit/quickwit-metastore/test-data/index-metadata/v0.4.expected.json delete mode 100644 quickwit/quickwit-metastore/test-data/index-metadata/v0.4.json delete mode 100644 quickwit/quickwit-metastore/test-data/index-metadata/v0.5.expected.json delete mode 100644 quickwit/quickwit-metastore/test-data/index-metadata/v0.5.json delete mode 100644 quickwit/quickwit-metastore/test-data/index-metadata/v0.6.expected.json delete mode 100644 quickwit/quickwit-metastore/test-data/index-metadata/v0.6.json delete mode 100644 quickwit/quickwit-metastore/test-data/split-metadata/v0.6.expected.json delete mode 100644 quickwit/quickwit-metastore/test-data/split-metadata/v0.6.json diff --git a/quickwit/quickwit-config/src/index_config/serialize.rs b/quickwit/quickwit-config/src/index_config/serialize.rs index a3ab40cd722..07885867962 100644 --- a/quickwit/quickwit-config/src/index_config/serialize.rs +++ b/quickwit/quickwit-config/src/index_config/serialize.rs @@ -37,9 +37,6 @@ type IndexConfigForSerialization = IndexConfigV0_8; pub(crate) enum VersionedIndexConfig { #[serde(rename = "0.8")] // Retro compatibility - #[serde(alias = "0.4")] - #[serde(alias = "0.5")] - #[serde(alias = "0.6")] #[serde(alias = "0.7")] V0_8(IndexConfigV0_8), } diff --git a/quickwit/quickwit-config/src/node_config/serialize.rs b/quickwit/quickwit-config/src/node_config/serialize.rs index 10d50159d93..208a929badc 100644 --- a/quickwit/quickwit-config/src/node_config/serialize.rs +++ 
b/quickwit/quickwit-config/src/node_config/serialize.rs @@ -153,9 +153,6 @@ enum VersionedNodeConfig { #[serde(rename = "0.8")] // Retro compatibility. #[serde(alias = "0.7")] - #[serde(alias = "0.6")] - #[serde(alias = "0.5")] - #[serde(alias = "0.4")] V0_8(NodeConfigBuilder), } diff --git a/quickwit/quickwit-config/src/source_config/serialize.rs b/quickwit/quickwit-config/src/source_config/serialize.rs index 68e6858f068..0877138c712 100644 --- a/quickwit/quickwit-config/src/source_config/serialize.rs +++ b/quickwit/quickwit-config/src/source_config/serialize.rs @@ -32,14 +32,11 @@ type SourceConfigForSerialization = SourceConfigV0_8; #[serde(deny_unknown_fields)] #[serde(tag = "version")] pub enum VersionedSourceConfig { - #[serde(rename = "0.7")] - // Retro compatibility. - #[serde(alias = "0.6")] - #[serde(alias = "0.5")] - #[serde(alias = "0.4")] - V0_7(SourceConfigV0_7), #[serde(rename = "0.8")] V0_8(SourceConfigV0_8), + // Retro compatibility. + #[serde(rename = "0.7")] + V0_7(SourceConfigV0_7), } impl From for SourceConfigForSerialization { diff --git a/quickwit/quickwit-control-plane/src/control_plane.rs b/quickwit/quickwit-control-plane/src/control_plane.rs index 6e8b5332f1b..33c9aa0dfcd 100644 --- a/quickwit/quickwit-control-plane/src/control_plane.rs +++ b/quickwit/quickwit-control-plane/src/control_plane.rs @@ -2114,7 +2114,7 @@ mod tests { assert_eq!(source_configs[0].source_id, INGEST_V2_SOURCE_ID); assert_eq!(source_configs[1].source_id, CLI_SOURCE_ID); - let index_uid = IndexUid::from_parts("test-index-foo", 0); + let index_uid = IndexUid::for_test("test-index-foo", 0); let mut index_metadata = IndexMetadata::new_with_index_uid(index_uid, index_config); for source_config in source_configs { diff --git a/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs b/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs index 80dc39886ea..3fca19ab936 100644 --- a/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs +++ 
b/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs @@ -806,11 +806,11 @@ mod tests { #[test] fn test_build_physical_indexing_plan_simple() { let source_1 = SourceUid { - index_uid: IndexUid::from_parts("index-1", 0), + index_uid: IndexUid::for_test("index-1", 0), source_id: "source1".to_string(), }; let source_2 = SourceUid { - index_uid: IndexUid::from_parts("index-2", 0), + index_uid: IndexUid::for_test("index-2", 0), source_id: "source2".to_string(), }; let sources = vec![ @@ -887,7 +887,7 @@ mod tests { prop_compose! { fn gen_kafka_source() (index_idx in 0usize..100usize, num_pipelines in 1usize..51usize) -> (IndexUid, SourceConfig) { - let index_uid = IndexUid::from_parts(&format!("index-id-{index_idx}"), 0 /* this is the index uid */); + let index_uid = IndexUid::for_test(&format!("index-id-{index_idx}"), 0 /* this is the index uid */); let source_id = quickwit_common::rand::append_random_suffix("kafka-source"); (index_uid, SourceConfig { source_id, diff --git a/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/mod.rs b/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/mod.rs index 7dc9419339f..5600ce6f6dc 100644 --- a/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/mod.rs +++ b/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/mod.rs @@ -727,7 +727,7 @@ mod tests { fn source_id() -> SourceUid { static COUNTER: AtomicUsize = AtomicUsize::new(0); - let index = IndexUid::from_parts("test_index", 0); + let index = IndexUid::for_test("test_index", 0); let source_id = COUNTER.fetch_add(1, Ordering::SeqCst); SourceUid { index_uid: index, diff --git a/quickwit/quickwit-indexing/src/test_utils.rs b/quickwit/quickwit-indexing/src/test_utils.rs index 4c320544bd2..bda258689d9 100644 --- a/quickwit/quickwit-indexing/src/test_utils.rs +++ b/quickwit/quickwit-indexing/src/test_utils.rs @@ -256,7 +256,7 @@ pub struct MockSplitBuilder { impl MockSplitBuilder { pub fn new(split_id: &str) -> Self { 
Self { - split_metadata: mock_split_meta(split_id, &IndexUid::from_parts("test-index", 0)), + split_metadata: mock_split_meta(split_id, &IndexUid::for_test("test-index", 0)), } } diff --git a/quickwit/quickwit-ingest/src/ingest_v2/routing_table.rs b/quickwit/quickwit-ingest/src/ingest_v2/routing_table.rs index b2095b6647f..a0351bac59d 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/routing_table.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/routing_table.rs @@ -493,7 +493,7 @@ mod tests { #[test] fn test_routing_table_entry_new() { let self_node_id: NodeId = "test-node-0".into(); - let index_uid: IndexUid = IndexUid::from_parts("test-index", 0); + let index_uid = IndexUid::for_test("test-index", 0); let source_id: SourceId = "test-source".into(); let table_entry = RoutingTableEntry::new( &self_node_id, @@ -557,7 +557,7 @@ mod tests { #[test] fn test_routing_table_entry_has_open_shards() { - let index_uid: IndexUid = IndexUid::from_parts("test-index", 0); + let index_uid = IndexUid::for_test("test-index", 0); let source_id: SourceId = "test-source".into(); let table_entry = RoutingTableEntry::empty(index_uid.clone(), source_id.clone()); @@ -653,7 +653,7 @@ mod tests { #[test] fn test_routing_table_entry_next_open_shard_round_robin() { - let index_uid: IndexUid = IndexUid::from_parts("test-index", 0); + let index_uid = IndexUid::for_test("test-index", 0); let source_id: SourceId = "test-source".into(); let table_entry = RoutingTableEntry::empty(index_uid.clone(), source_id.clone()); let ingester_pool = IngesterPool::default(); @@ -770,7 +770,7 @@ mod tests { #[test] fn test_routing_table_entry_insert_open_shards() { - let index_uid_0: IndexUid = IndexUid::from_parts("test-index", 0); + let index_uid_0 = IndexUid::for_test("test-index", 0); let source_id: SourceId = "test-source".into(); let mut table_entry = RoutingTableEntry::empty(index_uid_0.clone(), source_id.clone()); @@ -847,7 +847,7 @@ mod tests { assert_eq!(table_entry.remote_shards[1].shard_state, 
ShardState::Closed); // Update index incarnation. - let index_uid_1: IndexUid = IndexUid::from_parts("test-index", 1); + let index_uid_1 = IndexUid::for_test("test-index", 1); table_entry.insert_open_shards( &local_node_id, &local_node_id, @@ -879,7 +879,7 @@ mod tests { #[test] fn test_routing_table_entry_close_shards() { - let index_uid: IndexUid = IndexUid::from_parts("test-index", 0); + let index_uid = IndexUid::for_test("test-index", 0); let source_id: SourceId = "test-source".into(); let mut table_entry = RoutingTableEntry::empty(index_uid.clone(), source_id.clone()); @@ -960,7 +960,7 @@ mod tests { #[test] fn test_routing_table_entry_delete_shards() { - let index_uid: IndexUid = IndexUid::from_parts("test-index", 0); + let index_uid = IndexUid::for_test("test-index", 0); let source_id: SourceId = "test-source".into(); let mut table_entry = RoutingTableEntry::empty(index_uid.clone(), source_id.clone()); diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs index 567ae67918f..2a480e3d716 100644 --- a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs +++ b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs @@ -725,7 +725,6 @@ mod tests { use std::collections::BTreeSet; use quickwit_doc_mapper::tag_pruning::TagFilterAst; - use quickwit_doc_mapper::{BinaryFormat, FieldMappingType}; use quickwit_proto::ingest::Shard; use quickwit_proto::metastore::ListShardsSubrequest; use quickwit_proto::types::{IndexUid, SourceId}; @@ -897,106 +896,4 @@ mod tests { assert!(!split_query_predicate(&&split_2, &query)); assert!(!split_query_predicate(&&split_3, &query)); } - - #[test] - fn test_index_otel_bytes_fields_format_conversion() { - // TODO: remove after 0.8 release. 
- let index_json_str = r#" - { - "version": "0.6", - "splits": [], - "index": { - "version": "0.6", - "sources": [], - "index_uid": "otel-traces-v0_6:00000000000000000000000000", - "checkpoint": { - "kafka-source": { - "00000000000000000000": "00000000000000000042" - } - }, - "create_timestamp": 1789, - "index_config": { - "version": "0.6", - "index_id": "otel-traces-v0_6", - "index_uri": "s3://otel-traces-v0_6", - "doc_mapping": { - "field_mappings": [ - { - "name": "timestamp", - "type": "datetime", - "fast": true - }, - { - "name": "tenant_id", - "type": "bytes", - "fast": true, - "input_format": "base64", - "output_format": "base64" - }, - { - "name": "trace_id", - "type": "bytes", - "fast": true, - "input_format": "base64", - "output_format": "base64" - }, - { - "name": "span_id", - "type": "bytes", - "fast": true, - "input_format": "base64", - "output_format": "base64" - } - ], - "tag_fields": [], - "timestamp_field": "timestamp", - "store_source": false - } - } - } - } - "#; - - let file_backed_index: FileBackedIndex = serde_json::from_str(index_json_str).unwrap(); - let field_mapping = file_backed_index - .metadata - .index_config - .doc_mapping - .field_mappings; - assert_eq!( - field_mapping - .iter() - .filter(|field_mapping| field_mapping.name == "tenant_id") - .count(), - 1 - ); - assert_eq!( - field_mapping - .iter() - .filter(|field_mapping| field_mapping.name == "trace_id") - .count(), - 1 - ); - assert_eq!( - field_mapping - .iter() - .filter(|field_mapping| field_mapping.name == "span_id") - .count(), - 1 - ); - for field_mapping in &field_mapping { - if field_mapping.name == "tenant_id" { - if let FieldMappingType::Bytes(bytes_options, _) = &field_mapping.mapping_type { - assert_eq!(bytes_options.input_format, BinaryFormat::Base64); - assert_eq!(bytes_options.output_format, BinaryFormat::Base64); - } - } - if field_mapping.name == "trace_id" || field_mapping.name == "span_id" { - if let FieldMappingType::Bytes(bytes_options, _) = 
&field_mapping.mapping_type { - assert_eq!(bytes_options.input_format, BinaryFormat::Hex); - assert_eq!(bytes_options.output_format, BinaryFormat::Hex); - } - } - } - } } diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/serialize.rs b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/serialize.rs index f8ecf803200..afceae2ebf5 100644 --- a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/serialize.rs +++ b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/serialize.rs @@ -20,7 +20,6 @@ use std::collections::HashMap; use itertools::Itertools; -use quickwit_doc_mapper::{BinaryFormat, FieldMappingType}; use quickwit_proto::ingest::Shard; use quickwit_proto::metastore::SourceType; use quickwit_proto::types::SourceId; @@ -37,9 +36,6 @@ pub(crate) enum VersionedFileBackedIndex { #[serde(rename = "0.8")] // Retro compatibility. #[serde(alias = "0.7")] - #[serde(alias = "0.6")] - #[serde(alias = "0.5")] - #[serde(alias = "0.4")] V0_8(FileBackedIndexV0_8), } @@ -106,35 +102,7 @@ impl From for FileBackedIndexV0_8 { } impl From for FileBackedIndex { - fn from(mut index: FileBackedIndexV0_8) -> Self { - // if the index is otel-traces-v0_6, convert set bytes fields input and output format to hex - // to be compatible with the v0_6 version. - // TODO: remove after 0.8 release. - if index.metadata.index_id() == "otel-traces-v0_6" { - index - .metadata - .index_config - .doc_mapping - .field_mappings - .iter_mut() - .filter(|field_mapping| { - field_mapping.name == "trace_id" || field_mapping.name == "span_id" - }) - .for_each(|field_mapping| { - if let FieldMappingType::Bytes(bytes_options, _) = - &mut field_mapping.mapping_type - { - bytes_options.input_format = BinaryFormat::Hex; - bytes_options.output_format = BinaryFormat::Hex; - } - }); - } - // Override split index_id to support old SplitMetadata format. 
- for split in index.splits.iter_mut() { - if split.split_metadata.index_uid.is_empty() { - split.split_metadata.index_uid = index.metadata.index_uid.clone(); - } - } + fn from(index: FileBackedIndexV0_8) -> Self { let mut per_source_shards: HashMap = index .shards .into_iter() diff --git a/quickwit/quickwit-metastore/src/metastore/index_metadata/mod.rs b/quickwit/quickwit-metastore/src/metastore/index_metadata/mod.rs index 36db32f24e7..3d4b56bbcac 100644 --- a/quickwit/quickwit-metastore/src/metastore/index_metadata/mod.rs +++ b/quickwit/quickwit-metastore/src/metastore/index_metadata/mod.rs @@ -31,7 +31,6 @@ use quickwit_proto::types::{IndexUid, Position, SourceId}; use serde::{Deserialize, Serialize}; use serialize::VersionedIndexMetadata; use time::OffsetDateTime; -use ulid::Ulid; use crate::checkpoint::{IndexCheckpoint, PartitionId, SourceCheckpoint, SourceCheckpointDelta}; @@ -75,7 +74,7 @@ impl IndexMetadata { /// An incarnation id of `0` will be used to complete the index id into a index uuid. 
#[cfg(any(test, feature = "testsuite"))] pub fn for_test(index_id: &str, index_uri: &str) -> Self { - let index_uid = IndexUid::from_parts(index_id, 0); + let index_uid = IndexUid::for_test(index_id, 0); let mut index_metadata = IndexMetadata::new(IndexConfig::for_test(index_id, index_uri)); index_metadata.index_uid = index_uid; index_metadata @@ -161,6 +160,7 @@ impl IndexMetadata { } } +#[cfg(any(test, feature = "testsuite"))] impl TestableForRegression for IndexMetadata { fn sample_for_regression() -> IndexMetadata { let mut source_checkpoint = SourceCheckpoint::default(); @@ -176,7 +176,7 @@ impl TestableForRegression for IndexMetadata { let checkpoint = IndexCheckpoint::from(per_source_checkpoint); let index_config = IndexConfig::sample_for_regression(); let mut index_metadata = IndexMetadata { - index_uid: IndexUid::from_parts(&index_config.index_id, Ulid::nil()), + index_uid: IndexUid::for_test(&index_config.index_id, 0), index_config, checkpoint, create_timestamp: 1789, diff --git a/quickwit/quickwit-metastore/src/metastore/index_metadata/serialize.rs b/quickwit/quickwit-metastore/src/metastore/index_metadata/serialize.rs index 88c89a85c39..2834c719e33 100644 --- a/quickwit/quickwit-metastore/src/metastore/index_metadata/serialize.rs +++ b/quickwit/quickwit-metastore/src/metastore/index_metadata/serialize.rs @@ -33,9 +33,6 @@ pub(crate) enum VersionedIndexMetadata { #[serde(rename = "0.8")] // Retro compatibility. #[serde(alias = "0.7")] - #[serde(alias = "0.6")] - #[serde(alias = "0.5")] - #[serde(alias = "0.4")] V0_8(IndexMetadataV0_8), } @@ -73,8 +70,6 @@ impl From for IndexMetadataV0_8 { #[derive(Clone, Debug, Serialize, Deserialize, utoipa::ToSchema)] pub(crate) struct IndexMetadataV0_8 { #[schema(value_type = String)] - // Defaults to nil for backward compatibility. 
- #[serde(default, alias = "index_id")] pub index_uid: IndexUid, #[schema(value_type = VersionedIndexConfig)] pub index_config: IndexConfig, @@ -98,11 +93,7 @@ impl TryFrom for IndexMetadata { sources.insert(source.source_id.clone(), source); } Ok(Self { - index_uid: if v0_8.index_uid.is_empty() { - IndexUid::from_parts(&v0_8.index_config.index_id, 0) - } else { - v0_8.index_uid - }, + index_uid: v0_8.index_uid, index_config: v0_8.index_config, checkpoint: v0_8.checkpoint, create_timestamp: v0_8.create_timestamp, diff --git a/quickwit/quickwit-metastore/src/split_metadata.rs b/quickwit/quickwit-metastore/src/split_metadata.rs index c67fec6f68e..8b2db8be2be 100644 --- a/quickwit/quickwit-metastore/src/split_metadata.rs +++ b/quickwit/quickwit-metastore/src/split_metadata.rs @@ -264,11 +264,9 @@ pub struct SplitInfo { #[cfg(any(test, feature = "testsuite"))] impl quickwit_config::TestableForRegression for SplitMetadata { fn sample_for_regression() -> Self { - use ulid::Ulid; - SplitMetadata { split_id: "split".to_string(), - index_uid: IndexUid::from_parts("my-index", Ulid::nil()), + index_uid: IndexUid::for_test("my-index", 0), source_id: "source".to_string(), node_id: "node".to_string(), delete_opstamp: 10, @@ -401,10 +399,7 @@ mod tests { fn test_split_metadata_debug() { let split_metadata = SplitMetadata { split_id: "split-1".to_string(), - index_uid: IndexUid::from_parts( - "00000000-0000-0000-0000-000000000000", - ulid::Ulid::nil(), - ), + index_uid: IndexUid::for_test("00000000-0000-0000-0000-000000000000", 0), partition_id: 0, source_id: "source-1".to_string(), node_id: "node-1".to_string(), diff --git a/quickwit/quickwit-metastore/src/split_metadata_version.rs b/quickwit/quickwit-metastore/src/split_metadata_version.rs index f7ebbfdf530..5b98cdb9e4b 100644 --- a/quickwit/quickwit-metastore/src/split_metadata_version.rs +++ b/quickwit/quickwit-metastore/src/split_metadata_version.rs @@ -158,9 +158,6 @@ pub(crate) enum VersionedSplitMetadata { #[serde(rename = 
"0.8")] // Retro compatibility. #[serde(alias = "0.7")] - #[serde(alias = "0.6")] - #[serde(alias = "0.5")] - #[serde(alias = "0.4")] V0_8(SplitMetadataV0_8), } diff --git a/quickwit/quickwit-metastore/src/tests/delete_task.rs b/quickwit/quickwit-metastore/src/tests/delete_task.rs index 86bfed90e28..1b4091a3dfb 100644 --- a/quickwit/quickwit-metastore/src/tests/delete_task.rs +++ b/quickwit/quickwit-metastore/src/tests/delete_task.rs @@ -67,7 +67,7 @@ pub async fn test_metastore_create_delete_task< // Create a delete task on an index with wrong incarnation_id let error = metastore .create_delete_task(DeleteQuery { - index_uid: Some(IndexUid::from_parts(&index_id, 12345)), + index_uid: Some(IndexUid::for_test(&index_id, 12345)), ..delete_query.clone() }) .await diff --git a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.expected.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.expected.json deleted file mode 100644 index 1c725515ee4..00000000000 --- a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.expected.json +++ /dev/null @@ -1,180 +0,0 @@ -{ - "delete_tasks": [ - { - "create_timestamp": 0, - "delete_query": { - "index_uid": "my-index:00000000000000000000000000", - "query_ast": "{\"type\":\"bool\",\"must\":[{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Harry\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}}},{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Potter\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}}}]}" - }, - "opstamp": 10 - } - ], - "index": { - "checkpoint": { - "kafka-source": { - "00000000000000000000": "00000000000000000042" - } - }, - "create_timestamp": 1789, - "index_config": { - "doc_mapping": { - "dynamic_mapping": { - "expand_dots": true, - "fast": { - "normalizer": "raw" - }, - "indexed": true, - "record": "basic", - "stored": true, - "tokenizer": "raw" - }, - "field_mappings": [ - { - "coerce": true, - "fast": true, - "indexed": true, - 
"name": "tenant_id", - "output_format": "number", - "stored": true, - "type": "u64" - }, - { - "fast": true, - "fast_precision": "seconds", - "indexed": true, - "input_formats": [ - "rfc3339", - "unix_timestamp" - ], - "name": "timestamp", - "output_format": "rfc3339", - "stored": true, - "type": "datetime" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "log_level", - "record": "basic", - "stored": true, - "tokenizer": "raw", - "type": "text" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "message", - "record": "position", - "stored": true, - "tokenizer": "default", - "type": "text" - } - ], - "index_field_presence": true, - "max_num_partitions": 100, - "mode": "dynamic", - "partition_key": "tenant_id", - "store_document_size": false, - "store_source": true, - "tag_fields": [ - "log_level", - "tenant_id" - ], - "timestamp_field": "timestamp", - "tokenizers": [ - { - "filters": [], - "name": "custom_tokenizer", - "pattern": "[^\\p{L}\\p{N}]+", - "type": "regex" - } - ] - }, - "index_id": "my-index", - "index_uri": "s3://quickwit-indexes/my-index", - "indexing_settings": { - "commit_timeout_secs": 301, - "docstore_blocksize": 1000000, - "docstore_compression_level": 8, - "merge_policy": { - "maturation_period": "2days", - "max_merge_factor": 11, - "merge_factor": 9, - "min_level_num_docs": 100000, - "type": "stable_log" - }, - "resources": { - "heap_size": "50.0 MB" - }, - "split_num_docs_target": 10000001 - }, - "retention": { - "period": "90 days", - "schedule": "daily" - }, - "search_settings": { - "default_search_fields": [ - "message" - ] - }, - "version": "0.8" - }, - "index_uid": "my-index:00000000000000000000000000", - "sources": [ - { - "enabled": true, - "input_format": "json", - "num_pipelines": 2, - "params": { - "client_params": {}, - "topic": "kafka-topic" - }, - "source_id": "kafka-source", - "source_type": "kafka", - "transform": { - "script": ".message = downcase(string!(.message))", - 
"timezone": "UTC" - }, - "version": "0.8" - } - ], - "version": "0.8" - }, - "splits": [ - { - "create_timestamp": 3, - "delete_opstamp": 10, - "footer_offsets": { - "end": 2000, - "start": 1000 - }, - "index_uid": "my-index:00000000000000000000000000", - "maturity": { - "maturation_period_millis": 4000, - "type": "immature" - }, - "node_id": "node", - "num_docs": 12303, - "num_merge_ops": 3, - "partition_id": 7, - "publish_timestamp": 1789, - "source_id": "source", - "split_id": "split", - "split_state": "Published", - "tags": [ - "234", - "aaa" - ], - "time_range": { - "end": 130198, - "start": 121000 - }, - "uncompressed_docs_size_in_bytes": 234234, - "update_timestamp": 1789, - "version": "0.8" - } - ], - "version": "0.8" -} diff --git a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.json deleted file mode 100644 index 51cb6ecd3e8..00000000000 --- a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.json +++ /dev/null @@ -1,180 +0,0 @@ -{ - "delete_tasks": [ - { - "create_timestamp": 0, - "delete_query": { - "index_uid": "my-index:00000000000000000000000000", - "query_ast": "{\"type\":\"bool\",\"must\":[{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Harry\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}}},{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Potter\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}}}]}" - }, - "opstamp": 10 - } - ], - "index": { - "checkpoint": { - "kafka-source": { - "00000000000000000000": "00000000000000000042" - } - }, - "create_timestamp": 1789, - "index_config": { - "doc_mapping": { - "dynamic_mapping": { - "expand_dots": true, - "fast": { - "normalizer": "raw" - }, - "indexed": true, - "record": "basic", - "stored": true, - "tokenizer": "raw" - }, - "field_mappings": [ - { - "coerce": true, - "fast": true, - "indexed": true, - "name": "tenant_id", - "output_format": "number", - 
"stored": true, - "type": "u64" - }, - { - "fast": true, - "fast_precision": "seconds", - "indexed": true, - "input_formats": [ - "rfc3339", - "unix_timestamp" - ], - "name": "timestamp", - "output_format": "rfc3339", - "stored": true, - "type": "datetime" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "log_level", - "record": "basic", - "stored": true, - "tokenizer": "raw", - "type": "text" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "message", - "record": "position", - "stored": true, - "tokenizer": "default", - "type": "text" - } - ], - "index_field_presence": true, - "max_num_partitions": 100, - "mode": "dynamic", - "partition_key": "tenant_id", - "store_source": true, - "tag_fields": [ - "log_level", - "tenant_id" - ], - "timestamp_field": "timestamp", - "tokenizers": [ - { - "filters": [], - "name": "custom_tokenizer", - "pattern": "[^\\p{L}\\p{N}]+", - "type": "regex" - } - ] - }, - "index_id": "my-index", - "index_uri": "s3://quickwit-indexes/my-index", - "indexing_settings": { - "commit_timeout_secs": 301, - "docstore_blocksize": 1000000, - "docstore_compression_level": 8, - "merge_policy": { - "maturation_period": "2days", - "max_merge_factor": 11, - "merge_factor": 9, - "min_level_num_docs": 100000, - "type": "stable_log" - }, - "resources": { - "heap_size": "50.0 MB" - }, - "split_num_docs_target": 10000001 - }, - "retention": { - "period": "90 days", - "schedule": "daily" - }, - "search_settings": { - "default_search_fields": [ - "message" - ] - }, - "version": "0.6" - }, - "index_uid": "my-index:00000000000000000000000000", - "sources": [ - { - "desired_num_pipelines": 2, - "enabled": true, - "input_format": "json", - "max_num_pipelines_per_indexer": 2, - "params": { - "client_params": {}, - "topic": "kafka-topic" - }, - "source_id": "kafka-source", - "source_type": "kafka", - "transform": { - "script": ".message = downcase(string!(.message))", - "timezone": "UTC" - }, - "version": "0.6" 
- } - ], - "version": "0.6" - }, - "splits": [ - { - "create_timestamp": 3, - "delete_opstamp": 10, - "footer_offsets": { - "end": 2000, - "start": 1000 - }, - "index_uid": "my-index:00000000000000000000000000", - "maturity": { - "maturation_period_millis": 4000, - "type": "immature" - }, - "node_id": "node", - "num_docs": 12303, - "num_merge_ops": 3, - "partition_id": 7, - "publish_timestamp": 1789, - "source_id": "source", - "split_id": "split", - "split_state": "Published", - "tags": [ - "234", - "aaa" - ], - "time_range": { - "end": 130198, - "start": 121000 - }, - "uncompressed_docs_size_in_bytes": 234234, - "update_timestamp": 1789, - "version": "0.6" - } - ], - "version": "0.6" -} diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.4.expected.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.4.expected.json deleted file mode 100644 index 63896de5727..00000000000 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.4.expected.json +++ /dev/null @@ -1,126 +0,0 @@ -{ - "checkpoint": { - "kafka-source": { - "00000000000000000000": "00000000000000000042" - } - }, - "create_timestamp": 1789, - "index_config": { - "doc_mapping": { - "dynamic_mapping": { - "expand_dots": true, - "fast": { - "normalizer": "raw" - }, - "indexed": true, - "record": "basic", - "stored": true, - "tokenizer": "raw" - }, - "field_mappings": [ - { - "coerce": true, - "fast": true, - "indexed": true, - "name": "tenant_id", - "output_format": "number", - "stored": true, - "type": "u64" - }, - { - "fast": true, - "fast_precision": "seconds", - "indexed": true, - "input_formats": [ - "rfc3339", - "unix_timestamp" - ], - "name": "timestamp", - "output_format": "rfc3339", - "stored": true, - "type": "datetime" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "log_level", - "record": "basic", - "stored": true, - "tokenizer": "raw", - "type": "text" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": 
"message", - "record": "position", - "stored": true, - "tokenizer": "default", - "type": "text" - } - ], - "index_field_presence": false, - "max_num_partitions": 100, - "mode": "dynamic", - "partition_key": "tenant", - "store_document_size": false, - "store_source": true, - "tag_fields": [ - "log_level", - "tenant_id" - ], - "timestamp_field": "timestamp", - "tokenizers": [] - }, - "index_id": "my-index", - "index_uri": "s3://quickwit-indexes/my-index", - "indexing_settings": { - "commit_timeout_secs": 301, - "docstore_blocksize": 1000000, - "docstore_compression_level": 8, - "merge_policy": { - "maturation_period": "2days", - "max_merge_factor": 11, - "merge_factor": 9, - "min_level_num_docs": 100000, - "type": "stable_log" - }, - "resources": { - "heap_size": "50.0 MB" - }, - "split_num_docs_target": 10000001 - }, - "retention": { - "period": "90 days", - "schedule": "daily" - }, - "search_settings": { - "default_search_fields": [ - "message" - ] - }, - "version": "0.8" - }, - "index_uid": "my-index:00000000000000000000000000", - "sources": [ - { - "enabled": true, - "input_format": "json", - "num_pipelines": 2, - "params": { - "client_params": {}, - "topic": "kafka-topic" - }, - "source_id": "kafka-source", - "source_type": "kafka", - "transform": { - "script": ".message = downcase(string!(.message))", - "timezone": "UTC" - }, - "version": "0.8" - } - ], - "version": "0.8" -} diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.4.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.4.json deleted file mode 100644 index a6316834104..00000000000 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.4.json +++ /dev/null @@ -1,108 +0,0 @@ -{ - "checkpoint": { - "kafka-source": { - "00000000000000000000": "00000000000000000042" - } - }, - "create_timestamp": 1789, - "index_config": { - "doc_mapping": { - "field_mappings": [ - { - "fast": true, - "indexed": true, - "name": "tenant_id", - "stored": true, - "type": "u64" - }, - { - 
"fast": true, - "indexed": true, - "input_formats": [ - "rfc3339", - "unix_timestamp" - ], - "name": "timestamp", - "output_format": "rfc3339", - "fast_precision": "seconds", - "stored": true, - "type": "datetime" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "log_level", - "stored": true, - "tokenizer": "raw", - "type": "text" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "message", - "record": "position", - "stored": true, - "tokenizer": "default", - "type": "text" - } - ], - "max_num_partitions": 100, - "mode": "dynamic", - "partition_key": "tenant", - "store_source": true, - "tag_fields": [ - "log_level", - "tenant_id" - ], - "timestamp_field": "timestamp" - }, - "index_id": "my-index", - "index_uri": "s3://quickwit-indexes/my-index", - "indexing_settings": { - "commit_timeout_secs": 301, - "docstore_blocksize": 1000000, - "docstore_compression_level": 8, - "merge_policy": { - "maturation_period": "2days", - "max_merge_factor": 11, - "merge_factor": 9, - "min_level_num_docs": 100000, - "type": "stable_log" - }, - "resources": { - "heap_size": 50000000 - }, - "split_num_docs_target": 10000001 - }, - "retention": { - "period": "90 days", - "schedule": "daily" - }, - "search_settings": { - "default_search_fields": [ - "message" - ] - }, - "version": "0.4" - }, - "sources": [ - { - "desired_num_pipelines": 2, - "enabled": true, - "max_num_pipelines_per_indexer": 2, - "params": { - "client_params": {}, - "topic": "kafka-topic" - }, - "source_id": "kafka-source", - "source_type": "kafka", - "transform": { - "script": ".message = downcase(string!(.message))" - }, - "version": "0.4" - } - ], - "version": "0.4" -} diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.5.expected.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.5.expected.json deleted file mode 100644 index 63896de5727..00000000000 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.5.expected.json +++ 
/dev/null @@ -1,126 +0,0 @@ -{ - "checkpoint": { - "kafka-source": { - "00000000000000000000": "00000000000000000042" - } - }, - "create_timestamp": 1789, - "index_config": { - "doc_mapping": { - "dynamic_mapping": { - "expand_dots": true, - "fast": { - "normalizer": "raw" - }, - "indexed": true, - "record": "basic", - "stored": true, - "tokenizer": "raw" - }, - "field_mappings": [ - { - "coerce": true, - "fast": true, - "indexed": true, - "name": "tenant_id", - "output_format": "number", - "stored": true, - "type": "u64" - }, - { - "fast": true, - "fast_precision": "seconds", - "indexed": true, - "input_formats": [ - "rfc3339", - "unix_timestamp" - ], - "name": "timestamp", - "output_format": "rfc3339", - "stored": true, - "type": "datetime" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "log_level", - "record": "basic", - "stored": true, - "tokenizer": "raw", - "type": "text" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "message", - "record": "position", - "stored": true, - "tokenizer": "default", - "type": "text" - } - ], - "index_field_presence": false, - "max_num_partitions": 100, - "mode": "dynamic", - "partition_key": "tenant", - "store_document_size": false, - "store_source": true, - "tag_fields": [ - "log_level", - "tenant_id" - ], - "timestamp_field": "timestamp", - "tokenizers": [] - }, - "index_id": "my-index", - "index_uri": "s3://quickwit-indexes/my-index", - "indexing_settings": { - "commit_timeout_secs": 301, - "docstore_blocksize": 1000000, - "docstore_compression_level": 8, - "merge_policy": { - "maturation_period": "2days", - "max_merge_factor": 11, - "merge_factor": 9, - "min_level_num_docs": 100000, - "type": "stable_log" - }, - "resources": { - "heap_size": "50.0 MB" - }, - "split_num_docs_target": 10000001 - }, - "retention": { - "period": "90 days", - "schedule": "daily" - }, - "search_settings": { - "default_search_fields": [ - "message" - ] - }, - "version": "0.8" - }, - 
"index_uid": "my-index:00000000000000000000000000", - "sources": [ - { - "enabled": true, - "input_format": "json", - "num_pipelines": 2, - "params": { - "client_params": {}, - "topic": "kafka-topic" - }, - "source_id": "kafka-source", - "source_type": "kafka", - "transform": { - "script": ".message = downcase(string!(.message))", - "timezone": "UTC" - }, - "version": "0.8" - } - ], - "version": "0.8" -} diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.5.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.5.json deleted file mode 100644 index 66233c904d7..00000000000 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.5.json +++ /dev/null @@ -1,108 +0,0 @@ -{ - "checkpoint": { - "kafka-source": { - "00000000000000000000": "00000000000000000042" - } - }, - "create_timestamp": 1789, - "index_config": { - "doc_mapping": { - "field_mappings": [ - { - "fast": true, - "indexed": true, - "name": "tenant_id", - "stored": true, - "type": "u64" - }, - { - "fast": true, - "indexed": true, - "input_formats": [ - "rfc3339", - "unix_timestamp" - ], - "name": "timestamp", - "output_format": "rfc3339", - "fast_precision": "seconds", - "stored": true, - "type": "datetime" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "log_level", - "stored": true, - "tokenizer": "raw", - "type": "text" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "message", - "record": "position", - "stored": true, - "tokenizer": "default", - "type": "text" - } - ], - "max_num_partitions": 100, - "mode": "dynamic", - "partition_key": "tenant", - "store_source": true, - "tag_fields": [ - "log_level", - "tenant_id" - ], - "timestamp_field": "timestamp" - }, - "index_id": "my-index", - "index_uri": "s3://quickwit-indexes/my-index", - "indexing_settings": { - "commit_timeout_secs": 301, - "docstore_blocksize": 1000000, - "docstore_compression_level": 8, - "merge_policy": { - "maturation_period": "2days", - 
"max_merge_factor": 11, - "merge_factor": 9, - "min_level_num_docs": 100000, - "type": "stable_log" - }, - "resources": { - "heap_size": 50000000 - }, - "split_num_docs_target": 10000001 - }, - "retention": { - "period": "90 days", - "schedule": "daily" - }, - "search_settings": { - "default_search_fields": [ - "message" - ] - }, - "version": "0.5" - }, - "sources": [ - { - "desired_num_pipelines": 2, - "enabled": true, - "max_num_pipelines_per_indexer": 2, - "params": { - "client_params": {}, - "topic": "kafka-topic" - }, - "source_id": "kafka-source", - "source_type": "kafka", - "transform": { - "script": ".message = downcase(string!(.message))" - }, - "version": "0.5" - } - ], - "version": "0.5" -} diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.expected.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.expected.json deleted file mode 100644 index e0c572da32f..00000000000 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.expected.json +++ /dev/null @@ -1,133 +0,0 @@ -{ - "checkpoint": { - "kafka-source": { - "00000000000000000000": "00000000000000000042" - } - }, - "create_timestamp": 1789, - "index_config": { - "doc_mapping": { - "dynamic_mapping": { - "expand_dots": true, - "fast": { - "normalizer": "raw" - }, - "indexed": true, - "record": "basic", - "stored": true, - "tokenizer": "raw" - }, - "field_mappings": [ - { - "coerce": true, - "fast": true, - "indexed": true, - "name": "tenant_id", - "output_format": "number", - "stored": true, - "type": "u64" - }, - { - "fast": true, - "fast_precision": "seconds", - "indexed": true, - "input_formats": [ - "rfc3339", - "unix_timestamp" - ], - "name": "timestamp", - "output_format": "rfc3339", - "stored": true, - "type": "datetime" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "log_level", - "record": "basic", - "stored": true, - "tokenizer": "raw", - "type": "text" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - 
"name": "message", - "record": "position", - "stored": true, - "tokenizer": "default", - "type": "text" - } - ], - "index_field_presence": true, - "max_num_partitions": 100, - "mode": "dynamic", - "partition_key": "tenant_id", - "store_document_size": false, - "store_source": true, - "tag_fields": [ - "log_level", - "tenant_id" - ], - "timestamp_field": "timestamp", - "tokenizers": [ - { - "filters": [], - "name": "custom_tokenizer", - "pattern": "[^\\p{L}\\p{N}]+", - "type": "regex" - } - ] - }, - "index_id": "my-index", - "index_uri": "s3://quickwit-indexes/my-index", - "indexing_settings": { - "commit_timeout_secs": 301, - "docstore_blocksize": 1000000, - "docstore_compression_level": 8, - "merge_policy": { - "maturation_period": "2days", - "max_merge_factor": 11, - "merge_factor": 9, - "min_level_num_docs": 100000, - "type": "stable_log" - }, - "resources": { - "heap_size": "50.0 MB" - }, - "split_num_docs_target": 10000001 - }, - "retention": { - "period": "90 days", - "schedule": "daily" - }, - "search_settings": { - "default_search_fields": [ - "message" - ] - }, - "version": "0.8" - }, - "index_uid": "my-index:00000000000000000000000000", - "sources": [ - { - "enabled": true, - "input_format": "json", - "num_pipelines": 2, - "params": { - "client_params": {}, - "topic": "kafka-topic" - }, - "source_id": "kafka-source", - "source_type": "kafka", - "transform": { - "script": ".message = downcase(string!(.message))", - "timezone": "UTC" - }, - "version": "0.8" - } - ], - "version": "0.8" -} diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.json deleted file mode 100644 index f6522a1ba38..00000000000 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.json +++ /dev/null @@ -1,133 +0,0 @@ -{ - "checkpoint": { - "kafka-source": { - "00000000000000000000": "00000000000000000042" - } - }, - "create_timestamp": 1789, - "index_config": { - "doc_mapping": { - 
"dynamic_mapping": { - "expand_dots": true, - "fast": { - "normalizer": "raw" - }, - "indexed": true, - "record": "basic", - "stored": true, - "tokenizer": "raw" - }, - "field_mappings": [ - { - "coerce": true, - "fast": true, - "indexed": true, - "name": "tenant_id", - "output_format": "number", - "stored": true, - "type": "u64" - }, - { - "fast": true, - "fast_precision": "seconds", - "indexed": true, - "input_formats": [ - "rfc3339", - "unix_timestamp" - ], - "name": "timestamp", - "output_format": "rfc3339", - "stored": true, - "type": "datetime" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "log_level", - "record": "basic", - "stored": true, - "tokenizer": "raw", - "type": "text" - }, - { - "fast": false, - "fieldnorms": false, - "indexed": true, - "name": "message", - "record": "position", - "stored": true, - "tokenizer": "default", - "type": "text" - } - ], - "index_field_presence": true, - "max_num_partitions": 100, - "mode": "dynamic", - "partition_key": "tenant_id", - "store_source": true, - "tag_fields": [ - "log_level", - "tenant_id" - ], - "timestamp_field": "timestamp", - "tokenizers": [ - { - "filters": [], - "name": "custom_tokenizer", - "pattern": "[^\\p{L}\\p{N}]+", - "type": "regex" - } - ] - }, - "index_id": "my-index", - "index_uri": "s3://quickwit-indexes/my-index", - "indexing_settings": { - "commit_timeout_secs": 301, - "docstore_blocksize": 1000000, - "docstore_compression_level": 8, - "merge_policy": { - "maturation_period": "2days", - "max_merge_factor": 11, - "merge_factor": 9, - "min_level_num_docs": 100000, - "type": "stable_log" - }, - "resources": { - "heap_size": "50.0 MB" - }, - "split_num_docs_target": 10000001 - }, - "retention": { - "period": "90 days", - "schedule": "daily" - }, - "search_settings": { - "default_search_fields": [ - "message" - ] - }, - "version": "0.6" - }, - "index_uid": "my-index:00000000000000000000000000", - "sources": [ - { - "desired_num_pipelines": 2, - "enabled": true, - 
"input_format": "json", - "max_num_pipelines_per_indexer": 2, - "params": { - "client_params": {}, - "topic": "kafka-topic" - }, - "source_id": "kafka-source", - "source_type": "kafka", - "transform": { - "script": ".message = downcase(string!(.message))", - "timezone": "UTC" - }, - "version": "0.6" - } - ], - "version": "0.6" -} diff --git a/quickwit/quickwit-metastore/test-data/split-metadata/v0.6.expected.json b/quickwit/quickwit-metastore/test-data/split-metadata/v0.6.expected.json deleted file mode 100644 index d6475bd3525..00000000000 --- a/quickwit/quickwit-metastore/test-data/split-metadata/v0.6.expected.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "create_timestamp": 3, - "delete_opstamp": 10, - "footer_offsets": { - "end": 2000, - "start": 1000 - }, - "index_uid": "my-index:00000000000000000000000000", - "maturity": { - "maturation_period_millis": 4000, - "type": "immature" - }, - "node_id": "node", - "num_docs": 12303, - "num_merge_ops": 3, - "partition_id": 7, - "source_id": "source", - "split_id": "split", - "tags": [ - "234", - "aaa" - ], - "time_range": { - "end": 130198, - "start": 121000 - }, - "uncompressed_docs_size_in_bytes": 234234, - "version": "0.8" -} diff --git a/quickwit/quickwit-metastore/test-data/split-metadata/v0.6.json b/quickwit/quickwit-metastore/test-data/split-metadata/v0.6.json deleted file mode 100644 index 9c1f6311c86..00000000000 --- a/quickwit/quickwit-metastore/test-data/split-metadata/v0.6.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "create_timestamp": 3, - "delete_opstamp": 10, - "footer_offsets": { - "end": 2000, - "start": 1000 - }, - "index_uid": "my-index:00000000000000000000000000", - "maturity": { - "maturation_period_millis": 4000, - "type": "immature" - }, - "node_id": "node", - "num_docs": 12303, - "num_merge_ops": 3, - "partition_id": 7, - "source_id": "source", - "split_id": "split", - "tags": [ - "234", - "aaa" - ], - "time_range": { - "end": 130198, - "start": 121000 - }, - "uncompressed_docs_size_in_bytes": 234234, 
- "version": "0.6" -} diff --git a/quickwit/quickwit-proto/src/types/index_uid.rs b/quickwit/quickwit-proto/src/types/index_uid.rs index 9382261227d..2d964c739bf 100644 --- a/quickwit/quickwit-proto/src/types/index_uid.rs +++ b/quickwit/quickwit-proto/src/types/index_uid.rs @@ -17,6 +17,7 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . +use std::borrow::Cow; use std::fmt; use std::str::FromStr; @@ -37,31 +38,61 @@ pub struct IndexUid { pub incarnation_id: Ulid, } +impl fmt::Display for IndexUid { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}:{}", self.index_id, self.incarnation_id) + } +} + +impl IndexUid { + /// Creates a new index UID from an index ID using a random ULID as incarnation ID. + pub fn new_with_random_ulid(index_id: &str) -> Self { + Self::new(index_id, Ulid::new()) + } + + fn new(index_id: &str, incarnation_id: impl Into) -> Self { + assert!(!index_id.contains(':'), "index ID may not contain `:`"); + + Self { + index_id: index_id.to_string(), + incarnation_id: incarnation_id.into(), + } + } + + #[cfg(any(test, feature = "testsuite"))] + pub fn for_test(index_id: &str, incarnation_id: u128) -> Self { + Self { + index_id: index_id.to_string(), + incarnation_id: incarnation_id.into(), + } + } +} + +#[derive(Error, Debug)] +#[error("invalid index UID `{0}`")] +pub struct InvalidIndexUid(String); + impl FromStr for IndexUid { type Err = InvalidIndexUid; fn from_str(index_uid_str: &str) -> Result { - let Some((index_id, ulid)) = index_uid_str.split_once(':') else { - return Err(InvalidIndexUid { - invalid_index_uid_str: index_uid_str.to_string(), - }); + let Some((index_id, incarnation_id_str)) = index_uid_str.split_once(':') else { + return Err(InvalidIndexUid(index_uid_str.to_string())); }; - let incarnation_id = Ulid::from_string(ulid).map_err(|_| InvalidIndexUid { - invalid_index_uid_str: index_uid_str.to_string(), - })?; - Ok(IndexUid { + let 
incarnation_id = Ulid::from_string(incarnation_id_str) + .map_err(|_| InvalidIndexUid(index_uid_str.to_string()))?; + let index_uid = IndexUid { index_id: index_id.to_string(), incarnation_id, - }) + }; + Ok(index_uid) } } -// It is super lame, but for backward compatibility reasons we accept having a missing ulid part. -// TODO DEPRECATED ME and remove impl<'de> Deserialize<'de> for IndexUid { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de> { - let index_uid_str: String = String::deserialize(deserializer)?; + let index_uid_str: Cow<'de, str> = Cow::deserialize(deserializer)?; let index_uid = IndexUid::from_str(&index_uid_str).map_err(D::Error::custom)?; Ok(index_uid) } @@ -147,53 +178,6 @@ impl prost::Message for IndexUid { } } -impl fmt::Display for IndexUid { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}:{}", self.index_id, self.incarnation_id) - } -} - -impl IndexUid { - /// Creates a new index uid from index_id. - /// A random ULID will be used as incarnation - pub fn new_with_random_ulid(index_id: &str) -> Self { - Self::from_parts(index_id, Ulid::new()) - } - - pub fn from_parts(index_id: &str, incarnation_id: impl Into) -> Self { - assert!(!index_id.contains(':'), "index ID may not contain `:`"); - let incarnation_id = incarnation_id.into(); - IndexUid { - index_id: index_id.to_string(), - incarnation_id, - } - } - - pub fn is_empty(&self) -> bool { - self.index_id.is_empty() - } - - #[cfg(any(test, feature = "testsuite"))] - pub fn for_test(index_id: &str, ulid: u128) -> Self { - IndexUid { - index_id: index_id.to_string(), - incarnation_id: Ulid(ulid), - } - } -} - -impl From for String { - fn from(val: IndexUid) -> Self { - val.to_string() - } -} - -#[derive(Error, Debug)] -#[error("invalid index uid `{invalid_index_uid_str}`")] -pub struct InvalidIndexUid { - pub invalid_index_uid_str: String, -} - #[cfg(feature = "postgres")] impl TryFrom for IndexUid { type Error = InvalidIndexUid; diff --git 
a/quickwit/quickwit-proto/src/types/pipeline_uid.rs b/quickwit/quickwit-proto/src/types/pipeline_uid.rs index 114125901c2..67ff395d13c 100644 --- a/quickwit/quickwit-proto/src/types/pipeline_uid.rs +++ b/quickwit/quickwit-proto/src/types/pipeline_uid.rs @@ -60,7 +60,7 @@ impl FromStr for PipelineUid { fn from_str(pipeline_uid_str: &str) -> Result { let pipeline_ulid = - Ulid::from_string(pipeline_uid_str).map_err(|_| "invalid pipeline uid")?; + Ulid::from_string(pipeline_uid_str).map_err(|_| "invalid pipeline UID")?; Ok(PipelineUid(pipeline_ulid)) } } From 1fd7d0b78340af1c80e8a8b2fd4d0cb56d1edd94 Mon Sep 17 00:00:00 2001 From: Adrien Guillo Date: Thu, 6 Jun 2024 15:56:53 -0400 Subject: [PATCH 3/3] Fix clippy warnings --- .../src/metastore/index_metadata/mod.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/quickwit/quickwit-metastore/src/metastore/index_metadata/mod.rs b/quickwit/quickwit-metastore/src/metastore/index_metadata/mod.rs index 3d4b56bbcac..f3e59fe90b6 100644 --- a/quickwit/quickwit-metastore/src/metastore/index_metadata/mod.rs +++ b/quickwit/quickwit-metastore/src/metastore/index_metadata/mod.rs @@ -20,19 +20,17 @@ pub(crate) mod serialize; use std::collections::hash_map::Entry; -use std::collections::{BTreeMap, HashMap}; +use std::collections::HashMap; use quickwit_common::uri::Uri; -use quickwit_config::{ - IndexConfig, RetentionPolicy, SearchSettings, SourceConfig, TestableForRegression, -}; +use quickwit_config::{IndexConfig, RetentionPolicy, SearchSettings, SourceConfig}; use quickwit_proto::metastore::{EntityKind, MetastoreError, MetastoreResult}; -use quickwit_proto::types::{IndexUid, Position, SourceId}; +use quickwit_proto::types::{IndexUid, SourceId}; use serde::{Deserialize, Serialize}; use serialize::VersionedIndexMetadata; use time::OffsetDateTime; -use crate::checkpoint::{IndexCheckpoint, PartitionId, SourceCheckpoint, SourceCheckpointDelta}; +use crate::checkpoint::IndexCheckpoint; /// An index 
metadata carries all meta data about an index. #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] @@ -161,8 +159,14 @@ impl IndexMetadata { } #[cfg(any(test, feature = "testsuite"))] -impl TestableForRegression for IndexMetadata { +impl quickwit_config::TestableForRegression for IndexMetadata { fn sample_for_regression() -> IndexMetadata { + use std::collections::BTreeMap; + + use quickwit_proto::types::Position; + + use crate::checkpoint::{PartitionId, SourceCheckpoint, SourceCheckpointDelta}; + let mut source_checkpoint = SourceCheckpoint::default(); let delta = SourceCheckpointDelta::from_partition_delta( PartitionId::from(0i64),