From 5b85f7c84619482c2f3d5420c2b4602a9c302f52 Mon Sep 17 00:00:00 2001 From: Adrien Guillo Date: Wed, 19 Jun 2024 11:56:25 -0400 Subject: [PATCH] Add doc mapping UID attribute to `DocMapping` (#5087) * Add `doc_mapping_uid` field to `DocMapper` * Bump config file format version * Leave doc mapping UID out of doc mapping comparison --- quickwit/Cargo.lock | 2 + quickwit/Cargo.toml | 2 +- quickwit/quickwit-cli/src/tool.rs | 4 +- quickwit/quickwit-config/Cargo.toml | 2 + .../quickwit-config/src/index_config/mod.rs | 22 +- .../src/index_config/serialize.rs | 24 ++- .../quickwit-config/src/index_template/mod.rs | 6 +- .../src/index_template/serialize.rs | 3 +- .../quickwit-config/src/source_config/mod.rs | 5 +- .../src/source_config/serialize.rs | 3 +- .../src/control_plane.rs | 10 +- .../src/indexing_scheduler/scheduling/mod.rs | 14 +- quickwit/quickwit-doc-mapper/Cargo.toml | 1 + .../src/default_doc_mapper/default_mapper.rs | 16 +- .../quickwit-doc-mapper/src/doc_mapper.rs | 4 + .../quickwit-doc-mapper/src/doc_mapping.rs | 23 +++ quickwit/quickwit-doc-mapper/src/lib.rs | 2 + .../src/backward_compatibility_tests/mod.rs | 26 ++- .../file_backed_index/serialize.rs | 3 +- .../src/metastore/file_backed/manifest.rs | 24 ++- .../src/metastore/index_metadata/mod.rs | 11 +- .../src/metastore/index_metadata/serialize.rs | 3 +- .../quickwit-metastore/src/split_metadata.rs | 2 +- .../src/split_metadata_version.rs | 3 +- .../file-backed-index/v0.7.expected.json | 11 +- .../file-backed-index/v0.8.expected.json | 11 +- .../file-backed-index/v0.9.expected.json | 194 ++++++++++++++++++ .../test-data/file-backed-index/v0.9.json | 194 ++++++++++++++++++ .../index-metadata/v0.7.expected.json | 7 +- .../index-metadata/v0.8.expected.json | 7 +- .../index-metadata/v0.9.expected.json | 134 ++++++++++++ .../test-data/index-metadata/v0.9.json | 134 ++++++++++++ .../test-data/manifest/v0.7.expected.json | 5 +- .../test-data/manifest/v0.8.expected.json | 5 +- .../test-data/manifest/v0.9.expected.json | 90 ++++++++ .../test-data/manifest/v0.9.json | 90 ++++++++ .../split-metadata/v0.7.expected.json | 2 +- .../split-metadata/v0.8.expected.json | 2 +- .../split-metadata/v0.9.expected.json | 29 +++ .../test-data/split-metadata/v0.9.json | 29 +++ .../src/types/doc_mapping_uid.rs | 167 +++++++++++++++ quickwit/quickwit-proto/src/types/mod.rs | 2 + .../quickwit-proto/src/types/pipeline_uid.rs | 4 +- quickwit/quickwit-search/src/collector.rs | 5 + 44 files changed, 1256 insertions(+), 81 deletions(-) create mode 100644 quickwit/quickwit-metastore/test-data/file-backed-index/v0.9.expected.json create mode 100644 quickwit/quickwit-metastore/test-data/file-backed-index/v0.9.json create mode 100644 quickwit/quickwit-metastore/test-data/index-metadata/v0.9.expected.json create mode 100644 quickwit/quickwit-metastore/test-data/index-metadata/v0.9.json create mode 100644 quickwit/quickwit-metastore/test-data/manifest/v0.9.expected.json create mode 100644 quickwit/quickwit-metastore/test-data/manifest/v0.9.json create mode 100644 quickwit/quickwit-metastore/test-data/split-metadata/v0.9.expected.json create mode 100644 quickwit/quickwit-metastore/test-data/split-metadata/v0.9.json create mode 100644 quickwit/quickwit-proto/src/types/doc_mapping_uid.rs diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 6ec1c8a07f5..6b10d53ae4b 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -5934,6 +5934,7 @@ dependencies = [ "quickwit-common", "quickwit-datetime", "quickwit-macros", + "quickwit-proto", "quickwit-query", "regex", "serde", @@ -9115,6 +9116,7 @@ dependencies = [ "proc-macro2", "quote", "syn 2.0.59", + "ulid", ] [[package]] diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index 25776cda630..46cbeef33e7 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -261,7 +261,7 @@ ttl_cache = "0.5" typetag = "0.2" ulid = "1.1" username = "0.2" -utoipa = "4.2.0" +utoipa = { version = "4.2", features = ["time", "ulid"] } uuid = { version = "1.8", features = ["v4", "serde"] } vrl = { version = "0.8.1", default-features = false, features = [ "compiler", diff --git a/quickwit/quickwit-cli/src/tool.rs b/quickwit/quickwit-cli/src/tool.rs index 7b1e6ad9435..b936cbc4897 100644 --- a/quickwit/quickwit-cli/src/tool.rs +++ b/quickwit/quickwit-cli/src/tool.rs @@ -472,7 +472,7 @@ pub async fn local_ingest_docs_cli(args: LocalIngestDocsArgs) -> anyhow::Result< .ask_for_res(SpawnPipeline { index_id: args.index_id.clone(), source_config, - pipeline_uid: PipelineUid::new(), + pipeline_uid: PipelineUid::random(), }) .await?; let merge_pipeline_handle = indexing_server_mailbox @@ -611,7 +611,7 @@ pub async fn merge_cli(args: MergeArgs) -> anyhow::Result<()> { transform_config: None, input_format: SourceInputFormat::Json, }, - pipeline_uid: PipelineUid::new(), + pipeline_uid: PipelineUid::random(), }) .await?; let pipeline_handle: ActorHandle = indexing_service_mailbox diff --git a/quickwit/quickwit-config/Cargo.toml b/quickwit/quickwit-config/Cargo.toml index b488ead891b..a3f300742ea 100644 --- a/quickwit/quickwit-config/Cargo.toml +++ b/quickwit/quickwit-config/Cargo.toml @@ -41,6 +41,8 @@ quickwit-proto = { workspace = true } [dev-dependencies] tokio = { workspace = true } +quickwit-proto = { workspace = true, features = ["testsuite"] } + [features] testsuite = [] vrl = ["dep:vrl"] diff --git a/quickwit/quickwit-config/src/index_config/mod.rs b/quickwit/quickwit-config/src/index_config/mod.rs index 48adf301b75..08ebee23ff5 100644 --- a/quickwit/quickwit-config/src/index_config/mod.rs +++ b/quickwit/quickwit-config/src/index_config/mod.rs @@ -19,8 +19,6 @@ pub(crate) mod serialize; -use std::collections::BTreeSet; -use std::num::NonZeroU32; use std::str::FromStr; use std::sync::Arc; use std::time::Duration; @@ -31,15 +29,14 @@ use chrono::Utc; use cron::Schedule; use humantime::parse_duration; use quickwit_common::uri::Uri; -use quickwit_doc_mapper::{DefaultDocMapperBuilder, DocMapper, DocMapping, Mode}; +use quickwit_doc_mapper::{DefaultDocMapperBuilder, DocMapper, DocMapping}; use quickwit_proto::types::IndexId; use serde::{Deserialize, Serialize}; pub use serialize::{load_index_config_from_user_config, load_index_config_update}; use tracing::warn; use crate::index_config::serialize::VersionedIndexConfig; -use crate::merge_policy_config::{MergePolicyConfig, StableLogMergePolicyConfig}; -use crate::TestableForRegression; +use crate::merge_policy_config::MergePolicyConfig; #[derive(Clone, Debug, Serialize, Deserialize, utoipa::ToSchema)] #[serde(deny_unknown_fields)] @@ -260,6 +257,7 @@ impl IndexConfig { pub fn for_test(index_id: &str, index_uri: &str) -> Self { let index_uri = Uri::from_str(index_uri).unwrap(); let doc_mapping_json = r#"{ + "doc_mapping_uid": "00000000000000000000000000", "mode": "lenient", "field_mappings": [ { @@ -342,8 +340,17 @@ impl IndexConfig { } } -impl TestableForRegression for IndexConfig { +#[cfg(any(test, feature = "testsuite"))] +impl crate::TestableForRegression for IndexConfig { fn sample_for_regression() -> Self { + use std::collections::BTreeSet; + use std::num::NonZeroU32; + + use quickwit_doc_mapper::Mode; + use quickwit_proto::types::DocMappingUid; + + use crate::merge_policy_config::StableLogMergePolicyConfig; + let tenant_id_mapping = serde_json::from_str( r#"{ "name": "tenant_id", @@ -386,6 +393,7 @@ impl TestableForRegression for IndexConfig { ) .unwrap(); let doc_mapping = DocMapping { + doc_mapping_uid: DocMappingUid::for_test(1), mode: Mode::default(), field_mappings: vec![ tenant_id_mapping, @@ -550,7 +558,7 @@ mod tests { assert_eq!(index_config.indexing_settings.commit_timeout_secs, 61); assert_eq!( index_config.indexing_settings.merge_policy, - MergePolicyConfig::StableLog(StableLogMergePolicyConfig { + MergePolicyConfig::StableLog(crate::StableLogMergePolicyConfig { merge_factor: 9, max_merge_factor: 11, maturation_period: Duration::from_secs(48 * 3600), diff --git a/quickwit/quickwit-config/src/index_config/serialize.rs b/quickwit/quickwit-config/src/index_config/serialize.rs index b02322d5339..cc05728b79c 100644 --- a/quickwit/quickwit-config/src/index_config/serialize.rs +++ b/quickwit/quickwit-config/src/index_config/serialize.rs @@ -19,7 +19,7 @@ use anyhow::{ensure, Context}; use quickwit_common::uri::Uri; -use quickwit_proto::types::IndexId; +use quickwit_proto::types::{DocMappingUid, IndexId}; use serde::{Deserialize, Serialize}; use tracing::info; @@ -35,8 +35,12 @@ type IndexConfigForSerialization = IndexConfigV0_8; #[derive(Clone, Debug, Serialize, Deserialize, utoipa::ToSchema)] #[serde(tag = "version")] pub(crate) enum VersionedIndexConfig { - #[serde(rename = "0.8")] + // The two versions use the same format but for v0.8 and below, we need to set the + // `doc_mapping_uid` to the nil value upon deserialization. + #[serde(rename = "0.9")] + V0_9(IndexConfigV0_8), // Retro compatibility + #[serde(rename = "0.8")] #[serde(alias = "0.7")] V0_8(IndexConfigV0_8), } @@ -45,6 +49,7 @@ impl From for IndexConfigForSerialization { fn from(versioned_config: VersionedIndexConfig) -> IndexConfigForSerialization { match versioned_config { VersionedIndexConfig::V0_8(v0_8) => v0_8, + VersionedIndexConfig::V0_9(v0_8) => v0_8, } } } @@ -75,7 +80,7 @@ pub fn load_index_config_update( let current_index_parent_dir = ¤t_index_config .index_uri .parent() - .context("Unexpected `index_uri` format on current configuration")?; + .expect("index URI should have a parent"); let new_index_config = load_index_config_from_user_config( config_format, index_config_bytes, @@ -94,7 +99,9 @@ pub fn load_index_config_update( new_index_config.index_uri ); ensure!( - current_index_config.doc_mapping == new_index_config.doc_mapping, + current_index_config + .doc_mapping + .eq_ignore_doc_mapping_uid(&new_index_config.doc_mapping), "`doc_mapping` cannot be updated" ); Ok(new_index_config) @@ -147,7 +154,7 @@ impl IndexConfigForSerialization { impl From for VersionedIndexConfig { fn from(index_config: IndexConfig) -> Self { - VersionedIndexConfig::V0_8(index_config.into()) + VersionedIndexConfig::V0_9(index_config.into()) } } @@ -156,7 +163,12 @@ impl TryFrom for IndexConfig { fn try_from(versioned_index_config: VersionedIndexConfig) -> anyhow::Result { match versioned_index_config { - VersionedIndexConfig::V0_8(v0_8) => v0_8.build_and_validate(None), + VersionedIndexConfig::V0_8(mut v0_8) => { + // Override the randomly generated doc mapping UID with the nil value. + v0_8.doc_mapping.doc_mapping_uid = DocMappingUid::default(); + v0_8.build_and_validate(None) + } + VersionedIndexConfig::V0_9(v0_8) => v0_8.build_and_validate(None), } } } diff --git a/quickwit/quickwit-config/src/index_template/mod.rs b/quickwit/quickwit-config/src/index_template/mod.rs index cce634be843..ea57a8907c0 100644 --- a/quickwit/quickwit-config/src/index_template/mod.rs +++ b/quickwit/quickwit-config/src/index_template/mod.rs @@ -28,7 +28,7 @@ pub use serialize::{IndexTemplateV0_8, VersionedIndexTemplate}; use crate::index_config::validate_index_config; use crate::{ validate_identifier, validate_index_id_pattern, DocMapping, IndexConfig, IndexingSettings, - RetentionPolicy, SearchSettings, TestableForRegression, + RetentionPolicy, SearchSettings, }; pub type IndexTemplateId = String; @@ -135,7 +135,8 @@ impl IndexTemplate { } } -impl TestableForRegression for IndexTemplate { +#[cfg(any(test, feature = "testsuite"))] +impl crate::TestableForRegression for IndexTemplate { fn sample_for_regression() -> Self { let template_id = "test-template".to_string(); let index_id_patterns = vec![ @@ -144,6 +145,7 @@ impl TestableForRegression for IndexTemplate { ]; let doc_mapping_json = r#"{ + "doc_mapping_uid": "00000000000000000000000001", "field_mappings": [ { "name": "ts", diff --git a/quickwit/quickwit-config/src/index_template/serialize.rs b/quickwit/quickwit-config/src/index_template/serialize.rs index 8bba1138fb4..6ddc39b3c18 100644 --- a/quickwit/quickwit-config/src/index_template/serialize.rs +++ b/quickwit/quickwit-config/src/index_template/serialize.rs @@ -26,7 +26,8 @@ use crate::{DocMapping, IndexingSettings, RetentionPolicy, SearchSettings}; #[derive(Clone, Debug, Serialize, Deserialize, utoipa::ToSchema)] #[serde(tag = "version")] pub enum VersionedIndexTemplate { - #[serde(rename = "0.8")] + #[serde(rename = "0.9")] + #[serde(alias = "0.8")] #[serde(alias = "0.7")] V0_8(IndexTemplateV0_8), } diff --git a/quickwit/quickwit-config/src/source_config/mod.rs b/quickwit/quickwit-config/src/source_config/mod.rs index d1235df8a38..2354b608d8e 100644 --- a/quickwit/quickwit-config/src/source_config/mod.rs +++ b/quickwit/quickwit-config/src/source_config/mod.rs @@ -35,7 +35,7 @@ pub use serialize::load_source_config_from_user_config; // For backward compatibility. use serialize::VersionedSourceConfig; -use crate::{disable_ingest_v1, enable_ingest_v2, TestableForRegression}; +use crate::{disable_ingest_v1, enable_ingest_v2}; /// Reserved source ID for the `quickwit index ingest` CLI command. pub const CLI_SOURCE_ID: &str = "_ingest-cli-source"; @@ -153,7 +153,8 @@ impl SourceConfig { } } -impl TestableForRegression for SourceConfig { +#[cfg(any(test, feature = "testsuite"))] +impl crate::TestableForRegression for SourceConfig { fn sample_for_regression() -> Self { SourceConfig { source_id: "kafka-source".to_string(), diff --git a/quickwit/quickwit-config/src/source_config/serialize.rs b/quickwit/quickwit-config/src/source_config/serialize.rs index 0877138c712..00b76923775 100644 --- a/quickwit/quickwit-config/src/source_config/serialize.rs +++ b/quickwit/quickwit-config/src/source_config/serialize.rs @@ -32,7 +32,8 @@ type SourceConfigForSerialization = SourceConfigV0_8; #[serde(deny_unknown_fields)] #[serde(tag = "version")] pub enum VersionedSourceConfig { - #[serde(rename = "0.8")] + #[serde(rename = "0.9")] + #[serde(alias = "0.8")] V0_8(SourceConfigV0_8), // Retro compatibility. #[serde(rename = "0.7")] diff --git a/quickwit/quickwit-control-plane/src/control_plane.rs b/quickwit/quickwit-control-plane/src/control_plane.rs index 0f70d3b5ae2..bcaddd5a4e1 100644 --- a/quickwit/quickwit-control-plane/src/control_plane.rs +++ b/quickwit/quickwit-control-plane/src/control_plane.rs @@ -1690,10 +1690,12 @@ mod tests { .unwrap(); assert!(indexing_tasks.is_empty()); - let results: Vec = - client_inbox.drain_for_test_typed::(); - assert_eq!(results.len(), 1); - assert!(results[0].indexing_tasks.is_empty()); + let apply_plan_requests = client_inbox.drain_for_test_typed::(); + assert!(!apply_plan_requests.is_empty()); + + for apply_plan_request in &apply_plan_requests { + assert!(apply_plan_request.indexing_tasks.is_empty()); + } universe.assert_quit().await; } diff --git a/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/mod.rs b/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/mod.rs index 5600ce6f6dc..ead4509f4aa 100644 --- a/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/mod.rs +++ b/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/mod.rs @@ -267,7 +267,7 @@ fn convert_scheduling_solution_to_physical_plan_single_node_single_source( IndexingTask { index_uid: Some(source.source_uid.index_uid.clone()), source_id: source.source_uid.source_id.clone(), - pipeline_uid: Some(PipelineUid::new()), + pipeline_uid: Some(PipelineUid::random()), shard_ids: Vec::new(), } }); @@ -283,7 +283,7 @@ fn convert_scheduling_solution_to_physical_plan_single_node_single_source( vec![IndexingTask { index_uid: Some(source.source_uid.index_uid.clone()), source_id: source.source_uid.source_id.clone(), - pipeline_uid: Some(PipelineUid::new()), + pipeline_uid: Some(PipelineUid::random()), shard_ids: Vec::new(), }] } @@ -551,7 +551,7 @@ fn add_shard_to_indexer( indexer_tasks.push(IndexingTask { index_uid: Some(source_uid.index_uid.clone()), source_id: source_uid.source_id.clone(), - pipeline_uid: Some(PipelineUid::new()), + pipeline_uid: Some(PipelineUid::random()), shard_ids: vec![missing_shard], }); } @@ -1189,13 +1189,13 @@ mod tests { let previous_task1 = IndexingTask { index_uid: Some(source_uid.index_uid.clone()), source_id: source_uid.source_id.to_string(), - pipeline_uid: Some(PipelineUid::new()), + pipeline_uid: Some(PipelineUid::random()), shard_ids: vec![ShardId::from(1), ShardId::from(4), ShardId::from(5)], }; let previous_task2 = IndexingTask { index_uid: Some(source_uid.index_uid.clone()), source_id: source_uid.source_id.to_string(), - pipeline_uid: Some(PipelineUid::new()), + pipeline_uid: Some(PipelineUid::random()), shard_ids: vec![ ShardId::from(6), ShardId::from(7), @@ -1259,14 +1259,14 @@ mod tests { index_uid: IndexUid::new_with_random_ulid("testindex"), source_id: "testsource".to_string(), }; - let pipeline_uid1 = PipelineUid::new(); + let pipeline_uid1 = PipelineUid::random(); let previous_task1 = IndexingTask { index_uid: Some(source_uid.index_uid.clone()), source_id: source_uid.source_id.to_string(), pipeline_uid: Some(pipeline_uid1), shard_ids: Vec::new(), }; - let pipeline_uid2 = PipelineUid::new(); + let pipeline_uid2 = PipelineUid::random(); let previous_task2 = IndexingTask { index_uid: Some(source_uid.index_uid.clone()), source_id: source_uid.source_id.to_string(), diff --git a/quickwit/quickwit-doc-mapper/Cargo.toml b/quickwit/quickwit-doc-mapper/Cargo.toml index cbc3cba567a..f48cc1072d6 100644 --- a/quickwit/quickwit-doc-mapper/Cargo.toml +++ b/quickwit/quickwit-doc-mapper/Cargo.toml @@ -33,6 +33,7 @@ utoipa = { workspace = true } quickwit-common = { workspace = true } quickwit-datetime = { workspace = true } quickwit-macros = { workspace = true } +quickwit-proto = { workspace = true } quickwit-query = { workspace = true } [dev-dependencies] diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs index 34e26e67634..e3f4cc9ba7e 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs @@ -23,6 +23,7 @@ use std::num::NonZeroU32; use anyhow::{bail, Context}; use fnv::FnvHashSet; use quickwit_common::PathHasher; +use quickwit_proto::types::DocMappingUid; use quickwit_query::create_default_quickwit_tokenizer_manager; use quickwit_query::query_ast::QueryAst; use quickwit_query::tokenizers::TokenizerManager; @@ -60,6 +61,8 @@ const FIELD_PRESENCE_FIELD: Field = Field::from_field_id(0u32); #[derive(Clone, Serialize, Deserialize)] #[serde(into = "DefaultDocMapperBuilder", try_from = "DefaultDocMapperBuilder")] pub struct DefaultDocMapper { + /// The UID of the doc mapping. + doc_mapping_uid: DocMappingUid, /// Field in which the source should be stored. /// This field is only valid when using the schema associated with the default /// doc mapper, and therefore cannot be used in the `query` method. @@ -99,13 +102,6 @@ pub struct DefaultDocMapper { tokenizer_manager: TokenizerManager, } -impl DefaultDocMapper { - /// Default maximum number of partitions. - pub fn default_max_num_partitions() -> NonZeroU32 { - DocMapping::default_max_num_partitions() - } -} - fn validate_timestamp_field( timestamp_field_path: &str, mapping_root_node: &MappingNode, @@ -143,6 +139,7 @@ impl From for DefaultDocMapperBuilder { None }; let doc_mapping = DocMapping { + doc_mapping_uid: default_doc_mapper.doc_mapping_uid, mode: default_doc_mapper.mode, field_mappings: default_doc_mapper.field_mappings.into(), timestamp_field: default_doc_mapper.timestamp_field_name, @@ -282,6 +279,7 @@ impl TryFrom for DefaultDocMapper { } } Ok(DefaultDocMapper { + doc_mapping_uid: doc_mapping.doc_mapping_uid, schema, index_field_presence: doc_mapping.index_field_presence, source_field, @@ -505,6 +503,10 @@ fn populate_field_presence_for_json_obj<'a, Iter: Iterator DocMappingUid { + self.doc_mapping_uid + } + fn doc_from_json_obj( &self, json_obj: JsonObject, diff --git a/quickwit/quickwit-doc-mapper/src/doc_mapper.rs b/quickwit/quickwit-doc-mapper/src/doc_mapper.rs index 37fcc385fa1..db1fae3b312 100644 --- a/quickwit/quickwit-doc-mapper/src/doc_mapper.rs +++ b/quickwit/quickwit-doc-mapper/src/doc_mapper.rs @@ -24,6 +24,7 @@ use std::ops::Bound; use anyhow::Context; use dyn_clone::{clone_trait_object, DynClone}; +use quickwit_proto::types::DocMappingUid; use quickwit_query::query_ast::QueryAst; use quickwit_query::tokenizers::TokenizerManager; use serde_json::Value as JsonValue; @@ -47,6 +48,9 @@ use crate::{DocParsingError, QueryParserError}; /// - supplying a tantivy [`Schema`] #[typetag::serde(tag = "type")] pub trait DocMapper: Send + Sync + Debug + DynClone + 'static { + /// Returns the unique identifier of the doc mapping. + fn doc_mapping_uid(&self) -> DocMappingUid; + /// Transforms a JSON object into a tantivy [`Document`] according to the rules /// defined for the `DocMapper`. fn doc_from_json_obj( diff --git a/quickwit/quickwit-doc-mapper/src/doc_mapping.rs b/quickwit/quickwit-doc-mapper/src/doc_mapping.rs index 75400c7a206..f455ef86b5f 100644 --- a/quickwit/quickwit-doc-mapper/src/doc_mapping.rs +++ b/quickwit/quickwit-doc-mapper/src/doc_mapping.rs @@ -20,6 +20,7 @@ use std::collections::BTreeSet; use std::num::NonZeroU32; +use quickwit_proto::types::DocMappingUid; use serde::{Deserialize, Serialize}; use crate::{FieldMappingEntry, QuickwitJsonOptions, TokenizerEntry}; @@ -99,6 +100,13 @@ impl Default for Mode { #[derive(Clone, Debug, PartialEq, Serialize, Deserialize, utoipa::ToSchema)] #[serde(deny_unknown_fields)] pub struct DocMapping { + /// Doc mapping UID. + /// + /// Splits with the same doc mapping UID share the same schema and should use the same doc + /// mapper during indexing and querying. + #[serde(default = "DocMappingUid::random")] + pub doc_mapping_uid: DocMappingUid, + /// Defines how unmapped fields should be handled. #[serde_multikey( deserializer = Mode::from_parts, @@ -163,6 +171,20 @@ impl DocMapping { pub fn default_max_num_partitions() -> NonZeroU32 { NonZeroU32::new(200).unwrap() } + + /// Returns whether the `other` doc mapping is equal to `self` leaving their respective doc + /// mapping UIDs out of the comparison. + pub fn eq_ignore_doc_mapping_uid(&self, other: &Self) -> bool { + let doc_mapping_uid = DocMappingUid::default(); + + let mut left = self.clone(); + left.doc_mapping_uid = doc_mapping_uid; + + let mut right = other.clone(); + right.doc_mapping_uid = doc_mapping_uid; + + left == right + } } #[cfg(test)] @@ -177,6 +199,7 @@ mod tests { #[test] fn test_doc_mapping_serde_roundtrip() { let doc_mapping = DocMapping { + doc_mapping_uid: DocMappingUid::random(), mode: Mode::Strict, field_mappings: vec![ FieldMappingEntry { diff --git a/quickwit/quickwit-doc-mapper/src/lib.rs b/quickwit/quickwit-doc-mapper/src/lib.rs index 9fdc35fbfb2..854faf91d2f 100644 --- a/quickwit/quickwit-doc-mapper/src/lib.rs +++ b/quickwit/quickwit-doc-mapper/src/lib.rs @@ -48,6 +48,7 @@ pub use doc_mapper::{DocMapper, JsonObject, NamedField, TermRange, WarmupInfo}; pub use doc_mapping::{DocMapping, Mode, ModeType}; pub use error::{DocParsingError, QueryParserError}; use quickwit_common::shared_consts::FIELD_PRESENCE_FIELD_NAME; +use quickwit_proto::types::DocMappingUid; pub use routing_expression::RoutingExpr; /// Field name reserved for storing the source document. @@ -78,6 +79,7 @@ pub enum Cardinality { #[derive(utoipa::OpenApi)] #[openapi(components(schemas( + DocMappingUid, FastFieldOptions, FieldMappingEntryForSerialization, IndexRecordOptionSchema, diff --git a/quickwit/quickwit-metastore/src/backward_compatibility_tests/mod.rs b/quickwit/quickwit-metastore/src/backward_compatibility_tests/mod.rs index e707a145e1f..53ad17346ee 100644 --- a/quickwit/quickwit-metastore/src/backward_compatibility_tests/mod.rs +++ b/quickwit/quickwit-metastore/src/backward_compatibility_tests/mod.rs @@ -21,7 +21,7 @@ use std::fs; use std::path::Path; use anyhow::{bail, Context}; -use quickwit_config::TestableForRegression; +use quickwit_config::{IndexConfig, IndexTemplate, SourceConfig, TestableForRegression}; use serde::{Deserialize, Serialize}; use serde_json::Value as JsonValue; @@ -46,7 +46,7 @@ use crate::{IndexMetadata, SplitMetadata}; /// #[serde(rename="0.2")] /// V0_2(MyResourceV1) //< there was no change in this version. /// } -const GLOBAL_QUICKWIT_RESOURCE_VERSION: &str = "0.8"; +const GLOBAL_QUICKWIT_RESOURCE_VERSION: &str = "0.9"; /// This test makes sure that the resource is using the current `GLOBAL_QUICKWIT_RESOURCE_VERSION`. fn test_global_version(serializable: &T) -> anyhow::Result<()> { @@ -197,10 +197,12 @@ where for<'a> T: Serialize { pub(crate) fn test_json_backward_compatibility_helper(test_name: &str) -> anyhow::Result<()> where T: TestableForRegression + std::fmt::Debug { let sample_instance: T = T::sample_for_regression(); + test_global_version(&sample_instance).unwrap(); + let test_dir = Path::new("test-data").join(test_name); - test_global_version(&sample_instance).context("version is not the global version")?; test_backward_compatibility::(&test_dir).context("backward-compatibility")?; test_and_update_expected_files::(&test_dir).context("test-and-update")?; + test_and_create_new_test::(&test_dir, sample_instance) .context("test-and-create-new-test")?; Ok(()) @@ -216,6 +218,18 @@ fn test_index_metadata_backward_compatibility() { test_json_backward_compatibility_helper::("index-metadata").unwrap(); } +#[test] +fn test_index_config_global_version() { + let sample_instance = IndexConfig::sample_for_regression(); + test_global_version(&sample_instance).unwrap(); +} + +#[test] +fn test_source_config_global_version() { + let sample_instance = SourceConfig::sample_for_regression(); + test_global_version(&sample_instance).unwrap(); +} + #[test] fn test_file_backed_index_backward_compatibility() { test_json_backward_compatibility_helper::("file-backed-index").unwrap(); @@ -225,3 +239,9 @@ fn test_file_backed_index_backward_compatibility() { fn test_file_backed_metastore_manifest_backward_compatibility() { test_json_backward_compatibility_helper::("manifest").unwrap(); } + +#[test] +fn test_index_template_global_version() { + let sample_instance = IndexTemplate::sample_for_regression(); + test_global_version(&sample_instance).unwrap(); +} diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/serialize.rs b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/serialize.rs index afceae2ebf5..5d0b79c1501 100644 --- a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/serialize.rs +++ b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/serialize.rs @@ -33,8 +33,9 @@ use crate::{IndexMetadata, Split}; #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(tag = "version")] pub(crate) enum VersionedFileBackedIndex { - #[serde(rename = "0.8")] + #[serde(rename = "0.9")] // Retro compatibility. + #[serde(alias = "0.8")] #[serde(alias = "0.7")] V0_8(FileBackedIndexV0_8), } diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed/manifest.rs b/quickwit/quickwit-metastore/src/metastore/file_backed/manifest.rs index a24dc571ed8..475b8c077e5 100644 --- a/quickwit/quickwit-metastore/src/metastore/file_backed/manifest.rs +++ b/quickwit/quickwit-metastore/src/metastore/file_backed/manifest.rs @@ -22,9 +22,9 @@ use std::path::Path; use itertools::Itertools; use quickwit_common::uri::Uri; -use quickwit_config::{IndexTemplate, IndexTemplateId, TestableForRegression}; +use quickwit_config::{IndexTemplate, IndexTemplateId}; use quickwit_proto::metastore::{serde_utils, MetastoreError, MetastoreResult}; -use quickwit_proto::types::IndexId; +use quickwit_proto::types::{DocMappingUid, IndexId}; use quickwit_storage::{OwnedBytes, Storage, StorageError, StorageErrorKind, StorageResult}; use serde::{Deserialize, Serialize}; use tracing::error; @@ -74,21 +74,32 @@ pub(crate) struct Manifest { #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(tag = "version")] enum VersionedManifest { - #[serde(rename = "0.8")] + // The two versions use the same format but for v0.8 and below, we need to set the + // `doc_mapping_uid` to the nil value upon deserialization. + #[serde(rename = "0.9")] + V0_9(ManifestV0_8), + #[serde(alias = "0.8")] #[serde(alias = "0.7")] V0_8(ManifestV0_8), } impl From for VersionedManifest { fn from(manifest: Manifest) -> Self { - VersionedManifest::V0_8(manifest.into()) + VersionedManifest::V0_9(manifest.into()) } } impl From for Manifest { fn from(versioned_manifest: VersionedManifest) -> Self { match versioned_manifest { - VersionedManifest::V0_8(manifest) => manifest.into(), + VersionedManifest::V0_8(mut manifest) => { + for template in &mut manifest.templates { + // Override the randomly generated doc mapping UID with the nil value. + template.doc_mapping.doc_mapping_uid = DocMappingUid::default(); + } + manifest.into() + } + VersionedManifest::V0_9(manifest) => manifest.into(), } } } @@ -125,7 +136,8 @@ impl From for Manifest { } } -impl TestableForRegression for Manifest { +#[cfg(any(test, feature = "testsuite"))] +impl quickwit_config::TestableForRegression for Manifest { fn sample_for_regression() -> Self { let mut indexes = BTreeMap::new(); indexes.insert("test-index-1".to_string(), IndexStatus::Creating); diff --git a/quickwit/quickwit-metastore/src/metastore/index_metadata/mod.rs b/quickwit/quickwit-metastore/src/metastore/index_metadata/mod.rs index 4176be081d3..c7040f8cfb1 100644 --- a/quickwit/quickwit-metastore/src/metastore/index_metadata/mod.rs +++ b/quickwit/quickwit-metastore/src/metastore/index_metadata/mod.rs @@ -179,6 +179,8 @@ impl quickwit_config::TestableForRegression for IndexMetadata { use crate::checkpoint::{PartitionId, SourceCheckpoint, SourceCheckpointDelta}; + let index_config = IndexConfig::sample_for_regression(); + let mut source_checkpoint = SourceCheckpoint::default(); let delta = SourceCheckpointDelta::from_partition_delta( PartitionId::from(0i64), @@ -187,12 +189,13 @@ impl quickwit_config::TestableForRegression for IndexMetadata { ) .unwrap(); source_checkpoint.try_apply_delta(delta).unwrap(); - let mut per_source_checkpoint: BTreeMap = BTreeMap::default(); - per_source_checkpoint.insert("kafka-source".to_string(), source_checkpoint); + + let per_source_checkpoint: BTreeMap = + BTreeMap::from_iter([("kafka-source".to_string(), source_checkpoint)]); let checkpoint = IndexCheckpoint::from(per_source_checkpoint); - let index_config = IndexConfig::sample_for_regression(); + let mut index_metadata = IndexMetadata { - index_uid: IndexUid::for_test(&index_config.index_id, 0), + index_uid: IndexUid::for_test(&index_config.index_id, 1), index_config, checkpoint, create_timestamp: 1789, diff --git a/quickwit/quickwit-metastore/src/metastore/index_metadata/serialize.rs b/quickwit/quickwit-metastore/src/metastore/index_metadata/serialize.rs index 2834c719e33..73491d3ddf0 100644 --- a/quickwit/quickwit-metastore/src/metastore/index_metadata/serialize.rs +++ b/quickwit/quickwit-metastore/src/metastore/index_metadata/serialize.rs @@ -30,8 +30,9 @@ use crate::IndexMetadata; #[derive(Clone, Debug, Serialize, Deserialize, utoipa::ToSchema)] #[serde(tag = "version")] pub(crate) enum VersionedIndexMetadata { - #[serde(rename = "0.8")] + #[serde(rename = "0.9")] // Retro compatibility. + #[serde(alias = "0.8")] #[serde(alias = "0.7")] V0_8(IndexMetadataV0_8), } diff --git a/quickwit/quickwit-metastore/src/split_metadata.rs b/quickwit/quickwit-metastore/src/split_metadata.rs index 8b2db8be2be..c40ff2256b6 100644 --- a/quickwit/quickwit-metastore/src/split_metadata.rs +++ b/quickwit/quickwit-metastore/src/split_metadata.rs @@ -266,7 +266,7 @@ impl quickwit_config::TestableForRegression for SplitMetadata { fn sample_for_regression() -> Self { SplitMetadata { split_id: "split".to_string(), - index_uid: IndexUid::for_test("my-index", 0), + index_uid: IndexUid::for_test("my-index", 1), source_id: "source".to_string(), node_id: "node".to_string(), delete_opstamp: 10, diff --git a/quickwit/quickwit-metastore/src/split_metadata_version.rs b/quickwit/quickwit-metastore/src/split_metadata_version.rs index 5b98cdb9e4b..ad777636251 100644 --- a/quickwit/quickwit-metastore/src/split_metadata_version.rs +++ b/quickwit/quickwit-metastore/src/split_metadata_version.rs @@ -155,8 +155,9 @@ impl From for SplitMetadataV0_8 { #[derive(Serialize, Deserialize, utoipa::ToSchema)] #[serde(tag = "version")] pub(crate) enum VersionedSplitMetadata { - #[serde(rename = "0.8")] + #[serde(rename = "0.9")] // Retro compatibility. + #[serde(alias = "0.8")] #[serde(alias = "0.7")] V0_8(SplitMetadataV0_8), } diff --git a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.7.expected.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.7.expected.json index 147abe615a6..831a543694f 100644 --- a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.7.expected.json +++ b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.7.expected.json @@ -18,6 +18,7 @@ "create_timestamp": 1789, "index_config": { "doc_mapping": { + "doc_mapping_uid": "00000000000000000000000000", "dynamic_mapping": { "expand_dots": true, "fast": { @@ -119,7 +120,7 @@ "message" ] }, - "version": "0.8" + "version": "0.9" }, "index_uid": "my-index:00000000000000000000000000", "sources": [ @@ -137,10 +138,10 @@ "script": ".message = downcase(string!(.message))", "timezone": "UTC" }, - "version": "0.8" + "version": "0.9" } ], - "version": "0.8" + "version": "0.9" }, "shards": { "_ingest-source": [ @@ -186,8 +187,8 @@ }, "uncompressed_docs_size_in_bytes": 234234, "update_timestamp": 1789, - "version": "0.8" + "version": "0.9" } ], - "version": "0.8" + "version": "0.9" } diff --git a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.8.expected.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.8.expected.json index 147abe615a6..831a543694f 100644 --- a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.8.expected.json +++ b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.8.expected.json @@ -18,6 +18,7 @@ "create_timestamp": 1789, "index_config": { "doc_mapping": { + "doc_mapping_uid": "00000000000000000000000000", "dynamic_mapping": { "expand_dots": true, "fast": { @@ -119,7 +120,7 @@ "message" ] }, - "version": "0.8" + "version": "0.9" }, "index_uid": "my-index:00000000000000000000000000", "sources": [ @@ -137,10 +138,10 @@ "script": ".message = downcase(string!(.message))", "timezone": "UTC" }, - "version": "0.8" + "version": "0.9" } ], - "version": "0.8" + "version": "0.9" }, "shards": { "_ingest-source": [ @@ -186,8 +187,8 @@ }, "uncompressed_docs_size_in_bytes": 234234, "update_timestamp": 1789, - "version": "0.8" + "version": "0.9" } ], - "version": "0.8" + "version": "0.9" } diff --git a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.9.expected.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.9.expected.json new file mode 100644 index 00000000000..9f97b2b9570 --- /dev/null +++ b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.9.expected.json @@ -0,0 +1,194 @@ +{ + "delete_tasks": [ + { + "create_timestamp": 0, + "delete_query": { + "index_uid": "my-index:00000000000000000000000001", + "query_ast": "{\"type\":\"bool\",\"must\":[{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Harry\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}}},{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Potter\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}}}]}" + }, + "opstamp": 10 + } + ], + "index": { + "checkpoint": { + "kafka-source": { + "00000000000000000000": "00000000000000000042" + } + }, + "create_timestamp": 1789, + "index_config": { + "doc_mapping": { + "doc_mapping_uid": "00000000000000000000000001", + "dynamic_mapping": { + "expand_dots": true, + "fast": { + "normalizer": "raw" + }, + "indexed": true, + "record": "basic", + "stored": true, + "tokenizer": "raw" + }, + "field_mappings": [ + { + "coerce": true, + "fast": true, + "indexed": true, + "name": "tenant_id", + "output_format": "number", + "stored": true, + "type": "u64" + }, + { + "fast": true, + "fast_precision": "seconds", + "indexed": true, + "input_formats": [ + "rfc3339", + "unix_timestamp" + ], + "name": "timestamp", + "output_format": "rfc3339", + "stored": true, + "type": "datetime" + }, + { + "fast": false, + "fieldnorms": false, + "indexed": true, + "name": "log_level", + "record": "basic", + "stored": true, + "tokenizer": "raw", + "type": "text" + }, + { + "fast": false, + "fieldnorms": false, + "indexed": true, + "name": "message", + "record": "position", + "stored": true, + "tokenizer": "default", + "type": "text" + } + ], + "index_field_presence": true, + "max_num_partitions": 100, + "mode": "dynamic", + "partition_key": "tenant_id", + "store_document_size": false, + "store_source": true, + "tag_fields": [ + "log_level", + "tenant_id" + ], + "timestamp_field": "timestamp", + "tokenizers": [ + { + "filters": [], + "name": "custom_tokenizer", + "pattern": "[^\\p{L}\\p{N}]+", + "type": "regex" + } + ] + }, + "index_id": "my-index", + "index_uri": "s3://quickwit-indexes/my-index", + "indexing_settings": { + "commit_timeout_secs": 301, + "docstore_blocksize": 1000000, + "docstore_compression_level": 8, + "merge_policy": { + "maturation_period": "2days", + "max_merge_factor": 11, + "merge_factor": 9, + "min_level_num_docs": 100000, + "type": "stable_log" + }, + "resources": { + "heap_size": "50.0 MB" + }, + "split_num_docs_target": 10000001 + }, + "retention": { + "period": "90 days", + "schedule": "daily" + }, + "search_settings": { + "default_search_fields": [ + "message" + ] + }, + "version": "0.9" + }, + "index_uid": "my-index:00000000000000000000000001", + "sources": [ + { + "enabled": true, + "input_format": "json", + "num_pipelines": 2, + "params": { + "client_params": {}, + "topic": "kafka-topic" + }, + "source_id": "kafka-source", + "source_type": "kafka", + "transform": { + "script": ".message = downcase(string!(.message))", + "timezone": "UTC" + }, + "version": "0.9" + } + ], + "version": "0.9" + }, + "shards": { + "_ingest-source": [ + { + "follower_id": "follower-ingester", + "index_uid": "my-index:00000000000000000000000001", + "leader_id": "leader-ingester", + "publish_position_inclusive": "", + "shard_id": "00000000000000000001", + "shard_state": 1, + "source_id": "_ingest-source" + } + ] + }, + "splits": [ + { + "create_timestamp": 3, + "delete_opstamp": 10, + "footer_offsets": { + "end": 2000, + "start": 1000 + }, + "index_uid": "my-index:00000000000000000000000001", + "maturity": { + "maturation_period_millis": 4000, + "type": "immature" + }, + "node_id": "node", + "num_docs": 12303, + "num_merge_ops": 3, + "partition_id": 7, + "publish_timestamp": 1789, + "source_id": "source", + "split_id": "split", + "split_state": "Published", + "tags": [ + "234", + "aaa" + ], + "time_range": { + "end": 130198, + "start": 121000 + }, + "uncompressed_docs_size_in_bytes": 234234, + "update_timestamp": 1789, + "version": "0.9" + } + ], + "version": "0.9" +} diff --git a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.9.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.9.json new file mode 100644 index 00000000000..9f97b2b9570 --- /dev/null +++ b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.9.json @@ -0,0 +1,194 @@ +{ + "delete_tasks": [ + { + "create_timestamp": 0, + "delete_query": { + "index_uid": "my-index:00000000000000000000000001", + "query_ast": "{\"type\":\"bool\",\"must\":[{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Harry\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}}},{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Potter\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}}}]}" + }, + "opstamp": 10 + } + ], + "index": { + "checkpoint": { + "kafka-source": { + "00000000000000000000": "00000000000000000042" + } + }, + "create_timestamp": 1789, + "index_config": { + "doc_mapping": { + "doc_mapping_uid": "00000000000000000000000001", + "dynamic_mapping": { + "expand_dots": true, + "fast": { + "normalizer": "raw" + }, + "indexed": true, + "record": "basic", + "stored": true, + "tokenizer": "raw" + }, + "field_mappings": [ + { + "coerce": true, + "fast": true, + "indexed": true, + "name": "tenant_id", + "output_format": "number", + "stored": true, + "type": "u64" + }, + { + "fast": true, + "fast_precision": "seconds", + "indexed": true, + "input_formats": [ + "rfc3339", + "unix_timestamp" + ], + "name": "timestamp", + "output_format": "rfc3339", + "stored": true, + "type": "datetime" + }, + { + "fast": false, + "fieldnorms": false, + "indexed": true, + "name": "log_level", + "record": "basic", + "stored": true, + "tokenizer": "raw", + "type": "text" + }, + { + "fast": false, + "fieldnorms": false, + "indexed": true, + "name": "message", + "record": "position", + "stored": true, + "tokenizer": "default", + "type": "text" + } + ], + "index_field_presence": true, + "max_num_partitions": 100, + "mode": "dynamic", + "partition_key": "tenant_id", + "store_document_size": false, + "store_source": true, + "tag_fields": [ + "log_level", + "tenant_id" + ], + "timestamp_field": "timestamp", + "tokenizers": [ + { + "filters": [], + "name": "custom_tokenizer", + "pattern": "[^\\p{L}\\p{N}]+", + "type": "regex" + } + ] + }, + "index_id": "my-index", + "index_uri": "s3://quickwit-indexes/my-index", + "indexing_settings": { + "commit_timeout_secs": 301, + "docstore_blocksize": 1000000, + "docstore_compression_level": 8, + "merge_policy": { + "maturation_period": "2days", + "max_merge_factor": 11, + "merge_factor": 9, + "min_level_num_docs": 100000, + "type": "stable_log" + }, + "resources": { + "heap_size": "50.0 MB" + }, + "split_num_docs_target": 10000001 + }, + "retention": { + "period": "90 days", + "schedule": "daily" + }, + "search_settings": { + "default_search_fields": [ + "message" + ] + }, + "version": "0.9" + }, + "index_uid": "my-index:00000000000000000000000001", + "sources": [ + { + "enabled": true, + "input_format": "json", + "num_pipelines": 2, + "params": { + "client_params": {}, + "topic": "kafka-topic" + }, + "source_id": "kafka-source", + "source_type": "kafka", + "transform": { + "script": ".message = downcase(string!(.message))", + "timezone": "UTC" + }, + "version": "0.9" + } + ], + "version": "0.9" + }, + "shards": { + "_ingest-source": [ + { + "follower_id": "follower-ingester", + "index_uid": "my-index:00000000000000000000000001", + "leader_id": "leader-ingester", + "publish_position_inclusive": "", + "shard_id": "00000000000000000001", + "shard_state": 1, + "source_id": "_ingest-source" + } + ] + }, + "splits": [ + { + "create_timestamp": 3, + "delete_opstamp": 10, + "footer_offsets": { + "end": 2000, + "start": 1000 + }, + "index_uid": "my-index:00000000000000000000000001", + "maturity": { + "maturation_period_millis": 4000, + "type": "immature" + }, + "node_id": "node", + "num_docs": 12303, + "num_merge_ops": 3, + "partition_id": 7, + "publish_timestamp": 1789, + "source_id": "source", + "split_id": "split", + "split_state": "Published", + "tags": [ + "234", + "aaa" + ], + "time_range": { + "end": 130198, + "start": 121000 + }, + "uncompressed_docs_size_in_bytes": 234234, + "update_timestamp": 1789, + "version": "0.9" + } + ], + "version": "0.9" +} diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.7.expected.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.7.expected.json index e0c572da32f..3d0b3d92628 100644 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.7.expected.json +++ b/quickwit/quickwit-metastore/test-data/index-metadata/v0.7.expected.json @@ -7,6 +7,7 @@ "create_timestamp": 1789, "index_config": { "doc_mapping": { + "doc_mapping_uid": "00000000000000000000000000", "dynamic_mapping": { "expand_dots": true, "fast": { @@ -108,7 +109,7 @@ "message" ] }, - "version": "0.8" + "version": "0.9" }, "index_uid": "my-index:00000000000000000000000000", "sources": [ @@ -126,8 +127,8 @@ "script": ".message = downcase(string!(.message))", "timezone": "UTC" }, - "version": "0.8" + "version": "0.9" } ], - "version": "0.8" + "version": "0.9" } diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.8.expected.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.8.expected.json index e0c572da32f..3d0b3d92628 100644 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.8.expected.json +++ b/quickwit/quickwit-metastore/test-data/index-metadata/v0.8.expected.json @@ -7,6 +7,7 @@ "create_timestamp": 1789, "index_config": { "doc_mapping": { + "doc_mapping_uid": "00000000000000000000000000", "dynamic_mapping": { "expand_dots": true, "fast": { @@ -108,7 +109,7 @@ "message" ] }, - "version": "0.8" + "version": "0.9" }, "index_uid": "my-index:00000000000000000000000000", "sources": [ @@ -126,8 +127,8 @@ "script": ".message = downcase(string!(.message))", "timezone": "UTC" }, - "version": "0.8" + "version": "0.9" } ], - "version": "0.8" + "version": "0.9" } diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.9.expected.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.9.expected.json new file mode 100644 index 00000000000..3316f18fde5 --- /dev/null +++ b/quickwit/quickwit-metastore/test-data/index-metadata/v0.9.expected.json @@ -0,0 +1,134 @@ +{ + "checkpoint": { + "kafka-source": { + "00000000000000000000": "00000000000000000042" + } + }, + "create_timestamp": 1789, + "index_config": { + "doc_mapping": { + "doc_mapping_uid": "00000000000000000000000001", + "dynamic_mapping": { + "expand_dots": true, + "fast": { + "normalizer": "raw" + }, + "indexed": true, + "record": "basic", + "stored": true, + "tokenizer": "raw" + }, + "field_mappings": [ + { + "coerce": true, + "fast": true, + "indexed": true, + "name": "tenant_id", + "output_format": "number", + "stored": true, + "type": "u64" + }, + { + "fast": true, + "fast_precision": "seconds", + "indexed": true, + "input_formats": [ + "rfc3339", + "unix_timestamp" + ], + "name": "timestamp", + "output_format": "rfc3339", + "stored": true, + "type": "datetime" + }, + { + "fast": false, + "fieldnorms": false, + "indexed": true, + "name": "log_level", + "record": "basic", + "stored": true, + "tokenizer": "raw", + "type": "text" + }, + { + "fast": false, + "fieldnorms": false, + "indexed": true, + "name": "message", + "record": "position", + "stored": true, + "tokenizer": "default", + "type": "text" + } + ], + "index_field_presence": true, + "max_num_partitions": 100, + "mode": "dynamic", + "partition_key": "tenant_id", + "store_document_size": false, + "store_source": true, + "tag_fields": [ + "log_level", + "tenant_id" + ], + "timestamp_field": "timestamp", + "tokenizers": [ + { + "filters": [], + "name": "custom_tokenizer", + "pattern": "[^\\p{L}\\p{N}]+", + "type": "regex" + } + ] + }, + "index_id": "my-index", + "index_uri": "s3://quickwit-indexes/my-index", + "indexing_settings": { + "commit_timeout_secs": 301, + "docstore_blocksize": 1000000, + "docstore_compression_level": 8, + "merge_policy": { + "maturation_period": "2days", + "max_merge_factor": 11, + "merge_factor": 9, + "min_level_num_docs": 100000, + "type": "stable_log" + }, + "resources": { + "heap_size": "50.0 MB" + }, + "split_num_docs_target": 10000001 + }, + "retention": { + "period": "90 days", + "schedule": "daily" + }, + "search_settings": { + "default_search_fields": [ + "message" + ] + }, + "version": "0.9" + }, + "index_uid": "my-index:00000000000000000000000001", + "sources": [ + { + "enabled": true, + "input_format": "json", + "num_pipelines": 2, + "params": { + "client_params": {}, + "topic": "kafka-topic" + }, + "source_id": "kafka-source", + "source_type": "kafka", + "transform": { + "script": ".message = downcase(string!(.message))", + "timezone": "UTC" + }, + "version": "0.9" + } + ], + "version": "0.9" +} diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.9.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.9.json new file mode 100644 index 00000000000..3316f18fde5 --- /dev/null +++ b/quickwit/quickwit-metastore/test-data/index-metadata/v0.9.json @@ -0,0 +1,134 @@ +{ + "checkpoint": { + "kafka-source": { + "00000000000000000000": "00000000000000000042" + } + }, + "create_timestamp": 1789, + "index_config": { + "doc_mapping": { + "doc_mapping_uid": "00000000000000000000000001", + "dynamic_mapping": { + "expand_dots": true, + "fast": { + "normalizer": "raw" + }, + "indexed": true, + "record": "basic", + "stored": true, + "tokenizer": "raw" + }, + "field_mappings": [ + { + "coerce": true, + "fast": true, + "indexed": true, + "name": "tenant_id", + "output_format": "number", + "stored": true, + "type": "u64" + }, + { + "fast": true, + "fast_precision": "seconds", + "indexed": true, + "input_formats": [ + "rfc3339", + "unix_timestamp" + ], + "name": "timestamp", + "output_format": "rfc3339", + "stored": true, + "type": "datetime" + }, + { + "fast": false, + "fieldnorms": false, + "indexed": true, + "name": "log_level", + "record": "basic", + "stored": true, + "tokenizer": "raw", + "type": "text" + }, + { + "fast": false, + "fieldnorms": false, + "indexed": true, + "name": "message", + "record": "position", + "stored": true, + "tokenizer": "default", + "type": "text" + } + ], + "index_field_presence": true, + "max_num_partitions": 100, + "mode": "dynamic", + "partition_key": "tenant_id", + "store_document_size": false, + "store_source": true, + "tag_fields": [ + "log_level", + "tenant_id" + ], + "timestamp_field": "timestamp", + "tokenizers": [ + { + "filters": [], + "name": "custom_tokenizer", + "pattern": "[^\\p{L}\\p{N}]+", + "type": "regex" + } + ] + }, + "index_id": "my-index", + "index_uri": "s3://quickwit-indexes/my-index", + "indexing_settings": { + "commit_timeout_secs": 301, + "docstore_blocksize": 1000000, + "docstore_compression_level": 8, + "merge_policy": { + "maturation_period": "2days", + "max_merge_factor": 11, + "merge_factor": 9, + "min_level_num_docs": 100000, + "type": "stable_log" + }, + "resources": { + "heap_size": "50.0 MB" + }, + "split_num_docs_target": 10000001 + }, + "retention": { + "period": "90 days", + "schedule": "daily" + }, + "search_settings": { + "default_search_fields": [ + "message" + ] + }, + "version": "0.9" + }, + "index_uid": "my-index:00000000000000000000000001", + "sources": [ + { + "enabled": true, + "input_format": "json", + "num_pipelines": 2, + "params": { + "client_params": {}, + "topic": "kafka-topic" + }, + "source_id": "kafka-source", + "source_type": "kafka", + "transform": { + "script": ".message = downcase(string!(.message))", + "timezone": "UTC" + }, + "version": "0.9" + } + ], + "version": "0.9" +} diff --git a/quickwit/quickwit-metastore/test-data/manifest/v0.7.expected.json b/quickwit/quickwit-metastore/test-data/manifest/v0.7.expected.json index a951508ba44..534884da59f 100644 --- a/quickwit/quickwit-metastore/test-data/manifest/v0.7.expected.json +++ b/quickwit/quickwit-metastore/test-data/manifest/v0.7.expected.json @@ -8,6 +8,7 @@ { "description": "Test description.", "doc_mapping": { + "doc_mapping_uid": "00000000000000000000000000", "dynamic_mapping": { "expand_dots": true, "fast": { @@ -82,8 +83,8 @@ "default_search_fields": [] }, "template_id": "test-template", - "version": "0.8" + "version": "0.9" } ], - "version": "0.8" + "version": "0.9" } diff --git a/quickwit/quickwit-metastore/test-data/manifest/v0.8.expected.json b/quickwit/quickwit-metastore/test-data/manifest/v0.8.expected.json index a951508ba44..534884da59f 100644 --- a/quickwit/quickwit-metastore/test-data/manifest/v0.8.expected.json +++ b/quickwit/quickwit-metastore/test-data/manifest/v0.8.expected.json @@ -8,6 +8,7 @@ { "description": "Test description.", "doc_mapping": { + "doc_mapping_uid": "00000000000000000000000000", "dynamic_mapping": { "expand_dots": true, "fast": { @@ -82,8 +83,8 @@ "default_search_fields": [] }, "template_id": "test-template", - "version": "0.8" + "version": "0.9" } ], - "version": "0.8" + "version": "0.9" } diff --git a/quickwit/quickwit-metastore/test-data/manifest/v0.9.expected.json b/quickwit/quickwit-metastore/test-data/manifest/v0.9.expected.json new file mode 100644 index 00000000000..a21adb65ebe --- /dev/null +++ b/quickwit/quickwit-metastore/test-data/manifest/v0.9.expected.json @@ -0,0 +1,90 @@ +{ + "indexes": { + "test-index-1": "creating", + "test-index-2": "active", + "test-index-3": "deleting" + }, + "templates": [ + { + "description": "Test description.", + "doc_mapping": { + "doc_mapping_uid": "00000000000000000000000001", + "dynamic_mapping": { + "expand_dots": true, + "fast": { + "normalizer": "raw" + }, + "indexed": true, + "record": "basic", + "stored": true, + "tokenizer": "raw" + }, + "field_mappings": [ + { + "fast": true, + "fast_precision": "seconds", + "indexed": true, + "input_formats": [ + "rfc3339", + "unix_timestamp" + ], + "name": "ts", + "output_format": "rfc3339", + "stored": true, + "type": "datetime" + }, + { + "expand_dots": true, + "fast": false, + "indexed": true, + "name": "message", + "record": "basic", + "stored": true, + "tokenizer": "raw", + "type": "json" + } + ], + "index_field_presence": false, + "max_num_partitions": 200, + "mode": "dynamic", + "store_document_size": false, + "store_source": false, + "tag_fields": [], + "timestamp_field": "ts", + "tokenizers": [] + }, + "index_id_patterns": [ + "test-index-foo*", + "-test-index-foobar" + ], + "index_root_uri": "ram:///indexes", + "indexing_settings": { + "commit_timeout_secs": 60, + "docstore_blocksize": 1000000, + "docstore_compression_level": 8, + "merge_policy": { + "maturation_period": "2days", + "max_merge_factor": 12, + "merge_factor": 10, + "min_level_num_docs": 100000, + "type": "stable_log" + }, + "resources": { + "heap_size": "2.0 GB" + }, + "split_num_docs_target": 10000000 + }, + "priority": 100, + "retention": { + "period": "42 days", + "schedule": "daily" + }, + "search_settings": { + "default_search_fields": [] + }, + "template_id": "test-template", + "version": "0.9" + } + ], + "version": "0.9" +} diff --git a/quickwit/quickwit-metastore/test-data/manifest/v0.9.json b/quickwit/quickwit-metastore/test-data/manifest/v0.9.json new file mode 100644 index 00000000000..a21adb65ebe --- /dev/null +++ b/quickwit/quickwit-metastore/test-data/manifest/v0.9.json @@ -0,0 +1,90 @@ +{ + "indexes": { + "test-index-1": "creating", + "test-index-2": "active", + "test-index-3": "deleting" + }, + "templates": [ + { + "description": "Test description.", + "doc_mapping": { + "doc_mapping_uid": "00000000000000000000000001", + "dynamic_mapping": { + "expand_dots": true, + "fast": { + "normalizer": "raw" + }, + "indexed": true, + "record": "basic", + "stored": true, + "tokenizer": "raw" + }, + "field_mappings": [ + { + "fast": true, + "fast_precision": "seconds", + "indexed": true, + "input_formats": [ + "rfc3339", + "unix_timestamp" + ], + "name": "ts", + "output_format": "rfc3339", + "stored": true, + "type": "datetime" + }, + { + "expand_dots": true, + "fast": false, + "indexed": true, + "name": "message", + "record": "basic", + "stored": true, + "tokenizer": "raw", + "type": "json" + } + ], + "index_field_presence": false, + "max_num_partitions": 200, + "mode": "dynamic", + "store_document_size": false, + "store_source": false, + "tag_fields": [], + "timestamp_field": "ts", + "tokenizers": [] + }, + "index_id_patterns": [ + "test-index-foo*", + "-test-index-foobar" + ], + "index_root_uri": "ram:///indexes", + "indexing_settings": { + "commit_timeout_secs": 60, + "docstore_blocksize": 1000000, + "docstore_compression_level": 8, + "merge_policy": { + "maturation_period": "2days", + "max_merge_factor": 12, + "merge_factor": 10, + "min_level_num_docs": 100000, + "type": "stable_log" + }, + "resources": { + "heap_size": "2.0 GB" + }, + "split_num_docs_target": 10000000 + }, + "priority": 100, + "retention": { + "period": "42 days", + "schedule": "daily" + }, + "search_settings": { + "default_search_fields": [] + }, + "template_id": "test-template", + "version": "0.9" + } + ], + "version": "0.9" +} diff --git a/quickwit/quickwit-metastore/test-data/split-metadata/v0.7.expected.json b/quickwit/quickwit-metastore/test-data/split-metadata/v0.7.expected.json index d6475bd3525..b7a7ffef08b 100644 --- a/quickwit/quickwit-metastore/test-data/split-metadata/v0.7.expected.json +++ b/quickwit/quickwit-metastore/test-data/split-metadata/v0.7.expected.json @@ -25,5 +25,5 @@ "start": 121000 }, "uncompressed_docs_size_in_bytes": 234234, - "version": "0.8" + "version": "0.9" } diff --git a/quickwit/quickwit-metastore/test-data/split-metadata/v0.8.expected.json b/quickwit/quickwit-metastore/test-data/split-metadata/v0.8.expected.json index d6475bd3525..b7a7ffef08b 100644 --- a/quickwit/quickwit-metastore/test-data/split-metadata/v0.8.expected.json +++ b/quickwit/quickwit-metastore/test-data/split-metadata/v0.8.expected.json @@ -25,5 +25,5 @@ "start": 121000 }, "uncompressed_docs_size_in_bytes": 234234, - "version": "0.8" + "version": "0.9" } diff --git a/quickwit/quickwit-metastore/test-data/split-metadata/v0.9.expected.json b/quickwit/quickwit-metastore/test-data/split-metadata/v0.9.expected.json new file mode 100644 index 00000000000..9fe4c60aae1 --- /dev/null +++ b/quickwit/quickwit-metastore/test-data/split-metadata/v0.9.expected.json @@ -0,0 +1,29 @@ +{ + "create_timestamp": 3, + "delete_opstamp": 10, + "footer_offsets": { + "end": 2000, + "start": 1000 + }, + "index_uid": "my-index:00000000000000000000000001", + "maturity": { + "maturation_period_millis": 4000, + "type": "immature" + }, + "node_id": "node", + "num_docs": 12303, + "num_merge_ops": 3, + "partition_id": 7, + "source_id": "source", + "split_id": "split", + "tags": [ + "234", + "aaa" + ], + "time_range": { + "end": 130198, + "start": 121000 + }, + "uncompressed_docs_size_in_bytes": 234234, + "version": "0.9" +} diff --git a/quickwit/quickwit-metastore/test-data/split-metadata/v0.9.json b/quickwit/quickwit-metastore/test-data/split-metadata/v0.9.json new file mode 100644 index 00000000000..9fe4c60aae1 --- /dev/null +++ b/quickwit/quickwit-metastore/test-data/split-metadata/v0.9.json @@ -0,0 +1,29 @@ +{ + "create_timestamp": 3, + "delete_opstamp": 10, + "footer_offsets": { + "end": 2000, + "start": 1000 + }, + "index_uid": "my-index:00000000000000000000000001", + "maturity": { + "maturation_period_millis": 4000, + "type": "immature" + }, + "node_id": "node", + "num_docs": 12303, + "num_merge_ops": 3, + "partition_id": 7, + "source_id": "source", + "split_id": "split", + "tags": [ + "234", + "aaa" + ], + "time_range": { + "end": 130198, + "start": 121000 + }, + "uncompressed_docs_size_in_bytes": 234234, + "version": "0.9" +} diff --git a/quickwit/quickwit-proto/src/types/doc_mapping_uid.rs b/quickwit/quickwit-proto/src/types/doc_mapping_uid.rs new file mode 100644 index 00000000000..50f9cd23cae --- /dev/null +++ b/quickwit/quickwit-proto/src/types/doc_mapping_uid.rs @@ -0,0 +1,167 @@ +// Copyright (C) 2024 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +use std::borrow::Cow; +use std::fmt; + +use serde::de::Error; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +pub use ulid::Ulid; + +use crate::types::pipeline_uid::ULID_SIZE; + +/// Unique identifier for a document mapping. +#[derive(Clone, Copy, Default, Hash, Eq, PartialEq, Ord, PartialOrd, utoipa::ToSchema)] +pub struct DocMappingUid(Ulid); + +impl fmt::Debug for DocMappingUid { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "DocMapping({})", self.0) + } +} + +impl fmt::Display for DocMappingUid { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.0.fmt(f) + } +} + +impl From for DocMappingUid { + fn from(ulid: Ulid) -> Self { + Self(ulid) + } +} + +impl DocMappingUid { + /// Creates a new random doc mapping UID. + pub fn random() -> Self { + Self(Ulid::new()) + } + + #[cfg(any(test, feature = "testsuite"))] + pub fn for_test(ulid_u128: u128) -> DocMappingUid { + Self(Ulid::from(ulid_u128)) + } +} + +impl<'de> Deserialize<'de> for DocMappingUid { + fn deserialize(deserializer: D) -> Result + where D: Deserializer<'de> { + let ulid_str: Cow<'de, str> = Cow::deserialize(deserializer)?; + let ulid = Ulid::from_string(&ulid_str).map_err(D::Error::custom)?; + Ok(Self(ulid)) + } +} + +impl Serialize for DocMappingUid { + fn serialize(&self, serializer: S) -> Result + where S: Serializer { + serializer.collect_str(&self.0) + } +} + +impl prost::Message for DocMappingUid { + fn encode_raw(&self, buf: &mut B) + where B: prost::bytes::BufMut { + // TODO: when `bytes::encode` supports `&[u8]`, we can remove this allocation. + prost::encoding::bytes::encode(1u32, &self.0.to_bytes().to_vec(), buf); + } + + fn merge_field( + &mut self, + tag: u32, + wire_type: prost::encoding::WireType, + buf: &mut B, + ctx: prost::encoding::DecodeContext, + ) -> ::core::result::Result<(), prost::DecodeError> + where + B: prost::bytes::Buf, + { + const STRUCT_NAME: &str = "DocMappingUid"; + + match tag { + 1u32 => { + let mut buffer = Vec::with_capacity(ULID_SIZE); + + prost::encoding::bytes::merge(wire_type, &mut buffer, buf, ctx).map_err( + |mut error| { + error.push(STRUCT_NAME, "doc_mapping_uid"); + error + }, + )?; + let ulid_bytes: [u8; ULID_SIZE] = + buffer.try_into().map_err(|buffer: Vec| { + prost::DecodeError::new(format!( + "invalid length for field `doc_mapping_uid`, expected 16 bytes, got {}", + buffer.len() + )) + })?; + self.0 = Ulid::from_bytes(ulid_bytes); + Ok(()) + } + _ => prost::encoding::skip_field(wire_type, tag, buf, ctx), + } + } + + #[inline] + fn encoded_len(&self) -> usize { + prost::encoding::key_len(1u32) + + prost::encoding::encoded_len_varint(ULID_SIZE as u64) + + ULID_SIZE + } + + fn clear(&mut self) { + self.0 = Ulid::nil(); + } +} + +#[cfg(test)] +mod tests { + use bytes::Bytes; + use prost::Message; + + use super::*; + + #[test] + fn test_doc_mapping_uid_json_serde_roundtrip() { + let doc_mapping_uid = DocMappingUid::default(); + let serialized = serde_json::to_string(&doc_mapping_uid).unwrap(); + assert_eq!(serialized, r#""00000000000000000000000000""#); + + let deserialized: DocMappingUid = serde_json::from_str(&serialized).unwrap(); + assert_eq!(deserialized, doc_mapping_uid); + } + + #[test] + fn test_doc_mapping_uid_prost_serde_roundtrip() { + let doc_mapping_uid = DocMappingUid::random(); + + let encoded = doc_mapping_uid.encode_to_vec(); + assert_eq!( + DocMappingUid::decode(Bytes::from(encoded)).unwrap(), + doc_mapping_uid + ); + + let encoded = doc_mapping_uid.encode_length_delimited_to_vec(); + assert_eq!( + DocMappingUid::decode_length_delimited(Bytes::from(encoded)).unwrap(), + doc_mapping_uid + ); + } +} diff --git a/quickwit/quickwit-proto/src/types/mod.rs b/quickwit/quickwit-proto/src/types/mod.rs index f1fa01a852a..f45f3efbc55 100644 --- a/quickwit/quickwit-proto/src/types/mod.rs +++ b/quickwit/quickwit-proto/src/types/mod.rs @@ -28,11 +28,13 @@ use serde::{Deserialize, Serialize}; use tracing::warn; pub use ulid::Ulid; +mod doc_mapping_uid; mod index_uid; mod pipeline_uid; mod position; mod shard_id; +pub use doc_mapping_uid::DocMappingUid; pub use index_uid::IndexUid; pub use pipeline_uid::PipelineUid; pub use position::Position; diff --git a/quickwit/quickwit-proto/src/types/pipeline_uid.rs b/quickwit/quickwit-proto/src/types/pipeline_uid.rs index 67ff395d13c..bb2b37669d1 100644 --- a/quickwit/quickwit-proto/src/types/pipeline_uid.rs +++ b/quickwit/quickwit-proto/src/types/pipeline_uid.rs @@ -45,7 +45,7 @@ impl Display for PipelineUid { impl PipelineUid { /// Creates a new random pipeline uid. - pub fn new() -> Self { + pub fn random() -> Self { Self(Ulid::new()) } @@ -154,7 +154,7 @@ mod tests { #[test] fn test_pipeline_uid_prost_serde_roundtrip() { - let pipeline_uid = PipelineUid::new(); + let pipeline_uid = PipelineUid::random(); let encoded = pipeline_uid.encode_to_vec(); assert_eq!( diff --git a/quickwit/quickwit-search/src/collector.rs b/quickwit/quickwit-search/src/collector.rs index 97a4a3c75d4..4c44cd25be6 100644 --- a/quickwit/quickwit-search/src/collector.rs +++ b/quickwit/quickwit-search/src/collector.rs @@ -1267,6 +1267,7 @@ mod tests { LeafSearchResponse, PartialHit, SearchRequest, SortByValue, SortField, SortOrder, SortValue, SplitSearchError, }; + use quickwit_proto::types::DocMappingUid; use tantivy::collector::Collector; use tantivy::TantivyDocument; @@ -1338,6 +1339,10 @@ mod tests { #[typetag::serde(name = "mock")] impl quickwit_doc_mapper::DocMapper for MockDocMapper { + fn doc_mapping_uid(&self) -> DocMappingUid { + DocMappingUid::default() + } + // Required methods fn doc_from_json_obj( &self,