From 4aab7d9e311fcc480e8094d2bf4561b80abd257d Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 15 Dec 2023 16:10:15 +0800 Subject: [PATCH] remove schema fallback --- quickwit/Cargo.lock | 1 - .../quickwit-indexing/src/actors/packager.rs | 19 +++++- quickwit/quickwit-proto/Cargo.toml | 1 - .../protos/quickwit/search.proto | 2 - .../src/codegen/quickwit/quickwit.search.rs | 3 - quickwit/quickwit-proto/src/search/mod.rs | 18 ------ quickwit/quickwit-search/src/list_fields.rs | 59 ++----------------- quickwit/quickwit-search/src/service.rs | 2 - 8 files changed, 21 insertions(+), 84 deletions(-) diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index d6297ec0c2a..c71b5f4e147 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -5831,7 +5831,6 @@ dependencies = [ "serde", "serde_json", "sqlx", - "tantivy", "thiserror", "tokio", "tonic 0.9.2", diff --git a/quickwit/quickwit-indexing/src/actors/packager.rs b/quickwit/quickwit-indexing/src/actors/packager.rs index 9e3e01f8293..0acc6d0de42 100644 --- a/quickwit/quickwit-indexing/src/actors/packager.rs +++ b/quickwit/quickwit-indexing/src/actors/packager.rs @@ -35,7 +35,7 @@ use quickwit_doc_mapper::NamedField; use quickwit_proto::search::{ serialize_split_fields, ListFieldType, ListFields, ListFieldsEntryResponse, }; -use tantivy::schema::FieldType; +use tantivy::schema::{FieldType, Type}; use tantivy::{FieldMetadata, InvertedIndexReader, ReloadPolicy, SegmentMeta}; use tokio::runtime::Handle; use tracing::{debug, info, instrument, warn}; @@ -343,12 +343,27 @@ pub fn serialize_field_metadata(fields_metadata: &[FieldMetadata]) -> Vec { serialize_split_fields(ListFields { fields }) } +fn tantivy_type_to_list_field_type(typ: Type) -> ListFieldType { + match typ { + Type::Str => ListFieldType::Str, + Type::U64 => ListFieldType::U64, + Type::I64 => ListFieldType::I64, + Type::F64 => ListFieldType::F64, + Type::Bool => ListFieldType::Bool, + Type::Date => ListFieldType::Date, + Type::Facet => ListFieldType::Facet, + Type::Bytes => ListFieldType::Bytes, + Type::Json => ListFieldType::Json, + Type::IpAddr => ListFieldType::IpAddr, + } +} + fn field_metadata_to_list_field_serialized( field_metadata: &FieldMetadata, ) -> ListFieldsEntryResponse { ListFieldsEntryResponse { field_name: field_metadata.field_name.to_string(), - field_type: ListFieldType::from(field_metadata.typ) as i32, + field_type: tantivy_type_to_list_field_type(field_metadata.typ) as i32, searchable: field_metadata.indexed, aggregatable: field_metadata.fast, index_ids: vec![], diff --git a/quickwit/quickwit-proto/Cargo.toml b/quickwit/quickwit-proto/Cargo.toml index e9c7bdb0fa8..4d81ac71eeb 100644 --- a/quickwit/quickwit-proto/Cargo.toml +++ b/quickwit/quickwit-proto/Cargo.toml @@ -25,7 +25,6 @@ prost-types = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } sqlx = { workspace = true, optional = true } -tantivy = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } tonic = { workspace = true } diff --git a/quickwit/quickwit-proto/protos/quickwit/search.proto b/quickwit/quickwit-proto/protos/quickwit/search.proto index eef0bfa15b2..7f5b05231bc 100644 --- a/quickwit/quickwit-proto/protos/quickwit/search.proto +++ b/quickwit/quickwit-proto/protos/quickwit/search.proto @@ -141,8 +141,6 @@ message LeafListFieldsRequest { // Wildcard expressions are supported. repeated string fields = 4; - // `DocMapper` as json serialized trait. - string doc_mapper = 5; } message ListFieldsResponse { diff --git a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs index b0fd5ba0772..2c55a308fa5 100644 --- a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs +++ b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs @@ -90,9 +90,6 @@ pub struct LeafListFieldsRequest { /// Wildcard expressions are supported. #[prost(string, repeated, tag = "4")] pub fields: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, - /// `DocMapper` as json serialized trait. - #[prost(string, tag = "5")] - pub doc_mapper: ::prost::alloc::string::String, } #[derive(Serialize, Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] diff --git a/quickwit/quickwit-proto/src/search/mod.rs b/quickwit/quickwit-proto/src/search/mod.rs index c5fcfcfb4d8..682c5041bd1 100644 --- a/quickwit/quickwit-proto/src/search/mod.rs +++ b/quickwit/quickwit-proto/src/search/mod.rs @@ -23,7 +23,6 @@ use std::io::{self, Read}; use prost::Message; pub use sort_by_value::SortValue; -use tantivy::schema::Type; include!("../codegen/quickwit/quickwit.search.rs"); @@ -247,23 +246,6 @@ impl PartialHit { } } -impl From for ListFieldType { - fn from(typ: Type) -> Self { - match typ { - Type::Str => ListFieldType::Str, - Type::U64 => ListFieldType::U64, - Type::I64 => ListFieldType::I64, - Type::F64 => ListFieldType::F64, - Type::Bool => ListFieldType::Bool, - Type::Date => ListFieldType::Date, - Type::Facet => ListFieldType::Facet, - Type::Bytes => ListFieldType::Bytes, - Type::Json => ListFieldType::Json, - Type::IpAddr => ListFieldType::IpAddr, - } - } -} - /// Serializes the Split fields. /// /// `fields_metadata` has to be sorted. diff --git a/quickwit/quickwit-search/src/list_fields.rs b/quickwit/quickwit-search/src/list_fields.rs index 888bf973fe6..ebb052e787b 100644 --- a/quickwit/quickwit-search/src/list_fields.rs +++ b/quickwit/quickwit-search/src/list_fields.rs @@ -28,15 +28,13 @@ use futures::future::try_join_all; use itertools::Itertools; use quickwit_common::shared_consts::SPLIT_FIELDS_FILE_NAME; use quickwit_common::uri::Uri; -use quickwit_config::build_doc_mapper; -use quickwit_doc_mapper::DocMapper; use quickwit_metastore::{ListIndexesMetadataResponseExt, SplitMetadata}; use quickwit_proto::metastore::{ ListIndexesMetadataRequest, MetastoreService, MetastoreServiceClient, }; use quickwit_proto::search::{ - deserialize_split_fields, LeafListFieldsRequest, ListFieldType, ListFields, - ListFieldsEntryResponse, ListFieldsRequest, ListFieldsResponse, SplitIdAndFooterOffsets, + deserialize_split_fields, LeafListFieldsRequest, ListFields, ListFieldsEntryResponse, + ListFieldsRequest, ListFieldsResponse, SplitIdAndFooterOffsets, }; use quickwit_proto::types::IndexUid; use quickwit_storage::Storage; @@ -92,32 +90,6 @@ pub async fn get_fields_from_split<'a>( Ok(Box::new(list_fields.into_iter())) } -/// Get the list of splits for the request which we need to scan. -pub fn get_fields_from_schema( - index_id: String, - doc_mapper: Arc, -) -> Box + Send> { - let schema = doc_mapper.schema(); - let mut list_fields = schema - .fields() - .map(|(_field, entry)| ListFieldsEntryResponse { - field_name: entry.name().to_string(), - field_type: ListFieldType::from(entry.field_type().value_type()) as i32, - index_ids: vec![index_id.to_string()], - searchable: entry.is_indexed(), - aggregatable: entry.is_fast(), - non_searchable_index_ids: Vec::new(), - non_aggregatable_index_ids: Vec::new(), - }) - .collect_vec(); - // Prepare for grouping by field name and type - list_fields.sort_by(|left, right| match left.field_name.cmp(&right.field_name) { - Ordering::Equal => left.field_type.cmp(&right.field_type), - other => other, - }); - Box::new(list_fields.into_iter()) -} - /// `current_group` needs to contain at least one element. /// The group needs to be of the same field name and type. fn merge_same_field_group( @@ -262,7 +234,6 @@ pub async fn leaf_list_fields( index_storage: Arc, searcher_context: &SearcherContext, split_ids: &[SplitIdAndFooterOffsets], - doc_mapper: Arc, field_patterns: &[String], ) -> crate::Result { let mut iter_per_split = Vec::new(); @@ -282,10 +253,7 @@ pub async fn leaf_list_fields( for fields in result { let list_fields_iter = match fields { Ok(fields) => fields, - Err(_err) => { - // Schema fallback - get_fields_from_schema(index_id.to_string(), doc_mapper.clone()) - } + Err(_err) => Box::new(std::iter::empty()), }; let list_fields_iter = list_fields_iter .map(|mut entry| { @@ -309,8 +277,6 @@ pub struct IndexMetasForLeafSearch { pub index_id: String, /// Index URI. pub index_uri: Uri, - /// Doc mapper json string. - pub doc_mapper_str: String, } /// Performs a distributed list fields request. @@ -345,25 +311,9 @@ pub async fn root_list_fields( let index_uid_to_index_meta: HashMap = indexes_metadatas .iter() .map(|index_metadata| { - let doc_mapper = build_doc_mapper( - &index_metadata.index_config.doc_mapping, - &index_metadata.index_config.search_settings, - ) - .map_err(|err| { - SearchError::Internal(format!("failed to build doc mapper. cause: {err}")) - }) - .unwrap(); - let index_metadata_for_leaf_search = IndexMetasForLeafSearch { index_uri: index_metadata.index_uri().clone(), index_id: index_metadata.index_config.index_id.to_string(), - doc_mapper_str: serde_json::to_string(&doc_mapper) - .map_err(|err| { - SearchError::Internal(format!( - "failed to serialize doc mapper. cause: {err}" - )) - }) - .unwrap(), }; ( @@ -423,7 +373,6 @@ pub fn jobs_to_leaf_requests( let leaf_search_request = LeafListFieldsRequest { index_id: index_meta.index_id.to_string(), index_uri: index_meta.index_uri.to_string(), - doc_mapper: index_meta.doc_mapper_str.to_string(), fields: search_request_for_leaf.fields.clone(), split_offsets: job_group.into_iter().map(|job| job.offsets).collect(), }; @@ -434,7 +383,7 @@ pub fn jobs_to_leaf_requests( #[cfg(test)] mod tests { - use quickwit_proto::search::ListFieldsEntryResponse; + use quickwit_proto::search::{ListFieldType, ListFieldsEntryResponse}; use super::*; diff --git a/quickwit/quickwit-search/src/service.rs b/quickwit/quickwit-search/src/service.rs index 04ffd2e39d6..ae9b494f5a3 100644 --- a/quickwit/quickwit-search/src/service.rs +++ b/quickwit/quickwit-search/src/service.rs @@ -349,13 +349,11 @@ impl SearchService for SearchServiceImpl { let storage = self.storage_resolver.resolve(&index_uri).await?; let index_id = list_fields_req.index_id; let split_ids = list_fields_req.split_offsets; - let doc_mapper = deserialize_doc_mapper(&list_fields_req.doc_mapper)?; leaf_list_fields( index_id, storage, &self.searcher_context, &split_ids[..], - doc_mapper, &list_fields_req.fields, ) .await