Skip to content

Commit

Permalink
remove schema fallback
Browse files Browse the repository at this point in the history
  • Loading branch information
PSeitz committed Dec 15, 2023
1 parent 9bc39ca commit 4aab7d9
Show file tree
Hide file tree
Showing 8 changed files with 21 additions and 84 deletions.
1 change: 0 additions & 1 deletion quickwit/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 17 additions & 2 deletions quickwit/quickwit-indexing/src/actors/packager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ use quickwit_doc_mapper::NamedField;
use quickwit_proto::search::{
serialize_split_fields, ListFieldType, ListFields, ListFieldsEntryResponse,
};
use tantivy::schema::FieldType;
use tantivy::schema::{FieldType, Type};
use tantivy::{FieldMetadata, InvertedIndexReader, ReloadPolicy, SegmentMeta};
use tokio::runtime::Handle;
use tracing::{debug, info, instrument, warn};
Expand Down Expand Up @@ -343,12 +343,27 @@ pub fn serialize_field_metadata(fields_metadata: &[FieldMetadata]) -> Vec<u8> {
serialize_split_fields(ListFields { fields })
}

/// Maps a tantivy schema value `Type` to the `ListFieldType` variant of the same name.
///
/// The result is cast to `i32` by the caller to populate `ListFieldsEntryResponse::field_type`.
/// The match deliberately has no wildcard arm, so a new `Type` variant fails compilation
/// here instead of being silently mapped to something else.
fn tantivy_type_to_list_field_type(typ: Type) -> ListFieldType {
match typ {
Type::Str => ListFieldType::Str,
Type::U64 => ListFieldType::U64,
Type::I64 => ListFieldType::I64,
Type::F64 => ListFieldType::F64,
Type::Bool => ListFieldType::Bool,
Type::Date => ListFieldType::Date,
Type::Facet => ListFieldType::Facet,
Type::Bytes => ListFieldType::Bytes,
Type::Json => ListFieldType::Json,
Type::IpAddr => ListFieldType::IpAddr,
}
}

fn field_metadata_to_list_field_serialized(
field_metadata: &FieldMetadata,
) -> ListFieldsEntryResponse {
ListFieldsEntryResponse {
field_name: field_metadata.field_name.to_string(),
field_type: ListFieldType::from(field_metadata.typ) as i32,
field_type: tantivy_type_to_list_field_type(field_metadata.typ) as i32,
searchable: field_metadata.indexed,
aggregatable: field_metadata.fast,
index_ids: vec![],
Expand Down
1 change: 0 additions & 1 deletion quickwit/quickwit-proto/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ prost-types = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
sqlx = { workspace = true, optional = true }
tantivy = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
tonic = { workspace = true }
Expand Down
2 changes: 0 additions & 2 deletions quickwit/quickwit-proto/protos/quickwit/search.proto
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,6 @@ message LeafListFieldsRequest {
// Wildcard expressions are supported.
repeated string fields = 4;

// `DocMapper` as json serialized trait.
string doc_mapper = 5;
}

message ListFieldsResponse {
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 0 additions & 18 deletions quickwit/quickwit-proto/src/search/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ use std::io::{self, Read};

use prost::Message;
pub use sort_by_value::SortValue;
use tantivy::schema::Type;

include!("../codegen/quickwit/quickwit.search.rs");

Expand Down Expand Up @@ -247,23 +246,6 @@ impl PartialHit {
}
}

/// Converts a tantivy schema value `Type` into the `ListFieldType` variant of the same name.
impl From<Type> for ListFieldType {
    fn from(typ: Type) -> Self {
        // Exhaustive on purpose: no wildcard arm, so adding a `Type` variant is a compile
        // error here rather than a silent mis-mapping.
        match typ {
            Type::Str => Self::Str,
            Type::U64 => Self::U64,
            Type::I64 => Self::I64,
            Type::F64 => Self::F64,
            Type::Bool => Self::Bool,
            Type::Date => Self::Date,
            Type::Facet => Self::Facet,
            Type::Bytes => Self::Bytes,
            Type::Json => Self::Json,
            Type::IpAddr => Self::IpAddr,
        }
    }
}

/// Serializes the Split fields.
///
/// `fields_metadata` has to be sorted.
Expand Down
59 changes: 4 additions & 55 deletions quickwit/quickwit-search/src/list_fields.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,13 @@ use futures::future::try_join_all;
use itertools::Itertools;
use quickwit_common::shared_consts::SPLIT_FIELDS_FILE_NAME;
use quickwit_common::uri::Uri;
use quickwit_config::build_doc_mapper;
use quickwit_doc_mapper::DocMapper;
use quickwit_metastore::{ListIndexesMetadataResponseExt, SplitMetadata};
use quickwit_proto::metastore::{
ListIndexesMetadataRequest, MetastoreService, MetastoreServiceClient,
};
use quickwit_proto::search::{
deserialize_split_fields, LeafListFieldsRequest, ListFieldType, ListFields,
ListFieldsEntryResponse, ListFieldsRequest, ListFieldsResponse, SplitIdAndFooterOffsets,
deserialize_split_fields, LeafListFieldsRequest, ListFields, ListFieldsEntryResponse,
ListFieldsRequest, ListFieldsResponse, SplitIdAndFooterOffsets,
};
use quickwit_proto::types::IndexUid;
use quickwit_storage::Storage;
Expand Down Expand Up @@ -92,32 +90,6 @@ pub async fn get_fields_from_split<'a>(
Ok(Box::new(list_fields.into_iter()))
}

/// Get the list of splits for the request which we need to scan.
pub fn get_fields_from_schema(
index_id: String,
doc_mapper: Arc<dyn DocMapper>,
) -> Box<dyn Iterator<Item = ListFieldsEntryResponse> + Send> {
let schema = doc_mapper.schema();
let mut list_fields = schema
.fields()
.map(|(_field, entry)| ListFieldsEntryResponse {
field_name: entry.name().to_string(),
field_type: ListFieldType::from(entry.field_type().value_type()) as i32,
index_ids: vec![index_id.to_string()],
searchable: entry.is_indexed(),
aggregatable: entry.is_fast(),
non_searchable_index_ids: Vec::new(),
non_aggregatable_index_ids: Vec::new(),
})
.collect_vec();
// Prepare for grouping by field name and type
list_fields.sort_by(|left, right| match left.field_name.cmp(&right.field_name) {
Ordering::Equal => left.field_type.cmp(&right.field_type),
other => other,
});
Box::new(list_fields.into_iter())
}

/// `current_group` needs to contain at least one element.
/// The group needs to be of the same field name and type.
fn merge_same_field_group(
Expand Down Expand Up @@ -262,7 +234,6 @@ pub async fn leaf_list_fields(
index_storage: Arc<dyn Storage>,
searcher_context: &SearcherContext,
split_ids: &[SplitIdAndFooterOffsets],
doc_mapper: Arc<dyn DocMapper>,
field_patterns: &[String],
) -> crate::Result<ListFieldsResponse> {
let mut iter_per_split = Vec::new();
Expand All @@ -282,10 +253,7 @@ pub async fn leaf_list_fields(
for fields in result {
let list_fields_iter = match fields {
Ok(fields) => fields,
Err(_err) => {
// Schema fallback
get_fields_from_schema(index_id.to_string(), doc_mapper.clone())
}
Err(_err) => Box::new(std::iter::empty()),
};
let list_fields_iter = list_fields_iter
.map(|mut entry| {
Expand All @@ -309,8 +277,6 @@ pub struct IndexMetasForLeafSearch {
pub index_id: String,
/// Index URI.
pub index_uri: Uri,
/// Doc mapper json string.
pub doc_mapper_str: String,
}

/// Performs a distributed list fields request.
Expand Down Expand Up @@ -345,25 +311,9 @@ pub async fn root_list_fields(
let index_uid_to_index_meta: HashMap<IndexUid, IndexMetasForLeafSearch> = indexes_metadatas
.iter()
.map(|index_metadata| {
let doc_mapper = build_doc_mapper(
&index_metadata.index_config.doc_mapping,
&index_metadata.index_config.search_settings,
)
.map_err(|err| {
SearchError::Internal(format!("failed to build doc mapper. cause: {err}"))
})
.unwrap();

let index_metadata_for_leaf_search = IndexMetasForLeafSearch {
index_uri: index_metadata.index_uri().clone(),
index_id: index_metadata.index_config.index_id.to_string(),
doc_mapper_str: serde_json::to_string(&doc_mapper)
.map_err(|err| {
SearchError::Internal(format!(
"failed to serialize doc mapper. cause: {err}"
))
})
.unwrap(),
};

(
Expand Down Expand Up @@ -423,7 +373,6 @@ pub fn jobs_to_leaf_requests(
let leaf_search_request = LeafListFieldsRequest {
index_id: index_meta.index_id.to_string(),
index_uri: index_meta.index_uri.to_string(),
doc_mapper: index_meta.doc_mapper_str.to_string(),
fields: search_request_for_leaf.fields.clone(),
split_offsets: job_group.into_iter().map(|job| job.offsets).collect(),
};
Expand All @@ -434,7 +383,7 @@ pub fn jobs_to_leaf_requests(

#[cfg(test)]
mod tests {
use quickwit_proto::search::ListFieldsEntryResponse;
use quickwit_proto::search::{ListFieldType, ListFieldsEntryResponse};

use super::*;

Expand Down
2 changes: 0 additions & 2 deletions quickwit/quickwit-search/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -349,13 +349,11 @@ impl SearchService for SearchServiceImpl {
let storage = self.storage_resolver.resolve(&index_uri).await?;
let index_id = list_fields_req.index_id;
let split_ids = list_fields_req.split_offsets;
let doc_mapper = deserialize_doc_mapper(&list_fields_req.doc_mapper)?;
leaf_list_fields(
index_id,
storage,
&self.searcher_context,
&split_ids[..],
doc_mapper,
&list_fields_req.fields,
)
.await
Expand Down

0 comments on commit 4aab7d9

Please sign in to comment.