diff --git a/quickwit/quickwit-doc-mapper/src/query_builder.rs b/quickwit/quickwit-doc-mapper/src/query_builder.rs index 9dffeef0ad7..d6f5c41e349 100644 --- a/quickwit/quickwit-doc-mapper/src/query_builder.rs +++ b/quickwit/quickwit-doc-mapper/src/query_builder.rs @@ -151,12 +151,11 @@ impl<'a> QueryAstVisitor<'a> for ExtractTermSetFields<'_> { fn visit_term_set(&mut self, term_set_query: &'a TermSetQuery) -> anyhow::Result<()> { for field in term_set_query.terms_per_field.keys() { - if let Ok((field, _field_entry, _path)) = find_field_or_hit_dynamic(field, self.schema) - { - self.term_dict_fields_to_warm_up.insert(field); - } else { - anyhow::bail!("field does not exist: {}", field); - } + match find_field_or_hit_dynamic(field, self.schema) { + Ok((field, _field_entry, _path)) => self.term_dict_fields_to_warm_up.insert(field), + Err(InvalidQuery::FieldDoesNotExist { .. }) => continue, + Err(_) => anyhow::bail!("field does not exist: `{}`", field), + }; } Ok(()) } @@ -528,6 +527,10 @@ mod test { Vec::new(), TestExpectation::Ok("TermQuery"), ); + } + + #[test] + fn test_term_set_query() { check_build_query_static_mode( "title: IN [hello]", Vec::new(), @@ -538,6 +541,16 @@ mod test { Vec::new(), TestExpectation::Err("set query need to target a specific field"), ); + check_build_query_static_mode( + "foo: IN [hello]", + Vec::new(), + TestExpectation::Err("field does not exist: `foo`"), + ); + check_build_query_static_lenient_mode( + "foo: IN [hello]", + Vec::new(), + TestExpectation::Ok("EmptyQuery"), + ); } #[test] @@ -591,6 +604,16 @@ mod test { Vec::new(), TestExpectation::Ok("2023-01-10T08:38:51.16Z"), ); + check_build_query_static_mode( + &format!("foo:<{end_date_time_str}"), + Vec::new(), + TestExpectation::Err("invalid query: field does not exist: `foo`"), + ); + check_build_query_static_lenient_mode( + &format!("foo:<{end_date_time_str}"), + Vec::new(), + TestExpectation::Ok("EmptyQuery"), + ); } // Check range on datetime in microseconds and truncation to milliseconds. diff --git a/quickwit/quickwit-jaeger/src/lib.rs b/quickwit/quickwit-jaeger/src/lib.rs index 815f5ef7015..5a9a682dec0 100644 --- a/quickwit/quickwit-jaeger/src/lib.rs +++ b/quickwit/quickwit-jaeger/src/lib.rs @@ -702,6 +702,7 @@ fn build_search_query( field: "span_start_timestamp_nanos".to_string(), lower_bound: Bound::Unbounded, upper_bound: Bound::Unbounded, + lenient: false, }; if let Some(min_span_start_timestamp_secs) = min_span_start_timestamp_secs_opt { @@ -731,6 +732,7 @@ fn build_search_query( field: "span_duration_millis".to_string(), lower_bound: Bound::Unbounded, upper_bound: Bound::Unbounded, + lenient: false, }; if let Some(min_span_duration_millis) = min_span_duration_millis_opt { @@ -1521,7 +1523,8 @@ mod tests { vec![RangeQuery { field: "span_start_timestamp_nanos".to_string(), lower_bound: Bound::Included("1970-01-01T00:00:03Z".to_string().into()), - upper_bound: Bound::Unbounded + upper_bound: Bound::Unbounded, + lenient: false, } .into()] ); @@ -1550,6 +1553,7 @@ mod tests { field: "span_start_timestamp_nanos".to_string(), lower_bound: Bound::Unbounded, upper_bound: Bound::Included("1970-01-01T00:00:33Z".to_string().into()), + lenient: false, } .into()] ); @@ -1578,6 +1582,7 @@ mod tests { field: "span_start_timestamp_nanos".to_string(), lower_bound: Bound::Included("1970-01-01T00:00:03Z".to_string().into()), upper_bound: Bound::Included("1970-01-01T00:00:33Z".to_string().into()), + lenient: false, } .into()] ); @@ -1605,7 +1610,8 @@ mod tests { vec![RangeQuery { field: "span_duration_millis".to_string(), lower_bound: Bound::Included(7u64.into()), - upper_bound: Bound::Unbounded + upper_bound: Bound::Unbounded, + lenient: false, } .into()] ); @@ -1634,6 +1640,7 @@ mod tests { field: "span_duration_millis".to_string(), lower_bound: Bound::Unbounded, upper_bound: Bound::Included(77u64.into()), + lenient: false, } .into()] ); @@ -1662,6 +1669,7 @@ mod tests { field: "span_duration_millis".to_string(), lower_bound: Bound::Included(7u64.into()), upper_bound: Bound::Included(77u64.into()), + lenient: false, } .into()] ); @@ -1896,12 +1904,14 @@ mod tests { field: "span_start_timestamp_nanos".to_string(), lower_bound: Bound::Included("1970-01-01T00:00:03Z".to_string().into()), upper_bound: Bound::Included("1970-01-01T00:00:33Z".to_string().into()), + lenient: false, } .into(), RangeQuery { field: "span_duration_millis".to_string(), lower_bound: Bound::Included(7u64.into()), upper_bound: Bound::Included(77u64.into()), + lenient: false, } .into(), ] diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/range_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/range_query.rs index 337ec019e9d..3b182d5f828 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/range_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/range_query.rs @@ -23,6 +23,7 @@ use quickwit_datetime::StrptimeParser; use serde::Deserialize; use time::format_description::well_known::Rfc3339; +use super::LeniencyBool; use crate::elastic_query_dsl::one_field_map::OneFieldMap; use crate::elastic_query_dsl::ConvertibleToQueryAst; use crate::not_nan_f32::NotNaNf32; @@ -44,6 +45,8 @@ pub struct RangeQueryParams { boost: Option, #[serde(default)] format: Option, + #[serde(default)] + lenient: LeniencyBool, } pub type RangeQuery = OneFieldMap; @@ -58,6 +61,7 @@ impl ConvertibleToQueryAst for RangeQuery { lte, boost, format, + lenient, } = self.value; let (gt, gte, lt, lte) = if let Some(JsonLiteral::String(java_date_format)) = format { let parser = StrptimeParser::from_java_datetime_format(&java_date_format) @@ -90,6 +94,7 @@ impl ConvertibleToQueryAst for RangeQuery { (None, Some(lte)) => Bound::Included(lte), (None, None) => Bound::Unbounded, }, + lenient, }; let ast: QueryAst = range_query_ast.into(); Ok(ast.boost(boost)) @@ -126,6 +131,7 @@ mod tests { lte: None, boost: None, format: JsonLiteral::String("yyyy-MM-dd['T'HH:mm:ss]".to_string()).into(), + lenient: true, }; let range_query: ElasticRangeQuery = ElasticRangeQuery { field: "date".to_string(), @@ -138,6 +144,7 @@ mod tests { field, lower_bound: Bound::Excluded(lower_bound), upper_bound: Bound::Unbounded, + lenient: true, }) if field == "date" && lower_bound == JsonLiteral::String("2021-01-03T13:32:43Z".to_string()) )); @@ -152,6 +159,7 @@ mod tests { lte: Some(JsonLiteral::String("2024-09-28T10:22:55.797Z".to_string())), boost: None, format: JsonLiteral::String("strict_date_optional_time".to_string()).into(), + lenient: false, }; let range_query: ElasticRangeQuery = ElasticRangeQuery { field: "timestamp".to_string(), @@ -164,6 +172,7 @@ mod tests { field, lower_bound: Bound::Unbounded, upper_bound: Bound::Included(upper_bound), + lenient: false, }) if field == "timestamp" && upper_bound == JsonLiteral::String("2024-09-28T10:22:55.797Z".to_string()) )); diff --git a/quickwit/quickwit-query/src/query_ast/range_query.rs b/quickwit/quickwit-query/src/query_ast/range_query.rs index 52ce26bc306..9cf903f50b2 100644 --- a/quickwit/quickwit-query/src/query_ast/range_query.rs +++ b/quickwit/quickwit-query/src/query_ast/range_query.rs @@ -39,6 +39,8 @@ pub struct RangeQuery { pub field: String, pub lower_bound: Bound, pub upper_bound: Bound, + /// Support missing fields + pub lenient: bool, } /// Converts a given bound JsonLiteral bound into a bound of type T. @@ -121,7 +123,14 @@ impl BuildTantivyAst for RangeQuery { _with_validation: bool, ) -> Result { let (field, field_entry, json_path) = - super::utils::find_field_or_hit_dynamic(&self.field, schema)?; + match super::utils::find_field_or_hit_dynamic(&self.field, schema) { + Ok(res) => res, + Err(InvalidQuery::FieldDoesNotExist { .. }) if self.lenient => { + return Ok(TantivyQueryAst::match_none()); + } + Err(e) => return Err(e), + }; + if !field_entry.is_fast() { return Err(InvalidQuery::SchemaError(format!( "range queries are only supported for fast fields. (`{}` is not a fast field)", @@ -322,6 +331,7 @@ mod tests { field: field.to_string(), lower_bound: Bound::Included(lower_value), upper_bound: Bound::Included(upper_value), + lenient: false, }; let tantivy_ast = range_query .build_tantivy_ast_call( @@ -369,6 +379,7 @@ mod tests { field: "missing_field.toto".to_string(), lower_bound: Bound::Included(JsonLiteral::String("1980".to_string())), upper_bound: Bound::Included(JsonLiteral::String("1989".to_string())), + lenient: false, }; // with validation let invalid_query: InvalidQuery = range_query @@ -395,6 +406,24 @@ mod tests { .const_predicate(), Some(MatchAllOrNone::MatchNone) ); + let range_query = RangeQuery { + field: "missing_field.toto".to_string(), + lower_bound: Bound::Included(JsonLiteral::String("1980".to_string())), + upper_bound: Bound::Included(JsonLiteral::String("1989".to_string())), + lenient: true, + }; + assert_eq!( + range_query + .build_tantivy_ast_call( + &schema, + &create_default_quickwit_tokenizer_manager(), + &[], + true + ) + .unwrap() + .const_predicate(), + Some(MatchAllOrNone::MatchNone) + ); } #[test] @@ -403,6 +432,7 @@ mod tests { field: "hello".to_string(), lower_bound: Bound::Included(JsonLiteral::String("1980".to_string())), upper_bound: Bound::Included(JsonLiteral::String("1989".to_string())), + lenient: false, }; let schema = make_schema(true); let tantivy_ast = range_query @@ -431,6 +461,7 @@ mod tests { field: "my_u64_not_fastfield".to_string(), lower_bound: Bound::Included(JsonLiteral::String("1980".to_string())), upper_bound: Bound::Included(JsonLiteral::String("1989".to_string())), + lenient: false, }; let schema = make_schema(false); let err = range_query diff --git a/quickwit/quickwit-query/src/query_ast/term_query.rs b/quickwit/quickwit-query/src/query_ast/term_query.rs index 6c0e09c35a3..762876012f2 100644 --- a/quickwit/quickwit-query/src/query_ast/term_query.rs +++ b/quickwit/quickwit-query/src/query_ast/term_query.rs @@ -41,16 +41,6 @@ impl From for QueryAst { } } -impl TermQuery { - #[cfg(test)] - pub fn from_field_value(field: impl ToString, value: impl ToString) -> Self { - Self { - field: field.to_string(), - value: value.to_string(), - } - } -} - impl BuildTantivyAst for TermQuery { fn build_tantivy_ast_impl( &self, diff --git a/quickwit/quickwit-query/src/query_ast/term_set_query.rs b/quickwit/quickwit-query/src/query_ast/term_set_query.rs index 68ea20cc4ab..0ce012e8907 100644 --- a/quickwit/quickwit-query/src/query_ast/term_set_query.rs +++ b/quickwit/quickwit-query/src/query_ast/term_set_query.rs @@ -34,6 +34,7 @@ use crate::InvalidQuery; #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] pub struct TermSetQuery { pub terms_per_field: HashMap>, + pub lenient: bool, } impl TermSetQuery { @@ -56,8 +57,13 @@ impl TermSetQuery { field: full_path.to_string(), value: value.to_string(), }; - let ast = - term_query.build_tantivy_ast_call(schema, tokenizer_manager, &[], false)?; + let ast = term_query.build_tantivy_ast_call( + schema, + tokenizer_manager, + &[], + // disable term query validation when doing a lenient set query + !self.lenient, + )?; let tantivy_query: Box = ast.simplify().into(); tantivy_query.query_terms(&mut |term, _| { terms.insert(term.clone()); @@ -76,9 +82,13 @@ impl BuildTantivyAst for TermSetQuery { _search_fields: &[String], _with_validation: bool, ) -> Result { - let terms_it = self.make_term_iterator(schema, tokenizer_manager)?; - let term_set_query = tantivy::query::TermSetQuery::new(terms_it); - Ok(term_set_query.into()) + let terms = self.make_term_iterator(schema, tokenizer_manager)?; + if terms.is_empty() { + Ok(TantivyQueryAst::match_none()) + } else { + let term_set_query = tantivy::query::TermSetQuery::new(terms); + Ok(term_set_query.into()) + } } } diff --git a/quickwit/quickwit-query/src/query_ast/user_input_query.rs b/quickwit/quickwit-query/src/query_ast/user_input_query.rs index 279f41b4676..0ca38f2251e 100644 --- a/quickwit/quickwit-query/src/query_ast/user_input_query.rs +++ b/quickwit/quickwit-query/src/query_ast/user_input_query.rs @@ -162,6 +162,7 @@ fn convert_user_input_ast_to_query_ast( field, lower_bound: convert_bound(lower), upper_bound: convert_bound(upper), + lenient, }; Ok(range_query.into()) } @@ -179,7 +180,10 @@ fn convert_user_input_ast_to_query_ast( for field in field_names { terms_per_field.insert(field.to_string(), terms.clone()); } - let term_set_query = query_ast::TermSetQuery { terms_per_field }; + let term_set_query = query_ast::TermSetQuery { + terms_per_field, + lenient, + }; Ok(term_set_query.into()) } UserInputLeaf::Exists { field } => Ok(FieldPresenceQuery { field }.into()), diff --git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs index 5ad92f63aa2..b4e49f53c77 100644 --- a/quickwit/quickwit-search/src/leaf.rs +++ b/quickwit/quickwit-search/src/leaf.rs @@ -677,6 +677,7 @@ fn remove_redundant_timestamp_range( upper_bound: map_bound(final_end_timestamp, |bound| { bound.into_timestamp_nanos().into() }), + lenient: false, }; new_ast = if let QueryAst::Bool(mut bool_query) = new_ast { if bool_query.must.is_empty() @@ -1477,6 +1478,7 @@ mod tests { lower_bound: Bound::Included(time1.into()), // *1000 has no impact, we detect timestamp in ms instead of s upper_bound: Bound::Included((time4 * 1000).into()), + lenient: false, })) .unwrap(), ..SearchRequest::default() @@ -1488,6 +1490,7 @@ mod tests { field: timestamp_field.to_string(), lower_bound: Bound::Included(time1.into()), upper_bound: Bound::Included(time3.into()), + lenient: false, })) .unwrap(), ..SearchRequest::default() @@ -1507,12 +1510,14 @@ mod tests { field: timestamp_field.to_string(), lower_bound: Bound::Unbounded, upper_bound: Bound::Excluded((time3 * S_TO_NS).into()), + lenient: false, }; let search_request = SearchRequest { query_ast: serde_json::to_string(&QueryAst::Range(RangeQuery { field: timestamp_field.to_string(), lower_bound: Bound::Included(time1.into()), upper_bound: Bound::Excluded(time3.into()), + lenient: false, })) .unwrap(), ..SearchRequest::default() @@ -1539,12 +1544,14 @@ mod tests { field: timestamp_field.to_string(), lower_bound: Bound::Excluded((time2 * S_TO_NS).into()), upper_bound: Bound::Unbounded, + lenient: false, }; let search_request = SearchRequest { query_ast: serde_json::to_string(&QueryAst::Range(RangeQuery { field: timestamp_field.to_string(), lower_bound: Bound::Excluded(time2.into()), upper_bound: Bound::Included(time3.into()), + lenient: false, })) .unwrap(), ..SearchRequest::default() @@ -1566,12 +1573,14 @@ mod tests { field: timestamp_field.to_string(), lower_bound: Bound::Unbounded, upper_bound: Bound::Excluded((time2 * S_TO_NS).into()), + lenient: false, }; let search_request = SearchRequest { query_ast: serde_json::to_string(&QueryAst::Range(RangeQuery { field: timestamp_field.to_string(), lower_bound: Bound::Included(time1.into()), upper_bound: Bound::Included(time3.into()), + lenient: false, })) .unwrap(), start_timestamp: Some(time1), @@ -1584,12 +1593,14 @@ mod tests { field: timestamp_field.to_string(), lower_bound: Bound::Unbounded, upper_bound: Bound::Included((time2 * S_TO_NS).into()), + lenient: false, }; let search_request = SearchRequest { query_ast: serde_json::to_string(&QueryAst::Range(RangeQuery { field: timestamp_field.to_string(), lower_bound: Bound::Included(time1.into()), upper_bound: Bound::Included(time2.into()), + lenient: false, })) .unwrap(), start_timestamp: Some(time1), @@ -1602,6 +1613,7 @@ mod tests { field: timestamp_field.to_string(), lower_bound: Bound::Included((time3 * S_TO_NS).into()), upper_bound: Bound::Unbounded, + lenient: false, }; let search_request = SearchRequest { @@ -1609,6 +1621,7 @@ mod tests { field: timestamp_field.to_string(), lower_bound: Bound::Included(time2.into()), upper_bound: Bound::Included(time4.into()), + lenient: false, })) .unwrap(), start_timestamp: Some(time3), @@ -1622,6 +1635,7 @@ mod tests { field: timestamp_field.to_string(), lower_bound: Bound::Included(time3.into()), upper_bound: Bound::Included(time4.into()), + lenient: false, })) .unwrap(), start_timestamp: Some(time2), @@ -1685,6 +1699,7 @@ mod tests { field: "timestamp".to_string(), lower_bound: Bound::Included(1_700_002_000_000_000_000u64.into()), upper_bound: Bound::Unbounded, + lenient: false, } .into()], ..BoolQuery::default() diff --git a/quickwit/quickwit-search/src/root.rs b/quickwit/quickwit-search/src/root.rs index 608bc87e479..8cd51443f11 100644 --- a/quickwit/quickwit-search/src/root.rs +++ b/quickwit/quickwit-search/src/root.rs @@ -4056,6 +4056,7 @@ mod tests { field: timestamp_field.to_string(), lower_bound: Bound::Included(JsonLiteral::String("2021-04-13T22:45:41Z".to_owned())), upper_bound: Bound::Excluded(JsonLiteral::String("2021-05-06T06:51:19Z".to_owned())), + lenient: false, } .into(); @@ -4114,6 +4115,7 @@ mod tests { field: timestamp_field.to_string(), lower_bound: Bound::Excluded(JsonLiteral::String("2021-04-13T22:45:41Z".to_owned())), upper_bound: Bound::Included(JsonLiteral::String("2021-05-06T06:51:19Z".to_owned())), + lenient: false, } .into(); timestamp_range_extractor.start_timestamp = None; @@ -4126,6 +4128,7 @@ mod tests { field: "other_field".to_string(), lower_bound: Bound::Included(JsonLiteral::String("2021-04-13T22:45:41Z".to_owned())), upper_bound: Bound::Excluded(JsonLiteral::String("2021-05-06T06:51:19Z".to_owned())), + lenient: false, } .into(); timestamp_range_extractor.start_timestamp = None; @@ -4142,6 +4145,7 @@ mod tests { upper_bound: Bound::Excluded(JsonLiteral::String( "2021-05-06T06:51:19.001Z".to_owned(), )), + lenient: false, } .into();