Skip to content

Commit

Permalink
Enable lenient mode for term set query and range query
Browse files Browse the repository at this point in the history
  • Loading branch information
rdettai committed Dec 9, 2024
1 parent d8bba00 commit 5391af8
Show file tree
Hide file tree
Showing 9 changed files with 121 additions and 25 deletions.
35 changes: 29 additions & 6 deletions quickwit/quickwit-doc-mapper/src/query_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,12 +151,11 @@ impl<'a> QueryAstVisitor<'a> for ExtractTermSetFields<'_> {

fn visit_term_set(&mut self, term_set_query: &'a TermSetQuery) -> anyhow::Result<()> {
for field in term_set_query.terms_per_field.keys() {
if let Ok((field, _field_entry, _path)) = find_field_or_hit_dynamic(field, self.schema)
{
self.term_dict_fields_to_warm_up.insert(field);
} else {
anyhow::bail!("field does not exist: {}", field);
}
match find_field_or_hit_dynamic(field, self.schema) {
Ok((field, _field_entry, _path)) => self.term_dict_fields_to_warm_up.insert(field),
Err(InvalidQuery::FieldDoesNotExist { .. }) => continue,
Err(_) => anyhow::bail!("field does not exist: `{}`", field),
};
}
Ok(())
}
Expand Down Expand Up @@ -528,6 +527,10 @@ mod test {
Vec::new(),
TestExpectation::Ok("TermQuery"),
);
}

#[test]
fn test_term_set_query() {
check_build_query_static_mode(
"title: IN [hello]",
Vec::new(),
Expand All @@ -538,6 +541,16 @@ mod test {
Vec::new(),
TestExpectation::Err("set query need to target a specific field"),
);
check_build_query_static_mode(
"foo: IN [hello]",
Vec::new(),
TestExpectation::Err("field does not exist: `foo`"),
);
check_build_query_static_lenient_mode(
"foo: IN [hello]",
Vec::new(),
TestExpectation::Ok("EmptyQuery"),
);
}

#[test]
Expand Down Expand Up @@ -591,6 +604,16 @@ mod test {
Vec::new(),
TestExpectation::Ok("2023-01-10T08:38:51.16Z"),
);
check_build_query_static_mode(
&format!("foo:<{end_date_time_str}"),
Vec::new(),
TestExpectation::Err("invalid query: field does not exist: `foo`"),
);
check_build_query_static_lenient_mode(
&format!("foo:<{end_date_time_str}"),
Vec::new(),
TestExpectation::Ok("EmptyQuery"),
);
}

// Check range on datetime in microseconds and truncation to milliseconds.
Expand Down
14 changes: 12 additions & 2 deletions quickwit/quickwit-jaeger/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,7 @@ fn build_search_query(
field: "span_start_timestamp_nanos".to_string(),
lower_bound: Bound::Unbounded,
upper_bound: Bound::Unbounded,
lenient: false,
};

if let Some(min_span_start_timestamp_secs) = min_span_start_timestamp_secs_opt {
Expand Down Expand Up @@ -731,6 +732,7 @@ fn build_search_query(
field: "span_duration_millis".to_string(),
lower_bound: Bound::Unbounded,
upper_bound: Bound::Unbounded,
lenient: false,
};

if let Some(min_span_duration_millis) = min_span_duration_millis_opt {
Expand Down Expand Up @@ -1521,7 +1523,8 @@ mod tests {
vec![RangeQuery {
field: "span_start_timestamp_nanos".to_string(),
lower_bound: Bound::Included("1970-01-01T00:00:03Z".to_string().into()),
upper_bound: Bound::Unbounded
upper_bound: Bound::Unbounded,
lenient: false,
}
.into()]
);
Expand Down Expand Up @@ -1550,6 +1553,7 @@ mod tests {
field: "span_start_timestamp_nanos".to_string(),
lower_bound: Bound::Unbounded,
upper_bound: Bound::Included("1970-01-01T00:00:33Z".to_string().into()),
lenient: false,
}
.into()]
);
Expand Down Expand Up @@ -1578,6 +1582,7 @@ mod tests {
field: "span_start_timestamp_nanos".to_string(),
lower_bound: Bound::Included("1970-01-01T00:00:03Z".to_string().into()),
upper_bound: Bound::Included("1970-01-01T00:00:33Z".to_string().into()),
lenient: false,
}
.into()]
);
Expand Down Expand Up @@ -1605,7 +1610,8 @@ mod tests {
vec![RangeQuery {
field: "span_duration_millis".to_string(),
lower_bound: Bound::Included(7u64.into()),
upper_bound: Bound::Unbounded
upper_bound: Bound::Unbounded,
lenient: false,
}
.into()]
);
Expand Down Expand Up @@ -1634,6 +1640,7 @@ mod tests {
field: "span_duration_millis".to_string(),
lower_bound: Bound::Unbounded,
upper_bound: Bound::Included(77u64.into()),
lenient: false,
}
.into()]
);
Expand Down Expand Up @@ -1662,6 +1669,7 @@ mod tests {
field: "span_duration_millis".to_string(),
lower_bound: Bound::Included(7u64.into()),
upper_bound: Bound::Included(77u64.into()),
lenient: false,
}
.into()]
);
Expand Down Expand Up @@ -1896,12 +1904,14 @@ mod tests {
field: "span_start_timestamp_nanos".to_string(),
lower_bound: Bound::Included("1970-01-01T00:00:03Z".to_string().into()),
upper_bound: Bound::Included("1970-01-01T00:00:33Z".to_string().into()),
lenient: false,
}
.into(),
RangeQuery {
field: "span_duration_millis".to_string(),
lower_bound: Bound::Included(7u64.into()),
upper_bound: Bound::Included(77u64.into()),
lenient: false,
}
.into(),
]
Expand Down
9 changes: 9 additions & 0 deletions quickwit/quickwit-query/src/elastic_query_dsl/range_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use quickwit_datetime::StrptimeParser;
use serde::Deserialize;
use time::format_description::well_known::Rfc3339;

use super::LeniencyBool;
use crate::elastic_query_dsl::one_field_map::OneFieldMap;
use crate::elastic_query_dsl::ConvertibleToQueryAst;
use crate::not_nan_f32::NotNaNf32;
Expand All @@ -44,6 +45,8 @@ pub struct RangeQueryParams {
boost: Option<NotNaNf32>,
#[serde(default)]
format: Option<JsonLiteral>,
#[serde(default)]
lenient: LeniencyBool,
}

pub type RangeQuery = OneFieldMap<RangeQueryParams>;
Expand All @@ -58,6 +61,7 @@ impl ConvertibleToQueryAst for RangeQuery {
lte,
boost,
format,
lenient,
} = self.value;
let (gt, gte, lt, lte) = if let Some(JsonLiteral::String(java_date_format)) = format {
let parser = StrptimeParser::from_java_datetime_format(&java_date_format)
Expand Down Expand Up @@ -90,6 +94,7 @@ impl ConvertibleToQueryAst for RangeQuery {
(None, Some(lte)) => Bound::Included(lte),
(None, None) => Bound::Unbounded,
},
lenient,
};
let ast: QueryAst = range_query_ast.into();
Ok(ast.boost(boost))
Expand Down Expand Up @@ -126,6 +131,7 @@ mod tests {
lte: None,
boost: None,
format: JsonLiteral::String("yyyy-MM-dd['T'HH:mm:ss]".to_string()).into(),
lenient: true,
};
let range_query: ElasticRangeQuery = ElasticRangeQuery {
field: "date".to_string(),
Expand All @@ -138,6 +144,7 @@ mod tests {
field,
lower_bound: Bound::Excluded(lower_bound),
upper_bound: Bound::Unbounded,
lenient: true,
})
if field == "date" && lower_bound == JsonLiteral::String("2021-01-03T13:32:43Z".to_string())
));
Expand All @@ -152,6 +159,7 @@ mod tests {
lte: Some(JsonLiteral::String("2024-09-28T10:22:55.797Z".to_string())),
boost: None,
format: JsonLiteral::String("strict_date_optional_time".to_string()).into(),
lenient: false,
};
let range_query: ElasticRangeQuery = ElasticRangeQuery {
field: "timestamp".to_string(),
Expand All @@ -164,6 +172,7 @@ mod tests {
field,
lower_bound: Bound::Unbounded,
upper_bound: Bound::Included(upper_bound),
lenient: false,
})
if field == "timestamp" && upper_bound == JsonLiteral::String("2024-09-28T10:22:55.797Z".to_string())
));
Expand Down
33 changes: 32 additions & 1 deletion quickwit/quickwit-query/src/query_ast/range_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ pub struct RangeQuery {
pub field: String,
pub lower_bound: Bound<JsonLiteral>,
pub upper_bound: Bound<JsonLiteral>,
/// Support missing fields
pub lenient: bool,
}

/// Converts a given bound JsonLiteral bound into a bound of type T.
Expand Down Expand Up @@ -121,7 +123,14 @@ impl BuildTantivyAst for RangeQuery {
_with_validation: bool,
) -> Result<TantivyQueryAst, InvalidQuery> {
let (field, field_entry, json_path) =
super::utils::find_field_or_hit_dynamic(&self.field, schema)?;
match super::utils::find_field_or_hit_dynamic(&self.field, schema) {
Ok(res) => res,
Err(InvalidQuery::FieldDoesNotExist { .. }) if self.lenient => {
return Ok(TantivyQueryAst::match_none());
}
Err(e) => return Err(e),
};

if !field_entry.is_fast() {
return Err(InvalidQuery::SchemaError(format!(
"range queries are only supported for fast fields. (`{}` is not a fast field)",
Expand Down Expand Up @@ -322,6 +331,7 @@ mod tests {
field: field.to_string(),
lower_bound: Bound::Included(lower_value),
upper_bound: Bound::Included(upper_value),
lenient: false,
};
let tantivy_ast = range_query
.build_tantivy_ast_call(
Expand Down Expand Up @@ -369,6 +379,7 @@ mod tests {
field: "missing_field.toto".to_string(),
lower_bound: Bound::Included(JsonLiteral::String("1980".to_string())),
upper_bound: Bound::Included(JsonLiteral::String("1989".to_string())),
lenient: false,
};
// with validation
let invalid_query: InvalidQuery = range_query
Expand All @@ -395,6 +406,24 @@ mod tests {
.const_predicate(),
Some(MatchAllOrNone::MatchNone)
);
let range_query = RangeQuery {
field: "missing_field.toto".to_string(),
lower_bound: Bound::Included(JsonLiteral::String("1980".to_string())),
upper_bound: Bound::Included(JsonLiteral::String("1989".to_string())),
lenient: true,
};
assert_eq!(
range_query
.build_tantivy_ast_call(
&schema,
&create_default_quickwit_tokenizer_manager(),
&[],
true
)
.unwrap()
.const_predicate(),
Some(MatchAllOrNone::MatchNone)
);
}

#[test]
Expand All @@ -403,6 +432,7 @@ mod tests {
field: "hello".to_string(),
lower_bound: Bound::Included(JsonLiteral::String("1980".to_string())),
upper_bound: Bound::Included(JsonLiteral::String("1989".to_string())),
lenient: false,
};
let schema = make_schema(true);
let tantivy_ast = range_query
Expand Down Expand Up @@ -431,6 +461,7 @@ mod tests {
field: "my_u64_not_fastfield".to_string(),
lower_bound: Bound::Included(JsonLiteral::String("1980".to_string())),
upper_bound: Bound::Included(JsonLiteral::String("1989".to_string())),
lenient: false,
};
let schema = make_schema(false);
let err = range_query
Expand Down
10 changes: 0 additions & 10 deletions quickwit/quickwit-query/src/query_ast/term_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,6 @@ impl From<TermQuery> for QueryAst {
}
}

impl TermQuery {
#[cfg(test)]
pub fn from_field_value(field: impl ToString, value: impl ToString) -> Self {
Self {
field: field.to_string(),
value: value.to_string(),
}
}
}

impl BuildTantivyAst for TermQuery {
fn build_tantivy_ast_impl(
&self,
Expand Down
20 changes: 15 additions & 5 deletions quickwit/quickwit-query/src/query_ast/term_set_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ use crate::InvalidQuery;
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct TermSetQuery {
pub terms_per_field: HashMap<String, BTreeSet<String>>,
pub lenient: bool,
}

impl TermSetQuery {
Expand All @@ -56,8 +57,13 @@ impl TermSetQuery {
field: full_path.to_string(),
value: value.to_string(),
};
let ast =
term_query.build_tantivy_ast_call(schema, tokenizer_manager, &[], false)?;
let ast = term_query.build_tantivy_ast_call(
schema,
tokenizer_manager,
&[],
// disable term query validation when doing a lenient set query
!self.lenient,
)?;
let tantivy_query: Box<dyn crate::TantivyQuery> = ast.simplify().into();
tantivy_query.query_terms(&mut |term, _| {
terms.insert(term.clone());
Expand All @@ -76,9 +82,13 @@ impl BuildTantivyAst for TermSetQuery {
_search_fields: &[String],
_with_validation: bool,
) -> Result<TantivyQueryAst, InvalidQuery> {
let terms_it = self.make_term_iterator(schema, tokenizer_manager)?;
let term_set_query = tantivy::query::TermSetQuery::new(terms_it);
Ok(term_set_query.into())
let terms = self.make_term_iterator(schema, tokenizer_manager)?;
if terms.is_empty() {
Ok(TantivyQueryAst::match_none())
} else {
let term_set_query = tantivy::query::TermSetQuery::new(terms);
Ok(term_set_query.into())
}
}
}

Expand Down
6 changes: 5 additions & 1 deletion quickwit/quickwit-query/src/query_ast/user_input_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ fn convert_user_input_ast_to_query_ast(
field,
lower_bound: convert_bound(lower),
upper_bound: convert_bound(upper),
lenient,
};
Ok(range_query.into())
}
Expand All @@ -179,7 +180,10 @@ fn convert_user_input_ast_to_query_ast(
for field in field_names {
terms_per_field.insert(field.to_string(), terms.clone());
}
let term_set_query = query_ast::TermSetQuery { terms_per_field };
let term_set_query = query_ast::TermSetQuery {
terms_per_field,
lenient,
};
Ok(term_set_query.into())
}
UserInputLeaf::Exists { field } => Ok(FieldPresenceQuery { field }.into()),
Expand Down
Loading

0 comments on commit 5391af8

Please sign in to comment.