Skip to content

Commit

Permalink
Use TantivyFastFieldRangeQuery for datetime range query. (#4116)
Browse files Browse the repository at this point in the history
* Use TantivyFastFieldRangeQuery for datetime range query.

* Add/fix tests.
  • Loading branch information
fmassot authored Nov 11, 2023
1 parent 3f449ee commit 1d6753b
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 20 deletions.
74 changes: 60 additions & 14 deletions quickwit/quickwit-doc-mapper/src/query_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -256,16 +256,19 @@ fn extract_prefix_term_ranges(

#[cfg(test)]
mod test {
use quickwit_datetime::{parse_date_time_str, DateTimeInputFormat};
use quickwit_query::create_default_quickwit_tokenizer_manager;
use quickwit_query::query_ast::query_ast_from_user_text;
use tantivy::columnar::MonotonicallyMappableToU64;
use tantivy::schema::{Schema, FAST, INDEXED, STORED, TEXT};
use tantivy::{DateOptions, DateTime, DateTimePrecision};

use super::build_query;
use crate::{DYNAMIC_FIELD_NAME, SOURCE_FIELD_NAME};

enum TestExpectation {
Err(&'static str),
Ok(&'static str),
/// Expectation handed to the query-building check helpers of this test module.
///
/// The `'a` lifetime lets a test borrow an expectation string built at runtime (for example
/// with `format!`) instead of being limited to `'static` literals.
///
/// NOTE(review): how the carried string is compared with the actual result is decided by the
/// check helpers, which are not visible here — confirm there.
enum TestExpectation<'a> {
/// Presumably: building the query is expected to fail with an error matching this string.
Err(&'a str),
/// Presumably: building the query is expected to succeed with a query matching this string.
Ok(&'a str),
}

fn make_schema(dynamic_mode: bool) -> Schema {
Expand All @@ -279,7 +282,10 @@ mod test {
schema_builder.add_ip_addr_field("ip", FAST | STORED);
schema_builder.add_ip_addr_field("ips", FAST);
schema_builder.add_ip_addr_field("ip_notff", STORED);
schema_builder.add_date_field("dt", FAST);
let date_options = DateOptions::default()
.set_fast()
.set_precision(tantivy::DateTimePrecision::Milliseconds);
schema_builder.add_date_field("dt", date_options);
schema_builder.add_u64_field("u64_fast", FAST | STORED);
schema_builder.add_i64_field("i64_fast", FAST | STORED);
schema_builder.add_f64_field("f64_fast", FAST | STORED);
Expand Down Expand Up @@ -477,16 +483,56 @@ mod test {

// Exercises range queries on the `dt` date field through `check_build_query_static_mode`.
// Expected bounds are computed from the same input strings as the query, parsed as RFC 3339
// and converted with `to_u64()`.
#[test]
fn test_datetime_range_query() {
// NOTE(review): the two checks below expect a `RangeQuery`, while every later check in this
// function expects a `FastFieldRangeWeight` for the same field. They look like the
// pre-change side of the diff — confirm before relying on them.
check_build_query_static_mode(
"dt:[2023-01-10T15:13:35Z TO 2023-01-10T15:13:40Z]",
Vec::new(),
TestExpectation::Ok("RangeQuery { field: \"dt\", value_type: Date"),
);
check_build_query_static_mode(
"dt:<2023-01-10T15:13:35Z",
Vec::new(),
TestExpectation::Ok("RangeQuery { field: \"dt\", value_type: Date"),
);
// Only the RFC 3339 input format is used to parse the bounds in this test.
let input_formats = [DateTimeInputFormat::Rfc3339];
{
// Check a range whose bounds are given with millisecond resolution: no truncation is
// applied to the expected values in this block.
let start_date_time_str = "2023-01-10T08:38:51.150Z";
let start_date_time = parse_date_time_str(start_date_time_str, &input_formats).unwrap();
let start_date_time_u64 = start_date_time.to_u64();
let end_date_time_str = "2023-01-10T08:38:51.160Z";
let end_date_time: DateTime =
parse_date_time_str(end_date_time_str, &input_formats).unwrap();
let end_date_time_u64 = end_date_time.to_u64();
// `[start TO end]` is expected to give inclusive lower and upper bounds.
let expectation_with_lower_and_upper_bounds = format!(
r#"FastFieldRangeWeight {{ field: "dt", lower_bound: Included({start_date_time_u64}), upper_bound: Included({end_date_time_u64}), column_type_opt: Some(DateTime) }}"#,
);
check_build_query_static_mode(
&format!("dt:[{start_date_time_str} TO {end_date_time_str}]"),
Vec::new(),
TestExpectation::Ok(&expectation_with_lower_and_upper_bounds),
);
// `<end` is expected to give an unbounded lower bound and an exclusive upper bound.
let expectation_with_upper_bound = format!(
r#"FastFieldRangeWeight {{ field: "dt", lower_bound: Unbounded, upper_bound: Excluded({end_date_time_u64}), column_type_opt: Some(DateTime) }}"#,
);
check_build_query_static_mode(
&format!("dt:<{end_date_time_str}"),
Vec::new(),
TestExpectation::Ok(&expectation_with_upper_bound),
);
}

// Check a range whose bounds are given in microseconds: the expected bounds are the parsed
// values truncated to milliseconds.
{
let start_date_time_str = "2023-01-10T08:38:51.000150Z";
let start_date_time = parse_date_time_str(start_date_time_str, &input_formats)
.unwrap()
.truncate(DateTimePrecision::Milliseconds);
let start_date_time_u64 = start_date_time.to_u64();
let end_date_time_str = "2023-01-10T08:38:51.000151Z";
let end_date_time: DateTime = parse_date_time_str(end_date_time_str, &input_formats)
.unwrap()
.truncate(DateTimePrecision::Milliseconds);
let end_date_time_u64 = end_date_time.to_u64();
// The query string keeps the microsecond inputs; only the expectation is truncated.
let expectation_with_lower_and_upper_bounds = format!(
r#"FastFieldRangeWeight {{ field: "dt", lower_bound: Included({start_date_time_u64}), upper_bound: Included({end_date_time_u64}), column_type_opt: Some(DateTime) }}"#,
);
check_build_query_static_mode(
&format!("dt:[{start_date_time_str} TO {end_date_time_str}]"),
Vec::new(),
TestExpectation::Ok(&expectation_with_lower_and_upper_bounds),
);
}
}

#[test]
Expand Down
45 changes: 39 additions & 6 deletions quickwit/quickwit-query/src/query_ast/range_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use tantivy::query::{
FastFieldRangeWeight as TantivyFastFieldRangeQuery, RangeQuery as TantivyRangeQuery,
};
use tantivy::schema::Schema as TantivySchema;
use tantivy::DateTime;

use super::QueryAst;
use crate::json_literal::InterpretUserInput;
Expand Down Expand Up @@ -266,11 +267,19 @@ impl BuildTantivyAst for RangeQuery {
field_name: field_entry.name().to_string(),
});
}
tantivy::schema::FieldType::Date(_) => {
tantivy::schema::FieldType::Date(date_options) => {
let (lower_bound, upper_bound) =
convert_bounds(&self.lower_bound, &self.upper_bound, field_entry.name())?;
TantivyRangeQuery::new_date_bounds(self.field.clone(), lower_bound, upper_bound)
.into()
let truncate_datetime =
|date: &DateTime| date.truncate(date_options.get_precision());
let truncated_lower_bound = map_bound(&lower_bound, truncate_datetime);
let truncated_upper_bound = map_bound(&upper_bound, truncate_datetime);
TantivyFastFieldRangeQuery::new::<DateTime>(
self.field.clone(),
truncated_lower_bound,
truncated_upper_bound,
)
.into()
}
tantivy::schema::FieldType::Facet(_) => {
return Err(InvalidQuery::RangeQueryNotSupportedForField {
Expand Down Expand Up @@ -333,11 +342,20 @@ impl BuildTantivyAst for RangeQuery {
}
}

/// Builds a new [`Bound`] of the same kind as `bound`, with its inner value (if any) replaced
/// by the result of calling `transform` on it.
///
/// `Bound::Unbounded` carries no value and is returned as is, without calling `transform`.
fn map_bound<TFrom, TTo>(bound: &Bound<TFrom>, transform: impl Fn(&TFrom) -> TTo) -> Bound<TTo> {
    match bound {
        Bound::Unbounded => Bound::Unbounded,
        Bound::Included(value) => Bound::Included(transform(value)),
        Bound::Excluded(value) => Bound::Excluded(transform(value)),
    }
}

#[cfg(test)]
mod tests {
use std::ops::Bound;

use tantivy::schema::{Schema, FAST, STORED, TEXT};
use tantivy::DateOptions;

use super::RangeQuery;
use crate::query_ast::tantivy_query_ast::TantivyBoolQuery;
Expand All @@ -352,19 +370,28 @@ mod tests {
schema_builder.add_u64_field("my_u64_field", FAST);
schema_builder.add_f64_field("my_f64_field", FAST);
schema_builder.add_text_field("my_str_field", FAST);
let date_options = DateOptions::default()
.set_fast()
.set_precision(tantivy::DateTimePrecision::Milliseconds);
schema_builder.add_date_field("my_date_field", date_options);
schema_builder.add_u64_field("my_u64_not_fastfield", STORED);
if dynamic_mode {
schema_builder.add_json_field("_dynamic", TEXT | STORED | FAST);
}
schema_builder.build()
}

fn test_range_query_typed_field_util(field: &str, expected: &str) {
fn test_range_query_typed_field_util(
field: &str,
lower_value: JsonLiteral,
upper_value: JsonLiteral,
expected: &str,
) {
let schema = make_schema(false);
let range_query = RangeQuery {
field: field.to_string(),
lower_bound: Bound::Included(JsonLiteral::String("1980".to_string())),
upper_bound: Bound::Included(JsonLiteral::String("1989".to_string())),
lower_bound: Bound::Included(lower_value),
upper_bound: Bound::Included(upper_value),
};
let tantivy_ast = range_query
.build_tantivy_ast_call(
Expand All @@ -384,17 +411,23 @@ mod tests {
fn test_range_query_typed_field() {
test_range_query_typed_field_util(
"my_i64_field",
JsonLiteral::String("1980".to_string()),
JsonLiteral::String("1989".to_string()),
"FastFieldRangeWeight { field: \"my_i64_field\", lower_bound: \
Included(9223372036854777788), upper_bound: Included(9223372036854777797), \
column_type_opt: Some(I64) }",
);
test_range_query_typed_field_util(
"my_u64_field",
JsonLiteral::String("1980".to_string()),
JsonLiteral::String("1989".to_string()),
"FastFieldRangeWeight { field: \"my_u64_field\", lower_bound: Included(1980), \
upper_bound: Included(1989), column_type_opt: Some(U64) }",
);
test_range_query_typed_field_util(
"my_f64_field",
JsonLiteral::String("1980".to_string()),
JsonLiteral::String("1989".to_string()),
"FastFieldRangeWeight { field: \"my_f64_field\", lower_bound: \
Included(13879794984393113600), upper_bound: Included(13879834566811713536), \
column_type_opt: Some(F64) }",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,31 @@ expected:
total:
value: 86
relation: "eq"
---
# Timestamp field with milliseconds precision 2015-02-01T00:00:00.001
json:
query:
range:
created_at:
gte: "2015-02-01T00:00:00.001Z"
lt: "2015-02-01T00:00:00.002Z"
expected:
hits:
total:
value: 1
relation: "eq"
---
# Timestamp field with range bounds given in microseconds.
# The bounds are truncated to milliseconds, the precision
# defined in the doc mapper.
json:
query:
range:
created_at:
gte: "2015-02-01T00:00:00.001999Z"
lte: "2015-02-01T00:00:00.001999Z"
expected:
hits:
total:
value: 1
relation: "eq"
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ json:
- name: created_at
type: datetime
fast: true
fast_precision: milliseconds
dynamic_mapping:
expand_dots: true
tokenizer: default
Expand Down
Binary file not shown.

0 comments on commit 1d6753b

Please sign in to comment.