diff --git a/quickwit/quickwit-doc-mapper/src/query_builder.rs b/quickwit/quickwit-doc-mapper/src/query_builder.rs index d2cf5edc3c7..9532313178f 100644 --- a/quickwit/quickwit-doc-mapper/src/query_builder.rs +++ b/quickwit/quickwit-doc-mapper/src/query_builder.rs @@ -256,16 +256,19 @@ fn extract_prefix_term_ranges( #[cfg(test)] mod test { + use quickwit_datetime::{parse_date_time_str, DateTimeInputFormat}; use quickwit_query::create_default_quickwit_tokenizer_manager; use quickwit_query::query_ast::query_ast_from_user_text; + use tantivy::columnar::MonotonicallyMappableToU64; use tantivy::schema::{Schema, FAST, INDEXED, STORED, TEXT}; + use tantivy::{DateOptions, DateTime, DateTimePrecision}; use super::build_query; use crate::{DYNAMIC_FIELD_NAME, SOURCE_FIELD_NAME}; - enum TestExpectation { - Err(&'static str), - Ok(&'static str), + enum TestExpectation<'a> { + Err(&'a str), + Ok(&'a str), } fn make_schema(dynamic_mode: bool) -> Schema { @@ -279,7 +282,10 @@ mod test { schema_builder.add_ip_addr_field("ip", FAST | STORED); schema_builder.add_ip_addr_field("ips", FAST); schema_builder.add_ip_addr_field("ip_notff", STORED); - schema_builder.add_date_field("dt", FAST); + let date_options = DateOptions::default() + .set_fast() + .set_precision(tantivy::DateTimePrecision::Milliseconds); + schema_builder.add_date_field("dt", date_options); schema_builder.add_u64_field("u64_fast", FAST | STORED); schema_builder.add_i64_field("i64_fast", FAST | STORED); schema_builder.add_f64_field("f64_fast", FAST | STORED); @@ -477,16 +483,56 @@ mod test { #[test] fn test_datetime_range_query() { - check_build_query_static_mode( - "dt:[2023-01-10T15:13:35Z TO 2023-01-10T15:13:40Z]", - Vec::new(), - TestExpectation::Ok("RangeQuery { field: \"dt\", value_type: Date"), - ); - check_build_query_static_mode( - "dt:<2023-01-10T15:13:35Z", - Vec::new(), - TestExpectation::Ok("RangeQuery { field: \"dt\", value_type: Date"), - ); + let input_formats = [DateTimeInputFormat::Rfc3339]; + { + // 
Check range on datetime in millisecond, precision has no impact as it is in + // milliseconds. + let start_date_time_str = "2023-01-10T08:38:51.150Z"; + let start_date_time = parse_date_time_str(start_date_time_str, &input_formats).unwrap(); + let start_date_time_u64 = start_date_time.to_u64(); + let end_date_time_str = "2023-01-10T08:38:51.160Z"; + let end_date_time: DateTime = + parse_date_time_str(end_date_time_str, &input_formats).unwrap(); + let end_date_time_u64 = end_date_time.to_u64(); + let expectation_with_lower_and_upper_bounds = format!( + r#"FastFieldRangeWeight {{ field: "dt", lower_bound: Included({start_date_time_u64}), upper_bound: Included({end_date_time_u64}), column_type_opt: Some(DateTime) }}"#, + ); + check_build_query_static_mode( + &format!("dt:[{start_date_time_str} TO {end_date_time_str}]"), + Vec::new(), + TestExpectation::Ok(&expectation_with_lower_and_upper_bounds), + ); + let expectation_with_upper_bound = format!( + r#"FastFieldRangeWeight {{ field: "dt", lower_bound: Unbounded, upper_bound: Excluded({end_date_time_u64}), column_type_opt: Some(DateTime) }}"#, + ); + check_build_query_static_mode( + &format!("dt:<{end_date_time_str}"), + Vec::new(), + TestExpectation::Ok(&expectation_with_upper_bound), + ); + } + + // Check range on datetime in microseconds and truncation to milliseconds. 
+ { + let start_date_time_str = "2023-01-10T08:38:51.000150Z"; + let start_date_time = parse_date_time_str(start_date_time_str, &input_formats) + .unwrap() + .truncate(DateTimePrecision::Milliseconds); + let start_date_time_u64 = start_date_time.to_u64(); + let end_date_time_str = "2023-01-10T08:38:51.000151Z"; + let end_date_time: DateTime = parse_date_time_str(end_date_time_str, &input_formats) + .unwrap() + .truncate(DateTimePrecision::Milliseconds); + let end_date_time_u64 = end_date_time.to_u64(); + let expectation_with_lower_and_upper_bounds = format!( + r#"FastFieldRangeWeight {{ field: "dt", lower_bound: Included({start_date_time_u64}), upper_bound: Included({end_date_time_u64}), column_type_opt: Some(DateTime) }}"#, + ); + check_build_query_static_mode( + &format!("dt:[{start_date_time_str} TO {end_date_time_str}]"), + Vec::new(), + TestExpectation::Ok(&expectation_with_lower_and_upper_bounds), + ); + } } #[test] diff --git a/quickwit/quickwit-query/src/query_ast/range_query.rs b/quickwit/quickwit-query/src/query_ast/range_query.rs index ef5d654af8a..16ada6047fe 100644 --- a/quickwit/quickwit-query/src/query_ast/range_query.rs +++ b/quickwit/quickwit-query/src/query_ast/range_query.rs @@ -24,6 +24,7 @@ use tantivy::query::{ FastFieldRangeWeight as TantivyFastFieldRangeQuery, RangeQuery as TantivyRangeQuery, }; use tantivy::schema::Schema as TantivySchema; +use tantivy::DateTime; use super::QueryAst; use crate::json_literal::InterpretUserInput; @@ -266,11 +267,19 @@ impl BuildTantivyAst for RangeQuery { field_name: field_entry.name().to_string(), }); } - tantivy::schema::FieldType::Date(_) => { + tantivy::schema::FieldType::Date(date_options) => { let (lower_bound, upper_bound) = convert_bounds(&self.lower_bound, &self.upper_bound, field_entry.name())?; - TantivyRangeQuery::new_date_bounds(self.field.clone(), lower_bound, upper_bound) - .into() + let truncate_datetime = + |date: &DateTime| date.truncate(date_options.get_precision()); + let 
truncated_lower_bound = map_bound(&lower_bound, truncate_datetime); + let truncated_upper_bound = map_bound(&upper_bound, truncate_datetime); + TantivyFastFieldRangeQuery::new::<DateTime>( + self.field.clone(), + truncated_lower_bound, + truncated_upper_bound, + ) + .into() } tantivy::schema::FieldType::Facet(_) => { return Err(InvalidQuery::RangeQueryNotSupportedForField { @@ -333,11 +342,20 @@ impl BuildTantivyAst for RangeQuery { } } +fn map_bound<TFrom, TTo>(bound: &Bound<TFrom>, transform: impl Fn(&TFrom) -> TTo) -> Bound<TTo> { + match bound { + Bound::Excluded(ref from_val) => Bound::Excluded(transform(from_val)), + Bound::Included(ref from_val) => Bound::Included(transform(from_val)), + Bound::Unbounded => Bound::Unbounded, + } +} + #[cfg(test)] mod tests { use std::ops::Bound; use tantivy::schema::{Schema, FAST, STORED, TEXT}; + use tantivy::DateOptions; use super::RangeQuery; use crate::query_ast::tantivy_query_ast::TantivyBoolQuery; @@ -352,6 +370,10 @@ mod tests { schema_builder.add_u64_field("my_u64_field", FAST); schema_builder.add_f64_field("my_f64_field", FAST); schema_builder.add_text_field("my_str_field", FAST); + let date_options = DateOptions::default() + .set_fast() + .set_precision(tantivy::DateTimePrecision::Milliseconds); + schema_builder.add_date_field("my_date_field", date_options); schema_builder.add_u64_field("my_u64_not_fastfield", STORED); if dynamic_mode { schema_builder.add_json_field("_dynamic", TEXT | STORED | FAST); @@ -359,12 +381,17 @@ mod tests { schema_builder.build() } - fn test_range_query_typed_field_util(field: &str, expected: &str) { + fn test_range_query_typed_field_util( + field: &str, + lower_value: JsonLiteral, + upper_value: JsonLiteral, + expected: &str, + ) { let schema = make_schema(false); let range_query = RangeQuery { field: field.to_string(), - lower_bound: Bound::Included(JsonLiteral::String("1980".to_string())), - upper_bound: Bound::Included(JsonLiteral::String("1989".to_string())), + lower_bound: Bound::Included(lower_value), + 
upper_bound: Bound::Included(upper_value), }; let tantivy_ast = range_query .build_tantivy_ast_call( @@ -384,17 +411,23 @@ mod tests { fn test_range_query_typed_field() { test_range_query_typed_field_util( "my_i64_field", + JsonLiteral::String("1980".to_string()), + JsonLiteral::String("1989".to_string()), "FastFieldRangeWeight { field: \"my_i64_field\", lower_bound: \ Included(9223372036854777788), upper_bound: Included(9223372036854777797), \ column_type_opt: Some(I64) }", ); test_range_query_typed_field_util( "my_u64_field", + JsonLiteral::String("1980".to_string()), + JsonLiteral::String("1989".to_string()), "FastFieldRangeWeight { field: \"my_u64_field\", lower_bound: Included(1980), \ upper_bound: Included(1989), column_type_opt: Some(U64) }", ); test_range_query_typed_field_util( "my_f64_field", + JsonLiteral::String("1980".to_string()), + JsonLiteral::String("1989".to_string()), "FastFieldRangeWeight { field: \"my_f64_field\", lower_bound: \ Included(13879794984393113600), upper_bound: Included(13879834566811713536), \ column_type_opt: Some(F64) }", diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0007-range_queries.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/0007-range_queries.yaml index a66284f6754..c3c625395c4 100644 --- a/quickwit/rest-api-tests/scenarii/es_compatibility/0007-range_queries.yaml +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/0007-range_queries.yaml @@ -159,3 +159,31 @@ expected: total: value: 86 relation: "eq" +--- +# Timestamp field with milliseconds precision 2015-02-01T00:00:00.001 +json: + query: + range: + created_at: + gte: "2015-02-01T00:00:00.001Z" + lt: "2015-02-01T00:00:00.002Z" +expected: + hits: + total: + value: 1 + relation: "eq" +--- +# Timestamp field with range in microseconds. +# Datetime will be truncated at milliseconds as +# defined in the doc mapper. 
+json: + query: + range: + created_at: + gte: "2015-02-01T00:00:00.001999Z" + lte: "2015-02-01T00:00:00.001999Z" +expected: + hits: + total: + value: 1 + relation: "eq" diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/_setup.quickwit.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/_setup.quickwit.yaml index 24089348d94..5eea75dd277 100644 --- a/quickwit/rest-api-tests/scenarii/es_compatibility/_setup.quickwit.yaml +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/_setup.quickwit.yaml @@ -27,6 +27,7 @@ json: - name: created_at type: datetime fast: true + fast_precision: milliseconds dynamic_mapping: expand_dots: true tokenizer: default diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/gharchive-bulk.json.gz b/quickwit/rest-api-tests/scenarii/es_compatibility/gharchive-bulk.json.gz index 459830d8abb..9b4bb284afc 100644 Binary files a/quickwit/rest-api-tests/scenarii/es_compatibility/gharchive-bulk.json.gz and b/quickwit/rest-api-tests/scenarii/es_compatibility/gharchive-bulk.json.gz differ