Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ES API compatible date time parsers #5461

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions quickwit/quickwit-datetime/src/date_time_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ pub enum DateTimeInputFormat {
Rfc3339,
Strptime(StrptimeParser),
Timestamp,
ESStrictDateOptionalTime,
ESStrictDateTimeNoMillis,
}

impl DateTimeInputFormat {
Expand All @@ -172,6 +174,8 @@ impl DateTimeInputFormat {
DateTimeInputFormat::Rfc3339 => "rfc3339",
DateTimeInputFormat::Strptime(parser) => parser.borrow_strptime_format(),
DateTimeInputFormat::Timestamp => "unix_timestamp",
DateTimeInputFormat::ESStrictDateOptionalTime => "strict_date_optional_time",
DateTimeInputFormat::ESStrictDateTimeNoMillis => "strict_date_time_no_millis",
}
}
}
Expand All @@ -191,6 +195,8 @@ impl FromStr for DateTimeInputFormat {
"rfc2822" => DateTimeInputFormat::Rfc2822,
"rfc3339" => DateTimeInputFormat::Rfc3339,
"unix_timestamp" => DateTimeInputFormat::Timestamp,
"strict_date_optional_time" => DateTimeInputFormat::ESStrictDateOptionalTime,
"strict_date_time_no_millis" => DateTimeInputFormat::ESStrictDateTimeNoMillis,
_ => {
if !is_strftime_formatting(date_time_format_str) {
return Err(format!(
Expand Down
180 changes: 179 additions & 1 deletion quickwit/quickwit-datetime/src/date_time_parsing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ use std::time::Duration;

use itertools::Itertools;
use time::format_description::well_known::{Iso8601, Rfc2822, Rfc3339};
use time::OffsetDateTime;
use time::format_description::FormatItem;
use time::macros::format_description;
use time::{Date, Month, OffsetDateTime, PrimitiveDateTime, Time};

use super::date_time_format::DateTimeInputFormat;
use crate::TantivyDateTime;
Expand Down Expand Up @@ -52,6 +54,16 @@ pub fn parse_date_time_str(
.map(TantivyDateTime::from_utc)
.ok(),
DateTimeInputFormat::Timestamp => parse_timestamp_str(date_time_str),
DateTimeInputFormat::ESStrictDateTimeNoMillis => {
parse_es_strict_date_time_no_millis(date_time_str)
.map(TantivyDateTime::from_utc)
.ok()
}
DateTimeInputFormat::ESStrictDateOptionalTime => {
parse_es_strict_date_optional_time(date_time_str)
.map(TantivyDateTime::from_utc)
.ok()
}
};
if let Some(date_time) = date_time_opt {
return Ok(date_time);
Expand Down Expand Up @@ -141,6 +153,99 @@ fn parse_rfc3339(value: &str) -> Result<OffsetDateTime, String> {
OffsetDateTime::parse(value, &Rfc3339).map_err(|error| error.to_string())
}

/// Parses an ElasticSearch `strict_date_time_no_millis` value (yyyy-MM-dd'T'HH:mm:ssZ).
///
/// A value ending in `Z` is read as UTC. Any other value must carry a signed hour offset,
/// optionally followed by `:` and the offset minutes. On failure, the parse error is
/// returned as its display string.
fn parse_es_strict_date_time_no_millis(value: &str) -> Result<OffsetDateTime, String> {
    static FORMAT_UTC: &[FormatItem<'_>] =
        format_description!("[year]-[month]-[day]T[hour]:[minute]:[second]Z");
    static FORMAT_WITH_OFFSET: &[FormatItem<'_>] = format_description!("[year]-[month]-[day]T[hour]:[minute]:[second][offset_hour sign:mandatory][optional [:[offset_minute]]]");

    // Anything that does not end in the literal `Z` must spell out its offset.
    if !value.ends_with('Z') {
        return OffsetDateTime::parse(value, FORMAT_WITH_OFFSET)
            .map_err(|error| error.to_string());
    }

    // The `Z` suffix is matched as a literal, so the parsed value carries no offset and is
    // pinned to UTC explicitly.
    match PrimitiveDateTime::parse(value, FORMAT_UTC) {
        Ok(naive_date_time) => Ok(naive_date_time.assume_utc()),
        Err(parse_error) => Err(parse_error.to_string()),
    }
}

/// Parses a date in the ElasticSearch `strict_date_optional_time` format.
///
/// Only the year is required. It may be followed, in order, by a month, a day, a time
/// (`T` + hours and minutes, optionally seconds and fractional seconds) and finally either a
/// literal `Z` or a signed UTC offset. Values without an offset are interpreted as UTC, and
/// omitted components default to January, the first day of the month and midnight.
///
/// Because the format is *strict*, the year must be written with exactly four digits and the
/// month with exactly two. This also holds for the `yyyy` and `yyyy-MM` shorthand forms, so
/// inputs such as `19`, `2019-3` or `+2019` are rejected.
///
/// # Errors
///
/// Returns a message naming the offending input when it matches none of the supported shapes.
fn parse_es_strict_date_optional_time(value: &str) -> Result<OffsetDateTime, String> {
    // Formats with 'Z' at the end (indicating UTC)
    static FORMAT_DATE_TIME_MILLIS_Z: &[FormatItem<'_>] =
        format_description!("[year]-[month]-[day]T[hour]:[minute]:[second].[subsecond]Z");
    static FORMAT_DATE_TIME_SECONDS_Z: &[FormatItem<'_>] =
        format_description!("[year]-[month]-[day]T[hour]:[minute]:[second]Z");

    // Formats with an explicit UTC offset at the end
    static FORMAT_DATE_TIME_MILLIS_ZONE: &[FormatItem<'_>] = format_description!(
        "[year]-[month]-[day]T[hour]:[minute]:[second].[subsecond][offset_hour sign:mandatory][optional [:[offset_minute]]]"
    );
    static FORMAT_DATE_TIME_SECONDS_ZONE: &[FormatItem<'_>] = format_description!(
        "[year]-[month]-[day]T[hour]:[minute]:[second][offset_hour sign:mandatory][optional [:[offset_minute]]]"
    );

    // Formats without 'Z' or offset
    static FORMAT_DATE_TIME_MILLIS: &[FormatItem<'_>] =
        format_description!("[year]-[month]-[day]T[hour]:[minute]:[second].[subsecond]");
    static FORMAT_DATE_TIME_SECONDS: &[FormatItem<'_>] =
        format_description!("[year]-[month]-[day]T[hour]:[minute]:[second]");
    static FORMAT_DATE_TIME: &[FormatItem<'_>] =
        format_description!("[year]-[month]-[day]T[hour]:[minute]");

    // Date-only format
    static FORMAT_DATE: &[FormatItem<'_>] = format_description!("[year]-[month]-[day]");

    /// Parses a component made of exactly `width` ASCII digits (no sign, no whitespace).
    fn parse_fixed_width<T: std::str::FromStr>(component: &str, width: usize) -> Option<T> {
        if component.len() == width && component.bytes().all(|byte| byte.is_ascii_digit()) {
            component.parse().ok()
        } else {
            None
        }
    }

    /// Parses the `yyyy` and `yyyy-MM` shorthand forms, which the format descriptions above
    /// cannot express, into midnight UTC on the first day of the given (or first) month.
    fn parse_year_and_optional_month(value: &str) -> Option<OffsetDateTime> {
        let (year_str, month_str_opt) = match value.split_once('-') {
            Some((year_str, month_str)) => (year_str, Some(month_str)),
            None => (value, None),
        };
        // Fixed widths keep this fallback as strict as the format-description branches.
        let year: i32 = parse_fixed_width(year_str, 4)?;
        let month = match month_str_opt {
            Some(month_str) => Month::try_from(parse_fixed_width::<u8>(month_str, 2)?).ok()?,
            None => Month::January,
        };
        let date = Date::from_calendar_date(year, month, 1).ok()?;
        Some(date.with_time(Time::MIDNIGHT).assume_utc())
    }

    // Try parsing with different formats, from most specific to least specific.
    OffsetDateTime::parse(value, FORMAT_DATE_TIME_MILLIS_ZONE)
        .or_else(|_| OffsetDateTime::parse(value, FORMAT_DATE_TIME_SECONDS_ZONE))
        .or_else(|_| {
            PrimitiveDateTime::parse(value, FORMAT_DATE_TIME_MILLIS_Z).map(|dt| dt.assume_utc())
        })
        .or_else(|_| {
            PrimitiveDateTime::parse(value, FORMAT_DATE_TIME_SECONDS_Z).map(|dt| dt.assume_utc())
        })
        .or_else(|_| {
            PrimitiveDateTime::parse(value, FORMAT_DATE_TIME_MILLIS).map(|dt| dt.assume_utc())
        })
        .or_else(|_| {
            PrimitiveDateTime::parse(value, FORMAT_DATE_TIME_SECONDS).map(|dt| dt.assume_utc())
        })
        .or_else(|_| PrimitiveDateTime::parse(value, FORMAT_DATE_TIME).map(|dt| dt.assume_utc()))
        .or_else(|_| {
            // Handle date-only string
            Date::parse(value, FORMAT_DATE).map(|date| date.with_time(Time::MIDNIGHT).assume_utc())
        })
        // The individual parse errors are not surfaced; only the summary below is reported.
        .ok()
        .or_else(|| parse_year_and_optional_month(value))
        .ok_or_else(|| {
            format!(
                "Failed to parse date string '{}' as strict_date_optional_time",
                value
            )
        })
}

/// Returns the appropriate [`TantivyDateTime`] for the specified Unix timestamp.
///
/// This function will choose the timestamp precision based on the value range.
Expand Down Expand Up @@ -568,4 +673,77 @@ mod tests {
assert_eq!(date_time.into_timestamp_micros(), max_ts_micros);
}
}

#[test]
fn test_parse_es_strict_date_time_no_millis() {
    // Each case pairs an input string with the UTC instant it must resolve to.
    let test_cases = [
        (
            "2019-03-23T21:34:46-04:00",
            datetime!(2019-03-24 01:34:46 UTC),
        ),
        (
            "2019-03-23T21:34:46+05:30",
            datetime!(2019-03-23 16:04:46 UTC),
        ),
        ("2019-03-23T21:34:46+01", datetime!(2019-03-23 20:34:46 UTC)),
        (
            "2019-03-23T21:34:46+00:00",
            datetime!(2019-03-23 21:34:46 UTC),
        ),
        ("2019-03-23T21:34:46Z", datetime!(2019-03-23 21:34:46 UTC)),
    ];
    let input_formats = [DateTimeInputFormat::ESStrictDateTimeNoMillis];

    for (input, expected) in test_cases {
        let parsed = match parse_date_time_str(input, &input_formats) {
            Ok(date_time) => date_time,
            Err(e) => panic!("Failed to parse {}: {}", input, e),
        };
        let actual_secs = parsed.into_timestamp_secs();
        let expected_secs = expected.unix_timestamp();

        assert_eq!(actual_secs, expected_secs, "Failed for input: {}", input);
    }
}
#[test]
fn test_parse_es_strict_date_optional_time() {
    // Cases run from the shortest accepted form (year only) to the fullest one
    // (fractional seconds plus an explicit offset).
    let test_cases = [
        ("2019", datetime!(2019-01-01 00:00:00 UTC)),
        ("2019-03", datetime!(2019-03-01 00:00:00 UTC)),
        ("2019-03-23", datetime!(2019-03-23 00:00:00 UTC)),
        ("2019-03-23T21:34", datetime!(2019-03-23 21:34:00 UTC)),
        ("2019-03-23T21:34:46", datetime!(2019-03-23 21:34:46 UTC)),
        ("2019-03-23T21:34:46Z", datetime!(2019-03-23 21:34:46 UTC)),
        (
            "2019-03-23T21:34:46+01:00",
            datetime!(2019-03-23 20:34:46 UTC),
        ),
        (
            "2019-03-23T21:34:46.123",
            datetime!(2019-03-23 21:34:46.123 UTC),
        ),
        (
            "2019-03-23T21:34:46.123Z",
            datetime!(2019-03-23 21:34:46.123 UTC),
        ),
        (
            "2019-03-23T21:34:46.123+01:00",
            datetime!(2019-03-23 20:34:46.123 UTC),
        ),
    ];
    let input_formats = [DateTimeInputFormat::ESStrictDateOptionalTime];

    for (input, expected) in test_cases {
        let parsed = match parse_date_time_str(input, &input_formats) {
            Ok(date_time) => date_time,
            Err(e) => panic!("Failed to parse {}: {}", input, e),
        };
        // Compared at nanosecond resolution so the fractional-second cases are checked too.
        let actual_nanos = parsed.into_timestamp_nanos();
        let expected_nanos = expected.unix_timestamp_nanos() as i64;

        assert_eq!(actual_nanos, expected_nanos, "Failed for input: {}", input);
    }
}
}
65 changes: 53 additions & 12 deletions quickwit/quickwit-query/src/elastic_query_dsl/range_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
use std::ops::Bound;
use std::str::FromStr;

use quickwit_datetime::StrptimeParser;
use quickwit_datetime::DateTimeInputFormat;
use serde::Deserialize;

use crate::elastic_query_dsl::one_field_map::OneFieldMap;
Expand Down Expand Up @@ -60,14 +60,18 @@ impl ConvertibleToQueryAst for RangeQuery {
format,
} = self.value;
let (gt, gte, lt, lte) = if let Some(JsonLiteral::String(fmt)) = format {
let parser = StrptimeParser::from_str(&fmt).map_err(|reason| {
let date_time_format = DateTimeInputFormat::from_str(&fmt).map_err(|reason| {
anyhow::anyhow!("failed to create parser from : {}; reason: {}", fmt, reason)
})?;
(
gt.map(|v| parse_and_convert(v, &parser)).transpose()?,
gte.map(|v| parse_and_convert(v, &parser)).transpose()?,
lt.map(|v| parse_and_convert(v, &parser)).transpose()?,
lte.map(|v| parse_and_convert(v, &parser)).transpose()?,
gt.map(|v| parse_and_convert(v, &date_time_format))
.transpose()?,
gte.map(|v| parse_and_convert(v, &date_time_format))
.transpose()?,
lt.map(|v| parse_and_convert(v, &date_time_format))
.transpose()?,
lte.map(|v| parse_and_convert(v, &date_time_format))
.transpose()?,
)
} else {
(gt, gte, lt, lte)
Expand Down Expand Up @@ -97,11 +101,18 @@ impl ConvertibleToQueryAst for RangeQuery {
}
}

fn parse_and_convert(literal: JsonLiteral, parser: &StrptimeParser) -> anyhow::Result<JsonLiteral> {
fn parse_and_convert(
literal: JsonLiteral,
date_time_format: &DateTimeInputFormat,
) -> anyhow::Result<JsonLiteral> {
if let JsonLiteral::String(date_time_str) = literal {
let parsed_date_time = parser
.parse_date_time(&date_time_str)
.map_err(|reason| anyhow::anyhow!("Failed to parse date time: {}", reason))?;
let parsed_date_time = quickwit_datetime::parse_date_time_str(
date_time_str.as_str(),
&[date_time_format.clone()],
)
.map_err(|reason| anyhow::anyhow!("Failed to parse date time: {}", reason))?
.into_utc();

Ok(JsonLiteral::String(parsed_date_time.to_string()))
} else {
Ok(literal)
Expand All @@ -112,14 +123,14 @@ fn parse_and_convert(literal: JsonLiteral, parser: &StrptimeParser) -> anyhow::R
mod tests {
use std::str::FromStr;

use quickwit_datetime::StrptimeParser;
use quickwit_datetime::DateTimeInputFormat;

use crate::elastic_query_dsl::range_query::parse_and_convert;
use crate::JsonLiteral;

#[test]
fn test_parse_and_convert() -> anyhow::Result<()> {
let parser = StrptimeParser::from_str("%Y-%m-%d %H:%M:%S").unwrap();
let parser = DateTimeInputFormat::from_str("%Y-%m-%d %H:%M:%S").unwrap();

// valid datetime
let input = JsonLiteral::String("2022-12-30 05:45:00".to_string());
Expand All @@ -145,4 +156,34 @@ mod tests {

Ok(())
}

#[test]
fn test_named_iso8601_parse() -> anyhow::Result<()> {
    // The format is looked up by its name rather than given as a strptime pattern.
    let date_time_format = DateTimeInputFormat::from_str("iso8601").unwrap();

    // A valid datetime carrying a +02:00 offset is expected to come back expressed in UTC.
    let literal = JsonLiteral::String("2015-06-26T16:43:23+0200".to_string());
    let expected = JsonLiteral::String("2015-06-26 14:43:23.0 +00:00:00".to_string());

    let converted = parse_and_convert(literal, &date_time_format)?;
    assert_eq!(converted, expected);

    Ok(())
}

#[test]
fn test_named_strict_date_optional_time_parse() -> anyhow::Result<()> {
    // The format is looked up by its ElasticSearch name.
    let date_time_format = DateTimeInputFormat::from_str("strict_date_optional_time").unwrap();

    // A valid datetime with the `Z` suffix keeps its wall-clock time once rendered in UTC.
    let literal = JsonLiteral::String("2015-06-26T16:43:23Z".to_string());
    let expected = JsonLiteral::String("2015-06-26 16:43:23.0 +00:00:00".to_string());

    let converted = parse_and_convert(literal, &date_time_format)?;
    assert_eq!(converted, expected);

    Ok(())
}
}
Loading