From 81bae66063d5da59db9561ca3131489405c8f9df Mon Sep 17 00:00:00 2001 From: Adrien Guillo Date: Fri, 3 Jan 2025 17:24:10 +0100 Subject: [PATCH] Remove support for 2-digit years in java datetime parser (#5596) --- .../src/java_date_time_format.rs | 104 ++++++------------ 1 file changed, 31 insertions(+), 73 deletions(-) diff --git a/quickwit/quickwit-datetime/src/java_date_time_format.rs b/quickwit/quickwit-datetime/src/java_date_time_format.rs index 1cc035c90f3..1a13bbdc0dc 100644 --- a/quickwit/quickwit-datetime/src/java_date_time_format.rs +++ b/quickwit/quickwit-datetime/src/java_date_time_format.rs @@ -36,7 +36,6 @@ use crate::date_time_format; const JAVA_DATE_FORMAT_TOKENS: &[&str] = &[ "yyyy", "xxxx", - "xx[xx]", "SSSSSSSSS", // For nanoseconds "SSSSSSS", // For microseconds "SSSSSS", // For fractional seconds up to six digits @@ -45,10 +44,8 @@ const JAVA_DATE_FORMAT_TOKENS: &[&str] = &[ "SSS", "SS", "ZZ", - "xx", "ww", "w[w]", - "yy", "MM", "dd", "HH", @@ -112,29 +109,12 @@ fn build_zone_offset(_: &str) -> Option { )) } -fn build_year_item(ptn: &str) -> Option { - let mut full_year = Year::default(); - full_year.repr = YearRepr::Full; - let full_year_component = OwnedFormatItem::Component(Component::Year(full_year)); - - let mut short_year = Year::default(); - short_year.repr = YearRepr::LastTwo; - let short_year_component = OwnedFormatItem::Component(Component::Year(short_year)); - - if ptn.len() == 4 { - Some(full_year_component) - } else if ptn.len() == 2 { - Some(short_year_component) - } else { - Some(OwnedFormatItem::First( - vec![full_year_component, short_year_component].into_boxed_slice(), - )) - } -} - -fn build_week_based_year_item(ptn: &str) -> Option { - // TODO no `Component` for that - build_year_item(ptn) +// There is a `YearRepr::LastTwo` representation in the time crate, but the parser is unreliable, so +// we only support `YearRepr::Full` for now. See also https://github.com/time-rs/time/issues/649. +const fn year_item() -> Option { + let mut year_component = Year::default(); + year_component.repr = YearRepr::Full; + Some(OwnedFormatItem::Component(Component::Year(year_component))) } fn build_month_item(ptn: &str) -> Option { @@ -256,8 +236,7 @@ fn match_java_date_format_token( } let format_item = match *token { - "yyyy" | "yy" => build_year_item(token), - "xxxx" | "xx[xx]" | "xx" => build_week_based_year_item(token), + "yyyy" | "xxxx" => year_item(), "MM" | "M" => build_month_item(token), "dd" | "d" => build_day_item(token), "HH" | "H" => build_hour_item(token), @@ -269,7 +248,7 @@ fn match_java_date_format_token( "Z" => build_zone_offset(token), "ww" | "w[w]" | "w" => build_week_of_year_item(token), "e" => build_day_of_week_item(token), - _ => return Err(format!("Unrecognized token '{}'", token)), + _ => return Err(format!("unrecognized token '{token}'")), }; return Ok(format_item); } @@ -299,16 +278,16 @@ fn resolve_java_datetime_format_alias(java_datetime_format: &str) -> &str { m.insert("basic_date", "yyyyMMdd"); m.insert("strict_basic_week_date", "xxxx'W'wwe"); - m.insert("basic_week_date", "xx[xx]'W'wwe"); + m.insert("basic_week_date", "xxxx'W'wwe"); m.insert("strict_basic_week_date_time", "xxxx'W'wwe'T'HHmmss.SSSZ"); - m.insert("basic_week_date_time", "xx[xx]'W'wwe'T'HHmmss.SSSZ"); + m.insert("basic_week_date_time", "xxxx'W'wwe'T'HHmmss.SSSZ"); m.insert( "strict_basic_week_date_time_no_millis", "xxxx'W'wwe'T'HHmmssZ", ); - m.insert("basic_week_date_time_no_millis", "xx[xx]'W'wwe'T'HHmmssZ"); + m.insert("basic_week_date_time_no_millis", "xxxx'W'wwe'T'HHmmssZ"); m.insert("strict_week_date", "xxxx-'W'ww-e"); m.insert("week_date", "xxxx-'W'w[w]-e"); @@ -356,8 +335,8 @@ impl StrptimeParser { .is_empty() { return Err(format!( - "datetime string `{}` does not match strptime format `{}`", - date_time_str, &self.strptime_format + "datetime string `{date_time_str}` does not match strptime format `{}`", + self.strptime_format )); } @@ -590,17 +569,12 @@ mod tests { "2024W313", datetime!(2024-08-01 0:00:00.0 +00:00:00), ); - test_parse_java_datetime_aux( - "basic_week_date", - "24W313", - datetime!(2024-08-01 0:00:00.0 +00:00:00), - ); - // // ❌ 'the 'year' component could not be parsed' - // test_parse_java_datetime_aux( - // "basic_week_date", - // "1W313", - // datetime!(2018-08-02 0:00:00.0 +00:00:00), - // ); + let parser = StrptimeParser::from_java_datetime_format("basic_week_date").unwrap(); + parser.parse_date_time("24W313").unwrap_err(); + + let parser = StrptimeParser::from_java_datetime_format("basic_week_date").unwrap(); + parser.parse_date_time("1W313").unwrap_err(); + test_parse_java_datetime_aux( "basic_week_date_time", "2018W313T121212.1Z", @@ -706,7 +680,7 @@ mod tests { for (date_str, &expected_dt) in dates.iter().zip(expected.iter()) { let parsed_dt = parser .parse_date_time(date_str) - .unwrap_or_else(|e| panic!("Failed to parse {}: {}", date_str, e)); + .unwrap_or_else(|error| panic!("failed to parse {date_str}: {error}")); assert_eq!(parsed_dt, expected_dt); } } @@ -736,18 +710,18 @@ mod tests { for (date_str, &expected_dt) in dates.iter().zip(expected.iter()) { let parsed_dt = parser .parse_date_time(date_str) - .unwrap_or_else(|e| panic!("Failed to parse {}: {}", date_str, e)); + .unwrap_or_else(|error| panic!("failed to parse {date_str}: {error}")); assert_eq!(parsed_dt, expected_dt); } } #[test] fn test_parse_java_datetime_format_items() { - let format_str = "xx[xx]'W'wwe"; + let format_str = "xxxx'W'wwe"; let result = parse_java_datetime_format_items(format_str).unwrap(); // We expect the tokens to be parsed as: - // - 'xx[xx]' (week-based year) with optional length + // - 'xxxx' (week-based year) // - 'W' (literal) // - 'ww' (week of year) // - 'e' (day of week) @@ -756,37 +730,22 @@ mod tests { // Verify each token match &result[0] { - OwnedFormatItem::First(boxed_slice) => { - assert_eq!(boxed_slice.len(), 2); - match (&boxed_slice[0], &boxed_slice[1]) { - ( - OwnedFormatItem::Component(Component::Year(_)), - OwnedFormatItem::Component(Component::Year(_)), - ) => {} - unexpected => { - panic!("Expected two Year components, but found: {:?}", unexpected) - } - } + OwnedFormatItem::Component(Component::Year(year)) => { + assert_eq!(year.repr, YearRepr::Full); } - unexpected => panic!( - "Expected First with two Year components, but found: {:?}", - unexpected - ), + unexpected => panic!("expected Year, but found: {unexpected:?}",), } - match &result[1] { OwnedFormatItem::Literal(lit) => assert_eq!(lit.as_ref(), b"W"), - unexpected => panic!("Expected literal 'W', but found: {:?}", unexpected), + unexpected => panic!("expected literal 'W', but found: {unexpected:?}"), } - match &result[2] { OwnedFormatItem::Component(Component::WeekNumber(_)) => {} - unexpected => panic!("Expected WeekNumber component, but found: {:?}", unexpected), + unexpected => panic!("expected WeekNumber component, but found: {unexpected:?}"), } - match &result[3] { OwnedFormatItem::Component(Component::Weekday(_)) => {} - unexpected => panic!("Expected Weekday component, but found: {:?}", unexpected), + unexpected => panic!("expected Weekday component, but found: {unexpected:?}"), } } @@ -803,15 +762,14 @@ mod tests { for (input, expected) in test_cases.iter() { let result = parser.parse_date_time(input).unwrap(); - assert_eq!(result, *expected, "Failed to parse {}", input); + assert_eq!(result, *expected, "failed to parse {input}"); } // Test error case let error_case = "2023-1430"; assert!( parser.parse_date_time(error_case).is_err(), - "Expected error for input: {}", - error_case + "expected error for input: {error_case}", ); } }