From 7bd77477b3aa65d23fc096db140a08617ba2bd55 Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Sun, 15 Sep 2024 15:01:11 +0100 Subject: [PATCH] fix interval units parsing (#12448) * fix interval units properly * fix remaining tests :fingers-crossed: * fix logical conflict --------- Co-authored-by: Andrew Lamb --- datafusion/sql/src/expr/value.rs | 59 ++++--------------- datafusion/sql/src/statement.rs | 4 ++ datafusion/sqllogictest/test_files/expr.slt | 6 -- .../sqllogictest/test_files/interval.slt | 41 +++++++++++++ 4 files changed, 55 insertions(+), 55 deletions(-) diff --git a/datafusion/sql/src/expr/value.rs b/datafusion/sql/src/expr/value.rs index 64575645bc44..be0909b58468 100644 --- a/datafusion/sql/src/expr/value.rs +++ b/datafusion/sql/src/expr/value.rs @@ -16,7 +16,9 @@ // under the License. use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use arrow::compute::kernels::cast_utils::parse_interval_month_day_nano; +use arrow::compute::kernels::cast_utils::{ + parse_interval_month_day_nano_config, IntervalParseConfig, IntervalUnit, +}; use arrow::datatypes::DECIMAL128_MAX_PRECISION; use arrow_schema::DataType; use datafusion_common::{ @@ -232,27 +234,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let value = interval_literal(*interval.value, negative)?; - let value = if has_units(&value) { - // If the interval already contains a unit - // `INTERVAL '5 month' rather than `INTERVAL '5' month` - // skip the other unit - value - } else { - // leading_field really means the unit if specified - // for example, "month" in `INTERVAL '5' month` - match interval.leading_field.as_ref() { - Some(leading_field) => { - format!("{value} {leading_field}") - } - None => { - // default to seconds for the units - // `INTERVAL '5' is parsed as '5 seconds' - format!("{value} seconds") - } - } + // leading_field really means the unit if specified + // for example, "month" in `INTERVAL '5' month` + let value = match interval.leading_field.as_ref() { + Some(leading_field) => format!("{value} {leading_field}"), + None => value, }; - let val = parse_interval_month_day_nano(&value)?; + let config = IntervalParseConfig::new(IntervalUnit::Second); + let val = parse_interval_month_day_nano_config(&value, config)?; Ok(lit(ScalarValue::IntervalMonthDayNano(Some(val)))) } } @@ -292,35 +282,6 @@ fn interval_literal(interval_value: SQLExpr, negative: bool) -> Result { } } -// TODO make interval parsing better in arrow-rs / expose `IntervalType` -fn has_units(val: &str) -> bool { - let val = val.to_lowercase(); - val.ends_with("century") - || val.ends_with("centuries") - || val.ends_with("decade") - || val.ends_with("decades") - || val.ends_with("year") - || val.ends_with("years") - || val.ends_with("month") - || val.ends_with("months") - || val.ends_with("week") - || val.ends_with("weeks") - || val.ends_with("day") - || val.ends_with("days") - || val.ends_with("hour") - || val.ends_with("hours") - || val.ends_with("minute") - || val.ends_with("minutes") - || val.ends_with("second") - || val.ends_with("seconds") - || val.ends_with("millisecond") - || val.ends_with("milliseconds") - || val.ends_with("microsecond") - || val.ends_with("microseconds") - || val.ends_with("nanosecond") - || val.ends_with("nanoseconds") -} - /// Try to decode bytes from hex literal string. /// /// None will be returned if the input literal is hex-invalid. diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 34214f711a85..d9719e08052f 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -253,6 +253,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { order_by, partition_by, cluster_by, + clustered_by, options, strict, copy_grants, @@ -346,6 +347,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { if cluster_by.is_some() { return not_impl_err!("Cluster by not supported")?; } + if clustered_by.is_some() { + return not_impl_err!("Clustered by not supported")?; + } if options.is_some() { return not_impl_err!("Options not supported")?; } diff --git a/datafusion/sqllogictest/test_files/expr.slt b/datafusion/sqllogictest/test_files/expr.slt index a478e3617261..e8d8329d34e1 100644 --- a/datafusion/sqllogictest/test_files/expr.slt +++ b/datafusion/sqllogictest/test_files/expr.slt @@ -229,12 +229,6 @@ SELECT interval '5 day' ---- 5 days -# Hour is ignored, this matches PostgreSQL -query ? -SELECT interval '5 day' hour ----- -5 days - query ? SELECT interval '5 day 4 hours 3 minutes 2 seconds 100 milliseconds' ---- diff --git a/datafusion/sqllogictest/test_files/interval.slt b/datafusion/sqllogictest/test_files/interval.slt index c73e340f9115..db453adf12ba 100644 --- a/datafusion/sqllogictest/test_files/interval.slt +++ b/datafusion/sqllogictest/test_files/interval.slt @@ -486,6 +486,31 @@ select i - d from t; query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types select i - ts from t; +# interval unit abreiviation and plurals +query ? +select interval '1s' +---- +1.000000000 secs + +query ? +select '1s'::interval +---- +1.000000000 secs + +query ? +select interval'1sec' +---- +1.000000000 secs + +query ? +select interval '1ms' +---- +0.001000000 secs + +query ? +select interval '1 y' + interval '1 year' +---- +24 mons # interval (scalar) + date / timestamp (array) query D @@ -502,6 +527,22 @@ select '1 month'::interval + ts from t; 2000-02-01T12:11:10 2000-03-01T00:00:00 +# trailing extra unit, this matches PostgreSQL +query ? +select interval '5 day 1' hour +---- +5 days 1 hours + +# trailing extra unit, this matches PostgreSQL +query ? +select interval '5 day 0' hour +---- +5 days + +# This is interpreted as "0 hours" with PostgreSQL, should be fixed with +query error DataFusion error: Arrow error: Parser error: Invalid input syntax for type interval: "5 day HOUR" +SELECT interval '5 day' hour + # expected error interval (scalar) - date / timestamp (array) query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Date32 to valid types select '1 month'::interval - d from t;