Skip to content

Commit

Permalink
feat: Support Substrait's IntervalCompound type/literal instead of in…
Browse files Browse the repository at this point in the history
…terval-month-day-nano UDT (#12112)

* feat(substrait): use IntervalCompound instead of interval-month-day-nano UDT

* clippy

* more clippy

* even more clippy

* fix precision exponent

* add a test

* update deprecation version

* update deprecation comments
  • Loading branch information
Blizzara authored Oct 29, 2024
1 parent d764c4a commit 444a673
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 171 deletions.
116 changes: 101 additions & 15 deletions datafusion/substrait/src/logical_plan/consumer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,18 @@ use crate::variation_const::{
DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
INTERVAL_MONTH_DAY_NANO_TYPE_NAME, LARGE_CONTAINER_TYPE_VARIATION_REF,
UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF,
LARGE_CONTAINER_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF,
VIEW_CONTAINER_TYPE_VARIATION_REF,
};
#[allow(deprecated)]
use crate::variation_const::{
INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_REF,
INTERVAL_YEAR_MONTH_TYPE_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF,
TIMESTAMP_MILLI_TYPE_VARIATION_REF, TIMESTAMP_NANO_TYPE_VARIATION_REF,
TIMESTAMP_SECOND_TYPE_VARIATION_REF,
INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME,
INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_REF,
TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF,
TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF,
};
use datafusion::arrow::array::{new_empty_array, AsArray};
use datafusion::arrow::temporal_conversions::NANOSECONDS;
use datafusion::common::scalar::ScalarStructBuilder;
use datafusion::dataframe::DataFrame;
use datafusion::logical_expr::expr::InList;
Expand All @@ -71,10 +72,10 @@ use datafusion::{
use std::collections::HashSet;
use std::sync::Arc;
use substrait::proto::exchange_rel::ExchangeKind;
use substrait::proto::expression::literal::interval_day_to_second::PrecisionMode;
use substrait::proto::expression::literal::user_defined::Val;
use substrait::proto::expression::literal::{
IntervalDayToSecond, IntervalYearToMonth, UserDefined,
interval_day_to_second, IntervalCompound, IntervalDayToSecond, IntervalYearToMonth,
UserDefined,
};
use substrait::proto::expression::subquery::SubqueryType;
use substrait::proto::expression::{FieldReference, Literal, ScalarFunction};
Expand Down Expand Up @@ -1845,9 +1846,14 @@ fn from_substrait_type(
Ok(DataType::Interval(IntervalUnit::YearMonth))
}
r#type::Kind::IntervalDay(_) => Ok(DataType::Interval(IntervalUnit::DayTime)),
r#type::Kind::IntervalCompound(_) => {
Ok(DataType::Interval(IntervalUnit::MonthDayNano))
}
r#type::Kind::UserDefined(u) => {
if let Some(name) = extensions.types.get(&u.type_reference) {
#[allow(deprecated)]
match name.as_ref() {
// Kept for backwards compatibility, producers should use IntervalCompound instead
INTERVAL_MONTH_DAY_NANO_TYPE_NAME => Ok(DataType::Interval(IntervalUnit::MonthDayNano)),
_ => not_impl_err!(
"Unsupported Substrait user defined type with ref {} and variation {}",
Expand All @@ -1856,18 +1862,17 @@ fn from_substrait_type(
),
}
} else {
// Kept for backwards compatibility, new plans should include the extension instead
#[allow(deprecated)]
match u.type_reference {
// Kept for backwards compatibility, use IntervalYear instead
// Kept for backwards compatibility, producers should use IntervalYear instead
INTERVAL_YEAR_MONTH_TYPE_REF => {
Ok(DataType::Interval(IntervalUnit::YearMonth))
}
// Kept for backwards compatibility, use IntervalDay instead
// Kept for backwards compatibility, producers should use IntervalDay instead
INTERVAL_DAY_TIME_TYPE_REF => {
Ok(DataType::Interval(IntervalUnit::DayTime))
}
// Not supported yet by Substrait
// Kept for backwards compatibility, producers should use IntervalCompound instead
INTERVAL_MONTH_DAY_NANO_TYPE_REF => {
Ok(DataType::Interval(IntervalUnit::MonthDayNano))
}
Expand Down Expand Up @@ -2285,6 +2290,7 @@ fn from_substrait_literal(
subseconds,
precision_mode,
})) => {
use interval_day_to_second::PrecisionMode;
// DF only supports millisecond precision, so for any more granular type we lose precision
let milliseconds = match precision_mode {
Some(PrecisionMode::Microseconds(ms)) => ms / 1000,
Expand All @@ -2309,6 +2315,35 @@ fn from_substrait_literal(
Some(LiteralType::IntervalYearToMonth(IntervalYearToMonth { years, months })) => {
ScalarValue::new_interval_ym(*years, *months)
}
Some(LiteralType::IntervalCompound(IntervalCompound {
interval_year_to_month,
interval_day_to_second,
})) => match (interval_year_to_month, interval_day_to_second) {
(
Some(IntervalYearToMonth { years, months }),
Some(IntervalDayToSecond {
days,
seconds,
subseconds,
precision_mode:
Some(interval_day_to_second::PrecisionMode::Precision(p)),
}),
) => {
if *p < 0 || *p > 9 {
return plan_err!(
"Unsupported Substrait interval day to second precision: {}",
p
);
}
let nanos = *subseconds * i64::pow(10, (9 - p) as u32);
ScalarValue::new_interval_mdn(
*years * 12 + months,
*days,
*seconds as i64 * NANOSECONDS + nanos,
)
}
_ => return plan_err!("Substrait compound interval missing components"),
},
Some(LiteralType::FixedChar(c)) => ScalarValue::Utf8(Some(c.clone())),
Some(LiteralType::UserDefined(user_defined)) => {
// Helper function to prevent duplicating this code - can be inlined once the non-extension path is removed
Expand Down Expand Up @@ -2339,6 +2374,8 @@ fn from_substrait_literal(

if let Some(name) = extensions.types.get(&user_defined.type_reference) {
match name.as_ref() {
// Kept for backwards compatibility - producers should use IntervalCompound instead
#[allow(deprecated)]
INTERVAL_MONTH_DAY_NANO_TYPE_NAME => {
interval_month_day_nano(user_defined)?
}
Expand All @@ -2351,10 +2388,9 @@ fn from_substrait_literal(
}
}
} else {
// Kept for backwards compatibility - new plans should include extension instead
#[allow(deprecated)]
match user_defined.type_reference {
// Kept for backwards compatibility, use IntervalYearToMonth instead
// Kept for backwards compatibility, producers should useIntervalYearToMonth instead
INTERVAL_YEAR_MONTH_TYPE_REF => {
let Some(Val::Value(raw_val)) = user_defined.val.as_ref() else {
return substrait_err!("Interval year month value is empty");
Expand All @@ -2369,7 +2405,7 @@ fn from_substrait_literal(
value_slice,
)))
}
// Kept for backwards compatibility, use IntervalDayToSecond instead
// Kept for backwards compatibility, producers should useIntervalDayToSecond instead
INTERVAL_DAY_TIME_TYPE_REF => {
let Some(Val::Value(raw_val)) = user_defined.val.as_ref() else {
return substrait_err!("Interval day time value is empty");
Expand All @@ -2389,6 +2425,7 @@ fn from_substrait_literal(
milliseconds,
}))
}
// Kept for backwards compatibility, producers should useIntervalCompound instead
INTERVAL_MONTH_DAY_NANO_TYPE_REF => {
interval_month_day_nano(user_defined)?
}
Expand Down Expand Up @@ -2738,3 +2775,52 @@ impl BuiltinExprBuilder {
}))
}
}

#[cfg(test)]
mod test {
use crate::extensions::Extensions;
use crate::logical_plan::consumer::from_substrait_literal_without_names;
use arrow_buffer::IntervalMonthDayNano;
use datafusion::error::Result;
use datafusion::scalar::ScalarValue;
use substrait::proto::expression::literal::{
interval_day_to_second, IntervalCompound, IntervalDayToSecond,
IntervalYearToMonth, LiteralType,
};
use substrait::proto::expression::Literal;

#[test]
fn interval_compound_different_precision() -> Result<()> {
// DF producer (and thus roundtrip) always uses precision = 9,
// this test exists to test with some other value.
let substrait = Literal {
nullable: false,
type_variation_reference: 0,
literal_type: Some(LiteralType::IntervalCompound(IntervalCompound {
interval_year_to_month: Some(IntervalYearToMonth {
years: 1,
months: 2,
}),
interval_day_to_second: Some(IntervalDayToSecond {
days: 3,
seconds: 4,
subseconds: 5,
precision_mode: Some(
interval_day_to_second::PrecisionMode::Precision(6),
),
}),
})),
};

assert_eq!(
from_substrait_literal_without_names(&substrait, &Extensions::default())?,
ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano {
months: 14,
days: 3,
nanoseconds: 4_000_005_000
}))
);

Ok(())
}
}
Loading

0 comments on commit 444a673

Please sign in to comment.