From 1f06308037426c46d1a9c4c2d3a8f335c4ad26d1 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Thu, 12 Sep 2024 11:23:59 -0400 Subject: [PATCH] Support timestamps and steps of less than a day for range/generate_series (#12400) * Support timestamps and steps of less than a day for timestamps. * Updated docs for range and generate_series to add additional info wrt timestamp support. * Updates based on code review. * Cleanup error message Co-authored-by: Andrew Lamb --------- Co-authored-by: Andrew Lamb --- datafusion/functions-nested/src/range.rs | 179 ++++++++++++++++-- datafusion/sqllogictest/test_files/array.slt | 156 ++++++++++++++- .../source/user-guide/sql/scalar_functions.md | 17 +- 3 files changed, 322 insertions(+), 30 deletions(-) diff --git a/datafusion/functions-nested/src/range.rs b/datafusion/functions-nested/src/range.rs index 90cf8bcbd057..b3d8010cb668 100644 --- a/datafusion/functions-nested/src/range.rs +++ b/datafusion/functions-nested/src/range.rs @@ -18,19 +18,31 @@ //! [`ScalarUDFImpl`] definitions for range and gen_series functions. use crate::utils::make_scalar_function; -use arrow::array::{Array, ArrayRef, Date32Builder, Int64Array, ListArray, ListBuilder}; +use arrow::array::{Array, ArrayRef, Int64Array, ListArray, ListBuilder}; use arrow::datatypes::{DataType, Field}; -use arrow_array::types::{Date32Type, IntervalMonthDayNanoType}; -use arrow_array::NullArray; +use arrow_array::builder::{Date32Builder, TimestampNanosecondBuilder}; +use arrow_array::temporal_conversions::as_datetime_with_timezone; +use arrow_array::timezone::Tz; +use arrow_array::types::{ + Date32Type, IntervalMonthDayNanoType, TimestampNanosecondType as TSNT, +}; +use arrow_array::{NullArray, TimestampNanosecondArray}; use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer}; use arrow_schema::DataType::*; use arrow_schema::IntervalUnit::MonthDayNano; -use datafusion_common::cast::{as_date32_array, as_int64_array, as_interval_mdn_array}; -use datafusion_common::{exec_err, not_impl_datafusion_err, Result}; +use arrow_schema::TimeUnit::Nanosecond; +use datafusion_common::cast::{ + as_date32_array, as_int64_array, as_interval_mdn_array, as_timestamp_nanosecond_array, +}; +use datafusion_common::{ + exec_datafusion_err, exec_err, internal_err, not_impl_datafusion_err, Result, +}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use itertools::Itertools; use std::any::Any; +use std::cmp::Ordering; use std::iter::from_fn; +use std::str::FromStr; use std::sync::Arc; make_udf_expr_and_func!( @@ -78,7 +90,7 @@ impl ScalarUDFImpl for Range { UInt16 => Ok(Int64), UInt32 => Ok(Int64), UInt64 => Ok(Int64), - Timestamp(_, _) => Ok(Date32), + Timestamp(_, tz) => Ok(Timestamp(Nanosecond, tz.clone())), Date32 => Ok(Date32), Date64 => Ok(Date32), Utf8 => Ok(Date32), @@ -109,8 +121,11 @@ impl ScalarUDFImpl for Range { match args[0].data_type() { Int64 => make_scalar_function(|args| gen_range_inner(args, false))(args), Date32 => make_scalar_function(|args| gen_range_date(args, false))(args), - _ => { - exec_err!("unsupported type for range") + Timestamp(_, _) => { + make_scalar_function(|args| gen_range_timestamp(args, false))(args) + } + dt => { + exec_err!("unsupported type for RANGE. Expected Int64, Date32 or Timestamp, got: {dt}") } } } @@ -152,8 +167,8 @@ impl ScalarUDFImpl for GenSeries { &self.signature } - fn coerce_types(&self, _arg_types: &[DataType]) -> Result> { - _arg_types + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + arg_types .iter() .map(|arg_type| match arg_type { Null => Ok(Null), @@ -165,7 +180,7 @@ impl ScalarUDFImpl for GenSeries { UInt16 => Ok(Int64), UInt32 => Ok(Int64), UInt64 => Ok(Int64), - Timestamp(_, _) => Ok(Date32), + Timestamp(_, tz) => Ok(Timestamp(Nanosecond, tz.clone())), Date32 => Ok(Date32), Date64 => Ok(Date32), Utf8 => Ok(Date32), @@ -196,9 +211,12 @@ impl ScalarUDFImpl for GenSeries { match args[0].data_type() { Int64 => make_scalar_function(|args| gen_range_inner(args, true))(args), Date32 => make_scalar_function(|args| gen_range_date(args, true))(args), + Timestamp(_, _) => { + make_scalar_function(|args| gen_range_timestamp(args, true))(args) + } dt => { exec_err!( - "unsupported type for gen_series. Expected Int64 or Date32, got: {}", + "unsupported type for GENERATE_SERIES. Expected Int64, Date32 or Timestamp, got: {}", dt ) } @@ -334,7 +352,7 @@ fn gen_range_iter( } } -fn gen_range_date(args: &[ArrayRef], include_upper: bool) -> Result { +fn gen_range_date(args: &[ArrayRef], include_upper_bound: bool) -> Result { if args.len() != 3 { return exec_err!("arguments length does not match"); } @@ -372,7 +390,7 @@ fn gen_range_date(args: &[ArrayRef], include_upper: bool) -> Result { } let neg = months < 0 || days < 0; - if !include_upper { + if !include_upper_bound { stop = Date32Type::subtract_month_day_nano(stop, step); } let mut new_date = start; @@ -394,3 +412,136 @@ fn gen_range_date(args: &[ArrayRef], include_upper: bool) -> Result { Ok(arr) } + +fn gen_range_timestamp(args: &[ArrayRef], include_upper_bound: bool) -> Result { + if args.len() != 3 { + return exec_err!( + "Arguments length must be 3 for {}", + if include_upper_bound { + "GENERATE_SERIES" + } else { + "RANGE" + } + ); + } + + // coerce_types fn should coerce all types to Timestamp(Nanosecond, tz) + let (start_arr, start_tz_opt) = cast_timestamp_arg(&args[0], include_upper_bound)?; + let (stop_arr, stop_tz_opt) = cast_timestamp_arg(&args[1], include_upper_bound)?; + let step_arr = as_interval_mdn_array(&args[2])?; + let start_tz = parse_tz(start_tz_opt)?; + let stop_tz = parse_tz(stop_tz_opt)?; + + // values are timestamps + let values_builder = start_tz_opt + .clone() + .map_or_else(TimestampNanosecondBuilder::new, |start_tz_str| { + TimestampNanosecondBuilder::new().with_timezone(start_tz_str) + }); + let mut list_builder = ListBuilder::new(values_builder); + + for idx in 0..start_arr.len() { + if start_arr.is_null(idx) || stop_arr.is_null(idx) || step_arr.is_null(idx) { + list_builder.append_null(); + continue; + } + + let start = start_arr.value(idx); + let stop = stop_arr.value(idx); + let step = step_arr.value(idx); + + let (months, days, ns) = IntervalMonthDayNanoType::to_parts(step); + if months == 0 && days == 0 && ns == 0 { + return exec_err!( + "Interval argument to {} must not be 0", + if include_upper_bound { + "GENERATE_SERIES" + } else { + "RANGE" + } + ); + } + + let neg = TSNT::add_month_day_nano(start, step, start_tz) + .ok_or(exec_datafusion_err!( + "Cannot generate timestamp range where start + step overflows" + ))? + .cmp(&start) + == Ordering::Less; + + let stop_dt = as_datetime_with_timezone::(stop, stop_tz).ok_or( + exec_datafusion_err!( + "Cannot generate timestamp for stop: {}: {:?}", + stop, + stop_tz + ), + )?; + + let mut current = start; + let mut current_dt = as_datetime_with_timezone::(current, start_tz).ok_or( + exec_datafusion_err!( + "Cannot generate timestamp for start: {}: {:?}", + current, + start_tz + ), + )?; + + let values = from_fn(|| { + if (include_upper_bound + && ((neg && current_dt < stop_dt) || (!neg && current_dt > stop_dt))) + || (!include_upper_bound + && ((neg && current_dt <= stop_dt) + || (!neg && current_dt >= stop_dt))) + { + return None; + } + + let prev_current = current; + + if let Some(ts) = TSNT::add_month_day_nano(current, step, start_tz) { + current = ts; + current_dt = as_datetime_with_timezone::(current, start_tz)?; + + Some(Some(prev_current)) + } else { + // we failed to parse the timestamp here so terminate the series + None + } + }); + + list_builder.append_value(values); + } + + let arr = Arc::new(list_builder.finish()); + + Ok(arr) +} + +fn cast_timestamp_arg( + arg: &ArrayRef, + include_upper: bool, +) -> Result<(&TimestampNanosecondArray, &Option>)> { + match arg.data_type() { + Timestamp(Nanosecond, tz_opt) => { + Ok((as_timestamp_nanosecond_array(arg)?, tz_opt)) + } + _ => { + internal_err!( + "Unexpected argument type for {} : {}", + if include_upper { + "GENERATE_SERIES" + } else { + "RANGE" + }, + arg.data_type() + ) + } + } +} + +fn parse_tz(tz: &Option>) -> Result { + let tz = tz.as_ref().map_or_else(|| "+00", |s| s); + + Tz::from_str(tz) + .map_err(|op| exec_datafusion_err!("failed to parse timezone {tz}: {:?}", op)) +} diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index b20b59339b64..f92bcfdf3421 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -5881,34 +5881,70 @@ select range(DATE '1992-09-01', NULL, INTERVAL '1' YEAR); ---- NULL +## should return NULL +query ? +select range(TIMESTAMP '1992-09-01', NULL, INTERVAL '1' YEAR); +---- +NULL + query ? select range(DATE '1992-09-01', DATE '1993-03-01', NULL); ---- NULL +query ? +select range(TIMESTAMP '1992-09-01', TIMESTAMP '1993-03-01', NULL); +---- +NULL + query ? select range(NULL, DATE '1993-03-01', INTERVAL '1' YEAR); ---- NULL +query ? +select range(NULL, TIMESTAMP '1993-03-01', INTERVAL '1' YEAR); +---- +NULL + query ? select range(NULL, NULL, NULL); ---- NULL +query ? +select range(NULL::timestamp, NULL::timestamp, NULL); +---- +NULL + query ? select range(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '-1' YEAR) ---- [] +query ? +select range(TIMESTAMP '1989-04-01', TIMESTAMP '1993-03-01', INTERVAL '-1' YEAR) +---- +[] + query ? select range(DATE '1993-03-01', DATE '1989-04-01', INTERVAL '1' YEAR) ---- [] +query ? +select range(TIMESTAMP '1993-03-01', TIMESTAMP '1989-04-01', INTERVAL '1' YEAR) +---- +[] + query error DataFusion error: Execution error: Cannot generate date range less than 1 day\. select range(DATE '1993-03-01', DATE '1993-03-01', INTERVAL '1' HOUR) +query ? +select range(TIMESTAMP '1993-03-01', TIMESTAMP '1993-03-01', INTERVAL '1' HOUR) +---- +[] + query ????????? select generate_series(5), generate_series(2, 5), @@ -5923,8 +5959,31 @@ select generate_series(5), ---- [0, 1, 2, 3, 4, 5] [2, 3, 4, 5] [2, 5, 8] [1, 2, 3, 4, 5] [5, 4, 3, 2, 1] [10, 7, 4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01, 1993-03-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02, 1993-01-01] [1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] -query error DataFusion error: Execution error: Cannot generate date range less than 1 day\. -select generate_series('2021-01-01'::timestamp, '2021-01-02'::timestamp, INTERVAL '1' HOUR); +query ? +select generate_series('2021-01-01'::timestamp, '2021-01-01T15:00:00'::timestamp, INTERVAL '1' HOUR); +---- +[2021-01-01T00:00:00, 2021-01-01T01:00:00, 2021-01-01T02:00:00, 2021-01-01T03:00:00, 2021-01-01T04:00:00, 2021-01-01T05:00:00, 2021-01-01T06:00:00, 2021-01-01T07:00:00, 2021-01-01T08:00:00, 2021-01-01T09:00:00, 2021-01-01T10:00:00, 2021-01-01T11:00:00, 2021-01-01T12:00:00, 2021-01-01T13:00:00, 2021-01-01T14:00:00, 2021-01-01T15:00:00] + +query ? +select generate_series('2021-01-01T00:00:00EST'::timestamp, '2021-01-01T15:00:00-12:00'::timestamp, INTERVAL '1' HOUR); +---- +[2021-01-01T05:00:00, 2021-01-01T06:00:00, 2021-01-01T07:00:00, 2021-01-01T08:00:00, 2021-01-01T09:00:00, 2021-01-01T10:00:00, 2021-01-01T11:00:00, 2021-01-01T12:00:00, 2021-01-01T13:00:00, 2021-01-01T14:00:00, 2021-01-01T15:00:00, 2021-01-01T16:00:00, 2021-01-01T17:00:00, 2021-01-01T18:00:00, 2021-01-01T19:00:00, 2021-01-01T20:00:00, 2021-01-01T21:00:00, 2021-01-01T22:00:00, 2021-01-01T23:00:00, 2021-01-02T00:00:00, 2021-01-02T01:00:00, 2021-01-02T02:00:00, 2021-01-02T03:00:00] + +query ? +select generate_series(arrow_cast('2021-01-01T00:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), arrow_cast('2021-01-01T15:00:00', 'Timestamp(Nanosecond, Some("+05:00"))'), INTERVAL '1' HOUR); +---- +[2021-01-01T00:00:00-05:00, 2021-01-01T01:00:00-05:00, 2021-01-01T02:00:00-05:00, 2021-01-01T03:00:00-05:00, 2021-01-01T04:00:00-05:00, 2021-01-01T05:00:00-05:00] + +## -5500000000 ns is -5.5 sec +query ? +select generate_series(arrow_cast('2021-01-01T00:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), arrow_cast('2021-01-01T06:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), INTERVAL '1 HOUR 30 MINUTE -5500000000 NANOSECOND'); +---- +[2021-01-01T00:00:00-05:00, 2021-01-01T01:29:54.500-05:00, 2021-01-01T02:59:49-05:00, 2021-01-01T04:29:43.500-05:00, 2021-01-01T05:59:38-05:00] + +## mixing types for timestamps is not supported +query error DataFusion error: Internal error: Unexpected argument type for GENERATE_SERIES : Date32 +select generate_series(arrow_cast('2021-01-01T00:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), DATE '2021-01-02', INTERVAL '1' HOUR); + ## should return NULL query ? @@ -5932,41 +5991,92 @@ select generate_series(DATE '1992-09-01', NULL, INTERVAL '1' YEAR); ---- NULL +## should return NULL +query ? +select generate_series(TIMESTAMP '1992-09-01', NULL, INTERVAL '1' YEAR); +---- +NULL + query ? select generate_series(DATE '1992-09-01', DATE '1993-03-01', NULL); ---- NULL +query ? +select generate_series(TIMESTAMP '1992-09-01', DATE '1993-03-01', NULL); +---- +NULL + query ? select generate_series(NULL, DATE '1993-03-01', INTERVAL '1' YEAR); ---- NULL +query ? +select generate_series(NULL, TIMESTAMP '1993-03-01', INTERVAL '1' YEAR); +---- +NULL + query ? select generate_series(NULL, NULL, NULL); ---- NULL +query ? +select generate_series(NULL::timestamp, NULL::timestamp, NULL); +---- +NULL + query ? select generate_series(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '-1' YEAR) ---- [] +query ? +select generate_series(TIMESTAMP '1989-04-01', TIMESTAMP '1993-03-01', INTERVAL '-1' YEAR) +---- +[] + query ? select generate_series(DATE '1993-03-01', DATE '1989-04-01', INTERVAL '1' YEAR) ---- [] +query ? +select generate_series(TIMESTAMP '1993-03-01', TIMESTAMP '1989-04-01', INTERVAL '1' YEAR) +---- +[] + query error DataFusion error: Execution error: Cannot generate date range less than 1 day. select generate_series(DATE '2000-01-01', DATE '2000-01-03', INTERVAL '1' HOUR) query error DataFusion error: Execution error: Cannot generate date range less than 1 day. select generate_series(DATE '2000-01-01', DATE '2000-01-03', INTERVAL '-1' HOUR) +query ? +select generate_series(TIMESTAMP '2000-01-01', TIMESTAMP '2000-01-02', INTERVAL '1' HOUR) +---- +[2000-01-01T00:00:00, 2000-01-01T01:00:00, 2000-01-01T02:00:00, 2000-01-01T03:00:00, 2000-01-01T04:00:00, 2000-01-01T05:00:00, 2000-01-01T06:00:00, 2000-01-01T07:00:00, 2000-01-01T08:00:00, 2000-01-01T09:00:00, 2000-01-01T10:00:00, 2000-01-01T11:00:00, 2000-01-01T12:00:00, 2000-01-01T13:00:00, 2000-01-01T14:00:00, 2000-01-01T15:00:00, 2000-01-01T16:00:00, 2000-01-01T17:00:00, 2000-01-01T18:00:00, 2000-01-01T19:00:00, 2000-01-01T20:00:00, 2000-01-01T21:00:00, 2000-01-01T22:00:00, 2000-01-01T23:00:00, 2000-01-02T00:00:00] + +query ? +select generate_series(TIMESTAMP '2000-01-02', TIMESTAMP '2000-01-01', INTERVAL '-1' HOUR) +---- +[2000-01-02T00:00:00, 2000-01-01T23:00:00, 2000-01-01T22:00:00, 2000-01-01T21:00:00, 2000-01-01T20:00:00, 2000-01-01T19:00:00, 2000-01-01T18:00:00, 2000-01-01T17:00:00, 2000-01-01T16:00:00, 2000-01-01T15:00:00, 2000-01-01T14:00:00, 2000-01-01T13:00:00, 2000-01-01T12:00:00, 2000-01-01T11:00:00, 2000-01-01T10:00:00, 2000-01-01T09:00:00, 2000-01-01T08:00:00, 2000-01-01T07:00:00, 2000-01-01T06:00:00, 2000-01-01T05:00:00, 2000-01-01T04:00:00, 2000-01-01T03:00:00, 2000-01-01T02:00:00, 2000-01-01T01:00:00, 2000-01-01T00:00:00] + +# Test generate_series with small intervals +query ? +select generate_series('2000-01-01T00:00:00.000000001Z'::timestamp, '2000-01-01T00:00:00.00000001Z'::timestamp, INTERVAL '1' NANOSECONDS) +---- +[2000-01-01T00:00:00.000000001, 2000-01-01T00:00:00.000000002, 2000-01-01T00:00:00.000000003, 2000-01-01T00:00:00.000000004, 2000-01-01T00:00:00.000000005, 2000-01-01T00:00:00.000000006, 2000-01-01T00:00:00.000000007, 2000-01-01T00:00:00.000000008, 2000-01-01T00:00:00.000000009, 2000-01-01T00:00:00.000000010] + # Test generate_series with zero step query error DataFusion error: Execution error: step can't be 0 for function generate_series\(start \[, stop, step\]\) select generate_series(1, 1, 0); +# Test generate_series with zero step +query error DataFusion error: Execution error: Interval argument to GENERATE_SERIES must not be 0 +select generate_series(TIMESTAMP '2000-01-02', TIMESTAMP '2000-01-01', INTERVAL '0' MINUTE); + # Test generate_series with big steps query ???? select @@ -6021,7 +6131,7 @@ select generate_series(NULL) ---- NULL -# Test generate_series with a table of values +# Test generate_series with a table of date values statement ok CREATE TABLE date_table( start DATE, @@ -6053,15 +6163,45 @@ select generate_series(start, '1993-03-01'::date, INTERVAL '1 year') from date_t [1993-02-01] [1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] +# Test generate_series with a table of timestamp values +statement ok +CREATE TABLE timestamp_table( + start TIMESTAMP, + stop TIMESTAMP, + step INTERVAL +) AS VALUES + (TIMESTAMP '1992-01-01T00:00:00', TIMESTAMP '1993-01-02T00:00:00', INTERVAL '1' MONTH), + (TIMESTAMP '1993-02-01T00:00:00', TIMESTAMP '1993-01-01T00:00:00', INTERVAL '-1' DAY), + (TIMESTAMP '1989-04-01T00:00:00', TIMESTAMP '1993-03-01T00:00:00', INTERVAL '1' YEAR); -# https://github.com/apache/datafusion/issues/11922 query ? -select generate_series(start, '1993-03-01', INTERVAL '1 year') from date_table; +select generate_series(start, stop, step) from timestamp_table; ---- -[1992-01-01, 1993-01-01] -[1993-02-01] -[1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] +[1992-01-01T00:00:00, 1992-02-01T00:00:00, 1992-03-01T00:00:00, 1992-04-01T00:00:00, 1992-05-01T00:00:00, 1992-06-01T00:00:00, 1992-07-01T00:00:00, 1992-08-01T00:00:00, 1992-09-01T00:00:00, 1992-10-01T00:00:00, 1992-11-01T00:00:00, 1992-12-01T00:00:00, 1993-01-01T00:00:00] +[1993-02-01T00:00:00, 1993-01-31T00:00:00, 1993-01-30T00:00:00, 1993-01-29T00:00:00, 1993-01-28T00:00:00, 1993-01-27T00:00:00, 1993-01-26T00:00:00, 1993-01-25T00:00:00, 1993-01-24T00:00:00, 1993-01-23T00:00:00, 1993-01-22T00:00:00, 1993-01-21T00:00:00, 1993-01-20T00:00:00, 1993-01-19T00:00:00, 1993-01-18T00:00:00, 1993-01-17T00:00:00, 1993-01-16T00:00:00, 1993-01-15T00:00:00, 1993-01-14T00:00:00, 1993-01-13T00:00:00, 1993-01-12T00:00:00, 1993-01-11T00:00:00, 1993-01-10T00:00:00, 1993-01-09T00:00:00, 1993-01-08T00:00:00, 1993-01-07T00:00:00, 1993-01-06T00:00:00, 1993-01-05T00:00:00, 1993-01-04T00:00:00, 1993-01-03T00:00:00, 1993-01-02T00:00:00, 1993-01-01T00:00:00] +[1989-04-01T00:00:00, 1990-04-01T00:00:00, 1991-04-01T00:00:00, 1992-04-01T00:00:00] + +query ? +select generate_series(start, stop, INTERVAL '1 year') from timestamp_table; +---- +[1992-01-01T00:00:00, 1993-01-01T00:00:00] +[] +[1989-04-01T00:00:00, 1990-04-01T00:00:00, 1991-04-01T00:00:00, 1992-04-01T00:00:00] +query ? +select generate_series(start, '1993-03-01T00:00:00'::timestamp, INTERVAL '1 year') from timestamp_table; +---- +[1992-01-01T00:00:00, 1993-01-01T00:00:00] +[1993-02-01T00:00:00] +[1989-04-01T00:00:00, 1990-04-01T00:00:00, 1991-04-01T00:00:00, 1992-04-01T00:00:00] + +# https://github.com/apache/datafusion/issues/11922 +query ? +select generate_series(start, '1993-03-01T00:00:00'::timestamp, INTERVAL '1 year') from timestamp_table; +---- +[1992-01-01T00:00:00, 1993-01-01T00:00:00] +[1993-02-01T00:00:00] +[1989-04-01T00:00:00, 1990-04-01T00:00:00, 1991-04-01T00:00:00, 1992-04-01T00:00:00] ## array_except diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 14852935dca4..6a2181b8db40 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -3223,9 +3223,9 @@ generate_series(start, stop, step) #### Arguments -- **start**: start of the range -- **end**: end of the range (included) -- **step**: increase by step (can not be 0) +- **start**: start of the series. Ints, timestamps, dates or string types that can be coerced to Date32 are supported. +- **end**: end of the series (included). Type must be the same as start. +- **step**: increase by step (can not be 0). Steps less than a day are supported only for timestamp ranges. #### Example @@ -3516,7 +3516,7 @@ The range start..end contains all values with start <= x < end. It is empty if s Step can not be 0 (then the range will be nonsense.). -Note that when the required range is a number, it accepts (stop), (start, stop), and (start, stop, step) as parameters, but when the required range is a date, it must be 3 non-NULL parameters. +Note that when the required range is a number, it accepts (stop), (start, stop), and (start, stop, step) as parameters, but when the required range is a date or timestamp, it must be 3 non-NULL parameters. For example, ``` @@ -3527,10 +3527,11 @@ SELECT range(1,5,1); are allowed in number ranges -but in date ranges, only +but in date and timestamp ranges, only ``` SELECT range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH); +SELECT range(TIMESTAMP '1992-09-01', TIMESTAMP '1993-03-01', INTERVAL '1' MONTH); ``` is allowed, and @@ -3545,9 +3546,9 @@ are not allowed #### Arguments -- **start**: start of the range -- **end**: end of the range (not included) -- **step**: increase by step (can not be 0) +- **start**: start of the range. Ints, timestamps, dates or string types that can be coerced to Date32 are supported. +- **end**: end of the range (not included). Type must be the same as start. +- **step**: increase by step (can not be 0). Steps less than a day are supported only for timestamp ranges. #### Aliases