Skip to content

Commit

Permalink
Add a make_date function (#9040)
Browse files Browse the repository at this point in the history
* Add a make_date function #9024

* Fixed error message #9024

* minor typo fix.
  • Loading branch information
Omega359 authored Jan 30, 2024
1 parent 4d389c2 commit 78447d6
Show file tree
Hide file tree
Showing 13 changed files with 569 additions and 7 deletions.
1 change: 1 addition & 0 deletions datafusion-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ cargo run --example csv_sql
- [`simple_udaf.rs`](examples/simple_udaf.rs): Define and invoke a User Defined Aggregate Function (UDAF)
- [`advanced_udaf.rs`](examples/advanced_udaf.rs): Define and invoke a more complicated User Defined Aggregate Function (UDAF)
- [`simple_udwf.rs`](examples/simple_udwf.rs): Define and invoke a User Defined Window Function (UDWF)
- [`make_date.rs`](examples/make_date.rs): Examples of using the make_date function
- [`to_timestamp.rs`](examples/to_timestamp.rs): Examples of using the to_timestamp functions
- [`advanced_udwf.rs`](examples/advanced_udwf.rs): Define and invoke a more complicated User Defined Window Function (UDWF)

Expand Down
120 changes: 120 additions & 0 deletions datafusion-examples/examples/make_date.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use std::sync::Arc;

use datafusion::arrow::array::Int32Array;
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::error::Result;
use datafusion::prelude::*;
use datafusion_common::assert_contains;

/// Walkthrough of the `make_date` function: first through the DataFrame
/// API, then through a series of SQL statements, and finally through two
/// statements that are expected to fail.
#[tokio::main]
async fn main() -> Result<()> {
    // Three non-nullable Int32 columns holding the year, month and day parts.
    let fields = vec![
        Field::new("y", DataType::Int32, false),
        Field::new("m", DataType::Int32, false),
        Field::new("d", DataType::Int32, false),
    ];
    let schema = Arc::new(Schema::new(fields));

    // Five rows of sample data, one array per column.
    let years = Int32Array::from(vec![2020, 2021, 2022, 2023, 2024]);
    let months = Int32Array::from(vec![1, 2, 3, 4, 5]);
    let days = Int32Array::from(vec![15, 16, 17, 18, 19]);
    let batch = RecordBatch::try_new(
        schema,
        vec![Arc::new(years), Arc::new(months), Arc::new(days)],
    )?;

    // The session context plays the role a SQL session plays in the spark API;
    // registering the batch is the counterpart of spark's createDataFrame(...).
    let ctx = SessionContext::new();
    ctx.register_batch("t", batch)?;

    // DataFrame API:
    //   'a' is built from the 'y', 'm' and 'd' columns,
    //   'b' is built from 'y' and 'm' plus a literal day.
    let dates = ctx
        .table("t")
        .await?
        .with_column("a", make_date(col("y"), col("m"), col("d")))?
        .with_column("b", make_date(col("y"), col("m"), lit(22)))?
        .select_columns(&["a", "b"])?;
    dates.show().await?;

    // SQL API: each statement is planned and its result printed, in order.
    let queries = [
        // all three parts taken from columns
        "select make_date(y, m, d) from t",
        // the day supplied as a static string
        "select make_date(y, m, '22') from t",
        // arguments may be arithmetic expressions
        "select make_date(y + 1, m, d) from t",
        // explicit casts to supported types (int, bigint, varchar)
        "select make_date(2024::bigint, 01::bigint, 27::varchar(3))",
        // arrow_cast can be used for the arguments as well
        "select make_date(arrow_cast(2024, 'Int64'), arrow_cast(1, 'Int64'), arrow_cast(27, 'Int64'))",
    ];
    for query in queries {
        ctx.sql(query).await?.show().await?;
    }

    // Statements that plan successfully but must fail when collected, paired
    // with the text the resulting error message has to contain.
    let failures = [
        // a null component
        (
            "select make_date(2024, null, 23)",
            "Execution error: Unable to parse date from null/empty value",
        ),
        // a day that does not exist in the given month
        (
            "select make_date(2024, 01, 32)",
            "Execution error: Unable to parse date from 2024, 1, 32",
        ),
    ];
    for (query, expected) in failures {
        let outcome = ctx.sql(query).await?.collect().await;
        assert_contains!(outcome.unwrap_err().to_string(), expected);
    }

    Ok(())
}
10 changes: 10 additions & 0 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,8 @@ pub enum BuiltinScalarFunction {
CurrentDate,
/// current_time
CurrentTime,
/// make_date
MakeDate,
/// translate
Translate,
/// trim
Expand Down Expand Up @@ -484,6 +486,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ToTimestampMicros => Volatility::Immutable,
BuiltinScalarFunction::ToTimestampNanos => Volatility::Immutable,
BuiltinScalarFunction::ToTimestampSeconds => Volatility::Immutable,
BuiltinScalarFunction::MakeDate => Volatility::Immutable,
BuiltinScalarFunction::Translate => Volatility::Immutable,
BuiltinScalarFunction::Trim => Volatility::Immutable,
BuiltinScalarFunction::Upper => Volatility::Immutable,
Expand Down Expand Up @@ -834,6 +837,7 @@ impl BuiltinScalarFunction {
}
BuiltinScalarFunction::CurrentDate => Ok(Date32),
BuiltinScalarFunction::CurrentTime => Ok(Time64(Nanosecond)),
BuiltinScalarFunction::MakeDate => Ok(Date32),
BuiltinScalarFunction::Translate => {
utf8_to_str_type(&input_expr_types[0], "translate")
}
Expand Down Expand Up @@ -1379,6 +1383,11 @@ impl BuiltinScalarFunction {
| BuiltinScalarFunction::CurrentTime => {
Signature::uniform(0, vec![], self.volatility())
}
BuiltinScalarFunction::MakeDate => Signature::uniform(
3,
vec![Int32, Int64, UInt32, UInt64, Utf8],
self.volatility(),
),
BuiltinScalarFunction::Isnan | BuiltinScalarFunction::Iszero => {
Signature::one_of(
vec![Exact(vec![Float32]), Exact(vec![Float64])],
Expand Down Expand Up @@ -1523,6 +1532,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Now => &["now"],
BuiltinScalarFunction::CurrentDate => &["current_date", "today"],
BuiltinScalarFunction::CurrentTime => &["current_time"],
BuiltinScalarFunction::MakeDate => &["make_date"],
BuiltinScalarFunction::DateBin => &["date_bin"],
BuiltinScalarFunction::DateTrunc => &["date_trunc", "datetrunc"],
BuiltinScalarFunction::DatePart => &["date_part", "datepart"],
Expand Down
1 change: 1 addition & 0 deletions datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -921,6 +921,7 @@ scalar_expr!(
scalar_expr!(CurrentDate, current_date, ,"returns current UTC date as a [`DataType::Date32`] value");
scalar_expr!(Now, now, ,"returns current timestamp in nanoseconds, using the same value for all instances of now() in same statement");
scalar_expr!(CurrentTime, current_time, , "returns current UTC time as a [`DataType::Time64`] value");
scalar_expr!(MakeDate, make_date, year month day, "make a date from year, month and day component parts");
scalar_expr!(Nanvl, nanvl, x y, "returns x if x is not NaN otherwise returns y");
scalar_expr!(
Isnan,
Expand Down
Loading

0 comments on commit 78447d6

Please sign in to comment.