-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add ScalarUDFImpl::invoke_with_args
to support passing the return type created for the udf instance
#13290
Add ScalarUDFImpl::invoke_with_args
to support passing the return type created for the udf instance
#13290
Changes from all commits
ae73371
88e5608
9ec7e4a
99a65b5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -203,10 +203,7 @@ impl ScalarUDF { | |
self.inner.simplify(args, info) | ||
} | ||
|
||
/// Invoke the function on `args`, returning the appropriate result. | ||
/// | ||
/// See [`ScalarUDFImpl::invoke`] for more details. | ||
#[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")] | ||
#[deprecated(since = "42.1.0", note = "Use `invoke_with_args` instead")] | ||
pub fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> { | ||
#[allow(deprecated)] | ||
self.inner.invoke(args) | ||
|
@@ -216,20 +213,27 @@ impl ScalarUDF { | |
self.inner.is_nullable(args, schema) | ||
} | ||
|
||
/// Invoke the function with `args` and number of rows, returning the appropriate result. | ||
/// | ||
/// See [`ScalarUDFImpl::invoke_batch`] for more details. | ||
#[deprecated(since = "43.0.0", note = "Use `invoke_with_args` instead")] | ||
pub fn invoke_batch( | ||
&self, | ||
args: &[ColumnarValue], | ||
number_rows: usize, | ||
) -> Result<ColumnarValue> { | ||
#[allow(deprecated)] | ||
self.inner.invoke_batch(args, number_rows) | ||
} | ||
|
||
/// Invoke the function on `args`, returning the appropriate result. | ||
/// | ||
/// See [`ScalarUDFImpl::invoke_with_args`] for details. | ||
pub fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> { | ||
self.inner.invoke_with_args(args) | ||
} | ||
|
||
/// Invoke the function without `args` but number of rows, returning the appropriate result. | ||
/// | ||
/// See [`ScalarUDFImpl::invoke_no_args`] for more details. | ||
/// Note: This method is deprecated and will be removed in future releases. | ||
/// User defined functions should implement [`Self::invoke_with_args`] instead. | ||
#[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")] | ||
pub fn invoke_no_args(&self, number_rows: usize) -> Result<ColumnarValue> { | ||
#[allow(deprecated)] | ||
|
@@ -324,26 +328,37 @@ where | |
} | ||
} | ||
|
||
/// Trait for implementing [`ScalarUDF`]. | ||
pub struct ScalarFunctionArgs<'a> { | ||
// The evaluated arguments to the function | ||
pub args: &'a [ColumnarValue], | ||
// The number of rows in record batch being evaluated | ||
pub number_rows: usize, | ||
// The return type of the scalar function returned (from `return_type` or `return_type_from_exprs`) | ||
// when creating the physical expression from the logical expression | ||
pub return_type: &'a DataType, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It should also be easy to add as a follow-up? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, as long as it's done in this release cycle so that we don't churn the api even more between cycles There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes -- I will file a ticket There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tracking with #13519 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (BTW the change in the design to a struct I think allows for much easier non-breaking API additions in the future) |
||
} | ||
|
||
/// Trait for implementing user defined scalar functions. | ||
/// | ||
/// This trait exposes the full API for implementing user defined functions and | ||
/// can be used to implement any function. | ||
/// | ||
/// See [`advanced_udf.rs`] for a full example with complete implementation and | ||
/// [`ScalarUDF`] for other available options. | ||
/// | ||
/// | ||
/// [`advanced_udf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs | ||
/// | ||
/// # Basic Example | ||
/// ``` | ||
/// # use std::any::Any; | ||
/// # use std::sync::OnceLock; | ||
/// # use arrow::datatypes::DataType; | ||
/// # use datafusion_common::{DataFusionError, plan_err, Result}; | ||
/// # use datafusion_expr::{col, ColumnarValue, Documentation, Signature, Volatility}; | ||
/// # use datafusion_expr::{col, ColumnarValue, Documentation, ScalarFunctionArgs, Signature, Volatility}; | ||
/// # use datafusion_expr::{ScalarUDFImpl, ScalarUDF}; | ||
/// # use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH; | ||
/// | ||
/// /// This struct for a simple UDF that adds one to an int32 | ||
/// #[derive(Debug)] | ||
/// struct AddOne { | ||
/// signature: Signature, | ||
|
@@ -356,7 +371,7 @@ where | |
/// } | ||
/// } | ||
/// } | ||
/// | ||
/// | ||
/// static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new(); | ||
/// | ||
/// fn get_doc() -> &'static Documentation { | ||
|
@@ -383,7 +398,9 @@ where | |
/// Ok(DataType::Int32) | ||
/// } | ||
/// // The actual implementation would add one to the argument | ||
/// fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> { unimplemented!() } | ||
/// fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> { | ||
/// unimplemented!() | ||
/// } | ||
/// fn documentation(&self) -> Option<&Documentation> { | ||
/// Some(get_doc()) | ||
/// } | ||
|
@@ -479,24 +496,9 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { | |
|
||
/// Invoke the function on `args`, returning the appropriate result | ||
/// | ||
/// The function will be invoked passed with the slice of [`ColumnarValue`] | ||
/// (either scalar or array). | ||
/// | ||
/// If the function does not take any arguments, please use [invoke_no_args] | ||
/// instead and return [not_impl_err] for this function. | ||
/// | ||
/// | ||
/// # Performance | ||
/// | ||
/// For the best performance, the implementations of `invoke` should handle | ||
/// the common case when one or more of their arguments are constant values | ||
/// (aka [`ColumnarValue::Scalar`]). | ||
/// | ||
/// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments | ||
/// to arrays, which will likely be simpler code, but be slower. | ||
/// | ||
/// [invoke_no_args]: ScalarUDFImpl::invoke_no_args | ||
#[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")] | ||
/// Note: This method is deprecated and will be removed in future releases. | ||
/// User defined functions should implement [`Self::invoke_with_args`] instead. | ||
#[deprecated(since = "42.1.0", note = "Use `invoke_with_args` instead")] | ||
fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> { | ||
not_impl_err!( | ||
"Function {} does not implement invoke but called", | ||
|
@@ -507,17 +509,12 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { | |
/// Invoke the function with `args` and the number of rows, | ||
/// returning the appropriate result. | ||
/// | ||
/// The function will be invoked with the slice of [`ColumnarValue`] | ||
/// (either scalar or array). | ||
/// | ||
/// # Performance | ||
/// Note: See notes on [`Self::invoke_with_args`] | ||
/// | ||
/// For the best performance, the implementations should handle the common case | ||
/// when one or more of their arguments are constant values (aka | ||
/// [`ColumnarValue::Scalar`]). | ||
/// Note: This method is deprecated and will be removed in future releases. | ||
/// User defined functions should implement [`Self::invoke_with_args`] instead. | ||
/// | ||
/// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments | ||
/// to arrays, which will likely be simpler code, but be slower. | ||
/// See <https://github.com/apache/datafusion/issues/13515> for more details. | ||
fn invoke_batch( | ||
&self, | ||
args: &[ColumnarValue], | ||
|
@@ -537,9 +534,27 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { | |
} | ||
} | ||
|
||
/// Invoke the function returning the appropriate result. | ||
/// | ||
/// # Performance | ||
/// | ||
/// For the best performance, the implementations should handle the common case | ||
/// when one or more of their arguments are constant values (aka | ||
/// [`ColumnarValue::Scalar`]). | ||
/// | ||
/// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments | ||
/// to arrays, which will likely be simpler code, but be slower. | ||
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> { | ||
#[allow(deprecated)] | ||
self.invoke_batch(args.args, args.number_rows) | ||
} | ||
|
||
/// Invoke the function without `args`, instead the number of rows are provided, | ||
/// returning the appropriate result. | ||
#[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")] | ||
/// | ||
/// Note: This method is deprecated and will be removed in future releases. | ||
/// User defined functions should implement [`Self::invoke_with_args`] instead. | ||
#[deprecated(since = "42.1.0", note = "Use `invoke_with_args` instead")] | ||
fn invoke_no_args(&self, _number_rows: usize) -> Result<ColumnarValue> { | ||
not_impl_err!( | ||
"Function {} does not implement invoke_no_args but called", | ||
|
@@ -767,6 +782,7 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl { | |
args: &[ColumnarValue], | ||
number_rows: usize, | ||
) -> Result<ColumnarValue> { | ||
#[allow(deprecated)] | ||
self.inner.invoke_batch(args, number_rows) | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if this structure also provides a potential place for "preparing" a scalar function (e.g. to pre-compile a regex 🤔 ) - #8051
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would you suggest having a opaque block returned from a (new interface method) which is that that value to invoke?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I was thinking something like