Skip to content

Commit

Permalink
Support n-ary monotonic functions in ordering equivalence (#13841)
Browse files Browse the repository at this point in the history
* Support n-ary monotonic functions in `discover_new_orderings`

* Add tests for n-ary monotonic functions in `discover_new_orderings`

* Fix tests

* Fix non-monotonic test case

* Fix unintended simplification

* Minor comment changes

* Fix tests

* Add `preserves_lex_ordering` field

* Use `preserves_lex_ordering` on `discover_new_orderings()`

* Add `output_ordering` and `output_preserves_lex_ordering` implementations for `ConcatFunc`

* Update tests

* Move logic to UDF

* Cargo fmt

* Refactor

* Cargo fmt

* Simply return `false` from the default implementation

* Remove unnecessary import

* Clippy fix

* Update Cargo.lock

* Move dep to dev-dependencies

* Rename output_preserves_lex_ordering to preserves_lex_ordering

* minor

---------

Co-authored-by: berkaysynnada <[email protected]>
  • Loading branch information
gokselk and berkaysynnada authored Dec 20, 2024
1 parent 5d563d9 commit 95d296c
Show file tree
Hide file tree
Showing 9 changed files with 269 additions and 38 deletions.
21 changes: 19 additions & 2 deletions datafusion/expr-common/src/sort_properties.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,19 +129,30 @@ impl Neg for SortProperties {
}
}

/// Represents the properties of a `PhysicalExpr`, including its sorting and range attributes.
/// Represents the properties of a `PhysicalExpr`, including its sorting,
/// range, and whether it preserves lexicographical ordering.
///
/// All three fields are public, so a value can be assembled directly or
/// derived from [`ExprProperties::new_unknown`].
#[derive(Debug, Clone)]
pub struct ExprProperties {
/// Properties that describe the sorting behavior of the expression,
/// such as whether it is ordered, unordered, or a singleton value.
pub sort_properties: SortProperties,
/// A closed interval representing the range of possible values for
/// the expression. Used to compute reliable bounds.
pub range: Interval,
/// Indicates whether the expression preserves lexicographical ordering
/// of its inputs. For example, string concatenation preserves ordering,
/// while addition does not.
///
/// [`ExprProperties::new_unknown`] initializes this to `false`; use
/// [`ExprProperties::with_preserves_lex_ordering`] to change it.
pub preserves_lex_ordering: bool,
}

impl ExprProperties {
/// Creates a new `ExprProperties` instance with unknown sort properties and unknown range.
/// Builds an `ExprProperties` describing an expression about which nothing
/// is known yet.
///
/// The result carries the default [`SortProperties`], an unbounded
/// [`Interval`] over `DataType::Null`, and `preserves_lex_ordering` set to
/// `false`.
///
/// # Panics
///
/// Panics if `Interval::make_unbounded` fails for `DataType::Null`, since
/// its result is unwrapped.
pub fn new_unknown() -> Self {
    let sort_properties = SortProperties::default();
    let range = Interval::make_unbounded(&DataType::Null).unwrap();
    Self {
        sort_properties,
        range,
        // Nothing is known, so do not claim ordering is preserved.
        preserves_lex_ordering: false,
    }
}

Expand All @@ -156,4 +167,10 @@ impl ExprProperties {
self.range = range;
self
}

/// Sets whether the expression maintains lexicographical ordering and returns the modified instance.
pub fn with_preserves_lex_ordering(mut self, preserves_lex_ordering: bool) -> Self {
self.preserves_lex_ordering = preserves_lex_ordering;
self
}
}
36 changes: 32 additions & 4 deletions datafusion/expr/src/udf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,10 @@ impl ScalarUDF {
self.inner.output_ordering(inputs)
}

/// Returns whether this function preserves the lexicographical ordering of
/// its inputs, by delegating to the wrapped implementation.
///
/// See [`ScalarUDFImpl::preserves_lex_ordering`] for more details.
pub fn preserves_lex_ordering(&self, inputs: &[ExprProperties]) -> Result<bool> {
self.inner.preserves_lex_ordering(inputs)
}

/// See [`ScalarUDFImpl::coerce_types`] for more details.
pub fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
self.inner.coerce_types(arg_types)
Expand Down Expand Up @@ -650,10 +654,30 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
Ok(Some(vec![]))
}

/// Calculates the [`SortProperties`] of this function based on its
/// children's properties.
fn output_ordering(&self, _inputs: &[ExprProperties]) -> Result<SortProperties> {
Ok(SortProperties::Unordered)
/// Derives the [`SortProperties`] of this function's output from the
/// properties of its children.
///
/// The default behaves as follows:
/// - if [`Self::preserves_lex_ordering`] reports `false`, the output is
///   `Unordered`;
/// - with no inputs at all, the output is `Singleton`;
/// - if every input has the same sort properties, those properties are
///   returned as-is;
/// - otherwise the output is `Unordered`.
///
/// # Errors
///
/// Propagates any error returned by [`Self::preserves_lex_ordering`].
fn output_ordering(&self, inputs: &[ExprProperties]) -> Result<SortProperties> {
    let lex_preserving = self.preserves_lex_ordering(inputs)?;
    if !lex_preserving {
        return Ok(SortProperties::Unordered);
    }

    match inputs.split_first() {
        // No children to derive an ordering from.
        None => Ok(SortProperties::Singleton),
        Some((head, rest)) => {
            let shared = head.sort_properties;
            let uniform = rest.iter().all(|child| child.sort_properties == shared);
            // Only a sort property common to all children can be carried over.
            Ok(if uniform {
                shared
            } else {
                SortProperties::Unordered
            })
        }
    }
}

/// Reports whether this function preserves the lexicographical ordering of
/// its inputs, given the properties of its children.
///
/// The default implementation ignores `_inputs` and always returns
/// `Ok(false)`; implementations override this method to opt in.
fn preserves_lex_ordering(&self, _inputs: &[ExprProperties]) -> Result<bool> {
Ok(false)
}

/// Coerce arguments of a function call to types that the function can evaluate.
Expand Down Expand Up @@ -809,6 +833,10 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl {
self.inner.output_ordering(inputs)
}

// Forward to the wrapped UDF so that the alias reports the same answer
// as the function it wraps.
fn preserves_lex_ordering(&self, inputs: &[ExprProperties]) -> Result<bool> {
self.inner.preserves_lex_ordering(inputs)
}

// Forward type coercion to the wrapped UDF unchanged.
fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
self.inner.coerce_types(arg_types)
}
Expand Down
5 changes: 5 additions & 0 deletions datafusion/functions/src/string/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

use arrow::array::{as_largestring_array, Array};
use arrow::datatypes::DataType;
use datafusion_expr::sort_properties::ExprProperties;
use std::any::Any;
use std::sync::{Arc, OnceLock};

Expand Down Expand Up @@ -265,6 +266,10 @@ impl ScalarUDFImpl for ConcatFunc {
// Always returns `Some`: wraps the reference produced by `get_concat_doc`.
fn documentation(&self) -> Option<&Documentation> {
Some(get_concat_doc())
}

// Concatenation is declared to preserve lexicographical ordering
// unconditionally: the answer is `true` regardless of `_inputs`.
fn preserves_lex_ordering(&self, _inputs: &[ExprProperties]) -> Result<bool> {
Ok(true)
}
}

// Write-once cell holding a `Documentation` value; it starts out empty.
// NOTE(review): presumably populated on first use by `get_concat_doc` — confirm.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
Expand Down
1 change: 1 addition & 0 deletions datafusion/physical-expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ petgraph = "0.6.2"
[dev-dependencies]
arrow = { workspace = true, features = ["test_utils"] }
criterion = "0.5"
datafusion-functions = { workspace = true }
rand = { workspace = true }
rstest = { workspace = true }

Expand Down
Loading

0 comments on commit 95d296c

Please sign in to comment.