From 667c77a85836bffdf7747a25403cd475f0c07be8 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Fri, 20 Dec 2024 23:06:09 +0200 Subject: [PATCH] feat(function): add `least` function (#13786) * start adding least fn * feat(function): add least function * update function name * fix scalar smaller function * add tests * run Clippy and Fmt * Generated docs using `./dev/update_function_docs.sh` * add comment why `descending: false` * update comment * Update least.rs Co-authored-by: Bruce Ritchie * Update scalar_functions.md * run ./dev/update_function_docs.sh to update docs * merge greatest and least implementation to one * add header --------- Co-authored-by: Bruce Ritchie Co-authored-by: Andrew Lamb --- datafusion/functions/src/core/greatest.rs | 183 +++++----------- .../src/core/greatest_least_utils.rs | 133 ++++++++++++ datafusion/functions/src/core/least.rs | 200 ++++++++++++++++++ datafusion/functions/src/core/mod.rs | 8 + .../sqllogictest/test_files/functions.slt | 198 +++++++++++++++++ .../source/user-guide/sql/scalar_functions.md | 24 +++ 6 files changed, 612 insertions(+), 134 deletions(-) create mode 100644 datafusion/functions/src/core/greatest_least_utils.rs create mode 100644 datafusion/functions/src/core/least.rs diff --git a/datafusion/functions/src/core/greatest.rs b/datafusion/functions/src/core/greatest.rs index 3ea2eadf22ee..e91ec2b0c4d8 100644 --- a/datafusion/functions/src/core/greatest.rs +++ b/datafusion/functions/src/core/greatest.rs @@ -15,20 +15,19 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::{make_comparator, Array, ArrayRef, BooleanArray}; +use crate::core::greatest_least_utils::GreatestLeastOperator; +use arrow::array::{make_comparator, Array, BooleanArray}; use arrow::compute::kernels::cmp; -use arrow::compute::kernels::zip::zip; use arrow::compute::SortOptions; use arrow::datatypes::DataType; use arrow_buffer::BooleanBuffer; -use datafusion_common::{exec_err, plan_err, Result, ScalarValue}; +use datafusion_common::{internal_err, Result, ScalarValue}; use datafusion_doc::Documentation; -use datafusion_expr::binary::type_union_resolution; use datafusion_expr::scalar_doc_sections::DOC_SECTION_CONDITIONAL; use datafusion_expr::ColumnarValue; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::OnceLock; const SORT_OPTIONS: SortOptions = SortOptions { // We want greatest first @@ -57,79 +56,57 @@ impl GreatestFunc { } } -fn get_logical_null_count(arr: &dyn Array) -> usize { - arr.logical_nulls() - .map(|n| n.null_count()) - .unwrap_or_default() -} +impl GreatestLeastOperator for GreatestFunc { + const NAME: &'static str = "greatest"; -/// Return boolean array where `arr[i] = lhs[i] >= rhs[i]` for all i, where `arr` is the result array -/// Nulls are always considered smaller than any other value -fn get_larger(lhs: &dyn Array, rhs: &dyn Array) -> Result { - // Fast path: - // If both arrays are not nested, have the same length and no nulls, we can use the faster vectorised kernel - // - If both arrays are not nested: Nested types, such as lists, are not supported as the null semantics are not well-defined. - // - both array does not have any nulls: cmp::gt_eq will return null if any of the input is null while we want to return false in that case - if !lhs.data_type().is_nested() - && get_logical_null_count(lhs) == 0 - && get_logical_null_count(rhs) == 0 - { - return cmp::gt_eq(&lhs, &rhs).map_err(|e| e.into()); - } + fn keep_scalar<'a>( + lhs: &'a ScalarValue, + rhs: &'a ScalarValue, + ) -> Result<&'a ScalarValue> { + if !lhs.data_type().is_nested() { + return if lhs >= rhs { Ok(lhs) } else { Ok(rhs) }; + } - let cmp = make_comparator(lhs, rhs, SORT_OPTIONS)?; + // If complex type we can't compare directly as we want null values to be smaller + let cmp = make_comparator( + lhs.to_array()?.as_ref(), + rhs.to_array()?.as_ref(), + SORT_OPTIONS, + )?; - if lhs.len() != rhs.len() { - return exec_err!( - "All arrays should have the same length for greatest comparison" - ); + if cmp(0, 0).is_ge() { + Ok(lhs) + } else { + Ok(rhs) + } } - let values = BooleanBuffer::collect_bool(lhs.len(), |i| cmp(i, i).is_ge()); - - // No nulls as we only want to keep the values that are larger, its either true or false - Ok(BooleanArray::new(values, None)) -} - -/// Return array where the largest value at each index is kept -fn keep_larger(lhs: ArrayRef, rhs: ArrayRef) -> Result { - // True for values that we should keep from the left array - let keep_lhs = get_larger(lhs.as_ref(), rhs.as_ref())?; - - let larger = zip(&keep_lhs, &lhs, &rhs)?; + /// Return boolean array where `arr[i] = lhs[i] >= rhs[i]` for all i, where `arr` is the result array + /// Nulls are always considered smaller than any other value + fn get_indexes_to_keep(lhs: &dyn Array, rhs: &dyn Array) -> Result { + // Fast path: + // If both arrays are not nested, have the same length and no nulls, we can use the faster vectorised kernel + // - If both arrays are not nested: Nested types, such as lists, are not supported as the null semantics are not well-defined. + // - both array does not have any nulls: cmp::gt_eq will return null if any of the input is null while we want to return false in that case + if !lhs.data_type().is_nested() + && lhs.logical_null_count() == 0 + && rhs.logical_null_count() == 0 + { + return cmp::gt_eq(&lhs, &rhs).map_err(|e| e.into()); + } - Ok(larger) -} + let cmp = make_comparator(lhs, rhs, SORT_OPTIONS)?; -fn keep_larger_scalar<'a>( - lhs: &'a ScalarValue, - rhs: &'a ScalarValue, -) -> Result<&'a ScalarValue> { - if !lhs.data_type().is_nested() { - return if lhs >= rhs { Ok(lhs) } else { Ok(rhs) }; - } - - // If complex type we can't compare directly as we want null values to be smaller - let cmp = make_comparator( - lhs.to_array()?.as_ref(), - rhs.to_array()?.as_ref(), - SORT_OPTIONS, - )?; + if lhs.len() != rhs.len() { + return internal_err!( + "All arrays should have the same length for greatest comparison" + ); + } - if cmp(0, 0).is_ge() { - Ok(lhs) - } else { - Ok(rhs) - } -} + let values = BooleanBuffer::collect_bool(lhs.len(), |i| cmp(i, i).is_ge()); -fn find_coerced_type(data_types: &[DataType]) -> Result { - if data_types.is_empty() { - plan_err!("greatest was called without any arguments. It requires at least 1.") - } else if let Some(coerced_type) = type_union_resolution(data_types) { - Ok(coerced_type) - } else { - plan_err!("Cannot find a common type for arguments") + // No nulls as we only want to keep the values that are larger, its either true or false + Ok(BooleanArray::new(values, None)) } } @@ -151,74 +128,12 @@ impl ScalarUDFImpl for GreatestFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - if args.is_empty() { - return exec_err!( - "greatest was called with no arguments. It requires at least 1." - ); - } - - // Some engines (e.g. SQL Server) allow greatest with single arg, it's a noop - if args.len() == 1 { - return Ok(args[0].clone()); - } - - // Split to scalars and arrays for later optimization - let (scalars, arrays): (Vec<_>, Vec<_>) = args.iter().partition(|x| match x { - ColumnarValue::Scalar(_) => true, - ColumnarValue::Array(_) => false, - }); - - let mut arrays_iter = arrays.iter().map(|x| match x { - ColumnarValue::Array(a) => a, - _ => unreachable!(), - }); - - let first_array = arrays_iter.next(); - - let mut largest: ArrayRef; - - // Optimization: merge all scalars into one to avoid recomputing - if !scalars.is_empty() { - let mut scalars_iter = scalars.iter().map(|x| match x { - ColumnarValue::Scalar(s) => s, - _ => unreachable!(), - }); - - // We have at least one scalar - let mut largest_scalar = scalars_iter.next().unwrap(); - - for scalar in scalars_iter { - largest_scalar = keep_larger_scalar(largest_scalar, scalar)?; - } - - // If we only have scalars, return the largest one - if arrays.is_empty() { - return Ok(ColumnarValue::Scalar(largest_scalar.clone())); - } - - // We have at least one array - let first_array = first_array.unwrap(); - - // Start with the largest value - largest = keep_larger( - Arc::clone(first_array), - largest_scalar.to_array_of_size(first_array.len())?, - )?; - } else { - // If we only have arrays, start with the first array - // (We must have at least one array) - largest = Arc::clone(first_array.unwrap()); - } - - for array in arrays_iter { - largest = keep_larger(Arc::clone(array), largest)?; - } - - Ok(ColumnarValue::Array(largest)) + super::greatest_least_utils::execute_conditional::(args) } fn coerce_types(&self, arg_types: &[DataType]) -> Result> { - let coerced_type = find_coerced_type(arg_types)?; + let coerced_type = + super::greatest_least_utils::find_coerced_type::(arg_types)?; Ok(vec![coerced_type; arg_types.len()]) } diff --git a/datafusion/functions/src/core/greatest_least_utils.rs b/datafusion/functions/src/core/greatest_least_utils.rs new file mode 100644 index 000000000000..46b3645e703a --- /dev/null +++ b/datafusion/functions/src/core/greatest_least_utils.rs @@ -0,0 +1,133 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::{Array, ArrayRef, BooleanArray}; +use arrow::compute::kernels::zip::zip; +use arrow::datatypes::DataType; +use datafusion_common::{internal_err, plan_err, Result, ScalarValue}; +use datafusion_expr_common::columnar_value::ColumnarValue; +use datafusion_expr_common::type_coercion::binary::type_union_resolution; +use std::sync::Arc; + +pub(super) trait GreatestLeastOperator { + const NAME: &'static str; + + fn keep_scalar<'a>( + lhs: &'a ScalarValue, + rhs: &'a ScalarValue, + ) -> Result<&'a ScalarValue>; + + /// Return array with true for values that we should keep from the lhs array + fn get_indexes_to_keep(lhs: &dyn Array, rhs: &dyn Array) -> Result; +} + +fn keep_array( + lhs: ArrayRef, + rhs: ArrayRef, +) -> Result { + // True for values that we should keep from the left array + let keep_lhs = Op::get_indexes_to_keep(lhs.as_ref(), rhs.as_ref())?; + + let result = zip(&keep_lhs, &lhs, &rhs)?; + + Ok(result) +} + +pub(super) fn execute_conditional( + args: &[ColumnarValue], +) -> Result { + if args.is_empty() { + return internal_err!( + "{} was called with no arguments. It requires at least 1.", + Op::NAME + ); + } + + // Some engines (e.g. SQL Server) allow greatest/least with single arg, it's a noop + if args.len() == 1 { + return Ok(args[0].clone()); + } + + // Split to scalars and arrays for later optimization + let (scalars, arrays): (Vec<_>, Vec<_>) = args.iter().partition(|x| match x { + ColumnarValue::Scalar(_) => true, + ColumnarValue::Array(_) => false, + }); + + let mut arrays_iter = arrays.iter().map(|x| match x { + ColumnarValue::Array(a) => a, + _ => unreachable!(), + }); + + let first_array = arrays_iter.next(); + + let mut result: ArrayRef; + + // Optimization: merge all scalars into one to avoid recomputing (constant folding) + if !scalars.is_empty() { + let mut scalars_iter = scalars.iter().map(|x| match x { + ColumnarValue::Scalar(s) => s, + _ => unreachable!(), + }); + + // We have at least one scalar + let mut result_scalar = scalars_iter.next().unwrap(); + + for scalar in scalars_iter { + result_scalar = Op::keep_scalar(result_scalar, scalar)?; + } + + // If we only have scalars, return the one that we should keep (largest/least) + if arrays.is_empty() { + return Ok(ColumnarValue::Scalar(result_scalar.clone())); + } + + // We have at least one array + let first_array = first_array.unwrap(); + + // Start with the result value + result = keep_array::( + Arc::clone(first_array), + result_scalar.to_array_of_size(first_array.len())?, + )?; + } else { + // If we only have arrays, start with the first array + // (We must have at least one array) + result = Arc::clone(first_array.unwrap()); + } + + for array in arrays_iter { + result = keep_array::(Arc::clone(array), result)?; + } + + Ok(ColumnarValue::Array(result)) +} + +pub(super) fn find_coerced_type( + data_types: &[DataType], +) -> Result { + if data_types.is_empty() { + plan_err!( + "{} was called without any arguments. It requires at least 1.", + Op::NAME + ) + } else if let Some(coerced_type) = type_union_resolution(data_types) { + Ok(coerced_type) + } else { + plan_err!("Cannot find a common type for arguments") + } +} diff --git a/datafusion/functions/src/core/least.rs b/datafusion/functions/src/core/least.rs new file mode 100644 index 000000000000..b9ea65cdb732 --- /dev/null +++ b/datafusion/functions/src/core/least.rs @@ -0,0 +1,200 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::core::greatest_least_utils::GreatestLeastOperator; +use arrow::array::{make_comparator, Array, BooleanArray}; +use arrow::compute::kernels::cmp; +use arrow::compute::SortOptions; +use arrow::datatypes::DataType; +use arrow_buffer::BooleanBuffer; +use datafusion_common::{internal_err, Result, ScalarValue}; +use datafusion_doc::Documentation; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_CONDITIONAL; +use datafusion_expr::ColumnarValue; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; +use std::sync::OnceLock; + +const SORT_OPTIONS: SortOptions = SortOptions { + // Having the smallest result first + descending: false, + + // NULL will be greater than any other value + nulls_first: false, +}; + +#[derive(Debug)] +pub struct LeastFunc { + signature: Signature, +} + +impl Default for LeastFunc { + fn default() -> Self { + LeastFunc::new() + } +} + +impl LeastFunc { + pub fn new() -> Self { + Self { + signature: Signature::user_defined(Volatility::Immutable), + } + } +} + +impl GreatestLeastOperator for LeastFunc { + const NAME: &'static str = "least"; + + fn keep_scalar<'a>( + lhs: &'a ScalarValue, + rhs: &'a ScalarValue, + ) -> Result<&'a ScalarValue> { + // Manual checking for nulls as: + // 1. If we're going to use <=, in Rust None is smaller than Some(T), which we don't want + // 2. And we can't use make_comparator as it has no natural order (Arrow error) + if lhs.is_null() { + return Ok(rhs); + } + + if rhs.is_null() { + return Ok(lhs); + } + + if !lhs.data_type().is_nested() { + return if lhs <= rhs { Ok(lhs) } else { Ok(rhs) }; + } + + // Not using <= as in Rust None is smaller than Some(T) + + // If complex type we can't compare directly as we want null values to be larger + let cmp = make_comparator( + lhs.to_array()?.as_ref(), + rhs.to_array()?.as_ref(), + SORT_OPTIONS, + )?; + + if cmp(0, 0).is_le() { + Ok(lhs) + } else { + Ok(rhs) + } + } + + /// Return boolean array where `arr[i] = lhs[i] <= rhs[i]` for all i, where `arr` is the result array + /// Nulls are always considered larger than any other value + fn get_indexes_to_keep(lhs: &dyn Array, rhs: &dyn Array) -> Result { + // Fast path: + // If both arrays are not nested, have the same length and no nulls, we can use the faster vectorised kernel + // - If both arrays are not nested: Nested types, such as lists, are not supported as the null semantics are not well-defined. + // - both array does not have any nulls: cmp::lt_eq will return null if any of the input is null while we want to return false in that case + if !lhs.data_type().is_nested() + && lhs.logical_null_count() == 0 + && rhs.logical_null_count() == 0 + { + return cmp::lt_eq(&lhs, &rhs).map_err(|e| e.into()); + } + + let cmp = make_comparator(lhs, rhs, SORT_OPTIONS)?; + + if lhs.len() != rhs.len() { + return internal_err!( + "All arrays should have the same length for least comparison" + ); + } + + let values = BooleanBuffer::collect_bool(lhs.len(), |i| cmp(i, i).is_le()); + + // No nulls as we only want to keep the values that are smaller, its either true or false + Ok(BooleanArray::new(values, None)) + } +} + +impl ScalarUDFImpl for LeastFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "least" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + Ok(arg_types[0].clone()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + super::greatest_least_utils::execute_conditional::(args) + } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + let coerced_type = + super::greatest_least_utils::find_coerced_type::(arg_types)?; + + Ok(vec![coerced_type; arg_types.len()]) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(get_smallest_doc()) + } +} +static DOCUMENTATION: OnceLock = OnceLock::new(); + +fn get_smallest_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder( + DOC_SECTION_CONDITIONAL, + "Returns the smallest value in a list of expressions. Returns _null_ if all expressions are _null_.", + "least(expression1[, ..., expression_n])") + .with_sql_example(r#"```sql +> select least(4, 7, 5); ++---------------------------+ +| least(4,7,5) | ++---------------------------+ +| 4 | ++---------------------------+ +```"#, + ) + .with_argument( + "expression1, expression_n", + "Expressions to compare and return the smallest value. Can be a constant, column, or function, and any combination of arithmetic operators. Pass as many expression arguments as necessary." + ) + .build() + }) +} + +#[cfg(test)] +mod test { + use crate::core::least::LeastFunc; + use arrow::datatypes::DataType; + use datafusion_expr::ScalarUDFImpl; + + #[test] + fn test_least_return_types_without_common_supertype_in_arg_type() { + let least = LeastFunc::new(); + let return_type = least + .coerce_types(&[DataType::Decimal128(10, 3), DataType::Decimal128(10, 4)]) + .unwrap(); + assert_eq!( + return_type, + vec![DataType::Decimal128(11, 4), DataType::Decimal128(11, 4)] + ); + } +} diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs index bd8305cd56d8..ba8255d2e472 100644 --- a/datafusion/functions/src/core/mod.rs +++ b/datafusion/functions/src/core/mod.rs @@ -26,6 +26,8 @@ pub mod coalesce; pub mod expr_ext; pub mod getfield; pub mod greatest; +mod greatest_least_utils; +pub mod least; pub mod named_struct; pub mod nullif; pub mod nvl; @@ -45,6 +47,7 @@ make_udf_function!(named_struct::NamedStructFunc, named_struct); make_udf_function!(getfield::GetFieldFunc, get_field); make_udf_function!(coalesce::CoalesceFunc, coalesce); make_udf_function!(greatest::GreatestFunc, greatest); +make_udf_function!(least::LeastFunc, least); make_udf_function!(version::VersionFunc, version); pub mod expr_fn { @@ -86,6 +89,10 @@ pub mod expr_fn { greatest, "Returns `greatest(args...)`, which evaluates to the greatest value in the list of expressions or NULL if all the expressions are NULL", args, + ),( + least, + "Returns `least(args...)`, which evaluates to the smallest value in the list of expressions or NULL if all the expressions are NULL", + args, )); #[doc = "Returns the value of the field with the given name from the struct"] @@ -113,6 +120,7 @@ pub fn functions() -> Vec> { get_field(), coalesce(), greatest(), + least(), version(), r#struct(), ] diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index 4b770a19fe20..4213de0235e4 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -955,3 +955,201 @@ Infinity statement ok drop table t1 + +# test for least +statement ok +CREATE TABLE t1 (a int, b int, c int) as VALUES +(4, NULL, NULL), +(1, 2, 3), +(3, 1, 2), +(1, NULL, -1), +(NULL, NULL, NULL), +(3, 0, -1); + +query I +SELECT least(a, b, c) FROM t1 +---- +4 +1 +1 +-1 +NULL +-1 + +statement ok +drop table t1 + +query I +SELECT least(1) +---- +1 + +query I +SELECT least(1, 2) +---- +1 + +query I +SELECT least(3, 1) +---- +1 + +query ? +SELECT least(NULL) +---- +NULL + +query I +SELECT least(1, NULL, -1) +---- +-1 + +query I +SELECT least((3), (0), (-1)); +---- +-1 + +query ? +SELECT least([4, 3], [4, 2], [4, 4]); +---- +[4, 2] + +query ? +SELECT least([2, 3], [1, 4], [5, 0]); +---- +[1, 4] + +query I +SELECT least(1::int, 2::text) +---- +1 + +query R +SELECT least(-1, 1, 2.3, 123456789, 3 + 5, -(-4)) +---- +-1 + +query R +SELECT least(-1.123, 1.21313, 2.3, 123456789.321, 3 + 5.3213, -(-4.3213), abs(-9)) +---- +-1.123 + +query R +SELECT least(-1, 1, 2.3, 123456789, 3 + 5, -(-4), abs(-9.0)) +---- +-1 + + +query error least does not support zero arguments +SELECT least() + +query I +SELECT least(4, 5, 7, 1, 2) +---- +1 + +query I +SELECT least(4, NULL, 7, 1, 2) +---- +1 + +query I +SELECT least(NULL, NULL, 7, NULL, 2) +---- +2 + +query I +SELECT least(NULL, NULL, NULL, NULL, 2) +---- +2 + +query I +SELECT least(2, NULL, NULL, NULL, NULL) +---- +2 + +query ? +SELECT least(NULL, NULL, NULL) +---- +NULL + +query I +SELECT least(2, '4') +---- +2 + +query T +SELECT least('foo', 'bar', 'foobar') +---- +bar + +query R +SELECT least(1, 1.2) +---- +1 + +statement ok +CREATE TABLE foo (a int) + +statement ok +INSERT INTO foo (a) VALUES (1) + +# Test homogenous functions that can't be constant folded. +query I +SELECT least(NULL, a, 5, NULL) FROM foo +---- +1 + +query I +SELECT least(NULL, NULL, NULL, a, -1) FROM foo +---- +-1 + +statement ok +drop table foo + +query R +select least(arrow_cast('NAN','Float64'), arrow_cast('NAN','Float64')) +---- +NaN + +query R +select least(arrow_cast('NAN','Float64'), arrow_cast('NAN','Float32')) +---- +NaN + +query R +select least(arrow_cast('NAN','Float64'), '+Inf'::Double) +---- +Infinity + +query R +select least(arrow_cast('NAN','Float64'), NULL) +---- +NaN + +query R +select least(NULL, '+Inf'::Double) +---- +Infinity + +query R +select least(NULL, '-Inf'::Double) +---- +-Infinity + +statement ok +CREATE TABLE t1 (a double, b double, c double) as VALUES +(1, arrow_cast('NAN', 'Float64'), '+Inf'::Double), +(NULL, arrow_cast('NAN','Float64'), '+Inf'::Double), +(1, '+Inf'::Double, NULL); + +query R +SELECT least(a, b, c) FROM t1 +---- +1 +Infinity +1 + +statement ok +drop table t1 diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 4e74cfc54ae5..2e4147f96e0f 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -549,6 +549,7 @@ trunc(numeric_expression[, decimal_places]) - [coalesce](#coalesce) - [greatest](#greatest) - [ifnull](#ifnull) +- [least](#least) - [nullif](#nullif) - [nvl](#nvl) - [nvl2](#nvl2) @@ -603,6 +604,29 @@ greatest(expression1[, ..., expression_n]) _Alias of [nvl](#nvl)._ +### `least` + +Returns the smallest value in a list of expressions. Returns _null_ if all expressions are _null_. + +``` +least(expression1[, ..., expression_n]) +``` + +#### Arguments + +- **expression1, expression_n**: Expressions to compare and return the smallest value. Can be a constant, column, or function, and any combination of arithmetic operators. Pass as many expression arguments as necessary. + +#### Example + +```sql +> select least(4, 7, 5); ++---------------------------+ +| least(4,7,5) | ++---------------------------+ +| 4 | ++---------------------------+ +``` + ### `nullif` Returns _null_ if _expression1_ equals _expression2_; otherwise it returns _expression1_.