From a584b9edb478b612595ceb83ab414d7db415b32b Mon Sep 17 00:00:00 2001 From: kamille Date: Sun, 14 Jul 2024 12:30:58 +0800 Subject: [PATCH 1/9] return scalar result when all inputs are constants. --- datafusion/functions/src/core/map.rs | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/datafusion/functions/src/core/map.rs b/datafusion/functions/src/core/map.rs index 8a8a19d7af52..e076af6465a2 100644 --- a/datafusion/functions/src/core/map.rs +++ b/datafusion/functions/src/core/map.rs @@ -28,7 +28,15 @@ use datafusion_common::{exec_err, internal_err, ScalarValue}; use datafusion_common::{not_impl_err, Result}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +#[inline] +fn can_evaluate_to_const(args: &[ColumnarValue]) -> bool { + args.iter().all(|arg| matches!(arg, ColumnarValue::Scalar(_))) +} + + fn make_map(args: &[ColumnarValue]) -> Result { + let can_evaluate_to_const = can_evaluate_to_const(args); + let (key, value): (Vec<_>, Vec<_>) = args .chunks_exact(2) .map(|chunk| { @@ -58,7 +66,7 @@ fn make_map(args: &[ColumnarValue]) -> Result { Ok(value) => value, Err(e) => return internal_err!("Error concatenating values: {}", e), }; - make_map_batch_internal(key, value) + make_map_batch_internal(key, value, can_evaluate_to_const) } fn make_map_batch(args: &[ColumnarValue]) -> Result { @@ -68,9 +76,12 @@ fn make_map_batch(args: &[ColumnarValue]) -> Result { args.len() ); } + + let can_evaluate_to_const = can_evaluate_to_const(args); + let key = get_first_array_ref(&args[0])?; let value = get_first_array_ref(&args[1])?; - make_map_batch_internal(key, value) + make_map_batch_internal(key, value, can_evaluate_to_const) } fn get_first_array_ref(columnar_value: &ColumnarValue) -> Result { @@ -85,7 +96,7 @@ fn get_first_array_ref(columnar_value: &ColumnarValue) -> Result { } } -fn make_map_batch_internal(keys: ArrayRef, values: ArrayRef) -> Result { +fn make_map_batch_internal(keys: ArrayRef, values: ArrayRef, can_evaluate_to_const: bool) -> Result { if keys.null_count() > 0 { return exec_err!("map key cannot be null"); } @@ -124,8 +135,15 @@ fn make_map_batch_internal(keys: ArrayRef, values: ArrayRef) -> Result Date: Sun, 14 Jul 2024 12:55:12 +0800 Subject: [PATCH 2/9] support convert map array to scalar. --- datafusion/common/src/scalar/mod.rs | 5 ++++- datafusion/functions/src/core/map.rs | 21 +++++++++++++++------ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 6c03e8698e80..c891e85aa59b 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -2678,7 +2678,10 @@ impl ScalarValue { DataType::Duration(TimeUnit::Nanosecond) => { typed_cast!(array, index, DurationNanosecondArray, DurationNanosecond)? } - + DataType::Map(_, _) => { + let a = array.slice(index, 1); + Self::Map(Arc::new(a.as_map().to_owned())) + } other => { return _not_impl_err!( "Can't create a scalar from array of type \"{other:?}\"" diff --git a/datafusion/functions/src/core/map.rs b/datafusion/functions/src/core/map.rs index e076af6465a2..51b8f7a40778 100644 --- a/datafusion/functions/src/core/map.rs +++ b/datafusion/functions/src/core/map.rs @@ -28,12 +28,19 @@ use datafusion_common::{exec_err, internal_err, ScalarValue}; use datafusion_common::{not_impl_err, Result}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +/// Check if we can evaluate the expr to constant directly. +/// +/// # Example +/// ```sql +/// SELECT make_map('type', 'test') from test +/// ``` +/// We can evaluate the result of `make_map` directly. #[inline] fn can_evaluate_to_const(args: &[ColumnarValue]) -> bool { - args.iter().all(|arg| matches!(arg, ColumnarValue::Scalar(_))) + args.iter() + .all(|arg| matches!(arg, ColumnarValue::Scalar(_))) } - fn make_map(args: &[ColumnarValue]) -> Result { let can_evaluate_to_const = can_evaluate_to_const(args); @@ -96,7 +103,11 @@ fn get_first_array_ref(columnar_value: &ColumnarValue) -> Result { } } -fn make_map_batch_internal(keys: ArrayRef, values: ArrayRef, can_evaluate_to_const: bool) -> Result { +fn make_map_batch_internal( + keys: ArrayRef, + values: ArrayRef, + can_evaluate_to_const: bool, +) -> Result { if keys.null_count() > 0 { return exec_err!("map key cannot be null"); } @@ -137,13 +148,11 @@ fn make_map_batch_internal(keys: ArrayRef, values: ArrayRef, can_evaluate_to_con .build()?; let map_array = Arc::new(MapArray::from(map_data)); - // If all inputs are constants(such as `make_map('type', 'test')`), - // the result should be scalar. Ok(if can_evaluate_to_const { ColumnarValue::Scalar(ScalarValue::try_from_array(map_array.as_ref(), 0)?) } else { ColumnarValue::Array(map_array) - }) + }) } #[derive(Debug)] From ae1f0f2669bf0d8cbce2f67c4d119e1e2f4110f0 Mon Sep 17 00:00:00 2001 From: kamille Date: Sun, 14 Jul 2024 13:18:15 +0800 Subject: [PATCH 3/9] disable the const evaluate for Map type before impl its hash calculation. --- .../simplify_expressions/expr_simplifier.rs | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 17855e17bef8..3812d75c992d 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -656,12 +656,32 @@ impl<'a> ConstEvaluator<'a> { } else { // Non-ListArray match ScalarValue::try_from_array(&a, 0) { - Ok(s) => ConstSimplifyResult::Simplified(s), + Ok(s) => { + // TODO: support the optimization for `Map` type after support impl hash for it + if matches!(&s, ScalarValue::Map(_)) { + ConstSimplifyResult::SimplifyRuntimeError(DataFusionError::Execution("const evaluate for Map type is still not supported".to_string()), expr) + } else { + ConstSimplifyResult::Simplified(s) + } + } Err(err) => ConstSimplifyResult::SimplifyRuntimeError(err, expr), } } } - ColumnarValue::Scalar(s) => ConstSimplifyResult::Simplified(s), + ColumnarValue::Scalar(s) => { + // TODO: support the optimization for `Map` type after support impl hash for it + if matches!(&s, ScalarValue::Map(_)) { + ConstSimplifyResult::SimplifyRuntimeError( + DataFusionError::Execution( + "const evaluate for Map type is still not supported" + .to_string(), + ), + expr, + ) + } else { + ConstSimplifyResult::Simplified(s) + } + } } } } From 0956bcd980040da695bd01626941a100c7b753d2 Mon Sep 17 00:00:00 2001 From: kamille Date: Sun, 14 Jul 2024 13:40:45 +0800 Subject: [PATCH 4/9] add tests in map.slt. --- datafusion/sqllogictest/test_files/map.slt | 92 ++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index abf5b2ebbf98..1e64bfc22872 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -212,3 +212,95 @@ SELECT map(column5, column6) FROM t; # {k1:1, k2:2} # {k3: 3} # {k5: 5} + +query error +SELECT map(column5, column6) FROM t; +# TODO: support array value +# ---- +# {k1:1, k2:2} +# {k3: 3} +# {k5: 5} + +query ? +SELECT MAKE_MAP('POST', 41, 'HEAD', 33, 'PATCH', 30, 'OPTION', 29, 'GET', 27, 'PUT', 25, 'DELETE', 24) AS method_count from t; +---- +{POST: 41, HEAD: 33, PATCH: 30, OPTION: 29, GET: 27, PUT: 25, DELETE: 24} +{POST: 41, HEAD: 33, PATCH: 30, OPTION: 29, GET: 27, PUT: 25, DELETE: 24} +{POST: 41, HEAD: 33, PATCH: 30, OPTION: 29, GET: 27, PUT: 25, DELETE: 24} + +query I +SELECT MAKE_MAP('POST', 41, 'HEAD', 33)['POST'] from t; +---- +41 +41 +41 + +query ? +SELECT MAKE_MAP('POST', 41, 'HEAD', 33, 'PATCH', null) from t; +---- +{POST: 41, HEAD: 33, PATCH: } +{POST: 41, HEAD: 33, PATCH: } +{POST: 41, HEAD: 33, PATCH: } + +query ? +SELECT MAKE_MAP('POST', null, 'HEAD', 33, 'PATCH', null) from t; +---- +{POST: , HEAD: 33, PATCH: } +{POST: , HEAD: 33, PATCH: } +{POST: , HEAD: 33, PATCH: } + +query ? +SELECT MAKE_MAP(1, null, 2, 33, 3, null) from t; +---- +{1: , 2: 33, 3: } +{1: , 2: 33, 3: } +{1: , 2: 33, 3: } + +query ? +SELECT MAKE_MAP([1,2], ['a', 'b'], [3,4], ['b']) from t; +---- +{[1, 2]: [a, b], [3, 4]: [b]} +{[1, 2]: [a, b], [3, 4]: [b]} +{[1, 2]: [a, b], [3, 4]: [b]} + +query ? +SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, 30]) from t; +---- +{POST: 41, HEAD: 33, PATCH: 30} +{POST: 41, HEAD: 33, PATCH: 30} +{POST: 41, HEAD: 33, PATCH: 30} + +query ? +SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]) from t; +---- +{POST: 41, HEAD: 33, PATCH: } +{POST: 41, HEAD: 33, PATCH: } +{POST: 41, HEAD: 33, PATCH: } + +query ? +SELECT MAP([[1,2], [3,4]], ['a', 'b']) from t; +---- +{[1, 2]: a, [3, 4]: b} +{[1, 2]: a, [3, 4]: b} +{[1, 2]: a, [3, 4]: b} + +query ? +SELECT MAP(make_array('POST', 'HEAD', 'PATCH'), make_array(41, 33, 30)) from t; +---- +{POST: 41, HEAD: 33, PATCH: 30} +{POST: 41, HEAD: 33, PATCH: 30} +{POST: 41, HEAD: 33, PATCH: 30} + +query ? +SELECT MAP(arrow_cast(make_array('POST', 'HEAD', 'PATCH'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array(41, 33, 30), 'FixedSizeList(3, Int64)')) from t; +---- +{POST: 41, HEAD: 33, PATCH: 30} +{POST: 41, HEAD: 33, PATCH: 30} +{POST: 41, HEAD: 33, PATCH: 30} + +query ? +SELECT MAP(arrow_cast(make_array('POST', 'HEAD', 'PATCH'), 'LargeList(Utf8)'), arrow_cast(make_array(41, 33, 30), 'LargeList(Int64)')) from t; +---- +{POST: 41, HEAD: 33, PATCH: 30} +{POST: 41, HEAD: 33, PATCH: 30} +{POST: 41, HEAD: 33, PATCH: 30} From 48314ae1445652c0d264a2b5e04d680ea11d0158 Mon Sep 17 00:00:00 2001 From: kamille Date: Sun, 14 Jul 2024 14:20:49 +0800 Subject: [PATCH 5/9] improve error return. --- .../src/simplify_expressions/expr_simplifier.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 3812d75c992d..93b3f636c756 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -29,6 +29,7 @@ use arrow::{ use datafusion_common::{ cast::{as_large_list_array, as_list_array}, + not_impl_err, tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRewriter}, }; use datafusion_common::{internal_err, DFSchema, DataFusionError, Result, ScalarValue}; @@ -659,7 +660,10 @@ impl<'a> ConstEvaluator<'a> { Ok(s) => { // TODO: support the optimization for `Map` type after support impl hash for it if matches!(&s, ScalarValue::Map(_)) { - ConstSimplifyResult::SimplifyRuntimeError(DataFusionError::Execution("const evaluate for Map type is still not supported".to_string()), expr) + ConstSimplifyResult::SimplifyRuntimeError( + not_impl_err!("Const evaluate for Map type is still not supported"), + expr, + ) } else { ConstSimplifyResult::Simplified(s) } @@ -672,9 +676,8 @@ impl<'a> ConstEvaluator<'a> { // TODO: support the optimization for `Map` type after support impl hash for it if matches!(&s, ScalarValue::Map(_)) { ConstSimplifyResult::SimplifyRuntimeError( - DataFusionError::Execution( - "const evaluate for Map type is still not supported" - .to_string(), + not_impl_err!( + "Const evaluate for Map type is still not supported" ), expr, ) From 1217e963097e3d99a14f8480a31d0673908e0d8b Mon Sep 17 00:00:00 2001 From: kamille Date: Sun, 14 Jul 2024 16:10:48 +0800 Subject: [PATCH 6/9] fix error. --- .../optimizer/src/simplify_expressions/expr_simplifier.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 93b3f636c756..db4c0bf84a6f 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -661,7 +661,7 @@ impl<'a> ConstEvaluator<'a> { // TODO: support the optimization for `Map` type after support impl hash for it if matches!(&s, ScalarValue::Map(_)) { ConstSimplifyResult::SimplifyRuntimeError( - not_impl_err!("Const evaluate for Map type is still not supported"), + DataFusionError::NotImplemented("Const evaluate for Map type is still not supported".to_string()), expr, ) } else { @@ -676,8 +676,9 @@ impl<'a> ConstEvaluator<'a> { // TODO: support the optimization for `Map` type after support impl hash for it if matches!(&s, ScalarValue::Map(_)) { ConstSimplifyResult::SimplifyRuntimeError( - not_impl_err!( + DataFusionError::NotImplemented( "Const evaluate for Map type is still not supported" + .to_string(), ), expr, ) From 46e57ea9e743b527e9eb334c877ad89cd12ec3d9 Mon Sep 17 00:00:00 2001 From: kamille Date: Sun, 14 Jul 2024 16:13:37 +0800 Subject: [PATCH 7/9] fix remove unused import. --- datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index db4c0bf84a6f..8414f39f3060 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -29,7 +29,6 @@ use arrow::{ use datafusion_common::{ cast::{as_large_list_array, as_list_array}, - not_impl_err, tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRewriter}, }; use datafusion_common::{internal_err, DFSchema, DataFusionError, Result, ScalarValue}; From 6c8c699713e9571fc9f9dfcf8a1aa7fcf7c07219 Mon Sep 17 00:00:00 2001 From: kamille Date: Sun, 14 Jul 2024 23:29:06 +0800 Subject: [PATCH 8/9] remove duplicated testcase. --- datafusion/sqllogictest/test_files/map.slt | 8 -------- 1 file changed, 8 deletions(-) diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index 1e64bfc22872..fb8917a5f4fe 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -213,14 +213,6 @@ SELECT map(column5, column6) FROM t; # {k3: 3} # {k5: 5} -query error -SELECT map(column5, column6) FROM t; -# TODO: support array value -# ---- -# {k1:1, k2:2} -# {k3: 3} -# {k5: 5} - query ? SELECT MAKE_MAP('POST', 41, 'HEAD', 33, 'PATCH', 30, 'OPTION', 29, 'GET', 27, 'PUT', 25, 'DELETE', 24) AS method_count from t; ---- From 3a6d37aedc5ef1fa328ad6c9e0deb83b89ea0a9b Mon Sep 17 00:00:00 2001 From: kamille Date: Mon, 15 Jul 2024 21:20:05 +0800 Subject: [PATCH 9/9] remove inline. --- datafusion/functions/src/core/map.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/functions/src/core/map.rs b/datafusion/functions/src/core/map.rs index 51b8f7a40778..6626831c8034 100644 --- a/datafusion/functions/src/core/map.rs +++ b/datafusion/functions/src/core/map.rs @@ -35,7 +35,6 @@ use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; /// SELECT make_map('type', 'test') from test /// ``` /// We can evaluate the result of `make_map` directly. -#[inline] fn can_evaluate_to_const(args: &[ColumnarValue]) -> bool { args.iter() .all(|arg| matches!(arg, ColumnarValue::Scalar(_)))