From 334d6ec50f36659403c96e1bffef4228be7c458e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Berkay=20=C5=9Eahin?= <124376117+berkaysynnada@users.noreply.github.com>
Date: Fri, 10 Jan 2025 17:22:16 +0300
Subject: [PATCH 1/6] update (#14070)

---
 datafusion-cli/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml
index ec4a58fab346..8754612bfb11 100644
--- a/datafusion-cli/Cargo.toml
+++ b/datafusion-cli/Cargo.toml
@@ -66,7 +66,7 @@ url = "2.5.4"

 [dev-dependencies]
 assert_cmd = "2.0"
-ctor = "0.2.0"
+ctor = "0.2.9"
 predicates = "3.0"
 rstest = "0.22"

From 4b7281e9c5ae42c7934ca9c9fce3aaadc5d2df48 Mon Sep 17 00:00:00 2001
From: niebayes
Date: Sat, 11 Jan 2025 09:37:55 +0800
Subject: [PATCH 2/6] fix: make get_valid_types handle TypeSignature::Numeric
 correctly (#14060)

* fix get_valid_types with TypeSignature::Numeric

* fix sqllogictest
---
 .../expr/src/type_coercion/functions.rs     | 65 +++++++++++++++++++
 datafusion/sqllogictest/test_files/math.slt |  6 +-
 2 files changed, 68 insertions(+), 3 deletions(-)

diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs
index 5294cc526d38..650619e6de4c 100644
--- a/datafusion/expr/src/type_coercion/functions.rs
+++ b/datafusion/expr/src/type_coercion/functions.rs
@@ -544,6 +544,10 @@ fn get_valid_types(
         // and their default type is double precision
         if logical_data_type == NativeType::Null {
             valid_type = DataType::Float64;
+        } else if !logical_data_type.is_numeric() {
+            return plan_err!(
+                "The signature expected NativeType::Numeric but received {logical_data_type}"
+            );
         }

         vec![vec![valid_type; *number]]
@@ -997,6 +1001,67 @@ mod tests {
         }
     }

+    #[test]
+    fn test_get_valid_types_numeric() -> Result<()> {
+        let get_valid_types_flatten =
+            |signature: &TypeSignature, current_types: &[DataType]| {
+                get_valid_types(signature, current_types)
+                    .unwrap()
+                    .into_iter()
+                    .flatten()
+                    .collect::<Vec<_>>()
+            };
+
+        // Trivial case.
+        let got = get_valid_types_flatten(&TypeSignature::Numeric(1), &[DataType::Int32]);
+        assert_eq!(got, [DataType::Int32]);
+
+        // Args are coerced into a common numeric type.
+        let got = get_valid_types_flatten(
+            &TypeSignature::Numeric(2),
+            &[DataType::Int32, DataType::Int64],
+        );
+        assert_eq!(got, [DataType::Int64, DataType::Int64]);
+
+        // Args are coerced into a common numeric type; specifically, ints are coerced to floats.
+        let got = get_valid_types_flatten(
+            &TypeSignature::Numeric(3),
+            &[DataType::Int32, DataType::Int64, DataType::Float64],
+        );
+        assert_eq!(
+            got,
+            [DataType::Float64, DataType::Float64, DataType::Float64]
+        );
+
+        // Cannot coerce args to a common numeric type.
+        let got = get_valid_types(
+            &TypeSignature::Numeric(2),
+            &[DataType::Int32, DataType::Utf8],
+        )
+        .unwrap_err();
+        assert_contains!(
+            got.to_string(),
+            "The signature expected NativeType::Numeric but received NativeType::String"
+        );
+
+        // Falls back to Float64 if the arg is of type null.
+        let got = get_valid_types_flatten(&TypeSignature::Numeric(1), &[DataType::Null]);
+        assert_eq!(got, [DataType::Float64]);
+
+        // Rejects non-numeric arg.
+        let got = get_valid_types(
+            &TypeSignature::Numeric(1),
+            &[DataType::Timestamp(TimeUnit::Second, None)],
+        )
+        .unwrap_err();
+        assert_contains!(
+            got.to_string(),
+            "The signature expected NativeType::Numeric but received NativeType::Timestamp(Second, None)"
+        );
+
+        Ok(())
+    }
+
     #[test]
     fn test_get_valid_types_one_of() -> Result<()> {
         let signature =
diff --git a/datafusion/sqllogictest/test_files/math.slt b/datafusion/sqllogictest/test_files/math.slt
index e86d78a62353..37b5a378fc02 100644
--- a/datafusion/sqllogictest/test_files/math.slt
+++ b/datafusion/sqllogictest/test_files/math.slt
@@ -126,15 +126,15 @@ statement error
 SELECT abs(1, 2);

 # abs: unsupported argument type
-query error This feature is not implemented: Unsupported data type Utf8 for function abs
+query error DataFusion error: Error during planning: The signature expected NativeType::Numeric but received NativeType::String
 SELECT abs('foo');

 # abs: numeric string
 # TODO: In Postgres, '-1.2' is unknown type and interpreted to float8 so they don't fail on this query
-query error DataFusion error: This feature is not implemented: Unsupported data type Utf8 for function abs
+query error DataFusion error: Error during planning: The signature expected NativeType::Numeric but received NativeType::String
 select abs('-1.2');

-query error DataFusion error: This feature is not implemented: Unsupported data type Utf8 for function abs
+query error DataFusion error: Error during planning: The signature expected NativeType::Numeric but received NativeType::String
 select abs(arrow_cast('-1.2', 'Utf8'));

 statement ok

From b8135139c97918c95f855dd9c865ebdc2525b600 Mon Sep 17 00:00:00 2001
From: Jay Zhan
Date: Sat, 11 Jan 2025 11:07:57 +0800
Subject: [PATCH 3/6] Minor: Make `group_schema` a `PhysicalGroupBy` method
 (#14064)

* group schema as method

Signed-off-by: Jay Zhan

* fmt

Signed-off-by: Jay Zhan

---------

Signed-off-by: Jay Zhan
---
 datafusion/physical-plan/src/aggregates/mod.rs      | 8 ++++----
 datafusion/physical-plan/src/aggregates/row_hash.rs | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs
index 52fd6f90e595..cc8d6e74f4b9 100644
--- a/datafusion/physical-plan/src/aggregates/mod.rs
+++ b/datafusion/physical-plan/src/aggregates/mod.rs
@@ -250,6 +250,10 @@ impl PhysicalGroupBy {
         }
     }

+    pub fn group_schema(&self, schema: &Schema) -> Result<SchemaRef> {
+        Ok(Arc::new(Schema::new(self.group_fields(schema)?)))
+    }
+
     /// Returns the fields that are used as the grouping keys.
     fn group_fields(&self, input_schema: &Schema) -> Result<Vec<Field>> {
         let mut fields = Vec::with_capacity(self.num_group_exprs());
@@ -924,10 +928,6 @@ fn create_schema(
     ))
 }

-fn group_schema(input_schema: &Schema, group_by: &PhysicalGroupBy) -> Result<SchemaRef> {
-    Ok(Arc::new(Schema::new(group_by.group_fields(input_schema)?)))
-}
-
 /// Determines the lexical ordering requirement for an aggregate expression.
 ///
 /// # Parameters
diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs
index cdb3b2199cdc..cc95ce51c15b 100644
--- a/datafusion/physical-plan/src/aggregates/row_hash.rs
+++ b/datafusion/physical-plan/src/aggregates/row_hash.rs
@@ -24,8 +24,8 @@ use std::vec;
 use crate::aggregates::group_values::{new_group_values, GroupValues};
 use crate::aggregates::order::GroupOrderingFull;
 use crate::aggregates::{
-    create_schema, evaluate_group_by, evaluate_many, evaluate_optional, group_schema,
-    AggregateMode, PhysicalGroupBy,
+    create_schema, evaluate_group_by, evaluate_many, evaluate_optional, AggregateMode,
+    PhysicalGroupBy,
 };
 use crate::metrics::{BaselineMetrics, MetricBuilder, RecordOutput};
 use crate::sorts::sort::sort_batch;
@@ -489,7 +489,7 @@ impl GroupedHashAggregateStream {
             .map(create_group_accumulator)
             .collect::<Result<Vec<_>>>()?;

-        let group_schema = group_schema(&agg.input().schema(), &agg_group_by)?;
+        let group_schema = agg_group_by.group_schema(&agg.input().schema())?;

         // fix https://github.com/apache/datafusion/issues/13949
         // Builds a **partial aggregation** schema by combining the group columns and

From f1671bbcaa1ced5393fb0dda6b77fb66d0aa8a6c Mon Sep 17 00:00:00 2001
From: Andrew Lamb
Date: Fri, 10 Jan 2025 22:09:03 -0500
Subject: [PATCH 4/6] Minor: Move `LimitPushdown` tests to be in the same file
 as the code (#14076)

* Minor: move limit_pushdown tests to be with their pass

* Fix clippy

* clean up use

* fmt
---
 .../physical_optimizer/limit_pushdown.rs      | 490 ------------------
 .../core/tests/physical_optimizer/mod.rs      |   1 -
 .../physical-optimizer/src/limit_pushdown.rs  | 477 +++++++++++++++++
 3 files changed, 477 insertions(+), 491 deletions(-)
 delete mode 100644 datafusion/core/tests/physical_optimizer/limit_pushdown.rs

diff --git a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs
deleted file mode 100644
index 1b4c28d41d19..000000000000
--- a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs
+++ /dev/null
@@ -1,490 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
- -use arrow_schema::{DataType, Field, Schema, SchemaRef, SortOptions}; -use datafusion_common::config::ConfigOptions; -use datafusion_execution::{SendableRecordBatchStream, TaskContext}; -use datafusion_expr::Operator; -use datafusion_physical_expr::expressions::BinaryExpr; -use datafusion_physical_expr::expressions::{col, lit}; -use datafusion_physical_expr::Partitioning; -use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; -use datafusion_physical_optimizer::limit_pushdown::LimitPushdown; -use datafusion_physical_optimizer::PhysicalOptimizerRule; -use datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec; -use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; -use datafusion_physical_plan::empty::EmptyExec; -use datafusion_physical_plan::filter::FilterExec; -use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; -use datafusion_physical_plan::projection::ProjectionExec; -use datafusion_physical_plan::repartition::RepartitionExec; -use datafusion_physical_plan::sorts::sort::SortExec; -use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; -use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; -use datafusion_physical_plan::{get_plan_string, ExecutionPlan, ExecutionPlanProperties}; -use std::sync::Arc; - -#[derive(Debug)] -struct DummyStreamPartition { - schema: SchemaRef, -} -impl PartitionStream for DummyStreamPartition { - fn schema(&self) -> &SchemaRef { - &self.schema - } - fn execute(&self, _ctx: Arc) -> SendableRecordBatchStream { - unreachable!() - } -} - -#[test] -fn transforms_streaming_table_exec_into_fetching_version_when_skip_is_zero( -) -> datafusion_common::Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema)?; - let global_limit = global_limit_exec(streaming_table, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=5" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) -} - -#[test] -fn transforms_streaming_table_exec_into_fetching_version_and_keeps_the_global_limit_when_skip_is_nonzero( -) -> datafusion_common::Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema)?; - let global_limit = global_limit_exec(streaming_table, 2, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=2, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "GlobalLimitExec: skip=2, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=7" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) -} - -#[test] -fn transforms_coalesce_batches_exec_into_fetching_version_and_removes_local_limit( -) -> datafusion_common::Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone())?; - 
let repartition = repartition_exec(streaming_table)?; - let filter = filter_exec(schema, repartition)?; - let coalesce_batches = coalesce_batches_exec(filter); - let local_limit = local_limit_exec(coalesce_batches, 5); - let coalesce_partitions = coalesce_partitions_exec(local_limit); - let global_limit = global_limit_exec(coalesce_partitions, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " CoalescePartitionsExec", - " LocalLimitExec: fetch=5", - " CoalesceBatchesExec: target_batch_size=8192", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "GlobalLimitExec: skip=0, fetch=5", - " CoalescePartitionsExec", - " CoalesceBatchesExec: target_batch_size=8192, fetch=5", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) -} - -#[test] -fn pushes_global_limit_exec_through_projection_exec() -> datafusion_common::Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone())?; - let filter = filter_exec(schema.clone(), streaming_table)?; - let projection = projection_exec(schema, filter)?; - let global_limit = global_limit_exec(projection, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " FilterExec: c3@2 > 0", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " GlobalLimitExec: skip=0, fetch=5", - " FilterExec: c3@2 > 0", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) -} - -#[test] -fn pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batches_exec_into_fetching_version( -) -> datafusion_common::Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone()).unwrap(); - let coalesce_batches = coalesce_batches_exec(streaming_table); - let projection = projection_exec(schema, coalesce_batches)?; - let global_limit = global_limit_exec(projection, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " CoalesceBatchesExec: target_batch_size=8192", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " CoalesceBatchesExec: target_batch_size=8192, fetch=5", - " StreamingTableExec: 
partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) -} - -#[test] -fn pushes_global_limit_into_multiple_fetch_plans() -> datafusion_common::Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone()).unwrap(); - let coalesce_batches = coalesce_batches_exec(streaming_table); - let projection = projection_exec(schema.clone(), coalesce_batches)?; - let repartition = repartition_exec(projection)?; - let sort = sort_exec( - vec![PhysicalSortExpr { - expr: col("c1", &schema)?, - options: SortOptions::default(), - }], - repartition, - ); - let spm = sort_preserving_merge_exec(sort.output_ordering().unwrap().to_vec(), sort); - let global_limit = global_limit_exec(spm, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " SortPreservingMergeExec: [c1@0 ASC]", - " SortExec: expr=[c1@0 ASC], preserve_partitioning=[false]", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " CoalesceBatchesExec: target_batch_size=8192", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "SortPreservingMergeExec: [c1@0 ASC], fetch=5", - " SortExec: TopK(fetch=5), expr=[c1@0 ASC], preserve_partitioning=[false]", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " CoalesceBatchesExec: target_batch_size=8192", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) -} - -#[test] -fn keeps_pushed_local_limit_exec_when_there_are_multiple_input_partitions( -) -> datafusion_common::Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone())?; - let repartition = repartition_exec(streaming_table)?; - let filter = filter_exec(schema, repartition)?; - let coalesce_partitions = coalesce_partitions_exec(filter); - let global_limit = global_limit_exec(coalesce_partitions, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " CoalescePartitionsExec", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "GlobalLimitExec: skip=0, fetch=5", - " CoalescePartitionsExec", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) -} - -#[test] -fn merges_local_limit_with_local_limit() -> datafusion_common::Result<()> { - let schema = create_schema(); - let empty_exec = empty_exec(schema); - let child_local_limit = local_limit_exec(empty_exec, 10); - let parent_local_limit = local_limit_exec(child_local_limit, 20); - - let 
initial = get_plan_string(&parent_local_limit); - let expected_initial = [ - "LocalLimitExec: fetch=20", - " LocalLimitExec: fetch=10", - " EmptyExec", - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(parent_local_limit, &ConfigOptions::new())?; - - let expected = ["GlobalLimitExec: skip=0, fetch=10", " EmptyExec"]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) -} - -#[test] -fn merges_global_limit_with_global_limit() -> datafusion_common::Result<()> { - let schema = create_schema(); - let empty_exec = empty_exec(schema); - let child_global_limit = global_limit_exec(empty_exec, 10, Some(30)); - let parent_global_limit = global_limit_exec(child_global_limit, 10, Some(20)); - - let initial = get_plan_string(&parent_global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=10, fetch=20", - " GlobalLimitExec: skip=10, fetch=30", - " EmptyExec", - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(parent_global_limit, &ConfigOptions::new())?; - - let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) -} - -#[test] -fn merges_global_limit_with_local_limit() -> datafusion_common::Result<()> { - let schema = create_schema(); - let empty_exec = empty_exec(schema); - let local_limit = local_limit_exec(empty_exec, 40); - let global_limit = global_limit_exec(local_limit, 20, Some(30)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=20, fetch=30", - " LocalLimitExec: fetch=40", - " EmptyExec", - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) -} - -#[test] -fn merges_local_limit_with_global_limit() -> datafusion_common::Result<()> { - let schema = create_schema(); - let empty_exec = empty_exec(schema); - let global_limit = global_limit_exec(empty_exec, 20, Some(30)); - let local_limit = local_limit_exec(global_limit, 20); - - let initial = get_plan_string(&local_limit); - let expected_initial = [ - "LocalLimitExec: fetch=20", - " GlobalLimitExec: skip=20, fetch=30", - " EmptyExec", - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(local_limit, &ConfigOptions::new())?; - - let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) -} - -fn create_schema() -> SchemaRef { - Arc::new(Schema::new(vec![ - Field::new("c1", DataType::Int32, true), - Field::new("c2", DataType::Int32, true), - Field::new("c3", DataType::Int32, true), - ])) -} - -fn streaming_table_exec( - schema: SchemaRef, -) -> datafusion_common::Result> { - Ok(Arc::new(StreamingTableExec::try_new( - schema.clone(), - vec![Arc::new(DummyStreamPartition { schema }) as _], - None, - None, - true, - None, - )?)) -} - -fn global_limit_exec( - input: Arc, - skip: usize, - fetch: Option, -) -> Arc { - Arc::new(GlobalLimitExec::new(input, skip, fetch)) -} - -fn local_limit_exec( - input: Arc, - fetch: usize, -) -> Arc { - Arc::new(LocalLimitExec::new(input, fetch)) -} - -fn sort_exec( - sort_exprs: impl IntoIterator, - input: Arc, -) -> Arc { - let sort_exprs = sort_exprs.into_iter().collect(); - 
Arc::new(SortExec::new(sort_exprs, input)) -} - -fn sort_preserving_merge_exec( - sort_exprs: impl IntoIterator, - input: Arc, -) -> Arc { - let sort_exprs = sort_exprs.into_iter().collect(); - Arc::new(SortPreservingMergeExec::new(sort_exprs, input)) -} - -fn projection_exec( - schema: SchemaRef, - input: Arc, -) -> datafusion_common::Result> { - Ok(Arc::new(ProjectionExec::try_new( - vec![ - (col("c1", schema.as_ref()).unwrap(), "c1".to_string()), - (col("c2", schema.as_ref()).unwrap(), "c2".to_string()), - (col("c3", schema.as_ref()).unwrap(), "c3".to_string()), - ], - input, - )?)) -} - -fn filter_exec( - schema: SchemaRef, - input: Arc, -) -> datafusion_common::Result> { - Ok(Arc::new(FilterExec::try_new( - Arc::new(BinaryExpr::new( - col("c3", schema.as_ref()).unwrap(), - Operator::Gt, - lit(0), - )), - input, - )?)) -} - -fn coalesce_batches_exec(input: Arc) -> Arc { - Arc::new(CoalesceBatchesExec::new(input, 8192)) -} - -fn coalesce_partitions_exec( - local_limit: Arc, -) -> Arc { - Arc::new(CoalescePartitionsExec::new(local_limit)) -} - -fn repartition_exec( - streaming_table: Arc, -) -> datafusion_common::Result> { - Ok(Arc::new(RepartitionExec::try_new( - streaming_table, - Partitioning::RoundRobinBatch(8), - )?)) -} - -fn empty_exec(schema: SchemaRef) -> Arc { - Arc::new(EmptyExec::new(schema)) -} diff --git a/datafusion/core/tests/physical_optimizer/mod.rs b/datafusion/core/tests/physical_optimizer/mod.rs index c06783aa0277..efe377891128 100644 --- a/datafusion/core/tests/physical_optimizer/mod.rs +++ b/datafusion/core/tests/physical_optimizer/mod.rs @@ -16,6 +16,5 @@ // under the License. mod combine_partial_final_agg; -mod limit_pushdown; mod limited_distinct_aggregation; mod test_util; diff --git a/datafusion/physical-optimizer/src/limit_pushdown.rs b/datafusion/physical-optimizer/src/limit_pushdown.rs index 8f392b683077..7a44b2e90dde 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown.rs @@ -339,3 +339,480 @@ fn add_global_limit( } // See tests in datafusion/core/tests/physical_optimizer + +#[cfg(test)] +mod test { + use super::*; + use arrow::compute::SortOptions; + use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; + use datafusion_common::config::ConfigOptions; + use datafusion_execution::{SendableRecordBatchStream, TaskContext}; + use datafusion_expr::Operator; + use datafusion_physical_expr::expressions::BinaryExpr; + use datafusion_physical_expr::expressions::{col, lit}; + use datafusion_physical_expr::{Partitioning, PhysicalSortExpr}; + use datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec; + use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; + use datafusion_physical_plan::empty::EmptyExec; + use datafusion_physical_plan::filter::FilterExec; + use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; + use datafusion_physical_plan::projection::ProjectionExec; + use datafusion_physical_plan::repartition::RepartitionExec; + use datafusion_physical_plan::sorts::sort::SortExec; + use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; + use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; + use datafusion_physical_plan::{ + get_plan_string, ExecutionPlan, ExecutionPlanProperties, + }; + use std::sync::Arc; + + #[derive(Debug)] + struct DummyStreamPartition { + schema: SchemaRef, + } + impl PartitionStream for DummyStreamPartition { + fn schema(&self) -> &SchemaRef { + &self.schema + } + 
fn execute(&self, _ctx: Arc) -> SendableRecordBatchStream { + unreachable!() + } + } + + #[test] + fn transforms_streaming_table_exec_into_fetching_version_when_skip_is_zero( + ) -> Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema)?; + let global_limit = global_limit_exec(streaming_table, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=5" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + #[test] + fn transforms_streaming_table_exec_into_fetching_version_and_keeps_the_global_limit_when_skip_is_nonzero( + ) -> Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema)?; + let global_limit = global_limit_exec(streaming_table, 2, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=2, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "GlobalLimitExec: skip=2, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=7" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + #[test] + fn transforms_coalesce_batches_exec_into_fetching_version_and_removes_local_limit( + ) -> Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(Arc::clone(&schema))?; + let repartition = repartition_exec(streaming_table)?; + let filter = filter_exec(schema, repartition)?; + let coalesce_batches = coalesce_batches_exec(filter); + let local_limit = local_limit_exec(coalesce_batches, 5); + let coalesce_partitions = coalesce_partitions_exec(local_limit); + let global_limit = global_limit_exec(coalesce_partitions, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " LocalLimitExec: fetch=5", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " CoalesceBatchesExec: target_batch_size=8192, fetch=5", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + #[test] + fn pushes_global_limit_exec_through_projection_exec() -> Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(Arc::clone(&schema))?; + let filter = 
filter_exec(Arc::clone(&schema), streaming_table)?; + let projection = projection_exec(schema, filter)?; + let global_limit = global_limit_exec(projection, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " FilterExec: c3@2 > 0", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " GlobalLimitExec: skip=0, fetch=5", + " FilterExec: c3@2 > 0", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + #[test] + fn pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batches_exec_into_fetching_version( + ) -> Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(Arc::clone(&schema)).unwrap(); + let coalesce_batches = coalesce_batches_exec(streaming_table); + let projection = projection_exec(schema, coalesce_batches)?; + let global_limit = global_limit_exec(projection, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + #[test] + fn pushes_global_limit_into_multiple_fetch_plans() -> Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(Arc::clone(&schema)).unwrap(); + let coalesce_batches = coalesce_batches_exec(streaming_table); + let projection = projection_exec(Arc::clone(&schema), coalesce_batches)?; + let repartition = repartition_exec(projection)?; + let sort = sort_exec( + vec![PhysicalSortExpr { + expr: col("c1", &schema)?, + options: SortOptions::default(), + }], + repartition, + ); + let spm = + sort_preserving_merge_exec(sort.output_ordering().unwrap().to_vec(), sort); + let global_limit = global_limit_exec(spm, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " SortPreservingMergeExec: [c1@0 ASC]", + " SortExec: expr=[c1@0 ASC], preserve_partitioning=[false]", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "SortPreservingMergeExec: [c1@0 ASC], fetch=5", + " SortExec: 
TopK(fetch=5), expr=[c1@0 ASC], preserve_partitioning=[false]", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + #[test] + fn keeps_pushed_local_limit_exec_when_there_are_multiple_input_partitions( + ) -> Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(Arc::clone(&schema))?; + let repartition = repartition_exec(streaming_table)?; + let filter = filter_exec(schema, repartition)?; + let coalesce_partitions = coalesce_partitions_exec(filter); + let global_limit = global_limit_exec(coalesce_partitions, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + #[test] + fn merges_local_limit_with_local_limit() -> Result<()> { + let schema = create_schema(); + let empty_exec = empty_exec(schema); + let child_local_limit = local_limit_exec(empty_exec, 10); + let parent_local_limit = local_limit_exec(child_local_limit, 20); + + let initial = get_plan_string(&parent_local_limit); + let expected_initial = [ + "LocalLimitExec: fetch=20", + " LocalLimitExec: fetch=10", + " EmptyExec", + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(parent_local_limit, &ConfigOptions::new())?; + + let expected = ["GlobalLimitExec: skip=0, fetch=10", " EmptyExec"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + #[test] + fn merges_global_limit_with_global_limit() -> Result<()> { + let schema = create_schema(); + let empty_exec = empty_exec(schema); + let child_global_limit = global_limit_exec(empty_exec, 10, Some(30)); + let parent_global_limit = global_limit_exec(child_global_limit, 10, Some(20)); + + let initial = get_plan_string(&parent_global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=10, fetch=20", + " GlobalLimitExec: skip=10, fetch=30", + " EmptyExec", + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(parent_global_limit, &ConfigOptions::new())?; + + let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + #[test] + fn merges_global_limit_with_local_limit() -> Result<()> { + let schema = create_schema(); + let empty_exec = empty_exec(schema); + let local_limit = local_limit_exec(empty_exec, 40); + let global_limit = global_limit_exec(local_limit, 20, Some(30)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + 
"GlobalLimitExec: skip=20, fetch=30", + " LocalLimitExec: fetch=40", + " EmptyExec", + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + #[test] + fn merges_local_limit_with_global_limit() -> Result<()> { + let schema = create_schema(); + let empty_exec = empty_exec(schema); + let global_limit = global_limit_exec(empty_exec, 20, Some(30)); + let local_limit = local_limit_exec(global_limit, 20); + + let initial = get_plan_string(&local_limit); + let expected_initial = [ + "LocalLimitExec: fetch=20", + " GlobalLimitExec: skip=20, fetch=30", + " EmptyExec", + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(local_limit, &ConfigOptions::new())?; + + let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + fn create_schema() -> SchemaRef { + Arc::new(Schema::new(vec![ + Field::new("c1", DataType::Int32, true), + Field::new("c2", DataType::Int32, true), + Field::new("c3", DataType::Int32, true), + ])) + } + + fn streaming_table_exec(schema: SchemaRef) -> Result> { + Ok(Arc::new(StreamingTableExec::try_new( + Arc::clone(&schema), + vec![Arc::new(DummyStreamPartition { schema }) as _], + None, + None, + true, + None, + )?)) + } + + fn global_limit_exec( + input: Arc, + skip: usize, + fetch: Option, + ) -> Arc { + Arc::new(GlobalLimitExec::new(input, skip, fetch)) + } + + fn local_limit_exec( + input: Arc, + fetch: usize, + ) -> Arc { + Arc::new(LocalLimitExec::new(input, fetch)) + } + + fn sort_exec( + sort_exprs: impl IntoIterator, + input: Arc, + ) -> Arc { + let sort_exprs = sort_exprs.into_iter().collect(); + Arc::new(SortExec::new(sort_exprs, input)) + } + + fn sort_preserving_merge_exec( + sort_exprs: impl IntoIterator, + input: Arc, + ) -> Arc { + let sort_exprs = sort_exprs.into_iter().collect(); + Arc::new(SortPreservingMergeExec::new(sort_exprs, input)) + } + + fn projection_exec( + schema: SchemaRef, + input: Arc, + ) -> Result> { + Ok(Arc::new(ProjectionExec::try_new( + vec![ + (col("c1", schema.as_ref()).unwrap(), "c1".to_string()), + (col("c2", schema.as_ref()).unwrap(), "c2".to_string()), + (col("c3", schema.as_ref()).unwrap(), "c3".to_string()), + ], + input, + )?)) + } + + fn filter_exec( + schema: SchemaRef, + input: Arc, + ) -> Result> { + Ok(Arc::new(FilterExec::try_new( + Arc::new(BinaryExpr::new( + col("c3", schema.as_ref()).unwrap(), + Operator::Gt, + lit(0), + )), + input, + )?)) + } + + fn coalesce_batches_exec(input: Arc) -> Arc { + Arc::new(CoalesceBatchesExec::new(input, 8192)) + } + + fn coalesce_partitions_exec( + local_limit: Arc, + ) -> Arc { + Arc::new(CoalescePartitionsExec::new(local_limit)) + } + + fn repartition_exec( + streaming_table: Arc, + ) -> Result> { + Ok(Arc::new(RepartitionExec::try_new( + streaming_table, + Partitioning::RoundRobinBatch(8), + )?)) + } + + fn empty_exec(schema: SchemaRef) -> Arc { + Arc::new(EmptyExec::new(schema)) + } +} From d91a7c0f5b93bfb7061dcb6aa8b78dd31b7273b3 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 10 Jan 2025 22:09:39 -0500 Subject: [PATCH 5/6] Add comments to physical optimizer tests (#14075) --- .../tests/physical_optimizer/combine_partial_final_agg.rs | 4 ++++ .../physical_optimizer/limited_distinct_aggregation.rs | 6 ++++-- 2 
files changed, 8 insertions(+), 2 deletions(-) diff --git a/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs index 85076abdaf29..b8a96f0f5a22 100644 --- a/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs +++ b/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs @@ -15,6 +15,10 @@ // specific language governing permissions and limitations // under the License. +//! Tests for [`CombinePartialFinalAggregate`] physical optimizer rule +//! +//! Note these tests are not in the same module as the optimizer pass because +//! they rely on `ParquetExec` which is in the core crate. use std::sync::Arc; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; diff --git a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs index 6910db6285a3..46a56fe1fb75 100644 --- a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs +++ b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs @@ -15,8 +15,10 @@ // specific language governing permissions and limitations // under the License. -//! Tests for the limited distinct aggregation optimizer rule - +//! Tests for [`LimitedDistinctAggregation`] physical optimizer rule +//! +//! Note these tests are not in the same module as the optimizer pass because +//! they rely on `ParquetExec` which is in the core crate. use super::test_util::{parquet_exec_with_sort, schema, trim_plan_display}; use std::sync::Arc; From 17446ada1261bebf10f33646c98e29c22140d7be Mon Sep 17 00:00:00 2001 From: Jagdish Parihar Date: Fri, 10 Jan 2025 20:41:04 -0700 Subject: [PATCH 6/6] added "DEFAULT_CLI_FORMAT_OPTIONS" for cli and sqllogic test (#14052) * added "DEFAULT_CLI_FORMAT_OPTIONS" for cli and sqllotic test * cargo fmt fix * fixed few errors --- datafusion-cli/src/print_format.rs | 8 +- datafusion/common/src/format.rs | 4 + datafusion/functions-nested/src/map.rs | 2 +- .../engines/datafusion_engine/normalize.rs | 5 +- .../sqllogictest/test_files/aggregate.slt | 2 +- datafusion/sqllogictest/test_files/array.slt | 546 +++++++++--------- datafusion/sqllogictest/test_files/avro.slt | 4 +- datafusion/sqllogictest/test_files/expr.slt | 4 +- datafusion/sqllogictest/test_files/map.slt | 78 +-- .../sqllogictest/test_files/parquet.slt | 4 +- datafusion/sqllogictest/test_files/scalar.slt | 2 +- datafusion/sqllogictest/test_files/struct.slt | 36 +- datafusion/sqllogictest/test_files/unnest.slt | 38 +- .../source/user-guide/sql/scalar_functions.md | 2 +- 14 files changed, 370 insertions(+), 365 deletions(-) diff --git a/datafusion-cli/src/print_format.rs b/datafusion-cli/src/print_format.rs index 92cb106d622b..1fc949593512 100644 --- a/datafusion-cli/src/print_format.rs +++ b/datafusion-cli/src/print_format.rs @@ -26,7 +26,7 @@ use arrow::datatypes::SchemaRef; use arrow::json::{ArrayWriter, LineDelimitedWriter}; use arrow::record_batch::RecordBatch; use arrow::util::pretty::pretty_format_batches_with_options; -use datafusion::common::format::DEFAULT_FORMAT_OPTIONS; +use datafusion::common::format::DEFAULT_CLI_FORMAT_OPTIONS; use datafusion::error::Result; /// Allow records to be printed in different formats @@ -133,7 +133,7 @@ fn format_batches_with_maxrows( let formatted = pretty_format_batches_with_options( &filtered_batches, - &DEFAULT_FORMAT_OPTIONS, + &DEFAULT_CLI_FORMAT_OPTIONS, )?; if over_limit { let mut formatted_str = 
format!("{}", formatted); @@ -145,7 +145,7 @@ fn format_batches_with_maxrows( } MaxRows::Unlimited => { let formatted = - pretty_format_batches_with_options(batches, &DEFAULT_FORMAT_OPTIONS)?; + pretty_format_batches_with_options(batches, &DEFAULT_CLI_FORMAT_OPTIONS)?; writeln!(writer, "{}", formatted)?; } } @@ -201,7 +201,7 @@ impl PrintFormat { let empty_batch = RecordBatch::new_empty(schema); let formatted = pretty_format_batches_with_options( &[empty_batch], - &DEFAULT_FORMAT_OPTIONS, + &DEFAULT_CLI_FORMAT_OPTIONS, )?; writeln!(writer, "{}", formatted)?; } diff --git a/datafusion/common/src/format.rs b/datafusion/common/src/format.rs index 484a7f2388f5..23cfb72314a3 100644 --- a/datafusion/common/src/format.rs +++ b/datafusion/common/src/format.rs @@ -27,3 +27,7 @@ pub const DEFAULT_CAST_OPTIONS: CastOptions<'static> = CastOptions { safe: false, format_options: DEFAULT_FORMAT_OPTIONS, }; + +pub const DEFAULT_CLI_FORMAT_OPTIONS: FormatOptions<'static> = FormatOptions::new() + .with_duration_format(DurationFormat::Pretty) + .with_null("NULL"); diff --git a/datafusion/functions-nested/src/map.rs b/datafusion/functions-nested/src/map.rs index 0b098a30b758..d484cc834262 100644 --- a/datafusion/functions-nested/src/map.rs +++ b/datafusion/functions-nested/src/map.rs @@ -195,7 +195,7 @@ SELECT MAP('type', 'test'); SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]); ---- -{POST: 41, HEAD: 33, PATCH: } +{POST: 41, HEAD: 33, PATCH: NULL} SELECT MAP([[1,2], [3,4]], ['a', 'b']); ---- diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs index ced497de22a7..5376d170d942 100644 --- a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs +++ b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs @@ -20,7 +20,7 @@ use arrow::array::{Array, AsArray}; use arrow::datatypes::Fields; use arrow::util::display::ArrayFormatter; use arrow::{array, array::ArrayRef, datatypes::DataType, record_batch::RecordBatch}; -use datafusion_common::format::DEFAULT_FORMAT_OPTIONS; +use datafusion_common::format::DEFAULT_CLI_FORMAT_OPTIONS; use datafusion_common::DataFusionError; use std::path::PathBuf; use std::sync::LazyLock; @@ -240,7 +240,8 @@ pub fn cell_to_string(col: &ArrayRef, row: usize) -> Result { Ok(cell_to_string(dict.values(), key)?) } _ => { - let f = ArrayFormatter::try_new(col.as_ref(), &DEFAULT_FORMAT_OPTIONS); + let f = + ArrayFormatter::try_new(col.as_ref(), &DEFAULT_CLI_FORMAT_OPTIONS); Ok(f.unwrap().value(row).to_string()) } } diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 0aedd2ad9601..bd3b40089519 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -4236,7 +4236,7 @@ query T? SELECT tag, array_agg(millis - arrow_cast(secs, 'Timestamp(Millisecond, None)')) FROM t GROUP BY tag ORDER BY tag; ---- X [0 days 0 hours 0 mins 0.011 secs, 0 days 0 hours 0 mins 0.123 secs] -Y [, 0 days 0 hours 0 mins 0.432 secs] +Y [NULL, 0 days 0 hours 0 mins 0.432 secs] statement ok drop table t_source; diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 83f228b90115..57b1b0fc19d2 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -704,13 +704,13 @@ List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int6 query ??? 
select column1, column2, column3 from arrays; ---- -[[, 2], [3, ]] [1.1, 2.2, 3.3] [L, o, r, e, m] -[[3, 4], [5, 6]] [, 5.5, 6.6] [i, p, , u, m] -[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, , l, o, r] -[[7, ], [9, 10]] [10.1, , 12.2] [s, i, t] +[[NULL, 2], [3, NULL]] [1.1, 2.2, 3.3] [L, o, r, e, m] +[[3, 4], [5, 6]] [NULL, 5.5, 6.6] [i, p, NULL, u, m] +[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, NULL, l, o, r] +[[7, NULL], [9, 10]] [10.1, NULL, 12.2] [s, i, t] NULL [13.3, 14.4, 15.5] [a, m, e, t] [[11, 12], [13, 14]] NULL [,] -[[15, 16], [, 18]] [16.6, 17.7, 18.8] NULL +[[15, 16], [NULL, 18]] [16.6, 17.7, 18.8] NULL # nested_arrays table query ??I?? @@ -737,34 +737,34 @@ NULL 9 10 5.5 amet query ?IIT select column1, column2, column3, column4 from arrays_values; ---- -[, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 , -[11, 12, 13, 14, 15, 16, 17, 18, , 20] 12 2 . -[21, 22, 23, , 25, 26, 27, 28, 29, 30] 23 3 - -[31, 32, 33, 34, 35, , 37, 38, 39, 40] 34 4 ok +[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 , +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] 12 2 . +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] 23 3 - +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] 34 4 ok NULL 44 5 @ [41, 42, 43, 44, 45, 46, 47, 48, 49, 50] NULL 6 $ -[51, 52, , 54, 55, 56, 57, 58, 59, 60] 55 NULL ^ +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] 55 NULL ^ [61, 62, 63, 64, 65, 66, 67, 68, 69, 70] 66 7 NULL # slices table query ?II select column1, column2, column3 from slices; ---- -[, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 -[11, 12, 13, 14, 15, 16, 17, 18, , 20] 2 -4 -[21, 22, 23, , 25, 26, 27, 28, 29, 30] 0 0 -[31, 32, 33, 34, 35, , 37, 38, 39, 40] -4 -7 +[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] 2 -4 +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] 0 0 +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] -4 -7 NULL 4 5 [41, 42, 43, 44, 45, 46, 47, 48, 49, 50] NULL 6 -[51, 52, , 54, 55, 56, 57, 58, 59, 60] 5 NULL +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] 5 NULL query ??I? select column1, column2, column3, column4 from arrays_values_v2; ---- -[, 2, 3] [4, 5, ] 12 [[30, 40, 50]] -NULL [7, , 8] 13 [[, , 60]] -[9, , 10] NULL 14 [[70, , ]] -[, 1] [, 21] NULL NULL +[NULL, 2, 3] [4, 5, NULL] 12 [[30, 40, 50]] +NULL [7, NULL, 8] 13 [[NULL, NULL, 60]] +[9, NULL, 10] NULL 14 [[70, NULL, NULL]] +[NULL, 1] [NULL, 21] NULL NULL [11, 12] NULL NULL NULL NULL NULL NULL NULL @@ -829,13 +829,13 @@ select make_array(1, 2, 3)[1 + 2 - 1], make_array(1.0, 2.0, 3.0)[2 * 1 * 0 - 2], query ?RT select column1[2], column2[3], column3[1] from arrays; ---- -[3, ] 3.3 L +[3, NULL] 3.3 L [5, 6] 6.6 i [7, 8] 9.9 d [9, 10] 12.2 s NULL 15.5 a [13, 14] NULL , -[, 18] 18.8 NULL +[NULL, 18] 18.8 NULL # single index with columns #2 (zero index) query ?RT @@ -853,10 +853,10 @@ NULL NULL NULL query ?RT select column1[-2], column2[-3], column3[-1] from arrays; ---- -[, 2] 1.1 m +[NULL, 2] 1.1 m [3, 4] NULL m [5, 6] 7.7 r -[7, ] 10.1 t +[7, NULL] 10.1 t NULL 13.3 t [11, 12] NULL , [15, 16] 16.6 NULL @@ -865,13 +865,13 @@ NULL 13.3 t query ?RT select column1[9 - 7], column2[2 * 0], column3[1 - 3] from arrays; ---- -[3, ] NULL e +[3, NULL] NULL e [5, 6] NULL u [7, 8] NULL o [9, 10] NULL i NULL NULL e [13, 14] NULL NULL -[, 18] NULL NULL +[NULL, 18] NULL NULL # TODO: support index as column # single index with columns #5 (index as column) @@ -936,32 +936,32 @@ select arrow_cast([1, 2, 3], 'LargeList(Int64)')[1]; query ??? 
select column1[2:4], column2[1:4], column3[3:4] from arrays; ---- -[[3, ]] [1.1, 2.2, 3.3] [r, e] -[[5, 6]] [, 5.5, 6.6] [, u] +[[3, NULL]] [1.1, 2.2, 3.3] [r, e] +[[5, 6]] [NULL, 5.5, 6.6] [NULL, u] [[7, 8]] [7.7, 8.8, 9.9] [l, o] -[[9, 10]] [10.1, , 12.2] [t] +[[9, 10]] [10.1, NULL, 12.2] [t] [] [13.3, 14.4, 15.5] [e, t] [[13, 14]] [] [] -[[, 18]] [16.6, 17.7, 18.8] [] +[[NULL, 18]] [16.6, 17.7, 18.8] [] # multiple index with columns #2 (zero index) query ??? select column1[0:5], column2[0:3], column3[0:9] from arrays; ---- -[[, 2], [3, ]] [1.1, 2.2, 3.3] [L, o, r, e, m] -[[3, 4], [5, 6]] [, 5.5, 6.6] [i, p, , u, m] -[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, , l, o, r] -[[7, ], [9, 10]] [10.1, , 12.2] [s, i, t] +[[NULL, 2], [3, NULL]] [1.1, 2.2, 3.3] [L, o, r, e, m] +[[3, 4], [5, 6]] [NULL, 5.5, 6.6] [i, p, NULL, u, m] +[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, NULL, l, o, r] +[[7, NULL], [9, 10]] [10.1, NULL, 12.2] [s, i, t] [] [13.3, 14.4, 15.5] [a, m, e, t] [[11, 12], [13, 14]] [] [,] -[[15, 16], [, 18]] [16.6, 17.7, 18.8] [] +[[15, 16], [NULL, 18]] [16.6, 17.7, 18.8] [] # TODO: support negative index # multiple index with columns #3 (negative index) # query ?RT # select column1[-2:-4], column2[-3:-5], column3[-1:-4] from arrays; # ---- -# [, 2] 1.1 m +# [NULL, 2] 1.1 m # TODO: support complex index # multiple index with columns #4 (complex index) @@ -1012,22 +1012,22 @@ select make_array(1, 2, 3)[0:0:2], make_array(1.0, 2.0, 3.0)[0:2:2], make_array( query ??? select column1[2:4:2], column2[1:4:2], column3[3:4:2] from arrays; ---- -[[3, ]] [1.1, 3.3] [r] -[[5, 6]] [, 6.6] [] +[[3, NULL]] [1.1, 3.3] [r] +[[5, 6]] [NULL, 6.6] [NULL] [[7, 8]] [7.7, 9.9] [l] [[9, 10]] [10.1, 12.2] [t] [] [13.3, 15.5] [e] [[13, 14]] [] [] -[[, 18]] [16.6, 18.8] [] +[[NULL, 18]] [16.6, 18.8] [] # multiple index with columns #2 (zero index) query ??? select column1[0:5:2], column2[0:3:2], column3[0:9:2] from arrays; ---- -[[, 2]] [1.1, 3.3] [L, r, m] -[[3, 4]] [, 6.6] [i, , m] +[[NULL, 2]] [1.1, 3.3] [L, r, m] +[[3, 4]] [NULL, 6.6] [i, NULL, m] [[5, 6]] [7.7, 9.9] [d, l, r] -[[7, ]] [10.1, 12.2] [s, t] +[[7, NULL]] [10.1, 12.2] [s, t] [] [13.3, 15.5] [a, e] [[11, 12]] [] [,] [[15, 16]] [16.6, 18.8] [] @@ -1090,19 +1090,19 @@ select make_list(1, 2, 3), make_list(1.0, 2.0, 3.0), make_list('h', 'e', 'l', 'l query ??? select make_array(1, NULL, 3), make_array(NULL, 2.0, NULL), make_array('h', NULL, 'l', NULL, 'o'); ---- -[1, , 3] [, 2.0, ] [h, , l, , o] +[1, NULL, 3] [NULL, 2.0, NULL] [h, NULL, l, NULL, o] # make_array scalar function with nulls #2 query ?? select make_array(1, 2, NULL), make_array(make_array(NULL, 2), make_array(NULL, 3)); ---- -[1, 2, ] [[, 2], [, 3]] +[1, 2, NULL] [[NULL, 2], [NULL, 3]] # make_array scalar function with nulls #3 query ??? select make_array(NULL), make_array(NULL, NULL, NULL), make_array(make_array(NULL, NULL), make_array(NULL, NULL)); ---- -[] [, , ] [[, ], [, ]] +[NULL] [NULL, NULL, NULL] [[NULL, NULL], [NULL, NULL]] # make_array with 1 columns query ??? @@ -1112,11 +1112,11 @@ select make_array(a), make_array(d), make_array(e) from values; [2] [2.2] [ipsum] [3] [3.3] [dolor] [4] [4.4] [sit] -[] [5.5] [amet] +[NULL] [5.5] [amet] [5] [6.6] [,] [6] [7.7] [consectetur] -[7] [] [adipiscing] -[8] [8.8] [] +[7] [NULL] [adipiscing] +[8] [8.8] [NULL] # make_array with 2 columns #1 query ?? 
@@ -1125,12 +1125,12 @@ select make_array(b, c), make_array(e, f) from values; [1, 2] [Lorem, A] [3, 4] [ipsum, ] [5, 6] [dolor, BB] -[7, 8] [sit, ] +[7, 8] [sit, NULL] [9, 10] [amet, CCC] -[, 12] [,, DD] -[11, ] [consectetur, E] +[NULL, 12] [,, DD] +[11, NULL] [consectetur, E] [13, 14] [adipiscing, F] -[15, 16] [, ] +[15, 16] [NULL, ] # make_array with 4 columns query ? @@ -1140,10 +1140,10 @@ select make_array(a, b, c, d) from values; [2.0, 3.0, 4.0, 2.2] [3.0, 5.0, 6.0, 3.3] [4.0, 7.0, 8.0, 4.4] -[, 9.0, 10.0, 5.5] -[5.0, , 12.0, 6.6] -[6.0, 11.0, , 7.7] -[7.0, 13.0, 14.0, ] +[NULL, 9.0, 10.0, 5.5] +[5.0, NULL, 12.0, 6.6] +[6.0, 11.0, NULL, 7.7] +[7.0, 13.0, 14.0, NULL] [8.0, 15.0, 16.0, 8.8] # make_array with column of list @@ -1485,17 +1485,17 @@ select array_pop_back(array_pop_back(arrow_cast(make_array(1), 'FixedSizeList(1, query ?? select array_pop_back(make_array(1, 2, 3, 4, NULL)), array_pop_back(make_array(NULL, 'e', 'l', NULL, 'o')); ---- -[1, 2, 3, 4] [, e, l, ] +[1, 2, 3, 4] [NULL, e, l, NULL] query ?? select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, NULL), 'LargeList(Int64)')), array_pop_back(arrow_cast(make_array(NULL, 'e', 'l', NULL, 'o'), 'LargeList(Utf8)')); ---- -[1, 2, 3, 4] [, e, l, ] +[1, 2, 3, 4] [NULL, e, l, NULL] query ?? select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, NULL), 'FixedSizeList(5, Int64)')), array_pop_back(arrow_cast(make_array(NULL, 'e', 'l', NULL, 'o'), 'FixedSizeList(5, Utf8)')); ---- -[1, 2, 3, 4] [, e, l, ] +[1, 2, 3, 4] [NULL, e, l, NULL] # array_pop_back scalar function #5 (array_pop_back the nested arrays) query ? @@ -1533,17 +1533,17 @@ select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9 query ? select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4))); ---- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], ] +[[1, 2, 3], [2, 9, 1], [7, 8, 9], NULL] query ? select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4)), 'LargeList(List(Int64))')); ---- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], ] +[[1, 2, 3], [2, 9, 1], [7, 8, 9], NULL] query ? select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4)), 'FixedSizeList(5, List(Int64))')); ---- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], ] +[[1, 2, 3], [2, 9, 1], [7, 8, 9], NULL] # array_pop_back scalar function #8 (after array_pop_back, nested array is empty) query ? @@ -1567,40 +1567,40 @@ select array_pop_back(column1) from arrayspop; ---- [1, 2] [3, 4, 5] -[6, 7, 8, ] -[, ] +[6, 7, 8, NULL] +[NULL, NULL] [] -[, 10, 11] +[NULL, 10, 11] query ? select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from arrayspop; ---- [1, 2] [3, 4, 5] -[6, 7, 8, ] -[, ] +[6, 7, 8, NULL] +[NULL, NULL] [] -[, 10, 11] +[NULL, 10, 11] query ? select array_pop_back(column1) from large_arrayspop; ---- [1, 2] [3, 4, 5] -[6, 7, 8, ] -[, ] +[6, 7, 8, NULL] +[NULL, NULL] [] -[, 10, 11] +[NULL, 10, 11] query ? select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from large_arrayspop; ---- [1, 2] [3, 4, 5] -[6, 7, 8, ] -[, ] +[6, 7, 8, NULL] +[NULL, NULL] [] -[, 10, 11] +[NULL, 10, 11] ## array_pop_front (aliases: `list_pop_front`) @@ -1968,7 +1968,7 @@ select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, query ? 
select array_slice(column1, column2, column3) from slices; ---- -[] +[NULL] [12, 13, 14, 15, 16, 17] [] [] @@ -1979,7 +1979,7 @@ select array_slice(column1, column2, column3) from slices; query ? select array_slice(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from slices; ---- -[] +[NULL] [12, 13, 14, 15, 16, 17] [] [] @@ -1992,24 +1992,24 @@ select array_slice(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) fr query ??? select array_slice(make_array(1, 2, 3, 4, 5), column2, column3), array_slice(column1, 3, column3), array_slice(column1, column2, 5) from slices; ---- -[1] [] [, 2, 3, 4, 5] +[1] [] [NULL, 2, 3, 4, 5] [2] [13, 14, 15, 16, 17] [12, 13, 14, 15] -[] [] [21, 22, 23, , 25] +[] [] [21, 22, 23, NULL, 25] [] [33, 34] [] [4, 5] [] [] [1, 2, 3, 4, 5] [43, 44, 45, 46] [41, 42, 43, 44, 45] -[5] [, 54, 55, 56, 57, 58, 59, 60] [55] +[5] [NULL, 54, 55, 56, 57, 58, 59, 60] [55] query ??? select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), column2, column3), array_slice(arrow_cast(column1, 'LargeList(Int64)'), 3, column3), array_slice(arrow_cast(column1, 'LargeList(Int64)'), column2, 5) from slices; ---- -[1] [] [, 2, 3, 4, 5] +[1] [] [NULL, 2, 3, 4, 5] [2] [13, 14, 15, 16, 17] [12, 13, 14, 15] -[] [] [21, 22, 23, , 25] +[] [] [21, 22, 23, NULL, 25] [] [33, 34] [] [4, 5] [] [] [1, 2, 3, 4, 5] [43, 44, 45, 46] [41, 42, 43, 44, 45] -[5] [, 54, 55, 56, 57, 58, 59, 60] [55] +[5] [NULL, 54, 55, 56, 57, 58, 59, 60] [55] # Test issue: https://github.com/apache/datafusion/issues/10425 # `from` may be larger than `to` and `stride` is positive @@ -2058,17 +2058,17 @@ select array_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'FixedSizeLis query ? select array_any_value(make_array(NULL, make_array(NULL, 1, 2, 3, 4, 5), make_array(NULL, 6, 7, 8, 9, 10))); ---- -[, 1, 2, 3, 4, 5] +[NULL, 1, 2, 3, 4, 5] query ? select array_any_value(arrow_cast(make_array(NULL, make_array(NULL, 1, 2, 3, 4, 5), make_array(NULL, 6, 7, 8, 9, 10)), 'LargeList(List(Int64))')); ---- -[, 1, 2, 3, 4, 5] +[NULL, 1, 2, 3, 4, 5] query ? select array_any_value(arrow_cast(make_array(NULL, make_array(NULL, 1, 2, 3, 4, 5), make_array(NULL, 6, 7, 8, 9, 10)), 'FixedSizeList(3, List(Int64))')); ---- -[, 1, 2, 3, 4, 5] +[NULL, 1, 2, 3, 4, 5] # array_any_value scalar function #3 (using function alias `list_any_value`) query IT @@ -2164,49 +2164,49 @@ select make_array(make_array('a','b'), null), make_array([1,2,3], null, make_array(4,5,6,7)), make_array(null, 1, null, 2, null, 3, null, null, 4, 5); ---- -[[a, b], ] [[a, b], , [c, d]] [, [a, b], ] [, [a, b], , , [c, d]] [[a, bc, def], , [rust]] [[1, 2, 3], , [4, 5, 6, 7]] [, 1, , 2, , 3, , , 4, 5] +[[a, b], NULL] [[a, b], NULL, [c, d]] [NULL, [a, b], NULL] [NULL, [a, b], NULL, NULL, [c, d]] [[a, bc, def], NULL, [rust]] [[1, 2, 3], NULL, [4, 5, 6, 7]] [NULL, 1, NULL, 2, NULL, 3, NULL, NULL, 4, 5] query ? select make_array(column5, null, column5) from arrays_values_without_nulls; ---- -[[2, 3], , [2, 3]] -[[4, 5], , [4, 5]] -[[6, 7], , [6, 7]] -[[8, 9], , [8, 9]] +[[2, 3], NULL, [2, 3]] +[[4, 5], NULL, [4, 5]] +[[6, 7], NULL, [6, 7]] +[[8, 9], NULL, [8, 9]] query ? select make_array(['a','b'], null); ---- -[[a, b], ] +[[a, b], NULL] ## array_sort (aliases: `list_sort`) query ??? 
select array_sort(make_array(1, 3, null, 5, NULL, -5)), array_sort(make_array(1, 3, null, 2), 'ASC'), array_sort(make_array(1, 3, null, 2), 'desc', 'NULLS FIRST'); ---- -[, , -5, 1, 3, 5] [, 1, 2, 3] [, 3, 2, 1] +[NULL, NULL, -5, 1, 3, 5] [NULL, 1, 2, 3] [NULL, 3, 2, 1] query ? select array_sort(column1, 'DESC', 'NULLS LAST') from arrays_values; ---- -[10, 9, 8, 7, 6, 5, 4, 3, 2, ] -[20, 18, 17, 16, 15, 14, 13, 12, 11, ] -[30, 29, 28, 27, 26, 25, 23, 22, 21, ] -[40, 39, 38, 37, 35, 34, 33, 32, 31, ] +[10, 9, 8, 7, 6, 5, 4, 3, 2, NULL] +[20, 18, 17, 16, 15, 14, 13, 12, 11, NULL] +[30, 29, 28, 27, 26, 25, 23, 22, 21, NULL] +[40, 39, 38, 37, 35, 34, 33, 32, 31, NULL] NULL [50, 49, 48, 47, 46, 45, 44, 43, 42, 41] -[60, 59, 58, 57, 56, 55, 54, 52, 51, ] +[60, 59, 58, 57, 56, 55, 54, 52, 51, NULL] [70, 69, 68, 67, 66, 65, 64, 63, 62, 61] query ? select array_sort(column1, 'ASC', 'NULLS FIRST') from arrays_values; ---- -[, 2, 3, 4, 5, 6, 7, 8, 9, 10] -[, 11, 12, 13, 14, 15, 16, 17, 18, 20] -[, 21, 22, 23, 25, 26, 27, 28, 29, 30] -[, 31, 32, 33, 34, 35, 37, 38, 39, 40] +[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] +[NULL, 11, 12, 13, 14, 15, 16, 17, 18, 20] +[NULL, 21, 22, 23, 25, 26, 27, 28, 29, 30] +[NULL, 31, 32, 33, 34, 35, 37, 38, 39, 40] NULL [41, 42, 43, 44, 45, 46, 47, 48, 49, 50] -[, 51, 52, 54, 55, 56, 57, 58, 59, 60] +[NULL, 51, 52, 54, 55, 56, 57, 58, 59, 60] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70] # test with empty array @@ -2233,7 +2233,7 @@ drop table t1; query ??? select list_sort(make_array(1, 3, null, 5, NULL, -5)), list_sort(make_array(1, 3, null, 2), 'ASC'), list_sort(make_array(1, 3, null, 2), 'desc', 'NULLS FIRST'); ---- -[, , -5, 1, 3, 5] [, 1, 2, 3] [, 3, 2, 1] +[NULL, NULL, -5, 1, 3, 5] [NULL, 1, 2, 3] [NULL, 3, 2, 1] ## array_append (aliases: `list_append`, `array_push_back`, `list_push_back`) @@ -2257,7 +2257,7 @@ select array_append(make_array(null, null), 1) ; ---- -[4] [] [1, , 3, 4] [, , 1] +[4] [NULL] [1, NULL, 3, 4] [NULL, NULL, 1] query ???? select @@ -2267,7 +2267,7 @@ select array_append(arrow_cast(make_array(null, null), 'LargeList(Int64)'), 1) ; ---- -[4] [] [1, , 3, 4] [, , 1] +[4] [NULL] [1, NULL, 3, 4] [NULL, NULL, 1] query ?? select @@ -2275,7 +2275,7 @@ select array_append(arrow_cast(make_array(null, null), 'FixedSizeList(2, Int64)'), 1) ; ---- -[1, , 3, 4] [, , 1] +[1, NULL, 3, 4] [NULL, NULL, 1] # test invalid (non-null) query error @@ -2292,21 +2292,21 @@ select array_append(make_array(make_array(1, null, 3)), make_array(null)), array_append(make_array(make_array(1, null, 3)), null); ---- -[[1, , 3], []] [[1, , 3], ] +[[1, NULL, 3], [NULL]] [[1, NULL, 3], NULL] query ?? select array_append(arrow_cast(make_array(make_array(1, null, 3)), 'LargeList(LargeList(Int64))'), arrow_cast(make_array(null), 'LargeList(Int64)')), array_append(arrow_cast(make_array(make_array(1, null, 3)), 'LargeList(LargeList(Int64))'), null); ---- -[[1, , 3], []] [[1, , 3], ] +[[1, NULL, 3], [NULL]] [[1, NULL, 3], NULL] query ?? select array_append(arrow_cast(make_array(make_array(1, null, 3)), 'FixedSizeList(1, List(Int64))'), [null]), array_append(arrow_cast(make_array(make_array(1, null, 3)), 'FixedSizeList(1, List(Int64))'), null); ---- -[[1, , 3], []] [[1, , 3], ] +[[1, NULL, 3], [NULL]] [[1, NULL, 3], NULL] # array_append scalar function #3 query ??? @@ -2377,37 +2377,37 @@ select list_push_back(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), l query ? 
select array_append(column1, column2) from arrays_values; ---- -[, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] -[11, 12, 13, 14, 15, 16, 17, 18, , 20, 12] -[21, 22, 23, , 25, 26, 27, 28, 29, 30, 23] -[31, 32, 33, 34, 35, , 37, 38, 39, 40, 34] +[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 12] +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 23] +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 34] [44] -[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, ] -[51, 52, , 54, 55, 56, 57, 58, 59, 60, 55] +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, NULL] +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, 55] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] query ? select array_append(column1, column2) from large_arrays_values; ---- -[, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] -[11, 12, 13, 14, 15, 16, 17, 18, , 20, 12] -[21, 22, 23, , 25, 26, 27, 28, 29, 30, 23] -[31, 32, 33, 34, 35, , 37, 38, 39, 40, 34] +[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 12] +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 23] +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 34] [44] -[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, ] -[51, 52, , 54, 55, 56, 57, 58, 59, 60, 55] +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, NULL] +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, 55] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] query ? select array_append(column1, column2) from fixed_arrays_values; ---- -[, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] -[11, 12, 13, 14, 15, 16, 17, 18, , 20, 12] -[21, 22, 23, , 25, 26, 27, 28, 29, 30, 23] -[31, 32, 33, 34, 35, , 37, 38, 39, 40, 34] -[, , , , , , , , , , 44] -[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, ] -[51, 52, , 54, 55, 56, 57, 58, 59, 60, 55] +[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 12] +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 23] +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 34] +[NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 44] +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, NULL] +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, 55] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] # array_append with columns #2 (element is list) @@ -2434,9 +2434,9 @@ query ?? select array_append(column2, 100.1), array_append(column3, '.') from arrays; ---- [1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .] -[, 5.5, 6.6, 100.1] [i, p, , u, m, .] -[7.7, 8.8, 9.9, 100.1] [d, , l, o, r, .] -[10.1, , 12.2, 100.1] [s, i, t, .] +[NULL, 5.5, 6.6, 100.1] [i, p, NULL, u, m, .] +[7.7, 8.8, 9.9, 100.1] [d, NULL, l, o, r, .] +[10.1, NULL, 12.2, 100.1] [s, i, t, .] [13.3, 14.4, 15.5, 100.1] [a, m, e, t, .] [100.1] [,, .] [16.6, 17.7, 18.8, 100.1] [.] @@ -2445,9 +2445,9 @@ query ?? select array_append(column2, 100.1), array_append(column3, '.') from large_arrays; ---- [1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .] -[, 5.5, 6.6, 100.1] [i, p, , u, m, .] -[7.7, 8.8, 9.9, 100.1] [d, , l, o, r, .] -[10.1, , 12.2, 100.1] [s, i, t, .] +[NULL, 5.5, 6.6, 100.1] [i, p, NULL, u, m, .] +[7.7, 8.8, 9.9, 100.1] [d, NULL, l, o, r, .] +[10.1, NULL, 12.2, 100.1] [s, i, t, .] [13.3, 14.4, 15.5, 100.1] [a, m, e, t, .] [100.1] [,, .] [16.6, 17.7, 18.8, 100.1] [.] @@ -2456,12 +2456,12 @@ query ?? select array_append(column2, 100.1), array_append(column3, '.') from fixed_size_arrays; ---- [1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .] -[, 5.5, 6.6, 100.1] [i, p, , u, m, .] -[7.7, 8.8, 9.9, 100.1] [d, , l, o, r, .] -[10.1, , 12.2, 100.1] [s, i, t, a, b, .] +[NULL, 5.5, 6.6, 100.1] [i, p, NULL, u, m, .] +[7.7, 8.8, 9.9, 100.1] [d, NULL, l, o, r, .] +[10.1, NULL, 12.2, 100.1] [s, i, t, a, b, .] 
[13.3, 14.4, 15.5, 100.1] [a, m, e, t, x, .] -[, , , 100.1] [,, a, b, c, d, .] -[16.6, 17.7, 18.8, 100.1] [, , , , , .] +[NULL, NULL, NULL, 100.1] [,, a, b, c, d, .] +[16.6, 17.7, 18.8, 100.1] [NULL, NULL, NULL, NULL, NULL, .] # array_append with columns and scalars #2 query ?? @@ -2500,24 +2500,24 @@ select array_prepend(4, []); query ? select array_prepend(4, [null]); ---- -[4, ] +[4, NULL] # DuckDB: [null] # ClickHouse: [null] query ? select array_prepend(null, []); ---- -[] +[NULL] query ? select array_prepend(null, [1]); ---- -[, 1] +[NULL, 1] query ? select array_prepend(null, [[1,2,3]]); ---- -[, [1, 2, 3]] +[NULL, [1, 2, 3]] # DuckDB: [[]] # ClickHouse: [[]] @@ -2534,7 +2534,7 @@ select array_prepend(null, null); query ? select array_append([], null); ---- -[] +[NULL] # array_prepend scalar function #3 @@ -2610,37 +2610,37 @@ select list_push_front(1, arrow_cast(make_array(2, 3, 4), 'LargeList(Int64)')), query ? select array_prepend(column2, column1) from arrays_values; ---- -[1, , 2, 3, 4, 5, 6, 7, 8, 9, 10] -[12, 11, 12, 13, 14, 15, 16, 17, 18, , 20] -[23, 21, 22, 23, , 25, 26, 27, 28, 29, 30] -[34, 31, 32, 33, 34, 35, , 37, 38, 39, 40] +[1, NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] +[12, 11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] +[23, 21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] +[34, 31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] [44] -[, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] -[55, 51, 52, , 54, 55, 56, 57, 58, 59, 60] +[NULL, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +[55, 51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] [66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] query ? select array_prepend(column2, column1) from large_arrays_values; ---- -[1, , 2, 3, 4, 5, 6, 7, 8, 9, 10] -[12, 11, 12, 13, 14, 15, 16, 17, 18, , 20] -[23, 21, 22, 23, , 25, 26, 27, 28, 29, 30] -[34, 31, 32, 33, 34, 35, , 37, 38, 39, 40] +[1, NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] +[12, 11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] +[23, 21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] +[34, 31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] [44] -[, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] -[55, 51, 52, , 54, 55, 56, 57, 58, 59, 60] +[NULL, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +[55, 51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] [66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] query ? select array_prepend(column2, column1) from fixed_arrays_values; ---- -[1, , 2, 3, 4, 5, 6, 7, 8, 9, 10] -[12, 11, 12, 13, 14, 15, 16, 17, 18, , 20] -[23, 21, 22, 23, , 25, 26, 27, 28, 29, 30] -[34, 31, 32, 33, 34, 35, , 37, 38, 39, 40] -[44, , , , , , , , , , ] -[, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] -[55, 51, 52, , 54, 55, 56, 57, 58, 59, 60] +[1, NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] +[12, 11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] +[23, 21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] +[34, 31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] +[44, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL] +[NULL, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +[55, 51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] [66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] # array_prepend with columns #2 (element is list) @@ -2667,9 +2667,9 @@ query ?? select array_prepend(100.1, column2), array_prepend('.', column3) from arrays; ---- [100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m] -[100.1, , 5.5, 6.6] [., i, p, , u, m] -[100.1, 7.7, 8.8, 9.9] [., d, , l, o, r] -[100.1, 10.1, , 12.2] [., s, i, t] +[100.1, NULL, 5.5, 6.6] [., i, p, NULL, u, m] +[100.1, 7.7, 8.8, 9.9] [., d, NULL, l, o, r] +[100.1, 10.1, NULL, 12.2] [., s, i, t] [100.1, 13.3, 14.4, 15.5] [., a, m, e, t] [100.1] [., ,] [100.1, 16.6, 17.7, 18.8] [.] 
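
# For reference (hypothetical recombination of the append/prepend cases above):
# appending or prepending a SQL null adds a real element, which now prints as
# NULL instead of an empty slot:
# query ??
# select array_append([1], null), array_prepend(null, [1]);
# ----
# [1, NULL] [NULL, 1]
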
@@ -2678,9 +2678,9 @@ query ?? select array_prepend(100.1, column2), array_prepend('.', column3) from large_arrays; ---- [100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m] -[100.1, , 5.5, 6.6] [., i, p, , u, m] -[100.1, 7.7, 8.8, 9.9] [., d, , l, o, r] -[100.1, 10.1, , 12.2] [., s, i, t] +[100.1, NULL, 5.5, 6.6] [., i, p, NULL, u, m] +[100.1, 7.7, 8.8, 9.9] [., d, NULL, l, o, r] +[100.1, 10.1, NULL, 12.2] [., s, i, t] [100.1, 13.3, 14.4, 15.5] [., a, m, e, t] [100.1] [., ,] [100.1, 16.6, 17.7, 18.8] [.] @@ -2689,12 +2689,12 @@ query ?? select array_prepend(100.1, column2), array_prepend('.', column3) from fixed_size_arrays; ---- [100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m] -[100.1, , 5.5, 6.6] [., i, p, , u, m] -[100.1, 7.7, 8.8, 9.9] [., d, , l, o, r] -[100.1, 10.1, , 12.2] [., s, i, t, a, b] +[100.1, NULL, 5.5, 6.6] [., i, p, NULL, u, m] +[100.1, 7.7, 8.8, 9.9] [., d, NULL, l, o, r] +[100.1, 10.1, NULL, 12.2] [., s, i, t, a, b] [100.1, 13.3, 14.4, 15.5] [., a, m, e, t, x] -[100.1, , , ] [., ,, a, b, c, d] -[100.1, 16.6, 17.7, 18.8] [., , , , , ] +[100.1, NULL, NULL, NULL] [., ,, a, b, c, d] +[100.1, 16.6, 17.7, 18.8] [., NULL, NULL, NULL, NULL, NULL] # array_prepend with columns and scalars #2 (element is list) query ?? @@ -2729,7 +2729,7 @@ select list_repeat('rust', 4), list_repeat(null, 0); ---- -[1, 1, 1, 1, 1] [3.14, 3.14, 3.14] [l, l, l, l] [, ] [-1, -1, -1, -1, -1] [] [rust, rust, rust, rust] [] +[1, 1, 1, 1, 1] [3.14, 3.14, 3.14] [l, l, l, l] [NULL, NULL] [-1, -1, -1, -1, -1] [] [rust, rust, rust, rust] [] # array_repeat scalar function #2 (element as list) query ???? @@ -2739,7 +2739,7 @@ select array_repeat([null, null], 3), array_repeat([[1, 2], [3, 4]], 2); ---- -[[1], [1], [1], [1], [1]] [[1.1, 2.2, 3.3], [1.1, 2.2, 3.3], [1.1, 2.2, 3.3]] [[, ], [, ], [, ]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] +[[1], [1], [1], [1], [1]] [[1.1, 2.2, 3.3], [1.1, 2.2, 3.3], [1.1, 2.2, 3.3]] [[NULL, NULL], [NULL, NULL], [NULL, NULL]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] query ???? select @@ -2748,7 +2748,7 @@ select array_repeat(arrow_cast([null, null], 'LargeList(Int64)'), 3), array_repeat(arrow_cast([[1, 2], [3, 4]], 'LargeList(List(Int64))'), 2); ---- -[[1], [1], [1], [1], [1]] [[1.1, 2.2, 3.3], [1.1, 2.2, 3.3], [1.1, 2.2, 3.3]] [[, ], [, ], [, ]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] +[[1], [1], [1], [1], [1]] [[1.1, 2.2, 3.3], [1.1, 2.2, 3.3], [1.1, 2.2, 3.3]] [[NULL, NULL], [NULL, NULL], [NULL, NULL]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] # array_repeat with columns #1 @@ -2781,7 +2781,7 @@ select from array_repeat_table; ---- [1] [1.1] [a] [[4, 5, 6]] [1, 1, 1] [[1]] -[, ] [, ] [, ] [, ] [, , ] [[1], [1]] +[NULL, NULL] [NULL, NULL] [NULL, NULL] [NULL, NULL] [NULL, NULL, NULL] [[1], [1]] [2, 2, 2] [2.2, 2.2, 2.2] [rust, rust, rust] [[7], [7], [7]] [2, 2, 2] [[1], [1], [1]] [] [] [] [] [3, 3, 3] [] @@ -2796,7 +2796,7 @@ select from large_array_repeat_table; ---- [1] [1.1] [a] [[4, 5, 6]] [1, 1, 1] [[1]] -[, ] [, ] [, ] [, ] [, , ] [[1], [1]] +[NULL, NULL] [NULL, NULL] [NULL, NULL] [NULL, NULL] [NULL, NULL, NULL] [[1], [1]] [2, 2, 2] [2.2, 2.2, 2.2] [rust, rust, rust] [[7], [7], [7]] [2, 2, 2] [[1], [1], [1]] [] [] [] [] [3, 3, 3] [] @@ -2962,7 +2962,7 @@ select array_concat(make_array(column2), make_array(0)) from arrays_values; [23, 0] [34, 0] [44, 0] -[, 0] +[NULL, 0] [55, 0] [66, 0] @@ -2970,33 +2970,33 @@ select array_concat(make_array(column2), make_array(0)) from arrays_values; query ??? 
select array_concat(column1, column1), array_concat(column2, column2), array_concat(column3, column3) from arrays; ---- -[[, 2], [3, ], [, 2], [3, ]] [1.1, 2.2, 3.3, 1.1, 2.2, 3.3] [L, o, r, e, m, L, o, r, e, m] -[[3, 4], [5, 6], [3, 4], [5, 6]] [, 5.5, 6.6, , 5.5, 6.6] [i, p, , u, m, i, p, , u, m] -[[5, 6], [7, 8], [5, 6], [7, 8]] [7.7, 8.8, 9.9, 7.7, 8.8, 9.9] [d, , l, o, r, d, , l, o, r] -[[7, ], [9, 10], [7, ], [9, 10]] [10.1, , 12.2, 10.1, , 12.2] [s, i, t, s, i, t] +[[NULL, 2], [3, NULL], [NULL, 2], [3, NULL]] [1.1, 2.2, 3.3, 1.1, 2.2, 3.3] [L, o, r, e, m, L, o, r, e, m] +[[3, 4], [5, 6], [3, 4], [5, 6]] [NULL, 5.5, 6.6, NULL, 5.5, 6.6] [i, p, NULL, u, m, i, p, NULL, u, m] +[[5, 6], [7, 8], [5, 6], [7, 8]] [7.7, 8.8, 9.9, 7.7, 8.8, 9.9] [d, NULL, l, o, r, d, NULL, l, o, r] +[[7, NULL], [9, 10], [7, NULL], [9, 10]] [10.1, NULL, 12.2, 10.1, NULL, 12.2] [s, i, t, s, i, t] NULL [13.3, 14.4, 15.5, 13.3, 14.4, 15.5] [a, m, e, t, a, m, e, t] [[11, 12], [13, 14], [11, 12], [13, 14]] NULL [,, ,] -[[15, 16], [, 18], [15, 16], [, 18]] [16.6, 17.7, 18.8, 16.6, 17.7, 18.8] NULL +[[15, 16], [NULL, 18], [15, 16], [NULL, 18]] [16.6, 17.7, 18.8, 16.6, 17.7, 18.8] NULL # array_concat column-wise #6 query ?? select array_concat(column1, make_array(make_array(1, 2), make_array(3, 4))), array_concat(column2, make_array(1.1, 2.2, 3.3)) from arrays; ---- -[[, 2], [3, ], [1, 2], [3, 4]] [1.1, 2.2, 3.3, 1.1, 2.2, 3.3] -[[3, 4], [5, 6], [1, 2], [3, 4]] [, 5.5, 6.6, 1.1, 2.2, 3.3] +[[NULL, 2], [3, NULL], [1, 2], [3, 4]] [1.1, 2.2, 3.3, 1.1, 2.2, 3.3] +[[3, 4], [5, 6], [1, 2], [3, 4]] [NULL, 5.5, 6.6, 1.1, 2.2, 3.3] [[5, 6], [7, 8], [1, 2], [3, 4]] [7.7, 8.8, 9.9, 1.1, 2.2, 3.3] -[[7, ], [9, 10], [1, 2], [3, 4]] [10.1, , 12.2, 1.1, 2.2, 3.3] +[[7, NULL], [9, 10], [1, 2], [3, 4]] [10.1, NULL, 12.2, 1.1, 2.2, 3.3] [[1, 2], [3, 4]] [13.3, 14.4, 15.5, 1.1, 2.2, 3.3] [[11, 12], [13, 14], [1, 2], [3, 4]] [1.1, 2.2, 3.3] -[[15, 16], [, 18], [1, 2], [3, 4]] [16.6, 17.7, 18.8, 1.1, 2.2, 3.3] +[[15, 16], [NULL, 18], [1, 2], [3, 4]] [16.6, 17.7, 18.8, 1.1, 2.2, 3.3] # array_concat column-wise #7 query ? select array_concat(column3, make_array('.', '.', '.')) from arrays; ---- [L, o, r, e, m, ., ., .] -[i, p, , u, m, ., ., .] -[d, , l, o, r, ., ., .] +[i, p, NULL, u, m, ., ., .] +[d, NULL, l, o, r, ., ., .] [s, i, t, ., ., .] [a, m, e, t, ., ., .] [,, ., ., .] @@ -3005,10 +3005,10 @@ select array_concat(column3, make_array('.', '.', '.')) from arrays; # query ??I? # select column1, column2, column3, column4 from arrays_values_v2; # ---- -# [, 2, 3] [4, 5, ] 12 [[30, 40, 50]] -# NULL [7, , 8] 13 [[, , 60]] -# [9, , 10] NULL 14 [[70, , ]] -# [, 1] [, 21] NULL NULL +# [NULL, 2, 3] [4, 5, NULL] 12 [[30, 40, 50]] +# NULL [7, NULL, 8] 13 [[NULL, NULL, 60]] +# [9, NULL, 10] NULL 14 [[70, NULL, NULL]] +# [NULL, 1] [NULL, 21] NULL NULL # [11, 12] NULL NULL NULL # NULL NULL NULL NULL @@ -3017,10 +3017,10 @@ select array_concat(column3, make_array('.', '.', '.')) from arrays; query ? select array_concat(column1, column2) from arrays_values_v2; ---- -[, 2, 3, 4, 5, ] -[7, , 8] -[9, , 10] -[, 1, , 21] +[NULL, 2, 3, 4, 5, NULL] +[7, NULL, 8] +[9, NULL, 10] +[NULL, 1, NULL, 21] [11, 12] NULL @@ -3029,11 +3029,11 @@ query ? select array_concat(column4, make_array(column3)) from arrays_values_v2; ---- [[30, 40, 50], [12]] -[[, , 60], [13]] -[[70, , ], [14]] -[[]] -[[]] -[[]] +[[NULL, NULL, 60], [13]] +[[70, NULL, NULL], [14]] +[[NULL]] +[[NULL]] +[[NULL]] # array_concat column-wise #10 (3D + 2D + 1D) query ? 
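
# For reference (illustrative, derived from the arrays_values_v2 rows above):
# array_concat skips list arguments that are NULL, while nulls *inside* a list
# are preserved and now print explicitly:
# query ?
# select array_concat(make_array(1, NULL), make_array(NULL, 2));
# ----
# [1, NULL, NULL, 2]
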
@@ -3046,23 +3046,23 @@ select array_concat(column4, column1, column2) from nested_arrays; query ? select array_concat(column4, column1) from arrays_values_v2; ---- -[[30, 40, 50], [, 2, 3]] -[[, , 60], ] -[[70, , ], [9, , 10]] -[[, 1]] +[[30, 40, 50], [NULL, 2, 3]] +[[NULL, NULL, 60], NULL] +[[70, NULL, NULL], [9, NULL, 10]] +[[NULL, 1]] [[11, 12]] -[] +[NULL] # array_concat column-wise #12 (1D + 1D + 1D) query ? select array_concat(make_array(column3), column1, column2) from arrays_values_v2; ---- -[12, , 2, 3, 4, 5, ] -[13, 7, , 8] -[14, 9, , 10] -[, , 1, , 21] -[, 11, 12] -[] +[12, NULL, 2, 3, 4, 5, NULL] +[13, 7, NULL, 8] +[14, 9, NULL, 10] +[NULL, NULL, 1, NULL, 21] +[NULL, 11, 12] +[NULL] ## array_position (aliases: `list_position`, `array_indexof`, `list_indexof`) @@ -3908,10 +3908,10 @@ create table t as values query ?III? select column1, column2, column3, column4, array_replace_n(column1, column2, column3, column4) from t; ---- -[3, 1, , 3] 3 4 2 [4, 1, , 4] -[3, 1, , 3] NULL 5 2 [3, 1, 5, 3] +[3, 1, NULL, 3] 3 4 2 [4, 1, NULL, 4] +[3, 1, NULL, 3] NULL 5 2 [3, 1, 5, 3] NULL 3 2 1 NULL -[3, 1, 3] 3 NULL 1 [, 1, 3] +[3, 1, 3] 3 NULL 1 [NULL, 1, 3] @@ -3981,7 +3981,7 @@ GROUP BY column1 ORDER BY column1; ---- 1 [foo, foo] foo,foo -2 [] (empty) +2 [NULL] (empty) 3 [bar] bar NULL [baz] baz @@ -4100,23 +4100,23 @@ select array_union(arrow_cast([], 'LargeList(Int64)'), arrow_cast([], 'LargeList query ? select array_union([[null]], []); ---- -[[]] +[[NULL]] query ? select array_union(arrow_cast([[null]], 'LargeList(List(Int64))'), arrow_cast([], 'LargeList(Int64)')); ---- -[[]] +[[NULL]] # array_union scalar function #8 query ? select array_union([null], [null]); ---- -[] +[NULL] query ? select array_union(arrow_cast([[null]], 'LargeList(List(Int64))'), arrow_cast([[null]], 'LargeList(List(Int64))')); ---- -[[]] +[[NULL]] # array_union scalar function #9 query ? @@ -4240,13 +4240,13 @@ h,-,-,-,o nil-2-nil-4-5 1|0|3 # For reference # select column1, column4 from arrays_values; # ---- -# [, 2, 3, 4, 5, 6, 7, 8, 9, 10] , -# [11, 12, 13, 14, 15, 16, 17, 18, , 20] . -# [21, 22, 23, , 25, 26, 27, 28, 29, 30] - -# [31, 32, 33, 34, 35, , 37, 38, 39, 40] ok +# [NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] , +# [11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] . +# [21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] - +# [31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] ok # NULL @ # [41, 42, 43, 44, 45, 46, 47, 48, 49, 50] $ -# [51, 52, , 54, 55, 56, 57, 58, 59, 60] ^ +# [51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] ^ # [61, 62, 63, 64, 65, 66, 67, 68, 69, 70] NULL query T @@ -4435,7 +4435,7 @@ select array_remove(make_array(1.1, null, 2.2, 3.3), 1.1), array_remove(make_array('a', null, 'bc'), 'a'); ---- -[1, , 3] [, 2.2, 3.3] [, bc] +[1, NULL, 3] [NULL, 2.2, 3.3] [NULL, bc] query ??? select @@ -4443,7 +4443,7 @@ select array_remove(arrow_cast(make_array(1.1, null, 2.2, 3.3), 'LargeList(Float64)'), 1.1), array_remove(arrow_cast(make_array('a', null, 'bc'), 'LargeList(Utf8)'), 'a'); ---- -[1, , 3] [, 2.2, 3.3] [, bc] +[1, NULL, 3] [NULL, 2.2, 3.3] [NULL, bc] query ??? 
select @@ -4451,7 +4451,7 @@ select array_remove(arrow_cast(make_array(1.1, null, 2.2, 3.3), 'FixedSizeList(4, Float64)'), 1.1), array_remove(arrow_cast(make_array('a', null, 'bc'), 'FixedSizeList(3, Utf8)'), 'a'); ---- -[1, , 3] [, 2.2, 3.3] [, bc] +[1, NULL, 3] [NULL, 2.2, 3.3] [NULL, bc] #TODO: https://github.com/apache/datafusion/issues/7142 # follow PostgreSQL behavior @@ -4466,21 +4466,21 @@ select array_remove(make_array(1, null, 2), null), array_remove(make_array(1, null, 2, null), null); ---- -[1, 2] [1, 2, ] +[1, 2] [1, 2, NULL] query ?? select array_remove(arrow_cast(make_array(1, null, 2), 'LargeList(Int64)'), null), array_remove(arrow_cast(make_array(1, null, 2, null), 'LargeList(Int64)'), null); ---- -[1, 2] [1, 2, ] +[1, 2] [1, 2, NULL] query ?? select array_remove(arrow_cast(make_array(1, null, 2), 'FixedSizeList(3, Int64)'), null), array_remove(arrow_cast(make_array(1, null, 2, null), 'FixedSizeList(4, Int64)'), null); ---- -[1, 2] [1, 2, ] +[1, 2] [1, 2, NULL] # array_remove scalar function #2 (element is list) query ?? @@ -5718,7 +5718,7 @@ from array_distinct_table_2D; ---- [[1, 2], [3, 4], [5, 6]] [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] -[, [5, 6]] +[NULL, [5, 6]] query ? select array_distinct(column1) @@ -5750,7 +5750,7 @@ from array_distinct_table_2D_fixed; ---- [[1, 2], [3, 4], [5, 6]] [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] -[, [5, 6]] +[NULL, [5, 6]] query ??? select array_intersect(column1, column2), @@ -6645,19 +6645,19 @@ select make_array(1, 2.0) query ? select make_array(null, 1.0) ---- -[, 1.0] +[NULL, 1.0] # make_array scalar function #3 query ? select make_array(1, 2.0, null, 3) ---- -[1.0, 2.0, , 3.0] +[1.0, 2.0, NULL, 3.0] # make_array scalar function #4 query ? select make_array(1.0, '2', null) ---- -[1.0, 2.0, ] +[1.0, 2.0, NULL] ### FixedSizeListArray @@ -6716,10 +6716,10 @@ List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int6 query ? select array_concat(column1, [7]) from arrays_values_v2; ---- -[, 2, 3, 7] +[NULL, 2, 3, 7] [7] -[9, , 10, 7] -[, 1, 7] +[9, NULL, 10, 7] +[NULL, 1, 7] [11, 12, 7] [7] @@ -6738,21 +6738,21 @@ select flatten(make_array(1, 2, 1, 3, 2)), flatten(make_array([1], [2, 3], [null], make_array(4, null, 5))), flatten(make_array([[1.1]], [[2.2]], [[3.3], [4.4]])); ---- -[1, 2, 1, 3, 2] [1, 2, 3, , 4, , 5] [1.1, 2.2, 3.3, 4.4] +[1, 2, 1, 3, 2] [1, 2, 3, NULL, 4, NULL, 5] [1.1, 2.2, 3.3, 4.4] query ??? select flatten(arrow_cast(make_array(1, 2, 1, 3, 2), 'LargeList(Int64)')), flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null, 5)), 'LargeList(LargeList(Int64))')), flatten(arrow_cast(make_array([[1.1]], [[2.2]], [[3.3], [4.4]]), 'LargeList(LargeList(LargeList(Float64)))')); ---- -[1, 2, 1, 3, 2] [1, 2, 3, , 4, , 5] [1.1, 2.2, 3.3, 4.4] +[1, 2, 1, 3, 2] [1, 2, 3, NULL, 4, NULL, 5] [1.1, 2.2, 3.3, 4.4] query ??? select flatten(arrow_cast(make_array(1, 2, 1, 3, 2), 'FixedSizeList(5, Int64)')), flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null, 5)), 'FixedSizeList(4, List(Int64))')), flatten(arrow_cast(make_array([[1.1], [2.2]], [[3.3], [4.4]]), 'FixedSizeList(2, List(List(Float64)))')); ---- -[1, 2, 1, 3, 2] [1, 2, 3, , 4, , 5] [1.1, 2.2, 3.3, 4.4] +[1, 2, 1, 3, 2] [1, 2, 3, NULL, 4, NULL, 5] [1.1, 2.2, 3.3, 4.4] # flatten with column values query ???? @@ -6963,7 +6963,7 @@ SELECT string_to_array('abc', NULL) query ? SELECT string_to_array('abc def', ' ', 'def') ---- -[abc, ] +[abc, NULL] query ? 
select string_to_array(e, ',') from values; @@ -7013,7 +7013,7 @@ NULL query ? select string_to_array(arrow_cast(e, 'LargeUtf8'), ',', arrow_cast('Lorem', 'LargeUtf8')) from values; ---- -[] +[NULL] [ipsum] [dolor] [sit] @@ -7079,12 +7079,12 @@ select array_resize(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 1); query ? select array_resize(make_array(1, 2, 3), 5); ---- -[1, 2, 3, , ] +[1, 2, 3, NULL, NULL] query ? select array_resize(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 5); ---- -[1, 2, 3, , ] +[1, 2, 3, NULL, NULL] # array_resize scalar function #3 query ? @@ -7116,25 +7116,25 @@ select array_resize(arrow_cast(make_array(1.1, 2.2, 3.3), 'LargeList(Float64)'), query ? select array_resize(column1, column2, column3) from arrays_values; ---- -[] -[11, 12, 13, 14, 15, 16, 17, 18, , 20, 2, 2] -[21, 22, 23, , 25, 26, 27, 28, 29, 30, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] -[31, 32, 33, 34, 35, , 37, 38, 39, 40, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] +[NULL] +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 2, 2] +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] NULL [] -[51, 52, , 54, 55, 56, 57, 58, 59, 60, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ] +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7] query ? select array_resize(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from arrays_values; ---- -[] -[11, 12, 13, 14, 15, 16, 17, 18, , 20, 2, 2] -[21, 22, 23, , 25, 26, 27, 28, 29, 30, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] -[31, 32, 33, 34, 35, , 37, 38, 39, 40, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] +[NULL] +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 2, 2] +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] NULL [] -[51, 52, , 54, 55, 56, 57, 58, 59, 60, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ] +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7] # array_resize scalar function #5 @@ -7171,26 +7171,26 @@ AS VALUES query ? 
select array_resize(column1, column2, column3) from array_resize_values; ---- -[1, ] -[11, 12, , 14, 15] -[21, 22, 23, 24, , 26, 27, 28] -[31, 32, 33, 34, 35, 36, , 38, 39, 40, 4, 4] +[1, NULL] +[11, 12, NULL, 14, 15] +[21, 22, 23, 24, NULL, 26, 27, 28] +[31, 32, 33, 34, 35, 36, NULL, 38, 39, 40, 4, 4] NULL [] -[51, 52, 53, 54, 55, , 57, 58, 59, 60, , , ] +[51, 52, 53, 54, 55, NULL, 57, 58, 59, 60, NULL, NULL, NULL] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7] # array_resize columnar test #2 query ? select array_resize(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from array_resize_values; ---- -[1, ] -[11, 12, , 14, 15] -[21, 22, 23, 24, , 26, 27, 28] -[31, 32, 33, 34, 35, 36, , 38, 39, 40, 4, 4] +[1, NULL] +[11, 12, NULL, 14, 15] +[21, 22, 23, 24, NULL, 26, 27, 28] +[31, 32, 33, 34, 35, 36, NULL, 38, 39, 40, 4, 4] NULL [] -[51, 52, 53, 54, 55, , 57, 58, 59, 60, , , ] +[51, 52, 53, 54, 55, NULL, 57, 58, 59, 60, NULL, NULL, NULL] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7] ## array_reverse @@ -7218,13 +7218,13 @@ NULL [] query ?? select array_reverse(column1), column1 from arrays_values; ---- -[10, 9, 8, 7, 6, 5, 4, 3, 2, ] [, 2, 3, 4, 5, 6, 7, 8, 9, 10] -[20, , 18, 17, 16, 15, 14, 13, 12, 11] [11, 12, 13, 14, 15, 16, 17, 18, , 20] -[30, 29, 28, 27, 26, 25, , 23, 22, 21] [21, 22, 23, , 25, 26, 27, 28, 29, 30] -[40, 39, 38, 37, , 35, 34, 33, 32, 31] [31, 32, 33, 34, 35, , 37, 38, 39, 40] +[10, 9, 8, 7, 6, 5, 4, 3, 2, NULL] [NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] +[20, NULL, 18, 17, 16, 15, 14, 13, 12, 11] [11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] +[30, 29, 28, 27, 26, 25, NULL, 23, 22, 21] [21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] +[40, 39, 38, 37, NULL, 35, 34, 33, 32, 31] [31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] NULL NULL [50, 49, 48, 47, 46, 45, 44, 43, 42, 41] [41, 42, 43, 44, 45, 46, 47, 48, 49, 50] -[60, 59, 58, 57, 56, 55, 54, , 52, 51] [51, 52, , 54, 55, 56, 57, 58, 59, 60] +[60, 59, 58, 57, 56, 55, 54, NULL, 52, 51] [51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] [70, 69, 68, 67, 66, 65, 64, 63, 62, 61] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70] diff --git a/datafusion/sqllogictest/test_files/avro.slt b/datafusion/sqllogictest/test_files/avro.slt index 8282331f995e..d6323b88f159 100644 --- a/datafusion/sqllogictest/test_files/avro.slt +++ b/datafusion/sqllogictest/test_files/avro.slt @@ -219,8 +219,8 @@ SELECT id, CAST(string_col AS varchar) FROM alltypes_plain_multi_files query ???? 
SELECT f1, f2, f3, f4 FROM nested_records ---- -{f1_1: aaa, f1_2: 10, f1_3: {f1_3_1: 3.14}} [{f2_1: true, f2_2: 1.2}, {f2_1: true, f2_2: 2.2}] {f3_1: xyz} [{f4_1: 200}, ] -{f1_1: bbb, f1_2: 20, f1_3: {f1_3_1: 3.14}} [{f2_1: false, f2_2: 10.2}] NULL [, {f4_1: 300}] +{f1_1: aaa, f1_2: 10, f1_3: {f1_3_1: 3.14}} [{f2_1: true, f2_2: 1.2}, {f2_1: true, f2_2: 2.2}] {f3_1: xyz} [{f4_1: 200}, NULL] +{f1_1: bbb, f1_2: 20, f1_3: {f1_3_1: 3.14}} [{f2_1: false, f2_2: 10.2}] NULL [NULL, {f4_1: 300}] # test avro enum query TTT diff --git a/datafusion/sqllogictest/test_files/expr.slt b/datafusion/sqllogictest/test_files/expr.slt index 1647d61bf750..06e2f4154ddd 100644 --- a/datafusion/sqllogictest/test_files/expr.slt +++ b/datafusion/sqllogictest/test_files/expr.slt @@ -107,7 +107,7 @@ SELECT STRUCT(true, false), STRUCT('str1', 'str2') ---- -{c0: 1, c1: 2, c2: 3, c3: 4, c4: 5} {c0: } {c0: 2} {c0: 1, c1: } {c0: true, c1: false} {c0: str1, c1: str2} +{c0: 1, c1: 2, c2: 3, c3: 4, c4: 5} {c0: NULL} {c0: 2} {c0: 1, c1: NULL} {c0: true, c1: false} {c0: str1, c1: str2} # test binary_bitwise_shift query IIII @@ -1783,7 +1783,7 @@ select struct(time,load1,load2,host) from t1; {c0: 2018-05-22T19:53:26, c1: 1.1, c2: 101.0, c3: host1} {c0: 2018-05-22T19:53:26, c1: 2.2, c2: 202.0, c3: host2} {c0: 2018-05-22T19:53:26, c1: 3.3, c2: 303.0, c3: host3} -{c0: 2018-05-22T19:53:26, c1: 1.1, c2: 101.0, c3: } +{c0: 2018-05-22T19:53:26, c1: 1.1, c2: 101.0, c3: NULL} # can have an aggregate function with an inner coalesce query TR diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index 28fc2f4b0b80..706a98ee8c52 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -130,17 +130,17 @@ SELECT MAKE_MAP('POST', 41, 'HEAD', 33)['POST']; query ? SELECT MAKE_MAP('POST', 41, 'HEAD', 33, 'PATCH', null); ---- -{POST: 41, HEAD: 33, PATCH: } +{POST: 41, HEAD: 33, PATCH: NULL} query ? SELECT MAKE_MAP('POST', null, 'HEAD', 33, 'PATCH', null); ---- -{POST: , HEAD: 33, PATCH: } +{POST: NULL, HEAD: 33, PATCH: NULL} query ? SELECT MAKE_MAP(1, null, 2, 33, 3, null); ---- -{1: , 2: 33, 3: } +{1: NULL, 2: 33, 3: NULL} query ? SELECT MAKE_MAP([1,2], ['a', 'b'], [3,4], ['b']); @@ -175,7 +175,7 @@ SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, 30]); query ? SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]); ---- -{POST: 41, HEAD: 33, PATCH: } +{POST: 41, HEAD: 33, PATCH: NULL} query ? SELECT MAP([[1,2], [3,4]], ['a', 'b']); @@ -230,7 +230,7 @@ SELECT map(column5, column6) FROM duplicate_keys_table; # key is a nested type query error DataFusion error: Execution error: map key must be unique, duplicate key found: \[1, 2\] -SELECT MAP([[1,2], [1,2], []], [41, 33, null]); +SELECT MAP([[1,2], [1,2], [NULL]], [41, 33, null]); query error DataFusion error: Execution error: map key must be unique, duplicate key found: \[\{1:1\}\] SELECT MAP([Map {1:'1'}, Map {1:'1'}, Map {2:'2'}], [41, 33, null]); @@ -286,14 +286,14 @@ select Map {column6: column7} from t; ---- {[1, 2]: POST} {[3]: PUT} -{[5]: } +{[5]: NULL} query ? select Map {column8: column7} from t; ---- {[[1, 2, 3]]: POST} {[[4]]: PUT} -{[[1, 2]]: } +{[[1, 2]]: NULL} query error select Map {column7: column8} from t; @@ -315,23 +315,23 @@ SELECT MAKE_MAP('POST', 41, 'HEAD', 33)['POST'] from t; query ? 
SELECT MAKE_MAP('POST', 41, 'HEAD', 33, 'PATCH', null) from t; ---- -{POST: 41, HEAD: 33, PATCH: } -{POST: 41, HEAD: 33, PATCH: } -{POST: 41, HEAD: 33, PATCH: } +{POST: 41, HEAD: 33, PATCH: NULL} +{POST: 41, HEAD: 33, PATCH: NULL} +{POST: 41, HEAD: 33, PATCH: NULL} query ? SELECT MAKE_MAP('POST', null, 'HEAD', 33, 'PATCH', null) from t; ---- -{POST: , HEAD: 33, PATCH: } -{POST: , HEAD: 33, PATCH: } -{POST: , HEAD: 33, PATCH: } +{POST: NULL, HEAD: 33, PATCH: NULL} +{POST: NULL, HEAD: 33, PATCH: NULL} +{POST: NULL, HEAD: 33, PATCH: NULL} query ? SELECT MAKE_MAP(1, null, 2, 33, 3, null) from t; ---- -{1: , 2: 33, 3: } -{1: , 2: 33, 3: } -{1: , 2: 33, 3: } +{1: NULL, 2: 33, 3: NULL} +{1: NULL, 2: 33, 3: NULL} +{1: NULL, 2: 33, 3: NULL} query ? SELECT MAKE_MAP([1,2], ['a', 'b'], [3,4], ['b']) from t; @@ -350,9 +350,9 @@ SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, 30]) from t; query ? SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]) from t; ---- -{POST: 41, HEAD: 33, PATCH: } -{POST: 41, HEAD: 33, PATCH: } -{POST: 41, HEAD: 33, PATCH: } +{POST: 41, HEAD: 33, PATCH: NULL} +{POST: 41, HEAD: 33, PATCH: NULL} +{POST: 41, HEAD: 33, PATCH: NULL} query ? SELECT MAP([[1,2], [3,4]], ['a', 'b']) from t; @@ -423,7 +423,7 @@ SELECT MAP {}; query ? SELECT MAP {'a': 1, 'b': null}; ---- -{a: 1, b: } +{a: 1, b: NULL} # keys contain null query error DataFusion error: Execution error: map key cannot be null @@ -556,21 +556,21 @@ query ???? select map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'a'), map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'b'), map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'c'), map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'd'); ---- -[1] [] [3] [] +[1] [NULL] [3] [NULL] # key is integer query ???? select map_extract(MAP {1: 1, 2: NULL, 3:3}, 1), map_extract(MAP {1: 1, 2: NULL, 3:3}, 2), map_extract(MAP {1: 1, 2: NULL, 3:3}, 3), map_extract(MAP {1: 1, 2: NULL, 3:3}, 4); ---- -[1] [] [3] [] +[1] [NULL] [3] [NULL] # value is list query ???? select map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 1), map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 2), map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 3), map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 4); ---- -[[1, 2]] [] [[3]] [] +[[1, 2]] [NULL] [[3]] [NULL] # key in map and query key are different types query ????? @@ -578,36 +578,36 @@ select map_extract(MAP {1: 1, 2: 2, 3:3}, '1'), map_extract(MAP {1: 1, 2: 2, 3:3 map_extract(MAP {1.0: 1, 2: 2, 3:3}, '1'), map_extract(MAP {'1': 1, '2': 2, '3':3}, 1.0), map_extract(MAP {arrow_cast('1', 'Utf8View'): 1, arrow_cast('2', 'Utf8View'): 2, arrow_cast('3', 'Utf8View'):3}, '1'); ---- -[1] [1] [1] [] [1] +[1] [1] [1] [NULL] [1] # map_extract with columns query ??? select map_extract(column1, 1), map_extract(column1, 5), map_extract(column1, 7) from map_array_table_1; ---- -[[1, , 3]] [] [] -[] [[4, , 6]] [] -[] [] [[1, , 3]] +[[1, NULL, 3]] [NULL] [NULL] +[NULL] [[4, NULL, 6]] [NULL] +[NULL] [NULL] [[1, NULL, 3]] query ??? select map_extract(column1, column2), map_extract(column1, column3), map_extract(column1, column4) from map_array_table_1; ---- -[[1, , 3]] [[1, , 3]] [[1, , 3]] -[[4, , 6]] [[4, , 6]] [[4, , 6]] -[] [] [] +[[1, NULL, 3]] [[1, NULL, 3]] [[1, NULL, 3]] +[[4, NULL, 6]] [[4, NULL, 6]] [[4, NULL, 6]] +[NULL] [NULL] [NULL] query ??? 
select map_extract(column1, column2), map_extract(column1, column3), map_extract(column1, column4) from map_array_table_2; ---- -[[1, , 3]] [] [[1, , 3]] -[[4, , 6]] [] [[4, , 6]] -[] [] [] +[[1, NULL, 3]] [NULL] [[1, NULL, 3]] +[[4, NULL, 6]] [NULL] [[4, NULL, 6]] +[NULL] [NULL] [NULL] query ??? select map_extract(column1, 1), map_extract(column1, 5), map_extract(column1, 7) from map_array_table_2; ---- -[[1, , 3]] [] [] -[] [[4, , 6]] [] -[] [] [[1, , 3]] +[[1, NULL, 3]] [NULL] [NULL] +[NULL] [[4, NULL, 6]] [NULL] +[NULL] [NULL] [[1, NULL, 3]] # Tests for map_keys @@ -706,9 +706,9 @@ SELECT map_values(Map{}); query ? SELECT map_values(column1) from map_array_table_1; ---- -[[1, , 3], [4, , 6], [7, 8, 9]] -[[1, , 3], [4, , 6], [7, 8, 9]] -[[1, , 3], [9, , 6], [7, 8, 9]] +[[1, NULL, 3], [4, NULL, 6], [7, 8, 9]] +[[1, NULL, 3], [4, NULL, 6], [7, 8, 9]] +[[1, NULL, 3], [9, NULL, 6], [7, 8, 9]] statement ok drop table map_array_table_1; diff --git a/datafusion/sqllogictest/test_files/parquet.slt b/datafusion/sqllogictest/test_files/parquet.slt index 253ebb9ea0ac..0faa7bd6b0bf 100644 --- a/datafusion/sqllogictest/test_files/parquet.slt +++ b/datafusion/sqllogictest/test_files/parquet.slt @@ -339,8 +339,8 @@ query ?? SELECT int64_list, utf8_list FROM list_columns ---- [1, 2, 3] [abc, efg, hij] -[, 1] NULL -[4] [efg, , hij, xyz] +[NULL, 1] NULL +[4] [efg, NULL, hij, xyz] statement ok DROP TABLE list_columns; diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index 6f60ed8583c3..107721c5fe9d 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -1729,7 +1729,7 @@ SELECT make_array(c1, cast(c2 as varchar)) FROM test ---- [, 0] [a, 1] -[aa, ] +[aa, NULL] [aaa, 3] statement ok diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index 7596b820c688..b05e86e5ea91 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -170,47 +170,47 @@ select named_struct(values.c, 'c') from values; query ? select named_struct('scalar', 27, 'array', values.a, 'null', NULL) from values; ---- -{scalar: 27, array: 1, null: } -{scalar: 27, array: 2, null: } -{scalar: 27, array: 3, null: } +{scalar: 27, array: 1, null: NULL} +{scalar: 27, array: 2, null: NULL} +{scalar: 27, array: 3, null: NULL} query ? select {'scalar': 27, 'array': values.a, 'null': NULL} from values; ---- -{scalar: 27, array: 1, null: } -{scalar: 27, array: 2, null: } -{scalar: 27, array: 3, null: } +{scalar: 27, array: 1, null: NULL} +{scalar: 27, array: 2, null: NULL} +{scalar: 27, array: 3, null: NULL} # named_struct with mixed scalar and array values #2 query ? select named_struct('array', values.a, 'scalar', 27, 'null', NULL) from values; ---- -{array: 1, scalar: 27, null: } -{array: 2, scalar: 27, null: } -{array: 3, scalar: 27, null: } +{array: 1, scalar: 27, null: NULL} +{array: 2, scalar: 27, null: NULL} +{array: 3, scalar: 27, null: NULL} query ? select {'array': values.a, 'scalar': 27, 'null': NULL} from values; ---- -{array: 1, scalar: 27, null: } -{array: 2, scalar: 27, null: } -{array: 3, scalar: 27, null: } +{array: 1, scalar: 27, null: NULL} +{array: 2, scalar: 27, null: NULL} +{array: 3, scalar: 27, null: NULL} # named_struct with mixed scalar and array values #3 query ? 
select named_struct('null', NULL, 'array', values.a, 'scalar', 27) from values;
----
-{null: , array: 1, scalar: 27}
-{null: , array: 2, scalar: 27}
-{null: , array: 3, scalar: 27}
+{null: NULL, array: 1, scalar: 27}
+{null: NULL, array: 2, scalar: 27}
+{null: NULL, array: 3, scalar: 27}

# named_struct with mixed scalar and array values #4
query ?
select named_struct('null_array', values.n, 'array', values.a, 'scalar', 27, 'null', NULL) from values;
----
-{null_array: , array: 1, scalar: 27, null: }
-{null_array: , array: 2, scalar: 27, null: }
-{null_array: , array: 3, scalar: 27, null: }
+{null_array: NULL, array: 1, scalar: 27, null: NULL}
+{null_array: NULL, array: 2, scalar: 27, null: NULL}
+{null_array: NULL, array: 3, scalar: 27, null: NULL}

# named_struct arrays only
query ?
diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt
index 2685e18427ca..362f0df11a7c 100644
--- a/datafusion/sqllogictest/test_files/unnest.slt
+++ b/datafusion/sqllogictest/test_files/unnest.slt
@@ -560,17 +560,17 @@ select unnest(column2), unnest(unnest(column2)), unnest(unnest(unnest(column2)))
[[1], [2]] [1, 1] NULL NULL
[[1, 1]] NULL NULL NULL
[[3, 4], [5]] [3, 4] 3 4
-[[, 6], , [7, 8]] [5] 4 5
-[[3, 4], [5]] [, 6] 5 6
-[[, 6], , [7, 8]] NULL NULL NULL
+[[NULL, 6], NULL, [7, 8]] [5] 4 5
+[[3, 4], [5]] [NULL, 6] 5 6
+[[NULL, 6], NULL, [7, 8]] NULL NULL NULL
NULL [7, 8] NULL NULL
[[3, 4], [5]] [3, 4] NULL NULL
-[[, 6], , [7, 8]] [5] 6 7
-[[3, 4], [5]] [, 6] NULL NULL
-[[, 6], , [7, 8]] NULL NULL NULL
+[[NULL, 6], NULL, [7, 8]] [5] 6 7
+[[3, 4], [5]] [NULL, 6] NULL NULL
+[[NULL, 6], NULL, [7, 8]] NULL NULL NULL
NULL [7, 8] NULL NULL
[[3, 4], [5]] NULL 7 8
-[[, 6], , [7, 8]] NULL 8 9
+[[NULL, 6], NULL, [7, 8]] NULL 8 9

## the same composite expr (unnest(field_access(unnest(col)))) containing unnest is referenced multiple times
query ??II
@@ -624,13 +624,13 @@ select unnest(unnest(unnest(column2))), column2 from recursive_unnest_table;
2 [[[1], [2]], [[1, 1]]]
1 [[[1], [2]], [[1, 1]]]
1 [[[1], [2]], [[1, 1]]]
-3 [[[3, 4], [5]], [[, 6], , [7, 8]]]
-4 [[[3, 4], [5]], [[, 6], , [7, 8]]]
-5 [[[3, 4], [5]], [[, 6], , [7, 8]]]
-NULL [[[3, 4], [5]], [[, 6], , [7, 8]]]
-6 [[[3, 4], [5]], [[, 6], , [7, 8]]]
-7 [[[3, 4], [5]], [[, 6], , [7, 8]]]
-8 [[[3, 4], [5]], [[, 6], , [7, 8]]]
+3 [[[3, 4], [5]], [[NULL, 6], NULL, [7, 8]]]
+4 [[[3, 4], [5]], [[NULL, 6], NULL, [7, 8]]]
+5 [[[3, 4], [5]], [[NULL, 6], NULL, [7, 8]]]
+NULL [[[3, 4], [5]], [[NULL, 6], NULL, [7, 8]]]
+6 [[[3, 4], [5]], [[NULL, 6], NULL, [7, 8]]]
+7 [[[3, 4], [5]], [[NULL, 6], NULL, [7, 8]]]
+8 [[[3, 4], [5]], [[NULL, 6], NULL, [7, 8]]]


query I??
@@ -780,7 +780,7 @@ select unnest(unnest(column2)) c2 from recursive_unnest_table group by c2 order
[3, 4]
[5]
[7, 8]
-[, 6]
+[NULL, 6]
NULL

query ?I
@@ -792,7 +792,7 @@ select unnest(unnest(column2)) c2, count(column3) from recursive_unnest_table gr
[3, 4] 1
[5] 1
[7, 8] 1
-[, 6] 1
+[NULL, 6] 1
NULL 1

query error DataFusion error: Error during planning: Projection references non\-aggregate values
@@ -808,7 +808,7 @@ select unnest(column5), * from unnest_table;
1 2 [1, 2, 3] [7] 1 [13, 14] {c0: 1, c1: 2}
3 4 [4, 5] [8, 9, 10] 2 [15, 16] {c0: 3, c1: 4}
NULL NULL [6] [11, 12] 3 NULL NULL
-7 8 [12] [, 42, ] NULL NULL {c0: 7, c1: 8}
+7 8 [12] [NULL, 42, NULL] NULL NULL {c0: 7, c1: 8}
NULL NULL NULL NULL 4 [17, 18] NULL

query TT????
@@ -821,7 +821,7 @@
query ?????
select unnest(unnest(column3)), * from recursive_unnest_table ---- [1] [[1, 2]] {c0: [1], c1: a} [[[1], [2]], [[1, 1]]] [{c0: [1], c1: [[1, 2]]}] -[2] [[3], [4]] {c0: [2], c1: b} [[[3, 4], [5]], [[, 6], , [7, 8]]] [{c0: [2], c1: [[3], [4]]}] +[2] [[3], [4]] {c0: [2], c1: b} [[[3, 4], [5]], [[NULL, 6], NULL, [7, 8]]] [{c0: [2], c1: [[3], [4]]}] statement ok CREATE TABLE join_table @@ -844,7 +844,7 @@ select unnest(column5), * except (column5, column1) from unnest_table; 1 2 [7] 1 [13, 14] 3 4 [8, 9, 10] 2 [15, 16] NULL NULL [11, 12] 3 NULL -7 8 [, 42, ] NULL NULL +7 8 [NULL, 42, NULL] NULL NULL NULL NULL NULL 4 [17, 18] query III diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index ac0978683c36..a64ed597e007 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -4076,7 +4076,7 @@ SELECT MAP('type', 'test'); SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]); ---- -{POST: 41, HEAD: 33, PATCH: } +{POST: 41, HEAD: 33, PATCH: NULL} SELECT MAP([[1,2], [3,4]], ['a', 'b']); ----