From 98e894240cc621a790864567b079c82cd4ec3b95 Mon Sep 17 00:00:00 2001 From: Ian Lai <108986288+Chen-Yuan-Lai@users.noreply.github.com> Date: Mon, 20 Jan 2025 09:10:31 +0800 Subject: [PATCH] refactor: switch BooleanBufferBuilder to NullBufferBuilder in functions-nested functions (#14201) Co-authored-by: Cheng-Yuan-Lai --- datafusion/functions-nested/src/concat.rs | 11 +++++------ datafusion/functions-nested/src/range.rs | 10 +++++----- datafusion/functions-nested/src/replace.rs | 12 ++++++------ datafusion/functions-nested/src/resize.rs | 11 +++++------ 4 files changed, 21 insertions(+), 23 deletions(-) diff --git a/datafusion/functions-nested/src/concat.rs b/datafusion/functions-nested/src/concat.rs index 934c5a5fec73..a6557e36da37 100644 --- a/datafusion/functions-nested/src/concat.rs +++ b/datafusion/functions-nested/src/concat.rs @@ -22,7 +22,7 @@ use std::{any::Any, cmp::Ordering}; use arrow::array::{Capacities, MutableArrayData}; use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait}; -use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer}; +use arrow_buffer::{NullBufferBuilder, OffsetBuffer}; use arrow_schema::{DataType, Field}; use datafusion_common::Result; use datafusion_common::{ @@ -354,7 +354,7 @@ fn concat_internal(args: &[ArrayRef]) -> Result { let mut array_lengths = vec![]; let mut arrays = vec![]; - let mut valid = BooleanBufferBuilder::new(row_count); + let mut valid = NullBufferBuilder::new(row_count); for i in 0..row_count { let nulls = list_arrays .iter() @@ -365,7 +365,7 @@ fn concat_internal(args: &[ArrayRef]) -> Result { let is_null = nulls.iter().all(|&x| x); if is_null { array_lengths.push(0); - valid.append(false); + valid.append_null(); } else { // Get all the arrays on i-th row let values = list_arrays @@ -382,12 +382,11 @@ fn concat_internal(args: &[ArrayRef]) -> Result { let concatenated_array = arrow::compute::concat(elements.as_slice())?; array_lengths.push(concatenated_array.len()); arrays.push(concatenated_array); - valid.append(true); + valid.append_non_null(); } } // Assume all arrays have the same data type let data_type = list_arrays[0].value_type(); - let buffer = valid.finish(); let elements = arrays .iter() @@ -398,7 +397,7 @@ fn concat_internal(args: &[ArrayRef]) -> Result { Arc::new(Field::new_list_field(data_type, true)), OffsetBuffer::from_lengths(array_lengths), Arc::new(arrow::compute::concat(elements.as_slice())?), - Some(NullBuffer::new(buffer)), + valid.finish(), ); Ok(Arc::new(list_arr)) diff --git a/datafusion/functions-nested/src/range.rs b/datafusion/functions-nested/src/range.rs index 4f8132f59e85..ff148f04ac5f 100644 --- a/datafusion/functions-nested/src/range.rs +++ b/datafusion/functions-nested/src/range.rs @@ -27,7 +27,7 @@ use arrow_array::types::{ Date32Type, IntervalMonthDayNanoType, TimestampNanosecondType as TSNT, }; use arrow_array::{NullArray, TimestampNanosecondArray}; -use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer}; +use arrow_buffer::{NullBufferBuilder, OffsetBuffer}; use arrow_schema::DataType::*; use arrow_schema::IntervalUnit::MonthDayNano; use arrow_schema::TimeUnit::Nanosecond; @@ -345,7 +345,7 @@ pub(super) fn gen_range_inner( let mut values = vec![]; let mut offsets = vec![0]; - let mut valid = BooleanBufferBuilder::new(stop_array.len()); + let mut valid = NullBufferBuilder::new(stop_array.len()); for (idx, stop) in stop_array.iter().enumerate() { match retrieve_range_args(start_array, stop, step_array, idx) { Some((_, _, 0)) => { @@ -369,12 +369,12 @@ pub(super) fn gen_range_inner( .step_by(step_abs), ); offsets.push(values.len() as i32); - valid.append(true); + valid.append_non_null(); } // If any of the arguments is NULL, append a NULL value to the result. None => { offsets.push(values.len() as i32); - valid.append(false); + valid.append_null(); } }; } @@ -382,7 +382,7 @@ pub(super) fn gen_range_inner( Arc::new(Field::new_list_field(Int64, true)), OffsetBuffer::new(offsets.into()), Arc::new(Int64Array::from(values)), - Some(NullBuffer::new(valid.finish())), + valid.finish(), )?); Ok(arr) } diff --git a/datafusion/functions-nested/src/replace.rs b/datafusion/functions-nested/src/replace.rs index 0d3db07c647f..106887c51396 100644 --- a/datafusion/functions-nested/src/replace.rs +++ b/datafusion/functions-nested/src/replace.rs @@ -23,7 +23,7 @@ use arrow::array::{ use arrow::datatypes::DataType; use arrow_array::GenericListArray; -use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer}; +use arrow_buffer::{NullBufferBuilder, OffsetBuffer}; use arrow_schema::Field; use datafusion_common::cast::as_int64_array; use datafusion_common::{exec_err, Result}; @@ -306,12 +306,12 @@ fn general_replace( capacity, ); - let mut valid = BooleanBufferBuilder::new(list_array.len()); + let mut valid = NullBufferBuilder::new(list_array.len()); for (row_index, offset_window) in list_array.offsets().windows(2).enumerate() { if list_array.is_null(row_index) { offsets.push(offsets[row_index]); - valid.append(false); + valid.append_null(); continue; } @@ -338,7 +338,7 @@ fn general_replace( end.to_usize().unwrap(), ); offsets.push(offsets[row_index] + (end - start)); - valid.append(true); + valid.append_non_null(); continue; } @@ -367,7 +367,7 @@ fn general_replace( } offsets.push(offsets[row_index] + (end - start)); - valid.append(true); + valid.append_non_null(); } let data = mutable.freeze(); @@ -376,7 +376,7 @@ fn general_replace( Arc::new(Field::new_list_field(list_array.value_type(), true)), OffsetBuffer::::new(offsets.into()), arrow_array::make_array(data), - Some(NullBuffer::new(valid.finish())), + valid.finish(), )?)) } diff --git a/datafusion/functions-nested/src/resize.rs b/datafusion/functions-nested/src/resize.rs index a2b95debd206..441f44e47f6e 100644 --- a/datafusion/functions-nested/src/resize.rs +++ b/datafusion/functions-nested/src/resize.rs @@ -22,7 +22,7 @@ use arrow::array::{Capacities, MutableArrayData}; use arrow_array::{ new_null_array, Array, ArrayRef, GenericListArray, Int64Array, OffsetSizeTrait, }; -use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, OffsetBuffer}; +use arrow_buffer::{ArrowNativeType, NullBufferBuilder, OffsetBuffer}; use arrow_schema::DataType::{FixedSizeList, LargeList, List}; use arrow_schema::{DataType, FieldRef}; use datafusion_common::cast::{as_int64_array, as_large_list_array, as_list_array}; @@ -198,15 +198,15 @@ fn general_list_resize>( capacity, ); - let mut null_builder = BooleanBufferBuilder::new(array.len()); + let mut null_builder = NullBufferBuilder::new(array.len()); for (row_index, offset_window) in array.offsets().windows(2).enumerate() { if array.is_null(row_index) { - null_builder.append(false); + null_builder.append_null(); offsets.push(offsets[row_index]); continue; } - null_builder.append(true); + null_builder.append_non_null(); let count = count_array.value(row_index).to_usize().ok_or_else(|| { internal_datafusion_err!("array_resize: failed to convert size to usize") @@ -234,12 +234,11 @@ fn general_list_resize>( } let data = mutable.freeze(); - let null_bit_buffer: NullBuffer = null_builder.finish().into(); Ok(Arc::new(GenericListArray::::try_new( Arc::clone(field), OffsetBuffer::::new(offsets.into()), arrow_array::make_array(data), - Some(null_bit_buffer), + null_builder.finish(), )?)) }