Skip to content

Commit

Permalink
refactor: switch BooleanBufferBuilder to NullBufferBuilder in functio…
Browse files Browse the repository at this point in the history
…ns-nested functions (apache#14201)

Co-authored-by: Cheng-Yuan-Lai <a186235@g,ail.com>
  • Loading branch information
Chen-Yuan-Lai and Cheng-Yuan-Lai authored Jan 20, 2025
1 parent 12c4c86 commit 98e8942
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 23 deletions.
11 changes: 5 additions & 6 deletions datafusion/functions-nested/src/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use std::{any::Any, cmp::Ordering};

use arrow::array::{Capacities, MutableArrayData};
use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
use arrow_schema::{DataType, Field};
use datafusion_common::Result;
use datafusion_common::{
Expand Down Expand Up @@ -354,7 +354,7 @@ fn concat_internal<O: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {

let mut array_lengths = vec![];
let mut arrays = vec![];
let mut valid = BooleanBufferBuilder::new(row_count);
let mut valid = NullBufferBuilder::new(row_count);
for i in 0..row_count {
let nulls = list_arrays
.iter()
Expand All @@ -365,7 +365,7 @@ fn concat_internal<O: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
let is_null = nulls.iter().all(|&x| x);
if is_null {
array_lengths.push(0);
valid.append(false);
valid.append_null();
} else {
// Get all the arrays on i-th row
let values = list_arrays
Expand All @@ -382,12 +382,11 @@ fn concat_internal<O: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
let concatenated_array = arrow::compute::concat(elements.as_slice())?;
array_lengths.push(concatenated_array.len());
arrays.push(concatenated_array);
valid.append(true);
valid.append_non_null();
}
}
// Assume all arrays have the same data type
let data_type = list_arrays[0].value_type();
let buffer = valid.finish();

let elements = arrays
.iter()
Expand All @@ -398,7 +397,7 @@ fn concat_internal<O: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
Arc::new(Field::new_list_field(data_type, true)),
OffsetBuffer::from_lengths(array_lengths),
Arc::new(arrow::compute::concat(elements.as_slice())?),
Some(NullBuffer::new(buffer)),
valid.finish(),
);

Ok(Arc::new(list_arr))
Expand Down
10 changes: 5 additions & 5 deletions datafusion/functions-nested/src/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use arrow_array::types::{
Date32Type, IntervalMonthDayNanoType, TimestampNanosecondType as TSNT,
};
use arrow_array::{NullArray, TimestampNanosecondArray};
use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
use arrow_schema::DataType::*;
use arrow_schema::IntervalUnit::MonthDayNano;
use arrow_schema::TimeUnit::Nanosecond;
Expand Down Expand Up @@ -345,7 +345,7 @@ pub(super) fn gen_range_inner(

let mut values = vec![];
let mut offsets = vec![0];
let mut valid = BooleanBufferBuilder::new(stop_array.len());
let mut valid = NullBufferBuilder::new(stop_array.len());
for (idx, stop) in stop_array.iter().enumerate() {
match retrieve_range_args(start_array, stop, step_array, idx) {
Some((_, _, 0)) => {
Expand All @@ -369,20 +369,20 @@ pub(super) fn gen_range_inner(
.step_by(step_abs),
);
offsets.push(values.len() as i32);
valid.append(true);
valid.append_non_null();
}
// If any of the arguments is NULL, append a NULL value to the result.
None => {
offsets.push(values.len() as i32);
valid.append(false);
valid.append_null();
}
};
}
let arr = Arc::new(ListArray::try_new(
Arc::new(Field::new_list_field(Int64, true)),
OffsetBuffer::new(offsets.into()),
Arc::new(Int64Array::from(values)),
Some(NullBuffer::new(valid.finish())),
valid.finish(),
)?);
Ok(arr)
}
Expand Down
12 changes: 6 additions & 6 deletions datafusion/functions-nested/src/replace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use arrow::array::{
use arrow::datatypes::DataType;

use arrow_array::GenericListArray;
use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
use arrow_schema::Field;
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, Result};
Expand Down Expand Up @@ -306,12 +306,12 @@ fn general_replace<O: OffsetSizeTrait>(
capacity,
);

let mut valid = BooleanBufferBuilder::new(list_array.len());
let mut valid = NullBufferBuilder::new(list_array.len());

for (row_index, offset_window) in list_array.offsets().windows(2).enumerate() {
if list_array.is_null(row_index) {
offsets.push(offsets[row_index]);
valid.append(false);
valid.append_null();
continue;
}

Expand All @@ -338,7 +338,7 @@ fn general_replace<O: OffsetSizeTrait>(
end.to_usize().unwrap(),
);
offsets.push(offsets[row_index] + (end - start));
valid.append(true);
valid.append_non_null();
continue;
}

Expand Down Expand Up @@ -367,7 +367,7 @@ fn general_replace<O: OffsetSizeTrait>(
}

offsets.push(offsets[row_index] + (end - start));
valid.append(true);
valid.append_non_null();
}

let data = mutable.freeze();
Expand All @@ -376,7 +376,7 @@ fn general_replace<O: OffsetSizeTrait>(
Arc::new(Field::new_list_field(list_array.value_type(), true)),
OffsetBuffer::<O>::new(offsets.into()),
arrow_array::make_array(data),
Some(NullBuffer::new(valid.finish())),
valid.finish(),
)?))
}

Expand Down
11 changes: 5 additions & 6 deletions datafusion/functions-nested/src/resize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use arrow::array::{Capacities, MutableArrayData};
use arrow_array::{
new_null_array, Array, ArrayRef, GenericListArray, Int64Array, OffsetSizeTrait,
};
use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, OffsetBuffer};
use arrow_buffer::{ArrowNativeType, NullBufferBuilder, OffsetBuffer};
use arrow_schema::DataType::{FixedSizeList, LargeList, List};
use arrow_schema::{DataType, FieldRef};
use datafusion_common::cast::{as_int64_array, as_large_list_array, as_list_array};
Expand Down Expand Up @@ -198,15 +198,15 @@ fn general_list_resize<O: OffsetSizeTrait + TryInto<i64>>(
capacity,
);

let mut null_builder = BooleanBufferBuilder::new(array.len());
let mut null_builder = NullBufferBuilder::new(array.len());

for (row_index, offset_window) in array.offsets().windows(2).enumerate() {
if array.is_null(row_index) {
null_builder.append(false);
null_builder.append_null();
offsets.push(offsets[row_index]);
continue;
}
null_builder.append(true);
null_builder.append_non_null();

let count = count_array.value(row_index).to_usize().ok_or_else(|| {
internal_datafusion_err!("array_resize: failed to convert size to usize")
Expand Down Expand Up @@ -234,12 +234,11 @@ fn general_list_resize<O: OffsetSizeTrait + TryInto<i64>>(
}

let data = mutable.freeze();
let null_bit_buffer: NullBuffer = null_builder.finish().into();

Ok(Arc::new(GenericListArray::<O>::try_new(
Arc::clone(field),
OffsetBuffer::<O>::new(offsets.into()),
arrow_array::make_array(data),
Some(null_bit_buffer),
null_builder.finish(),
)?))
}

0 comments on commit 98e8942

Please sign in to comment.