Skip to content

Commit

Permalink
refactor: replace usage of ArrayData by clone (#2827)
Browse files Browse the repository at this point in the history
* refactor: use array clone()

* refactor: slice

* chore: clippy
  • Loading branch information
QuenKar authored Nov 30, 2023
1 parent 9ccd182 commit 2332305
Show file tree
Hide file tree
Showing 8 changed files with 112 additions and 334 deletions.
7 changes: 3 additions & 4 deletions src/datatypes/src/vectors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,17 +229,16 @@ macro_rules! impl_try_from_arrow_array_for_vector {
) -> crate::error::Result<$Vector> {
use snafu::OptionExt;

let data = array
let arrow_array = array
.as_ref()
.as_any()
.downcast_ref::<$Array>()
.with_context(|| crate::error::ConversionSnafu {
from: std::format!("{:?}", array.as_ref().data_type()),
})?
.to_data();
.clone();

let concrete_array = $Array::from(data);
Ok($Vector::from(concrete_array))
Ok($Vector::from(arrow_array))
}
}
};
Expand Down
12 changes: 3 additions & 9 deletions src/datatypes/src/vectors/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
use std::any::Any;
use std::sync::Arc;

use arrow::array::{Array, ArrayBuilder, ArrayData, ArrayIter, ArrayRef};
use arrow::array::{Array, ArrayBuilder, ArrayIter, ArrayRef};
use snafu::ResultExt;

use crate::arrow_array::{BinaryArray, MutableBinaryArray};
Expand All @@ -36,10 +36,6 @@ impl BinaryVector {
/// Borrows the wrapped Arrow array as a type-erased `&dyn Array`.
///
/// No data is copied; the returned reference points at `self.array`.
pub(crate) fn as_arrow(&self) -> &dyn Array {
&self.array
}

fn to_array_data(&self) -> ArrayData {
self.array.to_data()
}
}

impl From<BinaryArray> for BinaryVector {
Expand Down Expand Up @@ -74,13 +70,11 @@ impl Vector for BinaryVector {
}

fn to_arrow_array(&self) -> ArrayRef {
let data = self.to_array_data();
Arc::new(BinaryArray::from(data))
Arc::new(self.array.clone())
}

fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
let data = self.to_array_data();
Box::new(BinaryArray::from(data))
Box::new(self.array.clone())
}

fn validity(&self) -> Validity {
Expand Down
23 changes: 4 additions & 19 deletions src/datatypes/src/vectors/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@ use std::any::Any;
use std::borrow::Borrow;
use std::sync::Arc;

use arrow::array::{
Array, ArrayBuilder, ArrayData, ArrayIter, ArrayRef, BooleanArray, BooleanBuilder,
};
use arrow::array::{Array, ArrayBuilder, ArrayIter, ArrayRef, BooleanArray, BooleanBuilder};
use snafu::ResultExt;

use crate::data_type::ConcreteDataType;
Expand All @@ -44,16 +42,6 @@ impl BooleanVector {
&self.array
}

fn to_array_data(&self) -> ArrayData {
self.array.to_data()
}

fn from_array_data(data: ArrayData) -> BooleanVector {
BooleanVector {
array: BooleanArray::from(data),
}
}

/// Delegates to the wrapped array's `false_count()` and returns its result.
pub(crate) fn false_count(&self) -> usize {
self.array.false_count()
}
Expand Down Expand Up @@ -107,13 +95,11 @@ impl Vector for BooleanVector {
}

fn to_arrow_array(&self) -> ArrayRef {
let data = self.to_array_data();
Arc::new(BooleanArray::from(data))
Arc::new(self.array.clone())
}

fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
let data = self.to_array_data();
Box::new(BooleanArray::from(data))
Box::new(self.array.clone())
}

fn validity(&self) -> Validity {
Expand All @@ -133,8 +119,7 @@ impl Vector for BooleanVector {
}

fn slice(&self, offset: usize, length: usize) -> VectorRef {
let data = self.array.to_data().slice(offset, length);
Arc::new(Self::from_array_data(data))
Arc::new(Self::from(self.array.slice(offset, length)))
}

fn get(&self, index: usize) -> Value {
Expand Down
39 changes: 19 additions & 20 deletions src/datatypes/src/vectors/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,7 @@ impl Vector for Decimal128Vector {
}

fn slice(&self, offset: usize, length: usize) -> VectorRef {
let array = self.array.slice(offset, length);
Arc::new(Self { array })
Arc::new(self.get_slice(offset, length))
}

fn get(&self, index: usize) -> Value {
Expand Down Expand Up @@ -535,23 +534,23 @@ pub mod tests {
// 100 is out of the Decimal(3, 1) range, so it will be null
assert!(array.is_null(4));
}
}

#[test]
fn test_decimal28_vector_iter_data() {
let vector = Decimal128Vector::from_values(vec![1, 2, 3, 4])
.with_precision_and_scale(3, 1)
.unwrap();
let mut iter = vector.iter_data();
assert_eq!(iter.next(), Some(Some(Decimal128::new(1, 3, 1))));
assert_eq!(iter.next(), Some(Some(Decimal128::new(2, 3, 1))));
assert_eq!(iter.next(), Some(Some(Decimal128::new(3, 3, 1))));
assert_eq!(iter.next(), Some(Some(Decimal128::new(4, 3, 1))));
assert_eq!(iter.next(), None);

let values = vector
.iter_data()
.filter_map(|v| v.map(|x| x.val() * 2))
.collect::<Vec<_>>();
assert_eq!(values, vec![2, 4, 6, 8]);
/// Exercises `Decimal128Vector::iter_data` on a four-element vector built
/// with precision 3 and scale 1.
#[test]
fn test_decimal28_vector_iter_data() {
let vector = Decimal128Vector::from_values(vec![1, 2, 3, 4])
.with_precision_and_scale(3, 1)
.unwrap();
// Stepping the iterator by hand: each element comes back as
// `Some(Decimal128)` carrying the vector's precision/scale, and the
// iterator is exhausted after the fourth element.
let mut iter = vector.iter_data();
assert_eq!(iter.next(), Some(Some(Decimal128::new(1, 3, 1))));
assert_eq!(iter.next(), Some(Some(Decimal128::new(2, 3, 1))));
assert_eq!(iter.next(), Some(Some(Decimal128::new(3, 3, 1))));
assert_eq!(iter.next(), Some(Some(Decimal128::new(4, 3, 1))));
assert_eq!(iter.next(), None);

// A fresh iterator composes with standard adaptors: doubling each
// element's `val()` is expected to yield [2, 4, 6, 8].
let values = vector
.iter_data()
.filter_map(|v| v.map(|x| x.val() * 2))
.collect::<Vec<_>>();
assert_eq!(values, vec![2, 4, 6, 8]);
}
}
64 changes: 30 additions & 34 deletions src/datatypes/src/vectors/helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -284,23 +284,21 @@ impl Helper {
ArrowDataType::Date64 => Arc::new(DateTimeVector::try_from_arrow_array(array)?),
ArrowDataType::List(_) => Arc::new(ListVector::try_from_arrow_array(array)?),
ArrowDataType::Timestamp(unit, _) => match unit {
TimeUnit::Second => Arc::new(
TimestampSecondVector::try_from_arrow_timestamp_array(array)?,
),
TimeUnit::Millisecond => Arc::new(
TimestampMillisecondVector::try_from_arrow_timestamp_array(array)?,
),
TimeUnit::Microsecond => Arc::new(
TimestampMicrosecondVector::try_from_arrow_timestamp_array(array)?,
),
TimeUnit::Nanosecond => Arc::new(
TimestampNanosecondVector::try_from_arrow_timestamp_array(array)?,
),
TimeUnit::Second => Arc::new(TimestampSecondVector::try_from_arrow_array(array)?),
TimeUnit::Millisecond => {
Arc::new(TimestampMillisecondVector::try_from_arrow_array(array)?)
}
TimeUnit::Microsecond => {
Arc::new(TimestampMicrosecondVector::try_from_arrow_array(array)?)
}
TimeUnit::Nanosecond => {
Arc::new(TimestampNanosecondVector::try_from_arrow_array(array)?)
}
},
ArrowDataType::Time32(unit) => match unit {
TimeUnit::Second => Arc::new(TimeSecondVector::try_from_arrow_time_array(array)?),
TimeUnit::Second => Arc::new(TimeSecondVector::try_from_arrow_array(array)?),
TimeUnit::Millisecond => {
Arc::new(TimeMillisecondVector::try_from_arrow_time_array(array)?)
Arc::new(TimeMillisecondVector::try_from_arrow_array(array)?)
}
// Arrow uses Time32 for second/millisecond.
_ => unreachable!(
Expand All @@ -310,10 +308,10 @@ impl Helper {
},
ArrowDataType::Time64(unit) => match unit {
TimeUnit::Microsecond => {
Arc::new(TimeMicrosecondVector::try_from_arrow_time_array(array)?)
Arc::new(TimeMicrosecondVector::try_from_arrow_array(array)?)
}
TimeUnit::Nanosecond => {
Arc::new(TimeNanosecondVector::try_from_arrow_time_array(array)?)
Arc::new(TimeNanosecondVector::try_from_arrow_array(array)?)
}
// Arrow uses Time64 for microsecond/nanosecond.
_ => unreachable!(
Expand All @@ -322,29 +320,27 @@ impl Helper {
),
},
ArrowDataType::Interval(unit) => match unit {
IntervalUnit::YearMonth => Arc::new(
IntervalYearMonthVector::try_from_arrow_interval_array(array)?,
),
IntervalUnit::YearMonth => {
Arc::new(IntervalYearMonthVector::try_from_arrow_array(array)?)
}
IntervalUnit::DayTime => {
Arc::new(IntervalDayTimeVector::try_from_arrow_interval_array(array)?)
Arc::new(IntervalDayTimeVector::try_from_arrow_array(array)?)
}
IntervalUnit::MonthDayNano => {
Arc::new(IntervalMonthDayNanoVector::try_from_arrow_array(array)?)
}
IntervalUnit::MonthDayNano => Arc::new(
IntervalMonthDayNanoVector::try_from_arrow_interval_array(array)?,
),
},
ArrowDataType::Duration(unit) => match unit {
TimeUnit::Second => {
Arc::new(DurationSecondVector::try_from_arrow_duration_array(array)?)
TimeUnit::Second => Arc::new(DurationSecondVector::try_from_arrow_array(array)?),
TimeUnit::Millisecond => {
Arc::new(DurationMillisecondVector::try_from_arrow_array(array)?)
}
TimeUnit::Microsecond => {
Arc::new(DurationMicrosecondVector::try_from_arrow_array(array)?)
}
TimeUnit::Nanosecond => {
Arc::new(DurationNanosecondVector::try_from_arrow_array(array)?)
}
TimeUnit::Millisecond => Arc::new(
DurationMillisecondVector::try_from_arrow_duration_array(array)?,
),
TimeUnit::Microsecond => Arc::new(
DurationMicrosecondVector::try_from_arrow_duration_array(array)?,
),
TimeUnit::Nanosecond => Arc::new(
DurationNanosecondVector::try_from_arrow_duration_array(array)?,
),
},
ArrowDataType::Decimal128(_, _) => {
Arc::new(Decimal128Vector::try_from_arrow_array(array)?)
Expand Down
23 changes: 6 additions & 17 deletions src/datatypes/src/vectors/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,6 @@ impl ListVector {
.map(|value_opt| value_opt.map(Helper::try_into_vector).transpose())
}

fn to_array_data(&self) -> ArrayData {
self.array.to_data()
}

fn from_array_data_and_type(data: ArrayData, item_type: ConcreteDataType) -> Self {
Self {
array: ListArray::from(data),
item_type,
}
}

/// Borrows the wrapped Arrow array as a type-erased `&dyn Array`.
///
/// No data is copied; the returned reference points at `self.array`.
pub(crate) fn as_arrow(&self) -> &dyn Array {
&self.array
}
Expand All @@ -80,13 +69,11 @@ impl Vector for ListVector {
}

fn to_arrow_array(&self) -> ArrayRef {
let data = self.to_array_data();
Arc::new(ListArray::from(data))
Arc::new(self.array.clone())
}

fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
let data = self.to_array_data();
Box::new(ListArray::from(data))
Box::new(self.array.clone())
}

fn validity(&self) -> Validity {
Expand All @@ -106,8 +93,10 @@ impl Vector for ListVector {
}

fn slice(&self, offset: usize, length: usize) -> VectorRef {
let data = self.array.to_data().slice(offset, length);
Arc::new(Self::from_array_data_and_type(data, self.item_type.clone()))
Arc::new(Self {
array: self.array.slice(offset, length),
item_type: self.item_type.clone(),
})
}

fn get(&self, index: usize) -> Value {
Expand Down
Loading

0 comments on commit 2332305

Please sign in to comment.