From 9011483d2df163ae68de2db4488cb91f1e8de9f9 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Mon, 23 Sep 2024 14:32:59 +0100 Subject: [PATCH 1/5] VortexScanExec reports statistics to datafusion --- vortex-array/src/array/chunked/variants.rs | 28 ++++++- vortex-array/src/array/constant/variants.rs | 11 ++- vortex-array/src/array/sparse/variants.rs | 21 +++++- vortex-array/src/array/struct_/mod.rs | 13 ++-- vortex-array/src/stats/statsset.rs | 4 + vortex-array/src/variants.rs | 5 +- vortex-datafusion/src/lib.rs | 81 ++++++++++++++++++--- vortex-scalar/src/struct_.rs | 31 +++++++- 8 files changed, 173 insertions(+), 21 deletions(-) diff --git a/vortex-array/src/array/chunked/variants.rs b/vortex-array/src/array/chunked/variants.rs index e5bb6ef84f..d5a3eb17af 100644 --- a/vortex-array/src/array/chunked/variants.rs +++ b/vortex-array/src/array/chunked/variants.rs @@ -1,5 +1,6 @@ +use vortex_dtype::field::Field; use vortex_dtype::DType; -use vortex_error::vortex_panic; +use vortex_error::{vortex_err, vortex_panic, VortexResult}; use crate::array::chunked::ChunkedArray; use crate::variants::{ @@ -65,8 +66,7 @@ impl StructArrayTrait for ChunkedArray { fn field(&self, idx: usize) -> Option { let mut chunks = Vec::with_capacity(self.nchunks()); for chunk in self.chunks() { - let array = chunk.with_dyn(|a| a.as_struct_array().and_then(|s| s.field(idx)))?; - chunks.push(array); + chunks.push(chunk.with_dyn(|a| a.as_struct_array().and_then(|s| s.field(idx)))?); } let projected_dtype = self.dtype().as_struct().and_then(|s| s.dtypes().get(idx))?; @@ -81,6 +81,28 @@ impl StructArrayTrait for ChunkedArray { .into_array(); Some(chunked) } + + fn project(&self, projection: &[Field]) -> VortexResult { + let mut chunks = Vec::with_capacity(self.nchunks()); + for chunk in self.chunks() { + chunks.push(chunk.with_dyn(|a| { + a.as_struct_array() + .ok_or_else(|| vortex_err!("Chunk was not a StructArray"))? + .project(projection) + })?); + } + + let projected_dtype = self + .dtype() + .as_struct() + .ok_or_else(|| vortex_err!("Not a struct dtype"))? + .project(projection)?; + ChunkedArray::try_new( + chunks, + DType::Struct(projected_dtype, self.dtype().nullability()), + ) + .map(|a| a.into_array()) + } } impl ListArrayTrait for ChunkedArray {} diff --git a/vortex-array/src/array/constant/variants.rs b/vortex-array/src/array/constant/variants.rs index 56f6fad9d6..6a3d6a4418 100644 --- a/vortex-array/src/array/constant/variants.rs +++ b/vortex-array/src/array/constant/variants.rs @@ -1,8 +1,9 @@ use std::iter; use std::sync::Arc; +use vortex_dtype::field::Field; use vortex_dtype::{DType, PType}; -use vortex_error::{vortex_panic, VortexError, VortexExpect as _}; +use vortex_error::{vortex_panic, VortexError, VortexExpect as _, VortexResult}; use vortex_scalar::{ExtScalar, Scalar, ScalarValue, StructScalar}; use crate::array::constant::ConstantArray; @@ -190,6 +191,14 @@ impl StructArrayTrait for ConstantArray { .field_by_idx(idx) .map(|scalar| ConstantArray::new(scalar, self.len()).into_array()) } + + fn project(&self, projection: &[Field]) -> VortexResult { + Ok(ConstantArray::new( + StructScalar::try_from(self.scalar())?.project(projection)?, + self.len(), + ) + .into_array()) + } } impl ListArrayTrait for ConstantArray {} diff --git a/vortex-array/src/array/sparse/variants.rs b/vortex-array/src/array/sparse/variants.rs index ed399800e3..b0109e1519 100644 --- a/vortex-array/src/array/sparse/variants.rs +++ b/vortex-array/src/array/sparse/variants.rs @@ -1,5 +1,6 @@ +use vortex_dtype::field::Field; use vortex_dtype::DType; -use vortex_error::VortexExpect; +use vortex_error::{vortex_err, VortexExpect, VortexResult}; use vortex_scalar::StructScalar; use crate::array::sparse::SparseArray; @@ -83,6 +84,24 @@ impl StructArrayTrait for SparseArray { .into_array(), ) } + + fn project(&self, projection: &[Field]) -> VortexResult { + let values = self.values().with_dyn(|s| { + s.as_struct_array() + .ok_or_else(|| vortex_err!("Chunk was not a StructArray"))? + .project(projection) + })?; + let scalar = StructScalar::try_from(self.fill_value())?.project(projection)?; + + SparseArray::try_new_with_offset( + self.indices().clone(), + values, + self.len(), + self.indices_offset(), + scalar, + ) + .map(|a| a.into_array()) + } } impl ListArrayTrait for SparseArray {} diff --git a/vortex-array/src/array/struct_/mod.rs b/vortex-array/src/array/struct_/mod.rs index 4c5f199f8a..cc3c5fafc4 100644 --- a/vortex-array/src/array/struct_/mod.rs +++ b/vortex-array/src/array/struct_/mod.rs @@ -8,7 +8,9 @@ use crate::stats::{ArrayStatisticsCompute, StatsSet}; use crate::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata}; use crate::variants::{ArrayVariants, StructArrayTrait}; use crate::visitor::{AcceptArrayVisitor, ArrayVisitor}; -use crate::{impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoCanonical}; +use crate::{ + impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoArray, IntoCanonical, +}; mod compute; @@ -99,10 +101,7 @@ impl StructArray { /// which specifies the new ordering of columns in the struct. The projection can be used to /// perform column re-ordering, deletion, or duplication at a logical level, without any data /// copying. - /// - /// # Panics - /// This function will panic an error if the projection references columns not within the - /// schema boundaries. + #[allow(clippy::same_name_method)] pub fn project(&self, projection: &[Field]) -> VortexResult { let mut children = Vec::with_capacity(projection.len()); let mut names = Vec::with_capacity(projection.len()); @@ -149,6 +148,10 @@ impl StructArrayTrait for StructArray { .unwrap_or_else(|e| vortex_panic!(e, "StructArray: field {} not found", idx)) }) } + + fn project(&self, projection: &[Field]) -> VortexResult { + self.project(projection).map(|a| a.into_array()) + } } impl IntoCanonical for StructArray { diff --git a/vortex-array/src/stats/statsset.rs b/vortex-array/src/stats/statsset.rs index 0124516f8d..b8e5c8331f 100644 --- a/vortex-array/src/stats/statsset.rs +++ b/vortex-array/src/stats/statsset.rs @@ -62,6 +62,10 @@ impl StatsSet { Self::from(stats) } + pub fn values(&self) -> impl Iterator + '_ { + self.values.iter() + } + pub fn of(stat: Stat, value: Scalar) -> Self { Self::from(HashMap::from([(stat, value)])) } diff --git a/vortex-array/src/variants.rs b/vortex-array/src/variants.rs index 3eb2cd4d39..ad01b69e65 100644 --- a/vortex-array/src/variants.rs +++ b/vortex-array/src/variants.rs @@ -3,8 +3,9 @@ //! When callers only want to make assumptions about the DType, and not about any specific //! encoding, they can use these traits to write encoding-agnostic code. +use vortex_dtype::field::Field; use vortex_dtype::{DType, ExtDType, FieldNames}; -use vortex_error::{vortex_panic, VortexExpect as _}; +use vortex_error::{vortex_panic, VortexExpect as _, VortexResult}; use crate::iter::{AccessorRef, VectorizedArrayIter}; use crate::{Array, ArrayTrait}; @@ -227,6 +228,8 @@ pub trait StructArrayTrait: ArrayTrait { field_idx.and_then(|field_idx| self.field(field_idx)) } + + fn project(&self, projection: &[Field]) -> VortexResult; } pub trait ListArrayTrait: ArrayTrait {} diff --git a/vortex-datafusion/src/lib.rs b/vortex-datafusion/src/lib.rs index d893fd242f..6ed0ed88ce 100644 --- a/vortex-datafusion/src/lib.rs +++ b/vortex-datafusion/src/lib.rs @@ -12,7 +12,11 @@ use arrow_array::RecordBatch; use arrow_schema::{DataType, Schema, SchemaRef}; use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext}; use datafusion::prelude::{DataFrame, SessionContext}; -use datafusion_common::{exec_datafusion_err, DataFusionError, Result as DFResult}; +use datafusion_common::stats::Precision; +use datafusion_common::{ + exec_datafusion_err, ColumnStatistics, DataFusionError, Result as DFResult, ScalarValue, + Statistics, +}; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_expr::{Expr, Operator}; use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties}; @@ -21,9 +25,10 @@ use memory::{VortexMemTable, VortexMemTableOptions}; use persistent::config::VortexTableOptions; use persistent::provider::VortexFileTableProvider; use vortex::array::ChunkedArray; +use vortex::stats::{ArrayStatistics, Stat}; use vortex::{Array, ArrayDType, IntoArrayVariant}; use vortex_dtype::field::Field; -use vortex_error::vortex_err; +use vortex_error::{vortex_err, VortexExpect}; pub mod memory; pub mod persistent; @@ -209,23 +214,21 @@ pub(crate) struct VortexRecordBatchStream { impl Stream for VortexRecordBatchStream { type Item = DFResult; - fn poll_next(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { - let this = self.get_mut(); - if this.idx >= this.num_chunks { + fn poll_next(mut self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { + if self.idx >= self.num_chunks { return Poll::Ready(None); } // Grab next chunk, project and convert to Arrow. - let chunk = this.chunks.chunk(this.idx)?; - this.idx += 1; + let chunk = self.chunks.chunk(self.idx)?; + self.idx += 1; let struct_array = chunk - .clone() .into_struct() .map_err(|vortex_error| DataFusionError::Execution(format!("{}", vortex_error)))?; let projected_struct = struct_array - .project(&this.projection) + .project(&self.projection) .map_err(|vortex_err| { exec_datafusion_err!("projection pushdown to Vortex failed: {vortex_err}") })?; @@ -288,4 +291,64 @@ impl ExecutionPlan for VortexScanExec { .collect(), })) } + + fn statistics(&self) -> DFResult { + let projection = self + .scan_projection + .iter() + .copied() + .map(Field::from) + .collect::>(); + let projected = self + .array + .as_ref() + .with_dyn(|a| { + a.as_struct_array() + .ok_or_else(|| vortex_err!("Not a struct array")) + .and_then(|s| s.project(&projection)) + }) + .map_err(|vortex_err| { + exec_datafusion_err!("projection pushdown to Vortex failed: {vortex_err}") + })?; + let column_statistics = projected.with_dyn(|a| { + let struct_arr = a.as_struct_array_unchecked(); + (0..struct_arr.nfields()) + .map(|i| { + let arr = struct_arr.field(i).vortex_expect("iterating over field"); + ColumnStatistics { + null_count: arr + .statistics() + .get_as::(Stat::NullCount) + .map(|n| n as usize) + .map(Precision::Exact) + .unwrap_or(Precision::Absent), + max_value: arr + .statistics() + .get(Stat::Max) + .map(|n| { + ScalarValue::try_from(n) + .vortex_expect("cannot convert scalar to df scalar") + }) + .map(Precision::Exact) + .unwrap_or(Precision::Absent), + min_value: arr + .statistics() + .get(Stat::Min) + .map(|n| { + ScalarValue::try_from(n) + .vortex_expect("cannot convert scalar to df scalar") + }) + .map(Precision::Exact) + .unwrap_or(Precision::Absent), + distinct_count: Precision::Absent, + } + }) + .collect() + }); + Ok(Statistics { + num_rows: Precision::Exact(self.array.len()), + total_byte_size: Precision::Exact(projected.nbytes()), + column_statistics, + }) + } } diff --git a/vortex-scalar/src/struct_.rs b/vortex-scalar/src/struct_.rs index 6842276ca8..11dc1d5e4d 100644 --- a/vortex-scalar/src/struct_.rs +++ b/vortex-scalar/src/struct_.rs @@ -1,7 +1,8 @@ use std::sync::Arc; +use vortex_dtype::field::Field; use vortex_dtype::DType; -use vortex_error::{vortex_bail, VortexError, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, VortexError, VortexExpect, VortexResult}; use crate::value::ScalarValue; use crate::Scalar; @@ -83,6 +84,34 @@ impl<'a> StructScalar<'a> { Ok(Scalar::null(dtype.clone())) } } + + pub fn project(&self, projection: &[Field]) -> VortexResult { + let struct_dtype = self + .dtype + .as_struct() + .ok_or_else(|| vortex_err!("Not a struct dtype"))?; + let projected_dtype = struct_dtype.project(projection)?; + let new_fields = if let Some(fs) = self.fields() { + ScalarValue::List( + projection + .iter() + .map(|p| match p { + Field::Name(n) => struct_dtype + .find_name(n) + .vortex_expect("DType has been successfully projected already"), + Field::Index(i) => *i, + }) + .map(|i| fs[i].clone()) + .collect(), + ) + } else { + ScalarValue::Null + }; + Ok(Scalar::new( + DType::Struct(projected_dtype, self.dtype().nullability()), + new_fields, + )) + } } impl Scalar { From 4bb2100eb9f6c48b16c4b6ebc14e24d7224b2e40 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Mon, 23 Sep 2024 14:34:39 +0100 Subject: [PATCH 2/5] less --- vortex-array/src/stats/statsset.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/vortex-array/src/stats/statsset.rs b/vortex-array/src/stats/statsset.rs index b8e5c8331f..0124516f8d 100644 --- a/vortex-array/src/stats/statsset.rs +++ b/vortex-array/src/stats/statsset.rs @@ -62,10 +62,6 @@ impl StatsSet { Self::from(stats) } - pub fn values(&self) -> impl Iterator + '_ { - self.values.iter() - } - pub fn of(stat: Stat, value: Scalar) -> Self { Self::from(HashMap::from([(stat, value)])) } From 4ccc3b854e0238c8e425b3157393fc20e9a086b9 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Mon, 23 Sep 2024 14:52:58 +0100 Subject: [PATCH 3/5] simpler --- vortex-datafusion/src/lib.rs | 44 ++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/vortex-datafusion/src/lib.rs b/vortex-datafusion/src/lib.rs index 6ed0ed88ce..1dc78f6e32 100644 --- a/vortex-datafusion/src/lib.rs +++ b/vortex-datafusion/src/lib.rs @@ -21,14 +21,16 @@ use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_expr::{Expr, Operator}; use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties}; use futures::Stream; +use itertools::Itertools; use memory::{VortexMemTable, VortexMemTableOptions}; use persistent::config::VortexTableOptions; use persistent::provider::VortexFileTableProvider; use vortex::array::ChunkedArray; +use vortex::compute::Len; use vortex::stats::{ArrayStatistics, Stat}; use vortex::{Array, ArrayDType, IntoArrayVariant}; use vortex_dtype::field::Field; -use vortex_error::{vortex_err, VortexExpect}; +use vortex_error::{vortex_err, VortexExpect, VortexResult}; pub mod memory; pub mod persistent; @@ -293,28 +295,20 @@ impl ExecutionPlan for VortexScanExec { } fn statistics(&self) -> DFResult { - let projection = self - .scan_projection - .iter() - .copied() - .map(Field::from) - .collect::>(); - let projected = self - .array - .as_ref() - .with_dyn(|a| { - a.as_struct_array() - .ok_or_else(|| vortex_err!("Not a struct array")) - .and_then(|s| s.project(&projection)) - }) - .map_err(|vortex_err| { - exec_datafusion_err!("projection pushdown to Vortex failed: {vortex_err}") - })?; - let column_statistics = projected.with_dyn(|a| { - let struct_arr = a.as_struct_array_unchecked(); - (0..struct_arr.nfields()) + let mut nbytes: usize = 0; + let column_statistics = self.array.as_ref().with_dyn(|a| { + let struct_arr = a + .as_struct_array() + .ok_or_else(|| vortex_err!("Not a struct array"))?; + self.scan_projection + .iter() .map(|i| { - let arr = struct_arr.field(i).vortex_expect("iterating over field"); + struct_arr + .field(*i) + .ok_or_else(|| vortex_err!("Projection references unknown field {i}")) + }) + .map_ok(|arr| { + nbytes += arr.nbytes(); ColumnStatistics { null_count: arr .statistics() @@ -343,11 +337,11 @@ impl ExecutionPlan for VortexScanExec { distinct_count: Precision::Absent, } }) - .collect() - }); + .collect::>>() + })?; Ok(Statistics { num_rows: Precision::Exact(self.array.len()), - total_byte_size: Precision::Exact(projected.nbytes()), + total_byte_size: Precision::Exact(nbytes), column_statistics, }) } From db2310340cc54bf4f86d4b6e91b28991cb278815 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Mon, 23 Sep 2024 14:56:49 +0100 Subject: [PATCH 4/5] fewer --- vortex-array/src/array/chunked/variants.rs | 28 ++----------------- vortex-array/src/array/constant/variants.rs | 11 +------- vortex-array/src/array/sparse/variants.rs | 21 +------------- vortex-array/src/array/struct_/mod.rs | 13 ++++----- vortex-array/src/variants.rs | 5 +--- vortex-scalar/src/struct_.rs | 31 +-------------------- 6 files changed, 12 insertions(+), 97 deletions(-) diff --git a/vortex-array/src/array/chunked/variants.rs b/vortex-array/src/array/chunked/variants.rs index d5a3eb17af..e5bb6ef84f 100644 --- a/vortex-array/src/array/chunked/variants.rs +++ b/vortex-array/src/array/chunked/variants.rs @@ -1,6 +1,5 @@ -use vortex_dtype::field::Field; use vortex_dtype::DType; -use vortex_error::{vortex_err, vortex_panic, VortexResult}; +use vortex_error::vortex_panic; use crate::array::chunked::ChunkedArray; use crate::variants::{ @@ -66,7 +65,8 @@ impl StructArrayTrait for ChunkedArray { fn field(&self, idx: usize) -> Option { let mut chunks = Vec::with_capacity(self.nchunks()); for chunk in self.chunks() { - chunks.push(chunk.with_dyn(|a| a.as_struct_array().and_then(|s| s.field(idx)))?); + let array = chunk.with_dyn(|a| a.as_struct_array().and_then(|s| s.field(idx)))?; + chunks.push(array); } let projected_dtype = self.dtype().as_struct().and_then(|s| s.dtypes().get(idx))?; @@ -81,28 +81,6 @@ impl StructArrayTrait for ChunkedArray { .into_array(); Some(chunked) } - - fn project(&self, projection: &[Field]) -> VortexResult { - let mut chunks = Vec::with_capacity(self.nchunks()); - for chunk in self.chunks() { - chunks.push(chunk.with_dyn(|a| { - a.as_struct_array() - .ok_or_else(|| vortex_err!("Chunk was not a StructArray"))? - .project(projection) - })?); - } - - let projected_dtype = self - .dtype() - .as_struct() - .ok_or_else(|| vortex_err!("Not a struct dtype"))? - .project(projection)?; - ChunkedArray::try_new( - chunks, - DType::Struct(projected_dtype, self.dtype().nullability()), - ) - .map(|a| a.into_array()) - } } impl ListArrayTrait for ChunkedArray {} diff --git a/vortex-array/src/array/constant/variants.rs b/vortex-array/src/array/constant/variants.rs index 6a3d6a4418..56f6fad9d6 100644 --- a/vortex-array/src/array/constant/variants.rs +++ b/vortex-array/src/array/constant/variants.rs @@ -1,9 +1,8 @@ use std::iter; use std::sync::Arc; -use vortex_dtype::field::Field; use vortex_dtype::{DType, PType}; -use vortex_error::{vortex_panic, VortexError, VortexExpect as _, VortexResult}; +use vortex_error::{vortex_panic, VortexError, VortexExpect as _}; use vortex_scalar::{ExtScalar, Scalar, ScalarValue, StructScalar}; use crate::array::constant::ConstantArray; @@ -191,14 +190,6 @@ impl StructArrayTrait for ConstantArray { .field_by_idx(idx) .map(|scalar| ConstantArray::new(scalar, self.len()).into_array()) } - - fn project(&self, projection: &[Field]) -> VortexResult { - Ok(ConstantArray::new( - StructScalar::try_from(self.scalar())?.project(projection)?, - self.len(), - ) - .into_array()) - } } impl ListArrayTrait for ConstantArray {} diff --git a/vortex-array/src/array/sparse/variants.rs b/vortex-array/src/array/sparse/variants.rs index b0109e1519..ed399800e3 100644 --- a/vortex-array/src/array/sparse/variants.rs +++ b/vortex-array/src/array/sparse/variants.rs @@ -1,6 +1,5 @@ -use vortex_dtype::field::Field; use vortex_dtype::DType; -use vortex_error::{vortex_err, VortexExpect, VortexResult}; +use vortex_error::VortexExpect; use vortex_scalar::StructScalar; use crate::array::sparse::SparseArray; @@ -84,24 +83,6 @@ impl StructArrayTrait for SparseArray { .into_array(), ) } - - fn project(&self, projection: &[Field]) -> VortexResult { - let values = self.values().with_dyn(|s| { - s.as_struct_array() - .ok_or_else(|| vortex_err!("Chunk was not a StructArray"))? - .project(projection) - })?; - let scalar = StructScalar::try_from(self.fill_value())?.project(projection)?; - - SparseArray::try_new_with_offset( - self.indices().clone(), - values, - self.len(), - self.indices_offset(), - scalar, - ) - .map(|a| a.into_array()) - } } impl ListArrayTrait for SparseArray {} diff --git a/vortex-array/src/array/struct_/mod.rs b/vortex-array/src/array/struct_/mod.rs index cc3c5fafc4..4c5f199f8a 100644 --- a/vortex-array/src/array/struct_/mod.rs +++ b/vortex-array/src/array/struct_/mod.rs @@ -8,9 +8,7 @@ use crate::stats::{ArrayStatisticsCompute, StatsSet}; use crate::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata}; use crate::variants::{ArrayVariants, StructArrayTrait}; use crate::visitor::{AcceptArrayVisitor, ArrayVisitor}; -use crate::{ - impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoArray, IntoCanonical, -}; +use crate::{impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoCanonical}; mod compute; @@ -101,7 +99,10 @@ impl StructArray { /// which specifies the new ordering of columns in the struct. The projection can be used to /// perform column re-ordering, deletion, or duplication at a logical level, without any data /// copying. - #[allow(clippy::same_name_method)] + /// + /// # Panics + /// This function will panic an error if the projection references columns not within the + /// schema boundaries. pub fn project(&self, projection: &[Field]) -> VortexResult { let mut children = Vec::with_capacity(projection.len()); let mut names = Vec::with_capacity(projection.len()); @@ -148,10 +149,6 @@ impl StructArrayTrait for StructArray { .unwrap_or_else(|e| vortex_panic!(e, "StructArray: field {} not found", idx)) }) } - - fn project(&self, projection: &[Field]) -> VortexResult { - self.project(projection).map(|a| a.into_array()) - } } impl IntoCanonical for StructArray { diff --git a/vortex-array/src/variants.rs b/vortex-array/src/variants.rs index ad01b69e65..3eb2cd4d39 100644 --- a/vortex-array/src/variants.rs +++ b/vortex-array/src/variants.rs @@ -3,9 +3,8 @@ //! When callers only want to make assumptions about the DType, and not about any specific //! encoding, they can use these traits to write encoding-agnostic code. -use vortex_dtype::field::Field; use vortex_dtype::{DType, ExtDType, FieldNames}; -use vortex_error::{vortex_panic, VortexExpect as _, VortexResult}; +use vortex_error::{vortex_panic, VortexExpect as _}; use crate::iter::{AccessorRef, VectorizedArrayIter}; use crate::{Array, ArrayTrait}; @@ -228,8 +227,6 @@ pub trait StructArrayTrait: ArrayTrait { field_idx.and_then(|field_idx| self.field(field_idx)) } - - fn project(&self, projection: &[Field]) -> VortexResult; } pub trait ListArrayTrait: ArrayTrait {} diff --git a/vortex-scalar/src/struct_.rs b/vortex-scalar/src/struct_.rs index 11dc1d5e4d..6842276ca8 100644 --- a/vortex-scalar/src/struct_.rs +++ b/vortex-scalar/src/struct_.rs @@ -1,8 +1,7 @@ use std::sync::Arc; -use vortex_dtype::field::Field; use vortex_dtype::DType; -use vortex_error::{vortex_bail, vortex_err, VortexError, VortexExpect, VortexResult}; +use vortex_error::{vortex_bail, VortexError, VortexResult}; use crate::value::ScalarValue; use crate::Scalar; @@ -84,34 +83,6 @@ impl<'a> StructScalar<'a> { Ok(Scalar::null(dtype.clone())) } } - - pub fn project(&self, projection: &[Field]) -> VortexResult { - let struct_dtype = self - .dtype - .as_struct() - .ok_or_else(|| vortex_err!("Not a struct dtype"))?; - let projected_dtype = struct_dtype.project(projection)?; - let new_fields = if let Some(fs) = self.fields() { - ScalarValue::List( - projection - .iter() - .map(|p| match p { - Field::Name(n) => struct_dtype - .find_name(n) - .vortex_expect("DType has been successfully projected already"), - Field::Index(i) => *i, - }) - .map(|i| fs[i].clone()) - .collect(), - ) - } else { - ScalarValue::Null - }; - Ok(Scalar::new( - DType::Struct(projected_dtype, self.dtype().nullability()), - new_fields, - )) - } } impl Scalar { From c5254563113fe2b7f9848413331b289f11653b29 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Mon, 23 Sep 2024 15:00:55 +0100 Subject: [PATCH 5/5] less --- vortex-datafusion/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/vortex-datafusion/src/lib.rs b/vortex-datafusion/src/lib.rs index 1dc78f6e32..a42c5ddd31 100644 --- a/vortex-datafusion/src/lib.rs +++ b/vortex-datafusion/src/lib.rs @@ -26,7 +26,6 @@ use memory::{VortexMemTable, VortexMemTableOptions}; use persistent::config::VortexTableOptions; use persistent::provider::VortexFileTableProvider; use vortex::array::ChunkedArray; -use vortex::compute::Len; use vortex::stats::{ArrayStatistics, Stat}; use vortex::{Array, ArrayDType, IntoArrayVariant}; use vortex_dtype::field::Field;