Skip to content

Commit

Permalink
refactor: move Array enum out of public interface (#1212)
Browse files Browse the repository at this point in the history
Makes `Array` an opaque struct, containing a (private) inner enum with
our two current variants.

In the future we can make changes to the inner enum without a breaking
API change.

The other place this would be useful is for `Buffer`. If folks like this
change can make that one as a follow up
  • Loading branch information
a10y authored Nov 5, 2024
1 parent 6904ea0 commit 8282118
Show file tree
Hide file tree
Showing 6 changed files with 151 additions and 129 deletions.
21 changes: 0 additions & 21 deletions vortex-array/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,27 +116,6 @@ impl ArrayData {
}
}

impl ToArray for ArrayData {
fn to_array(&self) -> Array {
Array::Data(self.clone())
}
}

impl From<Array> for ArrayData {
fn from(value: Array) -> ArrayData {
match value {
Array::Data(d) => d,
Array::View(_) => value.with_dyn(|v| v.to_array_data()),
}
}
}

impl From<ArrayData> for Array {
fn from(value: ArrayData) -> Array {
Array::Data(value)
}
}

impl Statistics for ArrayData {
fn get(&self, stat: Stat) -> Option<Scalar> {
self.stats_map
Expand Down
45 changes: 5 additions & 40 deletions vortex-array/src/implementation.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
//! The core Vortex macro to create new encodings and array types.
use vortex_buffer::Buffer;
use vortex_dtype::DType;
use vortex_error::{vortex_bail, VortexError, VortexExpect as _, VortexResult};

use crate::array::visitor::ArrayVisitor;
use crate::encoding::{ArrayEncoding, ArrayEncodingExt, ArrayEncodingRef, EncodingId, EncodingRef};
use crate::stats::{ArrayStatistics, Statistics};
use crate::stats::ArrayStatistics;
use crate::{
Array, ArrayDType, ArrayData, ArrayLen, ArrayMetadata, ArrayTrait, GetArrayMetadata, IntoArray,
Array, ArrayDType, ArrayData, ArrayMetadata, ArrayTrait, GetArrayMetadata, Inner, IntoArray,
ToArrayData, TryDeserializeArrayMetadata,
};

Expand Down Expand Up @@ -169,49 +168,15 @@ impl<T: AsRef<Array>> ArrayEncodingRef for T {
}
}

impl<T: AsRef<Array>> ArrayDType for T {
fn dtype(&self) -> &DType {
match self.as_ref() {
Array::Data(d) => d.dtype(),
Array::View(v) => v.dtype(),
}
}
}

impl<T: AsRef<Array>> ArrayLen for T {
fn len(&self) -> usize {
match self.as_ref() {
Array::Data(d) => d.len(),
Array::View(v) => v.len(),
}
}

fn is_empty(&self) -> bool {
match self.as_ref() {
Array::Data(d) => d.is_empty(),
Array::View(v) => v.is_empty(),
}
}
}

impl<T: AsRef<Array>> ArrayStatistics for T {
fn statistics(&self) -> &(dyn Statistics + '_) {
match self.as_ref() {
Array::Data(d) => d.statistics(),
Array::View(v) => v.statistics(),
}
}
}

impl<D> ToArrayData for D
where
D: IntoArray + ArrayEncodingRef + ArrayStatistics + GetArrayMetadata + Clone,
{
fn to_array_data(&self) -> ArrayData {
let array = self.clone().into_array();
match array {
Array::Data(d) => d,
Array::View(ref view) => {
match array.0 {
Inner::Data(d) => d,
Inner::View(ref view) => {
struct Visitor {
buffer: Option<Buffer>,
children: Vec<Array>,
Expand Down
164 changes: 132 additions & 32 deletions vortex-array/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
//! arrays can be [canonicalized](Canonical) into for ease of access in compute functions.
//!
use std::borrow::Cow;
use std::fmt::{Debug, Display, Formatter};
use std::future::ready;

Expand All @@ -19,11 +20,12 @@ pub use implementation::*;
use itertools::Itertools;
pub use metadata::*;
pub use paste;
use stats::Statistics;
pub use typed::*;
pub use view::*;
use vortex_buffer::Buffer;
use vortex_dtype::DType;
use vortex_error::{vortex_panic, VortexExpect, VortexResult};
use vortex_error::{vortex_err, vortex_panic, VortexExpect, VortexResult};

use crate::array::visitor::{AcceptArrayVisitor, ArrayVisitor};
use crate::compute::ArrayCompute;
Expand Down Expand Up @@ -65,7 +67,10 @@ pub mod flatbuffers {
///
/// This is the main entrypoint for working with in-memory Vortex data, and dispatches work over the underlying encoding or memory representations.
#[derive(Debug, Clone)]
pub enum Array {
pub struct Array(pub(crate) Inner);

#[derive(Debug, Clone)]
pub(crate) enum Inner {
/// Owned [`Array`] with serialized metadata, backed by heap-allocated memory.
Data(ArrayData),
/// Zero-copy view over flatbuffer-encoded [`Array`] data, created without eager serialization.
Expand All @@ -74,25 +79,25 @@ pub enum Array {

impl Array {
pub fn encoding(&self) -> EncodingRef {
match self {
Self::Data(d) => d.encoding(),
Self::View(v) => v.encoding(),
match &self.0 {
Inner::Data(d) => d.encoding(),
Inner::View(v) => v.encoding(),
}
}

/// Returns the number of logical elements in the array.
#[allow(clippy::same_name_method)]
pub fn len(&self) -> usize {
match self {
Self::Data(d) => d.len(),
Self::View(v) => v.len(),
match &self.0 {
Inner::Data(d) => d.len(),
Inner::View(v) => v.len(),
}
}

pub fn is_empty(&self) -> bool {
match self {
Self::Data(d) => d.is_empty(),
Self::View(v) => v.is_empty(),
match &self.0 {
Inner::Data(d) => d.is_empty(),
Inner::View(v) => v.is_empty(),
}
}

Expand All @@ -102,25 +107,27 @@ impl Array {
}

pub fn child<'a>(&'a self, idx: usize, dtype: &'a DType, len: usize) -> VortexResult<Self> {
match self {
Self::Data(d) => d.child(idx, dtype, len).cloned(),
Self::View(v) => v.child(idx, dtype, len).map(Array::View),
match &self.0 {
Inner::Data(d) => d.child(idx, dtype, len).cloned(),
Inner::View(v) => v
.child(idx, dtype, len)
.map(|view| Array(Inner::View(view))),
}
}

/// Returns a Vec of Arrays with all of the array's child arrays.
pub fn children(&self) -> Vec<Array> {
match self {
Array::Data(d) => d.children().iter().cloned().collect_vec(),
Array::View(v) => v.children(),
match &self.0 {
Inner::Data(d) => d.children().iter().cloned().collect_vec(),
Inner::View(v) => v.children(),
}
}

/// Returns the number of child arrays
pub fn nchildren(&self) -> usize {
match self {
Self::Data(d) => d.nchildren(),
Self::View(v) => v.nchildren(),
match &self.0 {
Inner::Data(d) => d.nchildren(),
Inner::View(v) => v.nchildren(),
}
}

Expand Down Expand Up @@ -157,17 +164,43 @@ impl Array {
offsets
}

/// Get back the (possibly owned) metadata for the array.
///
/// View arrays will return a reference to their bytes, while heap-backed arrays
/// must first serialize their metadata, returning an owned byte array to the caller.
pub fn metadata(&self) -> VortexResult<Cow<[u8]>> {
match &self.0 {
Inner::Data(array_data) => {
// Heap-backed arrays must first try and serialize the metadata.
let owned_meta: Vec<u8> = array_data
.metadata()
.try_serialize_metadata()?
.as_ref()
.to_owned();

Ok(Cow::Owned(owned_meta))
}
Inner::View(array_view) => {
// View arrays have direct access to metadata bytes.
array_view
.metadata()
.ok_or_else(|| vortex_err!("things"))
.map(Cow::Borrowed)
}
}
}

pub fn buffer(&self) -> Option<&Buffer> {
match self {
Self::Data(d) => d.buffer(),
Self::View(v) => v.buffer(),
match &self.0 {
Inner::Data(d) => d.buffer(),
Inner::View(v) => v.buffer(),
}
}

pub fn into_buffer(self) -> Option<Buffer> {
match self {
Self::Data(d) => d.into_buffer(),
Self::View(v) => v.buffer().cloned(),
match self.0 {
Inner::Data(d) => d.into_buffer(),
Inner::View(v) => v.buffer().cloned(),
}
}

Expand Down Expand Up @@ -295,12 +328,37 @@ pub trait ArrayDType {
fn dtype(&self) -> &DType;
}

impl<T: AsRef<Array>> ArrayDType for T {
fn dtype(&self) -> &DType {
match &self.as_ref().0 {
Inner::Data(array_data) => array_data.dtype(),
Inner::View(array_view) => array_view.dtype(),
}
}
}

pub trait ArrayLen {
fn len(&self) -> usize;

fn is_empty(&self) -> bool;
}

impl<T: AsRef<Array>> ArrayLen for T {
fn len(&self) -> usize {
match &self.as_ref().0 {
Inner::Data(d) => d.len(),
Inner::View(v) => v.len(),
}
}

fn is_empty(&self) -> bool {
match &self.as_ref().0 {
Inner::Data(d) => d.is_empty(),
Inner::View(v) => v.is_empty(),
}
}
}

struct NBytesVisitor(usize);

impl ArrayVisitor for NBytesVisitor {
Expand All @@ -317,9 +375,9 @@ impl ArrayVisitor for NBytesVisitor {

impl Display for Array {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let prefix = match self {
Self::Data(_) => "",
Self::View(_) => "$",
let prefix = match &self.0 {
Inner::Data(_) => "",
Inner::View(_) => "$",
};
write!(
f,
Expand All @@ -334,9 +392,51 @@ impl Display for Array {

impl ToArrayData for Array {
fn to_array_data(&self) -> ArrayData {
match self {
Self::Data(d) => d.clone(),
Self::View(_) => self.with_dyn(|a| a.to_array_data()),
match &self.0 {
Inner::Data(d) => d.clone(),
Inner::View(_) => self.with_dyn(|a| a.to_array_data()),
}
}
}

impl ToArray for ArrayData {
fn to_array(&self) -> Array {
Array(Inner::Data(self.clone()))
}
}

impl ToArray for ArrayView {
fn to_array(&self) -> Array {
Array(Inner::View(self.clone()))
}
}

impl IntoArray for ArrayView {
fn into_array(self) -> Array {
Array(Inner::View(self))
}
}

impl From<Array> for ArrayData {
fn from(value: Array) -> ArrayData {
match value.0 {
Inner::Data(d) => d,
Inner::View(_) => value.with_dyn(|v| v.to_array_data()),
}
}
}

impl From<ArrayData> for Array {
fn from(value: ArrayData) -> Array {
Array(Inner::Data(value))
}
}

impl<T: AsRef<Array>> ArrayStatistics for T {
fn statistics(&self) -> &(dyn Statistics + '_) {
match &self.as_ref().0 {
Inner::Data(d) => d.statistics(),
Inner::View(v) => v.statistics(),
}
}
}
Loading

0 comments on commit 8282118

Please sign in to comment.