Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: store min, max, null count, and true count in column metadata #1164

Merged
merged 16 commits into from
Nov 1, 2024
Merged
2 changes: 1 addition & 1 deletion docs/quickstart.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ similar to or smaller than Parquet.

>>> from os.path import getsize
>>> getsize("example.vortex") / getsize("_static/example.parquet")
2.1...
2...

Read
^^^^
Expand Down
6 changes: 6 additions & 0 deletions vortex-buffer/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,9 @@ impl AsRef<str> for BufferString {
self.as_str()
}
}

/// Lets a `BufferString` be borrowed as the raw bytes of its string contents.
impl AsRef<[u8]> for BufferString {
    fn as_ref(&self) -> &[u8] {
        // Go through the string view first, then reinterpret it as bytes.
        let text: &str = self.as_str();
        text.as_bytes()
    }
}
10 changes: 10 additions & 0 deletions vortex-dtype/src/dtype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,16 @@ pub type FieldNames = Arc<[FieldName]>;

pub type Metadata = Vec<u8>;

/// Builds a shared [`FieldNames`] slice from a list of owned strings.
///
/// Each input string is copied into its own reference-counted field name, and the
/// names keep the order of `value`. An empty input produces an empty slice.
pub fn fieldnames_from_strings(value: Vec<String>) -> FieldNames {
    // `Arc<[T]>` implements `FromIterator`, so the names can be collected straight into
    // the shared slice without spelling out a temporary `Vec` and boxed slice.
    value.iter().map(|name| Arc::from(name.as_str())).collect()
}

/// Array logical types.
///
/// Vortex arrays preserve a single logical type, while the encodings allow for multiple
Expand Down
45 changes: 42 additions & 3 deletions vortex-scalar/src/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,48 @@ impl<'a> TryFrom<&'a Scalar> for BinaryScalar<'a> {
impl<'a> TryFrom<&'a Scalar> for Buffer {
type Error = VortexError;

fn try_from(value: &'a Scalar) -> VortexResult<Self> {
BinaryScalar::try_from(value)?
.value()
fn try_from(scalar: &'a Scalar) -> VortexResult<Self> {
Buffer::try_from(scalar.value())
}
}

impl TryFrom<Scalar> for Buffer {
type Error = VortexError;

fn try_from(scalar: Scalar) -> VortexResult<Self> {
Buffer::try_from(&scalar)
}
}

/// Extracts a present `Buffer` from a borrowed `ScalarValue`.
///
/// Runs the `Option<Buffer>` conversion first and propagates its error; if that
/// conversion yields `None`, an error is returned instead of a value.
impl TryFrom<&ScalarValue> for Buffer {
    type Error = VortexError;

    fn try_from(value: &ScalarValue) -> Result<Self, Self::Error> {
        match Option::<Buffer>::try_from(value)? {
            Some(buffer) => Ok(buffer),
            None => Err(vortex_err!("Can't extract present value from null scalar")),
        }
    }
}

impl TryFrom<ScalarValue> for Buffer {
type Error = VortexError;

fn try_from(value: ScalarValue) -> Result<Self, Self::Error> {
Buffer::try_from(&value)
}
}

/// Fallible conversion from a borrowed `ScalarValue` into an optional `Buffer`.
///
/// The result is exactly what `value.as_buffer()` reports, both for success/failure and
/// for `Some`/`None`.
impl TryFrom<&ScalarValue> for Option<Buffer> {
type Error = VortexError;

fn try_from(value: &ScalarValue) -> Result<Self, Self::Error> {
// NOTE(review): presumably `None` corresponds to a null value — confirm against `as_buffer`.
value.as_buffer()
}
}

/// Owned-`ScalarValue` convenience conversion; forwards to the `&ScalarValue` implementation.
impl TryFrom<ScalarValue> for Option<Buffer> {
    type Error = VortexError;

    fn try_from(value: ScalarValue) -> Result<Self, Self::Error> {
        let borrowed = &value;
        Self::try_from(borrowed)
    }
}
41 changes: 36 additions & 5 deletions vortex-scalar/src/bool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,14 @@ impl TryFrom<&Scalar> for bool {
}
}

/// Owned-`Scalar` convenience conversion; forwards to the `&Scalar` implementation.
impl TryFrom<Scalar> for bool {
    type Error = VortexError;

    fn try_from(value: Scalar) -> VortexResult<Self> {
        let borrowed = &value;
        Self::try_from(borrowed)
    }
}

impl From<bool> for Scalar {
fn from(value: bool) -> Self {
Self {
Expand All @@ -73,19 +81,42 @@ impl From<bool> for Scalar {
}
}

impl From<bool> for ScalarValue {
fn from(value: bool) -> Self {
ScalarValue::Bool(value)
}
}

impl TryFrom<&ScalarValue> for Option<bool> {
type Error = VortexError;

fn try_from(value: &ScalarValue) -> VortexResult<Self> {
value.as_bool()
}
}

/// Owned-`ScalarValue` convenience conversion; forwards to the `&ScalarValue` implementation.
impl TryFrom<ScalarValue> for Option<bool> {
    type Error = VortexError;

    fn try_from(value: ScalarValue) -> VortexResult<Self> {
        let borrowed = &value;
        Self::try_from(borrowed)
    }
}

impl TryFrom<&ScalarValue> for bool {
type Error = VortexError;

fn try_from(value: &ScalarValue) -> VortexResult<Self> {
value
.as_bool()?
Option::<bool>::try_from(value)?
.ok_or_else(|| vortex_err!("Can't extract present value from null scalar"))
}
}

impl From<bool> for ScalarValue {
fn from(value: bool) -> Self {
ScalarValue::Bool(value)
impl TryFrom<ScalarValue> for bool {
type Error = VortexError;

fn try_from(value: ScalarValue) -> VortexResult<Self> {
bool::try_from(&value)
}
}

Expand Down
1 change: 1 addition & 0 deletions vortex-scalar/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ mod datafusion;
mod display;
mod extension;
mod list;
mod null;
mod primitive;
mod pvalue;
mod scalar_type;
Expand Down
35 changes: 35 additions & 0 deletions vortex-scalar/src/null.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
use vortex_error::VortexError;

use crate::{Scalar, ScalarValue};

/// Converts a borrowed `Scalar` to `()` by calling `as_null` on the scalar's value.
impl TryFrom<&Scalar> for () {
    type Error = VortexError;

    fn try_from(scalar: &Scalar) -> Result<Self, Self::Error> {
        let inner = scalar.value();
        inner.as_null()
    }
}

/// Owned-`Scalar` convenience conversion; forwards to the `&Scalar` implementation.
impl TryFrom<Scalar> for () {
    type Error = VortexError;

    fn try_from(scalar: Scalar) -> Result<Self, Self::Error> {
        let borrowed = &scalar;
        Self::try_from(borrowed)
    }
}

impl TryFrom<&ScalarValue> for () {
type Error = VortexError;

fn try_from(value: &ScalarValue) -> Result<Self, Self::Error> {
value.as_null()
}
}

/// Owned-`ScalarValue` convenience conversion; forwards to the `&ScalarValue` implementation.
impl TryFrom<ScalarValue> for () {
    type Error = VortexError;

    fn try_from(value: ScalarValue) -> Result<Self, Self::Error> {
        let borrowed = &value;
        Self::try_from(borrowed)
    }
}
24 changes: 21 additions & 3 deletions vortex-scalar/src/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,19 +154,37 @@ macro_rules! primitive_scalar {
/// Extracts a present primitive from a borrowed `ScalarValue`.
///
/// Runs the optional conversion first and propagates its error; if that conversion
/// yields `None`, an error is returned instead of a value.
impl TryFrom<&ScalarValue> for $T {
    type Error = VortexError;

    fn try_from(value: &ScalarValue) -> Result<Self, Self::Error> {
        match Option::<$T>::try_from(value)? {
            Some(present) => Ok(present),
            None => Err(vortex_err!("Can't extract present value from null scalar")),
        }
    }
}

/// Owned-`ScalarValue` convenience conversion; forwards to the `&ScalarValue` implementation.
impl TryFrom<ScalarValue> for $T {
    type Error = VortexError;

    fn try_from(value: ScalarValue) -> Result<Self, Self::Error> {
        let borrowed = &value;
        Self::try_from(borrowed)
    }
}

impl TryFrom<&ScalarValue> for Option<$T> {
type Error = VortexError;

fn try_from(value: &ScalarValue) -> Result<Self, Self::Error> {
match value {
ScalarValue::Primitive(pvalue) => <$T>::try_from(*pvalue),
ScalarValue::Null => Ok(None),
ScalarValue::Primitive(pvalue) => Ok(Some(<$T>::try_from(*pvalue)?)),
_ => vortex_bail!("expected primitive"),
}
}
}

impl TryFrom<ScalarValue> for $T {
impl TryFrom<ScalarValue> for Option<$T> {
type Error = VortexError;

fn try_from(value: ScalarValue) -> Result<Self, Self::Error> {
<$T>::try_from(&value)
Option::<$T>::try_from(&value)
}
}
};
Expand Down
59 changes: 49 additions & 10 deletions vortex-scalar/src/utf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,16 +62,6 @@ impl<'a> TryFrom<&'a Scalar> for Utf8Scalar<'a> {
}
}

/// Extracts a present `BufferString` from a borrowed `Scalar` by way of `Utf8Scalar`.
///
/// Propagates any error from building the `Utf8Scalar` view; if the view holds no
/// value, an error is returned instead.
impl<'a> TryFrom<&'a Scalar> for BufferString {
    type Error = VortexError;

    fn try_from(value: &'a Scalar) -> VortexResult<Self> {
        let utf8 = Utf8Scalar::try_from(value)?;
        match utf8.value() {
            Some(text) => Ok(text),
            None => Err(vortex_err!("Can't extract present value from null scalar")),
        }
    }
}

impl<'a> TryFrom<&'a Scalar> for String {
type Error = VortexError;

Expand All @@ -88,3 +78,52 @@ impl From<&str> for Scalar {
}
}
}

impl<'a> TryFrom<&'a Scalar> for BufferString {
type Error = VortexError;

fn try_from(scalar: &'a Scalar) -> VortexResult<Self> {
BufferString::try_from(scalar.value())
}
}

impl TryFrom<Scalar> for BufferString {
type Error = VortexError;

fn try_from(scalar: Scalar) -> Result<Self, Self::Error> {
BufferString::try_from(&scalar)
}
}

/// Extracts a present `BufferString` from a borrowed `ScalarValue`.
///
/// Runs the `Option<BufferString>` conversion first and propagates its error; if that
/// conversion yields `None`, an error is returned instead of a value.
impl TryFrom<&ScalarValue> for BufferString {
    type Error = VortexError;

    fn try_from(value: &ScalarValue) -> Result<Self, Self::Error> {
        match Option::<BufferString>::try_from(value)? {
            Some(text) => Ok(text),
            None => Err(vortex_err!("Can't extract present value from null scalar")),
        }
    }
}

impl TryFrom<ScalarValue> for BufferString {
type Error = VortexError;

fn try_from(value: ScalarValue) -> Result<Self, Self::Error> {
BufferString::try_from(&value)
}
}

/// Fallible conversion from a borrowed `ScalarValue` into an optional `BufferString`.
///
/// The result is exactly what `value.as_buffer_string()` reports, both for
/// success/failure and for `Some`/`None`.
impl TryFrom<&ScalarValue> for Option<BufferString> {
type Error = VortexError;

fn try_from(value: &ScalarValue) -> Result<Self, Self::Error> {
// NOTE(review): presumably `None` corresponds to a null value — confirm against `as_buffer_string`.
value.as_buffer_string()
}
}

/// Owned-`ScalarValue` convenience conversion; forwards to the `&ScalarValue` implementation.
impl TryFrom<ScalarValue> for Option<BufferString> {
    type Error = VortexError;

    fn try_from(value: ScalarValue) -> Result<Self, Self::Error> {
        let borrowed = &value;
        Self::try_from(borrowed)
    }
}
7 changes: 7 additions & 0 deletions vortex-scalar/src/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,13 @@ impl ScalarValue {
}
}

pub fn as_null(&self) -> VortexResult<()> {
match self {
Self::Null => Ok(()),
_ => Err(vortex_err!("Expected a Null scalar, found {:?}", self)),
}
}

pub fn as_bool(&self) -> VortexResult<Option<bool>> {
match self {
Self::Null => Ok(None),
Expand Down
Loading
Loading