diff --git a/Cargo.lock b/Cargo.lock index c0faecc985..f22e5c2493 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3133,6 +3133,7 @@ dependencies = [ "flexbuffers", "futures", "itertools 0.13.0", + "lazy_static", "log", "paste", "pyo3", @@ -3140,13 +3141,17 @@ dependencies = [ "tokio", "vortex-alp", "vortex-array", + "vortex-bytebool", + "vortex-datetime-parts", "vortex-dict", "vortex-dtype", "vortex-error", "vortex-expr", "vortex-fastlanes", + "vortex-fsst", "vortex-roaring", "vortex-runend", + "vortex-runend-bool", "vortex-sampling-compressor", "vortex-scalar", "vortex-serde", diff --git a/pyvortex/Cargo.toml b/pyvortex/Cargo.toml index 9eb4089990..4f7ae9dfd6 100644 --- a/pyvortex/Cargo.toml +++ b/pyvortex/Cargo.toml @@ -27,6 +27,7 @@ arrow = { workspace = true, features = ["pyarrow"] } flexbuffers = { workspace = true } futures = { workspace = true } itertools = { workspace = true } +lazy_static = { workspace = true } log = { workspace = true } paste = { workspace = true } pyo3 = { workspace = true } @@ -35,13 +36,17 @@ tokio = { workspace = true, features = ["fs"] } vortex-alp = { workspace = true } vortex-array = { workspace = true } +vortex-bytebool = { workspace = true } +vortex-datetime-parts = { workspace = true } vortex-dict = { workspace = true } vortex-dtype = { workspace = true } vortex-error = { workspace = true } vortex-expr = { workspace = true } +vortex-fsst = { workspace = true } vortex-fastlanes = { workspace = true } vortex-roaring = { workspace = true } vortex-runend = { workspace = true } +vortex-runend-bool = { workspace = true } vortex-sampling-compressor = { workspace = true } vortex-scalar = { workspace = true } vortex-serde = { workspace = true, features = ["tokio"] } diff --git a/pyvortex/src/io.rs b/pyvortex/src/io.rs index 1ee8c9db7c..d5184dab33 100644 --- a/pyvortex/src/io.rs +++ b/pyvortex/src/io.rs @@ -1,23 +1,55 @@ use std::path::Path; +use std::sync::Arc; use futures::TryStreamExt; +use lazy_static::lazy_static; use 
pyo3::exceptions::PyTypeError; use pyo3::prelude::*; use pyo3::pyfunction; use pyo3::types::{PyList, PyLong, PyString}; use tokio::fs::File; use vortex::array::ChunkedArray; +use vortex::encoding::EncodingRef; use vortex::{Array, Context}; +use vortex_alp::{ALPEncoding, ALPRDEncoding}; +use vortex_bytebool::ByteBoolEncoding; +use vortex_datetime_parts::DateTimePartsEncoding; +use vortex_dict::DictEncoding; use vortex_dtype::field::Field; use vortex_error::{vortex_panic, VortexResult}; +use vortex_fastlanes::{BitPackedEncoding, DeltaEncoding, FoREncoding}; +use vortex_fsst::FSSTEncoding; +use vortex_roaring::{RoaringBoolEncoding, RoaringIntEncoding}; +use vortex_runend::RunEndEncoding; +use vortex_runend_bool::RunEndBoolEncoding; use vortex_serde::layouts::{ LayoutContext, LayoutDeserializer, LayoutReaderBuilder, LayoutWriter, Projection, RowFilter, }; +use vortex_zigzag::ZigZagEncoding; use crate::error::PyVortexError; use crate::expr::PyExpr; use crate::PyArray; +lazy_static! { + pub static ref MAXIMAL_CTX: Arc<Context> = Arc::new(Context::default().with_encodings([ + &ALPEncoding as EncodingRef, + &ByteBoolEncoding, + &DateTimePartsEncoding, + &DictEncoding, + &BitPackedEncoding, + &DeltaEncoding, + &FoREncoding, + &FSSTEncoding, + &RoaringBoolEncoding, + &RoaringIntEncoding, + &RunEndEncoding, + &RunEndBoolEncoding, + &ZigZagEncoding, + &ALPRDEncoding, + ])); +} + /// Read a vortex struct array from the local filesystem. 
/// /// Parameters @@ -142,7 +174,7 @@ pub fn read<'py>( let mut builder: LayoutReaderBuilder = LayoutReaderBuilder::new( file, - LayoutDeserializer::new(Context::default().into(), LayoutContext::default().into()), + LayoutDeserializer::new(MAXIMAL_CTX.clone(), LayoutContext::default().into()), ) .with_projection(projection); diff --git a/vortex-array/src/context.rs b/vortex-array/src/context.rs index 41a8f44081..404f99536c 100644 --- a/vortex-array/src/context.rs +++ b/vortex-array/src/context.rs @@ -1,8 +1,8 @@ use std::collections::HashMap; use crate::array::{ - BoolEncoding, ChunkedEncoding, ConstantEncoding, ExtensionEncoding, PrimitiveEncoding, - SparseEncoding, StructEncoding, VarBinEncoding, VarBinViewEncoding, + BoolEncoding, ChunkedEncoding, ConstantEncoding, ExtensionEncoding, NullEncoding, + PrimitiveEncoding, SparseEncoding, StructEncoding, VarBinEncoding, VarBinViewEncoding, }; use crate::encoding::EncodingRef; @@ -36,15 +36,16 @@ impl Default for Context { fn default() -> Self { Self { encodings: [ - &BoolEncoding as EncodingRef, - &ChunkedEncoding, - &ConstantEncoding, - &ExtensionEncoding, + &NullEncoding as EncodingRef, + &BoolEncoding, &PrimitiveEncoding, - &SparseEncoding, &StructEncoding, &VarBinEncoding, &VarBinViewEncoding, + &ExtensionEncoding, + &SparseEncoding, + &ConstantEncoding, + &ChunkedEncoding, ] .into_iter() .map(|e| (e.id().code(), e))