Skip to content

Commit

Permalink
fix: teach pyvortex all our encodings (#998)
Browse files Browse the repository at this point in the history
Python users presumably just want to read their data, not figure out
which encodings need to be explicitly enabled.

I also reordered the default context encodings to match the order in
encoding.rs. Null was also missing, so I added it.
  • Loading branch information
danking authored Oct 9, 2024
1 parent b822b6e commit f15b162
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 8 deletions.
5 changes: 5 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions pyvortex/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ arrow = { workspace = true, features = ["pyarrow"] }
flexbuffers = { workspace = true }
futures = { workspace = true }
itertools = { workspace = true }
lazy_static = { workspace = true }
log = { workspace = true }
paste = { workspace = true }
pyo3 = { workspace = true }
Expand All @@ -35,13 +36,17 @@ tokio = { workspace = true, features = ["fs"] }

vortex-alp = { workspace = true }
vortex-array = { workspace = true }
vortex-bytebool = { workspace = true }
vortex-datetime-parts = { workspace = true }
vortex-dict = { workspace = true }
vortex-dtype = { workspace = true }
vortex-error = { workspace = true }
vortex-expr = { workspace = true }
vortex-fsst = { workspace = true }
vortex-fastlanes = { workspace = true }
vortex-roaring = { workspace = true }
vortex-runend = { workspace = true }
vortex-runend-bool = { workspace = true }
vortex-sampling-compressor = { workspace = true }
vortex-scalar = { workspace = true }
vortex-serde = { workspace = true, features = ["tokio"] }
Expand Down
34 changes: 33 additions & 1 deletion pyvortex/src/io.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,55 @@
use std::path::Path;
use std::sync::Arc;

use futures::TryStreamExt;
use lazy_static::lazy_static;
use pyo3::exceptions::PyTypeError;
use pyo3::prelude::*;
use pyo3::pyfunction;
use pyo3::types::{PyList, PyLong, PyString};
use tokio::fs::File;
use vortex::array::ChunkedArray;
use vortex::encoding::EncodingRef;
use vortex::{Array, Context};
use vortex_alp::{ALPEncoding, ALPRDEncoding};
use vortex_bytebool::ByteBoolEncoding;
use vortex_datetime_parts::DateTimePartsEncoding;
use vortex_dict::DictEncoding;
use vortex_dtype::field::Field;
use vortex_error::{vortex_panic, VortexResult};
use vortex_fastlanes::{BitPackedEncoding, DeltaEncoding, FoREncoding};
use vortex_fsst::FSSTEncoding;
use vortex_roaring::{RoaringBoolEncoding, RoaringIntEncoding};
use vortex_runend::RunEndEncoding;
use vortex_runend_bool::RunEndBoolEncoding;
use vortex_serde::layouts::{
LayoutContext, LayoutDeserializer, LayoutReaderBuilder, LayoutWriter, Projection, RowFilter,
};
use vortex_zigzag::ZigZagEncoding;

use crate::error::PyVortexError;
use crate::expr::PyExpr;
use crate::PyArray;

lazy_static! {
    /// Shared, lazily-initialised [`Context`] used when reading Vortex files from Python.
    ///
    /// It is built by taking `Context::default()` and registering every encoding
    /// listed below on top of it, so callers do not have to enable encodings
    /// one by one before reading.
    pub static ref MAXIMAL_CTX: Arc<Context> = {
        // The cast on the first element fixes the array's element type to
        // `EncodingRef`; the remaining references coerce to match it.
        let extra_encodings = [
            &ALPEncoding as EncodingRef,
            &ByteBoolEncoding,
            &DateTimePartsEncoding,
            &DictEncoding,
            &BitPackedEncoding,
            &DeltaEncoding,
            &FoREncoding,
            &FSSTEncoding,
            &RoaringBoolEncoding,
            &RoaringIntEncoding,
            &RunEndEncoding,
            &RunEndBoolEncoding,
            &ZigZagEncoding,
            &ALPRDEncoding,
        ];
        let context = Context::default().with_encodings(extra_encodings);
        Arc::new(context)
    };
}

/// Read a vortex struct array from the local filesystem.
///
/// Parameters
Expand Down Expand Up @@ -142,7 +174,7 @@ pub fn read<'py>(

let mut builder: LayoutReaderBuilder<File> = LayoutReaderBuilder::new(
file,
LayoutDeserializer::new(Context::default().into(), LayoutContext::default().into()),
LayoutDeserializer::new(MAXIMAL_CTX.clone(), LayoutContext::default().into()),
)
.with_projection(projection);

Expand Down
15 changes: 8 additions & 7 deletions vortex-array/src/context.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use std::collections::HashMap;

use crate::array::{
BoolEncoding, ChunkedEncoding, ConstantEncoding, ExtensionEncoding, PrimitiveEncoding,
SparseEncoding, StructEncoding, VarBinEncoding, VarBinViewEncoding,
BoolEncoding, ChunkedEncoding, ConstantEncoding, ExtensionEncoding, NullEncoding,
PrimitiveEncoding, SparseEncoding, StructEncoding, VarBinEncoding, VarBinViewEncoding,
};
use crate::encoding::EncodingRef;

Expand Down Expand Up @@ -36,15 +36,16 @@ impl Default for Context {
fn default() -> Self {
Self {
encodings: [
&BoolEncoding as EncodingRef,
&ChunkedEncoding,
&ConstantEncoding,
&ExtensionEncoding,
&NullEncoding as EncodingRef,
&BoolEncoding,
&PrimitiveEncoding,
&SparseEncoding,
&StructEncoding,
&VarBinEncoding,
&VarBinViewEncoding,
&ExtensionEncoding,
&SparseEncoding,
&ConstantEncoding,
&ChunkedEncoding,
]
.into_iter()
.map(|e| (e.id().code(), e))
Expand Down

0 comments on commit f15b162

Please sign in to comment.