Skip to content

Commit

Permalink
Introduce a new vortex-schema crate (#819)
Browse files Browse the repository at this point in the history
The `Schema` struct needs to be accessible from multiple crates to enable
more advanced usage (initially, filter re-ordering).
  • Loading branch information
AdamGS authored Sep 16, 2024
1 parent 02b752c commit 8377a76
Show file tree
Hide file tree
Showing 13 changed files with 51 additions and 15 deletions.
9 changes: 9 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ members = [
"vortex-proto",
"vortex-sampling-compressor",
"vortex-scalar",
"vortex-schema",
"vortex-serde",
"xtask",
]
Expand Down Expand Up @@ -147,6 +148,7 @@ vortex-roaring = { version = "0.8.0", path = "./encodings/roaring" }
vortex-runend = { version = "0.8.0", path = "./encodings/runend" }
vortex-runend-bool = { version = "0.8.0", path = "./encodings/runend-bool" }
vortex-scalar = { version = "0.8.0", path = "./vortex-scalar", default-features = false }
vortex-schema = { version = "0.8.0", path = "./vortex-schema" }
vortex-serde = { version = "0.8.0", path = "./vortex-serde", default-features = false }
vortex-sampling-compressor = { version = "0.8.0", path = "./vortex-sampling-compressor" }
vortex-zigzag = { version = "0.8.0", path = "./encodings/zigzag" }
Expand Down
4 changes: 2 additions & 2 deletions pyvortex/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ doctest = false
arrow = { workspace = true, features = ["pyarrow"] }
flexbuffers = { workspace = true }
futures = { workspace = true }
itertools = { workspace = true }
log = { workspace = true }
paste = { workspace = true }
pyo3 = { workspace = true }
Expand All @@ -48,11 +49,10 @@ vortex-proto = { workspace = true }
vortex-roaring = { workspace = true }
vortex-runend = { workspace = true }
vortex-runend-bool = { workspace = true }
vortex-sampling-compressor = { workspace = true }
vortex-scalar = { workspace = true, default-features = false }
vortex-serde = { workspace = true, default-features = false, features = ["tokio"] }
vortex-sampling-compressor = { workspace = true }
vortex-zigzag = { workspace = true }
itertools = { workspace = true }

# We may need this workaround?
# https://pyo3.rs/v0.20.2/faq.html#i-cant-run-cargo-test-or-i-cant-build-in-a-cargo-workspace-im-having-linker-issues-like-symbol-not-found-or-undefined-reference-to-_pyexc_systemerror
2 changes: 1 addition & 1 deletion pyvortex/src/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ pub fn read<'py>(

let stream = builder.build().await?;

let dtype = stream.schema().into_dtype();
let dtype = stream.schema().into();

let vecs: Vec<Array> = stream.try_collect().await?;

Expand Down
2 changes: 1 addition & 1 deletion vortex-sampling-compressor/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ edition = { workspace = true }
rust-version = { workspace = true }

[dependencies]
fsst-rs = { workspace = true }
arbitrary = { workspace = true, optional = true }
fsst-rs = { workspace = true }
lazy_static = { workspace = true }
log = { workspace = true }
rand = { workspace = true }
Expand Down
18 changes: 18 additions & 0 deletions vortex-schema/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[package]
name = "vortex-schema"
version = { workspace = true }
homepage = { workspace = true }
repository = { workspace = true }
authors = { workspace = true }
license = { workspace = true }
keywords = { workspace = true }
include = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }

[dependencies]
vortex-dtype = { workspace = true }
vortex-error = { workspace = true }

[lints]
workspace = true
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
use vortex_dtype::DType;
use vortex_error::{vortex_bail, VortexResult};

use super::projections::Projection;
use self::projection::Projection;

pub mod projection;

#[derive(Clone, Debug)]
pub struct Schema(pub(crate) DType);

impl Schema {
pub fn new(schema_dtype: DType) -> Self {
Self(schema_dtype)
}

pub fn project(&self, projection: Projection) -> VortexResult<Self> {
match projection {
Projection::All => Ok(self.clone()),
Expand All @@ -23,8 +29,10 @@ impl Schema {
pub fn dtype(&self) -> &DType {
&self.0
}
}

pub fn into_dtype(self) -> DType {
self.0
impl From<Schema> for DType {
fn from(value: Schema) -> Self {
value.0
}
}
File renamed without changes.
1 change: 1 addition & 0 deletions vortex-serde/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ vortex-error = { workspace = true, features = ["object_store"] }
vortex-expr = { workspace = true }
vortex-flatbuffers = { workspace = true, features = ["file"] }
vortex-scalar = { workspace = true, features = ["flatbuffers"] }
vortex-schema = { workspace = true }

[dev-dependencies]
arrow = { workspace = true }
Expand Down
2 changes: 1 addition & 1 deletion vortex-serde/src/layouts/read/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ use bytes::BytesMut;
use vortex::{Array, ArrayDType};
use vortex_dtype::field::Field;
use vortex_error::{vortex_bail, VortexResult};
use vortex_schema::projection::Projection;

use crate::io::VortexReadAt;
use crate::layouts::read::cache::{LayoutMessageCache, RelativeLayoutCache};
use crate::layouts::read::context::LayoutDeserializer;
use crate::layouts::read::filtering::RowFilter;
use crate::layouts::read::footer::Footer;
use crate::layouts::read::projections::Projection;
use crate::layouts::read::stream::LayoutBatchStream;
use crate::layouts::read::{Scan, DEFAULT_BATCH_SIZE, FILE_POSTSCRIPT_SIZE, INITIAL_READ_SIZE};
use crate::layouts::MAGIC_BYTES;
Expand Down
2 changes: 1 addition & 1 deletion vortex-serde/src/layouts/read/layouts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ use vortex_dtype::field::Field;
use vortex_dtype::DType;
use vortex_error::{vortex_bail, vortex_err, VortexExpect as _, VortexResult};
use vortex_flatbuffers::footer as fb;
use vortex_schema::projection::Projection;

use super::projections::Projection;
use crate::layouts::read::batch::BatchReader;
use crate::layouts::read::buffered::BufferedReader;
use crate::layouts::read::cache::RelativeLayoutCache;
Expand Down
6 changes: 2 additions & 4 deletions vortex-serde/src/layouts/read/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,14 @@ mod context;
mod filtering;
mod footer;
mod layouts;
mod projections;
mod schema;
mod stream;

pub use builder::LayoutReaderBuilder;
pub use context::*;
pub use filtering::RowFilter;
pub use projections::Projection;
pub use schema::Schema;
pub use stream::LayoutBatchStream;
pub use vortex_schema::projection::Projection;
pub use vortex_schema::Schema;

use crate::stream_writer::ByteRange;

Expand Down
4 changes: 2 additions & 2 deletions vortex-serde/src/layouts/read/stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ use vortex::{Array, IntoArray, IntoArrayVariant};
use vortex_dtype::{match_each_integer_ptype, DType};
use vortex_error::{vortex_err, vortex_panic, VortexError, VortexResult};
use vortex_scalar::Scalar;
use vortex_schema::Schema;

use crate::io::VortexReadAt;
use crate::layouts::read::cache::LayoutMessageCache;
use crate::layouts::read::schema::Schema;
use crate::layouts::read::{Layout, MessageId, ReadResult, Scan};
use crate::layouts::Projection;
use crate::stream_writer::ByteRange;
Expand Down Expand Up @@ -56,7 +56,7 @@ impl<R: VortexReadAt> LayoutBatchStream<R> {
}

pub fn schema(&self) -> Schema {
Schema(self.dtype.clone())
Schema::new(self.dtype.clone())
}

// TODO(robert): Push this logic down to layouts
Expand Down

0 comments on commit 8377a76

Please sign in to comment.