Skip to content

Commit

Permalink
fix: specify features in vortex-serde & test default features (#1168)
Browse files Browse the repository at this point in the history
  • Loading branch information
danking authored Oct 31, 2024
1 parent 0234790 commit e5d1275
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 48 deletions.
9 changes: 8 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,14 @@ jobs:
run: cargo doc --no-deps
- name: Rust Test
run: cargo test --workspace --all-features
- name: Rust Build

- name: Rust Build (Default features)
run: cargo build --all-targets

- name: Clean cargo to keep disk usage below limit
run: cargo clean

- name: Rust Build (All Features)
run: cargo build --all-features --all-targets

- name: Pytest - PyVortex
Expand Down
2 changes: 1 addition & 1 deletion pyvortex/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ futures = { workspace = true }
itertools = { workspace = true }
lazy_static = { workspace = true }
log = { workspace = true }
object_store = { workspace = true, features = ["aws", "gcp", "azure", "http"] }
paste = { workspace = true }
pyo3 = { workspace = true }
pyo3-log = { workspace = true }
tokio = { workspace = true, features = ["fs"] }
url = { workspace = true }
object_store = { workspace = true, features = ["aws", "gcp", "azure", "http"] }

vortex-alp = { workspace = true }
vortex-array = { workspace = true }
Expand Down
5 changes: 3 additions & 2 deletions pyvortex/src/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use vortex_serde::layouts::{
};

use crate::expr::PyExpr;
use crate::object_store_urls::vortex_read_at_from_url;
use crate::{PyArray, TOKIO_RUNTIME};

pub async fn layout_stream_from_reader<T: VortexReadAt + Unpin>(
Expand Down Expand Up @@ -194,11 +195,11 @@ pub struct ObjectStoreUrlDataset {

impl ObjectStoreUrlDataset {
async fn reader(&self) -> VortexResult<ObjectStoreReadAt> {
ObjectStoreReadAt::try_new_from_url(&self.url).await
vortex_read_at_from_url(&self.url).await
}

pub async fn try_new(url: String) -> VortexResult<Self> {
let reader = ObjectStoreReadAt::try_new_from_url(&url).await?;
let reader = vortex_read_at_from_url(&url).await?;
let schema = Arc::new(infer_schema(&read_dtype_from_reader(&reader).await?)?);

Ok(Self { url, schema })
Expand Down
1 change: 1 addition & 0 deletions pyvortex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ mod dtype;
mod encode;
mod expr;
mod io;
mod object_store_urls;
mod python_repr;
mod scalar;
use lazy_static::lazy_static;
Expand Down
47 changes: 47 additions & 0 deletions pyvortex/src/object_store_urls.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
use std::sync::Arc;

use object_store::aws::AmazonS3Builder;
use object_store::azure::MicrosoftAzureBuilder;
use object_store::gcp::GoogleCloudStorageBuilder;
use object_store::http::HttpBuilder;
use object_store::local::LocalFileSystem;
use object_store::path::Path;
use object_store::{ObjectStore, ObjectStoreScheme};
use url::Url;
use vortex_error::{vortex_bail, VortexResult};
use vortex_serde::io::ObjectStoreReadAt;

fn better_parse_url(url_str: &str) -> VortexResult<(Box<dyn ObjectStore>, Path)> {
let url = Url::parse(url_str)?;

let (scheme, path) = ObjectStoreScheme::parse(&url).map_err(object_store::Error::from)?;
let store: Box<dyn ObjectStore> = match scheme {
ObjectStoreScheme::Local => Box::new(LocalFileSystem::default()),
ObjectStoreScheme::AmazonS3 => {
Box::new(AmazonS3Builder::from_env().with_url(url_str).build()?)
}
ObjectStoreScheme::GoogleCloudStorage => Box::new(
GoogleCloudStorageBuilder::from_env()
.with_url(url_str)
.build()?,
),
ObjectStoreScheme::MicrosoftAzure => Box::new(
MicrosoftAzureBuilder::from_env()
.with_url(url_str)
.build()?,
),
ObjectStoreScheme::Http => Box::new(
HttpBuilder::new()
.with_url(&url[..url::Position::BeforePath])
.build()?,
),
otherwise => vortex_bail!("unrecognized object store scheme: {:?}", otherwise),
};

Ok((store, path))
}

/// Builds an [`ObjectStoreReadAt`] for the object that `url` points to.
///
/// The URL is resolved with `better_parse_url`; the resulting boxed store is converted
/// into an `Arc` and paired with the parsed location.
///
/// # Errors
///
/// Propagates any error returned by `better_parse_url`.
pub async fn vortex_read_at_from_url(url: &str) -> VortexResult<ObjectStoreReadAt> {
    better_parse_url(url)
        .map(|(store, location)| ObjectStoreReadAt::new(Arc::from(store), location))
}
3 changes: 2 additions & 1 deletion vortex-serde/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ url = { workspace = true }
vortex-array = { workspace = true }
vortex-buffer = { workspace = true }
vortex-dtype = { workspace = true, features = ["flatbuffers"] }
vortex-error = { workspace = true, features = ["object_store"] }
vortex-error = { workspace = true }
vortex-expr = { workspace = true }
vortex-flatbuffers = { workspace = true, features = ["file"] }
vortex-scalar = { workspace = true, features = ["flatbuffers"] }
Expand Down Expand Up @@ -61,6 +61,7 @@ default = ["futures", "monoio", "tokio"]
futures = ["futures-util/io"]
monoio = ["dep:monoio"]
tokio = ["dep:tokio"]
object_store = ["dep:object_store", "vortex-error/object_store"]

[[bench]]
name = "ipc_take"
Expand Down
45 changes: 2 additions & 43 deletions vortex-serde/src/io/object_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,11 @@ use std::sync::Arc;
use std::{io, mem};

use bytes::BytesMut;
use object_store::aws::AmazonS3Builder;
use object_store::azure::MicrosoftAzureBuilder;
use object_store::gcp::GoogleCloudStorageBuilder;
use object_store::http::HttpBuilder;
use object_store::local::LocalFileSystem;
use object_store::path::Path;
use object_store::{ObjectStore, ObjectStoreScheme, WriteMultipart};
use url::Url;
use object_store::{ObjectStore, WriteMultipart};
use vortex_buffer::io_buf::IoBuf;
use vortex_buffer::Buffer;
use vortex_error::{vortex_bail, vortex_panic, VortexError, VortexResult};
use vortex_error::{vortex_panic, VortexError, VortexResult};

use crate::io::{VortexRead, VortexReadAt, VortexWrite};

Expand Down Expand Up @@ -71,41 +65,6 @@ impl ObjectStoreReadAt {
location,
}
}

fn better_parse_url(url_str: &str) -> VortexResult<(Box<dyn ObjectStore>, Path)> {
let url = Url::parse(url_str)?;

let (scheme, path) = ObjectStoreScheme::parse(&url).map_err(object_store::Error::from)?;
let store: Box<dyn ObjectStore> = match scheme {
ObjectStoreScheme::Local => Box::new(LocalFileSystem::default()),
ObjectStoreScheme::AmazonS3 => {
Box::new(AmazonS3Builder::from_env().with_url(url_str).build()?)
}
ObjectStoreScheme::GoogleCloudStorage => Box::new(
GoogleCloudStorageBuilder::from_env()
.with_url(url_str)
.build()?,
),
ObjectStoreScheme::MicrosoftAzure => Box::new(
MicrosoftAzureBuilder::from_env()
.with_url(url_str)
.build()?,
),
ObjectStoreScheme::Http => Box::new(
HttpBuilder::new()
.with_url(&url[..url::Position::BeforePath])
.build()?,
),
otherwise => vortex_bail!("unrecognized object store scheme: {:?}", otherwise),
};

Ok((store, path))
}

/// Creates a reader for the object identified by `url`.
///
/// Resolves the URL through `Self::better_parse_url`, then wraps the boxed store in an
/// `Arc` and passes it with the parsed location to `Self::new`.
///
/// # Errors
///
/// Propagates any error returned by `Self::better_parse_url`.
pub async fn try_new_from_url(url: &str) -> VortexResult<Self> {
    Self::better_parse_url(url).map(|(store, location)| Self::new(Arc::from(store), location))
}
}

impl VortexReadAt for ObjectStoreReadAt {
Expand Down

0 comments on commit e5d1275

Please sign in to comment.