From e5d127563afac7150724cd3c53d58ffc3750c86f Mon Sep 17 00:00:00 2001 From: Dan King Date: Thu, 31 Oct 2024 12:24:57 -0400 Subject: [PATCH] fix: specify features in vortex-serde & test default features (#1168) --- .github/workflows/ci.yml | 9 +++++- pyvortex/Cargo.toml | 2 +- pyvortex/src/dataset.rs | 5 +-- pyvortex/src/lib.rs | 1 + pyvortex/src/object_store_urls.rs | 47 +++++++++++++++++++++++++++++ vortex-serde/Cargo.toml | 3 +- vortex-serde/src/io/object_store.rs | 45 ++------------------------- 7 files changed, 64 insertions(+), 48 deletions(-) create mode 100644 pyvortex/src/object_store_urls.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ce023357d7..17e6e6386d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,7 +38,14 @@ jobs: run: cargo doc --no-deps - name: Rust Test run: cargo test --workspace --all-features - - name: Rust Build + + - name: Rust Build (Default features) + run: cargo build --all-targets + + - name: Clean cargo to keep disk usage below limit + run: cargo clean + + - name: Rust Build (All Features) run: cargo build --all-features --all-targets - name: Pytest - PyVortex diff --git a/pyvortex/Cargo.toml b/pyvortex/Cargo.toml index 04e87418e0..41df4fe26b 100644 --- a/pyvortex/Cargo.toml +++ b/pyvortex/Cargo.toml @@ -29,12 +29,12 @@ futures = { workspace = true } itertools = { workspace = true } lazy_static = { workspace = true } log = { workspace = true } -object_store = { workspace = true, features = ["aws", "gcp", "azure", "http"] } paste = { workspace = true } pyo3 = { workspace = true } pyo3-log = { workspace = true } tokio = { workspace = true, features = ["fs"] } url = { workspace = true } +object_store = { workspace = true, features = ["aws", "gcp", "azure", "http"] } vortex-alp = { workspace = true } vortex-array = { workspace = true } diff --git a/pyvortex/src/dataset.rs b/pyvortex/src/dataset.rs index 0c979ba37a..92664dcf54 100644 --- a/pyvortex/src/dataset.rs +++ b/pyvortex/src/dataset.rs @@ -21,6 +21,7 @@ use vortex_serde::layouts::{ }; use crate::expr::PyExpr; +use crate::object_store_urls::vortex_read_at_from_url; use crate::{PyArray, TOKIO_RUNTIME}; pub async fn layout_stream_from_reader( @@ -194,11 +195,11 @@ pub struct ObjectStoreUrlDataset { impl ObjectStoreUrlDataset { async fn reader(&self) -> VortexResult { - ObjectStoreReadAt::try_new_from_url(&self.url).await + vortex_read_at_from_url(&self.url).await } pub async fn try_new(url: String) -> VortexResult { - let reader = ObjectStoreReadAt::try_new_from_url(&url).await?; + let reader = vortex_read_at_from_url(&url).await?; let schema = Arc::new(infer_schema(&read_dtype_from_reader(&reader).await?)?); Ok(Self { url, schema }) diff --git a/pyvortex/src/lib.rs b/pyvortex/src/lib.rs index 09a6bd664a..f67a77d61d 100644 --- a/pyvortex/src/lib.rs +++ b/pyvortex/src/lib.rs @@ -13,6 +13,7 @@ mod dtype; mod encode; mod expr; mod io; +mod object_store_urls; mod python_repr; mod scalar; use lazy_static::lazy_static; diff --git a/pyvortex/src/object_store_urls.rs b/pyvortex/src/object_store_urls.rs new file mode 100644 index 0000000000..c49a71ea5d --- /dev/null +++ b/pyvortex/src/object_store_urls.rs @@ -0,0 +1,47 @@ +use std::sync::Arc; + +use object_store::aws::AmazonS3Builder; +use object_store::azure::MicrosoftAzureBuilder; +use object_store::gcp::GoogleCloudStorageBuilder; +use object_store::http::HttpBuilder; +use object_store::local::LocalFileSystem; +use object_store::path::Path; +use object_store::{ObjectStore, ObjectStoreScheme}; +use url::Url; +use vortex_error::{vortex_bail, VortexResult}; +use vortex_serde::io::ObjectStoreReadAt; + +fn better_parse_url(url_str: &str) -> VortexResult<(Box, Path)> { + let url = Url::parse(url_str)?; + + let (scheme, path) = ObjectStoreScheme::parse(&url).map_err(object_store::Error::from)?; + let store: Box = match scheme { + ObjectStoreScheme::Local => Box::new(LocalFileSystem::default()), + ObjectStoreScheme::AmazonS3 => { + Box::new(AmazonS3Builder::from_env().with_url(url_str).build()?) + } + ObjectStoreScheme::GoogleCloudStorage => Box::new( + GoogleCloudStorageBuilder::from_env() + .with_url(url_str) + .build()?, + ), + ObjectStoreScheme::MicrosoftAzure => Box::new( + MicrosoftAzureBuilder::from_env() + .with_url(url_str) + .build()?, + ), + ObjectStoreScheme::Http => Box::new( + HttpBuilder::new() + .with_url(&url[..url::Position::BeforePath]) + .build()?, + ), + otherwise => vortex_bail!("unrecognized object store scheme: {:?}", otherwise), + }; + + Ok((store, path)) +} + +pub async fn vortex_read_at_from_url(url: &str) -> VortexResult { + let (object_store, location) = better_parse_url(url)?; + Ok(ObjectStoreReadAt::new(Arc::from(object_store), location)) +} diff --git a/vortex-serde/Cargo.toml b/vortex-serde/Cargo.toml index 5798ac7aa9..573fa51b05 100644 --- a/vortex-serde/Cargo.toml +++ b/vortex-serde/Cargo.toml @@ -33,7 +33,7 @@ url = { workspace = true } vortex-array = { workspace = true } vortex-buffer = { workspace = true } vortex-dtype = { workspace = true, features = ["flatbuffers"] } -vortex-error = { workspace = true, features = ["object_store"] } +vortex-error = { workspace = true } vortex-expr = { workspace = true } vortex-flatbuffers = { workspace = true, features = ["file"] } vortex-scalar = { workspace = true, features = ["flatbuffers"] } @@ -61,6 +61,7 @@ default = ["futures", "monoio", "tokio"] futures = ["futures-util/io"] monoio = ["dep:monoio"] tokio = ["dep:tokio"] +object_store = ["dep:object_store", "vortex-error/object_store"] [[bench]] name = "ipc_take" diff --git a/vortex-serde/src/io/object_store.rs b/vortex-serde/src/io/object_store.rs index 40f7a97555..13782a8cc5 100644 --- a/vortex-serde/src/io/object_store.rs +++ b/vortex-serde/src/io/object_store.rs @@ -7,17 +7,11 @@ use std::sync::Arc; use std::{io, mem}; use bytes::BytesMut; -use object_store::aws::AmazonS3Builder; -use object_store::azure::MicrosoftAzureBuilder; -use object_store::gcp::GoogleCloudStorageBuilder; -use object_store::http::HttpBuilder; -use object_store::local::LocalFileSystem; use object_store::path::Path; -use object_store::{ObjectStore, ObjectStoreScheme, WriteMultipart}; -use url::Url; +use object_store::{ObjectStore, WriteMultipart}; use vortex_buffer::io_buf::IoBuf; use vortex_buffer::Buffer; -use vortex_error::{vortex_bail, vortex_panic, VortexError, VortexResult}; +use vortex_error::{vortex_panic, VortexError, VortexResult}; use crate::io::{VortexRead, VortexReadAt, VortexWrite}; @@ -71,41 +65,6 @@ impl ObjectStoreReadAt { location, } } - - fn better_parse_url(url_str: &str) -> VortexResult<(Box, Path)> { - let url = Url::parse(url_str)?; - - let (scheme, path) = ObjectStoreScheme::parse(&url).map_err(object_store::Error::from)?; - let store: Box = match scheme { - ObjectStoreScheme::Local => Box::new(LocalFileSystem::default()), - ObjectStoreScheme::AmazonS3 => { - Box::new(AmazonS3Builder::from_env().with_url(url_str).build()?) - } - ObjectStoreScheme::GoogleCloudStorage => Box::new( - GoogleCloudStorageBuilder::from_env() - .with_url(url_str) - .build()?, - ), - ObjectStoreScheme::MicrosoftAzure => Box::new( - MicrosoftAzureBuilder::from_env() - .with_url(url_str) - .build()?, - ), - ObjectStoreScheme::Http => Box::new( - HttpBuilder::new() - .with_url(&url[..url::Position::BeforePath]) - .build()?, - ), - otherwise => vortex_bail!("unrecognized object store scheme: {:?}", otherwise), - }; - - Ok((store, path)) - } - - pub async fn try_new_from_url(url: &str) -> VortexResult { - let (object_store, location) = Self::better_parse_url(url)?; - Ok(Self::new(Arc::from(object_store), location)) - } } impl VortexReadAt for ObjectStoreReadAt {