feat: more Results, fewer panics, always have backtraces (#761)
fixes #564 

The goals of this PR are to ensure the following:
- reasonable developer ergonomics / not too verbose
- return a `VortexResult` with a well-formed error from most fallible
functions
- panics are allowed in cases of programmer error / invariants being
violated (i.e., panics should correspond logically to places where we
might write an `assert`; see the sketch after this list)
- ensure a good error message and a backtrace in case of panic
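
For illustration, the policy in code. This is a sketch, not code from this PR: `vortex_bail!` is the pre-existing error macro from `vortex-error`, and `lookup`/`halve_even` are made-up examples.

```rust
use vortex_error::{vortex_bail, VortexResult};

// Truly fallible (bad input is always possible): return a well-formed error.
fn lookup(values: &[u32], idx: usize) -> VortexResult<u32> {
    if idx >= values.len() {
        vortex_bail!("index {} out of bounds for length {}", idx, values.len());
    }
    Ok(values[idx])
}

// Invariant violation (programmer error): panicking is acceptable here,
// in the same spirit as an `assert!`.
fn halve_even(n: u32) -> u32 {
    assert!(n % 2 == 0, "halve_even called with odd input {n}");
    n / 2
}
```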

To accomplish this, the PR does several things.

First, it adds much stricter clippy lints (e.g., deny on `unwrap`,
`expect`, and `panic`; forbid fallible `From` impls; and so on). With
those in place, I audited the codebase and made the functions that are
truly fallible return `VortexResult` instead of panicking. The remaining
ones that I saw but didn't fix are tracked under #765.

For places where we truly want to panic (specifically because an
internal invariant has been violated, typically indicating programmer
error), this PR adds (sketched after the list):
- a new macro (`vortex_panic!`)
- `VortexUnwrap` trait, which is implemented only on `VortexResult` and
adds a `vortex_unwrap()` function
- `VortexExpect` trait, which is implemented on `VortexResult` and
`Option<T>` and adds a `vortex_expect` function that takes a string
literal
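
The rough shape of these utilities, reconstructed from the description above — the exact signatures (and `vortex_panic!(err)` accepting an error value) are my assumptions, not copied from the PR; see the diff for the real definitions:

```rust
use vortex_error::{vortex_panic, VortexResult};

pub trait VortexUnwrap {
    type Output;
    /// Like `Result::unwrap`, but panics through `vortex_panic!`,
    /// so the failure carries a well-formed error plus a backtrace.
    fn vortex_unwrap(self) -> Self::Output;
}

impl<T> VortexUnwrap for VortexResult<T> {
    type Output = T;
    fn vortex_unwrap(self) -> T {
        self.unwrap_or_else(|err| vortex_panic!(err))
    }
}

pub trait VortexExpect {
    type Output;
    /// Like `expect`, but routed through `vortex_panic!`; the PR also
    /// implements this for `VortexResult<T>` (omitted here for brevity).
    fn vortex_expect(self, msg: &'static str) -> Self::Output;
}

impl<T> VortexExpect for Option<T> {
    type Output = T;
    fn vortex_expect(self, msg: &'static str) -> T {
        self.unwrap_or_else(|| vortex_panic!("{}", msg))
    }
}
```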

Basically, the state after this PR is pretty good. The main downsides
are that we have a special snowflake unwrap/expect instead of the
traditional ones, and we don't have an effective lint rule to prevent
calling those inside `Result`-returning functions... but I think the
latter, at least, is acceptable (I know @AdamGS finds those lints overly
restrictive anyway).

ALTERNATIVELY, a middle ground would be to replace calls to
`vortex_unwrap` and `vortex_expect` with plain `expect` and un-deny
`expect_used`. This would be more "typical" Rust, but with the
disadvantage that we wouldn't get backtraces by default in the cases
where we do have bugs. Or we could write `unwrap_or_else(|err|
vortex_panic!(err))` or `ok_or_else(|| vortex_panic!(...))` everywhere,
but that's pretty verbose.
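
To make the trade-offs concrete, here is the same fallible call under each option (a sketch; `demo` and its arguments are made up, and it assumes the `VortexUnwrap` trait lands in `vortex-error`):

```rust
use vortex_error::{vortex_panic, VortexResult, VortexUnwrap};

fn demo(r1: VortexResult<u32>, r2: VortexResult<u32>, r3: VortexResult<u32>) -> u32 {
    // This PR: terse, and a panic produces a well-formed error with a backtrace.
    let a = r1.vortex_unwrap();
    // Middle ground: standard `expect`, but no backtrace by default.
    let b = r2.expect("operation failed");
    // Fully explicit: backtrace, but verbose at every call site.
    let c = r3.unwrap_or_else(|err| vortex_panic!(err));
    a + b + c
}
```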

---------

Co-authored-by: Robert Kruszewski <[email protected]>
Co-authored-by: Adam Gutglick <[email protected]>
3 people authored Sep 10, 2024
1 parent a82eb07 commit 2c3da0c
Showing 145 changed files with 1,223 additions and 761 deletions.
53 changes: 33 additions & 20 deletions Cargo.lock

Some generated files are not rendered by default.

22 changes: 21 additions & 1 deletion Cargo.toml
@@ -131,7 +131,7 @@ uuid = "1.8.0"
vortex-alp = { version = "0.8.0", path = "./encodings/alp" }
vortex-array = { version = "0.8.0", path = "./vortex-array" }
vortex-buffer = { version = "0.8.0", path = "./vortex-buffer" }
-vortex-bytebool = { version = "0.8.0", path = "./encodings/byte-bool" }
+vortex-bytebool = { version = "0.8.0", path = "./encodings/bytebool" }
vortex-datafusion = { version = "0.8.0", path = "./vortex-datafusion" }
vortex-datetime-dtype = { version = "0.8.0", path = "./vortex-datetime-dtype" }
vortex-datetime-parts = { version = "0.8.0", path = "./encodings/datetime-parts" }
@@ -171,11 +171,31 @@ unsafe_op_in_unsafe_fn = "deny"

[workspace.lints.clippy]
all = { level = "deny", priority = -1 }
as_ptr_cast_mut = { level = "deny" }
borrow_as_ptr = { level = "deny" }
collection_is_never_read = { level = "deny" }
cognitive_complexity = { level = "deny" }
debug_assert_with_mut_call = { level = "deny" }
derive_partial_eq_without_eq = { level = "deny" }
exit = { level = "deny" }
expect_fun_call = { level = "deny" }
expect_used = { level = "deny" }
equatable_if_let = { level = "deny" }
fallible_impl_from = { level = "deny" }
get_unwrap = { level = "deny" }
host_endian_bytes = { level = "deny" }
if_then_some_else_none = { level = "deny" }
inconsistent_struct_constructor = { level = "deny" }
manual_assert = { level = "deny" }
manual_is_variant_and = { level = "deny" }
mem_forget = { level = "deny" }
or_fun_call = "deny"
panic_in_result_fn = { level = "deny" }
panic = { level = "deny" }
same_name_method = { level = "deny" }
tests_outside_test_module = { level = "deny" }
# todo = { level = "deny" }
# unimplemented = { level = "deny" }
unwrap_in_result = { level = "deny" }
unwrap_used = { level = "deny" }
use_debug = { level = "deny" }
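
One of the new lints deserves a note: `fallible_impl_from` is what drives the `Array::from` → `Array::try_from` churn in the diffs below, since a `From` impl that can panic hides fallibility behind an infallible signature. A generic illustration — the `Meters` type here is made up for the example, not from this repo:

```rust
use std::num::ParseFloatError;

struct Meters(f64);

// Denied by `fallible_impl_from` (a `From` impl that can panic):
//
//     impl From<&str> for Meters {
//         fn from(s: &str) -> Self {
//             Meters(s.parse().unwrap()) // hidden panic on bad input
//         }
//     }

// Preferred: `TryFrom` makes the fallibility part of the signature,
// and callers decide how to handle the error.
impl TryFrom<&str> for Meters {
    type Error = ParseFloatError;

    fn try_from(s: &str) -> Result<Self, Self::Error> {
        Ok(Meters(s.parse()?))
    }
}
```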
9 changes: 7 additions & 2 deletions bench-vortex/Cargo.toml
@@ -12,8 +12,13 @@ include = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }

-[lints]
-workspace = true
+[lints.rust]
+warnings = "deny"
+unsafe_op_in_unsafe_fn = "deny"
+
+[lints.clippy]
+all = { level = "deny", priority = -1 }
+or_fun_call = "deny"

[dependencies]
anyhow = { workspace = true }
4 changes: 1 addition & 3 deletions bench-vortex/benches/datafusion_benchmark.rs
@@ -1,5 +1,3 @@
-#![allow(clippy::use_debug)]
-
use std::collections::HashSet;
use std::sync::Arc;

@@ -83,7 +81,7 @@ fn toy_dataset_arrow() -> RecordBatch {
}

fn toy_dataset_vortex(compress: bool) -> Array {
-let uncompressed = toy_dataset_arrow().into();
+let uncompressed = toy_dataset_arrow().try_into().unwrap();

if !compress {
return uncompressed;
2 changes: 0 additions & 2 deletions bench-vortex/benches/tpch_benchmark.rs
@@ -1,5 +1,3 @@
-#![allow(clippy::use_debug)]
-
use bench_vortex::tpch::dbgen::{DBGen, DBGenOptions};
use bench_vortex::tpch::{load_datasets, run_tpch_query, tpch_queries, Format};
use criterion::{criterion_group, criterion_main, Criterion};
Expand Down
2 changes: 0 additions & 2 deletions bench-vortex/src/bin/tpch_benchmark.rs
@@ -1,5 +1,3 @@
-#![allow(clippy::use_debug)]
-
use std::collections::HashMap;
use std::process::ExitCode;
use std::sync;
2 changes: 1 addition & 1 deletion bench-vortex/src/data_downloads.rs
@@ -45,7 +45,7 @@ pub fn data_vortex_uncompressed(fname_out: &str, downloaded_data: PathBuf) -> Pa
let array = ChunkedArray::try_new(
reader
.into_iter()
-.map(|batch_result| Array::from(batch_result.unwrap()))
+.map(|batch_result| Array::try_from(batch_result.unwrap()).unwrap())
.collect(),
dtype,
)
7 changes: 4 additions & 3 deletions bench-vortex/src/lib.rs
@@ -190,7 +190,8 @@ pub fn compress_taxi_data() -> Array {
let chunks = reader
.into_iter()
.map(|batch_result| batch_result.unwrap())
-.map(Array::from)
+.map(Array::try_from)
+.map(Result::unwrap)
.map(|array| {
uncompressed_size += array.nbytes();
compressor.compress(&array).unwrap()
@@ -288,7 +289,7 @@ mod test {
let struct_arrow: ArrowStructArray = record_batch.into();
let arrow_array: ArrowArrayRef = Arc::new(struct_arrow);
let vortex_array = Array::from_arrow(arrow_array.clone(), false);
-let vortex_as_arrow = vortex_array.into_canonical().unwrap().into_arrow();
+let vortex_as_arrow = vortex_array.into_canonical().unwrap().into_arrow().unwrap();
assert_eq!(vortex_as_arrow.deref(), arrow_array.deref());
}
}
@@ -309,7 +310,7 @@ mod test {
let vortex_array = Array::from_arrow(arrow_array.clone(), false);

let compressed = compressor.compress(&vortex_array).unwrap();
-let compressed_as_arrow = compressed.into_canonical().unwrap().into_arrow();
+let compressed_as_arrow = compressed.into_canonical().unwrap().into_arrow().unwrap();
assert_eq!(compressed_as_arrow.deref(), arrow_array.deref());
}
}
9 changes: 5 additions & 4 deletions bench-vortex/src/reader.rs
@@ -20,6 +20,7 @@ use object_store::ObjectStore;
use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
use parquet::arrow::async_reader::{AsyncFileReader, ParquetObjectReader};
use parquet::arrow::ParquetRecordBatchStreamBuilder;
+use parquet::file::metadata::RowGroupMetaData;
use serde::{Deserialize, Serialize};
use stream::StreamExt;
use vortex::array::{ChunkedArray, PrimitiveArray};
@@ -58,7 +59,7 @@ pub async fn open_vortex(path: &Path) -> VortexResult<Array> {
.into_array_stream()
.collect_chunked()
.await
-.map(|a| a.into_array())
+.map(IntoArray::into_array)
}

pub async fn rewrite_parquet_as_vortex<W: VortexWrite>(
@@ -101,7 +102,7 @@ pub fn compress_parquet_to_vortex(parquet_path: &Path) -> VortexResult<ChunkedAr
let chunks = reader
.map(|batch_result| batch_result.unwrap())
.map(|record_batch| {
-let vortex_array = Array::from(record_batch);
+let vortex_array = Array::try_from(record_batch).unwrap();
compressor.compress(&vortex_array).unwrap()
})
.collect_vec();
@@ -219,7 +220,7 @@ async fn parquet_take_from_stream<T: AsyncFileReader + Unpin + Send + 'static>(
.metadata()
.row_groups()
.iter()
-.map(|rg| rg.num_rows())
+.map(RowGroupMetaData::num_rows)
.scan(0i64, |acc, x| {
*acc += x;
Some(*acc)
@@ -238,7 +239,7 @@
let row_group_indices = row_groups
.keys()
.sorted()
-.map(|i| row_groups.get(i).unwrap().clone())
+.map(|i| row_groups[i].clone())
.collect_vec();

let reader = builder
2 changes: 1 addition & 1 deletion bench-vortex/src/tpch/dbgen.rs
@@ -175,7 +175,7 @@ fn get_or_cache_toolchain(
zip_file
.url()
.path_segments()
-.and_then(|segments| segments.last())
+.and_then(Iterator::last)
.unwrap(),
);

6 changes: 3 additions & 3 deletions bench-vortex/src/tpch/mod.rs
@@ -247,8 +247,8 @@ async fn register_vortex_file(
let sts = record_batches
.iter()
.cloned()
-.map(Array::from)
-.map(|a| a.into_struct().unwrap())
+.map(Array::try_from)
+.map(|a| a.unwrap().into_struct().unwrap())
.collect::<Vec<_>>();

let mut arrays_map: HashMap<Arc<str>, Vec<Array>> = HashMap::default();
@@ -272,7 +272,7 @@
.iter()
.map(|field| {
let name: Arc<str> = field.name().as_str().into();
-let dtype = types_map.get(&name).unwrap().clone();
+let dtype = types_map[&name].clone();
let chunks = arrays_map.remove(&name).unwrap();

(
1 change: 1 addition & 0 deletions clippy.toml
@@ -1 +1,2 @@
+allow-expect-in-tests = true
allow-unwrap-in-tests = true
4 changes: 3 additions & 1 deletion encodings/alp/benches/alp_compress.rs
@@ -1,3 +1,5 @@
+#![allow(clippy::unwrap_used)]
+
use arrow::array::{as_primitive_array, ArrowNativeTypeOp, ArrowPrimitiveType};
use arrow::datatypes::{Float32Type, Float64Type};
use divan::{black_box, Bencher};
@@ -49,7 +51,7 @@

fn alp_canonicalize_sum<T: ArrowPrimitiveType>(array: ALPArray) -> T::Native {
let array = array.into_canonical().unwrap().into_arrow();
-let arrow_primitive = as_primitive_array::<T>(array.as_ref());
+let arrow_primitive = as_primitive_array::<T>(array.as_ref().unwrap());
arrow_primitive
.iter()
.fold(T::default_value(), |acc, value| {
12 changes: 10 additions & 2 deletions encodings/alp/src/alp.rs
@@ -4,6 +4,7 @@ use std::mem::size_of;
use itertools::Itertools;
use num_traits::{Float, NumCast, PrimInt, Zero};
use serde::{Deserialize, Serialize};
+use vortex_error::vortex_panic;

const SAMPLE_SIZE: usize = 32;

@@ -20,7 +21,7 @@ impl Display for Exponents {
}

pub trait ALPFloat: Float + Display + 'static {
-type ALPInt: PrimInt;
+type ALPInt: PrimInt + Display;

const FRACTIONAL_BITS: u8;
const MAX_EXPONENT: u8;
@@ -118,7 +119,14 @@

#[inline]
fn decode_single(encoded: Self::ALPInt, exponents: Exponents) -> Self {
-let encoded_float: Self = Self::from(encoded).unwrap();
+let encoded_float: Self = Self::from(encoded).unwrap_or_else(|| {
+    vortex_panic!(
+        "Failed to convert encoded value {} from {} to {} in ALPFloat::decode_single",
+        encoded,
+        std::any::type_name::<Self::ALPInt>(),
+        std::any::type_name::<Self>()
+    )
+});
encoded_float * Self::F10[exponents.f as usize] * Self::IF10[exponents.e as usize]
}
}