Skip to content

Commit

Permalink
feat: concise benchmark names
Browse files Browse the repository at this point in the history
Currently the Taxi and l_comment datasets each have their own Criterion "group",
which causes them to be plotted separately. With this PR, there is a single
"compress time" plot, measured in seconds, that includes every dataset.

I also eliminated the comparison to uncompressed Parquet because (except for
a tiny 1024-byte dataset) we always beat it. Parquet with ZSTD is a
sufficient comparison.

Finally, I shortened several benchmark names in the hope that the Markdown
tables on GitHub will be more legible.
  • Loading branch information
danking committed Oct 9, 2024
1 parent 4aa30c0 commit 284cf99
Showing 1 changed file with 10 additions and 36 deletions.
46 changes: 10 additions & 36 deletions bench-vortex/benches/compress_noci.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ fn vortex_written_size(array: &Array) -> u64 {
fn benchmark_compress<T: criterion::measurement::Measurement, F, U>(
compressor: &SamplingCompressor<'_>,
make_uncompressed: F,
group_name: &str,
group: &mut BenchmarkGroup<'_, T>,
bench_name: &str,
) where
Expand All @@ -100,7 +99,7 @@ fn benchmark_compress<T: criterion::measurement::Measurement, F, U>(
let mut compressed_size = 0;

group.throughput(Throughput::Bytes(uncompressed_size as u64));
group.bench_function(format!("{} compression", bench_name), |b| {
group.bench_function(bench_name, |b| {
b.iter_with_large_drop(|| {
let compressed = black_box(compressor.compress(uncompressed.as_ref(), None)).unwrap();
compressed_size = compressed.nbytes();
Expand All @@ -124,13 +123,10 @@ fn benchmark_compress<T: criterion::measurement::Measurement, F, U>(
Compression::ZSTD(ZstdLevel::default()),
);

let parquet_uncompressed_nbytes =
parquet_written_size(uncompressed.as_ref(), Compression::UNCOMPRESSED);

println!(
"{}",
serde_json::to_string(&GenericBenchmarkResults {
name: &format!("{} Vortex-to-ParquetZstd Ratio/{}", group_name, bench_name),
name: &format!("vortex:parquet-zstd size/{}", bench_name),
value: (vortex_nbytes as f64) / (parquet_zstd_nbytes as f64),
unit: "ratio",
range: 0.0,
Expand All @@ -141,21 +137,7 @@ fn benchmark_compress<T: criterion::measurement::Measurement, F, U>(
println!(
"{}",
serde_json::to_string(&GenericBenchmarkResults {
name: &format!(
"{} Vortex-to-ParquetUncompressed Ratio/{}",
group_name, bench_name
),
value: (vortex_nbytes as f64) / (parquet_uncompressed_nbytes as f64),
unit: "ratio",
range: 0.0,
})
.unwrap()
);

println!(
"{}",
serde_json::to_string(&GenericBenchmarkResults {
name: &format!("{} Compression Ratio/{}", group_name, bench_name),
name: &format!("vortex:raw size/{}", bench_name),
value: (compressed_size as f64) / (uncompressed_size as f64),
unit: "ratio",
range: 0.0,
Expand All @@ -166,7 +148,7 @@ fn benchmark_compress<T: criterion::measurement::Measurement, F, U>(
println!(
"{}",
serde_json::to_string(&GenericBenchmarkResults {
name: &format!("{} Compression Size/{}", group_name, bench_name),
name: &format!("vortex size/{}", bench_name),
value: compressed_size as f64,
unit: "bytes",
range: 0.0,
Expand All @@ -178,22 +160,19 @@ fn benchmark_compress<T: criterion::measurement::Measurement, F, U>(

fn yellow_taxi_trip_data(c: &mut Criterion) {
taxi_data_parquet();
let group_name = "Yellow Taxi Trip Data";
let mut group = c.benchmark_group(format!("{} Compression Time", group_name));
let mut group = c.benchmark_group("compress time");
group.sample_size(10);
benchmark_compress(
&SamplingCompressor::default(),
fetch_taxi_data,
group_name,
&mut group,
"taxi",
);
group.finish()
}

fn public_bi_benchmark(c: &mut Criterion) {
let group_name = "Public BI";
let mut group = c.benchmark_group(format!("{} Compression Time", group_name));
let mut group = c.benchmark_group("compress time");
group.sample_size(10);
// group.measurement_time(Duration::new(10, 0));

Expand All @@ -216,7 +195,6 @@ fn public_bi_benchmark(c: &mut Criterion) {
benchmark_compress(
&SamplingCompressor::default(),
|| dataset.to_vortex_array().unwrap(),
group_name,
&mut group,
dataset_handle.dataset_name(),
);
Expand All @@ -239,8 +217,7 @@ fn tpc_h_l_comment(c: &mut Criterion) {
let compressor = SamplingCompressor::default().excluding(&FSSTCompressor);
let compressor_fsst = SamplingCompressor::default();

let group_name = "TPC-H l_comment";
let mut group = c.benchmark_group(format!("{} Compression Time", group_name));
let mut group = c.benchmark_group("compress time");
group.sample_size(10);
group.measurement_time(Duration::new(15, 0));

Expand All @@ -263,17 +240,15 @@ fn tpc_h_l_comment(c: &mut Criterion) {
benchmark_compress(
&compressor,
|| &comments,
group_name,
&mut group,
"chunked-without-fsst",
"TPC-H l_comment chunked without fsst",
);

benchmark_compress(
&compressor_fsst,
|| &comments,
group_name,
&mut group,
"chunked-with-fsst",
"TPC-H l_comment chunked",
);

let comments_canonical = comments
Expand All @@ -289,9 +264,8 @@ fn tpc_h_l_comment(c: &mut Criterion) {
benchmark_compress(
&compressor_fsst,
|| &comments_canonical_chunked,
group_name,
&mut group,
"canonical-with-fsst",
"TPC-H l_comment canonical",
);

group.finish();
Expand Down

0 comments on commit 284cf99

Please sign in to comment.