feat: concise benchmark names

Currently the Taxi and l_comment datasets each have their own Criterion "group", which causes them to be plotted separately. With this PR, there is a single "compress time" plot, measured in seconds, that includes every dataset. I also eliminated the comparison to uncompressed Parquet because (except for a tiny 1024-byte dataset) we always beat uncompressed Parquet; Parquet with ZSTD is a sufficient comparison. Finally, I removed some characters from several benchmark names in the hope that the Markdown tables on GitHub will be more legible.
spiraldb · Oct 9, 2024 · 284cf99 · 284cf99
1 parent 4aa30c0
commit 284cf99
Showing 1 changed file with 10 additions and 36 deletions.
diff --git a/bench-vortex/benches/compress_noci.rs b/bench-vortex/benches/compress_noci.rs
@@ -87,7 +87,6 @@ fn vortex_written_size(array: &Array) -> u64 {
 fn benchmark_compress<T: criterion::measurement::Measurement, F, U>(
     compressor: &SamplingCompressor<'_>,
     make_uncompressed: F,
-    group_name: &str,
     group: &mut BenchmarkGroup<'_, T>,
     bench_name: &str,
 ) where
@@ -100,7 +99,7 @@ fn benchmark_compress<T: criterion::measurement::Measurement, F, U>(
     let mut compressed_size = 0;
 
     group.throughput(Throughput::Bytes(uncompressed_size as u64));
-    group.bench_function(format!("{} compression", bench_name), |b| {
+    group.bench_function(bench_name, |b| {
         b.iter_with_large_drop(|| {
             let compressed = black_box(compressor.compress(uncompressed.as_ref(), None)).unwrap();
             compressed_size = compressed.nbytes();
@@ -124,13 +123,10 @@ fn benchmark_compress<T: criterion::measurement::Measurement, F, U>(
             Compression::ZSTD(ZstdLevel::default()),
         );
 
-        let parquet_uncompressed_nbytes =
-            parquet_written_size(uncompressed.as_ref(), Compression::UNCOMPRESSED);
-
         println!(
             "{}",
             serde_json::to_string(&GenericBenchmarkResults {
-                name: &format!("{} Vortex-to-ParquetZstd Ratio/{}", group_name, bench_name),
+                name: &format!("vortex:parquet-zstd size/{}", bench_name),
                 value: (vortex_nbytes as f64) / (parquet_zstd_nbytes as f64),
                 unit: "ratio",
                 range: 0.0,
@@ -141,21 +137,7 @@ fn benchmark_compress<T: criterion::measurement::Measurement, F, U>(
         println!(
             "{}",
             serde_json::to_string(&GenericBenchmarkResults {
-                name: &format!(
-                    "{} Vortex-to-ParquetUncompressed Ratio/{}",
-                    group_name, bench_name
-                ),
-                value: (vortex_nbytes as f64) / (parquet_uncompressed_nbytes as f64),
-                unit: "ratio",
-                range: 0.0,
-            })
-            .unwrap()
-        );
-
-        println!(
-            "{}",
-            serde_json::to_string(&GenericBenchmarkResults {
-                name: &format!("{} Compression Ratio/{}", group_name, bench_name),
+                name: &format!("vortex:raw size/{}", bench_name),
                 value: (compressed_size as f64) / (uncompressed_size as f64),
                 unit: "ratio",
                 range: 0.0,
@@ -166,7 +148,7 @@ fn benchmark_compress<T: criterion::measurement::Measurement, F, U>(
         println!(
             "{}",
             serde_json::to_string(&GenericBenchmarkResults {
-                name: &format!("{} Compression Size/{}", group_name, bench_name),
+                name: &format!("vortex size/{}", bench_name),
                 value: compressed_size as f64,
                 unit: "bytes",
                 range: 0.0,
@@ -178,22 +160,19 @@ fn benchmark_compress<T: criterion::measurement::Measurement, F, U>(
 
 fn yellow_taxi_trip_data(c: &mut Criterion) {
     taxi_data_parquet();
-    let group_name = "Yellow Taxi Trip Data";
-    let mut group = c.benchmark_group(format!("{} Compression Time", group_name));
+    let mut group = c.benchmark_group("compress time");
     group.sample_size(10);
     benchmark_compress(
         &SamplingCompressor::default(),
         fetch_taxi_data,
-        group_name,
         &mut group,
         "taxi",
     );
     group.finish()
 }
 
 fn public_bi_benchmark(c: &mut Criterion) {
-    let group_name = "Public BI";
-    let mut group = c.benchmark_group(format!("{} Compression Time", group_name));
+    let mut group = c.benchmark_group("compress time");
     group.sample_size(10);
     // group.measurement_time(Duration::new(10, 0));
 
@@ -216,7 +195,6 @@ fn public_bi_benchmark(c: &mut Criterion) {
         benchmark_compress(
             &SamplingCompressor::default(),
             || dataset.to_vortex_array().unwrap(),
-            group_name,
             &mut group,
             dataset_handle.dataset_name(),
         );
@@ -239,8 +217,7 @@ fn tpc_h_l_comment(c: &mut Criterion) {
     let compressor = SamplingCompressor::default().excluding(&FSSTCompressor);
     let compressor_fsst = SamplingCompressor::default();
 
-    let group_name = "TPC-H l_comment";
-    let mut group = c.benchmark_group(format!("{} Compression Time", group_name));
+    let mut group = c.benchmark_group("compress time");
     group.sample_size(10);
     group.measurement_time(Duration::new(15, 0));
 
@@ -263,17 +240,15 @@ fn tpc_h_l_comment(c: &mut Criterion) {
     benchmark_compress(
         &compressor,
         || &comments,
-        group_name,
         &mut group,
-        "chunked-without-fsst",
+        "TPC-H l_comment chunked without fsst",
     );
 
     benchmark_compress(
         &compressor_fsst,
         || &comments,
-        group_name,
         &mut group,
-        "chunked-with-fsst",
+        "TPC-H l_comment chunked",
     );
 
     let comments_canonical = comments
@@ -289,9 +264,8 @@ fn tpc_h_l_comment(c: &mut Criterion) {
     benchmark_compress(
         &compressor_fsst,
         || &comments_canonical_chunked,
-        group_name,
         &mut group,
-        "canonical-with-fsst",
+        "TPC-H l_comment canonical",
     );
 
     group.finish();