Skip to content

Commit

Permalink
feat: add BENCH_VORTEX_RATIOS variable to filter ratio benchmarks (#970)
Browse files Browse the repository at this point in the history
Ratio benchmarks are not supported by criterion. Instead, back in #882,
I added some code to generate ratios and print them in the format
expected by our GitHub Action. Unfortunately, this code currently runs
unconditionally, which is annoying when you are filtering benchmarks.

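Now the ratios are only generated when the `BENCH_VORTEX_RATIOS` environment
variable is set to a regular expression that matches the benchmark name, so
you can do this: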

```
BENCH_VORTEX_RATIOS=AirlineSentiment cargo bench --bench compress_noci -- AirlineSentiment
```

And you'll receive both ratios and compression time benchmarks for
AirlineSentiment and no output for other datasets.

But when you leave `BENCH_VORTEX_RATIOS` unset and do this:

```
cargo bench --bench compress_noci -- AirlineSentiment
```

You only get compression time benchmarks for AirlineSentiment.
  • Loading branch information
danking authored Oct 3, 2024
1 parent fbb09ab commit 74d7aa9
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 65 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/bench-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
run: |
cargo install cargo-criterion
cargo criterion --bench ${{ matrix.benchmark.id }} --message-format=json 2>&1 | tee out.json
BENCH_VORTEX_RATIOS='.*' cargo criterion --bench ${{ matrix.benchmark.id }} --message-format=json 2>&1 | tee out.json
cat out.json
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
run: |
cargo install cargo-criterion
cargo criterion --bench ${{ matrix.benchmark.id }} --message-format=json 2>&1 | tee out.json
BENCH_VORTEX_RATIOS='.*' cargo criterion --bench ${{ matrix.benchmark.id }} --message-format=json 2>&1 | tee out.json
cat out.json
Expand Down
13 changes: 7 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ pyo3 = { version = "0.22.2", features = ["extension-module", "abi3-py311"] }
pyo3-log = "0.11.0"
rand = "0.8.5"
rayon = "1.10.0"
regex = "1.11.0"
reqwest = { version = "0.12.0", features = ["blocking"] }
rstest = "0.23"
seq-macro = "0.3.5"
Expand Down
1 change: 1 addition & 0 deletions bench-vortex/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ parquet = { workspace = true, features = [] }
prettytable-rs = { workspace = true }
rand = { workspace = true }
rayon = { workspace = true }
regex = { workspace = true }
reqwest = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
Expand Down
121 changes: 64 additions & 57 deletions bench-vortex/benches/compress_noci.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::fs;
use std::io::Cursor;
use std::path::Path;
use std::time::Duration;
use std::{env, fs};

use arrow_array::RecordBatch;
use bench_vortex::data_downloads::BenchmarkDataset;
Expand All @@ -16,6 +16,7 @@ use criterion::{
use parquet::arrow::ArrowWriter;
use parquet::basic::{Compression, ZstdLevel};
use parquet::file::properties::WriterProperties;
use regex::Regex;
use vortex::array::{ChunkedArray, StructArray};
use vortex::{Array, ArrayDType, IntoArray, IntoCanonical};
use vortex_dtype::field::Field;
Expand Down Expand Up @@ -106,67 +107,73 @@ fn benchmark_compress<T: criterion::measurement::Measurement, F, U>(
});
});

let vortex_nbytes = vortex_written_size(
&compressor
.compress(uncompressed.as_ref(), None)
.unwrap()
.into_array(),
);

let parquet_zstd_nbytes = parquet_written_size(
uncompressed.as_ref(),
Compression::ZSTD(ZstdLevel::default()),
);
if env::var("BENCH_VORTEX_RATIOS")
.ok()
.map(|x| Regex::new(&x).unwrap().is_match(bench_name))
.unwrap_or(false)
{
let vortex_nbytes = vortex_written_size(
&compressor
.compress(uncompressed.as_ref(), None)
.unwrap()
.into_array(),
);

let parquet_uncompressed_nbytes =
parquet_written_size(uncompressed.as_ref(), Compression::UNCOMPRESSED);
let parquet_zstd_nbytes = parquet_written_size(
uncompressed.as_ref(),
Compression::ZSTD(ZstdLevel::default()),
);

println!(
"{}",
serde_json::to_string(&GenericBenchmarkResults {
name: &format!("{} Vortex-to-ParquetZstd Ratio/{}", group_name, bench_name),
value: (vortex_nbytes as f64) / (parquet_zstd_nbytes as f64),
unit: "ratio",
range: 0.0,
})
.unwrap()
);
let parquet_uncompressed_nbytes =
parquet_written_size(uncompressed.as_ref(), Compression::UNCOMPRESSED);

println!(
"{}",
serde_json::to_string(&GenericBenchmarkResults {
name: &format!("{} Vortex-to-ParquetZstd Ratio/{}", group_name, bench_name),
value: (vortex_nbytes as f64) / (parquet_zstd_nbytes as f64),
unit: "ratio",
range: 0.0,
})
.unwrap()
);

println!(
"{}",
serde_json::to_string(&GenericBenchmarkResults {
name: &format!(
"{} Vortex-to-ParquetUncompressed Ratio/{}",
group_name, bench_name
),
value: (vortex_nbytes as f64) / (parquet_uncompressed_nbytes as f64),
unit: "ratio",
range: 0.0,
})
.unwrap()
);
println!(
"{}",
serde_json::to_string(&GenericBenchmarkResults {
name: &format!(
"{} Vortex-to-ParquetUncompressed Ratio/{}",
group_name, bench_name
),
value: (vortex_nbytes as f64) / (parquet_uncompressed_nbytes as f64),
unit: "ratio",
range: 0.0,
})
.unwrap()
);

println!(
"{}",
serde_json::to_string(&GenericBenchmarkResults {
name: &format!("{} Compression Ratio/{}", group_name, bench_name),
value: (compressed_size as f64) / (uncompressed_size as f64),
unit: "ratio",
range: 0.0,
})
.unwrap()
);
println!(
"{}",
serde_json::to_string(&GenericBenchmarkResults {
name: &format!("{} Compression Ratio/{}", group_name, bench_name),
value: (compressed_size as f64) / (uncompressed_size as f64),
unit: "ratio",
range: 0.0,
})
.unwrap()
);

println!(
"{}",
serde_json::to_string(&GenericBenchmarkResults {
name: &format!("{} Compression Size/{}", group_name, bench_name),
value: compressed_size as f64,
unit: "bytes",
range: 0.0,
})
.unwrap()
);
println!(
"{}",
serde_json::to_string(&GenericBenchmarkResults {
name: &format!("{} Compression Size/{}", group_name, bench_name),
value: compressed_size as f64,
unit: "bytes",
range: 0.0,
})
.unwrap()
);
}
}

fn yellow_taxi_trip_data(c: &mut Criterion) {
Expand Down

0 comments on commit 74d7aa9

Please sign in to comment.