Run benchmarks as tests in CI (#797)
Our benchmarking suite is actually a pretty valuable end-to-end test, especially when working on compute-related changes. This PR adds it to CI so we don't accidentally break it (as we've done many times in recent weeks).

---------

Co-authored-by: Robert Kruszewski <[email protected]>
AdamGS and robert3005 authored Sep 12, 2024
1 parent a79eba3 commit 83dffc5
Showing 7 changed files with 59 additions and 35 deletions.
19 changes: 18 additions & 1 deletion .github/workflows/ci.yml
@@ -12,6 +12,7 @@ permissions:
 
 env:
   CARGO_TERM_COLOR: always
+  RUST_BACKTRACE: 1
 
 jobs:
   build:
@@ -54,11 +55,27 @@ jobs:
     name: 'miri'
     runs-on: ubuntu-latest
     env:
-      RUST_BACKTRACE: 1
       MIRIFLAGS: -Zmiri-strict-provenance -Zmiri-symbolic-alignment-check -Zmiri-backtrace=full
     steps:
       - uses: actions/checkout@v4
       - uses: ./.github/actions/cleanup
       - uses: ./.github/actions/setup-rust
       - name: Run tests with Miri
         run: cargo miri test
+
+  bench-test:
+    name: 'bench test'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/cleanup
+
+      - uses: ./.github/actions/setup-rust
+      - uses: ./.github/actions/setup-python
+      # Required to run benchmarks
+      - name: Install DuckDB
+        uses: opt-nc/[email protected]
+        with:
+          version: v1.0.0
+      - name: Rust Bench as test
+        run: cargo bench --benches -- --test
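
Criterion treats a trailing `--test` argument as a request to run every benchmark exactly once, untimed, reporting only pass/fail, so `cargo bench --benches -- --test` turns the whole suite into a cheap end-to-end test. A minimal sketch of a benchmark exercised this way (hypothetical example, not from this repo):

```rust
use criterion::{black_box, criterion_group, criterion_main, Criterion};

// Under `cargo bench --benches -- --test`, Criterion executes this body a
// single time without collecting timing samples; a panic anywhere in it
// (e.g. a broken compressor) fails the run like a failing unit test.
fn example_bench(c: &mut Criterion) {
    c.bench_function("example", |b| {
        b.iter(|| black_box((0u64..1_000).map(|x| x.wrapping_mul(31)).sum::<u64>()))
    });
}

criterion_group!(benches, example_bench);
criterion_main!(benches);
```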
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -81,7 +81,7 @@ fastlanes = "0.1.5"
 flatbuffers = "24.3.25"
 flexbuffers = "2.0.0"
 fs_extra = "1.3.0"
-fsst-rs = "0.4.0"
+fsst-rs = "0.4.1"
 futures = { version = "0.3.30", default-features = false }
 futures-executor = "0.3.30"
 futures-util = "0.3.30"
2 changes: 1 addition & 1 deletion bench-vortex/benches/compress_benchmark.rs
@@ -19,7 +19,7 @@ fn vortex_compress_taxi(c: &mut Criterion) {
 
 fn vortex_compress_medicare1(c: &mut Criterion) {
     let dataset = BenchmarkDatasets::PBI(Medicare1);
-    dataset.as_uncompressed();
+    dataset.write_as_parquet();
     let mut group = c.benchmark_group("end to end - medicare");
     group.sample_size(10);
     group.bench_function("compress", |b| {
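
Because these end-to-end benchmarks are expensive, the group drops Criterion's default of 100 samples to 10, the minimum Criterion accepts. A hedged sketch of that setup pattern (the in-memory data is a hypothetical stand-in for the dataset that `dataset.write_as_parquet()` prepares):

```rust
use criterion::{black_box, criterion_group, criterion_main, Criterion};

// Hedged sketch: heavyweight end-to-end benchmarks lower the sample count
// so that full bench runs, and the single-iteration `--test` pass in CI,
// both finish quickly.
fn end_to_end(c: &mut Criterion) {
    let data = vec![1u8; 1 << 20]; // stand-in for a prepared on-disk dataset
    let mut group = c.benchmark_group("end to end - example");
    group.sample_size(10); // Criterion's minimum sample size
    group.bench_function("compress", |b| {
        b.iter(|| black_box(data.iter().map(|&x| x as u64).sum::<u64>()))
    });
    group.finish();
}

criterion_group!(benches, end_to_end);
criterion_main!(benches);
```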
62 changes: 34 additions & 28 deletions bench-vortex/benches/random_access.rs
@@ -1,3 +1,4 @@
+use std::env;
 use std::sync::Arc;
 
 use bench_vortex::reader::{
@@ -44,21 +45,24 @@ fn random_access_vortex(c: &mut Criterion) {
         })
     });
 
-    group.sample_size(10).bench_function("R2", |b| {
-        let r2_fs = Arc::new(AmazonS3Builder::from_env().build().unwrap()) as Arc<dyn ObjectStore>;
-        let r2_path = object_store::path::Path::from_url_path(
-            taxi_vortex.file_name().unwrap().to_str().unwrap(),
-        )
-        .unwrap();
-
-        b.to_async(Runtime::new().unwrap()).iter(|| async {
-            black_box(
-                take_vortex_object_store(&r2_fs, &r2_path, &INDICES)
-                    .await
-                    .unwrap(),
-            )
-        })
-    });
+    if env::var("AWS_ACCESS_KEY_ID").is_ok() {
+        group.sample_size(10).bench_function("R2", |b| {
+            let r2_fs =
+                Arc::new(AmazonS3Builder::from_env().build().unwrap()) as Arc<dyn ObjectStore>;
+            let r2_path = object_store::path::Path::from_url_path(
+                taxi_vortex.file_name().unwrap().to_str().unwrap(),
+            )
+            .unwrap();
+
+            b.to_async(Runtime::new().unwrap()).iter(|| async {
+                black_box(
+                    take_vortex_object_store(&r2_fs, &r2_path, &INDICES)
+                        .await
+                        .unwrap(),
+                )
+            })
+        });
+    }
 }
 
 fn random_access_parquet(c: &mut Criterion) {
@@ -71,21 +75,23 @@ fn random_access_parquet(c: &mut Criterion) {
             .iter(|| async { black_box(take_parquet(&taxi_parquet, &INDICES).await.unwrap()) })
     });
 
-    group.bench_function("R2", |b| {
-        let r2_fs = Arc::new(AmazonS3Builder::from_env().build().unwrap());
-        let r2_parquet_path = object_store::path::Path::from_url_path(
-            taxi_parquet.file_name().unwrap().to_str().unwrap(),
-        )
-        .unwrap();
-
-        b.to_async(Runtime::new().unwrap()).iter(|| async {
-            black_box(
-                take_parquet_object_store(r2_fs.clone(), &r2_parquet_path, &INDICES)
-                    .await
-                    .unwrap(),
-            )
-        })
-    });
+    if env::var("AWS_ACCESS_KEY_ID").is_ok() {
+        group.bench_function("R2", |b| {
+            let r2_fs = Arc::new(AmazonS3Builder::from_env().build().unwrap());
+            let r2_parquet_path = object_store::path::Path::from_url_path(
+                taxi_parquet.file_name().unwrap().to_str().unwrap(),
+            )
+            .unwrap();
+
+            b.to_async(Runtime::new().unwrap()).iter(|| async {
+                black_box(
+                    take_parquet_object_store(r2_fs.clone(), &r2_parquet_path, &INDICES)
+                        .await
+                        .unwrap(),
+                )
+            })
+        });
+    }
 }
 
 criterion_group!(benches, random_access_vortex, random_access_parquet);
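
The R2 benchmarks build their client with `AmazonS3Builder::from_env()`, which reads the standard AWS environment variables, so checking `AWS_ACCESS_KEY_ID` up front lets CI runners without credentials skip the network-bound cases instead of panicking in `build().unwrap()`. A minimal sketch of that gating pattern (the helper name is hypothetical):

```rust
use std::{env, sync::Arc};

use object_store::aws::AmazonS3Builder;
use object_store::ObjectStore;

// Returns a client only when AWS-style credentials are present in the
// environment; from_env() picks up AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY,
// and related variables, mirroring the `is_ok()` guard in the diff above.
fn maybe_object_store() -> Option<Arc<dyn ObjectStore>> {
    if env::var("AWS_ACCESS_KEY_ID").is_err() {
        return None; // no credentials: caller skips the benchmark
    }
    let store = AmazonS3Builder::from_env().build().ok()?;
    Some(Arc::new(store) as Arc<dyn ObjectStore>)
}
```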
2 changes: 1 addition & 1 deletion bench-vortex/src/public_bi_data.rs
@@ -456,7 +456,7 @@ impl BenchmarkDataset for BenchmarkDatasets {
                 &path_for_file_type(self, output_fname, "parquet"),
                 |output_path| write_csv_as_parquet(f, output_path),
             )
-            .expect("Failed to compress to parquet");
+            .unwrap();
             let pq_size = compressed.metadata().unwrap().size();
             info!(
                 "Parquet size: {}, {}B",
3 changes: 2 additions & 1 deletion bench-vortex/src/reader.rs
@@ -121,7 +121,8 @@ pub fn write_csv_as_parquet(csv_path: PathBuf, output_path: &Path) -> VortexResult<()> {
         csv_path.as_path().to_str().unwrap(),
         output_path.to_str().unwrap()
     ))
-    .status()?
+    .status()
+    .unwrap()
     .exit_ok()
     .unwrap();
     Ok(())
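
Switching from `?` to `.unwrap()` (the `exit_ok().unwrap()` was already there) makes a failed CSV-to-Parquet conversion panic on the spot, which the new `RUST_BACKTRACE: 1` CI env turns into a full backtrace instead of a confusing downstream benchmark error. A hedged sketch of the equivalent pattern on stable Rust (`exit_ok` is, at the time of writing, a nightly-only API; `run_or_panic` is a hypothetical helper):

```rust
use std::process::Command;

// Run a command and panic loudly if it cannot be spawned or exits non-zero,
// mirroring `.status().unwrap().exit_ok().unwrap()` without the unstable
// `exit_status_error` feature.
fn run_or_panic(cmd: &mut Command) {
    let status = cmd.status().unwrap(); // panics if the process can't start
    assert!(status.success(), "command failed with {status}");
}

fn main() {
    run_or_panic(Command::new("echo").arg("hello"));
}
```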
