Run benchmarks as tests in CI (#797)
Our benchmarking suite is actually a pretty valuable end-to-end test, especially when working on compute-related changes. This PR adds it to CI so we don't accidentally break it (as we've done many times in recent weeks).

---------

Co-authored-by: Robert Kruszewski <[email protected]>
AdamGS and robert3005 authored Sep 12, 2024
1 parent a79eba3 commit 83dffc5
Showing 7 changed files with 59 additions and 35 deletions.
19 changes: 18 additions & 1 deletion .github/workflows/ci.yml
@@ -12,6 +12,7 @@ permissions:
 
 env:
   CARGO_TERM_COLOR: always
+  RUST_BACKTRACE: 1
 
 jobs:
   build:
@@ -54,11 +55,27 @@ jobs:
     name: 'miri'
     runs-on: ubuntu-latest
     env:
-      RUST_BACKTRACE: 1
       MIRIFLAGS: -Zmiri-strict-provenance -Zmiri-symbolic-alignment-check -Zmiri-backtrace=full
     steps:
       - uses: actions/checkout@v4
       - uses: ./.github/actions/cleanup
       - uses: ./.github/actions/setup-rust
       - name: Run tests with Miri
         run: cargo miri test
+
+  bench-test:
+    name: 'bench test'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/cleanup
+
+      - uses: ./.github/actions/setup-rust
+      - uses: ./.github/actions/setup-python
+      # Required to run benchmarks
+      - name: Install DuckDB
+        uses: opt-nc/[email protected]
+        with:
+          version: v1.0.0
+      - name: Rust Bench as test
+        run: cargo bench --benches -- --test
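
Criterion treats a trailing `--test` argument as a request to run every benchmark exactly once, untimed, reporting only pass/fail, so `cargo bench --benches -- --test` turns the whole suite into a cheap end-to-end test. A minimal sketch of a benchmark exercised this way (hypothetical example, not from this repo):

```rust
use criterion::{black_box, criterion_group, criterion_main, Criterion};

// Under `cargo bench --benches -- --test`, Criterion executes this body a
// single time without collecting timing samples; a panic anywhere in it
// (e.g. a broken compressor) fails the run like a failing unit test.
fn example_bench(c: &mut Criterion) {
    c.bench_function("example", |b| {
        b.iter(|| black_box((0u64..1_000).map(|x| x.wrapping_mul(31)).sum::<u64>()))
    });
}

criterion_group!(benches, example_bench);
criterion_main!(benches);
```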
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -81,7 +81,7 @@ fastlanes = "0.1.5"
 flatbuffers = "24.3.25"
 flexbuffers = "2.0.0"
 fs_extra = "1.3.0"
-fsst-rs = "0.4.0"
+fsst-rs = "0.4.1"
 futures = { version = "0.3.30", default-features = false }
 futures-executor = "0.3.30"
 futures-util = "0.3.30"
2 changes: 1 addition & 1 deletion bench-vortex/benches/compress_benchmark.rs
@@ -19,7 +19,7 @@ fn vortex_compress_taxi(c: &mut Criterion) {
 
 fn vortex_compress_medicare1(c: &mut Criterion) {
     let dataset = BenchmarkDatasets::PBI(Medicare1);
-    dataset.as_uncompressed();
+    dataset.write_as_parquet();
     let mut group = c.benchmark_group("end to end - medicare");
     group.sample_size(10);
     group.bench_function("compress", |b| {
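
Because these end-to-end benchmarks are expensive, the group drops Criterion's default of 100 samples to 10, the minimum Criterion accepts. A hedged sketch of that setup pattern (the in-memory data is a hypothetical stand-in for the dataset that `dataset.write_as_parquet()` prepares):

```rust
use criterion::{black_box, criterion_group, criterion_main, Criterion};

// Hedged sketch: heavyweight end-to-end benchmarks lower the sample count
// so that full bench runs, and the single-iteration `--test` pass in CI,
// both finish quickly.
fn end_to_end(c: &mut Criterion) {
    let data = vec![1u8; 1 << 20]; // stand-in for a prepared on-disk dataset
    let mut group = c.benchmark_group("end to end - example");
    group.sample_size(10); // Criterion's minimum sample size
    group.bench_function("compress", |b| {
        b.iter(|| black_box(data.iter().map(|&x| x as u64).sum::<u64>()))
    });
    group.finish();
}

criterion_group!(benches, end_to_end);
criterion_main!(benches);
```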
62 changes: 34 additions & 28 deletions bench-vortex/benches/random_access.rs
@@ -1,3 +1,4 @@
+use std::env;
 use std::sync::Arc;
 
 use bench_vortex::reader::{
@@ -44,21 +45,24 @@ fn random_access_vortex(c: &mut Criterion) {
         })
     });
 
-    group.sample_size(10).bench_function("R2", |b| {
-        let r2_fs = Arc::new(AmazonS3Builder::from_env().build().unwrap()) as Arc<dyn ObjectStore>;
-        let r2_path = object_store::path::Path::from_url_path(
-            taxi_vortex.file_name().unwrap().to_str().unwrap(),
-        )
-        .unwrap();
-
-        b.to_async(Runtime::new().unwrap()).iter(|| async {
-            black_box(
-                take_vortex_object_store(&r2_fs, &r2_path, &INDICES)
-                    .await
-                    .unwrap(),
-            )
-        })
-    });
+    if env::var("AWS_ACCESS_KEY_ID").is_ok() {
+        group.sample_size(10).bench_function("R2", |b| {
+            let r2_fs =
+                Arc::new(AmazonS3Builder::from_env().build().unwrap()) as Arc<dyn ObjectStore>;
+            let r2_path = object_store::path::Path::from_url_path(
+                taxi_vortex.file_name().unwrap().to_str().unwrap(),
+            )
+            .unwrap();
+
+            b.to_async(Runtime::new().unwrap()).iter(|| async {
+                black_box(
+                    take_vortex_object_store(&r2_fs, &r2_path, &INDICES)
+                        .await
+                        .unwrap(),
+                )
+            })
+        });
+    }
 }
 
 fn random_access_parquet(c: &mut Criterion) {
@@ -71,21 +75,23 @@ fn random_access_parquet(c: &mut Criterion) {
             .iter(|| async { black_box(take_parquet(&taxi_parquet, &INDICES).await.unwrap()) })
     });
 
-    group.bench_function("R2", |b| {
-        let r2_fs = Arc::new(AmazonS3Builder::from_env().build().unwrap());
-        let r2_parquet_path = object_store::path::Path::from_url_path(
-            taxi_parquet.file_name().unwrap().to_str().unwrap(),
-        )
-        .unwrap();
-
-        b.to_async(Runtime::new().unwrap()).iter(|| async {
-            black_box(
-                take_parquet_object_store(r2_fs.clone(), &r2_parquet_path, &INDICES)
-                    .await
-                    .unwrap(),
-            )
-        })
-    });
+    if env::var("AWS_ACCESS_KEY_ID").is_ok() {
+        group.bench_function("R2", |b| {
+            let r2_fs = Arc::new(AmazonS3Builder::from_env().build().unwrap());
+            let r2_parquet_path = object_store::path::Path::from_url_path(
+                taxi_parquet.file_name().unwrap().to_str().unwrap(),
+            )
+            .unwrap();
+
+            b.to_async(Runtime::new().unwrap()).iter(|| async {
+                black_box(
+                    take_parquet_object_store(r2_fs.clone(), &r2_parquet_path, &INDICES)
+                        .await
+                        .unwrap(),
+                )
+            })
+        });
+    }
 }
 
 criterion_group!(benches, random_access_vortex, random_access_parquet);
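
The R2 benchmarks build their client with `AmazonS3Builder::from_env()`, which reads the standard AWS environment variables, so checking `AWS_ACCESS_KEY_ID` up front lets CI runners without credentials skip the network-bound cases instead of panicking in `build().unwrap()`. A minimal sketch of that gating pattern (the helper name is hypothetical):

```rust
use std::{env, sync::Arc};

use object_store::aws::AmazonS3Builder;
use object_store::ObjectStore;

// Returns a client only when AWS-style credentials are present in the
// environment; from_env() picks up AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY,
// and related variables, mirroring the `is_ok()` guard in the diff above.
fn maybe_object_store() -> Option<Arc<dyn ObjectStore>> {
    if env::var("AWS_ACCESS_KEY_ID").is_err() {
        return None; // no credentials: caller skips the benchmark
    }
    let store = AmazonS3Builder::from_env().build().ok()?;
    Some(Arc::new(store) as Arc<dyn ObjectStore>)
}
```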
2 changes: 1 addition & 1 deletion bench-vortex/src/public_bi_data.rs
@@ -456,7 +456,7 @@ impl BenchmarkDataset for BenchmarkDatasets {
                 &path_for_file_type(self, output_fname, "parquet"),
                 |output_path| write_csv_as_parquet(f, output_path),
             )
-            .expect("Failed to compress to parquet");
+            .unwrap();
             let pq_size = compressed.metadata().unwrap().size();
             info!(
                 "Parquet size: {}, {}B",
3 changes: 2 additions & 1 deletion bench-vortex/src/reader.rs
@@ -121,7 +121,8 @@ pub fn write_csv_as_parquet(csv_path: PathBuf, output_path: &Path) -> VortexResult<()> {
         csv_path.as_path().to_str().unwrap(),
         output_path.to_str().unwrap()
     ))
-    .status()?
+    .status()
+    .unwrap()
     .exit_ok()
     .unwrap();
     Ok(())
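
Switching from `?` to `.unwrap()` (the `exit_ok().unwrap()` was already there) makes a failed CSV-to-Parquet conversion panic on the spot, which the new `RUST_BACKTRACE: 1` CI env turns into a full backtrace instead of a confusing downstream benchmark error. A hedged sketch of the equivalent pattern on stable Rust (`exit_ok` is, at the time of writing, a nightly-only API; `run_or_panic` is a hypothetical helper):

```rust
use std::process::Command;

// Run a command and panic loudly if it cannot be spawned or exits non-zero,
// mirroring `.status().unwrap().exit_ok().unwrap()` without the unstable
// `exit_status_error` feature.
fn run_or_panic(cmd: &mut Command) {
    let status = cmd.status().unwrap(); // panics if the process can't start
    assert!(status.success(), "command failed with {status}");
}

fn main() {
    run_or_panic(Command::new("echo").arg("hello"));
}
```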
