Skip to content

feat: store benchmarks in s3 #2623

feat: store benchmarks in s3

feat: store benchmarks in s3 #2623

Workflow file for this run

name: PR Benchmarks
on:
pull_request:
types: [ labeled, synchronize ]
branches: [ "develop" ]
workflow_dispatch: { }
permissions:
actions: write
contents: read
pull-requests: write
id-token: write
jobs:
label_trigger:
runs-on: ubuntu-latest
if: ${{ contains(github.event.head_commit.message, '[benchmark]') || github.event.label.name == 'benchmark' && github.event_name == 'pull_request' }}
steps:
# We remove the benchmark label first so that the workflow can be re-triggered.
- uses: actions-ecosystem/action-remove-labels@v1
with:
labels: benchmark
bench:
needs: label_trigger
strategy:
matrix:
benchmark:
- id: datafusion
name: DataFusion
- id: random_access
name: Random Access
- id: compress
name: Vortex Compression
runs-on: self-hosted
if: ${{ contains(github.event.head_commit.message, '[benchmark]') || github.event.label.name == 'benchmark' && github.event_name == 'pull_request' }}
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/cleanup
- uses: ./.github/actions/setup-rust
- uses: spiraldb/actions/.github/actions/[email protected]
# The compression benchmarks rely on DuckDB being installed to convert CSV to Parquet
- name: Install DuckDB
uses: opt-nc/[email protected]
if: runner.environment != 'self-hosted'
with:
version: v1.0.0
- name: Set tempdir
if: runner.environment == 'self-hosted'
run: |
echo "TMPDIR=/work" >> $GITHUB_ENV
- name: Run benchmark
shell: bash
env:
BENCH_VORTEX_RATIOS: '.*'
RUSTFLAGS: '-C target-cpu=native'
run: |
cargo install cargo-criterion
cargo criterion --bench ${{ matrix.benchmark.id }} --message-format=json 2>&1 | tee out.json
cat out.json
sudo apt-get update && sudo apt-get install -y jq
jq --raw-input --compact-output '
fromjson?
| [ (if .mean != null then {name: .id, value: .mean.estimate, unit: .unit, range: ((.mean.upper_bound - .mean.lower_bound) / 2) } else {} end),
(if .throughput != null then {name: (.id + " throughput"), value: .throughput[].per_iteration, unit: .throughput[].unit, range: 0} else {} end),
{name, value, unit, range} ]
| .[]
| select(.value != null)
' \
out.json \
| jq --slurp --compact-output '.' >${{ matrix.benchmark.id }}.json
cat ${{ matrix.benchmark.id }}.json
- name: Store benchmark result
if: '!cancelled()'
uses: benchmark-action/github-action-benchmark@v1
with:
name: ${{ matrix.benchmark.name }}
tool: 'customSmallerIsBetter'
gh-pages-branch: gh-pages-bench
github-token: ${{ secrets.GITHUB_TOKEN }}
output-file-path: ${{ matrix.benchmark.id }}.json
summary-always: true
comment-always: true
auto-push: false
save-data-file: false
fail-on-alert: false
env:
# AWS Credentials for R2 storage tests
AWS_BUCKET: vortex-test
AWS_ENDPOINT: ${{ secrets.AWS_ENDPOINT }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
tpch:
needs: label_trigger
runs-on: self-hosted
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/cleanup
- uses: ./.github/actions/setup-rust
- uses: spiraldb/actions/.github/actions/[email protected]
# The compression benchmarks rely on DuckDB being installed to convert CSV to Parquet
- name: Install DuckDB
uses: opt-nc/[email protected]
if: runner.environment != 'self-hosted'
with:
version: v1.0.0
- name: Set tempdir
if: runner.environment == 'self-hosted'
run: |
echo "TMPDIR=/work" >> $GITHUB_ENV
- name: Run TPC-H benchmark
shell: bash
env:
BENCH_VORTEX_RATIOS: '.*'
RUSTFLAGS: '-C target-cpu=native'
run: |
cargo run --bin tpch_benchmark --release -- --only-vortex -d gh-json -t 1 | tee tpch.json
- name: Store benchmark result
if: '!cancelled()'
uses: benchmark-action/github-action-benchmark@v1
with:
name: 'TPC-H'
tool: 'customSmallerIsBetter'
gh-pages-branch: gh-pages-bench
github-token: ${{ secrets.GITHUB_TOKEN }}
output-file-path: tpch.json
summary-always: true
comment-always: true
auto-push: false
save-data-file: false
fail-on-alert: false
env:
# AWS Credentials for R2 storage tests
AWS_BUCKET: vortex-test
AWS_ENDPOINT: ${{ secrets.AWS_ENDPOINT }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
clickbench:
needs: label_trigger
runs-on: self-hosted
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/cleanup
- uses: ./.github/actions/setup-rust
- uses: spiraldb/actions/.github/actions/[email protected]
# The compression benchmarks rely on DuckDB being installed to convert CSV to Parquet
- name: Install DuckDB
uses: opt-nc/[email protected]
if: runner.environment != 'self-hosted'
with:
version: v1.0.0
- name: Set tempdir
if: runner.environment == 'self-hosted'
run: |
echo "TMPDIR=/work" >> $GITHUB_ENV
- name: Run ClickBench benchmark
shell: bash
env:
BENCH_VORTEX_RATIOS: '.*'
RUSTFLAGS: '-C target-cpu=native'
HOME: /home/ci-runner
run: |
cargo run --bin clickbench --release -- --only-vortex -d gh-json | tee clickbench.json
- name: Store benchmark result
if: '!cancelled()'
uses: benchmark-action/github-action-benchmark@v1
with:
name: 'Clickbench'
tool: 'customSmallerIsBetter'
gh-pages-branch: gh-pages-bench
github-token: ${{ secrets.GITHUB_TOKEN }}
output-file-path: clickbench.json
summary-always: true
comment-always: true
auto-push: false
save-data-file: false
fail-on-alert: false
env:
# AWS Credentials for R2 storage tests
AWS_BUCKET: vortex-test
AWS_ENDPOINT: ${{ secrets.AWS_ENDPOINT }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}