Skip to content

feat: track compressed size & compare to parquet(zstd)? & canonical #1172

feat: track compressed size & compare to parquet(zstd)? & canonical

feat: track compressed size & compare to parquet(zstd)? & canonical #1172

Workflow file for this run

name: PR Benchmarks
on:
pull_request:
types: [ labeled, synchronize ]
branches: [ "develop" ]
workflow_dispatch: { }
permissions:
actions: write
contents: read
pull-requests: write
jobs:
bench:
strategy:
matrix:
benchmark:
- id: tpch_benchmark
# this was the original name which we must preserve until we change the name of all the
# records in the gh-pages-bench branch "Vortex benchmarks"
name: Vortex benchmarks
- id: compress_benchmark
name: Vortex Compression
- id: bytes_at
name: Vortex bytes_at
- id: datafusion_benchmark
name: Vortex DataFusion
- id: random_access
name: Vortex random_access
runs-on: ubuntu-latest-large
if: ${{ contains(github.event.head_commit.message, '[benchmark]') || github.event.label.name == 'benchmark' && github.event_name == 'pull_request' }}
steps:
# We remove the benchmark label first so that the workflow can be re-triggered.
- uses: actions-ecosystem/action-remove-labels@v1
with:
labels: benchmark
- uses: actions/checkout@v4
- uses: ./.github/actions/cleanup
- uses: ./.github/actions/setup-rust
- uses: ./.github/actions/setup-python
# The compression benchmarks rely on DuckDB being installed to convert CSV to Parquet
- name: Install DuckDB
uses: opt-nc/[email protected]
with:
version: v1.0.0
- name: Run benchmark
shell: bash
run: |
cargo install cargo-criterion
cargo criterion --bench ${{ matrix.benchmark.id }} --message-format=json 2>&1 | tee out.json
sudo apt-get update && sudo apt-get install -y jq
grep -e '^{' out.json | jq -c ' [
(if .mean != null then {name: .id, value: .mean.estimate, unit: .unit, range: ((.mean.upper_bound - .mean.lower_bound) / 2) } else {} end),
(if .throughput != null then {name: (.id + " throughput"), value: .throughput[].per_iteration, unit: .throughput[].unit, range: 0} else {} end),
{name, value, unit, range}
] | .[] | select(.value != null)
' \
out.json >${{ matrix.benchmark.id }}.json
cat ${{ matrix.benchmark.id }}.json
- name: Store benchmark result
if: '!cancelled()'
uses: benchmark-action/github-action-benchmark@v1
with:
name: ${{ matrix.benchmark.name }}
tool: 'customSmallerIsBetter'
gh-pages-branch: gh-pages-bench
github-token: ${{ secrets.GITHUB_TOKEN }}
output-file-path: ${{ matrix.benchmark.id }}.json
summary-always: true
comment-always: true
auto-push: false
save-data-file: false
fail-on-alert: false
env:
# AWS Credentials for R2 storage tests
AWS_BUCKET: vortex-test
AWS_ENDPOINT: ${{ secrets.AWS_ENDPOINT }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}