# feat: track compressed size & compare to parquet(zstd)? & canonical #1172
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Benchmark workflow for pull requests.
name: PR Benchmarks

# Triggers: pull requests against `develop` when a label is added or new
# commits are pushed, plus manual dispatch from the Actions UI.
on:
  pull_request:
    types: [labeled, synchronize]
    branches: ["develop"]
  workflow_dispatch: {}
# Scopes granted to the GITHUB_TOKEN for every job in this workflow.
permissions:
  actions: write
  contents: read
  pull-requests: write
jobs:
  # Runs one Criterion benchmark target per matrix entry and publishes the
  # converted results through github-action-benchmark.
  bench:
    strategy:
      matrix:
        # `id` is the cargo bench target; `name` is the series name handed to
        # the benchmark-reporting action.
        benchmark:
          - id: tpch_benchmark
            # this was the original name which we must preserve until we change the name of all the
            # records in the gh-pages-bench branch "Vortex benchmarks"
            name: Vortex benchmarks
          - id: compress_benchmark
            name: Vortex Compression
          - id: bytes_at
            name: Vortex bytes_at
          - id: datafusion_benchmark
            name: Vortex DataFusion
          - id: random_access
            name: Vortex random_access
    runs-on: ubuntu-latest-large
    # `&&` binds tighter than `||`; the parentheses only make that explicit.
    # NOTE(review): neither clause looks satisfiable for a manual
    # `workflow_dispatch` run (no head commit or label in that payload) -
    # confirm whether manual runs are meant to be skipped.
    if: ${{ contains(github.event.head_commit.message, '[benchmark]') || (github.event.label.name == 'benchmark' && github.event_name == 'pull_request') }}
    steps:
      # We remove the benchmark label first so that the workflow can be re-triggered.
      - uses: actions-ecosystem/action-remove-labels@v1
        with:
          labels: benchmark
      - uses: actions/checkout@v4
      - uses: ./.github/actions/cleanup
      - uses: ./.github/actions/setup-rust
      - uses: ./.github/actions/setup-python
      # The compression benchmarks rely on DuckDB being installed to convert CSV to Parquet
      # NOTE(review): the action reference was garbled in the source this file
      # was recovered from; confirm the repository name and pinned tag.
      - name: Install DuckDB
        uses: opt-nc/setup-duckdb-action@v1.0.8
        with:
          version: v1.0.0
      - name: Run benchmark
        shell: bash
        run: |
          cargo install cargo-criterion
          cargo criterion --bench ${{ matrix.benchmark.id }} --message-format=json 2>&1 | tee out.json
          sudo apt-get update && sudo apt-get install -y jq
          # out.json also holds plain-text cargo output (stderr is merged in),
          # so only the lines that start a JSON object are fed to jq. jq must
          # read from the pipe: passing out.json as a file argument would make
          # it ignore stdin and choke on the non-JSON lines.
          # The second jq slurps the per-message objects into one JSON array,
          # which is the shape the custom benchmark tool format expects.
          grep -e '^{' out.json | jq -c '
            [
              (if .mean != null then {name: .id, value: .mean.estimate, unit: .unit, range: ((.mean.upper_bound - .mean.lower_bound) / 2) } else {} end),
              (if .throughput != null then {name: (.id + " throughput"), value: .throughput[].per_iteration, unit: .throughput[].unit, range: 0} else {} end),
              {name, value, unit, range}
            ] | .[] | select(.value != null)
          ' | jq -s -c '.' >${{ matrix.benchmark.id }}.json
          cat ${{ matrix.benchmark.id }}.json
      - name: Store benchmark result
        # Still report whatever was produced when an earlier step failed,
        # but not when the run was cancelled.
        if: '!cancelled()'
        uses: benchmark-action/github-action-benchmark@v1
        with:
          name: ${{ matrix.benchmark.name }}
          tool: 'customSmallerIsBetter'
          gh-pages-branch: gh-pages-bench
          github-token: ${{ secrets.GITHUB_TOKEN }}
          output-file-path: ${{ matrix.benchmark.id }}.json
          summary-always: true
          comment-always: true
          auto-push: false
          save-data-file: false
          fail-on-alert: false
    # NOTE(review): the original nesting was lost; this is placed at job level
    # so the benchmark step can see the credentials - confirm.
    env:
      # AWS Credentials for R2 storage tests
      AWS_BUCKET: vortex-test
      AWS_ENDPOINT: ${{ secrets.AWS_ENDPOINT }}
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}