Skip to content

feat: store benchmarks in s3 #2669

feat: store benchmarks in s3

feat: store benchmarks in s3 #2669

Workflow file for this run

name: PR Benchmarks
on:
pull_request:
types: [ labeled, synchronize ]
branches: [ "develop" ]
workflow_dispatch: { }
permissions:
actions: write
contents: read
pull-requests: write
id-token: write
jobs:
label_trigger:
runs-on: ubuntu-latest
if: ${{ contains(github.event.head_commit.message, '[benchmark]') || github.event.label.name == 'benchmark' && github.event_name == 'pull_request' }}
steps:
# We remove the benchmark label first so that the workflow can be re-triggered.
- uses: actions-ecosystem/action-remove-labels@v1
with:
labels: benchmark
bench:
needs: label_trigger
strategy:
matrix:
benchmark:
- id: datafusion
name: DataFusion
- id: random_access
name: Random Access
- id: compress
name: Vortex Compression
runs-on: self-hosted
if: ${{ contains(github.event.head_commit.message, '[benchmark]') || github.event.label.name == 'benchmark' && github.event_name == 'pull_request' }}
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/cleanup
- uses: ./.github/actions/setup-rust
- uses: spiraldb/actions/.github/actions/[email protected]
# The compression benchmarks rely on DuckDB being installed to convert CSV to Parquet
- name: Install DuckDB
uses: opt-nc/[email protected]
if: runner.environment != 'self-hosted'
with:
version: v1.0.0
- name: Set tempdir
if: runner.environment == 'self-hosted'
run: |
echo "TMPDIR=/work" >> $GITHUB_ENV
- name: Run benchmark
shell: bash
env:
BENCH_VORTEX_RATIOS: '.*'
RUSTFLAGS: '-C target-cpu=native'
run: |
cargo install cargo-criterion
sudo apt-get update && sudo apt-get install -y jq
cargo criterion \
--bench ${{ matrix.benchmark.id }} \
--message-format=json \
> ${{ matrix.benchmark.id }}-raw.json
cat ${{ matrix.benchmark.id }}-raw.json \
| bash scripts/coerce-criterion-json.sh \
> ${{ matrix.benchmark.id }}.json
- name: Setup AWS CLI
uses: aws-actions/configure-aws-credentials@v1
with:
role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
aws-region: us-east-1
- name: Compare results
shell: bash
run: |
set -Eeu -o pipefail -x
base_commit_sha=${{ github.event.pull_request.base.sha }}
aws s3 cp s3://vortex-benchmark-results-database/data.json - \
| grep $base_commit_sha \
> base.json
echo '# Benchmarks: ${{ matrix.benchmark.id }}' > comment.md
uv run pip install pandas
uv run python3 scripts/compare-benchmark-jsons.py base.json ${{ matrix.benchmark.id }}.json \
>> comment.md
- name: Comment PR
uses: thollander/actions-comment-pull-request@v3
with:
file-path: comment.md
comment-tag: execution
tpch:
needs: label_trigger
runs-on: self-hosted
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/cleanup
- uses: ./.github/actions/setup-rust
- uses: spiraldb/actions/.github/actions/[email protected]
# The compression benchmarks rely on DuckDB being installed to convert CSV to Parquet
- name: Install DuckDB
uses: opt-nc/[email protected]
if: runner.environment != 'self-hosted'
with:
version: v1.0.0
- name: Set tempdir
if: runner.environment == 'self-hosted'
run: |
echo "TMPDIR=/work" >> $GITHUB_ENV
- name: Run TPC-H benchmark
shell: bash
env:
BENCH_VORTEX_RATIOS: '.*'
RUSTFLAGS: '-C target-cpu=native'
run: |
cargo run --bin tpch_benchmark --release -- -d gh-json -t 1 | tee tpch.json
- name: Setup AWS CLI
uses: aws-actions/configure-aws-credentials@v1
with:
role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
aws-region: us-east-1
- name: Compare results
shell: bash
run: |
set -Eeu -o pipefail -x
base_commit_sha=${{ github.event.pull_request.base.sha }}
aws s3 cp s3://vortex-benchmark-results-database/data.json - \
| grep $base_commit_sha \
> base.json
echo '# Benchmarks: TPC-H' > comment.md
uv run pip install pandas
uv run python3 scripts/compare-benchmark-jsons.py base.json tpch.json \
>> comment.md
- name: Comment PR
uses: thollander/actions-comment-pull-request@v3
with:
file-path: comment.md
comment-tag: execution
clickbench:
needs: label_trigger
runs-on: self-hosted
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/cleanup
- uses: ./.github/actions/setup-rust
- uses: spiraldb/actions/.github/actions/[email protected]
# The compression benchmarks rely on DuckDB being installed to convert CSV to Parquet
- name: Install DuckDB
uses: opt-nc/[email protected]
if: runner.environment != 'self-hosted'
with:
version: v1.0.0
- name: Set tempdir
if: runner.environment == 'self-hosted'
run: |
echo "TMPDIR=/work" >> $GITHUB_ENV
- name: Run Clickbench benchmark
shell: bash
env:
BENCH_VORTEX_RATIOS: '.*'
RUSTFLAGS: '-C target-cpu=native'
HOME: /home/ci-runner
run: |
cargo run --bin clickbench --release -- -d gh-json | tee clickbench.json
- name: Setup AWS CLI
uses: aws-actions/configure-aws-credentials@v1
with:
role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
aws-region: us-east-1
- name: Compare results
shell: bash
run: |
set -Eeu -o pipefail -x
base_commit_sha=${{ github.event.pull_request.base.sha }}
aws s3 cp s3://vortex-benchmark-results-database/data.json - \
| grep $base_commit_sha \
> base.json
echo '# Benchmarks: Clickbench' > comment.md
uv run pip install pandas
uv run python3 scripts/compare-benchmark-jsons.py base.json clickbench.json \
>> comment.md
- name: Comment PR
uses: thollander/actions-comment-pull-request@v3
with:
file-path: comment.md
comment-tag: execution