TPC-H Benchmarks #74
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: TPC-H Benchmarks

# Manually dispatched only; all inputs below are supplied at dispatch time.
on:
  workflow_dispatch:
    inputs:
      # TPC-H scale factor. Forwarded to pytest as --scale and embedded in the
      # results database filename (tpch_<scale>.db).
      # Values are quoted so they stay strings rather than YAML integers.
      scale:
        description: 'Scale Factor'
        required: true
        default: '10000'
        type: choice
        options:
          - '10000'
          - '1000'
          - '100'
          - '10'
          - '1'
      # Engine toggles: each boolean gates the step that appends that engine's
      # test module (tests/tpch/test_<engine>.py) to the pytest invocation.
      dask:
        description: 'Dask'
        required: true
        default: true
        type: boolean
      duckdb:
        description: 'DuckDB'
        required: true
        default: true
        type: boolean
      polars:
        description: 'Polars'
        required: true
        default: false
        type: boolean
      pyspark:
        description: 'PySpark'
        required: true
        default: true
        type: boolean
# concurrency:
#   # Only allow a single run at a time to avoid rate limiting
#   group: tpch
# Default shell for every `run:` step in this workflow.
defaults:
  # Required shell entrypoint to have properly activated conda environments
  # (a login shell, so the conda activation hooks are sourced).
  run:
    shell: bash -l {0}
jobs:
  # Single job: build the conda environment, assemble the list of benchmark
  # modules from the dispatch inputs, run them, and upload the results.
  tpch:
    name: TPC-H
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up environment
        uses: conda-incubator/setup-miniconda@v3
        with:
          miniforge-variant: Mambaforge
          use-mamba: true
          condarc-file: ci/condarc
          python-version: "3.9"
          environment-file: ci/environment.yml

      # Resolve the non-Dask engine requirements (.in -> .txt) ...
      - name: Pip Compile
        run: pip-compile ci/requirements-2tpch-non-dask.in

      # ... and install the resolved set into the active environment.
      - name: Add Environment dependencies
        run: python -m pip install -r ci/requirements-2tpch-non-dask.txt

      - name: Dump environment
        run: |
          # For debugging
          echo -e "--\n--Conda Environment (re-create this with \`conda env create --name <name> -f <output_file>\`)\n--"
          # tee keeps the export in the log and also writes the file that the
          # "Upload benchmark results" step lists as an artifact path.
          mamba env export | grep -E -v '^prefix:.*$' | tee mamba_env_export.yml

      # Each enabled engine appends its test module to PYTEST_BENCHMARKS through
      # GITHUB_ENV, so later steps see one space-separated list of paths.
      - name: Add Dask to benchmark if enabled
        if: ${{ inputs.dask }}
        run: |
          echo PYTEST_BENCHMARKS="${{ env.PYTEST_BENCHMARKS }} tests/tpch/test_dask.py" >> $GITHUB_ENV

      - name: Add DuckDB to benchmark if enabled
        if: ${{ inputs.duckdb }}
        run: |
          echo PYTEST_BENCHMARKS="${{ env.PYTEST_BENCHMARKS }} tests/tpch/test_duckdb.py" >> $GITHUB_ENV

      - name: Add Polars to benchmark if enabled
        if: ${{ inputs.polars }}
        run: |
          echo PYTEST_BENCHMARKS="${{ env.PYTEST_BENCHMARKS }} tests/tpch/test_polars.py" >> $GITHUB_ENV

      - name: Add PySpark to benchmark if enabled
        if: ${{ inputs.pyspark }}
        run: |
          echo PYTEST_BENCHMARKS="${{ env.PYTEST_BENCHMARKS }} tests/tpch/test_pyspark.py" >> $GITHUB_ENV

      # Runs whatever modules were collected above (Polars included when its
      # input is enabled).
      - name: Run TPC-H benchmarks
        env:
          DASK_COILED__TOKEN: ${{ secrets.COILED_BENCHMARK_BOT_TOKEN }}
          AWS_ACCESS_KEY_ID: ${{ secrets.RUNTIME_CI_BOT_AWS_ACCESS_KEY_ID }}
          AWS_DEFAULT_REGION: us-east-2  # this is needed for boto for some reason
          AWS_SECRET_ACCESS_KEY: ${{ secrets.RUNTIME_CI_BOT_AWS_SECRET_ACCESS_KEY }}
          # NOTE(review): this job declares no strategy.matrix, so this
          # expression has nothing to read from - confirm whether the variable
          # is still needed.
          COILED_RUNTIME_VERSION: ${{ matrix.runtime-version }}
          DB_NAME: tpch_${{ inputs.scale }}.db
          CLUSTER_DUMP: always
          # Quoted so the exact string "True" is passed instead of a value
          # typed as a YAML boolean.
          DASK_DATAFRAME__QUERY_PLANNING: "True"
        run: |
          pytest --benchmark \
            ${{ env.PYTEST_BENCHMARKS }} \
            -n 4 --dist loadscope \
            --scale ${{ inputs.scale }}

      # always(): upload results even when the benchmark step failed.
      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: tpch-benchmark
          path: |
            tpch_${{ inputs.scale }}.db
            mamba_env_export.yml