# tpch.yml
name: TPC-H Benchmarks
on:
  workflow_dispatch:
    inputs:
      scale:
        description: 'Scale Factor'
        required: true
        default: 10000
        type: choice
        options:
          - 10000
          - 1000
          - 100
          - 10
          - 1
      dask:
        description: 'Dask'
        required: true
        default: true
        type: boolean
      duckdb:
        description: 'DuckDB'
        required: true
        default: true
        type: boolean
      polars:
        description: 'Polars'
        required: true
        default: false
        type: boolean
      pyspark:
        description: 'PySpark'
        required: true
        default: true
        type: boolean
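# A minimal sketch of kicking off this workflow from the command line with the
# GitHub CLI (assumes `gh` is installed and authenticated for this repository);
# each `-f` supplies one of the workflow_dispatch inputs above:
#   gh workflow run tpch.yml -f scale=100 -f dask=true -f duckdb=true -f polars=false -f pyspark=false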
# concurrency:
#   # Only allow a single run at a time to avoid rate limiting
#   group: tpch
defaults:
  # Required shell entrypoint to have properly activated conda environments
  run:
    shell: bash -l {0}
jobs:
  tpch:
    name: TPC-H
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up environment
        uses: conda-incubator/setup-miniconda@v3
        with:
          miniforge-variant: Mambaforge
          use-mamba: true
          condarc-file: ci/condarc
          python-version: "3.9"
          environment-file: ci/environment.yml
      - name: Pip Compile
        run: pip-compile ci/requirements-2tpch-non-dask.in
      - name: Add Environment dependencies
        run: python -m pip install -r ci/requirements-2tpch-non-dask.txt
      - name: Dump environment
        run: |
          # For debugging
          echo -e "--\n--Conda Environment (re-create this with \`conda env create --name <name> -f <output_file>\`)\n--"
          # tee a copy to disk so the "Upload benchmark results" step below can pick it up
          mamba env export | grep -E -v '^prefix:.*$' | tee mamba_env_export.yml
      - name: Add Dask to benchmark if enabled
        if: ${{ inputs.dask }}
        run: |
          echo PYTEST_BENCHMARKS="${{ env.PYTEST_BENCHMARKS }} tests/tpch/test_dask.py" >> $GITHUB_ENV
      - name: Add DuckDB to benchmark if enabled
        if: ${{ inputs.duckdb }}
        run: |
          echo PYTEST_BENCHMARKS="${{ env.PYTEST_BENCHMARKS }} tests/tpch/test_duckdb.py" >> $GITHUB_ENV
      - name: Add Polars to benchmark if enabled
        if: ${{ inputs.polars }}
        run: |
          echo PYTEST_BENCHMARKS="${{ env.PYTEST_BENCHMARKS }} tests/tpch/test_polars.py" >> $GITHUB_ENV
      - name: Add PySpark to benchmark if enabled
        if: ${{ inputs.pyspark }}
        run: |
          echo PYTEST_BENCHMARKS="${{ env.PYTEST_BENCHMARKS }} tests/tpch/test_pyspark.py" >> $GITHUB_ENV
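      # PYTEST_BENCHMARKS now holds the space-separated list of test modules
      # selected above (it expands to nothing if no engine was enabled).
      # In the step below, -n 4 starts four pytest-xdist workers and
      # --dist loadscope keeps all tests from one module, i.e. one engine,
      # on the same worker.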
      - name: Run TPC-H benchmarks (except polars)
        env:
          DASK_COILED__TOKEN: ${{ secrets.COILED_BENCHMARK_BOT_TOKEN }}
          AWS_ACCESS_KEY_ID: ${{ secrets.RUNTIME_CI_BOT_AWS_ACCESS_KEY_ID }}
          AWS_DEFAULT_REGION: us-east-2 # this is needed for boto for some reason
          AWS_SECRET_ACCESS_KEY: ${{ secrets.RUNTIME_CI_BOT_AWS_SECRET_ACCESS_KEY }}
          COILED_RUNTIME_VERSION: ${{ matrix.runtime-version }}
          DB_NAME: tpch_${{ inputs.scale }}.db
          CLUSTER_DUMP: always
          DASK_DATAFRAME__QUERY_PLANNING: True
        run: |
          pytest --benchmark \
            ${{ env.PYTEST_BENCHMARKS }} \
            -n 4 --dist loadscope \
            --scale ${{ inputs.scale }}
      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: tpch-benchmark
          path: |
            tpch_${{ inputs.scale }}.db
            mamba_env_export.yml
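# A hedged sketch for fetching the uploaded results locally after a run has
# finished (assumes the GitHub CLI; <run-id> comes from `gh run list`):
#   gh run download <run-id> --name tpch-benchmark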