-
Notifications
You must be signed in to change notification settings - Fork 89
247 lines (224 loc) · 10.6 KB
/
builds.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
name: Builds

# Triggers: any pull request, pushes to the `develop` branch, and manual runs.
on:
  pull_request:
  push:
    branches: [develop]
  workflow_dispatch:

# Runs are grouped per workflow and git ref; starting a new run in a group
# cancels the run of that group that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
# Please define `build.args.GEOS_TPL_TAG` in `.devcontainer/devcontainer.json`
jobs:
  # NOTE(review): jobs below list `is_not_draft_pull_request`,
  # `if_not_unassigned_pull_request`, `are_submodules_in_sync` and
  # `check_code_style_and_documentation` under `needs`, and read
  # `needs.is_not_draft_pull_request.outputs.DOCKER_IMAGE_TAG`, but no job with
  # any of these ids is defined in the visible part of this file. `needs` can
  # only reference jobs of the same workflow - confirm where they are defined.

  # Calls the reusable build_and_test workflow, handing it the repository token
  # as a secret named `token`.
  compliance_checks:
    name: compliance
    uses: ./.github/workflows/build_and_test.yml
    secrets:
      token: ${{ secrets.GITHUB_TOKEN }}

  # Matrix containing all the CPU builds.
  # Those are quite fast and can efficiently benefit from the `sccache` tool to make them even faster.
  # `ON` / `OFF` values are quoted so that every YAML loader keeps them as
  # strings instead of reading them as YAML 1.1 booleans.
  cpu_builds:
    name: ${{ matrix.name }}
    needs: [is_not_draft_pull_request]
    strategy:
      # In-progress jobs will not be cancelled if there is a failure
      fail-fast: false
      matrix:
        include:
          - name: Ubuntu (20.04, gcc 9.4.0, open-mpi 4.0.3)
            CMAKE_BUILD_TYPE: Release
            DOCKER_REPOSITORY: geosx/ubuntu20.04-gcc9

          - name: Ubuntu debug (20.04, gcc 10.5.0, open-mpi 4.0.3) - github codespaces
            CMAKE_BUILD_TYPE: Debug
            DOCKER_REPOSITORY: geosx/ubuntu20.04-gcc10

          - name: Ubuntu (20.04, gcc 10.5.0, open-mpi 4.0.3) - github codespaces
            CMAKE_BUILD_TYPE: Release
            DOCKER_REPOSITORY: geosx/ubuntu20.04-gcc10

          - name: Ubuntu (22.04, gcc 11.4.0, open-mpi 4.1.2)
            CMAKE_BUILD_TYPE: Release
            DOCKER_REPOSITORY: geosx/ubuntu22.04-gcc11
            ENABLE_HYPRE: 'ON'
            ENABLE_TRILINOS: 'OFF'
            GCP_BUCKET: geosx/ubuntu22.04-gcc11

          - name: Ubuntu (22.04, gcc 12.3.0, open-mpi 4.1.2)
            CMAKE_BUILD_TYPE: Release
            DOCKER_REPOSITORY: geosx/ubuntu22.04-gcc12
            ENABLE_HYPRE: 'ON'
            ENABLE_TRILINOS: 'OFF'

          - name: Ubuntu (22.04, clang 15.0.7, open-mpi 4.1.2)
            CMAKE_BUILD_TYPE: Release
            DOCKER_REPOSITORY: geosx/ubuntu22.04-clang15
            ENABLE_HYPRE: 'ON'
            ENABLE_TRILINOS: 'OFF'

          - name: Sherlock CPU (centos 7.9.2009, gcc 10.1.0, open-mpi 4.1.2, openblas 0.3.10)
            CMAKE_BUILD_TYPE: Release
            DOCKER_REPOSITORY: geosx/sherlock-gcc10.1.0-openmpi4.1.2-openblas0.3.10-zlib1.2.11
            ENABLE_HYPRE: 'ON'
            ENABLE_TRILINOS: 'OFF'
            GCP_BUCKET: geosx/Sherlock-CPU
            HOST_CONFIG: host-configs/Stanford/sherlock-gcc10-ompi4.1.2-openblas0.3.10.cmake
    uses: ./.github/workflows/build_and_test.yml
    with:
      # Matrix entries that do not define a key forward an empty value for it.
      CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }}
      DOCKER_IMAGE_TAG: ${{ needs.is_not_draft_pull_request.outputs.DOCKER_IMAGE_TAG }}
      DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }}
      ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }}
      ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }}
      GCP_BUCKET: ${{ matrix.GCP_BUCKET }}
      HOST_CONFIG: ${{ matrix.HOST_CONFIG }}
      RUNS_ON: ubuntu-22.04
    secrets: inherit

  # If the 'ci: run integrated tests' PR label is found, the integrated tests will be run immediately after the cpu jobs.
  # Note: The integrated tests are optional and are (for the moment) run for convenience only.
  run_integrated_tests:
    needs:
      - is_not_draft_pull_request
      - cpu_builds
    uses: ./.github/workflows/build_and_test.yml
    secrets: inherit
    with:
      BUILD_AND_TEST_CLI_ARGS: --build-exe-only
      BUILD_TYPE: integrated_tests
      CMAKE_BUILD_TYPE: Release
      DOCKER_IMAGE_TAG: ${{ needs.is_not_draft_pull_request.outputs.DOCKER_IMAGE_TAG }}
      DOCKER_REPOSITORY: geosx/ubuntu22.04-gcc11
      ENABLE_HYPRE: 'ON'
      ENABLE_TRILINOS: 'OFF'
      GCP_BUCKET: geosx/integratedTests
      RUNS_ON: streak2-32core
      NPROC: 32
      DOCKER_RUN_ARGS: "--cpus=32 --memory=384g -v /etc/pki/ca-trust/source/anchors/:/usr/local/share/ca-certificates/llnl:ro"
      DOCKER_CERTS_DIR: "/usr/local/share/ca-certificates"
      DOCKER_CERTS_UPDATE_COMMAND: "update-ca-certificates"
      REQUIRED_LABEL: "ci: run integrated tests"
      LOCAL_BASELINE_DIR: /data/GEOS/baselines

  # Runs `scripts/check_baseline_log.sh` on a checkout restricted to the
  # `scripts` directory (no submodules, no LFS objects, full history).
  baseline_log:
    needs: [is_not_draft_pull_request]
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout Repository
        # NOTE(review): the previous reference `actions/[email protected]` was not a
        # valid action reference. The v4 major tag accepts the
        # `sparse-checkout` input used below; restore the exact pinned
        # version if it is known.
        uses: actions/checkout@v4
        with:
          submodules: false
          lfs: false
          fetch-depth: 0
          sparse-checkout: |
            scripts
      - name: Check that the baseline logs are modified if rebaselines are detected
        run: "scripts/check_baseline_log.sh"

  # If the 'ci: run CUDA builds' PR label is found, the cuda jobs run immediately along side linux jobs.
  # Note: CUDA jobs should only be run if PR is ready to merge.
  cuda_builds:
    name: ${{ matrix.name }}
    needs:
      - is_not_draft_pull_request
    strategy:
      # In-progress jobs will not be cancelled if there is a failure
      fail-fast: false
      matrix:
        include:
          - name: Ubuntu CUDA debug (20.04, clang 10.0.0 + gcc 9.4.0, open-mpi 4.0.3, cuda-11.8.89)
            BUILD_AND_TEST_CLI_ARGS: "--build-exe-only --no-install-schema"
            CMAKE_BUILD_TYPE: Debug
            DOCKER_REPOSITORY: geosx/ubuntu20.04-clang10.0.0-cuda11.8.89
            ENABLE_HYPRE_DEVICE: CUDA
            ENABLE_HYPRE: 'ON'
            ENABLE_TRILINOS: 'OFF'
            RUNS_ON: streak2
            NPROC: 8
            DOCKER_RUN_ARGS: "--cpus=8 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/usr/local/share/ca-certificates/llnl:ro"
            DOCKER_CERTS_DIR: "/usr/local/share/ca-certificates"
            DOCKER_CERTS_UPDATE_COMMAND: "update-ca-certificates"

          - name: Ubuntu CUDA (20.04, clang 10.0.0 + gcc 9.4.0, open-mpi 4.0.3, cuda-11.8.89)
            BUILD_AND_TEST_CLI_ARGS: "--no-install-schema"
            CMAKE_BUILD_TYPE: Release
            DOCKER_REPOSITORY: geosx/ubuntu20.04-clang10.0.0-cuda11.8.89
            ENABLE_HYPRE_DEVICE: CUDA
            ENABLE_HYPRE: 'ON'
            ENABLE_TRILINOS: 'OFF'
            RUNS_ON: streak
            NPROC: 8
            DOCKER_RUN_ARGS: "--cpus=8 --memory=256g --runtime=nvidia --gpus all -v /etc/pki/ca-trust/source/anchors/:/usr/local/share/ca-certificates/llnl:ro"
            DOCKER_CERTS_DIR: "/usr/local/share/ca-certificates"
            DOCKER_CERTS_UPDATE_COMMAND: "update-ca-certificates"

          # compiler error in ElasticFirstOrderWaveEquationSEMKernel::StressComputation::launch in call to FE_TYPE::computeFirstOrderStiffnessTermX
          # - name: Rockylinux (8, clang 17.0.6, cuda 12.5)
          #   BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
          #   CMAKE_BUILD_TYPE: Release
          #   DOCKER_REPOSITORY: geosx/rockylinux8-clang17-cuda12.5
          #   RUNS_ON: streak2
          #   NPROC: 2
          #   DOCKER_RUN_ARGS: "--cpus=1 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
          #   DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
          #   DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"

          # compiler error in ElasticFirstOrderWaveEquationSEMKernel::StressComputation::launch in call to FE_TYPE::computeFirstOrderStiffnessTermX
          # - name: Rockylinux (8, gcc 8.5, cuda 12.5)
          #   BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
          #   CMAKE_BUILD_TYPE: Release
          #   DOCKER_REPOSITORY: geosx/rockylinux8-gcc8-cuda12.5
          #   RUNS_ON: streak2
          #   NPROC: 2
          #   DOCKER_RUN_ARGS: "--cpus=1 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
          #   DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
          #   DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"

          # Below this line, jobs that deploy to Google Cloud.
          - name: Sherlock GPU (centos 7.9.2009, gcc 10.1.0, open-mpi 4.1.2, openblas 0.3.10, cuda 11.7.1,)
            BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
            CMAKE_BUILD_TYPE: Release
            DOCKER_REPOSITORY: geosx/sherlock-gcc10.1.0-openmpi4.1.2-cuda11.7.1-openblas0.3.10-zlib1.2.11
            ENABLE_HYPRE_DEVICE: CUDA
            ENABLE_HYPRE: 'ON'
            ENABLE_TRILINOS: 'OFF'
            GCP_BUCKET: geosx/Sherlock-GPU
            HOST_CONFIG: host-configs/Stanford/sherlock-gcc10-ompi4.1.2-openblas0.3.10-cuda11.7.1-sm70.cmake
            RUNS_ON: streak2
            NPROC: 8
            DOCKER_RUN_ARGS: "--cpus=8 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
            DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
            DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"
    uses: ./.github/workflows/build_and_test.yml
    with:
      BUILD_AND_TEST_CLI_ARGS: ${{ matrix.BUILD_AND_TEST_CLI_ARGS }}
      CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }}
      DOCKER_CERTS_DIR: ${{ matrix.DOCKER_CERTS_DIR }}
      DOCKER_CERTS_UPDATE_COMMAND: ${{ matrix.DOCKER_CERTS_UPDATE_COMMAND }}
      DOCKER_IMAGE_TAG: ${{ needs.is_not_draft_pull_request.outputs.DOCKER_IMAGE_TAG }}
      DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }}
      DOCKER_RUN_ARGS: ${{ matrix.DOCKER_RUN_ARGS }}
      ENABLE_HYPRE_DEVICE: ${{ matrix.ENABLE_HYPRE_DEVICE }}
      ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }}
      ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }}
      GCP_BUCKET: ${{ matrix.GCP_BUCKET }}
      HOST_CONFIG: ${{ matrix.HOST_CONFIG }}
      NPROC: ${{ matrix.NPROC }}
      RUNS_ON: ${{ matrix.RUNS_ON }}
      REQUIRED_LABEL: "ci: run CUDA builds"
    secrets: inherit

  # Convenience job - passes when all other jobs have passed (must pass the CUDA jobs).
  check_that_all_jobs_succeeded:
    runs-on: ubuntu-22.04
    needs:
      - if_not_unassigned_pull_request
      - are_submodules_in_sync
      - check_code_style_and_documentation
      - cpu_builds
      - cuda_builds
      - run_integrated_tests
    # `always()` makes this job run even when one of the needed jobs did not succeed.
    if: ${{ always() }}
    steps:
      # Prints the result of every needed job. The final expression is
      # substituted before the shell runs, so the last command executed is
      # `true` or `false` and decides whether the step passes.
      - run: |
          echo "if_not_unassigned_pull_request: ${{needs.if_not_unassigned_pull_request.result}}"
          echo "are_submodules_in_sync: ${{needs.are_submodules_in_sync.result}}"
          echo "check_code_style_and_documentation: ${{needs.check_code_style_and_documentation.result}}"
          echo "cpu_builds: ${{needs.cpu_builds.result}}"
          echo "cuda_builds: ${{needs.cuda_builds.result}}"
          echo "run_integrated_tests: ${{needs.run_integrated_tests.result}} "
          ${{
            needs.if_not_unassigned_pull_request.result == 'success' &&
            needs.are_submodules_in_sync.result == 'success' &&
            needs.check_code_style_and_documentation.result == 'success' &&
            needs.cpu_builds.result == 'success' &&
            needs.cuda_builds.result == 'success' &&
            needs.run_integrated_tests.result == 'success'
          }}