-
Notifications
You must be signed in to change notification settings - Fork 89
371 lines (340 loc) · 16.1 KB
/
ci_tests.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
name: GEOS CI
on:
push:
branches:
- develop
pull_request:
workflow_dispatch:
# Cancels in-progress workflows for a PR when updated
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
# Please define `build.args.GEOS_TPL_TAG` in `.devcontainer/devcontainer.json`
jobs:
# Jobs will be cancelled if PR is a draft.
# PR status must be "Open" to run CI.
is_not_draft_pull_request:
# Everywhere in this workflow, we use the most recent ubuntu distribution available in Github Actions
# to ensure maximum support of google cloud's sdk.
runs-on: ubuntu-22.04
outputs:
DOCKER_IMAGE_TAG: ${{ steps.extract_docker_image_tag.outputs.DOCKER_IMAGE_TAG }}
steps:
- name: Check that the PR is not a draft (cancel rest of jobs otherwise)
id: extract_pr_info
run: |
if [[ ${{github.event_name}} == 'pull_request' ]]; then
# We do not rely on the `github.event.pull_request.labels` information since it's cached at the job.
# Changing labels or assignee in the PR would not allow to simply re-run the job with a different outcome.
pr_json=$(curl -H "Accept: application/vnd.github+json" https://api.github.com/repos/${{ github.repository }}/pulls/${{ github.event.number }})
# We stop the workflow if the pr is draft
draft_status=$(echo ${pr_json} | jq '.draft')
echo "Draft status of PR is ${draft_status}."
if [[ $draft_status == true ]]; then exit 1 ; fi
fi
# The TPL tag is contained in the codespaces configuration to avoid duplications.
- name: Checkout .devcontainer/devcontainer.json
uses: actions/[email protected]
with:
sparse-checkout: |
.devcontainer/devcontainer.json
sparse-checkout-cone-mode: false
submodules: false
lfs: false
fetch-depth: 1
- name: Extract docker image tag
id: extract_docker_image_tag
run: |
echo "DOCKER_IMAGE_TAG=$(jq '.build.args.GEOS_TPL_TAG' -r .devcontainer/devcontainer.json)" >> "$GITHUB_OUTPUT"
# PR must be assigned to be merged.
# This job will fail if this is not the case.
if_not_unassigned_pull_request:
needs: [is_not_draft_pull_request]
runs-on: ubuntu-22.04
steps:
- name: If this is a PR, Check that it is assigned
run: |
if [[ ${{github.event_name}} != 'pull_request' ]]; then exit 0 ; fi
pr_json=$(curl -H "Accept: application/vnd.github+json" https://api.github.com/repos/${{ github.repository }}/pulls/${{ github.event.number }})
NUM_ASSIGNEES=$(echo ${pr_json} | jq '.assignees | length')
echo "There are ${NUM_ASSIGNEES} assignees on this PR."
if [[ $NUM_ASSIGNEES == 0 ]]; then exit 1 ; fi
# Validates that the PR is still pointing to the HEAD of the main branch of the submodules repositories.
# (There are exceptions, read the script about those).
are_submodules_in_sync:
needs: [is_not_draft_pull_request]
runs-on: ubuntu-22.04
steps:
# The integrated test submodule repository contains large data (using git lfs).
# To save time (and money) we do not let Github Actions automatically clone all our (lfs) subrepositories and do it by hand.
- name: Checkout Repository
uses: actions/[email protected]
with:
# Let script update submodules; Github Actions submodule history causes error
submodules: false
lfs: false
fetch-depth: 1
- name: Check that submodules are up to date
run: "scripts/test_submodule_updated.sh"
check_code_style_and_documentation:
name: ${{ matrix.name }}
needs: [is_not_draft_pull_request]
strategy:
fail-fast : false
matrix:
include:
# Validates the code-style using uncrustify
- name: Check code style
BUILD_AND_TEST_ARGS: --test-code-style
# Validates that the documentation generated using doxygen has no hole.
- name: Check documentation
BUILD_AND_TEST_ARGS: --test-documentation
uses: ./.github/workflows/build_and_test.yml
with:
BUILD_AND_TEST_CLI_ARGS: ${{ matrix.BUILD_AND_TEST_ARGS }}
CMAKE_BUILD_TYPE: Release
DOCKER_IMAGE_TAG: ${{ needs.is_not_draft_pull_request.outputs.DOCKER_IMAGE_TAG }}
DOCKER_REPOSITORY: geosx/ubuntu20.04-gcc9
RUNS_ON: ubuntu-22.04
USE_SCCACHE: false
# Matrix containing all the CPU build.
# Those are quite fast and can efficiently benefit from the `sccache' tool to make them even faster.
cpu_builds:
name: ${{ matrix.name }}
needs: [is_not_draft_pull_request]
strategy:
# In-progress jobs will not be cancelled if there is a failure
fail-fast : false
matrix:
include:
- name: Ubuntu (20.04, gcc 9.4.0, open-mpi 4.0.3)
CMAKE_BUILD_TYPE: Release
DOCKER_REPOSITORY: geosx/ubuntu20.04-gcc9
- name: Ubuntu debug (20.04, gcc 10.5.0, open-mpi 4.0.3) - github codespaces
CMAKE_BUILD_TYPE: Debug
DOCKER_REPOSITORY: geosx/ubuntu20.04-gcc10
- name: Ubuntu (20.04, gcc 10.5.0, open-mpi 4.0.3) - github codespaces
CMAKE_BUILD_TYPE: Release
DOCKER_REPOSITORY: geosx/ubuntu20.04-gcc10
- name: Ubuntu (22.04, gcc 11.4.0, open-mpi 4.1.2)
CMAKE_BUILD_TYPE: Release
DOCKER_REPOSITORY: geosx/ubuntu22.04-gcc11
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
GCP_BUCKET: geosx/ubuntu22.04-gcc11
- name: Ubuntu (22.04, gcc 12.3.0, open-mpi 4.1.2)
CMAKE_BUILD_TYPE: Release
DOCKER_REPOSITORY: geosx/ubuntu22.04-gcc12
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
- name: Ubuntu (22.04, clang 15.0.7, open-mpi 4.1.2)
CMAKE_BUILD_TYPE: Release
DOCKER_REPOSITORY: geosx/ubuntu22.04-clang15
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
- name: Pecan CPU (centos 7.7, gcc 8.2.0, open-mpi 4.0.1, mkl 2019.5)
CMAKE_BUILD_TYPE: Release
DOCKER_REPOSITORY: geosx/pecan-cpu-gcc8.2.0-openmpi4.0.1-mkl2019.5
HOST_CONFIG: host-configs/TOTAL/pecan-CPU.cmake
GCP_BUCKET: geosx/Pecan-CPU
- name: Pangea 2 (centos 7.6, gcc 8.3.0, open-mpi 2.1.5, mkl 2019.3)
CMAKE_BUILD_TYPE: Release
DOCKER_REPOSITORY: geosx/pangea2-gcc8.3.0-openmpi2.1.5-mkl2019.3
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
GCP_BUCKET: geosx/Pangea2
- name: Sherlock CPU (centos 7.9.2009, gcc 10.1.0, open-mpi 4.1.2, openblas 0.3.10)
CMAKE_BUILD_TYPE: Release
DOCKER_REPOSITORY: geosx/sherlock-gcc10.1.0-openmpi4.1.2-openblas0.3.10-zlib1.2.11
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
GCP_BUCKET: geosx/Sherlock-CPU
HOST_CONFIG: host-configs/Stanford/sherlock-gcc10-ompi4.1.2-openblas0.3.10.cmake
uses: ./.github/workflows/build_and_test.yml
with:
CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }}
DOCKER_IMAGE_TAG: ${{ needs.is_not_draft_pull_request.outputs.DOCKER_IMAGE_TAG }}
DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }}
ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }}
ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }}
GCP_BUCKET: ${{ matrix.GCP_BUCKET }}
HOST_CONFIG: ${{ matrix.HOST_CONFIG }}
RUNS_ON: ubuntu-22.04
secrets: inherit
# If the 'ci: run integrated tests' PR label is found, the integrated tests will be run immediately after the cpu jobs.
# Note: The integrated tests are optional and are (for the moment) run for convenience only.
run_integrated_tests:
needs:
- is_not_draft_pull_request
- cpu_builds
uses: ./.github/workflows/build_and_test.yml
secrets: inherit
with:
BUILD_AND_TEST_CLI_ARGS: --build-exe-only
BUILD_TYPE: integrated_tests
CMAKE_BUILD_TYPE: Release
DOCKER_IMAGE_TAG: ${{ needs.is_not_draft_pull_request.outputs.DOCKER_IMAGE_TAG }}
DOCKER_REPOSITORY: geosx/ubuntu22.04-gcc11
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
GCP_BUCKET: geosx/integratedTests
RUNS_ON: streak2-32core
NPROC: 32
DOCKER_RUN_ARGS: "--cpus=32 --memory=384g -v /etc/pki/ca-trust/source/anchors/:/usr/local/share/ca-certificates/llnl:ro"
DOCKER_CERTS_DIR: "/usr/local/share/ca-certificates"
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-certificates"
REQUIRED_LABEL: "ci: run integrated tests"
LOCAL_BASELINE_DIR: /data/GEOS/baselines
baseline_log:
needs: [is_not_draft_pull_request]
runs-on: ubuntu-22.04
steps:
- name: Checkout Repository
uses: actions/[email protected]
with:
submodules: false
lfs: false
fetch-depth: 0
sparse-checkout: |
scripts
- name: Check that the baseline logs are modified if rebaselines are detected
run: "scripts/check_baseline_log.sh"
code_coverage:
needs:
- is_not_draft_pull_request
uses: ./.github/workflows/build_and_test.yml
secrets: inherit
with:
BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests"
CMAKE_BUILD_TYPE: Debug
CODE_COVERAGE: true
DOCKER_IMAGE_TAG: ${{ needs.is_not_draft_pull_request.outputs.DOCKER_IMAGE_TAG }}
DOCKER_REPOSITORY: geosx/ubuntu22.04-gcc11
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
GCP_BUCKET: geosx/ubuntu22.04-gcc11
RUNS_ON: Runner_4core_16GB
# If the 'ci: run CUDA builds' PR label is found, the cuda jobs run immediately along side linux jobs.
# Note: CUDA jobs should only be run if PR is ready to merge.
cuda_builds:
name: ${{ matrix.name }}
needs:
- is_not_draft_pull_request
strategy:
# In-progress jobs will not be cancelled if there is a failure
fail-fast : false
matrix:
include:
- name: Ubuntu CUDA debug (20.04, clang 10.0.0 + gcc 9.4.0, open-mpi 4.0.3, cuda-11.8.89)
BUILD_AND_TEST_CLI_ARGS: "--build-exe-only --no-install-schema"
CMAKE_BUILD_TYPE: Debug
DOCKER_REPOSITORY: geosx/ubuntu20.04-clang10.0.0-cuda11.8.89
ENABLE_HYPRE_DEVICE: CUDA
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
RUNS_ON: streak2
NPROC: 8
DOCKER_RUN_ARGS: "--cpus=8 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/usr/local/share/ca-certificates/llnl:ro"
DOCKER_CERTS_DIR: "/usr/local/share/ca-certificates"
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-certificates"
- name: Ubuntu CUDA (20.04, clang 10.0.0 + gcc 9.4.0, open-mpi 4.0.3, cuda-11.8.89)
BUILD_AND_TEST_CLI_ARGS: "--no-install-schema"
CMAKE_BUILD_TYPE: Release
DOCKER_REPOSITORY: geosx/ubuntu20.04-clang10.0.0-cuda11.8.89
ENABLE_HYPRE_DEVICE: CUDA
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
RUNS_ON: streak
NPROC: 8
DOCKER_RUN_ARGS: "--cpus=8 --memory=256g --runtime=nvidia --gpus all -v /etc/pki/ca-trust/source/anchors/:/usr/local/share/ca-certificates/llnl:ro"
DOCKER_CERTS_DIR: "/usr/local/share/ca-certificates"
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-certificates"
- name: Centos (7.7, gcc 8.3.1, open-mpi 1.10.7, cuda 11.8.89)
BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
CMAKE_BUILD_TYPE: Release
DOCKER_REPOSITORY: geosx/centos7.7-gcc8.3.1-cuda11.8.89
RUNS_ON: streak2
NPROC: 8
DOCKER_RUN_ARGS: "--cpus=8 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"
# compiler error in ElasticFirstOrderWaveEquationSEMKernel::StressComputation::launch in call to FE_TYPE::computeFirstOrderStiffnessTermX
# - name: Rockylinux (8, clang 17.0.6, cuda 12.5)
# BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
# CMAKE_BUILD_TYPE: Release
# DOCKER_REPOSITORY: geosx/rockylinux8-clang17-cuda12.5
# RUNS_ON: streak2
# NPROC: 2
# DOCKER_RUN_ARGS: "--cpus=1 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
# DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
# DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"
# Below this line, jobs that deploy to Google Cloud.
- name: Pecan GPU (centos 7.7, gcc 8.2.0, open-mpi 4.0.1, mkl 2019.5, cuda 11.5.119)
BUILD_AND_TEST_CLI_ARGS: "--build-exe-only --no-install-schema"
CMAKE_BUILD_TYPE: Release
DOCKER_REPOSITORY: geosx/pecan-gpu-gcc8.2.0-openmpi4.0.1-mkl2019.5-cuda11.5.119
HOST_CONFIG: host-configs/TOTAL/pecan-GPU.cmake
RUNS_ON: streak2
NPROC: 8
DOCKER_RUN_ARGS: "--cpus=8 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"
- name: Sherlock GPU (centos 7.9.2009, gcc 10.1.0, open-mpi 4.1.2, openblas 0.3.10, cuda 11.7.1,)
BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
CMAKE_BUILD_TYPE: Release
DOCKER_REPOSITORY: geosx/sherlock-gcc10.1.0-openmpi4.1.2-cuda11.7.1-openblas0.3.10-zlib1.2.11
ENABLE_HYPRE_DEVICE: CUDA
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
GCP_BUCKET: geosx/Sherlock-GPU
HOST_CONFIG: host-configs/Stanford/sherlock-gcc10-ompi4.1.2-openblas0.3.10-cuda11.7.1-sm70.cmake
RUNS_ON: streak2
NPROC: 8
DOCKER_RUN_ARGS: "--cpus=8 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"
uses: ./.github/workflows/build_and_test.yml
with:
BUILD_AND_TEST_CLI_ARGS: ${{ matrix.BUILD_AND_TEST_CLI_ARGS }}
CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }}
DOCKER_CERTS_DIR: ${{ matrix.DOCKER_CERTS_DIR }}
DOCKER_CERTS_UPDATE_COMMAND: ${{ matrix.DOCKER_CERTS_UPDATE_COMMAND }}
DOCKER_IMAGE_TAG: ${{ needs.is_not_draft_pull_request.outputs.DOCKER_IMAGE_TAG }}
DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }}
DOCKER_RUN_ARGS: ${{ matrix.DOCKER_RUN_ARGS }}
ENABLE_HYPRE_DEVICE: ${{ matrix.ENABLE_HYPRE_DEVICE }}
ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }}
ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }}
GCP_BUCKET: ${{ matrix.GCP_BUCKET }}
HOST_CONFIG: ${{ matrix.HOST_CONFIG }}
NPROC: ${{ matrix.NPROC }}
RUNS_ON: ${{ matrix.RUNS_ON }}
REQUIRED_LABEL: "ci: run CUDA builds"
secrets: inherit
# Convenience job - passes when all other jobs have passed (must pass the CUDA jobs).
check_that_all_jobs_succeeded:
runs-on: ubuntu-22.04
needs:
- if_not_unassigned_pull_request
- are_submodules_in_sync
- check_code_style_and_documentation
- cpu_builds
- cuda_builds
- run_integrated_tests
if: ${{ always() }}
steps:
- run: |
echo "if_not_unassigned_pull_request: ${{needs.if_not_unassigned_pull_request.result}}"
echo "are_submodules_in_sync: ${{needs.are_submodules_in_sync.result}}"
echo "check_code_style_and_documentation: ${{needs.check_code_style_and_documentation.result}}"
echo "cpu_builds: ${{needs.cpu_builds.result}}"
echo "cuda_builds: ${{needs.cuda_builds.result}}"
echo "run_integrated_tests: ${{needs.run_integrated_tests.result}} "
${{
needs.if_not_unassigned_pull_request.result == 'success' &&
needs.are_submodules_in_sync.result == 'success' &&
needs.check_code_style_and_documentation.result == 'success' &&
needs.cpu_builds.result == 'success' &&
needs.cuda_builds.result == 'success' &&
needs.run_integrated_tests.result == 'success'
}}