# ci: split ci into atomic workflows #1
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Display name of this workflow.
name: GPU builds

# Events that start the workflow.
on:
  # Pull requests targeting develop, only when a label is added to the PR.
  pull_request:
    branches:
      - develop
    types:
      - labeled
  # Pushes to the develop branch.
  push:
    branches:
      - develop
  # Manual runs.
  workflow_dispatch:
jobs:
  # CUDA build matrix: every matrix entry below is forwarded to the reusable
  # build_and_test.yml workflow through the `with:` mapping at the end of the job.
  # If the 'ci: run CUDA builds' PR label is found, the cuda jobs run immediately alongside linux jobs.
  # Note: CUDA jobs should only be run if PR is ready to merge.
  cuda_builds:
    # On pull requests, run only when the label that was just added is the GPU label.
    # `github.event.label` only exists on `labeled` events, so push and manual
    # (workflow_dispatch) runs are let through explicitly; without the event_name
    # guard this job would be skipped for those declared triggers.
    if: "${{ github.event_name != 'pull_request' || github.event.label.name == 'ci: run CUDA builds' }}"
    name: ${{ matrix.name }}
    strategy:
      # In-progress jobs will not be cancelled if there is a failure
      fail-fast: false
      matrix:
        # ON/OFF values are quoted so that every YAML loader reads them as strings
        # rather than as YAML 1.1 booleans.
        include:
          - name: Ubuntu CUDA debug (20.04, clang 10.0.0 + gcc 9.4.0, open-mpi 4.0.3, cuda-11.8.89)
            BUILD_AND_TEST_CLI_ARGS: "--build-exe-only --no-install-schema"
            CMAKE_BUILD_TYPE: Debug
            DOCKER_REPOSITORY: geosx/ubuntu20.04-clang10.0.0-cuda11.8.89
            ENABLE_HYPRE_DEVICE: CUDA
            ENABLE_HYPRE: "ON"
            ENABLE_TRILINOS: "OFF"
            RUNS_ON: streak2
            NPROC: 8
            DOCKER_RUN_ARGS: "--cpus=8 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/usr/local/share/ca-certificates/llnl:ro"
            DOCKER_CERTS_DIR: "/usr/local/share/ca-certificates"
            DOCKER_CERTS_UPDATE_COMMAND: "update-ca-certificates"

          - name: Ubuntu CUDA (20.04, clang 10.0.0 + gcc 9.4.0, open-mpi 4.0.3, cuda-11.8.89)
            BUILD_AND_TEST_CLI_ARGS: "--no-install-schema"
            CMAKE_BUILD_TYPE: Release
            DOCKER_REPOSITORY: geosx/ubuntu20.04-clang10.0.0-cuda11.8.89
            ENABLE_HYPRE_DEVICE: CUDA
            ENABLE_HYPRE: "ON"
            ENABLE_TRILINOS: "OFF"
            RUNS_ON: streak
            NPROC: 8
            DOCKER_RUN_ARGS: "--cpus=8 --memory=256g --runtime=nvidia --gpus all -v /etc/pki/ca-trust/source/anchors/:/usr/local/share/ca-certificates/llnl:ro"
            DOCKER_CERTS_DIR: "/usr/local/share/ca-certificates"
            DOCKER_CERTS_UPDATE_COMMAND: "update-ca-certificates"

          # compiler error in ElasticFirstOrderWaveEquationSEMKernel::StressComputation::launch in call to FE_TYPE::computeFirstOrderStiffnessTermX
          # - name: Rockylinux (8, clang 17.0.6, cuda 12.5)
          #   BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
          #   CMAKE_BUILD_TYPE: Release
          #   DOCKER_REPOSITORY: geosx/rockylinux8-clang17-cuda12.5
          #   RUNS_ON: streak2
          #   NPROC: 2
          #   DOCKER_RUN_ARGS: "--cpus=1 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
          #   DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
          #   DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"

          # compiler error in ElasticFirstOrderWaveEquationSEMKernel::StressComputation::launch in call to FE_TYPE::computeFirstOrderStiffnessTermX
          # - name: Rockylinux (8, gcc 8.5, cuda 12.5)
          #   BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
          #   CMAKE_BUILD_TYPE: Release
          #   DOCKER_REPOSITORY: geosx/rockylinux8-gcc8-cuda12.5
          #   RUNS_ON: streak2
          #   NPROC: 2
          #   DOCKER_RUN_ARGS: "--cpus=1 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
          #   DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
          #   DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"

          # Below this line, jobs that deploy to Google Cloud.
          - name: Sherlock GPU (centos 7.9.2009, gcc 10.1.0, open-mpi 4.1.2, openblas 0.3.10, cuda 11.7.1,)
            BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
            CMAKE_BUILD_TYPE: Release
            DOCKER_REPOSITORY: geosx/sherlock-gcc10.1.0-openmpi4.1.2-cuda11.7.1-openblas0.3.10-zlib1.2.11
            ENABLE_HYPRE_DEVICE: CUDA
            ENABLE_HYPRE: "ON"
            ENABLE_TRILINOS: "OFF"
            GCP_BUCKET: geosx/Sherlock-GPU
            HOST_CONFIG: host-configs/Stanford/sherlock-gcc10-ompi4.1.2-openblas0.3.10-cuda11.7.1-sm70.cmake
            RUNS_ON: streak2
            NPROC: 8
            DOCKER_RUN_ARGS: "--cpus=8 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
            DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
            DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"

    # Reusable workflow that receives one matrix entry per run.
    uses: ./.github/workflows/build_and_test.yml
    with:
      BUILD_AND_TEST_CLI_ARGS: ${{ matrix.BUILD_AND_TEST_CLI_ARGS }}
      CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }}
      DOCKER_CERTS_DIR: ${{ matrix.DOCKER_CERTS_DIR }}
      DOCKER_CERTS_UPDATE_COMMAND: ${{ matrix.DOCKER_CERTS_UPDATE_COMMAND }}
      # NOTE(review): this job declares no `needs:` and no `is_not_draft_pull_request`
      # job is visible in this workflow, so this expression presumably evaluates to
      # an empty string. TODO confirm, and add the producing job plus a matching
      # `needs:` entry if the image tag is required.
      DOCKER_IMAGE_TAG: ${{ needs.is_not_draft_pull_request.outputs.DOCKER_IMAGE_TAG }}
      DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }}
      DOCKER_RUN_ARGS: ${{ matrix.DOCKER_RUN_ARGS }}
      ENABLE_HYPRE_DEVICE: ${{ matrix.ENABLE_HYPRE_DEVICE }}
      ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }}
      ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }}
      # GCP_BUCKET and HOST_CONFIG are only defined by the Sherlock entry above;
      # they are not set for the other matrix entries.
      GCP_BUCKET: ${{ matrix.GCP_BUCKET }}
      HOST_CONFIG: ${{ matrix.HOST_CONFIG }}
      NPROC: ${{ matrix.NPROC }}
      RUNS_ON: ${{ matrix.RUNS_ON }}
      REQUIRED_LABEL: "ci: run CUDA builds"
    secrets: inherit