Skip to content

ci: split ci into atomic workflows #1

ci: split ci into atomic workflows

ci: split ci into atomic workflows #1

Workflow file for this run

name: GPU builds
on:
pull_request: # Run workflow on PRs to the develop branch on labeled event
branches:
- develop
types: [ labeled ]
push: # Run workflow on push to the develop branch.
branches:
- develop
workflow_dispatch: # Workflow can be run manually
jobs:
# If the 'ci: run CUDA builds' PR label is found, the cuda jobs run immediately along side linux jobs.
# Note: CUDA jobs should only be run if PR is ready to merge.
cuda_builds:
# if the current added label is for GPUs
if: "${{ github.event.label.name == 'ci: run CUDA builds' }}"
name: ${{ matrix.name }}
strategy:
# In-progress jobs will not be cancelled if there is a failure
fail-fast : false
matrix:
include:
- name: Ubuntu CUDA debug (20.04, clang 10.0.0 + gcc 9.4.0, open-mpi 4.0.3, cuda-11.8.89)
BUILD_AND_TEST_CLI_ARGS: "--build-exe-only --no-install-schema"
CMAKE_BUILD_TYPE: Debug
DOCKER_REPOSITORY: geosx/ubuntu20.04-clang10.0.0-cuda11.8.89
ENABLE_HYPRE_DEVICE: CUDA
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
RUNS_ON: streak2
NPROC: 8
DOCKER_RUN_ARGS: "--cpus=8 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/usr/local/share/ca-certificates/llnl:ro"
DOCKER_CERTS_DIR: "/usr/local/share/ca-certificates"
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-certificates"
- name: Ubuntu CUDA (20.04, clang 10.0.0 + gcc 9.4.0, open-mpi 4.0.3, cuda-11.8.89)
BUILD_AND_TEST_CLI_ARGS: "--no-install-schema"
CMAKE_BUILD_TYPE: Release
DOCKER_REPOSITORY: geosx/ubuntu20.04-clang10.0.0-cuda11.8.89
ENABLE_HYPRE_DEVICE: CUDA
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
RUNS_ON: streak
NPROC: 8
DOCKER_RUN_ARGS: "--cpus=8 --memory=256g --runtime=nvidia --gpus all -v /etc/pki/ca-trust/source/anchors/:/usr/local/share/ca-certificates/llnl:ro"
DOCKER_CERTS_DIR: "/usr/local/share/ca-certificates"
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-certificates"
# compiler error in ElasticFirstOrderWaveEquationSEMKernel::StressComputation::launch in call to FE_TYPE::computeFirstOrderStiffnessTermX
# - name: Rockylinux (8, clang 17.0.6, cuda 12.5)
# BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
# CMAKE_BUILD_TYPE: Release
# DOCKER_REPOSITORY: geosx/rockylinux8-clang17-cuda12.5
# RUNS_ON: streak2
# NPROC: 2
# DOCKER_RUN_ARGS: "--cpus=1 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
# DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
# DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"
# compiler error in ElasticFirstOrderWaveEquationSEMKernel::StressComputation::launch in call to FE_TYPE::computeFirstOrderStiffnessTermX
# - name: Rockylinux (8, gcc 8.5, cuda 12.5)
# BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
# CMAKE_BUILD_TYPE: Release
# DOCKER_REPOSITORY: geosx/rockylinux8-gcc8-cuda12.5
# RUNS_ON: streak2
# NPROC: 2
# DOCKER_RUN_ARGS: "--cpus=1 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
# DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
# DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"
# Below this line, jobs that deploy to Google Cloud.
- name: Sherlock GPU (centos 7.9.2009, gcc 10.1.0, open-mpi 4.1.2, openblas 0.3.10, cuda 11.7.1,)
BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
CMAKE_BUILD_TYPE: Release
DOCKER_REPOSITORY: geosx/sherlock-gcc10.1.0-openmpi4.1.2-cuda11.7.1-openblas0.3.10-zlib1.2.11
ENABLE_HYPRE_DEVICE: CUDA
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
GCP_BUCKET: geosx/Sherlock-GPU
HOST_CONFIG: host-configs/Stanford/sherlock-gcc10-ompi4.1.2-openblas0.3.10-cuda11.7.1-sm70.cmake
RUNS_ON: streak2
NPROC: 8
DOCKER_RUN_ARGS: "--cpus=8 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"
uses: ./.github/workflows/build_and_test.yml
with:
BUILD_AND_TEST_CLI_ARGS: ${{ matrix.BUILD_AND_TEST_CLI_ARGS }}
CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }}
DOCKER_CERTS_DIR: ${{ matrix.DOCKER_CERTS_DIR }}
DOCKER_CERTS_UPDATE_COMMAND: ${{ matrix.DOCKER_CERTS_UPDATE_COMMAND }}
DOCKER_IMAGE_TAG: ${{ needs.is_not_draft_pull_request.outputs.DOCKER_IMAGE_TAG }}
DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }}
DOCKER_RUN_ARGS: ${{ matrix.DOCKER_RUN_ARGS }}
ENABLE_HYPRE_DEVICE: ${{ matrix.ENABLE_HYPRE_DEVICE }}
ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }}
ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }}
GCP_BUCKET: ${{ matrix.GCP_BUCKET }}
HOST_CONFIG: ${{ matrix.HOST_CONFIG }}
NPROC: ${{ matrix.NPROC }}
RUNS_ON: ${{ matrix.RUNS_ON }}
REQUIRED_LABEL: "ci: run CUDA builds"
secrets: inherit