Skip to content

ci: split ci into atomic workflows #8

ci: split ci into atomic workflows

ci: split ci into atomic workflows #8

Workflow file for this run

name: GPU builds
on:
pull_request: # Run workflow on PRs to the develop branch on labeled event
branches:
- develop
types: [ labeled ]
push: # Run workflow on push to the develop branch.
branches:
- develop
workflow_dispatch: # Workflow can be run manually
jobs:
get_docker_image_tag:
# if the current added label is for GPUs
if: "${{ github.event.label.name == 'ci: run CUDA builds'}}"
# Everywhere in this workflow, we use the most recent ubuntu distribution available in Github Actions
# to ensure maximum support of google cloud's sdk.
runs-on: ubuntu-22.04
outputs:
DOCKER_IMAGE_TAG: ${{ steps.extract_docker_image_tag.outputs.DOCKER_IMAGE_TAG }}
steps:
# The TPL tag is contained in the codespaces configuration to avoid duplications.
- name: Checkout .devcontainer/devcontainer.json
uses: actions/[email protected]
with:
sparse-checkout: |
.devcontainer/devcontainer.json
sparse-checkout-cone-mode: false
submodules: false
lfs: false
fetch-depth: 1
- name: Extract docker image tag
id: extract_docker_image_tag
run: |
echo "DOCKER_IMAGE_TAG=$(jq '.build.args.GEOS_TPL_TAG' -r .devcontainer/devcontainer.json)" >> "$GITHUB_OUTPUT"
# If the 'ci: run CUDA builds' PR label is found, the cuda jobs run immediately along side linux jobs.
# Note: CUDA jobs should only be run if PR is ready to merge.
cuda_builds:
needs:
- get_docker_image_tag
name: ${{ matrix.name }}
strategy:
# In-progress jobs will not be cancelled if there is a failure
fail-fast : false
matrix:
include:
- name: Ubuntu CUDA debug (20.04, clang 10.0.0 + gcc 9.4.0, open-mpi 4.0.3, cuda-11.8.89)
BUILD_AND_TEST_CLI_ARGS: "--build-exe-only --no-install-schema"
CMAKE_BUILD_TYPE: Debug
BUILD_GENERATOR: "--ninja"
DOCKER_REPOSITORY: geosx/ubuntu20.04-clang10.0.0-cuda11.8.89
ENABLE_HYPRE_DEVICE: CUDA
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
RUNS_ON: streak2
NPROC: 8
DOCKER_RUN_ARGS: "--cpus=8 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/usr/local/share/ca-certificates/llnl:ro"
DOCKER_CERTS_DIR: "/usr/local/share/ca-certificates"
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-certificates"
- name: Ubuntu CUDA (20.04, clang 10.0.0 + gcc 9.4.0, open-mpi 4.0.3, cuda-11.8.89)
BUILD_AND_TEST_CLI_ARGS: "--no-install-schema"
CMAKE_BUILD_TYPE: Release
BUILD_GENERATOR: "--ninja"
DOCKER_REPOSITORY: geosx/ubuntu20.04-clang10.0.0-cuda11.8.89
ENABLE_HYPRE_DEVICE: CUDA
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
RUNS_ON: streak
NPROC: 8
DOCKER_RUN_ARGS: "--cpus=8 --memory=256g --runtime=nvidia --gpus all -v /etc/pki/ca-trust/source/anchors/:/usr/local/share/ca-certificates/llnl:ro"
DOCKER_CERTS_DIR: "/usr/local/share/ca-certificates"
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-certificates"
- name: Rockylinux CUDA (8, clang 17.0.6, cuda 12.5.1)
BUILD_AND_TEST_CLI_ARGS: "--no-install-schema"
CMAKE_BUILD_TYPE: Release
BUILD_GENERATOR: "--ninja"
ENABLE_HYPRE_DEVICE: CUDA
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
DOCKER_REPOSITORY: geosx/rockylinux8-clang17-cuda12.5
RUNS_ON: streak
NPROC: 8
DOCKER_RUN_ARGS: "--cpus=8 --memory=256g --runtime=nvidia --gpus all -v /etc/pki/ca-trust/source/anchors/:/usr/local/share/ca-certificates/llnl:ro"
DOCKER_CERTS_DIR: "/usr/local/share/ca-certificates"
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"
- name: Rockylinux CUDA (8, gcc 8.5, cuda 12.5.1)
BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
CMAKE_BUILD_TYPE: Release
BUILD_GENERATOR: "--ninja"
ENABLE_HYPRE_DEVICE: CUDA
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
DOCKER_REPOSITORY: geosx/rockylinux8-gcc8-cuda12.5
RUNS_ON: streak2
NPROC: 8
DOCKER_RUN_ARGS: "--cpus=8 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"
- name: Pangea 3 (AlmaLinux 8.8, gcc 9.4.0, open-mpi 4.1.2, cuda 11.5.0, openblas 0.3.10)
BUILD_AND_TEST_CLI_ARGS: "--build-exe-only --no-install-schema"
CMAKE_BUILD_TYPE: Release
BUILD_GENERATOR: "--makefile"
DOCKER_REPOSITORY: geosx/pangea3-almalinux8-gcc9.4-openmpi4.1.2-cuda11.5.0-openblas0.3.18
HOST_CONFIG: host-configs/TOTAL/pangea3-gcc8.4.1-openmpi-4.1.2-wave-solver.cmake
ENABLE_HYPRE_DEVICE: CUDA
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
HOST_ARCH: ppc64le
RUNS_ON: streak2
NPROC: 8
DOCKER_RUN_ARGS: "--cpus=8 --memory=128g -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"
# Below this line, jobs that deploy to Google Cloud.
uses: ./.github/workflows/build_and_test.yml
with:
BUILD_AND_TEST_CLI_ARGS: ${{ matrix.BUILD_AND_TEST_CLI_ARGS }}
CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }}
BUILD_GENERATOR: ${{ matrix.BUILD_GENERATOR }}
DOCKER_CERTS_DIR: ${{ matrix.DOCKER_CERTS_DIR }}
DOCKER_CERTS_UPDATE_COMMAND: ${{ matrix.DOCKER_CERTS_UPDATE_COMMAND }}
DOCKER_IMAGE_TAG: ${{ needs.get_docker_image_tag.outputs.DOCKER_IMAGE_TAG }}
DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }}
DOCKER_RUN_ARGS: ${{ matrix.DOCKER_RUN_ARGS }}
ENABLE_HYPRE_DEVICE: ${{ matrix.ENABLE_HYPRE_DEVICE }}
ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }}
ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }}
HOST_ARCH: ${{ matrix.HOST_ARCH }}
HOST_CONFIG: ${{ matrix.HOST_CONFIG }}
NPROC: ${{ matrix.NPROC }}
RUNS_ON: ${{ matrix.RUNS_ON }}
REQUIRED_LABEL: "ci: run CUDA builds"
secrets: inherit
remove_label:
if: always()
needs:
- cuda_builds
runs-on: ubuntu-22.04
steps:
- name: Remove the label
uses: actions-ecosystem/action-remove-labels@v1
with:
labels: 'ci: run CUDA builds'