ci: split ci into atomic workflows #5
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: GPU builds

# Events that start this workflow.
on:
  # Pull requests targeting the develop branch, on the `labeled` event only.
  pull_request:
    branches:
      - develop
    types:
      - labeled
  # Pushes to the develop branch.
  push:
    branches:
      - develop
  # Manual runs.
  workflow_dispatch:
jobs:
  # Reads the TPL docker image tag from the devcontainer configuration and
  # exposes it as the DOCKER_IMAGE_TAG job output.
  get_docker_image_tag:
    # On pull requests, continue only if the label that was just added is the
    # GPU one. Push and manual runs carry no `label` in their event payload, so
    # they are let through explicitly; comparing the label alone would skip
    # this job, and every job that needs it, on those events.
    # NOTE(review): cuda_builds forwards REQUIRED_LABEL to the reusable
    # workflow; confirm that workflow tolerates non-pull-request events.
    if: "${{ github.event_name != 'pull_request' || github.event.label.name == 'ci: run CUDA builds' }}"
    # The same Ubuntu runner image is used for every GitHub-hosted job in this
    # workflow; the stated rationale is maximum support of Google Cloud's SDK.
    runs-on: ubuntu-22.04
    outputs:
      DOCKER_IMAGE_TAG: ${{ steps.extract_docker_image_tag.outputs.DOCKER_IMAGE_TAG }}
    steps:
      # The TPL tag is contained in the codespaces configuration to avoid duplications.
      - name: Checkout .devcontainer/devcontainer.json
        # NOTE(review): the exact version pin was unreadable in the copy this
        # was restored from; re-pin to the project's intended v4.x.y release.
        uses: actions/checkout@v4
        with:
          # Only this one file is needed, so skip everything else.
          sparse-checkout: |
            .devcontainer/devcontainer.json
          sparse-checkout-cone-mode: false
          submodules: false
          lfs: false
          fetch-depth: 1
      - name: Extract docker image tag
        id: extract_docker_image_tag
        run: |
          set -euo pipefail
          # `jq -e` exits non-zero when the key is missing or null, so the step
          # fails instead of publishing the literal string "null" as the tag.
          tag="$(jq -er '.build.args.GEOS_TPL_TAG' .devcontainer/devcontainer.json)"
          echo "DOCKER_IMAGE_TAG=${tag}" >> "$GITHUB_OUTPUT"
# GPU build matrix. When the 'ci: run CUDA builds' PR label is found, these jobs
# start immediately, along side the linux jobs.
# Note: the CUDA jobs should only be run once the PR is ready to merge.
  cuda_builds:
    name: ${{ matrix.name }}
    needs: [get_docker_image_tag]
    strategy:
      # A failure in one job does not cancel the jobs still in progress.
      fail-fast: false
      matrix:
        include:
          - name: Ubuntu CUDA debug (20.04, clang 10.0.0 + gcc 9.4.0, open-mpi 4.0.3, cuda-11.8.89)
            RUNS_ON: streak2
            NPROC: 8
            CMAKE_BUILD_TYPE: Debug
            BUILD_AND_TEST_CLI_ARGS: '--build-exe-only --no-install-schema'
            ENABLE_HYPRE: ON
            ENABLE_HYPRE_DEVICE: CUDA
            ENABLE_TRILINOS: OFF
            DOCKER_REPOSITORY: geosx/ubuntu20.04-clang10.0.0-cuda11.8.89
            DOCKER_RUN_ARGS: '--cpus=8 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/usr/local/share/ca-certificates/llnl:ro'
            DOCKER_CERTS_DIR: '/usr/local/share/ca-certificates'
            DOCKER_CERTS_UPDATE_COMMAND: 'update-ca-certificates'

          - name: Ubuntu CUDA (20.04, clang 10.0.0 + gcc 9.4.0, open-mpi 4.0.3, cuda-11.8.89)
            RUNS_ON: streak
            NPROC: 8
            CMAKE_BUILD_TYPE: Release
            BUILD_AND_TEST_CLI_ARGS: '--no-install-schema'
            ENABLE_HYPRE: ON
            ENABLE_HYPRE_DEVICE: CUDA
            ENABLE_TRILINOS: OFF
            DOCKER_REPOSITORY: geosx/ubuntu20.04-clang10.0.0-cuda11.8.89
            DOCKER_RUN_ARGS: '--cpus=8 --memory=256g --runtime=nvidia --gpus all -v /etc/pki/ca-trust/source/anchors/:/usr/local/share/ca-certificates/llnl:ro'
            DOCKER_CERTS_DIR: '/usr/local/share/ca-certificates'
            DOCKER_CERTS_UPDATE_COMMAND: 'update-ca-certificates'

          # Disabled: compiler error in ElasticFirstOrderWaveEquationSEMKernel::StressComputation::launch
          # in call to FE_TYPE::computeFirstOrderStiffnessTermX
          # - name: Rockylinux (8, clang 17.0.6, cuda 12.5)
          #   BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
          #   CMAKE_BUILD_TYPE: Release
          #   DOCKER_REPOSITORY: geosx/rockylinux8-clang17-cuda12.5
          #   RUNS_ON: streak2
          #   NPROC: 2
          #   DOCKER_RUN_ARGS: "--cpus=1 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
          #   DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
          #   DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"

          # Disabled: compiler error in ElasticFirstOrderWaveEquationSEMKernel::StressComputation::launch
          # in call to FE_TYPE::computeFirstOrderStiffnessTermX
          # - name: Rockylinux (8, gcc 8.5, cuda 12.5)
          #   BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
          #   CMAKE_BUILD_TYPE: Release
          #   DOCKER_REPOSITORY: geosx/rockylinux8-gcc8-cuda12.5
          #   RUNS_ON: streak2
          #   NPROC: 2
          #   DOCKER_RUN_ARGS: "--cpus=1 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
          #   DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
          #   DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"

          # Below this line, jobs that deploy to Google Cloud.
          - name: Sherlock GPU (centos 7.9.2009, gcc 10.1.0, open-mpi 4.1.2, openblas 0.3.10, cuda 11.7.1,)
            RUNS_ON: streak2
            NPROC: 8
            CMAKE_BUILD_TYPE: Release
            BUILD_AND_TEST_CLI_ARGS: '--no-run-unit-tests --no-install-schema'
            ENABLE_HYPRE: ON
            ENABLE_HYPRE_DEVICE: CUDA
            ENABLE_TRILINOS: OFF
            DOCKER_REPOSITORY: geosx/sherlock-gcc10.1.0-openmpi4.1.2-cuda11.7.1-openblas0.3.10-zlib1.2.11
            DOCKER_RUN_ARGS: '--cpus=8 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro'
            DOCKER_CERTS_DIR: '/etc/pki/ca-trust/source/anchors'
            DOCKER_CERTS_UPDATE_COMMAND: 'update-ca-trust'
            GCP_BUCKET: geosx/Sherlock-GPU
            HOST_CONFIG: host-configs/Stanford/sherlock-gcc10-ompi4.1.2-openblas0.3.10-cuda11.7.1-sm70.cmake
    # Each matrix entry is forwarded to the reusable build-and-test workflow.
    # Entries that do not define a key (e.g. GCP_BUCKET, HOST_CONFIG) pass an
    # empty value for it.
    uses: ./.github/workflows/build_and_test.yml
    with:
      RUNS_ON: ${{ matrix.RUNS_ON }}
      NPROC: ${{ matrix.NPROC }}
      CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }}
      BUILD_AND_TEST_CLI_ARGS: ${{ matrix.BUILD_AND_TEST_CLI_ARGS }}
      ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }}
      ENABLE_HYPRE_DEVICE: ${{ matrix.ENABLE_HYPRE_DEVICE }}
      ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }}
      DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }}
      # The tag comes from the job that reads the devcontainer configuration.
      DOCKER_IMAGE_TAG: ${{ needs.get_docker_image_tag.outputs.DOCKER_IMAGE_TAG }}
      DOCKER_RUN_ARGS: ${{ matrix.DOCKER_RUN_ARGS }}
      DOCKER_CERTS_DIR: ${{ matrix.DOCKER_CERTS_DIR }}
      DOCKER_CERTS_UPDATE_COMMAND: ${{ matrix.DOCKER_CERTS_UPDATE_COMMAND }}
      GCP_BUCKET: ${{ matrix.GCP_BUCKET }}
      HOST_CONFIG: ${{ matrix.HOST_CONFIG }}
      REQUIRED_LABEL: 'ci: run CUDA builds'
    secrets: inherit
# Label cleanup. The pull_request trigger of this workflow only fires on the
# `labeled` event, so the label has to come off before it can start a new run.
  remove_label:
    needs:
      - cuda_builds
    # Without a status function a job is skipped as soon as one of its needs
    # fails, which would leave the label in place after a broken build.
    # `always()` lifts that; the remaining terms restrict the cleanup to runs
    # that were actually started by adding the GPU label to a pull request.
    if: "${{ always() && github.event_name == 'pull_request' && github.event.label.name == 'ci: run CUDA builds' }}"
    runs-on: ubuntu-22.04
    # Removing a label needs write access on the issue / pull request; grant
    # only that to this job's token.
    permissions:
      issues: write
      pull-requests: write
    steps:
      - name: Remove the label
        uses: actions-ecosystem/action-remove-labels@v1
        with:
          labels: 'ci: run CUDA builds'