ci: split ci into atomic workflows #6

Workflow file for this run

.github/workflows/gpu_builds.yml at 7b2bf78

	name: GPU builds

	# Events that start this workflow.
	on:
	  # Pull requests targeting the develop branch, on the `labeled` event only.
	  pull_request:
	    branches:
	      - develop
	    types: [labeled]
	  # Pushes to the develop branch.
	  push:
	    branches:
	      - develop
	  # The workflow can also be started manually.
	  workflow_dispatch:

	jobs:
	  # Reads GEOS_TPL_TAG from .devcontainer/devcontainer.json and publishes it as the
	  # DOCKER_IMAGE_TAG output of this job.
	  get_docker_image_tag:
	    # Run only if the label that was just added is the GPU one.
	    # NOTE(review): `push` and `workflow_dispatch` events carry no `github.event.label`,
	    # so this condition is false for them; this job is then skipped and so is every job
	    # that needs it. Confirm that these two triggers are meant to be no-ops.
	    if: "${{ github.event.label.name == 'ci: run CUDA builds'}}"
	    # Everywhere in this workflow, we use the most recent ubuntu distribution available in
	    # Github Actions to ensure maximum support of google cloud's sdk.
	    runs-on: ubuntu-22.04
	    outputs:
	      DOCKER_IMAGE_TAG: ${{ steps.extract_docker_image_tag.outputs.DOCKER_IMAGE_TAG }}
	    steps:
	      # The TPL tag is contained in the codespaces configuration to avoid duplications.
	      - name: Checkout .devcontainer/devcontainer.json
	        # NOTE(review): the exact pinned version was lost when this file was extracted
	        # (it appeared as an obfuscated placeholder). The v4 major tag supports the
	        # sparse-checkout inputs used below; restore the project's pin if it differs.
	        uses: actions/checkout@v4
	        with:
	          # Fetch only the single file that holds the tag.
	          sparse-checkout: |
	            .devcontainer/devcontainer.json
	          sparse-checkout-cone-mode: false
	          submodules: false
	          lfs: false
	          fetch-depth: 1
	      - name: Extract docker image tag
	        id: extract_docker_image_tag
	        # `jq -r` prints the raw string, which is appended to the step outputs file.
	        run: |
	          echo "DOCKER_IMAGE_TAG=$(jq '.build.args.GEOS_TPL_TAG' -r .devcontainer/devcontainer.json)" >> "$GITHUB_OUTPUT"

	  # If the 'ci: run CUDA builds' PR label is found, the cuda jobs run immediately alongside linux jobs.
	  # Note: CUDA jobs should only be run if PR is ready to merge.
	  cuda_builds:
	    needs:
	      - get_docker_image_tag
	    name: ${{ matrix.name }}
	    strategy:
	      # In-progress jobs will not be cancelled if there is a failure
	      fail-fast: false
	      matrix:
	        # ON / OFF values are quoted so that generic YAML 1.1 tooling keeps them as strings.
	        include:
	          - name: Ubuntu CUDA debug (20.04, clang 10.0.0 + gcc 9.4.0, open-mpi 4.0.3, cuda-11.8.89)
	            BUILD_AND_TEST_CLI_ARGS: "--build-exe-only --no-install-schema"
	            CMAKE_BUILD_TYPE: Debug
	            DOCKER_REPOSITORY: geosx/ubuntu20.04-clang10.0.0-cuda11.8.89
	            ENABLE_HYPRE_DEVICE: CUDA
	            ENABLE_HYPRE: "ON"
	            ENABLE_TRILINOS: "OFF"
	            RUNS_ON: streak2
	            NPROC: 8
	            DOCKER_RUN_ARGS: "--cpus=8 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/usr/local/share/ca-certificates/llnl:ro"
	            DOCKER_CERTS_DIR: "/usr/local/share/ca-certificates"
	            DOCKER_CERTS_UPDATE_COMMAND: "update-ca-certificates"

	          - name: Ubuntu CUDA (20.04, clang 10.0.0 + gcc 9.4.0, open-mpi 4.0.3, cuda-11.8.89)
	            BUILD_AND_TEST_CLI_ARGS: "--no-install-schema"
	            CMAKE_BUILD_TYPE: Release
	            DOCKER_REPOSITORY: geosx/ubuntu20.04-clang10.0.0-cuda11.8.89
	            ENABLE_HYPRE_DEVICE: CUDA
	            ENABLE_HYPRE: "ON"
	            ENABLE_TRILINOS: "OFF"
	            RUNS_ON: streak
	            NPROC: 8
	            DOCKER_RUN_ARGS: "--cpus=8 --memory=256g --runtime=nvidia --gpus all -v /etc/pki/ca-trust/source/anchors/:/usr/local/share/ca-certificates/llnl:ro"
	            DOCKER_CERTS_DIR: "/usr/local/share/ca-certificates"
	            DOCKER_CERTS_UPDATE_COMMAND: "update-ca-certificates"

	          # Disabled: compiler error in ElasticFirstOrderWaveEquationSEMKernel::StressComputation::launch
	          # in call to FE_TYPE::computeFirstOrderStiffnessTermX
	          # - name: Rockylinux (8, clang 17.0.6, cuda 12.5)
	          #   BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
	          #   CMAKE_BUILD_TYPE: Release
	          #   DOCKER_REPOSITORY: geosx/rockylinux8-clang17-cuda12.5
	          #   RUNS_ON: streak2
	          #   NPROC: 2
	          #   DOCKER_RUN_ARGS: "--cpus=1 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
	          #   DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
	          #   DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"

	          # Disabled: compiler error in ElasticFirstOrderWaveEquationSEMKernel::StressComputation::launch
	          # in call to FE_TYPE::computeFirstOrderStiffnessTermX
	          # - name: Rockylinux (8, gcc 8.5, cuda 12.5)
	          #   BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
	          #   CMAKE_BUILD_TYPE: Release
	          #   DOCKER_REPOSITORY: geosx/rockylinux8-gcc8-cuda12.5
	          #   RUNS_ON: streak2
	          #   NPROC: 2
	          #   DOCKER_RUN_ARGS: "--cpus=1 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
	          #   DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
	          #   DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"

	          # Below this line, jobs that deploy to Google Cloud.
	          - name: Sherlock GPU (centos 7.9.2009, gcc 10.1.0, open-mpi 4.1.2, openblas 0.3.10, cuda 11.7.1,)
	            BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
	            CMAKE_BUILD_TYPE: Release
	            DOCKER_REPOSITORY: geosx/sherlock-gcc10.1.0-openmpi4.1.2-cuda11.7.1-openblas0.3.10-zlib1.2.11
	            ENABLE_HYPRE_DEVICE: CUDA
	            ENABLE_HYPRE: "ON"
	            ENABLE_TRILINOS: "OFF"
	            GCP_BUCKET: geosx/Sherlock-GPU
	            HOST_CONFIG: host-configs/Stanford/sherlock-gcc10-ompi4.1.2-openblas0.3.10-cuda11.7.1-sm70.cmake
	            RUNS_ON: streak2
	            NPROC: 8
	            DOCKER_RUN_ARGS: "--cpus=8 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
	            DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
	            DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"

	    # Every matrix entry is forwarded to the reusable build-and-test workflow.
	    # GCP_BUCKET and HOST_CONFIG are only defined by the Sherlock entry.
	    uses: ./.github/workflows/build_and_test.yml
	    with:
	      BUILD_AND_TEST_CLI_ARGS: ${{ matrix.BUILD_AND_TEST_CLI_ARGS }}
	      CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }}
	      DOCKER_CERTS_DIR: ${{ matrix.DOCKER_CERTS_DIR }}
	      DOCKER_CERTS_UPDATE_COMMAND: ${{ matrix.DOCKER_CERTS_UPDATE_COMMAND }}
	      DOCKER_IMAGE_TAG: ${{ needs.get_docker_image_tag.outputs.DOCKER_IMAGE_TAG }}
	      DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }}
	      DOCKER_RUN_ARGS: ${{ matrix.DOCKER_RUN_ARGS }}
	      ENABLE_HYPRE_DEVICE: ${{ matrix.ENABLE_HYPRE_DEVICE }}
	      ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }}
	      ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }}
	      GCP_BUCKET: ${{ matrix.GCP_BUCKET }}
	      HOST_CONFIG: ${{ matrix.HOST_CONFIG }}
	      NPROC: ${{ matrix.NPROC }}
	      RUNS_ON: ${{ matrix.RUNS_ON }}
	      REQUIRED_LABEL: "ci: run CUDA builds"
	    secrets: inherit

	  # Removes the triggering label once the CUDA builds are done.
	  # NOTE(review): no `if:` is set, so this job is skipped whenever cuda_builds fails or is
	  # skipped, and the label then stays on the PR. Confirm that this is the intended behaviour.
	  remove_label:
	    needs:
	      - cuda_builds
	    runs-on: ubuntu-22.04
	    steps:
	      - name: Remove the label
	        uses: actions-ecosystem/action-remove-labels@v1
	        with:
	          labels: 'ci: run CUDA builds'

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

ci: split ci into atomic workflows #6

Workflow file

ci: split ci into atomic workflows #6

Jobs

Run details

Workflow file for this run