Skip to content

Commit

Permalink
GitHub Actions - Run CUDA jobs only when ready to merge (#2659)
Browse files Browse the repository at this point in the history
* Duplicate the CUDA job with a different dependency (`needs`) so that it skips waiting for the Linux builds when the PR is ready to merge; use reusable workflows to cut down on duplicated steps

* Use bigger runners

* CUDA jobs will now only run if ready to merge label is added; switch non-debug CUDA jobs back to using default non-paid runners
  • Loading branch information
bmhan12 authored Sep 25, 2023
1 parent 0eb9b90 commit e27b443
Show file tree
Hide file tree
Showing 2 changed files with 163 additions and 125 deletions.
92 changes: 92 additions & 0 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Reusable workflow: builds and tests a single configuration inside the
# requested Docker image and, when a GCP bucket is supplied, uploads the
# resulting GEOSX + TPL bundle to Google Cloud Storage.
#
# It is meant to be invoked from another workflow through `workflow_call`.
# All inputs are strings; optional inputs that the caller leaves out are
# received as empty strings, which is what the `if: env.GCP_BUCKET`
# conditions below rely on.
name: Build and Test Configuration
on:
  workflow_call:
    inputs:
      # Exported to the job environment as GEOSX_TPL_TAG.
      GEOSX_TPL_TAG:
        required: true
        type: string
      # Exported to the job environment as DOCKER_REPOSITORY.
      DOCKER_REPOSITORY:
        required: true
        type: string
      # Exported to the job environment as CMAKE_BUILD_TYPE.
      CMAKE_BUILD_TYPE:
        required: true
        type: string
      # Exported to the job environment as BUILD_AND_TEST_ARGS.
      BUILD_AND_TEST_ARGS:
        required: false
        type: string
      # Exported to the job environment as HOST_CONFIG.
      HOST_CONFIG:
        required: false
        type: string
      # Exported to the job environment as ENABLE_HYPRE.
      ENABLE_HYPRE:
        required: false
        type: string
      # Exported to the job environment as ENABLE_HYPRE_DEVICE.
      ENABLE_HYPRE_DEVICE:
        required: false
        type: string
      # Exported to the job environment as ENABLE_TRILINOS.
      ENABLE_TRILINOS:
        required: false
        type: string
      # Destination bucket. When non-empty, the job authenticates to Google
      # Cloud and runs the "deploy" variant of the build step.
      GCP_BUCKET:
        required: false
        type: string
      # Commit SHA; its first 7 characters name the exported bundle.
      COMMIT:
        required: true
        type: string
      # Runner label used for `runs-on`.
      OS:
        required: true
        type: string
    secrets:
      # Service-account credentials (JSON) consumed by the 'auth' step.
      # Only used when GCP_BUCKET is set.
      GOOGLE_CLOUD_GCP:
        required: false

jobs:
  build_and_test:
    runs-on: ${{ inputs.OS }}
    # Inputs are re-exported as job-level environment variables so that
    # every step (and the build script) can read them.
    env:
      DOCKER_REPOSITORY: ${{ inputs.DOCKER_REPOSITORY }}
      CMAKE_BUILD_TYPE: ${{ inputs.CMAKE_BUILD_TYPE }}
      BUILD_AND_TEST_ARGS: ${{ inputs.BUILD_AND_TEST_ARGS }}
      ENABLE_HYPRE: ${{ inputs.ENABLE_HYPRE }}
      ENABLE_HYPRE_DEVICE: ${{ inputs.ENABLE_HYPRE_DEVICE }}
      ENABLE_TRILINOS: ${{ inputs.ENABLE_TRILINOS }}
      GCP_BUCKET: ${{ inputs.GCP_BUCKET }}
      HOST_CONFIG: ${{ inputs.HOST_CONFIG }}
      COMMIT: ${{ inputs.COMMIT }}
      GEOSX_TPL_TAG: ${{ inputs.GEOSX_TPL_TAG }}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v3
        with:
          submodules: true
          lfs: false

      # Google Cloud authentication and SDK setup run only for deploying
      # configurations (non-empty GCP_BUCKET).
      - id: 'auth'
        if: env.GCP_BUCKET
        uses: 'google-github-actions/auth@v1'
        with:
          credentials_json: '${{ secrets.GOOGLE_CLOUD_GCP }}'
      - name: 'Set up Cloud SDK'
        if: env.GCP_BUCKET
        uses: 'google-github-actions/setup-gcloud@v1'
        with:
          version: '>= 363.0.0'

      - name: Print environment
        run: printenv

      # Build and test only
      # Builds only the geosx executable (timeout when building tests)
      - name: Build and test
        if: ${{ !(env.GCP_BUCKET) }}
        run: ./scripts/ci_build_and_test.sh

      # Build, test, uploads GEOSX and its TPL to GCP/GCS using gcloud CLI
      # The build script is sourced rather than executed; CONTAINER_NAME and
      # GEOSX_TPL_DIR are not set anywhere in this workflow, so they are
      # presumably defined by that script (TODO confirm).
      # Expansions are double-quoted so that unexpected whitespace in a value
      # cannot split a path into several arguments.
      - name: Build and test and deploy
        if: env.GCP_BUCKET
        run: |
          source ./scripts/ci_build_and_test.sh
          TMP_DIR=/tmp
          GEOSX_EXPORT_DIR="GEOSX-and-TPL-${COMMIT:0:7}"
          docker cp -a "${CONTAINER_NAME}:${GEOSX_TPL_DIR}/.." "${TMP_DIR}/${GEOSX_EXPORT_DIR}"
          GEOSX_BUNDLE="${TMP_DIR}/${GEOSX_EXPORT_DIR}.tar.gz"
          tar czf "${GEOSX_BUNDLE}" --directory="${TMP_DIR}" "${GEOSX_EXPORT_DIR}"
          CLOUDSDK_PYTHON=python3 gsutil cp -a public-read "${GEOSX_BUNDLE}" "gs://${GCP_BUCKET}/"
196 changes: 71 additions & 125 deletions .github/workflows/ci_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,16 @@ env:
GEOSX_TPL_TAG: 238-63

jobs:

# Matrix jobs will be cancelled if PR is a draft.
# PR status must be "Open" to run CI.
check_pull_request_is_not_a_draft:
# We use the most recent ubuntu distribution available in Github Actions to ensure maximum support of google cloud's sdk.
runs-on: ubuntu-22.04
# Workaround due to github actions variable scope restrictions passing to
# another job's env
outputs:
GEOSX_TPL_TAG: ${{ env.GEOSX_TPL_TAG}}
steps:
- name: Check that the PR is not a draft (cancel rest of jobs otherwise)
run: |
Expand Down Expand Up @@ -85,11 +90,8 @@ jobs:
BUILD_AND_TEST_ARGS: --test-documentation
run: ./scripts/ci_build_and_test.sh


linux_builds:
name: ${{ matrix.name }}
# runs-on: ubuntu-22.04
runs-on: ${{ matrix.os }}
needs: [check_pull_request_is_not_a_draft]
strategy:

Expand Down Expand Up @@ -142,67 +144,28 @@ jobs:
OS: ubuntu-22.04
CMAKE_BUILD_TYPE: Release
GCP_BUCKET: geosx/ubuntu22.04-gcc11

steps:
- name: Checkout Repository
uses: actions/checkout@v3
with:
submodules: true
lfs: false

- id: 'auth'
if: matrix.GCP_BUCKET
uses: 'google-github-actions/auth@v1'
with:
credentials_json: '${{ secrets.GOOGLE_CLOUD_GCP }}'
- name: 'Set up Cloud SDK'
if: matrix.GCP_BUCKET
uses: 'google-github-actions/setup-gcloud@v1'
with:
version: '>= 363.0.0'

- name: Print environment
run: printenv

# Build and test only
# Builds only the geosx executable (timeout when building tests)
- name: Build and test
if: ${{ !(matrix.GCP_BUCKET) }}
env:
DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }}
CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }}
BUILD_AND_TEST_ARGS: ${{ matrix.BUILD_AND_TEST_ARGS }}
ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }}
ENABLE_HYPRE_DEVICE: ${{ matrix.ENABLE_HYPRE_DEVICE }}
ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }}
run: ./scripts/ci_build_and_test.sh

# Build, test, uploads GEOSX and its TPL to GCP/GCS using gcloud CLI
- name: Build and test and deploy
if: matrix.GCP_BUCKET
env:
DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }}
CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }}
BUILD_AND_TEST_ARGS: ${{ matrix.BUILD_AND_TEST_ARGS }}
HOST_CONFIG: ${{ matrix.HOST_CONFIG }}
ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }}
ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }}
GCP_BUCKET: ${{ matrix.GCP_BUCKET }}
COMMIT: ${{ github.event.pull_request.head.sha }}
run: |
source ./scripts/ci_build_and_test.sh
TMP_DIR=/tmp
GEOSX_EXPORT_DIR=GEOSX-and-TPL-${COMMIT:0:7}
docker cp -a ${CONTAINER_NAME}:${GEOSX_TPL_DIR}/.. ${TMP_DIR}/${GEOSX_EXPORT_DIR}
GEOSX_BUNDLE=${TMP_DIR}/${GEOSX_EXPORT_DIR}.tar.gz
tar czf ${GEOSX_BUNDLE} --directory=${TMP_DIR} ${GEOSX_EXPORT_DIR}
CLOUDSDK_PYTHON=python3 gsutil cp -a public-read ${GEOSX_BUNDLE} gs://${GCP_BUCKET}/
large_cuda_builds:
uses: ./.github/workflows/build_and_test.yml
with:
GEOSX_TPL_TAG: ${{ needs.check_pull_request_is_not_a_draft.outputs.GEOSX_TPL_TAG }}
DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }}
CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }}
BUILD_AND_TEST_ARGS: ${{ matrix.BUILD_AND_TEST_ARGS }}
HOST_CONFIG: ${{ matrix.HOST_CONFIG }}
ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }}
ENABLE_HYPRE_DEVICE: ${{ matrix.ENABLE_HYPRE_DEVICE }}
ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }}
GCP_BUCKET: ${{ matrix.GCP_BUCKET }}
COMMIT: ${{ github.event.pull_request.head.sha }}
OS: ${{ matrix.OS }}
secrets: inherit

# If the "flag: ready to be merged" PR label is found, CUDA jobs run
# immediately alongside the Linux jobs.
# CUDA jobs should only be run if PR is ready to merge.
cuda_builds_merge_ready:
name: ${{ matrix.name }}
# runs-on: ubuntu-22.04
runs-on: ${{ matrix.os }}
needs: [linux_builds]
needs: [check_pull_request_is_not_a_draft]
if: contains( github.event.pull_request.labels.*.name, format('flag{0} ready to be merged', ':'))
strategy:

# In-progress jobs will not be cancelled if there is a failure
Expand All @@ -220,7 +183,7 @@ jobs:

- name: Ubuntu CUDA (20.04, clang 10.0.0 + gcc 9.4.0, open-mpi 4.0.3, cuda-11.8.89)
DOCKER_REPOSITORY: geosx/ubuntu20.04-clang10.0.0-cuda11.8.89
OS: Runner_4core_16GB
OS: ubuntu-22.04
CMAKE_BUILD_TYPE: Release
BUILD_AND_TEST_ARGS: "--disable-unit-tests --disable-schema-deployment"
ENABLE_HYPRE: ON
Expand All @@ -229,78 +192,61 @@ jobs:

- name: Centos (7.7, gcc 8.3.1, open-mpi 1.10.7, cuda 11.8.89)
DOCKER_REPOSITORY: geosx/centos7.7-gcc8.3.1-cuda11.8.89
OS: Runner_4core_16GB
OS: ubuntu-22.04
CMAKE_BUILD_TYPE: Release
BUILD_AND_TEST_ARGS: "--disable-unit-tests --disable-schema-deployment"

# Matrix job that deploys to Google Cloud
- name: Pecan GPU (centos 7.7, gcc 8.2.0, open-mpi 4.0.1, mkl 2019.5, cuda 11.5.119)
DOCKER_REPOSITORY: geosx/pecan-gpu-gcc8.2.0-openmpi4.0.1-mkl2019.5-cuda11.5.119
OS: Runner_4core_16GB
OS: ubuntu-22.04
CMAKE_BUILD_TYPE: Release
BUILD_AND_TEST_ARGS: "--build-exe-only --disable-unit-tests --disable-schema-deployment"
HOST_CONFIG: host-configs/TOTAL/pecan-GPU.cmake
GCP_BUCKET: geosx/Pecan-GPU

steps:
- name: Checkout Repository
uses: actions/checkout@v3
with:
submodules: true
lfs: false

- id: 'auth'
if: matrix.GCP_BUCKET
uses: 'google-github-actions/auth@v1'
with:
credentials_json: '${{ secrets.GOOGLE_CLOUD_GCP }}'
- name: 'Set up Cloud SDK'
if: matrix.GCP_BUCKET
uses: 'google-github-actions/setup-gcloud@v1'
with:
version: '>= 363.0.0'

- name: Print environment
run: printenv

# Build and test only
# Builds only the geosx executable (timeout when building tests)
- name: Build and test
if: ${{ !(matrix.GCP_BUCKET) }}
env:
DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }}
CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }}
BUILD_AND_TEST_ARGS: ${{ matrix.BUILD_AND_TEST_ARGS }}
ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }}
ENABLE_HYPRE_DEVICE: ${{ matrix.ENABLE_HYPRE_DEVICE }}
ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }}
run: ./scripts/ci_build_and_test.sh

# Build, test, uploads GEOSX and its TPL to GCP/GCS using gcloud CLI
- name: Build and test and deploy
if: matrix.GCP_BUCKET
env:
DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }}
CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }}
BUILD_AND_TEST_ARGS: ${{ matrix.BUILD_AND_TEST_ARGS }}
HOST_CONFIG: ${{ matrix.HOST_CONFIG }}
ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }}
ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }}
GCP_BUCKET: ${{ matrix.GCP_BUCKET }}
COMMIT: ${{ github.event.pull_request.head.sha }}
run: |
source ./scripts/ci_build_and_test.sh
TMP_DIR=/tmp
GEOSX_EXPORT_DIR=GEOSX-and-TPL-${COMMIT:0:7}
docker cp -a ${CONTAINER_NAME}:${GEOSX_TPL_DIR}/.. ${TMP_DIR}/${GEOSX_EXPORT_DIR}
GEOSX_BUNDLE=${TMP_DIR}/${GEOSX_EXPORT_DIR}.tar.gz
tar czf ${GEOSX_BUNDLE} --directory=${TMP_DIR} ${GEOSX_EXPORT_DIR}
CLOUDSDK_PYTHON=python3 gsutil cp -a public-read ${GEOSX_BUNDLE} gs://${GCP_BUCKET}/
# Convenience job - passes when all other jobs have passed.
uses: ./.github/workflows/build_and_test.yml
with:
GEOSX_TPL_TAG: ${{ needs.check_pull_request_is_not_a_draft.outputs.GEOSX_TPL_TAG }}
DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }}
CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }}
BUILD_AND_TEST_ARGS: ${{ matrix.BUILD_AND_TEST_ARGS }}
HOST_CONFIG: ${{ matrix.HOST_CONFIG }}
ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }}
ENABLE_HYPRE_DEVICE: ${{ matrix.ENABLE_HYPRE_DEVICE }}
ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }}
GCP_BUCKET: ${{ matrix.GCP_BUCKET }}
COMMIT: ${{ github.event.pull_request.head.sha }}
OS: ${{ matrix.OS }}
secrets: inherit

# Convenience job - passes when all other jobs have passed
# (must pass the CUDA jobs)
check_that_all_jobs_succeeded:
runs-on: ubuntu-22.04
needs: [check_pull_request_is_not_a_draft, check_pull_request_is_assigned, check_submodules, code_style, documentation, linux_builds, large_cuda_builds]
needs:
- check_pull_request_is_not_a_draft
- check_pull_request_is_assigned
- check_submodules
- code_style
- documentation
- linux_builds
- cuda_builds_merge_ready
if: ${{ always() }}
env:
RETURN_VAL: |
${{
needs.check_pull_request_is_not_a_draft.result == 'success' &&
needs.check_pull_request_is_assigned.result == 'success' &&
needs.check_submodules.result == 'success' &&
needs.code_style.result == 'success' &&
needs.documentation.result == 'success' &&
needs.linux_builds.result == 'success' &&
needs.cuda_builds_merge_ready.result == 'success'
}}
steps:
- name: Success
- name: PR Success
if: ${{ contains(env.RETURN_VAL, 'true') }}
run: "true"
- name: PR Failure
if: ${{ contains(env.RETURN_VAL, 'false') }}
run: "false"

0 comments on commit e27b443

Please sign in to comment.