From e27b4434bbb36bc6d1d4bc32baa77372076455e3 Mon Sep 17 00:00:00 2001 From: Brian Han Date: Mon, 25 Sep 2023 12:42:42 -0700 Subject: [PATCH] Github Actions - Run CUDA jobs only when ready to merge (#2659) * Duplicate CUDA job with different dependency/need to skip waiting for linux builds if ready to merge; reusable workflows to cut down on steps reuse * Use bigger runners * CUDA jobs will now only run if ready to merge label is added; switch non-debug CUDA jobs back to using default non-paid runners --- .github/workflows/build_and_test.yml | 92 +++++++++++++ .github/workflows/ci_tests.yml | 196 ++++++++++----------------- 2 files changed, 163 insertions(+), 125 deletions(-) create mode 100644 .github/workflows/build_and_test.yml diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml new file mode 100644 index 00000000000..4b2bcd14ba0 --- /dev/null +++ b/.github/workflows/build_and_test.yml @@ -0,0 +1,92 @@ +name: Build and Test Configuration +on: + workflow_call: + inputs: + GEOSX_TPL_TAG: + required: true + type: string + DOCKER_REPOSITORY: + required: true + type: string + CMAKE_BUILD_TYPE: + required: true + type: string + BUILD_AND_TEST_ARGS: + required: false + type: string + HOST_CONFIG: + required: false + type: string + ENABLE_HYPRE: + required: false + type: string + ENABLE_HYPRE_DEVICE: + required: false + type: string + ENABLE_TRILINOS: + required: false + type: string + GCP_BUCKET: + required: false + type: string + COMMIT: + required: true + type: string + OS: + required: true + type: string + secrets: + GOOGLE_CLOUD_GCP: + required: false +jobs: + build_and_test: + runs-on: ${{ inputs.OS }} + env: + DOCKER_REPOSITORY: ${{ inputs.DOCKER_REPOSITORY }} + CMAKE_BUILD_TYPE: ${{ inputs.CMAKE_BUILD_TYPE }} + BUILD_AND_TEST_ARGS: ${{ inputs.BUILD_AND_TEST_ARGS }} + ENABLE_HYPRE: ${{ inputs.ENABLE_HYPRE }} + ENABLE_HYPRE_DEVICE: ${{ inputs.ENABLE_HYPRE_DEVICE }} + ENABLE_TRILINOS: ${{ inputs.ENABLE_TRILINOS }} + GCP_BUCKET: 
${{ inputs.GCP_BUCKET }} + HOST_CONFIG: ${{ inputs.HOST_CONFIG }} + COMMIT: ${{ inputs.COMMIT }} + GEOSX_TPL_TAG: ${{ inputs.GEOSX_TPL_TAG }} + steps: + - name: Checkout Repository + uses: actions/checkout@v3 + with: + submodules: true + lfs: false + + - id: 'auth' + if: env.GCP_BUCKET + uses: 'google-github-actions/auth@v1' + with: + credentials_json: '${{ secrets.GOOGLE_CLOUD_GCP }}' + - name: 'Set up Cloud SDK' + if: env.GCP_BUCKET + uses: 'google-github-actions/setup-gcloud@v1' + with: + version: '>= 363.0.0' + + - name: Print environment + run: printenv + + # Build and test only + # Builds only the geosx executable (timeout when building tests) + - name: Build and test + if: ${{ !(env.GCP_BUCKET) }} + run: ./scripts/ci_build_and_test.sh + + # Build, test, uploads GEOSX and its TPL to GCP/GCS using gcloud CLI + - name: Build and test and deploy + if: env.GCP_BUCKET + run: | + source ./scripts/ci_build_and_test.sh + TMP_DIR=/tmp + GEOSX_EXPORT_DIR=GEOSX-and-TPL-${COMMIT:0:7} + docker cp -a ${CONTAINER_NAME}:${GEOSX_TPL_DIR}/.. ${TMP_DIR}/${GEOSX_EXPORT_DIR} + GEOSX_BUNDLE=${TMP_DIR}/${GEOSX_EXPORT_DIR}.tar.gz + tar czf ${GEOSX_BUNDLE} --directory=${TMP_DIR} ${GEOSX_EXPORT_DIR} + CLOUDSDK_PYTHON=python3 gsutil cp -a public-read ${GEOSX_BUNDLE} gs://${GCP_BUCKET}/ diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index 81ecacfa102..0a327822c9b 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -10,11 +10,16 @@ env: GEOSX_TPL_TAG: 238-63 jobs: + # Matrix jobs will be cancelled if PR is a draft. # PR status must be "Open" to run CI. check_pull_request_is_not_a_draft: # We use the most recent ubuntu distribution available in Github Actions to ensure maximum support of google cloud's sdk. 
runs-on: ubuntu-22.04 + # Workaround due to github actions variable scope restrictions passing to + # another job's env + outputs: + GEOSX_TPL_TAG: ${{ env.GEOSX_TPL_TAG}} steps: - name: Check that the PR is not a draft (cancel rest of jobs otherwise) run: | @@ -85,11 +90,8 @@ jobs: BUILD_AND_TEST_ARGS: --test-documentation run: ./scripts/ci_build_and_test.sh - linux_builds: name: ${{ matrix.name }} -# runs-on: ubuntu-22.04 - runs-on: ${{ matrix.os }} needs: [check_pull_request_is_not_a_draft] strategy: @@ -142,67 +144,28 @@ jobs: OS: ubuntu-22.04 CMAKE_BUILD_TYPE: Release GCP_BUCKET: geosx/ubuntu22.04-gcc11 - - steps: - - name: Checkout Repository - uses: actions/checkout@v3 - with: - submodules: true - lfs: false - - - id: 'auth' - if: matrix.GCP_BUCKET - uses: 'google-github-actions/auth@v1' - with: - credentials_json: '${{ secrets.GOOGLE_CLOUD_GCP }}' - - name: 'Set up Cloud SDK' - if: matrix.GCP_BUCKET - uses: 'google-github-actions/setup-gcloud@v1' - with: - version: '>= 363.0.0' - - - name: Print environment - run: printenv - - # Build and test only - # Builds only the geosx executable (timeout when building tests) - - name: Build and test - if: ${{ !(matrix.GCP_BUCKET) }} - env: - DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }} - CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }} - BUILD_AND_TEST_ARGS: ${{ matrix.BUILD_AND_TEST_ARGS }} - ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }} - ENABLE_HYPRE_DEVICE: ${{ matrix.ENABLE_HYPRE_DEVICE }} - ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }} - run: ./scripts/ci_build_and_test.sh - - # Build, test, uploads GEOSX and its TPL to GCP/GCS using gcloud CLI - - name: Build and test and deploy - if: matrix.GCP_BUCKET - env: - DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }} - CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }} - BUILD_AND_TEST_ARGS: ${{ matrix.BUILD_AND_TEST_ARGS }} - HOST_CONFIG: ${{ matrix.HOST_CONFIG }} - ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }} - ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }} - 
GCP_BUCKET: ${{ matrix.GCP_BUCKET }} - COMMIT: ${{ github.event.pull_request.head.sha }} - run: | - source ./scripts/ci_build_and_test.sh - TMP_DIR=/tmp - GEOSX_EXPORT_DIR=GEOSX-and-TPL-${COMMIT:0:7} - docker cp -a ${CONTAINER_NAME}:${GEOSX_TPL_DIR}/.. ${TMP_DIR}/${GEOSX_EXPORT_DIR} - GEOSX_BUNDLE=${TMP_DIR}/${GEOSX_EXPORT_DIR}.tar.gz - tar czf ${GEOSX_BUNDLE} --directory=${TMP_DIR} ${GEOSX_EXPORT_DIR} - CLOUDSDK_PYTHON=python3 gsutil cp -a public-read ${GEOSX_BUNDLE} gs://${GCP_BUCKET}/ - - large_cuda_builds: + uses: ./.github/workflows/build_and_test.yml + with: + GEOSX_TPL_TAG: ${{ needs.check_pull_request_is_not_a_draft.outputs.GEOSX_TPL_TAG }} + DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }} + CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }} + BUILD_AND_TEST_ARGS: ${{ matrix.BUILD_AND_TEST_ARGS }} + HOST_CONFIG: ${{ matrix.HOST_CONFIG }} + ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }} + ENABLE_HYPRE_DEVICE: ${{ matrix.ENABLE_HYPRE_DEVICE }} + ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }} + GCP_BUCKET: ${{ matrix.GCP_BUCKET }} + COMMIT: ${{ github.event.pull_request.head.sha }} + OS: ${{ matrix.OS }} + secrets: inherit + + # If "flag: ready to be merged" PR label found, cuda jobs run + # immediately alongside linux jobs. + # CUDA jobs should only be run if PR is ready to merge. 
+ cuda_builds_merge_ready: name: ${{ matrix.name }} -# runs-on: ubuntu-22.04 - runs-on: ${{ matrix.os }} - needs: [linux_builds] + needs: [check_pull_request_is_not_a_draft] + if: contains( github.event.pull_request.labels.*.name, format('flag{0} ready to be merged', ':')) strategy: # In-progress jobs will not be cancelled if there is a failure @@ -220,7 +183,7 @@ jobs: - name: Ubuntu CUDA (20.04, clang 10.0.0 + gcc 9.4.0, open-mpi 4.0.3, cuda-11.8.89) DOCKER_REPOSITORY: geosx/ubuntu20.04-clang10.0.0-cuda11.8.89 - OS: Runner_4core_16GB + OS: ubuntu-22.04 CMAKE_BUILD_TYPE: Release BUILD_AND_TEST_ARGS: "--disable-unit-tests --disable-schema-deployment" ENABLE_HYPRE: ON @@ -229,78 +192,61 @@ jobs: - name: Centos (7.7, gcc 8.3.1, open-mpi 1.10.7, cuda 11.8.89) DOCKER_REPOSITORY: geosx/centos7.7-gcc8.3.1-cuda11.8.89 - OS: Runner_4core_16GB + OS: ubuntu-22.04 CMAKE_BUILD_TYPE: Release BUILD_AND_TEST_ARGS: "--disable-unit-tests --disable-schema-deployment" # Matrix job that deploys to Google Cloud - name: Pecan GPU (centos 7.7, gcc 8.2.0, open-mpi 4.0.1, mkl 2019.5, cuda 11.5.119) DOCKER_REPOSITORY: geosx/pecan-gpu-gcc8.2.0-openmpi4.0.1-mkl2019.5-cuda11.5.119 - OS: Runner_4core_16GB + OS: ubuntu-22.04 CMAKE_BUILD_TYPE: Release BUILD_AND_TEST_ARGS: "--build-exe-only --disable-unit-tests --disable-schema-deployment" HOST_CONFIG: host-configs/TOTAL/pecan-GPU.cmake GCP_BUCKET: geosx/Pecan-GPU - - steps: - - name: Checkout Repository - uses: actions/checkout@v3 - with: - submodules: true - lfs: false - - - id: 'auth' - if: matrix.GCP_BUCKET - uses: 'google-github-actions/auth@v1' - with: - credentials_json: '${{ secrets.GOOGLE_CLOUD_GCP }}' - - name: 'Set up Cloud SDK' - if: matrix.GCP_BUCKET - uses: 'google-github-actions/setup-gcloud@v1' - with: - version: '>= 363.0.0' - - - name: Print environment - run: printenv - - # Build and test only - # Builds only the geosx executable (timeout when building tests) - - name: Build and test - if: ${{ !(matrix.GCP_BUCKET) }} - env: - 
DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }} - CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }} - BUILD_AND_TEST_ARGS: ${{ matrix.BUILD_AND_TEST_ARGS }} - ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }} - ENABLE_HYPRE_DEVICE: ${{ matrix.ENABLE_HYPRE_DEVICE }} - ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }} - run: ./scripts/ci_build_and_test.sh - - # Build, test, uploads GEOSX and its TPL to GCP/GCS using gcloud CLI - - name: Build and test and deploy - if: matrix.GCP_BUCKET - env: - DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }} - CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }} - BUILD_AND_TEST_ARGS: ${{ matrix.BUILD_AND_TEST_ARGS }} - HOST_CONFIG: ${{ matrix.HOST_CONFIG }} - ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }} - ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }} - GCP_BUCKET: ${{ matrix.GCP_BUCKET }} - COMMIT: ${{ github.event.pull_request.head.sha }} - run: | - source ./scripts/ci_build_and_test.sh - TMP_DIR=/tmp - GEOSX_EXPORT_DIR=GEOSX-and-TPL-${COMMIT:0:7} - docker cp -a ${CONTAINER_NAME}:${GEOSX_TPL_DIR}/.. ${TMP_DIR}/${GEOSX_EXPORT_DIR} - GEOSX_BUNDLE=${TMP_DIR}/${GEOSX_EXPORT_DIR}.tar.gz - tar czf ${GEOSX_BUNDLE} --directory=${TMP_DIR} ${GEOSX_EXPORT_DIR} - CLOUDSDK_PYTHON=python3 gsutil cp -a public-read ${GEOSX_BUNDLE} gs://${GCP_BUCKET}/ - - # Convenience job - passes when all other jobs have passed. 
+ uses: ./.github/workflows/build_and_test.yml + with: + GEOSX_TPL_TAG: ${{ needs.check_pull_request_is_not_a_draft.outputs.GEOSX_TPL_TAG }} + DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }} + CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }} + BUILD_AND_TEST_ARGS: ${{ matrix.BUILD_AND_TEST_ARGS }} + HOST_CONFIG: ${{ matrix.HOST_CONFIG }} + ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }} + ENABLE_HYPRE_DEVICE: ${{ matrix.ENABLE_HYPRE_DEVICE }} + ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }} + GCP_BUCKET: ${{ matrix.GCP_BUCKET }} + COMMIT: ${{ github.event.pull_request.head.sha }} + OS: ${{ matrix.OS }} + secrets: inherit + + # Convenience job - passes when all other jobs have passed + # (must pass the CUDA jobs) check_that_all_jobs_succeeded: runs-on: ubuntu-22.04 - needs: [check_pull_request_is_not_a_draft, check_pull_request_is_assigned, check_submodules, code_style, documentation, linux_builds, large_cuda_builds] + needs: + - check_pull_request_is_not_a_draft + - check_pull_request_is_assigned + - check_submodules + - code_style + - documentation + - linux_builds + - cuda_builds_merge_ready + if: ${{ always() }} + env: + RETURN_VAL: | + ${{ + needs.check_pull_request_is_not_a_draft.result == 'success' && + needs.check_pull_request_is_assigned.result == 'success' && + needs.check_submodules.result == 'success' && + needs.code_style.result == 'success' && + needs.documentation.result == 'success' && + needs.linux_builds.result == 'success' && + needs.cuda_builds_merge_ready.result == 'success' + }} steps: - - name: Success + - name: PR Success + if: ${{ contains(env.RETURN_VAL, 'true') }} run: "true" + - name: PR Failure + if: ${{ contains(env.RETURN_VAL, 'false') }} + run: "false"