diff --git a/.github/workflows/Intel_Parallelworks_CI.yaml b/.github/workflows/Intel_Parallelworks_CI.yaml
new file mode 100644
index 00000000..d0413305
--- /dev/null
+++ b/.github/workflows/Intel_Parallelworks_CI.yaml
@@ -0,0 +1,157 @@
+name: Compile SHiELD SOLO and run tests
+
+# This GitHub Action Workflow is running on the cloud devcimultiintel cluster
+# The tests are run inside of a container with the following software/libraries:
+# -intel: 2023.2.0
+# -hdf5: 1.14.0
+# -netcdf-c: 4.9.2
+# -netcdf-fortran: 4.6.0
+# -cmake
+# -libyaml
+
+on:
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  checkout:
+    runs-on: [self-hosted, devcimultiintel]
+    name: Checkout Code
+    steps:
+      # It can take a long time (5-15 minutes) to spinup nodes
+      # so this salloc will prompt 46 nodes to startup and stay active for 20 min
+      # this is enough nodes for the first 17 tests to run in parallel, and we
+      # have 17 runners configured.
+      - run: salloc --partition=p2 -N 46 -J SP_$GITHUB_REF sleep 20m &
+      - run: /contrib/fv3/SHiELD_physics_CI/checkout.sh $GITHUB_REF
+
+  build:
+    runs-on: [self-hosted, devcimultiintel]
+    name: SOLO SHiELD build
+    needs: [checkout]
+    strategy:
+      fail-fast: true
+      max-parallel: 3
+      matrix:
+        runpath: [/contrib/fv3/SHiELD_physics_CI/]
+        runscript: [swcompile.sh, nhcompile.sh, hydrocompile.sh]
+    steps:
+      - env:
+          RUNPATH: ${{ matrix.runpath }}
+          RUNSCRIPT: ${{ matrix.runscript }}
+        run: $RUNPATH/$RUNSCRIPT $GITHUB_REF
+
+  test:
+    runs-on: [self-hosted, devcimultiintel]
+    name: SOLO SHiELD test suite
+    needs: [checkout, build]
+    strategy:
+      fail-fast: false
+      max-parallel: 17
+      matrix:
+        runpath: [/contrib/fv3/SHiELD_physics_CI/]
+        runscript:
+          # These are placed in order of largest to smallest jobs
+          # layout 8,8 needs 8 nodes on devcimultiintel cluster
+          - C512r20.solo.superC.sh
+          - C768.sw.BTwave.sh
+          # layout 4,8 needs 4 nodes on devcimultiintel cluster
+          - C256r20.solo.superC.sh
+          - C384.sw.BLvortex.sh
+          # layout 4,4 needs 2 nodes on devcimultiintel cluster
+          - C128r20.solo.superC.sh
+          - C128r3.solo.TC.d1.sh
+          - C128r3.solo.TC.h6.sh
+          - C128r3.solo.TC.sh
+          - C128r3.solo.TC.tr8.sh
+          - C192.sw.BLvortex.sh
+          - C192.sw.BTwave.sh
+          - C192.sw.modon.sh
+          - C384.sw.BTwave.sh
+          # layout 4,1 and 2,2 need 1 node on devcimultiintel cluster
+          - C96.solo.BCdry.hyd.sh
+          - C96.solo.BCdry.sh
+          - C96.solo.BCmoist.hyd.d3.sh
+          - C96.solo.BCmoist.hyd.sh
+          - C96.solo.BCmoist.nhK.sh
+          - C96.solo.BCmoist.sh
+          - C96.solo.mtn_rest.hyd.diff2.sh
+          - C96.solo.mtn_rest.hyd.sh
+          - C96.solo.mtn_rest.nonmono.diff2.sh
+          - C96.solo.mtn_rest.sh
+          - C96.sw.BLvortex.sh
+          - C96.sw.BTwave.sh
+          - C96.sw.modon.sh
+          - C96.sw.RHwave.sh
+          - d96_1k.solo.mtn_rest_shear.olddamp.sh
+          - d96_1k.solo.mtn_rest_shear.sh
+          - d96_1k.solo.mtn_schar.mono.sh
+          - d96_1k.solo.mtn_schar.sh
+          - d96_2k.solo.bubble.n0.sh
+          - d96_2k.solo.bubble.nhK.sh
+          - d96_2k.solo.bubble.sh
+          - d96_500m.solo.mtn_schar.sh
+    steps:
+      # This will end the slurm job started in the checkout job
+      - run: scancel -n SP_$GITHUB_REF
+      - env:
+          RUNPATH: ${{ matrix.runpath }}
+          RUNSCRIPT: ${{ matrix.runscript }}
+        run: $RUNPATH/$RUNSCRIPT $GITHUB_REF
+
+  # Runs even when earlier jobs fail or are cancelled (if: always()) and
+  # cancels, by name, any Slurm jobs belonging to this pull request ref.
+  shutdown:
+    runs-on: [self-hosted, devcimultiintel]
+    name: Shutdown Processes
+    if: always()
+    needs: [checkout, build, test]
+    strategy:
+      fail-fast: false
+      max-parallel: 17
+      matrix:
+        test:
+          - C512r20.solo.superC
+          - C768.sw.BTwave
+          - C256r20.solo.superC
+          - C384.sw.BLvortex
+          - C128r20.solo.superC
+          - C128r3.solo.TC.d1
+          - C128r3.solo.TC.h6
+          - C128r3.solo.TC
+          - C128r3.solo.TC.tr8
+          - C192.sw.BLvortex
+          - C192.sw.BTwave
+          - C192.sw.modon
+          - C384.sw.BTwave
+          - C96.solo.BCdry.hyd
+          - C96.solo.BCdry
+          - C96.solo.BCmoist.hyd.d3
+          - C96.solo.BCmoist.hyd
+          - C96.solo.BCmoist.nhK
+          - C96.solo.BCmoist
+          - C96.solo.mtn_rest.hyd.diff2
+          - C96.solo.mtn_rest.hyd
+          - C96.solo.mtn_rest.nonmono.diff2
+          - C96.solo.mtn_rest
+          - C96.sw.BLvortex
+          - C96.sw.BTwave
+          - C96.sw.modon
+          - C96.sw.RHwave
+          - d96_1k.solo.mtn_rest_shear.olddamp
+          - d96_1k.solo.mtn_rest_shear
+          - d96_1k.solo.mtn_schar.mono
+          - d96_1k.solo.mtn_schar
+          - d96_2k.solo.bubble.n0
+          - d96_2k.solo.bubble.nhK
+          - d96_2k.solo.bubble
+          - d96_500m.solo.mtn_schar
+    steps:
+      # Cancel the node spin-up allocation submitted by the checkout job
+      - run: scancel -n SP_$GITHUB_REF
+      # Braces are required here: "$GITHUB_REF_$TEST" would make the shell
+      # expand the undefined variable GITHUB_REF_ instead of GITHUB_REF.
+      - env:
+          TEST: ${{ matrix.test }}
+        run: scancel -n "SP_${GITHUB_REF}_${TEST}"
diff --git a/.github/workflows/daily_cleanup_parallelworks.yaml b/.github/workflows/daily_cleanup_parallelworks.yaml
new file mode 100644
index 00000000..fc289a69
--- /dev/null
+++ b/.github/workflows/daily_cleanup_parallelworks.yaml
@@ -0,0 +1,19 @@
+name: Old Build Cleanup
+
+# This GitHub Action Workflow is running on the devcimultiintel cluster
+# This will delete all build directories older than 30 days
+# Build directories are on the cloud at /contrib/fv3/2023.2.0
+
+on:
+  schedule:
+    # run daily at midnight (GitHub evaluates cron schedules in UTC)
+    - cron: '0 0 * * *'
+
+jobs:
+  delete:
+    runs-on: [self-hosted, devcimultiintel]
+    name: Delete Builds
+    steps:
+      # find's -delete cannot remove non-empty directories, so each expired
+      # build directory is printed and then removed recursively with rm -rf.
+      - run: find /contrib/fv3/2023.2.0/SHiELD_physics/refs/pull -maxdepth 1 -mindepth 1 -mtime +30 -type d -print -exec rm -rf -- {} +