From c045ff51e0d37da10124dda886829842ec81b5d4 Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Wed, 7 Feb 2024 13:15:21 -0800 Subject: [PATCH] use streak.llnl.gov as runner for GHA (#2873) * use pip_system_certs...this time in right context * add run args for docker image * clean workspace from inside the docker container * add nvidia runtime to docker args --- .github/workflows/build_and_test.yml | 43 ++++++- .github/workflows/ci_tests.yml | 13 +- scripts/buildOrTest.py | 114 ------------------ scripts/ci_build_and_test_in_container.sh | 36 +++++- .../constitutive/solid/SolidBase.hpp | 2 +- 5 files changed, 79 insertions(+), 129 deletions(-) delete mode 100755 scripts/buildOrTest.py diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index f9b423b95a2..2da2c18ea9b 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -22,6 +22,9 @@ on: DOCKER_REPOSITORY: required: true type: string + DOCKER_RUN_ARGS: + required: false + type: string ENABLE_HYPRE: required: false type: string @@ -52,7 +55,7 @@ jobs: runs-on: ${{ inputs.RUNS_ON }} steps: - name: Checkout Repository - uses: actions/checkout@v3 + uses: actions/checkout@v4.1.1 with: submodules: true lfs: ${{ inputs.BUILD_TYPE == 'integrated_tests' }} @@ -60,14 +63,14 @@ jobs: - id: 'auth' if: ${{ inputs.GCP_BUCKET || inputs.USE_SCCACHE }} - uses: 'google-github-actions/auth@v1' + uses: 'google-github-actions/auth@v2.1.0' with: credentials_json: '${{ secrets.GOOGLE_CLOUD_GCP }}' create_credentials_file: true - name: 'Set up Cloud SDK' if: inputs.GCP_BUCKET - uses: 'google-github-actions/setup-gcloud@v1' + uses: 'google-github-actions/setup-gcloud@v2.1.0' with: version: '>= 363.0.0' @@ -81,6 +84,10 @@ jobs: docker_args=() script_args=() + + + docker_args+=(${{ inputs.DOCKER_RUN_ARGS }}) + COMMIT=${{ github.event.pull_request.head.sha }} SHORT_COMMIT=${COMMIT:0:7} script_args+=(--install-dir-basename GEOSX-${SHORT_COMMIT}) @@ -97,7 +104,9 @@ jobs: script_args+=(--data-basename ${DATA_BASENAME}) DATA_EXCHANGE_DIR=/mnt/geos-exchange # Exchange folder outside of the container - sudo mkdir -p ${DATA_EXCHANGE_DIR} + if [ ! -d "${DATA_EXCHANGE_DIR}" ]; then + sudo mkdir -p ${DATA_EXCHANGE_DIR} + fi DATA_EXCHANGE_MOUNT_POINT=/tmp/exchange # Exchange folder inside of the container docker_args+=(--volume=${DATA_EXCHANGE_DIR}:${DATA_EXCHANGE_MOUNT_POINT}) script_args+=(--exchange-dir ${DATA_EXCHANGE_MOUNT_POINT}) @@ -110,6 +119,11 @@ jobs: script_args+=(--sccache-credentials $(basename ${GOOGLE_GHA_CREDS_PATH})) fi + if [ ${{ inputs.RUNS_ON }} == 'self-hosted' ]; then + RUNNER_CERTIFICATES_DIR=/etc/pki/ca-trust/source/anchors/ + mkdir -p ${GITHUB_WORKSPACE}/certificates + cp ${RUNNER_CERTIFICATES_DIR}/*.crt* ${GITHUB_WORKSPACE}/certificates + fi # We need to know where the code folder is mounted inside the container so we can run the script at the proper location! # Since this information is repeated twice, we use a variable. GITHUB_WORKSPACE_MOUNT_POINT=/tmp/geos @@ -126,11 +140,21 @@ jobs: docker_args+=(-e ENABLE_HYPRE_DEVICE=${ENABLE_HYPRE_DEVICE:-CPU}) docker_args+=(-e ENABLE_TRILINOS=${ENABLE_TRILINOS:-ON}) - docker_args+=(--cap-add=SYS_PTRACE) + docker_args+=(--cap-add=SYS_PTRACE --rm) script_args+=(--cmake-build-type ${{ inputs.CMAKE_BUILD_TYPE }}) script_args+=(${{ inputs.BUILD_AND_TEST_CLI_ARGS }}) + + SPLIT_DOCKER_REPOSITORY=(${DOCKER_REPOSITORY//// }) + CONTAINER_NAME=geosx_build_${SPLIT_DOCKER_REPOSITORY[1]}_${GITHUB_SHA:0:7} + echo "CONTAINER_NAME: ${CONTAINER_NAME}" + if [ "$(docker ps -aq -f name=${CONTAINER_NAME})" ]; then + docker rm -f ${CONTAINER_NAME} + fi + docker_args+=(--name ${CONTAINER_NAME}) + + if ${{ inputs.CODE_COVERAGE }} == 'true'; then script_args+=(--code-coverage) fi @@ -142,6 +166,7 @@ jobs: set +e docker run \ ${docker_args[@]} \ + -h=`hostname` \ ${{ inputs.DOCKER_REPOSITORY }}:${{ inputs.DOCKER_IMAGE_TAG }} \ ${GITHUB_WORKSPACE_MOUNT_POINT}/scripts/ci_build_and_test_in_container.sh \ ${script_args[@]} @@ -156,11 +181,17 @@ jobs: echo "Download the bundle at https://storage.googleapis.com/${{ inputs.GCP_BUCKET }}/${DATA_BASENAME}" fi fi + + # Remove the container and the workspace to avoid any conflict with the next run. + echo github.workspace = ${{ github.workspace }} + #rm -rf ${{ github.workspace }}/* + #docker rm -f ${CONTAINER_NAME} + exit ${EXIT_STATUS} - name: Upload coverage to Codecov if: inputs.CODE_COVERAGE - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v4.0.1 with: files: geos_coverage.info.cleaned fail_ci_if_error: true diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index 7eef4cea0e4..ad4913ba5c6 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -47,7 +47,7 @@ jobs: # The TPL tag is contained in the codespaces configuration to avoid duplications. - name: Checkout .devcontainer/devcontainer.json - uses: actions/checkout@v3 + uses: actions/checkout@v4.1.1 with: sparse-checkout: | .devcontainer/devcontainer.json @@ -83,7 +83,7 @@ jobs: # The integrated test submodule repository contains large data (using git lfs). # To save time (and money) we do not let Github Actions automatically clone all our (lfs) subrepositories and do it by hand. - name: Checkout Repository - uses: actions/checkout@v3 + uses: actions/checkout@v4.1.1 with: # Let script update submodules; Github Actions submodule history causes error submodules: false @@ -188,7 +188,7 @@ jobs: needs: - is_not_draft_pull_request - cpu_builds - if: "${{ contains( fromJSON( needs.is_not_draft_pull_request.outputs.LABELS ), 'ci: run integrated tests') }}" + if: "${{ contains( fromJSON( needs.is_not_draft_pull_request.outputs.LABELS ), 'ci: run integrated tests') || github.event_name != 'pull_request' }}" uses: ./.github/workflows/build_and_test.yml secrets: inherit with: @@ -240,13 +240,15 @@ jobs: RUNS_ON: Runner_8core_32GB - name: Ubuntu CUDA (20.04, clang 10.0.0 + gcc 9.4.0, open-mpi 4.0.3, cuda-11.8.89) - BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema" + BUILD_AND_TEST_CLI_ARGS: "--no-install-schema" CMAKE_BUILD_TYPE: Release DOCKER_REPOSITORY: geosx/ubuntu20.04-clang10.0.0-cuda11.8.89 ENABLE_HYPRE_DEVICE: CUDA ENABLE_HYPRE: ON ENABLE_TRILINOS: OFF - RUNS_ON: Runner_4core_16GB + RUNS_ON: self-hosted + DOCKER_RUN_ARGS: "--cpus=8 --memory=128g --runtime=nvidia --gpus all" + - name: Centos (7.7, gcc 8.3.1, open-mpi 1.10.7, cuda 11.8.89) BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema" @@ -267,6 +269,7 @@ jobs: CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }} DOCKER_IMAGE_TAG: ${{ needs.is_not_draft_pull_request.outputs.DOCKER_IMAGE_TAG }} DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }} + DOCKER_RUN_ARGS: ${{ matrix.DOCKER_RUN_ARGS }} ENABLE_HYPRE_DEVICE: ${{ matrix.ENABLE_HYPRE_DEVICE }} ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }} ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }} diff --git a/scripts/buildOrTest.py b/scripts/buildOrTest.py deleted file mode 100755 index fd39d2668ea..00000000000 --- a/scripts/buildOrTest.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/bin/sh -"exec" "python" "-u" "-B" "$0" "$@" -#python scripts/config-build.py -hc host-configs/darwin-clang.cmake -#python scripts/config-build.py -hc host-configs/darwin-clang37.cmake -#python scripts/config-build.py -hc host-configs/darwin-gcc.cmake -#python scripts/config-build.py -bp build-xcode -hc host-configs/darwin-clang.cmake -x - -darwinHosts = ["clang", "clang37", "gcc"] -chaosHosts = ["gcc@4.9.3"] - -platforms = {"darwin": darwinHosts, "chaos_5_x86_64_ib": chaosHosts} -trueNames = { - "darwin": "darwin", - "osx": "darwin", - "lc": "chaos_5_x86_64_ib", - "chaos_5_x86_64_ib": "chaos_5_x86_64_ib", - "chaos": "chaos_5_x86_64_ib", - "linux2": "chaos_5_x86_64_ib" -} -import os -import subprocess -import tempfile -import sys - - -def executeSubProcess(command, workingDirectory=os.getcwd(), verbose=2, stdin=sys.stdin): - - if verbose == -1: - verbose = globalVerbosity - if verbose > 1: - print("Executing: " + command + "\n\t Working Directory: " + workingDirectory) - #*************************************************************************************************************** - #Note: Even though python's documentation says that "shell=True" opens up a computer for malicious shell commands, - # it is needed to allow users to fully utilize shell commands, such as cd. - #*************************************************************************************************************** - - # TODO: Followable commands aren't working in Windows right now - initially there were some pickling difficulties, - # but now we are seeing behaviors that look like multiprocessing subprocesses are being launched in incorrect directories. - # To be troubleshooted later. - if False: - with tempfile.NamedTemporaryFile() as tmpFile: - launcher = FollowableCommand(command, workingDirectory, tmpFile, stdin) - launcher.run(startStreaming=3.0) - tmpFile.seek(0) - output = tmpFile.read() - if verbose > 1 and launcher.stopFollowing == 0: - print(output.strip()) - process = launcher.finishedProcesses.get() - else: - with tempfile.TemporaryFile() as tmpFile: - process = subprocess.Popen(command, - cwd=workingDirectory, - shell=(os.name != "nt"), - stdout=tmpFile.fileno(), - stderr=subprocess.STDOUT) - process.wait() - tmpFile.seek(0) - output = tmpFile.read() - if verbose > 1: - print(output.strip()) - - process.output = output - if process.returncode != 0 and verbose > 1: - print("Command '" + command + "': exited with error code " + str(process.returncode)) - return process - - -def execute(cmd, dryRun): - if dryRun: - print cmd - else: - process = executeSubProcess(cmd) - if (process.returncode): - exit(process.returncode) - - -def main(platform, build, local, dryRun=False): - hosts = platforms[trueNames[platform.lower()]] - platform = trueNames[platform] - for host in hosts: - if local: - execute("cd src/thirdparty && chairajabuild", dryRun) - if build: - execute("scripts/config-build.py -hc host-configs/%s-%s.cmake" % (platform, host), dryRun) - cmd = "make " if build else "make test" - execute("cd build-%s-%s* && %s" % (platform, host, cmd), dryRun) - - -usage = "scripts/buildOrTest [build|test] [ [--local]] " -if __name__ == "__main__": - local = False - if ("scripts" not in sys.argv[0]): - print "USAGE:" + usage - exit(1) - try: - build = sys.argv[1].lower() == "build" - test = sys.argv[1].lower() == "test" - - if (not (build or test)): - print "USAGE:" + usage - exit(1) - except IndexError: - print "USAGE:" + usage - exit(1) - try: - platform = sys.argv[2].lower() - try: - local = sys.argv[3] - except IndexError: - pass - except IndexError: - platform = sys.platform - build = build or not test - main(platform, build, local) diff --git a/scripts/ci_build_and_test_in_container.sh b/scripts/ci_build_and_test_in_container.sh index a77e3fc0ae2..f74a0795b48 100755 --- a/scripts/ci_build_and_test_in_container.sh +++ b/scripts/ci_build_and_test_in_container.sh @@ -143,6 +143,26 @@ EOT # The path to the `sccache` executable is available through the SCCACHE environment variable. SCCACHE_CMAKE_ARGS="-DCMAKE_CXX_COMPILER_LAUNCHER=${SCCACHE} -DCMAKE_CUDA_COMPILER_LAUNCHER=${SCCACHE}" + if [[ ${HOSTNAME} == 'streak.llnl.gov' ]]; then + DOCKER_CERTS_DIR=/usr/local/share/ca-certificates + for file in "${GEOS_SRC_DIR}"/certificates/*.crt.pem; do + if [ -f "$file" ]; then + filename=$(basename -- "$file") + filename_no_ext="${filename%.*}" + new_filename="${DOCKER_CERTS_DIR}/${filename_no_ext}.crt" + cp "$file" "$new_filename" + echo "Copied $filename to $new_filename" + fi + done + update-ca-certificates + # gcloud config set core/custom_ca_certs_file cert.pem' + + NPROC=8 + else + NPROC=$(nproc) + fi + echo "Using ${NPROC} cores." + echo "sccache initial state" ${SCCACHE} --show-stats fi @@ -208,9 +228,9 @@ fi # Performing the requested build. if [[ "${BUILD_EXE_ONLY}" = true ]]; then - or_die ninja -j $(nproc) geosx + or_die ninja -j $NPROC geosx else - or_die ninja -j $(nproc) + or_die ninja -j $NPROC or_die ninja install if [[ ! -z "${DATA_BASENAME_WE}" ]]; then @@ -232,7 +252,11 @@ fi # Run the unit tests (excluding previously ran checks). if [[ "${RUN_UNIT_TESTS}" = true ]]; then - or_die ctest --output-on-failure -E "testUncrustifyCheck|testDoxygenCheck" + if [[ ${HOSTNAME} == 'streak.llnl.gov' ]]; then + or_die ctest --output-on-failure -E "testUncrustifyCheck|testDoxygenCheck|testLifoStorage|testExternalSolvers" + else + or_die ctest --output-on-failure -E "testUncrustifyCheck|testDoxygenCheck" + fi fi if [[ "${RUN_INTEGRATED_TESTS}" = true ]]; then @@ -256,8 +280,14 @@ if [[ "${RUN_INTEGRATED_TESTS}" = true ]]; then or_die tar cfM ${DATA_EXCHANGE_DIR}/${DATA_BASENAME_WE}.tar --directory ${GEOS_SRC_DIR} --transform "s/^integratedTests/${DATA_BASENAME_WE}\/repo/" integratedTests or_die tar rfM ${DATA_EXCHANGE_DIR}/${DATA_BASENAME_WE}.tar --directory ${GEOSX_BUILD_DIR} --transform "s/^integratedTests/${DATA_BASENAME_WE}\/logs/" integratedTests or_die gzip ${DATA_EXCHANGE_DIR}/${DATA_BASENAME_WE}.tar + + # want to clean the integrated tests folder to avoid polluting the next build. + or_die integratedTests/geos_ats.sh -a clean fi +# Cleaning the build directory. +or_die ninja clean + # If we're here, either everything went OK or we have to deal with the integrated tests manually. if [[ ! -z "${INTEGRATED_TEST_EXIT_STATUS+x}" ]]; then echo "Exiting the build process with exit status ${INTEGRATED_TEST_EXIT_STATUS} from the integrated tests." diff --git a/src/coreComponents/constitutive/solid/SolidBase.hpp b/src/coreComponents/constitutive/solid/SolidBase.hpp index e91a3323242..6e8f6264817 100644 --- a/src/coreComponents/constitutive/solid/SolidBase.hpp +++ b/src/coreComponents/constitutive/solid/SolidBase.hpp @@ -98,7 +98,7 @@ class SolidBaseUpdates arrayView1d< real64 const > const m_thermalExpansionCoefficient; /// Flag to disable inelasticity - const bool & m_disableInelasticity; + const bool m_disableInelasticity; /** * @brief Get bulkModulus