Skip to content

Commit

Permalink
use streak.llnl.gov as runner for GHA (#2873)
Browse files Browse the repository at this point in the history
* use pip_system_certs...this time in right context

* add run args for docker image

* clean workspace from inside the docker container

* add nvidia runtime to docker args
  • Loading branch information
rrsettgast authored and ouassimkh committed Feb 16, 2024
1 parent c940ed6 commit c045ff5
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 129 deletions.
43 changes: 37 additions & 6 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ on:
DOCKER_REPOSITORY:
required: true
type: string
DOCKER_RUN_ARGS:
required: false
type: string
ENABLE_HYPRE:
required: false
type: string
Expand Down Expand Up @@ -52,22 +55,22 @@ jobs:
runs-on: ${{ inputs.RUNS_ON }}
steps:
- name: Checkout Repository
uses: actions/checkout@v3
uses: actions/checkout@v4.1.1
with:
submodules: true
lfs: ${{ inputs.BUILD_TYPE == 'integrated_tests' }}
fetch-depth: 1

- id: 'auth'
if: ${{ inputs.GCP_BUCKET || inputs.USE_SCCACHE }}
uses: 'google-github-actions/auth@v1'
uses: 'google-github-actions/auth@v2.1.0'
with:
credentials_json: '${{ secrets.GOOGLE_CLOUD_GCP }}'
create_credentials_file: true

- name: 'Set up Cloud SDK'
if: inputs.GCP_BUCKET
uses: 'google-github-actions/setup-gcloud@v1'
uses: 'google-github-actions/setup-gcloud@v2.1.0'
with:
version: '>= 363.0.0'

Expand All @@ -81,6 +84,10 @@ jobs:
docker_args=()
script_args=()
docker_args+=(${{ inputs.DOCKER_RUN_ARGS }})
COMMIT=${{ github.event.pull_request.head.sha }}
SHORT_COMMIT=${COMMIT:0:7}
script_args+=(--install-dir-basename GEOSX-${SHORT_COMMIT})
Expand All @@ -97,7 +104,9 @@ jobs:
script_args+=(--data-basename ${DATA_BASENAME})
DATA_EXCHANGE_DIR=/mnt/geos-exchange # Exchange folder outside of the container
sudo mkdir -p ${DATA_EXCHANGE_DIR}
if [ ! -d "${DATA_EXCHANGE_DIR}" ]; then
sudo mkdir -p ${DATA_EXCHANGE_DIR}
fi
DATA_EXCHANGE_MOUNT_POINT=/tmp/exchange # Exchange folder inside of the container
docker_args+=(--volume=${DATA_EXCHANGE_DIR}:${DATA_EXCHANGE_MOUNT_POINT})
script_args+=(--exchange-dir ${DATA_EXCHANGE_MOUNT_POINT})
Expand All @@ -110,6 +119,11 @@ jobs:
script_args+=(--sccache-credentials $(basename ${GOOGLE_GHA_CREDS_PATH}))
fi
# On the self-hosted runner, copy the host's CA trust anchors into the workspace.
# The in-container build script looks for them under <source dir>/certificates.
# RUNS_ON is expanded by GitHub Actions before bash sees this line, so it is quoted here:
# an empty or multi-word value must not turn the `[` test into a syntax error.
if [ "${{ inputs.RUNS_ON }}" == 'self-hosted' ]; then
  RUNNER_CERTIFICATES_DIR=/etc/pki/ca-trust/source/anchors/
  mkdir -p "${GITHUB_WORKSPACE}/certificates"
  # The glob must stay outside the quotes so that it is still expanded.
  cp "${RUNNER_CERTIFICATES_DIR}"/*.crt* "${GITHUB_WORKSPACE}/certificates"
fi
# We need to know where the code folder is mounted inside the container so we can run the script at the proper location!
# Since this information is repeated twice, we use a variable.
GITHUB_WORKSPACE_MOUNT_POINT=/tmp/geos
Expand All @@ -126,11 +140,21 @@ jobs:
docker_args+=(-e ENABLE_HYPRE_DEVICE=${ENABLE_HYPRE_DEVICE:-CPU})
docker_args+=(-e ENABLE_TRILINOS=${ENABLE_TRILINOS:-ON})
docker_args+=(--cap-add=SYS_PTRACE)
docker_args+=(--cap-add=SYS_PTRACE --rm)
script_args+=(--cmake-build-type ${{ inputs.CMAKE_BUILD_TYPE }})
script_args+=(${{ inputs.BUILD_AND_TEST_CLI_ARGS }})
SPLIT_DOCKER_REPOSITORY=(${DOCKER_REPOSITORY//// })
CONTAINER_NAME=geosx_build_${SPLIT_DOCKER_REPOSITORY[1]}_${GITHUB_SHA:0:7}
echo "CONTAINER_NAME: ${CONTAINER_NAME}"
if [ "$(docker ps -aq -f name=${CONTAINER_NAME})" ]; then
docker rm -f ${CONTAINER_NAME}
fi
docker_args+=(--name ${CONTAINER_NAME})
# CODE_COVERAGE is substituted by GitHub Actions before the shell runs.
# Compare it as a quoted string inside a real test. The previous form executed the substituted
# value itself as a command (`true == 'true'`), which only behaved correctly because `true` and
# `false` ignore their arguments, and which printed "command not found" when the input was empty.
if [[ "${{ inputs.CODE_COVERAGE }}" == 'true' ]]; then
  script_args+=(--code-coverage)
fi
Expand All @@ -142,6 +166,7 @@ jobs:
set +e
docker run \
${docker_args[@]} \
-h=`hostname` \
${{ inputs.DOCKER_REPOSITORY }}:${{ inputs.DOCKER_IMAGE_TAG }} \
${GITHUB_WORKSPACE_MOUNT_POINT}/scripts/ci_build_and_test_in_container.sh \
${script_args[@]}
Expand All @@ -156,11 +181,17 @@ jobs:
echo "Download the bundle at https://storage.googleapis.com/${{ inputs.GCP_BUCKET }}/${DATA_BASENAME}"
fi
fi
# Cleanup of the container and the workspace is currently disabled (the commands are kept below, commented out); the container itself is started with --rm.
echo github.workspace = ${{ github.workspace }}
#rm -rf ${{ github.workspace }}/*
#docker rm -f ${CONTAINER_NAME}
exit ${EXIT_STATUS}
- name: Upload coverage to Codecov
if: inputs.CODE_COVERAGE
uses: codecov/codecov-action@v3
uses: codecov/codecov-action@v4.0.1
with:
files: geos_coverage.info.cleaned
fail_ci_if_error: true
Expand Down
13 changes: 8 additions & 5 deletions .github/workflows/ci_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
# The TPL tag is contained in the codespaces configuration to avoid duplications.
- name: Checkout .devcontainer/devcontainer.json
uses: actions/checkout@v3
uses: actions/checkout@v4.1.1
with:
sparse-checkout: |
.devcontainer/devcontainer.json
Expand Down Expand Up @@ -83,7 +83,7 @@ jobs:
# The integrated test submodule repository contains large data (using git lfs).
# To save time (and money) we do not let GitHub Actions automatically clone all our (lfs) subrepositories; we do it by hand instead.
- name: Checkout Repository
uses: actions/checkout@v3
uses: actions/checkout@v4.1.1
with:
# Let script update submodules; Github Actions submodule history causes error
submodules: false
Expand Down Expand Up @@ -188,7 +188,7 @@ jobs:
needs:
- is_not_draft_pull_request
- cpu_builds
if: "${{ contains( fromJSON( needs.is_not_draft_pull_request.outputs.LABELS ), 'ci: run integrated tests') }}"
if: "${{ contains( fromJSON( needs.is_not_draft_pull_request.outputs.LABELS ), 'ci: run integrated tests') || github.event_name != 'pull_request' }}"
uses: ./.github/workflows/build_and_test.yml
secrets: inherit
with:
Expand Down Expand Up @@ -240,13 +240,15 @@ jobs:
RUNS_ON: Runner_8core_32GB

- name: Ubuntu CUDA (20.04, clang 10.0.0 + gcc 9.4.0, open-mpi 4.0.3, cuda-11.8.89)
BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
BUILD_AND_TEST_CLI_ARGS: "--no-install-schema"
CMAKE_BUILD_TYPE: Release
DOCKER_REPOSITORY: geosx/ubuntu20.04-clang10.0.0-cuda11.8.89
ENABLE_HYPRE_DEVICE: CUDA
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
RUNS_ON: Runner_4core_16GB
RUNS_ON: self-hosted
DOCKER_RUN_ARGS: "--cpus=8 --memory=128g --runtime=nvidia --gpus all"


- name: Centos (7.7, gcc 8.3.1, open-mpi 1.10.7, cuda 11.8.89)
BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
Expand All @@ -267,6 +269,7 @@ jobs:
CMAKE_BUILD_TYPE: ${{ matrix.CMAKE_BUILD_TYPE }}
DOCKER_IMAGE_TAG: ${{ needs.is_not_draft_pull_request.outputs.DOCKER_IMAGE_TAG }}
DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }}
DOCKER_RUN_ARGS: ${{ matrix.DOCKER_RUN_ARGS }}
ENABLE_HYPRE_DEVICE: ${{ matrix.ENABLE_HYPRE_DEVICE }}
ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }}
ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }}
Expand Down
114 changes: 0 additions & 114 deletions scripts/buildOrTest.py

This file was deleted.

36 changes: 33 additions & 3 deletions scripts/ci_build_and_test_in_container.sh
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,26 @@ EOT
# The path to the `sccache` executable is available through the SCCACHE environment variable.
SCCACHE_CMAKE_ARGS="-DCMAKE_CXX_COMPILER_LAUNCHER=${SCCACHE} -DCMAKE_CUDA_COMPILER_LAUNCHER=${SCCACHE}"

if [[ ${HOSTNAME} == 'streak.llnl.gov' ]]; then
  # Install every certificate found under ${GEOS_SRC_DIR}/certificates into the
  # container's CA directory, then rebuild the trust store.
  DOCKER_CERTS_DIR=/usr/local/share/ca-certificates
  for file in "${GEOS_SRC_DIR}"/certificates/*.crt.pem; do
    # When nothing matches, the glob is left as a literal string; the -f test skips it.
    if [ -f "$file" ]; then
      filename=$(basename -- "$file")
      # Strip the full ".crt.pem" suffix so that "name.crt.pem" is installed as "name.crt".
      # (Stripping only the last extension and appending ".crt" produced "name.crt.crt".)
      filename_no_ext="${filename%.crt.pem}"
      new_filename="${DOCKER_CERTS_DIR}/${filename_no_ext}.crt"
      cp "$file" "$new_filename"
      echo "Copied $filename to $new_filename"
    fi
  done
  update-ca-certificates
  # gcloud config set core/custom_ca_certs_file cert.pem

  # Fixed job count on this host.
  # NOTE(review): presumably chosen to match the --cpus=8 limit the CI workflow passes to docker; confirm.
  NPROC=8
else
  NPROC=$(nproc)
fi
echo "Using ${NPROC} cores."

echo "sccache initial state"
${SCCACHE} --show-stats
fi
Expand Down Expand Up @@ -208,9 +228,9 @@ fi

# Performing the requested build.
if [[ "${BUILD_EXE_ONLY}" = true ]]; then
or_die ninja -j $(nproc) geosx
or_die ninja -j $NPROC geosx
else
or_die ninja -j $(nproc)
or_die ninja -j $NPROC
or_die ninja install

if [[ ! -z "${DATA_BASENAME_WE}" ]]; then
Expand All @@ -232,7 +252,11 @@ fi

# Run the unit tests (excluding previously run checks).
if [[ "${RUN_UNIT_TESTS}" = true ]]; then
or_die ctest --output-on-failure -E "testUncrustifyCheck|testDoxygenCheck"
if [[ ${HOSTNAME} == 'streak.llnl.gov' ]]; then
or_die ctest --output-on-failure -E "testUncrustifyCheck|testDoxygenCheck|testLifoStorage|testExternalSolvers"
else
or_die ctest --output-on-failure -E "testUncrustifyCheck|testDoxygenCheck"
fi
fi

if [[ "${RUN_INTEGRATED_TESTS}" = true ]]; then
Expand All @@ -256,8 +280,14 @@ if [[ "${RUN_INTEGRATED_TESTS}" = true ]]; then
or_die tar cfM ${DATA_EXCHANGE_DIR}/${DATA_BASENAME_WE}.tar --directory ${GEOS_SRC_DIR} --transform "s/^integratedTests/${DATA_BASENAME_WE}\/repo/" integratedTests
or_die tar rfM ${DATA_EXCHANGE_DIR}/${DATA_BASENAME_WE}.tar --directory ${GEOSX_BUILD_DIR} --transform "s/^integratedTests/${DATA_BASENAME_WE}\/logs/" integratedTests
or_die gzip ${DATA_EXCHANGE_DIR}/${DATA_BASENAME_WE}.tar

# Clean the integrated tests folder to avoid polluting the next build.
or_die integratedTests/geos_ats.sh -a clean
fi

# Cleaning the build directory.
or_die ninja clean

# If we're here, either everything went OK or we have to deal with the integrated tests manually.
if [[ ! -z "${INTEGRATED_TEST_EXIT_STATUS+x}" ]]; then
echo "Exiting the build process with exit status ${INTEGRATED_TEST_EXIT_STATUS} from the integrated tests."
Expand Down
2 changes: 1 addition & 1 deletion src/coreComponents/constitutive/solid/SolidBase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ class SolidBaseUpdates
arrayView1d< real64 const > const m_thermalExpansionCoefficient;

/// Flag to disable inelasticity
const bool & m_disableInelasticity;
const bool m_disableInelasticity;

/**
* @brief Get bulkModulus
Expand Down

0 comments on commit c045ff5

Please sign in to comment.