From ba208a49e16c4ff8cd95514c8119a2964280f403 Mon Sep 17 00:00:00 2001 From: Webb Scales <7795764+webbnh@users.noreply.github.com> Date: Fri, 25 Aug 2023 10:46:48 -0400 Subject: [PATCH] Use `systemctl` to launch the Pbench Server container when deploying (#3516) Since we use systemctl to run the containerized Pbench Server in the Staging deployment (and since we'll do the same in Production), use the same mechanism when running the Server for functional test or development. (Also, pick lint.) --- jenkins/Pipeline.gy | 4 +- jenkins/run-server-func-tests | 55 ++++++++++++-- server/pbenchinacan/deploy | 134 ++++++++++++++-------------------- server/pbenchinacan/redeploy | 107 +++++++++++++++++++++++++++ 4 files changed, 213 insertions(+), 87 deletions(-) create mode 100755 server/pbenchinacan/redeploy diff --git a/jenkins/Pipeline.gy b/jenkins/Pipeline.gy index 630fcca548..43448e1fa8 100644 --- a/jenkins/Pipeline.gy +++ b/jenkins/Pipeline.gy @@ -123,13 +123,15 @@ pipeline { sh 'rm -f cov/report.xml' } always { + sh 'systemctl --user disable --now pbench-server >/dev/null 2>&1 || true' + sh 'systemctl --user reset-failed pbench-server >/dev/null 2>&1 || true' sh 'podman image ls \ --filter reference="*pbench-agent*" \ --filter reference="*pbench-server*" \ --format "{{.Id}}" --filter containers=false \ | sort -u \ | xargs podman image rm -f \ - || true' + || true' sh 'buildah rm --all' } } diff --git a/jenkins/run-server-func-tests b/jenkins/run-server-func-tests index 6c50110565..1718563138 100755 --- a/jenkins/run-server-func-tests +++ b/jenkins/run-server-func-tests @@ -33,6 +33,7 @@ function usage { } opts=$(getopt -q -o ckh --longoptions "cleanup,keep,help" -n "run-server-func-tests" -- "${@}") +# shellcheck disable=SC2181 if [[ ${?} -ne 0 ]]; then printf -- "%s %s\n\n\tunrecognized option specified\n\n" "${0}" "${*}" >&2 usage >&2 @@ -57,15 +58,15 @@ while true; do break ;; *) - printf -- "${0}: unrecognized command line argument, '${arg}'\n" >&2 + echo "${0}: 
unrecognized command line argument, '${arg}'" >&2 usage >&2 exit 1 ;; esac done -if (( ${keep_flag} && ${cleanup_flag} )); then - printf -- "${0}: [WARNING] 'keep' option is ineffective with 'cleanup'\n" +if (( keep_flag && cleanup_flag )); then + echo "${0}: [WARNING] 'keep' option is ineffective with 'cleanup'" fi function dump_journal { @@ -75,11 +76,47 @@ function dump_journal { printf -- "\n--- journalctl dump ---\n\n" } +function sc_disable { + # Run systemctl to disable the Pbench Server unit and clean up after any + # resulting failures attempting to do so. + + # Send the command output to the bitbucket; capture stderr in a variable. + o=$(systemctl --user disable --now pbench-server 2>&1 >/dev/null) + r=${?} + if (( r != 0 )); then + # Ignore the expected error + if [[ ${o} != *"Unit file"*"does not exist"* ]]; then + echo "systemctl disable: ${o}" + return ${r} + fi + fi + + # Remove the service file, if any. + rm -f "${HOME}/.config/systemd/user/pbench-server.service" + + # Even though we've disabled it above, the pbench-server service may still + # be present in systemctl's memory if it failed before or during being + # disabled. A side-effect of the command below will purge this entry. + o=$(systemctl --user reset-failed pbench-server 2>&1 >/dev/null) + r=${?} + if (( r != 0 )); then + # Ignore the expected error + if [[ ${o} != *"Unit"*"not found"* && ${o} != *"Unit"*"not loaded"* ]]; then + echo "systemctl reset-failed: ${o}" + return ${r} + fi + fi + + return 0 +} + function cleanup { - if (( ${cleanup_flag} )); then + if (( cleanup_flag )); then # Remove the Pbench Server container and the dependencies pod which we # just created and ran; remove any dangling containers; and then remove # any dangling images. + echo "Disabling the pbench-server service..." >&2 + sc_disable echo "Forcefully removing the Pbench Server container..." >&2 podman rm --force --ignore ${PB_SERVER_CONTAINER_NAME} echo "Forcefully removing the Pbench Support Services pod..." 
>&2 @@ -93,6 +130,7 @@ function cleanup { else echo "No clean up requested -- the Pbench Server container and support services pod likely still running!" >&2 fi + # shellcheck disable=SC2046 trap - $(trap -p | sed -e 's/.* //') exit ${exit_status} } @@ -133,7 +171,7 @@ if [[ "${status_code}" != "200" ]]; then curl ${SERVER_API_ENDPOINTS} exit_status=2 else - if (( ${keep_flag} )); then + if (( keep_flag )); then KEEP_DATASETS="keep" fi EXTRA_PODMAN_SWITCHES="${EXTRA_PODMAN_SWITCHES} --network host" \ @@ -142,20 +180,21 @@ else fi if [[ ${exit_status} -ne 0 ]]; then - if (( ${cleanup_flag} )); then + if (( cleanup_flag )); then dump_journal fi printf -- "\nFunctional tests exited with code %s\n" ${exit_status} >&2 fi -if (( ! ${cleanup_flag} )); then +if (( ! cleanup_flag )); then echo "No clean up requested -- the Pbench Server container and support services pod are running!" + # shellcheck disable=SC2046 trap - $(trap -p | sed -e 's/.* //') exit ${exit_status} fi echo "Stopping the Pbench Server container..." -podman stop ${PB_SERVER_CONTAINER_NAME} +sc_disable stop_status=${?} if [[ ${exit_status} -eq 0 ]]; then exit_status=${stop_status} diff --git a/server/pbenchinacan/deploy b/server/pbenchinacan/deploy index b76f6ee438..08cde899e1 100755 --- a/server/pbenchinacan/deploy +++ b/server/pbenchinacan/deploy @@ -1,18 +1,13 @@ #!/bin/bash -e # -# This script deploys a Pbench Server instance from a container image. +# This script deploys a new Pbench Server instance using a container image. +# (Note: for updating an existing instance, see the `redeploy` script.) # # Steps performed: -# - The Pbench Dashboard application files are copied from the build directory -# into the SRV_PBENCH file system. (If the build directory does not exist, -# an empty directory is created at the output location, a warning is -# printed, and the Pbench Server is deployed without a Dashboard.) 
-# - The Nginx favicon file is copied into the appropriate location in the -# SRV_PBENCH file system. -# - The ownership of the files in the SRV_PBENCH file system are set for -# access inside the container. -# - The Pbench Server container is started and run as a detached process. +# - Create a systemd service file for the pbench-server service. Copy it +# into the appropriate directory. Enable the service (but don't start it). +# - Invoke the `redeploy` script to deploy the Dashboard and start the service. # # The key inputs are the following environment variables: # - PB_DEPLOY_FILES: the location (on the host) of the configuration files @@ -22,7 +17,9 @@ # # Other inputs which can be provided by environment variables include: # - the name and pull policy for the Pbench Server container image +# - the container image tag or the entire image specification # - the location of the Pbench Dashboard app files and the Nginx Favicon +# (for use by the `redeploy` script invocation) # - the location of the Pbench Git checkout (used to locate the above) # # In all cases, suitable defaults are provided for any value which is undefined. 
@@ -33,7 +30,8 @@ #- # Pbench Server container image -PB_SERVER_IMAGE=${PB_SERVER_IMAGE:-"images.paas.redhat.com/pbench/pbench-server:main"} +PB_SERVER_IMAGE_TAG=${PB_SERVER_IMAGE_TAG:-"main"} +PB_SERVER_IMAGE=${PB_SERVER_IMAGE:-"images.paas.redhat.com/pbench/pbench-server:${PB_SERVER_IMAGE_TAG}"} PB_SERVER_IMAGE_PULL_POLICY=${PB_SERVER_IMAGE_PULL_POLICY:-"newer"} PB_SERVER_CONTAINER_NAME=${PB_SERVER_CONTAINER_NAME:-"pbench-server"} @@ -45,76 +43,56 @@ SRV_PBENCH=${SRV_PBENCH:-/srv/pbench} PB_SSL_CERT_FILE=${PB_SSL_CERT_FILE:-${PB_DEPLOY_FILES}/pbench-server.crt} PB_SSL_KEY_FILE=${PB_SSL_KEY_FILE:-${PB_DEPLOY_FILES}/pbench-server.key} -# Locations inside the container -# -# The value of ${NGINX_FAVICON} points to the location where the file will be -# found inside the container and is used to create a symbolic link; by default, -# it refers to a file in the Dashboard deployment relative to -# /srv/pbench/public_html. -NGINX_FAVICON=${NGINX_FAVICON:-./dashboard/$(realpath --relative-to "${PB_DASHBOARD_DIR}" "${PB_DASHBOARD_DIR}"/static/media/color-square.256x256*.ico)} - #+ # Deployment #- -# This is the first invocation of the Podman run command in this script, so we -# execute a trivial containerized command to make Podman apply the requested -# pull policy here. After this invocation (if it doesn't fail...), the -# container image will be present in the local repository, and so we will use -# the default Podman policy ("missing") for the subsequent invocations. -# (Otherwise, we risk pulling a remote image when we shouldn't.) However, if -# this script was invoked by the run-pbench-in-a-can script, then _it_ will have -# done the first pull, and it will have set the policy to "missing" to prompt us -# to skip it here. In any case, if the pull policy is set to "missing", then -# the first Podman run invocation below will do the right thing on its own, so -# we skip the extra one here. 
-if [[ ${PB_SERVER_IMAGE_PULL_POLICY} != "missing" ]]; then - podman run --entrypoint true --rm --pull ${PB_SERVER_IMAGE_PULL_POLICY} ${PB_SERVER_IMAGE} -fi +# Create a systemd service file for the Pbench Server +# (This was originally generated by Podman and then heavily edited.) +cat >"${PB_DEPLOY_FILES}/pbench-server.service" <<- EOD + [Unit] + Description=Containerized Pbench Server + Documentation=https://distributed-system-analysis.github.io/pbench/ + Documentation=https://pbench.readthedocs.io/en/latest/Server/API/README.html + Documentation=https://github.com/distributed-system-analysis/pbench + Wants=network-online.target + After=network-online.target + RequiresMountsFor=%t/containers -# Update the Dashboard code, removing any existing code and copying in fresh. -# We expect that the host dashboard directory already exists and contains a -# build of the Pbench Dashboard application; however, if it is missing, we -# deploy the Pbench Server without a Dashboard deployment. If we do update the -# Dashboard, then also update the Nginx favicon symlink. 
-podman unshare rm -r ${SRV_PBENCH}/public_html/dashboard -if [[ -d "${PB_DASHBOARD_DIR}" ]]; then - podman run \ - --rm \ - --volume ${PB_DASHBOARD_DIR}:/dashboard:Z \ - --volume ${SRV_PBENCH}:/srv/pbench:Z \ - --entrypoint cp \ - ${PB_SERVER_IMAGE} \ - -r /dashboard /srv/pbench/public_html/dashboard - podman run \ - --rm \ - --volume ${SRV_PBENCH}:/srv/pbench:Z \ - --entrypoint ln \ - ${PB_SERVER_IMAGE} \ - -sf ${NGINX_FAVICON} /srv/pbench/public_html/favicon.ico -else - echo "Warning: deploying the Pbench Server without a Dashboard" >&2 - podman run \ - --rm \ - --volume ${SRV_PBENCH}:/srv/pbench:Z \ - --entrypoint mkdir \ - ${PB_SERVER_IMAGE} \ - -m 0755 -p /srv/pbench/public_html/dashboard -fi + [Service] + Environment=PODMAN_SYSTEMD_UNIT=%n + Restart=on-failure + TimeoutStopSec=30 + ExecStartPre=/bin/rm -f %t/%n.ctr-id + ExecStart=/usr/bin/podman run \ + --cidfile=%t/%n.ctr-id \ + --cgroups=no-conmon \ + --sdnotify=conmon \ + --replace \ + --detach \ + --name "${PB_SERVER_CONTAINER_NAME}" \ + --network host \ + --rm \ + --volume "${PB_DEPLOY_FILES}"/etc/rsyslog.conf:/etc/rsyslog.conf:Z \ + --volume "${PB_DEPLOY_FILES}"/etc/rsyslog.d:/etc/rsyslog.d:Z \ + --volume "${PB_SSL_CERT_FILE}":/etc/pki/tls/certs/pbench-server.crt:Z \ + --volume "${PB_SSL_KEY_FILE}":/etc/pki/tls/private/pbench-server.key:Z \ + --volume "${PB_DEPLOY_FILES}"/pbench-server.cfg:/opt/pbench-server/lib/config/pbench-server.cfg:Z \ + --volume "${SRV_PBENCH}":/srv/pbench:Z \ + ${PB_SERVER_PODMAN_SWITCHES} \ + ${PB_SERVER_IMAGE} + ExecStop=/usr/bin/podman stop --ignore --cidfile=%t/%n.ctr-id + ExecStopPost=/usr/bin/podman rm -f --ignore --cidfile=%t/%n.ctr-id + Type=notify + NotifyAccess=all -# Run the Pbench Server. -# -# See NOTE above on the first Podman run invocation regarding pull policy. 
-podman run \ - --detach \ - --name ${PB_SERVER_CONTAINER_NAME} \ - --network host \ - --rm \ - --volume ${PB_DEPLOY_FILES}/etc/rsyslog.conf:/etc/rsyslog.conf:Z \ - --volume ${PB_DEPLOY_FILES}/etc/rsyslog.d:/etc/rsyslog.d:Z \ - --volume ${PB_SSL_CERT_FILE}:/etc/pki/tls/certs/pbench-server.crt:Z \ - --volume ${PB_SSL_KEY_FILE}:/etc/pki/tls/private/pbench-server.key:Z \ - --volume ${PB_DEPLOY_FILES}/pbench-server.cfg:/opt/pbench-server/lib/config/pbench-server.cfg:Z \ - --volume ${SRV_PBENCH}:/srv/pbench:Z \ - ${PB_SERVER_PODMAN_SWITCHES} \ - ${PB_SERVER_IMAGE} + [Install] + WantedBy=default.target +EOD + +# Set up the Pbench Server as a system service. +cp "${PB_DEPLOY_FILES}/pbench-server.service" "${HOME}/.config/systemd/user/" +systemctl --user enable --force pbench-server + +# Refresh the deployment and start the service. +server/pbenchinacan/redeploy diff --git a/server/pbenchinacan/redeploy b/server/pbenchinacan/redeploy new file mode 100755 index 0000000000..6606a4d3fe --- /dev/null +++ b/server/pbenchinacan/redeploy @@ -0,0 +1,107 @@ +#!/bin/bash -e + +# +# This script re-deploys a Pbench Server instance updating the Dashboard and +# restarting the container image +# +# Steps performed: +# - The existing pbench-server service, if any, is stopped. +# - The Pbench Dashboard application files are copied from the build directory +# into the SRV_PBENCH file system. (If the build directory does not exist, +# an empty directory is created at the output location, a warning is +# printed, and the Pbench Server is deployed without a Dashboard.) +# - A symbolic link for Nginx favicon file is created at the appropriate +# location in the SRV_PBENCH file system. +# - The pbench-server service is started. +# +# The key inputs are the following environment variables: +# - SRV_PBENCH: the location of the file system (on the host) which will be +# mounted inside the container at /srv/pbench. 
+# +# Other inputs which can be provided by environment variables include: +# - the name and pull policy for the Pbench Server container image +# - the container image tag or the entire image specification +# - the location of the Pbench Dashboard app files and the Nginx Favicon +# - the location of the Pbench Git checkout (used to locate the above) +# +# In all cases, suitable defaults are provided for any value which is undefined. +# + +#+ +# Configuration +#- + +# Pbench Server container image +PB_SERVER_IMAGE_TAG=${PB_SERVER_IMAGE_TAG:-"main"} +PB_SERVER_IMAGE=${PB_SERVER_IMAGE:-"images.paas.redhat.com/pbench/pbench-server:${PB_SERVER_IMAGE_TAG}"} +PB_SERVER_IMAGE_PULL_POLICY=${PB_SERVER_IMAGE_PULL_POLICY:-"newer"} +PB_SERVER_CONTAINER_NAME=${PB_SERVER_CONTAINER_NAME:-"pbench-server"} + +# Locations on the host +GITTOP=${GITTOP:-$(git rev-parse --show-toplevel)} +PB_DASHBOARD_DIR=${PB_DASHBOARD_DIR:-${GITTOP}/dashboard/build} +SRV_PBENCH=${SRV_PBENCH:-/srv/pbench} + +# Locations inside the container +# +# The value of ${NGINX_FAVICON} points to the location where the file will be +# found inside the container and is used to create a symbolic link; by default, +# it refers to a file in the Dashboard deployment relative to +# /srv/pbench/public_html. +NGINX_FAVICON=${NGINX_FAVICON:-./dashboard/$(realpath --relative-to "${PB_DASHBOARD_DIR}" "${PB_DASHBOARD_DIR}"/static/media/color-square.256x256*.ico)} + +#+ +# Deployment +#- + + +# Stop the currently running Pbench Server, if any. +systemctl --user stop pbench-server || true + +# This is the first invocation of the Podman run command in this script, so we +# execute a trivial containerized command to make Podman apply the requested +# pull policy here. After this invocation (if it doesn't fail...), the +# container image will be present in the local repository, and so we will use +# the default Podman policy ("missing") for the subsequent invocations. 
+# (Otherwise, we risk pulling a remote image when we shouldn't.) However, if +# this script was invoked by the run-pbench-in-a-can script, then _it_ will have +# done the first pull, and it will have set the policy to "missing" to prompt us +# to skip it here. In any case, if the pull policy is set to "missing", then +# the first Podman run invocation below will do the right thing on its own, so +# we skip the extra one here. +if [[ ${PB_SERVER_IMAGE_PULL_POLICY} != "missing" ]]; then + podman run --entrypoint true --rm --pull "${PB_SERVER_IMAGE_PULL_POLICY}" "${PB_SERVER_IMAGE}" +fi + +# Update the Dashboard code, removing any existing code and copying in fresh. +# We expect that the host dashboard directory already exists and contains a +# build of the Pbench Dashboard application; however, if it is missing, we +# deploy the Pbench Server without a Dashboard deployment. If we do update the +# Dashboard, then also update the Nginx favicon symlink. +podman unshare rm -r "${SRV_PBENCH}/public_html/dashboard" +if [[ -d "${PB_DASHBOARD_DIR}" ]]; then + podman run \ + --rm \ + --volume "${PB_DASHBOARD_DIR}":/dashboard:Z \ + --volume "${SRV_PBENCH}":/srv/pbench:Z \ + --entrypoint cp \ + "${PB_SERVER_IMAGE}" \ + -r /dashboard /srv/pbench/public_html/dashboard + podman run \ + --rm \ + --volume "${SRV_PBENCH}":/srv/pbench:Z \ + --entrypoint ln \ + "${PB_SERVER_IMAGE}" \ + -sf "${NGINX_FAVICON}" /srv/pbench/public_html/favicon.ico +else + echo "Warning: deploying the Pbench Server without a Dashboard" >&2 + podman run \ + --rm \ + --volume "${SRV_PBENCH}":/srv/pbench:Z \ + --entrypoint mkdir \ + "${PB_SERVER_IMAGE}" \ + -m 0755 -p /srv/pbench/public_html/dashboard +fi + +# Run the Pbench Server. +systemctl --user start pbench-server