From c360fb72d60a2de61a923470ca55db0ed0e296ea Mon Sep 17 00:00:00 2001
From: Angelo Teixeira
Date: Sun, 13 Feb 2022 00:29:41 +0100
Subject: [PATCH] Implement Health Checks w/ Rollback

Added health checks for nijobs-be on deployment
---
 deployments/.gitignore         |  1 +
 deployments/deploy-types.sh    | 54 ++++++++++++++++++++++++++-
 deployments/deploy.sh          |  4 +-
 deployments/health-checker.sh  | 68 ++++++++++++++++++++++++++++++++++
 deployments/project-configs.sh |  5 +++
 5 files changed, 128 insertions(+), 4 deletions(-)
 create mode 100644 deployments/health-checker.sh

diff --git a/deployments/.gitignore b/deployments/.gitignore
index 0fcfd64..659dd44 100644
--- a/deployments/.gitignore
+++ b/deployments/.gitignore
@@ -4,6 +4,7 @@
 !deploy.sh
 !deploy-types.sh
 !spam-deploys.sh
+!health-checker.sh
 !README.md
 # Also, the project configurations!
 !project-configs.sh
diff --git a/deployments/deploy-types.sh b/deployments/deploy-types.sh
index ea49845..706cfde 100755
--- a/deployments/deploy-types.sh
+++ b/deployments/deploy-types.sh
@@ -10,10 +10,13 @@ function to_lower_case() {
     echo "$1" | tr '[:upper:]' '[:lower:]'
 }
 
+# shellcheck source=deployments/health-checker.sh
+source "$deploy_curr_dir/health-checker.sh"
+
 # For deploying stuff with docker, simply put.
 function deploy_default() {
-    # (dotenv_location is not mandatory)
-    local project="$1" branch="$2" image_tag port="$3" dotenv_location="${4:-}" docker_flags="${5:-}"
+    # (dotenv_location, docker_flags and health_check_url are not mandatory)
+    local project="$1" branch="$2" image_tag port="$3" dotenv_location="${4:-}" docker_flags="${5:-}" health_check_url="${6:-}"
     image_tag="$(to_lower_case "$project---$branch" )"
 
     # If we have a dotenv file specified, copy it into the current directory (in case of error, `cp` prints something so no need to echo anything)
@@ -75,6 +78,53 @@ function deploy_default() {
         return "$run_status"
     fi
 
+    # Without a configured URL there is nothing to verify, so the deployment counts as healthy
+    local health_check_result=0
+    if [[ -n "$health_check_url" ]]; then
+        echo -e "Starting health check...\n"
+        # Record the checker's own exit status (`|| true` would always leave 0 in $?)
+        health_checker "$health_check_url" || health_check_result="$?"
+    fi
+
+    if [ "$health_check_result" != 0 ]; then
+        >&2 echo -e "\n###-> ERROR! Service did not pass the health check! Rolling back to previous container!"
+
+        local new_container_id
+        new_container_id="$(docker ps -aq --filter ancestor="$image_tag")"
+
+        local new_image_id
+        new_image_id="$(docker images -q "$image_tag")"
+
+        docker stop "$new_container_id" &>/dev/null
+        printf "New container exit code: "
+        docker wait "$new_container_id"
+        echo -e "\n###-> New container stopped.\n"
+
+        >&2 echo "###-> Retagging old image and starting old container back up"
+        if [[ -n "$old_image_id" ]]; then
+            docker tag "$old_image_id" "$image_tag"
+        else
+            echo "###->> No old image found for retagging!!"
+        fi
+        if [[ -n "$old_container_id" ]]; then
+            docker start "$old_container_id"
+        else
+            echo "###->> No old container found for starting back up!!"
+        fi
+
+        echo "###->> Removing new (un-healthy) container"
+        docker rm "$new_container_id"
+
+        if [[ "$(docker images -q "$image_tag")" == "$new_image_id" ]]; then
+            echo "###-> Not removing image, as the container was run using the same one (build did a full cache hit)"
+        else
+            printf "new image id: "
+            docker rmi "$new_image_id"
+        fi
+
+        return 1
+    fi
+
     # Cleanup
     echo -e "\n###-> New container now running successfuly, removing old container and image!"
     if [[ -n "$old_container_id" ]]; then
diff --git a/deployments/deploy.sh b/deployments/deploy.sh
index 129b2c7..ab47829 100755
--- a/deployments/deploy.sh
+++ b/deployments/deploy.sh
@@ -24,7 +24,7 @@ branch="${2:-master}"
 # shellcheck source=utils/utils.sh
 source "$deploy_curr_dir/../utils/utils.sh"
 
-# Getting project configurations (configured_projects, project_port and project_dotenv_location)
+# Getting project configurations (configured_projects, project_port, project_dotenv_location and project_health_check_url)
 # shellcheck source=deployments/project-configs.sh
 source "$deploy_curr_dir/project-configs.sh"
 
@@ -111,7 +111,7 @@ set +e
     echo
 
     # Passing in the configs from ./project-configs.sh. dotenv_location and docker_flags might not be set so sending instead an empty variable ("") so that the 'unbound variable' error does not occur
-    deploy_default "$project" "$branch" "${project_port[$project---$branch]}" "${project_dotenv_location[$project---$branch]:-}" "${project_docker_flags[$project---$branch]:-}"
+    deploy_default "$project" "$branch" "${project_port[$project---$branch]}" "${project_dotenv_location[$project---$branch]:-}" "${project_docker_flags[$project---$branch]:-}" "${project_health_check_url[$project---$branch]:-}"
 ) 2>&1 | tee "$logfile"
 
 # This gets the return status of the first element of the previous pipe, aka the subshell executing the deployment commands
diff --git a/deployments/health-checker.sh b/deployments/health-checker.sh
new file mode 100644
index 0000000..4b07170
--- /dev/null
+++ b/deployments/health-checker.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+
+### Check for service availability after container startup
+### To be used by deploy-types.sh
+
+# See https://sipb.mit.edu/doc/safe-shell/
+set -ueo pipefail
+
+# Calls a given url and prints 0 if the response status is HTTP 200, 1 otherwise (the exit status is always 0)
+# Arguments: url - URL to call
+function is_healthy_url() {
+    local url="$1"
+    local response_code
+    # --max-time bounds each probe so a hung service cannot stall the deployment; a failed request just yields a non-200 code
+    response_code="$(curl -s -o /dev/null --max-time 10 -w "%{http_code}" "$url")" || true
+
+    if [ "$response_code" == "200" ]; then
+        echo 0
+    else
+        echo 1
+    fi
+
+    return 0
+}
+
+# Periodically calls a given url until it returns HTTP 200, or max retries is reached.
+# Returns 0 if health check was successful, 1 otherwise
+# Arguments: url - URL to call
+function health_checker() {
+    local url="$1"
+
+    # 26 attempts with a 1 second pause in between: the service gets at least ~26 seconds to come up
+    # NOTE(review): these values were previously documented as 10 second pauses over 5 minutes - confirm which is intended
+    local MAX_ATTEMPTS=26
+    local RETRY_INTERVAL_SECONDS=1
+
+    local is_healthy_result=1
+    local retry_count=0
+
+    while [ "$is_healthy_result" -ne 0 ]; do
+
+        if [ "$retry_count" -eq "$MAX_ATTEMPTS" ]; then
+            break
+        fi
+
+        echo -e "[Health Checker] Attempt $retry_count\n"
+
+        is_healthy_result="$(is_healthy_url "$url")"
+
+        if [ "$is_healthy_result" -eq 0 ]; then
+            echo -e "[Health Checker] Health Check successfull!\n"
+            break
+        fi
+
+        echo -e "[Health Checker] Attempt ${retry_count} failed.\n"
+        retry_count=$((retry_count+1))
+
+        sleep "$RETRY_INTERVAL_SECONDS"
+    done
+
+    if [ "$is_healthy_result" -ne 0 ]; then
+        echo -e "[Health Checker] Max number of retries reached. Health Check failed.\n"
+    fi
+
+    return "$is_healthy_result"
+}
+
+export -f health_checker
diff --git a/deployments/project-configs.sh b/deployments/project-configs.sh
index c4a0b6f..eef6044 100755
--- a/deployments/project-configs.sh
+++ b/deployments/project-configs.sh
@@ -12,6 +12,7 @@ configured_projects="Website-NIAEFEUP tts-fe nijobs-fe nijobs-be"
 declare -A project_port
 declare -A project_dotenv_location
 declare -A project_docker_flags
+declare -A project_health_check_url
 
 # Website-NIAEFEUP
 project_port[Website-NIAEFEUP---master]=3000
@@ -38,10 +39,13 @@ project_dotenv_location[nijobs-fe---experimental]='/home/ni/niployments/deployme
 project_port[nijobs-be---master]=4010
 project_dotenv_location[nijobs-be---master]='/home/ni/niployments/deployments/env-files/nijobs-be/master/.env.local'
 project_docker_flags[nijobs-be---master]='-v /home/ni/niployments/deployments/volumes-data/nijobs:/usr/src/app/static'
+project_health_check_url[nijobs-be---master]="https://localhost:${project_port[nijobs-be---master]}/" # NOTE(review): double quotes so the port expands; confirm the service really answers HTTPS on localhost
 ## nijobs-be staging
 project_port[nijobs-be---develop]=4011
 project_dotenv_location[nijobs-be---develop]='/home/ni/niployments/deployments/env-files/nijobs-be/develop/.env.local'
 project_docker_flags[nijobs-be---develop]='-v /home/ni/niployments/deployments/volumes-data/nijobs-beta:/usr/src/app/static'
+project_health_check_url[nijobs-be---develop]="https://localhost:${project_port[nijobs-be---develop]}/" # NOTE(review): double quotes so the port expands; confirm the service really answers HTTPS on localhost
+
 
 # debug example:
 # project_dotenv_location[nijobs-be---develop]='/home/miguel/Coding/NIAEFEUP/niployments/deployments/env-files/nijobs-be/develop/.env.local'
@@ -49,4 +53,5 @@ project_docker_flags[nijobs-be---develop]='-v /home/ni/niployments/deployments/v
 export project_port
 export project_dotenv_location
 export project_docker_flags
+export project_health_check_url
 export configured_projects