Skip to content
This repository has been archived by the owner on May 23, 2024. It is now read-only.

Commit

Permalink
Implement Health Checks w/ Rollback
Browse files Browse the repository at this point in the history
Added health checks for nijobs-be on deployment
  • Loading branch information
imnotteixeira committed Mar 30, 2022
1 parent 4801cf6 commit c360fb7
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 4 deletions.
1 change: 1 addition & 0 deletions deployments/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
!deploy.sh
!deploy-types.sh
!spam-deploys.sh
!health-checker.sh
!README.md
# Also, the project configurations!
!project-configs.sh
Expand Down
54 changes: 52 additions & 2 deletions deployments/deploy-types.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,13 @@ function to_lower_case() {
echo "$1" | tr '[:upper:]' '[:lower:]'
}

# shellcheck source=deployments/health-checker.sh
source "$deploy_curr_dir/health-checker.sh"

# For deploying stuff with docker, simply put.
function deploy_default() {
# (dotenv_location is not mandatory)
local project="$1" branch="$2" image_tag port="$3" dotenv_location="${4:-}" docker_flags="${5:-}"
# (dotenv_location, docker_flags and health_check_url are not mandatory)
local project="$1" branch="$2" image_tag port="$3" dotenv_location="${4:-}" docker_flags="${5:-}" health_check_url="${6:-}"
image_tag="$(to_lower_case "$project---$branch" )"

# If we have a dotenv file specified, copy it into the current directory (in case of error, `cp` prints something so no need to echo anything)
Expand Down Expand Up @@ -75,6 +78,53 @@ function deploy_default() {
return "$run_status"
fi

# Optional post-deploy health check. 0 means "healthy, or nothing to check":
# health_check_url is not mandatory, so a deployment without one must not be rolled back.
local health_check_result=0
if [[ -n "$health_check_url" ]]; then
    echo -e "Starting health check...\n"
    # Capture the failure status directly: `|| true` followed by `$?` would always yield 0
    # and a failed health check would never be noticed.
    health_checker "$health_check_url" || health_check_result="$?"
fi

# Rollback path: when the health check status is non-zero, stop and remove the new container,
# point the image tag back at the previous image, start the previous container again and
# make the deployment fail with status 1.
if [ "$health_check_result" != 0 ]; then
>&2 echo -e "\n###-> ERROR! Service did not pass the health check! Rolling back to previous container!"

# Id(s) of the container(s) whose ancestor is the image currently tagged as $image_tag
# NOTE(review): this filter can match more than one container; the commands below treat the result as a single id - confirm
local new_container_id
new_container_id="$(docker ps -aq --filter ancestor="$image_tag")"

# Id of the image the tag points to right now (the new build), kept for the comparison/removal further down
local new_image_id
new_image_id="$(docker images -q "$image_tag")"

# Stop the new container quietly; the output of `docker wait` completes the line started by printf
docker stop "$new_container_id" &>/dev/null
printf "New container exit code: "
docker wait "$new_container_id"
echo -e "\n###-> New container stopped.\n"

# old_image_id / old_container_id are not set in this section; presumably captured earlier in this function - verify
>&2 echo "###-> Retagging old image and starting old container back up"
if [[ -n "$old_image_id" ]]; then
docker tag "$old_image_id" "$image_tag"
else
echo "###->> No old image found for retagging!!"
fi
if [[ -n "$old_container_id" ]]; then
docker start "$old_container_id"
else
echo "###->> No old container found for starting back up!!"
fi

echo "###->> Removing new (un-healthy) container"
docker rm "$new_container_id"

# If the tag still resolves to the new image id, the image is kept; otherwise the new image is removed
if [[ "$(docker images -q "$image_tag")" == "$new_image_id" ]]; then
echo "###-> Not removing image, as the container was run using the same one (build did a full cache hit)"
else
# NOTE(review): the label printed here says "old image id" but the image being removed is $new_image_id - confirm the wording
printf "old image id: "
docker rmi "$new_image_id"
fi

# Signal the failed deployment to the caller
return 1
fi

# Cleanup
echo -e "\n###-> New container now running successfuly, removing old container and image!"
if [[ -n "$old_container_id" ]]; then
Expand Down
4 changes: 2 additions & 2 deletions deployments/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ branch="${2:-master}"
# shellcheck source=utils/utils.sh
source "$deploy_curr_dir/../utils/utils.sh"

# Getting project configurations (configured_projects, project_port and project_dotenv_location)
# Getting project configurations (configured_projects, project_port, project_dotenv_location and project_health_check_url)
# shellcheck source=deployments/project-configs.sh
source "$deploy_curr_dir/project-configs.sh"

Expand Down Expand Up @@ -111,7 +111,7 @@ set +e
echo

# Passing in the configs from ./project-configs.sh. dotenv_location and docker_flags might not be set so sending instead an empty variable ("") so that the 'unbound variable' error does not occur
deploy_default "$project" "$branch" "${project_port[$project---$branch]}" "${project_dotenv_location[$project---$branch]:-}" "${project_docker_flags[$project---$branch]:-}"
deploy_default "$project" "$branch" "${project_port[$project---$branch]}" "${project_dotenv_location[$project---$branch]:-}" "${project_docker_flags[$project---$branch]:-}" "${project_health_check_url[$project---$branch]:-}"
) 2>&1 | tee "$logfile"

# This gets the return status of the first element of the previous pipe, aka the subshell executing the deployment commands
Expand Down
68 changes: 68 additions & 0 deletions deployments/health-checker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/usr/bin/env bash

### Check for service availability after container startup
### To be used by deploy-types.sh

# See https://sipb.mit.edu/doc/safe-shell/
# NOTE(review): this file is meant to be used by deploy-types.sh (see the header above); if it is sourced,
# these options are applied to the sourcing shell as well - confirm that is intended.
set -ueo pipefail

# Calls a given url and prints 0 if the response status is HTTP 200, 1 otherwise.
# The verdict is written to stdout (capture it with $(...)); the return status is always 0.
# Arguments: url - URL to call
#            timeout_seconds - (optional) maximum duration of the whole request, defaults to 10
function is_healthy_url() {
    local url="$1"
    local timeout_seconds="${2:-10}"

    local response_code
    # --max-time: without a limit, a service that accepts the connection but never answers
    # would block the deployment forever.
    # curl exits non-zero when the request itself fails (connection refused, timeout, ...);
    # treat that as "no valid response" instead of letting it abort the function before a
    # verdict is printed when errexit is active.
    response_code="$(curl -s -o /dev/null -w "%{http_code}" --max-time "$timeout_seconds" "$url")" || response_code="000"

    if [ "$response_code" == "200" ]; then
        echo 0
    else
        echo 1
    fi

    return 0
}

# Periodically calls a given url until it returns HTTP 200, or the maximum number of attempts is reached.
# Returns 0 if health check was successful, 1 otherwise
# Arguments: url - URL to call
#            max_attempts - (optional) maximum number of attempts, defaults to 26
#            retry_interval_seconds - (optional) seconds to wait between two attempts, defaults to 1
function health_checker() {
    local url="$1"

    # With the defaults (26 attempts, 1 second apart) the check gives up after roughly
    # 25 seconds of waiting, plus the time spent on the requests themselves.
    local max_attempts="${2:-26}"
    local retry_interval_seconds="${3:-1}"

    local is_healthy_result=1
    local retry_count=0

    while [ "$retry_count" -lt "$max_attempts" ]; do
        echo -e "[Health Checker] Attempt $retry_count\n"

        # is_healthy_url prints its verdict (0 = healthy) instead of returning it
        is_healthy_result="$(is_healthy_url "$url")"

        # String comparison so that an unexpected (e.g. empty) verdict counts as a failure
        # instead of raising an "integer expression expected" error
        if [[ "$is_healthy_result" == "0" ]]; then
            echo -e "[Health Checker] Health Check successfull!\n"
            return 0
        fi

        echo -e "[Health Checker] Attempt ${retry_count} failed.\n"
        retry_count=$((retry_count+1))

        # Waiting after the last attempt would only delay the rollback
        if [ "$retry_count" -lt "$max_attempts" ]; then
            sleep "$retry_interval_seconds"
        fi
    done

    echo -e "[Health Checker] Max number of retries reached. Health Check failed.\n"
    return 1
}

# health_checker calls is_healthy_url, so both must be exported for the former to work in child shells
export -f is_healthy_url health_checker
5 changes: 5 additions & 0 deletions deployments/project-configs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ configured_projects="Website-NIAEFEUP tts-fe nijobs-fe nijobs-be"
declare -A project_port
declare -A project_dotenv_location
declare -A project_docker_flags
declare -A project_health_check_url

# Website-NIAEFEUP
project_port[Website-NIAEFEUP---master]=3000
Expand All @@ -38,15 +39,19 @@ project_dotenv_location[nijobs-fe---experimental]='/home/ni/niployments/deployme
project_port[nijobs-be---master]=4010
project_dotenv_location[nijobs-be---master]='/home/ni/niployments/deployments/env-files/nijobs-be/master/.env.local'
project_docker_flags[nijobs-be---master]='-v /home/ni/niployments/deployments/volumes-data/nijobs:/usr/src/app/static'
# Double quotes are required so that the port is expanded here; single quotes would store the
# literal text '${project_port[...]}' and the health check would be called with an invalid URL.
# NOTE(review): confirm the service really speaks TLS on this port; if it serves plain HTTP this must be http://
project_health_check_url[nijobs-be---master]="https://localhost:${project_port[nijobs-be---master]}/"
## nijobs-be staging
project_port[nijobs-be---develop]=4011
project_dotenv_location[nijobs-be---develop]='/home/ni/niployments/deployments/env-files/nijobs-be/develop/.env.local'
project_docker_flags[nijobs-be---develop]='-v /home/ni/niployments/deployments/volumes-data/nijobs-beta:/usr/src/app/static'
# Same as above: double quotes so that the staging port is expanded
project_health_check_url[nijobs-be---develop]="https://localhost:${project_port[nijobs-be---develop]}/"

# debug example:
# project_dotenv_location[nijobs-be---develop]='/home/miguel/Coding/NIAEFEUP/niployments/deployments/env-files/nijobs-be/develop/.env.local'

# Essential, duh! :)
export project_port
export project_dotenv_location
export project_docker_flags
export project_health_check_url
export configured_projects

0 comments on commit c360fb7

Please sign in to comment.