diff --git a/deployments/.gitignore b/deployments/.gitignore index 0fcfd64..659dd44 100644 --- a/deployments/.gitignore +++ b/deployments/.gitignore @@ -4,6 +4,7 @@ !deploy.sh !deploy-types.sh !spam-deploys.sh +!health-checker.sh !README.md # Also, the project configurations! !project-configs.sh diff --git a/deployments/README.md b/deployments/README.md index af47b2a..6e21bad 100644 --- a/deployments/README.md +++ b/deployments/README.md @@ -21,6 +21,10 @@ The `port` is mandatory, but the `env_file_path` and `docker_flags` are optional The paths provided in the said file **MUST** be **absolute** (trust me you don't want to handle bash's path spaghetti :upside_down_face: :wink:). +#### Deployment Health Checks and Rollbacks + +You can set a `project_health_check_url` per project, which will be used to assert the service is running properly after deployment. The deploy script will query the URL (using cURL) every 10 seconds until it returns HTTP 200, for a maximum of 5 minutes. It is advised that you pass a URL served by the respective service which only returns 200 when the service is in good operating state. This variable is optional. If it is not provided, the health check will not be done. If the health check fails, the deployment is rolled back: the new container is stopped and removed, the previous image (if any) is retagged, and the previous container (if any) is started again. + ## Notes `docker system prune` should be run periodically to clean up dangling images and containers. The deployment scripts attempt to minimize the number of these but some are left on purpose due to speeding up multi-stage builds. diff --git a/deployments/deploy-types.sh b/deployments/deploy-types.sh index ea49845..4269635 100755 --- a/deployments/deploy-types.sh +++ b/deployments/deploy-types.sh @@ -10,10 +10,17 @@ function to_lower_case() { echo "$1" | tr '[:upper:]' '[:lower:]' } +# Adaptation of https://stackoverflow.com/questions/192292/how-best-to-include-other-scripts +deploy_curr_dir="${BASH_SOURCE%/*}" +if [[ ! 
-d "$deploy_curr_dir" ]]; then deploy_curr_dir="${0%/*}"; fi + +# shellcheck source=/dev/null +source "$deploy_curr_dir/health-checker.sh" + # For deploying stuff with docker, simply put. function deploy_default() { - # (dotenv_location is not mandatory) - local project="$1" branch="$2" image_tag port="$3" dotenv_location="${4:-}" docker_flags="${5:-}" + # (dotenv_location, docker_flags and health_check_url are not mandatory) + local project="$1" branch="$2" image_tag port="$3" dotenv_location="${4:-}" docker_flags="${5:-}" health_check_url="${6:-}" image_tag="$(to_lower_case "$project---$branch" )" # If we have a dotenv file specified, copy it into the current directory (in case of error, `cp` prints something so no need to echo anything) @@ -28,7 +35,10 @@ function deploy_default() { old_container_id="$(docker ps -aq --filter ancestor="$image_tag")" echo -e "Starting docker build\n" - docker build -f Dockerfile-prod -t "$image_tag" . + + local new_image_id + new_image_id=$(docker build -q -f Dockerfile-prod -t "$image_tag" .) + local build_status="$?" # Disabled as this meant that no dependencies could be cached. Instead run `docker system prune` periodically to clear up disk space if necessary. @@ -57,7 +67,10 @@ function deploy_default() { else echo -e "#-> No docker flags specified.\n" fi - echo "${docker_flags:-} -d --restart=unless-stopped --env PORT=80 -p $port:80 $image_tag" | xargs docker run + + local new_container_id + new_container_id=$(echo "${docker_flags:-} -d --restart=unless-stopped --env PORT=80 -p $port:80 $image_tag" | xargs docker run) + local run_status="$?" if [ "$run_status" != 0 ]; then >&2 echo -e "\n###-> ERROR! Run failed!" @@ -75,6 +88,47 @@ function deploy_default() { return "$run_status" fi + local health_check_result=0 + if [[ -n "$health_check_url" ]]; then + echo -e "###->Starting health check...\n" + # This is done this way due to the use of set -e above. 
+ # If the command is successful, the || won't run, so the default value is 0 + # if the command is not successful, we need the || so that the script does not exit immediately + health_checker "$health_check_url" || health_check_result="$?" + + if [ "$health_check_result" != 0 ]; then + >&2 echo -e "\n###-> ERROR! Service did not pass the health check! Rolling back to previous container!" + + docker stop "$new_container_id" &>/dev/null + docker wait "$new_container_id" + echo -e "\n###-> New container stopped.\n" + + >&2 echo "###-> Retagging old image and starting old container back up" + if [[ -n "$old_image_id" ]]; then + docker tag "$old_image_id" "$image_tag" + else + >&2 echo "###->> No old image found for retagging!!" + fi + if [[ -n "$old_container_id" ]]; then + docker start "$old_container_id" + else + >&2 echo "###->> No old container found for starting back up!!" + fi + + >&2 echo "###->> Removing new (un-healthy) container" + docker rm "$new_container_id" + + if [[ "$(docker images -q --no-trunc "$image_tag")" == "$new_image_id" ]]; then + >&2 echo "###-> Not removing image, as the container was run using the same one (build did a full cache hit)" + else + printf "Removed new image with id: " + docker rmi "$new_image_id" + fi + + return 1 + fi + fi + # Cleanup echo -e "\n###-> New container now running successfuly, removing old container and image!" if [[ -n "$old_container_id" ]]; then @@ -89,7 +143,7 @@ function deploy_default() { echo "###-> Not removing image, as the container was run using the same one (build did a full cache hit)" else printf "old image id: " - docker rmi "$old_image_id" + docker rmi "$old_image_id" fi else echo "###-> No old image found, so none removed." 
diff --git a/deployments/deploy.sh b/deployments/deploy.sh index 129b2c7..ab47829 100755 --- a/deployments/deploy.sh +++ b/deployments/deploy.sh @@ -24,7 +24,7 @@ branch="${2:-master}" # shellcheck source=utils/utils.sh source "$deploy_curr_dir/../utils/utils.sh" -# Getting project configurations (configured_projects, project_port and project_dotenv_location) +# Getting project configurations (configured_projects, project_port, project_dotenv_location and project_health_check_url) # shellcheck source=deployments/project-configs.sh source "$deploy_curr_dir/project-configs.sh" @@ -111,7 +111,7 @@ set +e echo # Passing in the configs from ./project-configs.sh. dotenv_location and docker_flags might not be set so sending instead an empty variable ("") so that the 'unbound variable' error does not occur - deploy_default "$project" "$branch" "${project_port[$project---$branch]}" "${project_dotenv_location[$project---$branch]:-}" "${project_docker_flags[$project---$branch]:-}" + deploy_default "$project" "$branch" "${project_port[$project---$branch]}" "${project_dotenv_location[$project---$branch]:-}" "${project_docker_flags[$project---$branch]:-}" "${project_health_check_url[$project---$branch]:-}" ) 2>&1 | tee "$logfile" # This gets the return status of the first element of the previous pipe, aka the subshell executing the deployment commands diff --git a/deployments/health-checker.sh b/deployments/health-checker.sh new file mode 100755 index 0000000..695e64a --- /dev/null +++ b/deployments/health-checker.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +### Check for service availability after container startup +### To be used by deploy-types.sh + +# See https://sipb.mit.edu/doc/safe-shell/ +set -ueo pipefail + +# Calls a given url and returns 0 if the response status is HTTP 200, 1 otherwise +# Arguments: url - URL to call +function is_healthy_url() { + local url="$1" + + local response_code + response_code="$(curl -s --max-time 10 -o /dev/null -w "%{http_code}" "$url")" + + if [ 
"$response_code" == "200" ]; then + echo 0 + else + echo 1 + fi +} + +# Periodically calls a given url until it returns HTTP 200, or max retries is reached. +# Returns 0 if health check was successful, 1 otherwise +# Arguments: url - URL to call +function health_checker() { + local url="$1" + + # According to 1 retry every 10 seconds, this will try for 5 minutes + local MAX_ATTEMPTS=31 + local RETRY_INTERVAL_SECONDS=10 + + local is_healthy_result=1 + local retry_count=0 + + while [ "$is_healthy_result" -ne 0 ] + do + + if [ "$retry_count" -eq "$MAX_ATTEMPTS" ]; then + break + fi + + echo -e "[Health Checker] Attempt $retry_count\n" + + is_healthy_result="$(is_healthy_url "$url")" + + if [ "$is_healthy_result" -eq 0 ]; then + echo -e "[Health Checker] Health Check successfull!\n" + break + fi + + echo -e "[Health Checker] Attempt ${retry_count} failed.\n" + retry_count=$((retry_count+1)) + + sleep "$RETRY_INTERVAL_SECONDS" + done + + if [ "$is_healthy_result" -ne 0 ]; then + echo -e "[Health Checker] Max number of retries reached. 
Health Check failed.\n" + fi + + return "$is_healthy_result" +} + +export -f health_checker diff --git a/deployments/project-configs.sh b/deployments/project-configs.sh index c4a0b6f..faf740f 100755 --- a/deployments/project-configs.sh +++ b/deployments/project-configs.sh @@ -12,6 +12,7 @@ configured_projects="Website-NIAEFEUP tts-fe nijobs-fe nijobs-be" declare -A project_port declare -A project_dotenv_location declare -A project_docker_flags +declare -A project_health_check_url # Website-NIAEFEUP project_port[Website-NIAEFEUP---master]=3000 @@ -38,10 +39,13 @@ project_dotenv_location[nijobs-fe---experimental]='/home/ni/niployments/deployme project_port[nijobs-be---master]=4010 project_dotenv_location[nijobs-be---master]='/home/ni/niployments/deployments/env-files/nijobs-be/master/.env.local' project_docker_flags[nijobs-be---master]='-v /home/ni/niployments/deployments/volumes-data/nijobs:/usr/src/app/static' +project_health_check_url[nijobs-be---master]="https://localhost:${project_port[nijobs-be---master]}/" ## nijobs-be staging project_port[nijobs-be---develop]=4011 project_dotenv_location[nijobs-be---develop]='/home/ni/niployments/deployments/env-files/nijobs-be/develop/.env.local' project_docker_flags[nijobs-be---develop]='-v /home/ni/niployments/deployments/volumes-data/nijobs-beta:/usr/src/app/static' +project_health_check_url[nijobs-be---develop]="https://localhost:${project_port[nijobs-be---develop]}/" + # debug example: # project_dotenv_location[nijobs-be---develop]='/home/miguel/Coding/NIAEFEUP/niployments/deployments/env-files/nijobs-be/develop/.env.local' @@ -49,4 +53,5 @@ project_docker_flags[nijobs-be---develop]='-v /home/ni/niployments/deployments/v export project_port export project_dotenv_location export project_docker_flags +export project_health_check_url export configured_projects