CDPT-2186 Create a new container to push static assets as a k8s job. #742

Merged

merged 15 commits into from Oct 25, 2024
3 changes: 3 additions & 0 deletions .env.example
@@ -58,6 +58,9 @@ AWS_S3_CUSTOM_HOST="minio.${SERVER_NAME}"
AWS_CLOUDFRONT_HOST="cdn.${SERVER_NAME}"
AWS_CLOUDFRONT_SCHEME=http

# An optional git commit hash, for running build-s3-push and using AmazonS3AndCloudFrontAssets locally.
IMAGE_TAG=git-hash

# SSH
LOCAL_SSH_PASSWORD=ssh-password

9 changes: 9 additions & 0 deletions .github/workflows/build.yml
@@ -39,6 +39,7 @@ jobs:
--build-arg COMPOSER_USER="$COMPOSER_USER" --build-arg COMPOSER_PASS="$COMPOSER_PASS" \
--build-arg ACF_PRO_LICENSE="$ACF_PRO_LICENSE" --build-arg ACF_PRO_PASS="$ACF_PRO_PASS" \
--build-arg AS3CF_PRO_USER="$AS3CF_PRO_USER" --build-arg AS3CF_PRO_PASS="$AS3CF_PRO_PASS" \
--build-arg IMAGE_TAG="$IMAGE_TAG" \
--target build-fpm .

docker image build -t $REGISTRY/$REPOSITORY:nginx-$IMAGE_TAG \
@@ -50,11 +51,19 @@ jobs:
docker image build -t $REGISTRY/$REPOSITORY:cron-$IMAGE_TAG \
--target build-cron .

docker image build -t $REGISTRY/$REPOSITORY:s3-push-$IMAGE_TAG \
--build-arg COMPOSER_USER --build-arg COMPOSER_PASS \
--build-arg ACF_PRO_LICENSE --build-arg ACF_PRO_PASS \
--build-arg AS3CF_PRO_USER --build-arg AS3CF_PRO_PASS \
--build-arg IMAGE_TAG \
--target build-s3-push .

# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░

docker image push $REGISTRY/$REPOSITORY:fpm-$IMAGE_TAG
docker image push $REGISTRY/$REPOSITORY:nginx-$IMAGE_TAG
docker image push $REGISTRY/$REPOSITORY:cron-$IMAGE_TAG
docker image push $REGISTRY/$REPOSITORY:s3-push-$IMAGE_TAG

# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░

1 change: 1 addition & 0 deletions .github/workflows/deploy.yml
@@ -35,6 +35,7 @@ jobs:
IMAGE_TAG_NGINX: "nginx-${{ github.sha }}"
IMAGE_TAG_FPM: "fpm-${{ github.sha }}"
IMAGE_TAG_CRON: "cron-${{ github.sha }}"
IMAGE_TAG_S3_PUSH: "s3-push-${{ github.sha }}"
GOV_NOTIFY_API_KEY: ${{ secrets.GOV_NOTIFY_API_KEY }}
ACF_PRO_LICENSE: ${{ secrets.ACF_PRO_LICENSE }}
AS3CF_PRO_LICENCE: ${{ secrets.AS3CF_PRO_LICENCE }}
37 changes: 37 additions & 0 deletions Dockerfile
@@ -228,6 +228,11 @@ COPY --from=build-fpm-composer ${path}/vendor vendor
# non-root
USER 101

# Set IMAGE_TAG at build time; we don't want this container to be run with an incorrect IMAGE_TAG.
# Set towards the end of the Dockerfile to benefit from caching.
ARG IMAGE_TAG
ENV IMAGE_TAG=$IMAGE_TAG


# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░

@@ -301,3 +306,35 @@ USER 3001
WORKDIR /home/crooner

ENTRYPOINT ["/bin/sh", "-c", "cron-start"]

# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░

# S3 Pusher

# Use the same version as the cron image (to benefit from caching).
FROM alpine:${version_cron_alpine} AS build-s3-push

ARG user=s3pusher
RUN addgroup --gid 3001 ${user} && adduser -D -G ${user} -g "${user} user" -u 3001 ${user}

RUN apk add --no-cache aws-cli jq

WORKDIR /usr/bin

COPY deploy/config/init/s3-push-start.sh ./s3-push-start
RUN chmod +x s3-push-start

USER 3001

# Go home...
WORKDIR /home/s3pusher
# Grab assets for pushing to s3
COPY --from=build-fpm-composer /var/www/html/vendor-assets ./
COPY --from=assets-build /node/dist public/app/themes/clarity/dist/

# Set IMAGE_TAG at build time; we don't want this container to be run with an incorrect IMAGE_TAG.
# Set towards the end of the Dockerfile to benefit from caching.
ARG IMAGE_TAG
ENV IMAGE_TAG=$IMAGE_TAG

ENTRYPOINT ["/bin/sh", "-c", "s3-push-start"]
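
# For building and running this stage locally, see bin/local-run-s3-push.sh.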
31 changes: 31 additions & 0 deletions bin/local-run-s3-push.sh
@@ -0,0 +1,31 @@
#!/usr/bin/env bash

# This script will build an s3-push image and run the container.
# When the container runs, assets will be pushed to the S3 bucket.
# This script is meant to be run locally (for testing/development) and not in the CI/CD pipeline.

# Prerequisites:
# - Have the .env file in the project root.
# - Have the minio server running locally.

# Run the script from the project root with the following command:
# $ bin/local-run-s3-push.sh

# Load the environment variables from the .env file.
set -a && source .env && set +a

docker image build -t intranet-s3-push:latest \
--build-arg COMPOSER_USER --build-arg COMPOSER_PASS \
--build-arg ACF_PRO_LICENSE --build-arg ACF_PRO_PASS \
--build-arg AS3CF_PRO_USER --build-arg AS3CF_PRO_PASS \
--build-arg IMAGE_TAG \
--target build-s3-push .

# Run the container with env vars from .env,
# and a custom S3 endpoint because we are using minio locally.
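# Note: host.docker.internal resolves to the host on Docker Desktop (macOS/Windows);
# on Linux you may need to add --add-host=host.docker.internal:host-gateway to the
# run command below.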
docker run --rm -it \
-e AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY \
-e AWS_S3_BUCKET \
-e AWS_ENDPOINT_URL=http://host.docker.internal:9000 \
intranet-s3-push:latest
263 changes: 263 additions & 0 deletions deploy/config/init/s3-push-start.sh
@@ -0,0 +1,263 @@
#!/bin/sh

export AWS_CLI_ARGS=""
# Truncate $IMAGE_TAG to 8 chars.
export IMAGE_TAG=$(echo $IMAGE_TAG | cut -c1-8)
export S3_DESTINATION="s3://$AWS_S3_BUCKET/build/$IMAGE_TAG"
export S3_MANIFESTS="s3://$AWS_S3_BUCKET/build/manifests/"
export S3_MANIFEST="s3://$AWS_S3_BUCKET/build/manifests/$IMAGE_TAG.json"
export S3_SUMMARY="s3://$AWS_S3_BUCKET/build/manifests/summary.jsonl"
export TIMESTAMP=$(date +%s)
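
# The variables above give a bucket layout like this (illustrative, for an
# IMAGE_TAG of "abc12345"):
# s3://$AWS_S3_BUCKET/build/abc12345/...            <- synced assets
# s3://$AWS_S3_BUCKET/build/manifests/abc12345.json <- per-build manifest
# s3://$AWS_S3_BUCKET/build/manifests/summary.jsonl <- one json line per build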


# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░
# 1️⃣ Function to handle errors
# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░

# Accepts 2 arguments: the return code of the aws command and the command itself.
# The AWS CLI return codes are:
# 0: The service responded with an HTTP response status code of 200 and there were
# no errors from either the CLI or the service the request was made to.
# 1: At least one or more s3 transfers failed for the command executed.
# 2: The meaning of this return code depends on the command being run.

catch_error() {
if [ $1 -ne 0 ]; then
echo "Error: command \`$2\` failed with return code $1"
exit $1
fi
}
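
# Example usage: catch_error $? "aws s3 cp ..." exits the script with the aws
# command's return code if it was non-zero.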


# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░
# 2️⃣ Prepare CLI arguments
# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░

# If $AWS_ENDPOINT_URL is set and it's not an empty string, append to the AWS CLI args.
# This allows for localhost testing with minio.
if [ -n "$AWS_ENDPOINT_URL" ]; then
export AWS_CLI_ARGS="$AWS_CLI_ARGS --endpoint-url $AWS_ENDPOINT_URL"
fi
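
# For example, when run locally via bin/local-run-s3-push.sh, this becomes:
# AWS_CLI_ARGS=" --endpoint-url http://host.docker.internal:9000"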


# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░
# 3️⃣ Sync files to S3
# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░

echo "Syncing assets to $S3_DESTINATION ..."

# Sync the contents of the static folder to the s3 bucket
aws $AWS_CLI_ARGS s3 sync ./public $S3_DESTINATION
catch_error $? "aws s3 sync ./public $S3_DESTINATION"


# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░
# 4️⃣ Get a list of uploaded files
# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░

echo "Fetching list of uploaded files..."

# Get a list of all the files that were uploaded. Note: the keys returned by
# s3 ls --recursive include the "build/$IMAGE_TAG" prefix.
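# s3 ls --recursive prints date, time, size and key columns, e.g. (illustrative):
# 2024-10-25 10:00:00   1234 build/abc12345/app/themes/clarity/dist/css/app.min.css
# awk '{print $4}' keeps only the key.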
UPLOADED_FILES=$(aws $AWS_CLI_ARGS s3 ls $S3_DESTINATION/ --recursive | awk '{print $4}')
catch_error $? "aws s3 ls $S3_DESTINATION/ --recursive"


# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░
# 5️⃣ Verify file counts
# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░

echo "Verifying file counts..."

# Verify that the number of files uploaded matches the number of files in the public folder.
# This is in addition to s3 sync's built-in verification.
LOCAL_FILES_COUNT=$(find ./public -type f | wc -l)
UPLOADED_FILES_COUNT=$(echo "$UPLOADED_FILES" | wc -l)

if [ $LOCAL_FILES_COUNT -ne $UPLOADED_FILES_COUNT ]; then
echo "Error: The number of uploaded files ($UPLOADED_FILES_COUNT) does not match the number of local files ($LOCAL_FILES_COUNT)"
exit 1
fi


# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░
# 6️⃣ Copy the list of uploaded files to S3
# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░

echo "Copying manifest to $S3_MANIFEST..."

# Use jq to parse the line-separated $UPLOADED_FILES variable into a json array.
echo "$UPLOADED_FILES" | jq -R -s '{timestamp: '$TIMESTAMP', build: "'$IMAGE_TAG'", files: split("\n")[:-1]}' > ./manifest.json

aws $AWS_CLI_ARGS s3 cp ./manifest.json $S3_MANIFEST
catch_error $? "aws s3 cp ./manifest.json $S3_MANIFEST"


# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░
# 7️⃣ Append this manifest to the summary
# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░

echo "Getting summary file..."

MANIFESTS_LS=$(aws $AWS_CLI_ARGS s3 ls $S3_MANIFESTS | awk '{print $4}')
catch_error $? "aws s3 ls $S3_MANIFESTS"

# Check if the summary file exists.
SUMMARY_EXISTS=$(echo "$MANIFESTS_LS" | grep -q "^summary.jsonl$" && echo "true" || echo "false")

if [ "$SUMMARY_EXISTS" = "true" ]; then
echo "Summary file exists. Downloading..."
aws $AWS_CLI_ARGS s3 cp $S3_SUMMARY ./summary.jsonl
catch_error $? "aws s3 cp $S3_SUMMARY ./summary.jsonl"
else
echo "Summary file does not exist. Creating..."
touch ./summary.jsonl
fi

echo "Appending manifest to summary..."
echo '{"timestamp": '$TIMESTAMP', "build": "'$IMAGE_TAG'"}' >> ./summary.jsonl

echo "Copying summary to S3..."
aws $AWS_CLI_ARGS s3 cp ./summary.jsonl $S3_SUMMARY
catch_error $? "aws s3 cp ./summary.jsonl $S3_SUMMARY"


# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░
# 8️⃣ Manage the lifecycle of old builds
# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░

# Here we will:
# - Delete any builds that have been marked for deletion with the deleteAfter property
# (so long as the current time is greater than the deleteAfter value).
# - Mark all builds except the latest 5 for deletion, with a deleteAfter property.

# An example of what will happen:
# - The application checks the summary file to see if its build is still there.
# - Let's say it is; that value is cached for an hour.
# - We mark the build for deletion in the summary file (with the deleteAfter property).
# - The application's cached value will expire before the build is deleted.
# - This way, the application's cached value is never incorrect.
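
# Illustrative summary.jsonl during this lifecycle (values are examples):
# {"timestamp": 1729700000, "build": "old00001", "deleteAfter": 1729790000} <- deleted once now > deleteAfter
# {"timestamp": 1729850000, "build": "abc12345"}                            <- recent build, kept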

# This function deletes a build from the S3 bucket; it accepts a build tag as an argument.
delete_build () {

# 🅐 Remove the build from the summary file first.
echo "Removing build $1 from $S3_SUMMARY..."

cat ./summary.jsonl | jq -s -c 'map(select(.build != "'$1'")) .[]' > ./summary-tmp.jsonl
catch_error $? "jq removing build from summary"
mv ./summary-tmp.jsonl ./summary.jsonl

# 🅑 Copy the revised summary to S3
aws $AWS_CLI_ARGS s3 cp ./summary.jsonl $S3_SUMMARY
catch_error $? "aws s3 cp ./summary.jsonl $S3_SUMMARY"

# 🅒 Next, delete the build folder from the S3 bucket.
echo "Removing build $1 from s3://$AWS_S3_BUCKET/build/$1..."

aws $AWS_CLI_ARGS s3 rm s3://$AWS_S3_BUCKET/build/$1 --recursive
catch_error $? "aws s3 rm s3://$AWS_S3_BUCKET/build/$1 --recursive"


# 🅓 Finally, remove the manifest file from the S3 bucket.
echo "Removing manifest $1 from s3://$AWS_S3_BUCKET/build/manifests/$1.json..."

aws $AWS_CLI_ARGS s3 rm s3://$AWS_S3_BUCKET/build/manifests/$1.json
catch_error $? "aws s3 rm s3://$AWS_S3_BUCKET/build/manifests/$1.json"

echo "Build $1 removed."
}

BUILDS_TO_DELETE=$(
cat ./summary.jsonl |
jq -s -c -r '
# Identify the entries where the deleteAfter property is set
# and the current time is greater than the deleteAfter value.
map(select(.deleteAfter and .deleteAfter < '$TIMESTAMP')) |
# Get unique values by build property
unique_by(.build) |
# Return only the build property
map(.build)
.[]
'
)
catch_error $? "jq getting builds to delete"


if [ -z "$BUILDS_TO_DELETE" ]; then
BUILDS_TO_DELETE_COUNT="0"
else
BUILDS_TO_DELETE_COUNT=$(echo "$BUILDS_TO_DELETE" | wc -l)
BUILDS_TO_DELETE_CSV=$(echo "$BUILDS_TO_DELETE" | tr '\n' ',' | sed 's/,$//')

echo "Deleting the following builds: $BUILDS_TO_DELETE_CSV"

for row in $BUILDS_TO_DELETE; do
delete_build ${row}
done
fi

# This function flags builds for deletion (with the deleteAfter property) in the
# summary file; it accepts a list of build tags (separated by line breaks) as an argument.

flag_builds () {

# 🅐 Prepare a csv string to use in jq.
# Note: quote "$1" so the line breaks between build tags are preserved.
BUILDS_TO_FLAG_CSV=$(echo "$1" | tr '\n' ',' | sed 's/,$//')
DELETE_AFTER=$(expr $TIMESTAMP + 86400) # 24 hours from now

echo "Marking the following builds for deletion: $BUILDS_TO_FLAG_CSV"

# 🅑 Use jq to transform the contents of summary.jsonl
cat ./summary.jsonl | jq -s -c '
map(
if .build | IN ('$BUILDS_TO_FLAG_CSV') then
. + {deleteAfter: '$DELETE_AFTER'}
else
.
end
)
.[]
' > ./summary-tmp.jsonl
catch_error $? "jq setting deleteAfter property"

mv ./summary-tmp.jsonl ./summary.jsonl

# 🅒 Copy the updated file to S3
echo "Copying summary (with builds flagged for deletion) to S3..."
aws $AWS_CLI_ARGS s3 cp ./summary.jsonl $S3_SUMMARY
catch_error $? "aws s3 cp ./summary.jsonl $S3_SUMMARY"

}

# Get the oldest builds (excluding the newest 5); they will be flagged for deletion.
BUILDS_TO_FLAG=$(
cat ./summary.jsonl |
jq -s -c '
unique_by(.build) |
sort_by(.timestamp) |
# Filter out entries where the deleteAfter property is already set
map(select(.deleteAfter == null)) |
# Get all but the last 5 builds
.[:-5] |
map(.build)
.[]
'
)
catch_error $? "jq getting builds to flag"

if [ -z "$BUILDS_TO_FLAG" ]; then
BUILDS_TO_FLAG_COUNT="0"
else
BUILDS_TO_FLAG_COUNT=$(echo "$BUILDS_TO_FLAG" | wc -l)
flag_builds "$BUILDS_TO_FLAG"
fi


# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░
# 9️⃣ Report on actions taken
# ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░ ░░

echo "Assets pushed to: $S3_DESTINATION"
echo "Manifest pushed to: $S3_MANIFEST"
echo "Summary pushed to: $S3_SUMMARY"
echo "Builds deleted: $BUILDS_TO_DELETE_COUNT"
echo "Builds flagged for deletion: $BUILDS_TO_FLAG_COUNT"