Delete GCP resources #236
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This workflow is designed to delete old Google Cloud Platform (GCP) resources to save on costs. | |
# | |
# 1. Deletes specific instances in GCP older than a defined number of days. | |
# 2. Deletes instance templates older than a set number of days. | |
# 3. Deletes older disks not currently in use, with certain ones prefixed by commit hashes or "zebrad-". | |
# 4. Deletes cache images from GCP, retaining a specified number of the latest images for certain types like zebrad checkpoint cache, zebrad tip cache, and lightwalletd + zebrad tip cache. | |
# 5. Deletes unused artifacts from Google Artifact Registry older than a defined number of hours while retaining the latest few. | |
# | |
# It uses the gcloud CLI for most of its operations and also leverages specific GitHub Actions like the gcr-cleaner for deleting old images from the Google Artifact Registry. | |
# The workflow is scheduled to run daily at 0700 UTC. | |
name: Delete GCP resources | |
on: | |
# Run daily, when most devs aren't working | |
# 0700 UTC is after AEST working hours but before ET working hours | |
schedule: | |
- cron: "0 7 * * *" | |
workflow_dispatch: | |
env: | |
# Delete all resources created before $DELETE_INSTANCE_DAYS days ago. | |
# We keep this short to reduce CPU, RAM, and storage costs. | |
DELETE_INSTANCE_DAYS: 3 | |
# Delete all other resources created before $DELETE_AGE_DAYS days ago. | |
# We keep this short to reduce storage costs. | |
DELETE_AGE_DAYS: 2 | |
# But keep the latest $KEEP_LATEST_IMAGE_COUNT images of each type. | |
# We keep this small to reduce storage costs. | |
KEEP_LATEST_IMAGE_COUNT: 2 | |
# Delete all artifacts in registry created before $DELETE_IMAGE_HOURS hours ago. | |
# We keep this long enough for PRs that are still on the same commit can re-run with the same image. | |
DELETE_IMAGE_HOURS: 504h # 21 days | |
jobs: | |
delete-resources: | |
name: Delete old GCP resources | |
runs-on: ubuntu-latest | |
permissions: | |
contents: 'read' | |
id-token: 'write' | |
steps: | |
- uses: actions/[email protected] | |
with: | |
persist-credentials: false | |
# Setup gcloud CLI | |
- name: Authenticate to Google Cloud | |
id: auth | |
uses: google-github-actions/[email protected] | |
with: | |
retries: '3' | |
workload_identity_provider: '${{ vars.GCP_WIF }}' | |
service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}' | |
- name: Set up Cloud SDK | |
uses: google-github-actions/[email protected] | |
# Deletes all mainnet and testnet instances older than $DELETE_INSTANCE_DAYS days. | |
# | |
# We only delete instances that end in 7 or more hex characters, | |
# to avoid deleting managed instance groups and manually created instances. | |
# | |
# ${INSTANCE_AND_ZONE} expands to: | |
# <instance-name> --zone=<zone-name> | |
# so it can't be shell-quoted. | |
- name: Delete old instances | |
run: | | |
./.github/workflows/scripts/gcp-delete-old-instances.sh | |
# Deletes all the instance templates older than $DELETE_AGE_DAYS days. | |
- name: Delete old instance templates | |
run: | | |
./.github/workflows/scripts/gcp-delete-old-templates.sh | |
# Deletes all mainnet and testnet disks older than $DELETE_AGE_DAYS days. | |
# | |
# Disks that are attached to an instance template can't be deleted, so it is safe to try to delete all disks here. | |
# | |
# ${DISK_AND_LOCATION} expands to: | |
# <disk-name> --[zone|region]=<location-name> | |
# so it can't be shell-quoted. | |
- name: Delete old disks | |
run: | | |
./.github/workflows/scripts/gcp-delete-old-disks.sh | |
# Deletes mainnet and testnet cache images older than $DELETE_AGE_DAYS days. | |
# | |
# Keeps all images younger than $DELETE_AGE_DAYS. | |
# Also keeps $KEEP_LATEST_IMAGE_COUNT older images of each type, for each network: | |
# - zebrad checkpoint cache | |
# - zebrad tip cache | |
# - lightwalletd + zebrad tip cache | |
# | |
# TODO: | |
# - refactor out repeated shell script code | |
- name: Delete old cache images | |
run: | | |
./.github/workflows/scripts/gcp-delete-old-cache-images.sh | |
# We're using a generic approach here, which allows multiple registries to be included, | |
# even those not related to GCP. Enough reason to create a separate job. | |
# | |
# The same artifacts are used for both mainnet and testnet. | |
clean-registries: | |
name: Delete unused artifacts in registry | |
runs-on: ubuntu-latest | |
permissions: | |
contents: 'read' | |
id-token: 'write' | |
steps: | |
- uses: actions/[email protected] | |
with: | |
persist-credentials: false | |
# Setup gcloud CLI | |
- name: Authenticate to Google Cloud | |
id: auth | |
uses: google-github-actions/[email protected] | |
with: | |
retries: '3' | |
workload_identity_provider: '${{ vars.GCP_WIF }}' | |
service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}' | |
token_format: 'access_token' | |
- name: Login to Google Artifact Registry | |
uses: docker/[email protected] | |
with: | |
registry: us-docker.pkg.dev | |
username: oauth2accesstoken | |
password: ${{ steps.auth.outputs.access_token }} | |
# Deletes all images older than $DELETE_IMAGE_HOURS days. | |
- uses: 'docker://us-docker.pkg.dev/gcr-cleaner/gcr-cleaner/gcr-cleaner-cli' | |
continue-on-error: true # TODO: remove after fixig https://github.com/ZcashFoundation/zebra/issues/5933 | |
# Refer to the official documentation to understand available arguments: | |
# https://github.com/GoogleCloudPlatform/gcr-cleaner | |
with: | |
args: >- | |
-repo=us-docker.pkg.dev/${{ vars.GCP_PROJECT }}/zebra/zebrad-test | |
-grace=${{ env.DELETE_IMAGE_HOURS }} | |
-keep=${{ env.KEEP_LATEST_IMAGE_COUNT }} |