From 5aa2f51247bb01ab48f77228be598630a26f6150 Mon Sep 17 00:00:00 2001 From: Stephen Soltesz Date: Thu, 24 Aug 2023 13:40:02 -0400 Subject: [PATCH] Rename data-pipeline configurations (us-central1 migration) (#431) * Change data-pipeline cluster name * Use _CLUSTER_NAME in cloudbuild.yaml * Remove obsolete create script * Write to new etl bucket in us-central1 * Remove legacy cloudbuild config --- apply-cluster.sh | 2 +- cloudbuild.yaml | 17 +-- create-pipeline-cluster.sh | 107 ------------------ .../deployments/etl-gardener-universal.yml | 2 +- .../persistentvolumes/persistent-volumes.yml | 0 .../persistentvolumes/storage-class.yml | 0 .../services/etl-gardener-service.yml | 0 .../services/etl-gardener-status.yml | 0 8 files changed, 3 insertions(+), 125 deletions(-) delete mode 100755 create-pipeline-cluster.sh rename k8s/{data-processing => data-pipeline}/deployments/etl-gardener-universal.yml (96%) rename k8s/{data-processing => data-pipeline}/persistentvolumes/persistent-volumes.yml (100%) rename k8s/{data-processing => data-pipeline}/persistentvolumes/storage-class.yml (100%) rename k8s/{data-processing => data-pipeline}/services/etl-gardener-service.yml (100%) rename k8s/{data-processing => data-pipeline}/services/etl-gardener-status.yml (100%) diff --git a/apply-cluster.sh b/apply-cluster.sh index 48618925..a2ffc81f 100755 --- a/apply-cluster.sh +++ b/apply-cluster.sh @@ -31,7 +31,7 @@ sed -i \ # Create the configmap kubectl create configmap gardener-config --dry-run \ --from-file config/config.yml \ - -o yaml > k8s/data-processing/deployments/config.yml + -o yaml > k8s/${CLUSTER}/deployments/config.yml # Apply templates find k8s/${CLUSTER}/ -type f -exec \ diff --git a/cloudbuild.yaml b/cloudbuild.yaml index 36d5b863..83a98e61 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -90,19 +90,4 @@ steps: ] env: - CLOUDSDK_COMPUTE_REGION=$_CLUSTER_REGION - - CLOUDSDK_CONTAINER_CLUSTER=$_CLUSTER - -# LEGACY CLUSTER: Run apply-cluster.sh -# TODO: delete once legacy cluster services are retired. -#- name: gcr.io/cloud-builders/kubectl -# id: "Deploy legacy gardener configurations" -# entrypoint: /bin/bash -# args: [ -# '-c', '/builder/kubectl.bash version && ./apply-cluster.sh' -# ] -# env: -# - CLOUDSDK_COMPUTE_ZONE=$_LEGACY_CLUSTER_ZONE -# - CLOUDSDK_CONTAINER_CLUSTER=$_LEGACY_CLUSTER -# # Defaults to zero. Only set in sandbox. -# - DATE_SKIP=$_DATE_SKIP -# - TASK_FILE_SKIP=$_TASK_FILE_SKIP + - CLOUDSDK_CONTAINER_CLUSTER=$_CLUSTER_NAME diff --git a/create-pipeline-cluster.sh b/create-pipeline-cluster.sh deleted file mode 100755 index b4e5dd4a..00000000 --- a/create-pipeline-cluster.sh +++ /dev/null @@ -1,107 +0,0 @@ -#!/bin/bash -# -# Configure cluster, network, firewall and node-pools for gardener and etl. - -set -x -set -e - -USAGE="$0 " -PROJECT=${1:?Please provide the GCP project id, e.g. mlab-sandbox: $USAGE} -REGION=${2:?Please provide the cluster region, e.g. us-central1: $USAGE} - -gcloud config set project $PROJECT -gcloud config set compute/region $REGION -gcloud config set container/cluster data-processing - -# The network for comms among the components has to be created first. -if gcloud compute networks list | grep "^data-processing "; then - # TODO - networks can be updated!! - echo "Network already exists" -else - gcloud compute networks create data-processing --subnet-mode=custom \ - --description="Network for communication among backend processing services." -fi - -# This allows internal connections between components. -if gcloud compute firewall-rules list | grep "^dp-allow-internal "; then - # TODO - firewall rules can be updated!! - echo "Firewall rule dp-allow-internal already exists" -else - gcloud compute firewall-rules create \ - dp-allow-internal --network=data-processing \ - --allow=tcp:0-65535,udp:0-65535,icmp --direction=INGRESS \ - --source-ranges=10.128.0.0/9,10.100.0.0/16 \ - --description='Allow internal traffic from anywhere' -fi - -# Then add the subnet -# Subnet has the same name and address range across projects, but each is in a distinct (data-processing) VPC network." -if gcloud compute networks subnets list --network=data-processing | grep "^dp-gardener "; then - # TODO - subnets can be updated!! - echo "subnet data-processing/dp-gardener already exists" -else - gcloud compute networks subnets create dp-gardener \ - --network=data-processing --range=10.100.0.0/16 \ - --enable-private-ip-google-access \ - --description="Subnet for gardener,etl,annotation-service." -fi - -# And define the static IP address that will be used by etl parsers to reach gardener. -if gcloud compute addresses list --filter=region=\"region:($REGION)\" | grep "^etl-gardener "; then - echo "subnet data-processing/dp-gardener already exists" -else - gcloud compute addresses create etl-gardener \ - --region=$REGION - --subnet=dp-gardener --addresses=10.100.1.2 -fi - -# Now we can create the cluster. -# It includes a default node-pool, though it isn't actually needed? -gcloud container clusters create data-processing \ - --network=data-processing --subnetwork=dp-gardener \ - --enable-autorepair --enable-autoupgrade \ - --scopes=bigquery,taskqueue,compute-rw,storage-ro,service-control,service-management,datastore \ - --num-nodes 2 --image-type=cos --machine-type=n1-standard-4 \ - --node-labels=gardener-node=true --labels=data-processing=true - - -# Define or update the role for etl-parsers -gcloud --project=$PROJECT iam roles update etl_parser --file=etl_parser_role.json -# Update service-account with the appropriate ACLS. -gcloud projects add-iam-policy-binding mlab-sandbox \ - --member=serviceAccount:etl-k8s-parser@mlab-sandbox.iam.gserviceaccount.com \ - --role=projects/mlab-sandbox/roles/parser_k8s - -# Set up node pools for parser and gardener pods. -# Parser needs write access to storage. Gardener needs only read access. -# TODO - narrow the cloud-platform scope? https://github.com/m-lab/etl-gardener/issues/308 -if gcloud container node-pools describe parser-pool; then - gcloud container node-pools update parser-pool \ - --num-nodes=1 --machine-type=n1-standard-8 \ - --enable-autorepair --enable-autoupgrade \ - --scopes storage-rw,compute-rw,datastore,cloud-platform \ - --node-labels=parser-node=true \ - --service-account=etl-k8s-parser@{$PROJECT}.iam.gserviceaccount.com -else - gcloud container node-pools create parser-pool \ - --num-nodes=1 --machine-type=n1-standard-8 \ - --enable-autorepair --enable-autoupgrade \ - --scopes storage-rw,compute-rw,datastore,cloud-platform \ - --node-labels=parser-node=true \ - --service-account=etl-k8s-parser@{$PROJECT}.iam.gserviceaccount.com -fi - -# TODO - narrow the cloud-platform scope? https://github.com/m-lab/etl-gardener/issues/308 -gcloud container node-pools create gardener-pool \ - --num-nodes=1 --machine-type=n1-standard-2 \ - --enable-autorepair --enable-autoupgrade \ - --scopes storage-ro,compute-rw,datastore,cloud-platform \ - --node-labels=gardener-node=true \ - --service-account=etl-k8s-parser@{$PROJECT}.iam.gserviceaccount.com - -# Setup node-pool for prometheus -gcloud container node-pools create prometheus-pool \ - --num-nodes=1 --machine-type=n1-standard-4 \ - --enable-autorepair --enable-autoupgrade \ - --node-labels=prometheus-node=true - diff --git a/k8s/data-processing/deployments/etl-gardener-universal.yml b/k8s/data-pipeline/deployments/etl-gardener-universal.yml similarity index 96% rename from k8s/data-processing/deployments/etl-gardener-universal.yml rename to k8s/data-pipeline/deployments/etl-gardener-universal.yml index 4b8856e9..0b8a5fa3 100644 --- a/k8s/data-processing/deployments/etl-gardener-universal.yml +++ b/k8s/data-pipeline/deployments/etl-gardener-universal.yml @@ -44,7 +44,7 @@ spec: "-project={{GCLOUD_PROJECT}}", "-shutdown_timeout=5m", "-job_expiration_time=6h", - "-input_location=etl-{{GCLOUD_PROJECT}}", # must correspond to -output_location from etl parser. + "-input_location=etl-{{GCLOUD_PROJECT}}-us-central1", # must correspond to -output_location from etl parser. ] ports: - name: prometheus-port diff --git a/k8s/data-processing/persistentvolumes/persistent-volumes.yml b/k8s/data-pipeline/persistentvolumes/persistent-volumes.yml similarity index 100% rename from k8s/data-processing/persistentvolumes/persistent-volumes.yml rename to k8s/data-pipeline/persistentvolumes/persistent-volumes.yml diff --git a/k8s/data-processing/persistentvolumes/storage-class.yml b/k8s/data-pipeline/persistentvolumes/storage-class.yml similarity index 100% rename from k8s/data-processing/persistentvolumes/storage-class.yml rename to k8s/data-pipeline/persistentvolumes/storage-class.yml diff --git a/k8s/data-processing/services/etl-gardener-service.yml b/k8s/data-pipeline/services/etl-gardener-service.yml similarity index 100% rename from k8s/data-processing/services/etl-gardener-service.yml rename to k8s/data-pipeline/services/etl-gardener-service.yml diff --git a/k8s/data-processing/services/etl-gardener-status.yml b/k8s/data-pipeline/services/etl-gardener-status.yml similarity index 100% rename from k8s/data-processing/services/etl-gardener-status.yml rename to k8s/data-pipeline/services/etl-gardener-status.yml