-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
adds a reusable GKE module --------- Signed-off-by: Pris Nasrat <[email protected]> Co-authored-by: Pris Nasrat <[email protected]>
- Loading branch information
Showing
3 changed files
with
441 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,303 @@ | ||
terraform {
  required_providers {
    google = {
      source = "hashicorp/google"
    }
    # The node pool resource in this module is created with
    # `provider = google-beta`; modules must declare every provider they
    # use, so list it explicitly rather than relying on Terraform's
    # implied `hashicorp/` namespace resolution.
    google-beta = {
      source = "hashicorp/google-beta"
    }
  }
}
|
||
# Dedicated, least-privilege service account attached to every node pool
# in this cluster (instead of the over-privileged Compute default SA).
resource "google_service_account" "cluster_default" {
  project      = var.project
  account_id   = "${var.name}-gke-default"
  display_name = "${var.name} GKE Default"
}
|
||
# Grants the node service account the minimal project-level roles nodes
# need (image pulls from GCR/Artifact Registry, log and metric export),
# plus any caller-supplied extras.
resource "google_project_iam_member" "cluster" {
  # NOTE: merge() gives precedence to later maps, so a key in
  # var.extra_roles that collides with a default key overrides it.
  for_each = merge({
    # Read access to the project GCR
    "gcr" = "roles/storage.objectViewer"
    # Read access to the project ACR
    "acr" = "roles/artifactregistry.reader"
    # Log writer access
    "log-writer" = "roles/logging.logWriter"
    # Metrics writer access
    "metrics-writer" = "roles/monitoring.metricWriter"
    # Monitoring viewer access
    "monitoring-viewer" = "roles/monitoring.viewer"
  }, var.extra_roles)

  project = var.project
  role    = each.value
  member  = "serviceAccount:${google_service_account.cluster_default.email}"
}
|
||
locals {
  # Resource labels applied to every node pool created by this module
  # (merged with squad_label below when node pools are defined).
  default_labels = {
    gke = var.name
  }

  # Ownership label identifying the squad responsible for this cluster.
  squad_label = {
    squad = var.squad
  }
}
|
||
# Regional, VPC-native, private-node GKE cluster. Node pools are managed
# by separate google_container_node_pool resources, not inline.
resource "google_container_cluster" "this" {
  name    = var.name
  project = var.project

  network    = var.network
  subnetwork = var.subnetwork

  # Regional control plane; node_locations pins which zones get nodes.
  location       = var.region
  node_locations = var.zones

  # Allow pod-to-pod traffic on the same node to be seen by VPC flow
  # logs / firewall rules.
  enable_intranode_visibility = true

  # GKE requires a default pool at creation; we delete it immediately and
  # manage pools explicitly (see google_container_node_pool.pools).
  remove_default_node_pool = true
  initial_node_count       = 1

  # Use Dataplane V2 (eBPF based networking)
  datapath_provider = "ADVANCED_DATAPATH"

  networking_mode = "VPC_NATIVE"
  // Keeping this empty means GKE handles the secondary pod/service CIDR creation
  ip_allocation_policy {}

  # Workload Identity: lets Kubernetes service accounts act as GCP service
  # accounts without node-level keys.
  workload_identity_config {
    workload_pool = "${var.project}.svc.id.goog"
  }

  release_channel {
    # NOTE: Toggle to "RAPID" when we want to start playing with things like gcsfuse
    channel = var.release_channel
  }

  # Configured with separate node_pool resources
  # node_config {}

  # Node auto-provisioning: only rendered when var.cluster_autoscaling is
  # enabled (the dynamic block's for_each is a 0/1-element list toggle).
  dynamic "cluster_autoscaling" {
    for_each = var.cluster_autoscaling == false ? [] : ["placeholder"]

    content {
      enabled = var.cluster_autoscaling
      # Cluster-wide CPU ceiling/floor for auto-provisioned nodes.
      resource_limits {
        resource_type = var.cluster_autoscaling_cpu_limits.resource_type
        minimum       = var.cluster_autoscaling_cpu_limits.minimum
        maximum       = var.cluster_autoscaling_cpu_limits.maximum
      }
      # Cluster-wide memory ceiling/floor for auto-provisioned nodes.
      resource_limits {
        resource_type = var.cluster_autoscaling_memory_limits.resource_type
        minimum       = var.cluster_autoscaling_memory_limits.minimum
        maximum       = var.cluster_autoscaling_memory_limits.maximum
      }
      # Defaults applied to nodes NAP creates; optional, same toggle trick.
      dynamic "auto_provisioning_defaults" {
        for_each = var.cluster_autoscaling_provisioning_defaults == null ? [] : ["placeholder"]

        content {
          # Auto-provisioned nodes run as the same SA as explicit pools.
          service_account = google_service_account.cluster_default.email
          disk_size       = var.cluster_autoscaling_provisioning_defaults.disk_size
          disk_type       = var.cluster_autoscaling_provisioning_defaults.disk_type

          dynamic "shielded_instance_config" {
            for_each = var.cluster_autoscaling_provisioning_defaults.shielded_instance_config == null ? [] : ["placeholder"]

            content {
              enable_secure_boot          = var.cluster_autoscaling_provisioning_defaults.shielded_instance_config.enable_secure_boot
              enable_integrity_monitoring = var.cluster_autoscaling_provisioning_defaults.shielded_instance_config.enable_integrity_monitoring
            }
          }
          dynamic "management" {
            for_each = var.cluster_autoscaling_provisioning_defaults.management == null ? [] : ["placeholder"]

            content {
              auto_upgrade = var.cluster_autoscaling_provisioning_defaults.management.auto_upgrade
              auto_repair  = var.cluster_autoscaling_provisioning_defaults.management.auto_repair
            }
          }
        }
      }
      autoscaling_profile = var.cluster_autoscaling_profile
    }
  }

  master_authorized_networks_config {
    # gcp_public_cidrs_access_enabled = true
    # NOTE(review): this allows the entire internet to reach the public
    # master endpoint (authn still required, but consider tightening).
    cidr_blocks {
      display_name = "Everywhere"
      cidr_block   = "0.0.0.0/0"
    }

    # TODO: Pin this to https://api.github.com/meta
    # GitHub recommends against doing this, so maybe there's a more effective way, perhaps a certain scale with a tail?
    # cidr_blocks {}
  }

  # Private nodes (no public IPs) but a publicly reachable master endpoint,
  # gated by master_authorized_networks_config above.
  private_cluster_config {
    enable_private_nodes    = true
    enable_private_endpoint = false
    master_ipv4_cidr_block  = var.master_ipv4_cidr_block
    # Let the master endpoint be reached from any GCP region.
    master_global_access_config {
      enabled = true
    }
    # This doesn't do what you think it does
    # private_endpoint_subnetwork = var.subnetwork
  }

  dns_config {
    # Enable more efficient DNS resolution by leveraging the GCP backplane (instead of kube-dns)
    # Technically this adds cloud DNS billing, but the cost is negligible
    # https://cloud.google.com/kubernetes-engine/docs/how-to/cloud-dns
    cluster_dns       = "CLOUD_DNS"
    cluster_dns_scope = "CLUSTER_SCOPE"
  }

  # TODO: These probably could be configurable
  addons_config {
    http_load_balancing {
      disabled = false
    }
    gke_backup_agent_config {
      enabled = false
    }
    config_connector_config {
      enabled = false
    }
    gcs_fuse_csi_driver_config {
      enabled = true
    }
  }

  # Scrape control-plane components and workloads with managed Prometheus.
  monitoring_config {
    enable_components = ["SYSTEM_COMPONENTS", "APISERVER", "SCHEDULER", "CONTROLLER_MANAGER", "STORAGE", "POD"]
    managed_prometheus { enabled = true }

  }

  # This can't hurt... right?
  cost_management_config {
    enabled = true
  }

  # Cluster create/update/delete can legitimately take tens of minutes.
  timeouts {
    create = "30m"
    update = "30m"
    delete = "30m"
  }

  # The default pool is deleted after creation, so GKE's reported count
  # would otherwise show perpetual drift against initial_node_count.
  lifecycle {
    ignore_changes = [initial_node_count]
  }

  depends_on = [google_service_account.cluster_default]
}
|
||
# One node pool per entry in var.pools, keyed by pool name. All pools run
# as the shared cluster_default service account.
resource "google_container_node_pool" "pools" {
  for_each = var.pools
  provider = google-beta

  name     = each.key
  cluster  = google_container_cluster.this.name
  project  = var.project
  location = google_container_cluster.this.location

  network_config {
    # NOTE(review): false here overrides the cluster-level
    # enable_private_nodes = true for these pools' nodes — confirm this
    # is intentional (nodes would get public IPs).
    enable_private_nodes = false
    # Give each pool its own GKE-managed secondary range for pods.
    create_pod_range     = true
    pod_ipv4_cidr_block  = null
  }

  node_config {
    service_account = google_service_account.cluster_default.email
    image_type      = "COS_CONTAINERD"
    machine_type    = each.value.machine_type
    workload_metadata_config {
      # Run the GKE metadata server on these nodes (required for workload identity)
      mode = "GKE_METADATA"
    }
    # Instance metadata hardening: no legacy metadata API, no project-wide
    # SSH keys. (Booleans are coerced to "true" strings by Terraform.)
    metadata = {
      disable-legacy-endpoints = true
      block-project-ssh-keys   = true
    }

    disk_type    = each.value.disk_type
    disk_size_gb = each.value.disk_size

    # Optional local SSDs backing ephemeral storage; only rendered when
    # the pool requests a non-zero count.
    dynamic "ephemeral_storage_local_ssd_config" {
      for_each = each.value.ephemeral_storage_local_ssd_count > 0 ? [1] : []
      content {
        local_ssd_count = each.value.ephemeral_storage_local_ssd_count
      }
    }

    # Don't set legacy scopes
    # oauth_scopes = []

    # Enable google vNIC driver
    gvnic {
      enabled = true
    }

    # Enable google container filesystem (required for image streaming)
    gcfs_config {
      enabled = true
    }

    # Optional gVisor sandboxing for pools that request it.
    dynamic "sandbox_config" {
      for_each = each.value.gvisor ? [1] : []
      content {
        sandbox_type = "gvisor"
      }
    }

    spot            = each.value.spot
    labels          = each.value.labels
    resource_labels = merge(local.default_labels, local.squad_label)

    # One taint block per entry in the pool's taint list.
    dynamic "taint" {
      for_each = each.value.taints
      content {
        key    = taint.value.key
        value  = taint.value.value
        effect = taint.value.effect
      }
    }
  }

  autoscaling {
    min_node_count = each.value.min_node_count
    max_node_count = each.value.max_node_count
  }

  management {
    auto_repair  = true
    auto_upgrade = true
  }
}
|
||
# Allow GKE master to hit non 443 ports for webhook/admission controllers.
# By default GKE only opens 443/10250 from the control plane to nodes, so
# custom webhook ports must be opened explicitly.
#
# https://github.com/kubernetes/kubernetes/issues/79739
resource "google_compute_firewall" "master_webhook" {
  project = var.project
  network = var.network

  name        = "${var.name}-master-webhook"
  description = "Allow GKE master to hit non 443 ports for webhook/admission controllers"
  direction   = "INGRESS"

  # With private nodes, the control plane reaches nodes from the master's
  # reserved peering range (master_ipv4_cidr_block), NOT from the public
  # endpoint address — a rule scoped to the endpoint /32 never matches
  # webhook traffic.
  source_ranges = [var.master_ipv4_cidr_block]
  source_tags   = []
  # GKE tags all nodes of a cluster with "gke-<cluster-name>".
  target_tags = ["gke-${google_container_cluster.this.name}"]

  allow {
    protocol = "tcp"
    ports = [
      "8443",  # common webhook/admission controller port
      "9443",  # controller-runtime default webhook port
      "15017", # Istio sidecar injector / validation webhook
    ]
  }

  depends_on = [google_container_cluster.this]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
output "cluster_name" {
  description = "Name of the created GKE cluster."
  value       = google_container_cluster.this.name
}
|
||
output "cluster_id" {
  description = "Fully-qualified resource ID of the GKE cluster."
  value       = google_container_cluster.this.id
}
|
||
output "service_account_email" {
  description = "Email of the default node service account used by all node pools."
  value       = google_service_account.cluster_default.email
}
|
||
output "cluster_endpoint" {
  description = "IP address of the cluster's Kubernetes API endpoint."
  value       = google_container_cluster.this.endpoint
  sensitive   = true
}
|
||
output "cluster_ca_certificate" {
  description = "Base64-encoded public CA certificate of the cluster's API server."
  value       = google_container_cluster.this.master_auth[0].cluster_ca_certificate
  sensitive   = true
}
Oops, something went wrong.