From 9db114d4caa8a5f0ca30c30eef52aa3626bb3ef1 Mon Sep 17 00:00:00 2001 From: gleb Date: Mon, 30 Sep 2024 17:01:37 +0200 Subject: [PATCH] wip --- examples/gke/gke_impersonate/imperson.tf | 88 +++-------- examples/gke/gke_impersonate/main.tf | 180 ++++++++++++---------- examples/gke/gke_impersonate/providers.tf | 5 +- 3 files changed, 128 insertions(+), 145 deletions(-) diff --git a/examples/gke/gke_impersonate/imperson.tf b/examples/gke/gke_impersonate/imperson.tf index 1b16f379..ae999da3 100644 --- a/examples/gke/gke_impersonate/imperson.tf +++ b/examples/gke/gke_impersonate/imperson.tf @@ -1,74 +1,34 @@ - locals { - service_account_id = "castai-gke-tf-${substr(sha1(var.cluster_name), 0, 8)}" + service_account_id = "castai-gke-tf-${substr(sha1(var.cluster_name), 0, 8)}" } -resource "google_project_iam_custom_role" "gke_access_role" { - role_id = "castai.gkeAccess" - title = "Custom Role for GKE Access" - description = "Custom role for managing GKE resources" - project = var.project_id +data "castai_gke_user_policies" "gke" {} - permissions = [ - "container.clusters.get", - "container.clusters.update", - "container.certificateSigningRequests.approve", - "container.operations.get", - "compute.instances.get", - "compute.instances.list", - "compute.instances.create", - "compute.instances.start", - "compute.instances.stop", - "compute.instances.delete", - "compute.instances.setLabels", - "compute.instances.setServiceAccount", - "compute.instances.setMetadata", - "compute.instances.setTags", - "compute.instanceGroupManagers.get", - "compute.instanceGroupManagers.update", - "compute.instanceGroups.get", - "compute.networks.use", - "compute.networks.useExternalIp", - "compute.subnetworks.get", - "compute.subnetworks.use", - "compute.subnetworks.useExternalIp", - "compute.addresses.use", - "compute.disks.use", - "compute.disks.create", - "compute.disks.setLabels", - "compute.images.get", - "compute.images.useReadOnly", - "compute.instanceTemplates.get", - "compute.instanceTemplates.list", - "compute.instanceTemplates.create", - "compute.instanceTemplates.delete", - "compute.regionOperations.get", - "compute.zoneOperations.get", - "compute.zones.list", - "compute.zones.get", - "serviceusage.services.list", - "resourcemanager.projects.getIamPolicy" - ] +data "google_project" "project" { + project_id = var.project_id } -resource "google_project_iam_member" "gke_access_sa_binding" { - project = var.project_id - role = "projects/${var.project_id}/roles/castai.gkeAccess" - member = "serviceAccount:${google_service_account.client_service_account.email}" - condition { - title = "AlwaysTrueCondition" - description = "This condition is always true" - expression = "true" - } -} - - resource "google_service_account" "client_service_account" { account_id = local.service_account_id display_name = "Service account to manage ${var.cluster_name} cluster via CAST" project = var.project_id } +resource "google_project_iam_custom_role" "castai_role" { + role_id = "castai.gkeAccess.${substr(sha1(var.cluster_name), 0, 8)}.tf" + title = "Role to manage GKE cluster via CAST AI" + description = "Role to manage GKE cluster via CAST AI" + permissions = toset(data.castai_gke_user_policies.gke.policy) + project = var.project_id + stage = "GA" +} + +resource "google_project_iam_binding" "compute_manager_binding" { + project = var.project_id + role = "projects/${var.project_id}/roles/castai.gkeAccess.${substr(sha1(var.cluster_name), 0, 8)}.tf" + members = ["serviceAccount:${google_service_account.client_service_account.email}"] +} + # Configure GKE cluster and obtain the castai service account. resource "castai_gke_cluster_id" "cluster_id" { name = var.cluster_name @@ -101,11 +61,11 @@ resource "google_service_account_iam_member" "impersonation_user_binding" { role = "roles/iam.serviceAccountUser" member = "serviceAccount:${castai_gke_cluster_id.cluster_id.cast_service_account}" -# condition { -# title = "SpecificServiceAccountCondition" -# description = "Allow impersonation only for CASTAI_SERVICE_ACCOUNT" -# expression = "request.auth.claims.email == \"${castai_gke_cluster_id.cluster_id.cast_service_account}\"" -# } + # condition { + # title = "SpecificServiceAccountCondition" + # description = "Allow impersonation only for CASTAI_SERVICE_ACCOUNT" + # expression = "request.auth.claims.email == \"${castai_gke_cluster_id.cluster_id.cast_service_account}\"" + # } condition { title = "AlwaysTrueCondition" description = "This condition is always true" diff --git a/examples/gke/gke_impersonate/main.tf b/examples/gke/gke_impersonate/main.tf index 698150db..b8c87a51 100644 --- a/examples/gke/gke_impersonate/main.tf +++ b/examples/gke/gke_impersonate/main.tf @@ -1,84 +1,110 @@ -locals { - service_account_id = "castai-gke-tf-${substr(sha1(var.cluster_name), 0, 8)}" -} - -data "google_client_config" "default" {} - -resource "google_service_account" "client_service_account" { - account_id = local.service_account_id - display_name = "Service account to manage ${var.cluster_name} cluster via CAST" - project = var.project_id -} - -# Configure GKE cluster and obtain the castai service account. -resource "castai_gke_cluster_id" "cluster_id" { - name = var.cluster_name - location = var.cluster_region - project_id = var.project_id - client_service_account = google_service_account.client_service_account.email - cast_service_account = "to-be-computed" -} - -# Grant the roles/iam.serviceAccountTokenCreator role to the CASTAI_SERVICE_ACCOUNT -resource "google_service_account_iam_member" "token_creator_binding" { - service_account_id = google_service_account.client_service_account.name - role = "roles/iam.serviceAccountTokenCreator" - member = "serviceAccount:${castai_gke_cluster_id.cluster_id.cast_service_account}" - - condition { - title = "AlwaysTrueCondition" - description = "This condition is always true" - expression = "true" - } +module "castai_gke_cluster" { + source = "../../../../terraform-castai-gke-cluster" + # # source = "castai/gke-cluster/castai" - depends_on = [castai_gke_cluster_id.cluster_id] -} + api_url = var.castai_api_url + castai_api_token = var.castai_api_token + grpc_url = var.castai_grpc_url + wait_for_cluster_ready = true -# Grant the roles/iam.serviceAccountUser role to the CASTAI_SERVICE_ACCOUNT with a specific condition -resource "google_service_account_iam_member" "impersonation_user_binding" { - service_account_id = google_service_account.client_service_account.name - role = "roles/iam.serviceAccountUser" - member = "serviceAccount:${castai_gke_cluster_id.cluster_id.cast_service_account}" + project_id = var.project_id + gke_cluster_name = var.cluster_name + gke_cluster_location = module.gke.location - condition { - title = "SpecificServiceAccountCondition" - description = "Allow impersonation only for CASTAI_SERVICE_ACCOUNT" - expression = "request.auth.claims.email == \"${castai_gke_cluster_id.cluster_id.cast_service_account}\"" - } + gke_credentials = "{}" + delete_nodes_on_disconnect = var.delete_nodes_on_disconnect - depends_on = [castai_gke_cluster_id.cluster_id] -} - -module "castai_gke_cluster" { - source = "../../../../terraform-castai-gke-cluster" - # source = "castai/gke-cluster/castai" - - project_id = var.project_id - gke_cluster_name = var.cluster_name - gke_cluster_location = module.gke.location - client_service_account_email = google_service_account.client_service_account.email - delete_nodes_on_disconnect = var.delete_nodes_on_disconnect - gke_credentials = "{}" - api_url = var.castai_api_url - castai_api_token = var.castai_api_token - default_node_configuration = module.castai_gke_cluster.castai_node_configurations["default"] + default_node_configuration_name = "default" node_configurations = { default = { disk_cpu_ratio = 25 subnets = [module.vpc.subnets_ids[0]] - tags = { - "node-config" : "default" - } + tags = var.tags + } - max_pods_per_node = 110 + test_node_config = { + disk_cpu_ratio = 10 + subnets = [module.vpc.subnets_ids[0]] + tags = var.tags + max_pods_per_node = 40 + disk_type = "pd-ssd", network_tags = ["dev"] - disk_type = "pd-balanced" - } + } + node_templates = { - configuration_id = module.castai_gke_cluster.castai_node_configurations["default"] + default_by_castai = { + name = "default-by-castai" + configuration_name = "default" + is_default = true + is_enabled = true + should_taint = false + + constraints = { + on_demand = true + spot = true + use_spot_fallbacks = true + + enable_spot_diversity = false + spot_diversity_price_increase_limit_percent = 20 + } + } + + spot_tmpl = { + configuration_id = module.castai_gke_cluster.castai_node_configurations["default"] + is_enabled = true + should_taint = true + + custom_labels = { + custom-label-key-1 = "custom-label-value-1" + custom-label-key-2 = "custom-label-value-2" + } + + custom_taints = [ + { + key = "custom-taint-key-1" + value = "custom-taint-value-1" + effect = "NoSchedule" + }, + { + key = "custom-taint-key-2" + value = "custom-taint-value-2" + effect = "NoSchedule" + } + ] + + constraints = { + fallback_restore_rate_seconds = 1800 + spot = true + use_spot_fallbacks = true + min_cpu = 4 + max_cpu = 100 + instance_families = { + exclude = ["e2"] + } + compute_optimized_state = "disabled" + storage_optimized_state = "disabled" + # Optional: define custom priority for instances selection. + # + # 1. Prioritize C2D and C2 spot instances above all else, regardless of price. + # 2. If C2D and C2 is not available, try C3D family. + custom_priority = [ + { + instance_families = ["c2d", "c2"] + spot = true + }, + { + instance_families = ["c3d"] + spot = true + } + # 3. instances not matching any of custom priority groups will be tried after + # nothing matches from priority groups. + ] + } + custom_instances_enabled = true + } } autoscaler_settings = { @@ -87,18 +113,6 @@ module "castai_gke_cluster" { unschedulable_pods = { enabled = true - - headroom = { - enabled = true - cpu_percentage = 10 - memory_percentage = 10 - } - - headroom_spot = { - enabled = true - cpu_percentage = 10 - memory_percentage = 10 - } } node_downscaler = { @@ -110,7 +124,7 @@ module "castai_gke_cluster" { evictor = { aggressive_mode = false - cycle_interval = "5s10s" + cycle_interval = "5m10s" dry_run = false enabled = true node_grace_period_minutes = 10 @@ -127,4 +141,10 @@ module "castai_gke_cluster" { } } } + + // depends_on helps terraform with creating proper dependencies graph in case of resource creation and in this case destroy + // module "castai-gke-cluster" has to be destroyed before module "castai-gke-iam" and "module.gke" + depends_on = [ + google_service_account_iam_member.token_creator_binding, + google_service_account_iam_member.impersonation_user_binding] } diff --git a/examples/gke/gke_impersonate/providers.tf b/examples/gke/gke_impersonate/providers.tf index 55a1e957..77d9d6b1 100644 --- a/examples/gke/gke_impersonate/providers.tf +++ b/examples/gke/gke_impersonate/providers.tf @@ -1,3 +1,6 @@ +# Configure Data sources and providers required for CAST AI connection. +data "google_client_config" "default" {} + provider "castai" { api_url = var.castai_api_url api_token = var.castai_api_token @@ -9,4 +12,4 @@ provider "helm" { token = data.google_client_config.default.access_token cluster_ca_certificate = base64decode(module.gke.ca_certificate) } -} +} \ No newline at end of file