From afdbd3467c6a05252938f38cfd1ad38ebeb38a22 Mon Sep 17 00:00:00 2001 From: Michele Zanotti Date: Thu, 3 Oct 2024 15:12:06 +0200 Subject: [PATCH] minor fixes, update doc --- .terraform-docs.yml | 20 +++++++-------- README.md | 47 +++++++++++++++++----------------- main.tf | 8 +++++- pod.yaml | 13 ++++++++++ tests/dev-provisioning/main.tf | 13 ++++++---- variables.tf | 21 +++++++++++++++ 6 files changed, 83 insertions(+), 39 deletions(-) create mode 100644 pod.yaml diff --git a/.terraform-docs.yml b/.terraform-docs.yml index 828e7e5..426a77a 100644 --- a/.terraform-docs.yml +++ b/.terraform-docs.yml @@ -37,7 +37,7 @@ content: |- > before using this Terraform module, ensure that you have your Nebuly credentials ready. > These credentials are necessary to activate your installation and should be provided as input via the `nebuly_credentials` input. - To get started with Nebuly installation on AWS, you can follow the steps below. + To get started with Nebuly installation on GCP, you can follow the steps below. These instructions will guide you through the installation using Nebuly's default standard configuration with the Nebuly Helm Chart. @@ -49,7 +49,7 @@ content: |- For configuration examples, you can refer to the [Examples](#examples). - Once the Terraform changes are applied, proceed with the next steps to deploy Nebuly on the provisioned Elastic Kubernetes Service (EKS) cluster. + Once the Terraform changes are applied, proceed with the next steps to deploy Nebuly on the provisioned Google Kubernetes Engine (GKE) cluster. ### 2. Connect to the GKE Cluster @@ -82,7 +82,7 @@ content: |- Create a Kubernetes [Image Pull Secret](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/) for authenticating with your Docker registry and pulling the Nebuly Docker images. - ### 4. Bootstrap EKS cluster + ### 4. 
Bootstrap GKE cluster Retrieve the auto-generated values from the Terraform outputs and save them to a file named `values-bootstrap.yaml`: @@ -90,12 +90,12 @@ content: |- terraform output helm_values_bootstrap ``` - Install the bootstrap Helm chart to set up all the dependencies required for installing the Nebuly Platform Helm chart on EKS. + Install the bootstrap Helm chart to set up all the dependencies required for installing the Nebuly Platform Helm chart on GKE. - Refer to the [chart documentation](https://github.com/nebuly-ai/helm-charts/tree/main/bootstrap-aws) for all the configuration details. + Refer to the [chart documentation](https://github.com/nebuly-ai/helm-charts/tree/main/bootstrap-gcp) for all the configuration details. ```shell - helm install oci://ghcr.io/nebuly-ai/helm-charts/bootstrap-aws \ + helm install oci://ghcr.io/nebuly-ai/helm-charts/bootstrap-gcp \ --namespace nebuly-bootstrap \ --generate-name \ --create-namespace \ @@ -104,7 +104,7 @@ content: |- ### 5. Create Secret Provider Class - Create a Secret Provider Class to allow EKS to fetch credentials from the provisioned Key Vault. + Create a Secret Provider Class to allow GKE to fetch credentials from the provisioned Google Secret Manager secrets. * Get the Secret Provider Class YAML definition from the Terraform module outputs: ```shell @@ -136,7 +136,7 @@ content: |- helm install oci://ghcr.io/nebuly-ai/helm-charts/nebuly-platform \ --namespace nebuly \ -f values.yaml \ - --timeout 10m \ + --timeout 16m \ ``` @@ -145,13 +145,13 @@ content: |- ### 7. 
Access Nebuly - Retrieve the external Load Balancer DNS name to access the Nebuly Platform: + Retrieve the external Load Balancer IP address to access the Nebuly Platform: ```shell kubectl get svc -n nebuly-bootstrap -o jsonpath='{range .items[?(@.status.loadBalancer.ingress)]}{.status.loadBalancer.ingress[0].ip}{"\n"}{end}' ``` - You can then register a DNS CNAME record pointing to the Load Balancer DNS name to access Nebuly via the custom domain you provided + You can then register a DNS A record pointing to the Load Balancer IP address to access Nebuly via the custom domain you provided in the input variable `platform_domain`. diff --git a/README.md b/README.md index 2115cfd..e99618c 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ Available on [Terraform Registry](https://registry.terraform.io/modules/nebuly-a > before using this Terraform module, ensure that you have your Nebuly credentials ready. > These credentials are necessary to activate your installation and should be provided as input via the `nebuly_credentials` input. -To get started with Nebuly installation on AWS, you can follow the steps below. +To get started with Nebuly installation on GCP, you can follow the steps below. These instructions will guide you through the installation using Nebuly's default standard configuration with the Nebuly Helm Chart. @@ -33,7 +33,7 @@ Import Nebuly into your Terraform root module, provide the necessary variables, For configuration examples, you can refer to the [Examples](#examples). -Once the Terraform changes are applied, proceed with the next steps to deploy Nebuly on the provisioned Elastic Kubernetes Service (EKS) cluster. +Once the Terraform changes are applied, proceed with the next steps to deploy Nebuly on the provisioned Google Kubernetes Engine (GKE) cluster. ### 2. 
Connect to the GKE Cluster @@ -66,7 +66,7 @@ The auto-generated Helm values use the name defined in the k8s_image_pull_secret Create a Kubernetes [Image Pull Secret](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/) for authenticating with your Docker registry and pulling the Nebuly Docker images. -### 4. Bootstrap EKS cluster +### 4. Bootstrap GKE cluster Retrieve the auto-generated values from the Terraform outputs and save them to a file named `values-bootstrap.yaml`: @@ -74,12 +74,12 @@ Retrieve the auto-generated values from the Terraform outputs and save them to a terraform output helm_values_bootstrap ``` -Install the bootstrap Helm chart to set up all the dependencies required for installing the Nebuly Platform Helm chart on EKS. +Install the bootstrap Helm chart to set up all the dependencies required for installing the Nebuly Platform Helm chart on GKE. -Refer to the [chart documentation](https://github.com/nebuly-ai/helm-charts/tree/main/bootstrap-aws) for all the configuration details. +Refer to the [chart documentation](https://github.com/nebuly-ai/helm-charts/tree/main/bootstrap-gcp) for all the configuration details. ```shell -helm install oci://ghcr.io/nebuly-ai/helm-charts/bootstrap-aws \ +helm install oci://ghcr.io/nebuly-ai/helm-charts/bootstrap-gcp \ --namespace nebuly-bootstrap \ --generate-name \ --create-namespace \ @@ -88,7 +88,7 @@ helm install oci://ghcr.io/nebuly-ai/helm-charts/bootstrap-aws \ ### 5. Create Secret Provider Class -Create a Secret Provider Class to allow EKS to fetch credentials from the provisioned Key Vault. +Create a Secret Provider Class to allow GKE to fetch credentials from the provisioned Google Secret Manager secrets. 
* Get the Secret Provider Class YAML definition from the Terraform module outputs: ```shell @@ -120,7 +120,7 @@ Refer to the [chart documentation](https://github.com/nebuly-ai/helm-charts/tree helm install oci://ghcr.io/nebuly-ai/helm-charts/nebuly-platform \ --namespace nebuly \ -f values.yaml \ - --timeout 10m \ + --timeout 16m \ ``` @@ -129,13 +129,13 @@ helm install oci://ghcr.io/nebuly-ai/helm-charts/nebuly-platform \ ### 7. Access Nebuly -Retrieve the external Load Balancer DNS name to access the Nebuly Platform: +Retrieve the external Load Balancer IP address to access the Nebuly Platform: ```shell kubectl get svc -n nebuly-bootstrap -o jsonpath='{range .items[?(@.status.loadBalancer.ingress)]}{.status.loadBalancer.ingress[0].ip}{"\n"}{end}' ``` -You can then register a DNS CNAME record pointing to the Load Balancer DNS name to access Nebuly via the custom domain you provided +You can then register a DNS A record pointing to the Load Balancer IP address to access Nebuly via the custom domain you provided in the input variable `platform_domain`. @@ -172,7 +172,7 @@ You can find examples of code that uses this Terraform module in the [examples]( | [gke\_delete\_protection](#input\_gke\_delete\_protection) | Whether the GKE Cluster should have delete protection enabled. | `bool` | `true` | no | | [gke\_kubernetes\_version](#input\_gke\_kubernetes\_version) | The used Kubernetes version for the GKE cluster. | `string` | `"1.30.3"` | no | | [gke\_nebuly\_namespaces](#input\_gke\_nebuly\_namespaces) | The namespaces used by Nebuly installation. Update this if you use custom namespaces in the Helm chart installation. | `set(string)` |
[
"nebuly",
"nebuly-bootstrap"
]
| no | -| [gke\_node\_pools](#input\_gke\_node\_pools) | The node Pools used by the GKE cluster. |
map(object({
machine_type = string
min_nodes = number
max_nodes = number
node_count = number
node_locations = optional(set(string), null)
preemptible = optional(bool, false)
guest_accelerator = optional(object({
type = string
count = number
}), null)
}))
|
{
"gpu-primary": {
"machine_type": "g2-standard-4",
"max_nodes": 1,
"min_nodes": 0,
"node_count": null
},
"web-services": {
"machine_type": "n2-highmem-4",
"max_nodes": 1,
"min_nodes": 1,
"node_count": 1
}
}
| no | +| [gke\_node\_pools](#input\_gke\_node\_pools) | The node Pools used by the GKE cluster. |
map(object({
machine_type = string
min_nodes = number
max_nodes = number
node_count = number
node_locations = optional(set(string), null)
preemptible = optional(bool, false)
labels = optional(map(string), {})
guest_accelerator = optional(object({
type = string
count = number
}), null)
}))
|
{
"gpu-primary": {
"guest_accelerator": {
"count": 1,
"type": "nvidia-l4"
},
"labels": {
"gke-no-default-nvidia-gpu-device-plugin": true
},
"machine_type": "g2-standard-4",
"max_nodes": 1,
"min_nodes": 0,
"node_count": null
},
"gpu-secondary": {
"guest_accelerator": {
"count": 1,
"type": "nvidia-tesla-t4"
},
"labels": {
"gke-no-default-nvidia-gpu-device-plugin": true
},
"machine_type": "n1-standard-4",
"max_nodes": 1,
"min_nodes": 1,
"node_count": null
},
"web-services": {
"machine_type": "n2-highmem-4",
"max_nodes": 1,
"min_nodes": 1,
"node_count": 1
}
}
| no | | [gke\_service\_account\_name](#input\_gke\_service\_account\_name) | The name of the Kubernetes Service Account used by Nebuly installation. | `string` | `"nebuly"` | no | | [k8s\_image\_pull\_secret\_name](#input\_k8s\_image\_pull\_secret\_name) | The name of the Kubernetes Image Pull Secret to use.
This value will be used to auto-generate the values.yaml file for installing the Nebuly Platform Helm chart. | `string` | `"nebuly-docker-pull"` | no | | [labels](#input\_labels) | Common labels that will be applied to all resources. | `map(string)` | `{}` | no | @@ -201,20 +201,20 @@ You can find examples of code that uses this Terraform module in the [examples]( - resource.google_compute_subnetwork.main (/terraform-docs/main.tf#50) - resource.google_container_cluster.main (/terraform-docs/main.tf#206) - resource.google_container_node_pool.main (/terraform-docs/main.tf#253) -- resource.google_project_iam_binding.gke_cluster_admin (/terraform-docs/main.tf#303) -- resource.google_project_iam_member.gke_secret_accessors (/terraform-docs/main.tf#292) -- resource.google_secret_manager_secret.jwt_signing_key (/terraform-docs/main.tf#320) -- resource.google_secret_manager_secret.nebuly_client_id (/terraform-docs/main.tf#347) -- resource.google_secret_manager_secret.nebuly_client_secret (/terraform-docs/main.tf#359) -- resource.google_secret_manager_secret.openai_api_key (/terraform-docs/main.tf#335) +- resource.google_project_iam_binding.gke_cluster_admin (/terraform-docs/main.tf#321) +- resource.google_project_iam_member.gke_secret_accessors (/terraform-docs/main.tf#298) +- resource.google_secret_manager_secret.jwt_signing_key (/terraform-docs/main.tf#338) +- resource.google_secret_manager_secret.nebuly_client_id (/terraform-docs/main.tf#365) +- resource.google_secret_manager_secret.nebuly_client_secret (/terraform-docs/main.tf#377) +- resource.google_secret_manager_secret.openai_api_key (/terraform-docs/main.tf#353) - resource.google_secret_manager_secret.postgres_analytics_password (/terraform-docs/main.tf#150) - resource.google_secret_manager_secret.postgres_analytics_username (/terraform-docs/main.tf#138) - resource.google_secret_manager_secret.postgres_auth_password (/terraform-docs/main.tf#191) - resource.google_secret_manager_secret.postgres_auth_username 
(/terraform-docs/main.tf#179) -- resource.google_secret_manager_secret_version.jwt_signing_key (/terraform-docs/main.tf#328) -- resource.google_secret_manager_secret_version.nebuly_client_id (/terraform-docs/main.tf#355) -- resource.google_secret_manager_secret_version.nebuly_client_secret (/terraform-docs/main.tf#367) -- resource.google_secret_manager_secret_version.openai_api_key (/terraform-docs/main.tf#343) +- resource.google_secret_manager_secret_version.jwt_signing_key (/terraform-docs/main.tf#346) +- resource.google_secret_manager_secret_version.nebuly_client_id (/terraform-docs/main.tf#373) +- resource.google_secret_manager_secret_version.nebuly_client_secret (/terraform-docs/main.tf#385) +- resource.google_secret_manager_secret_version.openai_api_key (/terraform-docs/main.tf#361) - resource.google_secret_manager_secret_version.postgres_analytics_password (/terraform-docs/main.tf#158) - resource.google_secret_manager_secret_version.postgres_analytics_username (/terraform-docs/main.tf#146) - resource.google_secret_manager_secret_version.postgres_auth_password (/terraform-docs/main.tf#199) @@ -226,10 +226,11 @@ You can find examples of code that uses this Terraform module in the [examples]( - resource.google_sql_database_instance.main (/terraform-docs/main.tf#82) - resource.google_sql_user.analytics (/terraform-docs/main.tf#133) - resource.google_sql_user.auth (/terraform-docs/main.tf#174) -- resource.google_storage_bucket.main (/terraform-docs/main.tf#373) +- resource.google_storage_bucket.main (/terraform-docs/main.tf#391) +- resource.google_storage_bucket_iam_binding.gke_storage_object_user (/terraform-docs/main.tf#309) - resource.random_password.analytics (/terraform-docs/main.tf#128) - resource.random_password.auth (/terraform-docs/main.tf#169) -- resource.tls_private_key.jwt_signing_key (/terraform-docs/main.tf#316) +- resource.tls_private_key.jwt_signing_key (/terraform-docs/main.tf#334) - data source.google_compute_zones.available 
(/terraform-docs/main.tf#23) - data source.google_container_engine_versions.main (/terraform-docs/main.tf#24) - data source.google_project.current (/terraform-docs/main.tf#22) diff --git a/main.tf b/main.tf index 4d88a48..bd3dbf2 100644 --- a/main.tf +++ b/main.tf @@ -280,11 +280,17 @@ resource "google_container_node_pool" "main" { service_account = google_service_account.gke_node_pool.email + labels = each.value.labels + dynamic "guest_accelerator" { for_each = each.value.guest_accelerator == null ? {} : { "" : each.value.guest_accelerator } content { type = guest_accelerator.value.type count = guest_accelerator.value.count + + gpu_driver_installation_config { + gpu_driver_version = "INSTALLATION_DISABLED" + } } } } @@ -302,7 +308,7 @@ resource "google_project_iam_member" "gke_secret_accessors" { } resource "google_storage_bucket_iam_binding" "gke_storage_object_user" { bucket = google_storage_bucket.main.name - role = "roles/storage.objectUser" + role = "roles/storage.objectUser" members = [ for namespace in var.gke_nebuly_namespaces : "principal://iam.googleapis.com/projects/${data.google_project.current.number}/locations/global/workloadIdentityPools/${data.google_project.current.project_id}.svc.id.goog/subject/ns/${namespace}/sa/${var.gke_service_account_name}" diff --git a/pod.yaml b/pod.yaml new file mode 100644 index 0000000..d9bdd6c --- /dev/null +++ b/pod.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Pod +metadata: + name: nginx-gpu-pod +spec: + containers: + - name: nginx-container + image: nginx:latest + resources: + limits: + nvidia.com/gpu: 1 # Request 1 GPU + ports: + - containerPort: 80 diff --git a/tests/dev-provisioning/main.tf b/tests/dev-provisioning/main.tf index 07e1052..4547f3f 100644 --- a/tests/dev-provisioning/main.tf +++ b/tests/dev-provisioning/main.tf @@ -21,6 +21,12 @@ provider "google" { variable "region" { type = string } +variable "nebuly_credentials" { + type = object({ + client_id = string + client_secret = string + }) +} # ------ 
Main ------ # @@ -45,11 +51,8 @@ module "platform" { openai_endpoint = "https://api.openai.com" openai_gpt4_deployment_name = "gpt-4" - platform_domain = "platform.gcp.testing.nebuly.com" - nebuly_credentials = { - client_id = "my-client-id" - client_secret = "my-client-secret" - } + platform_domain = "platform.gcp.testing.nebuly.com" + nebuly_credentials = var.nebuly_credentials } output "gke_cluster_get_credentials" { diff --git a/variables.tf b/variables.tf index 2f1aa82..e8b09dc 100644 --- a/variables.tf +++ b/variables.tf @@ -153,6 +153,7 @@ variable "gke_node_pools" { node_count = number node_locations = optional(set(string), null) preemptible = optional(bool, false) + labels = optional(map(string), {}) guest_accelerator = optional(object({ type = string count = number @@ -170,6 +171,26 @@ variable "gke_node_pools" { min_nodes = 0 max_nodes = 1 node_count = null + guest_accelerator = { + type = "nvidia-l4" + count = 1 + } + labels = { + "gke-no-default-nvidia-gpu-device-plugin" : true, + } + } + "gpu-secondary" : { + machine_type = "n1-standard-4" + min_nodes = 1 + max_nodes = 1 + node_count = null + guest_accelerator = { + type = "nvidia-tesla-t4" + count = 1 + } + labels = { + "gke-no-default-nvidia-gpu-device-plugin" : true, + } } } }