Commit afdbd34: minor fixes, update doc

Telemaco019 committed Oct 3, 2024
1 parent f5ae546 commit afdbd34

Showing 6 changed files with 83 additions and 39 deletions.
20 changes: 10 additions & 10 deletions .terraform-docs.yml
@@ -37,7 +37,7 @@ content: |-
> before using this Terraform module, ensure that you have your Nebuly credentials ready.
> These credentials are necessary to activate your installation and should be provided as input via the `nebuly_credentials` input.
-To get started with Nebuly installation on AWS, you can follow the steps below.
+To get started with Nebuly installation on GCP, you can follow the steps below.
These instructions will guide you through the installation using Nebuly's default standard configuration with the Nebuly Helm Chart.
@@ -49,7 +49,7 @@ content: |-
For configuration examples, you can refer to the [Examples](#examples).
-Once the Terraform changes are applied, proceed with the next steps to deploy Nebuly on the provisioned Elastic Kubernetes Service (EKS) cluster.
+Once the Terraform changes are applied, proceed with the next steps to deploy Nebuly on the provisioned Google Kubernetes Engine (GKE) cluster.
### 2. Connect to the GKE Cluster
@@ -82,20 +82,20 @@ content: |-
Create a Kubernetes [Image Pull Secret](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/) for
authenticating with your Docker registry and pulling the Nebuly Docker images.
-### 4. Bootstrap EKS cluster
+### 4. Bootstrap GKE cluster
Retrieve the auto-generated values from the Terraform outputs and save them to a file named `values-bootstrap.yaml`:
```shell
terraform output helm_values_bootstrap
```
-Install the bootstrap Helm chart to set up all the dependencies required for installing the Nebuly Platform Helm chart on EKS.
+Install the bootstrap Helm chart to set up all the dependencies required for installing the Nebuly Platform Helm chart on GKE.
-Refer to the [chart documentation](https://github.com/nebuly-ai/helm-charts/tree/main/bootstrap-aws) for all the configuration details.
+Refer to the [chart documentation](https://github.com/nebuly-ai/helm-charts/tree/main/bootstrap-gcp) for all the configuration details.
```shell
-helm install oci://ghcr.io/nebuly-ai/helm-charts/bootstrap-aws \
+helm install oci://ghcr.io/nebuly-ai/helm-charts/bootstrap-gcp \
--namespace nebuly-bootstrap \
--generate-name \
--create-namespace \
@@ -104,7 +104,7 @@ content: |-
### 5. Create Secret Provider Class
-Create a Secret Provider Class to allow EKS to fetch credentials from the provisioned Key Vault.
+Create a Secret Provider Class to allow GKE to fetch credentials from the provisioned Key Vault.
* Get the Secret Provider Class YAML definition from the Terraform module outputs:
```shell
@@ -136,7 +136,7 @@ content: |-
helm install oci://ghcr.io/nebuly-ai/helm-charts/nebuly-platform \
--namespace nebuly \
-f values.yaml \
---timeout 10m \
+--timeout 16m \
<your-release-name>
```
@@ -145,13 +145,13 @@ content: |-
### 7. Access Nebuly
-Retrieve the external Load Balancer DNS name to access the Nebuly Platform:
+Retrieve the external Load Balancer IP address to access the Nebuly Platform:
```shell
kubectl get svc -n nebuly-bootstrap -o jsonpath='{range .items[?(@.status.loadBalancer.ingress)]}{.status.loadBalancer.ingress[0].ip}{"\n"}{end}'
```
-You can then register a DNS CNAME record pointing to the Load Balancer DNS name to access Nebuly via the custom domain you provided
+You can then register a DNS A record pointing to the Load Balancer IP address to access Nebuly via the custom domain you provided
in the input variable `platform_domain`.
47 changes: 24 additions & 23 deletions README.md
@@ -21,7 +21,7 @@ Available on [Terraform Registry](https://registry.terraform.io/modules/nebuly-a
> before using this Terraform module, ensure that you have your Nebuly credentials ready.
> These credentials are necessary to activate your installation and should be provided as input via the `nebuly_credentials` input.
-To get started with Nebuly installation on AWS, you can follow the steps below.
+To get started with Nebuly installation on GCP, you can follow the steps below.

These instructions will guide you through the installation using Nebuly's default standard configuration with the Nebuly Helm Chart.

@@ -33,7 +33,7 @@ Import Nebuly into your Terraform root module, provide the necessary variables,

For configuration examples, you can refer to the [Examples](#examples).

-Once the Terraform changes are applied, proceed with the next steps to deploy Nebuly on the provisioned Elastic Kubernetes Service (EKS) cluster.
+Once the Terraform changes are applied, proceed with the next steps to deploy Nebuly on the provisioned Google Kubernetes Engine (GKE) cluster.

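Step 1 amounts to importing this module into your Terraform root module and applying it. A minimal sketch of that workflow, assuming the root module and backend are already configured (none of this is part of the commit itself):

```shell
# Run from the root module that imports the nebuly-platform module for GCP.
terraform init
terraform plan -out=tfplan
terraform apply tfplan
```
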
### 2. Connect to the GKE Cluster

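The body of this step is collapsed here. As an assumption, fetching credentials for the provisioned cluster typically uses `gcloud`; cluster name, region, and project below are placeholders:

```shell
# Hypothetical example; the module outputs may already print the exact command.
gcloud container clusters get-credentials <cluster-name> \
  --region <region> \
  --project <project-id>
```
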
@@ -66,20 +66,20 @@ The auto-generated Helm values use the name defined in the k8s_image_pull_secret
Create a Kubernetes [Image Pull Secret](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/) for
authenticating with your Docker registry and pulling the Nebuly Docker images.
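A minimal sketch of creating that secret, assuming the default name `nebuly-docker-pull` (the `k8s_image_pull_secret_name` default) and placeholder credentials supplied by Nebuly:

```shell
# Assumes the target namespace already exists; create it first if needed.
kubectl create namespace nebuly
# The registry below is an assumption; use the one Nebuly provides.
kubectl create secret docker-registry nebuly-docker-pull \
  --namespace nebuly \
  --docker-server=ghcr.io \
  --docker-username=<nebuly-username> \
  --docker-password=<nebuly-token>
```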

-### 4. Bootstrap EKS cluster
+### 4. Bootstrap GKE cluster

Retrieve the auto-generated values from the Terraform outputs and save them to a file named `values-bootstrap.yaml`:

```shell
terraform output helm_values_bootstrap
```
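The command above only prints the values. Assuming `helm_values_bootstrap` is a plain string output, it can be written straight to the expected file with:

```shell
terraform output -raw helm_values_bootstrap > values-bootstrap.yaml
```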

-Install the bootstrap Helm chart to set up all the dependencies required for installing the Nebuly Platform Helm chart on EKS.
+Install the bootstrap Helm chart to set up all the dependencies required for installing the Nebuly Platform Helm chart on GKE.

-Refer to the [chart documentation](https://github.com/nebuly-ai/helm-charts/tree/main/bootstrap-aws) for all the configuration details.
+Refer to the [chart documentation](https://github.com/nebuly-ai/helm-charts/tree/main/bootstrap-gcp) for all the configuration details.

```shell
-helm install oci://ghcr.io/nebuly-ai/helm-charts/bootstrap-aws \
+helm install oci://ghcr.io/nebuly-ai/helm-charts/bootstrap-gcp \
--namespace nebuly-bootstrap \
--generate-name \
--create-namespace \
@@ -88,7 +88,7 @@ helm install oci://ghcr.io/nebuly-ai/helm-charts/bootstrap-aws \


### 5. Create Secret Provider Class
-Create a Secret Provider Class to allow EKS to fetch credentials from the provisioned Key Vault.
+Create a Secret Provider Class to allow GKE to fetch credentials from the provisioned Key Vault.

* Get the Secret Provider Class YAML definition from the Terraform module outputs:
```shell
@@ -120,7 +120,7 @@ Refer to the [chart documentation](https://github.com/nebuly-ai/helm-charts/tree
helm install oci://ghcr.io/nebuly-ai/helm-charts/nebuly-platform \
--namespace nebuly \
-f values.yaml \
---timeout 10m \
+--timeout 16m \
<your-release-name>
```

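Not part of the documented steps, but a quick sanity check that the release came up (namespace assumed to be the default `nebuly`):

```shell
kubectl get pods -n nebuly
helm status <your-release-name> -n nebuly
```
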
@@ -129,13 +129,13 @@ helm install oci://ghcr.io/nebuly-ai/helm-charts/nebuly-platform \
### 7. Access Nebuly

-Retrieve the external Load Balancer DNS name to access the Nebuly Platform:
+Retrieve the external Load Balancer IP address to access the Nebuly Platform:

```shell
kubectl get svc -n nebuly-bootstrap -o jsonpath='{range .items[?(@.status.loadBalancer.ingress)]}{.status.loadBalancer.ingress[0].ip}{"\n"}{end}'
```

-You can then register a DNS CNAME record pointing to the Load Balancer DNS name to access Nebuly via the custom domain you provided
+You can then register a DNS A record pointing to the Load Balancer IP address to access Nebuly via the custom domain you provided
in the input variable `platform_domain`.
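If the domain is managed in Cloud DNS, a hedged example of creating that A record (zone, domain, and IP are placeholders; any DNS provider works):

```shell
gcloud dns record-sets create platform.<your-domain>. \
  --zone=<your-managed-zone> \
  --type=A \
  --ttl=300 \
  --rrdatas=<load-balancer-ip>
```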


@@ -172,7 +172,7 @@ You can find examples of code that uses this Terraform module in the [examples](
| <a name="input_gke_delete_protection"></a> [gke\_delete\_protection](#input\_gke\_delete\_protection) | Whether the GKE Cluster should have delete protection enabled. | `bool` | `true` | no |
| <a name="input_gke_kubernetes_version"></a> [gke\_kubernetes\_version](#input\_gke\_kubernetes\_version) | The used Kubernetes version for the GKE cluster. | `string` | `"1.30.3"` | no |
| <a name="input_gke_nebuly_namespaces"></a> [gke\_nebuly\_namespaces](#input\_gke\_nebuly\_namespaces) | The namespaces used by Nebuly installation. Update this if you use custom namespaces in the Helm chart installation. | `set(string)` | <pre>[<br/> "nebuly",<br/> "nebuly-bootstrap"<br/>]</pre> | no |
| <a name="input_gke_node_pools"></a> [gke\_node\_pools](#input\_gke\_node\_pools) | The node Pools used by the GKE cluster. | <pre>map(object({<br/> machine_type = string<br/> min_nodes = number<br/> max_nodes = number<br/> node_count = number<br/> node_locations = optional(set(string), null)<br/> preemptible = optional(bool, false)<br/> guest_accelerator = optional(object({<br/> type = string<br/> count = number<br/> }), null)<br/> }))</pre> | <pre>{<br/> "gpu-primary": {<br/> "machine_type": "g2-standard-4",<br/> "max_nodes": 1,<br/> "min_nodes": 0,<br/> "node_count": null<br/> },<br/> "web-services": {<br/> "machine_type": "n2-highmem-4",<br/> "max_nodes": 1,<br/> "min_nodes": 1,<br/> "node_count": 1<br/> }<br/>}</pre> | no |
| <a name="input_gke_node_pools"></a> [gke\_node\_pools](#input\_gke\_node\_pools) | The node Pools used by the GKE cluster. | <pre>map(object({<br/> machine_type = string<br/> min_nodes = number<br/> max_nodes = number<br/> node_count = number<br/> node_locations = optional(set(string), null)<br/> preemptible = optional(bool, false)<br/> labels = optional(map(string), {})<br/> guest_accelerator = optional(object({<br/> type = string<br/> count = number<br/> }), null)<br/> }))</pre> | <pre>{<br/> "gpu-primary": {<br/> "guest_accelerator": {<br/> "count": 1,<br/> "type": "nvidia-l4"<br/> },<br/> "labels": {<br/> "gke-no-default-nvidia-gpu-device-plugin": true<br/> },<br/> "machine_type": "g2-standard-4",<br/> "max_nodes": 1,<br/> "min_nodes": 0,<br/> "node_count": null<br/> },<br/> "gpu-secondary": {<br/> "guest_accelerator": {<br/> "count": 1,<br/> "type": "nvidia-tesla-t4"<br/> },<br/> "labels": {<br/> "gke-no-default-nvidia-gpu-device-plugin": true<br/> },<br/> "machine_type": "n1-standard-4",<br/> "max_nodes": 1,<br/> "min_nodes": 1,<br/> "node_count": null<br/> },<br/> "web-services": {<br/> "machine_type": "n2-highmem-4",<br/> "max_nodes": 1,<br/> "min_nodes": 1,<br/> "node_count": 1<br/> }<br/>}</pre> | no |
| <a name="input_gke_service_account_name"></a> [gke\_service\_account\_name](#input\_gke\_service\_account\_name) | The name of the Kubernetes Service Account used by Nebuly installation. | `string` | `"nebuly"` | no |
| <a name="input_k8s_image_pull_secret_name"></a> [k8s\_image\_pull\_secret\_name](#input\_k8s\_image\_pull\_secret\_name) | The name of the Kubernetes Image Pull Secret to use. <br/> This value will be used to auto-generate the values.yaml file for installing the Nebuly Platform Helm chart. | `string` | `"nebuly-docker-pull"` | no |
| <a name="input_labels"></a> [labels](#input\_labels) | Common labels that will be applied to all resources. | `map(string)` | `{}` | no |
@@ -201,20 +201,20 @@ You can find examples of code that uses this Terraform module in the [examples](
- resource.google_compute_subnetwork.main (/terraform-docs/main.tf#50)
- resource.google_container_cluster.main (/terraform-docs/main.tf#206)
- resource.google_container_node_pool.main (/terraform-docs/main.tf#253)
-- resource.google_project_iam_binding.gke_cluster_admin (/terraform-docs/main.tf#303)
-- resource.google_project_iam_member.gke_secret_accessors (/terraform-docs/main.tf#292)
-- resource.google_secret_manager_secret.jwt_signing_key (/terraform-docs/main.tf#320)
-- resource.google_secret_manager_secret.nebuly_client_id (/terraform-docs/main.tf#347)
-- resource.google_secret_manager_secret.nebuly_client_secret (/terraform-docs/main.tf#359)
-- resource.google_secret_manager_secret.openai_api_key (/terraform-docs/main.tf#335)
+- resource.google_project_iam_binding.gke_cluster_admin (/terraform-docs/main.tf#321)
+- resource.google_project_iam_member.gke_secret_accessors (/terraform-docs/main.tf#298)
+- resource.google_secret_manager_secret.jwt_signing_key (/terraform-docs/main.tf#338)
+- resource.google_secret_manager_secret.nebuly_client_id (/terraform-docs/main.tf#365)
+- resource.google_secret_manager_secret.nebuly_client_secret (/terraform-docs/main.tf#377)
+- resource.google_secret_manager_secret.openai_api_key (/terraform-docs/main.tf#353)
- resource.google_secret_manager_secret.postgres_analytics_password (/terraform-docs/main.tf#150)
- resource.google_secret_manager_secret.postgres_analytics_username (/terraform-docs/main.tf#138)
- resource.google_secret_manager_secret.postgres_auth_password (/terraform-docs/main.tf#191)
- resource.google_secret_manager_secret.postgres_auth_username (/terraform-docs/main.tf#179)
-- resource.google_secret_manager_secret_version.jwt_signing_key (/terraform-docs/main.tf#328)
-- resource.google_secret_manager_secret_version.nebuly_client_id (/terraform-docs/main.tf#355)
-- resource.google_secret_manager_secret_version.nebuly_client_secret (/terraform-docs/main.tf#367)
-- resource.google_secret_manager_secret_version.openai_api_key (/terraform-docs/main.tf#343)
+- resource.google_secret_manager_secret_version.jwt_signing_key (/terraform-docs/main.tf#346)
+- resource.google_secret_manager_secret_version.nebuly_client_id (/terraform-docs/main.tf#373)
+- resource.google_secret_manager_secret_version.nebuly_client_secret (/terraform-docs/main.tf#385)
+- resource.google_secret_manager_secret_version.openai_api_key (/terraform-docs/main.tf#361)
- resource.google_secret_manager_secret_version.postgres_analytics_password (/terraform-docs/main.tf#158)
- resource.google_secret_manager_secret_version.postgres_analytics_username (/terraform-docs/main.tf#146)
- resource.google_secret_manager_secret_version.postgres_auth_password (/terraform-docs/main.tf#199)
@@ -226,10 +226,11 @@ You can find examples of code that uses this Terraform module in the [examples](
- resource.google_sql_database_instance.main (/terraform-docs/main.tf#82)
- resource.google_sql_user.analytics (/terraform-docs/main.tf#133)
- resource.google_sql_user.auth (/terraform-docs/main.tf#174)
-- resource.google_storage_bucket.main (/terraform-docs/main.tf#373)
+- resource.google_storage_bucket.main (/terraform-docs/main.tf#391)
- resource.google_storage_bucket_iam_binding.gke_storage_object_user (/terraform-docs/main.tf#309)
- resource.random_password.analytics (/terraform-docs/main.tf#128)
- resource.random_password.auth (/terraform-docs/main.tf#169)
-- resource.tls_private_key.jwt_signing_key (/terraform-docs/main.tf#316)
+- resource.tls_private_key.jwt_signing_key (/terraform-docs/main.tf#334)
- data source.google_compute_zones.available (/terraform-docs/main.tf#23)
- data source.google_container_engine_versions.main (/terraform-docs/main.tf#24)
- data source.google_project.current (/terraform-docs/main.tf#22)
8 changes: 7 additions & 1 deletion main.tf
@@ -280,11 +280,17 @@ resource "google_container_node_pool" "main" {

service_account = google_service_account.gke_node_pool.email

labels = each.value.labels

dynamic "guest_accelerator" {
for_each = each.value.guest_accelerator == null ? {} : { "" : each.value.guest_accelerator }
content {
type = guest_accelerator.value.type
count = guest_accelerator.value.count

gpu_driver_installation_config {
gpu_driver_version = "INSTALLATION_DISABLED"
}
}
}
}
@@ -302,7 +308,7 @@ resource "google_project_iam_member" "gke_secret_accessors" {
}
resource "google_storage_bucket_iam_binding" "gke_storage_object_user" {
bucket = google_storage_bucket.main.name
role = "roles/storage.objectUser"
role = "roles/storage.objectUser"
members = [
for namespace in var.gke_nebuly_namespaces :
"principal://iam.googleapis.com/projects/${data.google_project.current.number}/locations/global/workloadIdentityPools/${data.google_project.current.project_id}.svc.id.goog/subject/ns/${namespace}/sa/${var.gke_service_account_name}"
13 changes: 13 additions & 0 deletions pod.yaml
@@ -0,0 +1,13 @@
apiVersion: v1
kind: Pod
metadata:
name: nginx-gpu-pod
spec:
containers:
- name: nginx-container
image: nginx:latest
resources:
limits:
nvidia.com/gpu: 1 # Request 1 GPU
ports:
- containerPort: 80
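The new `pod.yaml` looks like a GPU scheduling smoke test. Assuming a GPU node pool is available and the NVIDIA device plugin is running, it can be exercised with:

```shell
kubectl apply -f pod.yaml
# The pod should be scheduled onto a GPU node once one is provisioned.
kubectl get pod nginx-gpu-pod -o wide
kubectl delete -f pod.yaml
```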
13 changes: 8 additions & 5 deletions tests/dev-provisioning/main.tf
@@ -21,6 +21,12 @@ provider "google" {
variable "region" {
type = string
}
variable "nebuly_credentials" {
type = object({
client_id = string
client_secret = string
})
}


# ------ Main ------ #
@@ -45,11 +51,8 @@ module "platform" {
openai_endpoint = "https://api.openai.com"
openai_gpt4_deployment_name = "gpt-4"

platform_domain = "platform.gcp.testing.nebuly.com"
nebuly_credentials = {
client_id = "my-client-id"
client_secret = "my-client-secret"
}
platform_domain = "platform.gcp.testing.nebuly.com"
nebuly_credentials = var.nebuly_credentials
}

output "gke_cluster_get_credentials" {
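With the hard-coded credentials replaced by the new `nebuly_credentials` variable, one way to supply them when running this test is a local tfvars file (the file name is an assumption):

```shell
cat > credentials.auto.tfvars <<'EOF'
nebuly_credentials = {
  client_id     = "<your-client-id>"
  client_secret = "<your-client-secret>"
}
EOF
terraform plan
```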
21 changes: 21 additions & 0 deletions variables.tf
@@ -153,6 +153,7 @@ variable "gke_node_pools" {
node_count = number
node_locations = optional(set(string), null)
preemptible = optional(bool, false)
labels = optional(map(string), {})
guest_accelerator = optional(object({
type = string
count = number
@@ -170,6 +171,26 @@ variable "gke_node_pools" {
min_nodes = 0
max_nodes = 1
node_count = null
guest_accelerator = {
type = "nvidia-l4"
count = 1
}
labels = {
"gke-no-default-nvidia-gpu-device-plugin" : true,
}
}
"gpu-secondary" : {
machine_type = "n1-standard-4"
min_nodes = 1
max_nodes = 1
node_count = null
guest_accelerator = {
type = "nvidia-tesla-t4"
count = 1
}
labels = {
"gke-no-default-nvidia-gpu-device-plugin" : true,
}
}
}
}
