diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
new file mode 100644
index 0000000..53a0503
--- /dev/null
+++ b/.github/workflows/ci.yaml
@@ -0,0 +1,53 @@
+name: CI Pipeline
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - "main"
+    paths-ignore:
+      - ".github/**"
+      - "*.md"
+      - "docs/**"
+      - "!README.md"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  CI:
+    env:
+      ARM_TENANT_ID: ${{ secrets.TENANT_ID }}
+      ARM_SUBSCRIPTION_ID: ${{ secrets.SUBSCRIPTION_ID }}
+      ARM_CLIENT_ID: ${{ secrets.CLIENT_ID }}
+      ARM_CLIENT_SECRET: ${{ secrets.CLIENT_SECRET }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+
+      - uses: hashicorp/setup-terraform@v3
+        with:
+          terraform_version: "1.9.3"
+
+      - name: Check if doc is up to date
+        run: |
+          make doc
+          if ! git diff --exit-code; then
+            echo "Documentation not up to date. Please run \"make doc\" and commit changes!" >&2
+            exit 1
+          fi
+
+      - uses: terraform-linters/setup-tflint@v4
+        name: Setup TFLint
+
+      - name: Init TFLint
+        run: tflint --init
+        env:
+          # https://github.com/terraform-linters/tflint/blob/master/docs/user-guide/plugins.md#avoiding-rate-limiting
+          GITHUB_TOKEN: ${{ github.token }}
+
+      - run: terraform init
+
+      - run: make check
diff --git a/.github/workflows/pull-requests.yaml b/.github/workflows/pull-requests.yaml
new file mode 100644
index 0000000..c9b88d2
--- /dev/null
+++ b/.github/workflows/pull-requests.yaml
@@ -0,0 +1,49 @@
+name: Pull Request Pipeline
+
+on:
+  workflow_dispatch:
+  pull_request:
+    branches:
+      - "main"
+    paths-ignore:
+      - ".github/**"
+      - "*.md"
+      - "docs/**"
+      - "!README.md"
+
+jobs:
+  CI:
+    env:
+      ARM_TENANT_ID: ${{ secrets.TENANT_ID }}
+      ARM_SUBSCRIPTION_ID: ${{ secrets.SUBSCRIPTION_ID }}
+      ARM_CLIENT_ID: ${{ secrets.CLIENT_ID }}
+      ARM_CLIENT_SECRET: ${{ secrets.CLIENT_SECRET }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+
+      - uses: hashicorp/setup-terraform@v3
+        with:
+          terraform_version: "1.9.3"
+
+      - name: Check if doc is up to date
+        run: |
+          make doc
+          if ! git diff --exit-code; then
+            echo "Documentation not up to date. Please run \"make doc\" and commit changes!" >&2
+            exit 1
+          fi
+
+      - uses: terraform-linters/setup-tflint@v4
+        name: Setup TFLint
+
+      - name: Init TFLint
+        run: tflint --init
+        env:
+          # https://github.com/terraform-linters/tflint/blob/master/docs/user-guide/plugins.md#avoiding-rate-limiting
+          GITHUB_TOKEN: ${{ github.token }}
+
+      - run: terraform init
+
+      - run: make check-no-tests
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..409e8e0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+**/.terraform/
+*.secrets.auto.tfvars
+secrets.auto.tfvars
+.idea/
+backend.tfvars
+.terraform.lock.hcl
diff --git a/.terraform-docs.yml b/.terraform-docs.yml
new file mode 100644
index 0000000..1e09425
--- /dev/null
+++ b/.terraform-docs.yml
@@ -0,0 +1,166 @@
+formatter: "markdown" # this is required
+
+version: ""
+
+header-from: main.tf
+footer-from: ""
+
+recursive:
+  enabled: false
+  path: modules
+
+sections:
+  hide: []
+  show: []
+
+content: |-
+  # Nebuly Platform (Azure)
+
+  Terraform module for provisioning Nebuly Platform resources on Microsoft Azure.
+
+  Available on [Terraform Registry](https://registry.terraform.io/modules/nebuly-ai/nebuly-platform/azurerm/latest).
+
+  ## Quickstart
+
+  > ⚠️ **Prerequisite**:
+  > before using this Terraform module, ensure that you have your Nebuly credentials ready.
+  > These credentials are necessary to activate your installation and should be provided via the `nebuly_credentials` input variable.
+
+  To get started with Nebuly installation on Microsoft Azure, you can follow the steps below.
+
+  These instructions will guide you through the installation using Nebuly's default standard configuration with the Nebuly Helm Chart.
+
+  For specific configurations or assistance, reach out to the Nebuly Slack channel or email [support@nebuly.ai](mailto:support@nebuly.ai).
+
+  ### 1. Terraform setup
+
+  Import Nebuly into your Terraform root module, provide the necessary variables, and apply the changes.
+
+  For configuration examples, you can refer to the [Examples](#examples).
+
+  Once the Terraform changes are applied, proceed with the next steps to deploy Nebuly on the provisioned Azure Kubernetes Service (AKS) cluster.
+
+  ### 2. Connect to the Azure Kubernetes Service cluster
+
+  Prerequisites: install the [Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli).
+
+  * Fetch the command for retrieving the credentials from the module outputs:
+
+  ```shell
+  terraform output aks_get_credentials
+  ```
+
+  * Run the command returned by the previous step.
+
+  ### 3. Create image pull secret
+
+  Create a Kubernetes [Image Pull Secret](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/) for
+  authenticating with your Docker registry and pulling the Nebuly Docker images (a sample command is shown at the end of this Quickstart).
+  The auto-generated Helm values use the name defined in the `k8s_image_pull_secret_name` input variable for the Image Pull Secret.
+  If you prefer a custom name, update either the Terraform variable or your Helm values accordingly.
+
+  ### 4. Create Secret Provider Class
+  Create a Secret Provider Class to allow AKS to fetch credentials from the provisioned Key Vault.
+
+  * Get the Secret Provider Class YAML definition from the Terraform module outputs:
+    ```shell
+    terraform output secret_provider_class
+    ```
+
+  * Copy the output of the command into a file named `secret-provider-class.yaml`.
+
+  * Run the following commands to install Nebuly in the Kubernetes namespace `nebuly`:
+
+    ```shell
+    kubectl create ns nebuly
+    kubectl apply --server-side -f secret-provider-class.yaml
+    ```
+
+  ### 5. Bootstrap AKS cluster
+
+  Install the bootstrap Helm chart to set up all the dependencies required for installing the Nebuly Platform Helm chart on AKS.
+
+  Refer to the [chart documentation](https://github.com/nebuly-ai/helm-charts/tree/main/bootstrap-azure) for all the configuration details.
+
+  ```shell
+  helm install oci://ghcr.io/nebuly-ai/helm-charts/bootstrap-azure \
+    --namespace nebuly-bootstrap \
+    --generate-name \
+    --create-namespace
+  ```
+
+  ### 6. Install nebuly-platform chart
+
+  Retrieve the auto-generated values from the Terraform outputs and save them to a file named `values.yaml`:
+
+  ```shell
+  terraform output helm_values
+  ```
+
+  Install the Nebuly Platform Helm chart.
+  Refer to the [chart documentation](https://github.com/nebuly-ai/helm-charts/tree/main/nebuly-platform) for detailed configuration options.
+
+  ```shell
+  helm install oci://ghcr.io/nebuly-ai/helm-charts/nebuly-platform \
+    --namespace nebuly \
+    --generate-name \
+    -f values.yaml \
+    --timeout 10m
+  ```
+
+  > ℹ️ During the initial installation of the chart, all required Nebuly LLMs are uploaded to your model registry.
+  > This process can take approximately 5 minutes. If the `helm install` command appears to be stuck, don't worry: it's simply waiting for the upload to finish.
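+
+  For example, the Image Pull Secret referenced in step 3 can be created with `kubectl`. The snippet below is a minimal sketch that assumes the default secret name (`nebuly-docker-pull`, see `k8s_image_pull_secret_name`); the registry server (here assumed to be GitHub Container Registry), username, and password are placeholders — replace them with the values provided by Nebuly:
+
+  ```shell
+  # Create the target namespace if it does not exist yet.
+  kubectl create ns nebuly
+  # Create the Image Pull Secret used to pull the Nebuly Docker images.
+  kubectl create secret docker-registry nebuly-docker-pull \
+    --namespace nebuly \
+    --docker-server=ghcr.io \
+    --docker-username=<your-registry-username> \
+    --docker-password=<your-registry-token>
+  ```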
+
+
+  ## Examples
+
+  You can find examples of code that uses this Terraform module in the [examples](./examples) directory.
+
+
+  {{ .Header }}
+
+
+  {{ .Providers }}
+
+
+  {{ .Outputs }}
+
+
+  {{ .Inputs }}
+
+  ## Resources
+
+  {{ range .Module.Resources }}
+  - {{ .GetMode }}.{{ .Spec }} ({{ .Position.Filename }}#{{ .Position.Line }})
+  {{- end }}
+
+output:
+  file: ""
+  mode: inject
+  template: |-
+
+    {{ .Content }}
+
+
+output-values:
+  enabled: false
+  from: ""
+
+sort:
+  enabled: true
+  by: name
+
+settings:
+  anchor: true
+  color: true
+  default: true
+  description: false
+  escape: true
+  hide-empty: false
+  html: true
+  indent: 2
+  lockfile: false
+  read-comments: true
+  required: true
+  sensitive: true
+  type: true
diff --git a/LICENSE b/LICENSE
index 261eeb9..35b36ee 100644
--- a/LICENSE
+++ b/LICENSE
@@ -186,7 +186,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright [yyyy] [name of copyright owner]
+   Copyright 2024 NebulyAI Inc.
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..edecdce
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,48 @@
+OK?=\033[0;32m[Ok]\033[0m
+
+##@ General
+.PHONY: help
+help: ## Display this help.
+	@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n  make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf "  \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
+
+
+##@ Dev
+.PHONY: doc
+doc: ## Generate the doc
+	docker run --rm --volume "$$(pwd):/terraform-docs" -u $$(id -u) quay.io/terraform-docs/terraform-docs:0.18.0 markdown /terraform-docs > README.md
+
+
+.PHONY: lint
+lint: ## Lint the codebase
+	@echo "\033[0;33m[Linting...]\033[0m"
+	@if command -v tflint > /dev/null; then \
+		tflint; \
+	else \
+		docker run --rm -v $$(pwd):/data -t ghcr.io/terraform-linters/tflint; \
+	fi
+	@echo "${OK}"
+
+.PHONY: validate
+validate: ## Run terraform validate
+	@echo "\033[0;33m[Terraform validate...]\033[0m"
+	@terraform validate
+	@echo "${OK}"
+
+.PHONY: test
+test: ## Run the tests
+	@echo "\033[0;33m[Running tests...]\033[0m"
+	@terraform test
+	@echo "${OK}"
+
+.PHONY: formatting
+formatting: ## Check the formatting with terraform fmt
+	@echo "\033[0;33m[Terraform fmt...]\033[0m"
+	@terraform fmt -check
+	@echo "${OK}"
+
+
+.PHONY: check-no-tests
+check-no-tests: formatting validate lint
+
+.PHONY: check
+check: check-no-tests test
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4f49214
--- /dev/null
+++ b/README.md
@@ -0,0 +1,222 @@
+# Nebuly Platform (Azure)
+
+Terraform module for provisioning Nebuly Platform resources on Microsoft Azure.
+
+Available on [Terraform Registry](https://registry.terraform.io/modules/nebuly-ai/nebuly-platform/azurerm/latest).
+
+## Quickstart
+
+> ⚠️ **Prerequisite**:
+> before using this Terraform module, ensure that you have your Nebuly credentials ready.
+> These credentials are necessary to activate your installation and should be provided via the `nebuly_credentials` input variable.
+
+To get started with Nebuly installation on Microsoft Azure, you can follow the steps below.
+
+These instructions will guide you through the installation using Nebuly's default standard configuration with the Nebuly Helm Chart.
+
+For specific configurations or assistance, reach out to the Nebuly Slack channel or email [support@nebuly.ai](mailto:support@nebuly.ai).
+
+### 1. Terraform setup
+
+Import Nebuly into your Terraform root module, provide the necessary variables, and apply the changes.
+
+For configuration examples, you can refer to the [Examples](#examples).
+
+Once the Terraform changes are applied, proceed with the next steps to deploy Nebuly on the provisioned Azure Kubernetes Service (AKS) cluster.
+
+### 2. Connect to the Azure Kubernetes Service cluster
+
+Prerequisites: install the [Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli).
+
+* Fetch the command for retrieving the credentials from the module outputs:
+
+```shell
+terraform output aks_get_credentials
+```
+
+* Run the command returned by the previous step.
+
+### 3. Create image pull secret
+
+Create a Kubernetes [Image Pull Secret](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/) for
+authenticating with your Docker registry and pulling the Nebuly Docker images.
+The auto-generated Helm values use the name defined in the `k8s_image_pull_secret_name` input variable for the Image Pull Secret.
+If you prefer a custom name, update either the Terraform variable or your Helm values accordingly.
+
+### 4. Create Secret Provider Class
+Create a Secret Provider Class to allow AKS to fetch credentials from the provisioned Key Vault.
+
+* Get the Secret Provider Class YAML definition from the Terraform module outputs:
+  ```shell
+  terraform output secret_provider_class
+  ```
+
+* Copy the output of the command into a file named `secret-provider-class.yaml`.
+
+* Run the following commands to install Nebuly in the Kubernetes namespace `nebuly`:
+
+  ```shell
+  kubectl create ns nebuly
+  kubectl apply --server-side -f secret-provider-class.yaml
+  ```
+
+### 5. Bootstrap AKS cluster
+
+Install the bootstrap Helm chart to set up all the dependencies required for installing the Nebuly Platform Helm chart on AKS.
+
+Refer to the [chart documentation](https://github.com/nebuly-ai/helm-charts/tree/main/bootstrap-azure) for all the configuration details.
+
+```shell
+helm install oci://ghcr.io/nebuly-ai/helm-charts/bootstrap-azure \
+  --namespace nebuly-bootstrap \
+  --generate-name \
+  --create-namespace
+```
+
+### 6. Install nebuly-platform chart
+
+Retrieve the auto-generated values from the Terraform outputs and save them to a file named `values.yaml`:
+
+```shell
+terraform output helm_values
+```
+
+Install the Nebuly Platform Helm chart.
+Refer to the [chart documentation](https://github.com/nebuly-ai/helm-charts/tree/main/nebuly-platform) for detailed configuration options.
+
+```shell
+helm install oci://ghcr.io/nebuly-ai/helm-charts/nebuly-platform \
+  --namespace nebuly \
+  --generate-name \
+  -f values.yaml \
+  --timeout 10m
+```
+
+> ℹ️ During the initial installation of the chart, all required Nebuly LLMs are uploaded to your model registry.
+> This process can take approximately 5 minutes. If the `helm install` command appears to be stuck, don't worry: it's simply waiting for the upload to finish.
+
+
+## Examples
+
+You can find examples of code that uses this Terraform module in the [examples](./examples) directory.
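+
+For instance, a minimal instantiation of the module looks like the following sketch. All values are placeholders: see [examples/basic](./examples/basic) for a complete, working configuration including the provider setup and outputs.
+
+```hcl
+module "platform" {
+  source  = "nebuly-ai/nebuly-platform/azurerm"
+  version = ">=0.2.10"
+
+  # Placeholder values: adjust them to your environment.
+  location            = "EastUS"
+  resource_group_name = "my-resource-group"
+  resource_prefix     = "myprefix"
+  platform_domain     = "platform.example.com"
+
+  # Credentials provided by Nebuly for activating your Platform installation.
+  nebuly_credentials = {
+    client_id     = "<your-nebuly-client-id>"
+    client_secret = "<your-nebuly-client-secret>"
+  }
+
+  # Object IDs granted the Cluster Admin role over the AKS cluster.
+  aks_cluster_admin_object_ids = ["<your-aad-group-object-id>"]
+}
+```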
+ + + + + +## Providers + +| Name | Version | +|------|---------| +| [azuread](#provider\_azuread) | ~>2.53 | +| [azurerm](#provider\_azurerm) | ~>3.114 | +| [http](#provider\_http) | ~>3.4 | +| [random](#provider\_random) | ~>3.6 | +| [time](#provider\_time) | ~>0.12 | +| [tls](#provider\_tls) | ~>4.0 | + + +## Outputs + +| Name | Description | +|------|-------------| +| [aks\_get\_credentials](#output\_aks\_get\_credentials) | Command for getting the credentials for connecting to the provisioned AKS cluster. | +| [helm\_values](#output\_helm\_values) | The `values.yaml` file for installing Nebuly with Helm.

The default standard configuration is used, which uses Nginx as the ingress controller and exposes the application to the Internet. This configuration can be customized according to specific needs. |
+| [secret\_provider\_class](#output\_secret\_provider\_class) | The `secret-provider-class.yaml` file to make Kubernetes reference the secrets stored in the Key Vault. |
+
+
+## Inputs
+
+| Name | Description | Type | Default | Required |
+|------|-------------|------|---------|:--------:|
+| [aks\_cluster\_admin\_object\_ids](#input\_aks\_cluster\_admin\_object\_ids) | Object IDs that are granted the Cluster Admin role over the AKS cluster | `set(string)` | n/a | yes |
+| [aks\_kubernetes\_version](#input\_aks\_kubernetes\_version) | The Kubernetes version to use. |
object({
workers = string
control_plane = string
})
|
{
"control_plane": "1.30.3",
"workers": "1.30.3"
}
| no | +| [aks\_log\_analytics\_workspace](#input\_aks\_log\_analytics\_workspace) | Existing azurerm\_log\_analytics\_workspace to attach azurerm\_log\_analytics\_solution. Providing the config disables creation of azurerm\_log\_analytics\_workspace. |
object({
id = string
name = string
location = optional(string)
resource_group_name = optional(string)
})
| `null` | no | +| [aks\_net\_profile\_dns\_service\_ip](#input\_aks\_net\_profile\_dns\_service\_ip) | IP address within the Kubernetes service address range that is used by cluster service discovery (kube-dns). Must be inluced in net\_profile\_cidr. Example: 10.32.0.10 | `string` | `"10.32.0.10"` | no | +| [aks\_net\_profile\_service\_cidr](#input\_aks\_net\_profile\_service\_cidr) | The Network Range used by the Kubernetes service. Must not overlap with the AKS Nodes address space. Example: 10.32.0.0/24 | `string` | `"10.32.0.0/24"` | no | +| [aks\_sku\_tier](#input\_aks\_sku\_tier) | The AKS tier. Possible values are: Free, Standard, Premium. It is recommended to use Standard or Premium for production workloads. | `string` | `"Standard"` | no | +| [aks\_sys\_pool](#input\_aks\_sys\_pool) | The configuration of the AKS System Nodes Pool. |
object({
vm_size : string
nodes_max_pods : number
name : string
availability_zones : list(string)
disk_size_gb : number
disk_type : string
nodes_labels : optional(map(string), {})
nodes_tags : optional(map(string), {})
only_critical_addons_enabled : optional(bool, false)
# Auto-scaling settings
nodes_count : optional(number, null)
enable_auto_scaling : optional(bool, false)
agents_min_count : optional(number, null)
agents_max_count : optional(number, null)
})
|
{
"agents_max_count": 3,
"agents_min_count": 1,
"availability_zones": [
"1",
"2",
"3"
],
"disk_size_gb": 128,
"disk_type": "Ephemeral",
"enable_auto_scaling": true,
"name": "system",
"nodes_max_pods": 60,
"only_critical_addons_enabled": false,
"vm_size": "Standard_E4ads_v5"
}
| no | +| [aks\_worker\_pools](#input\_aks\_worker\_pools) | The worker pools of the AKS cluster, each with the respective configuration.
The default configuration uses a single worker node, with no HA. |
map(object({
enabled : optional(bool, true)
vm_size : string
priority : optional(string, "Regular")
tags : map(string)
max_pods : number
disk_size_gb : optional(number, 128)
disk_type : string
availability_zones : list(string)
node_taints : optional(list(string), [])
node_labels : optional(map(string), {})
# Auto-scaling settings
nodes_count : optional(number, null)
enable_auto_scaling : optional(bool, false)
nodes_min_count : optional(number, null)
nodes_max_count : optional(number, null)
}))
|
{
"a100w01": {
"availability_zones": [
"1"
],
"disk_size_gb": 128,
"disk_type": "Ephemeral",
"enable_auto_scaling": true,
"max_pods": 30,
"node_labels": {
"nebuly.com/accelerator": "nvidia-ampere-a100"
},
"node_taints": [
"nvidia.com/gpu=:NoSchedule"
],
"nodes_count": null,
"nodes_max_count": 1,
"nodes_min_count": 0,
"priority": "Regular",
"tags": {},
"vm_size": "Standard_NC24ads_A100_v4"
},
"a100w02": {
"availability_zones": [
"2"
],
"disk_size_gb": 128,
"disk_type": "Ephemeral",
"enable_auto_scaling": true,
"max_pods": 30,
"node_labels": {
"nebuly.com/accelerator": "nvidia-ampere-a100"
},
"node_taints": [
"nvidia.com/gpu=:NoSchedule"
],
"nodes_count": null,
"nodes_max_count": 1,
"nodes_min_count": 0,
"priority": "Regular",
"tags": {},
"vm_size": "Standard_NC24ads_A100_v4"
},
"a100w03": {
"availability_zones": [
"3"
],
"disk_size_gb": 128,
"disk_type": "Ephemeral",
"enable_auto_scaling": true,
"max_pods": 30,
"node_labels": {
"nebuly.com/accelerator": "nvidia-ampere-a100"
},
"node_taints": [
"nvidia.com/gpu=:NoSchedule"
],
"nodes_count": null,
"nodes_max_count": 1,
"nodes_min_count": 0,
"priority": "Regular",
"tags": {},
"vm_size": "Standard_NC24ads_A100_v4"
},
"t4workers": {
"availability_zones": [
"1",
"2",
"3"
],
"disk_size_gb": 128,
"disk_type": "Ephemeral",
"enable_auto_scaling": true,
"max_pods": 30,
"node_labels": {
"nebuly.com/accelerator": "nvidia-tesla-t4"
},
"node_taints": [
"nvidia.com/gpu=:NoSchedule"
],
"nodes_count": null,
"nodes_max_count": 1,
"nodes_min_count": 0,
"priority": "Regular",
"tags": {},
"vm_size": "Standard_NC4as_T4_v3"
}
}
| no | +| [azure\_openai\_location](#input\_azure\_openai\_location) | The Azure region in which to deploy the Azure OpenAI models.
Note that the models required by Nebuly are supported only in a few specific regions. For more information, you can refer to the Azure documentation:
https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#standard-deployment-model-availability | `string` | `"EastUS"` | no | +| [azure\_openai\_rate\_limits](#input\_azure\_openai\_rate\_limits) | The rate limits (K-tokens/minute) of the deployed Azure OpenAI models. |
object({
gpt_4 : number
gpt_4o_mini : number
})
|
{
"gpt_4": 100,
"gpt_4o_mini": 100
}
| no | +| [k8s\_image\_pull\_secret\_name](#input\_k8s\_image\_pull\_secret\_name) | The name of the Kubernetes Image Pull Secret to use.
This value will be used to auto-generate the values.yaml file for installing the Nebuly Platform Helm chart. | `string` | `"nebuly-docker-pull"` | no | +| [key\_vault\_public\_network\_access\_enabled](#input\_key\_vault\_public\_network\_access\_enabled) | Can the Key Vault be accessed from the Internet, according to the firewall rules?
Defaults to true to allow the Terraform module to be executed even outside the private virtual network.
Note that even when set to true, the Key Vault firewall rules still apply and all connections are denied by default unless whitelisted. | `bool` | `true` | no |
+| [key\_vault\_purge\_protection\_enabled](#input\_key\_vault\_purge\_protection\_enabled) | Is purge protection enabled for the Key Vault? | `bool` | `false` | no |
+| [key\_vault\_sku\_name](#input\_key\_vault\_sku\_name) | The SKU of the Key Vault. | `string` | `"Standard"` | no |
+| [key\_vault\_soft\_delete\_retention\_days](#input\_key\_vault\_soft\_delete\_retention\_days) | The number of days that items should be retained for once soft-deleted. This value can be between 7 and 90 days. | `number` | `7` | no |
+| [location](#input\_location) | The region in which to provision the resources. | `string` | n/a | yes |
+| [nebuly\_credentials](#input\_nebuly\_credentials) | The credentials provided by Nebuly are required for activating your platform installation.
If you haven't received your credentials or have lost them, please contact support@nebuly.ai. |
object({
client_id : string
client_secret : string
})
| n/a | yes | +| [platform\_domain](#input\_platform\_domain) | The domain on which the deployed Nebuly platform is made accessible. | `string` | n/a | yes | +| [postgres\_server\_admin\_username](#input\_postgres\_server\_admin\_username) | The username of the admin user of the PostgreSQL Server. | `string` | `"nebulyadmin"` | no | +| [postgres\_server\_alert\_rules](#input\_postgres\_server\_alert\_rules) | The Azure Monitor alert rules to set on the provisioned PostgreSQL server. |
map(object({
description = string
frequency = string
window_size = string
action_group_id = string
severity = number

criteria = optional(
object({
aggregation = string
metric_name = string
operator = string
threshold = number
})
, null)
dynamic_criteria = optional(
object({
aggregation = string
metric_name = string
operator = string
alert_sensitivity = string
})
, null)
}))
| `{}` | no | +| [postgres\_server\_high\_availability](#input\_postgres\_server\_high\_availability) | High-availability configuration of the DB server. Possible values for mode are: SameZone or ZoneRedundant. |
object({
enabled : bool
mode : optional(string, "SameZone")
standby_availability_zone : optional(string, null)
})
|
{
"enabled": true,
"mode": "SameZone"
}
| no | +| [postgres\_server\_lock](#input\_postgres\_server\_lock) | Optionally lock the PostgreSQL server to prevent deletion. |
object({
enabled = optional(bool, false)
notes = optional(string, "Cannot be deleted.")
name = optional(string, "terraform-lock")
})
|
{
"enabled": true
}
| no | +| [postgres\_server\_maintenance\_window](#input\_postgres\_server\_maintenance\_window) | The window for performing automatic maintenance of the PostgreSQL Server. Default is Sunday at 00:00 of the timezone of the server location. |
object({
day_of_week : number
start_hour : number
start_minute : number
})
|
{
"day_of_week": 0,
"start_hour": 0,
"start_minute": 0
}
| no | +| [postgres\_server\_max\_storage\_mb](#input\_postgres\_server\_max\_storage\_mb) | The max storage allowed for the PostgreSQL Flexible Server. Possible values are 32768, 65536, 131072, 262144, 524288, 1048576, 2097152, 4193280, 4194304, 8388608, 16777216 and 33553408. | `number` | `262144` | no | +| [postgres\_server\_optional\_configurations](#input\_postgres\_server\_optional\_configurations) | Optional Flexible PostgreSQL configurations. Defaults to recommended configurations. | `map(string)` |
{
"intelligent_tuning": "on",
"intelligent_tuning.metric_targets": "ALL",
"metrics.autovacuum_diagnostics": "on",
"metrics.collector_database_activity": "on",
"pg_qs.query_capture_mode": "ALL",
"pg_qs.retention_period_in_days": "7",
"pg_qs.store_query_plans": "on",
"pgaudit.log": "WRITE",
"pgms_wait_sampling.query_capture_mode": "ALL",
"track_io_timing": "on"
}
| no | +| [postgres\_server\_point\_in\_time\_backup](#input\_postgres\_server\_point\_in\_time\_backup) | The backup settings of the PostgreSQL Server. |
object({
geo_redundant : optional(bool, true)
retention_days : optional(number, 30)
})
|
{
"geo_redundant": true,
"retention_days": 30
}
| no | +| [postgres\_server\_sku](#input\_postgres\_server\_sku) | The SKU of the PostgreSQL Server, including the Tier and the Name. Examples: B\_Standard\_B1ms, GP\_Standard\_D2s\_v3, MO\_Standard\_E4s\_v3 |
object({
tier : string
name : string
})
|
{
"name": "Standard_D4ds_v5",
"tier": "GP"
}
| no | +| [postgres\_version](#input\_postgres\_version) | The PostgreSQL version to use. | `string` | `"16"` | no | +| [private\_dns\_zones](#input\_private\_dns\_zones) | Private DNS zones to use for Private Endpoint connections. If not provided, a new DNS Zone
is created and linked to the respective subnet. |
object({
flexible_postgres = optional(object({
name : string
id : string
}), null)
})
| `{}` | no |
+| [resource\_group\_name](#input\_resource\_group\_name) | The name of the resource group in which to provision the resources. | `string` | n/a | yes |
+| [resource\_prefix](#input\_resource\_prefix) | The prefix that is used for generating resource names. | `string` | n/a | yes |
+| [subnet\_address\_space\_aks\_nodes](#input\_subnet\_address\_space\_aks\_nodes) | Address space of the new subnet in which to create the nodes of the AKS cluster.
If `subnet_name_aks_nodes` is provided, the existing subnet is used and this variable is ignored. | `list(string)` |
[
"10.0.0.0/22"
]
| no |
+| [subnet\_address\_space\_flexible\_postgres](#input\_subnet\_address\_space\_flexible\_postgres) | Address space of the new subnet delegated to the Flexible PostgreSQL Server service.
If `subnet_name_flexible_postgres` is provided, the existing subnet is used and this variable is ignored. | `list(string)` |
[
"10.0.12.0/26"
]
| no | +| [subnet\_address\_space\_private\_endpoints](#input\_subnet\_address\_space\_private\_endpoints) | Address space of the new subnet in which to create private endpoints.
If `subnet_name_private_endpoints` is provided, the existing subnet is used and this variable is ignored. | `list(string)` |
[
"10.0.8.0/26"
]
| no | +| [subnet\_name\_aks\_nodes](#input\_subnet\_name\_aks\_nodes) | Optional name of the subnet to be used for provisioning AKS nodes.
If not provided, a new subnet is created. | `string` | `null` | no | +| [subnet\_name\_flexible\_postgres](#input\_subnet\_name\_flexible\_postgres) | Optional name of the subnet delegated to Flexible PostgreSQL Server service.
If not provided, a new subnet is created. | `string` | `null` | no |
+| [subnet\_name\_private\_endpoints](#input\_subnet\_name\_private\_endpoints) | Optional name of the subnet to which the Private Endpoints are attached.
If not provided, a new subnet is created. | `string` | `null` | no | +| [tags](#input\_tags) | Common tags that are applied to all resources. | `map(string)` | `{}` | no | +| [virtual\_network\_address\_space](#input\_virtual\_network\_address\_space) | Address space of the new virtual network in which to create resources.
If `virtual_network_name` is provided, the existing virtual network is used and this variable is ignored. | `list(string)` |
[
"10.0.0.0/16"
]
| no | +| [virtual\_network\_name](#input\_virtual\_network\_name) | Optional name of the virtual network in which to create the resources.
If not provided, a new virtual network is created. | `string` | `null` | no | +| [whitelist\_current\_ip](#input\_whitelist\_current\_ip) | If true, add the current IP executing the Terraform module to the whitelist rules of the provisioned services.
This allows Terraform to access and configure the resources even when running outside the virtual network.

The whitelisting excludes the Database Server, which remains unexposed to the Internet and is accessible only from the virtual network. | `bool` | `true` | no | +| [whitelisted\_ips](#input\_whitelisted\_ips) | Optional list of IPs that will be able to access the following resources from the internet: Azure Kubernetes Service (AKS) API Server,
Azure Key Vault, Azure Storage Account. | `list(string)` | `[]` | no | + +## Resources + + +- resource.azuread_application.main (/terraform-docs/main.tf#234) +- resource.azuread_service_principal.main (/terraform-docs/main.tf#240) +- resource.azuread_service_principal_password.main (/terraform-docs/main.tf#245) +- resource.azurerm_cognitive_account.main (/terraform-docs/main.tf#452) +- resource.azurerm_cognitive_deployment.gpt_4_turbo (/terraform-docs/main.tf#471) +- resource.azurerm_cognitive_deployment.gpt_4o_mini (/terraform-docs/main.tf#486) +- resource.azurerm_key_vault.main (/terraform-docs/main.tf#193) +- resource.azurerm_key_vault_secret.azure_openai_api_key (/terraform-docs/main.tf#501) +- resource.azurerm_key_vault_secret.azuread_application_client_id (/terraform-docs/main.tf#249) +- resource.azurerm_key_vault_secret.azuread_application_client_secret (/terraform-docs/main.tf#258) +- resource.azurerm_key_vault_secret.jwt_signing_key (/terraform-docs/main.tf#681) +- resource.azurerm_key_vault_secret.nebuly_azure_client_id (/terraform-docs/main.tf#271) +- resource.azurerm_key_vault_secret.nebuly_azure_client_secret (/terraform-docs/main.tf#280) +- resource.azurerm_key_vault_secret.postgres_password (/terraform-docs/main.tf#435) +- resource.azurerm_key_vault_secret.postgres_user (/terraform-docs/main.tf#426) +- resource.azurerm_kubernetes_cluster_node_pool.linux_pools (/terraform-docs/main.tf#638) +- resource.azurerm_management_lock.postgres_server (/terraform-docs/main.tf#369) +- resource.azurerm_monitor_metric_alert.postgres_server_alerts (/terraform-docs/main.tf#377) +- resource.azurerm_postgresql_flexible_server.main (/terraform-docs/main.tf#299) +- resource.azurerm_postgresql_flexible_server_configuration.mandatory_configurations (/terraform-docs/main.tf#350) +- resource.azurerm_postgresql_flexible_server_configuration.optional_configurations (/terraform-docs/main.tf#343) +- resource.azurerm_postgresql_flexible_server_database.analytics (/terraform-docs/main.tf#363) +- resource.azurerm_postgresql_flexible_server_database.auth (/terraform-docs/main.tf#357) +- resource.azurerm_private_dns_zone.flexible_postgres (/terraform-docs/main.tf#172) +- resource.azurerm_private_dns_zone_virtual_network_link.flexible_postgres (/terraform-docs/main.tf#178) +- resource.azurerm_role_assignment.aks_network_contributor (/terraform-docs/main.tf#633) +- resource.azurerm_role_assignment.key_vault_secret_officer__current (/terraform-docs/main.tf#224) +- resource.azurerm_role_assignment.key_vault_secret_user__aks (/terraform-docs/main.tf#216) +- resource.azurerm_role_assignment.storage_container_models__data_contributor (/terraform-docs/main.tf#539) +- resource.azurerm_storage_account.main (/terraform-docs/main.tf#515) +- resource.azurerm_storage_container.models (/terraform-docs/main.tf#535) +- resource.azurerm_subnet.aks_nodes (/terraform-docs/main.tf#128) +- resource.azurerm_subnet.flexible_postgres (/terraform-docs/main.tf#150) +- resource.azurerm_subnet.private_endpints (/terraform-docs/main.tf#142) +- resource.azurerm_virtual_network.main (/terraform-docs/main.tf#120) +- resource.random_password.postgres_server_admin_password (/terraform-docs/main.tf#294) +- resource.time_sleep.wait_aks_creation (/terraform-docs/main.tf#620) +- resource.tls_private_key.aks (/terraform-docs/main.tf#549) +- resource.tls_private_key.jwt_signing_key (/terraform-docs/main.tf#677) +- data source.azurerm_client_config.current (/terraform-docs/main.tf#78) +- data source.azurerm_resource_group.main 
(/terraform-docs/main.tf#75)
+- data source.azurerm_subnet.aks_nodes (/terraform-docs/main.tf#89)
+- data source.azurerm_subnet.flexible_postgres (/terraform-docs/main.tf#103)
+- data source.azurerm_virtual_network.main (/terraform-docs/main.tf#83)
+- data source.http_http.current_ip (/terraform-docs/main.tf#80)
diff --git a/examples/basic/README.md b/examples/basic/README.md
new file mode 100644
index 0000000..fb9135e
--- /dev/null
+++ b/examples/basic/README.md
@@ -0,0 +1,12 @@
+# Nebuly Platform Example - Basic usage
+
+This directory shows an example of Terraform code that uses the
+[terraform-azurerm-nebuly-platform](https://github.com/nebuly-ai/terraform-azurerm-nebuly-platform) module.
+
+In this example, all default settings are applied, resulting in the creation of a new virtual network to which all resources will be linked.
+
+The Azure Key Vault for storing secrets and the Azure Storage Account for storing Nebuly's LLMs will only be accessible from the IP address
+of the current Terraform provisioner. This allows you to run `terraform apply` without having to set up a VPN.
+
+To modify this behavior, you can set `whitelist_current_ip=false` as an input.
+To whitelist more IP addresses, you can specify them in the input variable `whitelisted_ips`.
diff --git a/examples/basic/main.tf b/examples/basic/main.tf
new file mode 100644
index 0000000..12411b5
--- /dev/null
+++ b/examples/basic/main.tf
@@ -0,0 +1,82 @@
+# ------- Terraform Setup ------ #
+terraform {
+  required_version = ">=1.9"
+
+  required_providers {
+    azurerm = {
+      source  = "hashicorp/azurerm"
+      version = "~>3.114"
+    }
+    azuread = {
+      source  = "hashicorp/azuread"
+      version = "~>2.53"
+    }
+    random = {
+      source  = "hashicorp/random"
+      version = "~>3.6"
+    }
+  }
+}
+
+provider "azurerm" {
+  features {}
+
+  client_id       = var.client_id
+  client_secret   = var.client_secret
+  tenant_id       = var.tenant_id
+  subscription_id = var.subscription_id
+}
+
+
+# ------ Variables ------ #
+variable "client_id" {
+  type = string
+}
+variable "subscription_id" {
+  type = string
+}
+variable "tenant_id" {
+  type = string
+}
+variable "client_secret" {
+  type = string
+}
+
+
+module "platform" {
+  source  = "nebuly-ai/nebuly-platform/azurerm"
+  version = ">=0.2.10"
+
+  location            = "EastUS"
+  resource_group_name = "my-resource-group"
+  platform_domain     = "platform.azure.testing"
+  resource_prefix     = "myprefix"
+
+  # Credentials provided by Nebuly for activating your Platform installation.
+  nebuly_credentials = {
+    client_id     = ""
+    client_secret = ""
+  }
+
+  key_vault_public_network_access_enabled = true
+  aks_cluster_admin_object_ids = [
+    # Add here your AAD Groups, users, service principals, etc.
+    # These identities will be able to access the created AKS cluster as "Cluster Admin".
+ ] + + tags = { + "env" : "dev" + "managed-by" : "terraform" + } +} + + +# ------ Outputs ------ # +output "secret_provider_class" { + value = module.platform.secret_provider_class + sensitive = true +} +output "helm_values" { + value = module.platform.helm_values + sensitive = true +} diff --git a/main.tf b/main.tf new file mode 100644 index 0000000..c560fb7 --- /dev/null +++ b/main.tf @@ -0,0 +1,767 @@ +terraform { + required_version = ">= 1.9" + + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = "~>3.114" + } + time = { + source = "hashicorp/time" + version = "~>0.12" + } + azuread = { + source = "hashicorp/azuread" + version = "~>2.53" + } + random = { + source = "hashicorp/random" + version = "~>3.6" + } + tls = { + source = "hashicorp/tls" + version = "~>4.0" + } + http = { + source = "hashicorp/http" + version = "~>3.4" + } + } +} + + + +# ------ Locals ------ # +locals { + aks_cluster_name = format("%snebuly", var.resource_prefix) + + current_ip = chomp(data.http.current_ip.response_body) + whitelisted_ips = var.whitelist_current_ip ? concat([local.current_ip], var.whitelisted_ips) : var.whitelisted_ips + + postgres_server_name = format("%snebulydb", var.resource_prefix) + postgres_server_configurations = { + "azure.extensions" : "vector,pgaudit", + "shared_preload_libraries" : "pgaudit", + } + + key_vault_name = format("%snebulykv", var.resource_prefix) + + use_existing_virtual_network = var.virtual_network_name != null + use_existing_aks_nodes_subnet = var.subnet_name_aks_nodes != null + use_existing_private_endpoints_subnet = var.subnet_name_private_endpoints != null + use_existing_flexible_postgres_subnet = var.subnet_name_flexible_postgres != null + + virtual_network = ( + local.use_existing_virtual_network ? + data.azurerm_virtual_network.main[0] : + azurerm_virtual_network.main[0] + ) + aks_nodes_subnet = ( + local.use_existing_aks_nodes_subnet ? + data.azurerm_subnet.aks_nodes[0] : + azurerm_subnet.aks_nodes[0] + ) + flexible_postgres_subnet = ( + local.use_existing_flexible_postgres_subnet ? + data.azurerm_subnet.flexible_postgres[0] : + azurerm_subnet.flexible_postgres[0] + ) +} + + + + +# ------ Data Sources ------ # +data "azurerm_resource_group" "main" { + name = var.resource_group_name +} +data "azurerm_client_config" "current" { +} +data "http" "current_ip" { + url = "https://ipv4.icanhazip.com" +} +data "azurerm_virtual_network" "main" { + count = local.use_existing_virtual_network ? 1 : 0 + + resource_group_name = var.resource_group_name + name = var.virtual_network_name +} +data "azurerm_subnet" "aks_nodes" { + count = local.use_existing_aks_nodes_subnet ? 1 : 0 + + resource_group_name = data.azurerm_resource_group.main.name + virtual_network_name = data.azurerm_virtual_network.main[0].name + name = var.subnet_name_aks_nodes + + lifecycle { + precondition { + condition = length(data.azurerm_virtual_network.main) > 0 + error_message = "`virtual_network_name` must be provided and must point to a valid virtual network." + } + } +} +data "azurerm_subnet" "flexible_postgres" { + count = local.use_existing_flexible_postgres_subnet ? 1 : 0 + + resource_group_name = data.azurerm_resource_group.main.name + virtual_network_name = data.azurerm_virtual_network.main[0].name + name = var.subnet_name_flexible_postgres + + lifecycle { + precondition { + condition = length(data.azurerm_virtual_network.main) > 0 + error_message = "`virtual_network_name` must be provided and must point to a valid virtual network." 
+ } + } +} + + +# ------ Networking: Networks and Subnets ------ # +resource "azurerm_virtual_network" "main" { + count = local.use_existing_virtual_network ? 0 : 1 + + name = format("%s-nebuly-vnet", var.resource_prefix) + resource_group_name = data.azurerm_resource_group.main.name + location = var.location + address_space = var.virtual_network_address_space +} +resource "azurerm_subnet" "aks_nodes" { + count = local.use_existing_aks_nodes_subnet ? 0 : 1 + + name = "aks-nodes" + virtual_network_name = local.virtual_network.name + resource_group_name = data.azurerm_resource_group.main.name + address_prefixes = var.subnet_address_space_aks_nodes + + service_endpoints = [ + "Microsoft.Storage", + "Microsoft.CognitiveServices", + "Microsoft.KeyVault", + ] +} +resource "azurerm_subnet" "private_endpints" { + count = local.use_existing_private_endpoints_subnet ? 0 : 1 + + name = "private-endpoints" + virtual_network_name = local.virtual_network.name + resource_group_name = data.azurerm_resource_group.main.name + address_prefixes = var.subnet_address_space_private_endpoints +} +resource "azurerm_subnet" "flexible_postgres" { + count = local.use_existing_flexible_postgres_subnet ? 0 : 1 + + name = "flexible-postgres" + virtual_network_name = local.virtual_network.name + resource_group_name = data.azurerm_resource_group.main.name + address_prefixes = var.subnet_address_space_flexible_postgres + + delegation { + name = "delegation" + service_delegation { + actions = [ + "Microsoft.Network/virtualNetworks/subnets/join/action", + ] + name = "Microsoft.DBforPostgreSQL/flexibleServers" + } + } +} + + + +# ------ Networking: Private DNS Zones ------ # +resource "azurerm_private_dns_zone" "flexible_postgres" { + count = var.private_dns_zones.flexible_postgres == null ? 1 : 0 + + name = "${var.resource_prefix}.nebuly.postgres.database.azure.com" + resource_group_name = data.azurerm_resource_group.main.name +} +resource "azurerm_private_dns_zone_virtual_network_link" "flexible_postgres" { + count = var.private_dns_zones.flexible_postgres == null ? 
1 : 0
+
+  name = format(
+    "%s-flexible-postgres-%s",
+    var.resource_prefix,
+    local.virtual_network.name,
+  )
+  resource_group_name   = data.azurerm_resource_group.main.name
+  virtual_network_id    = local.virtual_network.id
+  private_dns_zone_name = azurerm_private_dns_zone.flexible_postgres[0].name
+}
+
+
+# ------ Key Vault ------ #
+resource "azurerm_key_vault" "main" {
+  name                = local.key_vault_name
+  location            = var.location
+  tenant_id           = data.azurerm_client_config.current.tenant_id
+  resource_group_name = data.azurerm_resource_group.main.name
+
+  enable_rbac_authorization = true
+
+  soft_delete_retention_days    = var.key_vault_soft_delete_retention_days
+  purge_protection_enabled      = var.key_vault_purge_protection_enabled
+  public_network_access_enabled = var.key_vault_public_network_access_enabled
+
+  sku_name = lower(var.key_vault_sku_name)
+
+  network_acls {
+    bypass                     = "AzureServices"
+    default_action             = "Deny"
+    virtual_network_subnet_ids = [local.aks_nodes_subnet.id]
+    ip_rules                   = local.whitelisted_ips
+  }
+
+  tags = var.tags
+}
+resource "azurerm_role_assignment" "key_vault_secret_user__aks" {
+  scope = azurerm_key_vault.main.id
+  principal_id = try(
+    module.aks.key_vault_secrets_provider.secret_identity[0].object_id,
+    module.aks.cluster_identity.object_id,
+  )
+  role_definition_name = "Key Vault Secrets User"
+}
+resource "azurerm_role_assignment" "key_vault_secret_officer__current" {
+  scope                = azurerm_key_vault.main.id
+  role_definition_name = "Key Vault Secrets Officer"
+  principal_id         = data.azurerm_client_config.current.object_id
+}
+
+
+
+
+# ------ Identity ------ #
+resource "azuread_application" "main" {
+  display_name     = format("%s.nebuly.platform", var.resource_prefix)
+  owners           = [data.azurerm_client_config.current.object_id]
+  sign_in_audience = "AzureADMyOrg" # default
+  identifier_uris  = []
+}
+resource "azuread_service_principal" "main" {
+  client_id                    = azuread_application.main.client_id
+  owners                       = [data.azurerm_client_config.current.object_id]
+  app_role_assignment_required = true
+}
+resource "azuread_service_principal_password" "main" {
+  service_principal_id = azuread_service_principal.main.id
+  end_date_relative    = null
+}
+resource "azurerm_key_vault_secret" "azuread_application_client_id" {
+  key_vault_id = azurerm_key_vault.main.id
+  name         = format("%s-azure-client-id", var.resource_prefix)
+  value        = azuread_application.main.client_id
+
+  depends_on = [
+    azurerm_role_assignment.key_vault_secret_officer__current
+  ]
+}
+resource "azurerm_key_vault_secret" "azuread_application_client_secret" {
+  key_vault_id = azurerm_key_vault.main.id
+  name         = format("%s-azure-client-secret", var.resource_prefix)
+  # Store the service principal's password (the client secret), not the client ID.
+  value = azuread_service_principal_password.main.value
+
+  depends_on = [
+    azurerm_role_assignment.key_vault_secret_officer__current
+  ]
+}
+
+
+
+# ------ Nebuly Identity for pulling LLMs ------ #
+resource "azurerm_key_vault_secret" "nebuly_azure_client_id" {
+  key_vault_id = azurerm_key_vault.main.id
+  name         = format("%s-nebuly-azure-client-id", var.resource_prefix)
+  value        = var.nebuly_credentials.client_id
+
+  depends_on = [
+    azurerm_role_assignment.key_vault_secret_officer__current
+  ]
+}
+resource "azurerm_key_vault_secret" "nebuly_azure_client_secret" {
+  key_vault_id = azurerm_key_vault.main.id
+  name         = format("%s-nebuly-azure-client-secret", var.resource_prefix)
+  value        = var.nebuly_credentials.client_secret
+
+  depends_on = [
+    azurerm_role_assignment.key_vault_secret_officer__current
+  ]
+}
+
+
+
+
+# ------ Database Server ------ #
+resource "random_password"
"postgres_server_admin_password" { + length = 16 + special = true + override_special = "!#%&*()-_=+[]{}<>?" +} +resource "azurerm_postgresql_flexible_server" "main" { + name = local.postgres_server_name + location = var.location + resource_group_name = data.azurerm_resource_group.main.name + + create_mode = "Default" + + administrator_login = var.postgres_server_admin_username + administrator_password = random_password.postgres_server_admin_password.result + + sku_name = "${var.postgres_server_sku.tier}_${var.postgres_server_sku.name}" + version = var.postgres_version + storage_mb = var.postgres_server_max_storage_mb + + backup_retention_days = var.postgres_server_point_in_time_backup.retention_days + geo_redundant_backup_enabled = var.postgres_server_point_in_time_backup.geo_redundant + public_network_access_enabled = false + + delegated_subnet_id = local.flexible_postgres_subnet.id + private_dns_zone_id = length(azurerm_private_dns_zone.flexible_postgres) > 0 ? azurerm_private_dns_zone.flexible_postgres[0].id : var.private_dns_zones.flexible_postgres.id + + dynamic "high_availability" { + for_each = var.postgres_server_high_availability.enabled ? { "" : var.postgres_server_high_availability } : {} + content { + mode = high_availability.value.mode + standby_availability_zone = high_availability.value.standby_availability_zone + } + } + + maintenance_window { + day_of_week = var.postgres_server_maintenance_window.day_of_week + start_hour = var.postgres_server_maintenance_window.start_hour + start_minute = var.postgres_server_maintenance_window.start_minute + } + + tags = var.tags + + lifecycle { + ignore_changes = [ + zone, + high_availability[0].standby_availability_zone, + ] + } +} +resource "azurerm_postgresql_flexible_server_configuration" "optional_configurations" { + for_each = var.postgres_server_optional_configurations + + name = each.key + server_id = azurerm_postgresql_flexible_server.main.id + value = each.value +} +resource "azurerm_postgresql_flexible_server_configuration" "mandatory_configurations" { + for_each = local.postgres_server_configurations + + name = each.key + server_id = azurerm_postgresql_flexible_server.main.id + value = each.value +} +resource "azurerm_postgresql_flexible_server_database" "auth" { + name = "auth" + server_id = azurerm_postgresql_flexible_server.main.id + collation = "en_US.utf8" + charset = "utf8" +} +resource "azurerm_postgresql_flexible_server_database" "analytics" { + name = "analytics" + server_id = azurerm_postgresql_flexible_server.main.id + collation = "en_US.utf8" + charset = "utf8" +} +resource "azurerm_management_lock" "postgres_server" { + count = var.postgres_server_lock.enabled ? 1 : 0 + + name = var.postgres_server_lock.name + scope = azurerm_postgresql_flexible_server.main.id + lock_level = "CanNotDelete" + notes = var.postgres_server_lock.notes +} +resource "azurerm_monitor_metric_alert" "postgres_server_alerts" { + for_each = var.postgres_server_alert_rules + + description = each.value.description + frequency = each.value.frequency + window_size = each.value.window_size + + name = format( + "%s-%s", + local.postgres_server_name, + each.key, + ) + + resource_group_name = data.azurerm_resource_group.main.name + severity = each.value.severity + scopes = [azurerm_postgresql_flexible_server.main.id] + + target_resource_type = "Microsoft.DBforPostgreSQL/flexibleServers" + + action { + action_group_id = each.value.action_group_id + webhook_properties = {} + } + + dynamic "criteria" { + for_each = each.value.criteria == null ? 
{} : { "" : each.value.criteria } + content { + aggregation = criteria.value.aggregation + metric_name = criteria.value.metric_name + metric_namespace = "Microsoft.DBforPostgreSQL/flexibleServers" + operator = criteria.value.operator + skip_metric_validation = false + threshold = criteria.value.threshold + } + } + + dynamic "dynamic_criteria" { + for_each = each.value.dynamic_criteria == null ? {} : { "" : each.value.dynamic_criteria } + content { + aggregation = dynamic_criteria.value.aggregation + metric_name = dynamic_criteria.value.metric_name + metric_namespace = "Microsoft.DBforPostgreSQL/flexibleServers" + operator = dynamic_criteria.value.operator + alert_sensitivity = dynamic_criteria.value.alert_sensitivity + } + } + + tags = var.tags +} +resource "azurerm_key_vault_secret" "postgres_user" { + name = "${var.resource_prefix}-postgres-username" + value = var.postgres_server_admin_username + key_vault_id = azurerm_key_vault.main.id + + depends_on = [ + azurerm_role_assignment.key_vault_secret_officer__current + ] +} +resource "azurerm_key_vault_secret" "postgres_password" { + name = "${var.resource_prefix}-postgres-password" + value = random_password.postgres_server_admin_password.result + key_vault_id = azurerm_key_vault.main.id + + depends_on = [ + azurerm_role_assignment.key_vault_secret_officer__current + ] +} + + + + +# ------ Azure OpenAI ------ # +locals { + azure_openai_account_name = format("%snebuly", var.resource_prefix) +} +resource "azurerm_cognitive_account" "main" { + name = local.azure_openai_account_name + location = var.azure_openai_location + resource_group_name = data.azurerm_resource_group.main.name + kind = "OpenAI" + + sku_name = "S0" + custom_subdomain_name = local.azure_openai_account_name + + network_acls { + default_action = "Deny" + + virtual_network_rules { + subnet_id = local.aks_nodes_subnet.id + } + } + + tags = var.tags +} +resource "azurerm_cognitive_deployment" "gpt_4_turbo" { + cognitive_account_id = azurerm_cognitive_account.main.id + name = "gpt-4-turbo" + rai_policy_name = "Microsoft.Default" + + model { + format = "OpenAI" + name = "gpt-4" + version = "turbo-2024-04-09" + } + scale { + type = "Standard" + capacity = var.azure_openai_rate_limits.gpt_4 + } +} +resource "azurerm_cognitive_deployment" "gpt_4o_mini" { + cognitive_account_id = azurerm_cognitive_account.main.id + name = "gpt-4o-mini" + rai_policy_name = "Microsoft.Default" + + model { + format = "OpenAI" + name = "gpt-4o-mini" + version = "2024-07-18" + } + scale { + type = "Standard" + capacity = var.azure_openai_rate_limits.gpt_4o_mini + } +} +resource "azurerm_key_vault_secret" "azure_openai_api_key" { + name = "${var.resource_prefix}-openai-api-key" + value = azurerm_cognitive_account.main.primary_access_key + key_vault_id = azurerm_key_vault.main.id + + depends_on = [ + azurerm_role_assignment.key_vault_secret_officer__current + ] +} + + + + +# ------ Model Registry ------ # +resource "azurerm_storage_account" "main" { + name = format("%s%s", var.resource_prefix, "models") + resource_group_name = data.azurerm_resource_group.main.name + location = var.location + + account_tier = "Standard" + account_replication_type = "LRS" + access_tier = "Hot" + + public_network_access_enabled = true # TODO + is_hns_enabled = false + + network_rules { + default_action = "Deny" + ip_rules = local.whitelisted_ips + virtual_network_subnet_ids = [local.aks_nodes_subnet.id] + } + + tags = var.tags +} +resource "azurerm_storage_container" "models" { + storage_account_name = 
azurerm_storage_account.main.name + name = "models" +} +resource "azurerm_role_assignment" "storage_container_models__data_contributor" { + role_definition_name = "Storage Blob Data Contributor" + principal_id = module.aks.kubelet_identity[0].object_id + scope = azurerm_storage_container.models.resource_manager_id +} + + + + +# ------ AKS ------ # +resource "tls_private_key" "aks" { + algorithm = "RSA" # Azure VMs currently do not support ECDSA + rsa_bits = "4096" +} +module "aks" { + source = "Azure/aks/azurerm" + version = "9.1.0" + + prefix = var.resource_prefix + cluster_name = local.aks_cluster_name + location = var.location + resource_group_name = data.azurerm_resource_group.main.name + + kubernetes_version = var.aks_kubernetes_version.control_plane + orchestrator_version = var.aks_kubernetes_version.workers + sku_tier = var.aks_sku_tier + + + vnet_subnet_id = local.aks_nodes_subnet.id + net_profile_service_cidr = var.aks_net_profile_service_cidr + net_profile_dns_service_ip = var.aks_net_profile_dns_service_ip + api_server_authorized_ip_ranges = [ + for ip in local.whitelisted_ips : "${ip}/32" + ] + + rbac_aad_admin_group_object_ids = var.aks_cluster_admin_object_ids + rbac_aad_managed = true + role_based_access_control_enabled = true + local_account_disabled = true + private_cluster_enabled = false + + log_analytics_workspace = var.aks_log_analytics_workspace + + temporary_name_for_rotation = "systemback" + + os_disk_size_gb = var.aks_sys_pool.disk_size_gb + os_disk_type = var.aks_sys_pool.disk_type + enable_auto_scaling = var.aks_sys_pool.enable_auto_scaling + agents_size = var.aks_sys_pool.vm_size + agents_min_count = var.aks_sys_pool.agents_min_count + agents_max_count = var.aks_sys_pool.agents_max_count + agents_count = var.aks_sys_pool.nodes_count + agents_max_pods = var.aks_sys_pool.nodes_max_pods + agents_pool_name = var.aks_sys_pool.name + agents_availability_zones = var.aks_sys_pool.availability_zones + only_critical_addons_enabled = var.aks_sys_pool.only_critical_addons_enabled + agents_type = "VirtualMachineScaleSets" + + agents_labels = merge(var.aks_sys_pool.nodes_labels, { + "nodepool" : "defaultnodepool" + }) + + agents_tags = merge(var.aks_sys_pool.nodes_tags, { + "Agent" : "defaultnodepoolagent" + }) + + network_policy = "azure" + network_plugin = "azure" + + # We set this to `false` and create the role assignment manually to avoid invalid for_each argument error. + create_role_assignment_network_contributor = false + + public_ssh_key = tls_private_key.aks.public_key_openssh + + # Plugins + storage_profile_blob_driver_enabled = true + key_vault_secrets_provider_enabled = true + azure_policy_enabled = true + + tags = var.tags +} +resource "time_sleep" "wait_aks_creation" { + create_duration = "30s" + + depends_on = [ + module.aks + ] +} +# The AKS cluster identity has the Contributor role on the AKS second resource group (MC_myResourceGroup_myAKSCluster_eastus) +# However when using a custom VNET, the AKS cluster identity needs the Network Contributor role on the VNET subnets +# used by the system node pool and by any additional node pools. 
+# https://learn.microsoft.com/en-us/azure/aks/configure-kubenet#prerequisites +# https://learn.microsoft.com/en-us/azure/aks/configure-azure-cni#prerequisites +# https://github.com/Azure/terraform-azurerm-aks/issues/178 +resource "azurerm_role_assignment" "aks_network_contributor" { + principal_id = module.aks.cluster_identity.principal_id + scope = local.aks_nodes_subnet.id + role_definition_name = "Network Contributor" +} +resource "azurerm_kubernetes_cluster_node_pool" "linux_pools" { + for_each = { for k, v in var.aks_worker_pools : k => v if v.enabled } + + name = each.key + kubernetes_cluster_id = module.aks.aks_id + vm_size = each.value.vm_size + vnet_subnet_id = local.aks_nodes_subnet.id + priority = each.value.priority + + node_count = each.value.nodes_count + max_pods = each.value.max_pods + min_count = each.value.enable_auto_scaling ? each.value.nodes_min_count : null + max_count = each.value.enable_auto_scaling ? each.value.nodes_max_count : null + enable_auto_scaling = each.value.enable_auto_scaling + + os_disk_size_gb = each.value.disk_size_gb + os_disk_type = each.value.disk_type + + zones = each.value.availability_zones + node_taints = each.value.node_taints + node_labels = each.value.node_labels + + tags = each.value.tags + + lifecycle { + ignore_changes = [ + node_labels, + node_taints, + eviction_policy, + ] + } + + depends_on = [ + time_sleep.wait_aks_creation, + ] +} + + +# ------ Auth ------ # +resource "tls_private_key" "jwt_signing_key" { + algorithm = "RSA" + rsa_bits = 4096 +} +resource "azurerm_key_vault_secret" "jwt_signing_key" { + key_vault_id = azurerm_key_vault.main.id + name = format("%s-jwt-signing-key", var.resource_prefix) + value = tls_private_key.jwt_signing_key.private_key_pem + + depends_on = [ + azurerm_role_assignment.key_vault_secret_officer__current + ] +} + + + +# ------ Post provisioning ------ # +locals { + secret_provider_class_name = "nebuly-platform" + secret_provider_class_secret_name = "nebuly-platform-credentials" + + # k8s secrets keys + k8s_secret_key_db_username = "db-username" + k8s_secret_key_db_password = "db-password" + k8s_secret_key_jwt_signing_key = "jwt-signing-key" + k8s_secret_key_openai_api_key = "openai-api-key" + k8s_secret_key_azure_client_id = "azure-client-id" + k8s_secret_key_azure_client_secret = "azure-client-secret" + k8s_secret_key_nebuly_client_id = "nebuly-azure-client-id" + k8s_secret_key_nebuly_client_secret = "nebuly-azure-client-secret" + + helm_values = templatefile( + "${path.module}/templates/helm-values.tpl.yaml", + { + platform_domain = var.platform_domain + image_pull_secret_name = var.k8s_image_pull_secret_name + + openai_endpoint = azurerm_cognitive_account.main.endpoint + openai_frustration_deployment = azurerm_cognitive_deployment.gpt_4_turbo.name + + secret_provider_class_name = local.secret_provider_class_name + secret_provider_class_secret_name = local.secret_provider_class_secret_name + + k8s_secret_key_db_username = local.k8s_secret_key_db_username + k8s_secret_key_db_password = local.k8s_secret_key_db_password + k8s_secret_key_jwt_signing_key = local.k8s_secret_key_jwt_signing_key + k8s_secret_key_openai_api_key = local.k8s_secret_key_openai_api_key + k8s_secret_key_nebuly_client_secret = local.k8s_secret_key_nebuly_client_secret + k8s_secret_key_nebuly_client_id = local.k8s_secret_key_nebuly_client_id + + postgres_server_url = azurerm_postgresql_flexible_server.main.fqdn + postgres_auth_database_name = azurerm_postgresql_flexible_server_database.auth.name + postgres_analytics_database_name 
= azurerm_postgresql_flexible_server_database.analytics.name + + kubelet_identity_client_id = module.aks.kubelet_identity[0].client_id + storage_account_name = azurerm_storage_account.main.name + storage_container_name = azurerm_storage_container.models.name + tenant_id = data.azurerm_client_config.current.tenant_id + }, + ) + secret_provider_class = templatefile( + "${path.module}/templates/secret-provider-class.tpl.yaml", + { + secret_provider_class_name = local.secret_provider_class_name + secret_provider_class_secret_name = local.secret_provider_class_secret_name + + key_vault_name = azurerm_key_vault.main.name + tenant_id = data.azurerm_client_config.current.tenant_id + aks_managed_identity_id = try(module.aks.key_vault_secrets_provider.secret_identity[0].client_id, "TODO") + + secret_name_jwt_signing_key = azurerm_key_vault_secret.jwt_signing_key.name + secret_name_db_username = azurerm_key_vault_secret.postgres_user.name + secret_name_db_password = azurerm_key_vault_secret.postgres_password.name + secret_name_openai_api_key = azurerm_key_vault_secret.azure_openai_api_key.name + secret_name_azure_client_id = azurerm_key_vault_secret.azuread_application_client_id.name + secret_name_azure_client_secret = azurerm_key_vault_secret.azuread_application_client_secret.name + secret_name_nebuly_client_id = azurerm_key_vault_secret.nebuly_azure_client_id.name + secret_name_nebuly_client_secret = azurerm_key_vault_secret.nebuly_azure_client_secret.name + + k8s_secret_key_db_username = local.k8s_secret_key_db_username + k8s_secret_key_db_password = local.k8s_secret_key_db_password + k8s_secret_key_jwt_signing_key = local.k8s_secret_key_jwt_signing_key + k8s_secret_key_openai_api_key = local.k8s_secret_key_openai_api_key + k8s_secret_key_azure_client_id = local.k8s_secret_key_azure_client_id + k8s_secret_key_azure_client_secret = local.k8s_secret_key_azure_client_secret + k8s_secret_key_nebuly_client_secret = local.k8s_secret_key_nebuly_client_secret + k8s_secret_key_nebuly_client_id = local.k8s_secret_key_nebuly_client_id + }, + ) +} + diff --git a/outputs.tf b/outputs.tf new file mode 100644 index 0000000..c5487ab --- /dev/null +++ b/outputs.tf @@ -0,0 +1,19 @@ +output "helm_values" { + value = local.helm_values + sensitive = true + description = <