-
Notifications
You must be signed in to change notification settings - Fork 35
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add R/O Onboarding example for AKS * remove unused variables * run make generate-sdk * update sdk * add ARM template for onboarding clusters in AKS --------- Co-authored-by: Phil Andrews <[email protected]> Co-authored-by: Phil Andrews <[email protected]>
- Loading branch information
1 parent
4eb102d
commit 5d51d84
Showing
9 changed files
with
495 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# AKS and CAST AI example with CAST AI Autoscaler policies and additional Node Configurations | ||
The following example shows how to onboard an AKS cluster to CAST AI and configure [Autoscaler policies](https://docs.cast.ai/reference/policiesapi_upsertclusterpolicies) and additional [Node Configurations](https://docs.cast.ai/docs/node-configuration/). | ||
|
||
Example configuration should be analysed in the following order: | ||
1. Create Virtual network - `vnet.tf` | ||
2. Create AKS cluster - `aks.tf` | ||
3. Create CAST AI related resources to connect AKS cluster to CAST AI, configure Autoscaler and Node Configurations - `castai.tf` | ||
|
||
# Usage | ||
1. Rename `tf.vars.example` to `tf.vars` | ||
2. Update the `tf.vars` file with your cluster name, cluster region and CAST AI API token. | ||
3. Initialize Terraform. In the example root folder, run: | ||
``` | ||
terraform init | ||
``` | ||
4. Run Terraform apply: | ||
``` | ||
terraform apply -var-file=tf.vars | ||
``` | ||
5. To destroy resources created by this example: | ||
``` | ||
terraform destroy -var-file=tf.vars | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# 2. Create AKS cluster. | ||
|
||
# AKS cluster that this example connects to CAST AI. | ||
# Name, DNS prefix and node resource group name are all derived from var.cluster_name. | ||
resource "azurerm_kubernetes_cluster" "this" { | ||
name = var.cluster_name | ||
resource_group_name = azurerm_resource_group.this.name | ||
location = azurerm_resource_group.this.location | ||
dns_prefix = var.cluster_name | ||
node_resource_group = "${var.cluster_name}-ng" | ||
|
||
default_node_pool { | ||
name = "default" | ||
# Node count has to be >= 2 to successfully deploy CAST AI controller. | ||
node_count = 2 | ||
vm_size = "Standard_D2_v2" | ||
# Subnet is declared outside this file (presumably in `vnet.tf` - confirm). | ||
vnet_subnet_id = azurerm_subnet.internal.id | ||
} | ||
|
||
identity { | ||
type = "SystemAssigned" | ||
} | ||
|
||
tags = { | ||
Environment = "Test" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
# 3. Connect AKS cluster to CAST AI in READ-ONLY mode. | ||
|
||
# Configure Data sources and providers required for CAST AI connection. | ||
# Current Azure subscription; its subscription_id and tenant_id are passed to the CAST AI module. | ||
data "azurerm_subscription" "current" {} | ||
|
||
# CAST AI provider: authenticates with the supplied API token against the configured API URL. | ||
provider "castai" { | ||
api_token = var.castai_api_token | ||
api_url = var.castai_api_url | ||
} | ||
|
||
# Helm provider: connects to the AKS cluster using the host and certificates from its first kube_config entry. | ||
provider "helm" { | ||
kubernetes { | ||
host = azurerm_kubernetes_cluster.this.kube_config[0].host | ||
# Certificate material is base64-decoded before it is handed to the provider. | ||
cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.this.kube_config[0].cluster_ca_certificate) | ||
client_certificate = base64decode(azurerm_kubernetes_cluster.this.kube_config[0].client_certificate) | ||
client_key = base64decode(azurerm_kubernetes_cluster.this.kube_config[0].client_key) | ||
} | ||
} | ||
|
||
# Configure AKS cluster connection to CAST AI using CAST AI aks-cluster module. | ||
module "castai-aks-cluster" { | ||
source = "castai/aks/castai" | ||
|
||
# CAST AI API endpoint and credentials (same values as the castai provider). | ||
api_url = var.castai_api_url | ||
castai_api_token = var.castai_api_token | ||
wait_for_cluster_ready = true | ||
|
||
# Identity of the AKS cluster being connected; resource groups are read from the cluster resource. | ||
aks_cluster_name = var.cluster_name | ||
aks_cluster_region = var.cluster_region | ||
node_resource_group = azurerm_kubernetes_cluster.this.node_resource_group | ||
resource_group = azurerm_kubernetes_cluster.this.resource_group_name | ||
|
||
delete_nodes_on_disconnect = var.delete_nodes_on_disconnect | ||
|
||
# Subscription and tenant come from the current Azure subscription data source. | ||
subscription_id = data.azurerm_subscription.current.subscription_id | ||
tenant_id = data.azurerm_subscription.current.tenant_id | ||
|
||
# Refers back to the "default" entry of this module's own castai_node_configurations output. | ||
default_node_configuration = module.castai-aks-cluster.castai_node_configurations["default"] | ||
|
||
# Node configurations keyed by name; both use the internal subnet and var.tags. | ||
node_configurations = { | ||
default = { | ||
disk_cpu_ratio = 25 | ||
subnets = [azurerm_subnet.internal.id] | ||
tags = var.tags | ||
} | ||
|
||
# Same as "default" but additionally sets max_pods_per_node. | ||
test_node_config = { | ||
disk_cpu_ratio = 25 | ||
subnets = [azurerm_subnet.internal.id] | ||
tags = var.tags | ||
max_pods_per_node = 40 | ||
} | ||
} | ||
|
||
# Node templates; both are based on the "default" node configuration. | ||
node_templates = { | ||
default_by_castai = { | ||
name = "default-by-castai" | ||
configuration_id = module.castai-aks-cluster.castai_node_configurations["default"] | ||
is_default = true | ||
should_taint = false | ||
|
||
constraints = { | ||
on_demand = true | ||
spot = true | ||
use_spot_fallbacks = true | ||
|
||
enable_spot_diversity = false | ||
spot_diversity_price_increase_limit_percent = 20 | ||
} | ||
} | ||
# Tainted template with custom labels/taints and CPU and instance-family constraints. | ||
spot_tmpl = { | ||
configuration_id = module.castai-aks-cluster.castai_node_configurations["default"] | ||
should_taint = true | ||
|
||
custom_labels = { | ||
custom-label-key-1 = "custom-label-value-1" | ||
custom-label-key-2 = "custom-label-value-2" | ||
} | ||
|
||
custom_taints = [ | ||
{ | ||
key = "custom-taint-key-1" | ||
value = "custom-taint-value-1" | ||
}, | ||
{ | ||
key = "custom-taint-key-2" | ||
value = "custom-taint-value-2" | ||
} | ||
] | ||
|
||
constraints = { | ||
fallback_restore_rate_seconds = 1800 | ||
spot = true | ||
use_spot_fallbacks = true | ||
min_cpu = 4 | ||
max_cpu = 100 | ||
instance_families = { | ||
exclude = ["standard_DPLSv5"] | ||
} | ||
compute_optimized = false | ||
storage_optimized = false | ||
} | ||
} | ||
} | ||
|
||
// Configure Autoscaler policies as per API specification https://api.cast.ai/v1/spec/#/PoliciesAPI/PoliciesAPIUpsertClusterPolicies. | ||
// Here: | ||
// - unschedulablePods - Unscheduled pods policy | ||
// - nodeDownscaler - Node deletion policy | ||
// - clusterLimits - Cluster-wide CPU limits (minCores / maxCores) | ||
autoscaler_policies_json = <<-EOT | ||
{ | ||
"enabled": true, | ||
"unschedulablePods": { | ||
"enabled": true | ||
}, | ||
"nodeDownscaler": { | ||
"enabled": true, | ||
"emptyNodes": { | ||
"enabled": true | ||
}, | ||
"evictor": { | ||
"aggressiveMode": false, | ||
"cycleInterval": "5m10s", | ||
"dryRun": false, | ||
"enabled": true, | ||
"nodeGracePeriodMinutes": 10, | ||
"scopedMode": false | ||
} | ||
}, | ||
"clusterLimits": { | ||
"cpu": { | ||
"maxCores": 20, | ||
"minCores": 1 | ||
}, | ||
"enabled": true | ||
} | ||
} | ||
EOT | ||
|
||
} | ||
|
||
# Deploys the CAST AI onboarding ARM template into the AKS cluster's resource group. | ||
# The template receives the CAST AI API token, the cluster name and the cluster region as parameters. | ||
resource "azurerm_resource_group_template_deployment" "castai_onboarding_arm" { | ||
name = var.cluster_name | ||
resource_group_name = azurerm_kubernetes_cluster.this.resource_group_name | ||
deployment_mode = "Incremental" | ||
# Resolve the template relative to this module, not the directory Terraform happens to be run from. | ||
template_content = file("${path.module}/castai_arm_template.json") | ||
parameters_content = jsonencode({ | ||
"apiKey" = { value = var.castai_api_token } | ||
"clusterResourceName" = { value = var.cluster_name } | ||
"location" = { value = var.cluster_region } | ||
}) | ||
} |
Oops, something went wrong.