diff --git a/k8s/azure/README b/k8s/azure/README
new file mode 100644
index 0000000..4bf78e2
--- /dev/null
+++ b/k8s/azure/README
@@ -0,0 +1,20 @@
+This needs to be run on a machine with Terraform and the Azure CLI
+installed. A personal login for the Azure CLI can be done with:
+
+    az login
+
+The actual account used is a service principal account though:
+
+    az ad sp create-for-rbac -n kernelci-k8s
+
+which outputs an appId and password; these should be distributed via the
+credential store and set in Terraform variables, see variables.tf.
+
+Once the clusters are created, a logged-in user can set up the client
+credentials like this:
+
+    for c in $(az aks list --resource-group kernelci-workers --query '[].name' -o tsv) ; do
+        az aks get-credentials --resource-group kernelci-workers --name "${c}"
+    done
+
+(TBD: also put this in outputs.tf, need to figure out syntax for arrays)
diff --git a/k8s/azure/aks-cluster.tf b/k8s/azure/aks-cluster.tf
new file mode 100644
index 0000000..ee6eb63
--- /dev/null
+++ b/k8s/azure/aks-cluster.tf
@@ -0,0 +1,150 @@
+# FIXME: For real deployment we should store the Terraform state
+# in cloud storage rather than just the current directory; Terraform
+# supports Azure blob storage directly. This means the configuration
+# doesn't need to live on a single machine somewhere.
+#
+# See https://www.terraform.io/language/settings/backends/azurerm
+#
+# NOTE: Azure storage account names must be 3-24 characters long and
+# may only contain lowercase letters and digits, so no hyphens there.
+#
+#terraform {
+#  backend "azurerm" {
+#    resource_group_name  = "kernelci-tf-storage"
+#    storage_account_name = "kernelcitf"
+#    container_name       = "tfstate"
+#    key                  = "workers.terraform.tfstate"
+#  }
+#}
+
+provider "azurerm" {
+  features {}
+}
+
+# We assign all clusters to the same resource group; this is purely for
+# accounting purposes so it doesn't matter where the resource group is.
+resource "azurerm_resource_group" "workers" {
+  name     = "kernelci-workers"
+  location = "East US"
+
+  tags = {
+    environment = "kernelci-workers"
+  }
+}
+
+# Azure regions to deploy into; one AKS cluster is created per entry and
+# the region name is used as the for_each key.
+locals {
+  zones = toset([
+    "uksouth",
+    "eastus",
+  ])
+}
+
+# One AKS cluster per region listed in local.zones.
+resource "azurerm_kubernetes_cluster" "workers" {
+  for_each = local.zones
+
+  name                = "${each.key}-workers-aks"
+  location            = each.key
+  resource_group_name = azurerm_resource_group.workers.name
+  dns_prefix          = "${each.key}-workers-k8s"
+
+  # Automatically roll out upgrades from AKS
+  automatic_channel_upgrade = "stable"
+
+  # Single always present node as AKS requires a default node pool -
+  # Terraform and/or AKS don't let us tag this as a spot instance and
+  # ideally we can scale the builders down to 0 so this is a small
+  # instance not tagged for work.
+  default_node_pool {
+    name            = "default"
+    node_count      = 1
+    vm_size         = "Standard_DS2_v2"
+    os_disk_size_gb = 30
+
+    node_labels = {
+      "kernelci/management" = "management"
+    }
+  }
+
+  # Service principal credentials, see variables.tf and the README
+  service_principal {
+    client_id     = var.appId
+    client_secret = var.password
+  }
+
+  role_based_access_control {
+    enabled = true
+  }
+
+  tags = {
+    environment = "kernelci"
+  }
+}
+
+# Smaller nodes for most jobs
+resource "azurerm_kubernetes_cluster_node_pool" "small_workers" {
+  for_each = azurerm_kubernetes_cluster.workers
+
+  name                  = "smallworkers"
+  kubernetes_cluster_id = each.value.id
+
+  # 3rd gen Xeon 8 cores, 32G RAM - general purpose
+  vm_size = "Standard_D8s_v5"
+
+  # Currently things struggle with scale to 0 so require a node
+  enable_auto_scaling = true
+  min_count           = 1
+  node_count          = 1
+  max_count           = 10
+
+  priority = "Spot"
+  # We could set this lower to control costs, -1 means up to on demand
+  # price
+  spot_max_price = -1
+
+  node_labels = {
+    "kernelci/worker"      = "worker"
+    "kernelci/worker-size" = "small"
+  }
+
+  # node_count is only the initial size: the cluster autoscaler changes
+  # the live count, so don't let Terraform reset it on every apply.
+  lifecycle {
+    ignore_changes = [node_count]
+  }
+}
+
+# Big nodes for more intensive jobs (and large numbers of small jobs)
+resource "azurerm_kubernetes_cluster_node_pool" "big_workers" {
+  for_each = azurerm_kubernetes_cluster.workers
+
+  name                  = "bigworkers"
+  kubernetes_cluster_id = each.value.id
+
+  # 3rd gen Xeon, 32 core, 64G RAM - compute optimised
+  vm_size = "Standard_F32s_v2"
+
+  # Currently things struggle with scale to 0 so require a node
+  enable_auto_scaling = true
+  min_count           = 1
+  node_count          = 1
+  max_count           = 10
+
+  priority = "Spot"
+  # We could set this lower to control costs, -1 means up to on demand
+  # price
+  spot_max_price = -1
+
+  node_labels = {
+    "kernelci/worker"      = "worker"
+    "kernelci/worker-size" = "big"
+  }
+
+  # node_count is only the initial size: the cluster autoscaler changes
+  # the live count, so don't let Terraform reset it on every apply.
+  lifecycle {
+    ignore_changes = [node_count]
+  }
+}
diff --git a/k8s/azure/outputs.tf b/k8s/azure/outputs.tf
new file mode 100644
index 0000000..ac355d5
--- /dev/null
+++ b/k8s/azure/outputs.tf
@@ -0,0 +1,4 @@
+output "resource_group_name" {
+  description = "Name of the resource group that all worker clusters are assigned to"
+  value       = azurerm_resource_group.workers.name
+}
diff --git a/k8s/azure/variables.tf 
b/k8s/azure/variables.tf
new file mode 100644
index 0000000..6bff5ce
--- /dev/null
+++ b/k8s/azure/variables.tf
@@ -0,0 +1,16 @@
+# Credentials of the service principal created with
+# "az ad sp create-for-rbac" (see README); they feed the
+# service_principal block in aks-cluster.tf.
+
+variable "appId" {
+  description = "App ID of the service principal used by the AKS clusters"
+  type        = string
+}
+
+# Marked sensitive so Terraform redacts the value in plan/apply output
+# (requires Terraform >= 0.14, see versions.tf).
+variable "password" {
+  description = "Password (client secret) of the service principal used by the AKS clusters"
+  type        = string
+  sensitive   = true
+}
diff --git a/k8s/azure/versions.tf b/k8s/azure/versions.tf
new file mode 100644
index 0000000..c951183
--- /dev/null
+++ b/k8s/azure/versions.tf
@@ -0,0 +1,13 @@
+terraform {
+  required_providers {
+    # Exact pin: aks-cluster.tf is written against this provider schema
+    azurerm = {
+      source  = "hashicorp/azurerm"
+      version = "2.66.0"
+    }
+  }
+
+  # 0.14 is the first release supporting "sensitive" input variables
+  required_version = ">= 0.14"
+}
+