From 02828e18a139db2cf6ffa3e10f5874609a3e3d5e Mon Sep 17 00:00:00 2001
From: Kartik Joshi
Date: Tue, 28 Jan 2025 15:19:08 +0530
Subject: [PATCH] Add test for konnectivity agent autoscaler performance

Add an Azure DevOps pipeline, Terraform inputs and clusterloader2
topology steps for the konnectivity-autoscale perf-eval scenario.

Signed-off-by: Kartik Joshi
---
 .../konnectivity-resource-consume.yml         | 58 ++++++++++++++
 .../terraform-inputs/azure.tfvars             | 77 +++++++++++++++++++
 .../terraform-test-inputs/azure.json          |  4 +
 .../collect-clusterloader2.yml                | 17 ++++
 .../execute-clusterloader2.yml                | 17 ++++
 .../validate-resources.yml                    | 17 ++++
 6 files changed, 190 insertions(+)
 create mode 100644 pipelines/perf-eval/CRI Benchmark/konnectivity-resource-consume.yml
 create mode 100644 scenarios/perf-eval/konnectivity-autoscale/terraform-inputs/azure.tfvars
 create mode 100644 scenarios/perf-eval/konnectivity-autoscale/terraform-test-inputs/azure.json
 create mode 100644 steps/topology/cri-autoscale-resource-consume/collect-clusterloader2.yml
 create mode 100644 steps/topology/cri-autoscale-resource-consume/execute-clusterloader2.yml
 create mode 100644 steps/topology/cri-autoscale-resource-consume/validate-resources.yml

diff --git a/pipelines/perf-eval/CRI Benchmark/konnectivity-resource-consume.yml b/pipelines/perf-eval/CRI Benchmark/konnectivity-resource-consume.yml
new file mode 100644
index 000000000..068c4416b
--- /dev/null
+++ b/pipelines/perf-eval/CRI Benchmark/konnectivity-resource-consume.yml
@@ -0,0 +1,58 @@
+# Perf-eval pipeline for the konnectivity-autoscale scenario (CPU load, clusterloader2).
+# No CI trigger is set and the schedule below is commented out.
+trigger: none
+#schedules:
+#- cron: "0 2-23/4 * * *"
+#  displayName: "Every 4 Hours"
+#  branches:
+#    include:
+#    - main
+#  always: true
+
+variables:
+  SCENARIO_TYPE: perf-eval
+  SCENARIO_NAME: konnectivity-autoscale
+  SCENARIO_VERSION: main
+
+stages:
+  - stage: azure_westus3
+    dependsOn: []
+    jobs:
+      - template: /jobs/competitive-test.yml
+        parameters:
+          cloud: azure
+          regions:
+            - westus3
+          engine: clusterloader2
+          engine_input:
+            image: "ghcr.io/azure/clusterloader2:v20241016"
+          topology: cri-autoscale-resource-consume
+          matrix:
+            n3-p3-cpu:
+              node_count: 3
+              max_pods: 20
+              repeats: 1
+              operation_timeout: 15m
+              load_type: cpu
+              node_per_step: 1
+              scale_enabled: true
+            #n500-p5-cpu:
+            #  node_count: 501
+            #  max_pods: 20
+            #  repeats: 1
+            #  operation_timeout: 30m
+            #  load_type: cpu
+            #  node_per_step: 100
+            #  scale_enabled: true
+            #n1001-p6-cpu:
+            #  node_count: 1001
+            #  max_pods: 20
+            #  repeats: 1
+            #  operation_timeout: 30m
+            #  load_type: cpu
+            #  node_per_step: 100
+            #  scale_enabled: true
+          max_parallel: 3
+          timeout_in_minutes: 720
+          credential_type: service_connection
+          ssh_key_enabled: false
diff --git a/scenarios/perf-eval/konnectivity-autoscale/terraform-inputs/azure.tfvars b/scenarios/perf-eval/konnectivity-autoscale/terraform-inputs/azure.tfvars
new file mode 100644
index 000000000..cd4504d83
--- /dev/null
+++ b/scenarios/perf-eval/konnectivity-autoscale/terraform-inputs/azure.tfvars
@@ -0,0 +1,77 @@
+scenario_type  = "perf-eval"
+scenario_name  = "konnectivity-autoscale"
+deletion_delay = "3h"
+owner          = "aks"
+
+network_config_list = [
+  {
+    role               = "client"
+    vnet_name          = "cri-autoscale-vnet"
+    vnet_address_space = "10.0.0.0/9"
+    subnet = [
+      {
+        name           = "cri-autoscale-subnet-1"
+        address_prefix = "10.0.0.0/16"
+      }
+    ]
+    network_security_group_name = ""
+    nic_public_ip_associations  = []
+    nsr_rules                   = []
+  }
+]
+
+aks_config_list = [
+  {
+    role        = "client"
+    aks_name    = "konnectivity-autoscale"
+    dns_prefix  = "cl2"
+    subnet_name = "cri-autoscale-subnet-1"
+    sku_tier    = "Standard"
+    network_profile = {
+      network_plugin      = "azure"
+      network_plugin_mode = "overlay"
+      # 10.128.0.0/9 is the upper half of 10.0.0.0/8, so it does not overlap the VNet's 10.0.0.0/9.
+      pod_cidr            = "10.128.0.0/9"
+      service_cidr        = "192.168.0.0/16"
+      dns_service_ip      = "192.168.0.10"
+    }
+    default_node_pool = {
+      name                         = "default"
+      node_count                   = 3
+      vm_size                      = "standard_b16als_v2"
+      os_disk_type                 = "Managed"
+      only_critical_addons_enabled = true
+      temporary_name_for_rotation  = "defaulttmp"
+    }
+    extra_node_pool = [
+      {
+        name                 = "prompool"
+        node_count           = 1
+        auto_scaling_enabled = false
+        vm_size              = "standard_b16als_v2"
+        node_labels          = { "prometheus" = "true" }
+      },
+      {
+        name                 = "userpool0"
+        node_count           = 1
+        min_count            = 0
+        max_count            = 500
+        auto_scaling_enabled = true
+        vm_size              = "standard_b2als_v2"
+        max_pods             = 110
+        node_labels          = { "cri-resource-consume" = "true" }
+      },
+      {
+        name                 = "userpool1"
+        node_count           = 0
+        min_count            = 0
+        max_count            = 501
+        auto_scaling_enabled = true
+        vm_size              = "standard_b2als_v2"
+        max_pods             = 110
+        node_labels          = { "cri-resource-consume" = "true" }
+      }
+    ]
+    kubernetes_version = "1.30"
+  }
+]
diff --git a/scenarios/perf-eval/konnectivity-autoscale/terraform-test-inputs/azure.json b/scenarios/perf-eval/konnectivity-autoscale/terraform-test-inputs/azure.json
new file mode 100644
index 000000000..abddcd924
--- /dev/null
+++ b/scenarios/perf-eval/konnectivity-autoscale/terraform-test-inputs/azure.json
@@ -0,0 +1,4 @@
+{
+  "run_id": "1234567890",
+  "region": "eastus"
+}
diff --git a/steps/topology/cri-autoscale-resource-consume/collect-clusterloader2.yml b/steps/topology/cri-autoscale-resource-consume/collect-clusterloader2.yml
new file mode 100644
index 000000000..ee0c8a1bb
--- /dev/null
+++ b/steps/topology/cri-autoscale-resource-consume/collect-clusterloader2.yml
@@ -0,0 +1,17 @@
+parameters:
+- name: cloud
+  type: string
+  default: ''
+- name: engine_input
+  type: object
+  default: {}
+- name: regions
+  type: object
+  default: []
+
+steps:
+- template: /steps/engine/clusterloader2/cri/collect.yml
+  parameters:
+    cloud: ${{ parameters.cloud }}
+    engine_input: ${{ parameters.engine_input }}
+    region: ${{ parameters.regions[0] }}
diff --git a/steps/topology/cri-autoscale-resource-consume/execute-clusterloader2.yml b/steps/topology/cri-autoscale-resource-consume/execute-clusterloader2.yml
new file mode 100644
index 000000000..fcdab04db
--- /dev/null
+++ b/steps/topology/cri-autoscale-resource-consume/execute-clusterloader2.yml
@@ -0,0 +1,17 @@
+parameters:
+- name: cloud
+  type: string
+  default: ''
+- name: engine_input
+  type: object
+  default: {}
+- name: regions
+  type: object
+  default: []
+
+steps:
+- template: /steps/engine/clusterloader2/cri/execute.yml
+  parameters:
+    cloud: ${{ parameters.cloud }}
+    engine_input: ${{ parameters.engine_input }}
+    region: ${{ parameters.regions[0] }}
diff --git a/steps/topology/cri-autoscale-resource-consume/validate-resources.yml b/steps/topology/cri-autoscale-resource-consume/validate-resources.yml
new file mode 100644
index 000000000..3db17b37a
--- /dev/null
+++ b/steps/topology/cri-autoscale-resource-consume/validate-resources.yml
@@ -0,0 +1,17 @@
+parameters:
+- name: cloud
+  type: string
+- name: engine
+  type: string
+- name: regions
+  type: object
+
+steps:
+- template: /steps/cloud/${{ parameters.cloud }}/update-kubeconfig.yml
+  parameters:
+    role: client
+    region: ${{ parameters.regions[0] }}
+- template: /steps/engine/clusterloader2/slo/validate.yml
+  parameters:
+    # 5 matches the initial node counts in azure.tfvars: 3 default + 1 prompool + 1 userpool0 (userpool1 starts at 0).
+    desired_nodes: 5