From b83358e06568baede547fbf52543414e9e593501 Mon Sep 17 00:00:00 2001
From: YuviPanda
Date: Thu, 4 Jan 2024 10:46:50 -0800
Subject: [PATCH 1/2] Include initContainers when calculating pod overhead

https://github.com/2i2c-org/infrastructure/pull/3569 changed the cryptnono
daemonset to have different resource requests for the init containers as well
as the container. While working on
https://github.com/2i2c-org/infrastructure/pull/3566, I noticed this was
generating wrong choices - the overhead was calculated too small. We were
intentionally ignoring init containers while calculating overhead, but it
turns out the scheduler and the autoscaler both take them into consideration.

The effective resource request for a pod is the higher of the resource
requests for the containers *or* the init containers - this ensures that a
pod with higher requests for init containers than containers (like our
cryptnono pod!) will actually run. This is documented at
https://kubernetes.io/docs/concepts/workloads/pods/init-containers/#resource-sharing-within-containers,
and implemented in Kubernetes itself at
https://github.com/kubernetes/kubernetes/blob/9bd0ef5f173de3cc2d1d629a4aee499d53690aee/pkg/api/v1/resource/helpers.go#L50
(this is the library code that the cluster autoscaler uses).

This PR updates the two places we currently have that calculate effective
resource requests (I assume these will eventually be merged into one - I
haven't kept up with the team's work here last quarter).

I've also updated the node-capacity-info.json file, which is what the
generator script seems to use right now.
---
 .../resource_allocation/daemonset_requests.py | 45 +++++++++++++++----
 .../daemonset_requests.yaml                   |  2 +-
 .../node-capacity-info.json                   |  8 ++--
 .../resource_allocation/update_nodeinfo.py    | 40 +++++++++++++----
 4 files changed, 72 insertions(+), 23 deletions(-)

diff --git a/deployer/commands/generate/resource_allocation/daemonset_requests.py b/deployer/commands/generate/resource_allocation/daemonset_requests.py
index e47ac51ed6..f6611f38b9 100644
--- a/deployer/commands/generate/resource_allocation/daemonset_requests.py
+++ b/deployer/commands/generate/resource_allocation/daemonset_requests.py
@@ -64,23 +64,50 @@ def get_daemon_sets_requests():
     info = []
     for ds in daemon_sets:
         name = ds["metadata"]["name"]
-        req_mem = req_cpu = lim_mem = lim_cpu = 0
+        # From https://kubernetes.io/docs/concepts/workloads/pods/init-containers/#resource-sharing-within-containers
+        # > - The highest of any particular resource request or limit defined on
+        # > all init containers is the effective init request/limit. If any
+        # > resource has no resource limit specified this is considered as the
+        # > highest limit.
+        # > - The Pod's effective request/limit for a resource is the higher of:
+        # > - the sum of all app containers request/limit for a resource
+        # > - the effective init request/limit for a resource
+        #
+        # So we have to calculate the requests of the init containers and containers separately,
+        # and take the max as the effective request / limit
+
+        container_req_mem = (
+            container_req_cpu
+        ) = container_lim_mem = container_lim_cpu = 0
+        init_container_req_mem = (
+            init_container_req_cpu
+        ) = init_container_lim_mem = init_container_lim_cpu = 0
+
         for c in ds["spec"]["template"]["spec"]["containers"]:
             resources = c.get("resources", {})
             requests = resources.get("requests", {})
             limits = resources.get("limits", {})
-            req_mem += parse_quantity(requests.get("memory", 0))
-            lim_mem += parse_quantity(limits.get("memory", 0))
-            req_cpu += parse_quantity(requests.get("cpu", 0))
-            lim_cpu += parse_quantity(limits.get("cpu", 0))
+            container_req_mem += parse_quantity(requests.get("memory", 0))
+            container_lim_mem += parse_quantity(limits.get("memory", 0))
+            container_req_cpu += parse_quantity(requests.get("cpu", 0))
+            container_lim_cpu += parse_quantity(limits.get("cpu", 0))
+
+        for c in ds["spec"]["template"]["spec"].get("initContainers", []):
+            resources = c.get("resources", {})
+            requests = resources.get("requests", {})
+            limits = resources.get("limits", {})
+            init_container_req_mem += parse_quantity(requests.get("memory", 0))
+            init_container_lim_mem += parse_quantity(limits.get("memory", 0))
+            init_container_req_cpu += parse_quantity(requests.get("cpu", 0))
+            init_container_lim_cpu += parse_quantity(limits.get("cpu", 0))

         info.append(
             {
                 "name": name,
-                "cpu_request": float(req_cpu),
-                "cpu_limit": float(lim_cpu),
-                "memory_request": int(req_mem),
-                "memory_limit": int(lim_mem),
+                "cpu_request": float(max(container_req_cpu, init_container_req_cpu)),
+                "cpu_limit": float(max(container_lim_cpu, init_container_lim_cpu)),
+                "memory_request": int(max(container_req_mem, init_container_req_mem)),
+                "memory_limit": int(max(container_lim_mem, init_container_lim_mem)),
             }
         )

diff --git a/deployer/commands/generate/resource_allocation/daemonset_requests.yaml b/deployer/commands/generate/resource_allocation/daemonset_requests.yaml
index ef9e94e15f..7af1b13692 100644
--- a/deployer/commands/generate/resource_allocation/daemonset_requests.yaml
+++ b/deployer/commands/generate/resource_allocation/daemonset_requests.yaml
@@ -134,7 +134,7 @@ eks:
     other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.25.12-eks-2d98532
+    k8s_version: v1.27.8-eks-8cb36c9
   openscapes:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
diff --git a/deployer/commands/generate/resource_allocation/node-capacity-info.json b/deployer/commands/generate/resource_allocation/node-capacity-info.json
index ff153a2f17..6cedc667e2 100644
--- a/deployer/commands/generate/resource_allocation/node-capacity-info.json
+++ b/deployer/commands/generate/resource_allocation/node-capacity-info.json
@@ -55,12 +55,12 @@
             "memory": 130451771392
         },
         "measured_overhead": {
-            "cpu": 0.165,
-            "memory": 157286400
+            "cpu": 0.17,
+            "memory": 262144000
         },
         "available": {
-            "cpu": 15.725,
-            "memory": 130294484992
+            "cpu": 15.72,
+            "memory": 130189627392
         }
     },
     "n2-highmem-32": {
diff --git a/deployer/commands/generate/resource_allocation/update_nodeinfo.py b/deployer/commands/generate/resource_allocation/update_nodeinfo.py
index 0b9c57e6e4..17ed22c122 100644
--- a/deployer/commands/generate/resource_allocation/update_nodeinfo.py
+++ b/deployer/commands/generate/resource_allocation/update_nodeinfo.py
@@ -106,20 +106,42 @@ def get_node_capacity_info(instance_type: str):
     mem_available = mem_allocatable

     for p in pods:
-        mem_request = 0
-        cpu_request = 0
-        # Iterate through all the containers in the pod, and count the memory & cpu requests
-        # they make. We don't count initContainers' requests as they don't overlap with the
-        # container requests at any point.
+        # From https://kubernetes.io/docs/concepts/workloads/pods/init-containers/#resource-sharing-within-containers
+        # > - The highest of any particular resource request or limit defined on
+        # > all init containers is the effective init request/limit. If any
+        # > resource has no resource limit specified this is considered as the
+        # > highest limit.
+        # > - The Pod's effective request/limit for a resource is the higher of:
+        # > - the sum of all app containers request/limit for a resource
+        # > - the effective init request/limit for a resource
+        #
+        # So we have to calculate the requests of the init containers and containers separately,
+        # and take the max as the effective request / limit
+        container_cpu_request = container_mem_request = 0
+        init_container_cpu_request = init_container_mem_request = 0
+
         for c in p["spec"]["containers"]:
-            mem_request += parse_quantity(
+            container_mem_request += parse_quantity(
+                c.get("resources", {}).get("requests", {}).get("memory", "0")
+            )
+            container_cpu_request += parse_quantity(
+                c.get("resources", {}).get("requests", {}).get("cpu", "0")
+            )
+
+        for c in p["spec"].get("initContainers", []):
+            init_container_mem_request += parse_quantity(
                 c.get("resources", {}).get("requests", {}).get("memory", "0")
             )
-            cpu_request += parse_quantity(
+            init_container_cpu_request += parse_quantity(
                 c.get("resources", {}).get("requests", {}).get("cpu", "0")
             )
-        cpu_available -= cpu_request
-        mem_available -= mem_request
+
+        print(
+            p["metadata"]["name"],
+            max(init_container_mem_request, container_mem_request),
+        )
+        cpu_available -= max(container_cpu_request, init_container_cpu_request)
+        mem_available -= max(container_mem_request, init_container_mem_request)

     return {
         # CPU units are in fractions, while memory units are bytes

From e295b8f81f3fafb3b8e317a115d66477deadd125 Mon Sep 17 00:00:00 2001
From: Erik Sundell
Date: Fri, 5 Jan 2024 00:31:36 +0100
Subject: [PATCH 2/2] Re-generate daemonset requests for our clusters

---
 .../daemonset_requests.yaml | 60 ++++++++++---------
 1 file changed, 33 insertions(+), 27 deletions(-)

diff --git a/deployer/commands/generate/resource_allocation/daemonset_requests.yaml b/deployer/commands/generate/resource_allocation/daemonset_requests.yaml
index 7af1b13692..c41fca02e2 100644
--- a/deployer/commands/generate/resource_allocation/daemonset_requests.yaml
+++ b/deployer/commands/generate/resource_allocation/daemonset_requests.yaml
@@ -22,7 +22,7 @@
 gke:
   2i2c:
     requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
-    other_daemon_sets: ""
+    other_daemon_sets: binder-staging-dind,binder-staging-image-cleaner,imagebuilding-demo-binderhub-service-docker-api
     cpu_requests: 344m
     memory_requests: 596Mi
     k8s_version: v1.27.4-gke.900
@@ -31,7 +31,7 @@ gke:
     other_daemon_sets: ""
     cpu_requests: 344m
     memory_requests: 596Mi
-    k8s_version: v1.27.4-gke.900
+    k8s_version: v1.27.7-gke.1056000
   awi-ciroh:
    requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
@@ -43,25 +43,25 @@ gke:
     other_daemon_sets: ""
     cpu_requests: 344m
     memory_requests: 596Mi
-    k8s_version: v1.27.4-gke.900
+    k8s_version: v1.27.7-gke.1056000
   catalystproject-latam:
     requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 338m
     memory_requests: 496Mi
-    k8s_version: v1.27.3-gke.100
+    k8s_version: v1.27.7-gke.1056000
   cloudbank:
-    requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
-    other_daemon_sets: continuous-image-puller,continuous-image-puller,continuous-image-puller,netd
-    cpu_requests: 342m
-    memory_requests: 566Mi
-    k8s_version: v1.26.5-gke.2100
+    requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
+    other_daemon_sets: ""
+    cpu_requests: 344m
+    memory_requests: 596Mi
+    k8s_version: v1.27.5-gke.200
   hhmi:
     requesting_daemon_sets: fluentbit-gke,gke-metadata-server,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 228m
     memory_requests: 480Mi
-    k8s_version: v1.27.3-gke.100
+    k8s_version: v1.27.7-gke.1056000
   leap:
     requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
@@ -81,54 +81,60 @@ gke:
     memory_requests: 580Mi
     k8s_version: v1.27.4-gke.900
   pangeo-hubs:
-    requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
-    other_daemon_sets: netd
-    cpu_requests: 342m
-    memory_requests: 566Mi
-    k8s_version: v1.26.5-gke.2100
+    requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
+    other_daemon_sets: ""
+    cpu_requests: 344m
+    memory_requests: 596Mi
+    k8s_version: v1.27.5-gke.200
   qcl:
     requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 338m
     memory_requests: 496Mi
-    k8s_version: v1.27.4-gke.900
+    k8s_version: v1.27.7-gke.1056000
 eks:
   2i2c-aws-us:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.25.12-eks-2d98532
+    k8s_version: v1.27.8-eks-8cb36c9
   catalystproject-africa:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.27.4-eks-2d98532
+    k8s_version: v1.27.8-eks-8cb36c9
   gridsst:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.25.12-eks-2d98532
+    k8s_version: v1.27.8-eks-8cb36c9
   jupyter-meets-the-earth:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
    other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.25.12-eks-2d98532
+    k8s_version: v1.27.8-eks-8cb36c9
   nasa-cryo:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.25.12-eks-2d98532
+    k8s_version: v1.27.8-eks-8cb36c9
+  nasa-esdis:
+    requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
+    other_daemon_sets: ""
+    cpu_requests: 170m
+    memory_requests: 250Mi
+    k8s_version: v1.27.8-eks-8cb36c9
   nasa-ghg:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.27.4-eks-2d98532
+    k8s_version: v1.27.8-eks-8cb36c9
   nasa-veda:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
@@ -140,29 +146,29 @@ eks:
     other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.24.16-eks-2d98532
+    k8s_version: v1.27.8-eks-8cb36c9
   smithsonian:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.25.12-eks-2d98532
+    k8s_version: v1.27.8-eks-8cb36c9
   ubc-eoas:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.24.17-eks-f8587cb
+    k8s_version: v1.27.8-eks-8cb36c9
   victor:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.25.12-eks-2d98532
+    k8s_version: v1.27.8-eks-8cb36c9
 aks:
   utoronto:
     requesting_daemon_sets: cloud-node-manager,csi-azuredisk-node,csi-azurefile-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
-    other_daemon_sets: calico-node,continuous-image-puller,continuous-image-puller,continuous-image-puller,continuous-image-puller
+    other_daemon_sets: calico-node
     cpu_requests: 226m
     memory_requests: 300Mi
     k8s_version: v1.26.3
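
Note: the snippet below is an illustrative, standalone sketch of the effective-request
rule the patches above implement; it is not part of either patch. The helper name
effective_pod_requests and the plain-dict pod spec are assumptions made only for this
example - the real calculations live in daemonset_requests.py and update_nodeinfo.py.
Per the Kubernetes documentation quoted in the patch, the effective init request is the
highest request across init containers; the patch sums init container requests instead,
which gives a conservative upper bound when a pod has more than one init container.

from kubernetes.utils import parse_quantity


def effective_pod_requests(pod_spec: dict) -> dict:
    """Return effective cpu/memory requests for a pod spec dict.

    The pod's effective request per resource is the higher of:
    - the sum of all app containers' requests, and
    - the highest request across init containers.
    """

    def request(container, resource):
        # Missing requests count as 0, mirroring the patch's .get(..., 0) defaults.
        return parse_quantity(
            container.get("resources", {}).get("requests", {}).get(resource, 0)
        )

    effective = {}
    for resource in ("cpu", "memory"):
        containers_total = sum(
            request(c, resource) for c in pod_spec.get("containers", [])
        )
        init_max = max(
            (request(c, resource) for c in pod_spec.get("initContainers", [])),
            default=0,
        )
        effective[resource] = max(containers_total, init_max)
    return effective


if __name__ == "__main__":
    # Hypothetical pod shaped like the cryptnono case: the init container
    # requests more than the app container, so it dominates the effective request.
    pod_spec = {
        "containers": [
            {"resources": {"requests": {"cpu": "5m", "memory": "20Mi"}}},
        ],
        "initContainers": [
            {"resources": {"requests": {"cpu": "100m", "memory": "256Mi"}}},
        ],
    }
    print(effective_pod_requests(pod_spec))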