diff --git a/deployer/commands/generate/resource_allocation/daemonset_requests.py b/deployer/commands/generate/resource_allocation/daemonset_requests.py
index e47ac51ed6..f6611f38b9 100644
--- a/deployer/commands/generate/resource_allocation/daemonset_requests.py
+++ b/deployer/commands/generate/resource_allocation/daemonset_requests.py
@@ -64,23 +64,64 @@ def get_daemon_sets_requests():
     info = []
     for ds in daemon_sets:
         name = ds["metadata"]["name"]
-        req_mem = req_cpu = lim_mem = lim_cpu = 0
+        # From https://kubernetes.io/docs/concepts/workloads/pods/init-containers/#resource-sharing-within-containers
+        # > - The highest of any particular resource request or limit defined on
+        # >   all init containers is the effective init request/limit. If any
+        # >   resource has no resource limit specified this is considered as the
+        # >   highest limit.
+        # > - The Pod's effective request/limit for a resource is the higher of:
+        # >   - the sum of all app containers request/limit for a resource
+        # >   - the effective init request/limit for a resource
+        #
+        # So we have to calculate the requests of the init containers and containers separately,
+        # and take the max as the effective request / limit
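+        #
+        # For example (illustrative numbers, not from a real cluster): two app
+        # containers requesting 100m and 50m CPU alongside an init container
+        # requesting 200m CPU yield an effective request of
+        # max(100m + 50m, 200m) = 200m.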
+
+        container_req_mem = (
+            container_req_cpu
+        ) = container_lim_mem = container_lim_cpu = 0
+        init_container_req_mem = (
+            init_container_req_cpu
+        ) = init_container_lim_mem = init_container_lim_cpu = 0
+
         for c in ds["spec"]["template"]["spec"]["containers"]:
             resources = c.get("resources", {})
             requests = resources.get("requests", {})
             limits = resources.get("limits", {})
-            req_mem += parse_quantity(requests.get("memory", 0))
-            lim_mem += parse_quantity(limits.get("memory", 0))
-            req_cpu += parse_quantity(requests.get("cpu", 0))
-            lim_cpu += parse_quantity(limits.get("cpu", 0))
+            container_req_mem += parse_quantity(requests.get("memory", 0))
+            container_lim_mem += parse_quantity(limits.get("memory", 0))
+            container_req_cpu += parse_quantity(requests.get("cpu", 0))
+            container_lim_cpu += parse_quantity(limits.get("cpu", 0))
+
+        # Init containers run sequentially, so the effective init container
+        # request/limit is the highest of them, not their sum
+        for c in ds["spec"]["template"]["spec"].get("initContainers", []):
+            resources = c.get("resources", {})
+            requests = resources.get("requests", {})
+            limits = resources.get("limits", {})
+            init_container_req_mem = max(
+                init_container_req_mem, parse_quantity(requests.get("memory", 0))
+            )
+            init_container_lim_mem = max(
+                init_container_lim_mem, parse_quantity(limits.get("memory", 0))
+            )
+            init_container_req_cpu = max(
+                init_container_req_cpu, parse_quantity(requests.get("cpu", 0))
+            )
+            init_container_lim_cpu = max(
+                init_container_lim_cpu, parse_quantity(limits.get("cpu", 0))
+            )

         info.append(
             {
                 "name": name,
-                "cpu_request": float(req_cpu),
-                "cpu_limit": float(lim_cpu),
-                "memory_request": int(req_mem),
-                "memory_limit": int(lim_mem),
+                "cpu_request": float(max(container_req_cpu, init_container_req_cpu)),
+                "cpu_limit": float(max(container_lim_cpu, init_container_lim_cpu)),
+                "memory_request": int(max(container_req_mem, init_container_req_mem)),
+                "memory_limit": int(max(container_lim_mem, init_container_lim_mem)),
             }
         )

diff --git a/deployer/commands/generate/resource_allocation/daemonset_requests.yaml b/deployer/commands/generate/resource_allocation/daemonset_requests.yaml
index ef9e94e15f..c41fca02e2 100644
--- a/deployer/commands/generate/resource_allocation/daemonset_requests.yaml
+++ b/deployer/commands/generate/resource_allocation/daemonset_requests.yaml
@@ -22,7 +22,7 @@
 gke:
   2i2c:
     requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
-    other_daemon_sets: ""
+    other_daemon_sets: binder-staging-dind,binder-staging-image-cleaner,imagebuilding-demo-binderhub-service-docker-api
     cpu_requests: 344m
     memory_requests: 596Mi
     k8s_version: v1.27.4-gke.900
@@ -31,7 +31,7 @@
     other_daemon_sets: ""
     cpu_requests: 344m
     memory_requests: 596Mi
-    k8s_version: v1.27.4-gke.900
+    k8s_version: v1.27.7-gke.1056000
   awi-ciroh:
     requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
@@ -43,25 +43,25 @@ gke:
     other_daemon_sets: ""
     cpu_requests: 344m
     memory_requests: 596Mi
-    k8s_version: v1.27.4-gke.900
+    k8s_version: v1.27.7-gke.1056000
   catalystproject-latam:
     requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 338m
     memory_requests: 496Mi
-    k8s_version: v1.27.3-gke.100
+    k8s_version: v1.27.7-gke.1056000
   cloudbank:
-    requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
-    other_daemon_sets: continuous-image-puller,continuous-image-puller,continuous-image-puller,netd
-    cpu_requests: 342m
-    memory_requests: 566Mi
-    k8s_version: v1.26.5-gke.2100
+    requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
+    other_daemon_sets: ""
+    cpu_requests: 344m
+    memory_requests: 596Mi
+    k8s_version: v1.27.5-gke.200
   hhmi:
     requesting_daemon_sets: fluentbit-gke,gke-metadata-server,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 228m
     memory_requests: 480Mi
-    k8s_version: v1.27.3-gke.100
+    k8s_version: v1.27.7-gke.1056000
   leap:
     requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
@@ -81,88 +81,94 @@ gke:
     memory_requests: 580Mi
     k8s_version: v1.27.4-gke.900
   pangeo-hubs:
-    requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
-    other_daemon_sets: netd
-    cpu_requests: 342m
-    memory_requests: 566Mi
-    k8s_version: v1.26.5-gke.2100
+    requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
+    other_daemon_sets: ""
+    cpu_requests: 344m
+    memory_requests: 596Mi
+    k8s_version: v1.27.5-gke.200
   qcl:
     requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 338m
     memory_requests: 496Mi
-    k8s_version: v1.27.4-gke.900
+    k8s_version: v1.27.7-gke.1056000
 eks:
   2i2c-aws-us:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.25.12-eks-2d98532
+    k8s_version: v1.27.8-eks-8cb36c9
   catalystproject-africa:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.27.4-eks-2d98532
+    k8s_version: v1.27.8-eks-8cb36c9
   gridsst:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.25.12-eks-2d98532
+    k8s_version: v1.27.8-eks-8cb36c9
   jupyter-meets-the-earth:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.25.12-eks-2d98532
+    k8s_version: v1.27.8-eks-8cb36c9
   nasa-cryo:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.25.12-eks-2d98532
+    k8s_version: v1.27.8-eks-8cb36c9
+  nasa-esdis:
+    requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
+    other_daemon_sets: ""
+    cpu_requests: 170m
+    memory_requests: 250Mi
+    k8s_version: v1.27.8-eks-8cb36c9
   nasa-ghg:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.27.4-eks-2d98532
+    k8s_version: v1.27.8-eks-8cb36c9
   nasa-veda:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
    other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.25.12-eks-2d98532
+    k8s_version: v1.27.8-eks-8cb36c9
   openscapes:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.24.16-eks-2d98532
+    k8s_version: v1.27.8-eks-8cb36c9
   smithsonian:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.25.12-eks-2d98532
+    k8s_version: v1.27.8-eks-8cb36c9
   ubc-eoas:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.24.17-eks-f8587cb
+    k8s_version: v1.27.8-eks-8cb36c9
   victor:
     requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
     other_daemon_sets: ""
     cpu_requests: 170m
     memory_requests: 250Mi
-    k8s_version: v1.25.12-eks-2d98532
+    k8s_version: v1.27.8-eks-8cb36c9
 aks:
   utoronto:
     requesting_daemon_sets: cloud-node-manager,csi-azuredisk-node,csi-azurefile-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
-    other_daemon_sets: calico-node,continuous-image-puller,continuous-image-puller,continuous-image-puller,continuous-image-puller
+    other_daemon_sets: calico-node
     cpu_requests: 226m
     memory_requests: 300Mi
     k8s_version: v1.26.3
diff --git a/deployer/commands/generate/resource_allocation/node-capacity-info.json b/deployer/commands/generate/resource_allocation/node-capacity-info.json
index ff153a2f17..6cedc667e2 100644
--- a/deployer/commands/generate/resource_allocation/node-capacity-info.json
+++ b/deployer/commands/generate/resource_allocation/node-capacity-info.json
@@ -55,12 +55,12 @@
       "memory": 130451771392
     },
     "measured_overhead": {
-      "cpu": 0.165,
-      "memory": 157286400
+      "cpu": 0.17,
+      "memory": 262144000
     },
     "available": {
-      "cpu": 15.725,
-      "memory": 130294484992
+      "cpu": 15.72,
+      "memory": 130189627392
     }
   },
   "n2-highmem-32": {
diff --git a/deployer/commands/generate/resource_allocation/update_nodeinfo.py b/deployer/commands/generate/resource_allocation/update_nodeinfo.py
index 0b9c57e6e4..17ed22c122 100644
--- a/deployer/commands/generate/resource_allocation/update_nodeinfo.py
+++ b/deployer/commands/generate/resource_allocation/update_nodeinfo.py
@@ -106,20 +106,50 @@ def get_node_capacity_info(instance_type: str):
     mem_available = mem_allocatable

     for p in pods:
-        mem_request = 0
-        cpu_request = 0
-        # Iterate through all the containers in the pod, and count the memory & cpu requests
-        # they make. We don't count initContainers' requests as they don't overlap with the
-        # container requests at any point.
+        # From https://kubernetes.io/docs/concepts/workloads/pods/init-containers/#resource-sharing-within-containers
+        # > - The highest of any particular resource request or limit defined on
+        # >   all init containers is the effective init request/limit. If any
+        # >   resource has no resource limit specified this is considered as the
+        # >   highest limit.
+        # > - The Pod's effective request/limit for a resource is the higher of:
+        # >   - the sum of all app containers request/limit for a resource
+        # >   - the effective init request/limit for a resource
+        #
+        # So we have to calculate the requests of the init containers and containers separately,
+        # and take the max as the effective request / limit
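+        #
+        # For example (illustrative numbers, not from a real pod): app containers
+        # requesting 200Mi and 100Mi of memory alongside an init container
+        # requesting 512Mi yield an effective request of
+        # max(200Mi + 100Mi, 512Mi) = 512Mi.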
+        container_cpu_request = container_mem_request = 0
+        init_container_cpu_request = init_container_mem_request = 0
+
         for c in p["spec"]["containers"]:
-            mem_request += parse_quantity(
+            container_mem_request += parse_quantity(
                 c.get("resources", {}).get("requests", {}).get("memory", "0")
             )
-            cpu_request += parse_quantity(
+            container_cpu_request += parse_quantity(
                 c.get("resources", {}).get("requests", {}).get("cpu", "0")
             )
-        cpu_available -= cpu_request
-        mem_available -= mem_request
+
+        # Init containers run sequentially, so the effective init container
+        # request is the highest of them, not their sum
+        for c in p["spec"].get("initContainers", []):
+            init_container_mem_request = max(
+                init_container_mem_request,
+                parse_quantity(
+                    c.get("resources", {}).get("requests", {}).get("memory", "0")
+                ),
+            )
+            init_container_cpu_request = max(
+                init_container_cpu_request,
+                parse_quantity(
+                    c.get("resources", {}).get("requests", {}).get("cpu", "0")
+                ),
+            )
+
+        cpu_available -= max(container_cpu_request, init_container_cpu_request)
+        mem_available -= max(container_mem_request, init_container_mem_request)

     return {
         # CPU units are in fractions, while memory units are bytes