From 0763ee8ab97cac2172f2d2e3da401cd631bd3890 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Wed, 3 Jan 2024 15:23:42 -0800 Subject: [PATCH 1/3] Use generated resource allocation options for veda hub While starting to work on https://github.com/2i2c-org/infrastructure/issues/3565, I realized that VEDA was still using the older style 'node share' rather than the generated 'resource allocation' options. I've swapped over the options to now be based on images for users to choose and resource allocation options generated by our resource allocation script. This matches openscapes, and there has generally been pretty big positive feedback on this mode. I've kept the initial cloning to only happen on the pangeo image as it currently exists, without making any changes. That should be cleaned up as part of https://github.com/2i2c-org/infrastructure/issues/3565 --- config/clusters/nasa-veda/common.values.yaml | 355 +++++++----------- .../instance_capacities.yaml | 4 +- 2 files changed, 141 insertions(+), 218 deletions(-) diff --git a/config/clusters/nasa-veda/common.values.yaml b/config/clusters/nasa-veda/common.values.yaml index e9c399bd96..e306d9d756 100644 --- a/config/clusters/nasa-veda/common.values.yaml +++ b/config/clusters/nasa-veda/common.values.yaml @@ -59,31 +59,146 @@ basehub: - wildintellect singleuser: defaultUrl: /lab - image: - name: public.ecr.aws/nasa-veda/nasa-veda-singleuser - # Based off pangeo/pangeo-notebook:2023.07.05 which uses JupyterLab <4, so jupyterlab-git and dask-dashboard work - # If updating this tag, also update it in the `profile_options.image.options.pangeo.kubespawner_override.image`below - tag: "5068290376e8c3151d97a36ae6485bb7ff79650b94aecc93ffb2ea1b42d76460" profileList: - # NOTE: About node sharing - # - # CPU/Memory requests/limits are actively considered still. This - # profile list is setup to involve node sharing as considered in - # https://github.com/2i2c-org/infrastructure/issues/2121. - # - # - Memory requests are different from the description, based on: - # whats found to remain allocate in k8s, subtracting 1GiB - # overhead for misc system pods, and transitioning from GB in - # description to GiB in mem_guarantee. - # - CPU requests are lower than the description, with a factor of - # 10%. - # - - display_name: "Small: up to 4 CPU / 32 GB RAM" - description: &profile_list_description "Start a container with at least a chosen share of capacity on a node of this type" - slug: small + - display_name: "Modified Pangeo Notebook" + slug: modified-pangeo + description: Pangeo based notebook with a Python environment default: true + kubespawner_override: + image: public.ecr.aws/nasa-veda/nasa-veda-singleuser:5068290376e8c3151d97a36ae6485bb7ff79650b94aecc93ffb2ea1b42d76460 + init_containers: + # Need to explicitly fix ownership here, as otherwise these directories will be owned + # by root on most NFS filesystems - neither EFS nor Google Filestore support anonuid + - name: volume-mount-ownership-fix + image: busybox:1.36.1 + command: + - sh + - -c + - id && chown 1000:1000 /home/jovyan /home/jovyan/shared && ls -lhd /home/jovyan + securityContext: + runAsUser: 0 + volumeMounts: + - name: home + mountPath: /home/jovyan + subPath: "{username}" + # mounted without readonly attribute here, + # so we can chown it appropriately + - name: home + mountPath: /home/jovyan/shared + subPath: _shared + # this container uses nbgitpuller to mount https://github.com/NASA-IMPACT/veda-docs/ for user pods + # image source: https://github.com/NASA-IMPACT/veda-jh-environments/tree/main/docker-images/base/nasa-veda-singleuser-init + - name: nasa-veda-singleuser-init + image: public.ecr.aws/nasa-veda/nasa-veda-singleuser-init:38e8998f9be64b0a59ac6c4d6d152d3403121dfc4be6d49bdf52ddc92827af8a + command: + - "python3" + - "/opt/k8s-init-container-nb-docs.py" + - "/home/jovyan" + volumeMounts: + - name: home + mountPath: /home/jovyan + subPath: "{username}" + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + profile_options: &profile_options + resource_allocation: + display_name: Resource Allocation + choices: + mem_1_9: + display_name: 1.9 GB RAM, upto 3.75 CPUs + kubespawner_override: + mem_guarantee: 1992701952 + mem_limit: 1992701952 + cpu_guarantee: 0.234375 + cpu_limit: 3.75 + node_selector: + node.kubernetes.io/instance-type: r5.xlarge + default: true + mem_3_7: + display_name: 3.7 GB RAM, upto 3.75 CPUs + kubespawner_override: + mem_guarantee: 3985403904 + mem_limit: 3985403904 + cpu_guarantee: 0.46875 + cpu_limit: 3.75 + node_selector: + node.kubernetes.io/instance-type: r5.xlarge + mem_7_4: + display_name: 7.4 GB RAM, upto 3.75 CPUs + kubespawner_override: + mem_guarantee: 7970807808 + mem_limit: 7970807808 + cpu_guarantee: 0.9375 + cpu_limit: 3.75 + node_selector: + node.kubernetes.io/instance-type: r5.xlarge + mem_14_8: + display_name: 14.8 GB RAM, upto 3.75 CPUs + kubespawner_override: + mem_guarantee: 15941615616 + mem_limit: 15941615616 + cpu_guarantee: 1.875 + cpu_limit: 3.75 + node_selector: + node.kubernetes.io/instance-type: r5.xlarge + mem_29_7: + display_name: 29.7 GB RAM, upto 3.75 CPUs + kubespawner_override: + mem_guarantee: 31883231232 + mem_limit: 31883231232 + cpu_guarantee: 3.75 + cpu_limit: 3.75 + node_selector: + node.kubernetes.io/instance-type: r5.xlarge + mem_60_6: + display_name: 60.6 GB RAM, upto 15.72 CPUs + kubespawner_override: + mem_guarantee: 65105797120 + mem_limit: 65105797120 + cpu_guarantee: 7.86 + cpu_limit: 15.72 + node_selector: + node.kubernetes.io/instance-type: r5.4xlarge + mem_121_3: + display_name: 121.3 GB RAM, upto 15.72 CPUs + kubespawner_override: + mem_guarantee: 130211594240 + mem_limit: 130211594240 + cpu_guarantee: 15.72 + cpu_limit: 15.72 + node_selector: + node.kubernetes.io/instance-type: r5.4xlarge + - display_name: "Rocker Geospatial with RStudio" + slug: rocker + description: R environment with many geospatial libraries pre-installed + kubespawner_override: + image: rocker/binder:4.3 + # Launch RStudio after the user logs in + default_url: /rstudio + # Ensures container working dir is homedir + # https://github.com/2i2c-org/infrastructure/issues/2559 + working_dir: /home/rstudio + profile_options: *profile_options + - display_name: "QGIS on Linux Desktop" + slug: qgis + description: Linux desktop in the browser, with qgis installed + kubespawner_override: + # Explicitly unset this - we set this to 'jupyterhub-singleuser' + # in basehub/values.yaml. We instead want to leave this unset, + # so the default command for the docker image is used instead. + # This is required for .desktop files to show up correctly. + cmd: null + # Launch people directly into the Linux desktop when they start + default_url: /desktop + # Built from https://github.com/jupyterhub/jupyter-remote-desktop-proxy/pull/51 + image: "quay.io/jupyter-remote-desktop-proxy/qgis:2023-09-27" + profile_options: *profile_options + - display_name: "Bring your own image" + description: Specify your own docker image (must have python and jupyterhub installed in it) + slug: custom profile_options: - image: &image_options + image: display_name: Image unlisted_choice: enabled: True @@ -92,200 +207,8 @@ basehub: validation_message: "Must be a publicly available docker image, of form :" kubespawner_override: image: "{value}" - choices: - pangeo: - display_name: Modified Pangeo Notebook - default: true - slug: pangeo - kubespawner_override: - image: public.ecr.aws/nasa-veda/nasa-veda-singleuser:5068290376e8c3151d97a36ae6485bb7ff79650b94aecc93ffb2ea1b42d76460 - init_containers: - # Need to explicitly fix ownership here, as otherwise these directories will be owned - # by root on most NFS filesystems - neither EFS nor Google Filestore support anonuid - - name: volume-mount-ownership-fix - image: busybox:1.36.1 - command: - - sh - - -c - - id && chown 1000:1000 /home/jovyan /home/jovyan/shared && ls -lhd /home/jovyan - securityContext: - runAsUser: 0 - volumeMounts: - - name: home - mountPath: /home/jovyan - subPath: "{username}" - # mounted without readonly attribute here, - # so we can chown it appropriately - - name: home - mountPath: /home/jovyan/shared - subPath: _shared - # this container uses nbgitpuller to mount https://github.com/NASA-IMPACT/veda-docs/ for user pods - # image source: https://github.com/NASA-IMPACT/veda-jh-environments/tree/main/docker-images/base/nasa-veda-singleuser-init - - name: nasa-veda-singleuser-init - image: public.ecr.aws/nasa-veda/nasa-veda-singleuser-init:38e8998f9be64b0a59ac6c4d6d152d3403121dfc4be6d49bdf52ddc92827af8a - command: - - "python3" - - "/opt/k8s-init-container-nb-docs.py" - - "/home/jovyan" - volumeMounts: - - name: home - mountPath: /home/jovyan - subPath: "{username}" - securityContext: - runAsUser: 1000 - runAsGroup: 1000 - qgis: - display_name: QGIS on Linux Desktop - slug: qgis - kubespawner_override: - # Explicitly unset this - we set this to 'jupyterhub-singleuser' - # in basehub/values.yaml. We instead want to leave this unset, - # so the default command for the docker image is used instead. - # This is required for .desktop files to show up correctly. - cmd: null - # Launch people directly into the Linux desktop when they start - default_url: /desktop - # Built from https://github.com/jupyterhub/jupyter-remote-desktop-proxy/pull/51 - image: "quay.io/jupyter-remote-desktop-proxy/qgis:2023-09-27" - rocker: - display_name: Rocker Geospatial with RStudio - slug: rocker - kubespawner_override: - image: rocker/binder:4.3 - # Launch RStudio after the user logs in - default_url: /rstudio - # Ensures container working dir is homedir - # https://github.com/2i2c-org/infrastructure/issues/2559 - working_dir: /home/rstudio - init_containers: - # Need to explicitly fix ownership here, as otherwise these directories will be owned - # by root on most NFS filesystems - neither EFS nor Google Filestore support anonuid - - name: volume-mount-ownership-fix - image: busybox:1.36.1 - command: - [ - "sh", - "-c", - "id && chown 1000:1000 /home/rstudio && ls -lhd /home/rstudio ", - ] - securityContext: - runAsUser: 0 - volumeMounts: - - name: home - mountPath: /home/rstudio - subPath: "{username}" - # this container uses nbgitpuller to mount https://github.com/NASA-IMPACT/veda-docs/ for user pods - # image source: https://github.com/NASA-IMPACT/veda-jh-environments/tree/main/docker-images/base/nasa-veda-singleuser-init - - name: nasa-veda-singleuser-init - image: public.ecr.aws/nasa-veda/nasa-veda-singleuser-init:38e8998f9be64b0a59ac6c4d6d152d3403121dfc4be6d49bdf52ddc92827af8a - command: - - "python3" - - "/opt/k8s-init-container-nb-docs.py" - - "/home/rstudio" - volumeMounts: - - name: home - mountPath: /home/rstudio - subPath: "{username}" - securityContext: - runAsUser: 1000 - runAsGroup: 1000 - requests: - # NOTE: Node share choices are in active development, see comment - # next to profileList: above. - display_name: Node share - choices: - mem_1: - default: true - display_name: ~1 GB, ~0.125 CPU - kubespawner_override: - mem_guarantee: 0.904G - cpu_guarantee: 0.013 - mem_2: - display_name: ~2 GB, ~0.25 CPU - kubespawner_override: - mem_guarantee: 1.809G - cpu_guarantee: 0.025 - mem_4: - display_name: ~4 GB, ~0.5 CPU - kubespawner_override: - mem_guarantee: 3.617G - cpu_guarantee: 0.05 - mem_8: - display_name: ~8 GB, ~1.0 CPU - kubespawner_override: - mem_guarantee: 7.234G - cpu_guarantee: 0.1 - mem_16: - display_name: ~16 GB, ~2.0 CPU - kubespawner_override: - mem_guarantee: 14.469G - cpu_guarantee: 0.2 - mem_32: - display_name: ~32 GB, ~4.0 CPU - kubespawner_override: - mem_guarantee: 28.937G - cpu_guarantee: 0.4 - kubespawner_override: - cpu_limit: null - mem_limit: null - node_selector: - node.kubernetes.io/instance-type: r5.xlarge - - display_name: "Medium: up to 16 CPU / 128 GB RAM" - description: *profile_list_description - slug: medium - profile_options: - image: *image_options - requests: - # NOTE: Node share choices are in active development, see comment - # next to profileList: above. - display_name: Node share - choices: - mem_1: - display_name: ~1 GB, ~0.125 CPU - kubespawner_override: - mem_guarantee: 0.942G - cpu_guarantee: 0.013 - mem_2: - display_name: ~2 GB, ~0.25 CPU - kubespawner_override: - mem_guarantee: 1.883G - cpu_guarantee: 0.025 - mem_4: - default: true - display_name: ~4 GB, ~0.5 CPU - kubespawner_override: - mem_guarantee: 3.766G - cpu_guarantee: 0.05 - mem_8: - display_name: ~8 GB, ~1.0 CPU - kubespawner_override: - mem_guarantee: 7.532G - cpu_guarantee: 0.1 - mem_16: - display_name: ~16 GB, ~2.0 CPU - kubespawner_override: - mem_guarantee: 15.064G - cpu_guarantee: 0.2 - mem_32: - display_name: ~32 GB, ~4.0 CPU - kubespawner_override: - mem_guarantee: 30.128G - cpu_guarantee: 0.4 - mem_64: - display_name: ~64 GB, ~8.0 CPU - kubespawner_override: - mem_guarantee: 60.257G - cpu_guarantee: 0.8 - mem_128: - display_name: ~128 GB, ~16.0 CPU - kubespawner_override: - mem_guarantee: 120.513G - cpu_guarantee: 1.6 - kubespawner_override: - cpu_limit: null - mem_limit: null - node_selector: - node.kubernetes.io/instance-type: r5.4xlarge + choices: {} + scheduling: userScheduler: enabled: true diff --git a/deployer/commands/generate/resource_allocation/instance_capacities.yaml b/deployer/commands/generate/resource_allocation/instance_capacities.yaml index ebe5eef58e..5aaed80017 100644 --- a/deployer/commands/generate/resource_allocation/instance_capacities.yaml +++ b/deployer/commands/generate/resource_allocation/instance_capacities.yaml @@ -131,9 +131,9 @@ r5.4xlarge: cpu_capacity_high: 16.0 cpu_allocatable_low: 15.89 cpu_allocatable_high: 15.89 - mem_capacity_low: 124.364Gi + mem_capacity_low: 124.353Gi mem_capacity_high: 124.364Gi - mem_allocatable_low: 121.504Gi + mem_allocatable_low: 121.492Gi mem_allocatable_high: 121.504Gi m5.large: cpu_capacity_low: 2.0 From 0c64bb131de263a3df7f592513bf256f11300757 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Wed, 3 Jan 2024 15:39:45 -0800 Subject: [PATCH 2/3] Fix missing resource allocation choice for custom image --- config/clusters/nasa-veda/common.values.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/config/clusters/nasa-veda/common.values.yaml b/config/clusters/nasa-veda/common.values.yaml index e306d9d756..dda3098ed8 100644 --- a/config/clusters/nasa-veda/common.values.yaml +++ b/config/clusters/nasa-veda/common.values.yaml @@ -102,7 +102,7 @@ basehub: runAsUser: 1000 runAsGroup: 1000 profile_options: &profile_options - resource_allocation: + resource_allocation: &profile_options_resource_allocation display_name: Resource Allocation choices: mem_1_9: @@ -208,6 +208,7 @@ basehub: kubespawner_override: image: "{value}" choices: {} + resource_allocation: *profile_options_resource_allocation scheduling: userScheduler: From c822e4d6ca6294b72419c01ec7bde95edeadb867 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Wed, 3 Jan 2024 15:56:39 -0800 Subject: [PATCH 3/3] Update capacities & regenerate veda choices --- config/clusters/nasa-veda/common.values.yaml | 43 ++++++++++--------- .../node-capacity-info.json | 18 ++++---- 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/config/clusters/nasa-veda/common.values.yaml b/config/clusters/nasa-veda/common.values.yaml index dda3098ed8..7d30d6fb3d 100644 --- a/config/clusters/nasa-veda/common.values.yaml +++ b/config/clusters/nasa-veda/common.values.yaml @@ -108,8 +108,8 @@ basehub: mem_1_9: display_name: 1.9 GB RAM, upto 3.75 CPUs kubespawner_override: - mem_guarantee: 1992701952 - mem_limit: 1992701952 + mem_guarantee: 1991341312 + mem_limit: 1991341312 cpu_guarantee: 0.234375 cpu_limit: 3.75 node_selector: @@ -118,8 +118,8 @@ basehub: mem_3_7: display_name: 3.7 GB RAM, upto 3.75 CPUs kubespawner_override: - mem_guarantee: 3985403904 - mem_limit: 3985403904 + mem_guarantee: 3982682624 + mem_limit: 3982682624 cpu_guarantee: 0.46875 cpu_limit: 3.75 node_selector: @@ -127,8 +127,8 @@ basehub: mem_7_4: display_name: 7.4 GB RAM, upto 3.75 CPUs kubespawner_override: - mem_guarantee: 7970807808 - mem_limit: 7970807808 + mem_guarantee: 7965365248 + mem_limit: 7965365248 cpu_guarantee: 0.9375 cpu_limit: 3.75 node_selector: @@ -136,8 +136,8 @@ basehub: mem_14_8: display_name: 14.8 GB RAM, upto 3.75 CPUs kubespawner_override: - mem_guarantee: 15941615616 - mem_limit: 15941615616 + mem_guarantee: 15930730496 + mem_limit: 15930730496 cpu_guarantee: 1.875 cpu_limit: 3.75 node_selector: @@ -145,28 +145,29 @@ basehub: mem_29_7: display_name: 29.7 GB RAM, upto 3.75 CPUs kubespawner_override: - mem_guarantee: 31883231232 - mem_limit: 31883231232 + mem_guarantee: 31861460992 + mem_limit: 31861460992 cpu_guarantee: 3.75 cpu_limit: 3.75 node_selector: node.kubernetes.io/instance-type: r5.xlarge - mem_60_6: - display_name: 60.6 GB RAM, upto 15.72 CPUs + + mem_60_7: + display_name: 60.7 GB RAM, upto 15.725 CPUs kubespawner_override: - mem_guarantee: 65105797120 - mem_limit: 65105797120 - cpu_guarantee: 7.86 - cpu_limit: 15.72 + mem_guarantee: 65147242496 + mem_limit: 65147242496 + cpu_guarantee: 7.8625 + cpu_limit: 15.725 node_selector: node.kubernetes.io/instance-type: r5.4xlarge mem_121_3: - display_name: 121.3 GB RAM, upto 15.72 CPUs + display_name: 121.3 GB RAM, upto 15.725 CPUs kubespawner_override: - mem_guarantee: 130211594240 - mem_limit: 130211594240 - cpu_guarantee: 15.72 - cpu_limit: 15.72 + mem_guarantee: 130294484992 + mem_limit: 130294484992 + cpu_guarantee: 15.725 + cpu_limit: 15.725 node_selector: node.kubernetes.io/instance-type: r5.4xlarge - display_name: "Rocker Geospatial with RStudio" diff --git a/deployer/commands/generate/resource_allocation/node-capacity-info.json b/deployer/commands/generate/resource_allocation/node-capacity-info.json index 2566393828..ff153a2f17 100644 --- a/deployer/commands/generate/resource_allocation/node-capacity-info.json +++ b/deployer/commands/generate/resource_allocation/node-capacity-info.json @@ -2,11 +2,11 @@ "r5.xlarge": { "capacity": { "cpu": 4.0, - "memory": 33186611200 + "memory": 33164840960 }, "allocatable": { "cpu": 3.92, - "memory": 32145375232 + "memory": 32123604992 }, "measured_overhead": { "cpu": 0.17, @@ -14,7 +14,7 @@ }, "available": { "cpu": 3.75, - "memory": 31883231232 + "memory": 31861460992 } }, "r5.16xlarge": { @@ -48,19 +48,19 @@ "r5.4xlarge": { "capacity": { "cpu": 16.0, - "memory": 133545017344 + "memory": 133523050496 }, "allocatable": { "cpu": 15.89, - "memory": 130473738240 + "memory": 130451771392 }, "measured_overhead": { - "cpu": 0.17, - "memory": 262144000 + "cpu": 0.165, + "memory": 157286400 }, "available": { - "cpu": 15.72, - "memory": 130211594240 + "cpu": 15.725, + "memory": 130294484992 } }, "n2-highmem-32": {