From 0763ee8ab97cac2172f2d2e3da401cd631bd3890 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Wed, 3 Jan 2024 15:23:42 -0800 Subject: [PATCH] Use generated resource allocation options for veda hub While starting to work on https://github.com/2i2c-org/infrastructure/issues/3565, I realized that VEDA was still using the older style 'node share' rather than the generated 'resource allocation' options. I've swapped over the options to now be based on images for users to choose and resource allocation options generated by our resource allocation script. This matches openscapes, and there has generally been pretty big positive feedback on this mode. I've kept the initial cloning to only happen on the pangeo image as it currently exists, without making any changes. That should be cleaned up as part of https://github.com/2i2c-org/infrastructure/issues/3565 --- config/clusters/nasa-veda/common.values.yaml | 355 +++++++----------- .../instance_capacities.yaml | 4 +- 2 files changed, 141 insertions(+), 218 deletions(-) diff --git a/config/clusters/nasa-veda/common.values.yaml b/config/clusters/nasa-veda/common.values.yaml index e9c399bd96..e306d9d756 100644 --- a/config/clusters/nasa-veda/common.values.yaml +++ b/config/clusters/nasa-veda/common.values.yaml @@ -59,31 +59,146 @@ basehub: - wildintellect singleuser: defaultUrl: /lab - image: - name: public.ecr.aws/nasa-veda/nasa-veda-singleuser - # Based off pangeo/pangeo-notebook:2023.07.05 which uses JupyterLab <4, so jupyterlab-git and dask-dashboard work - # If updating this tag, also update it in the `profile_options.image.options.pangeo.kubespawner_override.image`below - tag: "5068290376e8c3151d97a36ae6485bb7ff79650b94aecc93ffb2ea1b42d76460" profileList: - # NOTE: About node sharing - # - # CPU/Memory requests/limits are actively considered still. This - # profile list is setup to involve node sharing as considered in - # https://github.com/2i2c-org/infrastructure/issues/2121. 
- # - # - Memory requests are different from the description, based on: - # whats found to remain allocate in k8s, subtracting 1GiB - # overhead for misc system pods, and transitioning from GB in - # description to GiB in mem_guarantee. - # - CPU requests are lower than the description, with a factor of - # 10%. - # - - display_name: "Small: up to 4 CPU / 32 GB RAM" - description: &profile_list_description "Start a container with at least a chosen share of capacity on a node of this type" - slug: small + - display_name: "Modified Pangeo Notebook" + slug: modified-pangeo + description: Pangeo based notebook with a Python environment default: true + kubespawner_override: + image: public.ecr.aws/nasa-veda/nasa-veda-singleuser:5068290376e8c3151d97a36ae6485bb7ff79650b94aecc93ffb2ea1b42d76460 + init_containers: + # Need to explicitly fix ownership here, as otherwise these directories will be owned + # by root on most NFS filesystems - neither EFS nor Google Filestore support anonuid + - name: volume-mount-ownership-fix + image: busybox:1.36.1 + command: + - sh + - -c + - id && chown 1000:1000 /home/jovyan /home/jovyan/shared && ls -lhd /home/jovyan + securityContext: + runAsUser: 0 + volumeMounts: + - name: home + mountPath: /home/jovyan + subPath: "{username}" + # mounted without readonly attribute here, + # so we can chown it appropriately + - name: home + mountPath: /home/jovyan/shared + subPath: _shared + # this container uses nbgitpuller to mount https://github.com/NASA-IMPACT/veda-docs/ for user pods + # image source: https://github.com/NASA-IMPACT/veda-jh-environments/tree/main/docker-images/base/nasa-veda-singleuser-init + - name: nasa-veda-singleuser-init + image: public.ecr.aws/nasa-veda/nasa-veda-singleuser-init:38e8998f9be64b0a59ac6c4d6d152d3403121dfc4be6d49bdf52ddc92827af8a + command: + - "python3" + - "/opt/k8s-init-container-nb-docs.py" + - "/home/jovyan" + volumeMounts: + - name: home + mountPath: /home/jovyan + subPath: "{username}" + securityContext: + 
runAsUser: 1000 + runAsGroup: 1000 + profile_options: &profile_options + resource_allocation: + display_name: Resource Allocation + choices: + mem_1_9: + display_name: 1.9 GB RAM, upto 3.75 CPUs + kubespawner_override: + mem_guarantee: 1992701952 + mem_limit: 1992701952 + cpu_guarantee: 0.234375 + cpu_limit: 3.75 + node_selector: + node.kubernetes.io/instance-type: r5.xlarge + default: true + mem_3_7: + display_name: 3.7 GB RAM, upto 3.75 CPUs + kubespawner_override: + mem_guarantee: 3985403904 + mem_limit: 3985403904 + cpu_guarantee: 0.46875 + cpu_limit: 3.75 + node_selector: + node.kubernetes.io/instance-type: r5.xlarge + mem_7_4: + display_name: 7.4 GB RAM, upto 3.75 CPUs + kubespawner_override: + mem_guarantee: 7970807808 + mem_limit: 7970807808 + cpu_guarantee: 0.9375 + cpu_limit: 3.75 + node_selector: + node.kubernetes.io/instance-type: r5.xlarge + mem_14_8: + display_name: 14.8 GB RAM, upto 3.75 CPUs + kubespawner_override: + mem_guarantee: 15941615616 + mem_limit: 15941615616 + cpu_guarantee: 1.875 + cpu_limit: 3.75 + node_selector: + node.kubernetes.io/instance-type: r5.xlarge + mem_29_7: + display_name: 29.7 GB RAM, upto 3.75 CPUs + kubespawner_override: + mem_guarantee: 31883231232 + mem_limit: 31883231232 + cpu_guarantee: 3.75 + cpu_limit: 3.75 + node_selector: + node.kubernetes.io/instance-type: r5.xlarge + mem_60_6: + display_name: 60.6 GB RAM, upto 15.72 CPUs + kubespawner_override: + mem_guarantee: 65105797120 + mem_limit: 65105797120 + cpu_guarantee: 7.86 + cpu_limit: 15.72 + node_selector: + node.kubernetes.io/instance-type: r5.4xlarge + mem_121_3: + display_name: 121.3 GB RAM, upto 15.72 CPUs + kubespawner_override: + mem_guarantee: 130211594240 + mem_limit: 130211594240 + cpu_guarantee: 15.72 + cpu_limit: 15.72 + node_selector: + node.kubernetes.io/instance-type: r5.4xlarge + - display_name: "Rocker Geospatial with RStudio" + slug: rocker + description: R environment with many geospatial libraries pre-installed + kubespawner_override: + image: 
rocker/binder:4.3 + # Launch RStudio after the user logs in + default_url: /rstudio + # Ensures container working dir is homedir + # https://github.com/2i2c-org/infrastructure/issues/2559 + working_dir: /home/rstudio + profile_options: *profile_options + - display_name: "QGIS on Linux Desktop" + slug: qgis + description: Linux desktop in the browser, with qgis installed + kubespawner_override: + # Explicitly unset this - we set this to 'jupyterhub-singleuser' + # in basehub/values.yaml. We instead want to leave this unset, + # so the default command for the docker image is used instead. + # This is required for .desktop files to show up correctly. + cmd: null + # Launch people directly into the Linux desktop when they start + default_url: /desktop + # Built from https://github.com/jupyterhub/jupyter-remote-desktop-proxy/pull/51 + image: "quay.io/jupyter-remote-desktop-proxy/qgis:2023-09-27" + profile_options: *profile_options + - display_name: "Bring your own image" + description: Specify your own docker image (must have python and jupyterhub installed in it) + slug: custom profile_options: - image: &image_options + image: display_name: Image unlisted_choice: enabled: True @@ -92,200 +207,8 @@ basehub: validation_message: "Must be a publicly available docker image, of form <image-name>:<tag>" kubespawner_override: image: "{value}" - choices: - pangeo: - display_name: Modified Pangeo Notebook - default: true - slug: pangeo - kubespawner_override: - image: public.ecr.aws/nasa-veda/nasa-veda-singleuser:5068290376e8c3151d97a36ae6485bb7ff79650b94aecc93ffb2ea1b42d76460 - init_containers: - # Need to explicitly fix ownership here, as otherwise these directories will be owned - # by root on most NFS filesystems - neither EFS nor Google Filestore support anonuid - - name: volume-mount-ownership-fix - image: busybox:1.36.1 - command: - - sh - - -c - - id && chown 1000:1000 /home/jovyan /home/jovyan/shared && ls -lhd /home/jovyan - securityContext: - runAsUser: 0 - volumeMounts: - - name: 
home - mountPath: /home/jovyan - subPath: "{username}" - # mounted without readonly attribute here, - # so we can chown it appropriately - - name: home - mountPath: /home/jovyan/shared - subPath: _shared - # this container uses nbgitpuller to mount https://github.com/NASA-IMPACT/veda-docs/ for user pods - # image source: https://github.com/NASA-IMPACT/veda-jh-environments/tree/main/docker-images/base/nasa-veda-singleuser-init - - name: nasa-veda-singleuser-init - image: public.ecr.aws/nasa-veda/nasa-veda-singleuser-init:38e8998f9be64b0a59ac6c4d6d152d3403121dfc4be6d49bdf52ddc92827af8a - command: - - "python3" - - "/opt/k8s-init-container-nb-docs.py" - - "/home/jovyan" - volumeMounts: - - name: home - mountPath: /home/jovyan - subPath: "{username}" - securityContext: - runAsUser: 1000 - runAsGroup: 1000 - qgis: - display_name: QGIS on Linux Desktop - slug: qgis - kubespawner_override: - # Explicitly unset this - we set this to 'jupyterhub-singleuser' - # in basehub/values.yaml. We instead want to leave this unset, - # so the default command for the docker image is used instead. - # This is required for .desktop files to show up correctly. 
- cmd: null - # Launch people directly into the Linux desktop when they start - default_url: /desktop - # Built from https://github.com/jupyterhub/jupyter-remote-desktop-proxy/pull/51 - image: "quay.io/jupyter-remote-desktop-proxy/qgis:2023-09-27" - rocker: - display_name: Rocker Geospatial with RStudio - slug: rocker - kubespawner_override: - image: rocker/binder:4.3 - # Launch RStudio after the user logs in - default_url: /rstudio - # Ensures container working dir is homedir - # https://github.com/2i2c-org/infrastructure/issues/2559 - working_dir: /home/rstudio - init_containers: - # Need to explicitly fix ownership here, as otherwise these directories will be owned - # by root on most NFS filesystems - neither EFS nor Google Filestore support anonuid - - name: volume-mount-ownership-fix - image: busybox:1.36.1 - command: - [ - "sh", - "-c", - "id && chown 1000:1000 /home/rstudio && ls -lhd /home/rstudio ", - ] - securityContext: - runAsUser: 0 - volumeMounts: - - name: home - mountPath: /home/rstudio - subPath: "{username}" - # this container uses nbgitpuller to mount https://github.com/NASA-IMPACT/veda-docs/ for user pods - # image source: https://github.com/NASA-IMPACT/veda-jh-environments/tree/main/docker-images/base/nasa-veda-singleuser-init - - name: nasa-veda-singleuser-init - image: public.ecr.aws/nasa-veda/nasa-veda-singleuser-init:38e8998f9be64b0a59ac6c4d6d152d3403121dfc4be6d49bdf52ddc92827af8a - command: - - "python3" - - "/opt/k8s-init-container-nb-docs.py" - - "/home/rstudio" - volumeMounts: - - name: home - mountPath: /home/rstudio - subPath: "{username}" - securityContext: - runAsUser: 1000 - runAsGroup: 1000 - requests: - # NOTE: Node share choices are in active development, see comment - # next to profileList: above. 
- display_name: Node share - choices: - mem_1: - default: true - display_name: ~1 GB, ~0.125 CPU - kubespawner_override: - mem_guarantee: 0.904G - cpu_guarantee: 0.013 - mem_2: - display_name: ~2 GB, ~0.25 CPU - kubespawner_override: - mem_guarantee: 1.809G - cpu_guarantee: 0.025 - mem_4: - display_name: ~4 GB, ~0.5 CPU - kubespawner_override: - mem_guarantee: 3.617G - cpu_guarantee: 0.05 - mem_8: - display_name: ~8 GB, ~1.0 CPU - kubespawner_override: - mem_guarantee: 7.234G - cpu_guarantee: 0.1 - mem_16: - display_name: ~16 GB, ~2.0 CPU - kubespawner_override: - mem_guarantee: 14.469G - cpu_guarantee: 0.2 - mem_32: - display_name: ~32 GB, ~4.0 CPU - kubespawner_override: - mem_guarantee: 28.937G - cpu_guarantee: 0.4 - kubespawner_override: - cpu_limit: null - mem_limit: null - node_selector: - node.kubernetes.io/instance-type: r5.xlarge - - display_name: "Medium: up to 16 CPU / 128 GB RAM" - description: *profile_list_description - slug: medium - profile_options: - image: *image_options - requests: - # NOTE: Node share choices are in active development, see comment - # next to profileList: above. 
- display_name: Node share - choices: - mem_1: - display_name: ~1 GB, ~0.125 CPU - kubespawner_override: - mem_guarantee: 0.942G - cpu_guarantee: 0.013 - mem_2: - display_name: ~2 GB, ~0.25 CPU - kubespawner_override: - mem_guarantee: 1.883G - cpu_guarantee: 0.025 - mem_4: - default: true - display_name: ~4 GB, ~0.5 CPU - kubespawner_override: - mem_guarantee: 3.766G - cpu_guarantee: 0.05 - mem_8: - display_name: ~8 GB, ~1.0 CPU - kubespawner_override: - mem_guarantee: 7.532G - cpu_guarantee: 0.1 - mem_16: - display_name: ~16 GB, ~2.0 CPU - kubespawner_override: - mem_guarantee: 15.064G - cpu_guarantee: 0.2 - mem_32: - display_name: ~32 GB, ~4.0 CPU - kubespawner_override: - mem_guarantee: 30.128G - cpu_guarantee: 0.4 - mem_64: - display_name: ~64 GB, ~8.0 CPU - kubespawner_override: - mem_guarantee: 60.257G - cpu_guarantee: 0.8 - mem_128: - display_name: ~128 GB, ~16.0 CPU - kubespawner_override: - mem_guarantee: 120.513G - cpu_guarantee: 1.6 - kubespawner_override: - cpu_limit: null - mem_limit: null - node_selector: - node.kubernetes.io/instance-type: r5.4xlarge + choices: {} + scheduling: userScheduler: enabled: true diff --git a/deployer/commands/generate/resource_allocation/instance_capacities.yaml b/deployer/commands/generate/resource_allocation/instance_capacities.yaml index ebe5eef58e..5aaed80017 100644 --- a/deployer/commands/generate/resource_allocation/instance_capacities.yaml +++ b/deployer/commands/generate/resource_allocation/instance_capacities.yaml @@ -131,9 +131,9 @@ r5.4xlarge: cpu_capacity_high: 16.0 cpu_allocatable_low: 15.89 cpu_allocatable_high: 15.89 - mem_capacity_low: 124.364Gi + mem_capacity_low: 124.353Gi mem_capacity_high: 124.364Gi - mem_allocatable_low: 121.504Gi + mem_allocatable_low: 121.492Gi mem_allocatable_high: 121.504Gi m5.large: cpu_capacity_low: 2.0