Merge pull request #2584 from consideRatio/pr/add-ncar-cisl

2i2c-aws-us: ncar-cisl added, researchdelight GPU image option removed
2i2c-org · May 29, 2023 · 89f33ad · 89f33ad
2 parents 3a6b47f + e35dcd1
commit 89f33ad
Show file tree

Hide file tree

Showing 8 changed files with 507 additions and 209 deletions.
diff --git a/config/clusters/2i2c-aws-us/cluster.yaml b/config/clusters/2i2c-aws-us/cluster.yaml
@@ -33,3 +33,11 @@ hubs:
       - common.values.yaml
       - researchdelight.values.yaml
       - enc-researchdelight.secret.values.yaml
+  - name: ncar-cisl
+    display_name: "NCAR-CISL for UCAR"
+    domain: ncar-cisl.2i2c.cloud
+    helm_chart: daskhub
+    helm_chart_values_files:
+      - common.values.yaml
+      - ncar-cisl.values.yaml
+      - enc-ncar-cisl.secret.values.yaml
diff --git a/config/clusters/2i2c-aws-us/common.values.yaml b/config/clusters/2i2c-aws-us/common.values.yaml
@@ -4,199 +4,7 @@ basehub:
       userScheduler:
         enabled: true
     singleuser:
-      profileList:
-        # NOTE: About node sharing
-        #
-        #       CPU/Memory requests/limits are actively considered still. This
-        #       profile list is setup to involve node sharing as considered in
-        #       https://github.com/2i2c-org/infrastructure/issues/2121.
-        #
-        #       - Memory requests are different from the description, based on:
-        #         what's found to remain allocatable in k8s, subtracting 1GiB
-        #         overhead for misc system pods, and transitioning from GB in
-        #         description to GiB in mem_guarantee.
-        #       - CPU requests are lower than the description: they are set to
-        #         10% of the described CPU count.
-        #
-        - display_name: "Small: up to 4 CPU / 32 GB RAM"
-          description: &profile_list_description "Start a container with at least a chosen share of capacity on a node of this type"
-          slug: small
-          default: true
-          profile_options:
-            requests:
-              # NOTE: Node share choices are in active development, see comment
-              #       next to profileList: above.
-              display_name: Node share
-              choices:
-                mem_1:
-                  default: true
-                  display_name: ~1 GB, ~0.125 CPU
-                  kubespawner_override:
-                    mem_guarantee: 0.904G
-                    cpu_guarantee: 0.013
-                mem_2:
-                  display_name: ~2 GB, ~0.25 CPU
-                  kubespawner_override:
-                    mem_guarantee: 1.809G
-                    cpu_guarantee: 0.025
-                mem_4:
-                  display_name: ~4 GB, ~0.5 CPU
-                  kubespawner_override:
-                    mem_guarantee: 3.617G
-                    cpu_guarantee: 0.05
-                mem_8:
-                  display_name: ~8 GB, ~1.0 CPU
-                  kubespawner_override:
-                    mem_guarantee: 7.234G
-                    cpu_guarantee: 0.1
-                mem_16:
-                  display_name: ~16 GB, ~2.0 CPU
-                  kubespawner_override:
-                    mem_guarantee: 14.469G
-                    cpu_guarantee: 0.2
-                mem_32:
-                  display_name: ~32 GB, ~4.0 CPU
-                  kubespawner_override:
-                    mem_guarantee: 28.937G
-                    cpu_guarantee: 0.4
-          kubespawner_override:
-            cpu_limit: null
-            mem_limit: null
-            node_selector:
-              node.kubernetes.io/instance-type: r5.xlarge
-        - display_name: "Medium: up to 16 CPU / 128 GB RAM"
-          description: *profile_list_description
-          slug: medium
-          profile_options:
-            requests:
-              # NOTE: Node share choices are in active development, see comment
-              #       next to profileList: above.
-              display_name: Node share
-              choices:
-                mem_1:
-                  display_name: ~1 GB, ~0.125 CPU
-                  kubespawner_override:
-                    mem_guarantee: 0.942G
-                    cpu_guarantee: 0.013
-                mem_2:
-                  display_name: ~2 GB, ~0.25 CPU
-                  kubespawner_override:
-                    mem_guarantee: 1.883G
-                    cpu_guarantee: 0.025
-                mem_4:
-                  default: true
-                  display_name: ~4 GB, ~0.5 CPU
-                  kubespawner_override:
-                    mem_guarantee: 3.766G
-                    cpu_guarantee: 0.05
-                mem_8:
-                  display_name: ~8 GB, ~1.0 CPU
-                  kubespawner_override:
-                    mem_guarantee: 7.532G
-                    cpu_guarantee: 0.1
-                mem_16:
-                  display_name: ~16 GB, ~2.0 CPU
-                  kubespawner_override:
-                    mem_guarantee: 15.064G
-                    cpu_guarantee: 0.2
-                mem_32:
-                  display_name: ~32 GB, ~4.0 CPU
-                  kubespawner_override:
-                    mem_guarantee: 30.128G
-                    cpu_guarantee: 0.4
-                mem_64:
-                  display_name: ~64 GB, ~8.0 CPU
-                  kubespawner_override:
-                    mem_guarantee: 60.257G
-                    cpu_guarantee: 0.8
-                mem_128:
-                  display_name: ~128 GB, ~16.0 CPU
-                  kubespawner_override:
-                    mem_guarantee: 120.513G
-                    cpu_guarantee: 1.6
-          kubespawner_override:
-            cpu_limit: null
-            mem_limit: null
-            node_selector:
-              node.kubernetes.io/instance-type: r5.4xlarge
-        - display_name: "Large: up to 64 CPU / 512 GB RAM"
-          description: *profile_list_description
-          slug: large
-          profile_options:
-            requests:
-              # NOTE: Node share choices are in active development, see comment
-              #       next to profileList: above.
-              display_name: Node share
-              choices:
-                mem_4:
-                  display_name: ~4 GB, ~0.5 CPU
-                  kubespawner_override:
-                    mem_guarantee: 3.821G
-                    cpu_guarantee: 0.05
-                mem_8:
-                  display_name: ~8 GB, ~1.0 CPU
-                  kubespawner_override:
-                    mem_guarantee: 7.643G
-                    cpu_guarantee: 0.1
-                mem_16:
-                  default: true
-                  display_name: ~16 GB, ~2.0 CPU
-                  kubespawner_override:
-                    mem_guarantee: 15.285G
-                    cpu_guarantee: 0.2
-                mem_32:
-                  display_name: ~32 GB, ~4.0 CPU
-                  kubespawner_override:
-                    mem_guarantee: 30.571G
-                    cpu_guarantee: 0.4
-                mem_64:
-                  display_name: ~64 GB, ~8.0 CPU
-                  kubespawner_override:
-                    mem_guarantee: 61.141G
-                    cpu_guarantee: 0.8
-                mem_128:
-                  display_name: ~128 GB, ~16.0 CPU
-                  kubespawner_override:
-                    mem_guarantee: 122.282G
-                    cpu_guarantee: 1.6
-                mem_256:
-                  display_name: ~256 GB, ~32.0 CPU
-                  kubespawner_override:
-                    mem_guarantee: 244.565G
-                    cpu_guarantee: 3.2
-                mem_512:
-                  display_name: ~512 GB, ~64.0 CPU
-                  kubespawner_override:
-                    mem_guarantee: 489.13G
-                    cpu_guarantee: 6.4
-          kubespawner_override:
-            cpu_limit: null
-            mem_limit: null
-            node_selector:
-              node.kubernetes.io/instance-type: r5.16xlarge
-
-        - display_name: NVIDIA Tesla T4, ~16 GB, ~4 CPUs
-          slug: gpu
-          description: "Start a container on a dedicated node with a GPU"
-          profile_options:
-            image:
-              display_name: Image
-              choices:
-                tensorflow:
-                  display_name: Pangeo Tensorflow ML Notebook
-                  slug: "tensorflow"
-                  kubespawner_override:
-                    node.kubernetes.io/instance-type: g4dn.xlarge
-                    image: "pangeo/ml-notebook:b9584f6"
-                pytorch:
-                  display_name: Pangeo PyTorch ML Notebook
-                  default: true
-                  slug: "pytorch"
-                  kubespawner_override:
-                    node.kubernetes.io/instance-type: g4dn.xlarge
-                    image: "pangeo/pytorch-notebook:b9584f6"
-          kubespawner_override:
-            mem_limit: null
-            mem_guarantee: 14G
-            extra_resource_limits:
-              nvidia.com/gpu: "1"
+      extraEnv:
+        # Temporarily set for *all* pods, including pods without any GPUs,
+        # to work around https://github.com/2i2c-org/infrastructure/issues/1530
+        NVIDIA_DRIVER_CAPABILITIES: compute,utility
diff --git a/config/clusters/2i2c-aws-us/enc-ncar-cisl.secret.values.yaml b/config/clusters/2i2c-aws-us/enc-ncar-cisl.secret.values.yaml
@@ -0,0 +1,21 @@
+basehub:
+    jupyterhub:
+        hub:
+            config:
+                GitHubOAuthenticator:
+                    client_id: ENC[AES256_GCM,data:w8TiVOOw38P4ZzVLlDjMwSg/31k=,iv:uWPJZpGMcOmOtsRtj+/P7NdnKxQt3fekJE5VrTx7cuY=,tag:Gh/vNCb74pE25FMyVSeJ5A==,type:str]
+                    client_secret: ENC[AES256_GCM,data:cJGGF0B84gcdsyG5dxG4l5EelVdC95cXie6j4z4wdtLLKa7McjXETA==,iv:saFdxwLsn+MJy9jWUiuDVIq8prVAj2rLIn8cBcai5I4=,tag:gUtfeUBTrD61B+oT1B/lVw==,type:str]
+sops:
+    kms: []
+    gcp_kms:
+        - resource_id: projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs
+          created_at: "2023-05-29T06:00:51Z"
+          enc: CiUA4OM7eEzeTOTJHKWQmB1JmagZWrA4RFy2NgQrh0IRUhvfBqpgEkkAyiwFHHJQTkDoCjoAG9xqZImH9v4mc13lIh8qX4ixg5KHBC7RFL3jY7VglROpS+3lAYvxS+VtscuysmPKg72FVDD1EaGU59Do
+    azure_kv: []
+    hc_vault: []
+    age: []
+    lastmodified: "2023-05-29T06:00:51Z"
+    mac: ENC[AES256_GCM,data:iAGbHj/lnENLexJVp7zWIXiYzReY0CWmU/OI1c/iz7Q7d3G+aYpNb1nP872mrSRMb6Ssojor2AqOWpVnBxUWzV29NAFT1hr2mf6d5LEnrB2g4Mg0ZqQhTw+gKi8niIR88jEd0/scuu+54CR874HJ3JBCePtY8Q31PXcreQtTrU8=,iv:TeEYZHZxTZ7rZwS7tWw8laramN+JJzRvxuJFH/xkXTM=,tag:7hVl2kLj7dnXZS/Pyc+5tg==,type:str]
+    pgp: []
+    unencrypted_suffix: _unencrypted
+    version: 3.7.2