diff --git a/config/clusters/nasa-cryo/common.values.yaml b/config/clusters/nasa-cryo/common.values.yaml
index 6e33813e16..ccae60f6f2 100644
--- a/config/clusters/nasa-cryo/common.values.yaml
+++ b/config/clusters/nasa-cryo/common.values.yaml
@@ -56,22 +56,6 @@ basehub:
- fperez
JupyterHub:
authenticator_class: github
- # Announcement is a JupyterHub feature to present messages to users in
- # web pages under the /hub path (JupyterHub responds), but not via the
- # /user path (single-user server responds).
- #
- # ref: https://github.com/2i2c-org/infrastructure/issues/1501
- # ref: https://jupyterhub.readthedocs.io/en/stable/reference/templates.html#announcement-configuration-variables
- #
- template_vars:
- announcement: >-
-
- Service maintenance is scheduled Sunday March 19, to Monday 8AM
- EST.
-
-
- Running servers may be forcefully stopped and service disruption
- is expected.
GitHubOAuthenticator:
# We are restricting profiles based on GitHub Team membership and
# so need to populate the teams in the auth state
@@ -95,53 +79,194 @@ basehub:
subPath: _shared
readOnly: true
profileList:
- # The mem-guarantees are here so k8s doesn't schedule other pods
- # on these nodes.
- - display_name: "Small: m5.large"
- description: "~2 CPU, ~8G RAM"
+ # NOTE: About node sharing
+ #
+      # CPU/Memory requests/limits are still being actively evaluated. This
+      # profile list is set up to involve node sharing as considered in
+ # https://github.com/2i2c-org/infrastructure/issues/2121.
+ #
+      #   - Memory requests are lower than the description: they are based on
+      #     what is found to remain allocatable in k8s, minus a 1 GiB overhead
+      #     for misc system pods, and converted from GB in the description to
+      #     GiB in mem_guarantee.
+      #   - CPU requests are set to 10% of the CPU share named in the
+      #     description.
+ #
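+      # For example (illustrative arithmetic for the small r5.xlarge profile
+      # below, assuming ~29.9 GiB is reported allocatable on the node):
+      # subtracting 1 GiB overhead leaves ~28.9 GiB, so a 1/32 node share
+      # gets a mem_guarantee of 28.9 / 32 = ~0.904. The node has 4 CPU, so
+      # the same share is described as 4 / 32 = 0.125 CPU, and its
+      # cpu_guarantee is 10% of that, ~0.013.
+      #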
+ - display_name: "Small: up to 4 CPU / 32 GB RAM"
+ description: &profile_list_description "Start a container with at least a chosen share of capacity on a node of this type"
+ slug: small
default: true
allowed_teams:
- 2i2c-org:hub-access-for-2i2c-staff
- CryoInTheCloud:cryoclouduser
- CryoInTheCloud:cryocloudadvanced
+ profile_options:
+ requests:
+ # NOTE: Node share choices are in active development, see comment
+ # next to profileList: above.
+ display_name: Node share
+ choices:
+ mem_1:
+ default: true
+ display_name: ~1 GB, ~0.125 CPU
+ kubespawner_override:
+ mem_guarantee: 0.904G
+ cpu_guarantee: 0.013
+ mem_2:
+ display_name: ~2 GB, ~0.25 CPU
+ kubespawner_override:
+ mem_guarantee: 1.809G
+ cpu_guarantee: 0.025
+ mem_4:
+ display_name: ~4 GB, ~0.5 CPU
+ kubespawner_override:
+ mem_guarantee: 3.617G
+ cpu_guarantee: 0.05
+ mem_8:
+ display_name: ~8 GB, ~1.0 CPU
+ kubespawner_override:
+ mem_guarantee: 7.234G
+ cpu_guarantee: 0.1
+ mem_16:
+ display_name: ~16 GB, ~2.0 CPU
+ kubespawner_override:
+ mem_guarantee: 14.469G
+ cpu_guarantee: 0.2
+ mem_32:
+ display_name: ~32 GB, ~4.0 CPU
+ kubespawner_override:
+ mem_guarantee: 28.937G
+ cpu_guarantee: 0.4
kubespawner_override:
- # Explicitly unset mem_limit, so it overrides the default memory limit we set in
- # basehub/values.yaml
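+        # Explicitly unset the CPU and memory limits (including the default
+        # mem_limit set in basehub/values.yaml), so usage is bounded by the
+        # node's capacity rather than a per-user limit.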
+ cpu_limit: null
mem_limit: null
- mem_guarantee: 6.5G
node_selector:
- node.kubernetes.io/instance-type: m5.large
- - display_name: "Medium: m5.xlarge"
- description: "~4 CPU, ~15G RAM"
- allowed_teams:
- - 2i2c-org:hub-access-for-2i2c-staff
- - CryoInTheCloud:cryoclouduser
- - CryoInTheCloud:cryocloudadvanced
- kubespawner_override:
- mem_limit: null
- mem_guarantee: 12G
- node_selector:
- node.kubernetes.io/instance-type: m5.xlarge
- - display_name: "Large: m5.2xlarge"
- description: "~8 CPU, ~30G RAM"
- allowed_teams:
- - 2i2c-org:hub-access-for-2i2c-staff
- - CryoInTheCloud:cryocloudadvanced
- kubespawner_override:
- mem_limit: null
- mem_guarantee: 26G
- node_selector:
- node.kubernetes.io/instance-type: m5.2xlarge
- - display_name: "Huge: m5.8xlarge"
- description: "~32 CPU, ~128G RAM"
+ node.kubernetes.io/instance-type: r5.xlarge
+
+ - display_name: "Medium: up to 16 CPU / 128 GB RAM"
+ description: *profile_list_description
+ slug: medium
allowed_teams:
- 2i2c-org:hub-access-for-2i2c-staff
- CryoInTheCloud:cryocloudadvanced
+ profile_options:
+ requests:
+ # NOTE: Node share choices are in active development, see comment
+ # next to profileList: above.
+ display_name: Node share
+ choices:
+ mem_1:
+ display_name: ~1 GB, ~0.125 CPU
+ kubespawner_override:
+ mem_guarantee: 0.942G
+ cpu_guarantee: 0.013
+ mem_2:
+ display_name: ~2 GB, ~0.25 CPU
+ kubespawner_override:
+ mem_guarantee: 1.883G
+ cpu_guarantee: 0.025
+ mem_4:
+ default: true
+ display_name: ~4 GB, ~0.5 CPU
+ kubespawner_override:
+ mem_guarantee: 3.766G
+ cpu_guarantee: 0.05
+ mem_8:
+ display_name: ~8 GB, ~1.0 CPU
+ kubespawner_override:
+ mem_guarantee: 7.532G
+ cpu_guarantee: 0.1
+ mem_16:
+ display_name: ~16 GB, ~2.0 CPU
+ kubespawner_override:
+ mem_guarantee: 15.064G
+ cpu_guarantee: 0.2
+ mem_32:
+ display_name: ~32 GB, ~4.0 CPU
+ kubespawner_override:
+ mem_guarantee: 30.128G
+ cpu_guarantee: 0.4
+ mem_64:
+ display_name: ~64 GB, ~8.0 CPU
+ kubespawner_override:
+ mem_guarantee: 60.257G
+ cpu_guarantee: 0.8
+ mem_128:
+ display_name: ~128 GB, ~16.0 CPU
+ kubespawner_override:
+ mem_guarantee: 120.513G
+ cpu_guarantee: 1.6
kubespawner_override:
+ cpu_limit: null
mem_limit: null
- mem_guarantee: 115G
node_selector:
- node.kubernetes.io/instance-type: m5.8xlarge
+ node.kubernetes.io/instance-type: r5.4xlarge
+
+      # NOTE: The large option is kept as a comment for now. It may become
+      # relevant in the future for advanced users running a workshop, and can
+      # then be enabled more easily.
+ #
+ # This setup was discussed with Tasha Snow in March 2023 at
+ # https://2i2c.freshdesk.com/a/tickets/543.
+ #
+ # - display_name: "Large: up to 64 CPU / 512 GB RAM"
+ # description: *profile_list_description
+ # slug: large
+ # allowed_teams:
+ # - 2i2c-org:hub-access-for-2i2c-staff
+ # - CryoInTheCloud:cryocloudadvanced
+ # profile_options:
+ # requests:
+ # # NOTE: Node share choices are in active development, see comment
+ # # next to profileList: above.
+ # display_name: Node share
+ # choices:
+ # mem_4:
+ # display_name: ~4 GB, ~0.5 CPU
+ # kubespawner_override:
+ # mem_guarantee: 3.821G
+ # cpu_guarantee: 0.05
+ # mem_8:
+ # display_name: ~8 GB, ~1.0 CPU
+ # kubespawner_override:
+ # mem_guarantee: 7.643G
+ # cpu_guarantee: 0.1
+ # mem_16:
+ # default: true
+ # display_name: ~16 GB, ~2.0 CPU
+ # kubespawner_override:
+ # mem_guarantee: 15.285G
+ # cpu_guarantee: 0.2
+ # mem_32:
+ # display_name: ~32 GB, ~4.0 CPU
+ # kubespawner_override:
+ # mem_guarantee: 30.571G
+ # cpu_guarantee: 0.4
+ # mem_64:
+ # display_name: ~64 GB, ~8.0 CPU
+ # kubespawner_override:
+ # mem_guarantee: 61.141G
+ # cpu_guarantee: 0.8
+ # mem_128:
+ # display_name: ~128 GB, ~16.0 CPU
+ # kubespawner_override:
+ # mem_guarantee: 122.282G
+ # cpu_guarantee: 1.6
+ # mem_256:
+ # display_name: ~256 GB, ~32.0 CPU
+ # kubespawner_override:
+ # mem_guarantee: 244.565G
+ # cpu_guarantee: 3.2
+ # mem_512:
+ # display_name: ~512 GB, ~64.0 CPU
+ # kubespawner_override:
+ # mem_guarantee: 489.13G
+ # cpu_guarantee: 6.4
+ # kubespawner_override:
+ # cpu_limit: null
+ # mem_limit: null
+ # node_selector:
+ # node.kubernetes.io/instance-type: r5.16xlarge
scheduling:
userScheduler:
enabled: true
diff --git a/eksctl/nasa-cryo.jsonnet b/eksctl/nasa-cryo.jsonnet
index 74b6df443b..4ea828f38d 100644
--- a/eksctl/nasa-cryo.jsonnet
+++ b/eksctl/nasa-cryo.jsonnet
@@ -25,10 +25,9 @@ local nodeAz = "us-west-2a";
 // A `node.kubernetes.io/instance-type` label is added, so pods
// can request a particular kind of node with a nodeSelector
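+// For reference, these instance types back the hub's profile list: r5.xlarge
+// (up to 4 CPU / 32 GB RAM), r5.4xlarge (up to 16 CPU / 128 GB RAM), and
+// r5.16xlarge (up to 64 CPU / 512 GB RAM).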
local notebookNodes = [
- { instanceType: "m5.large" },
- { instanceType: "m5.xlarge" },
- { instanceType: "m5.2xlarge" },
- { instanceType: "m5.8xlarge" },
+ { instanceType: "r5.xlarge" },
+ { instanceType: "r5.4xlarge" },
+ { instanceType: "r5.16xlarge" },
];
local daskNodes = [
@@ -38,10 +37,7 @@ local daskNodes = [
// *first* item in instanceDistribution.instanceTypes, to match
// what we do with notebook nodes. Pods can request a particular
// kind of node with a nodeSelector
- { instancesDistribution+: { instanceTypes: ["m5.large"] }},
- { instancesDistribution+: { instanceTypes: ["m5.xlarge"] }},
- { instancesDistribution+: { instanceTypes: ["m5.2xlarge"] }},
- { instancesDistribution+: { instanceTypes: ["m5.8xlarge"] }},
+ { instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }},
];
@@ -51,7 +47,7 @@ local daskNodes = [
metadata+: {
name: "nasa-cryo",
region: clusterRegion,
- version: '1.22'
+ version: '1.25'
},
availabilityZones: masterAzs,
iam: {
@@ -83,7 +79,7 @@ local daskNodes = [
ssh: {
publicKeyPath: 'ssh-keys/nasa-cryo.key.pub'
},
- instanceType: "m5.xlarge",
+ instanceType: "r5.xlarge",
minSize: 1,
maxSize: 6,
labels+: {
@@ -138,6 +134,4 @@ local daskNodes = [
},
} + n for n in daskNodes
]
-
-
-}
\ No newline at end of file
+}