Skip to content

Commit

Permalink
4/6 gke clusters still on 1.27: update core/user nodes to 1.29
Browse files Browse the repository at this point in the history
  • Loading branch information
consideRatio committed May 9, 2024
1 parent 84c084e commit a609de2
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 17 deletions.
4 changes: 2 additions & 2 deletions config/clusters/cloudbank/common.values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,5 @@ jupyterhub:
name: quay.io/2i2c/cloudbank-data8-image
tag: d2746e55a4ee
nodeSelector:
# Put everything on the most appropriate nodepool for these users
cloud.google.com/gke-nodepool: nb-n2-highmem-4
# Put everything on the most appropriate instance type for these users
node.kubernetes.io/instance-type: n2-highmem-4
3 changes: 2 additions & 1 deletion config/clusters/leap/common.values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,8 @@ basehub:
environment:
NVIDIA_DRIVER_CAPABILITIES: compute,utility
node_selector:
cloud.google.com/gke-nodepool: nb-gpu-t4
node.kubernetes.io/instance-type: n1-standard-8
cloud.google.com/gke-accelerator: nvidia-tesla-t4
mem_limit: 30G
mem_guarantee: 24G
extra_resource_limits:
Expand Down
8 changes: 4 additions & 4 deletions terraform/gcp/projects/awi-ciroh.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ filestore_capacity_gb = 2048

k8s_versions = {
min_master_version : "1.29.1-gke.1589018",
core_nodes_version : "1.27.4-gke.900",
notebook_nodes_version : "1.27.4-gke.900",
dask_nodes_version : "1.27.4-gke.900",
core_nodes_version : "1.29.1-gke.1589018",
notebook_nodes_version : "1.29.1-gke.1589018",
dask_nodes_version : "1.29.1-gke.1589018",
}

user_buckets = {
Expand All @@ -31,7 +31,7 @@ user_buckets = {

# Setup notebook node pools
notebook_nodes = {
"n2-highmem-4" : {
"n2-highmem-4-b" : {
min : 0,
max : 100,
machine_type : "n2-highmem-4",
Expand Down
15 changes: 11 additions & 4 deletions terraform/gcp/projects/cloudbank.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ k8s_versions = {
# upgrading the control plane, there will be ~5 minutes of k8s not being
# available making new server launches error etc.
min_master_version : "1.29.1-gke.1589018",
core_nodes_version : "1.27.5-gke.200",
notebook_nodes_version : "1.27.5-gke.200",
dask_nodes_version : "1.27.5-gke.200",
core_nodes_version : "1.29.1-gke.1589018",
notebook_nodes_version : "1.29.1-gke.1589018",
dask_nodes_version : "1.29.1-gke.1589018",
}

core_node_machine_type = "n2-highmem-2"
Expand All @@ -22,10 +22,17 @@ enable_filestore = true
filestore_capacity_gb = 1024

notebook_nodes = {
# FIXME: tainted, to be deleted when empty, replaced by k8s upgraded variant
"n2-highmem-4" : {
min : 0,
max : 100,
machine_type : "n2-highmem-4",
node_version : "1.27.5-gke.200",
},
"n2-highmem-4-b" : {
min : 0,
max : 100,
machine_type : "n2-highmem-4",
},
"n2-highmem-16" : {
min : 0,
Expand All @@ -36,7 +43,7 @@ notebook_nodes = {
min : 0,
max : 100,
machine_type : "n2-highmem-64",
}
},
}

# Setup a single node pool for dask workers.
Expand Down
36 changes: 33 additions & 3 deletions terraform/gcp/projects/leap.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ core_node_machine_type = "n2-highmem-4"

k8s_versions = {
min_master_version : "1.29.1-gke.1589018",
core_nodes_version : "1.27.4-gke.900",
notebook_nodes_version : "1.27.4-gke.900",
dask_nodes_version : "1.27.4-gke.900",
core_nodes_version : "1.29.1-gke.1589018",
notebook_nodes_version : "1.29.1-gke.1589018",
dask_nodes_version : "1.29.1-gke.1589018",
}

# GPUs not available in us-central1-b
Expand Down Expand Up @@ -79,19 +79,49 @@ notebook_nodes = {
max : 100,
machine_type : "n2-highmem-4",
},
# FIXME: tainted, to be deleted when empty, replaced by k8s upgraded variant
"n2-highmem-16" : {
# A minimum of one is configured for LEAP to ensure quick startups at all
# times. Cost is not a greater concern than optimizing startup times.
min : 1,
max : 100,
machine_type : "n2-highmem-16",
node_version : "1.27.4-gke.900",
},
"n2-highmem-16-b" : {
  # Replacement for the tainted "n2-highmem-16" pool, which stays pinned to
  # 1.27 until it is empty and can be deleted.
  #
  # A minimum of one is configured for LEAP to ensure quick startups at all
  # times. Cost is not a greater concern than optimizing startup times.
  min : 1,
  max : 100,
  machine_type : "n2-highmem-16",
  # Deliberately no node_version pin here, matching the other "-b" pools in
  # this change (cloudbank/qcl). Presumably the pool then follows
  # k8s_versions.notebook_nodes_version - confirm against the terraform module.
},
"n2-highmem-64" : {
min : 0,
max : 100,
machine_type : "n2-highmem-64"
}
# FIXME: tainted, to be deleted when empty, replaced by k8s upgraded variant
"gpu-t4" : {
min : 0,
max : 100,
machine_type : "n1-standard-8",
node_version : "1.27.4-gke.900",
gpu : {
enabled : true,
type : "nvidia-tesla-t4",
count : 1
},
zones : [
# Get GPUs wherever they are available, as sometimes a single
# zone might be out of GPUs.
"us-central1-a",
"us-central1-b",
"us-central1-c",
"us-central1-f"
]
},
"gpu-t4-b" : {
min : 0,
max : 100,
machine_type : "n1-standard-8",
Expand Down
20 changes: 17 additions & 3 deletions terraform/gcp/projects/qcl.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ region = "europe-west1"

k8s_versions = {
min_master_version : "1.29.1-gke.1589018",
core_nodes_version : "1.27.4-gke.900",
notebook_nodes_version : "1.27.4-gke.900",
core_nodes_version : "1.29.1-gke.1589018",
notebook_nodes_version : "1.29.1-gke.1589018",
}

core_node_machine_type = "n2-highmem-2"
Expand All @@ -26,10 +26,17 @@ user_buckets = {
}

notebook_nodes = {
# FIXME: tainted, to be deleted when empty, replaced by k8s upgraded variant
"n2-highmem-4" : {
min : 0,
max : 100,
machine_type : "n2-highmem-4",
node_version : "1.27.4-gke.900",
},
"n2-highmem-4-b" : {
min : 0,
max : 100,
machine_type : "n2-highmem-4",
},
"n2-highmem-16" : {
min : 0,
Expand All @@ -56,11 +63,18 @@ notebook_nodes = {
max : 100,
machine_type : "n2-highcpu-32",
},
# FIXME: tainted, to be deleted when empty, replaced by k8s upgraded variant
"n2-highcpu-96" : {
min : 0,
max : 100,
machine_type : "n2-highcpu-96",
}
node_version : "1.27.4-gke.900",
},
"n2-highcpu-96-b" : {
min : 0,
max : 100,
machine_type : "n2-highcpu-96",
},
}

hub_cloud_permissions = {
Expand Down

0 comments on commit a609de2

Please sign in to comment.