Merge pull request #5180 from GeorgianaElena/new-cluster-templates
Update new cluster templates to match current nodegroup setup
GeorgianaElena authored Nov 22, 2024
2 parents 46e6282 + fa08604 commit b9239e2
Showing 11 changed files with 83 additions and 33 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -50,6 +50,7 @@ repos:
rev: v3.0.0
hooks:
- id: terraform-fmt
exclude: terraform/aws/projects/template.tfvars

# Prevent unencrypted files from being committed
- repo: https://github.com/yuvipanda/pre-commit-hook-ensure-sops
14 changes: 8 additions & 6 deletions config/clusters/templates/aws/cluster.yaml
@@ -15,12 +15,14 @@ hubs: []
# Uncomment the lines below once the support infrastructure was deployed and
# you are ready to add the first cluster

# - name: <hub_name>
{% for hub in hubs %}
# - name: {{ hub }}
# # Tip: consider changing this to something more human friendly
# display_name: "{{ cluster_name }} - <hub_name>"
# domain: <hub_name>.{{ cluster_name }}.2i2c.cloud
# helm_chart: {{ hub_type }}
# display_name: "{{ cluster_name }} - {{ hub }}"
# domain: {{ hub }}.{{ cluster_name }}.2i2c.cloud
# helm_chart: basehub
# helm_chart_values_files:
# - common.values.yaml
# - <hub_name>.values.yaml
# - enc-<hub_name>.secret.values.yaml
# - {{ hub }}.values.yaml
# - enc-{{ hub }}.secret.values.yaml
{% endfor %}
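
As a quick orientation aid (a minimal sketch, not the deployer's actual rendering code), this is roughly how the hub loop above expands when rendered with Jinja2; the hubs list and cluster name are made-up example values, and the fragment is abbreviated from the template above.

```python
# Sketch only: render an abbreviated version of the hub loop for two example hubs.
from jinja2 import Template

fragment = """\
{% for hub in hubs %}
# - name: {{ hub }}
#   domain: {{ hub }}.{{ cluster_name }}.2i2c.cloud
#   helm_chart: basehub
{% endfor %}
"""

print(
    Template(fragment, trim_blocks=True, lstrip_blocks=True).render(
        hubs=["staging", "prod"], cluster_name="examplecluster"
    )
)
```
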
2 changes: 1 addition & 1 deletion config/clusters/templates/common/cluster-entry.yaml
@@ -2,7 +2,7 @@ hubs:
- name: {{ hub_name }}
display_name: {{ cluster_name }} {{ hub_name }}
domain: {{ hub_name }}.{{ cluster_name }}.2i2c.cloud
helm_chart: {{ hub_type }}
helm_chart: "basehub"
helm_chart_values_files:
- common.values.yaml
- {{ hub_name }}.values.yaml
2 changes: 1 addition & 1 deletion config/clusters/templates/gcp/cluster.yaml
@@ -31,7 +31,7 @@ hubs: []
# # Tip: consider changing this to something more human friendly
# display_name: "{{ cluster_name }} - <hub_name>"
# domain: <hub_name>.{{ cluster_name }}.2i2c.cloud
# helm_chart: {{ hub_type }}
# helm_chart: basehub
# helm_chart_values_files:
# - common.values.yaml
# - <hub_name>.values.yaml
1 change: 0 additions & 1 deletion deployer/README.md
@@ -260,7 +260,6 @@ for a GCP cluster.
- `cluster_name` - the name of the cluster
- `cluster_region`- the region where the cluster will be deployed
- `project_id` - the project ID of the GCP project
- `hub_type` (basehub/daskhub) - whether the hub deployed there would need dask or not
- `hub_name` - the name of the first hub which will be deployed in the cluster (usually `staging`)

The templates have a set of default features and define some opinionated characteristics for the cluster.
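
For reference, a hedged illustration (placeholder values; the real dict is assembled by `deployer/commands/generate/dedicated_cluster/gcp.py` from the CLI prompts shown in this PR) of the template variables the GCP generator now passes, with `dask_nodes` replacing the removed `hub_type`:

```python
# Placeholder values for illustration only.
gcp_template_vars = {
    "provider": "gcp",
    "dask_nodes": False,  # True when the cluster needs dask worker nodes
    "cluster_name": "examplecluster",
    "cluster_region": "us-central1",
    "project_id": "example-project-id",
}
```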
13 changes: 12 additions & 1 deletion deployer/commands/generate/dedicated_cluster/aws.py
@@ -110,6 +110,14 @@ def aws(
...,
prompt="The AWS account id or alias. Declare 2i2c for 2i2c's SSO based accounts and paid_by_us=true",
),
hubs: str = typer.Option(
"staging",
prompt="The list of hubs that will be deployed in the cluster separated by a comma. Example: staging, prod.",
),
dask_nodes: bool = typer.Option(
False,
prompt='If this cluster needs dask nodes, please type "y", otherwise hit ENTER.',
),
force: bool = typer.Option(
False,
"--force",
@@ -134,9 +142,12 @@ def aws(
# Also store the provider, as it's useful for some jinja templates
# to differentiate between them when rendering the configuration
"provider": "aws",
"hub_type": "basehub",
"dask_nodes": dask_nodes,
"cluster_name": cluster_name,
"cluster_region": cluster_region,
"hubs": hubs.replace(
",", " "
).split(), # Convert the comma separated string to a list
"sign_in_url": sign_in_url,
"paid_by_us": str(paid_by_us).lower(),
}
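
A tiny sketch of the string-to-list conversion used for the `hubs` prompt above (example values; both "staging, prod" and "staging,prod" normalize to the same list):

```python
# Example of the transform applied to the comma separated prompt value.
hubs = "staging, prod"
print(hubs.replace(",", " ").split())  # ['staging', 'prod']
```
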
6 changes: 5 additions & 1 deletion deployer/commands/generate/dedicated_cluster/gcp.py
@@ -63,6 +63,10 @@ def gcp(
project_id: str = typer.Option(
..., prompt="Please insert the Project ID of the GCP project"
),
dask_nodes: bool = typer.Option(
False,
prompt='If this cluster needs dask nodes, please type "y", otherwise hit ENTER.',
),
force: bool = typer.Option(
False,
"--force",
@@ -79,7 +83,7 @@
# Also store the provider, as it's useful for some jinja templates
# to differentiate between them when rendering the configuration
"provider": "gcp",
"hub_type": "basehub",
"dask_nodes": dask_nodes,
"cluster_name": cluster_name,
"cluster_region": cluster_region,
"project_id": project_id,
1 change: 0 additions & 1 deletion deployer/commands/generate/hub_asset/cluster_entry.py
@@ -18,7 +18,6 @@ def cluster_entry(
"""

vars = {
"hub_type": "basehub",
"cluster_name": cluster_name,
"hub_name": hub_name,
}
45 changes: 40 additions & 5 deletions eksctl/template.jsonnet
@@ -36,11 +36,30 @@ local nodeAz = "<< cluster_region >>a";
// A `node.kubernetes.io/instance-type label is added, so pods
// can request a particular kind of node with a nodeSelector
local notebookNodes = [
{ instanceType: "r5.xlarge" },
{ instanceType: "r5.4xlarge" },
{ instanceType: "r5.16xlarge" },
<% for hub in hubs %>
// << hub >>
{
instanceType: "r5.xlarge",
namePrefix: "nb-<< hub >>",
labels+: { "2i2c/hub-name": "<< hub >>" },
tags+: { "2i2c:hub-name": "<< hub >>" },
},
{
instanceType: "r5.4xlarge",
namePrefix: "nb-<< hub >>",
labels+: { "2i2c/hub-name": "<< hub >>" },
tags+: { "2i2c:hub-name": "<< hub >>" },
},
{
instanceType: "r5.16xlarge",
namePrefix: "nb-<< hub >>",
labels+: { "2i2c/hub-name": "<< hub >>" },
tags+: { "2i2c:hub-name": "<< hub >>" },
},
<% endfor %>
];
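
To illustrate why each nodegroup now carries both labels (a hypothetical example, not configuration from this repository): a pod, or a hub's node selector, can pin itself to a specific hub's nodegroup and instance size by selecting on them.

```python
# Illustrative nodeSelector value only; the two keys are the labels set on the
# nodegroups above (instance size plus the per-hub label added in this PR).
node_selector = {
    "node.kubernetes.io/instance-type": "r5.xlarge",
    "2i2c/hub-name": "staging",
}
```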
<% if hub_type == "daskhub" %>

<% if dask_nodes %>
local daskNodes = [
// Node definitions for dask worker nodes. Config here is merged
// with our dask worker node definition, which uses spot instances.
@@ -52,7 +71,14 @@ local daskNodes = [
// A not yet fully established policy is being developed about using a single
// node pool, see https://github.com/2i2c-org/infrastructure/issues/2687.
//
{ instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }},
<% for hub in hubs %>
{
namePrefix: "dask-<< hub >>",
labels+: { "2i2c/hub-name": "<< hub >>" },
tags+: { "2i2c:hub-name": "<< hub >>" },
instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }
},
<% endfor %>
];
<% else %>
local daskNodes = [];
@@ -145,6 +171,9 @@ local daskNodes = [];
"hub.jupyter.org/node-purpose": "core",
"k8s.dask.org/node-purpose": "core",
},
tags+: {
"2i2c:node-purpose": "core"
},
},
] + [
ng + {
@@ -164,6 +193,9 @@ local daskNodes = [];
"hub.jupyter.org_dedicated": "user:NoSchedule",
"hub.jupyter.org/dedicated": "user:NoSchedule",
},
tags+: {
"2i2c:node-purpose": "user"
},
} + n for n in notebookNodes
] + ( if daskNodes != null then
[
@@ -182,6 +214,9 @@ local daskNodes = [];
"k8s.dask.org_dedicated" : "worker:NoSchedule",
"k8s.dask.org/dedicated" : "worker:NoSchedule",
},
tags+: {
"2i2c:node-purpose": "worker"
},
instancesDistribution+: {
onDemandBaseCapacity: 0,
onDemandPercentageAboveBaseCapacity: 0,
29 changes: 14 additions & 15 deletions terraform/aws/projects/template.tfvars
@@ -9,26 +9,25 @@ cluster_nodes_location = "{{ cluster_region }}a"

enable_aws_ce_grafana_backend_iam = true

# Tip: uncomment and fill the missing info in the lines below if you want
# Tip: uncomment and verify any missing info in the lines below if you want
# to setup scratch buckets for the hubs on this cluster.
#
#user_buckets = {
# "scratch-staging" : {
# "delete_after" : 7,
# "tags" : { "2i2c:hub-name" : "staging" },
# },
# # Tip: add more scratch buckets below, if this cluster will be multi-tenant
#}

# Tip: uncomment and fill the missing info in the lines below if you want
{% for hub in hubs %}
# "scratch-{{ hub }}" : {
# "delete_after" : 7,
# "tags" : { "2i2c:hub-name" : "{{ hub }}" },
# },
{% endfor %}

# Tip: uncomment and verify any missing info in the lines below if you want
# to setup specific cloud permissions for the buckets in this cluster.
#
#hub_cloud_permissions = {
# "staging" : {
# hub_cloud_permissions = {
{% for hub in hubs %}
# "{{ hub }}" : {
# "user-sa" : {
# bucket_admin_access : ["scratch-staging"],
# bucket_admin_access : ["scratch-{{ hub }}"],
# },
# },
# # Tip: add more namespaces below, if this cluster will be multi-tenant
#}

{% endfor %}
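
Restated in plain Python (a hypothetical sketch, not code from this repository): the two loops above give each hub its own scratch bucket and grant its user service account admin access to that bucket only.

```python
# Hypothetical illustration of the per-hub mapping the template encodes.
hubs = ["staging", "prod"]

user_buckets = {
    f"scratch-{hub}": {"delete_after": 7, "tags": {"2i2c:hub-name": hub}}
    for hub in hubs
}
hub_cloud_permissions = {
    hub: {"user-sa": {"bucket_admin_access": [f"scratch-{hub}"]}}
    for hub in hubs
}
print(user_buckets)
print(hub_cloud_permissions)
```
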
2 changes: 1 addition & 1 deletion terraform/gcp/projects/cluster.tfvars.template
@@ -78,7 +78,7 @@ notebook_nodes = {
}
}

{% if hub_type == "daskhub" %}
{% if dask_nodes == "daskhub" %}
dask_nodes = {
# A not yet fully established policy is being developed about using a single
# node pool, see https://github.com/2i2c-org/infrastructure/issues/2687.