diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cc64c25474..93374ca970 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -50,6 +50,7 @@ repos: rev: v3.0.0 hooks: - id: terraform-fmt + exclude: terraform/aws/projects/template.tfvars # Prevent unencrypted files from being committed - repo: https://github.com/yuvipanda/pre-commit-hook-ensure-sops diff --git a/config/clusters/templates/aws/cluster.yaml b/config/clusters/templates/aws/cluster.yaml index c50bcec9a5..e17d6b5131 100644 --- a/config/clusters/templates/aws/cluster.yaml +++ b/config/clusters/templates/aws/cluster.yaml @@ -15,12 +15,14 @@ hubs: [] # Uncomment the lines below once the support infrastructure was deployed and # you are ready to add the first cluster - # - name: +{% for hub in hubs %} + # - name: {{ hub }} # # Tip: consider changing this to something more human friendly - # display_name: "{{ cluster_name }} - " - # domain: .{{ cluster_name }}.2i2c.cloud - # helm_chart: {{ hub_type }} + # display_name: "{{ cluster_name }} - {{ hub }}" + # domain: {{ hub }}.{{ cluster_name }}.2i2c.cloud + # helm_chart: basehub # helm_chart_values_files: # - common.values.yaml - # - .values.yaml - # - enc-.secret.values.yaml + # - {{ hub }}.values.yaml + # - enc-{{ hub }}.secret.values.yaml +{% endfor %} \ No newline at end of file diff --git a/config/clusters/templates/common/cluster-entry.yaml b/config/clusters/templates/common/cluster-entry.yaml index ca4642fb2d..13008d601d 100644 --- a/config/clusters/templates/common/cluster-entry.yaml +++ b/config/clusters/templates/common/cluster-entry.yaml @@ -2,7 +2,7 @@ hubs: - name: {{ hub_name }} display_name: {{ cluster_name }} {{ hub_name }} domain: {{ hub_name }}.{{ cluster_name }}.2i2c.cloud - helm_chart: {{ hub_type }} + helm_chart: "basehub" helm_chart_values_files: - common.values.yaml - {{ hub_name }}.values.yaml diff --git a/config/clusters/templates/gcp/cluster.yaml b/config/clusters/templates/gcp/cluster.yaml index 
0f56cf3b65..470a647c9b 100644 --- a/config/clusters/templates/gcp/cluster.yaml +++ b/config/clusters/templates/gcp/cluster.yaml @@ -31,7 +31,7 @@ hubs: [] # # Tip: consider changing this to something more human friendly # display_name: "{{ cluster_name }} - " # domain: .{{ cluster_name }}.2i2c.cloud - # helm_chart: {{ hub_type }} + # helm_chart: basehub # helm_chart_values_files: # - common.values.yaml # - .values.yaml diff --git a/deployer/README.md b/deployer/README.md index 141dabfa84..95b14fa0b6 100644 --- a/deployer/README.md +++ b/deployer/README.md @@ -260,7 +260,6 @@ for a GCP cluster. - `cluster_name` - the name of the cluster - `cluster_region`- the region where the cluster will be deployed - `project_id` - the project ID of the GCP project - - `hub_type` (basehub/daskhub) - whether the hub deployed there would need dask or not - `hub_name` - the name of the first hub which will be deployed in the cluster (usually `staging`) The templates have a set of default features and define some opinionated characteristics for the cluster. diff --git a/deployer/commands/generate/dedicated_cluster/aws.py b/deployer/commands/generate/dedicated_cluster/aws.py index 6aa86bb12c..a033e6ef17 100644 --- a/deployer/commands/generate/dedicated_cluster/aws.py +++ b/deployer/commands/generate/dedicated_cluster/aws.py @@ -110,6 +110,14 @@ def aws( ..., prompt="The AWS account id or alias. Declare 2i2c for 2i2c's SSO based accounts and paid_by_us=true", ), + hubs: str = typer.Option( + "staging", + prompt="The list of hubs that will be deployed in the cluster separated by a comma. 
Example: staging, prod.", + ), + dask_nodes: bool = typer.Option( + False, + prompt='If this cluster needs dask nodes, please type "y", otherwise hit ENTER.', + ), force: bool = typer.Option( False, "--force", @@ -134,9 +142,12 @@ def aws( # Also store the provider, as it's useful for some jinja templates # to differentiate between them when rendering the configuration "provider": "aws", - "hub_type": "basehub", + "dask_nodes": dask_nodes, "cluster_name": cluster_name, "cluster_region": cluster_region, + "hubs": hubs.replace( + ",", " " + ).split(), # Convert the comma separated string to a list "sign_in_url": sign_in_url, "paid_by_us": str(paid_by_us).lower(), } diff --git a/deployer/commands/generate/dedicated_cluster/gcp.py b/deployer/commands/generate/dedicated_cluster/gcp.py index c5622061a6..5327694858 100644 --- a/deployer/commands/generate/dedicated_cluster/gcp.py +++ b/deployer/commands/generate/dedicated_cluster/gcp.py @@ -63,6 +63,10 @@ def gcp( project_id: str = typer.Option( ..., prompt="Please insert the Project ID of the GCP project" ), + dask_nodes: bool = typer.Option( + False, + prompt='If this cluster needs dask nodes, please type "y", otherwise hit ENTER.', + ), force: bool = typer.Option( False, "--force", @@ -79,7 +83,7 @@ def gcp( # Also store the provider, as it's useful for some jinja templates # to differentiate between them when rendering the configuration "provider": "gcp", - "hub_type": "basehub", + "dask_nodes": dask_nodes, "cluster_name": cluster_name, "cluster_region": cluster_region, "project_id": project_id, diff --git a/deployer/commands/generate/hub_asset/cluster_entry.py b/deployer/commands/generate/hub_asset/cluster_entry.py index 3f9d094bbe..28132dc955 100644 --- a/deployer/commands/generate/hub_asset/cluster_entry.py +++ b/deployer/commands/generate/hub_asset/cluster_entry.py @@ -18,7 +18,6 @@ def cluster_entry( """ vars = { - "hub_type": "basehub", "cluster_name": cluster_name, "hub_name": hub_name, } diff --git 
a/eksctl/template.jsonnet b/eksctl/template.jsonnet index a5ce2bdd2b..aad355def9 100644 --- a/eksctl/template.jsonnet +++ b/eksctl/template.jsonnet @@ -36,11 +36,30 @@ local nodeAz = "<< cluster_region >>a"; // A `node.kubernetes.io/instance-type label is added, so pods // can request a particular kind of node with a nodeSelector local notebookNodes = [ - { instanceType: "r5.xlarge" }, - { instanceType: "r5.4xlarge" }, - { instanceType: "r5.16xlarge" }, +<% for hub in hubs %> + // << hub >> + { + instanceType: "r5.xlarge", + namePrefix: "nb-<< hub >>", + labels+: { "2i2c/hub-name": "<< hub >>" }, + tags+: { "2i2c:hub-name": "<< hub >>" }, + }, + { + instanceType: "r5.4xlarge", + namePrefix: "nb-<< hub >>", + labels+: { "2i2c/hub-name": "<< hub >>" }, + tags+: { "2i2c:hub-name": "<< hub >>" }, + }, + { + instanceType: "r5.16xlarge", + namePrefix: "nb-<< hub >>", + labels+: { "2i2c/hub-name": "<< hub >>" }, + tags+: { "2i2c:hub-name": "<< hub >>" }, + }, +<% endfor %> ]; -<% if hub_type == "daskhub" %> + +<% if dask_nodes %> local daskNodes = [ // Node definitions for dask worker nodes. Config here is merged // with our dask worker node definition, which uses spot instances. @@ -52,7 +71,14 @@ local daskNodes = [ // A not yet fully established policy is being developed about using a single // node pool, see https://github.com/2i2c-org/infrastructure/issues/2687. 
// - { instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }}, +<% for hub in hubs %> + { + namePrefix: "dask-<< hub >>", + labels+: { "2i2c/hub-name": "<< hub >>" }, + tags+: { "2i2c:hub-name": "<< hub >>" }, + instancesDistribution+: { instanceTypes: ["r5.4xlarge"] } + }, +<% endfor %> ]; <% else %> local daskNodes = []; @@ -145,6 +171,9 @@ local daskNodes = []; "hub.jupyter.org/node-purpose": "core", "k8s.dask.org/node-purpose": "core", }, + tags+: { + "2i2c:node-purpose": "core" + }, }, ] + [ ng + { @@ -164,6 +193,9 @@ local daskNodes = []; "hub.jupyter.org_dedicated": "user:NoSchedule", "hub.jupyter.org/dedicated": "user:NoSchedule", }, + tags+: { + "2i2c:node-purpose": "user" + }, } + n for n in notebookNodes ] + ( if daskNodes != null then [ @@ -182,6 +214,9 @@ local daskNodes = []; "k8s.dask.org_dedicated" : "worker:NoSchedule", "k8s.dask.org/dedicated" : "worker:NoSchedule", }, + tags+: { + "2i2c:node-purpose": "worker" + }, instancesDistribution+: { onDemandBaseCapacity: 0, onDemandPercentageAboveBaseCapacity: 0, diff --git a/terraform/aws/projects/template.tfvars b/terraform/aws/projects/template.tfvars index 028548f4ac..b365c249d6 100644 --- a/terraform/aws/projects/template.tfvars +++ b/terraform/aws/projects/template.tfvars @@ -9,26 +9,28 @@ cluster_nodes_location = "{{ cluster_region }}a" enable_aws_ce_grafana_backend_iam = true -# Tip: uncomment and fill the missing info in the lines below if you want +# Tip: uncomment and verify any missing info in the lines below if you want # to setup scratch buckets for the hubs on this cluster. 
# -#user_buckets = { -# "scratch-staging" : { -# "delete_after" : 7, -# "tags" : { "2i2c:hub-name" : "staging" }, -# }, -# # Tip: add more scratch buckets below, if this cluster will be multi-tenant -#} -# Tip: uncomment and fill the missing info in the lines below if you want +# user_buckets = { +{% for hub in hubs %} +# "scratch-{{ hub }}" : { +# "delete_after" : 7, +# "tags" : { "2i2c:hub-name" : "{{ hub }}" }, +# }, +{% endfor %} +# } + +# Tip: uncomment and verify any missing info in the lines below if you want # to setup specific cloud permissions for the buckets in this cluster. # -#hub_cloud_permissions = { -# "staging" : { +# hub_cloud_permissions = { +{% for hub in hubs %} +# "{{ hub }}" : { # "user-sa" : { -# bucket_admin_access : ["scratch-staging"], +# bucket_admin_access : ["scratch-{{ hub }}"], # }, # }, -# # Tip: add more namespaces below, if this cluster will be multi-tenant -#} - +{% endfor %} +# } diff --git a/terraform/gcp/projects/cluster.tfvars.template b/terraform/gcp/projects/cluster.tfvars.template index 7ad5cd6581..d521896eaf 100644 --- a/terraform/gcp/projects/cluster.tfvars.template +++ b/terraform/gcp/projects/cluster.tfvars.template @@ -78,7 +78,7 @@ notebook_nodes = { } } -{% if hub_type == "daskhub" %} +{% if dask_nodes %} dask_nodes = { # A not yet fully established policy is being developed about using a single # node pool, see https://github.com/2i2c-org/infrastructure/issues/2687.