From 5f14abb1dcdaecaf7924e4bd89a5c067c652976d Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Mon, 10 Apr 2023 18:41:36 +0530 Subject: [PATCH 1/6] Initial commit of JMTE hub - Use 'jupyter-meets-the-earth' rather than 'jmte' as the name, because the existing cluster is already called 'jmte'. - SFTP service is gone! - Replicates config from https://github.com/2i2c-org/infrastructure/pull/436/files to the extent possible - Uses our IRSA config for AWS permissions, rather than the eksctl-created service account in use earlier. - Uses CILogon+GitHub for authentication, rather than auth0+github - Re-uses the same EFS filesystem from before, avoiding the need to copy a few terabytes of data around - Hub is now at jmte.2i2c.cloud, and the old URL (hub.jupytearth.org) redirects here. Same for staging. Ref https://github.com/2i2c-org/infrastructure/issues/2201 --- .../jupyter-meets-the-earth/cluster.yaml | 27 ++ .../common.values.yaml | 340 ++++++++++++++++++ .../enc-deployer-credentials.secret.json | 25 ++ .../enc-prod.secret.values.yaml | 21 ++ .../enc-staging.secret.values.yaml | 21 ++ .../enc-support.secret.values.yaml | 22 ++ .../jupyter-meets-the-earth/prod.values.yaml | 33 ++ .../staging.values.yaml | 19 + .../support.values.yaml | 42 +++ eksctl/jupyter-meets-the-earth.jsonnet | 167 +++++++++ .../ssh-keys/jupyter-meets-the-earth.key.pub | 1 + .../secret/jupyter-meets-the-earth.key | 21 ++ .../projects/jupyter-meets-the-earth.tfvars | 54 +++ 13 files changed, 793 insertions(+) create mode 100644 config/clusters/jupyter-meets-the-earth/cluster.yaml create mode 100644 config/clusters/jupyter-meets-the-earth/common.values.yaml create mode 100644 config/clusters/jupyter-meets-the-earth/enc-deployer-credentials.secret.json create mode 100644 config/clusters/jupyter-meets-the-earth/enc-prod.secret.values.yaml create mode 100644 config/clusters/jupyter-meets-the-earth/enc-staging.secret.values.yaml create mode 100644 config/clusters/jupyter-meets-the-earth/enc-support.secret.values.yaml create mode 100644 config/clusters/jupyter-meets-the-earth/prod.values.yaml create mode 100644 config/clusters/jupyter-meets-the-earth/staging.values.yaml create mode 100644 config/clusters/jupyter-meets-the-earth/support.values.yaml create mode 100644 eksctl/jupyter-meets-the-earth.jsonnet create mode 100644 eksctl/ssh-keys/jupyter-meets-the-earth.key.pub create mode 100644 eksctl/ssh-keys/secret/jupyter-meets-the-earth.key create mode 100644 terraform/aws/projects/jupyter-meets-the-earth.tfvars diff --git a/config/clusters/jupyter-meets-the-earth/cluster.yaml b/config/clusters/jupyter-meets-the-earth/cluster.yaml new file mode 100644 index 0000000000..ab6a76f8db --- /dev/null +++ b/config/clusters/jupyter-meets-the-earth/cluster.yaml @@ -0,0 +1,27 @@ +name: jupyter-meets-the-earth +provider: aws +aws: + key: enc-deployer-credentials.secret.json + clusterType: eks + clusterName: jupyter-meets-the-earth + region: us-west-2 +support: + helm_chart_values_files: + - support.values.yaml + - enc-support.secret.values.yaml +hubs: + - name: staging + domain: staging.hub.jupytearth.org + helm_chart: daskhub + helm_chart_values_files: + - common.values.yaml + - staging.values.yaml + - enc-staging.secret.values.yaml + - name: prod + display_name: "Jupyter Meets the Earth" + domain: hub.jupytearth.org + helm_chart: daskhub + helm_chart_values_files: + - common.values.yaml + - prod.values.yaml + - enc-prod.secret.values.yaml diff --git a/config/clusters/jupyter-meets-the-earth/common.values.yaml
b/config/clusters/jupyter-meets-the-earth/common.values.yaml new file mode 100644 index 0000000000..b8d540ec61 --- /dev/null +++ b/config/clusters/jupyter-meets-the-earth/common.values.yaml @@ -0,0 +1,340 @@ +basehub: + nfs: + # enabled is adjusted by staging/prod values + # enabled: true + shareCreator: + enabled: true + pv: + serverIP: fs-01707b06.efs.us-west-2.amazonaws.com + # mountOptions from https://docs.aws.amazon.com/efs/latest/ug/mounting-fs-nfs-mount-settings.html + mountOptions: + - rsize=1048576 + - wsize=1048576 + - timeo=600 + - soft # We pick soft over hard, so NFS lockups don't lead to hung processes + - retrans=2 + - noresvport + # baseShareName is required to be just "/" so that we can create + # various sub folders in the filesystem that our PV to access the + # NFS server can reference successfully as it isn't supported to + # access a not yet existing folder. This creation is automated by + # the nfs-share-creator resource part of the basehub Helm chart. + baseShareName: / + + jupyterhub: + custom: + homepage: + templateVars: + org: + name: Jupyter meets the Earth + logo_url: https://pangeo-data.github.io/jupyter-earth/_static/jupyter-earth.png + url: https://jupytearth.org + designed_by: + name: 2i2c + url: https://2i2c.org + operated_by: + name: 2i2c + url: https://2i2c.org + funded_by: + name: Jupyter meets the Earth + url: https://jupytearth.org + + singleuser: + # extraFiles ref: https://zero-to-jupyterhub.readthedocs.io/en/latest/resources/reference.html#singleuser-extrafiles + extraFiles: + jupyter_notebook_config.json: + mountPath: /etc/jupyter/jupyter_notebook_config.json + data: + # Allow jupyterlab option to show hidden files in browser + # https://github.com/berkeley-dsep-infra/datahub/issues/3160 + ContentsManager: + allow_hidden: true + initContainers: + # Need to explicitly fix ownership here, since EFS doesn't do anonuid + - name: volume-mount-ownership-fix + image: busybox + command: + [ + "sh", + "-c", + "id && chown 1000:1000 /home/jovyan /home/jovyan/shared /home/jovyan/shared-public && ls -lhd /home/jovyan", + ] + securityContext: + runAsUser: 0 + volumeMounts: + - name: home + mountPath: /home/jovyan + subPath: "{username}" + - name: home + mountPath: /home/jovyan/shared + subPath: _shared + - name: home + mountPath: /home/jovyan/shared-public + subPath: _shared_public + + # /dev/shm is mounted as a filesystem path, where writing to it means to + # write to memory. + # + # How to: https://stackoverflow.com/questions/46085748/define-size-for-dev-shm-on-container-engine/46434614#46434614 + # Request for this by Ellie: https://fperezgroup.slack.com/archives/C020XCEFPEH/p1658168872788389 + # + storage: + extraVolumes: + - name: dev-shm + emptyDir: + medium: Memory + extraVolumeMounts: + - name: dev-shm + mountPath: /dev/shm + # FIXME: we override the list extraVolumeMounts which is also set in + # the basehub chart; due to that, we need to add this here + # as well. An option is to add hub.extraConfig entries that + # append the kubespawner configuration to include these extra + # volume mounts. + # + - name: home + mountPath: /home/jovyan/shared + subPath: _shared + readOnly: true + - name: home + mountPath: /home/jovyan/shared-public + subPath: _shared_public + + # Increased as we have experienced an overly slow image pull at least + # once. Our pods seem to take ~6-7 minutes to start on a new node, + # so this gives us some margin.
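 + # As a sanity check on the number below (our own gloss, not upstream chart guidance): 1200 seconds is 20 minutes, i.e. roughly a 3x margin over the ~6-7 minute startups observed above.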
+ startTimeout: 1200 + + extraEnv: + GH_SCOPED_CREDS_APP_URL: https://github.com/apps/hub-jupytearth-org-github-integ + GH_SCOPED_CREDS_CLIENT_ID: Iv1.a073b1649637af12 + + # FIXME: Until we can set this just for the GPU nodes, we need to set it for everyone + NVIDIA_DRIVER_CAPABILITIES: compute,utility + + image: + # NOTE: We use the jupyterhub-configurator so this image/tag is not + # relevant. Visit its UI to configure the hub. + # + # staging: https://staging.hub.jupytearth.org/services/configurator/ + # prod: https://hub.jupytearth.org/services/configurator/ + pullPolicy: Always + name: 286354552638.dkr.ecr.us-west-2.amazonaws.com/jmte/user-env + tag: "latest" + + profileList: + - display_name: "16th of Medium: 0.25-4 CPU, 1-16 GB" + default: True + description: "A shared machine, the recommended option until you experience a limitation." + kubespawner_override: + cpu_guarantee: 0.225 + mem_guarantee: 0.875G + mem_limit: null + node_selector: + node.kubernetes.io/instance-type: m5.xlarge + - display_name: "4th of Medium: 1-4 CPU, 4-16 GB" + description: "A shared machine." + kubespawner_override: + cpu_guarantee: 0.875 + mem_guarantee: 3.5G + mem_limit: null + node_selector: + node.kubernetes.io/instance-type: m5.xlarge + - display_name: "Medium: 4 CPU, 16 GB" + description: "A dedicated machine for you." + kubespawner_override: + cpu_guarantee: 3.5 + mem_guarantee: 14G + mem_limit: null + node_selector: + node.kubernetes.io/instance-type: m5.xlarge + - display_name: "Large: 16 CPU, 64 GB" + description: "A dedicated machine for you." + kubespawner_override: + mem_guarantee: 56G + mem_limit: null + node_selector: + node.kubernetes.io/instance-type: m5.4xlarge + - display_name: "Massive: 64 CPU, 256 GB" + description: "A dedicated machine for you." + kubespawner_override: + mem_guarantee: 224G + mem_limit: null + node_selector: + node.kubernetes.io/instance-type: m5.16xlarge + - display_name: "Massive high-memory: 64 CPU, 976 GB" + description: "A dedicated machine for you." + kubespawner_override: + mem_guarantee: 900G + mem_limit: null + node_selector: + node.kubernetes.io/instance-type: x1.16xlarge + - display_name: "Medium GPU: 4 CPU, 16 GB, 1 T4 Tensor Core GPU" + description: "A dedicated machine for you with one GPU attached." + kubespawner_override: + cpu_guarantee: 3.5 + mem_guarantee: 14G + mem_limit: null + node_selector: + node.kubernetes.io/instance-type: g4dn.xlarge + extra_resource_limits: + nvidia.com/gpu: "1" + - display_name: "Large GPU: 16 CPU, 64 GB, 1 T4 Tensor Core GPU" + description: "A dedicated machine for you with one GPU attached." + kubespawner_override: + mem_guarantee: 56G + mem_limit: null + node_selector: + node.kubernetes.io/instance-type: g4dn.4xlarge + extra_resource_limits: + nvidia.com/gpu: "1" + - display_name: "Massive GPU: 64 CPU, 256 GB, 1 T4 Tensor Core GPU" + description: "A dedicated machine for you with one GPU attached."
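 + # (A gloss on the guarantees in this profileList, derived from the values themselves: dedicated profiles guarantee roughly 87.5% of node RAM, leaving the rest for system pods, e.g. 14G of an m5.xlarge's 16 GB, 56G of an m5.4xlarge's 64 GB, and 224G of an m5.16xlarge's 256 GB; the fractional profiles then guarantee 1/4 or 1/16 of that usable share, e.g. 14G / 16 = 0.875G.)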
+ kubespawner_override: + mem_guarantee: 224G + mem_limit: null + node_selector: + node.kubernetes.io/instance-type: g4dn.16xlarge + extra_resource_limits: + nvidia.com/gpu: "1" + - display_name: "16th of Medium: 0.25-4 CPU, 1-16 GB - Test of latest image" + description: "Helps us test an image before we make it the default" + kubespawner_override: + image: 286354552638.dkr.ecr.us-west-2.amazonaws.com/jmte/user-env:latest + image_pull_policy: Always + cpu_guarantee: 0.225 + mem_guarantee: 0.875G + node_selector: + node.kubernetes.io/instance-type: m5.xlarge + mem_limit: null + + hub: + config: + JupyterHub: + authenticator_class: cilogon + CILogonOAuthenticator: + scope: + - "profile" + username_claim: "preferred_username" + # Only show the option to log in with GitHub + shown_idps: + - http://github.com/login/oauth/authorize + Authenticator: + allowed_users: &users + # This is just listing a few of the users/admins; a lot of + # users have been added manually, see: + # https://github.com/pangeo-data/jupyter-earth/issues/53 + - abbyazari # Abby Azari + - andersy005 # Anderson Banihirwe + - consideratio # Erik Sundell + - choldgraf # Chris Holdgraf + - elliesch # Ellie Abrahams + - EMscience # Edom Moges + - espg # Shane Grigsby + - facusapienza21 # Facundo Sapienza + - fperez # Fernando Pérez + - kmpaul # Kevin Paul + - lrennels # Lisa Rennels + - mrsiegfried # Matthew Siegfried + - tsnow03 # Tasha Snow + - whyjz # Whyjay Zheng + - yuvipanda # Yuvi Panda + - jonathan-taylor # Jonathan Taylor + admin_users: *users + allowNamedServers: true + +dask-gateway: + gateway: + backend: + scheduler: + # IMPORTANT: We have experienced that the scheduler can fail with + # a 1GB memory limit. This was observed as "stream closed" errors + # from the Python client working against the + # Dask-Gateway created DaskCluster. + # + # CommClosedError: in : Stream is closed + # + cores: + request: 1 + limit: 64 + memory: + request: 2G + limit: 500G + extraPodConfig: + nodeSelector: + hub.jupyter.org/node-purpose: user + k8s.dask.org/node-purpose: null + # serviceAccountName is adjusted by staging/prod values + # serviceAccountName: *user-sa + worker: + extraPodConfig: + nodeSelector: + k8s.dask.org/node-purpose: worker + # serviceAccountName is adjusted by staging/prod values + # serviceAccountName: *user-sa + + # Note that we are overriding options provided in 2i2c's helm chart that has + # default values for these config entries. + # + extraConfig: + # This configuration represents options that can be presented to users + # that want to create a Dask cluster using dask-gateway. For more + # details, see https://gateway.dask.org/cluster-options.html + # + # The goal is to provide a simple configuration that allows the user some + # flexibility while also fitting well on AWS nodes, which all have a + # 1:4 ratio between CPU and GB of memory. By providing the + # username label, we help administrators to track user pods.
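 + # As a worked example of the handler below (our illustration, not extra config): selecting "4CPU, 16GB" gives chosen_worker_cpu = 4 and chosen_worker_memory = 16, so workers request 0.85 * 4 = 3.4 cores and 0.85 * 16 = 13.6G of memory, with limits of 4 cores and 16G.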
+ option_handler: | + from dask_gateway_server.options import Options, Select, String, Mapping + def cluster_options(user): + def option_handler(options): + if ":" not in options.image: + raise ValueError("When specifying an image you must also provide a tag") + extra_labels = {} + extra_annotations = { + "prometheus.io/scrape": "true", + "prometheus.io/port": "8787", + } + chosen_worker_cpu = int(options.worker_specification.split("CPU")[0]) + chosen_worker_memory = 4 * chosen_worker_cpu + # We multiply the requests by a fraction to ensure that the + # workers fit well within a node that needs some resources + # reserved for system pods. + return { + # A default image is suggested via DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE env variable + "image": options.image, + "scheduler_extra_pod_labels": extra_labels, + "scheduler_extra_pod_annotations": extra_annotations, + "worker_extra_pod_labels": extra_labels, + "worker_extra_pod_annotations": extra_annotations, + "worker_cores": 0.85 * chosen_worker_cpu, + "worker_cores_limit": chosen_worker_cpu, + "worker_memory": "%fG" % (0.85 * chosen_worker_memory), + "worker_memory_limit": "%fG" % chosen_worker_memory, + "environment": options.environment, + } + return Options( + Select( + "worker_specification", + [ + "1CPU, 4GB", + "2CPU, 8GB", + "4CPU, 16GB", + "8CPU, 32GB", + "16CPU, 64GB", + "32CPU, 128GB", + "64CPU, 256GB", + ], + default="1CPU, 4GB", + label="Worker specification", + ), + # The default image is set via DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE env variable + String("image", label="Image"), + Mapping("environment", {}, label="Environment variables"), + handler=option_handler, + ) + c.Backend.cluster_options = cluster_options + idle: | + # timeout after 30 minutes of inactivity + c.KubeClusterConfig.idle_timeout = 1800 diff --git a/config/clusters/jupyter-meets-the-earth/enc-deployer-credentials.secret.json b/config/clusters/jupyter-meets-the-earth/enc-deployer-credentials.secret.json new file mode 100644 index 0000000000..4b5fe0c2db --- /dev/null +++ b/config/clusters/jupyter-meets-the-earth/enc-deployer-credentials.secret.json @@ -0,0 +1,25 @@ +{ + "AccessKey": { + "AccessKeyId": "ENC[AES256_GCM,data:A3+Abzcvq+I2hZq2u4coAYzNjvk=,iv:B4kPrUIM8nx/VTrEQI+tUxEySkDDe6eZHJqAJ9B4YcU=,tag:PtO2TdNEJsaYY0nQyvTHSw==,type:str]", + "SecretAccessKey": "ENC[AES256_GCM,data:gfFXGESHTJn6tiQUpMkpbpqNJJ43KxkNvYaH8V7sC5lRKUPl85Dw7w==,iv:krcKBzv/Wzu+jjtd9MJiTQvj6ELo2JHXird+mn0Vt5c=,tag:jv4YANW0drzpjpVekpmzqg==,type:str]", + "UserName": "ENC[AES256_GCM,data:8fWApCCT7IL+9E6t0FkRS3XTaHDL+XA=,iv:/rsHbqCvzulMvT6Jzj20zqfOb39ojUWprFbn8359ozA=,tag:Nc1L5ufStyZMOUxI8xVrzA==,type:str]" + }, + "sops": { + "kms": null, + "gcp_kms": [ + { + "resource_id": "projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs", + "created_at": "2023-04-07T13:38:22Z", + "enc": "CiUA4OM7eGDmmlUnGoSPNr9unRpxJ7GEcQ5/pXY2SrvhODPp9JWFEkkALQgViOWaFqYsRFv2FP6kqShPvabRqOC6KTPai4WGjiuK10rHIgiBbGNAfwQdenfi/vBU3h0rslaKojCN2qO4H+TAb4LG7eyO" + } + ], + "azure_kv": null, + "hc_vault": null, + "age": null, + "lastmodified": "2023-04-07T13:38:23Z", + "mac": "ENC[AES256_GCM,data:HD/8swJpKnpElskOZXFjkJW6SjTIKChIZtHTqqlYexrj1x/HqrkLaGdHAuWIijZ91SOjxWlQxY67RzbpiJgdxG7XUcokrHqs+mEaWV65XVS087jucZo2tVC86wBFwNe4smlAEj6AF8n2gq/UAQbWoBE4fo3Vm/ojzhStqlLL0aQ=,iv:rrI6EO+c1LONQAHbsG7/TfEGlrrlKfzuriO+g29DFno=,tag:ZJqRJHVKlXOI+5S6cpsFtg==,type:str]", + "pgp": null, + "unencrypted_suffix": "_unencrypted", + "version": "3.7.3" + } +} \ No newline at end of file diff --git
a/config/clusters/jupyter-meets-the-earth/enc-prod.secret.values.yaml b/config/clusters/jupyter-meets-the-earth/enc-prod.secret.values.yaml new file mode 100644 index 0000000000..4108d6eb94 --- /dev/null +++ b/config/clusters/jupyter-meets-the-earth/enc-prod.secret.values.yaml @@ -0,0 +1,21 @@ +basehub: + jupyterhub: + hub: + config: + CILogonOAuthenticator: + client_id: ENC[AES256_GCM,data:pSlKv7EOrxXkt8Rhr2g1AmzK8r5chvTQNF9fcQclAiFJ4D4zrsgrKIAIMfBzK1qZaPqR,iv:bhyj+ytwpxHcpG782gwhxZ4T9qBYRuLzXF4kIczoM8w=,tag:QW7BUb87yxBLgQL2LaBelw==,type:str] + client_secret: ENC[AES256_GCM,data:DSoHfbfBHNroZ+c7+7BJIpUYfe5/RyuAj/uAgwCf51Q340WBow6X7nBSAKMTEgmzX2cOzYpjoWzQWXaM6IY21hmJJD9SjEJ1IY5kxmFqjahKGYvm9LM=,iv:Ze1SGQGcGw0mDQuoj7EVXnQTQAT/R/6T4/KH1n86orY=,tag:UfQQFNgUyyjcirl6ln52Sw==,type:str] +sops: + kms: [] + gcp_kms: + - resource_id: projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs + created_at: "2023-04-10T11:12:30Z" + enc: CiUA4OM7eITj8Go7yW8sabmF/ng6BAsGlrokT4eIYmff/PGx3HhhEkkALQgViBPoYsyjX82iMTE9MRaMY5Cp+4YwuGD9beVZCCRDZfyU9xkj+qHWH/6cr23FQ5iWmlmONLTeigXAWuV4nrGiQgqVAZuM + azure_kv: [] + hc_vault: [] + age: [] + lastmodified: "2023-04-10T11:12:31Z" + mac: ENC[AES256_GCM,data:M/Q9+8zZjf4oX6WsAsXF/56WUrbce9QDEwYt6zWQ1yVZEujeIhmZ1Dw4OgEGeDGA8UdhGAlBH9hmpUtauQeMgXF3ajnO8S+5OscXsOueEAU8syNkGWpaI3r2U4ipm6ud76sf1juTPg7ia0wzLEb8kTx6qQvoNCyUYjB8Qoar42k=,iv:FiXQdIxf4sF06CWs5BctO47WvIe6bDy7rnmlbc1AlDc=,tag:NTAkSySc7Lsy4QhVVmPhMQ==,type:str] + pgp: [] + unencrypted_suffix: _unencrypted + version: 3.7.3 diff --git a/config/clusters/jupyter-meets-the-earth/enc-staging.secret.values.yaml b/config/clusters/jupyter-meets-the-earth/enc-staging.secret.values.yaml new file mode 100644 index 0000000000..0ee111110e --- /dev/null +++ b/config/clusters/jupyter-meets-the-earth/enc-staging.secret.values.yaml @@ -0,0 +1,21 @@ +basehub: + jupyterhub: + hub: + config: + CILogonOAuthenticator: + client_id: ENC[AES256_GCM,data:QvtyFvUBHrocXJXfoFAqFmVlUEDR9uIQhIBwHYla4rwGIvbq5XTIIJPsSvUBtWG0BesX,iv:Hn5akZ2HLeJmZEB1ZsNIRkmlrW/Vow1NrsY8YtyhmGQ=,tag:v2UpT0DMrhGswGAzEDe19g==,type:str] + client_secret: ENC[AES256_GCM,data:p7KOeQXzyXKqfMkTLiJDbTakb5RyK/tw7IDcU0vbFkEDef8c1QMt+3GzT31IkG6l3ssY5KQB2k/dp30rYfHkvtF2iHZT6c2fzf+Xt76Aigy9ldKaUug=,iv:6fYoHjzTnJyNnfdv3SFni5Qb8Sph01xtnpfvTcU7+34=,tag:ZlkF69HrvO0V/Ac/ZbfQkQ==,type:str] +sops: + kms: [] + gcp_kms: + - resource_id: projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs + created_at: "2023-04-10T09:02:07Z" + enc: CiUA4OM7eMLBGhlA5MZgqOFZG8RibyQ4wuTe/WsQq4gk21whFdyOEkkALQgViLfXKg7B84WFDyDAj0Ds8xGLenys8lq9IKTa3p5vaSRe5YX47znSoraImOy906m+29cxgdJF+zXQnipg/CDpxR8+/gy4 + azure_kv: [] + hc_vault: [] + age: [] + lastmodified: "2023-04-10T09:02:08Z" + mac: ENC[AES256_GCM,data:onuoygd7R3QEyzBSFbOGvYe30gJSuTyfFXYWu7uDwX5/pnQLwbbpQaWQJZ2oLxn7cgmY8GATxcbl+Kw1A48UTY/eXUbr4FJuxnFx7A5CzDlNaLNV0z7WP97LX3aMG36XksjQGFYd5u1hQlHtTNw9Y6V7EK9d8sUIpTzO18qCKuw=,iv:CA/1WBiw6lA03Q1ouUHUc9pQonzjswtPUicvZlhKeU4=,tag:/hAPsb/b/CAWXlnDSKt4FA==,type:str] + pgp: [] + unencrypted_suffix: _unencrypted + version: 3.7.3 diff --git a/config/clusters/jupyter-meets-the-earth/enc-support.secret.values.yaml b/config/clusters/jupyter-meets-the-earth/enc-support.secret.values.yaml new file mode 100644 index 0000000000..47a0fff5ae --- /dev/null +++ b/config/clusters/jupyter-meets-the-earth/enc-support.secret.values.yaml @@ -0,0 +1,22 @@ +prometheusIngressAuthSecret: + username: 
ENC[AES256_GCM,data:Lxo2KWpnRyr4UZW/ZPIvMzfpI5fo49DIgPfUYBPZ1rFfCFToHT7HpH7+xdVC2Y6ldiGSt+BsNYp5Ux6r/6p4vg==,iv:0rX0TFog3mra5iNVI+zO/NH/GelxWgmh80TqsNfJ+/k=,tag:Sxe+aLvxAX1qzMXXwM7lWg==,type:str] + password: ENC[AES256_GCM,data:2T05VEs7PkCC5sJYz6MvDzuwBXsAiSLgKOUshDsqEgbCJOPYY520YHQtNphWcvCAZ7IXG3LXW5lEXKcxSuAjNg==,iv:NA3IwCBFLw28v6Egjm9AfVnT57Cku3V9WR099sodOc0=,tag:o6QVxZFY9HjfToFpPjNRuA==,type:str] +grafana: + grafana.ini: + auth.github: + client_id: ENC[AES256_GCM,data:ixnkaGcyPh3OvairUfg3IHK4xr4=,iv:W1FCPAU+0ErsmBIDTkXM2ft18gzj+78VQkhg1TfUUsU=,tag:eyFyPC9sviwgTCvbLNgunQ==,type:str] + client_secret: ENC[AES256_GCM,data:bRsWTxa6Y24jD1xNQzDz04s4+3GQTzooD8oNAK2Q/zwR7kGNSwppRg==,iv:vkcSxRlygv/1bXDXPO/U7g2LIT3RLhoH2LvxJP+GRjk=,tag:vxLCCXn9rIGR+U5Nk3t1cg==,type:str] +sops: + kms: [] + gcp_kms: + - resource_id: projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs + created_at: "2023-04-07T14:54:54Z" + enc: CiUA4OM7eDkzrGm/CiVG909bBroiCREEEsmKI2zjB0G+meFA7xljEkgALQgViIqbWsISzLYa/deyIGE0bdzk+OSdJl6GerEWRCTv7hKpagMdU47oqP7yzkqVFNVBA6VlwnClxAjckUq7eAaq25FYtCM= + azure_kv: [] + hc_vault: [] + age: [] + lastmodified: "2023-04-07T14:54:56Z" + mac: ENC[AES256_GCM,data:5mzWzX5ypBA4LQmJLHQDZQYn+T4IksV4alMPhK3MPRqixPzGjym0cHeiIA4EZZ7IsZKwUrwwZeyHvsBR8zYTFnuXk48B7sCyNeFb1KMXbtSLTiFy+02dFh3BkqLWJtU/1fnLnhNw5zTHJbmc5mNceAm2aIJ2uqmYGsI2/XeJRqo=,iv:GqaEqd4byTvBvhKvn1/vOvygxizfO0Itd9kNkskVIJw=,tag:hKwiwUztcBOOFwQgB3lJjw==,type:str] + pgp: [] + unencrypted_suffix: _unencrypted + version: 3.7.3 diff --git a/config/clusters/jupyter-meets-the-earth/prod.values.yaml b/config/clusters/jupyter-meets-the-earth/prod.values.yaml new file mode 100644 index 0000000000..e284d1c8f1 --- /dev/null +++ b/config/clusters/jupyter-meets-the-earth/prod.values.yaml @@ -0,0 +1,33 @@ +basehub: + userServiceAccount: + annotations: + eks.amazonaws.com/role-arn: arn:aws:iam::286354552638:role/jupyter-meets-the-earth-prod + jupyterhub: + hub: + config: + CILogonOAuthenticator: + oauth_callback_url: "https://jmte.2i2c.cloud/hub/oauth_callback" + ingress: + hosts: [jmte.2i2c.cloud] + tls: + - hosts: [jmte.2i2c.cloud] + secretName: https-auto-tls + # We want to keep a single m5.xlarge around for *quick* startup of + # servers for the shared use case. This is picked up by the placeholder + scheduling: + userPlaceholder: + enabled: true + replicas: 1 + resources: + requests: + # FIXME: We keep a *full* spare medium node around, is this necessary? + memory: 14G + singleuser: + extraEnv: + # FIXME: This is a *pre-existing bucket*, not the one created by + # terraform. Either import it properly into our terraform state, or + # change the bucket here. 
# **This bucket is not emptied every 7 days!** + SCRATCH_BUCKET: s3://jmte-scratch/$(JUPYTERHUB_USER) + PANGEO_SCRATCH: s3://jmte-scratch/$(JUPYTERHUB_USER) + nodeSelector: + node.kubernetes.io/instance-type: m5.xlarge diff --git a/config/clusters/jupyter-meets-the-earth/staging.values.yaml b/config/clusters/jupyter-meets-the-earth/staging.values.yaml new file mode 100644 index 0000000000..766cb4daf0 --- /dev/null +++ b/config/clusters/jupyter-meets-the-earth/staging.values.yaml @@ -0,0 +1,19 @@ +basehub: + userServiceAccount: + annotations: + eks.amazonaws.com/role-arn: arn:aws:iam::286354552638:role/jupyter-meets-the-earth-staging + jupyterhub: + hub: + config: + CILogonOAuthenticator: + oauth_callback_url: "https://staging.jmte.2i2c.cloud/hub/oauth_callback" + ingress: + hosts: [staging.jmte.2i2c.cloud] + tls: + - hosts: [staging.jmte.2i2c.cloud] + secretName: https-auto-tls + + singleuser: + extraEnv: + # This bucket is created via terraform. + SCRATCH_BUCKET: s3://jupyter-meets-the-earth-staging-scratch/$(JUPYTERHUB_USER) diff --git a/config/clusters/jupyter-meets-the-earth/support.values.yaml b/config/clusters/jupyter-meets-the-earth/support.values.yaml new file mode 100644 index 0000000000..a8c3990489 --- /dev/null +++ b/config/clusters/jupyter-meets-the-earth/support.values.yaml @@ -0,0 +1,42 @@ +prometheusIngressAuthSecret: + enabled: true + +cluster-autoscaler: + enabled: true + autoDiscovery: + clusterName: jupyter-meets-the-earth + awsRegion: us-west-2 + +grafana: + grafana.ini: + server: + root_url: https://grafana.jmte.2i2c.cloud/ + auth.github: + enabled: true + allowed_organizations: 2i2c-org + ingress: + hosts: + - grafana.jmte.2i2c.cloud + tls: + - secretName: grafana-tls + hosts: + - grafana.jmte.2i2c.cloud + +redirects: + rules: + # These are currently managed from https://console.cloud.google.com/net-services/dns/zones/jupytearth-org/details?project=domains-sos + - from: hub.jupyterearth.org + to: jmte.2i2c.cloud + - from: staging.hub.jupyterearth.org + to: staging.jmte.2i2c.cloud + +prometheus: + server: + ingress: + enabled: true + hosts: + - prometheus.jmte.2i2c.cloud + tls: + - secretName: prometheus-tls + hosts: + - prometheus.jmte.2i2c.cloud diff --git a/eksctl/jupyter-meets-the-earth.jsonnet b/eksctl/jupyter-meets-the-earth.jsonnet new file mode 100644 index 0000000000..735238bf4a --- /dev/null +++ b/eksctl/jupyter-meets-the-earth.jsonnet @@ -0,0 +1,167 @@ +/* + This file is a jsonnet template of an eksctl cluster configuration file + that is used with the eksctl CLI to both update and initialize an AWS EKS + based cluster. + + This file has in turn been generated from eksctl/template.jsonnet, which is + useful to compare against for changes over time. + + To use jsonnet to generate an eksctl configuration file from this, do: + + jsonnet jupyter-meets-the-earth.jsonnet > jupyter-meets-the-earth.eksctl.yaml + + References: + - https://eksctl.io/usage/schema/ +*/ +local ng = import "./libsonnet/nodegroup.jsonnet"; + +// place all cluster nodes here +local clusterRegion = "us-west-2"; +local masterAzs = ["us-west-2a", "us-west-2b", "us-west-2c"]; +local nodeAz = "us-west-2a"; + +// Node definitions for notebook nodes. Config here is merged +// with our notebook node definition.
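+// (For instance, the bare { instanceType: "m5.xlarge" } entry below picks up everything else from the shared `ng` template via the `ng { ... } + n` pattern in the nodeGroups list further down.)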
+// A `node.kubernetes.io/instance-type` label is added, so pods +// can request a particular kind of node with a nodeSelector. +local notebookNodes = [ + { instanceType: "m5.xlarge" }, + { instanceType: "m5.4xlarge" }, + { instanceType: "m5.16xlarge" }, + { instanceType: "x1.16xlarge" }, + { + instanceType: "g4dn.xlarge", minSize: 0, + tags+: { + "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1" + }, + taints+: { + "nvidia.com/gpu": "NoSchedule" + } + }, + { + instanceType: "g4dn.4xlarge", minSize: 0, + tags+: { + "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1" + }, + taints+: { + "nvidia.com/gpu": "NoSchedule" + } + }, + { + instanceType: "g4dn.16xlarge", minSize: 0, + taints+: { + "nvidia.com/gpu": "NoSchedule" + }, + tags+: { + "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1" + } + }, +]; +local daskNodes = [ + // Node definitions for dask worker nodes. Config here is merged + // with our dask worker node definition, which uses spot instances. + // A `node.kubernetes.io/instance-type` label is set to the name of the + // *first* item in instancesDistribution.instanceTypes, to match + // what we do with notebook nodes. Pods can request a particular + // kind of node with a nodeSelector. + { instancesDistribution+: { instanceTypes: ["m5.large"] }}, + { instancesDistribution+: { instanceTypes: ["m5.4xlarge"] }}, + { instancesDistribution+: { instanceTypes: ["m5.16xlarge"] }}, +]; + + +{ + apiVersion: 'eksctl.io/v1alpha5', + kind: 'ClusterConfig', + metadata+: { + name: "jupyter-meets-the-earth", + region: clusterRegion, + version: '1.25' + }, + availabilityZones: masterAzs, + iam: { + withOIDC: true, + }, + // If you add an addon to this config, run the create addon command. + // + // eksctl create addon --config-file=jupyter-meets-the-earth.eksctl.yaml + // + addons: [ + { + // aws-ebs-csi-driver ensures that our PVCs are bound to PVs that + // couple to AWS EBS based storage; without it, expect to see pods + // mounting a PVC failing to schedule and PVC resources that are + // unbound. + // + // Related docs: https://docs.aws.amazon.com/eks/latest/userguide/managing-ebs-csi.html + // + name: 'aws-ebs-csi-driver', + wellKnownPolicies: { + ebsCSIController: true, + }, + }, + ], + nodeGroups: [ + ng { + name: 'core-a', + availabilityZones: [nodeAz], + ssh: { + publicKeyPath: 'ssh-keys/jupyter-meets-the-earth.key.pub' + }, + instanceType: "m5.xlarge", + minSize: 1, + maxSize: 6, + labels+: { + "hub.jupyter.org/node-purpose": "core", + "k8s.dask.org/node-purpose": "core" + }, + }, + ] + [ + ng { + // NodeGroup names can't have a '.' in them, while + // instanceTypes always have a . + name: "nb-%s" % std.strReplace(n.instanceType, ".", "-"), + availabilityZones: [nodeAz], + minSize: 0, + maxSize: 500, + instanceType: n.instanceType, + ssh: { + publicKeyPath: 'ssh-keys/jupyter-meets-the-earth.key.pub' + }, + labels+: { + "hub.jupyter.org/node-purpose": "user", + "k8s.dask.org/node-purpose": "scheduler" + }, + taints+: { + "hub.jupyter.org_dedicated": "user:NoSchedule", + "hub.jupyter.org/dedicated": "user:NoSchedule" + }, + } + n for n in notebookNodes + ] + ( if daskNodes != null then + [ + ng { + // NodeGroup names can't have a '.' in them, while + // instanceTypes always have a .
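 + // (For example, the "m5.4xlarge" entry above becomes the nodegroup name "dask-m5-4xlarge".)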
+ name: "dask-%s" % std.strReplace(n.instancesDistribution.instanceTypes[0], ".", "-"), + availabilityZones: [nodeAz], + minSize: 0, + maxSize: 500, + ssh: { + publicKeyPath: 'ssh-keys/jupyter-meets-the-earth.key.pub' + }, + labels+: { + "k8s.dask.org/node-purpose": "worker" + }, + taints+: { + "k8s.dask.org_dedicated" : "worker:NoSchedule", + "k8s.dask.org/dedicated" : "worker:NoSchedule" + }, + instancesDistribution+: { + onDemandBaseCapacity: 0, + onDemandPercentageAboveBaseCapacity: 0, + spotAllocationStrategy: "capacity-optimized", + }, + } + n for n in daskNodes + ] else [] + ) +} diff --git a/eksctl/ssh-keys/jupyter-meets-the-earth.key.pub b/eksctl/ssh-keys/jupyter-meets-the-earth.key.pub new file mode 100644 index 0000000000..dc1f7c825d --- /dev/null +++ b/eksctl/ssh-keys/jupyter-meets-the-earth.key.pub @@ -0,0 +1 @@ +ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQC8AvtPUWRPLXKhQc6TqE28Tsi2EGpLbFVvotZ2L4kL9SgMrgfDqIX4KEVxmzGXI3h5BCLbGhS4UkedCALdD7uwVR7X07sE93SJvTtFO4/XsJTps/lzoucDxMGq0Tt3OKjNHqeyzTUzVwEtV3GIuPoUTdkwpZd3C71726stMGEQk35gSgpmeU1chyaBC9azDS+6Uf8+3nElOZOMj34NHopW5b/KmUrWidhAO4WEbfwXIEhsZSEhSQQFX4UWxp1c+oYPhSEtom3dI/LBLqFQF1uZzoZ0+Lx+QLMp+OS8d03jOCC6mn8Det/7Pqy8QXoHcGHap1Lq3+mQeo5nhoWyD93pq0L62eJOLwBp1/xB9nfkyyJe/6wLPOmLFNG+NjXESCqk0a+ZbORlfCOseb+nwkiFqokbF3JPIOAxodigM8Relylvk6mI36UceXxDPM9DECst1a2O6g1gfl59z+38qouClL2J71Lbg6M+UyhJ9/Y1L7on1A6did2FC0lynjh107U= yuvipanda@instantaneous-authenticity.local diff --git a/eksctl/ssh-keys/secret/jupyter-meets-the-earth.key b/eksctl/ssh-keys/secret/jupyter-meets-the-earth.key new file mode 100644 index 0000000000..ee46d3c359 --- /dev/null +++ b/eksctl/ssh-keys/secret/jupyter-meets-the-earth.key @@ -0,0 +1,21 @@ +{ + "data": "ENC[AES256_GCM,data:kDf5OwDSgxLzCCyClAsVVqxOHekSnJ/xnG1AKZoITMsrUo3GZf00fgDp+nBC+QCq9JE8ll8OUJfbsqIiQc+tHGuadz4GNM/Z4Cllm7IAo7KX/dabLfvovepcZE+oaiNjj4ZWmSZc66RnLfGS4jwKoR+1yGcAaZBWCuQg/RzPtN0cT+tZaLWnDZ/jax+gBq42miXnaRhO0gBlLBtywVY0IH19ptIHY+6b4sgJTRBSpTXOD3bPM3RBS2oLA6ohy4yZt+BZceftqL5JdFzOCb0PLgzALbwXGxdFZmjyRKXYgpLs/X6pAJMn2PTWI4U5+mewu5nWELPXDNkMK/A2VzsSNsoT8YltYv3msdtxijo56d6KlPMVR7Nd3lYSN8EWK0AOuqBqYkwOT2o+HF0rJ2ipWiHrxCNpuCzYtmHWhQRah3ewHX3c6V5LAeSKi4lYe4fwfQk4StctZBwxmYxhDULm8G5c32bq5X+b6Za/LndIYM8qBcb1d5JKXKiNPaFFKkc96xzgm5bqJWtSwOnHbrpg6GR82d2HEewfvBnHVlbEL9t6ETea8L0auTLhOawUKVGq2sZ/bAZWa2ezNRFvR3tlw3ytgNYdJ6N3nI0W/oArxxnFx5sM9QhoFtQKVHv9mXYoZ7cP3PqXJWMKwsJL7vfOz8W9Av4RCepbRudq52/eLTMsNJtoKxt9S/FDlToNMrk29BVjD7eq2Kk6rMr0bVGGbr0pmPcoMUjdhXFNtQxzNQNPirgamT/P2Hr2E8VhJKL3BVKpFR8TCdrExmiB/DLyNRaSMprlCKHFrB4xNby/HM/0LzK1ksPKKe7XNhnN8PQRMKsWPaBMXPR3deK9Ibg4C29ixgZtUmf8UQIY2mEdSXsUmytnoS61Te8/CiAKUtC8WJQjmM6QJKX3tt9GGbehwKr1GAx2V/hyTPokio7lpEibCqmOQJYVoYempeSFoWqMb/BhH6FqKYGHWrWGQrMrh1Gi965o7DDKqDGGiFojbaIUUp4DKhT/9Jc5dfnDP3N66N9IdFqZhNhj2O2VkXvkdIY0H4i7ZiBKueOH28nh7t7yynpQQ0DDin4anVJnNaVIIFQz5HIJtRFOmDteIw5Ec62Pz2rny3Pf6epZPfzVwEzcjG4P+iGWZzhHfm7144NtGZKfKYn68tzQJ3HbqsmsyZ/wPJwJE2y54i6GZVCTE+T4g2uY3yIOOlWdXFxrfoHkkEJ+eJVjLi8U+3YauG3NsEcD14yIXPdKHNL3TDaQBs82iMexq+3aNRnwziqy4AGO4n7N3AGu/546eaWigURDcXdsFhDObW+ptnxqO8lXXQ/+Lt5zTdfje7yXJH9d6XRxutMEdZxTYcAsjjv+j/LyXjqv2Mtc6C8tVfW6WKAnQipxPXuieG+8oL64V0hEwQycQ8j2VV2GhTzKluUPWbTyRhJVtyhf5/wUK/zbT0TFhriYuTQio/EMqMylAt81PPPnxjHE4PsHdVqAyugaxN6guACMQHnVspLLT1F3UcNP+zEy4aWFsNN9zGE2yks6UZ2XivNbAvBYJY/BDyroNX/S9uNvKaBHNA+DGoZAn8uWKLyN+cyfOl+XwezOdlWNfKkITuv4bKzRF0avqpH7nRGiKhvwsbY/3bu3ziQyM82ULesqQ5AUHZFYknV4kfeGEzclKWbdIe9Sj8/QXZqg7U3hUzGkffkB0KPWUT1MuIg8xICWAYaTT7Q1ciJ+wMd6tRdMlWy6pp0RJEnCuMW8HVSzcGWajZpIzQvpm9eg/yz8Ak4n2mqehveic5EluzfgKqSoNW/rw5VPyCVbbsyZKle3m69wITozEKnvD7Og0Q0J
4Q/1Cfb6LTiKlgASN51YibR/JNG+8A6cz52Q3MEV2vpSdPJNRo+npE7MlyLRPRDGghrC6Gp4e9BnHRMzT9bNJ0R802/Y4sRXdO9V554e8KpoidgsovS4tMWYzOiDsc3BzIi9SL/ceGM73b+M/CQsNzLGek6wGLOa/eZVca+JwIy5KpSXtYZ5EuPpOihx5dwCoIyMU4ooSN8pSMFNk/J1podoh6K4D86R7VfmLM/S2B9Howj1K/HksjnnsONf+lSNa4q4yIy3avIjLENQEmhm92vcWX2/nD9gccUSqOvSu1mAY/J7wcVa+LzTXUTJ2PqeR91uHAyCP0AhIFMmc0+0RyCKsXAJkuNCncqEBbQZJiJa0zkzf2vArvqreF790ACQBVP+wl5zKcjMRIHM/6eTXjPqUvzPK5ORFhn9rj9ecWAOJ3+K64Qk8KDwwqKuqqdGfpITOtmY8+L8LyKMiYD1v8kA3ug9kRMrjHPwlzjksID9wCH9E71zccy9+xwTosnzVSV7AvoQ0+WH0ThgR3zu8SN80h9tsI0qkZDDcK5cQmN+uwgjhUKt+VrPUuq3Z8XXVp0kU0Jxxak0Fhpri8Veu7u00550R5MXMgc7hpBpkgLFfGp+9FNyQBr9ZcNAxa3fshrsLeDHcH5J6z/5PFmn9GEAU+vOXCauwUH+/TpHql7Wkpvr+dHkSewc0eF+b6YDGDSy68OtwolcQqrMUpVkVpsZGvOr9/xq4XQYTNxYPLshRw2sndonsLo9GF1ik90TlbxQidsO1t6szF376ckNZPDalnP5g332pRE9LVHZ+MkoQ7OVv7UgcHxVuH+dZ8Qi+8fEhPY99B8MFsovStl6vxkMIoY4WV2thsWw+TtivlhxeJyHTZ3vslmU1m/Aw9ndkQLZQ3/6NW22BGWiMeT6Y/H5a8TAAtmE4MjNPfFmTSBlRvv8adueNuQEUnSjq9ULHzNFw8MPmQeGmPvs6oRAH7q7PXsmpcLK3nw55K8eZxQ1txIpzi2+UH/b7/Iztcy4g0J6C2s9VkDhXPakSmTp1LEr+I4x1pzZL/O8Vnn74FeOBzKgifh89kvg4Kc634o5ABBfJ88cbfASONa5DdCkSn/pc7+RO5f9+jDPGMttARMa4aVWbjW3a3OjwnlAYAFYrD/vgoitzBsp3d7AxH7rfhB5F3G8e5o7B/0/jyZmXuGzXYQQAOxknay9J0RcmLJSgKDlB64fw1jimEznjJJqAC41ZNWRi8ao+W9ngabSXPUXXW6fb48VE63X2vvWq0JoECQX1j8gJk3bImdsawyC1qdg33wrQgoQtFHvh5Bysj4d6MpOnszeGNRCHBvxtogQnmdB0JhqCPeBB0YMTdNUbuzrfDCe9nye+xtA1kRTCsiRYDzMOOL+1tKg+FVcIegqYz4OsQtArGcgJ9hacAn6ZMeADqBEH2oXcsNmAyZTBe72lJRi/afYB2NiRfnFb7N1GVWU/JuIWLs4bCTvsGUCHxJpFnSwONdkoiGmIXC+/2ysYvt/Q5/YQAN7eYLuTrWlwN14Wm3uJ5NDxtJiniiRFzXA2lcI+EnvOTxOXptk/mHucPmLiE6xeIyjm4X8KseL2evEAgTiVDDsxy6TyC2ilvCK3pYmYT33Gqm0hiFaAdB3+n8FRMQ9Ui2cm/h3fuDtlwakfakN0CK13mcZYg==,iv:h1imyue9hbKmn/LtIxz2+K+Klk4jvGcH0zugLVML48w=,tag:BbeYm5LB5F6mkhCkYloNHg==,type:str]", + "sops": { + "kms": null, + "gcp_kms": [ + { + "resource_id": "projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs", + "created_at": "2023-04-06T18:08:49Z", + "enc": "CiUA4OM7eBjO24ik1UIlDPMkCRsVpX42Spew7rnZabjlgB8Nm3G5EkkALQgViJ3ABfw/reWPwGMqQ3JJtAJdPeLoMvRpZrPbrWKQnh1kAgqqBXQamtPcwVWcCg7xLcWeDuSN/xaCxO6h3BNakaHm0/15" + } + ], + "azure_kv": null, + "hc_vault": null, + "age": null, + "lastmodified": "2023-04-06T18:08:50Z", + "mac": "ENC[AES256_GCM,data:t2bp3m/uU7u5itygUdqJTClg/KX7P98YFoXpuFSeMF5k+Z6O7YLkhXW2X2S/4IEzVLFSAyuCIg+5V20W5VQx64kKnzN4dh3qs8RfzNM5jfIhg0iorpFhWnBv21KUZvYUr1loi2JLJKzXI1YPDIIevef9Szk0tLNsVf08f2b0rT4=,iv:1w6ml0QMsRZ3gan62UoAE5OPXi144DLAD9NqO3MnL2M=,tag:WrdcS9FvGRXqSUMNkXNKCA==,type:str]", + "pgp": null, + "unencrypted_suffix": "_unencrypted", + "version": "3.7.3" + } +} \ No newline at end of file diff --git a/terraform/aws/projects/jupyter-meets-the-earth.tfvars b/terraform/aws/projects/jupyter-meets-the-earth.tfvars new file mode 100644 index 0000000000..3be6942768 --- /dev/null +++ b/terraform/aws/projects/jupyter-meets-the-earth.tfvars @@ -0,0 +1,54 @@ +region = "us-west-2" + +cluster_name = "jupyter-meets-the-earth" + +cluster_nodes_location = "us-west-2a" + +user_buckets = { + "scratch-staging": { + "delete_after" : 7 + }, + "scratch": { + "delete_after": 7 + }, +} + + +hub_cloud_permissions = { + "staging" : { + requestor_pays: true, + bucket_admin_access: ["scratch-staging"], + # FIXME: Previously, users were granted full S3 permissions. 
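 + # (A possible later tightening, sketched here only as an illustration: scope "Resource" to the scratch buckets' ARNs, i.e. each bucket and its bucket/* pair, rather than the blanket "arn:aws:s3:::*" below.)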
+ # Keep it the same for now + extra_iam_policy: <<-EOT +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": ["s3:*"], + "Resource": ["arn:aws:s3:::*"] + } + ] +} +EOT + }, + "prod" : { + requestor_pays: true, + bucket_admin_access: ["scratch"], + # FIXME: Previously, users were granted full S3 permissions. + # Keep it the same for now + extra_iam_policy: <<-EOT +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": ["s3:*"], + "Resource": ["arn:aws:s3:::*"] + } + ] +} +EOT + }, +} From 430131abe55f7ad7a4e47297ccb0795e1d78f89d Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Tue, 11 Apr 2023 01:41:49 +0530 Subject: [PATCH 2/6] Cleanup config a little --- .../common.values.yaml | 33 ++++--------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/config/clusters/jupyter-meets-the-earth/common.values.yaml b/config/clusters/jupyter-meets-the-earth/common.values.yaml index b8d540ec61..e4e09d003c 100644 --- a/config/clusters/jupyter-meets-the-earth/common.values.yaml +++ b/config/clusters/jupyter-meets-the-earth/common.values.yaml @@ -1,12 +1,7 @@ basehub: nfs: - # enabled is adjusted by staging/prod values - # enabled: true - shareCreator: - enabled: true pv: - serverIP: fs-01707b06.efs.us-west-2.amazonaws.com - # mountOptions from https://docs.aws.amazon.com/efs/latest/ug/mounting-fs-nfs-mount-settings.html + # from https://docs.aws.amazon.com/efs/latest/ug/mounting-fs-nfs-mount-settings.html mountOptions: - rsize=1048576 - wsize=1048576 @@ -14,11 +9,8 @@ basehub: - soft # We pick soft over hard, so NFS lockups don't lead to hung processes - retrans=2 - noresvport - # baseShareName is required to be just "/" so that we can create - # various sub folders in the filesystem that our PV to access the - # NFS server can reference successfully as it isn't supported to - # access a not yet existing folder. This creation is automated by - # the nfs-share-creator resource part of the basehub Helm chart. + serverIP: fs-01707b06.efs.us-west-2.amazonaws.com + # This is different from the rest of our hubs! baseShareName: / jupyterhub: @@ -40,9 +32,8 @@ basehub: url: https://jupytearth.org singleuser: - # extraFiles ref: https://zero-to-jupyterhub.readthedocs.io/en/latest/resources/reference.html#singleuser-extrafiles extraFiles: - jupyter_notebook_config.json: + jupyter_server_config.json: mountPath: /etc/jupyter/jupyter_notebook_config.json data: # Allow jupyterlab option to show hidden files in browser @@ -50,7 +41,8 @@ basehub: ContentsManager: allow_hidden: true initContainers: - # Need to explicitly fix ownership here, since EFS doesn't do anonuid + # Need to explicitly set this up and copy what's in basehub/values.yaml + # as we have an extra 'shared-public' directory here.
- name: volume-mount-ownership-fix image: busybox command: @@ -77,7 +69,6 @@ basehub: # # How to: https://stackoverflow.com/questions/46085748/define-size-for-dev-shm-on-container-engine/46434614#46434614 # Request for this by Ellie: https://fperezgroup.slack.com/archives/C020XCEFPEH/p1658168872788389 - # storage: extraVolumes: - name: dev-shm @@ -260,18 +251,6 @@ dask-gateway: memory: request: 2G limit: 500G - extraPodConfig: - nodeSelector: - hub.jupyter.org/node-purpose: user - k8s.dask.org/node-purpose: null - # serviceAccountName is adjusted by staging/prod values - # serviceAccountName: *user-sa - worker: - extraPodConfig: - nodeSelector: - k8s.dask.org/node-purpose: worker - # serviceAccountName is adjusted by staging/prod values - # serviceAccountName: *user-sa # Note that we are overriding options provided in 2i2c's helm chart that has From 9809dda495a007eb40317b85430c2f12e7f35cba Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Tue, 11 Apr 2023 13:20:08 +0530 Subject: [PATCH 3/6] Fix domain names for JMTE hubs --- config/clusters/jupyter-meets-the-earth/cluster.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/clusters/jupyter-meets-the-earth/cluster.yaml b/config/clusters/jupyter-meets-the-earth/cluster.yaml index ab6a76f8db..6b4692077a 100644 --- a/config/clusters/jupyter-meets-the-earth/cluster.yaml +++ b/config/clusters/jupyter-meets-the-earth/cluster.yaml @@ -11,7 +11,7 @@ support: - enc-support.secret.values.yaml hubs: - name: staging - domain: staging.hub.jupytearth.org + domain: staging.jmte.2i2c.cloud helm_chart: daskhub helm_chart_values_files: - common.values.yaml @@ -19,7 +19,7 @@ hubs: - enc-staging.secret.values.yaml - name: prod display_name: "Jupyter Meets the Earth" - domain: hub.jupytearth.org + domain: jmte.2i2c.cloud helm_chart: daskhub helm_chart_values_files: - common.values.yaml From 8cc8451afe2f21628489e44b1b0640700569da78 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Tue, 11 Apr 2023 13:21:36 +0530 Subject: [PATCH 4/6] Add comment with sign-in link to AWS console --- config/clusters/jupyter-meets-the-earth/cluster.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/clusters/jupyter-meets-the-earth/cluster.yaml b/config/clusters/jupyter-meets-the-earth/cluster.yaml index 6b4692077a..6e93a00caf 100644 --- a/config/clusters/jupyter-meets-the-earth/cluster.yaml +++ b/config/clusters/jupyter-meets-the-earth/cluster.yaml @@ -1,5 +1,5 @@ name: jupyter-meets-the-earth -provider: aws +provider: aws # https://286354552638.signin.aws.amazon.com/console aws: key: enc-deployer-credentials.secret.json clusterType: eks From 675bee1990e3bf403b9ef40c5262685c350f0ce8 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Tue, 11 Apr 2023 13:24:28 +0530 Subject: [PATCH 5/6] Fix typo in domain redirect --- config/clusters/jupyter-meets-the-earth/support.values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/clusters/jupyter-meets-the-earth/support.values.yaml b/config/clusters/jupyter-meets-the-earth/support.values.yaml index a8c3990489..db8e8ed15a 100644 --- a/config/clusters/jupyter-meets-the-earth/support.values.yaml +++ b/config/clusters/jupyter-meets-the-earth/support.values.yaml @@ -25,9 +25,9 @@ grafana: redirects: rules: # These are currently managed from https://console.cloud.google.com/net-services/dns/zones/jupytearth-org/details?project=domains-sos - - from: hub.jupyterearth.org + - from: hub.jupytearth.org to: jmte.2i2c.cloud - - from:
staging.hub.jupyterearth.org + - from: staging.hub.jupytearth.org to: staging.jmte.2i2c.cloud From 3a4ea7e2c754615540e7d8d2b85d8c5b8352bd78 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Tue, 11 Apr 2023 13:45:34 +0530 Subject: [PATCH 6/6] Support more than one redirect being set on a cluster --- helm-charts/support/templates/redirects.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/helm-charts/support/templates/redirects.yaml b/helm-charts/support/templates/redirects.yaml index b0171c3987..a1e7852fb1 100644 --- a/helm-charts/support/templates/redirects.yaml +++ b/helm-charts/support/templates/redirects.yaml @@ -6,14 +6,16 @@ metadata: cert-manager.io/cluster-issuer: letsencrypt-prod nginx.ingress.kubernetes.io/server-snippet: | return {{ .code | default 302 }} $scheme://{{ .to }}$request_uri; - name: ingress-redirect + name: ingress-redirect-{{ .from }} spec: ingressClassName: nginx tls: - hosts: - {{ .from }} - # .from is a DNS name, but dots aren't allowed secretName: redirect-{{ .from }}-tls rules: - host: {{ .from }} +# This object separator is required, as we generate multiple k8s +# objects from this one file. +--- {{ end }}
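With the two redirect rules currently in support.values.yaml, the amended template should now render two Ingress objects separated by `---`: one named ingress-redirect-hub.jupytearth.org for host hub.jupytearth.org, and one named ingress-redirect-staging.hub.jupytearth.org for host staging.hub.jupytearth.org. (This is a hand-written sketch of the expected result, not captured from a real `helm template` run.)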