diff --git a/.github/workflows/deploy-grafana-dashboards.yaml b/.github/workflows/deploy-grafana-dashboards.yaml index 1fded0d9ab..de85e9ce99 100644 --- a/.github/workflows/deploy-grafana-dashboards.yaml +++ b/.github/workflows/deploy-grafana-dashboards.yaml @@ -15,6 +15,7 @@ jobs: - cluster_name: 2i2c-aws-us - cluster_name: 2i2c-uk - cluster_name: awi-ciroh + - cluster_name: bican - cluster_name: catalystproject-africa - cluster_name: catalystproject-latam - cluster_name: cloudbank diff --git a/config/clusters/bican/cluster.yaml b/config/clusters/bican/cluster.yaml new file mode 100644 index 0000000000..8f3055c1db --- /dev/null +++ b/config/clusters/bican/cluster.yaml @@ -0,0 +1,12 @@ +name: bican +provider: aws # https://2i2c.awsapps.com/start#/ +aws: + key: enc-deployer-credentials.secret.json + clusterType: eks + clusterName: bican + region: us-east-2 +support: + helm_chart_values_files: + - support.values.yaml + - enc-support.secret.values.yaml +hubs: [] diff --git a/config/clusters/bican/enc-deployer-credentials.secret.json b/config/clusters/bican/enc-deployer-credentials.secret.json new file mode 100644 index 0000000000..f9932f7732 --- /dev/null +++ b/config/clusters/bican/enc-deployer-credentials.secret.json @@ -0,0 +1,25 @@ +{ + "AccessKey": { + "AccessKeyId": "ENC[AES256_GCM,data:DeO+KCrRK5prA+MwdYH259ZoE30=,iv:wHbKxC+nj15O6Fk4W/RYxePl1ZIhN5IiTUotCCD8RU0=,tag:RW73gvM38ZVzM+uRxMNxeA==,type:str]", + "SecretAccessKey": "ENC[AES256_GCM,data:dWhFaB2jqM1/lN4T/vsLnUI4JZ2aEvKltHz1yELlHQYZocZwe9BuKQ==,iv:OaqLfhzgyJi+XlTxyNVwF7oofk2bxEL4w/vkqoIGBJw=,tag:4SUngOLKzXJKiU+RnvJB/A==,type:str]", + "UserName": "ENC[AES256_GCM,data:SWPTr2vK0g9duv/PJnGP6+yEhajuSHE=,iv:uzCudF/Jns0rkZeTFgF3TzfAdHj+uzIaRTddxjZkIQY=,tag:TicuD0Gy4DlS1cs3v+34eg==,type:str]" + }, + "sops": { + "kms": null, + "gcp_kms": [ + { + "resource_id": "projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs", + "created_at": "2024-03-25T16:18:28Z", + "enc": 
"CiUA4OM7eI62pdzV0VlvcAweLwu3y+/mVg+D/IxPHr0JdwvDieADEkkAXoW3JvyeLJ3uoPtSm7/C3hjB9a+fehCUe/eA/2GgL4GCsWNWsCcIh8wftPsXPqVJn10DoUF/zjmiGaYTrxlsArVXBciGHip9" + } + ], + "azure_kv": null, + "hc_vault": null, + "age": null, + "lastmodified": "2024-03-25T16:18:28Z", + "mac": "ENC[AES256_GCM,data:FE0dsSHtAjhgJjtUrHhax/zP7Rb6E50XjN25kFOFmDBos5He7R5F1ibIZ+iy/WAu3BRgs1IZIc+egfK1GHihvvmmAv1gPGf/KCjtxHmVm2AWRSzht76Z54Lp65z9yLwh7nvTgyI/KdeVgmfFQvbcZsdMugg+s5JnRZ1CKdcSLM4=,iv:xzSgeWcIqZJ+S1VvcX4V3Baeve3HbPL6YDClCn2UFPY=,tag:tfVCPiUb8Jg0yYZNqRejig==,type:str]", + "pgp": null, + "unencrypted_suffix": "_unencrypted", + "version": "3.8.1" + } +} \ No newline at end of file diff --git a/config/clusters/bican/enc-grafana-token.secret.yaml b/config/clusters/bican/enc-grafana-token.secret.yaml new file mode 100644 index 0000000000..769ce05714 --- /dev/null +++ b/config/clusters/bican/enc-grafana-token.secret.yaml @@ -0,0 +1,15 @@ +grafana_token: ENC[AES256_GCM,data:ycdgNGt3VqcyJGKh4F51r1E9fn53GIHhOXwF5/mCoMeGgrWZg9qNcrgpUyAzmg==,iv:qZ8j+D2xJuEd2p+1pv7KYm1txAAiOB2RwqIP8ErCzM0=,tag:ujAmi9YozXMiux9yU20OHQ==,type:str] +sops: + kms: [] + gcp_kms: + - resource_id: projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs + created_at: "2024-03-25T17:00:00Z" + enc: CiUA4OM7eOhriXoULTcORAf79SP0MiE+/8E7mpYi/Fld/7pHr/64EkkAXoW3JiccZbzRvAC1iWH3vgaunmsL7V8GS8bSYJpR7XoFSSyXB5aQijOObgcQGEIAAH7L3MIJrOxV4eW9f3t+fN9I1QWGy1+6 + azure_kv: [] + hc_vault: [] + age: [] + lastmodified: "2024-03-25T17:00:00Z" + mac: ENC[AES256_GCM,data:n7XUcuVcfgX37zg+ZLt5w8p9Ct5605laTBeaK809+UktFoUokByaFB5+x6/S+OZN08Q1A+fgF2E6TXNUavN+qgUNOiNal5FvQZr9HKPofgDPIWJi72851Fy1Q6rVYou7iSPC3f1oHMfh5ilPR0GmaHjQny4W+r3p88rLO6dtP/M=,iv:qSeOy1AKNAQt2hozBNGeeSCRW6JvXz2oB4IVd9uWfzo=,tag:7y8GOuq03ZI7LlTQdWpPqQ==,type:str] + pgp: [] + unencrypted_suffix: _unencrypted + version: 3.8.1 diff --git a/config/clusters/bican/enc-support.secret.values.yaml b/config/clusters/bican/enc-support.secret.values.yaml new file mode 
100644 index 0000000000..6df6ee3860 --- /dev/null +++ b/config/clusters/bican/enc-support.secret.values.yaml @@ -0,0 +1,22 @@ +prometheusIngressAuthSecret: + username: ENC[AES256_GCM,data:GzcSEBqXXvY2bxnAHq7NFM8T7OnkVdJqNdMHV6P57kjwpvQGhTBI0upMzYuWejD8OVpYgK+gywNUwrUIsIw+kw==,iv:coBbRSe2gdi8vqV5uaWDBm9oS16hEZR4Je3cSD/E7tE=,tag:4563alICoKwoXbok/h3/mQ==,type:str] + password: ENC[AES256_GCM,data:tp/AfO5x4oFV+9YiO42uyyAeJRGFPbJHB2Gtak7n77fWZuA+g1w2mpjG9UKoPcTmJXp6pv79+Clc03CeqNfXPQ==,iv:rzIYpQ4YI66CWDwm/Lc3EIxkJrz1mp+J/+SYtVZQK8k=,tag:Ho7pAmyS5fmDkqK22WvDOQ==,type:str] +grafana: + grafana.ini: + auth.github: + client_id: ENC[AES256_GCM,data:r0girp4lkSsFJxT9RYkNqATagK0=,iv:rUF0925+HkU4d3xRbsgcq8az+aLSgW2kbNGg06ZR504=,tag:/kZxOX/ZNRrfA62vdAWIZg==,type:str] + client_secret: ENC[AES256_GCM,data:EysT8taieKuhc76qFFqUo9qxoeYxofRcOo1hLrDQ4aBsKH6zIOAH5Q==,iv:31LdaFhwg7Kkn1DvyFxnUFoWDRBYlchwMeFdm819uc0=,tag:aKt+2mdtcA4uO6yiJFsPXw==,type:str] +sops: + kms: [] + gcp_kms: + - resource_id: projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs + created_at: "2024-03-25T16:58:36Z" + enc: CiUA4OM7eMjNppdOUODfRkFuLa6+B4w/pOw4QFFBwysbGYfDrxVhEkkAXoW3JnUAhPZeVwdvizCgUdBQpIH4O4V7SqdCmNYy+chPbvDkiyhOefoVVg7FrU4nHr9xSwhCsXVO16GOOdEukbrxL9YUiD7O + azure_kv: [] + hc_vault: [] + age: [] + lastmodified: "2024-03-25T16:58:36Z" + mac: ENC[AES256_GCM,data:aLnsOYFwKqn97RsoHcbeoBdbQMV3djowqgaN1xWsPMhMEoyEFt4AyoFe8ZOrD2LFfPdCS7C9liGv93jkLY9HuIsoCtELxFUdTQzClBU7rzqspMDA+XQy4hGSJ+0mhg/JaxtBZ9NBuy19m2pBGM55V1nZPVzZNSTxWi/9EtGQDOY=,iv:jtdvEWfW3gqeLl/duh+vGIgFrCzWa+sBHwlcWRAmEJo=,tag:gVrCUxenY1REHnjepCY4lQ==,type:str] + pgp: [] + unencrypted_suffix: _unencrypted + version: 3.8.1 diff --git a/config/clusters/bican/support.values.yaml b/config/clusters/bican/support.values.yaml new file mode 100644 index 0000000000..372d4e6ae3 --- /dev/null +++ b/config/clusters/bican/support.values.yaml @@ -0,0 +1,34 @@ +prometheusIngressAuthSecret: + enabled: true + +prometheus: + server: + 
ingress: + enabled: true + hosts: + - prometheus.bican.2i2c.cloud + tls: + - secretName: prometheus-tls + hosts: + - prometheus.bican.2i2c.cloud + +grafana: + grafana.ini: + server: + root_url: https://grafana.bican.2i2c.cloud/ + auth.github: + enabled: true + allowed_organizations: 2i2c-org + ingress: + hosts: + - grafana.bican.2i2c.cloud + tls: + - secretName: grafana-tls + hosts: + - grafana.bican.2i2c.cloud + +cluster-autoscaler: + enabled: true + autoDiscovery: + clusterName: bican + awsRegion: us-east-2 diff --git a/eksctl/bican.jsonnet b/eksctl/bican.jsonnet new file mode 100644 index 0000000000..9ecdd82a59 --- /dev/null +++ b/eksctl/bican.jsonnet @@ -0,0 +1,149 @@ +/* + This file is a jsonnet template of an eksctl cluster configuration file, + that is used with the eksctl CLI to both update and initialize an AWS EKS + based cluster. + + This file has in turn been generated from eksctl/template.jsonnet which is + relevant to compare with for changes over time. + + To use jsonnet to generate an eksctl configuration file from this, do: + + jsonnet bican.jsonnet > bican.eksctl.yaml + + References: + - https://eksctl.io/usage/schema/ +*/ +local ng = import "./libsonnet/nodegroup.jsonnet"; + +// place all cluster nodes here +local clusterRegion = "us-east-2"; +local masterAzs = ["us-east-2a", "us-east-2b", "us-east-2c"]; +local nodeAz = "us-east-2a"; + +// Node definitions for notebook nodes. Config here is merged +// with our notebook node definition.
+// A `node.kubernetes.io/instance-type` label is added, so pods +// can request a particular kind of node with a nodeSelector +local notebookNodes = [ + { instanceType: "r5.xlarge" }, + { instanceType: "r5.4xlarge" }, + { instanceType: "r5.16xlarge" }, + { + instanceType: "g4dn.xlarge", + tags+: { + "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1" + }, + // Allow provisioning GPUs across all AZs, to prevent a situation where all + // GPUs in a single AZ are in use and no new nodes can be spawned + availabilityZones: masterAzs, + }, +]; +local daskNodes = [ + // Node definitions for dask worker nodes. Config here is merged + // with our dask worker node definition, which uses spot instances. + // A `node.kubernetes.io/instance-type` label is set to the name of the + // *first* item in instancesDistribution.instanceTypes, to match + // what we do with notebook nodes. Pods can request a particular + // kind of node with a nodeSelector + // + // A not yet fully established policy is being developed about using a single + // node pool, see https://github.com/2i2c-org/infrastructure/issues/2687. + // + { instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }}, +]; + + +{ + apiVersion: 'eksctl.io/v1alpha5', + kind: 'ClusterConfig', + metadata+: { + name: "bican", + region: clusterRegion, + version: "1.29", + }, + availabilityZones: masterAzs, + iam: { + withOIDC: true, + }, + // If you add an addon to this config, run the create addon command. + // + // eksctl create addon --config-file=bican.eksctl.yaml + // + addons: [ + { + // aws-ebs-csi-driver ensures that our PVCs are bound to PVs that + // couple to AWS EBS based storage, without it expect to see pods + // mounting a PVC failing to schedule and PVC resources that are + // unbound.
+ // + // Related docs: https://docs.aws.amazon.com/eks/latest/userguide/managing-ebs-csi.html + // + name: 'aws-ebs-csi-driver', + version: "latest", + wellKnownPolicies: { + ebsCSIController: true, + }, + }, + ], + nodeGroups: [ + ng + { + namePrefix: 'core', + nameSuffix: 'a', + nameIncludeInstanceType: false, + availabilityZones: [nodeAz], + ssh: { + publicKeyPath: 'ssh-keys/bican.key.pub' + }, + instanceType: "r5.xlarge", + minSize: 1, + maxSize: 6, + labels+: { + "hub.jupyter.org/node-purpose": "core", + "k8s.dask.org/node-purpose": "core" + }, + }, + ] + [ + ng + { + namePrefix: 'nb', + availabilityZones: [nodeAz], + minSize: 0, + maxSize: 500, + instanceType: n.instanceType, + ssh: { + publicKeyPath: 'ssh-keys/bican.key.pub' + }, + labels+: { + "hub.jupyter.org/node-purpose": "user", + "k8s.dask.org/node-purpose": "scheduler" + }, + taints+: { + "hub.jupyter.org_dedicated": "user:NoSchedule", + "hub.jupyter.org/dedicated": "user:NoSchedule" + }, + } + n for n in notebookNodes + ] + ( if daskNodes != null then + [ + ng + { + namePrefix: 'dask', + availabilityZones: [nodeAz], + minSize: 0, + maxSize: 500, + ssh: { + publicKeyPath: 'ssh-keys/bican.key.pub' + }, + labels+: { + "k8s.dask.org/node-purpose": "worker" + }, + taints+: { + "k8s.dask.org_dedicated" : "worker:NoSchedule", + "k8s.dask.org/dedicated" : "worker:NoSchedule" + }, + instancesDistribution+: { + onDemandBaseCapacity: 0, + onDemandPercentageAboveBaseCapacity: 0, + spotAllocationStrategy: "capacity-optimized", + }, + } + n for n in daskNodes + ] else [] + ) +} \ No newline at end of file diff --git a/eksctl/ssh-keys/bican.key.pub b/eksctl/ssh-keys/bican.key.pub new file mode 100644 index 0000000000..1969fae359 --- /dev/null +++ b/eksctl/ssh-keys/bican.key.pub @@ -0,0 +1 @@ +ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAPuAmunoNk11Qr1WCfRulycEvQkbyfziEUXKxR3mUNU sgibson@Athena.local diff --git a/eksctl/ssh-keys/secret/bican.key b/eksctl/ssh-keys/secret/bican.key new file mode 100644 index 
0000000000..02ea8215f2 --- /dev/null +++ b/eksctl/ssh-keys/secret/bican.key @@ -0,0 +1,21 @@ +{ + "data": "ENC[AES256_GCM,data:kII7kHiQ1oc8b49pb09PPWCEvwy5Q4+RISUGuasdIL7+FcqLT2GW/U5KHhe68ciYoobeYMMZ0KGZIcUi7dVnOK7JuMVj17Xbhnjtlz0J1WE+j0USBRwf7KDXI2IYZ/HUim4PjaUzll7lc+VGJvHpc3gNYgAidO0l4V/wpYqzDG8Pg2vqnzeONBPgGi6JXgvC/xS7YmeTRK8lJL+AUYquFCnt0K7DKFS9qq//4fgfct4NF1EzZf1wvjdCT3eIcmGxkm9z/8wLorkvQmT9rq8lKFgOknkVxLFCYmu1xOF/U2Szp4xl1jf3DAt5MFZNEe1iOdLLYs5mGc0PExbJbBY7uTRZX3t1MwAn8F5Aw35gAfaKW2pk8Lj3woFWpevkA/oNctqAW4vcD4I/NepMENu0sFHMKKFZjZ/IzOeRESQJF2hKw8beV5wGZwQSPIa0Gs7caVSBm4fVspHjOGmMbR+gVcjNezZ4+1P56IC9QqaR2Fksx3Js39v8IRjkFQJSPd6KeboTdCif01PZTr4IFPC/G15mnc9iMedL+XxO,iv:jORG54pmCYaxjkiJBvEdqDatv7jPc+3Y/y3EXtBuuOg=,tag:5FRS/BDxyNmBn5uCM3NRHQ==,type:str]", + "sops": { + "kms": null, + "gcp_kms": [ + { + "resource_id": "projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs", + "created_at": "2024-03-25T15:49:18Z", + "enc": "CiUA4OM7eDXDtAIuBO2G1PQUO/+z63OzqWASErrdn4hJXKUwOAoaEkkAXoW3Jrow6VFVyfdvrPtVgv13BIJ0lStrIiu4RjceEh/FHH8J2V9Bdx4uVEm2wxNArKjLEEmwIO+TcA9+9bTQmQNh6B5ZwFZ8" + } + ], + "azure_kv": null, + "hc_vault": null, + "age": null, + "lastmodified": "2024-03-25T15:49:18Z", + "mac": "ENC[AES256_GCM,data:yu9fpvR48VhcnfODFTxeEqhqyJ9+fYF1sVFzED/Ej22XL1Lz2zIPHZtIu/sv5fwr82vWo5ku5/Ld+MMR3xrVL5M1+unRpPEVJRXpUcknssug0X+rHQ4/PcemamElIF77Ra82ezxkMEgGak2vsDn5v7UqMPeGDqjwhKlqtXxRnb0=,iv:AoXpaQhhuCOCDirK+yA8xL4VijcbmFPBtHClATGBh+A=,tag:KH6Czg6OBNQQuboqIaXUYw==,type:str]", + "pgp": null, + "unencrypted_suffix": "_unencrypted", + "version": "3.8.1" + } +} \ No newline at end of file diff --git a/terraform/aws/projects/bican.tfvars b/terraform/aws/projects/bican.tfvars new file mode 100644 index 0000000000..0b0c92785b --- /dev/null +++ b/terraform/aws/projects/bican.tfvars @@ -0,0 +1,26 @@ +region = "us-east-2" + +cluster_name = "bican" + +cluster_nodes_location = "us-east-2a" + +user_buckets = { + "scratch-staging" : { + "delete_after" : 7 + }, + 
"scratch" : { + "delete_after" : 7 + }, +} + + +hub_cloud_permissions = { + "staging" : { + bucket_admin_access : ["scratch-staging"], + extra_iam_policy : "" + }, + "prod" : { + bucket_admin_access : ["scratch"], + extra_iam_policy : "" + }, +} \ No newline at end of file