diff --git a/deploy/backup/backup_from_volume.sh b/deploy/backup/backup_from_volume.sh
new file mode 100644
index 0000000..1ebda80
--- /dev/null
+++ b/deploy/backup/backup_from_volume.sh
@@ -0,0 +1,96 @@
+#!/usr/bin/env bash
+#
+# Back up the home volume of every cluster user who has no running pod:
+# attach the user's persistent disk to a helper GCE instance, copy its
+# contents to GCS, then detach the disk again.
+
+project=rhg-project-1
+zone=us-west1-a
+namespace=rhodium-jupyter
+
+# create a google compute instance in the same zone & project as the
+# cluster. the below commands assume you're running ubuntu... so... that's
+# preferable
+instance=mikes-crazy-solo-instance-please-dont-let-this-run-past-jan2020
+
+gcs_backup_dir=gs://compute-rhg-backups/test-manual-backups-$(date +%F)
+
+token_file=rhg-project-1-compute-rhg-backup-manager.json
+
+# copy our gcloud service account token to the instance
+gcloud compute scp -q --zone "$zone" "$token_file" "$instance:~/" > /dev/null
+
+gcloud compute ssh --zone "$zone" "$instance" -- bash -c "echo && "\
+"sudo apt-get update -qq > /dev/null; "\
+"sudo apt-get --yes -qq install --upgrade apt-utils kubectl google-cloud-sdk > /dev/null 2>&1; "\
+"gcloud auth activate-service-account -q --key-file ~/$token_file >/dev/null 2>&1; " > /dev/null 2>&1
+
+# Return 0 when the first argument is exactly equal to one of the remaining
+# arguments, 1 otherwise.
+in_list() {
+  local search="$1"
+  shift
+  local item
+  for item in "$@"; do
+    [[ "$item" == "$search" ]] && return 0
+  done
+  return 1
+}
+
+# compile a list of persistent volume claims (column 6, e.g. <namespace>/claim-<user>)
+claims=$(kubectl -n "$namespace" get PersistentVolumes | grep claim- | awk '{print $6}')
+
+# get a list of currently running pods
+running_pods=$(for pod in $(kubectl -n "$namespace" get pods | grep jupyter- | awk '{print $1}'); do echo "${pod/jupyter-/}"; done)
+
+# users that own a volume but have no running pod; $running_pods is left
+# unquoted on purpose so that each pod name is passed as its own argument
+cluster_users=$(
+  for claim in $claims; do
+    claim_user=${claim#*/};
+    cluster_user=${claim_user/claim-/};
+    if ! in_list "$cluster_user" $running_pods; then
+      echo "$cluster_user";
+    fi
+  done
+);
+
+# cluster_users=$(for user in mattgoldklang smohan moonlimb; do echo $user; done)
+
+# enumerate counter
+i=0
+
+# loop over our user list
+for cluster_user in $cluster_users; do
+
+  # get the GKE persistent volume claim and associated GCE Volume ID
+  claim=$(kubectl -n "$namespace" get PersistentVolumes | awk -v c="$namespace/claim-$cluster_user" '$6 == c {print $1}')
+  [[ -n "$claim" ]] || { echo "skipping $cluster_user: no volume claim in $namespace" >&2; continue; }
+  volume=$(gcloud compute disks list --filter="zone:($zone) name:($claim)" | grep "$claim" | awk '{print $1}');
+  [[ -n "$volume" ]] || { echo "skipping $cluster_user: no disk for $claim in $zone" >&2; continue; }
+
+  # attach the volume to the instance
+  gcloud compute instances attach-disk -q "$instance" --disk "$volume" --zone "$zone" > /dev/null
+
+  # mount the volume and copy the data to GCS
+  # NOTE(review): /dev/sdb assumes the attached disk is the instance's second disk - confirm
+  gcloud compute ssh --zone "$zone" "$instance" -- bash -c "echo &&\
+    sudo mkdir /mnt/$cluster_user && \
+    sudo mount /dev/sdb /mnt/$cluster_user && \
+    gsutil -m cp -r /mnt/$cluster_user $gcs_backup_dir/$cluster_user/home/jovyan; \
+    sudo umount /mnt/$cluster_user && \
+    sudo rm -r /mnt/$cluster_user"
+
+  # detach the volume from the instance
+  gcloud compute instances detach-disk -q "$instance" --disk "$volume" --zone "$zone" > /dev/null
+
+  echo $i
+  i=$((i+1));
+
+done
+# done | tqdm --total $(echo "$cluster_users" | wc -w) > /dev/null
+
+# remove the credentials from the temporary instance
+gcloud compute ssh --zone "$zone" "$instance" -- bash -c "echo && "\
+"gcloud auth revoke compute-rhg-backup-manager@rhg-project-1.iam.gserviceaccount.com >/dev/null 2>&1; "\
+"rm -f ~/$token_file; " > /dev/null 2>&1
diff --git a/deploy/backup/recover_from_drive.sh b/deploy/backup/recover_from_drive.sh
new file mode 100644
index 0000000..e69de29
diff --git a/deploy/backup/recover_from_gcs.sh b/deploy/backup/recover_from_gcs.sh
new file mode 100644
index 0000000..1e18e0c
--- /dev/null
+++ b/deploy/backup/recover_from_gcs.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+#
+# Restore each listed user's home directory from a GCS backup by running
+# gsutil inside that user's jupyter pod.
+
+project=rhg-project-1
+zone=us-west1-a
+namespace=compute-rhg
+
+# the cluster is named after the namespace. not true for rhg-hub or test-hub. be warned.
+cluster=$namespace
+
+gcloud container clusters get-credentials "$cluster" --zone "$zone" --project "$project"
+
+gcs_backup_dir=gs://compute-rhg-backups/test-manual-backups-2019-12-07
+
+token_file=rhg-project-1-compute-rhg-backup-manager.json
+
+# active_users=$(for pod in $(kubectl -n $namespace get pods | grep jupyter- | awk '{print $1}'); do user=${pod/jupyter-/}; echo $user; done);
+active_users=$(for user in delgadom; do echo $user; done);
+
+i=0;
+for cluster_user in $active_users; do
+  kubectl cp -n "$namespace" "$token_file" "jupyter-$cluster_user:/home/jovyan/";
+  kubectl exec "jupyter-$cluster_user" --namespace "$namespace" -- bash -c "\
+    sudo apt-get update -qq > /dev/null; \
+    sudo apt-get --yes -qq install --upgrade apt-utils kubectl google-cloud-sdk > /dev/null 2>&1; \
+    gcloud auth activate-service-account -q --key-file /home/jovyan/$token_file >/dev/null 2>&1; \
+    gsutil -m -q cp -r $gcs_backup_dir/$cluster_user/home/jovyan/ /home/ >/dev/null; \
+    gcloud auth revoke compute-rhg-backup-manager@rhg-project-1.iam.gserviceaccount.com >/dev/null 2>&1; \
+    rm -f /home/jovyan/$token_file";
+  echo $((i++));
+done | tqdm --total $(echo $active_users | wc -w) > /dev/null
diff --git a/deploy/create_scripts/compute-rhg.sh b/deploy/create_scripts/compute-rhg.sh
new file mode 100644
index 0000000..19db10f
--- /dev/null
+++ b/deploy/create_scripts/compute-rhg.sh
@@ -0,0 +1,135 @@
+#!/usr/bin/env bash
+
+set -e
+
+# Make sure you're logged in to gcloud and have the correct permissions
+EMAIL=$(gcloud config get-value account)
+PROJECTID=$(gcloud config get-value project)
+ZONE=$(gcloud config get-value compute/zone)
+CLUSTER_NAME=compute-rhg
+DEPLOYMENT_NAME=compute-rhg
+URL=testing.climate-kube.com
+HELM_SPEC=jupyter-config.yml
+NUM_NODES=1
+MAX_WORKER_NODES=200
+MIN_WORKER_NODES=0
+DISK_SIZE=100
+NB_MACHINE_TYPE=n1-highmem-8
+WORKER_MACHINE_TYPE=n1-highmem-8
+# PREEMPTIBLE_FLAG=
+PREEMPTIBLE_FLAG=--preemptible
+
+# Start cluster on Google cloud
+gcloud container clusters create "${CLUSTER_NAME}" --num-nodes="${NUM_NODES}" \
+  --machine-type=n1-standard-2 --zone="${ZONE}" --project="${PROJECTID}" \
+  --enable-ip-alias --no-enable-legacy-authorization
+
+# drop the default node pool; dedicated pools are created below
+echo "deleting default pool"
+gcloud container node-pools delete default-pool --cluster "${CLUSTER_NAME}" \
+  --zone="${ZONE}" --project="${PROJECTID}" --quiet
+
+# core-pool: two fixed nodes labelled node-purpose=core
+echo "creating core pool..."
+core_machine_type="n1-standard-2"
+core_labels="hub.jupyter.org/node-purpose=core"
+gcloud container node-pools create core-pool --cluster="${CLUSTER_NAME}" \
+  --zone="${ZONE}" --machine-type="${core_machine_type}" --num-nodes=2 \
+  --node-labels "${core_labels}"
+
+# jupyter-pool: autoscaled between 0 and 10 nodes, tainted and labelled for users
+echo "creating jupyter pool..."
+jupyter_taints="hub.jupyter.org_dedicated=user:NoSchedule"
+jupyter_labels="hub.jupyter.org/node-purpose=user"
+gcloud container node-pools create jupyter-pool --cluster="${CLUSTER_NAME}" \
+  --zone="${ZONE}" --machine-type="${NB_MACHINE_TYPE}" --disk-type=pd-ssd \
+  --num-nodes=0 --enable-autoscaling --min-nodes=0 --max-nodes=10 \
+  --node-labels "${jupyter_labels}" --node-taints "${jupyter_taints}"
+
+# dask-pool
+echo "creating dask pool..."
+dask_taints="k8s.dask.org_dedicated=worker:NoSchedule"
+dask_labels="k8s.dask.org/node-purpose=worker"
+# PREEMPTIBLE_FLAG stays unquoted so that an empty value adds no argument
+gcloud container node-pools create dask-pool --cluster="${CLUSTER_NAME}" \
+  ${PREEMPTIBLE_FLAG} --machine-type="${WORKER_MACHINE_TYPE}" --disk-type=pd-ssd \
+  --zone="${ZONE}" --num-nodes=0 --enable-autoscaling --min-nodes="${MIN_WORKER_NODES}" \
+  --max-nodes="${MAX_WORKER_NODES}" --node-taints "${dask_taints}" \
+  --node-labels "${dask_labels}"
+
+# make sure you have the credentials for this cluster loaded
+echo get credentials for cluster
+gcloud container clusters get-credentials "${CLUSTER_NAME}" --zone "${ZONE}" \
+  --project "${PROJECTID}"
+
+# this will give you admin access on the cluster
+kubectl create clusterrolebinding cluster-admin-binding \
+  --clusterrole=cluster-admin --user="${EMAIL}"
+
+# ############
+# ## Only strictly necessary if helm 3 (but ok to do either way)
+# create namespace
+echo creating namespace...
+kubectl create namespace "${DEPLOYMENT_NAME}"
+# ############
+
+# # ############
+# ## Only necessary if helm 2 (will break otherwise b/c helm 3 has no tiller)
+# #Give the tiller process cluster-admin status
+# kubectl create serviceaccount tiller --namespace=kube-system
+# kubectl create clusterrolebinding tiller --clusterrole cluster-admin \
+# --serviceaccount=kube-system:tiller
+#
+# #strangely this allows helm to install tiller into the kubernetes cluster
+# helm init --service-account tiller
+#
+# # this patches the security of the deployment so that no other processes in the cluster can access the other pods
+# kubectl --namespace=kube-system patch deployment tiller-deploy --type=json \
+# --patch='[{"op": "add", "path": "/spec/template/spec/containers/0/command", "value": ["/tiller", "--listen=localhost:44134"]}]'
+# # ############
+
+# Make sure you are in the rhg-hub repo for this:
+echo add pangeo repo to cluster...
+helm repo add pangeo https://pangeo-data.github.io/helm-chart/
+helm repo update
+
+# generate a secret token for the cluster
+echo generating secret token...
+secret_token=$(openssl rand -hex 32)
+echo "SECRET_TOKEN=$secret_token"
+
+# to reuse the token of an existing deployment, assign it to secret_token
+# here at run time; never commit real token values to the repository
+
+## NOTE: you will need to change 600s to 600 in both the install and upgrade commands
+## if working with Helm 2
+echo installing helm chart...
+helm install "${DEPLOYMENT_NAME}" pangeo/pangeo --version 19.09.27-86dd66c --namespace="${DEPLOYMENT_NAME}" \
+  --timeout 600s -f "${HELM_SPEC}" \
+  --set jupyterhub.proxy.https.hosts="{${URL}}" \
+  --set jupyterhub.proxy.secretToken="${secret_token}" \
+  --set jupyterhub.auth.github.clientId="${GITHUB_CLIENT_ID}" \
+  --set jupyterhub.auth.github.clientSecret="${GITHUB_SECRET_TOKEN}" \
+  --set jupyterhub.auth.github.callbackUrl="https://${URL}/hub/oauth_callback"
+
+echo "waiting for cluster to boot"
+sleep 120
+
+echo "retrieving external IP"
+EXTERNAL_IP=$(kubectl -n "${DEPLOYMENT_NAME}" get service proxy-public -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
+[[ -n "${EXTERNAL_IP}" ]] || echo "WARNING: proxy-public has no external IP yet; fill in loadBalancerIP by hand" >&2
+
+echo "IMPORTANT"
+echo "To update the cluster, run the following command. Save this somewhere as you will need the secret tokens:"
+echo
+
+echo "helm upgrade ${DEPLOYMENT_NAME} pangeo/pangeo --version 19.09.27-86dd66c --timeout 600s --namespace=${DEPLOYMENT_NAME} -f $HELM_SPEC \\"
+echo " --set jupyterhub.proxy.service.loadBalancerIP=${EXTERNAL_IP} \\"
+echo " --set jupyterhub.proxy.https.hosts=\"{${URL}}\" \\"
+echo " --set jupyterhub.proxy.secretToken=\"${secret_token}\" \\"
+echo " --set jupyterhub.auth.github.clientId=\"\" \\"
+echo " --set jupyterhub.auth.github.clientSecret=\"\" \\"
+echo " --set jupyterhub.auth.github.callbackUrl=\"https://${URL}/hub/oauth_callback\""
+
+# Complete the installation using the cluster deployment instructions
+# https://paper.dropbox.com/doc/Cluster-Deployments--AgOxfFIh7eCjBgsbFjTjjMpOAg-TQN0OpVDCIR3zW5PGJSRf
diff --git a/deploy/create_scripts/ganymede.sh b/deploy/create_scripts/ganymede.sh
new file mode 100644
index 0000000..80dc5ca
--- /dev/null
+++ b/deploy/create_scripts/ganymede.sh
@@ -0,0 +1,152 @@
+#!/usr/bin/env bash
+
+set -e
+
+# Before you begin, set the GITHUB_CLIENT_ID and GITHUB_SECRET_TOKEN
+# environment variables for this URL. If one does not exist, create the
+# OAuth app using github organization oauth settings.
+: "${GITHUB_CLIENT_ID:?set GITHUB_CLIENT_ID before running this script}"
+: "${GITHUB_SECRET_TOKEN:?set GITHUB_SECRET_TOKEN before running this script}"
+
+# Make sure you're logged in to gcloud and have the correct permissions
+EMAIL=$(gcloud config get-value account)
+PROJECTID=$(gcloud config get-value project)
+ZONE=$(gcloud config get-value compute/zone)
+CLUSTER_NAME=ganymede
+DEPLOYMENT_NAME=ganymede
+URL=ganymede.climate-kube.com
+DNS_ZONE=climate-kube
+HELM_SPEC=jupyter-config.yml
+NUM_NODES=1
+MAX_JUPYTER_NODES=500
+MIN_JUPYTER_NODES=0
+MAX_DASK_NODES=5000
+MIN_DASK_NODES=0
+DISK_SIZE=100
+NB_MACHINE_TYPE=n1-highmem-8
+WORKER_MACHINE_TYPE=n1-highmem-8
+# PREEMPTIBLE_FLAG=
+PREEMPTIBLE_FLAG=--preemptible
+
+# Start cluster on Google cloud
+gcloud container clusters create "${CLUSTER_NAME}" --num-nodes="${NUM_NODES}" \
+  --machine-type=n1-standard-2 --zone="${ZONE}" --project="${PROJECTID}" \
+  --enable-ip-alias --no-enable-legacy-authorization
+
+# get rid of default pool that we don't want
+echo; echo deleting default pool
+gcloud container node-pools delete default-pool --cluster "${CLUSTER_NAME}" \
+  --zone="${ZONE}" --project="${PROJECTID}" --quiet
+
+# core-pool: two fixed nodes labelled node-purpose=core
+echo; echo creating core pool...
+core_machine_type="n1-standard-2"
+core_labels="hub.jupyter.org/node-purpose=core"
+gcloud container node-pools create core-pool --cluster="${CLUSTER_NAME}" \
+  --machine-type="${core_machine_type}" --zone="${ZONE}" --num-nodes=2 \
+  --node-labels "${core_labels}"
+
+# jupyter-pool: autoscaled between MIN_JUPYTER_NODES and MAX_JUPYTER_NODES
+echo; echo creating jupyter pool...
+jupyter_taints="hub.jupyter.org_dedicated=user:NoSchedule"
+jupyter_labels="hub.jupyter.org/node-purpose=user"
+gcloud container node-pools create jupyter-pool --cluster="${CLUSTER_NAME}" \
+  --machine-type="${NB_MACHINE_TYPE}" --disk-type=pd-ssd --zone="${ZONE}" \
+  --num-nodes=0 --enable-autoscaling --min-nodes="${MIN_JUPYTER_NODES}" \
+  --max-nodes="${MAX_JUPYTER_NODES}" --node-taints "${jupyter_taints}" \
+  --node-labels "${jupyter_labels}"
+
+# dask-pool
+echo; echo creating dask pool...
+dask_taints="k8s.dask.org_dedicated=worker:NoSchedule"
+dask_labels="k8s.dask.org/node-purpose=worker"
+# PREEMPTIBLE_FLAG stays unquoted so that an empty value adds no argument
+gcloud container node-pools create dask-pool --cluster="${CLUSTER_NAME}" \
+  --machine-type="${WORKER_MACHINE_TYPE}" --disk-type=pd-ssd --zone="${ZONE}" \
+  --num-nodes=0 --enable-autoscaling --min-nodes="${MIN_DASK_NODES}" \
+  --max-nodes="${MAX_DASK_NODES}" --node-taints "${dask_taints}" \
+  --node-labels "${dask_labels}" ${PREEMPTIBLE_FLAG}
+
+# make sure you have the credentials for this cluster loaded
+echo; echo get credentials for cluster
+gcloud container clusters get-credentials "${CLUSTER_NAME}" --zone "${ZONE}" \
+  --project "${PROJECTID}"
+
+# this will give you admin access on the cluster
+echo; echo create clusterrolebinding
+kubectl create clusterrolebinding cluster-admin-binding \
+  --clusterrole=cluster-admin --user="${EMAIL}"
+
+# ############
+# ## Only strictly necessary if helm 3 (but ok to do either way)
+# create namespace
+echo; echo creating namespace...
+kubectl create namespace "${DEPLOYMENT_NAME}"
+# ############
+
+# # ############
+# ## Only necessary if helm 2 (will break otherwise b/c helm 3 has no tiller)
+# #Give the tiller process cluster-admin status
+# kubectl create serviceaccount tiller --namespace=kube-system
+# kubectl create clusterrolebinding tiller --clusterrole cluster-admin \
+# --serviceaccount=kube-system:tiller
+#
+# #strangely this allows helm to install tiller into the kubernetes cluster
+# helm init --service-account tiller
+#
+# # this patches the security of the deployment so that no other processes in the cluster can access the other pods
+# kubectl --namespace=kube-system patch deployment tiller-deploy --type=json \
+# --patch='[{"op": "add", "path": "/spec/template/spec/containers/0/command", "value": ["/tiller", "--listen=localhost:44134"]}]'
+# # ############
+
+# Make sure you are in the rhg-hub repo for this:
+echo; echo add pangeo repo to cluster...
+helm repo add pangeo https://pangeo-data.github.io/helm-chart/
+helm repo update
+
+# generate a secret token for the cluster
+echo; echo generating secret token...
+secret_token=$(openssl rand -hex 32)
+echo "SECRET_TOKEN=$secret_token"
+
+## NOTE: you will need to change 600s to 600 in both the install and upgrade commands
+## if working with Helm 2
+echo; echo installing helm chart...
+helm install "${DEPLOYMENT_NAME}" pangeo/pangeo --version 19.09.27-86dd66c --namespace="${DEPLOYMENT_NAME}" \
+  --timeout 600s -f "${HELM_SPEC}" \
+  --set jupyterhub.proxy.https.hosts="{${URL}}" \
+  --set jupyterhub.proxy.secretToken="${secret_token}" \
+  --set jupyterhub.auth.github.clientId="${GITHUB_CLIENT_ID}" \
+  --set jupyterhub.auth.github.clientSecret="${GITHUB_SECRET_TOKEN}" \
+  --set jupyterhub.auth.github.callbackUrl="https://${URL}/hub/oauth_callback"
+
+echo; echo "waiting for cluster to boot"
+sleep 120
+
+echo; echo "retrieving external IP"
+EXTERNAL_IP=$(kubectl -n "${DEPLOYMENT_NAME}" get service proxy-public -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
+[[ -n "${EXTERNAL_IP}" ]] || echo "WARNING: proxy-public has no external IP yet; DNS will not be updated" >&2
+
+# Modify DNS record set to direct cluster URL to proxy IP (skipped when DNS_ZONE or the IP is empty)
+if [[ -n "${DNS_ZONE}" && -n "${EXTERNAL_IP}" ]]; then
+  echo; echo "updating DNS record set for ${DNS_ZONE}"
+  gcloud dns record-sets transaction start --zone="${DNS_ZONE}"
+  gcloud dns record-sets transaction add "${EXTERNAL_IP}" \
+    --name="${URL}" --ttl=300 --type=A --zone="${DNS_ZONE}"
+  gcloud dns record-sets transaction execute --zone="${DNS_ZONE}"
+fi
+
+echo; echo "IMPORTANT"
+echo "To update the cluster, run the following command. Save this somewhere as you will need the secret tokens:"
+echo
+
+echo "helm upgrade ${DEPLOYMENT_NAME} pangeo/pangeo --version 19.09.27-86dd66c --timeout 600s --namespace=${DEPLOYMENT_NAME} -f $HELM_SPEC \\"
+echo " --set jupyterhub.proxy.service.loadBalancerIP=${EXTERNAL_IP} \\"
+echo " --set jupyterhub.proxy.https.hosts=\"{${URL}}\" \\"
+echo " --set jupyterhub.proxy.secretToken=\"${secret_token}\" \\"
+echo " --set jupyterhub.auth.github.clientId=\"${GITHUB_CLIENT_ID}\" \\"
+echo " --set jupyterhub.auth.github.clientSecret=\"${GITHUB_SECRET_TOKEN}\" \\"
+echo " --set jupyterhub.auth.github.callbackUrl=\"https://${URL}/hub/oauth_callback\""
+
+# Complete the installation using the cluster deployment instructions
+# https://paper.dropbox.com/doc/Cluster-Deployments--AgOxfFIh7eCjBgsbFjTjjMpOAg-TQN0OpVDCIR3zW5PGJSRf
diff --git a/deploy/create_scripts/the_list.txt b/deploy/create_scripts/the_list.txt new file mode 100644 index 0000000..eb1d902 --- /dev/null +++ b/deploy/create_scripts/the_list.txt @@ -0,0 +1,72 @@ +Ganymede +Io +Adrastea +Callisto +Amalthea +Metis +Europa +Thebe +Philophrosyne +Ersa +Euporie +S/2003 J 18 +Lysithea +Helike +S/2016 J 1 +S/2017 J 3 +Arche +Himalia +Leda +Isonoe +Pandia +Praxidike +Elara +Kore +Euanthe +Kale +Kallichore +Dia +Themisto +Kalyke +S/2017 J 7 +Valetudo +S/2017 J 9 +Hermippe +S/2011 J 1 +Carme +S/2017 J 2 +Herse +Orthosie +Harpalyke +Eupheme +Thyone +Erinome +S/2003 J 19 +Callirrhoe +Thelxinoe +Carpo +Sinope +Eukelade +S/2017 J 5 +Iocaste +Chaldene +S/2010 J 2 +Eirene +Megaclite +S/2017 J 8 +Mneme +S/2010 J 1 +Pasithee +S/2011 J 2 +Ananke +Taygete +Autonoe +Pasiphae +Eurydome +Aitne +S/2017 J 1 +Hegemone +Cyllene +Aoede +Sponde +S/2017 J 6 diff --git a/jupyter-config.yml b/jupyter-config.yml index ceaec25..3a7b98d 100644 --- a/jupyter-config.yml +++ b/jupyter-config.yml @@ -2,12 +2,11 @@ jupyterhub: singleuser: image: name: rhodium/notebook - tag: b46e2cda7727af3eef095279da220df283b57a66 + tag: 
91e8177fe0db3a60b343ff01a5deb699bcb6eb2a storage: capacity: 10Gi dynamic: -# storageClass should be changed to pd-ssd if/when we create a new cluster that uses -# this disk type + # storageClass: ssd storageClass: standard cpu: limit: 3.75 @@ -23,23 +22,10 @@ jupyterhub: minAvailable: 0 extraConfig: customPodHook: | - from kubernetes import client - def modify_pod_hook(spawner, pod): - pod.spec.containers[0].security_context = client.V1SecurityContext( - privileged=True, - capabilities=client.V1Capabilities( - add=['SYS_ADMIN'] - ) - ) - return pod - c.KubeSpawner.modify_pod_hook = modify_pod_hook c.JupyterHub.logo_file = '/usr/local/share/jupyterhub/static/custom/images/logo.png' c.JupyterHub.template_paths = ['/usr/local/share/jupyterhub/custom_templates/'] # The templates section is borrowed from the pangeo approach, i.e. # https://github.com/pangeo-data/pangeo-cloud-federation/blob/staging/deployments/ocean/config/common.yaml -# TODO: Maybe just merge this template into the notebook image repo and do the -# configuration there rather than maintaining a separate repo and mounting it -# in this deploy config? extraVolumes: - name: custom-templates gitRepo: @@ -66,12 +52,18 @@ jupyterhub: cull: timeout: 259200 + proxy: + secretToken: OVERRIDEME + https: + letsencrypt: + contactEmail: mdelgado@rhg.com + auth: type: github - github: - callbackUrl: "https://test2.climate-kube.com/hub/oauth_callback" scopes: - "read:user" + orgWhitelist: + - RhodiumGroup admin: access: true users: @@ -79,22 +71,9 @@ jupyterhub: - brews - delgadom - dgergel + - mattgoldklang whitelist: users: - bolliger32 - - brews - - delgadom - - dgergel - - alihamidi1 - - andyhultgren - - cledna - dpa9694 - - ferleejc - - hanskolus - - hpitt - - jaysayre - - kemccusker - - mattgoldklang - - samirasiddique - - smohan-rhg - - wjherndon + - mayaanorman