Commit
add in ganymede build scripts and backup scripts
Showing 7 changed files with 484 additions and 33 deletions.
@@ -0,0 +1,85 @@
#!/usr/bin/env bash

project=rhg-project-1
zone=us-west1-a
namespace=rhodium-jupyter

# create a google compute instance in the same zone & project as the
# cluster. the below commands assume you're running ubuntu... so... that's
# preferable
instance=mikes-crazy-solo-instance-please-dont-let-this-run-past-jan2020

gcs_backup_dir=gs://compute-rhg-backups/test-manual-backups-$(date +%F)

token_file=rhg-project-1-compute-rhg-backup-manager.json

# copy our gcloud service account token to the instance
gcloud compute scp -q $token_file $instance:~/ > /dev/null

# install kubectl and the cloud SDK on the instance and activate the service account
gcloud compute ssh --zone $zone $instance -- bash -c "echo && "\
"sudo apt-get update -qq > /dev/null; "\
"sudo apt-get --yes -qq install --upgrade apt-utils kubectl google-cloud-sdk > /dev/null 2>&1; "\
"gcloud auth activate-service-account -q --key-file ~/$token_file >/dev/null 2>&1; " > /dev/null 2>&1

# return 0 if the first argument matches one of the remaining arguments
in_list() {
    local search="$1"
    shift
    local list=("$@")
    for file in "${list[@]}" ; do
        [[ $file == $search ]] && return 0
    done
    return 1
}
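
# usage sketch (hypothetical values, not in the original script):
#   in_list alice alice bob carol && echo "alice is in the list"   # returns 0
#   in_list dave alice bob carol || echo "dave is not"             # returns 1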

# compile a list of cluster users from the persistent volume claims
claims=$(kubectl -n $namespace get PersistentVolumes | grep claim- | awk '{print $6}')

# get a list of currently running pods
running_pods=$(for pod in $(kubectl -n $namespace get pods | grep jupyter- | awk '{print $1}'); do echo ${pod/jupyter-/}; done)

# back up only users whose pods are not currently running
cluster_users=$(
    for claim in $claims; do
        claim_user=${claim#*/};
        cluster_user=${claim_user/claim-/};
        if ! in_list $cluster_user $running_pods; then
            echo $cluster_user;
        fi
    done
);

# cluster_users=$(for user in mattgoldklang smohan moonlimb; do echo $user; done)

# enumerate counter
i=0

# loop over our user list
for cluster_user in $cluster_users; do

    # get the GKE persistent volume claim and associated GCE volume ID
    claim=$(kubectl -n $namespace get PersistentVolumes | grep "$namespace/claim-$cluster_user\ " | awk '{print $1}')
    volume=$(gcloud compute disks list --filter="zone:($zone) name:($claim)" | grep $claim | awk '{print $1}');

    # attach the volume to the instance
    gcloud compute instances attach-disk -q $instance --disk $volume --zone $zone > /dev/null

    # mount the volume and copy the data to GCS
    gcloud compute ssh --zone $zone $instance -- bash -c "echo &&\
        sudo mkdir /mnt/$cluster_user && \
        sudo mount /dev/sdb /mnt/$cluster_user && \
        gsutil -m cp -r /mnt/$cluster_user $gcs_backup_dir/$cluster_user/home/jovyan; \
        sudo umount /mnt/$cluster_user && \
        sudo rm -r /mnt/$cluster_user"

    # detach the volume from the instance
    gcloud compute instances detach-disk -q $instance --disk $volume --zone $zone > /dev/null

    echo $i
    i=$((i+1));

done
# done | tqdm --total $(echo "$cluster_users" | wc -w) > /dev/null

# remove the credentials from the temporary instance
gcloud compute ssh --zone $zone $instance -- bash -c "echo && "\
"gcloud auth revoke [email protected] >/dev/null 2>&1; "\
"rm -f ~/$token_file; " > /dev/null 2>&1
Empty file.
@@ -0,0 +1,30 @@
#!/usr/bin/env bash

project=rhg-project-1
zone=us-west1-a
namespace=compute-rhg

# not true for rhg-hub or test-hub. be warned.
cluster=$namespace

gcloud container clusters get-credentials $cluster --zone $zone --project $project

gcs_backup_dir=gs://compute-rhg-backups/test-manual-backups-2019-12-07

token_file=rhg-project-1-compute-rhg-backup-manager.json

# restore into every running user pod, or hard-code a list of users as below
# active_users=$(for pod in $(kubectl -n $namespace get pods | grep jupyter- | awk '{print $1}'); do user=${pod/jupyter-/}; echo $user; done);
active_users=$(for user in delgadom; do echo $user; done);

i=0;
for cluster_user in $active_users; do
    kubectl cp -n $namespace $token_file jupyter-$cluster_user:/home/jovyan/;
    kubectl exec jupyter-$cluster_user --namespace $namespace -- bash -c "\
        sudo apt-get update -qq > /dev/null; \
        sudo apt-get --yes -qq install --upgrade apt-utils kubectl google-cloud-sdk > /dev/null 2>&1; \
        gcloud auth activate-service-account -q --key-file /home/jovyan/$token_file >/dev/null 2>&1; \
        gsutil -m -q cp -r $gcs_backup_dir/$cluster_user/home/jovyan/ /home/ >/dev/null; \
        gcloud auth revoke [email protected] >/dev/null 2>&1; \
        rm -f /home/jovyan/$token_file";
    echo $((i++));
done | tqdm --total $(echo $active_users | wc -w) > /dev/null
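
After the restore loop, a quick spot check inside one pod confirms the copy worked. This is a sketch using the namespace and example user from the script above, not part of the committed script:

# list the restored home directory in a single user's pod
kubectl -n compute-rhg exec jupyter-delgadom -- ls -la /home/jovyan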
@@ -0,0 +1,135 @@
#!/usr/bin/env bash

set -e

# Make sure you're logged in to gcloud and have the correct permissions
EMAIL=$(gcloud config get-value account)
PROJECTID=$(gcloud config get-value project)
ZONE=$(gcloud config get-value compute/zone)
CLUSTER_NAME=compute-rhg
DEPLOYMENT_NAME=compute-rhg
URL=testing.climate-kube.com
HELM_SPEC=jupyter-config.yml
NUM_NODES=1
MAX_WORKER_NODES=200
MIN_WORKER_NODES=0
DISK_SIZE=100
NB_MACHINE_TYPE=n1-highmem-8
WORKER_MACHINE_TYPE=n1-highmem-8
# PREEMPTIBLE_FLAG=
PREEMPTIBLE_FLAG=--preemptible

# Start cluster on Google cloud
gcloud container clusters create $CLUSTER_NAME --num-nodes=$NUM_NODES \
    --machine-type=n1-standard-2 --zone=$ZONE --project=$PROJECTID \
    --enable-ip-alias --no-enable-legacy-authorization

# get rid of default pool that we don't want
echo deleting default pool
gcloud container node-pools delete default-pool --cluster $CLUSTER_NAME \
    --zone=$ZONE --project=$PROJECTID --quiet

# core-pool
echo creating core pool...
core_machine_type="n1-standard-2"
core_labels="hub.jupyter.org/node-purpose=core"
gcloud container node-pools create core-pool --cluster=${CLUSTER_NAME} \
    --machine-type=${core_machine_type} --zone=${ZONE} --num-nodes=2 \
    --node-labels ${core_labels}

# jupyter-pool
echo creating jupyter pool...
jupyter_taints="hub.jupyter.org_dedicated=user:NoSchedule"
jupyter_labels="hub.jupyter.org/node-purpose=user"
gcloud container node-pools create jupyter-pool --cluster=${CLUSTER_NAME} \
    --machine-type=${NB_MACHINE_TYPE} --disk-type=pd-ssd --zone=${ZONE} \
    --num-nodes=0 --enable-autoscaling --min-nodes=0 --max-nodes=10 \
    --node-taints ${jupyter_taints} --node-labels ${jupyter_labels}

# dask-pool
echo creating dask pool...
dask_taints="k8s.dask.org_dedicated=worker:NoSchedule"
dask_labels="k8s.dask.org/node-purpose=worker"
gcloud container node-pools create dask-pool --cluster=${CLUSTER_NAME} \
    ${PREEMPTIBLE_FLAG} --machine-type=${WORKER_MACHINE_TYPE} --disk-type=pd-ssd \
    --zone=${ZONE} --num-nodes=0 --enable-autoscaling --min-nodes=0 \
    --max-nodes=${MAX_WORKER_NODES} --node-taints ${dask_taints} \
    --node-labels ${dask_labels}

# make sure you have the credentials for this cluster loaded
echo get credentials for cluster
gcloud container clusters get-credentials $CLUSTER_NAME --zone $ZONE \
    --project $PROJECTID

# this will give you admin access on the cluster
kubectl create clusterrolebinding cluster-admin-binding \
    --clusterrole=cluster-admin --user=$EMAIL

# ############
# ## Only strictly necessary if helm 3 (but ok to do either way)
# create namespace
echo creating namespace...
kubectl create namespace $DEPLOYMENT_NAME
# ############

# # ############
# ## Only necessary if helm 2 (will break otherwise b/c helm 3 has no tiller)
# # Give the tiller process cluster-admin status
# kubectl create serviceaccount tiller --namespace=kube-system
# kubectl create clusterrolebinding tiller --clusterrole cluster-admin \
#     --serviceaccount=kube-system:tiller
#
# # strangely this allows helm to install tiller into the kubernetes cluster
# helm init --service-account tiller
#
# # this patches the security of the deployment so that no other processes in the cluster can access the other pods
# kubectl --namespace=kube-system patch deployment tiller-deploy --type=json \
#     --patch='[{"op": "add", "path": "/spec/template/spec/containers/0/command", "value": ["/tiller", "--listen=localhost:44134"]}]'
# # ############

# Make sure you are in the rhg-hub repo for this:
echo add pangeo repo to cluster...
helm repo add pangeo https://pangeo-data.github.io/helm-chart/
helm repo update

# generate a secret token for the cluster
echo generating secret token...
secret_token=$(openssl rand -hex 32)
echo "SECRET_TOKEN=$secret_token"

# secret_token=5486775e2cbb0a533aa81977a4ba9cf9697ba33de12b3f819edeed2596cba820
# secret_token=782d44af360f3d7f41b86a15555f817cd67d1f6e880dff421bf23105c931ea70

## NOTE: you will need to change 600s to 600 in both the install and upgrade commands
## if working with Helm 2
echo installing helm chart...
helm install $DEPLOYMENT_NAME pangeo/pangeo --version 19.09.27-86dd66c --namespace=$DEPLOYMENT_NAME \
    --timeout 600s -f $HELM_SPEC \
    --set jupyterhub.proxy.https.hosts="{${URL}}" \
    --set jupyterhub.proxy.secretToken="${secret_token}" \
    --set jupyterhub.auth.github.clientId="${GITHUB_CLIENT_ID}" \
    --set jupyterhub.auth.github.clientSecret="${GITHUB_SECRET_TOKEN}" \
    --set jupyterhub.auth.github.callbackUrl="https://${URL}/hub/oauth_callback"

echo "waiting for cluster to boot"
sleep 120

echo "retrieving external IP"
EXTERNAL_IP=$(kubectl -n ${CLUSTER_NAME} get service proxy-public -o wide | awk '{print $4}' | tail -n1)

echo "IMPORTANT"
echo "To update the cluster, run the following command. Save this somewhere as you will need the secret tokens:"
echo

echo "helm upgrade ${DEPLOYMENT_NAME} pangeo/pangeo --version 19.09.27-86dd66c --timeout 600s --namespace=${DEPLOYMENT_NAME} -f $HELM_SPEC \\"
echo "  --set jupyterhub.proxy.service.loadBalancerIP=${EXTERNAL_IP} \\"
echo "  --set jupyterhub.proxy.https.hosts=\"{${URL}}\" \\"
echo "  --set jupyterhub.proxy.secretToken=\"${secret_token}\" \\"
echo "  --set jupyterhub.auth.github.clientId=\"<GITHUB_CLIENT_ID>\" \\"
echo "  --set jupyterhub.auth.github.clientSecret=\"<GITHUB_SECRET_TOKEN>\" \\"
echo "  --set jupyterhub.auth.github.callbackUrl=\"https://${URL}/hub/oauth_callback\""

# Complete the installation using the cluster deployment instructions
# https://paper.dropbox.com/doc/Cluster-Deployments--AgOxfFIh7eCjBgsbFjTjjMpOAg-TQN0OpVDCIR3zW5PGJSRf
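
For test deployments, the resources created above can be removed with the matching delete commands. A minimal teardown sketch assuming the same variables as the script (not part of the committed script):

# uninstall the chart (helm 3; use "helm delete --purge" on helm 2), then delete the cluster
helm uninstall $DEPLOYMENT_NAME --namespace $DEPLOYMENT_NAME
gcloud container clusters delete $CLUSTER_NAME --zone $ZONE --project $PROJECTID --quiet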