From fdc44574c32cb1cefdce7a2f51f505ab03055f9b Mon Sep 17 00:00:00 2001
From: Konrad Ponichtera <konpon96@gmail.com>
Date: Fri, 23 Sep 2022 00:04:32 +0200
Subject: [PATCH] Streamlined Jupyter notebooks for Terraform and experiment
 deployment (installing extractor, enabling/disabling autoscaling so that
 experiments don't end up deployed on the default node pool, removed changing
 working directory from within the notebook)

---
 jupyter/experiment_notebook.ipynb | 80 +++++++++++++++++++++++++++----
 jupyter/terraform_notebook.ipynb  | 10 ++--
 2 files changed, 77 insertions(+), 13 deletions(-)

diff --git a/jupyter/experiment_notebook.ipynb b/jupyter/experiment_notebook.ipynb
index 44eb224f..121d124d 100644
--- a/jupyter/experiment_notebook.ipynb
+++ b/jupyter/experiment_notebook.ipynb
@@ -74,7 +74,7 @@
    "source": [
     "# These commands might take a while to complete.\n",
     "gcloud container clusters resize $CLUSTER_NAME --node-pool $DEFAULT_POOL \\\n",
-    "     --num-nodes 1 --region us-central1-c --quiet"
+    "     --num-nodes 1 --region $REGION --quiet"
    ]
   },
   {
@@ -132,7 +132,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "helm install -n test extractor ../charts/extractor -f ../charts/fltk-values.yaml"
+    "helm upgrade --install -n test extractor ../charts/extractor -f ../charts/fltk-values.yaml \\\n",
+    "    --set provider.projectName=$PROJECT_ID"
    ]
   },
   {
@@ -200,9 +201,10 @@
    },
    "outputs": [],
    "source": [
-    "helm uninstall experiment-orchestrator -n test\n",
-    "helm install experiment-orchestrator ../charts/orchestrator --namespace test -f ../charts/fltk-values.yaml \\\n",
-    "  --set-file orchestrator.experiment=$EXPERIMENT_FILE,orchestrator.configuration=$CLUSTER_CONFIG\n"
+    "helm uninstall -n test experiment-orchestrator\n",
+    "helm install -n test experiment-orchestrator ../charts/orchestrator -f ../charts/fltk-values.yaml \\\n",
+    "    --set-file orchestrator.experiment=$EXPERIMENT_FILE,orchestrator.configuration=$CLUSTER_CONFIG \\\n",
+    "    --set provider.projectName=$PROJECT_ID"
    ]
   },
   {
@@ -216,7 +218,7 @@
    "outputs": [],
    "source": [
     "# To get logs from the orchestrator\n",
-    "kubectl logs -n test fl-learner"
+    "kubectl logs -n test fl-server"
    ]
   },
   {
@@ -236,6 +238,68 @@
     "kubectl logs -n test trainjob-eb056010-7c33-4c46-9559-b197afc7cb84-worker-0"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Copy experiment results from the extractor\n",
+    "\n",
+    "The extractor holds the experiment results in a format that can be processed by TensorBoard.\n",
+    "In order to download them to the local machine, execute:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "EXTRACTOR_POD_NAME=$(kubectl get pods -n test -l \"app.kubernetes.io/name=fltk.extractor\" -o jsonpath=\"{.items[0].metadata.name}\")\n",
+    "\n",
+    "kubectl cp -n test $EXTRACTOR_POD_NAME:/opt/federation-lab/logging ./logging"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Cleanup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Removing orchestrator"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "helm uninstall -n test experiment-orchestrator"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Removing extractor\n",
+    "\n",
+    "IMPORTANT: Removing the extractor chart will delete the already collected experiment results stored on the NFS!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "helm uninstall extractor -n test"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -244,7 +308,7 @@
     }
    },
    "source": [
-    "# Wrapping up\n",
+    "## Wrapping up\n",
     "\n",
     "To scale down the cluster nodepools, run the cell below. This will scale the node pools down and remove all the experiments deployed (on the cluster).\n",
     "\n",
@@ -266,7 +330,7 @@
     "kubectl delete pytorchjobs.kubeflow.org --all-namespaces --all\n",
     "\n",
     "gcloud container clusters resize $CLUSTER_NAME --node-pool $DEFAULT_POOL \\\n",
-    "     --num-nodes 0 --region $REGION --quiet\n",
+    "    --num-nodes 0 --region $REGION --quiet\n",
     "\n",
     "gcloud container clusters resize $CLUSTER_NAME --node-pool $EXPERIMENT_POOL \\\n",
     "    --num-nodes 0 --region $REGION --quiet"
diff --git a/jupyter/terraform_notebook.ipynb b/jupyter/terraform_notebook.ipynb
index e2539085..dbaffbbe 100644
--- a/jupyter/terraform_notebook.ipynb
+++ b/jupyter/terraform_notebook.ipynb
@@ -203,7 +203,7 @@
     "##################\n",
     "### CHANGE ME! ###\n",
     "##################\n",
-    "BILLING_ACCOUNT=\"015594-41687F-092941\"      "
+    "BILLING_ACCOUNT=\"015594-41687F-092941\""
    ]
   },
   {
@@ -310,7 +310,7 @@
     "##################\n",
     "### CHANGE ME! ###\n",
     "##################\n",
-    "OWNER_MAIL=\"jargsnork@gmail.com\"\n",
+    "OWNER_MAIL=\"mygoogleaccount@gmail.com\"\n",
     "\n",
     "gcloud iam service-accounts add-iam-policy-binding $PRIVILEGED_ACCOUNT_ID \\\n",
     " --member=\"user:$OWNER_MAIL\" \\\n",
@@ -480,11 +480,11 @@
    "outputs": [],
    "source": [
     "gcloud container clusters update $CLUSTER_NAME --node-pool $DEFAULT_POOL \\\n",
-    "    --disable-autoscaling --quiet\n",
+    "    --no-enable-autoscaling --region $REGION --quiet\n",
     "    \n",
     "# The high performance node will scale up automatically whenever the workloads are deployed\n",
     "gcloud container clusters update $CLUSTER_NAME --node-pool $EXPERIMENT_POOL \\\n",
-    "    --enable-autoscaling --quiet\n",
+    "    --enable-autoscaling --min-nodes=0 --max-nodes=10 --region $REGION --quiet\n",
     "\n",
     "gcloud container clusters resize $CLUSTER_NAME --node-pool $DEFAULT_POOL \\\n",
     "    --num-nodes 1 --region $REGION --quiet\n"
@@ -636,7 +636,7 @@
    "outputs": [],
    "source": [
     "# Retrieve all CRD Pytorchjob from Kubeflow.\n",
-    "kubectl get pytorchjobs.kubeflow.org --all-namespaces --all\n",
+    "kubectl get pytorchjobs.kubeflow.org --all-namespaces\n",
     "\n",
     "# Alternatively, we can remove all jobs, this will remove all information and logs as well.\n",
     "kubectl delete pytorchjobs.kubeflow.org --all-namespaces --all"