From 06d375eba8c79058ea7f1da6ebfc0758973c891f Mon Sep 17 00:00:00 2001
From: Yaron Haviv <yaronh@iguaz.io>
Date: Wed, 29 Jul 2020 16:10:52 +0300
Subject: [PATCH] add describe step

---
 gitops_project.ipynb | 419 +++++++++++++++++++++++++++++++++++++------
 project.yaml         |  10 +-
 workflow.py          |   7 +-
 3 files changed, 375 insertions(+), 61 deletions(-)
diff --git a/gitops_project.ipynb b/gitops_project.ipynb
index 10298d8..110f379 100644
--- a/gitops_project.ipynb
+++ b/gitops_project.ipynb
@@ -166,25 +166,23 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[mlrun] 2020-06-10 12:28:42,497 starting run iris_gen uid=40270e2513c14a8996949a8e8a5d4a7e  -> http://mlrun-api:8080\n",
-      "[mlrun] 2020-06-10 12:28:42,533 saving iris dataframe to /User/demo-github-actions/data\n",
-      "[mlrun] 2020-06-10 12:28:42,594 log artifact iris_dataset at /User/demo-github-actions/data/iris_dataset.csv, size: 2776, db: Y\n",
-      "\n"
+      "> 2020-07-29 10:38:35,433 [info] starting run iris_gen uid=3e340d3561ca402c91e9bb09b1631dd4  -> http://mlrun-api:8080\n",
+      "> 2020-07-29 10:38:35,518 [info] saving iris dataframe to /User/demo-github-actions/data\n"
      ]
     },
     {
      "data": {
       "text/html": [
-       "<style> \n",
+       "<style>\n",
        ".dictlist {\n",
-       "  background-color: #b3edff; \n",
-       "  text-align: center; \n",
-       "  margin: 4px; \n",
+       "  background-color: #b3edff;\n",
+       "  text-align: center;\n",
+       "  margin: 4px;\n",
        "  border-radius: 3px; padding: 0px 3px 1px 3px; display: inline-block;}\n",
        ".artifact {\n",
-       "  cursor: pointer; \n",
-       "  background-color: #ffe6cc; \n",
-       "  text-align: left; \n",
+       "  cursor: pointer;\n",
+       "  background-color: #ffe6cc;\n",
+       "  text-align: left;\n",
        "  margin: 4px; border-radius: 3px; padding: 0px 3px 1px 3px; display: inline-block;\n",
        "}\n",
        "div.block.hidden {\n",
@@ -268,7 +266,7 @@
        "\n",
        "  document.querySelector(panelName + \"-title\").innerHTML = el.title\n",
        "  iframe = document.querySelector(panelName + \"-body\");\n",
-       "  \n",
+       "\n",
        "  const tblcss = `<style> body { font-family: Arial, Helvetica, sans-serif;}\n",
        "    #csv { margin-bottom: 15px; }\n",
        "    #csv table { border-collapse: collapse;}\n",
@@ -278,13 +276,13 @@
        "    return '<div id=\"csv\"><table><tr><td>' +  str.replace(/[\\n\\r]+$/g, '').replace(/[\\n\\r]+/g, '</td></tr><tr><td>')\n",
        "      .replace(/,/g, '</td><td>') + '</td></tr></table></div>';\n",
        "  }\n",
-       "  \n",
+       "\n",
        "  function reqListener () {\n",
        "    if (el.title.endsWith(\".csv\")) {\n",
        "      iframe.setAttribute(\"srcdoc\", tblcss + csvToHtmlTable(this.responseText));\n",
        "    } else {\n",
        "      iframe.setAttribute(\"srcdoc\", this.responseText);\n",
-       "    }  \n",
+       "    }\n",
        "    console.log(this.responseText);\n",
        "  }\n",
        "\n",
@@ -292,8 +290,8 @@
        "  oReq.addEventListener(\"load\", reqListener);\n",
        "  oReq.open(\"GET\", el.title);\n",
        "  oReq.send();\n",
-       "  \n",
-       "  \n",
+       "\n",
+       "\n",
        "  //iframe.src = el.title;\n",
        "  const resultPane = document.querySelector(panelName + \"-pane\");\n",
        "  if (resultPane.classList.contains(\"hidden\")) {\n",
@@ -343,26 +341,26 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <td>gitops-project</td>\n",
-       "      <td><div title=\"40270e2513c14a8996949a8e8a5d4a7e\"><a href=\"https://mlrun-ui.default-tenant.app.yh55.iguazio-cd2.com/projects/gitops-project/jobs/40270e2513c14a8996949a8e8a5d4a7e/info\" target=\"_blank\" >...8a5d4a7e</a></div></td>\n",
+       "      <td><div title=\"3e340d3561ca402c91e9bb09b1631dd4\"><a href=\"https://mlrun-ui.default-tenant.app.cnhjntjojjps.iguazio-cd2.com/projects/gitops-project/jobs/3e340d3561ca402c91e9bb09b1631dd4/info\" target=\"_blank\" >...b1631dd4</a></div></td>\n",
        "      <td>0</td>\n",
-       "      <td>Jun 10 12:28:42</td>\n",
+       "      <td>Jul 29 10:38:35</td>\n",
        "      <td>completed</td>\n",
        "      <td>iris_gen</td>\n",
-       "      <td><div class=\"dictlist\">v3io_user=admin</div><div class=\"dictlist\">kind=handler</div><div class=\"dictlist\">owner=admin</div><div class=\"dictlist\">host=jupyter-65887d7ffb-5jsn2</div></td>\n",
+       "      <td><div class=\"dictlist\">v3io_user=admin</div><div class=\"dictlist\">kind=handler</div><div class=\"dictlist\">owner=admin</div><div class=\"dictlist\">host=jupyter-58d8fdb6fc-nmqbq</div></td>\n",
        "      <td></td>\n",
        "      <td></td>\n",
        "      <td></td>\n",
-       "      <td><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"resultd5ef5c94\" title=\"/files/demo-github-actions/data/iris_dataset.csv\">iris_dataset</div></td>\n",
+       "      <td><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result52946aee\" title=\"/files/demo-github-actions/data/iris_dataset.csv\">iris_dataset</div></td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div></div>\n",
-       "  <div id=\"resultd5ef5c94-pane\" class=\"right-pane block hidden\">\n",
+       "  <div id=\"result52946aee-pane\" class=\"right-pane block hidden\">\n",
        "    <div class=\"pane-header\">\n",
-       "      <span id=\"resultd5ef5c94-title\" class=\"pane-header-title\">Title</span>\n",
-       "      <span onclick=\"closePanel(this)\" paneName=\"resultd5ef5c94\" class=\"close clickable\">&times;</span>\n",
+       "      <span id=\"result52946aee-title\" class=\"pane-header-title\">Title</span>\n",
+       "      <span onclick=\"closePanel(this)\" paneName=\"result52946aee\" class=\"close clickable\">&times;</span>\n",
        "    </div>\n",
-       "    <iframe class=\"fileview\" id=\"resultd5ef5c94-body\"></iframe>\n",
+       "    <iframe class=\"fileview\" id=\"result52946aee-body\"></iframe>\n",
        "  </div>\n",
        "</div>\n"
       ],
@@ -378,8 +376,8 @@
      "output_type": "stream",
      "text": [
       "to track results use .show() or .logs() or in CLI: \n",
-      "!mlrun get run 40270e2513c14a8996949a8e8a5d4a7e --project gitops-project , !mlrun logs 40270e2513c14a8996949a8e8a5d4a7e --project gitops-project\n",
-      "[mlrun] 2020-06-10 12:28:42,646 run executed, status=completed\n"
+      "!mlrun get run 3e340d3561ca402c91e9bb09b1631dd4 --project gitops-project , !mlrun logs 3e340d3561ca402c91e9bb09b1631dd4 --project gitops-project\n",
+      "> 2020-07-29 10:38:35,641 [info] run executed, status=completed\n"
      ]
     }
    ],
@@ -404,7 +402,7 @@
     {
      "data": {
       "text/plain": [
-       "<mlrun.runtimes.kubejob.KubejobRuntime at 0x7f36f2cef438>"
+       "<mlrun.runtimes.kubejob.KubejobRuntime at 0x7f5d751dbe10>"
       ]
      },
      "execution_count": 8,
@@ -417,6 +415,300 @@
     "skproj.set_function(gen_func)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Analyze the dataset features (using a marketplace function)\n",
+    "Load the dataset analysis function (`describe`) from the function hub (marketplace) and print its documentation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "function: describe\n",
+      "describe and visualizes dataset stats\n",
+      "default handler: summarize\n",
+      "entry points:\n",
+      "  summarize: Summarize a table\n",
+      "    context(MLClientCtx)  - the function context, default=\n",
+      "    table(DataItem)  - MLRun input pointing to pandas dataframe (csv/parquet file path), default=\n",
+      "    label_column(str)  - ground truth column label, default=None\n",
+      "    class_labels(List[str])  - label for each class in tables and plots, default=[]\n",
+      "    plot_hist(bool)  - (True) set this to False for large tables, default=True\n",
+      "    plots_dest(str)  - destination folder of summary plots (relative to artifact_path), default=plots\n",
+      "    update_dataset  - when the table is a registered dataset update the charts in-place, default=False\n"
+     ]
+    }
+   ],
+   "source": [
+    "skproj.set_function('hub://describe', 'describe')\n",
+    "skproj.func('describe').doc()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Run the describe function on our dataset (as a Kubernetes job)\n",
+    "<b> using shared file system mount (`mount_v3io`) with our notebook.</b>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "> 2020-07-29 12:46:52,341 [info] starting run describe-summarize uid=301ab10adbf34adb898f0751c7f0f0b4  -> http://mlrun-api:8080\n",
+      "> 2020-07-29 12:46:52,497 [info] Job is running in the background, pod: describe-summarize-r9tvz\n",
+      "> 2020-07-29 12:47:01,761 [info] run executed, status=completed\n",
+      "final state: succeeded\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<style>\n",
+       ".dictlist {\n",
+       "  background-color: #b3edff;\n",
+       "  text-align: center;\n",
+       "  margin: 4px;\n",
+       "  border-radius: 3px; padding: 0px 3px 1px 3px; display: inline-block;}\n",
+       ".artifact {\n",
+       "  cursor: pointer;\n",
+       "  background-color: #ffe6cc;\n",
+       "  text-align: left;\n",
+       "  margin: 4px; border-radius: 3px; padding: 0px 3px 1px 3px; display: inline-block;\n",
+       "}\n",
+       "div.block.hidden {\n",
+       "  display: none;\n",
+       "}\n",
+       ".clickable {\n",
+       "  cursor: pointer;\n",
+       "}\n",
+       ".ellipsis {\n",
+       "  display: inline-block;\n",
+       "  max-width: 60px;\n",
+       "  white-space: nowrap;\n",
+       "  overflow: hidden;\n",
+       "  text-overflow: ellipsis;\n",
+       "}\n",
+       ".master-wrapper {\n",
+       "  display: flex;\n",
+       "  flex-flow: row nowrap;\n",
+       "  justify-content: flex-start;\n",
+       "  align-items: stretch;\n",
+       "}\n",
+       ".master-tbl {\n",
+       "  flex: 3\n",
+       "}\n",
+       ".master-wrapper > div {\n",
+       "  margin: 4px;\n",
+       "  padding: 10px;\n",
+       "}\n",
+       "iframe.fileview {\n",
+       "  border: 0 none;\n",
+       "  height: 100%;\n",
+       "  width: 100%;\n",
+       "  white-space: pre-wrap;\n",
+       "}\n",
+       ".pane-header-title {\n",
+       "  width: 80%;\n",
+       "  font-weight: 500;\n",
+       "}\n",
+       ".pane-header {\n",
+       "  line-height: 1;\n",
+       "  background-color: #ffe6cc;\n",
+       "  padding: 3px;\n",
+       "}\n",
+       ".pane-header .close {\n",
+       "  font-size: 20px;\n",
+       "  font-weight: 700;\n",
+       "  float: right;\n",
+       "  margin-top: -5px;\n",
+       "}\n",
+       ".master-wrapper .right-pane {\n",
+       "  border: 1px inset silver;\n",
+       "  width: 40%;\n",
+       "  min-height: 300px;\n",
+       "  flex: 3\n",
+       "  min-width: 500px;\n",
+       "}\n",
+       ".master-wrapper * {\n",
+       "  box-sizing: border-box;\n",
+       "}\n",
+       "</style><script>\n",
+       "function copyToClipboard(fld) {\n",
+       "    if (document.queryCommandSupported && document.queryCommandSupported('copy')) {\n",
+       "        var textarea = document.createElement('textarea');\n",
+       "        textarea.textContent = fld.innerHTML;\n",
+       "        textarea.style.position = 'fixed';\n",
+       "        document.body.appendChild(textarea);\n",
+       "        textarea.select();\n",
+       "\n",
+       "        try {\n",
+       "            return document.execCommand('copy'); // Security exception may be thrown by some browsers.\n",
+       "        } catch (ex) {\n",
+       "\n",
+       "        } finally {\n",
+       "            document.body.removeChild(textarea);\n",
+       "        }\n",
+       "    }\n",
+       "}\n",
+       "function expandPanel(el) {\n",
+       "  const panelName = \"#\" + el.getAttribute('paneName');\n",
+       "  console.log(el.title);\n",
+       "\n",
+       "  document.querySelector(panelName + \"-title\").innerHTML = el.title\n",
+       "  iframe = document.querySelector(panelName + \"-body\");\n",
+       "\n",
+       "  const tblcss = `<style> body { font-family: Arial, Helvetica, sans-serif;}\n",
+       "    #csv { margin-bottom: 15px; }\n",
+       "    #csv table { border-collapse: collapse;}\n",
+       "    #csv table td { padding: 4px 8px; border: 1px solid silver;} </style>`;\n",
+       "\n",
+       "  function csvToHtmlTable(str) {\n",
+       "    return '<div id=\"csv\"><table><tr><td>' +  str.replace(/[\\n\\r]+$/g, '').replace(/[\\n\\r]+/g, '</td></tr><tr><td>')\n",
+       "      .replace(/,/g, '</td><td>') + '</td></tr></table></div>';\n",
+       "  }\n",
+       "\n",
+       "  function reqListener () {\n",
+       "    if (el.title.endsWith(\".csv\")) {\n",
+       "      iframe.setAttribute(\"srcdoc\", tblcss + csvToHtmlTable(this.responseText));\n",
+       "    } else {\n",
+       "      iframe.setAttribute(\"srcdoc\", this.responseText);\n",
+       "    }\n",
+       "    console.log(this.responseText);\n",
+       "  }\n",
+       "\n",
+       "  const oReq = new XMLHttpRequest();\n",
+       "  oReq.addEventListener(\"load\", reqListener);\n",
+       "  oReq.open(\"GET\", el.title);\n",
+       "  oReq.send();\n",
+       "\n",
+       "\n",
+       "  //iframe.src = el.title;\n",
+       "  const resultPane = document.querySelector(panelName + \"-pane\");\n",
+       "  if (resultPane.classList.contains(\"hidden\")) {\n",
+       "    resultPane.classList.remove(\"hidden\");\n",
+       "  }\n",
+       "}\n",
+       "function closePanel(el) {\n",
+       "  const panelName = \"#\" + el.getAttribute('paneName')\n",
+       "  const resultPane = document.querySelector(panelName + \"-pane\");\n",
+       "  if (!resultPane.classList.contains(\"hidden\")) {\n",
+       "    resultPane.classList.add(\"hidden\");\n",
+       "  }\n",
+       "}\n",
+       "\n",
+       "</script>\n",
+       "<div class=\"master-wrapper\">\n",
+       "  <div class=\"block master-tbl\"><div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th>project</th>\n",
+       "      <th>uid</th>\n",
+       "      <th>iter</th>\n",
+       "      <th>start</th>\n",
+       "      <th>state</th>\n",
+       "      <th>name</th>\n",
+       "      <th>labels</th>\n",
+       "      <th>inputs</th>\n",
+       "      <th>parameters</th>\n",
+       "      <th>results</th>\n",
+       "      <th>artifacts</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <td>gitops-project</td>\n",
+       "      <td><div title=\"301ab10adbf34adb898f0751c7f0f0b4\"><a href=\"https://mlrun-ui.default-tenant.app.cnhjntjojjps.iguazio-cd2.com/projects/gitops-project/jobs/301ab10adbf34adb898f0751c7f0f0b4/info\" target=\"_blank\" >...c7f0f0b4</a></div></td>\n",
+       "      <td>0</td>\n",
+       "      <td>Jul 29 12:46:57</td>\n",
+       "      <td>completed</td>\n",
+       "      <td>describe-summarize</td>\n",
+       "      <td><div class=\"dictlist\">v3io_user=admin</div><div class=\"dictlist\">kind=job</div><div class=\"dictlist\">owner=admin</div><div class=\"dictlist\">host=describe-summarize-r9tvz</div></td>\n",
+       "      <td><div title=\"store://gitops-project/iris_gen_iris_dataset#3e340d3561ca402c91e9bb09b1631dd4\">table</div></td>\n",
+       "      <td><div class=\"dictlist\">label_column=label</div></td>\n",
+       "      <td></td>\n",
+       "      <td><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result13fa91d5\" title=\"/files/demo-github-actions/plots/hist.html\">histograms</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result13fa91d5\" title=\"/files/demo-github-actions/plots/violin.html\">violin</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result13fa91d5\" title=\"/files/demo-github-actions/plots/imbalance.html\">imbalance</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result13fa91d5\" title=\"/files/demo-github-actions/plots/imbalance-weights-vec.csv\">imbalance-weights-vec</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result13fa91d5\" title=\"/files/demo-github-actions/plots/correlation-matrix.csv\">correlation-matrix</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result13fa91d5\" title=\"/files/demo-github-actions/plots/corr.html\">correlation</div></td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div></div>\n",
+       "  <div id=\"result13fa91d5-pane\" class=\"right-pane block hidden\">\n",
+       "    <div class=\"pane-header\">\n",
+       "      <span id=\"result13fa91d5-title\" class=\"pane-header-title\">Title</span>\n",
+       "      <span onclick=\"closePanel(this)\" paneName=\"result13fa91d5\" class=\"close clickable\">&times;</span>\n",
+       "    </div>\n",
+       "    <iframe class=\"fileview\" id=\"result13fa91d5-body\"></iframe>\n",
+       "  </div>\n",
+       "</div>\n"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "to track results use .show() or .logs() or in CLI: \n",
+      "!mlrun get run 301ab10adbf34adb898f0751c7f0f0b4 --project gitops-project , !mlrun logs 301ab10adbf34adb898f0751c7f0f0b4 --project gitops-project\n",
+      "> 2020-07-29 12:47:11,671 [info] run executed, status=completed\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<mlrun.model.RunObject at 0x7f5d74f87d10>"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "skproj.func('describe').apply(mount_v3io()).run(params={'label_column': 'label'}, \n",
+    "                                                inputs={\"table\": gen.outputs['iris_dataset']}, \n",
+    "                                                artifact_path=artifact_path)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -436,7 +728,7 @@
     {
      "data": {
       "text/plain": [
-       "<mlrun.runtimes.kubejob.KubejobRuntime at 0x7f36e75d45c0>"
+       "<mlrun.runtimes.kubejob.KubejobRuntime at 0x7f5d74fd8d50>"
       ]
      },
      "execution_count": 9,
@@ -476,7 +768,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -514,7 +806,6 @@
     ")\n",
     "def kfpipeline():\n",
     "    \n",
-    "    \n",
     "    # run the ingestion function with the new image and params\n",
     "    ingest = funcs['gen-iris'].as_step(\n",
     "        name=\"get-data\",\n",
@@ -522,6 +813,12 @@
     "        params={'format': 'pq'},\n",
     "        outputs=[DATASET])\n",
     "\n",
+    "    # analyze our dataset\n",
+    "    describe = funcs[\"describe\"].as_step(\n",
+    "        name=\"summary\",\n",
+    "        params={\"label_column\": LABELS},\n",
+    "        inputs={\"table\": ingest.outputs[DATASET]})\n",
+    "    \n",
     "    # train with hyper-paremeters\n",
     "    train = funcs[\"train\"].as_step(\n",
     "        name=\"train\",\n",
@@ -555,7 +852,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -572,11 +869,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
-    "skproj.artifact_path = 'v3io:///users/admin/pipe/{{workflow.uid}}'\n",
+    "skproj.artifact_path = 'v3io:///users/{{run.user}}/pipe/{{workflow.uid}}'\n",
     "skproj.save()"
    ]
   },
@@ -593,7 +890,7 @@
     "The workflow ID is returned and can be used to track the progress or you can use the hyperlinks\n",
     "\n",
     "> Note: The same command can be issued through CLI commands:<br>\n",
-    "    `mlrun project my-proj/ -r main -p \"v3io:///users/admin/mlrun/kfp/{{workflow.uid}}/\"`\n",
+    "    `mlrun project my-proj/ -r main -p \"v3io:///users/{{run.user}}/mlrun/kfp/{{workflow.uid}}/\"`\n",
     "\n",
     "The `dirty` flag allow us to run a project with uncommited changes (when the notebook is in the same git dir it will always be dirty)<br>\n",
     "The `watch` flag will wait for the pipeline to complete"
@@ -601,7 +898,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -611,13 +908,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
-       "Experiment link <a href=\"https://dashboard.default-tenant.app.yh55.iguazio-cd2.com/pipelines/#/experiments/details/e359f6d0-4221-40fe-b2e7-da841904ea9d\" target=\"_blank\" >here</a>"
+       "Experiment link <a href=\"https://dashboard.default-tenant.app.cnhjntjojjps.iguazio-cd2.com/pipelines/#/experiments/details/3070cb06-629f-4ff2-9123-81a0d9751d83\" target=\"_blank\" >here</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -629,7 +926,7 @@
     {
      "data": {
       "text/html": [
-       "Run link <a href=\"https://dashboard.default-tenant.app.yh55.iguazio-cd2.com/pipelines/#/runs/details/7a60c6c1-b765-4702-919e-29ceec31dc20\" target=\"_blank\" >here</a>"
+       "Run link <a href=\"https://dashboard.default-tenant.app.cnhjntjojjps.iguazio-cd2.com/pipelines/#/runs/details/8f462295-2154-428a-b861-4ec8be504832\" target=\"_blank\" >here</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -642,14 +939,14 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[mlrun] 2020-06-10 12:29:40,557 Pipeline run id=7a60c6c1-b765-4702-919e-29ceec31dc20, check UI or DB for progress\n",
-      "[mlrun] 2020-06-10 12:29:40,558 waiting for pipeline run completion\n"
+      "> 2020-07-29 13:04:18,155 [info] Pipeline run id=8f462295-2154-428a-b861-4ec8be504832, check UI or DB for progress\n",
+      "> 2020-07-29 13:04:18,156 [info] waiting for pipeline run completion\n"
      ]
     },
     {
      "data": {
       "text/html": [
-       "<h2>Run Results</h2>Workflow 7a60c6c1-b765-4702-919e-29ceec31dc20 finished, status=Succeeded<br>click the hyper links below to see detailed results<br><table border=\"1\" class=\"dataframe\">\n",
+       "<h2>Run Results</h2>Workflow 8f462295-2154-428a-b861-4ec8be504832 finished, status=Succeeded<br>click the hyper links below to see detailed results<br><table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th>uid</th>\n",
@@ -662,36 +959,44 @@
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
-       "      <td><div title=\"3425cf3fa62a4ecba42075369005fbe5\"><a href=\"https://mlrun-ui.default-tenant.app.yh55.iguazio-cd2.com/projects/gitops-project/jobs/3425cf3fa62a4ecba42075369005fbe5/info\" target=\"_blank\" >...9005fbe5</a></div></td>\n",
-       "      <td>Jun 10 12:30:29</td>\n",
+       "      <td><div title=\"6f2713272e674d748a481d4a7a29c0aa\"><a href=\"https://mlrun-ui.default-tenant.app.cnhjntjojjps.iguazio-cd2.com/projects/gitops-project/jobs/6f2713272e674d748a481d4a7a29c0aa/info\" target=\"_blank\" >...7a29c0aa</a></div></td>\n",
+       "      <td>Jul 29 13:05:03</td>\n",
        "      <td>completed</td>\n",
        "      <td>model-tester</td>\n",
-       "      <td><div class=\"dictlist\">total_tests=15</div><div class=\"dictlist\">errors=0</div><div class=\"dictlist\">match=14</div><div class=\"dictlist\">avg_latency=12694</div><div class=\"dictlist\">min_latency=11730</div><div class=\"dictlist\">max_latency=16535</div></td>\n",
-       "      <td><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/7a60c6c1-b765-4702-919e-29ceec31dc20/latency.html\">latency</div></td>\n",
+       "      <td><div class=\"dictlist\">total_tests=15</div><div class=\"dictlist\">errors=0</div><div class=\"dictlist\">match=14</div><div class=\"dictlist\">avg_latency=11446</div><div class=\"dictlist\">min_latency=11047</div><div class=\"dictlist\">max_latency=12131</div></td>\n",
+       "      <td><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/8f462295-2154-428a-b861-4ec8be504832/latency.html\">latency</div></td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <td><div title=\"4ba1414e689a4a7cb86a7d1b9c77295f\"><a href=\"https://mlrun-ui.default-tenant.app.yh55.iguazio-cd2.com/projects/gitops-project/jobs/4ba1414e689a4a7cb86a7d1b9c77295f/info\" target=\"_blank\" >...9c77295f</a></div></td>\n",
-       "      <td>Jun 10 12:30:16</td>\n",
+       "      <td><div title=\"655390e8e25d4c4a99356d4579bfc3ac\"><a href=\"https://mlrun-ui.default-tenant.app.cnhjntjojjps.iguazio-cd2.com/projects/gitops-project/jobs/655390e8e25d4c4a99356d4579bfc3ac/info\" target=\"_blank\" >...79bfc3ac</a></div></td>\n",
+       "      <td>Jul 29 13:04:54</td>\n",
        "      <td>completed</td>\n",
        "      <td>test</td>\n",
-       "      <td><div class=\"dictlist\">rocauc=0.46440904774238106</div><div class=\"dictlist\">avg_precscore=0.40055555555555555</div><div class=\"dictlist\">accuracy=0.9333333333333333</div><div class=\"dictlist\">f1_score=0.9333333333333333</div></td>\n",
-       "      <td><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/7a60c6c1-b765-4702-919e-29ceec31dc20/plots/roc.html\">roc</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/7a60c6c1-b765-4702-919e-29ceec31dc20/plots//tmp/tmpyjy7b5ys-confusion.html\">confusion</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/7a60c6c1-b765-4702-919e-29ceec31dc20/plots/featimp.html\">featimp</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/7a60c6c1-b765-4702-919e-29ceec31dc20/featimp-tbl.csv\">featimp-tbl</div><div title=\"v3io:///users/admin/pipe/7a60c6c1-b765-4702-919e-29ceec31dc20/test_set_preds.parquet\">test_set_preds</div></td>\n",
+       "      <td><div class=\"dictlist\">accuracy=0.9333333333333333</div><div class=\"dictlist\">test-error=0.06666666666666667</div><div class=\"dictlist\">auc-micro=0.9655555555555556</div><div class=\"dictlist\">auc-weighted=0.9888888888888889</div><div class=\"dictlist\">f1-score=0.9137254901960784</div><div class=\"dictlist\">precision_score=0.8888888888888888</div><div class=\"dictlist\">recall_score=0.9629629629629629</div></td>\n",
+       "      <td><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/8f462295-2154-428a-b861-4ec8be504832/plots/confusion-matrix.html\">confusion-matrix</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/8f462295-2154-428a-b861-4ec8be504832/plots/feature-importances.html\">feature-importances</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/8f462295-2154-428a-b861-4ec8be504832/plots/precision-recall-multiclass.html\">precision-recall-multiclass</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/8f462295-2154-428a-b861-4ec8be504832/plots/roc-multiclass.html\">roc-multiclass</div><div title=\"v3io:///users/admin/pipe/8f462295-2154-428a-b861-4ec8be504832/test_set_preds.parquet\">test_set_preds</div></td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td><div title=\"05c0d074d5074627996073d91ecf6eb3\"><a href=\"https://mlrun-ui.default-tenant.app.cnhjntjojjps.iguazio-cd2.com/projects/gitops-project/jobs/05c0d074d5074627996073d91ecf6eb3/info\" target=\"_blank\" >...1ecf6eb3</a></div></td>\n",
+       "      <td>Jul 29 13:04:37</td>\n",
+       "      <td>completed</td>\n",
+       "      <td>summary</td>\n",
+       "      <td></td>\n",
+       "      <td><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/8f462295-2154-428a-b861-4ec8be504832/plots/hist.html\">histograms</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/8f462295-2154-428a-b861-4ec8be504832/plots/violin.html\">violin</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/8f462295-2154-428a-b861-4ec8be504832/plots/imbalance.html\">imbalance</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/8f462295-2154-428a-b861-4ec8be504832/plots/imbalance-weights-vec.csv\">imbalance-weights-vec</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/8f462295-2154-428a-b861-4ec8be504832/plots/correlation-matrix.csv\">correlation-matrix</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/8f462295-2154-428a-b861-4ec8be504832/plots/corr.html\">correlation</div></td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <td><div title=\"a15f463d180e432eb5cf9980a52ec175\"><a href=\"https://mlrun-ui.default-tenant.app.yh55.iguazio-cd2.com/projects/gitops-project/jobs/a15f463d180e432eb5cf9980a52ec175/info\" target=\"_blank\" >...a52ec175</a></div></td>\n",
-       "      <td>Jun 10 12:30:00</td>\n",
+       "      <td><div title=\"5ffee3d96618489a89a7b37d60321e1e\"><a href=\"https://mlrun-ui.default-tenant.app.cnhjntjojjps.iguazio-cd2.com/projects/gitops-project/jobs/5ffee3d96618489a89a7b37d60321e1e/info\" target=\"_blank\" >...60321e1e</a></div></td>\n",
+       "      <td>Jul 29 13:04:36</td>\n",
        "      <td>completed</td>\n",
        "      <td>train</td>\n",
-       "      <td><div class=\"dictlist\">best_iteration=1</div><div class=\"dictlist\">rocauc=0.9945117845117846</div><div class=\"dictlist\">accuracy=0.9705882352941176</div><div class=\"dictlist\">f1_score=0.9705882352941176</div></td>\n",
-       "      <td><div title=\"v3io:///users/admin/pipe/7a60c6c1-b765-4702-919e-29ceec31dc20/1/test_set.parquet\">test_set</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/7a60c6c1-b765-4702-919e-29ceec31dc20/1/plots/train/roc.html\">roc</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/7a60c6c1-b765-4702-919e-29ceec31dc20/1/plots/train/confusion.html\">confusion</div><div title=\"v3io:///users/admin/pipe/7a60c6c1-b765-4702-919e-29ceec31dc20/1/models/\">model</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/7a60c6c1-b765-4702-919e-29ceec31dc20/iteration_results.csv\">iteration_results</div></td>\n",
+       "      <td><div class=\"dictlist\">best_iteration=1</div><div class=\"dictlist\">accuracy=0.9705882352941176</div><div class=\"dictlist\">test-error=0.029411764705882353</div><div class=\"dictlist\">auc-micro=0.9969723183391004</div><div class=\"dictlist\">auc-weighted=0.9949732620320856</div><div class=\"dictlist\">f1-score=0.9679633867276888</div><div class=\"dictlist\">precision_score=0.9666666666666667</div><div class=\"dictlist\">recall_score=0.9722222222222222</div></td>\n",
+       "      <td><div title=\"v3io:///users/admin/pipe/8f462295-2154-428a-b861-4ec8be504832/data/1/test_set.parquet\">test_set</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/8f462295-2154-428a-b861-4ec8be504832/model/plots/1/confusion-matrix.html\">confusion-matrix</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/8f462295-2154-428a-b861-4ec8be504832/model/plots/1/feature-importances.html\">feature-importances</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/8f462295-2154-428a-b861-4ec8be504832/model/plots/1/precision-recall-multiclass.html\">precision-recall-multiclass</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/8f462295-2154-428a-b861-4ec8be504832/model/plots/1/roc-multiclass.html\">roc-multiclass</div><div title=\"v3io:///users/admin/pipe/8f462295-2154-428a-b861-4ec8be504832/model/1/\">model</div><div class=\"artifact\" onclick=\"expandPanel(this)\" paneName=\"result\" title=\"files/v3io/users/admin/pipe/8f462295-2154-428a-b861-4ec8be504832/iteration_results.csv\">iteration_results</div></td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <td><div title=\"b0e33584675d4835a0feed331be90d31\"><a href=\"https://mlrun-ui.default-tenant.app.yh55.iguazio-cd2.com/projects/gitops-project/jobs/b0e33584675d4835a0feed331be90d31/info\" target=\"_blank\" >...1be90d31</a></div></td>\n",
-       "      <td>Jun 10 12:29:50</td>\n",
+       "      <td><div title=\"5bb988fa14d94b839c31db1eb05fce96\"><a href=\"https://mlrun-ui.default-tenant.app.cnhjntjojjps.iguazio-cd2.com/projects/gitops-project/jobs/5bb988fa14d94b839c31db1eb05fce96/info\" target=\"_blank\" >...b05fce96</a></div></td>\n",
+       "      <td>Jul 29 13:04:26</td>\n",
        "      <td>completed</td>\n",
        "      <td>get-data</td>\n",
        "      <td></td>\n",
-       "      <td><div title=\"v3io:///users/admin/pipe/7a60c6c1-b765-4702-919e-29ceec31dc20/iris_dataset.parquet\">iris_dataset</div></td>\n",
+       "      <td><div title=\"v3io:///users/admin/pipe/8f462295-2154-428a-b861-4ec8be504832/iris_dataset.parquet\">iris_dataset</div></td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>"
@@ -734,7 +1039,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.8"
+   "version": "3.7.6"
   }
  },
  "nbformat": 4,
diff --git a/project.yaml b/project.yaml
index 2702742..14cf899 100644
--- a/project.yaml
+++ b/project.yaml
@@ -19,15 +19,17 @@ functions:
           doc: ''
           parameters:
           - name: context
+            default: ''
           - name: format
             default: csv
-          outputs: []
+          outputs:
+          - default: ''
           lineno: 11
       description: ''
       build:
         functionSourceCode: IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IG9zCmZyb20gc2tsZWFybi5kYXRhc2V0cyBpbXBvcnQgbG9hZF9pcmlzCmZyb20gc2tsZWFybi5tb2RlbF9zZWxlY3Rpb24gaW1wb3J0IHRyYWluX3Rlc3Rfc3BsaXQKaW1wb3J0IG51bXB5IGFzIG5wCmZyb20gc2tsZWFybi5tZXRyaWNzIGltcG9ydCBhY2N1cmFjeV9zY29yZQpmcm9tIG1scnVuLmFydGlmYWN0cyBpbXBvcnQgVGFibGVBcnRpZmFjdCwgUGxvdEFydGlmYWN0CmltcG9ydCBwYW5kYXMgYXMgcGQKCmRlZiBpcmlzX2dlbmVyYXRvcihjb250ZXh0LCBmb3JtYXQ9J2NzdicpOgogICAgaXJpcyA9IGxvYWRfaXJpcygpCiAgICBpcmlzX2RhdGFzZXQgPSBwZC5EYXRhRnJhbWUoZGF0YT1pcmlzLmRhdGEsIGNvbHVtbnM9aXJpcy5mZWF0dXJlX25hbWVzKQogICAgaXJpc19sYWJlbHMgPSBwZC5EYXRhRnJhbWUoZGF0YT1pcmlzLnRhcmdldCwgY29sdW1ucz1bJ2xhYmVsJ10pCiAgICBpcmlzX2RhdGFzZXQgPSBwZC5jb25jYXQoW2lyaXNfZGF0YXNldCwgaXJpc19sYWJlbHNdLCBheGlzPTEpCiAgICAKICAgIGNvbnRleHQubG9nZ2VyLmluZm8oJ3NhdmluZyBpcmlzIGRhdGFmcmFtZSB0byB7fScuZm9ybWF0KGNvbnRleHQuYXJ0aWZhY3RfcGF0aCkpCiAgICBjb250ZXh0LmxvZ19kYXRhc2V0KCdpcmlzX2RhdGFzZXQnLCBkZj1pcmlzX2RhdGFzZXQsIGZvcm1hdD1mb3JtYXQsIGluZGV4PUZhbHNlKQoK
         commands: []
-        code_origin: https://github.com/mlrun/demo-github-actions.git#3395573d8f1c7ad4725314afb3d067751bbea465:gen_iris.ipynb
+        code_origin: https://github.com/mlrun/demo-github-actions.git#0e717588b1354d3d60cd96ba5c352d71aace0552
 - url: hub://sklearn_classifier
   name: train
 - url: hub://test_classifier
@@ -36,8 +38,10 @@ functions:
   name: serving
 - url: hub://model_server_tester
   name: live_tester
+- url: hub://describe
+  name: describe
 workflows:
 - name: main
   path: workflow.py
 artifacts: []
-artifact_path: v3io:///users/admin/pipe/{{workflow.uid}}
+artifact_path: v3io:///users/{{run.user}}/pipe/{{workflow.uid}}
diff --git a/workflow.py b/workflow.py
index 91606c5..23961c8 100644
--- a/workflow.py
+++ b/workflow.py
@@ -23,7 +23,6 @@ def init_functions(functions: dict, project=None, secrets=None):
 )
 def kfpipeline():
     
-    
     # run the ingestion function with the new image and params
     ingest = funcs['gen-iris'].as_step(
         name="get-data",
@@ -31,6 +30,12 @@ def kfpipeline():
         params={'format': 'pq'},
         outputs=[DATASET])
 
+    # analyze our dataset
+    describe = funcs["describe"].as_step(
+        name="summary",
+        params={"label_column": LABELS},
+        inputs={"table": ingest.outputs[DATASET]})
+    
     # train with hyper-paremeters
     train = funcs["train"].as_step(
         name="train",

uid
...9005fbe5	Jun 10 12:30:29	...7a29c0aa	Jul 29 13:05:03	completed	model-tester	total_tests=15 errors=0 match=14 avg_latency=12694 min_latency=11730 max_latency=16535	latency	total_tests=15 errors=0 match=14 avg_latency=11446 min_latency=11047 max_latency=12131	latency
...9c77295f	Jun 10 12:30:16	...79bfc3ac	Jul 29 13:04:54	completed	test	rocauc=0.46440904774238106 avg_precscore=0.40055555555555555 accuracy=0.9333333333333333 f1_score=0.9333333333333333	roc confusion featimp featimp-tbl test_set_preds	accuracy=0.9333333333333333 test-error=0.06666666666666667 auc-micro=0.9655555555555556 auc-weighted=0.9888888888888889 f1-score=0.9137254901960784 precision_score=0.8888888888888888 recall_score=0.9629629629629629	confusion-matrix feature-importances precision-recall-multiclass roc-multiclass test_set_preds
...1ecf6eb3	Jul 29 13:04:37	completed	summary		histograms violin imbalance imbalance-weights-vec correlation-matrix correlation
...a52ec175	Jun 10 12:30:00	...60321e1e	Jul 29 13:04:36	completed	train	best_iteration=1 rocauc=0.9945117845117846 accuracy=0.9705882352941176 f1_score=0.9705882352941176	test_set roc confusion model iteration_results	best_iteration=1 accuracy=0.9705882352941176 test-error=0.029411764705882353 auc-micro=0.9969723183391004 auc-weighted=0.9949732620320856 f1-score=0.9679633867276888 precision_score=0.9666666666666667 recall_score=0.9722222222222222	test_set confusion-matrix feature-importances precision-recall-multiclass roc-multiclass model iteration_results
...1be90d31	Jun 10 12:29:50	...b05fce96	Jul 29 13:04:26	completed	get-data		iris_dataset	iris_dataset