From ac2d6c0aeed28a17ed334647b18914d657d5b902 Mon Sep 17 00:00:00 2001
From: Biel Stela <biel.stela@gmail.com>
Date: Wed, 11 Sep 2024 15:30:31 +0200
Subject: [PATCH] removes all outputs from notebooks

---
 science/notebooks/merge_entrega_roberto.ipynb | 168 ++++--------------
 1 file changed, 35 insertions(+), 133 deletions(-)

diff --git a/science/notebooks/merge_entrega_roberto.ipynb b/science/notebooks/merge_entrega_roberto.ipynb
index cc3cf618..f1ea486d 100644
--- a/science/notebooks/merge_entrega_roberto.ipynb
+++ b/science/notebooks/merge_entrega_roberto.ipynb
@@ -2,162 +2,70 @@
  "cells": [
   {
    "cell_type": "code",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2024-09-09T08:30:08.332078Z",
-     "start_time": "2024-09-09T08:30:08.112470Z"
-    }
-   },
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
-    "import polars as pl\n",
     "from pathlib import Path\n",
-    "import h3ronpy.polars"
-   ],
-   "outputs": [],
-   "execution_count": 1
+    "\n",
+    "import h3ronpy.polars  # noqa: F401 - side-effect import, registers the polars .h3 namespace\n",
+    "import polars as pl"
+   ]
   },
   {
    "cell_type": "code",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2024-09-09T08:37:46.700129Z",
-     "start_time": "2024-09-09T08:37:46.697392Z"
-    }
-   },
-   "source": "csvs = list(Path(\"../data/raw/ENTREGA UNO MUESTRAS HEXA CSV 18072024\").glob(\"*.CSV\"))",
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
-   "execution_count": 39
+   "source": [
+    "csvs = list(Path(\"../data/raw/ENTREGA UNO MUESTRAS HEXA CSV 18072024\").glob(\"*.CSV\"))"
+   ]
   },
   {
    "cell_type": "code",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2024-09-09T08:37:50.987739Z",
-     "start_time": "2024-09-09T08:37:50.852450Z"
-    }
-   },
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "dfs = [pl.read_csv(f, separator=\";\", decimal_comma=True) for f in csvs]\n",
     "df = pl.concat(dfs, how=\"align\", rechunk=True)\n",
     "df.head()"
-   ],
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "shape: (5, 9)\n",
-       "┌──────────────┬───────┬────────────┬────────────┬───┬───────────┬──────────┬───────────┬──────────┐\n",
-       "│ GRID_ID      ┆ FRECF ┆ AMIN       ┆ AMAX       ┆ … ┆ TREEPERCT ┆ PMIN     ┆ PMAX      ┆ PMEAN    │\n",
-       "│ ---          ┆ ---   ┆ ---        ┆ ---        ┆   ┆ ---       ┆ ---      ┆ ---       ┆ ---      │\n",
-       "│ str          ┆ i64   ┆ f64        ┆ f64        ┆   ┆ f64       ┆ f64      ┆ f64       ┆ f64      │\n",
-       "╞══════════════╪═══════╪════════════╪════════════╪═══╪═══════════╪══════════╪═══════════╪══════════╡\n",
-       "│ 865f00007fff ┆ null  ┆ 114.678246 ┆ 209.731842 ┆ … ┆ 100.0     ┆ 0.058348 ┆ 7.531753  ┆ 1.69093  │\n",
-       "│ fff          ┆       ┆            ┆            ┆   ┆           ┆          ┆           ┆          │\n",
-       "│ 865f0000ffff ┆ null  ┆ 127.660339 ┆ 705.040772 ┆ … ┆ 99.985832 ┆ 0.148311 ┆ 31.043549 ┆ 6.346733 │\n",
-       "│ fff          ┆       ┆            ┆            ┆   ┆           ┆          ┆           ┆          │\n",
-       "│ 865f00017fff ┆ null  ┆ 117.937508 ┆ 175.799759 ┆ … ┆ 100.0     ┆ 0.028819 ┆ 2.731335  ┆ 1.063382 │\n",
-       "│ fff          ┆       ┆            ┆            ┆   ┆           ┆          ┆           ┆          │\n",
-       "│ 865f0001ffff ┆ null  ┆ 123.765045 ┆ 193.208282 ┆ … ┆ 100.0     ┆ 0.047981 ┆ 4.67722   ┆ 1.557258 │\n",
-       "│ fff          ┆       ┆            ┆            ┆   ┆           ┆          ┆           ┆          │\n",
-       "│ 865f00027fff ┆ null  ┆ 111.118088 ┆ 277.398895 ┆ … ┆ 100.0     ┆ 0.144035 ┆ 12.342467 ┆ 2.193349 │\n",
-       "│ fff          ┆       ┆            ┆            ┆   ┆           ┆          ┆           ┆          │\n",
-       "└──────────────┴───────┴────────────┴────────────┴───┴───────────┴──────────┴───────────┴──────────┘"
-      ],
-      "text/html": [
-       "<div><style>\n",
-       ".dataframe > thead > tr,\n",
-       ".dataframe > tbody > tr {\n",
-       "  text-align: right;\n",
-       "  white-space: pre-wrap;\n",
-       "}\n",
-       "</style>\n",
-       "<small>shape: (5, 9)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>GRID_ID</th><th>FRECF</th><th>AMIN</th><th>AMAX</th><th>AMEAN</th><th>TREEPERCT</th><th>PMIN</th><th>PMAX</th><th>PMEAN</th></tr><tr><td>str</td><td>i64</td><td>f64</td><td>f64</td><td>f64</td><td>f64</td><td>f64</td><td>f64</td><td>f64</td></tr></thead><tbody><tr><td>&quot;865f00007ffffff&quot;</td><td>null</td><td>114.678246</td><td>209.731842</td><td>149.513126</td><td>100.0</td><td>0.058348</td><td>7.531753</td><td>1.69093</td></tr><tr><td>&quot;865f0000fffffff&quot;</td><td>null</td><td>127.660339</td><td>705.040772</td><td>245.461013</td><td>99.985832</td><td>0.148311</td><td>31.043549</td><td>6.346733</td></tr><tr><td>&quot;865f00017ffffff&quot;</td><td>null</td><td>117.937508</td><td>175.799759</td><td>145.636984</td><td>100.0</td><td>0.028819</td><td>2.731335</td><td>1.063382</td></tr><tr><td>&quot;865f0001fffffff&quot;</td><td>null</td><td>123.765045</td><td>193.208282</td><td>156.474098</td><td>100.0</td><td>0.047981</td><td>4.67722</td><td>1.557258</td></tr><tr><td>&quot;865f00027ffffff&quot;</td><td>null</td><td>111.118088</td><td>277.398895</td><td>146.417323</td><td>100.0</td><td>0.144035</td><td>12.342467</td><td>2.193349</td></tr></tbody></table></div>"
-      ]
-     },
-     "execution_count": 42,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "execution_count": 42
+   ]
   },
   {
    "cell_type": "code",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2024-09-09T08:37:51.695475Z",
-     "start_time": "2024-09-09T08:37:51.673626Z"
-    }
-   },
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "df = df.with_columns(pl.col(\"GRID_ID\").h3.cells_parse())\n",
     "df = df.drop(\"GRID_ID\")"
-   ],
-   "outputs": [],
-   "execution_count": 43
+   ]
   },
   {
    "cell_type": "code",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2024-09-09T08:37:52.296769Z",
-     "start_time": "2024-09-09T08:37:52.286054Z"
-    }
-   },
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "df.select(pl.col(\"cell\").h3.cells_resolution()).unique()"
-   ],
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "shape: (1, 1)\n",
-       "┌────────────┐\n",
-       "│ resolution │\n",
-       "│ ---        │\n",
-       "│ u8         │\n",
-       "╞════════════╡\n",
-       "│ 6          │\n",
-       "└────────────┘"
-      ],
-      "text/html": [
-       "<div><style>\n",
-       ".dataframe > thead > tr,\n",
-       ".dataframe > tbody > tr {\n",
-       "  text-align: right;\n",
-       "  white-space: pre-wrap;\n",
-       "}\n",
-       "</style>\n",
-       "<small>shape: (1, 1)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>resolution</th></tr><tr><td>u8</td></tr></thead><tbody><tr><td>6</td></tr></tbody></table></div>"
-      ]
-     },
-     "execution_count": 44,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "execution_count": 44
+   ]
   },
   {
    "cell_type": "code",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2024-09-09T08:37:53.457159Z",
-     "start_time": "2024-09-09T08:37:53.382329Z"
-    }
-   },
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "CELLS_RES = 6\n",
     "OVERVIEW_LEVEL = CELLS_RES - 5\n",
     "\n",
     "df = df.with_columns(\n",
     "    pl.col(\"cell\").h3.change_resolution(OVERVIEW_LEVEL).h3.cells_to_string().alias(\"tile_id\"),  # type: ignore[attr-defined]\n",
-    "    pl.col(\"cell\").h3.cells_to_string()\n",
+    "    pl.col(\"cell\").h3.cells_to_string(),\n",
     ")\n",
     "partition_dfs = df.partition_by([\"tile_id\"], as_dict=True, include_key=False)"
-   ],
-   "outputs": [],
-   "execution_count": 45
+   ]
   },
   {
    "cell_type": "markdown",
@@ -168,12 +75,9 @@
   },
   {
    "cell_type": "code",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2024-09-09T08:40:16.950037Z",
-     "start_time": "2024-09-09T08:40:16.897734Z"
-    }
-   },
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "seen_tiles = set()\n",
     "n_cells = 0\n",
@@ -184,18 +88,16 @@
     "    tile_id = tile_group[0]\n",
     "    filename = Path(\"../data/processed/grid/1\") / (tile_id + \".arrow\")\n",
     "    if tile_id in seen_tiles:\n",
-    "        tile_df = pl.concat(\n",
-    "            [pl.read_ipc(filename), tile_df], how=\"vertical_relaxed\"\n",
-    "        ).unique(subset=[\"cell\"])\n",
+    "        tile_df = pl.concat([pl.read_ipc(filename), tile_df], how=\"vertical_relaxed\").unique(\n",
+    "            subset=[\"cell\"]\n",
+    "        )\n",
-    "        tile_df.write_parquet(filename)\n",
+    "        tile_df.write_ipc(filename)\n",
     "        n_cells += len(tile_df)\n",
     "    else:\n",
     "        seen_tiles.add(tile_id)\n",
     "        tile_df.write_ipc(filename)\n",
     "        n_cells += len(tile_df)"
-   ],
-   "outputs": [],
-   "execution_count": 48
+   ]
   },
   {
    "cell_type": "markdown",