Skip to content

Commit

Permalink
removes all outputs from notebooks
Browse files Browse the repository at this point in the history
  • Loading branch information
BielStela committed Sep 11, 2024
1 parent e1f7c41 commit ac2d6c0
Showing 1 changed file with 35 additions and 133 deletions.
168 changes: 35 additions & 133 deletions science/notebooks/merge_entrega_roberto.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,162 +2,69 @@
"cells": [
{
"cell_type": "code",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-09T08:30:08.332078Z",
"start_time": "2024-09-09T08:30:08.112470Z"
}
},
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import polars as pl\n",
"from pathlib import Path\n",
"import h3ronpy.polars"
],
"outputs": [],
"execution_count": 1
"\n",
"import h3ronpy.polars  # noqa: F401 -- side-effect import: registers the `.h3` namespace used on polars expressions below\n",
"import polars as pl"
]
},
{
"cell_type": "code",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-09T08:37:46.700129Z",
"start_time": "2024-09-09T08:37:46.697392Z"
}
},
"source": "csvs = list(Path(\"../data/raw/ENTREGA UNO MUESTRAS HEXA CSV 18072024\").glob(\"*.CSV\"))",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 39
"source": [
"csvs = list(Path(\"../data/raw/ENTREGA UNO MUESTRAS HEXA CSV 18072024\").glob(\"*.CSV\"))"
]
},
{
"cell_type": "code",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-09T08:37:50.987739Z",
"start_time": "2024-09-09T08:37:50.852450Z"
}
},
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The delivery CSVs are semicolon-delimited and use a comma as the decimal mark.\n",
"dfs = [\n",
"    pl.read_csv(csv_path, separator=\";\", decimal_comma=True)\n",
"    for csv_path in csvs\n",
"]\n",
"# Combine every file into one frame (polars \"align\" strategy), rechunked into contiguous memory.\n",
"df = pl.concat(dfs, how=\"align\", rechunk=True)\n",
"df.head()"
],
"outputs": [
{
"data": {
"text/plain": [
"shape: (5, 9)\n",
"┌──────────────┬───────┬────────────┬────────────┬───┬───────────┬──────────┬───────────┬──────────┐\n",
"│ GRID_ID ┆ FRECF ┆ AMIN ┆ AMAX ┆ … ┆ TREEPERCT ┆ PMIN ┆ PMAX ┆ PMEAN │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ str ┆ i64 ┆ f64 ┆ f64 ┆ ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
"╞══════════════╪═══════╪════════════╪════════════╪═══╪═══════════╪══════════╪═══════════╪══════════╡\n",
"│ 865f00007fff ┆ null ┆ 114.678246 ┆ 209.731842 ┆ … ┆ 100.0 ┆ 0.058348 ┆ 7.531753 ┆ 1.69093 │\n",
"│ fff ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
"│ 865f0000ffff ┆ null ┆ 127.660339 ┆ 705.040772 ┆ … ┆ 99.985832 ┆ 0.148311 ┆ 31.043549 ┆ 6.346733 │\n",
"│ fff ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
"│ 865f00017fff ┆ null ┆ 117.937508 ┆ 175.799759 ┆ … ┆ 100.0 ┆ 0.028819 ┆ 2.731335 ┆ 1.063382 │\n",
"│ fff ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
"│ 865f0001ffff ┆ null ┆ 123.765045 ┆ 193.208282 ┆ … ┆ 100.0 ┆ 0.047981 ┆ 4.67722 ┆ 1.557258 │\n",
"│ fff ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
"│ 865f00027fff ┆ null ┆ 111.118088 ┆ 277.398895 ┆ … ┆ 100.0 ┆ 0.144035 ┆ 12.342467 ┆ 2.193349 │\n",
"│ fff ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
"└──────────────┴───────┴────────────┴────────────┴───┴───────────┴──────────┴───────────┴──────────┘"
],
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (5, 9)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>GRID_ID</th><th>FRECF</th><th>AMIN</th><th>AMAX</th><th>AMEAN</th><th>TREEPERCT</th><th>PMIN</th><th>PMAX</th><th>PMEAN</th></tr><tr><td>str</td><td>i64</td><td>f64</td><td>f64</td><td>f64</td><td>f64</td><td>f64</td><td>f64</td><td>f64</td></tr></thead><tbody><tr><td>&quot;865f00007ffffff&quot;</td><td>null</td><td>114.678246</td><td>209.731842</td><td>149.513126</td><td>100.0</td><td>0.058348</td><td>7.531753</td><td>1.69093</td></tr><tr><td>&quot;865f0000fffffff&quot;</td><td>null</td><td>127.660339</td><td>705.040772</td><td>245.461013</td><td>99.985832</td><td>0.148311</td><td>31.043549</td><td>6.346733</td></tr><tr><td>&quot;865f00017ffffff&quot;</td><td>null</td><td>117.937508</td><td>175.799759</td><td>145.636984</td><td>100.0</td><td>0.028819</td><td>2.731335</td><td>1.063382</td></tr><tr><td>&quot;865f0001fffffff&quot;</td><td>null</td><td>123.765045</td><td>193.208282</td><td>156.474098</td><td>100.0</td><td>0.047981</td><td>4.67722</td><td>1.557258</td></tr><tr><td>&quot;865f00027ffffff&quot;</td><td>null</td><td>111.118088</td><td>277.398895</td><td>146.417323</td><td>100.0</td><td>0.144035</td><td>12.342467</td><td>2.193349</td></tr></tbody></table></div>"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 42
]
},
{
"cell_type": "code",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-09T08:37:51.695475Z",
"start_time": "2024-09-09T08:37:51.673626Z"
}
},
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Parse the GRID_ID strings through the h3 namespace, then drop the original string column.\n",
"df = df.with_columns(pl.col(\"GRID_ID\").h3.cells_parse()).drop(\"GRID_ID\")"
],
"outputs": [],
"execution_count": 43
]
},
{
"cell_type": "code",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-09T08:37:52.296769Z",
"start_time": "2024-09-09T08:37:52.286054Z"
}
},
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.select(pl.col(\"cell\").h3.cells_resolution()).unique()"
],
"outputs": [
{
"data": {
"text/plain": [
"shape: (1, 1)\n",
"┌────────────┐\n",
"│ resolution │\n",
"│ --- │\n",
"│ u8 │\n",
"╞════════════╡\n",
"│ 6 │\n",
"└────────────┘"
],
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (1, 1)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>resolution</th></tr><tr><td>u8</td></tr></thead><tbody><tr><td>6</td></tr></tbody></table></div>"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 44
]
},
{
"cell_type": "code",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-09T08:37:53.457159Z",
"start_time": "2024-09-09T08:37:53.382329Z"
}
},
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"CELLS_RES = 6\n",
"OVERVIEW_LEVEL = CELLS_RES - 5\n",
"\n",
"df = df.with_columns(\n",
" pl.col(\"cell\").h3.change_resolution(OVERVIEW_LEVEL).h3.cells_to_string().alias(\"tile_id\"), # type: ignore[attr-defined]\n",
" pl.col(\"cell\").h3.cells_to_string()\n",
" pl.col(\"cell\").h3.cells_to_string(),\n",
")\n",
"partition_dfs = df.partition_by([\"tile_id\"], as_dict=True, include_key=False)"
],
"outputs": [],
"execution_count": 45
]
},
{
"cell_type": "markdown",
Expand All @@ -168,12 +75,9 @@
},
{
"cell_type": "code",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-09T08:40:16.950037Z",
"start_time": "2024-09-09T08:40:16.897734Z"
}
},
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"seen_tiles = set()\n",
"n_cells = 0\n",
Expand All @@ -184,18 +88,16 @@
" tile_id = tile_group[0]\n",
" filename = Path(\"../data/processed/grid/1\") / (tile_id + \".arrow\")\n",
" if tile_id in seen_tiles:\n",
" tile_df = pl.concat(\n",
" [pl.read_ipc(filename), tile_df], how=\"vertical_relaxed\"\n",
" ).unique(subset=[\"cell\"])\n",
" tile_df = pl.concat([pl.read_ipc(filename), tile_df], how=\"vertical_relaxed\").unique(\n",
" subset=[\"cell\"]\n",
" )\n",
" tile_df.write_ipc(filename)  # keep Arrow IPC: this '.arrow' file is re-read with pl.read_ipc on the next merge\n",
" n_cells += len(tile_df)\n",
" else:\n",
" seen_tiles.add(tile_id)\n",
" tile_df.write_ipc(filename)\n",
" n_cells += len(tile_df)"
],
"outputs": [],
"execution_count": 48
]
},
{
"cell_type": "markdown",
Expand Down

0 comments on commit ac2d6c0

Please sign in to comment.