diff --git a/science/notebooks/merge_entrega_roberto.ipynb b/science/notebooks/merge_entrega_roberto.ipynb
index cc3cf618..f1ea486d 100644
--- a/science/notebooks/merge_entrega_roberto.ipynb
+++ b/science/notebooks/merge_entrega_roberto.ipynb
@@ -2,162 +2,69 @@
"cells": [
{
"cell_type": "code",
- "metadata": {
- "ExecuteTime": {
- "end_time": "2024-09-09T08:30:08.332078Z",
- "start_time": "2024-09-09T08:30:08.112470Z"
- }
- },
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
"source": [
- "import polars as pl\n",
"from pathlib import Path\n",
- "import h3ronpy.polars"
- ],
- "outputs": [],
- "execution_count": 1
+ "import h3ronpy.polars  # noqa: F401 - side-effect import: registers the `.h3` namespace used by the cells below\n",
+ "import polars as pl"
+ ]
},
{
"cell_type": "code",
- "metadata": {
- "ExecuteTime": {
- "end_time": "2024-09-09T08:37:46.700129Z",
- "start_time": "2024-09-09T08:37:46.697392Z"
- }
- },
- "source": "csvs = list(Path(\"../data/raw/ENTREGA UNO MUESTRAS HEXA CSV 18072024\").glob(\"*.CSV\"))",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": 39
+ "source": [
+ "csvs = list(Path(\"../data/raw/ENTREGA UNO MUESTRAS HEXA CSV 18072024\").glob(\"*.CSV\"))"
+ ]
},
{
"cell_type": "code",
- "metadata": {
- "ExecuteTime": {
- "end_time": "2024-09-09T08:37:50.987739Z",
- "start_time": "2024-09-09T08:37:50.852450Z"
- }
- },
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
"source": [
"dfs = [pl.read_csv(f, separator=\";\", decimal_comma=True) for f in csvs]\n",
"df = pl.concat(dfs, how=\"align\", rechunk=True)\n",
"df.head()"
- ],
- "outputs": [
- {
- "data": {
- "text/plain": [
- "shape: (5, 9)\n",
- "┌──────────────┬───────┬────────────┬────────────┬───┬───────────┬──────────┬───────────┬──────────┐\n",
- "│ GRID_ID ┆ FRECF ┆ AMIN ┆ AMAX ┆ … ┆ TREEPERCT ┆ PMIN ┆ PMAX ┆ PMEAN │\n",
- "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n",
- "│ str ┆ i64 ┆ f64 ┆ f64 ┆ ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
- "╞══════════════╪═══════╪════════════╪════════════╪═══╪═══════════╪══════════╪═══════════╪══════════╡\n",
- "│ 865f00007fff ┆ null ┆ 114.678246 ┆ 209.731842 ┆ … ┆ 100.0 ┆ 0.058348 ┆ 7.531753 ┆ 1.69093 │\n",
- "│ fff ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
- "│ 865f0000ffff ┆ null ┆ 127.660339 ┆ 705.040772 ┆ … ┆ 99.985832 ┆ 0.148311 ┆ 31.043549 ┆ 6.346733 │\n",
- "│ fff ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
- "│ 865f00017fff ┆ null ┆ 117.937508 ┆ 175.799759 ┆ … ┆ 100.0 ┆ 0.028819 ┆ 2.731335 ┆ 1.063382 │\n",
- "│ fff ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
- "│ 865f0001ffff ┆ null ┆ 123.765045 ┆ 193.208282 ┆ … ┆ 100.0 ┆ 0.047981 ┆ 4.67722 ┆ 1.557258 │\n",
- "│ fff ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
- "│ 865f00027fff ┆ null ┆ 111.118088 ┆ 277.398895 ┆ … ┆ 100.0 ┆ 0.144035 ┆ 12.342467 ┆ 2.193349 │\n",
- "│ fff ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
- "└──────────────┴───────┴────────────┴────────────┴───┴───────────┴──────────┴───────────┴──────────┘"
- ],
- "text/html": [
- "
\n",
- "
shape: (5, 9)GRID_ID | FRECF | AMIN | AMAX | AMEAN | TREEPERCT | PMIN | PMAX | PMEAN |
---|
str | i64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 |
"865f00007ffffff" | null | 114.678246 | 209.731842 | 149.513126 | 100.0 | 0.058348 | 7.531753 | 1.69093 |
"865f0000fffffff" | null | 127.660339 | 705.040772 | 245.461013 | 99.985832 | 0.148311 | 31.043549 | 6.346733 |
"865f00017ffffff" | null | 117.937508 | 175.799759 | 145.636984 | 100.0 | 0.028819 | 2.731335 | 1.063382 |
"865f0001fffffff" | null | 123.765045 | 193.208282 | 156.474098 | 100.0 | 0.047981 | 4.67722 | 1.557258 |
"865f00027ffffff" | null | 111.118088 | 277.398895 | 146.417323 | 100.0 | 0.144035 | 12.342467 | 2.193349 |
"
- ]
- },
- "execution_count": 42,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "execution_count": 42
+ ]
},
{
"cell_type": "code",
- "metadata": {
- "ExecuteTime": {
- "end_time": "2024-09-09T08:37:51.695475Z",
- "start_time": "2024-09-09T08:37:51.673626Z"
- }
- },
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
"source": [
"df = df.with_columns(pl.col(\"GRID_ID\").h3.cells_parse())\n",
"df = df.drop(\"GRID_ID\")"
- ],
- "outputs": [],
- "execution_count": 43
+ ]
},
{
"cell_type": "code",
- "metadata": {
- "ExecuteTime": {
- "end_time": "2024-09-09T08:37:52.296769Z",
- "start_time": "2024-09-09T08:37:52.286054Z"
- }
- },
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
"source": [
"df.select(pl.col(\"cell\").h3.cells_resolution()).unique()"
- ],
- "outputs": [
- {
- "data": {
- "text/plain": [
- "shape: (1, 1)\n",
- "┌────────────┐\n",
- "│ resolution │\n",
- "│ --- │\n",
- "│ u8 │\n",
- "╞════════════╡\n",
- "│ 6 │\n",
- "└────────────┘"
- ],
- "text/html": [
- ""
- ]
- },
- "execution_count": 44,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "execution_count": 44
+ ]
},
{
"cell_type": "code",
- "metadata": {
- "ExecuteTime": {
- "end_time": "2024-09-09T08:37:53.457159Z",
- "start_time": "2024-09-09T08:37:53.382329Z"
- }
- },
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
"source": [
"CELLS_RES = 6\n",
"OVERVIEW_LEVEL = CELLS_RES - 5\n",
"\n",
"df = df.with_columns(\n",
" pl.col(\"cell\").h3.change_resolution(OVERVIEW_LEVEL).h3.cells_to_string().alias(\"tile_id\"), # type: ignore[attr-defined]\n",
- " pl.col(\"cell\").h3.cells_to_string()\n",
+ " pl.col(\"cell\").h3.cells_to_string(),\n",
")\n",
"partition_dfs = df.partition_by([\"tile_id\"], as_dict=True, include_key=False)"
- ],
- "outputs": [],
- "execution_count": 45
+ ]
},
{
"cell_type": "markdown",
@@ -168,12 +75,9 @@
},
{
"cell_type": "code",
- "metadata": {
- "ExecuteTime": {
- "end_time": "2024-09-09T08:40:16.950037Z",
- "start_time": "2024-09-09T08:40:16.897734Z"
- }
- },
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
"source": [
"seen_tiles = set()\n",
"n_cells = 0\n",
@@ -184,18 +88,16 @@
" tile_id = tile_group[0]\n",
" filename = Path(\"../data/processed/grid/1\") / (tile_id + \".arrow\")\n",
" if tile_id in seen_tiles:\n",
- " tile_df = pl.concat(\n",
- " [pl.read_ipc(filename), tile_df], how=\"vertical_relaxed\"\n",
- " ).unique(subset=[\"cell\"])\n",
+ " tile_df = pl.concat([pl.read_ipc(filename), tile_df], how=\"vertical_relaxed\").unique(\n",
+ " subset=[\"cell\"]\n",
+ " )\n",
- " tile_df.write_parquet(filename)\n",
+ " tile_df.write_ipc(filename)  # keep Arrow IPC: this .arrow file is re-read with pl.read_ipc on the next merge\n",
" n_cells += len(tile_df)\n",
" else:\n",
" seen_tiles.add(tile_id)\n",
" tile_df.write_ipc(filename)\n",
" n_cells += len(tile_df)"
- ],
- "outputs": [],
- "execution_count": 48
+ ]
},
{
"cell_type": "markdown",