Skip to content

Commit

Permalink
Fix columns and start port to Python script
Browse files Browse the repository at this point in the history
  • Loading branch information
BielStela committed May 13, 2024
1 parent 260169f commit a9e0d56
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 13 deletions.
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
abeadbd726250dc419ba2e58c3b3cc8ddc23b5fd673e669882b55cf1d8a937d6 gadm36_levels0-2_simp.dbf
98aaf3d1c0ecadf1a424a4536de261c3daf4e373697cb86c40c43b989daf52eb gadm36_levels0-2_simp.prj
95554d6002422bca7118ced401c05a5869808343612155a4d128cf775c739fdb gadm36_levels0-2_simp.sha256
5578ded00390bba9e8c332e8219e17dcd9e9a94a1c2d516ae78708b76c652b1a gadm36_levels0-2_simp.shp
7c7e4b7cf827eefcd534bdcdf946cb6409255f06e8a12211f1f0311269d626ad gadm36_levels0-2_simp.shx
2 changes: 1 addition & 1 deletion data/gadm_importer/data_checksums/geo_region.zip.sha256
Original file line number Diff line number Diff line change
@@ -1 +1 @@
c9cd5f7329207e24c778f5faa0a12d3c24bb5ee120bc29c2c1d0a171fc2656b2 geo_region.zip
14df4a3134734b9a520610c538595fe750f04554babd9faf2836d532f8c6095e geo_region.zip
2 changes: 1 addition & 1 deletion data/gadm_importer/populate_geo_and_admin_regions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ CREATE EXTENSION IF NOT EXISTS ltree;
-- 1. Upsert from gadm to geo_region converting geometry to H3
TRUNCATE TABLE geo_region CASCADE;

\copy geo_region FROM 'geo_region.csv' WITH (FORMAT csv, HEADER, FORCE_NULL ("h3Compact", "h3Flat", "totalArea"));
\copy geo_region FROM 'geo_region.csv' WITH (FORMAT csv, HEADER, FORCE_NULL ("h3Compact", "h3Flat"));

-- 2. Insert into admin_region referencing geo_region
BEGIN;
Expand Down
21 changes: 14 additions & 7 deletions data/preprocessing/gadm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,20 @@ DATADIR=data
# intermediate files
WORKDIR=data/tmp

all: checksum
all: upload_results
make clean-workdir

checksum: compress-geo_region
upload_results: checksum
aws s3 cp $(DATADIR)/geo_region.zip $(AWS_S3_BUCKET_URL)/processed/geo_region/
rm $(DATADIR)/geo_region.zip $(DATADIR)/geo_region.csv
aws s3 sync $(DATADIR) $(AWS_S3_BUCKET_URL)/processed/gadm


checksum:
@echo "Generating checksums..."
cd $(DATADIR) && sha256sum geo_region.zip > geo_region.zip.sha256
cd $(DATADIR) && sha256sum gadm36_levels0-2_simp.* > gadm36_levels0-2_simp.sha256
cd $(DATADIR) && sha256sum geo_region.zip > ../../../gadm_importer/data_checksums/geo_region.zip.sha256
cd $(DATADIR) && sha256sum gadm36_levels0-2_simp.* > ../../../gadm_importer/data_checksums/gadm36_levels0-2_simp.sha256


compress-geo_region: geo_region_table
@echo "Compressing geo_region.csv..."
Expand All @@ -30,7 +37,7 @@ geo_region_table: combine-gadm-file
PGPASSWORD=$$API_POSTGRES_PASSWORD && \
psql -d $$API_POSTGRES_DATABASE -h $$API_POSTGRES_HOST -p $$API_POSTGRES_PORT -U $$API_POSTGRES_USERNAME -f make_geo_region_table.sql

combine-gadm-file: gadm36_0_simp.shp gadm36_1_simp.shp gadm36_2_simp.shp
$(DATADIR)/gadm36_levels0-2_simp.shp: gadm36_0_simp.shp gadm36_1_simp.shp gadm36_2_simp.shp
@echo "Combining GADM files..."
mapshaper -i $(WORKDIR)/gadm36_0_simp.shp $(WORKDIR)/gadm36_1_simp.shp $(WORKDIR)/gadm36_2_simp.shp snap combine-files \
-each 'level = this.layer_name == "gadm36_0_simp" ? 0 \
Expand All @@ -53,12 +60,12 @@ gadm36_%_simp.shp: decompress-gadm
@echo "Simplifying $@ ..."
mapshaper $(WORKDIR)/gadm36_$*.shp -simplify 10% -filter-islands min-vertices=3 -filter-slivers -clean -o $(WORKDIR)/$@ force

decompress-gadm: download-gadm
decompress-gadm: $(WORKDIR)/gadm36_levels_shp.zip
@echo "Decompressing GADM file..."
unzip -u $(WORKDIR)/gadm36_levels_shp.zip gadm36_0* gadm36_1* gadm36_2* -d $(WORKDIR)
rm $(WORKDIR)/gadm36_levels_shp.zip

download-gadm:
$(WORKDIR)/gadm36_levels_shp.zip:
@echo "Downloading GADM file..."
mkdir -p $(WORKDIR)
cd $(WORKDIR) && curl -O https://data.biogeo.ucdavis.edu/data/gadm3.6/gadm36_levels_shp.zip
Expand Down
34 changes: 34 additions & 0 deletions data/preprocessing/gadm/gadm_h3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from pathlib import Path

import click
import geopandas as gpd
from h3ronpy.pandas import compact
from h3ronpy.pandas.vector import geoseries_to_cells

# H3 resolution passed to geoseries_to_cells when converting geometries to cells.
H3_RESOLUTION = 6


@click.command()
@click.argument("filename", type=click.Path(exists=True, path_type=Path))
@click.argument("output", type=click.Path(path_type=Path))
def main(filename: Path, output: Path) -> None:
    """Convert gadm shapefile to csv with h3 columns.

    FILENAME is read with geopandas; every geometry is converted to H3 cells
    at H3_RESOLUTION, a compacted copy of those cells is derived, and the
    resulting table is written to OUTPUT as CSV without the index. Cell
    indexes are written as hexadecimal strings and the geometry as hex WKB.
    """
    print("Reading file...")
    gdf = gpd.read_file(filename)

    print("Making h3 cells...")
    # NOTE(review): the geo_region table spells this column "h3Flat"; confirm
    # whether the CSV header written here is expected to match that casing.
    gdf["h3flat"] = geoseries_to_cells(gdf["geometry"], resolution=H3_RESOLUTION, compact=False)

    print('Compacting h3 cells...')
    gdf["h3Compact"] = [list(compact(x)) for x in gdf["h3flat"]]

    print('Converting to cell indexes to hexadecimal...')
    # hex() yields "0x..."; the slice drops that prefix so only the digits remain.
    gdf["h3Compact"] = gdf["h3Compact"].apply(lambda arr: [hex(x)[2:] for x in arr])
    gdf["h3flat"] = gdf["h3flat"].apply(lambda arr: [hex(x)[2:] for x in arr])

    gdf = gdf.to_wkb(hex=True)
    # A positional mapping is applied to the index labels, so the column
    # mapping has to be passed through `columns=` for the rename to happen.
    gdf = gdf.rename(columns={"geometry": "theGeom"})

    gdf["h3FlatLength"] = gdf["h3flat"].apply(len)
    print(f"writing to {output}...")
    gdf.to_csv(output, index=False)


# Invoke the click command only when this file is executed as a script.
if __name__ == "__main__":
    main()
5 changes: 2 additions & 3 deletions data/preprocessing/gadm/make_geo_region_table.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ CREATE EXTENSION IF NOT EXISTS ltree;
TRUNCATE TABLE geo_region CASCADE;

INSERT INTO geo_region
("name", "h3Flat", "h3Compact", "theGeom", "isCreatedByUser", "totalArea")
("name", "h3Flat", "h3Compact", "theGeom", "isCreatedByUser")

SELECT
mpath,
Expand All @@ -17,8 +17,7 @@ array(
))
) AS "h3Compact",
wkb_geometry,
false,
null
false
FROM gadm_levels0_2
ON CONFLICT (name) DO UPDATE SET
"h3Compact" = EXCLUDED."h3Compact",
Expand Down
2 changes: 1 addition & 1 deletion data/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ numpy==1.21.1
pandas==1.3.1
geopandas==0.10.2
rasterio==1.3.6
h3ronpy==0.16.1
h3ronpy==0.20.1
h3==3.7.6
psycopg2-binary==2.9.1
docopt==0.6.0
Expand Down

0 comments on commit a9e0d56

Please sign in to comment.