From b2dbdf8e6101902c7c13a6a626a97267b6624b8e Mon Sep 17 00:00:00 2001 From: bhupatiraju Date: Wed, 5 Mar 2025 12:20:00 +0000 Subject: [PATCH 1/2] load and transform admin1 boundaries --- geo/admin_boundaries_dlt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geo/admin_boundaries_dlt.py b/geo/admin_boundaries_dlt.py index 4f31fac..d40d17e 100644 --- a/geo/admin_boundaries_dlt.py +++ b/geo/admin_boundaries_dlt.py @@ -15,7 +15,7 @@ from pyspark.sql.types import StructType, StructField, DoubleType from shapely.ops import unary_union -DATA_DIR = '/dbfs/mnt/DAP/data/admin1geoboundaries' +DATA_DIR = '/Volumes/prd_mega/sboost4/vboost4/Workspace/auxiliary_data/admin1geoboundaries' # admin1 name corrections correct_admin1_names = { From cf3b6c961666775a5b889150d104b50d8e3aebec Mon Sep 17 00:00:00 2001 From: bhupatiraju Date: Wed, 5 Mar 2025 12:21:33 +0000 Subject: [PATCH 2/2] extract admin1 boundaries from same source as WB. Exceptions for Kenya and Bangladesh --- geo/admin_boundaries.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/geo/admin_boundaries.py b/geo/admin_boundaries.py index e0e32e9..b342a00 100644 --- a/geo/admin_boundaries.py +++ b/geo/admin_boundaries.py @@ -8,8 +8,7 @@ import json from shapely.geometry import shape, mapping import pandas as pd - -DATA_DIR = '/dbfs/mnt/DAP/data/admin1geoboundaries' +DATA_DIR = '/Volumes/prd_mega/sboost4/vboost4/Workspace/auxiliary_data/admin1geoboundaries' WB_ADM1_GEO_FILENAME = f'{DATA_DIR}/WB_admin1geoboundaries.geojson' ALT_ADM1_GEO_FILENAME = f'{DATA_DIR}/ALT_admin1geoboundaries.geojson' @@ -63,7 +62,7 @@ def get_params(xmin, ymin, xmax, ymax): print(f'Exported {len(features)} admin 1 geo boundary records to {WB_ADM1_GEO_FILENAME}') - +# COMMAND ---------- # Obtaining suitable boundaries (not available within the WB boundaries) from other sources for select countries @@ -96,6 +95,9 @@ def simplify_geometry(geometry, tolerance=0.01): ] assert len(boundaries_KEN['features']) == 47, f"Expected 47 subnational regions, got {len(boundaries_KEN['features'])}" + +# COMMAND ---------- + # Bangladesh boundaries BGD_URL = 'https://github.com/wmgeolab/geoBoundaries/raw/main/releaseData/gbOpen/BGD/ADM1/geoBoundaries-BGD-ADM1.geojson' boundaries_BGD = requests.get(BGD_URL).json() @@ -123,4 +125,3 @@ def simplify_geometry(geometry, tolerance=0.01): boundaries_alt = {'type':'FeatureCollection', 'features':features_KEN+features_BGD} with open(ALT_ADM1_GEO_FILENAME, 'w', encoding='utf-8') as f: json.dump(boundaries_alt, f, ensure_ascii=False, indent=2) -