Standardize doc-strings #26

Open
wants to merge 39 commits into base: main
39 commits
8734791
update docs
elbeejay Feb 28, 2024
0bcd030
Merge branch 'main' of github.com:worldbank/GOSTurban into update-docs
elbeejay Feb 29, 2024
2eb4db3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 29, 2024
9f660ff
remove docs/notebooks
elbeejay Feb 29, 2024
c6ce3ca
Merge branch 'update-docs' of github.com:worldbank/GOSTurban into upd…
elbeejay Feb 29, 2024
be10311
Revert "[pre-commit.ci] auto fixes from pre-commit.com hooks"
elbeejay Feb 29, 2024
9420f8d
Revert "remove docs/notebooks"
elbeejay Feb 29, 2024
545ab38
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 29, 2024
5a8a7fa
modify toc to only include notebook tutorials, clear notebook outputs
elbeejay Feb 29, 2024
ebeb0bb
Merge branch 'update-docs' of github.com:worldbank/GOSTurban into upd…
elbeejay Feb 29, 2024
ed08bff
fix deps, imports, and ci
elbeejay Feb 29, 2024
c9f852f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 29, 2024
419648a
generate conf.py as part of docs build workflow
elbeejay Feb 29, 2024
5e3578d
Merge branch 'update-docs' of github.com:worldbank/GOSTurban into upd…
elbeejay Feb 29, 2024
555845a
have workflow clean notebooks
elbeejay Feb 29, 2024
861c786
add extensions, options etc to _config.yml
elbeejay Feb 29, 2024
7c60f12
cleaned notebooks
elbeejay Feb 29, 2024
7ec9b65
remove non-functional exclusions
elbeejay Feb 29, 2024
4ebf1f5
build docs on prs, only deploy on main
elbeejay Feb 29, 2024
081bf9a
require nbconvert for docs
elbeejay Mar 1, 2024
a66f8c5
doc-strings for urbanraster.py
elbeejay Mar 12, 2024
0306d5e
doc-strings for step2
elbeejay Mar 12, 2024
3ec3ebd
lei.py docstrings
elbeejay Mar 12, 2024
522c775
urb doc-strings
elbeejay Mar 12, 2024
594004e
country_helper.py docstrings
elbeejay Mar 12, 2024
9f2f8ad
ignore words
elbeejay Mar 12, 2024
f5fa9c0
ignore more words
elbeejay Mar 12, 2024
88872dd
fix spellings
elbeejay Mar 12, 2024
8a66c39
fix spellings
elbeejay Mar 12, 2024
41ec26b
ignore another word
elbeejay Mar 12, 2024
66eb8cf
tomli to codepsell
elbeejay Mar 12, 2024
8773323
clean up doc-strings
elbeejay Mar 13, 2024
29bc9c8
clean up notebooks
elbeejay Mar 13, 2024
4bfd5f3
exclude implementation notebooks from ruff
elbeejay Mar 16, 2024
8b7c61f
finish cleaning up doc-strings
elbeejay Mar 16, 2024
ad06093
Merge branch 'main' of github.com:worldbank/GOSTurban into std-doc-st…
elbeejay Jun 14, 2024
997916e
linting of new content added more recently...
elbeejay Jun 14, 2024
a35943b
clear new notebooks...
elbeejay Jun 14, 2024
527b482
Merge branch 'main' of github.com:worldbank/GOSTurban into std-doc-st…
elbeejay Jun 18, 2024
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
@@ -29,6 +29,8 @@ repos:
- id: codespell
name: codespell
description: Checks for common misspellings in text files
additional_dependencies:
- tomli
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.2.2
hooks:
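The `tomli` dependency lets the codespell hook parse TOML configuration (e.g., a `[tool.codespell]` table in `pyproject.toml`) on Python versions below 3.11, which lack a standard-library TOML parser. A hypothetical configuration it would read — the key values below are illustrative, not taken from this PR:

```toml
# Illustrative pyproject.toml section; codespell needs tomli to parse
# this file when running under Python < 3.11.
[tool.codespell]
ignore-words-list = "gost,ths"  # example entries only
skip = "*.ipynb"
```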
23 changes: 12 additions & 11 deletions README.md
@@ -18,23 +18,24 @@ pip install GOSTurban

1. Clone or download this repository to your local machine. Then, navigate to the root directory of the repository:

```shell
git clone https://github.com/worldbank/GOSTurban.git
cd GOSTurban
```
```shell
git clone https://github.com/worldbank/GOSTurban.git
cd GOSTurban
```

2. Create a virtual environment (optional but recommended):

```shell
python3 -m venv venv
source venv/bin/activate # On Windows, use `venv\Scripts\activate`
```
```shell
python3 -m venv venv
source venv/bin/activate # On Windows, use `venv\Scripts\activate`
```

3. Install the package with dependencies:

```shell
pip install .
```
```shell
pip install .
```

### Developer Installation

Install the package **in editable mode** with all of the dependencies needed to run the tests and build the documentation locally:
@@ -17,7 +17,6 @@
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"\n",
"import rasterio\n",
"import rasterio.features\n",
@@ -29,8 +28,7 @@
"from rasterio.plot import show\n",
"\n",
"# Import GOST urban functions\n",
"sys.path.append(\"../../\")\n",
"from src.LEI import *"
"from GOSTurban.LEI import calculate_LEI, summarize_LEI"
]
},
{
@@ -58,7 +56,7 @@
" os.stat(os.path.join(root, \"GHSL.tif\")).st_size,\n",
" )\n",
" )\n",
" except:\n",
" except Exception:\n",
" pass"
]
},
@@ -67,7 +65,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Vizualize raster data - GHSL"
"# Visualize raster data - GHSL"
]
},
{
@@ -104,7 +102,7 @@
"source": [
"# write out raster to file\n",
"outProperties = inRaster.profile\n",
"outRaster = outRaster.astype(\"int32\")\n",
"outRaster = inRaster.astype(\"int32\")\n",
"outProperties[\"dtype\"] = \"int32\"\n",
"with rasterio.open(inputGHSL.replace(\".tif\", \"_LEI.tif\"), \"w\", **outProperties) as out:\n",
" out.write(outRaster)"
@@ -250,7 +250,7 @@
"for idx, row in final_center.iterrows():\n",
" try:\n",
" sel_city = inCities.loc[inCities.intersects(row[\"geometry\"])]\n",
" except:\n",
" except Exception:\n",
" sel_city = inCities.loc[inCities.intersects(row[\"geometry\"].buffer(0))]\n",
" if sel_city.shape[0] > 0:\n",
" final_center.loc[idx, \"wCity\"] = sel_city[\"city\"].iloc[0]"
@@ -278,7 +278,7 @@
"for idx, row in hd_center.iterrows():\n",
" try:\n",
" sel_city = inCities.loc[inCities.intersects(row[\"geometry\"])]\n",
" except:\n",
" except Exception:\n",
" sel_city = inCities.loc[inCities.intersects(row[\"geometry\"].buffer(0))]\n",
" if sel_city.shape[0] > 0:\n",
" hd_center.loc[idx, \"wCity\"] = sel_city[\"city\"].iloc[0]\n",
@@ -5,7 +5,7 @@
"metadata": {},
"source": [
"# Combine All Urban Metrics\n",
"Run the 4 seperate notebooks seperately, and then this notebook last to combine all of the results into one shapefile\n",
"Run the 4 separate notebooks separately, and then this notebook last to combine all of the results into one shapefile\n",
"\n",
"Check to see that all of the results have the same number of rows"
]
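That row-count check before combining can be sketched as follows — a minimal illustration with made-up metric tables, since the notebook's actual variable and column names are not shown here:

```python
import pandas as pd

# Stand-ins for the results of the four metric notebooks (column names assumed).
results = {
    "fullness": pd.DataFrame({"PID": [1, 2, 3], "fullness": [0.8, 0.6, 0.9]}),
    "sprawl": pd.DataFrame({"PID": [1, 2, 3], "sprawl": [0.1, 0.4, 0.2]}),
}

# Check that all of the results have the same number of rows.
lengths = {name: len(df) for name, df in results.items()}
assert len(set(lengths.values())) == 1, f"Row counts differ: {lengths}"

# Merge the metric tables on the shared ID column into one frame.
frames = list(results.values())
combined = frames[0]
for df in frames[1:]:
    combined = combined.merge(df, on="PID")
```

Merging on a shared ID rather than concatenating positionally keeps the combined table correct even if any notebook reorders its rows.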
@@ -138,7 +138,7 @@
"\n",
" input_shapes_gpd = gpd.read_file(shpName)\n",
"\n",
" # psuedocode\n",
" # pseudocode\n",
" # For each Shape:\n",
" # Select all built-up pixels that are mostly within shape\n",
" # Area of shape = sum of all pixels * area of each pixel\n",
@@ -197,7 +197,7 @@
" # print(\"print metrics_scalar\")\n",
" # print(metrics_scalar)\n",
"\n",
" # and concatinate it with the row's shape\n",
" # and concatenate it with the row's shape\n",
" new_temp_gdf = pd.concat([temp_gdf.reset_index(drop=True), metrics_df], axis=1)\n",
"\n",
" # print(\"print new_temp_gdf\")\n",
@@ -379,7 +379,7 @@
" metrics_scalar[k] = [metrics[k]]\n",
" metrics_df = pd.DataFrame(metrics_scalar)\n",
"\n",
" # and concatinate it with the row's shape\n",
" # and concatenate it with the row's shape\n",
" new_temp_gdf_proj = pd.concat(\n",
" [temp_gdf_proj.reset_index(drop=True), metrics_df], axis=1\n",
" )\n",
@@ -134,7 +134,7 @@
"metadata": {},
"outputs": [],
"source": [
"# Psuedocode\n",
"# Pseudocode\n",
"\n",
"# pop_values = []\n",
"# For each Shape/FUA:\n",
@@ -193,7 +193,7 @@
" # geometry\n",
" d[\"geometry\"] = d.apply(lambda row: Point(row[\"x\"], row[\"y\"]), axis=1)\n",
"\n",
" # exlude pixels with value less than 77\n",
" # exclude pixels with value less than 77\n",
" print(len(d))\n",
"\n",
" # print(d)\n",
Expand Down Expand Up @@ -238,7 +238,7 @@
"metadata": {},
"outputs": [],
"source": [
"# Psuedocode\n",
"# Pseudocode\n",
"\n",
"# for each Shape/FUA:\n",
"# pixel_count_below_median = 0\n",
@@ -288,7 +288,7 @@
"\n",
" d = gpd.GeoDataFrame({\"col\": col, \"row\": row, \"val\": val})\n",
"\n",
" # exlude pixels with value less than 77\n",
" # exclude pixels with value less than 77\n",
" d = d[d.val > 77]\n",
" d_count = len(d)\n",
" # print(f\"d_count is {d_count}\")\n",
@@ -322,7 +322,7 @@
" # print(\"print metrics_scalar\")\n",
" # print(metrics_scalar)\n",
"\n",
" # and concatinate it with the row's shape\n",
" # and concatenate it with the row's shape\n",
" new_temp_gdf = pd.concat([temp_gdf.reset_index(drop=True), metrics_df], axis=1)\n",
"\n",
" # print(\"print new_temp_gdf\")\n",
@@ -158,7 +158,7 @@
" # print(\"print metrics_scalar\")\n",
" # print(metrics_scalar)\n",
"\n",
" # and concatinate it with the row's shape\n",
" # and concatenate it with the row's shape\n",
" new_temp_gdf = pd.concat([temp_gdf.reset_index(drop=True), metrics_df], axis=1)\n",
"\n",
" # print(\"print new_temp_gdf\")\n",
@@ -185,7 +185,7 @@
" # metrics_scalar['intersection_density_km'] = 0\n",
" # metrics_scalar['street_density_km'] = 0\n",
" # metrics_df = pd.DataFrame(metrics_scalar)\n",
" # and concatinate it with the row's shape\n",
" # and concatenate it with the row's shape\n",
" # new_temp_gdf = pd.concat([temp_gdf.reset_index(drop=True), metrics_df], axis=1)\n",
" # output_new_temp_gdf = output_new_temp_gdf.append(new_temp_gdf, ignore_index=True)\n",
" continue"
@@ -99,22 +99,8 @@
}
],
"metadata": {
"kernelspec": {
"display_name": "urban_test",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
"name": "python"
}
},
"nbformat": 4,
1 change: 1 addition & 0 deletions notebooks/Implementations/README.md
@@ -1,2 +1,3 @@
# Implementations

The primary role of the GOST team in the World Bank Group is to support operational teams in their exploitation of geospatial data. This happens in many different ways, and the notebooks herein present examples of the specific support the team has provided to various investigations of urbanization.
6 changes: 6 additions & 0 deletions notebooks/Implementations/Slum_Mapping/slumML/README.md
@@ -3,6 +3,7 @@
## 1. slumML main component

### 1.1. Sample data preparation

(1) 5–10 sample areas per area character of interest (e.g., slum, commercial, middle-class residential, etc.). If you’d like to cluster your target city into four classes (slum, commercial, middle-income, and rich neighborhoods), we need 20–40 sample areas in total. If you have a colleague who can map these sample areas in GIS, please provide a shapefile of the sample areas to me. If not, just draw the areas on a paper map, scan it as a high-resolution PDF, and send it to me; I will digitize the sample areas.

(2) In relation to (1), we need to have local staff who can verify an ML-predicted result to adjust the model.
@@ -11,17 +12,21 @@ The below image shows examples of the sample areas gathered for the Niamey study:
![Sample areas](https://user-images.githubusercontent.com/64405484/149363357-ac2fe7d9-4aca-4345-88a1-c5d2f9e304a5.png)

### 1.2. Actual slumML analysis - STEP1 and STEP2

The original code was developed by Alex Chunet.
Run STEP 1 to generate morphological indices for the target building-footprint layer, then run STEP 2 to perform an automated supervised ML analysis.

## 2. Auxiliary materials

### 2.1. GOB2FC (Google Open Building layer to ESRI feature class)

This script converts the CSV file(s) of [Google Open Buildings](https://sites.research.google/open-buildings/) to a feature class.

![Layout2](https://user-images.githubusercontent.com/64405484/137813865-0abd8f0c-ff15-4980-9251-042a8f9dc66b.png)
An example map rendered in ArcGIS Pro (Cairo Metropolitan Area)

#### 2.2.1. How to use

This script uses ArcPy modules, so your environment must have the ArcPy library. Simply load the script into a toolbox in your ArcGIS Pro project and run it. The target Open Buildings CSV file must be stored in the folder where the script is located.

If you want to test the script with a small chunk of a full CSV, modify the code:
@@ -32,6 +37,7 @@ df_test.reset_index(inplace=True, drop=True)

for i, r in df_conf.iterrows():
```

For example, `iloc[0:100]` will retrieve the first 100 records from the original data; `df_conf` should then be replaced by `df_test`.
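Putting those pieces together, a self-contained sketch of the test workflow might look like this (the DataFrame contents are stand-ins; a real Open Buildings CSV is loaded with `pd.read_csv` and has its own column layout):

```python
import pandas as pd

# Stand-in for the full Open Buildings CSV (normally: df_conf = pd.read_csv(...)).
df_conf = pd.DataFrame({
    "latitude": [0.1] * 500,
    "longitude": [32.5] * 500,
    "confidence": [0.9] * 500,
})

# Keep only the first 100 records for a quick test run.
df_test = df_conf.iloc[0:100].copy()
df_test.reset_index(inplace=True, drop=True)

# Iterate over the small test frame instead of the full data.
for i, r in df_test.iterrows():
    pass  # per-building processing would happen here
```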

#### 2.2.2. NOTE
14 changes: 8 additions & 6 deletions notebooks/Implementations/Slum_Mapping/slumML/STEP1.ipynb
@@ -106,7 +106,9 @@
"# Prepare the original shape file\n",
"original = gpd.read_file(f) # Read ESEI shapefile\n",
"if original.crs != WGS:\n",
" original = original.to_crs(WGS) # Convert the spatial referenct to WGS if it is not\n",
" original = original.to_crs(\n",
" WGS\n",
" ) # Convert the spatial referenced to WGS if it is not\n",
"\n",
"original[\"PID\"] = original.index + 1\n",
"\n",
@@ -115,7 +117,7 @@
"fil = original.copy()\n",
"\n",
"fil = fil.to_crs(UTM) # Convert the spatial reference to UTM\n",
"# Adding attributes to the shapefile: area, geomerty, and PID (unique IDs)\n",
"# Adding attributes to the shapefile: area, geometry, and PID (unique IDs)\n",
"fil[\"area\"] = fil.area\n",
"fil[\"centroid\"] = fil[\"geometry\"].centroid\n",
"\n",
@@ -151,9 +153,9 @@
"def Main(passed_dict):\n",
" # unpack passed dict into local variables for this thread.\n",
" short = passed_dict[\"df\"]\n",
" thread_no = passed_dict[\"thread_no\"]\n",
" # thread_no = passed_dict[\"thread_no\"]\n",
" print_thresh = passed_dict[\"print_thresh\"]\n",
" save_thresh = passed_dict[\"save_thresh\"]\n",
" # save_thresh = passed_dict[\"save_thresh\"]\n",
"\n",
" # set up some counters / timings\n",
" t = time.time()\n",
@@ -262,8 +264,8 @@
" print(\"%s rows completed at %s\" % (counter, time.ctime()))\n",
"\n",
" \"\"\"\n",
" # this functionality saves progress in case the process cannot be finished in one sitting. \n",
" # ideally, finish the processing in one sitting. \n",
" # this functionality saves progress in case the process cannot be finished in one sitting.\n",
" # ideally, finish the processing in one sitting.\n",
" old = 0\n",
" if counter % save_thresh == 0:\n",
" saver = pd.DataFrame(bundle)\n",
26 changes: 24 additions & 2 deletions notebooks/Implementations/Slum_Mapping/slumML/STEP1.py
@@ -47,11 +47,33 @@

###
def Main(passed_dict):
"""
This function will calculate the nearest neighbours and their attributes for each building in the passed DataFrame.

Parameters
----------
passed_dict : dict
a dictionary containing the following elements:
df : DataFrame
a DataFrame of building footprints, with a unique identifier, a centroid, and an area
thread_no : int
an integer representing the thread number
print_thresh : int
an integer representing the number of rows to process before printing progress
save_thresh : int
an integer representing the number of rows to process before saving progress

Returns
-------
list
a list of dictionaries, each containing the attributes of the nearest neighbours for each building in the passed DataFrame

"""
# unpack passed dict into local variables for this thread.
short = passed_dict["df"]
thread_no = passed_dict["thread_no"]
# thread_no = passed_dict["thread_no"]
print_thresh = passed_dict["print_thresh"]
save_thresh = passed_dict["save_thresh"]
# save_thresh = passed_dict["save_thresh"]

# set up some counters / timings
t = time.time()
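The docstring added above describes a nearest-neighbour pass over building footprints. A stripped-down, standard-library-only sketch of the core idea — the field names are illustrative, and the real `Main` works on GeoDataFrame rows with spatial indexing:

```python
import math

# Hypothetical building records: unique PID, centroid (x, y), and area.
buildings = [
    {"PID": 1, "centroid": (0.0, 0.0), "area": 50.0},
    {"PID": 2, "centroid": (3.0, 4.0), "area": 80.0},
    {"PID": 3, "centroid": (10.0, 0.0), "area": 65.0},
]

def nearest_neighbour(target, others):
    """Return the building (other than target) with the closest centroid."""
    tx, ty = target["centroid"]
    return min(
        (b for b in others if b["PID"] != target["PID"]),
        key=lambda b: math.hypot(b["centroid"][0] - tx, b["centroid"][1] - ty),
    )

# For each building, record its nearest neighbour's PID and centroid distance.
bundle = []
for b in buildings:
    nn = nearest_neighbour(b, buildings)
    dist = math.hypot(
        nn["centroid"][0] - b["centroid"][0],
        nn["centroid"][1] - b["centroid"][1],
    )
    bundle.append({"PID": b["PID"], "nn_PID": nn["PID"], "nn_dist": dist})
```

The brute-force scan here is quadratic in the number of buildings; the threaded design and progress counters in the real script exist precisely because city-scale footprint layers make this pass expensive.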
2 changes: 1 addition & 1 deletion notebooks/Implementations/Slum_Mapping/slumML/STEP2.ipynb
@@ -98,7 +98,7 @@
"metadata": {},
"outputs": [],
"source": [
"pth = \"/content/drive/MyDrive/Colab Notebooks/slumML/data/Yaounde/\" # Directory to save model, ouputs\n",
"pth = \"/content/drive/MyDrive/Colab Notebooks/slumML/data/Yaounde/\" # Directory to save model, outputs\n",
"building_file = \"/content/drive/MyDrive/Colab Notebooks/slumML/data/Yaounde/Yaounde_DA_morphology.shp\" # Specify the processed building footprint data\n",
"sample_file = \"/content/drive/MyDrive/Colab Notebooks/slumML/data/Yaounde/Yaounde_sample_data.shp\" # Specify the sample data"
]
18 changes: 16 additions & 2 deletions notebooks/Implementations/Slum_Mapping/slumML/STEP2.py
@@ -124,6 +124,20 @@


def MLpred(df):
"""
Perform machine learning prediction on the input dataframe.

Parameters
----------
df : pandas.DataFrame
The input dataframe containing the data for prediction.

Returns
-------
pandas.DataFrame
The dataframe with the predicted values merged.

"""
df_input = df[predictors]
# Extract predictor cols only (specified by the 'predictors' LIST)
hf_temp = H2OFrame(df_input)
@@ -135,9 +149,9 @@ def MLpred(df):
df.reset_index(inplace=True)
pred_df_temp["PID"] = df.PID

ans = pd.merge(df, pred_df_temp, on="PID")
ans_var = pd.merge(df, pred_df_temp, on="PID")

return ans
return ans_var


# Create an empty DF for append
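The renaming of `ans` to `ans_var` leaves the logic unchanged: predictions are joined back to the source rows on `PID`. A minimal sketch with a stand-in for the H2O prediction frame (column names other than `PID` are assumptions):

```python
import pandas as pd

# Original buildings with morphology features.
df = pd.DataFrame({"PID": [1, 2, 3], "area": [50.0, 80.0, 65.0]})

# Stand-in for the H2O prediction output, re-keyed by PID as in MLpred.
pred_df_temp = pd.DataFrame({"predict": ["slum", "commercial", "slum"]})
pred_df_temp["PID"] = df.PID

# Join predictions back onto the source rows.
ans_var = pd.merge(df, pred_df_temp, on="PID")
```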