Commit: nutrient assimilation capacity indicator
Showing 4 changed files with 235 additions and 6 deletions.

data/preprocessing/nutrient_assimilation_capacity/Makefile: 25 additions & 0 deletions

```makefile
# Makefile for downloading, processing, and uploading data

# Variables
DATA_DIR=data/
checksums_dir=../../../../h3_data_importer/data_checksums
AWS_S3_BUCKET_URL=s3://landgriffon-raw-data

# Targets (none of these produce a file named after the target)
.PHONY: all unzip-limiting-nutrient process-limiting-nutrients upload_results write_checksum

all: unzip-limiting-nutrient

# First you need to download the data manually from
# https://figshare.com/articles/figure/DRP_NO3_TN_TP_rasters/14527638/1?file=31154728
# and save it in nutrient_assimilation_capacity/data
unzip-limiting-nutrient:
	unzip -q -u $(DATA_DIR)/hybas_l03_v1c_Cases.zip -d $(DATA_DIR)/

# Preprocess the data before ingesting instead of performing these calculations on the database
process-limiting-nutrients:
	python process_data.py $(DATA_DIR)/hybas_l03_v1c_Cases

upload_results:
	aws s3 cp $(DATA_DIR)/hybas_l03_v1c_Cases/nutrient_assimilation_capacity.shp $(AWS_S3_BUCKET_URL)/processed/nutrients_assimilation_capacity/

write_checksum:
	cd $(DATA_DIR)/hybas_l03_v1c_Cases && sha256sum nutrient_assimilation_capacity.shp > $(checksums_dir)/nutrient_assimilation_capacity
```
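The download itself is a manual step. If you would rather script it, a sketch along these lines may work; the `ndownloader` URL is an assumption based on figshare's usual direct-download pattern for the file id in the link above, so verify it before relying on it:

```bash
# Hypothetical download helper: fetch the archive under the name the
# Makefile expects. Verify the figshare URL before use.
mkdir -p data/
curl -L -o data/hybas_l03_v1c_Cases.zip \
    "https://figshare.com/ndownloader/files/31154728"
```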

README: 65 additions & 0 deletions

# Data Processing Pipeline README

This repository contains a data processing pipeline implemented using a Makefile and a Python script to download, preprocess, upload, and generate checksums for data files. The pipeline is designed to work with geospatial data related to nutrient assimilation capacity.

## Prerequisites

Before running the pipeline, ensure you have the following prerequisites in place:

1. **Data Download**: You need to manually download the data from [here](https://figshare.com/articles/figure/DRP_NO3_TN_TP_rasters/14527638/1?file=31154728) and save it in the `data/` directory.
2. **Python Dependencies**: The preprocessing script requires Python and the following packages (see the install sketch after this list):
   - `geopandas`
   - Other dependencies as specified in your `process_data.py` script.
3. **AWS Credentials**: To upload results to an AWS S3 bucket, you should have AWS credentials configured on your machine.
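A minimal setup sketch, assuming Python 3 and the AWS CLI are already installed (`geopandas` is the only package named here; check the imports in `process_data.py` for the full list):

```bash
# Install the preprocessing dependency
pip install geopandas

# Configure AWS credentials for the upload step (interactive prompts)
aws configure
```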

## Usage

### 1. Download and Unzip Data

After manually downloading the archive (see Prerequisites), unzip it with:

```bash
make unzip-limiting-nutrient
```

This command unzips the archive into the `data/` directory; the download itself is the manual step described above.

### 2. Preprocess Data

Before ingesting the data into your database, preprocess it using the Python script. Run the following command:

```bash
make process-limiting-nutrients
```

This command executes the `process_data.py` script, which performs the preprocessing, including reprojection to EPSG:4326 and calculation of the nutrient reduction percentages.

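For reference, the reduction percentage computed per basin in `process_data.py` has the following form, where $C$ is the concentration of the limiting nutrient and $C_{\text{target}}$ is the good-condition threshold (0.046 for phosphorus-limited basins, 0.7 for nitrogen-limited ones):

$$
\text{perc\_reduc} = \frac{C - C_{\text{target}}}{C} \times 100
$$
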
### 3. Upload Processed Data

To upload the processed data to an AWS S3 bucket, use the following command:

```bash
make upload_results
```

Make sure you have AWS credentials configured to access the specified S3 bucket.

### 4. Generate Checksum

Generate a SHA-256 checksum for the processed data by running the following command:

```bash
make write_checksum
```

This command will calculate the checksum and save it in the `data_checksums/` directory.

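To verify the shapefile against the stored checksum later, something like the following should work; `sha256sum -c` needs to run from the directory containing the shapefile because the checksum file records a relative filename (the path mirrors the Makefile's `checksums_dir`):

```bash
cd data/hybas_l03_v1c_Cases
sha256sum -c ../../../../h3_data_importer/data_checksums/nutrient_assimilation_capacity
```
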
## Configuration

You can configure the pipeline by modifying the variables at the top of the Makefile (or by overriding them per invocation, as shown after this list):

- `DATA_DIR`: Specify the directory where data files are stored.
- `checksums_dir`: Define the directory where checksum files will be saved.
- `AWS_S3_BUCKET_URL`: Set the AWS S3 bucket URL for uploading results.
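Make also accepts variable overrides on the command line, so you can redirect a single run without editing the file (hypothetical bucket name):

```bash
make upload_results AWS_S3_BUCKET_URL=s3://my-own-bucket
```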

Feel free to adapt this pipeline to suit your specific data processing needs and directory structure.

**Note**: Make sure you have the necessary permissions and access to the data sources and AWS resources mentioned in this README before running the pipeline.

data/preprocessing/nutrient_assimilation_capacity/process_data.py: 90 additions & 0 deletions

```python
""" Reads the limiting nutrients equal area vector file, reporjects the file to EPSG4326 and estimates the percentage of reduction needed to meet a good water quality conditions. | ||
Usage: | ||
process_data.py <folder> | ||
Arguments: | ||
<folder> Folder containing the limiting nutrients shapefile | ||
""" | ||
import os | ||
import logging | ||
from pathlib import Path | ||
import argparse | ||
|
||
import geopandas as gpd | ||
|
||
logging.basicConfig(level=logging.INFO) | ||
log = logging.getLogger("preprocessing_limiting_nutrients_file") | ||
|
||
def check_and_reproject_to_4326(gdf): | ||
""" | ||
Checks if a GeoDataFrame is in CRS 4326 (WGS84) and reprojects it if not. | ||
Parameters: | ||
- gdf: GeoDataFrame to check and reproject if needed. | ||
Returns: | ||
- Reprojected GeoDataFrame (if reprojected) or the original GeoDataFrame (if already in 4326). | ||
""" | ||
if gdf.crs is None or gdf.crs.to_epsg() != 4326: | ||
log.info("Reprojecting GeoDataFrame to EPSG:4326 (WGS84)...") | ||
try: | ||
# Reproject to EPSG:4326 | ||
gdf = gdf.to_crs(epsg=4326) | ||
log.info("Reprojection successful.") | ||
except: | ||
log.error("Reprojection failed with error") | ||
else: | ||
log.info("GeoDataFrame is already in EPSG:4326 (WGS84).") | ||
|
||
return gdf | ||
|
||
# Define the function to calculate perc_reduction | ||
def calculate_perc_reduction(row): | ||
if row['Cases_v2_1'] == 4 and row['TP_con_V2_']: | ||
return ((row['TP_con_V2_'] - 0.046) / row['TP_con_V2_']) * 100 | ||
elif row['Cases_v2_1'] == 2 and row['TN_con_V2_']: | ||
return ((row['TN_con_V2_'] - 0.7) / row['TN_con_V2_']) * 100 | ||
else: | ||
return 0 | ||
|
||
def process_folder(folder): | ||
vec_extensions = "gdb gpkg shp json geojson".split() | ||
path = Path(folder) | ||
vectors = [] | ||
for ext in vec_extensions: | ||
vectors.extend(path.glob(f"*.{ext}")) | ||
if not vectors: | ||
log.error(f"No vectors with extension {vec_extensions} found in {folder}") | ||
return | ||
if len(vectors) == 1: #folder just contains one vector file | ||
# Read the shapefile | ||
gdf = gpd.read_file(vectors[0]) | ||
# Check and reproject to EPSG:4326 | ||
gdf = check_and_reproject_to_4326(gdf) | ||
# Calculate perc_reduction and add it as a new column | ||
gdf['perc_reduc'] = gdf.apply(calculate_perc_reduction, axis=1) | ||
# Save the processed data to a new shapefile | ||
gdf = gdf[['Cases_v2_1', 'perc_reduc', 'geometry']] | ||
output_file = os.path.join(folder, 'nutrient_assimilation_capacity.shp') | ||
log.info(f"Saving preprocessed file to {output_file}") | ||
gdf.to_file(output_file) | ||
else: | ||
mssg = ( | ||
f"Found more than one vector file in {folder}." | ||
f" For now we only support folders with just one vector file." | ||
) | ||
logging.error(mssg) | ||
return | ||
|
||
def main(): | ||
# Parse command-line arguments | ||
parser = argparse.ArgumentParser(description="Process limiting nutrients vector files.") | ||
parser.add_argument("folder", type=str, help="Path to the folder containing vector files") | ||
args = parser.parse_args() | ||
|
||
# Process the specified folder | ||
process_folder(args.folder) | ||
|
||
if __name__ == "__main__": | ||
main() |
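Run outside of Make, the script takes the data folder as its only argument, matching the `process-limiting-nutrients` target:

```bash
python process_data.py data/hybas_l03_v1c_Cases
```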

Review comment: "You are already doing the nutrient load download, extract, and convert above. We need to remove the three duplicated commands below."