From 3dbd55c474456880d08056d400d4899491d1b1ba Mon Sep 17 00:00:00 2001
From: elpamart
Date: Thu, 21 Sep 2023 15:55:14 +0200
Subject: [PATCH] add biodiversity preprocessing

---
 .../data_checksums/biodiversity            |  1 +
 data/preprocessing/biodiversity/Makefile   | 30 ++++++++++++
 data/preprocessing/biodiversity/README.md  | 65 +++++++++++++++++++++++
 3 files changed, 96 insertions(+)
 create mode 100644 data/h3_data_importer/data_checksums/biodiversity
 create mode 100644 data/preprocessing/biodiversity/Makefile
 create mode 100644 data/preprocessing/biodiversity/README.md

diff --git a/data/h3_data_importer/data_checksums/biodiversity b/data/h3_data_importer/data_checksums/biodiversity
new file mode 100644
index 0000000000..31674092d8
--- /dev/null
+++ b/data/h3_data_importer/data_checksums/biodiversity
@@ -0,0 +1 @@
+683cfc1d67d2fbd28673965b10f5bbeb33e4a7bb7fdef3d7807967e0faaef82b  natural_crop_flii_loss_by_human_lu_50km_10km.tif

diff --git a/data/preprocessing/biodiversity/Makefile b/data/preprocessing/biodiversity/Makefile
new file mode 100644
index 0000000000..11a26c7fdc
--- /dev/null
+++ b/data/preprocessing/biodiversity/Makefile
@@ -0,0 +1,30 @@
+# Variables
+data_dir=data
+resampling_resolution=0.083333
+checksums_dir=../../../h3_data_importer/data_checksums
+AWS_S3_BUCKET_URL=s3://landgriffon-raw-data
+
+# Targets
+.PHONY: all download_forest_landscape_integrity_loss resample_forest_landscape_integrity_loss upload_results write_checksum
+
+all: download_forest_landscape_integrity_loss resample_forest_landscape_integrity_loss upload_results write_checksum
+
+download_forest_landscape_integrity_loss:
+	mkdir -p $(data_dir)
+	gsutil -m cp \
+		"gs://landgriffon-gee-bucket/landscape_indicators_20230821/natural_crop_flii_loss_by_human_lu_50km_1000m.tif" \
+		$(data_dir)
+
+resample_forest_landscape_integrity_loss:
+	rio warp \
+		$(data_dir)/natural_crop_flii_loss_by_human_lu_50km_1000m.tif \
+		$(data_dir)/natural_crop_flii_loss_by_human_lu_50km_10km.tif \
+		--resampling average \
+		--res $(resampling_resolution) \
+		--overwrite
+
+upload_results:
+	aws s3 cp $(data_dir)/natural_crop_flii_loss_by_human_lu_50km_10km.tif $(AWS_S3_BUCKET_URL)/processed/biodiversity/
+
+write_checksum:
+	cd $(data_dir) && sha256sum natural_crop_flii_loss_by_human_lu_50km_10km.tif > $(checksums_dir)/biodiversity

diff --git a/data/preprocessing/biodiversity/README.md b/data/preprocessing/biodiversity/README.md
new file mode 100644
index 0000000000..58af9fc8b7
--- /dev/null
+++ b/data/preprocessing/biodiversity/README.md
@@ -0,0 +1,65 @@
+# Biodiversity Indicators
+## Forest Landscape Integrity Loss Processing
+
+This folder contains a set of Makefile targets to download, process, and upload the Biodiversity - Forest Landscape Integrity Loss data. The data is downloaded from Google Cloud Storage (GCS), resampled, and then uploaded to an Amazon Web Services (AWS) S3 bucket.
+
+## Prerequisites
+
+Before running the pipeline, ensure you have the following prerequisites in place:
+
+1. **Google Cloud SDK (gsutil)**: You'll need gsutil to download data from Google Cloud Storage. Make sure it's installed and authenticated.
+
+2. **Rasterio (rio)**: You'll need rio (Rasterio) for resampling the raster data. Install it using pip if it's not already installed: `pip install rasterio`.
+
+3. **Amazon Web Services (AWS) CLI**: You must have the AWS CLI installed and configured with the necessary permissions to upload data to your AWS S3 bucket.
+
+## Usage
+
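+To run the whole pipeline end to end, you can invoke the default target, which chains the four steps described below:
+
+```
+make all
+```
+
+Alternatively, run each step individually:
+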
+### 1. Download data
+
+This target downloads the forest landscape integrity loss data from GCS and stores it in the data directory.
+
+```
+make download_forest_landscape_integrity_loss
+```
+
+### 2. Resample forest landscape integrity loss
+
+This target resamples the downloaded raster from 1 km to roughly 10 km (0.083333 degrees) using Rasterio's average resampling. It reads the input file from the data directory and writes the resampled version alongside it; `--overwrite` replaces any previous output.
+
+```
+make resample_forest_landscape_integrity_loss
+```
+
+### 3. Upload results
+
+This target uploads the processed forest landscape integrity loss data to an AWS S3 bucket. Make sure the `AWS_S3_BUCKET_URL` Makefile variable points to the destination S3 bucket; you can override it on the command line, e.g. `make upload_results AWS_S3_BUCKET_URL=s3://your-bucket`.
+
+```
+make upload_results
+```
+
+### 4. Write checksums
+
+This target generates a SHA-256 checksum for the processed data and writes it to a file in the checksums directory (a verification sketch follows the note below).
+
+```
+make write_checksum
+```
+
+**Note**: Make sure you have the necessary permissions and access to the data sources and AWS resources mentioned in this README before running the pipeline.
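+
+To later verify that a copy of the processed raster matches the recorded checksum, something like the following should work (a minimal sketch, assuming the pipeline has been run from this folder so the raster is still in `data/`):
+
+```
+cd data
+sha256sum -c ../../../h3_data_importer/data_checksums/biodiversity
+```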