From 43336b7f545b3355231d454db4ae4612c136d27e Mon Sep 17 00:00:00 2001 From: Biel Stela Date: Thu, 25 Jan 2024 17:16:23 +0100 Subject: [PATCH] Rest of datasets --- data/base_data_importer/data/4.material.csv | 4 +- .../h3_data_importer/data_checksums/earthstat | 40 ++++++-- data/h3_data_importer/data_checksums/grassnes | 2 - .../raster_folder_to_h3_table.py | 1 + data/preprocessing/earthstat/Makefile | 60 +++--------- data/preprocessing/earthstat/aggregate.py | 91 +++++++++++++++++++ data/preprocessing/earthstat/extract.sh | 15 +++ data/preprocessing/earthstat/resample.sh | 5 +- data/preprocessing/earthstat/urls.txt | 14 +++ .../natural_crop_conversion/Makefile | 2 +- data/pyproject.toml | 8 +- 11 files changed, 172 insertions(+), 70 deletions(-) delete mode 100644 data/h3_data_importer/data_checksums/grassnes create mode 100644 data/preprocessing/earthstat/aggregate.py create mode 100644 data/preprocessing/earthstat/extract.sh create mode 100644 data/preprocessing/earthstat/urls.txt diff --git a/data/base_data_importer/data/4.material.csv b/data/base_data_importer/data/4.material.csv index bf408fe42..965b4686c 100644 --- a/data/base_data_importer/data/4.material.csv +++ b/data/base_data_importer/data/4.material.csv @@ -117,11 +117,11 @@ f792ad86-722c-48ae-a3e8-2054600d62ee,2ca7e810-7cf1-40eb-a393-d363730669db,"Hop c a09e2d72-e8ed-4178-ba3a-a6ddef208b3f,2ca7e810-7cf1-40eb-a393-d363730669db,"Locust beans, locust seeds","Locust beans, seaweeds and other algae, sugar beet, sugar cane, fresh, chilled, frozen or dried, whether or not ground; fruit stones, kernels and other vegetable products (including unroasted chicory roots) used primarily for human consumption, n.e.c.",1212,,,inactive,"{""name"": ""Global sugarbeet distribution in 2010"",""resolution"": ""10km x 10km"",""geographic coverage"": ""Global coverage"",""source"": ""International Food Policy Research Institute, 2019, “Global Spatially-Disaggregated Crop Production Statistics Data for 2010 Version 2.0”, 
https://doi.org/10.7910/DVN/PRFF8V, Harvard Dataverse, V4"",""datasets"": [ ""SPAM 2010 v2.0 Global Data (Updated 2020-07-15)""],""frequency of updates"": """",""date of content"": ""2010"",""cautions"": ""Global distributions of sugarbeet in 2010 expressed in tonnes (T)."",""license"": ""CC0 1.0"",""overview"": ""'All crops' represent all of the crops that are included in the tool as displayed in the menu. Pixel colors are shaded by level of production. The crop layers displayed on the map reflect 2010 data regardless of the timeframe selected."",""citation"": ""International Food Policy Research Institute, 2019, “Global Spatially-Disaggregated Crop Production Statistics Data for 2010 Version 2.0”, https://doi.org/10.7910/DVN/PRFF8V, Harvard Dataverse, V4""} ",spam_sugb,2ca7e810-7cf1-40eb-a393-d363730669db.a09e2d72-e8ed-4178-ba3a-a6ddef208b3f. 66c5d0f6-6592-4a7a-aab9-2c99d3d74d7c,2ca7e810-7cf1-40eb-a393-d363730669db,"Cereal straw and husks, unprepared","Cereal straw and husks, unprepared; whether or not chopped, ground, pressed or in the form of pellets",1213,,,inactive,"{""name"": ""Global distribution of other cereals crops in 2010"",""resolution"": ""10km x 10km"",""geographic coverage"": ""Global coverage"",""source"": ""International Food Policy Research Institute, 2019, “Global Spatially-Disaggregated Crop Production Statistics Data for 2010 Version 2.0”, https://doi.org/10.7910/DVN/PRFF8V, Harvard Dataverse, V4"",""datasets"": [""SPAM 2010 v2.0 Global Data (Updated 2020-07-15)""],""frequency of updates"": """",""date of content"": ""2010"",""cautions"": ""Global distributions of other cereals crops in 2010 expressed in tonnes (T)."",""license"": ""CC0 1.0"",""overview"": ""'All crops' represent all of the crops that are included in the tool as displayed in the menu. Pixel colors are shaded by level of production. 
The crop layers displayed on the map reflect 2010 data regardless of the timeframe selected."",""citation"": ""International Food Policy Research Institute, 2019, “Global Spatially-Disaggregated Crop Production Statistics Data for 2010 Version 2.0”, https://doi.org/10.7910/DVN/PRFF8V, Harvard Dataverse, V4""}",spam_ocer,2ca7e810-7cf1-40eb-a393-d363730669db.66c5d0f6-6592-4a7a-aab9-2c99d3d74d7c. 51d656dd-e2e0-4160-b3b1-6bc70f407f76,2ca7e810-7cf1-40eb-a393-d363730669db,Other oleaginous fruits,"Swedes, mangolds, fodder roots, hay, lucerne (alfalfa), clover, sainfoin, forage kale, lupines, vetches and similar forage products, whether or not in the form of pellets",1214,,,inactive,"{""name"": ""Global distribution of rest of crops in 2010"",""resolution"": ""10km x 10km"",""geographic coverage"": ""Global coverage"",""source"": ""International Food Policy Research Institute, 2019, “Global Spatially-Disaggregated Crop Production Statistics Data for 2010 Version 2.0”, https://doi.org/10.7910/DVN/PRFF8V, Harvard Dataverse, V4"",""datasets"": [""SPAM 2010 v2.0 Global Data (Updated 2020-07-15)""],""frequency of updates"": """",""date of content"": ""2010"",""cautions"": ""Global distributions of rest of crops in 2010 expressed in tonnes (T)."",""license"": ""CC0 1.0"",""overview"": ""'All crops' represent all of the crops that are included in the tool as displayed in the menu. Pixel colors are shaded by level of production. The crop layers displayed on the map reflect 2010 data regardless of the timeframe selected."",""citation"": ""International Food Policy Research Institute, 2019, “Global Spatially-Disaggregated Crop Production Statistics Data for 2010 Version 2.0”, https://doi.org/10.7910/DVN/PRFF8V, Harvard Dataverse, V4""}",earthstat_grassnes,2ca7e810-7cf1-40eb-a393-d363730669db.51d656dd-e2e0-4160-b3b1-6bc70f407f76. 
-c894f98f-2732-42a9-ac62-dd037de5f478,51d656dd-e2e0-4160-b3b1-6bc70f407f76,Grass silage,Grass silage,12.14.c.a,,,inactive,,earthstat_grassness_legumenes_mixedgrass_ryefor_sorghumfor_swedefor_turnipfor_vegfor_oilseedfor_fornes,51d656dd-e2e0-4160-b3b1-6bc70f407f76.c894f98f-2732-42a9-ac62-dd037de5f478 +c894f98f-2732-42a9-ac62-dd037de5f478,51d656dd-e2e0-4160-b3b1-6bc70f407f76,Grass silage,Grass silage,12.14.c.a,,,inactive,,earthstat_grassnesslegumenesmixedgrassryeforsorghumforswedeforturnipforvegforoilseedforfornes,51d656dd-e2e0-4160-b3b1-6bc70f407f76.c894f98f-2732-42a9-ac62-dd037de5f478 c3bb33fd-cf5b-475d-9aec-fd731b35e70b,51d656dd-e2e0-4160-b3b1-6bc70f407f77,Fresh grass,Fresh grass,12.14.c.b,,,inactive,,earthstat_clover,51d656dd-e2e0-4160-b3b1-6bc70f407f76.c3bb33fd-cf5b-475d-9aec-fd731b35e70b d424ea90-64ba-42cc-ac17-2447e0585c63,51d656dd-e2e0-4160-b3b1-6bc70f407f78,By-products,By-products,12.14.c.c,,,inactive,,,51d656dd-e2e0-4160-b3b1-6bc70f407f76.d424ea90-64ba-42cc-ac17-2447e0585c63 751526be-67be-4bdb-9aeb-d51beb5ef560,51d656dd-e2e0-4160-b3b1-6bc70f407f79,Maize silage (corn),Maize silage (corn),12.14.c.d,,,inactive,,earthstat_maizefor,51d656dd-e2e0-4160-b3b1-6bc70f407f76.751526be-67be-4bdb-9aeb-d51beb5ef560 -3ec334e2-8afb-4095-998f-90bae1263fbe,51d656dd-e2e0-4160-b3b1-6bc70f407f80,Other concentrates,Other concentrates,12.14.c.e,,,inactive,,earhstat_25%maizefor_15%soyb_10%oilseedfor_10%citrusnes_10%rapeseed_5%beetfor_5%whea,51d656dd-e2e0-4160-b3b1-6bc70f407f76.3ec334e2-8afb-4095-998f-90bae1263fbe +3ec334e2-8afb-4095-998f-90bae1263fbe,51d656dd-e2e0-4160-b3b1-6bc70f407f80,Other concentrates,Other concentrates,12.14.c.e,,,inactive,,earhstat_maizeforsoyboilseedforcitrusnesrapeseedbeetforwhea,51d656dd-e2e0-4160-b3b1-6bc70f407f76.3ec334e2-8afb-4095-998f-90bae1263fbe bd93937b-c839-453f-be59-7512df89f0d4,51d656dd-e2e0-4160-b3b1-6bc70f407f81,"Protein (soy meal), soy meal","Protein (soy meal), soy 
meal",12.14.c.f,,,inactive,,spam_soyb,51d656dd-e2e0-4160-b3b1-6bc70f407f76.bd93937b-c839-453f-be59-7512df89f0d4 1ca340bf-5025-40a6-8606-414f3fb579e2,51d656dd-e2e0-4160-b3b1-6bc70f407f82,By-products (wheat),By-products (wheat),12.14.c.g,,,inactive,,spam_whea,51d656dd-e2e0-4160-b3b1-6bc70f407f76.1ca340bf-5025-40a6-8606-414f3fb579e2 29f8dbce-6d6f-4f98-b2f8-02e128d50204,51d656dd-e2e0-4160-b3b1-6bc70f407f83,Alfalfa,Alfalfa,12.14.c.h,,,inactive,,earthstat_alfalfa,51d656dd-e2e0-4160-b3b1-6bc70f407f76.29f8dbce-6d6f-4f98-b2f8-02e128d50204 diff --git a/data/h3_data_importer/data_checksums/earthstat b/data/h3_data_importer/data_checksums/earthstat index 8acf0780d..ca5008e80 100644 --- a/data/h3_data_importer/data_checksums/earthstat +++ b/data/h3_data_importer/data_checksums/earthstat @@ -1,10 +1,30 @@ -e9bc4b5da0a989a2dee26bb5f77cab64dd72103c33328d0e6b6497ffc162ba4d harvest/earthstat_global_alfalfa_harvest_ha.tif -984c66e6326b9b66594d04314485adcc622255a13bd71512847b4f1b1e06f61d harvest/earthstat_global_clover_harvest_ha.tif -705abfab0294c2bfca4d2a5e333bf0b7237339b52b52880f7cfb78a21c7c2aa3 harvest/earthstat_global_grassnes_harvest_ha.tif -1b360b651c7f376daa750266ccec0df67306f0692727b3738c35f959bd055b9f harvest/earthstat_global_maizefor_harvest_ha.tif -5c4c5bd48c14f51030ad5cd19aafbe0dddb154d8dc612b85cae5b75e0c69af06 harvest/earthstat_global_triticale_harvest_ha.tif -a73ae9e7af6af3ba06176f65b7af96e3d98f71d4c7f3d04f15ba0c8641d5bf5e production/earthstat_global_alfalfa_production_t.tif -74b2c08865d430eef071beae49c00e5a4f4d6f4a63b0fd8e228dfe09873fd2b3 production/earthstat_global_clover_production_t.tif -1bee00b30181661f8c8a5de5dec9a28983fff3ee973ff8465a46376ba7195f38 production/earthstat_global_grassnes_production_t.tif -7b794071b81f59a939ed156042817ca83863f16fda434577048a075d997489e1 production/earthstat_global_maizefor_production_t.tif -bca39c9f4ef90f65a8416e663181dcaa488ca5007b9b3a3e3587c15f80fc4793 production/earthstat_global_triticale_production_t.tif 
+cbe1a7805e0497ee844247872940af14e17d75ce6fe2121cd1ea9a8eb21fd18a harvest/earthstat_global_harvest_GrassnessLegumenesMixedgrassRyeforSorghumforSwedeforTurnipforVegforOilseedforFornes_ha.tif +3215dcabdbd9890d5af9421ef8f70602fd0e410cf71680e14faa676a6e53bc3a harvest/earthstat_global_harvest_MaizeforSoybOilseedforCitrusnesRapeseedBeetforWhea_ha.tif +e9bc4b5da0a989a2dee26bb5f77cab64dd72103c33328d0e6b6497ffc162ba4d harvest/earthstat_global_harvest_alfalfa_ha.tif +984c66e6326b9b66594d04314485adcc622255a13bd71512847b4f1b1e06f61d harvest/earthstat_global_harvest_clover_ha.tif +725c8af97d543e7d3a8c0a44c03fc0ab6b61e59d214a2272552ce9f69ea96ea8 harvest/earthstat_global_harvest_fornes_ha.tif +705abfab0294c2bfca4d2a5e333bf0b7237339b52b52880f7cfb78a21c7c2aa3 harvest/earthstat_global_harvest_grassnes_ha.tif +80a4097873281b7a5465b9676879e8676c85ce7118af70bca1e63315458bb055 harvest/earthstat_global_harvest_legumenes_ha.tif +1b360b651c7f376daa750266ccec0df67306f0692727b3738c35f959bd055b9f harvest/earthstat_global_harvest_maizefor_ha.tif +5634cda2060f6e2b221917fc1b306cd5a35ec658729fc8d276a7dcb032b4b2aa harvest/earthstat_global_harvest_mixedgrass_ha.tif +a936cc4a7892f0a99350a6f727d71e548e462ded2843251b66a4552f7597f354 harvest/earthstat_global_harvest_oilseedfor_ha.tif +b00509436d9583f7cf2a608774c2ac6058edb2e9267c279ae9889ab9ffc3fdd7 harvest/earthstat_global_harvest_ryefor_ha.tif +1ff3a7422c074f673b5911b14ced5acb657ea74bf9101290bc19563d9c345776 harvest/earthstat_global_harvest_swedefor_ha.tif +5c4c5bd48c14f51030ad5cd19aafbe0dddb154d8dc612b85cae5b75e0c69af06 harvest/earthstat_global_harvest_triticale_ha.tif +ebb1243f4dee27af2accf3d1cb4dd65080e3bd56d60a742865cbe0474b053555 harvest/earthstat_global_harvest_turnipfor_ha.tif +ca0883a8552533f062bc67f5a5363b7b19b71c92f7961945037360922c0e0407 harvest/earthstat_global_harvest_vegfor_ha.tif +e619e877dff9c1a594ff2877a120461fdb2cf3a54cba930ece38d0f292da177c 
production/earthstat_global_production_GrassnessLegumenesMixedgrassRyeforSorghumforSwedeforTurnipforVegforOilseedforFornes_t.tif +b7ed03d27e76fce9410fb00a422791a7a0113e2b9ae5c026a7c8ac405dbd7a64 production/earthstat_global_production_MaizeforSoybOilseedforCitrusnesRapeseedBeetforWhea_t.tif +a73ae9e7af6af3ba06176f65b7af96e3d98f71d4c7f3d04f15ba0c8641d5bf5e production/earthstat_global_production_alfalfa_t.tif +74b2c08865d430eef071beae49c00e5a4f4d6f4a63b0fd8e228dfe09873fd2b3 production/earthstat_global_production_clover_t.tif +333fad59cc36eeea714a9b2315d2f422c8247b9389abe0506f51bebfc6c38268 production/earthstat_global_production_fornes_t.tif +1bee00b30181661f8c8a5de5dec9a28983fff3ee973ff8465a46376ba7195f38 production/earthstat_global_production_grassnes_t.tif +5ef4a91ecebea50eea22284995ec4ac1aa0ec4ff32e6d45f1390d6a124030c5c production/earthstat_global_production_legumenes_t.tif +7b794071b81f59a939ed156042817ca83863f16fda434577048a075d997489e1 production/earthstat_global_production_maizefor_t.tif +84ae5ed01a1119f7a3ba4e0529345710e553f2d6529cfd082814624ca32f71f9 production/earthstat_global_production_mixedgrass_t.tif +6ac0d6424b35c905536f7f02d8ac658ec03d7d548ccd5c60800033594c453e04 production/earthstat_global_production_oilseedfor_t.tif +9949e83069e582e543a95e1e6660be1393eefcccaaebc91ea0c422a856626cb9 production/earthstat_global_production_ryefor_t.tif +acfb1a92ca203a3f97dcb002960c3b3cf688c99d126dbee06cbb5e2378330560 production/earthstat_global_production_swedefor_t.tif +bca39c9f4ef90f65a8416e663181dcaa488ca5007b9b3a3e3587c15f80fc4793 production/earthstat_global_production_triticale_t.tif +53ae5bde5a9ed3d6500eea1ea0aef64df9ee0dcbe63a5e69dc5d72860d1be662 production/earthstat_global_production_turnipfor_t.tif +1814503f3433f6fafdcf61b790dd2594cf3c0ab4d13bc1bcf45076c6a4107b45 production/earthstat_global_production_vegfor_t.tif diff --git a/data/h3_data_importer/data_checksums/grassnes b/data/h3_data_importer/data_checksums/grassnes deleted file mode 100644 index 
45da64e2b..000000000 --- a/data/h3_data_importer/data_checksums/grassnes +++ /dev/null @@ -1,2 +0,0 @@ -705abfab0294c2bfca4d2a5e333bf0b7237339b52b52880f7cfb78a21c7c2aa3 earthstat_global_harvest_grassnes_ha.tif -1bee00b30181661f8c8a5de5dec9a28983fff3ee973ff8465a46376ba7195f38 earthstat_global_prod_grassnes_t.tif diff --git a/data/h3_data_importer/raster_folder_to_h3_table.py b/data/h3_data_importer/raster_folder_to_h3_table.py index 6fbab948d..5d923f581 100644 --- a/data/h3_data_importer/raster_folder_to_h3_table.py +++ b/data/h3_data_importer/raster_folder_to_h3_table.py @@ -178,6 +178,7 @@ def update_for_material(cursor: psycopg.Cursor, dataset: str, column_name: str, raise ValueError(f"h3_data with 'h3columnName' {column_name} does not exists") # FIXME: the current solution for naming a material datasets is hard to follow and easy to mess up. dataset_id = dataset + "_" + snakify(column_name).split("_")[-2] + log.info(f"Updating material_to_h3 for {column_name} of {dataset_id}") type_map = {"harvest_area": "harvest", "production": "producer"} delete_query = sql.SQL( 'DELETE FROM "material_to_h3" WHERE "materialId" = {material_id} AND "type" = {data_type}' diff --git a/data/preprocessing/earthstat/Makefile b/data/preprocessing/earthstat/Makefile index 6a884341d..3f9b930fd 100644 --- a/data/preprocessing/earthstat/Makefile +++ b/data/preprocessing/earthstat/Makefile @@ -1,13 +1,7 @@ checksums_dir=../../../h3_data_importer/data_checksums -data_dir=data resampling_resolution="0.083333" AWS_S3_BUCKET_URL=s3://landgriffon-raw-data -GRASSNES_URL=https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/grassnes_HarvAreaYield_Geotiff.zip -CLOVER_URL=https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/clover_HarvAreaYield_Geotiff.zip -MAIZEFOR_URL=https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/maizefor_HarvAreaYield_Geotiff.zip 
-ALFA_URL=https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/alfalfa_HarvAreaYield_Geotiff.zip -TRITICALE_URL=https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/triticale_HarvAreaYield_Geotiff.zip include ../../../.env @@ -16,56 +10,30 @@ export AWS_SECRET_ACCESS_KEY .PHONY: all download_grassnes extract_grassnes_rasters resample_grassnes upload_results write_checksum -all: clean download extract resample upload_results write_checksum +all: clean download extract resample aggregate upload_results write_checksum download: - wget $(GRASSNES_URL) -O data/grassnes_HarvAreaYield_Geotiff.zip - wget $(CLOVER_URL) -O data/clover_HarvAreaYield_Geotiff.zip - wget $(MAIZEFOR_URL) -O data/maizefor_HarvAreaYield_Geotiff.zip - wget $(ALFA_URL) -O data/alfalfa_HarvAreaYield_Geotiff.zip - wget $(TRITICALE_URL) -O data/triticale_HarvAreaYield_Geotiff.zip + for url in `cat urls.txt`; do \ + wget -P data/ $$url; \ + done extract: - unzip -oj data/grassnes_HarvAreaYield_Geotiff.zip \ - grassnes_HarvAreaYield_Geotiff/grassnes_Production.tif \ - grassnes_HarvAreaYield_Geotiff/grassnes_HarvestedAreaHectares.tif \ - -d data/ - rm data/grassnes_HarvAreaYield_Geotiff.zip - - unzip -oj data/clover_HarvAreaYield_Geotiff.zip \ - clover_HarvAreaYield_Geotiff/clover_Production.tif \ - clover_HarvAreaYield_Geotiff/clover_HarvestedAreaHectares.tif \ - -d data/ - rm data/clover_HarvAreaYield_Geotiff.zip - - unzip -oj data/maizefor_HarvAreaYield_Geotiff.zip \ - maizefor_HarvAreaYield_Geotiff/maizefor_Production.tif \ - maizefor_HarvAreaYield_Geotiff/maizefor_HarvestedAreaHectares.tif \ - -d data/ - rm data/maizefor_HarvAreaYield_Geotiff.zip - - unzip -oj data/alfalfa_HarvAreaYield_Geotiff.zip \ - alfalfa_HarvAreaYield_Geotiff/alfalfa_Production.tif \ - alfalfa_HarvAreaYield_Geotiff/alfalfa_HarvestedAreaHectares.tif \ - -d data/ - rm data/alfalfa_HarvAreaYield_Geotiff.zip - - unzip -oj 
data/triticale_HarvAreaYield_Geotiff.zip \ - triticale_HarvAreaYield_Geotiff/triticale_Production.tif \ - triticale_HarvAreaYield_Geotiff/triticale_HarvestedAreaHectares.tif \ - -d data/ - rm data/triticale_HarvAreaYield_Geotiff.zip + bash extract.sh resample: - mkdir -p $(data_dir)/harvest $(data_dir)/production + mkdir -p data/harvest data/production bash resample.sh +aggregate: + python aggregate.py data/harvest harvest + python aggregate.py data/production production + upload_results: - aws s3 sync $(data_dir) ${AWS_S3_BUCKET_URL}/processed/earthstat/ + aws s3 sync data ${AWS_S3_BUCKET_URL}/processed/earthstat/ write_checksum: - cd $(data_dir) && sha256sum harvest/* > $(checksums_dir)/earthstat - cd $(data_dir) && sha256sum production/* >> $(checksums_dir)/earthstat + cd data && sha256sum harvest/* > $(checksums_dir)/earthstat + cd data && sha256sum production/* >> $(checksums_dir)/earthstat clean: - rm -rf $(data_dir)/* + rm -rf data/*
diff --git a/data/preprocessing/earthstat/aggregate.py b/data/preprocessing/earthstat/aggregate.py
new file mode 100644
index 000000000..9ee6b30e7
--- /dev/null
+++ b/data/preprocessing/earthstat/aggregate.py
@@ -0,0 +1,131 @@
+"""Aggregate resampled EarthStat rasters into weighted composite rasters."""
+
+import logging
+from enum import Enum
+from pathlib import Path
+from typing import Annotated
+
+import rasterio as rio
+import typer
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger("aggregate")
+
+# Weight of every crop in the aggregate. The keys also spell the label of the output file.
+GRAS_SILAGE_COMPONENTS = {
+    "grassness": 1,
+    "legumenes": 1,
+    "mixedgrass": 1,
+    "ryefor": 1,
+    "sorghumfor": 1,
+    "swedefor": 1,
+    "turnipfor": 1,
+    "vegfor": 1,
+    "oilseedfor": 1,
+    "fornes": 1,
+}
+
+# earhstat_25%maizefor_15%soyb_10%oilseedfor_10%citrusnes_10%rapeseed_5%beetfor_5%whea
+# NOTE(review): these weights add up to 0.80, not 1.0 -- confirm that this is intended.
+OTHER_CONCENTRATES_COMPONENTS = {
+    "maizefor": 0.25,
+    "soyb": 0.15,
+    "oilseedfor": 0.1,
+    "citrusnes": 0.1,
+    "rapeseed": 0.1,
+    "beetfor": 0.05,
+    "whea": 0.05,
+}
+
+# The downloaded layer is called `grassnes` (see urls.txt) but the component table, and the
+# aggregate file name derived from it, spell it `grassness`. Translate the crop code found in
+# the file name to the table key so that the layer is not silently left out of the sum.
+CROP_ALIASES = {"grassnes": "grassness"}
+
+
+class HarvestOrProd(str, Enum):
+    """Kind of layer to aggregate; the member name is embedded in the output file name."""
+
+    harvest = "harvest"
+    production = "production"
+
+
+def crop(filename: str) -> str:
+    """Extract crop name from filename like:
+    earthstat_global_harvest_oilseedfor_ha.tif -> oilseedfor
+    """
+    return filename.split("_")[3]
+
+
+def aggregate(data_dir: Path, proportions: dict[str, float], harvest_or_prod: HarvestOrProd) -> str:
+    """Sum the rasters in ``data_dir`` whose crop is listed in ``proportions``, each scaled by its weight.
+
+    The first band of every matching ``*.tif`` is read as a masked array, multiplied by its weight and
+    added up. The result is written next to the inputs, reusing the metadata of the first raster read.
+
+    Args:
+        data_dir: Directory holding the ``earthstat_global_<kind>_<crop>_<unit>.tif`` rasters.
+        proportions: Weight per crop name. The keys also make up the output file name.
+        harvest_or_prod: Kind of layer; selects the unit suffix (``ha`` for harvest, ``t`` otherwise).
+
+    Returns:
+        POSIX path of the raster that was written.
+
+    Raises:
+        ValueError: If no raster in ``data_dir`` matches any crop in ``proportions``.
+    """
+    components = []
+    # Sorted so that the summation order, and with it the floating point result, is reproducible.
+    for file in sorted(data_dir.glob("*.tif")):
+        try:
+            name = crop(file.stem)
+        except IndexError:
+            # Not named like an EarthStat raster, so it cannot be a component.
+            continue
+        key = CROP_ALIASES.get(name, name)
+        if key in proportions:
+            components.append((file, key))
+
+    if not components:
+        raise ValueError(f"No raster in {data_dir} matches any of: {', '.join(proportions)}")
+    found = {key for _, key in components}
+    missing = [key for key in proportions if key not in found]
+    if missing:
+        # A component without raster would otherwise be left out without anyone noticing.
+        log.warning(f"No raster found in {data_dir} for: {', '.join(missing)}")
+
+    data = None
+    for file, key in components:
+        with rio.open(file) as src:
+            weighted = src.read(1, masked=True) * proportions[key]
+            if data is None:
+                ref_meta = src.meta.copy()
+                data = weighted
+            else:
+                data += weighted
+
+    unit = "ha" if harvest_or_prod == "harvest" else "t"
+    label = "".join(crop_name.title() for crop_name in proportions)
+    outfile = data_dir / f"earthstat_global_{harvest_or_prod.name}_{label}_{unit}.tif"
+
+    with rio.open(outfile, "w", **ref_meta) as dest:
+        dest.write(data, 1)
+
+    return outfile.as_posix()
+
+
+def main(
+    data_dir: Annotated[Path, typer.Argument()],
+    harvest_or_prod: Annotated[HarvestOrProd, typer.Argument(case_sensitive=False)],
+) -> None:
+    """Build the grass silage and other concentrates aggregates from the rasters in ``data_dir``."""
+    if not any(data_dir.glob("*.tif")):
+        raise typer.BadParameter(f"Directory {data_dir} does not contain any tif files.")
+    gras_silage_filename = aggregate(data_dir, GRAS_SILAGE_COMPONENTS, harvest_or_prod)
+    others_filename = aggregate(data_dir, OTHER_CONCENTRATES_COMPONENTS, harvest_or_prod)
+    log.info(f"Created {gras_silage_filename}")
+    log.info(f"Created {others_filename}")
+
+
+if __name__ == "__main__":
+    typer.run(main)
diff --git a/data/preprocessing/earthstat/extract.sh b/data/preprocessing/earthstat/extract.sh new file mode 100644 index 000000000..a35af1e86 --- /dev/null +++ b/data/preprocessing/earthstat/extract.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +set -e + +for file in data/*.zip; do + filename=`basename
$file .zip`; + crop_type="${filename%%_*}" + + unzip -oj $file \ + "${filename}/${crop_type}_Production.tif" \ + "${filename}/${crop_type}_HarvestedAreaHectares.tif" \ + -d data/; + + rm $file; +done diff --git a/data/preprocessing/earthstat/resample.sh b/data/preprocessing/earthstat/resample.sh index 326181fa7..4ef0b4349 100644 --- a/data/preprocessing/earthstat/resample.sh +++ b/data/preprocessing/earthstat/resample.sh @@ -1,4 +1,5 @@ -#!/bin/bash +#!/usr/bin/env bash + set -e data_dir="data" @@ -14,7 +15,7 @@ for file in $data_dir/*.tif; do harv_or_prod="production" unit="t" fi - outfile="earthstat_global_${crop_type}_${harv_or_prod}_${unit}.tif" + outfile="earthstat_global_${harv_or_prod}_${crop_type}_${unit}.tif" echo "Resampling $file to $outfile" rio warp \ $file \ diff --git a/data/preprocessing/earthstat/urls.txt b/data/preprocessing/earthstat/urls.txt new file mode 100644 index 000000000..2f1265bd1 --- /dev/null +++ b/data/preprocessing/earthstat/urls.txt @@ -0,0 +1,14 @@ +https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/grassnes_HarvAreaYield_Geotiff.zip +https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/clover_HarvAreaYield_Geotiff.zip +https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/maizefor_HarvAreaYield_Geotiff.zip +https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/alfalfa_HarvAreaYield_Geotiff.zip +https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/triticale_HarvAreaYield_Geotiff.zip +https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/legumenes_HarvAreaYield_Geotiff.zip +https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/mixedgrass_HarvAreaYield_Geotiff.zip 
+https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/ryefor_HarvAreaYield_Geotiff.zip +https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/sorghumfor_HarvAreaYield_Geotiff.zip +https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/swedefor_HarvAreaYield_Geotiff.zip +https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/turnipfor_HarvAreaYield_Geotiff.zip +https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/vegfor_HarvAreaYield_Geotiff.zip +https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/oilseedfor_HarvAreaYield_Geotiff.zip +https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/fornes_HarvAreaYield_Geotiff.zip diff --git a/data/preprocessing/natural_crop_conversion/Makefile b/data/preprocessing/natural_crop_conversion/Makefile index 751adbb9e..5468f0313 100644 --- a/data/preprocessing/natural_crop_conversion/Makefile +++ b/data/preprocessing/natural_crop_conversion/Makefile @@ -1,5 +1,5 @@ # Variables -data_dir=./data/ +data_dir=./data resampling_resolution="0.083333" checksums_dir=../../../h3_data_importer/data_checksums AWS_S3_BUCKET_URL=s3://landgriffon-raw-data diff --git a/data/pyproject.toml b/data/pyproject.toml index 6c4656395..0e17d1b7a 100644 --- a/data/pyproject.toml +++ b/data/pyproject.toml @@ -1,10 +1,4 @@ -[tool.black] -line-length = 120 - -[tool.isort] -profile = "black" - [tool.ruff] -select = ["E", "F", "N"] +select = ["E", "F", "N", "I"] line-length = 120 ignore = []