diff --git a/constants_and_names.py b/constants_and_names.py index 4eebfd23..03d0cad2 100644 --- a/constants_and_names.py +++ b/constants_and_names.py @@ -8,7 +8,7 @@ ######## ######## # Model version -version = '1.3.1' +version = '1.3.2' version_filename = version.replace('.', '_') @@ -44,7 +44,7 @@ ### Constants # Number of years of tree cover loss. If input loss raster is changed, this must be changed, too. -loss_years = 22 +loss_years = 23 # Number of years in tree cover gain. If input cover gain raster is changed, this must be changed, too. gain_years = 20 @@ -153,7 +153,7 @@ ### Model extent ###### pattern_model_extent = 'model_extent' -model_extent_dir = os.path.join(s3_base_dir, 'model_extent/standard/20231114/') +model_extent_dir = os.path.join(s3_base_dir, 'model_extent/standard/20240308/') ###### ### Biomass tiles @@ -205,9 +205,9 @@ gain_spreadsheet = 'gain_rate_continent_ecozone_age_20230821.xlsx' gain_spreadsheet_dir = os.path.join(s3_base_dir, 'removal_rate_tables/') -# Annual Hansen loss tiles (2001-2022) -pattern_loss = 'GFW2022' -loss_dir = 's3://gfw2-data/forest_change/hansen_2022/' +# Annual Hansen loss tiles (2001-2023) +pattern_loss = 'GFW2023' +loss_dir = 's3://gfw2-data/forest_change/hansen_2023/' # Hansen removals tiles based on canopy height (2000-2020) # From https://www.frontiersin.org/articles/10.3389/frsen.2022.856903/full @@ -333,13 +333,13 @@ # Drivers of tree cover loss drivers_raw_dir = os.path.join(s3_base_dir, 'other_emissions_inputs/tree_cover_loss_drivers/raw/') -pattern_drivers_raw = 'TCL_DD_2022_20230407_wgs84_setnodata.tif' +pattern_drivers_raw = 'Goode_FinalClassification_2023_wgs84_v20240402.tif' pattern_drivers = 'tree_cover_loss_driver_processed' -drivers_processed_dir = os.path.join(s3_base_dir, 'other_emissions_inputs/tree_cover_loss_drivers/processed/drivers_2022/20230407/') +drivers_processed_dir = os.path.join(s3_base_dir, 'other_emissions_inputs/tree_cover_loss_drivers/processed/drivers_2023/20240402/') # Tree cover loss from fires -TCLF_raw_dir = 's3://gfw-data-lake/umd_tree_cover_loss_from_fires/v20230315/raw/' -TCLF_processed_dir = os.path.join(s3_base_dir, 'other_emissions_inputs/tree_cover_loss_fires/20230315/processed/') +TCLF_raw_dir = 's3://gfw2-data/forest_change/hansen_2023_fire/' +TCLF_processed_dir = os.path.join(s3_base_dir, 'other_emissions_inputs/tree_cover_loss_fires/20240304/processed/') pattern_TCLF_processed = 'tree_cover_loss_fire_processed' @@ -402,7 +402,8 @@ # Age categories over entire model extent, as a precursor to assigning IPCC default removal rates pattern_age_cat_IPCC = 'forest_age_category_IPCC__1_young_2_mid_3_old' -age_cat_IPCC_dir = os.path.join(s3_base_dir, 'forest_age_category_IPCC/standard/20231114/') +age_cat_IPCC_dir = os.path.join(s3_base_dir, 'forest_age_category_IPCC/standard/20240308/') + ### US-specific removal precursors @@ -464,31 +465,34 @@ # Annual aboveground biomass removals rate using IPCC default removal rates pattern_annual_gain_AGB_IPCC_defaults = 'annual_removal_factor_AGB_Mg_ha_IPCC_defaults_all_ages' -annual_gain_AGB_IPCC_defaults_dir = os.path.join(s3_base_dir, 'annual_removal_factor_AGB_IPCC_defaults_all_ages/standard/20231114/') +annual_gain_AGB_IPCC_defaults_dir = os.path.join(s3_base_dir, 'annual_removal_factor_AGB_IPCC_defaults_all_ages/standard/20240308/') # Annual aboveground biomass removals rate using IPCC default removal rates pattern_annual_gain_BGB_IPCC_defaults = 'annual_removal_factor_BGB_Mg_ha_IPCC_defaults_all_ages' -annual_gain_BGB_IPCC_defaults_dir = 
os.path.join(s3_base_dir, 'annual_removal_factor_BGB_IPCC_defaults_all_ages/standard/20231114/') +annual_gain_BGB_IPCC_defaults_dir = os.path.join(s3_base_dir, 'annual_removal_factor_BGB_IPCC_defaults_all_ages/standard/20240308/') + ### Annual composite removal factor # Annual aboveground removals rate for all forest types pattern_annual_gain_AGC_all_types = 'annual_removal_factor_AGC_Mg_ha_all_forest_types' -annual_gain_AGC_all_types_dir = os.path.join(s3_base_dir, 'annual_removal_factor_AGC_all_forest_types/standard/20231114/') +annual_gain_AGC_all_types_dir = os.path.join(s3_base_dir, 'annual_removal_factor_AGC_all_forest_types/standard/20240308/') # Annual belowground removals rate for all forest types pattern_annual_gain_BGC_all_types = 'annual_removal_factor_BGC_Mg_ha_all_forest_types' -annual_gain_BGC_all_types_dir = os.path.join(s3_base_dir, 'annual_removal_factor_BGC_all_forest_types/standard/20231114/') +annual_gain_BGC_all_types_dir = os.path.join(s3_base_dir, 'annual_removal_factor_BGC_all_forest_types/standard/20240308/') # Annual aboveground+belowground removals rate for all forest types pattern_annual_gain_AGC_BGC_all_types = 'annual_removal_factor_AGC_BGC_Mg_ha_all_forest_types' -annual_gain_AGC_BGC_all_types_dir = os.path.join(s3_base_dir, 'annual_removal_factor_AGC_BGC_all_forest_types/standard/20231114/') +annual_gain_AGC_BGC_all_types_dir = os.path.join(s3_base_dir, 'annual_removal_factor_AGC_BGC_all_forest_types/standard/20240308/') + ### Removal forest types (sources) # Forest type used in removals model pattern_removal_forest_type = 'removal_forest_type' -removal_forest_type_dir = os.path.join(s3_base_dir, 'removal_forest_type/standard/20231114/') +removal_forest_type_dir = os.path.join(s3_base_dir, 'removal_forest_type/standard/20240308/') + # Removal model forest type codes mangrove_rank = 6 @@ -503,26 +507,27 @@ # Number of removals years for all forest types pattern_gain_year_count = 'gain_year_count_all_forest_types' -gain_year_count_dir = os.path.join(s3_base_dir, 'gain_year_count_all_forest_types/standard/20231114/') +gain_year_count_dir = os.path.join(s3_base_dir, 'gain_year_count_all_forest_types/standard/20240308/') + ### Cumulative gross carbon dioxide removals # Gross aboveground removals for all forest types pattern_cumul_gain_AGCO2_all_types = f'gross_removals_AGCO2_Mg_ha_all_forest_types_2001_{loss_years}' -cumul_gain_AGCO2_all_types_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_all_forest_types/standard/per_hectare/20231114/') +cumul_gain_AGCO2_all_types_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_all_forest_types/standard/per_hectare/20240308/') # Gross belowground removals for all forest types pattern_cumul_gain_BGCO2_all_types = f'gross_removals_BGCO2_Mg_ha_all_forest_types_2001_{loss_years}' -cumul_gain_BGCO2_all_types_dir = os.path.join(s3_base_dir, 'gross_removals_BGCO2_all_forest_types/standard/per_hectare/20231114/') +cumul_gain_BGCO2_all_types_dir = os.path.join(s3_base_dir, 'gross_removals_BGCO2_all_forest_types/standard/per_hectare/20240308/') # Gross aboveground and belowground removals for all forest types in all pixels pattern_cumul_gain_AGCO2_BGCO2_all_types = f'gross_removals_AGCO2_BGCO2_Mg_ha_all_forest_types_2001_{loss_years}' -cumul_gain_AGCO2_BGCO2_all_types_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/full_extent/per_hectare/20231114/') +cumul_gain_AGCO2_BGCO2_all_types_dir = os.path.join(s3_base_dir, 
'gross_removals_AGCO2_BGCO2_all_forest_types/standard/full_extent/per_hectare/20240308/') # Gross aboveground and belowground removals for all forest types in pixels within forest extent pattern_cumul_gain_AGCO2_BGCO2_all_types_forest_extent = f'gross_removals_AGCO2_BGCO2_Mg_ha_all_forest_types_forest_extent_2001_{loss_years}' -cumul_gain_AGCO2_BGCO2_all_types_forest_extent_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/forest_extent/per_hectare/20231114/') +cumul_gain_AGCO2_BGCO2_all_types_forest_extent_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/forest_extent/per_hectare/20240308/') ###### @@ -558,7 +563,8 @@ ## Carbon emitted_pools in loss year # Date to include in the output directory for all emissions year carbon emitted_pools -emis_pool_run_date = '20231114' +emis_pool_run_date = '20240308' + # Aboveground carbon in the year of emission for all forest types in loss pixels pattern_AGC_emis_year = "Mg_AGC_ha_emis_year" @@ -640,7 +646,7 @@ ### Emissions from biomass and soil (all carbon emitted_pools) # Date to include in the output directory -emis_run_date_biomass_soil = '20231114' +emis_run_date_biomass_soil = '20240402' # pattern_gross_emis_commod_biomass_soil = f'gross_emis_commodity_Mg_CO2e_ha_biomass_soil_2001_{loss_years}' pattern_gross_emis_commod_biomass_soil = f'gross_emis_commodity_Mg_CO2e_ha_biomass_soil_2001_{loss_years}' @@ -679,7 +685,8 @@ ### Emissions from soil only # Date to include in the output directory -emis_run_date_soil_only = '20231114' +emis_run_date_soil_only = '20240402' + pattern_gross_emis_commod_soil_only = f'gross_emis_commodity_Mg_CO2e_ha_soil_only_2001_{loss_years}' gross_emis_commod_soil_only_dir = f'{s3_base_dir}gross_emissions/commodities/soil_only/standard/{emis_run_date_soil_only}/' @@ -717,11 +724,11 @@ # Net emissions for all forest types and all carbon emitted_pools in all pixels pattern_net_flux = f'net_flux_Mg_CO2e_ha_biomass_soil_2001_{loss_years}' -net_flux_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/full_extent/per_hectare/20231114/') +net_flux_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/full_extent/per_hectare/20240402/') # Net emissions for all forest types and all carbon emitted_pools in forest extent pattern_net_flux_forest_extent = f'net_flux_Mg_CO2e_ha_biomass_soil_forest_extent_2001_{loss_years}' -net_flux_forest_extent_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/forest_extent/per_hectare/20231114/') +net_flux_forest_extent_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/forest_extent/per_hectare/20240402/') ### Per pixel model outputs @@ -729,27 +736,27 @@ # Gross removals per pixel in all pixels pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent = f'gross_removals_AGCO2_BGCO2_Mg_pixel_all_forest_types_full_extent_2001_{loss_years}' -cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/full_extent/per_pixel/20231114/') +cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/full_extent/per_pixel/20240308/') # Gross removals per pixel in forest extent pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent = 
f'gross_removals_AGCO2_BGCO2_Mg_pixel_all_forest_types_forest_extent_2001_{loss_years}' -cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/forest_extent/per_pixel/20231114/') +cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/forest_extent/per_pixel/20240308/') # Gross emissions per pixel in all pixels pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent = f'gross_emis_all_gases_all_drivers_Mg_CO2e_pixel_biomass_soil_full_extent_2001_{loss_years}' -gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent_dir = os.path.join(s3_base_dir, 'gross_emissions/all_drivers/all_gases/biomass_soil/standard/full_extent/per_pixel/20231114/') +gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent_dir = os.path.join(s3_base_dir, 'gross_emissions/all_drivers/all_gases/biomass_soil/standard/full_extent/per_pixel/20240402/') # Gross emissions per pixel in forest extent pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent = f'gross_emis_all_gases_all_drivers_Mg_CO2e_pixel_biomass_soil_forest_extent_2001_{loss_years}' -gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent_dir = os.path.join(s3_base_dir, 'gross_emissions/all_drivers/all_gases/biomass_soil/standard/forest_extent/per_pixel/20231114/') +gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent_dir = os.path.join(s3_base_dir, 'gross_emissions/all_drivers/all_gases/biomass_soil/standard/forest_extent/per_pixel/20240402/') # Net flux per pixel in all pixels pattern_net_flux_per_pixel_full_extent = f'net_flux_Mg_CO2e_pixel_biomass_soil_full_extent_2001_{loss_years}' -net_flux_per_pixel_full_extent_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/full_extent/per_pixel/20231114/') +net_flux_per_pixel_full_extent_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/full_extent/per_pixel/20240402/') # Net flux per pixel in forest extent pattern_net_flux_per_pixel_forest_extent = f'net_flux_Mg_CO2e_pixel_biomass_soil_forest_extent_2001_{loss_years}' -net_flux_per_pixel_forest_extent_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/forest_extent/per_pixel/20231114/') +net_flux_per_pixel_forest_extent_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/forest_extent/per_pixel/20240402/') ### 4x4 km aggregation tiles for mapping @@ -759,7 +766,7 @@ pattern_aggreg_sensit_perc_diff = f'net_flux_0_04deg_modelv{version_filename}_perc_diff_std' pattern_aggreg_sensit_sign_change = f'net_flux_0_04deg_modelv{version_filename}_sign_change_std' -output_aggreg_dir = os.path.join(s3_base_dir, '0_04deg_output_aggregation/biomass_soil/standard/20231114/') +output_aggreg_dir = os.path.join(s3_base_dir, '0_04deg_output_aggregation/biomass_soil/standard/20240402/') @@ -800,11 +807,11 @@ # Standard deviation for annual aboveground biomass removal factors using IPCC default removal rates pattern_stdev_annual_gain_AGB_IPCC_defaults = 'annual_removal_factor_stdev_AGB_Mg_ha_IPCC_defaults_all_ages' -stdev_annual_gain_AGB_IPCC_defaults_dir = os.path.join(s3_base_dir, 'stdev_annual_removal_factor_AGB_IPCC_defaults_all_ages/standard/20231114/') +stdev_annual_gain_AGB_IPCC_defaults_dir = os.path.join(s3_base_dir, 
'stdev_annual_removal_factor_AGB_IPCC_defaults_all_ages/standard/20240308/') # Standard deviation for aboveground and belowground removal factors for all forest types pattern_stdev_annual_gain_AGC_all_types = 'annual_removal_factor_stdev_AGC_Mg_ha_all_forest_types' -stdev_annual_gain_AGC_all_types_dir = os.path.join(s3_base_dir, 'stdev_annual_removal_factor_AGC_all_forest_types/standard/20231114/') +stdev_annual_gain_AGC_all_types_dir = os.path.join(s3_base_dir, 'stdev_annual_removal_factor_AGC_all_forest_types/standard/20240308/') # Raw mineral soil C file site diff --git a/data_prep/mp_prep_other_inputs_annual.py b/data_prep/mp_prep_other_inputs_annual.py index 711da4ee..421d99ca 100644 --- a/data_prep/mp_prep_other_inputs_annual.py +++ b/data_prep/mp_prep_other_inputs_annual.py @@ -1,12 +1,15 @@ ''' -This script processes the inputs for the emissions script that haven't been processed by another script. -At this point, that is: climate zone, Indonesia/Malaysia plantations before 2000, tree cover loss drivers (TSC drivers), -combining IFL2000 (extratropics) and primary forests (tropics) into a single layer, -Hansenizing some removal factor standard deviation inputs, Hansenizing the European removal factors, -and Hansenizing three US-specific removal factor inputs. - -python -m data_prep.mp_prep_other_inputs_annual -l 00N_000E -nu -python -m data_prep.mp_prep_other_inputs_annual -l all +This script processes the tree cover loss inputs for the emissions script that haven't been processed by another script. +At this point, that is: tree cover loss drivers (TCLD) and tree cover loss due to fires (TCLF). +These need to be processed each year after we receive the updated TCL data from UMD and before running the full model. + +python -m data_prep.mp_prep_other_inputs_annual -l 00N_000E -nu -p tcld +python -m data_prep.mp_prep_other_inputs_annual -l all -p tclf + +Options for process argument (-p): + - tcld: Pre-processes drivers of tree cover loss tiles + - tclf: Pre-processes tree cover loss due to fires tiles + - all: Pre-processes both drivers of tree cover loss and tree cover loss due to fires tiles ''' import argparse @@ -20,7 +23,7 @@ import constants_and_names as cn import universal_util as uu -def mp_prep_other_inputs(tile_id_list): +def mp_prep_other_inputs(tile_id_list, process): os.chdir(cn.docker_tile_dir) sensit_type='std' @@ -38,16 +41,24 @@ def mp_prep_other_inputs(tile_id_list): ''' Before processing the driver, it needs to be reprojected from Goode Homolosine to WGS84. - gdal_warp is producing a weird output, so I did it in ArcMap for the 2022 update, + gdal_warp is producing a weird output, so I did it in ArcMap for the 2022 and 2023 updates, with the output cell size being 0.005 x 0.005 degree and the method being nearest. 
+ + 2023 TCL Update: + arcpy.management.ProjectRaster("Goode_FinalClassification_2024_v20240402.tif", r"C:\GIS\carbon_model\Goode_FinalClassification_2023_wgs84_v20240402.tif", + 'GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0], + UNIT["Degree",0.0174532925199433]]', "NEAREST", "0.005 0.005", None, None, 'PROJCS["WGS_1984_Goode_Homolosine", + GEOGCS["GCS_unknown",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0], + UNIT["Degree",0.0174532925199433]],PROJECTION["Goode_Homolosine"],PARAMETER["False_Easting",0.0], + PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",0.0],PARAMETER["Option",1.0],UNIT["Meter",1.0]]', "NO_VERTICAL") + 2022 TCL Update: arcpy.management.ProjectRaster("TCL_DD_2022_20230407.tif", r"C:\GIS\raw_data\TCL_DD_2022_20230407_wgs84.tif", 'GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0], UNIT["Degree",0.0174532925199433]]', "NEAREST", "0.005 0.005", None, None, 'PROJCS["WGS_1984_Goode_Homolosine", GEOGCS["GCS_unknown",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0], UNIT["Degree",0.0174532925199433]],PROJECTION["Goode_Homolosine"],PARAMETER["False_Easting",0.0], PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",0.0],PARAMETER["Option",1.0],UNIT["Meter",1.0]]', "NO_VERTICAL") - The 2022 drivers had 0 instead of NoData, so I used Copy Raster to turn the 0 into NoData: arcpy.management.CopyRaster("TCL_DD_2022_20230407_wgs84.tif", @@ -55,123 +66,169 @@ def mp_prep_other_inputs(tile_id_list): "CURRENT_SLICE", "NO_TRANSPOSE") ''' - - # List of output directories and output file name patterns - output_dir_list = [ - cn.drivers_processed_dir - # ,cn.TCLF_processed_dir - ] - output_pattern_list = [ - cn.pattern_drivers - # ,cn.pattern_TCLF_processed - ] - - - # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - - uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) - - - # A date can optionally be provided by the full model script or a run of this script. - # This replaces the date in constants_and_names. - # Only done if output upload is enabled. - if cn.RUN_DATE is not None and cn.NO_UPLOAD is False: - output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) - - - ### Drivers of tree cover loss processing - uu.print_log("STEP 1: Preprocess drivers of tree cover loss") - - uu.s3_file_download(os.path.join(cn.drivers_raw_dir, cn.pattern_drivers_raw), cn.docker_tile_dir, sensit_type) - - # Creates tree cover loss driver tiles. - # The raw driver tile should have NoData for unassigned drivers as opposed to 0 for unassigned drivers. - # For the 2020 driver update, I reclassified the 0 values as NoData in ArcMap. I also unprojected the global drivers - # map to WGS84 because running the homolosine projection that Jimmy provided was giving incorrect processed results. 
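For reference, a minimal sketch of the gdalwarp call that would correspond to the ArcMap ProjectRaster step above. This is illustrative only: the docstring above reports that gdalwarp produced weird output for this raster (hence ArcMap), and the file names are simply copied from the 2023 update example.

import subprocess

# Reproject the raw drivers raster from Goode Homolosine to WGS84 at a 0.005 x 0.005
# degree cell size with nearest-neighbor resampling, mirroring the ArcMap parameters above.
subprocess.run([
    'gdalwarp',
    '-t_srs', 'EPSG:4326',     # WGS84 geographic coordinates
    '-tr', '0.005', '0.005',   # output cell size in degrees
    '-r', 'near',              # nearest-neighbor resampling
    '-overwrite',
    'Goode_FinalClassification_2024_v20240402.tif',
    'Goode_FinalClassification_2023_wgs84_v20240402.tif',
], check=True)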
- source_raster = cn.pattern_drivers_raw - out_pattern = cn.pattern_drivers - dt = 'Byte' - if cn.count == 96: - processes = 87 # 45 processors = 70 GB peak; 70 = 90 GB peak; 80 = 100 GB peak; 87 = 125 GB peak - else: - processes = int(cn.count/2) - uu.print_log("Creating tree cover loss driver tiles with {} processors...".format(processes)) - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), - tile_id_list) - pool.close() - pool.join() - - - # ### Tree cover loss from fires processing - # uu.print_log("STEP 2: Preprocess tree cover loss from fires") - # - # # TCLF is downloaded to its own folder because it doesn't have a standardized file name pattern. - # # This way, the entire contents of the TCLF folder can be worked on without mixing with other files. - # TCLF_s3_dir = os.path.join(cn.docker_tile_dir, 'TCLF') - # if os.path.exists(TCLF_s3_dir): - # os.rmdir(TCLF_s3_dir) - # os.mkdir(TCLF_s3_dir) - # cmd = ['aws', 's3', 'cp', cn.TCLF_raw_dir, TCLF_s3_dir, '--request-payer', 'requester', - # '--include', '*', '--exclude', 'tiles*', '--exclude', '*geojason', '--exclude', '*Store', '--recursive'] - # uu.log_subprocess_output_full(cmd) - # - # # Creates global vrt of TCLF - # uu.print_log("Creating vrt of TCLF...") - # tclf_vrt = 'TCLF.vrt' - # os.system(f'gdalbuildvrt -srcnodata 0 {tclf_vrt} {TCLF_s3_dir}/*.tif') - # uu.print_log(" TCLF vrt created") - # - # # Creates TCLF tiles - # source_raster = tclf_vrt - # out_pattern = cn.pattern_TCLF_processed - # dt = 'Byte' - # if cn.count == 96: - # processes = 34 # 30 = 510 GB initial peak; 34=600 GB peak - # else: - # processes = int(cn.count/2) - # uu.print_log(f'Creating TCLF tiles with {processes} processors...') - # pool = multiprocessing.Pool(processes) - # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) - # pool.close() - # pool.join() - - - for output_pattern in [ - cn.pattern_drivers - # ,cn.pattern_TCLF_processed - ]: - + if process == 'tcld' or process == 'all': + + # List of output directories and output file name patterns + output_dir_list = [ + cn.drivers_processed_dir + ] + output_pattern_list = [ + cn.pattern_drivers + ] + + # If the model run isn't the standard one, the output directory and file names are changed + if sensit_type != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) + output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + + # A date can optionally be provided by the full model script or a run of this script. + # This replaces the date in constants_and_names. + # Only done if output upload is enabled. + if cn.RUN_DATE is not None and cn.NO_UPLOAD is False: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) + + ### Drivers of tree cover loss processing + uu.print_log("STEP 1: Preprocess drivers of tree cover loss") + + uu.s3_file_download(os.path.join(cn.drivers_raw_dir, cn.pattern_drivers_raw), cn.docker_tile_dir, sensit_type) + + # Creates tree cover loss driver tiles. + # The raw driver tile should have NoData for unassigned drivers as opposed to 0 for unassigned drivers. + # For the 2020 driver update, I reclassified the 0 values as NoData in ArcMap. 
I also unprojected the global drivers + # map to WGS84 because running the homolosine projection that Jimmy provided was giving incorrect processed results. + source_raster = cn.pattern_drivers_raw + out_pattern = cn.pattern_drivers + dt = 'Byte' if cn.count == 96: - processes = 50 # 60 processors = >730 GB peak (for European natural forest forest removal rates); 50 = XXX GB peak - uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors...') - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) - pool.close() - pool.join() - elif cn.count <= 2: # For local tests - processes = 1 - uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors using light function...') - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) - pool.close() - pool.join() + processes = 87 # 45 processors = 70 GB peak; 70 = 90 GB peak; 80 = 100 GB peak; 87 = 125 GB peak else: - processes = int(cn.count / 2) - uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors...') - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) - pool.close() - pool.join() - uu.print_log("\n") - - - # Uploads output tiles to s3 - for i in range(0, len(output_dir_list)): - uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) + processes = int(cn.count/2) + uu.print_log("Creating tree cover loss driver tiles with {} processors...".format(processes)) + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + tile_id_list) + pool.close() + pool.join() + + for output_pattern in [ + cn.pattern_drivers + ]: + + if cn.count == 96: + processes = 50 # 50 = XXX GB peak + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors...') + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() + elif cn.count <= 2: # For local tests + processes = 1 + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors using light function...') + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() + else: + processes = int(cn.count / 2) + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors...') + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() + uu.print_log("\n") + + # Uploads output tiles to s3 + if cn.NO_UPLOAD == False: + for i in range(0, len(output_dir_list)): + uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) + + if process == 'tclf' or process == 'all': + # List of output directories and output file name patterns + output_dir_list = [ + cn.TCLF_processed_dir + ] + + output_pattern_list = [ + cn.pattern_TCLF_processed + ] + + # If the model run isn't the standard one, the output directory and file names are changed + if sensit_type != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + 
output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) + output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + + # A date can optionally be provided by the full model script or a run of this script. + # This replaces the date in constants_and_names. + # Only done if output upload is enabled. + if cn.RUN_DATE is not None and cn.NO_UPLOAD is False: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) + + ### Tree cover loss from fires processing + uu.print_log("STEP 2: Preprocess tree cover loss from fires") + + # TCLF is downloaded to its own folder because it doesn't have a standardized file name pattern. + # This way, the entire contents of the TCLF folder can be worked on without mixing with other files. + TCLF_s3_dir = os.path.join(cn.docker_tile_dir, 'TCLF') + if os.path.exists(TCLF_s3_dir): + os.rmdir(TCLF_s3_dir) + os.mkdir(TCLF_s3_dir) + cmd = ['aws', 's3', 'cp', cn.TCLF_raw_dir, TCLF_s3_dir, '--request-payer', 'requester', + '--include', '*', '--exclude', 'tiles*', '--exclude', '*geojson', '--exclude', '*Store', '--recursive'] + uu.log_subprocess_output_full(cmd) + + # Creates global vrt of TCLF + uu.print_log("Creating vrt of TCLF...") + tclf_vrt = 'TCLF.vrt' + os.system(f'gdalbuildvrt -srcnodata 0 {tclf_vrt} {TCLF_s3_dir}/*.tif') + uu.print_log(" TCLF vrt created") + + # Creates TCLF tiles + source_raster = tclf_vrt + out_pattern = cn.pattern_TCLF_processed + dt = 'Byte' + if cn.count == 96: + processes = 34 # 30 = 510 GB initial peak; 34=600 GB peak + else: + processes = int(cn.count/2) + uu.print_log(f'Creating TCLF tiles with {processes} processors...') + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) + pool.close() + pool.join() + + for output_pattern in [ + cn.pattern_TCLF_processed + ]: + + if cn.count == 96: + processes = 50 # 50 = XXX GB peak + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors...') + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() + elif cn.count <= 2: # For local tests + processes = 1 + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors using light function...') + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() + else: + processes = int(cn.count / 2) + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors...') + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() + uu.print_log("\n") + + # Uploads output tiles to s3 + if cn.NO_UPLOAD == False: + for i in range(0, len(output_dir_list)): + uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) if __name__ == '__main__': @@ -184,10 +241,15 @@ def mp_prep_other_inputs(tile_id_list): help='Date of run. 
Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--process', '-p', required=True, + help='Specifies which annual process to run: tcld = drivers of tree cover loss, tclf = tree cover loss from fires, all = both') args = parser.parse_args() + + # Sets global variables to the command line arguments tile_id_list = args.tile_id_list run_date = args.run_date cn.NO_UPLOAD = args.no_upload + process = args.process # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): @@ -199,4 +261,4 @@ def mp_prep_other_inputs(tile_id_list): # Checks whether the tile_id_list argument is valid tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_prep_other_inputs(tile_id_list=tile_id_list) \ No newline at end of file + mp_prep_other_inputs(tile_id_list=tile_id_list, process=str(process)) diff --git a/data_prep/mp_prep_other_inputs_one_off.py b/data_prep/mp_prep_other_inputs_one_off.py index 181cddb3..d8e081f8 100644 --- a/data_prep/mp_prep_other_inputs_one_off.py +++ b/data_prep/mp_prep_other_inputs_one_off.py @@ -1,15 +1,27 @@ ''' This script processes the inputs for the emissions script that haven't been processed by another script. -At this point, that is: climate zone, Indonesia/Malaysia plantations before 2000, tree cover loss drivers (TSC drivers), +At this point, that is: climate zone, Indonesia/Malaysia plantations before 2000, combining IFL2000 (extratropics) and primary forests (tropics) into a single layer, Hansenizing some removal factor standard deviation inputs, Hansenizing the European removal factors, and Hansenizing three US-specific removal factor inputs. -python -m data_prep.mp_prep_other_inputs_one_off -l 00N_000E -nu -python -m data_prep.mp_prep_other_inputs_one_off -l all - -#TODO make each preprocessing step have its own if statement -#TODO make the process command line argument text rather than numeric and ability to launch all of the pre-processing steps individually +python -m data_prep.mp_prep_other_inputs_one_off -l 00N_000E -nu -p young_forest +python -m data_prep.mp_prep_other_inputs_one_off -l all -p sdptv2 + +Options for process argument (-p): +1) young_forest: Creates young natural forest removal rate and standard deviation tiles +2) pre_2000_plantations: Creates pre-2000 oil palm plantation tiles +3) climate: Creates climate zone tiles +4) europe_forest: Creates European natural forest removal rate and standard deviation tiles +5) us_forest: Creates US forest age, group, and region category tiles +6) ifl_primary: Creates intact forest landscape/primary forest tiles +7) agb_bgb: Creates Hansen tiles of AGB:BGB based on Huang et al. 2021 +8) sdptv2: Pre-processes 6 SDPTv2 tile sets from the gfw-data-lake bucket into gfw2-data: AGC removal factors, AGC standard deviations, AGC+BGC removal factors, AGC+BGC standard deviations, planted forest type, and planted forest established year + +#TODO: Make the process command line argument able to launch all of the pre-processing steps individually, in parallel +#TODO: Update constants_and_names.py with datalake_pf_estab_year_dir and uncomment 4 lines in sdptv2 section +#TODO: The plantations step gets stuck during the unzip process (line 172) +#TODO: The AGB_BGB netCDF download process fails (line 527) ''' import argparse import multiprocessing @@ -23,12 +35,12 @@ import constants_and_names as cn import universal_util as uu -from . import prep_other_inputs_one_off +from . 
import prep_other_inputs_one_off as prep_other_inputs def mp_prep_other_inputs(tile_id_list, process): os.chdir(cn.docker_tile_dir) - sensit_type='std' + sensit_type = 'std' # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': @@ -41,34 +53,23 @@ def mp_prep_other_inputs(tile_id_list, process): uu.print_log(tile_id_list) uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") - if process == 1: + ################################################################################################## + + if process == 'young_forest' or process == 'all': + + uu.print_log('Pre-processing young natural forest removal rate and standard deviation tiles') #List of output directories and output file name patterns output_dir_list = [ - cn.climate_zone_processed_dir, cn.plant_pre_2000_processed_dir, - cn.ifl_primary_processed_dir, cn.annual_gain_AGC_natrl_forest_young_dir, - cn.stdev_annual_gain_AGC_natrl_forest_young_dir, - cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir, - cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir, - cn.FIA_forest_group_processed_dir, - cn.age_cat_natrl_forest_US_dir, - cn.FIA_regions_processed_dir, + cn.stdev_annual_gain_AGC_natrl_forest_young_dir ] output_pattern_list = [ - cn.pattern_climate_zone, cn.pattern_plant_pre_2000, - cn.pattern_ifl_primary, cn.pattern_annual_gain_AGC_natrl_forest_young, - cn.pattern_stdev_annual_gain_AGC_natrl_forest_young, - cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe, - cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe, - cn.pattern_FIA_forest_group_processed, - cn.pattern_age_cat_natrl_forest_US, - cn.pattern_FIA_regions_processed, + cn.pattern_stdev_annual_gain_AGC_natrl_forest_young ] - # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': @@ -76,37 +77,18 @@ def mp_prep_other_inputs(tile_id_list, process): output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) - # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. 
if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) - - # Files to process: climate zone, IDN/MYS plantations before 2000, tree cover loss drivers, combine IFL and primary forest - uu.s3_file_download(os.path.join(cn.climate_zone_raw_dir, cn.climate_zone_raw), cn.docker_base_dir, sensit_type) - uu.s3_file_download(os.path.join(cn.plant_pre_2000_raw_dir, '{}.zip'.format(cn.pattern_plant_pre_2000_raw)), cn.docker_base_dir, sensit_type) - uu.s3_file_download(os.path.join(cn.annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type) - uu.s3_file_download(os.path.join(cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type) - uu.s3_file_download(os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw), cn.docker_base_dir, sensit_type) - uu.s3_file_download(os.path.join(cn.age_cat_natrl_forest_US_raw_dir, cn.name_age_cat_natrl_forest_US_raw), cn.docker_base_dir, sensit_type) - uu.s3_file_download(os.path.join(cn.FIA_forest_group_raw_dir, cn.name_FIA_forest_group_raw), cn.docker_base_dir, sensit_type) # For some reason, using uu.s3_file_download or otherwise using AWSCLI as a subprocess doesn't work for this raster. - # Thus, using wget instead. - cmd = ['wget', '{}'.format(cn.annual_gain_AGC_natrl_forest_young_raw_URL), '-P', '{}'.format(cn.docker_base_dir)] - uu.log_subprocess_output_full(cmd) - uu.s3_file_download(cn.stdev_annual_gain_AGC_natrl_forest_young_raw_URL, cn.docker_base_dir, sensit_type) - cmd = ['aws', 's3', 'cp', cn.primary_raw_dir, cn.docker_base_dir, '--recursive'] - uu.log_subprocess_output_full(cmd) - - uu.s3_flexible_download(cn.ifl_dir, cn.pattern_ifl, cn.docker_base_dir, sensit_type, tile_id_list) - - uu.print_log("Unzipping pre-2000 plantations...") - cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_plant_pre_2000_raw)] + cmd = ['wget', '{}'.format(cn.annual_gain_AGC_natrl_forest_young_raw_URL), '-P', '{}'.format(cn.docker_tile_dir)] uu.log_subprocess_output_full(cmd) + uu.s3_file_download(cn.stdev_annual_gain_AGC_natrl_forest_young_raw_URL, cn.docker_tile_dir, sensit_type) ### Creates young natural forest removal rate tiles source_raster = cn.name_annual_gain_AGC_natrl_forest_young_raw @@ -122,7 +104,6 @@ def mp_prep_other_inputs(tile_id_list, process): pool.close() pool.join() - ### Creates young natural forest removal rate standard deviation tiles source_raster = cn.name_stdev_annual_gain_AGC_natrl_forest_young_raw out_pattern = cn.pattern_stdev_annual_gain_AGC_natrl_forest_young @@ -137,6 +118,60 @@ def mp_prep_other_inputs(tile_id_list, process): pool.close() pool.join() + for output_pattern in [ + cn.pattern_annual_gain_AGC_natrl_forest_young, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young + ]: + # For some reason I can't figure out, the young forest rasters (rate and stdev) have NaN values in some places where 0 (NoData) + # should be. These NaN values show up as values when the check_and_delete_if_empty function runs, making the tiles not + # deleted even if they have no data. However, the light version (which uses gdalinfo rather than rasterio masks) doesn't + # have this problem. So I'm forcing the young forest rates and stdev to have their emptiness checked by the gdalinfo version. 
+ processes = int(cn.count / 2) + uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes)) + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() + + # Uploads output tiles to s3 + if cn.NO_UPLOAD == False: + for i in range(0, len(output_dir_list)): + uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) + + ################################################################################################## + + if process == 'pre_2000_plantations' or process == 'all': + + uu.print_log('Pre-processing pre-2000 oil palm plantation tiles') + + # List of output directories and output file name patterns + output_dir_list = [ + cn.plant_pre_2000_processed_dir + ] + + output_pattern_list = [ + cn.pattern_plant_pre_2000 + ] + + # If the model run isn't the standard one, the output directory and file names are changed + if sensit_type != 'std': + + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) + output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + + # A date can optionally be provided by the full model script or a run of this script. + # This replaces the date in constants_and_names. + # Only done if output upload is enabled. + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) + + # Files to process: IDN/MYS plantations before 2000 + uu.s3_file_download(os.path.join(cn.plant_pre_2000_raw_dir, '{}.zip'.format(cn.pattern_plant_pre_2000_raw)), cn.docker_tile_dir, sensit_type) 
+ + uu.print_log("Unzipping pre-2000 plantations...") + cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_plant_pre_2000_raw)] + uu.log_subprocess_output_full(cmd) ### Creates pre-2000 oil palm plantation tiles if cn.count == 96: @@ -149,6 +184,49 @@ def mp_prep_other_inputs(tile_id_list, process): pool.close() pool.join() + for output_pattern in output_pattern_list: + processes = int(cn.count / 2) + uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes)) + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() + + # Uploads output tiles to s3 + if cn.NO_UPLOAD == False: + for i in range(0, len(output_dir_list)): + uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) + + ################################################################################################## + + if process == 'climate' or process == 'all': + + uu.print_log('Pre-processing climate zone tiles') + + #List of output directories and output file name patterns + output_dir_list = [ + cn.climate_zone_processed_dir + ] + + output_pattern_list = [ + cn.pattern_climate_zone + ] + + # If the model run isn't the standard one, the output directory and file names are changed + if sensit_type != 'std': + + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) + output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + + # A date can optionally be provided by the full model script or a run of this script. + # This replaces the date in constants_and_names. + # Only done if output upload is enabled. 
+ if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) + + # Files to process: climate zone + uu.s3_file_download(os.path.join(cn.climate_zone_raw_dir, cn.climate_zone_raw), cn.docker_tile_dir, sensit_type) ### Creates climate zone tiles if cn.count == 96: @@ -161,6 +239,51 @@ def mp_prep_other_inputs(tile_id_list, process): pool.close() pool.join() + for output_pattern in output_pattern_list: + processes = int(cn.count / 2) + uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes)) + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() + + # Uploads output tiles to s3 + if cn.NO_UPLOAD == False: + for i in range(0, len(output_dir_list)): + uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) + + ################################################################################################## + + if process == 'europe_forest' or process == 'all': + + uu.print_log('Pre-processing European natural forest removal rate and standard deviation tiles') + + # List of output directories and output file name patterns + output_dir_list = [ + cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir, + #cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir # raw files deleted; commented out so this list stays parallel with output_pattern_list + ] + + output_pattern_list = [ + cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe, + #cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe + ] + + # If the model run isn't the standard one, the output directory and file names are changed + if sensit_type != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) + output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + + # A date can optionally be provided by the full model script or a run of this script. + # This replaces the date in constants_and_names. + # Only done if output upload is enabled. 
+ if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) + + # Files to process: Europe natural forest + uu.s3_file_download(os.path.join(cn.annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_tile_dir, sensit_type) + uu.s3_file_download(os.path.join(cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_tile_dir, sensit_type) ### Creates European natural forest removal rate tiles source_raster = cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw @@ -169,14 +292,14 @@ def mp_prep_other_inputs(tile_id_list, process): if cn.count == 96: processes = 60 # 32 processors = 60 GB peak; 60 = XXX GB peak else: - processes = int(cn.count/2) + processes = int(cn.count / 2) uu.print_log("Creating European natural forest removals rate tiles with {} processors...".format(processes)) pool = multiprocessing.Pool(processes) - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + tile_id_list) pool.close() pool.join() - ### Creates European natural forest standard deviation of removal rate tiles source_raster = cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw out_pattern = cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe @@ -184,48 +307,64 @@ def mp_prep_other_inputs(tile_id_list, process): if cn.count == 96: processes = 32 # 32 processors = 60 GB peak; 60 = XXX GB peak else: - processes = int(cn.count/2) - uu.print_log("Creating standard deviation for European natural forest removals rate tiles with {} processors...".format(processes)) + processes = int(cn.count / 2) + uu.print_log( + "Creating standard deviation for European natural forest removals rate tiles with {} processors...".format( + processes)) pool = multiprocessing.Pool(processes) - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + tile_id_list) pool.close() pool.join() + for output_pattern in output_pattern_list: + processes = int(cn.count / 2) + uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes)) + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() - ### Creates humid tropical primary forest tiles - # Creates a vrt of the primary forests with nodata=0 from the continental primary forest rasters - uu.print_log("Creating vrt of humid tropial primary forest...") - primary_vrt = 'primary_2001.vrt' - os.system('gdalbuildvrt -srcnodata 0 {} *2001_primary.tif'.format(primary_vrt)) - uu.print_log(" Humid tropical primary forest vrt created") + # Uploads output tiles to s3 + if cn.NO_UPLOAD == False: + for i in range(0, len(output_dir_list)): + uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) - # Creates primary forest tiles - source_raster = primary_vrt - out_pattern = 'primary_2001' - dt = 'Byte' - if cn.count == 96: - processes = 45 # 45 processors = 650 GB peak - else: - processes = int(cn.count/2) - uu.print_log("Creating primary forest tiles with {} 
processors...".format(processes)) - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) - pool.close() - pool.join() + ################################################################################################## + if process == 'us_forest' or process == 'all': - ### Creates a combined IFL/primary forest raster - # Uses very little memory since it's just file renaming - if cn.count == 96: - processes = 60 # 60 processors = 10 GB peak - else: - processes = int(cn.count/2) - uu.print_log("Assigning each tile to ifl2000 or primary forest with {} processors...".format(processes)) - pool = multiprocessing.Pool(processes) - pool.map(prep_other_inputs.create_combined_ifl_primary, tile_id_list) - pool.close() - pool.join() + uu.print_log('Pre-processing US forest age, group, and region category tiles') + #List of output directories and output file name patterns + output_dir_list = [ + cn.FIA_forest_group_processed_dir, + cn.age_cat_natrl_forest_US_dir, + cn.FIA_regions_processed_dir, + ] + + output_pattern_list = [ + cn.pattern_FIA_forest_group_processed, + cn.pattern_age_cat_natrl_forest_US, + cn.pattern_FIA_regions_processed, + ] + + # If the model run isn't the standard one, the output directory and file names are changed + if sensit_type != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) + output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + + # A date can optionally be provided by the full model script or a run of this script. + # This replaces the date in constants_and_names. + # Only done if output upload is enabled. + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) + + # Files to process: US forest region, age, and group categories + uu.s3_file_download(os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw), cn.docker_tile_dir, sensit_type) + uu.s3_file_download(os.path.join(cn.age_cat_natrl_forest_US_raw_dir, cn.name_age_cat_natrl_forest_US_raw), cn.docker_tile_dir, sensit_type) + uu.s3_file_download(os.path.join(cn.FIA_forest_group_raw_dir, cn.name_FIA_forest_group_raw), cn.docker_tile_dir, sensit_type) ### Creates forest age category tiles for US forests source_raster = cn.name_age_cat_natrl_forest_US_raw @@ -269,28 +408,106 @@ def mp_prep_other_inputs(tile_id_list, process): pool.close() pool.join() - - for output_pattern in [ - cn.pattern_annual_gain_AGC_natrl_forest_young, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young - ]: - # For some reason I can't figure out, the young forest rasters (rate and stdev) have NaN values in some places where 0 (NoData) - # should be. These NaN values show up as values when the check_and_delete_if_empty function runs, making the tiles not - # deleted even if they have no data. However, the light version (which uses gdalinfo rather than rasterio masks) doesn't - # have this problem. So I'm forcing the young forest rates to and stdev to have their emptiness checked by the gdalinfo version. 
+ for output_pattern in output_pattern_list: processes = int(cn.count / 2) - uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes)) + uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format( + output_pattern, processes)) pool = multiprocessing.Pool(processes) pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) pool.close() pool.join() + # Uploads output tiles to s3 + if cn.NO_UPLOAD == False: + for i in range(0, len(output_dir_list)): + uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) + + ################################################################################################## + + if process == 'ifl_primary' or process == 'all': + + uu.print_log('Pre-processing intact forest landscape/primary forest tiles') + + # List of output directories and output file name patterns + output_dir_list = [ + cn.ifl_primary_processed_dir + ] + + output_pattern_list = [ + cn.pattern_ifl_primary + ] + + # If the model run isn't the standard one, the output directory and file names are changed + if sensit_type != 'std': + + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) + output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + + # A date can optionally be provided by the full model script or a run of this script. + # This replaces the date in constants_and_names. + # Only done if output upload is enabled. + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) + + # Files to process: combine IFL and primary forest + uu.s3_flexible_download(cn.ifl_dir, cn.pattern_ifl, cn.docker_tile_dir, sensit_type, tile_id_list) + + # Downloads the continental primary forest rasters by copying the whole directory with the AWS CLI. 
+ cmd = ['aws', 's3', 'cp', cn.primary_raw_dir, cn.docker_tile_dir, '--recursive'] + uu.log_subprocess_output_full(cmd) + + ### Creates humid tropical primary forest tiles + # Creates a vrt of the primary forests with nodata=0 from the continental primary forest rasters + uu.print_log("Creating vrt of humid tropical primary forest...") + primary_vrt = 'primary_2001.vrt' + os.system('gdalbuildvrt -srcnodata 0 {} *2001_primary.tif'.format(primary_vrt)) + uu.print_log(" Humid tropical primary forest vrt created") + + # Creates primary forest tiles + source_raster = primary_vrt + out_pattern = 'primary_2001' + dt = 'Byte' + if cn.count == 96: + processes = 45 # 45 processors = 650 GB peak + else: + processes = int(cn.count/2) + uu.print_log("Creating primary forest tiles with {} processors...".format(processes)) + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) + pool.close() + pool.join() + + ### Creates a combined IFL/primary forest raster + # Uses very little memory since it's just file renaming + if cn.count == 96: + processes = 60 # 60 processors = 10 GB peak + else: + processes = int(cn.count/2) + uu.print_log("Assigning each tile to ifl2000 or primary forest with {} processors...".format(processes)) + pool = multiprocessing.Pool(processes) + pool.map(prep_other_inputs.create_combined_ifl_primary, tile_id_list) + pool.close() + pool.join() + + for output_pattern in output_pattern_list: + processes = int(cn.count / 2) + uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format( + output_pattern, processes)) + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() # Uploads output tiles to s3 - for i in range(0, len(output_dir_list)): - uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) + if cn.NO_UPLOAD == False: + for i in range(0, len(output_dir_list)): + uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) + ################################################################################################## - if process == 2: + if process == 'agb_bgb' or process == 'all': ### Creates Hansen tiles of AGB:BGB based on Huang et al. 
2021: https://essd.copernicus.org/articles/13/4263/2021/
         output_dir_list = [cn.BGB_AGB_ratio_dir]
         output_pattern_list = [cn.pattern_BGB_AGB_ratio]
@@ -421,7 +638,6 @@ def mp_prep_other_inputs(tile_id_list, process):
         pool.close()
         pool.join()
-
         for output_pattern in [
                 cn.pattern_BGB_AGB_ratio
         ]:
@@ -454,10 +670,13 @@ def mp_prep_other_inputs(tile_id_list, process):
     for i in range(0, len(output_dir_list)):
         uu.upload_final_set(output_dir_list[i], output_pattern_list[i])

+    ##################################################################################################
-    if process == 3:
-        # Preprocess 6 SDPTv2 tile sets from gfw-data-lake bucket to gfw2-data:
-        # AGC removal factors, AGC+BGC removal factors, AGC standard deviations, AGC+BGC standard deviations, planted forest type, and planted forest established year
+    if process == 'sdptv2' or process == 'all':
+        # Preprocess 6 SDPTv2 tile sets from gfw-data-lake bucket to gfw2-data:
+        # AGC removal factors, AGC standard deviations
+        # AGC+BGC removal factors, AGC+BGC standard deviations
+        # planted forest type, and planted forest established year

         # Files to download for the SDPTv2 update
         download_dict_sdptv2 = {
@@ -466,7 +685,7 @@ def mp_prep_other_inputs(tile_id_list, process):
             cn.datalake_pf_agc_sd_dir: [cn.pattern_data_lake],
             cn.datalake_pf_agcbgc_sd_dir: [cn.pattern_data_lake],
             cn.datalake_pf_simplename_dir: [cn.pattern_data_lake],
-            cn.datalake_pf_estab_year_dir: [cn.pattern_data_lake],
+            #cn.datalake_pf_estab_year_dir: [cn.pattern_data_lake],
         }

         # List of output directories for SDPTv2 update upload
@@ -476,7 +695,7 @@ def mp_prep_other_inputs(tile_id_list, process):
             cn.planted_forest_aboveground_standard_deviation_dir,
             cn.planted_forest_aboveground_belowground_standard_deviation_dir,
             cn.planted_forest_type_dir,
-            cn.planted_forest_estab_year_dir,
+            #cn.planted_forest_estab_year_dir,
         ]

         # List of output file name patterns for SDPTv2 to upload to EC2
@@ -486,7 +705,7 @@ def mp_prep_other_inputs(tile_id_list, process):
             cn.pattern_pf_sd_agc_ec2,
             cn.pattern_pf_sd_agcbgc_ec2,
             cn.pattern_planted_forest_type,
-            cn.pattern_planted_forest_estab_year,
+            #cn.pattern_planted_forest_estab_year,
         ]

         # List of data types for each dataset to use in the gdal_warp_to_hansen
@@ -499,7 +718,6 @@ def mp_prep_other_inputs(tile_id_list, process):
             'Byte',
         ]

-
         # If the model run isn't the standard one, the output directory and file names are changed
         if sensit_type != 'std':
             uu.print_log('Changing output directory and file name pattern based on sensitivity analysis')
@@ -520,10 +738,9 @@ def mp_prep_other_inputs(tile_id_list, process):
             output_pattern_list_sdptv2[2]: [output_dir_list_sdptv2[2], output_dt_list_sdptv2[2]],
             output_pattern_list_sdptv2[3]: [output_dir_list_sdptv2[3], output_dt_list_sdptv2[3]],
             output_pattern_list_sdptv2[4]: [output_dir_list_sdptv2[4], output_dt_list_sdptv2[4]],
-            output_pattern_list_sdptv2[5]: [output_dir_list_sdptv2[5], output_dt_list_sdptv2[5]],
+            #output_pattern_list_sdptv2[5]: [output_dir_list_sdptv2[5], output_dt_list_sdptv2[5]],
         }

-
         # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
         uu.print_log("STEP 1: Downloading datasets from gfw-data-lake")
         for key, values in download_dict_sdptv2.items():
             directory = key
             pattern = values[0]
             uu.s3_flexible_download(directory, pattern, cn.docker_tile_dir, cn.SENSIT_TYPE, tile_id_list)

-
         uu.print_log("STEP 2: Rewindowing tiles to 40,000 x 1 pixel windows")
         for key in upload_dict_sdptv2.keys():
@@ -552,7 +768,6 @@ def mp_prep_other_inputs(tile_id_list, process):
             pool.close()
             pool.join()

-
         uu.print_log("STEP 3: Warping SDPT datasets to Hansen dataset")
         for key, values in upload_dict_sdptv2.items():
@@ -573,8 +788,16 @@ def mp_prep_other_inputs(tile_id_list, process):
             pool.close()
             pool.join()

+        uu.print_log("STEP 4: Checking that tile contains data")
+        for output_pattern in output_pattern_list_sdptv2:
+            processes = int(cn.count / 2)
+            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
+            pool = multiprocessing.Pool(processes)
+            pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
+            pool.close()
+            pool.join()

-        uu.print_log("STEP 4: Uploading datasets to their output directories")
+        uu.print_log("STEP 5: Uploading datasets to their output directories")

         if cn.NO_UPLOAD == False:
             for key, values in upload_dict_sdptv2.items():
@@ -617,4 +840,4 @@ def mp_prep_other_inputs(tile_id_list, process):
     # Checks whether the tile_id_list argument is valid
     tile_id_list = uu.tile_id_list_check(tile_id_list)

-    mp_prep_other_inputs(tile_id_list=tile_id_list, process=int(process))
+    mp_prep_other_inputs(tile_id_list=tile_id_list, process=str(process))
diff --git a/data_prep/prep_other_inputs_one_off.py b/data_prep/prep_other_inputs_one_off.py
index b4e5da87..6bf4e868 100644
--- a/data_prep/prep_other_inputs_one_off.py
+++ b/data_prep/prep_other_inputs_one_off.py
@@ -9,6 +9,7 @@
 import numpy as np
 from scipy import stats
 import os
+from subprocess import Popen, PIPE, STDOUT

 import constants_and_names as cn
 import universal_util as uu
diff --git a/emissions/cpp_util/constants.h b/emissions/cpp_util/constants.h
index 57e11f9e..8b966bf6 100644
--- a/emissions/cpp_util/constants.h
+++ b/emissions/cpp_util/constants.h
@@ -5,7 +5,7 @@ namespace constants
 {
     // Emissions constants
     // per https://www.learncpp.com/cpp-tutorial/global-constants-and-inline-variables/
-    constexpr int model_years {22};    // How many loss years are in the model. Must also be updated in equations.cpp!
+    constexpr int model_years {23};    // How many loss years are in the model. Must also be updated in equations.cpp!
     constexpr int CH4_equiv {27};      // The CO2 equivalency (global warming potential) of CH4, AR6 WG1 Table 7.15
@@ -38,7 +38,7 @@ namespace constants
     constexpr char legal_Amazon_loss[] = "_legal_Amazon_annual_loss_2001_2019.tif";

-    constexpr char lossyear[] = "GFW2022_";
+    constexpr char lossyear[] = "GFW2023_";
     constexpr char burnyear[] = "_tree_cover_loss_fire_processed.tif";
     constexpr char fao_ecozones[] = "_fao_ecozones_bor_tem_tro_processed.tif";
     constexpr char climate_zones[] = "_climate_zone_processed.tif";
diff --git a/emissions/cpp_util/equations.cpp b/emissions/cpp_util/equations.cpp
index 36569f1f..559cdd80 100644
--- a/emissions/cpp_util/equations.cpp
+++ b/emissions/cpp_util/equations.cpp
@@ -17,7 +17,7 @@ void def_variables(float *q, int ecozone, int forestmodel_data, int ifl, int cli
 {
     int model_years;    // How many loss years are in the model
-    model_years = 22;
+    model_years = 23;

     int tropical;       // The ecozone code for the tropics
     tropical = 1;
@@ -59,7 +59,7 @@ void def_variables(float *q, int ecozone, int forestmodel_data, int ifl, int cli
             CH4 = 4.7;
             N2O = 0.26;
             peatburn_CO2_only = 446;
-            peatburn_non_CO2 = 85;
+            peatburn_non_CO2 = 82;
             peat_drain_annual_CO2_only = 2;
             peat_drain_annual_non_CO2 = 1;
             peat_drain_total_CO2_only = (model_years - lossyr) * peat_drain_annual_CO2_only;
@@ -72,7 +72,7 @@ void def_variables(float *q, int ecozone, int forestmodel_data, int ifl, int cli
             CH4 = 4.7;
             N2O = 0.26;
             peatburn_CO2_only = 446;
-            peatburn_non_CO2 = 85;
+            peatburn_non_CO2 = 82;
             peat_drain_annual_CO2_only = 11;
             peat_drain_annual_non_CO2 = 3;
             peat_drain_total_CO2_only = (model_years - lossyr) * peat_drain_annual_CO2_only;
@@ -84,7 +84,7 @@ void def_variables(float *q, int ecozone, int forestmodel_data, int ifl, int cli
             CH4 = 6.8;
             N2O = 0.2;
             peatburn_CO2_only = 264;
-            peatburn_non_CO2 = 91;
+            peatburn_non_CO2 = 88;

             if (plant_data == 1) // Commodities/shifting ag/urbanization, tropics, oil palm
             {
@@ -124,7 +124,7 @@ void def_variables(float *q, int ecozone, int forestmodel_data, int ifl, int cli
             CH4 = 4.7;
             N2O = 0.26;
             peatburn_CO2_only = 446;
-            peatburn_non_CO2 = 85;
+            peatburn_non_CO2 = 82;
             peat_drain_annual_CO2_only = 2;
             peat_drain_annual_non_CO2 = 1;
             peat_drain_total_CO2_only = (model_years - lossyr) * peat_drain_annual_CO2_only;
@@ -137,7 +137,7 @@ void def_variables(float *q, int ecozone, int forestmodel_data, int ifl, int cli
             CH4 = 4.7;
             N2O = 0.26;
             peatburn_CO2_only = 446;
-            peatburn_non_CO2 = 85;
+            peatburn_non_CO2 = 82;
             peat_drain_annual_CO2_only = 11;
             peat_drain_annual_non_CO2 = 3;
             peat_drain_total_CO2_only = (model_years - lossyr) * peat_drain_annual_CO2_only;
@@ -149,7 +149,7 @@ void def_variables(float *q, int ecozone, int forestmodel_data, int ifl, int cli
             CH4 = 6.8;
             N2O = 0.2;
             peatburn_CO2_only = 264;
-            peatburn_non_CO2 = 91;
+            peatburn_non_CO2 = 88;

             if (plant_data == 1) // Forestry, tropics, oil palm
             {
@@ -189,7 +189,7 @@ void def_variables(float *q, int ecozone, int forestmodel_data, int ifl, int cli
             CH4 = 4.7;
             N2O = 0.26;
             peatburn_CO2_only = 446;
-            peatburn_non_CO2 = 85;
+            peatburn_non_CO2 = 82;
             peat_drain_annual_CO2_only = 2;
             peat_drain_annual_non_CO2 = 1;
             peat_drain_total_CO2_only = (model_years - lossyr) * peat_drain_annual_CO2_only;
@@ -202,7 +202,7 @@ void def_variables(float *q, int ecozone, int forestmodel_data, int ifl, int cli
             CH4 = 4.7;
             N2O = 0.26;
             peatburn_CO2_only = 446;
-            peatburn_non_CO2 = 85;
+            peatburn_non_CO2 = 82;
             peat_drain_annual_CO2_only = 11;
             peat_drain_annual_non_CO2 = 3;
             peat_drain_total_CO2_only = (model_years - lossyr) * peat_drain_annual_CO2_only;
@@ -214,7 +214,7 @@ void def_variables(float *q, int ecozone, int forestmodel_data, int ifl, int cli
             CH4 = 6.8;
             N2O = 0.2;
             peatburn_CO2_only = 601;
-            peatburn_non_CO2 = 208;
+            peatburn_non_CO2 = 200;

             if (plant_data == 1) // Wildfire, tropics, oil palm
             {
@@ -254,7 +254,7 @@ void def_variables(float *q, int ecozone, int forestmodel_data, int ifl, int cli
             CH4 = 4.7;
             N2O = 0.26;
             peatburn_CO2_only = 446;
-            peatburn_non_CO2 = 85;
+            peatburn_non_CO2 = 82;
             peat_drain_annual_CO2_only = 2;
             peat_drain_annual_non_CO2 = 1;
             peat_drain_total_CO2_only = (model_years - lossyr) * peat_drain_annual_CO2_only;
@@ -267,7 +267,7 @@ void def_variables(float *q, int ecozone, int forestmodel_data, int ifl, int cli
             CH4 = 4.7;
             N2O = 0.26;
             peatburn_CO2_only = 446;
-            peatburn_non_CO2 = 85;
+            peatburn_non_CO2 = 82;
             peat_drain_annual_CO2_only = 11;
             peat_drain_annual_non_CO2 = 3;
             peat_drain_total_CO2_only = (model_years - lossyr) * peat_drain_annual_CO2_only;
@@ -279,7 +279,7 @@ void def_variables(float *q, int ecozone, int forestmodel_data, int ifl, int cli
             CH4 = 6.8;
             N2O = 0.2;
             peatburn_CO2_only = 264;
-            peatburn_non_CO2 = 91;
+            peatburn_non_CO2 = 88;

             if (plant_data == 1) // Forestry, tropics, oil palm
             {
diff --git a/readme.md b/readme.md
index 318e635b..b0fbe201 100644
--- a/readme.md
+++ b/readme.md
@@ -2,7 +2,7 @@

 ### Purpose and scope
 This framework maps gross greenhouse gas emissions from forests,
-gross carbon removals (sequestration) by forests, and the difference between them (net flux), all between 2001 and 2022.
+gross carbon removals (sequestration) by forests, and the difference between them (net flux), all between 2001 and 2023.
 Gross emissions includes CO2, CH4, and N2O and all carbon pools (aboveground biomass, belowground biomass, dead wood, litter, and soil),
 and gross removals includes removals into aboveground and belowground biomass carbon.
 Although the framework is run for all tree canopy densities in 2000 (per Hansen et al. 2013), it is most relevant to
@@ -12,7 +12,7 @@
 The framework essentially spatially applies IPCC national greenhouse gas inventory rules for forests.
 It covers only forests converted to non-forests, non-forests converted to forests, and forests remaining forests
 (no other land use transitions).
 The framework is described and published in [Harris et al. (2021) Nature Climate Change "Global maps of twenty-first century forest carbon fluxes"](https://www.nature.com/articles/s41558-020-00976-6).
-Although the original manuscript covered 2001-2019, the same methods were used to update the framework to include 2022,
+Although the original manuscript covered 2001-2019, the same methods were used to update the framework to include 2023,
 with a few changes to some input layers and constants. You can read about the changes since publication
 [here](https://www.globalforestwatch.org/blog/data-and-research/whats-new-carbon-flux-monitoring).
@@ -39,7 +39,7 @@
 A complete list of inputs, including changes made to the framework since the original publication, is
 [here](http://gfw2-data.s3.amazonaws.com/climate/carbon_model/Table_S3_data_sources__updated_20230406.pdf).

 ### Outputs
-There are three key outputs produced: gross GHG emissions, gross removals, and net flux, all summed per pixel for 2001-2022.
+There are three key outputs produced: gross GHG emissions, gross removals, and net flux, all summed per pixel for 2001-2023.
 These are produced at two resolutions: 0.00025x0.00025 degrees (approximately 30x30 m at the equator) in 10x10 degree rasters
 (to make outputs a manageable size), and 0.04x0.04 degrees (approximately 4x4 km at the equator) as global rasters for static maps.
@@ -60,7 +60,7 @@
 The 30-m outputs are used for zonal statistics (i.e. emissions, removals, or net flux within polygons)
 and mapping on the Global Forest Watch web platform or at small scales (where 30-m pixels can be distinguished).
 Individual emissions pixels can be assigned specific years based on Hansen loss during further analyses
 but removals and net flux are cumulative over the entire framework run and cannot be assigned specific years.
-This 30-m output is in megagrams (Mg) CO2e/ha 2001-2022 (i.e. densities) and includes all tree cover densities ("full extent"):
+This 30-m output is in megagrams (Mg) CO2e/ha 2001-2023 (i.e. densities) and includes all tree cover densities ("full extent"):
 `((TCD2000>0 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0)`.
 However, the framework is designed to be used specifically for forests,
 so the framework creates three derivative 30-m outputs for each key output (gross emissions, gross removals, net flux) as well
 (only for the standard version, not for sensitivity analyses).
@@ -336,9 +336,12 @@
 Change the tree cover loss tile pattern in `constants_and_names.py`.

 6) Obtain and pre-process the updated drivers of tree cover loss framework and tree cover loss from fires
    using `mp_prep_other_inputs_annual.py`. Note that the drivers map probably needs to be reprojected to WGS84
-   and resampled (0.005x0.005 deg) in ArcMap or similar
-   before processing into 0.00025x0.00025 deg 10x10 tiles using this script.
-   `mp_prep_other_inputs_annual.py` has some additional notes about that.
+   and resampled (0.005x0.005 deg) in ArcMap or similar before processing into 0.00025x0.00025 deg 10x10 tiles using this script.
+   `mp_prep_other_inputs_annual.py` has some additional notes about that. You can choose which set of tiles to pre-process
+   by providing the following options for the process argument (-p):
+   - tcld: Pre-processes drivers of tree cover loss tiles
+   - tclf: Pre-processes tree cover loss due to fires tiles
+   - all: Pre-processes both drivers of tree cover loss and tree cover loss due to fires tiles

 7) Make sure that changes in forest age category produced by `mp_forest_age_category_IPCC.py`
    and the number of gain years produced by `mp_gain_year_count_all_forest_types.py` still make sense.
diff --git a/removals/gain_year_count_all_forest_types.py b/removals/gain_year_count_all_forest_types.py
index 5c569fa2..48f77a4a 100644
--- a/removals/gain_year_count_all_forest_types.py
+++ b/removals/gain_year_count_all_forest_types.py
@@ -82,7 +82,7 @@ def create_gain_year_count_gain_only_standard(tile_id):
     if not os.path.exists(gain):
         uu.print_log(f'  Gain tile not found for {tile_id}. Skipping gain-only pixel gain year count.')

-    # Need to check if loss tile exists because the calc string is depends on the presence/absence of the loss tile
+    # Need to check if loss tile exists because the calc string depends on the presence/absence of the loss tile
     elif os.path.exists(loss):
         uu.print_log(f'  Loss tile found for {tile_id}. Using it in gain-only pixel gain year count.')
         gain_calc = f'--calc=(A==0)*(B==1)*(C>0)*({cn.gain_years}/2)'
@@ -256,7 +256,7 @@ def create_gain_year_count_loss_and_gain_standard(tile_id):

     uu.check_memory()

-    if not os.path.exists(loss) and not os.path.exists(gain):
+    if not os.path.exists(loss) or not os.path.exists(gain):
         uu.print_log(f'  Loss and gain tiles not found for {tile_id}. Skipping loss-and-gain pixel gain year count.')
     else:
         uu.print_log(f'  Loss and gain tiles found for {tile_id}. Using them in loss-and-gain pixel gain year count.')
diff --git a/removals/mp_gain_year_count_all_forest_types.py b/removals/mp_gain_year_count_all_forest_types.py
index 145cd3d2..8cdbfda4 100644
--- a/removals/mp_gain_year_count_all_forest_types.py
+++ b/removals/mp_gain_year_count_all_forest_types.py
@@ -142,7 +142,7 @@ def mp_gain_year_count_all_forest_types(tile_id_list):

     # Creates gain year count tiles using only pixels that had neither loss nor gain pixels
     if cn.count == 96:
-        processes = 90   # 66 = 360 GB peak; 88 = 430 GB peak; 90 = 510 GB peak
+        processes = 50   # 50 = XXX GB peak
     else:
         processes = int(cn.count/2)
     uu.print_log(f'Gain year count no-change pixels max processors={processes}')
@@ -156,9 +156,9 @@ def mp_gain_year_count_all_forest_types(tile_id_list):
     pool.close()
     pool.join()

-    # Creates gain year count tiles using only pixels that had only gain
+    # Creates gain year count tiles using only pixels that had both loss and gain
     if cn.count == 96:
-        processes = 90   # 66 = 370 GB peak; 88 = 430 GB peak; 90 = 550 GB peak
+        processes = 50   # 50 = XXX GB peak
     else:
         processes = int(cn.count/2)
     uu.print_log(f'Gain year count loss & gain pixels max processors={processes}')
@@ -174,7 +174,7 @@ def mp_gain_year_count_all_forest_types(tile_id_list):

     # Combines the four above gain year count tiles for each Hansen tile into a single output tile
     if cn.count == 96:
-        processes = 84   # 28 processors = 220 GB peak; 62 = 470 GB peak; 78 = 600 GB peak; 80 = 620 GB peak; 84 = 630 GB peak
+        processes = 50   # 50 = XXX GB peak
     elif cn.count < 4:
         processes = 1
     else:
diff --git a/universal_util.py b/universal_util.py
index eb2f4dd2..659a3b42 100644
--- a/universal_util.py
+++ b/universal_util.py
@@ -591,7 +591,7 @@ def s3_flexible_download(source_dir, pattern, dest, sensit_type, tile_id_list):
         s3_folder_download(source_dir, dest, sensit_type, pattern)


-# Downloads all tiles in an s3 folder, adpating to sensitivity analysis type
+# Downloads all tiles in an s3 folder, adapting to sensitivity analysis type
 # Source=source file on s3
 # dest=where to download onto spot machine
 # sensit_type = whether the model is standard or a sensitivity analysis model run
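
For context on the `process` argument change at the top of this diff: `mp_prep_other_inputs` now dispatches on a string rather than an integer, which is why the entry point passes `process=str(process)` instead of `process=int(process)`. The sketch below is a minimal, hypothetical illustration of that dispatch pattern; the argparse flags (`-p`, `-l`) and branch bodies are assumptions for illustration, not the module's actual code. The string values come from this diff ('sdptv2', 'all') and the readme's step 6 ('tcld', 'tclf', 'all').

```python
# Hypothetical sketch of string-based process dispatch; the real module
# defines its own arguments and branch bodies.
import argparse

def mp_prep_other_inputs(tile_id_list, process):
    # Each branch can run alone or together via 'all'.
    if process in ('sdptv2', 'all'):
        print('Preprocessing SDPTv2 tile sets...')                    # branch shown in this diff
    if process in ('tcld', 'all'):
        print('Preprocessing drivers of tree cover loss tiles...')    # per readme step 6
    if process in ('tclf', 'all'):
        print('Preprocessing tree cover loss from fires tiles...')    # per readme step 6

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--process', required=True,
                        help="Which tile sets to pre-process: 'tcld', 'tclf', 'sdptv2', or 'all'")
    parser.add_argument('-l', '--tile_id_list', default='all')
    args = parser.parse_args()

    # Mirrors the fixed call at the bottom of the diffed module: the argument
    # stays a string instead of being cast with int().
    mp_prep_other_inputs(tile_id_list=args.tile_id_list, process=str(args.process))
```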
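The repeated `model_years` and peat constants in equations.cpp feed a simple running total: a peat pixel that lost forest in year `lossyr` is assumed to keep draining through the final model year. A small worked example (Python, with rates copied from this diff; the loss year is illustrative) shows why `model_years` must move from 22 to 23 in both constants.h and equations.cpp:

```python
# Worked example of the peat drainage totals computed in equations.cpp.
# Rates are the forestry-on-peat values from the diff; lossyr is illustrative.
model_years = 23                  # was 22 before the 2023 loss year was added
lossyr = 5                        # tree cover loss detected in year 5 (2005)
peat_drain_annual_CO2_only = 11   # per-year drainage emissions, CO2-only component
peat_drain_annual_non_CO2 = 3     # per-year drainage emissions, non-CO2 component

# Drainage accrues from the loss year through the end of the model period,
# so each added model year adds one more year of emissions to old loss pixels.
peat_drain_total_CO2_only = (model_years - lossyr) * peat_drain_annual_CO2_only
peat_drain_total_non_CO2 = (model_years - lossyr) * peat_drain_annual_non_CO2

print(peat_drain_total_CO2_only)  # (23 - 5) * 11 = 198
print(peat_drain_total_non_CO2)   # (23 - 5) * 3  = 54
```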
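The single-character `and` to `or` change in `create_gain_year_count_loss_and_gain_standard` is the most consequential fix in this diff and is easy to misread, so here is the logic distilled into a standalone sketch (function and message names are simplified from the original): the loss-and-gain calculation reads both tiles, so it must be skipped when either one is missing, not only when both are.

```python
import os

def should_run_loss_and_gain(loss_path, gain_path):
    # Old guard: `not exists(loss) and not exists(gain)` only skipped when BOTH
    # tiles were missing, so a tile with exactly one input slipped through and
    # the downstream calc (which reads both rasters) would fail.
    # Corrected guard: skip when EITHER tile is missing.
    if not os.path.exists(loss_path) or not os.path.exists(gain_path):
        print('  Loss and/or gain tile not found. Skipping loss-and-gain pixel gain year count.')
        return False
    print('  Loss and gain tiles found. Using them in loss-and-gain pixel gain year count.')
    return True
```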