Skip to content

Commit

Permalink
Add functionality to allow user to input crop calendar file
Browse files Browse the repository at this point in the history
For #11 #10

- flexibility to use user provided crop calendar
- flexibility to select crops
- updated to the latest FAO data
- updated with FAO crop to MIRCA2000 crop mapping
- updated default MIRCA2000 crops to GCAM commodity mapping
- make cleaning sage data its own function
  • Loading branch information
mengqi-z committed Feb 14, 2025
1 parent f83561f commit 01a0525
Show file tree
Hide file tree
Showing 18 changed files with 528 additions and 161 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export(agprodchange_interp)
export(agprodchange_ref)
export(clean_sage)
export(clean_yield)
export(climate_impact)
export(colname_replace)
Expand Down Expand Up @@ -35,6 +36,7 @@ export(plot_yield_impact)
export(prep_regression)
export(regression_fixed_effects)
export(smooth_impacts)
export(verify_crop)
export(weather_agg)
export(weather_clean)
export(weighted_climate)
Expand Down
303 changes: 222 additions & 81 deletions R/crop_calendars.R

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions R/data_aggregation.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ data_aggregation <- function(climate_hist_dir = NULL,
input_file = "crop_calendar.csv"
)

crop_select <- names(crop_cal)[!names(crop_cal) %in% c('iso', 'plant', 'harvest')]

# merge fao yield and fao irrigation equip data
yield <- gaia::merge_data(fao_yield, fao_irr_equip, "iso", "year")

Expand Down Expand Up @@ -60,7 +62,7 @@ data_aggregation <- function(climate_hist_dir = NULL,

crop_historic <- data.table::data.table()

for (crop_i in mapping_mirca_sage$crop_mirca) {
for (crop_i in crop_select) {
crop_id <- crop_mirca$crop_id[grepl(crop_i, crop_mirca$crop_name)]

# filter out the file for the crop
Expand Down Expand Up @@ -121,7 +123,7 @@ data_aggregation <- function(climate_hist_dir = NULL,

crop_projection <- data.table::data.table()

for (crop_i in mapping_mirca_sage$crop_mirca) {
for (crop_i in crop_select) {
crop_id <- crop_mirca$crop_id[grepl(crop_i, crop_mirca$crop_name)]

# filter out the file for the crop
Expand Down
Binary file modified R/sysdata.rda
Binary file not shown.
14 changes: 13 additions & 1 deletion R/yield_impact.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#' @param gcam_version Default = 'gcam7'. String for the GCAM version. Only support gcam6 and gcam7
#' @param gcam_timestep Default = 5. Integer for the time step of GCAM (Select either 1 or 5 years for GCAM use)
#' @param gcamdata_dir Default = NULL. String for directory to the gcamdata folder within the specific GCAM version. The gcamdata need to be run with drake to have the CSV outputs beforehand.
#' @param crop_calendar_file Default = NULL. String for the path of the crop calendar file. If crop_cal is provided, crop_select will be set to crops in crop calendar. User provided crop_calendar_file can include any crops MIRCA2000 crops: "wheat", "maize", "rice", "barley", "rye", "millet", "sorghum", "soybean", "sunflower", "root_tuber", "cassava", "sugarcane", "sugarbeet", "oil_palm", "rape_seed", "groundnuts", "pulses", "citrus", "date_palm", "grapes", "cotton", "cocoa", "coffee", "others_perennial", "fodder_grasses", "other_annual"
#' @param crop_select Default = NULL. Vector of strings for the selected crops from our database. If NULL, the default crops will be used in the crop calendar: c("cassava", "cotton", "maize", "rice", "root_tuber", "sorghum", "soybean", "sugarbeet", "sugarcane", "sunflower", "wheat"). The additional crops available for selection from our crop calendar database are: "barley", "groundnuts", "millet", "pulses", "rape_seed", "rye"
#' @param use_default_coeff Default = FALSE. Binary for using default regression coefficients. Set to TRUE will use the default coefficients instead of calculating coefficients from the historical climate data.
#' @param base_year Default = 2015. Integer for the base year (for GCAM)
#' @param start_year Default = NULL. Integer for the start year of the projected data
Expand Down Expand Up @@ -47,6 +49,8 @@ yield_impact <- function(pr_hist_ncdf = NULL,
gcam_version = "gcam7",
gcam_timestep = 5,
gcamdata_dir = NULL,
crop_calendar_file = NULL,
crop_select = NULL,
use_default_coeff = FALSE,
base_year = 2015,
start_year = NULL,
Expand Down Expand Up @@ -86,7 +90,13 @@ yield_impact <- function(pr_hist_ncdf = NULL,


# Step 2: Generate planting months for each country
gaia::crop_calendars(output_dir = output_dir)
crop_cal <- gaia::crop_calendars(
crop_calendar_file = crop_calendar_file,
crop_select = crop_select,
output_dir = output_dir)

# update finalized selected crops
crop_select <- names(crop_cal)[!names(crop_cal) %in% c('iso', 'plant', 'harvest')]

# Step 3: Process multiple models to analyze historical weather variables and crop yields
if (all(
Expand Down Expand Up @@ -117,6 +127,7 @@ yield_impact <- function(pr_hist_ncdf = NULL,
# Step 4: Yield regressions and create figures
if (!use_default_coeff) {
gaia::yield_regression(
crop_select = crop_select,
diagnostics = diagnostics,
output_dir = output_dir
)
Expand All @@ -133,6 +144,7 @@ yield_impact <- function(pr_hist_ncdf = NULL,
end_year = end_year,
gcam_timestep = gcam_timestep,
smooth_window = smooth_window,
crop_select = crop_select,
diagnostics = diagnostics,
output_dir = output_dir
)
Expand Down
120 changes: 88 additions & 32 deletions R/yield_impacts_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,43 +7,89 @@
#' Function to clean FAO yield data
#'
#' @param fao_yield Default = NULL. Data frame for the fao yield table
#' @param fao_to_mirca Default = NULL. Data frame for the fao to mirca crop mapping
#' @returns A data frame of formatted FAO yield data
#' @keywords internal
#' @export

clean_yield <- function(fao_yield = NULL) {
clean_yield <- function(fao_yield = NULL,
fao_to_mirca = NULL) {
AreaName <- crop <- NULL

d <- subset(fao_yield, select = c("AreaName", "ElementName", "ItemName", "Year", "Value"))
d <- subset(d, AreaName != "")
d <- subset(d, AreaName != "China, mainland")
d <- gaia::colname_replace(d, "AreaName", "country_name")
d <- gaia::colname_replace(d, "ElementName", "var")
d <- gaia::colname_replace(d, "ItemName", "crop")
d <- gaia::colname_replace(d, "Year", "year")
d <- gaia::colname_replace(d, "Value", "value")
d$crop <- tolower(d$crop)
d$var <- gsub("Area harvested", "area_harvest", d$var)
d$var <- gsub("Yield", "yield", d$var)
d <- subset(d, crop != "cottonseed") # Cotton seed has no data, FAO yields are for seed cotton
d$crop <- gsub("rice, paddy", "rice", d$crop)
d$crop <- gsub("soybeans", "soybean", d$crop)
d$crop <- gsub("sugar cane", "sugarcane", d$crop)
d$crop <- gsub("sugar beet", "sugarbeet", d$crop)
d$crop <- gsub("seed cotton", "cotton", d$crop)
d$crop <- gsub("sunflower seed", "sunflower", d$crop)
d$crop <- gsub(" ", "_", d$crop)
d <- data.table::dcast(d, country_name + crop + year ~ var, value.var = "value")
d <- merge(d, mapping_gcam_iso, by = "country_name", all.x = TRUE)
d <- gaia::iso_replace(d)
# For these countries, root_tuber == cassava; others root_tuber == potato
d$crop <- ifelse(d$crop == "potatoes", "root_tuber", d$crop)
# d$crop <- ifelse( ( ( d$iso == "ben" | d$iso == "cmr" | d$iso == "caf" | d$iso == "cog" | d$iso == "cod" | d$iso == "civ" | d$iso == "gab" | d$iso == "gha" |
# d$iso == "gin" | d$iso == "lbr" | d$iso == "nga" | d$iso == "sle" | d$iso == "som" | d$iso == "tgo" ) & d$crop == "cassava" ), "root_tuber", d$crop )
# d$crop <- ifelse( ( ( d$iso == "ben" | d$iso == "cmr" | d$iso == "caf" | d$iso == "cog" | d$iso == "cod" | d$iso == "civ" | d$iso == "gab" | d$iso == "gha" |
# d$iso == "gin" | d$iso == "lbr" | d$iso == "nga" | d$iso == "sle" | d$iso == "som" | d$iso == "tgo" ) & d$crop == "potatoes" ), "potatoes", d$crop )
d <- subset(d, select = c("iso", "crop", "year", "area_harvest", "yield"))
return(d)
# clean up FAOSTAT yield and harvest area data and left join the iso code
df <- fao_yield %>%
dplyr::filter(`Item Code` %in% unique(fao_to_mirca$fao_crop_id),
Element %in% c('Area harvested', 'Yield'),
# Exclude entire China because FAOSTAT distinguishes China mainland, Taiwan, and Hongkong
# GCAM also has Taiwan as individual region
!Area %in% c('China')) %>%
dplyr::select(country_code = `Area Code`, country_name = Area, var = Element,
fao_crop_name = Item, fao_crop_id = `Item Code`,
paste0('Y', 1961:2020)) %>%
dplyr::left_join(fao_iso %>% dplyr::select(country_code, iso),
by = 'country_code') %>%
dplyr::filter(!is.na(iso))

# clean up the iso code which megers some FAOSTAT specific 'f' iso codes
df <- gaia::iso_replace(df)

# restructure data
df <- df %>%
tidyr::pivot_longer(cols = paste0('Y', 1961:2020),
names_to = 'year', values_to = 'value') %>%
dplyr::mutate(year = as.integer(gsub('Y', '', year)),
var = dplyr::case_when(var == 'Yield' ~ 'yield',
var == 'Area harvested' ~ 'area_harvest')) %>%
tidyr::pivot_wider(names_from = var, values_from = value)

# Join FAO to MIRCA2000 crops and aggregated by country, MIRCA2000 crop type
# for harvest areas, take sum of all FAO crops for each MIRCA2000 crop
# for yields, take mean of all FAO crops for each MIRCA2000 crop
df_mirca <- df %>%
dplyr::left_join(fao_to_mirca %>%
tidyr::pivot_longer(cols = paste('crop', sprintf('%02d', 1:26), sep = ''),
names_to = 'crop_id', values_to = 'to_mirca') %>%
dplyr::filter(to_mirca == 'X') %>%
dplyr::left_join(crop_mirca %>% dplyr::select(crop_id, crop = crop_name)) %>%
dplyr::select(fao_crop_id, crop) %>%
dplyr::distinct(),
by = 'fao_crop_id') %>%
dplyr::group_by(iso, crop, year) %>%
dplyr::summarise(area_harvest = sum(area_harvest, na.rm = T),
yield = mean(yield, na.rm = T)) %>%
dplyr::filter(area_harvest > 0, yield > 0)
#
#
# d <- subset(fao_yield, select = c("Area", "Element", "Item", "Year", "Value"))
# d <- subset(d, Area != "")
# d <- subset(d, Area != "China, mainland")
# d <- gaia::colname_replace(d, "Area", "country_name")
# d <- gaia::colname_replace(d, "Element", "var")
# d <- gaia::colname_replace(d, "Item", "crop")
# d <- gaia::colname_replace(d, "Year", "year")
# d <- gaia::colname_replace(d, "Value", "value")
# d$crop <- tolower(d$crop)
# d$var <- gsub("Area harvested", "area_harvest", d$var)
# d$var <- gsub("Yield", "yield", d$var)
# d <- subset(d, crop != "cottonseed") # Cotton seed has no data, FAO yields are for seed cotton
# d$crop <- gsub("rice, paddy", "rice", d$crop)
# d$crop <- gsub("soybeans", "soybean", d$crop)
# d$crop <- gsub("sugar cane", "sugarcane", d$crop)
# d$crop <- gsub("sugar beet", "sugarbeet", d$crop)
# d$crop <- gsub("seed cotton, unginned", "cotton", d$crop)
# d$crop <- gsub("sunflower seed", "sunflower", d$crop)
# d$crop <- gsub(" ", "_", d$crop)
# d <- data.table::dcast(d, country_name + crop + year ~ var, value.var = "value")
# d <- merge(d, mapping_gcam_iso, by = "country_name", all.x = TRUE)
# d <- gaia::iso_replace(d)
# # For these countries, root_tuber == cassava; others root_tuber == potato
# d$crop <- ifelse(d$crop == "potatoes", "root_tuber", d$crop)
# # d$crop <- ifelse( ( ( d$iso == "ben" | d$iso == "cmr" | d$iso == "caf" | d$iso == "cog" | d$iso == "cod" | d$iso == "civ" | d$iso == "gab" | d$iso == "gha" |
# # d$iso == "gin" | d$iso == "lbr" | d$iso == "nga" | d$iso == "sle" | d$iso == "som" | d$iso == "tgo" ) & d$crop == "cassava" ), "root_tuber", d$crop )
# # d$crop <- ifelse( ( ( d$iso == "ben" | d$iso == "cmr" | d$iso == "caf" | d$iso == "cog" | d$iso == "cod" | d$iso == "civ" | d$iso == "gab" | d$iso == "gha" |
# # d$iso == "gin" | d$iso == "lbr" | d$iso == "nga" | d$iso == "sle" | d$iso == "som" | d$iso == "tgo" ) & d$crop == "potatoes" ), "potatoes", d$crop )
# d <- subset(d, select = c("iso", "crop", "year", "area_harvest", "yield"))
return(df_mirca)
}


Expand Down Expand Up @@ -241,7 +287,7 @@ data_merge <- function(data = NULL,
data = d,
save_path = file.path(output_dir, "data_processed"),
file_name = paste0("historic_vars_", crop_name, ".csv"),
data_info = "Merged yield data"
data_info = "Merged historical yield data"
)

return(d)
Expand Down Expand Up @@ -671,6 +717,16 @@ climate_impact <- function(use_default_coeff = FALSE,
# create base year vector
baseYears <- c(paste("X", (start_year:end_year), sep = ""))

# check if the base year exists at all
missing_year <- setdiff(baseYears, colnames(d))
if(length(missing_year) > 0){

for(missing_year_i in missing_year){
d[[missing_year_i]] <- NA
}

}

# if start year from the available data is 9 years or earlier before the base year
# then historical years are determined as (base_year - 9) to base_year
# else historical years are determined as start_year to base year
Expand Down
8 changes: 7 additions & 1 deletion R/yield_regression.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
#' using average growing season temperature and precipitation, max and min months
#'
#' @param formula Default = NULL. String for regression formula
#' @param crop_select Default = NULL. Vector of strings for the selected crops from our database. If NULL, the default crops will be used in the crop calendar: c("cassava", "cotton", "maize", "rice", "root_tuber", "sorghum", "soybean", "sugarbeet", "sugarcane", "sunflower", "wheat"). The additional crops available for selection from our crop calendar database are: "barley", "groundnuts", "millet", "pulses", "rape_seed", "rye"
#' @param diagnostics Default = TRUE. Logical for performing diagnostic plot
#' @param output_dir Default = file.path(getwd(), 'output'). String for output directory
#' @returns No return value, called for the side effects of processing and writing output files
#' @export

yield_regression <- function(formula = NULL,
crop_select = NULL,
diagnostics = TRUE,
output_dir = file.path(getwd(), "output")) {
# Decided not to use irrigation equipped land because data is so spotty
Expand All @@ -24,8 +26,12 @@ yield_regression <- function(formula = NULL,
message(paste0("Starting step: yield_regression"))
message(paste0("Using formula: ", formula))

if(is.null(crop_select)){
crop_select <- gaia::verify_crop(crop_select = crop_select)$crop_mirca
}

# conduct regression for each crop
for (crop_i in mapping_mirca_sage$crop_mirca) {
for (crop_i in crop_select) {
# read in the historical crop data and weather data
d_crop <- gaia::input_data(
folder_path = file.path(output_dir, "data_processed"),
Expand Down
7 changes: 6 additions & 1 deletion R/yield_shock_projection.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#' @param end_year Default = NULL. Integer for the end year of the data
#' @param gcam_timestep Default = 5. Integer for the time step of GCAM (Select either 1 or 5 years for GCAM use)
#' @param smooth_window Default = 20. Integer for smoothing window in years
#' @param crop_select Default = NULL. Vector of strings for the selected crops from our database. If NULL, the default crops will be used in the crop calendar: c("cassava", "cotton", "maize", "rice", "root_tuber", "sorghum", "soybean", "sugarbeet", "sugarcane", "sunflower", "wheat"). The additional crops available for selection from our crop calendar database are: "barley", "groundnuts", "millet", "pulses", "rape_seed", "rye"
#' @param diagnostics Default = TRUE. Logical for performing diagnostic plot
#' @param output_dir Default = file.path(getwd(), 'output'). String for output directory
#' @returns A data frame of formatted smoothed annual crop yield shocks under climate impacts
Expand All @@ -24,6 +25,7 @@ yield_shock_projection <- function(use_default_coeff = FALSE,
end_year = NULL,
gcam_timestep = 5,
smooth_window = 20,
crop_select = NULL,
diagnostics = TRUE,
output_dir = file.path(getwd(), "output")) {
message("Starting Step: yield_shock_projection")
Expand All @@ -40,8 +42,11 @@ yield_shock_projection <- function(use_default_coeff = FALSE,
stringsAsFactors = FALSE
)

if(is.null(crop_select)){
crop_select <- gaia::verify_crop(crop_select = crop_select)$crop_mirca
}

for (crop_i in mapping_mirca_sage$crop_mirca) {
for (crop_i in crop_select) {
print(paste(climate_model, climate_scenario, crop_i, sep = " "))

# calculate yield impact for each crop and country
Expand Down
Loading

0 comments on commit 01a0525

Please sign in to comment.