diff --git a/NAMESPACE b/NAMESPACE index ec61b52..b3a00fc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export(agprodchange_interp) export(agprodchange_ref) +export(clean_sage) export(clean_yield) export(climate_impact) export(colname_replace) @@ -35,6 +36,7 @@ export(plot_yield_impact) export(prep_regression) export(regression_fixed_effects) export(smooth_impacts) +export(verify_crop) export(weather_agg) export(weather_clean) export(weighted_climate) diff --git a/R/crop_calendars.R b/R/crop_calendars.R index c4aaf0d..3c9af55 100644 --- a/R/crop_calendars.R +++ b/R/crop_calendars.R @@ -3,45 +3,173 @@ #' Generate planting months for each country #' Data from SAGE #' +#' @param crop_calendar_file Default = NULL. String for the path of the crop calendar file. If crop_cal is provided, crop_select will be set to crops in crop calendar. User provided crop_calendar_file can include any crops MIRCA2000 crops: "wheat", "maize", "rice", "barley", "rye", "millet", "sorghum", "soybean", "sunflower", "root_tuber", "cassava", "sugarcane", "sugarbeet", "oil_palm", "rape_seed", "groundnuts", "pulses", "citrus", "date_palm", "grapes", "cotton", "cocoa", "coffee", "others_perennial", "fodder_grasses", "others_annual" +#' @param crop_select Default = NULL. Vector of strings for the selected crops from our database. If NULL, the default crops will be used in the crop calendar: c("cassava", "cotton", "maize", "rice", "root_tuber", "sorghum", "soybean", "sugarbeet", "sugarcane", "sunflower", "wheat"). The additional crops available for selection from our crop calendar database are: "barley", "groundnuts", "millet", "pulses", "rape_seed", "rye" #' @param output_dir Default = file.path(getwd(), 'output'). String for output directory #' @returns A data frame of crop calendars #' @export -crop_calendars <- function(output_dir = file.path(getwd(), "output")) { +crop_calendars <- function(crop_calendar_file = NULL, + crop_select = NULL, + output_dir = file.path(getwd(), "output")) { iso <- crop <- NULL message("Starting Step: crop_calendars") - # load SAGE data - d <- sage - - # format, subset data and replace location name - d$Location <- ifelse(d$Location == "Georgia" & d$Source == "USDA UPHD", "Georgia_USA", d$Location) - d <- subset(d, select = c("Data.ID", "Location", "Crop", "Qualifier", "Plant.start", "Plant.end", "Harvest.start", "Harvest.end")) - d <- gaia::colname_replace(d, "Location", "country_name") - d <- gaia::colname_replace(d, "Crop", "crop") - d$crop <- tolower(d$crop) - - # Average plant and harvest month - d$plant <- ifelse(d$Plant.start < d$Plant.end, ((d$Plant.start + d$Plant.end) / 2 / 30), ((d$Plant.start + d$Plant.end + 365) / 2 / 30)) - d$plant <- ceiling(ifelse(d$plant > 12, d$plant - 12, d$plant)) - d$plant <- ifelse(d$crop == "sugarcane", 1, d$plant) # Sugarcane is a multiyear crop, will assume that 12 months preceding annual value equals growing season. - d$harvest <- ifelse(d$Harvest.start < d$Harvest.end, ((d$Harvest.start + d$Harvest.end) / 2) / 30, ((d$Harvest.start + d$Harvest.end + 365) / 2) / 30) - d$harvest <- ceiling(ifelse(d$harvest > 12, d$harvest - 12, d$harvest)) - d$harvest <- ifelse(d$crop == "sugarcane", 12, d$harvest) # Sugarcane is a multiyear crop, will assume that 12 months preceding annual value equals growing season. - - # Country codes - d <- merge(d, mapping_gcam_iso, by = "country_name", all.x = TRUE) - d <- gaia::iso_replace(d) - - # Indicators for crops - crops <- mapping_mirca_sage - - for (i in 1:nrow(crops)) { - d <- d %>% - dplyr::mutate(!!paste0(crops$crop_mirca[i]) := ifelse(!is.na(iso) & (crop == crops$crop_sage[i]), 1, 0)) + # verify the crops provided or selected are within the MIRCA crops + crops <- gaia::verify_crop( + crop_calendar_file = crop_calendar_file, + crop_select = crop_select) + + # use user data if provided + if(!is.null(crop_calendar_file)){ + + d <- data.table::fread( + file = crop_calendar_file, + skip = 0, + stringsAsFactors = FALSE, + header = TRUE + ) + + } else { + + # load SAGE data + d <- sage + + # format, subset data and replace location name + d$Location <- ifelse(d$Location == "Georgia" & d$Source == "USDA UPHD", "Georgia_USA", d$Location) + d <- subset(d, select = c("Data.ID", "Location", "Crop", "Qualifier", "Plant.start", "Plant.end", "Harvest.start", "Harvest.end")) + d <- gaia::colname_replace(d, "Location", "country_name") + d <- gaia::colname_replace(d, "Crop", "crop") + d$crop <- tolower(d$crop) + + # Average plant and harvest month + d$plant <- ifelse(d$Plant.start < d$Plant.end, ((d$Plant.start + d$Plant.end) / 2 / 30), ((d$Plant.start + d$Plant.end + 365) / 2 / 30)) + d$plant <- ceiling(ifelse(d$plant > 12, d$plant - 12, d$plant)) + d$plant <- ifelse(d$crop == "sugarcane", 1, d$plant) # Sugarcane is a multiyear crop, will assume that 12 months preceding annual value equals growing season. + d$harvest <- ifelse(d$Harvest.start < d$Harvest.end, ((d$Harvest.start + d$Harvest.end) / 2) / 30, ((d$Harvest.start + d$Harvest.end + 365) / 2) / 30) + d$harvest <- ceiling(ifelse(d$harvest > 12, d$harvest - 12, d$harvest)) + d$harvest <- ifelse(d$crop == "sugarcane", 12, d$harvest) # Sugarcane is a multiyear crop, will assume that 12 months preceding annual value equals growing season. + + # Country codes + d <- merge(d, mapping_gcam_iso, by = "country_name", all.x = TRUE) + d <- gaia::iso_replace(d) + + for (i in 1:nrow(crops)) { + d <- d %>% + dplyr::mutate(!!paste0(crops$crop_mirca[i]) := ifelse(!is.na(iso) & (crop == crops$crop_sage[i]), 1, 0)) + } + + # clean up crop calendar + d <- gaia::clean_sage(d = d) + + } + + # filter the crops selected + d.crop.cal <- subset(d, select = c("iso", crops$crop_mirca, "plant", "harvest")) + d.crop.cal <- subset(d.crop.cal, !is.na(iso)) + d <- NULL + + # save crop calendar data + save_dir <- file.path(output_dir, "data_processed") + if (!dir.exists(save_dir)) { + dir.create(save_dir, recursive = TRUE) + } + utils::write.csv(d.crop.cal, file = file.path(save_dir, "crop_calendar.csv")) + + return(d.crop.cal) +} + + +# ------------------------------------------------------------------------------ +#' verify_crop +#' +#' Identify crops selected by users are available crop types from both SAGE and MIRCA +#' +#' @param crop_calendar_file Default = NULL. String for the path of the crop calendar file. If crop_cal is provided, crop_select will be set to crops in crop calendar. User provided crop_calendar_file can include any crops MIRCA2000 crops: "wheat", "maize", "rice", "barley", "rye", "millet", "sorghum", "soybean", "sunflower", "root_tuber", "cassava", "sugarcane", "sugarbeet", "oil_palm", "rape_seed", "groundnuts", "pulses", "citrus", "date_palm", "grapes", "cotton", "cocoa", "coffee", "others_perennial", "fodder_grasses", "other_annual" +#' @param crop_select Default = NULL. Vector of strings for the selected crops from our database. If NULL, the default crops will be used in the crop calendar: c("cassava", "cotton", "maize", "rice", "root_tuber", "sorghum", "soybean", "sugarbeet", "sugarcane", "sunflower", "wheat"). The additional crops available for selection from our crop calendar database are: "barley", "groundnuts", "millet", "pulses", "rape_seed", "rye" +#' @returns A data frame of crop name mapping between MIRCA and SAGE +#' @keywords internal +#' @export + +verify_crop <- function(crop_calendar_file = NULL, + crop_select = c("cassava", "cotton", "maize", "rice", "root_tuber", "sorghum", "soybean", "sugarbeet", "sugarcane", "sunflower", "wheat")){ + + + # Step 1: Finalize crop_select + # read the user provided crop calendar + if(!is.null(crop_calendar_file)){ + + df_crop_cal <- data.table::fread( + file = crop_calendar_file, + skip = 0, + stringsAsFactors = FALSE, + header = TRUE + ) + + # check if user provided crop_select + if(!is.null(crop_select)){ + + warning('Replace crop_select with crops from user provided crop_calendar_file') + + # Use crops from the crop_calendar_file + crop_select <- names(crop_cal)[!names(crop_cal) %in% c('iso', 'plant', 'harvest')] + + } + } else { + + # if neither crop_calendar_file nor crop_select is provided, use default crop selections + if(is.null(crop_select)){ + crop_select <- mapping_mirca_sage$crop_mirca + } else { + crop_select <- unique(c(crop_select, mapping_mirca_sage$crop_mirca)) + + } + } + # Step 2: Check crop_select in MIRCA + + # if crop_calendar_file is provided, we only need to check whether those crops exist in MIRCA, + # if crop_select is provided based on the given list, then the selection will be within both MIRCA and SAGE datasets + crop_select_mirca <- crop_mirca$crop_name[crop_mirca$crop_name %in% crop_select] + + if(length(crop_select_mirca) < length(crop_select)){ + + crop_not_mirca <- crop_select[is.na(match(crop_select, crop_mirca$crop_name))] + + warning(paste0(paste(crop_not_mirca, collapse = ', '), ' are not MIRCA crops. Remove from crop calendar calculation.')) + + } + + # Step 3: make the mapping between MIRCA and SAGE crops + crop_select_sage <- crop_mirca$crop_sage[match(crop_select_mirca, crop_mirca$crop_name)] + + # create mapping + mapping_crop <- tibble::tibble( + crop_mirca = crop_select_mirca, + crop_sage = crop_select_sage + ) + + # print the selected crops + print(paste0('Final selected crops are: ', paste(crop_select_mirca, collapse = ', '))) + + return(mapping_crop) +} + + +# ------------------------------------------------------------------------------ +#' clean_sage +#' +#' Clean up the sage crop calendar by reassigning the iso name and +#' make sure to keep one set of the plant and harvest for each country and crop +#' +#' @param d Default = NULL. data frame for crop calendar +#' @returns A data frame of clean crop calendar +#' @keywords internal +#' @export + +clean_sage <- function(d = NULL){ # ---------------------------------------------------------------------------- # Deal with exceptions (countries by states or regions) @@ -271,9 +399,11 @@ crop_calendars <- function(output_dir = file.path(getwd(), "output")) { # Somalia (two seasons, use the one with no qualifier code) d$cotton <- ifelse(d$country_name == "Somalia" & d$crop == "cotton" & d$Qualifier == 2, 0, d$cotton) + d$millet <- ifelse(d$country_name == "Somalia" & d$crop == "millet" & d$Qualifier == 2, 0, d$millet) #MZ # Kenya (two obs, nearly the same) d$cotton <- ifelse(d$country_name == "Kenya" & d$crop == "cotton" & d$Data.ID == 1711, 0, d$cotton) + d$millet <- ifelse(d$country_name == "Kenya" & d$crop == "millet" & d$Qualifier == 2, 0, d$millet) #MZ # SORGHUM CORRECTIONS # Dominican Republic (iso ID -- dom) listed thrice for sorghum @@ -311,48 +441,68 @@ crop_calendars <- function(output_dir = file.path(getwd(), "output")) { d$iso <- ifelse((d$country_name == "North Korea (central bowl)" & d$crop == "soybeans"), "prk", d$iso) d$soybean <- ifelse(d$country_name == "North Korea (central bowl)" & d$crop == "soybeans", 1, d$soybean) - # # PULSES CORRECTIONS - # # Duplicate listing for Burundi (iso ID -- bdi) - # # Seasonal production of pulses in Burundi: http://www.fao.org/docrep/004/w5956e/w5956e00.htm - # d$iso <- ifelse( ( d$country_name == "Burundi" & d$crop == "pulses" & d$Data.ID == 83 ), "bdi", d$iso ) - # d$pulses <- ifelse( d$country_name == "Burundi" & d$crop == "pulses" & d$Data.ID == 83, 1, d$pulses ) - # # Duplicate listing for Ethiopia (iso ID -- eth) - # # Ethiopia pulse production by month: http://www.fao.org/3/a-at305e.pdf - # d$iso <- ifelse( ( d$country_name == "Ethiopia" & d$crop == "pulses" & d$Data.ID == 1695 ), "eth", d$iso ) - # d$pulses <- ifelse( d$country_name == "Ethiopia" & d$crop == "pulses" & d$Data.ID == 1695, 1, d$pulses ) - # # Duplicate listing for Haiti (iso ID -- hti) - # # Haiti seasonality for pulse production: https://www.worldpulse.com/fr/node/9391 - # d$iso <- ifelse( ( d$country_name == "Haiti" & d$crop == "pulses" & d$Data.ID == 275 ), "hti", d$iso ) - # d$pulses <- ifelse( d$country_name == "Haiti" & d$crop == "pulses" & d$Data.ID == 275, 1, d$pulses ) - # # Duplicate listing for Kenya (iso ID -- ken) - # # Case study of Kenya pulse production: https://www.investinkenya.co.ke/main/view_article/330 - # d$iso <- ifelse( ( d$country_name == "Kenya" & d$crop == "pulses" & d$Data.ID == 341 ), "ken", d$iso ) - # d$pulses <- ifelse( d$country_name == "Kenya" & d$crop == "pulses" & d$Data.ID == 341, 1, d$pulses ) - # # Duplicate listing for Nicaragua (iso ID -- nic) - # # Pulse production by month in Nicaragua: http://www.fas.usda.gov/regions/nicaragua - # d$iso <- ifelse( ( d$country_name == "Nicaragua" & d$crop == "pulses" & d$Data.ID == 446 ), "nic", d$iso ) - # d$pulses <- ifelse( d$country_name == "Nicaragua" & d$crop == "pulses" & d$Data.ID == 446, 1, d$pulses ) - # # Duplicate listing for Rwanda (iso ID -- rwa) - # # Southern hemisphere; Rwanda pulse seasonality: http://allafrica.com/stories/201410090778.html - # d$iso <- ifelse( ( d$country_name == "Rwanda" & d$crop == "pulses" & d$Data.ID == 504 ), "rwa", d$iso ) - # d$pulses <- ifelse( d$country_name == "Rwanda" & d$crop == "pulses" & d$Data.ID == 504, 1, d$pulses ) - # # Duplicate listing for El Salvador (iso ID -- slv) - # # Pulse study in Central America: https://www.goift.com/market/americas/central-america/el-salvador-central-america/page/7/ - # d$iso <- ifelse( ( d$country_name == "El Salvador" & d$crop == "pulses" & d$Data.ID == 211 ), "slv", d$iso ) - # d$pulses <- ifelse( d$country_name == "El Salvador" & d$crop == "pulses" & d$Data.ID == 211, 1, d$pulses ) - # # Duplicate listing for Somalia (iso ID -- som) - # # Seasonality of pulse production in Somalia: https://conservancy.umn.edu/bitstream/handle/11299/163161/JohnsonBrittney.pdf?sequence=1&isAllowed=y - # d$iso <- ifelse( ( d$country_name == "Somalia" & d$crop == "pulses" & d$Data.ID == 1747 ), "som", d$iso ) - # d$pulses <- ifelse( d$country_name == "Somalia" & d$crop == "pulses" & d$Data.ID == 1747, 1, d$pulses ) - # # California has the largest pulse production of all states - # d$iso <- ifelse( ( d$country_name == "California" & d$crop == "pulses" & d$Data.ID == 1007 ), "usa", d$iso ) - # d$pulses <- ifelse( d$country_name == "California" & d$crop == "pulses" & d$Data.ID == 1007, 1, d$pulses ) - # # Maharashtra, of all Indian states listed (Gujarat, Himachal Pradesh, Kerala, Orissa, Rajasthan, Tamil Nadu), has 34% share of pulse production - # d$iso <- ifelse( ( d$country_name == "Maharashtra" & d$crop == "pulses" & d$Data.ID == 1424 ), "ind", d$iso ) - # d$pulses <- ifelse( d$country_name == "Maharashtra" & d$crop == "pulses" & d$Data.ID == 1424, 1, d$pulses ) - # # Uganda (South) should be associate with iso ID == uga and Data.ID == 611 - # d$iso <- ifelse( ( d$country_name == "Uganda (South)" & d$crop == "pulses" & d$Data.ID == 611 ), "uga", d$iso ) - # d$pulses <- ifelse( d$country_name == "Uganda (South)" & d$crop == "pulses" & d$Data.ID == 611, 1, d$pulses ) + # Barley Corrections (MZ) + d$barley <- ifelse(d$country_name == "Kenya" & d$crop == "barley" & d$Data.ID == 340, 0, d$barley) + d$barley <- ifelse(d$country_name == "United Kingdom" & d$crop == "barley" & d$Data.ID == 787, 0, d$barley) + d$barley <- ifelse(d$country_name == "Germany" & d$crop == "barley" & d$Data.ID == 713, 0, d$barley) + + # Groundnuts Corrections (MZ) + d$groundnuts <- ifelse(d$country_name == "Nigeria" & d$crop == "groundnuts" & d$Data.ID == 1730, 0, d$groundnuts) + d$groundnuts <- ifelse(d$country_name == "Somalia" & d$crop == "groundnuts" & d$Data.ID == 1743, 0, d$groundnuts) + + # Rapeseed Corrections (MZ) + d$rape_seed <- ifelse(d$country_name == "Ukraine" & d$crop == "rapeseed" & d$Data.ID == 779, 0, d$rape_seed) + + # PULSES CORRECTIONS + # Duplicate listing for Burundi (iso ID -- bdi) + # Seasonal production of pulses in Burundi: http://www.fao.org/docrep/004/w5956e/w5956e00.htm + d$iso <- ifelse( ( d$country_name == "Burundi" & d$crop == "pulses" & d$Data.ID == 83 ), "bdi", d$iso ) + d$pulses <- ifelse( d$country_name == "Burundi" & d$crop == "pulses" & d$Data.ID == 83, 1, d$pulses ) + d$pulses <- ifelse( d$country_name == "Burundi" & d$crop == "pulses" & d$Data.ID == 84, 0, d$pulses ) # MZ + # Duplicate listing for Ethiopia (iso ID -- eth) + # Ethiopia pulse production by month: http://www.fao.org/3/a-at305e.pdf + d$iso <- ifelse( ( d$country_name == "Ethiopia" & d$crop == "pulses" & d$Data.ID == 1695 ), "eth", d$iso ) + d$pulses <- ifelse( d$country_name == "Ethiopia" & d$crop == "pulses" & d$Data.ID == 1695, 1, d$pulses ) + d$pulses <- ifelse( d$country_name == "Ethiopia" & d$crop == "pulses" & d$Data.ID %in% c(1696, 1697), 0, d$pulses ) # MZ + # Duplicate listing for Haiti (iso ID -- hti) + # Haiti seasonality for pulse production: https://www.worldpulse.com/fr/node/9391 + d$iso <- ifelse( ( d$country_name == "Haiti" & d$crop == "pulses" & d$Data.ID == 275 ), "hti", d$iso ) + d$pulses <- ifelse( d$country_name == "Haiti" & d$crop == "pulses" & d$Data.ID == 275, 1, d$pulses ) + d$pulses <- ifelse( d$country_name == "Haiti" & d$crop == "pulses" & d$Data.ID %in% c(276, 277), 0, d$pulses ) # MZ + # Duplicate listing for Kenya (iso ID -- ken) + # Case study of Kenya pulse production: https://www.investinkenya.co.ke/main/view_article/330 + d$iso <- ifelse( ( d$country_name == "Kenya" & d$crop == "pulses" & d$Data.ID == 341 ), "ken", d$iso ) + d$pulses <- ifelse( d$country_name == "Kenya" & d$crop == "pulses" & d$Data.ID == 341, 1, d$pulses ) + d$pulses <- ifelse( d$country_name == "Kenya" & d$crop == "pulses" & d$Data.ID == 342, 0, d$pulses ) # MZ + # Duplicate listing for Nicaragua (iso ID -- nic) + # Pulse production by month in Nicaragua: http://www.fas.usda.gov/regions/nicaragua + d$iso <- ifelse( ( d$country_name == "Nicaragua" & d$crop == "pulses" & d$Data.ID == 446 ), "nic", d$iso ) + d$pulses <- ifelse( d$country_name == "Nicaragua" & d$crop == "pulses" & d$Data.ID == 446, 1, d$pulses ) + d$pulses <- ifelse( d$country_name == "Nicaragua" & d$crop == "pulses" & d$Data.ID %in% c(447, 448), 0, d$pulses ) # MZ + # Duplicate listing for Rwanda (iso ID -- rwa) + # Southern hemisphere; Rwanda pulse seasonality: http://allafrica.com/stories/201410090778.html + d$iso <- ifelse( ( d$country_name == "Rwanda" & d$crop == "pulses" & d$Data.ID == 504 ), "rwa", d$iso ) + d$pulses <- ifelse( d$country_name == "Rwanda" & d$crop == "pulses" & d$Data.ID == 504, 1, d$pulses ) + d$pulses <- ifelse( d$country_name == "Rwanda" & d$crop == "pulses" & d$Data.ID == 505, 0, d$pulses ) # MZ + # Duplicate listing for El Salvador (iso ID -- slv) + # Pulse study in Central America: https://www.goift.com/market/americas/central-america/el-salvador-central-america/page/7/ + d$iso <- ifelse( ( d$country_name == "El Salvador" & d$crop == "pulses" & d$Data.ID == 211 ), "slv", d$iso ) + d$pulses <- ifelse( d$country_name == "El Salvador" & d$crop == "pulses" & d$Data.ID == 211, 1, d$pulses ) + d$pulses <- ifelse( d$country_name == "El Salvador" & d$crop == "pulses" & d$Data.ID == 210, 0, d$pulses ) # MZ + # Duplicate listing for Somalia (iso ID -- som) + # Seasonality of pulse production in Somalia: https://conservancy.umn.edu/bitstream/handle/11299/163161/JohnsonBrittney.pdf?sequence=1&isAllowed=y + d$iso <- ifelse( ( d$country_name == "Somalia" & d$crop == "pulses" & d$Data.ID == 1747 ), "som", d$iso ) + d$pulses <- ifelse( d$country_name == "Somalia" & d$crop == "pulses" & d$Data.ID == 1747, 1, d$pulses ) + d$pulses <- ifelse( d$country_name == "Somalia" & d$crop == "pulses" & d$Data.ID == 1748, 0, d$pulses ) # MZ + # California has the largest pulse production of all states + d$iso <- ifelse( ( d$country_name == "California" & d$crop == "pulses" & d$Data.ID == 1007 ), "usa", d$iso ) + d$pulses <- ifelse( d$country_name == "California" & d$crop == "pulses" & d$Data.ID == 1007, 1, d$pulses ) + # Maharashtra, of all Indian states listed (Gujarat, Himachal Pradesh, Kerala, Orissa, Rajasthan, Tamil Nadu), has 34% share of pulse production + d$iso <- ifelse( ( d$country_name == "Maharashtra" & d$crop == "pulses" & d$Data.ID == 1424 ), "ind", d$iso ) + d$pulses <- ifelse( d$country_name == "Maharashtra" & d$crop == "pulses" & d$Data.ID == 1424, 1, d$pulses ) + # Uganda (South) should be associate with iso ID == uga and Data.ID == 611 + d$iso <- ifelse( ( d$country_name == "Uganda (South)" & d$crop == "pulses" & d$Data.ID == 611 ), "uga", d$iso ) + d$pulses <- ifelse( d$country_name == "Uganda (South)" & d$crop == "pulses" & d$Data.ID == 611, 1, d$pulses ) # Remove irrigated and other subcrops d$maize <- ifelse(d$maize == 1 & d$Qualifier != "", 0, d$maize) # Remove irrigated and other subcrops @@ -363,15 +513,6 @@ crop_calendars <- function(output_dir = file.path(getwd(), "output")) { # # only Kenya has both cassava and potato production. Potato production ~ 2x higher than cassava, so use cassava # d$root_tuber <- ifelse( d$iso == "ken" & d$crop == "cassava", 0, d$root_tuber ) - d.crop.cal <- subset(d, select = c("iso", "wheat", "rice", "maize", "cassava", "soybean", "sugarcane", "sugarbeet", "cotton", "sorghum", "root_tuber", "sunflower", "plant", "harvest")) - d.crop.cal <- subset(d.crop.cal, !is.na(iso)) - d <- NULL + return(d) - save_dir <- file.path(output_dir, "data_processed") - if (!dir.exists(save_dir)) { - dir.create(save_dir, recursive = TRUE) - } - utils::write.csv(d.crop.cal, file = file.path(save_dir, "crop_calendar.csv")) - - return(d.crop.cal) } diff --git a/R/data_aggregation.R b/R/data_aggregation.R index 7bb5964..2f30166 100644 --- a/R/data_aggregation.R +++ b/R/data_aggregation.R @@ -29,6 +29,8 @@ data_aggregation <- function(climate_hist_dir = NULL, input_file = "crop_calendar.csv" ) + crop_select <- names(crop_cal)[!names(crop_cal) %in% c('iso', 'plant', 'harvest')] + # merge fao yield and fao irrigation equip data yield <- gaia::merge_data(fao_yield, fao_irr_equip, "iso", "year") @@ -60,7 +62,7 @@ data_aggregation <- function(climate_hist_dir = NULL, crop_historic <- data.table::data.table() - for (crop_i in mapping_mirca_sage$crop_mirca) { + for (crop_i in crop_select) { crop_id <- crop_mirca$crop_id[grepl(crop_i, crop_mirca$crop_name)] # filter out the file for the crop @@ -121,7 +123,7 @@ data_aggregation <- function(climate_hist_dir = NULL, crop_projection <- data.table::data.table() - for (crop_i in mapping_mirca_sage$crop_mirca) { + for (crop_i in crop_select) { crop_id <- crop_mirca$crop_id[grepl(crop_i, crop_mirca$crop_name)] # filter out the file for the crop diff --git a/R/sysdata.rda b/R/sysdata.rda index dc9047d..61e2f10 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/R/yield_impact.R b/R/yield_impact.R index 2f2fbbc..0b86b50 100644 --- a/R/yield_impact.R +++ b/R/yield_impact.R @@ -18,6 +18,8 @@ #' @param gcam_version Default = 'gcam7'. String for the GCAM version. Only support gcam6 and gcam7 #' @param gcam_timestep Default = 5. Integer for the time step of GCAM (Select either 1 or 5 years for GCAM use) #' @param gcamdata_dir Default = NULL. String for directory to the gcamdata folder within the specific GCAM version. The gcamdata need to be run with drake to have the CSV outputs beforehand. +#' @param crop_calendar_file Default = NULL. String for the path of the crop calendar file. If crop_cal is provided, crop_select will be set to crops in crop calendar. User provided crop_calendar_file can include any crops MIRCA2000 crops: "wheat", "maize", "rice", "barley", "rye", "millet", "sorghum", "soybean", "sunflower", "root_tuber", "cassava", "sugarcane", "sugarbeet", "oil_palm", "rape_seed", "groundnuts", "pulses", "citrus", "date_palm", "grapes", "cotton", "cocoa", "coffee", "others_perennial", "fodder_grasses", "other_annual" +#' @param crop_select Default = NULL. Vector of strings for the selected crops from our database. If NULL, the default crops will be used in the crop calendar: c("cassava", "cotton", "maize", "rice", "root_tuber", "sorghum", "soybean", "sugarbeet", "sugarcane", "sunflower", "wheat"). The additional crops available for selection from our crop calendar database are: "barley", "groundnuts", "millet", "pulses", "rape_seed", "rye" #' @param use_default_coeff Default = FALSE. Binary for using default regression coefficients. Set to TRUE will use the default coefficients instead of calculating coefficients from the historical climate data. #' @param base_year Default = 2015. Integer for the base year (for GCAM) #' @param start_year Default = NULL. Integer for the start year of the projected data @@ -47,6 +49,8 @@ yield_impact <- function(pr_hist_ncdf = NULL, gcam_version = "gcam7", gcam_timestep = 5, gcamdata_dir = NULL, + crop_calendar_file = NULL, + crop_select = NULL, use_default_coeff = FALSE, base_year = 2015, start_year = NULL, @@ -86,7 +90,13 @@ yield_impact <- function(pr_hist_ncdf = NULL, # Step 2: Generate planting months for each country - gaia::crop_calendars(output_dir = output_dir) + crop_cal <- gaia::crop_calendars( + crop_calendar_file = crop_calendar_file, + crop_select = crop_select, + output_dir = output_dir) + + # update finalized selected crops + crop_select <- names(crop_cal)[!names(crop_cal) %in% c('iso', 'plant', 'harvest')] # Step 3: Process multiple models to analyze historical weather variables and crop yields if (all( @@ -117,6 +127,7 @@ yield_impact <- function(pr_hist_ncdf = NULL, # Step 4: Yield regressions and create figures if (!use_default_coeff) { gaia::yield_regression( + crop_select = crop_select, diagnostics = diagnostics, output_dir = output_dir ) @@ -133,6 +144,7 @@ yield_impact <- function(pr_hist_ncdf = NULL, end_year = end_year, gcam_timestep = gcam_timestep, smooth_window = smooth_window, + crop_select = crop_select, diagnostics = diagnostics, output_dir = output_dir ) diff --git a/R/yield_impacts_functions.R b/R/yield_impacts_functions.R index e3310ba..ac1f58f 100644 --- a/R/yield_impacts_functions.R +++ b/R/yield_impacts_functions.R @@ -7,43 +7,89 @@ #' Function to clean FAO yield data #' #' @param fao_yield Default = NULL. Data frame for the fao yield table +#' @param fao_to_mirca Default = NULL. Data frame for the fao to mirca crop mapping #' @returns A data frame of formatted FAO yield data #' @keywords internal #' @export -clean_yield <- function(fao_yield = NULL) { +clean_yield <- function(fao_yield = NULL, + fao_to_mirca = NULL) { AreaName <- crop <- NULL - d <- subset(fao_yield, select = c("AreaName", "ElementName", "ItemName", "Year", "Value")) - d <- subset(d, AreaName != "") - d <- subset(d, AreaName != "China, mainland") - d <- gaia::colname_replace(d, "AreaName", "country_name") - d <- gaia::colname_replace(d, "ElementName", "var") - d <- gaia::colname_replace(d, "ItemName", "crop") - d <- gaia::colname_replace(d, "Year", "year") - d <- gaia::colname_replace(d, "Value", "value") - d$crop <- tolower(d$crop) - d$var <- gsub("Area harvested", "area_harvest", d$var) - d$var <- gsub("Yield", "yield", d$var) - d <- subset(d, crop != "cottonseed") # Cotton seed has no data, FAO yields are for seed cotton - d$crop <- gsub("rice, paddy", "rice", d$crop) - d$crop <- gsub("soybeans", "soybean", d$crop) - d$crop <- gsub("sugar cane", "sugarcane", d$crop) - d$crop <- gsub("sugar beet", "sugarbeet", d$crop) - d$crop <- gsub("seed cotton", "cotton", d$crop) - d$crop <- gsub("sunflower seed", "sunflower", d$crop) - d$crop <- gsub(" ", "_", d$crop) - d <- data.table::dcast(d, country_name + crop + year ~ var, value.var = "value") - d <- merge(d, mapping_gcam_iso, by = "country_name", all.x = TRUE) - d <- gaia::iso_replace(d) - # For these countries, root_tuber == cassava; others root_tuber == potato - d$crop <- ifelse(d$crop == "potatoes", "root_tuber", d$crop) - # d$crop <- ifelse( ( ( d$iso == "ben" | d$iso == "cmr" | d$iso == "caf" | d$iso == "cog" | d$iso == "cod" | d$iso == "civ" | d$iso == "gab" | d$iso == "gha" | - # d$iso == "gin" | d$iso == "lbr" | d$iso == "nga" | d$iso == "sle" | d$iso == "som" | d$iso == "tgo" ) & d$crop == "cassava" ), "root_tuber", d$crop ) - # d$crop <- ifelse( ( ( d$iso == "ben" | d$iso == "cmr" | d$iso == "caf" | d$iso == "cog" | d$iso == "cod" | d$iso == "civ" | d$iso == "gab" | d$iso == "gha" | - # d$iso == "gin" | d$iso == "lbr" | d$iso == "nga" | d$iso == "sle" | d$iso == "som" | d$iso == "tgo" ) & d$crop == "potatoes" ), "potatoes", d$crop ) - d <- subset(d, select = c("iso", "crop", "year", "area_harvest", "yield")) - return(d) + # clean up FAOSTAT yield and harvest area data and left join the iso code + df <- fao_yield %>% + dplyr::filter(`Item Code` %in% unique(fao_to_mirca$fao_crop_id), + Element %in% c('Area harvested', 'Yield'), + # Exclude entire China because FAOSTAT distinguishes China mainland, Taiwan, and Hongkong + # GCAM also has Taiwan as individual region + !Area %in% c('China')) %>% + dplyr::select(country_code = `Area Code`, country_name = Area, var = Element, + fao_crop_name = Item, fao_crop_id = `Item Code`, + paste0('Y', 1961:2020)) %>% + dplyr::left_join(fao_iso %>% dplyr::select(country_code, iso), + by = 'country_code') %>% + dplyr::filter(!is.na(iso)) + + # clean up the iso code which megers some FAOSTAT specific 'f' iso codes + df <- gaia::iso_replace(df) + + # restructure data + df <- df %>% + tidyr::pivot_longer(cols = paste0('Y', 1961:2020), + names_to = 'year', values_to = 'value') %>% + dplyr::mutate(year = as.integer(gsub('Y', '', year)), + var = dplyr::case_when(var == 'Yield' ~ 'yield', + var == 'Area harvested' ~ 'area_harvest')) %>% + tidyr::pivot_wider(names_from = var, values_from = value) + + # Join FAO to MIRCA2000 crops and aggregated by country, MIRCA2000 crop type + # for harvest areas, take sum of all FAO crops for each MIRCA2000 crop + # for yields, take mean of all FAO crops for each MIRCA2000 crop + df_mirca <- df %>% + dplyr::left_join(fao_to_mirca %>% + tidyr::pivot_longer(cols = paste('crop', sprintf('%02d', 1:26), sep = ''), + names_to = 'crop_id', values_to = 'to_mirca') %>% + dplyr::filter(to_mirca == 'X') %>% + dplyr::left_join(crop_mirca %>% dplyr::select(crop_id, crop = crop_name)) %>% + dplyr::select(fao_crop_id, crop) %>% + dplyr::distinct(), + by = 'fao_crop_id') %>% + dplyr::group_by(iso, crop, year) %>% + dplyr::summarise(area_harvest = sum(area_harvest, na.rm = T), + yield = mean(yield, na.rm = T)) %>% + dplyr::filter(area_harvest > 0, yield > 0) + # + # + # d <- subset(fao_yield, select = c("Area", "Element", "Item", "Year", "Value")) + # d <- subset(d, Area != "") + # d <- subset(d, Area != "China, mainland") + # d <- gaia::colname_replace(d, "Area", "country_name") + # d <- gaia::colname_replace(d, "Element", "var") + # d <- gaia::colname_replace(d, "Item", "crop") + # d <- gaia::colname_replace(d, "Year", "year") + # d <- gaia::colname_replace(d, "Value", "value") + # d$crop <- tolower(d$crop) + # d$var <- gsub("Area harvested", "area_harvest", d$var) + # d$var <- gsub("Yield", "yield", d$var) + # d <- subset(d, crop != "cottonseed") # Cotton seed has no data, FAO yields are for seed cotton + # d$crop <- gsub("rice, paddy", "rice", d$crop) + # d$crop <- gsub("soybeans", "soybean", d$crop) + # d$crop <- gsub("sugar cane", "sugarcane", d$crop) + # d$crop <- gsub("sugar beet", "sugarbeet", d$crop) + # d$crop <- gsub("seed cotton, unginned", "cotton", d$crop) + # d$crop <- gsub("sunflower seed", "sunflower", d$crop) + # d$crop <- gsub(" ", "_", d$crop) + # d <- data.table::dcast(d, country_name + crop + year ~ var, value.var = "value") + # d <- merge(d, mapping_gcam_iso, by = "country_name", all.x = TRUE) + # d <- gaia::iso_replace(d) + # # For these countries, root_tuber == cassava; others root_tuber == potato + # d$crop <- ifelse(d$crop == "potatoes", "root_tuber", d$crop) + # # d$crop <- ifelse( ( ( d$iso == "ben" | d$iso == "cmr" | d$iso == "caf" | d$iso == "cog" | d$iso == "cod" | d$iso == "civ" | d$iso == "gab" | d$iso == "gha" | + # # d$iso == "gin" | d$iso == "lbr" | d$iso == "nga" | d$iso == "sle" | d$iso == "som" | d$iso == "tgo" ) & d$crop == "cassava" ), "root_tuber", d$crop ) + # # d$crop <- ifelse( ( ( d$iso == "ben" | d$iso == "cmr" | d$iso == "caf" | d$iso == "cog" | d$iso == "cod" | d$iso == "civ" | d$iso == "gab" | d$iso == "gha" | + # # d$iso == "gin" | d$iso == "lbr" | d$iso == "nga" | d$iso == "sle" | d$iso == "som" | d$iso == "tgo" ) & d$crop == "potatoes" ), "potatoes", d$crop ) + # d <- subset(d, select = c("iso", "crop", "year", "area_harvest", "yield")) + return(df_mirca) } @@ -241,7 +287,7 @@ data_merge <- function(data = NULL, data = d, save_path = file.path(output_dir, "data_processed"), file_name = paste0("historic_vars_", crop_name, ".csv"), - data_info = "Merged yield data" + data_info = "Merged historical yield data" ) return(d) @@ -671,6 +717,16 @@ climate_impact <- function(use_default_coeff = FALSE, # create base year vector baseYears <- c(paste("X", (start_year:end_year), sep = "")) + # check if the base year exists at all + missing_year <- setdiff(baseYears, colnames(d)) + if(length(missing_year) > 0){ + + for(missing_year_i in missing_year){ + d[[missing_year_i]] <- NA + } + + } + # if start year from the available data is 9 years or earlier before the base year # then historical years are determined as (base_year - 9) to base_year # else historical years are determined as start_year to base year diff --git a/R/yield_regression.R b/R/yield_regression.R index d313a5e..e5bd794 100644 --- a/R/yield_regression.R +++ b/R/yield_regression.R @@ -4,12 +4,14 @@ #' using average growing season temperature and precipitation, max and min months #' #' @param formula Default = NULL. String for regression formula +#' @param crop_select Default = NULL. Vector of strings for the selected crops from our database. If NULL, the default crops will be used in the crop calendar: c("cassava", "cotton", "maize", "rice", "root_tuber", "sorghum", "soybean", "sugarbeet", "sugarcane", "sunflower", "wheat"). The additional crops available for selection from our crop calendar database are: "barley", "groundnuts", "millet", "pulses", "rape_seed", "rye" #' @param diagnostics Default = TRUE. Logical for performing diagnostic plot #' @param output_dir Default = file.path(getwd(), 'output'). String for output directory #' @returns No return value, called for the side effects of processing and writing output files #' @export yield_regression <- function(formula = NULL, + crop_select = NULL, diagnostics = TRUE, output_dir = file.path(getwd(), "output")) { # Decided not to use irrigation equipped land because data is so spotty @@ -24,8 +26,12 @@ yield_regression <- function(formula = NULL, message(paste0("Starting step: yield_regression")) message(paste0("Using formula: ", formula)) + if(is.null(crop_select)){ + crop_select <- gaia::verify_crop(crop_select = crop_select)$crop_mirca + } + # conduct regression for each crop - for (crop_i in mapping_mirca_sage$crop_mirca) { + for (crop_i in crop_select) { # read in the historical crop data and weather data d_crop <- gaia::input_data( folder_path = file.path(output_dir, "data_processed"), diff --git a/R/yield_shock_projection.R b/R/yield_shock_projection.R index 9250169..41cc741 100644 --- a/R/yield_shock_projection.R +++ b/R/yield_shock_projection.R @@ -11,6 +11,7 @@ #' @param end_year Default = NULL. Integer for the end year of the data #' @param gcam_timestep Default = 5. Integer for the time step of GCAM (Select either 1 or 5 years for GCAM use) #' @param smooth_window Default = 20. Integer for smoothing window in years +#' @param crop_select Default = NULL. Vector of strings for the selected crops from our database. If NULL, the default crops will be used in the crop calendar: c("cassava", "cotton", "maize", "rice", "root_tuber", "sorghum", "soybean", "sugarbeet", "sugarcane", "sunflower", "wheat"). The additional crops available for selection from our crop calendar database are: "barley", "groundnuts", "millet", "pulses", "rape_seed", "rye" #' @param diagnostics Default = TRUE. Logical for performing diagnostic plot #' @param output_dir Default = file.path(getwd(), 'output'). String for output directory #' @returns A data frame of formatted smoothed annual crop yield shocks under climate impacts @@ -24,6 +25,7 @@ yield_shock_projection <- function(use_default_coeff = FALSE, end_year = NULL, gcam_timestep = 5, smooth_window = 20, + crop_select = NULL, diagnostics = TRUE, output_dir = file.path(getwd(), "output")) { message("Starting Step: yield_shock_projection") @@ -40,8 +42,11 @@ yield_shock_projection <- function(use_default_coeff = FALSE, stringsAsFactors = FALSE ) + if(is.null(crop_select)){ + crop_select <- gaia::verify_crop(crop_select = crop_select)$crop_mirca + } - for (crop_i in mapping_mirca_sage$crop_mirca) { + for (crop_i in crop_select) { print(paste(climate_model, climate_scenario, crop_i, sep = " ")) # calculate yield impact for each crop and country diff --git a/data-raw/DATASET.R b/data-raw/DATASET.R index d85e1e8..47b7b84 100644 --- a/data-raw/DATASET.R +++ b/data-raw/DATASET.R @@ -293,17 +293,6 @@ sage <- sage %>% dplyr::select(-V1) -#------------------------------------------------------------------------------- -# FAO Data - Harvested Area -#------------------------------------------------------------------------------- -fao_yield <- gaia::input_data( - folder_path = file.path(model_data.dir, 'data_raw'), - input_file = 'FAO_yield_ha.csv', - skip_number = 0 -) - -fao_yield <- gaia::clean_yield(fao_yield) - #------------------------------------------------------------------------------- # FAO Data - Irrigation Equip #------------------------------------------------------------------------------- @@ -330,38 +319,84 @@ gdp <- gdp %>% dplyr::rename(gdp_pcap_ppp = gdp_pcap_ppp_thous) %>% dplyr::select(-V1) +#------------------------------------------------------------------------------- +# Mapping of FAO crops to MIRCA2000 +#------------------------------------------------------------------------------- + +# Source: supporting information from Portmann, F. T., Siebert, S., & Döll, P. (2010). MIRCA2000—Global monthly irrigated and rainfed crop areas around the year 2000: A new high‐resolution data set for agricultural and hydrological modeling. Global biogeochemical cycles, 24(1). https://doi.org/10.1029/2008GB003435 +# Manually added the FAO Item Code based on the FAOSTAT data https://www.fao.org/faostat/en/#data/QCL, accessed Feb 2025 +# Note that FAOSTAT has updated the crop names/groups since MIRCA2000, so there are few FAO crops listed in Portmann et al., 2010 are not available from the latest FAOSTAT. +fao_to_mirca <- gaia::input_data( + folder_path = file.path(model_data.dir, 'data_ext'), + input_file = 'mirca2000_fao_crop_mapping_edit.csv', + skip_number = 0) %>% + dplyr::filter(!is.na(fao_crop_id)) +# colnames(fao_to_mirca) <- c('fao_crop', paste('crop', sprintf('%02d', 1:26), sep = '')) + +#------------------------------------------------------------------------------- +# FAOSTAT Country ISO mapping +#------------------------------------------------------------------------------- +fao_iso <- gaia::input_data( + folder_path = file.path(model_data.dir, 'data_ext', 'FAO_Production_Crops_Livestock_E_All_Data'), + input_file = 'FAOSTAT_Area_Code.csv', + skip_number = 0) %>% + dplyr::select(country_code = `Country Code`, + country_name = Country, + iso = `ISO3 Code`) %>% + dplyr::mutate(iso = tolower(iso)) %>% + dplyr::distinct() + + +#------------------------------------------------------------------------------- +# FAO Data - Harvested Area +#------------------------------------------------------------------------------- +# fao_yield <- gaia::input_data( +# folder_path = file.path(model_data.dir, 'data_raw'), +# input_file = 'FAO_yield_ha.csv', +# skip_number = 0 +# ) + +# Latest full FAO production data from https://www.fao.org/faostat/en/#data/QCL, accessed Feb 2025 +fao_yield <- gaia::input_data( + folder_path = file.path(model_data.dir, 'data_ext', 'FAO_Production_Crops_Livestock_E_All_Data'), + input_file = 'Production_Crops_Livestock_E_All_Data_NOFLAG.csv', + skip_number = 0 +) + +fao_yield <- gaia::clean_yield(fao_yield = fao_yield, + fao_to_mirca = fao_to_mirca) #------------------------------------------------------------------------------- # MIRCA Crop Mapping #------------------------------------------------------------------------------- crop_mirca <- tibble::tribble( - ~ crop_id, ~ crop, ~ crop_name, - "crop01", "wheat", "wheat", - "crop02", "maize", "maize", - "crop03", "rice", "rice", - "crop04", "barley", NA, - "crop05", "rye", NA, - "crop06", "millet", NA, - "crop07", "sorghum", "sorghum", - "crop08", "soybean", "soybean", - "crop09", "sunflower", "sunflower", - "crop10", "potatoes", "root_tuber", - "crop11", "cassava", "cassava", - "crop12", "sugarcane", "sugarcane", - "crop13", "sugarbeet", "sugarbeet", - "crop14", "oil palm", NA, - "crop15", "rape seed,canola", NA, - "crop16", "groundnuts,peanuts", NA, - "crop17", "pulses", NA, - "crop18", "citrus", NA, - "crop19", "date palm", NA, - "crop20", "grapes,vine", NA, - "crop21", "cotton", "cotton", - "crop22", "cocoa", NA, - "crop23", "coffee", NA, - "crop24", "others perennial", NA, - "crop25", "fodder grasses", NA, - "crop26", "other annual", NA + ~ crop_id, ~ crop, ~ crop_name, ~ crop_sage, + "crop01", "wheat", "wheat", "wheat", + "crop02", "maize", "maize", "maize", + "crop03", "rice", "rice", "rice", + "crop04", "barley", "barley", "barley", + "crop05", "rye", "rye", "rye", + "crop06", "millet", "millet", "millet", + "crop07", "sorghum", "sorghum", "sorghum", + "crop08", "soybean", "soybean", "soybeans", + "crop09", "sunflower", "sunflower", "sunflower", + "crop10", "potatoes", "root_tuber", "potatoes", + "crop11", "cassava", "cassava", "cassava", + "crop12", "sugarcane", "sugarcane", "sugarcane", + "crop13", "sugarbeet", "sugarbeet", "sugarbeets", + "crop14", "oil palm", "oil_palm", NA, + "crop15", "rape seed,canola", "rape_seed", "rapeseed", + "crop16", "groundnuts,peanuts", "groundnuts", "groundnuts", + "crop17", "pulses", "pulses", "pulses", + "crop18", "citrus", "citrus", NA, + "crop19", "date palm", "date_palm", NA, + "crop20", "grapes,vine", "grapes", NA, + "crop21", "cotton", "cotton", "cotton", + "crop22", "cocoa", "cocoa", NA, + "crop23", "coffee", "coffee", NA, + "crop24", "others perennial", "others_perennial", NA, + "crop25", "fodder grasses", "fodder_grasses", NA, + "crop26", "others annual", "others_annual", NA ) @@ -374,24 +409,41 @@ gcam_commod <- tibble::tribble( "biomass", "maize", "Grass", "biomass", "sorghum", "Grass", "biomass", "sugarcane", "Grass", + "biomass", "fodder_grasses", "Grass", "Corn", "maize", "C4", "FiberCrop", "cotton", "", "FodderHerb", "maize", "C4", "FodderHerb", "sorghum", "C4", "Fruits", "rice", "", "Fruits", "wheat", "", + "Fruits", "citrus", "Tree", + "Fruits", "date_palm", "Tree", + "Fruits", "grapes", "", "Legumes", "maize", "", "Legumes", "wheat", "", + "Legumes", "pulses", "", "MiscCrop", "rice", "", "MiscCrop", "wheat", "", + "MiscCrop", "coffee", "Tree", + "MiscCrop", "cocoa", "Tree", + "MiscCrop", "others_perennial", "", + "MiscCrop", "others_annual", "", "NutsSeeds", "rice", "", "NutsSeeds", "wheat", "", + "NutsSeeds", "groundnuts", "", "OilCrop", "soybean", "", + "OilCrop", "sunflower", "", + "OilCrop", "rape_seed", "", + "OilCrop", "groundnuts", "", "OilPalm", "rice", "", - "OilPalm", 'wheat', "", + "OilPalm", "wheat", "", + "OilPalm", "oil_palm", "Tree", "OtherGrain", "rice", "", "OtherGrain", "sorghum", "C4", "OtherGrain", "wheat", "", + "OtherGrain", "barley", "", + "OtherGrain", "rye", "", + "OtherGrain", "millet", "C4", "Rice", "rice", "", "RootTuber", "root_tuber", "", "RootTuber", "potato", "", @@ -542,5 +594,5 @@ usethis::use_data(mapping_country, grid_fao_glu, mapping_fao_glu, mapping_gcam_i mirca_harvest_area, sage, fao_yield, fao_irr_equip, gdp, waldhoff_formula, y_hat, reg_vars, weight_var, n_sig, fit_name, col_scale_region, col_fill_region, theme_basic, - map_country, + map_country, fao_to_mirca, fao_iso, internal = TRUE, overwrite = TRUE ) diff --git a/man/clean_sage.Rd b/man/clean_sage.Rd new file mode 100644 index 0000000..96eabac --- /dev/null +++ b/man/clean_sage.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/crop_calendars.R +\name{clean_sage} +\alias{clean_sage} +\title{clean_sage} +\usage{ +clean_sage(d = NULL) +} +\arguments{ +\item{d}{Default = NULL. data frame for crop calendar} +} +\value{ +A data frame of clean crop calendar +} +\description{ +Clean up the sage crop calendar by reassigning the iso name and +make sure to keep one set of the plant and harvest for each country and crop +} +\keyword{internal} diff --git a/man/clean_yield.Rd b/man/clean_yield.Rd index 701b70e..fdb9cd7 100644 --- a/man/clean_yield.Rd +++ b/man/clean_yield.Rd @@ -4,10 +4,12 @@ \alias{clean_yield} \title{clean_yield} \usage{ -clean_yield(fao_yield = NULL) +clean_yield(fao_yield = NULL, fao_to_mirca = NULL) } \arguments{ \item{fao_yield}{Default = NULL. Data frame for the fao yield table} + +\item{fao_to_mirca}{Default = NULL. Data frame for the fao to mirca crop mapping} } \value{ A data frame of formatted FAO yield data diff --git a/man/crop_calendars.Rd b/man/crop_calendars.Rd index a03b1e0..94822f1 100644 --- a/man/crop_calendars.Rd +++ b/man/crop_calendars.Rd @@ -4,9 +4,17 @@ \alias{crop_calendars} \title{crop_calendars} \usage{ -crop_calendars(output_dir = file.path(getwd(), "output")) +crop_calendars( + crop_calendar_file = NULL, + crop_select = NULL, + output_dir = file.path(getwd(), "output") +) } \arguments{ +\item{crop_calendar_file}{Default = NULL. String for the path of the crop calendar file. If crop_cal is provided, crop_select will be set to crops in crop calendar. User provided crop_calendar_file can include any crops MIRCA2000 crops: "wheat", "maize", "rice", "barley", "rye", "millet", "sorghum", "soybean", "sunflower", "root_tuber", "cassava", "sugarcane", "sugarbeet", "oil_palm", "rape_seed", "groundnuts", "pulses", "citrus", "date_palm", "grapes", "cotton", "cocoa", "coffee", "others_perennial", "fodder_grasses", "others_annual"} + +\item{crop_select}{Default = NULL. Vector of strings for the selected crops from our database. If NULL, the default crops will be used in the crop calendar: c("cassava", "cotton", "maize", "rice", "root_tuber", "sorghum", "soybean", "sugarbeet", "sugarcane", "sunflower", "wheat"). The additional crops available for selection from our crop calendar database are: "barley", "groundnuts", "millet", "pulses", "rape_seed", "rye"} + \item{output_dir}{Default = file.path(getwd(), 'output'). String for output directory} } \value{ diff --git a/man/verify_crop.Rd b/man/verify_crop.Rd new file mode 100644 index 0000000..bafcd51 --- /dev/null +++ b/man/verify_crop.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/crop_calendars.R +\name{verify_crop} +\alias{verify_crop} +\title{verify_crop} +\usage{ +verify_crop( + crop_calendar_file = NULL, + crop_select = c("cassava", "cotton", "maize", "rice", "root_tuber", "sorghum", + "soybean", "sugarbeet", "sugarcane", "sunflower", "wheat") +) +} +\arguments{ +\item{crop_calendar_file}{Default = NULL. String for the path of the crop calendar file. If crop_cal is provided, crop_select will be set to crops in crop calendar. User provided crop_calendar_file can include any crops MIRCA2000 crops: "wheat", "maize", "rice", "barley", "rye", "millet", "sorghum", "soybean", "sunflower", "root_tuber", "cassava", "sugarcane", "sugarbeet", "oil_palm", "rape_seed", "groundnuts", "pulses", "citrus", "date_palm", "grapes", "cotton", "cocoa", "coffee", "others_perennial", "fodder_grasses", "other_annual"} + +\item{crop_select}{Default = NULL. Vector of strings for the selected crops from our database. If NULL, the default crops will be used in the crop calendar: c("cassava", "cotton", "maize", "rice", "root_tuber", "sorghum", "soybean", "sugarbeet", "sugarcane", "sunflower", "wheat"). The additional crops available for selection from our crop calendar database are: "barley", "groundnuts", "millet", "pulses", "rape_seed", "rye"} +} +\value{ +A data frame of crop name mapping between MIRCA and SAGE +} +\description{ +Identify crops selected by users are available crop types from both SAGE and MIRCA +} +\keyword{internal} diff --git a/man/yield_impact.Rd b/man/yield_impact.Rd index 54c8101..451510a 100644 --- a/man/yield_impact.Rd +++ b/man/yield_impact.Rd @@ -21,6 +21,8 @@ yield_impact( gcam_version = "gcam7", gcam_timestep = 5, gcamdata_dir = NULL, + crop_calendar_file = NULL, + crop_select = NULL, use_default_coeff = FALSE, base_year = 2015, start_year = NULL, @@ -65,6 +67,10 @@ yield_impact( \item{gcamdata_dir}{Default = NULL. String for directory to the gcamdata folder within the specific GCAM version. The gcamdata need to be run with drake to have the CSV outputs beforehand.} +\item{crop_calendar_file}{Default = NULL. String for the path of the crop calendar file. If crop_cal is provided, crop_select will be set to crops in crop calendar. User provided crop_calendar_file can include any crops MIRCA2000 crops: "wheat", "maize", "rice", "barley", "rye", "millet", "sorghum", "soybean", "sunflower", "root_tuber", "cassava", "sugarcane", "sugarbeet", "oil_palm", "rape_seed", "groundnuts", "pulses", "citrus", "date_palm", "grapes", "cotton", "cocoa", "coffee", "others_perennial", "fodder_grasses", "other_annual"} + +\item{crop_select}{Default = NULL. Vector of strings for the selected crops from our database. If NULL, the default crops will be used in the crop calendar: c("cassava", "cotton", "maize", "rice", "root_tuber", "sorghum", "soybean", "sugarbeet", "sugarcane", "sunflower", "wheat"). The additional crops available for selection from our crop calendar database are: "barley", "groundnuts", "millet", "pulses", "rape_seed", "rye"} + \item{use_default_coeff}{Default = FALSE. Binary for using default regression coefficients. Set to TRUE will use the default coefficients instead of calculating coefficients from the historical climate data.} \item{base_year}{Default = 2015. Integer for the base year (for GCAM)} diff --git a/man/yield_regression.Rd b/man/yield_regression.Rd index 89ab201..ef4d5ce 100644 --- a/man/yield_regression.Rd +++ b/man/yield_regression.Rd @@ -6,6 +6,7 @@ \usage{ yield_regression( formula = NULL, + crop_select = NULL, diagnostics = TRUE, output_dir = file.path(getwd(), "output") ) @@ -13,6 +14,8 @@ yield_regression( \arguments{ \item{formula}{Default = NULL. String for regression formula} +\item{crop_select}{Default = NULL. Vector of strings for the selected crops from our database. If NULL, the default crops will be used in the crop calendar: c("cassava", "cotton", "maize", "rice", "root_tuber", "sorghum", "soybean", "sugarbeet", "sugarcane", "sunflower", "wheat"). The additional crops available for selection from our crop calendar database are: "barley", "groundnuts", "millet", "pulses", "rape_seed", "rye"} + \item{diagnostics}{Default = TRUE. Logical for performing diagnostic plot} \item{output_dir}{Default = file.path(getwd(), 'output'). String for output directory} diff --git a/man/yield_shock_projection.Rd b/man/yield_shock_projection.Rd index cd7ee22..0e03636 100644 --- a/man/yield_shock_projection.Rd +++ b/man/yield_shock_projection.Rd @@ -13,6 +13,7 @@ yield_shock_projection( end_year = NULL, gcam_timestep = 5, smooth_window = 20, + crop_select = NULL, diagnostics = TRUE, output_dir = file.path(getwd(), "output") ) @@ -34,6 +35,8 @@ yield_shock_projection( \item{smooth_window}{Default = 20. Integer for smoothing window in years} +\item{crop_select}{Default = NULL. Vector of strings for the selected crops from our database. If NULL, the default crops will be used in the crop calendar: c("cassava", "cotton", "maize", "rice", "root_tuber", "sorghum", "soybean", "sugarbeet", "sugarcane", "sunflower", "wheat"). The additional crops available for selection from our crop calendar database are: "barley", "groundnuts", "millet", "pulses", "rape_seed", "rye"} + \item{diagnostics}{Default = TRUE. Logical for performing diagnostic plot} \item{output_dir}{Default = file.path(getwd(), 'output'). String for output directory} diff --git a/vignettes/vignette.R b/vignettes/vignette.R index d861dea..6dd8d99 100644 --- a/vignettes/vignette.R +++ b/vignettes/vignette.R @@ -14,7 +14,8 @@ knitr::opts_chunk$set(warning = FALSE, message = FALSE) # download_url = 'https://zenodo.org/records/13976521/files/weighted_climate.zip?download=1', # data_dir = output_dir # ) -# + +## ----eval=F------------------------------------------------------------------- # # Path to the folder that holds cropland-area-weighted precipitation and temperature TXT files # # historical climate observation # climate_hist_dir <- file.path(data_dir, 'climate_hist') @@ -36,6 +37,8 @@ knitr::opts_chunk$set(warning = FALSE, message = FALSE) # data_dir = output_dir # ) # + +## ----eval=F------------------------------------------------------------------- # # Path to the precipitation and temperature NetCDF files # # NOTE: Each variable can have more than one file # # projected climate data @@ -201,6 +204,16 @@ knitr::kable(crop_cal_update[1:10], kable_styling(bootstrap_options = "striped", full_width = T, position = 'center') %>% footnote(general = 'This only shows the first 10 lines of the example data.') +## ----eval=F, echo=T----------------------------------------------------------- +# +# # Path to the output folder where you wish to save the outputs. Change it accordingly +# output_dir <- 'gaia_output' +# +# # crop calendars +# crop_cal <- crop_calendars(crop_calendar_file = 'path/to/your/crop/calendar/file', +# output_dir = output_dir) +# + ## ----eval=F, echo=T----------------------------------------------------------- # # # Path to the output folder where you wish to save the outputs. Change it accordingly diff --git a/vignettes/vignette.Rmd b/vignettes/vignette.Rmd index 35818b6..796579f 100644 --- a/vignettes/vignette.Rmd +++ b/vignettes/vignette.Rmd @@ -525,6 +525,19 @@ knitr::kable(crop_cal_update[1:10], footnote(general = 'This only shows the first 10 lines of the example data.') ``` +If you plan to use your own crop calendar data, make sure it follows the format in [Table 3](#table3). Then, simply provide the path to your crop calendar file in the `crop_calendar` function to run and the subsequent steps will use the crop calendar data you provided. + +```{r eval=F, echo=T} + +# Path to the output folder where you wish to save the outputs. Change it accordingly +output_dir <- 'gaia_output' + +# crop calendars +crop_cal <- crop_calendars(crop_calendar_file = 'path/to/your/crop/calendar/file', + output_dir = output_dir) + +``` +
### data_aggregation