Merge branch 'main' of https://github.com/IDEMSInternational/rpicsa

IDEMSInternational · Mar 4, 2024 · 7176d66 · 7176d66
2 parents 06a0cf8 + 65df3ea
commit 7176d66
Show file tree

Hide file tree

Showing 17 changed files with 312 additions and 33 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -9,6 +9,7 @@ export(dekad)
 export(elements_wider)
 export(end_rains)
 export(end_season)
+export(get_extremes)
 export(mean_temperature)
 export(null_to_string)
 export(pentad)
@@ -18,6 +19,7 @@ export(seasonal_rain)
 export(shift_dates)
 export(spells)
 export(start_rains)
+export(summary_temperature)
 export(yday_366)
 importFrom(lubridate,yday)
 importFrom(magrittr,"%>%")

diff --git a/R/assert_column_names.R b/R/assert_column_names.R
@@ -11,4 +11,4 @@ assert_column_names <- function(data, columns) {
     if (length(columns) > 1) stop("Not all columns: ", paste(columns, collapse = ", "), " found in data")
     else stop("Not all columns: ", "'", paste(columns, collapse = ", "), "'", " found in data")
   }
-}
+}
diff --git a/R/crops_definitions.R b/R/crops_definitions.R
@@ -79,6 +79,7 @@ crops_definitions <- function (data, date_time, station = NULL, rain, year = NUL
   } else {
     season_data <- season_data %>% dplyr::select(c(.data[[year]], start_day, end_day))
   }
+  season_data <- stats::na.omit(season_data)
   df <- dplyr::full_join(df, season_data)
   #df <- df %>% dplyr::filter(stats::complete.cases(df))
   if (lubridate::is.Date(df[[start_day]])) 

diff --git a/R/end_rains.R b/R/end_rains.R
@@ -46,11 +46,11 @@ end_rains <- function(data, date_time, station = NULL, year = NULL, rain = NULL,
 
   # Do we have a shifted start doy?
   if (!is.null(s_start_doy)){
-    data <- shift_dates(data = data, date = date_time, s_start_doy = s_start_doy)
+    data <- shift_dates(data = data, date = date_time, s_start_doy = s_start_doy - 1)
     year <- "year"
     doy <- "doy"
-    data[[year]] <- data[["s_doy"]]
-    data[[doy]] <- data[["s_year"]]
+    data[[year]] <- data[["s_year"]]
+    data[[doy]] <- data[["s_doy"]]
   } else {
     # calculate doy, year from date
     if(is.null(year)){#if(!year %in% names(data)) { # do instead of is.null because of epicsawrap. we always read in "year" whether it exists or not.

diff --git a/R/end_season.R b/R/end_season.R
@@ -50,11 +50,11 @@ end_season <- function(data, date_time, station = NULL, year = NULL, rain = NULL
   if (end_day <= start_day) stop("The `end_day` must be after the `start_day`")
   # Do we have a shifted start doy?
   if (!is.null(s_start_doy)){
-    data <- shift_dates(data = data, date = date_time, s_start_doy = s_start_doy)
+    data <- shift_dates(data = data, date = date_time, s_start_doy = s_start_doy - 1)
     year <- "year"
     doy <- "doy"
-    data[[year]] <- data[["s_doy"]]
-    data[[doy]] <- data[["s_year"]]
+    data[[year]] <- data[["s_year"]]
+    data[[doy]] <- data[["s_doy"]]
   } else {
     # calculate doy, year from date
     if(is.null(year)){#if(!year %in% names(data)) { # do instead of is.null because of epicsawrap. we always read in "year" whether it exists or not.
@@ -101,11 +101,11 @@ end_season <- function(data, date_time, station = NULL, year = NULL, rain = NULL
   } else {
     end_of_season <- end_of_season %>%
       dplyr::summarise(end_season_doy = ifelse(is.na(x=dplyr::first(wb)),
-                                               NA,
-                                               dplyr::first(.data[[doy]]))) %>%
-      dplyr::summarise(end_season_date = dplyr::if_else(is.na(x=dplyr::first(wb)),
-                                                        as.Date(NA),
-                                                        dplyr::first(.data[[date_time]])))
+                                           NA,
+                                           dplyr::first(.data[[doy]])),
+                       end_season_date = dplyr::if_else(is.na(x=dplyr::first(wb)),
+                                                   as.Date(NA),
+                                                   dplyr::first(.data[[date_time]])))
   }
   return(end_of_season)
 }
diff --git a/R/get_extremes.R b/R/get_extremes.R
@@ -0,0 +1,41 @@
+#' Get Extreme Data
+#' 
+#' This function identifies extreme values in a specified element (column) of a data frame. It can operate in two modes: percentile-based and threshold-based.
+#' 
+#' @param data A data frame containing the data to be analysed.
+#' @param element The name of the column in 'data' for which extremes are to be found.
+#' @param type A character string specifying the mode of operation. It can be either `"percentile"` or `"threshold"`. Here, `"percentile"` identifies values above a certain percentile (e.g., 95th percentile); `"threshold"` identifies values above a specific threshold value.
+#' @param value A numeric value specifying the percentile or threshold, depending on the 'type' parameter. If `type == "percentile"`, `value` is the percentile (e.g., 95 for 95th percentile). If `type == "threshold"`, `value` is the threshold value (e.g., 50 mm for rainfall).
+#' @param direction A character string specifying the direction for the operation. It can be either `"greater"` or `"less"`.
+#' 
+#' @export
+#' @return A filtered data frame where the `element` values are considered extreme based on the specified `type` and `value`.
+#' 
+#' @examples
+#' # data(daily_niger)
+#' # filtered_data <- get_extremes(data = daily_niger, element = "rain", type = "threshold", value = 50)
+get_extremes <- function(data, element, type = c("percentile", "threshold"), value = 95, direction = c("greater", "less")) {
+  type <- match.arg(type)
+  direction <- match.arg(direction)
+
+  # Check if element exists in data
+  if (!element %in% names(data)) {
+    stop("Element column not found in the data frame.")
+  }
+
+  # Determine the threshold value based on the specified type
+  if (type == "percentile") {
+    threshold_value <- quantile(data[[element]], probs = value/100, na.rm = TRUE)
+  } else {
+    threshold_value <- value
+  }
+
+  # Filter data based on the threshold and direction
+  if (direction == "greater") {
+    extreme_data <- data %>% dplyr::filter(.data[[element]] > threshold_value)
+  } else {
+    extreme_data <- data %>% dplyr::filter(.data[[element]] < threshold_value)
+  }
+
+  return(extreme_data)
+}
diff --git a/R/mean_temperatures.R b/R/mean_temperatures.R
@@ -0,0 +1,41 @@
+#' Summary Temperature (Month or annually)
+#' @description Returns a summary data frame giving either the mean of the minimum and/or maximum temperatures each year from 1 Jan to 31 Dec, or by year and month.
+#' 
+#' @param data The data.frame to calculate from.
+#' @param date_time \code{\link[base]{Date}} The name of the date column in \code{data}.
+#' @param tmin \code{character(1)} The name of the minimum temperature column in \code{data} to apply the function to.
+#' @param tmax \code{character(1)} The name of the maximum temperature column in \code{data} to apply the function to.
+#' @param year \code{character(1)} The name of the year column in \code{data}. If \code{NULL} it will be created using \code{lubridate::year(data[[date_time]])}.
+#' @param month \code{character(1)} The name of the month column in \code{data}. If \code{NULL} it will be created using \code{lubridate::month(data[[date_time]])}.
+#' @param station \code{character(1)} The name of the station column in \code{data}, if the data are for multiple station.
+#' @param to \code{character(1)} Default `annual`. The period of time to calculate the mean temperature columns over (options are `annual` or `monthly`).
+#' @param summaries \code{character} The summaries to display. Options are `"mean"`, `"max"`, `"min"`.
+#' @param na_rm \code{logical(1)}. Should missing values (including \code{NaN}) be removed?
+#' @param na_prop \code{integer(1)} Max proportion of missing values allowed
+#' @param na_n \code{integer(1)} Max number of missing values allowed
+#' @param na_consec \code{integer(1)} Max number of consecutive missing values allowed
+#' @param na_n_non \code{integer(1)} Min number of non-missing values required
+#'
+#' @return A data.frame with mean summaries for each year or year and month for the minimum daily temperature and/or the maximum daily temperature.
+#' @export
+#'
+#' @examples #daily_niger_1 <- daily_niger %>% filter(year < 1950)
+#' #mean_temperature(data = daily_niger_1, date_time  = "date", station = "station_name",
+#' #             tmax = "tmax", tmin = "tmin", na_prop = 0.05)
+
+mean_temperature <- function(data, date_time, tmin = NULL, tmax = NULL, year = NULL,
+                                month = NULL, station = NULL, to = c("annual", "monthly"),
+                                summaries = c("mean", "min", "max"), na_rm = FALSE,
+                                na_prop = NULL, na_n = NULL, na_consec = NULL, na_n_non = NULL) {
+  to <- match.arg(to)
+  summaries_all <- c()
+  if ("mean" %in% summaries){ summaries_all <- cbind(summaries_all, mean = "mean")}
+  if ("min" %in% summaries){ summaries_all <- cbind(summaries_all, min = "min")}
+  if ("max" %in% summaries){ summaries_all <- cbind(summaries_all, max = "max")}
+  climatic_summary(data = data, date_time = date_time, 
+                   station = station, elements = c(tmin, tmax),
+                   year = year, month = month, to = to, 
+                   summaries = c(mean = "mean"), na_rm = na_rm, 
+                   na_prop = na_prop, na_n = na_n, 
+                   na_n_non = na_n_non, names = "{.fn}_{.col}")
+}
diff --git a/R/seasonal_length.R b/R/seasonal_length.R
@@ -1,8 +1,45 @@
 #' Length of the season
 #' @description Number of days between start of the rains and end of the season
 #'
-#' @inheritParams seasonal_rain
-#'
+# @inheritParams seasonal_rain
+#' @param summary_data Summary data frame containing the `start_date` and `end_date` variables. These variables are calculated from start of rains and end of season functions.
+#' If `NULL`, `start_date` and `end_date` are calculated from the `start_of_rains` and `end_of_season` functions respectively.
+#' @param start_date \code{character(1)} The name of the start of rains column in \code{summary_data}. If \code{NULL} it will be created using the \code{start_of_rains} function.
+#' @param end_date \code{character(1)} The name of the end of season column in \code{summary_data}. If \code{NULL} it will be created using the \code{end_of_seasons} function.
+#' @param data The daily data.frame to calculate rainfall from.
+#' @param date_time \code{\link[base]{Date}} The name of the date column in \code{data}.
+#' @param station \code{character(1)} The name of the station column in \code{data}, if the data are for multiple station.
+#' @param year \code{character(1)} The name of the year column in \code{data}. If \code{NULL} it will be created using \code{lubridate::year(data[[date_time]])}.
+#' @param rain \code{character(1)} The name of the rainfall column in \code{data} to apply the function to.
+#' @param doy \code{character(1)} The name of the day of year column in \code{data} to apply the function to. If \code{NULL} it will be created using the \code{date_time} variable.
+#' @param threshold \code{numerical(1)} threshold value for amount (mm) of rainfall in order to count it as a rainy day.
+#' @param sor_start_day \code{numerical(1)} The first day to calculate from in the year (1-366).
+#' @param sor_end_day \code{numerical(1)} The last day to calculate to in the year (1-366).
+#' @param sor_total_rainfall \code{logical(1)} default `TRUE`. Start of the rains to be defined by the total or proportion of rainfall over a period.
+#' @param sor_over_days \code{numerical(1)} Only works if `total_rainfall = TRUE`. This is the number of days to total the rainfall over.
+#' @param sor_amount_rain \code{numerical(1)} If `total_rainfall = TRUE` and `proportion = FALSE`, the amount of rainfall to expect over the period defined in `over_days`. 
+#' @param sor_proportion \code{logical(1)} default `FALSE`, only valid if `total_rainfall = TRUE`. If `TRUE`, Start of the rains to be defined by proportion of rainfall over a period. This proportion is given in `prob_rain_day`. Otherwise, defined by the amount of rainfall over a period. The amount is given in `amount_rain`.
+#' @param sor_prob_rain_day \code{numerical(1)} Only works if `total_rainfall = TRUE` and `proportion = TRUE` This is the number 
+#' @param sor_number_rain_days \code{logical(1)} default `FALSE`. If `TRUE`, define start of the rains by the number of rainy days (`min_rain_days`) over a period. The period is given in days in the `rain_day_interval` parameter.
+#' @param sor_min_rain_days \code{numerical(1)} Only if `number_rain_days = TRUE`. This is the minimum number of rainy days to define start of rains in a given period. The period is given in days in the `rain_day_interval` parameter.
+#' @param sor_rain_day_interval \code{numerical(1)} Only if `number_rain_days = TRUE`, the interval in days that the `number_rain_days` is defined in.
+#' @param sor_dry_spell \code{logical(1)} default `FALSE`. If `TRUE`, define start of the rains by a maximum number of dry days (`spell_max_dry_days`) over a given period of days (`spell_interval`).
+#' @param sor_spell_max_dry_days \code{numerical(1)} Only if `dry_spell = TRUE`. This is the maximum number of dry days to define start of rains in a given period. The period is given in days in the `spell_interval` parameter.
+#' @param sor_spell_interval \code{numerical(1)} Only if `dry_spell = TRUE`, the interval in days that the `dry_spell` is defined in.
+#' @param sor_dry_period \code{logical(1)} default `FALSE`. If `TRUE`, define start of the rains by the maximum rain and maximum dry days in a given interval. The maximum rainfall amount is given in the `max_rain` parameter, the maximum dry days is given in the `period_max_dry_days` parameter, and the interval length is given in the `period_interval` parameter. 
+#' @param sor_max_rain \code{numerical(1)} Only if `dry_period = TRUE`, the maximum rainfall to occur in a given period.
+#' @param sor_period_max_dry_days \code{numerical(1)} Only if `dry_period = TRUE`. the maximum period of dry days to occur in a given period.
+#' @param sor_period_interval \code{numerical(1)} Only if `dry_period = TRUE`, the interval in days that the `dry_period` is defined in.
+#' @param end_type \code{character(1)} If `is.null(end_date)`, `end_type` is whether the end of seasons or end of rains is used. Options are c(`"season", "rains"`), default `"season"`.
+#' @param eos_start_day \code{numerical(1)} The first day to calculate from in the year (1-366).
+#' @param eos_end_day \code{numerical(1)} The last day to calculate to in the year (1-366).
+#' @param eor_interval_length \code{numerical(1)} Number of days for the minimum rainfall to fall in.
+#' @param eor_min_rainfall \code{numerical(1)} Minimum amount of rainfall to occur on the set of days  defined in `interval_length`.
+#' @param eos_capacity \code{numerical(1)} Water capacity of the soil (default `60`).
+#' @param eos_water_balance_max \code{numerical(1)} Maximum water balance value (default `0.5`).
+#' @param eos_evaporation \code{character(1)} Whether to give evaporation as a value or variable. Default `"value"`.
+#' @param eos_evaporation_value \code{numerical(1)} If `evaporation = "value"`, the numerical value of amount of evaporation per day (default `5`).
+#' @param eos_evaporation_variable \code{character(1)} If `evaporation = "variable"`, the variable in `data` that corresponds to the evaporation column.
 #' @return A data.frame with length of rainfall season for each year in the specified season (between start of the rains and end of season).
 #' @export
 #'

diff --git a/R/seasonal_rain.R b/R/seasonal_rain.R
@@ -22,6 +22,7 @@
 #' @param na_n \code{integer(1)} Max number of missing values allowed
 #' @param na_consec \code{integer(1)} Max number of consecutive missing values allowed
 #' @param na_n_non \code{integer(1)} Min number of non-missing values required
+#' @param s_start_doy The starting day of the year (DOY) for the shift.
 #' @param sor_start_day \code{numerical(1)} The first day to calculate from in the year (1-366).
 #' @param sor_end_day \code{numerical(1)} The last day to calculate to in the year (1-366).
 #' @param sor_total_rainfall \code{logical(1)} default `TRUE`. Start of the rains to be defined by the total or proportion of rainfall over a period.
@@ -82,7 +83,8 @@ seasonal_rain <- function (summary_data = NULL, start_date = NULL, end_date = NU
                            data, date_time, year = NULL, station = NULL, doy = NULL, 
                            rain = NULL, total_rain = TRUE, n_rain = TRUE, rain_day = 0.85, 
                            na_rm = FALSE, na_prop = NULL, na_n = NULL, na_consec = NULL, 
-                           na_n_non = NULL, threshold = 0.85, sor_start_day = 1, sor_end_day = 366, 
+                           na_n_non = NULL, threshold = 0.85, sor_start_day = 1, sor_end_day = 366,
+                           s_start_doy = NULL,
                            sor_total_rainfall = TRUE, sor_over_days = 1, sor_amount_rain = 20, 
                            sor_proportion = FALSE, sor_prob_rain_day = 0.8, sor_number_rain_days = FALSE, 
                            sor_min_rain_days = 1, sor_rain_day_interval = 2, sor_dry_spell = FALSE, 
@@ -143,6 +145,15 @@ seasonal_rain <- function (summary_data = NULL, start_date = NULL, end_date = NU
   if (!total_rain && !n_rain) {
     stop("No summaries selected. At least one of\n         'total_rain' or 'n_rain' must be TRUE.")
   }
+
+  if(!is.null(s_start_doy)){ # any(grepl("-", summary_data[[year]]))){
+    data <- shift_dates(data = data, date = "date", s_start_doy = s_start_doy - 1)
+    year <- "year"
+    doy <- "doy"
+    data[[year]] <- data[["s_year"]]
+    data[[doy]] <- data[["s_doy"]]
+  }
+
   summary_data <- dplyr::full_join(data %>% dplyr::select(c({{ station }}, {{ year }}, {{ date_time }}, 
                                                             {{ doy }}, {{ rain }})), summary_data)
   summary_data <- summary_data %>% dplyr::group_by(.data[[station]], .data[[year]])

diff --git a/R/start_rains.R b/R/start_rains.R
@@ -85,7 +85,7 @@ start_rains <- function(data, date_time, station = NULL, year = NULL, rain = NUL
 
   # Do we have a shifted start doy?
   if (!is.null(s_start_doy)){
-    data <- shift_dates(data = data, date = date_time, s_start_doy = s_start_doy)
+    data <- shift_dates(data = data, date = date_time, s_start_doy = s_start_doy - 1)
     year <- "year"
     doy <- "doy"
     data[[doy]] <- data[["s_doy"]]
-Original file line number
+Diff line change
@@ Expand Up / @@ -11,4 +11,4 @@ assert_column_names <- function(data, columns) { @@
         if (length(columns) > 1) stop("Not all columns: ", paste(columns, collapse = ", "), " found in data")
         else stop("Not all columns: ", "'", paste(columns, collapse = ", "), "'", " found in data")
       }
-    }
+    }