Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
lilyclements committed Mar 4, 2024
2 parents 06a0cf8 + 65df3ea commit 7176d66
Show file tree
Hide file tree
Showing 17 changed files with 312 additions and 33 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export(dekad)
export(elements_wider)
export(end_rains)
export(end_season)
export(get_extremes)
export(mean_temperature)
export(null_to_string)
export(pentad)
Expand All @@ -18,6 +19,7 @@ export(seasonal_rain)
export(shift_dates)
export(spells)
export(start_rains)
export(summary_temperature)
export(yday_366)
importFrom(lubridate,yday)
importFrom(magrittr,"%>%")
Expand Down
2 changes: 1 addition & 1 deletion R/assert_column_names.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ assert_column_names <- function(data, columns) {
if (length(columns) > 1) stop("Not all columns: ", paste(columns, collapse = ", "), " found in data")
else stop("Not all columns: ", "'", paste(columns, collapse = ", "), "'", " found in data")
}
}
}
1 change: 1 addition & 0 deletions R/crops_definitions.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ crops_definitions <- function (data, date_time, station = NULL, rain, year = NUL
} else {
season_data <- season_data %>% dplyr::select(c(.data[[year]], start_day, end_day))
}
season_data <- stats::na.omit(season_data)
df <- dplyr::full_join(df, season_data)
#df <- df %>% dplyr::filter(stats::complete.cases(df))
if (lubridate::is.Date(df[[start_day]]))
Expand Down
6 changes: 3 additions & 3 deletions R/end_rains.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ end_rains <- function(data, date_time, station = NULL, year = NULL, rain = NULL,

# Do we have a shifted start doy?
if (!is.null(s_start_doy)){
data <- shift_dates(data = data, date = date_time, s_start_doy = s_start_doy)
data <- shift_dates(data = data, date = date_time, s_start_doy = s_start_doy - 1)
year <- "year"
doy <- "doy"
data[[year]] <- data[["s_doy"]]
data[[doy]] <- data[["s_year"]]
data[[year]] <- data[["s_year"]]
data[[doy]] <- data[["s_doy"]]
} else {
# calculate doy, year from date
if(is.null(year)){#if(!year %in% names(data)) { # do instead of is.null because of epicsawrap. we always read in "year" whether it exists or not.
Expand Down
16 changes: 8 additions & 8 deletions R/end_season.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,11 @@ end_season <- function(data, date_time, station = NULL, year = NULL, rain = NULL
if (end_day <= start_day) stop("The `end_day` must be after the `start_day`")
# Do we have a shifted start doy?
if (!is.null(s_start_doy)){
data <- shift_dates(data = data, date = date_time, s_start_doy = s_start_doy)
data <- shift_dates(data = data, date = date_time, s_start_doy = s_start_doy - 1)
year <- "year"
doy <- "doy"
data[[year]] <- data[["s_doy"]]
data[[doy]] <- data[["s_year"]]
data[[year]] <- data[["s_year"]]
data[[doy]] <- data[["s_doy"]]
} else {
# calculate doy, year from date
if(is.null(year)){#if(!year %in% names(data)) { # do instead of is.null because of epicsawrap. we always read in "year" whether it exists or not.
Expand Down Expand Up @@ -101,11 +101,11 @@ end_season <- function(data, date_time, station = NULL, year = NULL, rain = NULL
} else {
end_of_season <- end_of_season %>%
dplyr::summarise(end_season_doy = ifelse(is.na(x=dplyr::first(wb)),
NA,
dplyr::first(.data[[doy]]))) %>%
dplyr::summarise(end_season_date = dplyr::if_else(is.na(x=dplyr::first(wb)),
as.Date(NA),
dplyr::first(.data[[date_time]])))
NA,
dplyr::first(.data[[doy]])),
end_season_date = dplyr::if_else(is.na(x=dplyr::first(wb)),
as.Date(NA),
dplyr::first(.data[[date_time]])))
}
return(end_of_season)
}
41 changes: 41 additions & 0 deletions R/get_extremes.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#' Get Extreme Data
#'
#' This function identifies extreme values in a specified element (column) of a data frame. It can operate in two modes: percentile-based and threshold-based.
#'
#' @param data A data frame containing the data to be analysed.
#' @param element The name of the column in 'data' for which extremes are to be found.
#' @param type A character string specifying the mode of operation. It can be either `"percentile"` or `"threshold"`. Here, `"percentile"` identifies values above a certain percentile (e.g., 95th percentile); `"threshold"` identifies values above a specific threshold value.
#' @param value A numeric value specifying the percentile or threshold, depending on the 'type' parameter. If `type == "percentile"`, `value` is the percentile (e.g., 95 for 95th percentile). If `type == "threshold"`, `value` is the threshold value (e.g., 50 mm for rainfall).
#' @param direction A character string specifying the direction for the operation. It can be either `"greater"` or `"less"`.
#'
#' @export
#' @return A filtered data frame where the `element` values are considered extreme based on the specified `type` and `value`.
#'
#' @examples
#' # data(daily_niger)
#' # filtered_data <- get_extremes(data = daily_niger, element = "rain", type = "threshold", value = 50)
get_extremes <- function(data, element, type = c("percentile", "threshold"), value = 95, direction = c("greater", "less")) {
type <- match.arg(type)
direction <- match.arg(direction)

# Check if element exists in data
if (!element %in% names(data)) {
stop("Element column not found in the data frame.")
}

# Determine the threshold value based on the specified type
if (type == "percentile") {
threshold_value <- quantile(data[[element]], probs = value/100, na.rm = TRUE)
} else {
threshold_value <- value
}

# Filter data based on the threshold and direction
if (direction == "greater") {
extreme_data <- data %>% dplyr::filter(.data[[element]] > threshold_value)
} else {
extreme_data <- data %>% dplyr::filter(.data[[element]] < threshold_value)
}

return(extreme_data)
}
41 changes: 41 additions & 0 deletions R/mean_temperatures.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#' Summary Temperature (Month or annually)
#' @description Returns a summary data frame giving either the mean of the minimum and/or maximum temperatures each year from 1 Jan to 31 Dec, or by year and month.
#'
#' @param data The data.frame to calculate from.
#' @param date_time \code{\link[base]{Date}} The name of the date column in \code{data}.
#' @param tmin \code{character(1)} The name of the minimum temperature column in \code{data} to apply the function to.
#' @param tmax \code{character(1)} The name of the maximum temperature column in \code{data} to apply the function to.
#' @param year \code{character(1)} The name of the year column in \code{data}. If \code{NULL} it will be created using \code{lubridate::year(data[[date_time]])}.
#' @param month \code{character(1)} The name of the month column in \code{data}. If \code{NULL} it will be created using \code{lubridate::month(data[[date_time]])}.
#' @param station \code{character(1)} The name of the station column in \code{data}, if the data are for multiple station.
#' @param to \code{character(1)} Default `annual`. The period of time to calculate the mean temperature columns over (options are `annual` or `monthly`).
#' @param summaries \code{character} The summaries to display. Options are `"mean"`, `"max"`, `"min"`.
#' @param na_rm \code{logical(1)}. Should missing values (including \code{NaN}) be removed?
#' @param na_prop \code{integer(1)} Max proportion of missing values allowed
#' @param na_n \code{integer(1)} Max number of missing values allowed
#' @param na_consec \code{integer(1)} Max number of consecutive missing values allowed
#' @param na_n_non \code{integer(1)} Min number of non-missing values required
#'
#' @return A data.frame with mean summaries for each year or year and month for the minimum daily temperature and/or the maximum daily temperature.
#' @export
#'
#' @examples #daily_niger_1 <- daily_niger %>% filter(year < 1950)
#' #mean_temperature(data = daily_niger_1, date_time = "date", station = "station_name",
#' # tmax = "tmax", tmin = "tmin", na_prop = 0.05)

mean_temperature <- function(data, date_time, tmin = NULL, tmax = NULL, year = NULL,
month = NULL, station = NULL, to = c("annual", "monthly"),
summaries = c("mean", "min", "max"), na_rm = FALSE,
na_prop = NULL, na_n = NULL, na_consec = NULL, na_n_non = NULL) {
to <- match.arg(to)
summaries_all <- c()
if ("mean" %in% summaries){ summaries_all <- cbind(summaries_all, mean = "mean")}
if ("min" %in% summaries){ summaries_all <- cbind(summaries_all, min = "min")}
if ("max" %in% summaries){ summaries_all <- cbind(summaries_all, max = "max")}
climatic_summary(data = data, date_time = date_time,
station = station, elements = c(tmin, tmax),
year = year, month = month, to = to,
summaries = c(mean = "mean"), na_rm = na_rm,
na_prop = na_prop, na_n = na_n,
na_n_non = na_n_non, names = "{.fn}_{.col}")
}
41 changes: 39 additions & 2 deletions R/seasonal_length.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,45 @@
#' Length of the season
#' @description Number of days between start of the rains and end of the season
#'
#' @inheritParams seasonal_rain
#'
# @inheritParams seasonal_rain
#' @param summary_data Summary data frame containing the `start_date` and `end_date` variables. These variables are calculated from start of rains and end of season functions.
#' If `NULL`, `start_date` and `end_date` are calculated from the `start_of_rains` and `end_of_season` functions respectively.
#' @param start_date \code{character(1)} The name of the start of rains column in \code{summary_data}. If \code{NULL} it will be created using the \code{start_of_rains} function.
#' @param end_date \code{character(1)} The name of the end of season column in \code{summary_data}. If \code{NULL} it will be created using the \code{end_of_seasons} function.
#' @param data The daily data.frame to calculate rainfall from.
#' @param date_time \code{\link[base]{Date}} The name of the date column in \code{data}.
#' @param station \code{character(1)} The name of the station column in \code{data}, if the data are for multiple station.
#' @param year \code{character(1)} The name of the year column in \code{data}. If \code{NULL} it will be created using \code{lubridate::year(data[[date_time]])}.
#' @param rain \code{character(1)} The name of the rainfall column in \code{data} to apply the function to.
#' @param doy \code{character(1)} The name of the day of year column in \code{data} to apply the function to. If \code{NULL} it will be created using the \code{date_time} variable.
#' @param threshold \code{numerical(1)} threshold value for amount (mm) of rainfall in order to count it as a rainy day.
#' @param sor_start_day \code{numerical(1)} The first day to calculate from in the year (1-366).
#' @param sor_end_day \code{numerical(1)} The last day to calculate to in the year (1-366).
#' @param sor_total_rainfall \code{logical(1)} default `TRUE`. Start of the rains to be defined by the total or proportion of rainfall over a period.
#' @param sor_over_days \code{numerical(1)} Only works if `total_rainfall = TRUE`. This is the number of days to total the rainfall over.
#' @param sor_amount_rain \code{numerical(1)} If `total_rainfall = TRUE` and `proportion = FALSE`, the amount of rainfall to expect over the period defined in `over_days`.
#' @param sor_proportion \code{logical(1)} default `FALSE`, only valid if `total_rainfall = TRUE`. If `TRUE`, Start of the rains to be defined by proportion of rainfall over a period. This proportion is given in `prob_rain_day`. Otherwise, defined by the amount of rainfall over a period. The amount is given in `amount_rain`.
#' @param sor_prob_rain_day \code{numerical(1)} Only works if `total_rainfall = TRUE` and `proportion = TRUE` This is the number
#' @param sor_number_rain_days \code{logical(1)} default `FALSE`. If `TRUE`, define start of the rains by the number of rainy days (`min_rain_days`) over a period. The period is given in days in the `rain_day_interval` parameter.
#' @param sor_min_rain_days \code{numerical(1)} Only if `number_rain_days = TRUE`. This is the minimum number of rainy days to define start of rains in a given period. The period is given in days in the `rain_day_interval` parameter.
#' @param sor_rain_day_interval \code{numerical(1)} Only if `number_rain_days = TRUE`, the interval in days that the `number_rain_days` is defined in.
#' @param sor_dry_spell \code{logical(1)} default `FALSE`. If `TRUE`, define start of the rains by a maximum number of dry days (`spell_max_dry_days`) over a given period of days (`spell_interval`).
#' @param sor_spell_max_dry_days \code{numerical(1)} Only if `dry_spell = TRUE`. This is the maximum number of dry days to define start of rains in a given period. The period is given in days in the `spell_interval` parameter.
#' @param sor_spell_interval \code{numerical(1)} Only if `dry_spell = TRUE`, the interval in days that the `dry_spell` is defined in.
#' @param sor_dry_period \code{logical(1)} default `FALSE`. If `TRUE`, define start of the rains by the maximum rain and maximum dry days in a given interval. The maximum rainfall amount is given in the `max_rain` parameter, the maximum dry days is given in the `period_max_dry_days` parameter, and the interval length is given in the `period_interval` parameter.
#' @param sor_max_rain \code{numerical(1)} Only if `dry_period = TRUE`, the maximum rainfall to occur in a given period.
#' @param sor_period_max_dry_days \code{numerical(1)} Only if `dry_period = TRUE`. the maximum period of dry days to occur in a given period.
#' @param sor_period_interval \code{numerical(1)} Only if `dry_period = TRUE`, the interval in days that the `dry_period` is defined in.
#' @param end_type \code{character(1)} If `is.null(end_date)`, `end_type` is whether the end of seasons or end of rains is used. Options are c(`"season", "rains"`), default `"season"`.
#' @param eos_start_day \code{numerical(1)} The first day to calculate from in the year (1-366).
#' @param eos_end_day \code{numerical(1)} The last day to calculate to in the year (1-366).
#' @param eor_interval_length \code{numerical(1)} Number of days for the minimum rainfall to fall in.
#' @param eor_min_rainfall \code{numerical(1)} Minimum amount of rainfall to occur on the set of days defined in `interval_length`.
#' @param eos_capacity \code{numerical(1)} Water capacity of the soil (default `60`).
#' @param eos_water_balance_max \code{numerical(1)} Maximum water balance value (default `0.5`).
#' @param eos_evaporation \code{character(1)} Whether to give evaporation as a value or variable. Default `"value"`.
#' @param eos_evaporation_value \code{numerical(1)} If `evaporation = "value"`, the numerical value of amount of evaporation per day (default `5`).
#' @param eos_evaporation_variable \code{character(1)} If `evaporation = "variable"`, the variable in `data` that corresponds to the evaporation column.
#' @return A data.frame with length of rainfall season for each year in the specified season (between start of the rains and end of season).
#' @export
#'
Expand Down
13 changes: 12 additions & 1 deletion R/seasonal_rain.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#' @param na_n \code{integer(1)} Max number of missing values allowed
#' @param na_consec \code{integer(1)} Max number of consecutive missing values allowed
#' @param na_n_non \code{integer(1)} Min number of non-missing values required
#' @param s_start_doy The starting day of the year (DOY) for the shift.
#' @param sor_start_day \code{numerical(1)} The first day to calculate from in the year (1-366).
#' @param sor_end_day \code{numerical(1)} The last day to calculate to in the year (1-366).
#' @param sor_total_rainfall \code{logical(1)} default `TRUE`. Start of the rains to be defined by the total or proportion of rainfall over a period.
Expand Down Expand Up @@ -82,7 +83,8 @@ seasonal_rain <- function (summary_data = NULL, start_date = NULL, end_date = NU
data, date_time, year = NULL, station = NULL, doy = NULL,
rain = NULL, total_rain = TRUE, n_rain = TRUE, rain_day = 0.85,
na_rm = FALSE, na_prop = NULL, na_n = NULL, na_consec = NULL,
na_n_non = NULL, threshold = 0.85, sor_start_day = 1, sor_end_day = 366,
na_n_non = NULL, threshold = 0.85, sor_start_day = 1, sor_end_day = 366,
s_start_doy = NULL,
sor_total_rainfall = TRUE, sor_over_days = 1, sor_amount_rain = 20,
sor_proportion = FALSE, sor_prob_rain_day = 0.8, sor_number_rain_days = FALSE,
sor_min_rain_days = 1, sor_rain_day_interval = 2, sor_dry_spell = FALSE,
Expand Down Expand Up @@ -143,6 +145,15 @@ seasonal_rain <- function (summary_data = NULL, start_date = NULL, end_date = NU
if (!total_rain && !n_rain) {
stop("No summaries selected. At least one of\n 'total_rain' or 'n_rain' must be TRUE.")
}

if(!is.null(s_start_doy)){ # any(grepl("-", summary_data[[year]]))){
data <- shift_dates(data = data, date = "date", s_start_doy = s_start_doy - 1)
year <- "year"
doy <- "doy"
data[[year]] <- data[["s_year"]]
data[[doy]] <- data[["s_doy"]]
}

summary_data <- dplyr::full_join(data %>% dplyr::select(c({{ station }}, {{ year }}, {{ date_time }},
{{ doy }}, {{ rain }})), summary_data)
summary_data <- summary_data %>% dplyr::group_by(.data[[station]], .data[[year]])
Expand Down
2 changes: 1 addition & 1 deletion R/start_rains.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ start_rains <- function(data, date_time, station = NULL, year = NULL, rain = NUL

# Do we have a shifted start doy?
if (!is.null(s_start_doy)){
data <- shift_dates(data = data, date = date_time, s_start_doy = s_start_doy)
data <- shift_dates(data = data, date = date_time, s_start_doy = s_start_doy - 1)
year <- "year"
doy <- "doy"
data[[doy]] <- data[["s_doy"]]
Expand Down
Loading

0 comments on commit 7176d66

Please sign in to comment.