From 5046c0928ef18ef9c6233f701a6ebf33e44dd6af Mon Sep 17 00:00:00 2001 From: Daniel Schlaepfer Date: Mon, 23 Oct 2023 13:22:50 -0400 Subject: [PATCH 1/7] New `upgrade_weatherDF()` adds requested weather columns to a data frame. --- NAMESPACE | 1 + NEWS.md | 3 ++ R/A_swGenericMethods.R | 2 +- R/D_swWeatherData.R | 36 +++++++++++++++++-- man/sw_upgrade.Rd | 19 +++++++++- .../test_Upgrade_rSOILWAT_S4_classes.R | 13 ++++++- 6 files changed, 68 insertions(+), 6 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 8b07e24c..6eca04c9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -148,6 +148,7 @@ export(swrc_vwc_to_swp) export(time_columns) export(update_biomass) export(update_requested_years) +export(upgrade_weatherDF) export(upgrade_weatherHistory) export(weatherGenerator_dataColumns) export(weatherHistory) diff --git a/NEWS.md b/NEWS.md index 8b9fa2ce..b7dfce1f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,9 @@ memory leaks on error (#239; @dschlaep, @N1ckP3rsl3y). * Code no longer requires (unused) `pcg` header (@dschlaep). +## New features +* New `upgrade_weatherDF()` adds requested weather columns to a data frame. + # rSOILWAT2 v6.0.2 * This version produces the same output as the previous version. diff --git a/R/A_swGenericMethods.R b/R/A_swGenericMethods.R index 88fa20ac..9d403ca4 100644 --- a/R/A_swGenericMethods.R +++ b/R/A_swGenericMethods.R @@ -230,7 +230,7 @@ format_timestamp <- function(object) { #' `"SWA"` added as `outkey` 8 for a new total of 30 #' #' @examples -#' x <- sw_upgrade(rSOILWAT2::sw_exampleData, verbose = TRUE) +#' x <- sw_upgrade(rSOILWAT2::sw_exampleData, verbose = TRUE) #' #' @md #' @exportMethod sw_upgrade diff --git a/R/D_swWeatherData.R b/R/D_swWeatherData.R index 540a7a71..38c07ddc 100644 --- a/R/D_swWeatherData.R +++ b/R/D_swWeatherData.R @@ -188,12 +188,42 @@ swWeatherData <- function(...) { do.call("new", args = c("swWeatherData", dots[dns %in% sns])) } +#' @param weatherDF A data frame with weather variables. +#' @param template_weatherColumns A vector with requested weather variables. +#' +#' @return For [upgrade_weatherDF()]: +#' an updated `weatherDF` with requested columns. +#' +#' @examples +#' upgrade_weatherDF( +#' data.frame(DOY = 1:2, Tmax_C = runif(2), dummy = runif(2)) +#' ) +#' +#' @md +#' @rdname sw_upgrade +#' @export +upgrade_weatherDF <- function( + weatherDF, + template_weatherColumns = c("Year", "DOY", weather_dataColumns()) +) { + template_data <- as.data.frame( + array( + dim = c(nrow(weatherDF), length(template_weatherColumns)), + dimnames = list(NULL, template_weatherColumns) + ) + ) + + cns <- intersect(template_weatherColumns, colnames(weatherDF)) + if (length(cns) < 1L) stop("Required weather variables not found.") + template_data[, cns] <- weatherDF[, cns] + template_data +} upgrade_swWeatherData <- function(data, year, template = new("swWeatherData")) { - stopifnot(colnames(data) %in% colnames(template@data)) template@year <- as.integer(year) - template@data <- template@data[seq_len(nrow(data)), , drop = FALSE] - template@data[, colnames(data)] <- data + template@data <- data.matrix( + upgrade_weatherDF(data, c("DOY", weather_dataColumns())) + ) template } diff --git a/man/sw_upgrade.Rd b/man/sw_upgrade.Rd index feb4de2a..716117c3 100644 --- a/man/sw_upgrade.Rd +++ b/man/sw_upgrade.Rd @@ -8,6 +8,7 @@ \alias{sw_upgrade,swFiles-method} \alias{sw_upgrade,swMonthlyScalingParams-method} \alias{sw_upgrade,swWeather-method} +\alias{upgrade_weatherDF} \alias{upgrade_weatherHistory} \alias{sw_upgrade,swProd-method} \alias{sw_upgrade,swSite-method} @@ -28,6 +29,11 @@ sw_upgrade(object, verbose = FALSE) \S4method{sw_upgrade}{swWeather}(object, verbose = FALSE) +upgrade_weatherDF( + weatherDF, + template_weatherColumns = c("Year", "DOY", weather_dataColumns()) +) + upgrade_weatherHistory(object, verbose = FALSE) \S4method{sw_upgrade}{swProd}(object, verbose = FALSE) @@ -48,10 +54,17 @@ upgrade_weatherHistory(object, verbose = FALSE) \item{object}{An object of a \code{rSOILWAT2} class.} \item{verbose}{A logical value.} + +\item{weatherDF}{A data frame with weather variables.} + +\item{template_weatherColumns}{A vector with requested weather variables.} } \value{ The upgraded \code{object}, if needed, to match the current version with missing slots and elements filled with default values. + +For \code{\link[=upgrade_weatherDF]{upgrade_weatherDF()}}: +an updated \code{weatherDF} with requested columns. } \description{ Missing slots and elements are added and @@ -95,6 +108,10 @@ new slot \code{"vegType"} } \examples{ - x <- sw_upgrade(rSOILWAT2::sw_exampleData, verbose = TRUE) +x <- sw_upgrade(rSOILWAT2::sw_exampleData, verbose = TRUE) + +upgrade_weatherDF( + data.frame(DOY = 1:2, Tmax_C = runif(2), dummy = runif(2)) +) } diff --git a/tests/testthat/test_Upgrade_rSOILWAT_S4_classes.R b/tests/testthat/test_Upgrade_rSOILWAT_S4_classes.R index 4208a887..54c475c5 100644 --- a/tests/testthat/test_Upgrade_rSOILWAT_S4_classes.R +++ b/tests/testthat/test_Upgrade_rSOILWAT_S4_classes.R @@ -50,7 +50,6 @@ test_that("Upgrade old rSOILWAT2 weather objects", { expect_gt(length(fnames_vdata), 0L) - # Upgrade weather data, i.e., lists of class `swWeatherData` for (k in seq_along(fnames_vdata)) { x <- readRDS(fnames_vdata[k]) @@ -58,6 +57,18 @@ test_that("Upgrade old rSOILWAT2 weather objects", { expect_false(dbW_check_weatherData(x)) } + # Upgrade weather data, i.e., lists of class `swWeatherData` expect_true(dbW_check_weatherData(upgrade_weatherHistory(x))) + + # Upgrade weather data frames + expect_true( + dbW_check_weatherData( + dbW_dataframe_to_weatherData( + upgrade_weatherDF( + dbW_weatherData_to_dataframe(x) + ) + ) + ) + ) } }) From 9e0d673886ca4f31f42f514b8f1ea06bef0a2dc4 Mon Sep 17 00:00:00 2001 From: Daniel Schlaepfer Date: Mon, 23 Oct 2023 13:39:39 -0400 Subject: [PATCH 2/7] Improved user control for rounding of weather values - `dbW_weatherData_round()` now rounds both "weatherList" and "weather data frame" objects; argument "digits" can now also be logical (if TRUE, then digits takes the default value of 4) or not finite (e.g., NA; not finite values return the input without rounding). - argument "round" of `dbW_dataframe_to_weatherData()` is deprecated and changed the default value from rounding to 2 digits to no rounding (NA) - recommended replacement is a separate call to `dbW_weatherData_round()`. - argument "digits" of `dbW_generateWeather()` changed the default value from rounding to 4 digits to no rounding (NA) --- NEWS.md | 12 ++++++ R/swWeatherGenerator.R | 11 ++---- R/sw_dbW_WeatherDatabase.R | 60 +++++++++++++++++++++-------- man/dbW_dataframe_to_weatherData.Rd | 4 +- man/dbW_generateWeather.Rd | 6 +-- man/dbW_weatherData_round.Rd | 10 +++-- man/dbW_weather_to_SOILWATfiles.Rd | 2 +- man/sw_weather_data.Rd | 4 +- tests/testthat/test_WeatherData.R | 3 +- vignettes/rSOILWAT2_demo.Rmd | 3 +- 10 files changed, 74 insertions(+), 41 deletions(-) diff --git a/NEWS.md b/NEWS.md index b7dfce1f..3e0af174 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,10 +5,22 @@ * This version produces the same output as the previous version. * Update `SOILWAT2` to v7.2.0 which improves error handling and fixes memory leaks on error (#239; @dschlaep, @N1ckP3rsl3y). + +## Bugfix * Code no longer requires (unused) `pcg` header (@dschlaep). ## New features * New `upgrade_weatherDF()` adds requested weather columns to a data frame. +* User control for rounding of weather values + * `dbW_weatherData_round()` now rounds both `"weatherList"` and + `"weatherDF"` objects; argument `"digits"` can now also be logical + (if `TRUE`, then digits takes the default value of 4) or not finite + (e.g., `NA`; not finite values return the input without rounding). + * Argument `"round"` of `dbW_dataframe_to_weatherData()` is deprecated and + changed the default value from rounding to 2 digits to no rounding (`NA`); + recommended replacement is a separate call to `dbW_weatherData_round()`. + * Argument `"digits"` of `dbW_generateWeather()` changed the default value + from rounding to 4 digits to no rounding (`NA`). # rSOILWAT2 v6.0.2 diff --git a/R/swWeatherGenerator.R b/R/swWeatherGenerator.R index 38ac9f4f..715fdb6a 100644 --- a/R/swWeatherGenerator.R +++ b/R/swWeatherGenerator.R @@ -1019,10 +1019,9 @@ compare_weather <- function( #' \code{\link{dbW_estimate_WGen_coefs}}. #' #' @inheritParams dbW_estimate_WGen_coefs +#' @inheritParams sw_weather_data #' @param years An integer vector. The calendar years for which to generate #' daily weather. If \code{NULL}, then extracted from \code{weatherData}. -#' @param digits An integer value. The returned values will be rounded to -#' the specified number of decimal places. #' @param wgen_coeffs A list with two named elements \var{mkv_doy} and #' \var{mkv_woy}, i.e., the return value of #' \code{\link{dbW_estimate_WGen_coefs}}. If \code{NULL}, then determined @@ -1101,10 +1100,9 @@ dbW_generateWeather <- function( wgen_coeffs = NULL, imputation_type = "mean", imputation_span = 5L, - digits = 4L, + digits = NA, seed = NULL ) { - #--- Obtain missing/null arguments if (is.null(wgen_coeffs)) { wgen_coeffs <- dbW_estimate_WGen_coefs( @@ -1116,10 +1114,7 @@ dbW_generateWeather <- function( } if (is.data.frame(weatherData)) { - weatherData <- dbW_dataframe_to_weatherData( - weatherData, - round = digits + 2L - ) + weatherData <- dbW_dataframe_to_weatherData(weatherData) } if (is.null(years)) { diff --git a/R/sw_dbW_WeatherDatabase.R b/R/sw_dbW_WeatherDatabase.R index e64dce8c..e287d938 100644 --- a/R/sw_dbW_WeatherDatabase.R +++ b/R/sw_dbW_WeatherDatabase.R @@ -39,9 +39,9 @@ #' #' @param years A numeric vector. The calendar years. #' @param digits An integer value. The number of decimal places for rounding -#' weather values. +#' weather values (or `TRUE` but no rounding if `FALSE` or not finite). #' @param round An integer value. The number of decimal places for rounding -#' weather values. +#' weather values (or `TRUE` but no rounding if `FALSE` or not finite). #' #' @param weather_tag A character string. The base file name without extension #' for `SOILWAT2`-formatted input files; default is `"weath"` @@ -2173,14 +2173,16 @@ dbW_weatherData_to_dataframe <- function(weatherData, valNA = NULL) { ) } -#' Round weather data in a list class \code{\linkS4class{swWeatherData}} +#' Round weather data #' #' @inheritParams sw_weather_data #' #' @section Notes: #' `weatherDF_dataColumns` lists the columns of `weatherData` to be rounded. #' -#' @return A list with \code{\linkS4class{swWeatherData}} elements. +#' @return A list with class [`swWeatherData`] elements or +#' a data frame where columns represent weather variables +#' (depending on `weatherData`). #' #' @export #' @md @@ -2189,16 +2191,31 @@ dbW_weatherData_round <- function( digits = 4L, weatherDF_dataColumns = weather_dataColumns() ) { - lapply( - weatherData, - function(x) { - slot(x, "data")[, weatherDF_dataColumns] <- round( - slot(x, "data")[, weatherDF_dataColumns], - digits = digits - ) - x - } - ) + if (isFALSE(is.na(digits)) && isTRUE(is.logical(digits))) { + digits <- if (isTRUE(as.logical(digits))) 4L else NA + } + + if (!isTRUE(is.finite(digits))) return(weatherData) + + if (dbW_check_weatherData(weatherData, check_all = FALSE)) { + lapply( + weatherData, + function(x) { + slot(x, "data")[, weatherDF_dataColumns] <- round( + slot(x, "data")[, weatherDF_dataColumns], + digits = digits + ) + x + } + ) + + } else { + weatherData[, weatherDF_dataColumns] <- round( + weatherData[, weatherDF_dataColumns], + digits = digits + ) + weatherData + } } @@ -2406,8 +2423,16 @@ dbW_dataframe_to_weatherData <- function( weatherDF, years = NULL, weatherDF_dataColumns = NULL, - round = 2 + round = NA ) { + if (isTRUE(is.finite(round))) { + .Deprecated( + msg = paste( + "Argument 'round' is deprecated. + Please call `dbW_weatherData_round()` instead." + ) + ) + } if (is.null(weatherDF_dataColumns)) { weatherDF_dataColumns <- intersect( @@ -2428,8 +2453,9 @@ dbW_dataframe_to_weatherData <- function( ylist <- get_years_from_weatherDF(weatherDF, years, weatherDF_dataColumns) - if (isTRUE(is.logical(round) && round || is.numeric(round))) { - weatherDF <- round(weatherDF, digits = if (is.logical(round)) 2 else round) + # Remove call to `dbW_weatherData_round()` once argument `round` is removed. + if (isTRUE(is.finite(round))) { + weatherDF <- dbW_weatherData_round(weatherDF, digits = round) } template <- new("swWeatherData") diff --git a/man/dbW_dataframe_to_weatherData.Rd b/man/dbW_dataframe_to_weatherData.Rd index f08aac59..1c7cd80c 100644 --- a/man/dbW_dataframe_to_weatherData.Rd +++ b/man/dbW_dataframe_to_weatherData.Rd @@ -8,7 +8,7 @@ dbW_dataframe_to_weatherData( weatherDF, years = NULL, weatherDF_dataColumns = NULL, - round = 2 + round = NA ) } \arguments{ @@ -24,7 +24,7 @@ calendar year \code{year} (optional) and day of year \code{DOY}, see \code{\link[=weather_dataColumns]{weather_dataColumns()}}.} \item{round}{An integer value. The number of decimal places for rounding -weather values.} +weather values (or \code{TRUE} but no rounding if \code{FALSE} or not finite).} } \description{ Conversion: data.frame to object of class \code{\linkS4class{swWeatherData}} diff --git a/man/dbW_generateWeather.Rd b/man/dbW_generateWeather.Rd index 0af0fca6..d7e23d34 100644 --- a/man/dbW_generateWeather.Rd +++ b/man/dbW_generateWeather.Rd @@ -10,7 +10,7 @@ dbW_generateWeather( wgen_coeffs = NULL, imputation_type = "mean", imputation_span = 5L, - digits = 4L, + digits = NA, seed = NULL ) } @@ -42,8 +42,8 @@ based on \code{weatherData}.} \item{imputation_span}{An integer value. The number of non-missing values considered if \code{imputation_type = "mean"}.} -\item{digits}{An integer value. The returned values will be rounded to -the specified number of decimal places.} +\item{digits}{An integer value. The number of decimal places for rounding +weather values (or \code{TRUE} but no rounding if \code{FALSE} or not finite).} \item{seed}{An integer value or \code{NULL}. See \code{\link{set.seed}}.} } diff --git a/man/dbW_weatherData_round.Rd b/man/dbW_weatherData_round.Rd index 7751263c..49afb84b 100644 --- a/man/dbW_weatherData_round.Rd +++ b/man/dbW_weatherData_round.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/sw_dbW_WeatherDatabase.R \name{dbW_weatherData_round} \alias{dbW_weatherData_round} -\title{Round weather data in a list class \code{\linkS4class{swWeatherData}}} +\title{Round weather data} \usage{ dbW_weatherData_round( weatherData, @@ -15,7 +15,7 @@ dbW_weatherData_round( that each hold daily weather data for one calendar year.} \item{digits}{An integer value. The number of decimal places for rounding -weather values.} +weather values (or \code{TRUE} but no rounding if \code{FALSE} or not finite).} \item{weatherDF_dataColumns}{A vector of character strings. The column names of \code{weatherDF} in the correct order for \code{SOILWAT2} including @@ -23,10 +23,12 @@ calendar year \code{year} (optional) and day of year \code{DOY}, see \code{\link[=weather_dataColumns]{weather_dataColumns()}}.} } \value{ -A list with \code{\linkS4class{swWeatherData}} elements. +A list with class \code{\link{swWeatherData}} elements or +a data frame where columns represent weather variables +(depending on \code{weatherData}). } \description{ -Round weather data in a list class \code{\linkS4class{swWeatherData}} +Round weather data } \section{Notes}{ diff --git a/man/dbW_weather_to_SOILWATfiles.Rd b/man/dbW_weather_to_SOILWATfiles.Rd index 74802547..fcb7c1a2 100644 --- a/man/dbW_weather_to_SOILWATfiles.Rd +++ b/man/dbW_weather_to_SOILWATfiles.Rd @@ -34,7 +34,7 @@ calendar year \code{year} (optional) and day of year \code{DOY}, see \code{\link[=weather_dataColumns]{weather_dataColumns()}}.} \item{digits}{An integer value. The number of decimal places for rounding -weather values.} +weather values (or \code{TRUE} but no rounding if \code{FALSE} or not finite).} } \description{ Conversion: object of class \code{\linkS4class{swWeatherData}} or diff --git a/man/sw_weather_data.Rd b/man/sw_weather_data.Rd index 73a323d5..dc1aa170 100644 --- a/man/sw_weather_data.Rd +++ b/man/sw_weather_data.Rd @@ -25,10 +25,10 @@ calendar year \code{year} (optional) and day of year \code{DOY}, see \item{years}{A numeric vector. The calendar years.} \item{digits}{An integer value. The number of decimal places for rounding -weather values.} +weather values (or \code{TRUE} but no rounding if \code{FALSE} or not finite).} \item{round}{An integer value. The number of decimal places for rounding -weather values.} +weather values (or \code{TRUE} but no rounding if \code{FALSE} or not finite).} \item{weather_tag}{A character string. The base file name without extension for \code{SOILWAT2}-formatted input files; default is \code{"weath"}} diff --git a/tests/testthat/test_WeatherData.R b/tests/testthat/test_WeatherData.R index 63887211..e8e48c48 100644 --- a/tests/testthat/test_WeatherData.R +++ b/tests/testthat/test_WeatherData.R @@ -247,8 +247,7 @@ test_that("Weather data object conversions", { res <- rSOILWAT2::dbW_dataframe_to_weatherData( weatherDF = rSOILWAT2::dbW_weatherData_to_dataframe( rSOILWAT2::weatherData - ), - round = FALSE + ) ) expect_true(rSOILWAT2::dbW_check_weatherData(res)) diff --git a/vignettes/rSOILWAT2_demo.Rmd b/vignettes/rSOILWAT2_demo.Rmd index 15bf626f..6e8b3b68 100644 --- a/vignettes/rSOILWAT2_demo.Rmd +++ b/vignettes/rSOILWAT2_demo.Rmd @@ -294,8 +294,7 @@ You may organize weather data in a variety of ways: weatherData = rSOILWAT2::dbW_dataframe_to_weatherData( weatherDF = xdf2[, -1L, drop = FALSE], years = unique(xdf2[, "Year"]), - weatherDF_dataColumns = colnames(xdf2)[-1L], - round = 4L + weatherDF_dataColumns = colnames(xdf2)[-1L] ) ) From 64ca1d449ce801ca19b857e0cfed751896ba1d97 Mon Sep 17 00:00:00 2001 From: Daniel Schlaepfer Date: Tue, 24 Oct 2023 14:55:07 -0400 Subject: [PATCH 3/7] `dbW_generateWeather()` now returns a user requested weather object type - `dbW_generateWeather()` gained `"return_weatherDF"` and now returns a user requested weather object type. If `return_weatherDF` is `TRUE`, then the result is converted to a data frame where columns represent weather variables; otherwise, a list of elements of class `swWeatherData` is returned (as previously). --- NEWS.md | 7 +- R/swWeatherGenerator.R | 179 +++++++++++++++++---------------- man/compare_weather.Rd | 4 +- man/dbW_estimate_WGen_coefs.Rd | 156 +++++++++++++--------------- man/dbW_generateWeather.Rd | 52 +++++----- vignettes/rSOILWAT2_demo.Rmd | 10 +- 6 files changed, 202 insertions(+), 206 deletions(-) diff --git a/NEWS.md b/NEWS.md index 3e0af174..f4187f0e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,7 +11,7 @@ ## New features * New `upgrade_weatherDF()` adds requested weather columns to a data frame. -* User control for rounding of weather values +* Improved usability of weather functionality * `dbW_weatherData_round()` now rounds both `"weatherList"` and `"weatherDF"` objects; argument `"digits"` can now also be logical (if `TRUE`, then digits takes the default value of 4) or not finite @@ -21,6 +21,11 @@ recommended replacement is a separate call to `dbW_weatherData_round()`. * Argument `"digits"` of `dbW_generateWeather()` changed the default value from rounding to 4 digits to no rounding (`NA`). + * `dbW_generateWeather()` gained `"return_weatherDF"` and now returns a + user requested weather object type. If `return_weatherDF` is `TRUE`, + then the result is converted to a data frame where columns represent + weather variables; otherwise, a list of elements of class `swWeatherData` + is returned (as previously). # rSOILWAT2 v6.0.2 diff --git a/R/swWeatherGenerator.R b/R/swWeatherGenerator.R index 715fdb6a..9ed74390 100644 --- a/R/swWeatherGenerator.R +++ b/R/swWeatherGenerator.R @@ -25,94 +25,91 @@ weatherGenerator_dataColumns <- function() { } -#' Estimate coefficients for use by \var{SOILWAT2} weather generator +#' Estimate coefficients of the `SOILWAT2` weather generator #' -#' Estimates coefficients for the two site-specific files -#' \var{mkv_covar.in} and \var{mkv_prob.in} required by the first-order -#' Markov weather generator in \var{SOILWAT2} \var{> v4.2.5}. +#' Estimated coefficients correspond to what is required by the two files +#' `"mkv_covar.in"` and `"mkv_prob.in"` for the first-order +#' Markov weather generator in `SOILWAT2` `> v4.2.5`. #' #' @section Notes: This code is a complete overhaul compared to the version -#' from \var{rSFSTEP2} on \code{2019-Feb-10} -#' commit \var{cd9e161971136e1e56d427a4f76062bbb0f3d03a} -#' \url{https://github.com/DrylandEcology/rSFSTEP2}. +# nolint start: line_length_linter. +#' from [`rSFSTEP2` on `2019-Feb-10`](https://github.com/DrylandEcology/rSFSTEP2/commit/cd9e161971136e1e56d427a4f76062bbb0f3d03a). +# nolint end: line_length_linter. #' -#' @section Notes: This function will produce \code{NA}s in the output if there -#' are insufficient weather observation in the input data \code{weatherData} -#' for a specific day or week of the year. Such \code{NA}s will cause a -#' \var{SOILWAT2} run to fail (potentially non-graciously and -#' with non-obvious error messages). To avoid that, this function offers -#' imputation approaches in order to fill in those failed coefficient -#' estimates; see \code{\link[rSW2utils]{impute_df}}, but please note that -#' any such imputation likely introduces biases in the generated weather. +#' @section Notes: This function will produce `NA`s in the output if there +#' are insufficient weather observation in the input data `weatherData` +#' for a specific day or week of the year. Such `NA`s will cause a +#' `SOILWAT2` run to fail (potentially non-graciously and +#' with non-obvious error messages). To avoid that, this function offers +#' imputation approaches in order to fill in those failed coefficient +#' estimates; see [rSW2utils::impute_df()], but please note that +#' any such imputation likely introduces biases in the generated weather. #' -#' @section Details: Most users will likely want to set \code{propagate_NAs} to -#' \code{FALSE}. Note: \code{propagate_NAs} corresponds to \code{!na.rm} -#' from previous versions of this function with a different default value. -#' Consider an example: a the 30-year long input \code{weatherData} is -#' complete except for missing values on Jan 1, 2018. -#' \itemize{ -#' \item If \code{propagate_NAs} is set to \code{TRUE}, then the -#' coefficients for day 1 and week 1 of year will be \code{NA} -- +#' @section Details: +#' Most users will likely want to set `propagate_NAs` to `FALSE`. +#' Note: `propagate_NAs` corresponds to `!na.rm` +#' from previous versions of this function with a different default value. +#' Consider an example: a the 30-year long input `weatherData` is +#' complete except for missing values on Jan 1, 2018. +#' * If `propagate_NAs` is set to `TRUE`, then the +#' coefficients for day 1 and week 1 of year will be `NA` -- #' despite all the available data. In this case, the missing coefficients #' for day 1 and week 1 of year will be imputed. -#' \item If \code{propagate_NAs} is set to \code{FALSE}, then the coefficients +#' * If `propagate_NAs` is set to `FALSE`, then the coefficients #' for day 1 and week 1 of year will be calculated based on the non-missing #' values for that day respectively that week of year. No imputation occurs. -#' } #' -#' @param weatherData A list of elements of class -#' \code{\linkS4class{swWeatherData}} or a \code{data.frame} as returned by -#' \code{\link{dbW_weatherData_to_dataframe}}. +#' +#' @param weatherData A list of elements of class [`swWeatherData`] +#' or a data frame as returned by [dbW_weatherData_to_dataframe()]. #' @param WET_limit_cm A numeric value. A day with more precipitation than -#' this value is considered \var{wet} instead of \var{dry}. Default is 0. -#' This values should be equal to the corresponding value used in -#' \var{SOILWAT2}'s function \code{SW_MKV_today}. -#' @param propagate_NAs A logical value. If \code{TRUE}, then missing weather -#' values in the input \code{weatherData} are excluded; if \code{FALSE}, then -#' missing values are propagated to the estimation. See Details. +#' this value is considered `wet` instead of `dry`. Default is 0. +#' This values should be equal to the corresponding value used in +#' `SOILWAT2`'s function `SW_MKV_today()`. +#' @param propagate_NAs A logical value. If `TRUE`, then missing weather +#' values in the input `weatherData` are excluded; if `FALSE`, then +#' missing values are propagated to the estimation. See Details. +#' @param imputation_type A text string. Passed to [rSW2utils::impute_df()] +#' for imputation of missing values in estimated weather generator coefficients. +#' @param imputation_span An integer value. Passed to [rSW2utils::impute_df()] +#' for imputation of missing values in estimated weather generator coefficients. #' @inheritParams set_missing_weather -#' @inheritParams rSW2utils::impute_df #' #' @return A list with two named elements: -#' \describe{ -#' \item{\code{mkv_doy}}{A data.frame with 366 rows (day of year) and -#' 5 columns: \describe{ -#' \item{DOY}{Day of year.} -#' \item{p_W_W}{Probability that doy is wet if the previous day -#' (doy - 1) was wet.} -#' \item{p_W_D}{Probability that doy is wet if the previous day -#' (doy - 1) was dry.} -#' \item{PPT_avg}{Average amount of precipitation (centimeters) -#' on doy if it is wet.} -#' \item{PPT_sd}{Standard deviation of amount of precipitation -#' (centimeters) on doy if it is wet.} -#' }} -#' \item{\code{mkv_woy}}{A data.frame with 53 rows (\var{SOILWAT2} -#' weeks of year, i.e., counted as consecutive \var{heptads} of days) and -#' 11 columns: \describe{ -#' \item{WEEK}{Week of year.} -#' \item{wTmax_C}{Average daily maximum temperature (C) for week.} -#' \item{wTmin_C}{Average daily minimum temperature (C) for week.} -#' \item{var_MAX}{Variance of daily maximum temperature for week.} -#' \item{cov_MAXMIN}{Covariance of daily maximum and minimum -#' temperatures for week.} -#' \item{cov_MINMAX}{Identical to \code{cov_MAXMIN}.} -#' \item{var_MIN}{Variance of daily minimum temperature for week.} -#' \item{CF_Tmax_wet}{Difference between average daily maximum -#' temperature (C) for wet days of week and \code{wTmax_C}.} -#' \item{CF_Tmax_dry}{Difference between average daily maximum -#' temperature (C) for dry days of week and \code{wTmax_C}.} -#' \item{CF_Tmin_wet}{Same as \code{CF_Tmax_wet} but for daily -#' minimum temperature.} -#' \item{CF_Tmin_dry}{Same as \code{CF_Tmax_dry} but for daily -#' minimum temperature.} -#' }} -#' } +#' * `"mkv_doy"`: A data frame with 366 rows (day of year) and 5 columns: +#' * `"DOY"`: Rows represent day of year. +#' * `"p_W_W"`: Probability that `DOY` is wet if the previous day +#' `(doy - 1)` was wet. +#' * `"p_W_D"`: Probability that `DOY` is wet if the previous day +#' `(doy - 1)` was dry. +#' * `"PPT_avg"`: Average amount of precipitation `[cm]` on `DOY` if wet. +#' * `"PPT_sd"`: Standard deviation of amount of precipitation `[cm]` +#' on `DOY` if wet. +#' +#' * `"mkv_woy"`: A data frame with 53 rows `SOILWAT2` weeks of year +#' (i.e., consecutive `heptads` of days) and 11 columns +#' * `"WEEK"`: Rows represent week of year. +#' * `"wTmax_C"`: Average daily maximum temperature `[C]` for week. +#' * `"wTmin_C"`: Average daily minimum temperature `[C]` for week. +#' * `"var_MAX"`: Variance of daily maximum temperature for week. +#' * `"cov_MAXMIN"`: Covariance of daily maximum and minimum +#' temperatures for week. +#' * `"cov_MINMAX"`: Identical to `"cov_MAXMIN"`. +#' * `"var_MIN"`: Variance of daily minimum temperature for week. +#' * `"CF_Tmax_wet"`: Difference between average daily maximum +#' temperature `[C]` for wet days of week and `"wTmax_C"`. +#' * `"CF_Tmax_dry"`: Difference between average daily maximum +#' temperature `[C]` for dry days of week and `"wTmax_C"`. +#' * `"CF_Tmin_wet"`: Same as `"CF_Tmax_wet"` but for daily +#' minimum temperature. +#' * `"CF_Tmin_dry"`: Same as `"CF_Tmax_dry"` but for daily +#' minimum temperature. #' -#' @seealso \code{\link{print_mkv_files}} to print values to \var{SOILWAT2} -#' compatible files. \code{\link{swMarkov_Prob}} and -#' \code{\link{swMarkov_Conv}} to extract/replace values in a \pkg{rSOILWAT2} -#' input object of class \code{\linkS4class{swInputData}}. +#' +#' @seealso [print_mkv_files()] to print values to `SOILWAT2` +#' compatible files. [swMarkov_Prob()] and +#' [swMarkov_Conv()] to extract/replace values in a `rSOILWAT2` +#' input object of class [`swInputData`]. #' #' @examples #' res1 <- dbW_estimate_WGen_coefs(rSOILWAT2::weatherData) @@ -123,6 +120,7 @@ weatherGenerator_dataColumns <- function() { #' swMarkov_Prob(sw_in) <- res2[["mkv_doy"]] #' swMarkov_Conv(sw_in) <- res2[["mkv_woy"]] #' +#' @md #' @export dbW_estimate_WGen_coefs <- function( weatherData, @@ -1015,21 +1013,24 @@ compare_weather <- function( #' Generate daily weather data using SOILWAT2's weather generator #' -#' This function is a convenience wrapper for -#' \code{\link{dbW_estimate_WGen_coefs}}. +#' This function is a convenience wrapper for [dbW_estimate_WGen_coefs()]. #' #' @inheritParams dbW_estimate_WGen_coefs #' @inheritParams sw_weather_data #' @param years An integer vector. The calendar years for which to generate -#' daily weather. If \code{NULL}, then extracted from \code{weatherData}. -#' @param wgen_coeffs A list with two named elements \var{mkv_doy} and -#' \var{mkv_woy}, i.e., the return value of -#' \code{\link{dbW_estimate_WGen_coefs}}. If \code{NULL}, then determined -#' based on \code{weatherData}. -#' @inheritParams rSW2utils::impute_df -#' @param seed An integer value or \code{NULL}. See \code{\link{set.seed}}. +#' daily weather. If `NULL`, then extracted from `weatherData`. +#' @param wgen_coeffs A list with two named elements `"mkv_doy"` and +#' `"mkv_woy"`, i.e., the return value of [dbW_estimate_WGen_coefs()]. +#' If `NULL`, then [dbW_estimate_WGen_coefs()] is called on `weatherData`. +#' @param seed An integer value or `NULL`. See [base::set.seed()]. +#' @param return_weatherDF A logical value. See section "Value". #' -#' @return A list of elements of class \code{\linkS4class{swWeatherData}}. +#' @return An updated copy of `weatherData` where missing values are imputed +#' by the weather generator. +#' If `return_weatherDF` is `TRUE`, then the result is converted to a +#' data frame where columns represent weather variables. +#' If `return_weatherDF` is `FALSE`, then the result is +#' a list of elements of class [`swWeatherData`]. #' #' @section Details: #' The current implementation of the weather generator produces values @@ -1052,7 +1053,7 @@ compare_weather <- function( #' x[ids, -(1:2)] <- NA #' #' ## Example 1: generate weather for any missing values in our 'dataset' -#' wout1 <- dbW_generateWeather(x) +#' wout1 <- dbW_generateWeather(x, return_weatherDF = TRUE) #' #' ## Example 2: generate weather based on our 'dataset' but for #' ## years 2005-2015 and use estimated weather generator coefficients from @@ -1085,7 +1086,7 @@ compare_weather <- function( #' path <- tempdir() #' compare_weather( #' ref_weather = x, -#' weather = dbW_weatherData_to_dataframe(wout1), +#' weather = wout1, #' N = 1, #' path = path, #' tag = "Example1-WeatherGenerator" @@ -1100,6 +1101,7 @@ dbW_generateWeather <- function( wgen_coeffs = NULL, imputation_type = "mean", imputation_span = 5L, + return_weatherDF = FALSE, digits = NA, seed = NULL ) { @@ -1113,10 +1115,11 @@ dbW_generateWeather <- function( ) } - if (is.data.frame(weatherData)) { + if (!dbW_check_weatherData(weatherData, check_all = FALSE)) { weatherData <- dbW_dataframe_to_weatherData(weatherData) } + if (is.null(years)) { years <- get_years_from_weatherData(weatherData) } @@ -1152,5 +1155,9 @@ dbW_generateWeather <- function( set.seed(seed) res <- .Call(C_rSW2_processAllWeather, weatherData, sw_in) + if (isTRUE(as.logical(return_weatherDF[[1L]]))) { + res <- dbW_weatherData_to_dataframe(res) + } + dbW_weatherData_round(res, digits = digits) } diff --git a/man/compare_weather.Rd b/man/compare_weather.Rd index 9c63dbab..caf7a35d 100644 --- a/man/compare_weather.Rd +++ b/man/compare_weather.Rd @@ -32,9 +32,9 @@ compared against \code{ref_weather}.} in \code{weather}.} \item{WET_limit_cm}{A numeric value. A day with more precipitation than -this value is considered \var{wet} instead of \var{dry}. Default is 0. +this value is considered \code{wet} instead of \code{dry}. Default is 0. This values should be equal to the corresponding value used in -\var{SOILWAT2}'s function \code{SW_MKV_today}.} +\code{SOILWAT2}'s function \code{SW_MKV_today()}.} \item{path}{A character string. The directory path in which output figures will be saved.} diff --git a/man/dbW_estimate_WGen_coefs.Rd b/man/dbW_estimate_WGen_coefs.Rd index 8de81822..9254e87d 100644 --- a/man/dbW_estimate_WGen_coefs.Rd +++ b/man/dbW_estimate_WGen_coefs.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/swWeatherGenerator.R \name{dbW_estimate_WGen_coefs} \alias{dbW_estimate_WGen_coefs} -\title{Estimate coefficients for use by \var{SOILWAT2} weather generator} +\title{Estimate coefficients of the \code{SOILWAT2} weather generator} \usage{ dbW_estimate_WGen_coefs( weatherData, @@ -14,14 +14,13 @@ dbW_estimate_WGen_coefs( ) } \arguments{ -\item{weatherData}{A list of elements of class -\code{\linkS4class{swWeatherData}} or a \code{data.frame} as returned by -\code{\link{dbW_weatherData_to_dataframe}}.} +\item{weatherData}{A list of elements of class \code{\link{swWeatherData}} +or a data frame as returned by \code{\link[=dbW_weatherData_to_dataframe]{dbW_weatherData_to_dataframe()}}.} \item{WET_limit_cm}{A numeric value. A day with more precipitation than -this value is considered \var{wet} instead of \var{dry}. Default is 0. +this value is considered \code{wet} instead of \code{dry}. Default is 0. This values should be equal to the corresponding value used in -\var{SOILWAT2}'s function \code{SW_MKV_today}.} +\code{SOILWAT2}'s function \code{SW_MKV_today()}.} \item{propagate_NAs}{A logical value. If \code{TRUE}, then missing weather values in the input \code{weatherData} are excluded; if \code{FALSE}, then @@ -30,94 +29,83 @@ missing values are propagated to the estimation. See Details.} \item{valNA}{The (numerical) value of missing weather data. If \code{NULL}, then default values are interpreted as missing.} -\item{imputation_type}{A character string describing the imputation method; - currently, one of three values: \describe{ - \item{\var{"none"}}{no imputation is carried out;} - \item{\var{"mean"}}{missing values will be replaced by the average - of \code{imputation_span} non-missing values before and - \code{imputation_span} non-missing values after; note: - this may fail if there are less than \code{2 * imputation_span} - non-missing values;} - \item{\var{"locf"}}{missing values will be replaced by the - \var{"last-observation-carried-forward"} imputation method.} -}} +\item{imputation_type}{A text string. Passed to \code{\link[rSW2utils:impute_df]{rSW2utils::impute_df()}} +for imputation of missing values in estimated weather generator coefficients.} -\item{imputation_span}{An integer value. The number of non-missing values -considered if \code{imputation_type = "mean"}.} +\item{imputation_span}{An integer value. Passed to \code{\link[rSW2utils:impute_df]{rSW2utils::impute_df()}} +for imputation of missing values in estimated weather generator coefficients.} } \value{ A list with two named elements: - \describe{ - \item{\code{mkv_doy}}{A data.frame with 366 rows (day of year) and - 5 columns: \describe{ - \item{DOY}{Day of year.} - \item{p_W_W}{Probability that doy is wet if the previous day - (doy - 1) was wet.} - \item{p_W_D}{Probability that doy is wet if the previous day - (doy - 1) was dry.} - \item{PPT_avg}{Average amount of precipitation (centimeters) - on doy if it is wet.} - \item{PPT_sd}{Standard deviation of amount of precipitation - (centimeters) on doy if it is wet.} - }} - \item{\code{mkv_woy}}{A data.frame with 53 rows (\var{SOILWAT2} - weeks of year, i.e., counted as consecutive \var{heptads} of days) and - 11 columns: \describe{ - \item{WEEK}{Week of year.} - \item{wTmax_C}{Average daily maximum temperature (C) for week.} - \item{wTmin_C}{Average daily minimum temperature (C) for week.} - \item{var_MAX}{Variance of daily maximum temperature for week.} - \item{cov_MAXMIN}{Covariance of daily maximum and minimum - temperatures for week.} - \item{cov_MINMAX}{Identical to \code{cov_MAXMIN}.} - \item{var_MIN}{Variance of daily minimum temperature for week.} - \item{CF_Tmax_wet}{Difference between average daily maximum - temperature (C) for wet days of week and \code{wTmax_C}.} - \item{CF_Tmax_dry}{Difference between average daily maximum - temperature (C) for dry days of week and \code{wTmax_C}.} - \item{CF_Tmin_wet}{Same as \code{CF_Tmax_wet} but for daily - minimum temperature.} - \item{CF_Tmin_dry}{Same as \code{CF_Tmax_dry} but for daily - minimum temperature.} - }} - } +\itemize{ +\item \code{"mkv_doy"}: A data frame with 366 rows (day of year) and 5 columns: +\itemize{ +\item \code{"DOY"}: Rows represent day of year. +\item \code{"p_W_W"}: Probability that \code{DOY} is wet if the previous day +\code{(doy - 1)} was wet. +\item \code{"p_W_D"}: Probability that \code{DOY} is wet if the previous day +\code{(doy - 1)} was dry. +\item \code{"PPT_avg"}: Average amount of precipitation \verb{[cm]} on \code{DOY} if wet. +\item \code{"PPT_sd"}: Standard deviation of amount of precipitation \verb{[cm]} +on \code{DOY} if wet. +} +\item \code{"mkv_woy"}: A data frame with 53 rows \code{SOILWAT2} weeks of year +(i.e., consecutive \code{heptads} of days) and 11 columns +\itemize{ +\item \code{"WEEK"}: Rows represent week of year. +\item \code{"wTmax_C"}: Average daily maximum temperature \verb{[C]} for week. +\item \code{"wTmin_C"}: Average daily minimum temperature \verb{[C]} for week. +\item \code{"var_MAX"}: Variance of daily maximum temperature for week. +\item \code{"cov_MAXMIN"}: Covariance of daily maximum and minimum +temperatures for week. +\item \code{"cov_MINMAX"}: Identical to \code{"cov_MAXMIN"}. +\item \code{"var_MIN"}: Variance of daily minimum temperature for week. +\item \code{"CF_Tmax_wet"}: Difference between average daily maximum +temperature \verb{[C]} for wet days of week and \code{"wTmax_C"}. +\item \code{"CF_Tmax_dry"}: Difference between average daily maximum +temperature \verb{[C]} for dry days of week and \code{"wTmax_C"}. +\item \code{"CF_Tmin_wet"}: Same as \code{"CF_Tmax_wet"} but for daily +minimum temperature. +\item \code{"CF_Tmin_dry"}: Same as \code{"CF_Tmax_dry"} but for daily +minimum temperature. +} +} } \description{ -Estimates coefficients for the two site-specific files -\var{mkv_covar.in} and \var{mkv_prob.in} required by the first-order -Markov weather generator in \var{SOILWAT2} \var{> v4.2.5}. +Estimated coefficients correspond to what is required by the two files +\code{"mkv_covar.in"} and \code{"mkv_prob.in"} for the first-order +Markov weather generator in \code{SOILWAT2} \verb{> v4.2.5}. } \section{Notes}{ This code is a complete overhaul compared to the version - from \var{rSFSTEP2} on \code{2019-Feb-10} - commit \var{cd9e161971136e1e56d427a4f76062bbb0f3d03a} - \url{https://github.com/DrylandEcology/rSFSTEP2}. +from \href{https://github.com/DrylandEcology/rSFSTEP2/commit/cd9e161971136e1e56d427a4f76062bbb0f3d03a}{\code{rSFSTEP2} on \code{2019-Feb-10}}. This function will produce \code{NA}s in the output if there - are insufficient weather observation in the input data \code{weatherData} - for a specific day or week of the year. Such \code{NA}s will cause a - \var{SOILWAT2} run to fail (potentially non-graciously and - with non-obvious error messages). To avoid that, this function offers - imputation approaches in order to fill in those failed coefficient - estimates; see \code{\link[rSW2utils]{impute_df}}, but please note that - any such imputation likely introduces biases in the generated weather. +are insufficient weather observation in the input data \code{weatherData} +for a specific day or week of the year. Such \code{NA}s will cause a +\code{SOILWAT2} run to fail (potentially non-graciously and +with non-obvious error messages). To avoid that, this function offers +imputation approaches in order to fill in those failed coefficient +estimates; see \code{\link[rSW2utils:impute_df]{rSW2utils::impute_df()}}, but please note that +any such imputation likely introduces biases in the generated weather. } \section{Details}{ - Most users will likely want to set \code{propagate_NAs} to - \code{FALSE}. Note: \code{propagate_NAs} corresponds to \code{!na.rm} - from previous versions of this function with a different default value. - Consider an example: a the 30-year long input \code{weatherData} is - complete except for missing values on Jan 1, 2018. - \itemize{ - \item If \code{propagate_NAs} is set to \code{TRUE}, then the - coefficients for day 1 and week 1 of year will be \code{NA} -- - despite all the available data. In this case, the missing coefficients - for day 1 and week 1 of year will be imputed. - \item If \code{propagate_NAs} is set to \code{FALSE}, then the coefficients - for day 1 and week 1 of year will be calculated based on the non-missing - values for that day respectively that week of year. No imputation occurs. - } + +Most users will likely want to set \code{propagate_NAs} to \code{FALSE}. +Note: \code{propagate_NAs} corresponds to \code{!na.rm} +from previous versions of this function with a different default value. +Consider an example: a the 30-year long input \code{weatherData} is +complete except for missing values on Jan 1, 2018. +\itemize{ +\item If \code{propagate_NAs} is set to \code{TRUE}, then the +coefficients for day 1 and week 1 of year will be \code{NA} -- +despite all the available data. In this case, the missing coefficients +for day 1 and week 1 of year will be imputed. +\item If \code{propagate_NAs} is set to \code{FALSE}, then the coefficients +for day 1 and week 1 of year will be calculated based on the non-missing +values for that day respectively that week of year. No imputation occurs. +} } \examples{ @@ -131,8 +119,8 @@ swMarkov_Conv(sw_in) <- res2[["mkv_woy"]] } \seealso{ -\code{\link{print_mkv_files}} to print values to \var{SOILWAT2} - compatible files. \code{\link{swMarkov_Prob}} and - \code{\link{swMarkov_Conv}} to extract/replace values in a \pkg{rSOILWAT2} - input object of class \code{\linkS4class{swInputData}}. +\code{\link[=print_mkv_files]{print_mkv_files()}} to print values to \code{SOILWAT2} +compatible files. \code{\link[=swMarkov_Prob]{swMarkov_Prob()}} and +\code{\link[=swMarkov_Conv]{swMarkov_Conv()}} to extract/replace values in a \code{rSOILWAT2} +input object of class \code{\link{swInputData}}. } diff --git a/man/dbW_generateWeather.Rd b/man/dbW_generateWeather.Rd index d7e23d34..c68da70d 100644 --- a/man/dbW_generateWeather.Rd +++ b/man/dbW_generateWeather.Rd @@ -10,49 +10,45 @@ dbW_generateWeather( wgen_coeffs = NULL, imputation_type = "mean", imputation_span = 5L, + return_weatherDF = FALSE, digits = NA, seed = NULL ) } \arguments{ -\item{weatherData}{A list of elements of class -\code{\linkS4class{swWeatherData}} or a \code{data.frame} as returned by -\code{\link{dbW_weatherData_to_dataframe}}.} +\item{weatherData}{A list of elements of class \code{\link{swWeatherData}} +or a data frame as returned by \code{\link[=dbW_weatherData_to_dataframe]{dbW_weatherData_to_dataframe()}}.} \item{years}{An integer vector. The calendar years for which to generate daily weather. If \code{NULL}, then extracted from \code{weatherData}.} -\item{wgen_coeffs}{A list with two named elements \var{mkv_doy} and -\var{mkv_woy}, i.e., the return value of -\code{\link{dbW_estimate_WGen_coefs}}. If \code{NULL}, then determined -based on \code{weatherData}.} - -\item{imputation_type}{A character string describing the imputation method; - currently, one of three values: \describe{ - \item{\var{"none"}}{no imputation is carried out;} - \item{\var{"mean"}}{missing values will be replaced by the average - of \code{imputation_span} non-missing values before and - \code{imputation_span} non-missing values after; note: - this may fail if there are less than \code{2 * imputation_span} - non-missing values;} - \item{\var{"locf"}}{missing values will be replaced by the - \var{"last-observation-carried-forward"} imputation method.} -}} - -\item{imputation_span}{An integer value. The number of non-missing values -considered if \code{imputation_type = "mean"}.} +\item{wgen_coeffs}{A list with two named elements \code{"mkv_doy"} and +\code{"mkv_woy"}, i.e., the return value of \code{\link[=dbW_estimate_WGen_coefs]{dbW_estimate_WGen_coefs()}}. +If \code{NULL}, then \code{\link[=dbW_estimate_WGen_coefs]{dbW_estimate_WGen_coefs()}} is called on \code{weatherData}.} + +\item{imputation_type}{A text string. Passed to \code{\link[rSW2utils:impute_df]{rSW2utils::impute_df()}} +for imputation of missing values in estimated weather generator coefficients.} + +\item{imputation_span}{An integer value. Passed to \code{\link[rSW2utils:impute_df]{rSW2utils::impute_df()}} +for imputation of missing values in estimated weather generator coefficients.} + +\item{return_weatherDF}{A logical value. See section "Value".} \item{digits}{An integer value. The number of decimal places for rounding weather values (or \code{TRUE} but no rounding if \code{FALSE} or not finite).} -\item{seed}{An integer value or \code{NULL}. See \code{\link{set.seed}}.} +\item{seed}{An integer value or \code{NULL}. See \code{\link[base:Random]{base::set.seed()}}.} } \value{ -A list of elements of class \code{\linkS4class{swWeatherData}}. +An updated copy of \code{weatherData} where missing values are imputed +by the weather generator. +If \code{return_weatherDF} is \code{TRUE}, then the result is converted to a +data frame where columns represent weather variables. +If \code{return_weatherDF} is \code{FALSE}, then the result is +a list of elements of class \code{\link{swWeatherData}}. } \description{ -This function is a convenience wrapper for -\code{\link{dbW_estimate_WGen_coefs}}. +This function is a convenience wrapper for \code{\link[=dbW_estimate_WGen_coefs]{dbW_estimate_WGen_coefs()}}. } \section{Details}{ @@ -77,7 +73,7 @@ ids <- x[, "Year"] == 2008 & x[, "DOY"] >= 153 & x[, "DOY"] <= 244 x[ids, -(1:2)] <- NA ## Example 1: generate weather for any missing values in our 'dataset' -wout1 <- dbW_generateWeather(x) +wout1 <- dbW_generateWeather(x, return_weatherDF = TRUE) ## Example 2: generate weather based on our 'dataset' but for ## years 2005-2015 and use estimated weather generator coefficients from @@ -110,7 +106,7 @@ wout3 <- dbW_generateWeather( path <- tempdir() compare_weather( ref_weather = x, - weather = dbW_weatherData_to_dataframe(wout1), + weather = wout1, N = 1, path = path, tag = "Example1-WeatherGenerator" diff --git a/vignettes/rSOILWAT2_demo.Rmd b/vignettes/rSOILWAT2_demo.Rmd index 6e8b3b68..e36a733c 100644 --- a/vignettes/rSOILWAT2_demo.Rmd +++ b/vignettes/rSOILWAT2_demo.Rmd @@ -310,11 +310,11 @@ You may organize weather data in a variety of ways: if (NROW(needs_wgen) > 0) { tmp1 <- rSOILWAT2::dbW_generateWeather( weatherData = xdf4, + return_weatherDF = TRUE, seed = 123 ) - tmp2 <- rSOILWAT2::dbW_weatherData_to_dataframe(tmp1) - - xdf4[, vars_wgen][needs_wgen] <- tmp2[, vars_wgen][needs_wgen] + + xdf4[, vars_wgen][needs_wgen] <- tmp1[, vars_wgen][needs_wgen] } ids_dif <- 2L + which(dif) @@ -323,12 +323,12 @@ You may organize weather data in a variety of ways: arr.ind = TRUE ) if (NROW(needs_locf) > 0) { - tmp1 <- rSW2utils::impute_df( + tmp2 <- rSW2utils::impute_df( xdf4[, ids_dif, drop = FALSE], imputation_type = "locf" ) - xdf4[, ids_dif][needs_locf] <- tmp1[needs_locf] + xdf4[, ids_dif][needs_locf] <- tmp2[needs_locf] } wdata <- rSOILWAT2::dbW_dataframe_to_weatherData(xdf4, round = 4L) From d641d432ead040acd21b8e2096bd2cd08e9f0917 Mon Sep 17 00:00:00 2001 From: Daniel Schlaepfer Date: Fri, 27 Oct 2023 07:16:13 -0400 Subject: [PATCH 4/7] New `dbW_imputeWeather()` - `dbW_imputeWeather()` combines the use of the weather generator and `rSW2utils::impute_df()` for replacing missing weather values - `dbW_imputeWeather()` simplifies the weather section of the demo vignette substantially --- NAMESPACE | 1 + NEWS.md | 20 +- R/swWeatherGenerator.R | 178 ++++++++++++++++++ man/dbW_generateWeather.Rd | 3 + man/dbW_imputeWeather.Rd | 113 +++++++++++ .../test_WeatherGenerator_functionality.R | 83 +++++--- vignettes/rSOILWAT2_demo.Rmd | 46 +---- 7 files changed, 373 insertions(+), 71 deletions(-) create mode 100644 man/dbW_imputeWeather.Rd diff --git a/NAMESPACE b/NAMESPACE index 6eca04c9..9cd514ee 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -57,6 +57,7 @@ export(dbW_has_siteIDs) export(dbW_has_sites) export(dbW_has_weatherData) export(dbW_have_sites_all_weatherData) +export(dbW_imputeWeather) export(dbW_setConnection) export(dbW_updateSites) export(dbW_upgrade_to_rSOILWAT2) diff --git a/NEWS.md b/NEWS.md index f4187f0e..c1dc2f76 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,12 +6,13 @@ * Update `SOILWAT2` to v7.2.0 which improves error handling and fixes memory leaks on error (#239; @dschlaep, @N1ckP3rsl3y). -## Bugfix +## Bug fixes * Code no longer requires (unused) `pcg` header (@dschlaep). ## New features -* New `upgrade_weatherDF()` adds requested weather columns to a data frame. -* Improved usability of weather functionality +* New `upgrade_weatherDF()` adds requested weather columns to a data frame + (@dschlaep). +* Improved rounding of weather functionality (@dschlaep): * `dbW_weatherData_round()` now rounds both `"weatherList"` and `"weatherDF"` objects; argument `"digits"` can now also be logical (if `TRUE`, then digits takes the default value of 4) or not finite @@ -21,11 +22,14 @@ recommended replacement is a separate call to `dbW_weatherData_round()`. * Argument `"digits"` of `dbW_generateWeather()` changed the default value from rounding to 4 digits to no rounding (`NA`). - * `dbW_generateWeather()` gained `"return_weatherDF"` and now returns a - user requested weather object type. If `return_weatherDF` is `TRUE`, - then the result is converted to a data frame where columns represent - weather variables; otherwise, a list of elements of class `swWeatherData` - is returned (as previously). +* `dbW_generateWeather()` gained `"return_weatherDF"` and now returns a + user requested weather object type (@dschlaep). + If `return_weatherDF` is `TRUE`, then the result is converted to a + data frame where columns represent weather variables; otherwise, + a list of elements of class `swWeatherData` is returned (as previously). +* New `dbW_imputeWeather()` replaces missing weather values using + using the weather generator and + using functionality by `rSW2utils::impute_df()` (@dschlaep). # rSOILWAT2 v6.0.2 diff --git a/R/swWeatherGenerator.R b/R/swWeatherGenerator.R index 9ed74390..b6867cb1 100644 --- a/R/swWeatherGenerator.R +++ b/R/swWeatherGenerator.R @@ -1040,6 +1040,8 @@ compare_weather <- function( #' all implemented variables will be replaced by those produced #' by the weather generator. #' +#' @seealso [dbW_imputeWeather()] +#' #' @examples #' # Load data for 1949-2010 #' wdata <- data.frame(dbW_weatherData_to_dataframe(rSOILWAT2::weatherData)) @@ -1161,3 +1163,179 @@ dbW_generateWeather <- function( dbW_weatherData_round(res, digits = digits) } + + + + + +#' Impute missing weather values +#' +#' Impute missing weather values first by the weather generator +#' (for supported variables, see [weatherGenerator_dataColumns()]) and +#' second, if any missing values remain, using one of the imputation types +#' provided by [rSW2utils::impute_df()] for each variable separately. +#' +#' @inheritParams dbW_generateWeather +#' @param use_wg A logical value. Should the weather generator be used first? +#' @param method_after_wg A string. The imputation type passed +#' to [rSW2utils::impute_df()], if any missing values remain after the +#' weather generator. +#' @param nmax_run An integer value. Runs (sets of consecutive missing values) +#' that are equal or shorter to `nmax_run` are imputed; +#' longer runs remain unchanged. Passed to [rSW2utils::impute_df()]. +#' +#' @return An updated copy of `weatherData` where missing values are imputed. +#' If `return_weatherDF` is `TRUE`, then a +#' data frame where columns represent weather variables is returned. +#' If `return_weatherDF` is `FALSE`, then the result is converted to a +#' a list of elements of class [`swWeatherData`]. +#' +#' @section Details: +#' The weather generator (see [dbW_generateWeather()]) produces new values +#' for all implemented variables [weatherGenerator_dataColumns()] on days +#' where at least one of these variables is missing; this is to maintain +#' physical consistency among those variables. +#' This differs from the approach employed by `method_after_wg` +#' which imputes missing variables for each variable separately +#' (see [rSW2utils::impute_df()]). +#' +#' @seealso [rSW2utils::impute_df()], [dbW_generateWeather()] +#' +#' @examples +#' # Load example data +#' path_demo <- system.file("extdata", "example1", package = "rSOILWAT2") +#' dif <- c(rep(TRUE, 3L), rep(FALSE, 11L)) +#' dif[13L] <- TRUE # ACTUAL_VP +#' dif[14L] <- TRUE # SHORT_WR, desc_rsds = 2 +#' wdata <- getWeatherData_folders( +#' LookupWeatherFolder = file.path(path_demo, "Input"), +#' weatherDirName = "data_weather_daymet", +#' filebasename = "weath", +#' startYear = 1980, +#' endYear = 1981, +#' dailyInputFlags = dif, +#' method = "C" +#' ) +#' x0 <- x <- dbW_weatherData_to_dataframe(wdata) +#' dif0 <- calc_dailyInputFlags(x0) +#' +#' # Set June-August of 1980 as missing +#' ids_missing <- x[, "Year"] == 1980 & x[, "DOY"] >= 153 & x[, "DOY"] <= 244 +#' x[ids_missing, -(1:2)] <- NA +#' +#' x1 <- dbW_imputeWeather(x, return_weatherDF = TRUE) +#' x2 <- dbW_imputeWeather(x, method_after_wg = "none", return_weatherDF = TRUE) +#' x3 <- dbW_imputeWeather( +#' x, +#' use_wg = FALSE, +#' method_after_wg = "locf", +#' return_weatherDF = TRUE +#' ) +#' +#' if (requireNamespace("graphics")) { +#' ## Compare original vs imputed values for May-Sep of 1980 +#' ip <- x[, "Year"] == 1980 & x[, "DOY"] >= 123 & x[, "DOY"] <= 274 +#' doy <- x[ip, "DOY"] +#' +#' vars <- names(dif0)[dif0] +#' nr <- ceiling(sqrt(length(vars))) +#' +#' par_prev <- graphics::par(mfrow = c(nr, ceiling(length(vars) / nr))) +#' +#' for (k in seq_along(vars)) { +#' graphics::plot(doy, x0[ip, vars[[k]]], ylab = vars[[k]], type = "l") +#' graphics::lines(doy, x1[ip, vars[[k]]], col = "red", lty = 2L) +#' graphics::lines(doy, x2[ip, vars[[k]]], col = "purple", lty = 3L) +#' graphics::lines(doy, x3[ip, vars[[k]]], col = "darkgreen", lty = 3L) +#' graphics::lines(doy, x0[ip, vars[[k]]], col = "gray") +#' } +#' +#' graphics::par(par_prev) +#' } +#' +#' @md +#' @export +dbW_imputeWeather <- function( + weatherData, + use_wg = TRUE, + seed = NULL, + method_after_wg = c("interp", "locf", "mean", "none", "fail"), + nmax_run = Inf, + return_weatherDF = FALSE +) { + + method_after_wg <- match.arg(method_after_wg) + + if (method_after_wg == "interp" || isTRUE(is.finite(nmax_run))) { + stopifnot( + # `rSW2utils::impute_df()` v0.2.1 required for + # type "interp" and argument "nmax_run" + check_version( + as.numeric_version(getNamespaceVersion("rSW2utils")), + expected_version = "0.2.1", + level = "patch" + ) + ) + } + + if (dbW_check_weatherData(weatherData, check_all = FALSE)) { + weatherData <- dbW_weatherData_to_dataframe(weatherData) + } + + + vars_wgen <- weatherGenerator_dataColumns() + + if ( + isTRUE(as.logical(use_wg)[[1L]]) && + any(is_missing_weather(weatherData[, vars_wgen, drop = FALSE])) + ) { + #--- Use weather generator for available variables + tmp <- dbW_generateWeather( + weatherData = weatherData, + return_weatherDF = TRUE, + seed = seed + ) + + weatherData[, vars_wgen] <- tmp[, vars_wgen] + } + + #--- Imputation after first pass of weather generator + vars_meteo <- intersect(weather_dataColumns(), colnames(weatherData)) + + # see `calc_dailyInputFlags(weatherData, vars_meteo)` + tmp_miss <- is_missing_weather(weatherData[, vars_meteo, drop = FALSE]) + tmp <- apply(!tmp_miss, MARGIN = 2L, FUN = any) + vars_wv <- names(tmp)[tmp] # variables with values + + needs_im <- which( + tmp_miss[, vars_wv, drop = FALSE], + arr.ind = TRUE + ) + + if (NROW(needs_im) > 0) { + if (method_after_wg == "fail") { + stop( + "Missing values in variables ", + if (use_wg) "after weather generator ", + "(method_after_wg = 'fail'): ", + toString(vars_wv[unique(needs_im[, "col"])]) + ) + } + + weatherData[, vars_wv][needs_im] <- NA # set all types of missingness to NA + + tmp <- rSW2utils::impute_df( + weatherData[, vars_wv, drop = FALSE], + imputation_type = method_after_wg, + nmax_run = nmax_run + ) + + weatherData[, vars_wv][needs_im] <- tmp[needs_im] + } + + if (isTRUE(as.logical(return_weatherDF[[1L]]))) { + weatherData + } else { + dbW_dataframe_to_weatherData(weatherData) + } +} diff --git a/man/dbW_generateWeather.Rd b/man/dbW_generateWeather.Rd index c68da70d..fed06b04 100644 --- a/man/dbW_generateWeather.Rd +++ b/man/dbW_generateWeather.Rd @@ -114,3 +114,6 @@ compare_weather( unlink(list.files(path), force = TRUE) } +\seealso{ +\code{\link[=dbW_imputeWeather]{dbW_imputeWeather()}} +} diff --git a/man/dbW_imputeWeather.Rd b/man/dbW_imputeWeather.Rd new file mode 100644 index 00000000..7d390c6f --- /dev/null +++ b/man/dbW_imputeWeather.Rd @@ -0,0 +1,113 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/swWeatherGenerator.R +\name{dbW_imputeWeather} +\alias{dbW_imputeWeather} +\title{Impute missing weather values} +\usage{ +dbW_imputeWeather( + weatherData, + use_wg = TRUE, + seed = NULL, + method_after_wg = c("interp", "locf", "mean", "none", "fail"), + nmax_run = Inf, + return_weatherDF = FALSE +) +} +\arguments{ +\item{weatherData}{A list of elements of class \code{\link{swWeatherData}} +or a data frame as returned by \code{\link[=dbW_weatherData_to_dataframe]{dbW_weatherData_to_dataframe()}}.} + +\item{use_wg}{A logical value. Should the weather generator be used first?} + +\item{seed}{An integer value or \code{NULL}. See \code{\link[base:Random]{base::set.seed()}}.} + +\item{method_after_wg}{A string. The imputation type passed +to \code{\link[rSW2utils:impute_df]{rSW2utils::impute_df()}}, if any missing values remain after the +weather generator.} + +\item{nmax_run}{An integer value. Runs (sets of consecutive missing values) +that are equal or shorter to \code{nmax_run} are imputed; +longer runs remain unchanged. Passed to \code{\link[rSW2utils:impute_df]{rSW2utils::impute_df()}}.} + +\item{return_weatherDF}{A logical value. See section "Value".} +} +\value{ +An updated copy of \code{weatherData} where missing values are imputed. +If \code{return_weatherDF} is \code{TRUE}, then a +data frame where columns represent weather variables is returned. +If \code{return_weatherDF} is \code{FALSE}, then the result is converted to a +a list of elements of class \code{\link{swWeatherData}}. +} +\description{ +Impute missing weather values first by the weather generator +(for supported variables, see \code{\link[=weatherGenerator_dataColumns]{weatherGenerator_dataColumns()}}) and +second, if any missing values remain, using one of the imputation types +provided by \code{\link[rSW2utils:impute_df]{rSW2utils::impute_df()}} for each variable separately. +} +\section{Details}{ + +The weather generator (see \code{\link[=dbW_generateWeather]{dbW_generateWeather()}}) produces new values +for all implemented variables \code{\link[=weatherGenerator_dataColumns]{weatherGenerator_dataColumns()}} on days +where at least one of these variables is missing; this is to maintain +physical consistency among those variables. +This differs from the approach employed by \code{method_after_wg} +which imputes missing variables for each variable separately +(see \code{\link[rSW2utils:impute_df]{rSW2utils::impute_df()}}). +} + +\examples{ +# Load example data +path_demo <- system.file("extdata", "example1", package = "rSOILWAT2") +dif <- c(rep(TRUE, 3L), rep(FALSE, 11L)) +dif[13L] <- TRUE # ACTUAL_VP +dif[14L] <- TRUE # SHORT_WR, desc_rsds = 2 +wdata <- getWeatherData_folders( + LookupWeatherFolder = file.path(path_demo, "Input"), + weatherDirName = "data_weather_daymet", + filebasename = "weath", + startYear = 1980, + endYear = 1981, + dailyInputFlags = dif, + method = "C" +) +x0 <- x <- dbW_weatherData_to_dataframe(wdata) +dif0 <- calc_dailyInputFlags(x0) + +# Set June-August of 1980 as missing +ids_missing <- x[, "Year"] == 1980 & x[, "DOY"] >= 153 & x[, "DOY"] <= 244 +x[ids_missing, -(1:2)] <- NA + +x1 <- dbW_imputeWeather(x, return_weatherDF = TRUE) +x2 <- dbW_imputeWeather(x, method_after_wg = "none", return_weatherDF = TRUE) +x3 <- dbW_imputeWeather( + x, + use_wg = FALSE, + method_after_wg = "locf", + return_weatherDF = TRUE +) + +if (requireNamespace("graphics")) { + ## Compare original vs imputed values for May-Sep of 1980 + ip <- x[, "Year"] == 1980 & x[, "DOY"] >= 123 & x[, "DOY"] <= 274 + doy <- x[ip, "DOY"] + + vars <- names(dif0)[dif0] + nr <- ceiling(sqrt(length(vars))) + + par_prev <- graphics::par(mfrow = c(nr, ceiling(length(vars) / nr))) + + for (k in seq_along(vars)) { + graphics::plot(doy, x0[ip, vars[[k]]], ylab = vars[[k]], type = "l") + graphics::lines(doy, x1[ip, vars[[k]]], col = "red", lty = 2L) + graphics::lines(doy, x2[ip, vars[[k]]], col = "purple", lty = 3L) + graphics::lines(doy, x3[ip, vars[[k]]], col = "darkgreen", lty = 3L) + graphics::lines(doy, x0[ip, vars[[k]]], col = "gray") + } + + graphics::par(par_prev) +} + +} +\seealso{ +\code{\link[rSW2utils:impute_df]{rSW2utils::impute_df()}}, \code{\link[=dbW_generateWeather]{dbW_generateWeather()}} +} diff --git a/tests/testthat/test_WeatherGenerator_functionality.R b/tests/testthat/test_WeatherGenerator_functionality.R index c2c0000e..3ec67312 100644 --- a/tests/testthat/test_WeatherGenerator_functionality.R +++ b/tests/testthat/test_WeatherGenerator_functionality.R @@ -64,7 +64,7 @@ test_that("Weather generator: estimate input parameters", { }) -test_that("Weather generator: generate weather", { +test_that("Weather generator: generate and impute weather", { digits <- 9L for (k in seq_along(tests)) { @@ -74,10 +74,23 @@ test_that("Weather generator: generate weather", { years <- get_years_from_weatherData(test_dat) n <- length(test_dat) + test_dat1_df <- dbW_weatherData_to_dataframe(test_dat) + wout <- list() - # Case 1: generate weather for dataset and impute missing values - wout[[1]] <- suppressMessages( + # Case 1: generate weather for dataset and impute missing wgen parameters + wout[[1L]] <- suppressMessages( + dbW_generateWeather( + test_dat, + imputation_type = "mean", + imputation_span = 5, + digits = digits, + seed = 123 + ) + ) + + # weather generator is exactly reproducible + tmp <- suppressMessages( dbW_generateWeather( test_dat, imputation_type = "mean", @@ -86,8 +99,27 @@ test_that("Weather generator: generate weather", { seed = 123 ) ) + expect_equal(tmp, wout[[1L]], tolerance = 10 ^ (-digits)) + + + # Case 1b: generate weather for missing days via impute weather function + wout[[2L]] <- suppressMessages( + dbW_imputeWeather( + test_dat, + use_wg = TRUE, + seed = 123, + method_after_wg = "none" + ) + ) - # Case 2: generate weather based on partial dataset, + # Expect that weather generator is equivalent between the two functions + expect_equal( + dbW_weatherData_to_dataframe(wout[[2L]]), + dbW_weatherData_to_dataframe(wout[[1L]]), + tolerance = 10 ^ (-digits) + ) + + # Case 3: generate weather based on partial dataset, # use estimated weather generator coefficients from full dataset wgen_coeffs <- suppressMessages( dbW_estimate_WGen_coefs( @@ -97,16 +129,16 @@ test_that("Weather generator: generate weather", { ) ) - wout[[2]] <- dbW_generateWeather( + wout[[3L]] <- dbW_generateWeather( test_dat[(n - 5):n], years = years[length(years)] + 0:10 - 5, wgen_coeffs = wgen_coeffs ) - # Case 3: generate weather based only on estimated weather generator + # Case 4: generate weather based only on estimated weather generator # coefficients from full dataset x_empty <- weatherHistory() - wout[[3]] <- dbW_generateWeather( + wout[[4L]] <- dbW_generateWeather( x_empty, years = years[length(years)] + 30:40, wgen_coeffs = wgen_coeffs @@ -128,27 +160,28 @@ test_that("Weather generator: generate weather", { #--- Expect that values remain unchanged - # wgen-variables: on days where all wgen-variables are non-missing - # Non-wgen variables: any non-missing value remain unchanged - - wout1_df <- rSOILWAT2::dbW_weatherData_to_dataframe(wout[[1L]]) - test_dat1_df <- rSOILWAT2::dbW_weatherData_to_dataframe(test_dat) - + # for wgen-variables: on days where all wgen-variables are non-missing ids_wgen <- which( - colnames(test_dat1_df) %in% rSOILWAT2::weatherGenerator_dataColumns() + colnames(test_dat1_df) %in% weatherGenerator_dataColumns() ) + # indices of weather generator variables + # * for days where at least one variable is missing (is_missing_wgen) + # * for days where no variable is missing (isnot_missing_wgen) tmp <- apply( !is_missing_weather(test_dat1_df[, ids_wgen, drop = FALSE]), MARGIN = 1L, FUN = all ) - isnot_missing_wgen <- as.matrix(data.frame( - row = rep(which(tmp), times = length(ids_wgen)), - col = rep(ids_wgen, each = sum(tmp)) - )) + isnot_missing_wgen <- as.matrix( + data.frame( + row = rep(which(tmp), times = length(ids_wgen)), + col = rep(ids_wgen, each = sum(tmp)) + ) + ) + # for non-wgen variables: any non-missing value remain unchanged ids_nowgen <- which( - !colnames(test_dat1_df) %in% rSOILWAT2::weatherGenerator_dataColumns() + !colnames(test_dat1_df) %in% weatherGenerator_dataColumns() ) isnot_missing_nowgen <- which( !is_missing_weather(test_dat1_df[, ids_nowgen, drop = FALSE]), @@ -159,11 +192,13 @@ test_that("Weather generator: generate weather", { isnot_missing <- rbind(isnot_missing_wgen, isnot_missing_nowgen) - expect_equal( - test_dat1_df[isnot_missing], - wout1_df[isnot_missing], - tolerance = 10 ^ (-digits) - ) + for (ke in 1:2) { + expect_equal( + test_dat1_df[isnot_missing], + dbW_weatherData_to_dataframe(wout[[ke]])[isnot_missing], + tolerance = 10 ^ (-digits) + ) + } } }) diff --git a/vignettes/rSOILWAT2_demo.Rmd b/vignettes/rSOILWAT2_demo.Rmd index e36a733c..95f37e8c 100644 --- a/vignettes/rSOILWAT2_demo.Rmd +++ b/vignettes/rSOILWAT2_demo.Rmd @@ -290,48 +290,16 @@ You may organize weather data in a variety of ways: # Convert `DayMet`'s `noleap` calendar to proleptic Gregorian calendar - xdf3 <- rSOILWAT2::dbW_convert_to_GregorianYears( - weatherData = rSOILWAT2::dbW_dataframe_to_weatherData( - weatherDF = xdf2[, -1L, drop = FALSE], - years = unique(xdf2[, "Year"]), - weatherDF_dataColumns = colnames(xdf2)[-1L] - ) - ) + xdf3 <- rSOILWAT2::dbW_convert_to_GregorianYears(xdf2, type = "asis") # Impute values for added leap days - # Use weather generator for available variables, use LOCF otherwise - xdf4 <- xdf3 - - vars_wgen <- rSOILWAT2::weatherGenerator_dataColumns() - needs_wgen <- which( - !is.finite(as.matrix(xdf4[, vars_wgen, drop = FALSE])), - arr.ind = TRUE - ) - if (NROW(needs_wgen) > 0) { - tmp1 <- rSOILWAT2::dbW_generateWeather( - weatherData = xdf4, - return_weatherDF = TRUE, - seed = 123 - ) - - xdf4[, vars_wgen][needs_wgen] <- tmp1[, vars_wgen][needs_wgen] - } - - ids_dif <- 2L + which(dif) - needs_locf <- which( - !is.finite(as.matrix(xdf4[, ids_dif, drop = FALSE])), - arr.ind = TRUE + # Use weather generator for available variables, otherwise interpolate + wdata <- rSOILWAT2::dbW_imputeWeather( + weatherData = xdf3, + use_wg = TRUE, + method_after_wg = "interp", + seed = 123L ) - if (NROW(needs_locf) > 0) { - tmp2 <- rSW2utils::impute_df( - xdf4[, ids_dif, drop = FALSE], - imputation_type = "locf" - ) - - xdf4[, ids_dif][needs_locf] <- tmp2[needs_locf] - } - - wdata <- rSOILWAT2::dbW_dataframe_to_weatherData(xdf4, round = 4L) # Check that weather data is well-formed stopifnot(rSOILWAT2::dbW_check_weatherData(wdata)) From c99c99454d3d5ebd392dc576f62ba41108551889 Mon Sep 17 00:00:00 2001 From: Daniel Schlaepfer Date: Mon, 30 Oct 2023 11:37:10 -0400 Subject: [PATCH 5/7] New `dbW_substituteWeather()` `dbW_substituteWeather()` replaces missing weather values in one weather data object with values from a second weather data object --- NAMESPACE | 1 + NEWS.md | 2 + R/swWeatherGenerator.R | 142 ++++++++++++++++++++++++++++++ man/dbW_substituteWeather.Rd | 81 +++++++++++++++++ tests/testthat/test_WeatherData.R | 89 +++++++++++++++++++ 5 files changed, 315 insertions(+) create mode 100644 man/dbW_substituteWeather.Rd diff --git a/NAMESPACE b/NAMESPACE index 9cd514ee..535f061a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -59,6 +59,7 @@ export(dbW_has_weatherData) export(dbW_have_sites_all_weatherData) export(dbW_imputeWeather) export(dbW_setConnection) +export(dbW_substituteWeather) export(dbW_updateSites) export(dbW_upgrade_to_rSOILWAT2) export(dbW_upgrade_v1to2) diff --git a/NEWS.md b/NEWS.md index c1dc2f76..1e559958 100644 --- a/NEWS.md +++ b/NEWS.md @@ -30,6 +30,8 @@ * New `dbW_imputeWeather()` replaces missing weather values using using the weather generator and using functionality by `rSW2utils::impute_df()` (@dschlaep). +* New `dbW_substituteWeather()` replaces missing weather values in one + weather data object with values from a second weather data object (@dschlaep). # rSOILWAT2 v6.0.2 diff --git a/R/swWeatherGenerator.R b/R/swWeatherGenerator.R index b6867cb1..fa12ec9b 100644 --- a/R/swWeatherGenerator.R +++ b/R/swWeatherGenerator.R @@ -1339,3 +1339,145 @@ dbW_imputeWeather <- function( dbW_dataframe_to_weatherData(weatherData) } } + + +#' Replace missing values with values from another weather data set +#' +#' @inheritParams dbW_generateWeather +#' @param subData A weather data object. +#' @param vars_substitute Names of variables for which missing values +#' in `weatherData` should be replaced by values from `subData`. +#' If `NULL`, then all weather variables (i.e., [weather_dataColumns()]). +#' @param by Names of variables used to match days (rows) between +#' `weatherData` and `subData`. If `NULL`, then all variables occurring both +#' in `weatherData` and `subData` that are not weather variables. +#' @param by_weatherData See `by`. +#' @param by_subData See `by`. +#' @param return_weatherDF A logical value. See section "Value". +#' +#' @return An updated copy of `weatherData` where missing values have been +#' replaced by corresponding values from `subData`. +#' If `return_weatherDF` is `TRUE`, then the result is converted to a +#' data frame where columns represent weather variables. +#' If `return_weatherDF` is `FALSE`, then the result is +#' a list of elements of class [`swWeatherData`]. +#' +#' @seealso [dbW_generateWeather()], [dbW_imputeWeather()] +#' +#' @examples +#' # Load example data +#' path_demo <- system.file("extdata", "example1", package = "rSOILWAT2") +#' dif <- c(rep(TRUE, 3L), rep(FALSE, 11L)) +#' dif[13L] <- TRUE # ACTUAL_VP +#' dif[14L] <- TRUE # SHORT_WR, desc_rsds = 2 +#' wdata <- getWeatherData_folders( +#' LookupWeatherFolder = file.path(path_demo, "Input"), +#' weatherDirName = "data_weather_daymet", +#' filebasename = "weath", +#' startYear = 1980, +#' endYear = 1981, +#' dailyInputFlags = dif, +#' method = "C" +#' ) +#' x0 <- x <- dbW_weatherData_to_dataframe(wdata) +#' dif0 <- calc_dailyInputFlags(x0) +#' +#' # Set June-August of 1980 as missing +#' ids_1980 <- x[, "Year"] == 1980 +#' ids_missing <- ids_1980 & x[, "DOY"] >= 153 & x[, "DOY"] <= 244 +#' x[ids_missing, -(1:2)] <- NA +#' +#' # Substitute missing values +#' all.equal( +#' dbW_substituteWeather(x, x0[ids_1980, ], return_weatherDF = TRUE), +#' x0 +#' ) +#' +#' +#' @md +#' @export +dbW_substituteWeather <- function( + weatherData, + subData, + vars_substitute = NULL, + by = NULL, + by_weatherData = by, + by_subData = by, + return_weatherDF = FALSE +) { + #--- Convert to data frames + if (dbW_check_weatherData(weatherData, check_all = FALSE)) { + weatherData <- dbW_weatherData_to_dataframe(weatherData) + } + + if (dbW_check_weatherData(subData, check_all = FALSE)) { + subData <- dbW_weatherData_to_dataframe(subData) + } + + #--- Align days (rows) and variables (columns) + vars_both <- intersect(colnames(weatherData), colnames(subData)) + + if (is.null(vars_substitute)) { + vars_req <- vars_both + } else { + vars_req <- intersect(vars_both, vars_substitute) + if (length(vars_req) != length(vars_substitute)) { + warning( + "Not all requested variables present in both datasets." + ) + } + } + + vars_meteo <- intersect(vars_req, weather_dataColumns()) + + if (is.null(by_weatherData)) { + by_weatherData <- by_subData + } + + if (is.null(by_subData)) { + by_subData <- by_weatherData + } + + if (is.null(by_weatherData) && is.null(by_subData)) { + by_weatherData <- by_subData <- setdiff(vars_both, weather_dataColumns()) + } + + if ( + any( + length(by_weatherData) == 0L, + length(by_weatherData) != length(by_subData), + !all(by_weatherData %in% colnames(weatherData)), + !all(by_subData %in% colnames(subData)) + ) + ) { + stop("Insufficient/bad information to match days.") + } + + wdids <- do.call( + paste, + args = c(as.list(as.data.frame(weatherData)[by_weatherData]), sep = "-") + ) + sdids <- do.call( + paste, + args = c(as.list(as.data.frame(subData)[by_subData]), sep = "-") + ) + + ids <- match(wdids, sdids, nomatch = 0L) + idsnn <- ids > 0L + + if (!any(idsnn)) { + warning("No matching days found.") + } + + needsSub <- is_missing_weather(weatherData[idsnn, vars_meteo, drop = FALSE]) + + weatherData[idsnn, vars_meteo][needsSub] <- subData[ids, vars_meteo][needsSub] + + + #--- Return + if (isTRUE(as.logical(return_weatherDF[[1L]]))) { + weatherData + } else { + dbW_dataframe_to_weatherData(weatherData) + } +} diff --git a/man/dbW_substituteWeather.Rd b/man/dbW_substituteWeather.Rd new file mode 100644 index 00000000..f33ca680 --- /dev/null +++ b/man/dbW_substituteWeather.Rd @@ -0,0 +1,81 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/swWeatherGenerator.R +\name{dbW_substituteWeather} +\alias{dbW_substituteWeather} +\title{Replace missing values with values from another weather data set} +\usage{ +dbW_substituteWeather( + weatherData, + subData, + vars_substitute = NULL, + by = NULL, + by_weatherData = by, + by_subData = by, + return_weatherDF = FALSE +) +} +\arguments{ +\item{weatherData}{A list of elements of class \code{\link{swWeatherData}} +or a data frame as returned by \code{\link[=dbW_weatherData_to_dataframe]{dbW_weatherData_to_dataframe()}}.} + +\item{subData}{A weather data object.} + +\item{vars_substitute}{Names of variables for which missing values +in \code{weatherData} should be replaced by values from \code{subData}. +If \code{NULL}, then all weather variables (i.e., \code{\link[=weather_dataColumns]{weather_dataColumns()}}).} + +\item{by}{Names of variables used to match days (rows) between +\code{weatherData} and \code{subData}. If \code{NULL}, then all variables occurring both +in \code{weatherData} and \code{subData} that are not weather variables.} + +\item{by_weatherData}{See \code{by}.} + +\item{by_subData}{See \code{by}.} + +\item{return_weatherDF}{A logical value. See section "Value".} +} +\value{ +An updated copy of \code{weatherData} where missing values have been +replaced by corresponding values from \code{subData}. +If \code{return_weatherDF} is \code{TRUE}, then the result is converted to a +data frame where columns represent weather variables. +If \code{return_weatherDF} is \code{FALSE}, then the result is +a list of elements of class \code{\link{swWeatherData}}. +} +\description{ +Replace missing values with values from another weather data set +} +\examples{ +# Load example data +path_demo <- system.file("extdata", "example1", package = "rSOILWAT2") +dif <- c(rep(TRUE, 3L), rep(FALSE, 11L)) +dif[13L] <- TRUE # ACTUAL_VP +dif[14L] <- TRUE # SHORT_WR, desc_rsds = 2 +wdata <- getWeatherData_folders( + LookupWeatherFolder = file.path(path_demo, "Input"), + weatherDirName = "data_weather_daymet", + filebasename = "weath", + startYear = 1980, + endYear = 1981, + dailyInputFlags = dif, + method = "C" +) +x0 <- x <- dbW_weatherData_to_dataframe(wdata) +dif0 <- calc_dailyInputFlags(x0) + +# Set June-August of 1980 as missing +ids_1980 <- x[, "Year"] == 1980 +ids_missing <- ids_1980 & x[, "DOY"] >= 153 & x[, "DOY"] <= 244 +x[ids_missing, -(1:2)] <- NA + +# Substitute missing values +all.equal( + dbW_substituteWeather(x, x0[ids_1980, ], return_weatherDF = TRUE), + x0 +) + + +} +\seealso{ +\code{\link[=dbW_generateWeather]{dbW_generateWeather()}}, \code{\link[=dbW_imputeWeather]{dbW_imputeWeather()}} +} diff --git a/tests/testthat/test_WeatherData.R b/tests/testthat/test_WeatherData.R index e8e48c48..036b02a7 100644 --- a/tests/testthat/test_WeatherData.R +++ b/tests/testthat/test_WeatherData.R @@ -255,3 +255,92 @@ test_that("Weather data object conversions", { expect_identical(res, rSOILWAT2::weatherData) }) + + + + +test_that("Weather data substitution", { + # Load example data + path_demo <- system.file("extdata", "example1", package = "rSOILWAT2") + dif <- c(rep(TRUE, 3L), rep(FALSE, 11L)) + dif[13L] <- TRUE # ACTUAL_VP + dif[14L] <- TRUE # SHORT_WR, desc_rsds = 2 + wdata <- rSOILWAT2::getWeatherData_folders( + LookupWeatherFolder = file.path(path_example1, "Input"), + weatherDirName = grep( + "data_weather_daymet", + x = dir_weather, + value = TRUE, + fixed = TRUE + ), + filebasename = "weath", + startYear = 1980, + endYear = 1981, + dailyInputFlags = dif + ) + + # Prepare example data + x0 <- x <- dbW_weatherData_to_dataframe(wdata) + dif0 <- calc_dailyInputFlags(x0) + + # Set June-August of 1980 as missing + ids_1980 <- x[, "Year"] == 1980 + ids_missing <- ids_1980 & x[, "DOY"] >= 153 & x[, "DOY"] <= 244 + x[ids_missing, -(1:2)] <- NA + + # Test: substitute missing values of all variables + expect_identical( + dbW_substituteWeather(x, x0[ids_1980, ], return_weatherDF = TRUE), + x0 + ) + + # Test: substitute missing values of some variables + var_test <- "shortWR" + expect_identical( + dbW_substituteWeather( + x, + subData = x0[ids_1980, ], + vars_substitute = var_test, + return_weatherDF = TRUE + )[, var_test], + x0[, var_test] + ) + + # Test: substitute missing values if only some variables are available + vars_has <- c("Year", "DOY", "Tmax_C", "shortWR") + expect_identical( + dbW_substituteWeather( + x, + subData = x0[ids_1980, vars_has], + return_weatherDF = TRUE + )[, vars_has], + x0[, vars_has] + ) + + expect_warning( + dbW_substituteWeather( + x, + subData = x0[ids_1980, vars_has], + vars_substitute = weather_dataColumns(), + return_weatherDF = TRUE + ), + regexp = "Not all requested variables present" + ) + + # Test: match rows if "by" variables differ + expect_identical( + dbW_substituteWeather( + x, + subData = data.frame( + annus = x0[, "Year"], + dies = x0[, "DOY"], + x0[, setdiff(colnames(x0), c("Year", "DOY"))] + )[ids_1980, ], + by_weatherData = c("Year", "DOY"), + by_subData = c("annus", "dies"), + return_weatherDF = TRUE + ), + x0 + ) + +}) From 2184235cc8b150e863ccb7347b669264f66668bf Mon Sep 17 00:00:00 2001 From: Daniel Schlaepfer Date: Fri, 3 Nov 2023 15:19:16 -0400 Subject: [PATCH 6/7] New `dbW_fixWeather()` fixes missing weather values using a sequence of approaches Approaches are linear interpolation for short missing spells, possibly fixed values for short missing precipitation spells, substitution from a second weather data object, and finally replacement with long term daily mean values --- NAMESPACE | 1 + NEWS.md | 5 + R/swWeatherGenerator.R | 305 ++++++++++++++++++++++++++++++ man/dbW_fixWeather.Rd | 111 +++++++++++ tests/testthat/test_WeatherData.R | 63 ++++++ 5 files changed, 485 insertions(+) create mode 100644 man/dbW_fixWeather.Rd diff --git a/NAMESPACE b/NAMESPACE index 535f061a..2fdc1aa3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -44,6 +44,7 @@ export(dbW_deleteSiteData) export(dbW_delete_duplicated_weatherData) export(dbW_disconnectConnection) export(dbW_estimate_WGen_coefs) +export(dbW_fixWeather) export(dbW_generateWeather) export(dbW_getIDs) export(dbW_getScenarioId) diff --git a/NEWS.md b/NEWS.md index 1e559958..baba9383 100644 --- a/NEWS.md +++ b/NEWS.md @@ -32,6 +32,11 @@ using functionality by `rSW2utils::impute_df()` (@dschlaep). * New `dbW_substituteWeather()` replaces missing weather values in one weather data object with values from a second weather data object (@dschlaep). +* New `dbW_fixWeather()` fixes missing weather values using a sequence of + approaches including linear interpolation for short missing spells, + a fixed value for short spells of missing precipitation (optionally), + substitution from a second weather data object, and + replacement with long term daily mean values (@dschlaep). # rSOILWAT2 v6.0.2 diff --git a/R/swWeatherGenerator.R b/R/swWeatherGenerator.R index fa12ec9b..e5ac54d7 100644 --- a/R/swWeatherGenerator.R +++ b/R/swWeatherGenerator.R @@ -1481,3 +1481,308 @@ dbW_substituteWeather <- function( dbW_dataframe_to_weatherData(weatherData) } } + + + +#' Fix weather data +#' +#' Missing values are `"fixed"` with the following approach: +#' 1. `weatherData` is formatted for `rSOILWAT2`, i.e., converted to a +#' Gregorian calendar and required but missing variables added. +#' 2. Short spells of missing values +#' (consecutive days shorter than `nmax_interp`) are linearly interpolated +#' from adjacent non-missing values +#' (meta data tag `"interpolateLinear (<= X days)"`) +#' 3. Short spells of missing precipitation values are +#' * Linearly interpolated if `precip_lt_nmax` is `NA` +#' * Set to a fixed numeric value of `precip_lt_nmax` +#' (meta data tag `"fixedValue"`) +#' * Substituted with values from `subData` if `precip_lt_nmax` is `Inf` +#' (see next point) +#' 4. Values from a second weather data object `subData` are used to replace +#' (meta data tag `substituteData"`): +#' * Missing precipitation values (if `precip_lt_nmax` is `Inf`) +#' * Values before first day with any non-missing values +#' * Variables absent in `weatherData` and present in `subData` +#' 5. Long-term daily means are used to replace any remaining missing values +#' (meta data tag `"longTermDailyMean"`); +#' for instance, this approach may be applied for +#' * Values of variables that are present in `weatherData` and +#' absent in `subData`, before first day with any non-missing values +#' * Values after end of available values in both `weatherData` and +#' `subData` +#' +#' @inheritParams dbW_substituteWeather +#' @inheritParams dbW_convert_to_GregorianYears +#' @param nmax_interp An integer value. Maximum spell length of missing values +#' for which linear interpolation is applied. +#' @param precip_lt_nmax A numeric value. Should short spells of +#' missing precipitation values be +#' linearly interpolated (if `NA`), +#' substituted with values from `subData` (if `Inf`), or +#' replaced by a fixed numeric value (default is 0)? +#' +#' @return A list with two named elements +#' * `"weatherData"`: An updated copy of the input `weatherData` +#' where missing values have been replaced. +#' If `return_weatherDF` is `TRUE`, then the result is converted to a +#' data frame where columns represent weather variables. +#' If `return_weatherDF` is `FALSE`, then the result is +#' a list of elements of class [`swWeatherData`]. +#' * `"meta"`: a data frame with the same dimensions as `"weatherData"` +#' with tags indicating which approach was used to replaced missing values +#' in corresponding cells of `weatherData` (see section `Details`) +#' +#' @seealso [dbW_imputeWeather()], [dbW_substituteWeather()], +#' [dbW_generateWeather()] +#' +#' @examples +#' x0 <- x <- dbW_weatherData_to_dataframe(rSOILWAT2::weatherData) +#' +#' tmp <- x[, "Year"] == 1981 +#' ids_to_interp <- tmp & x[, "DOY"] >= 144 & x[, "DOY"] <= 145 +#' x[ids_to_interp, -(1:2)] <- NA +#' +#' tmp <- x[, "Year"] == 1980 +#' ids_to_sub <- tmp & x[, "DOY"] >= 153 & x[, "DOY"] <= 244 +#' x[ids_to_sub, -(1:2)] <- NA +#' +#' xf <- dbW_fixWeather(x, x0, return_weatherDF = TRUE) +#' all.equal( +#' xf[["weatherData"]][!ids_to_interp, ], +#' as.data.frame(x0)[!ids_to_interp, ] +#' ) +#' table(xf[["meta"]]) +#' +#' @md +#' @export +dbW_fixWeather <- function( + weatherData, + subData = NULL, + new_startYear = NULL, + new_endYear = NULL, + nmax_interp = 7L, + precip_lt_nmax = 0, + return_weatherDF = FALSE +) { + nmax_interp <- as.integer(nmax_interp) + vars_time <- c("Year", "DOY") + + #--- Convert to data frames and add missing variables (if any) + weatherData <- if (dbW_check_weatherData(weatherData, check_all = FALSE)) { + dbW_weatherData_to_dataframe(weatherData) + } else { + upgrade_weatherDF(weatherData) + } + + stopifnot( + c(vars_time, weather_dataColumns()) %in% colnames(weatherData) + ) + + #--- Locate years + if (is.null(new_startYear) || is.null(new_endYear)) { + years <- get_years_from_weatherDF( + weatherDF = weatherData, + years = NULL + )[["years"]] + + if (is.null(new_startYear)) new_startYear <- years[[1L]] + if (is.null(new_endYear)) new_endYear <- years[[length(years)]] + } + + + #--- Add missing days to complete full requested calendar years + weatherData1 <- dbW_convert_to_GregorianYears( + weatherData = weatherData, + new_startYear = new_startYear, + new_endYear = new_endYear, + type = "asis" + ) + + is_miss1 <- is_missing_weather(weatherData1[, weather_dataColumns()]) + meta <- array(dim = dim(is_miss1), dimnames = dimnames(is_miss1)) + + + #--- Determine first and last day with at least one observation + tmp <- which(rowSums(!is_miss1) > 0L) + ids <- tmp[c(1L, length(tmp))] + + ids_startend <- + # before start + (weatherData1[["Year"]] < weatherData1[ids[[1L]], "Year"]) | + (weatherData1[["Year"]] == weatherData1[ids[[1L]], "Year"] & + weatherData1[["DOY"]] < weatherData1[ids[[1L]], "DOY"]) | + # after end + (weatherData1[["Year"]] == weatherData1[ids[[2L]], "Year"] & + weatherData1[["DOY"]] > weatherData1[ids[[2L]], "DOY"]) | + (weatherData1[["Year"]] > weatherData1[ids[[2L]], "Year"]) + + + + #--- Interpolate short missing runs + weatherData2 <- suppressWarnings( + dbW_imputeWeather( + weatherData1, + use_wg = FALSE, + method_after_wg = "interp", + nmax_run = nmax_interp, + return_weatherDF = TRUE + ) + ) + + # special treatment of precipitation + # (NA = interpolate; x = fixedValue; Inf = subData) + is_pptFixedValue <- NULL + + if (!isTRUE(is.na(precip_lt_nmax[[1L]]))) { + + if (isTRUE(is.finite(precip_lt_nmax[[1L]]))) { + # Find linear interpolated precip values and replace with fixed value + ppt_miss2a <- is_missing_weather(weatherData2[, "PPT_cm"]) + is_pptFixedValue <- which(!ppt_miss2a[, 1L] & is_miss1[, "PPT_cm"]) + weatherData2[["PPT_cm"]][is_pptFixedValue] <- precip_lt_nmax[[1L]] + + } else { + # Reset for replacement by subData in following step + weatherData2[["PPT_cm"]] <- weatherData1[["PPT_cm"]] + } + } + + # Set values outside original time periods to missing + weatherData2[ids_startend, weather_dataColumns()] <- NA + + is_miss2 <- is_missing_weather(weatherData2[, weather_dataColumns()]) + meta[!is_miss2 & is_miss1] <- sprintf( + "interpolateLinear (<= %d days)", + nmax_interp + ) + + if (length(is_pptFixedValue) > 0L) { + meta[is_pptFixedValue, "PPT_cm"] <- "fixedValue" + } + + + #--- Use subData + # * for missing values before first observation day + # * for variables missing weatherData but available in subData + # * for missing precipitation + if (is.null(subData)) { + weatherData3 <- weatherData2 + is_miss3 <- is_miss2 + + } else { + subData <- if (dbW_check_weatherData(subData, check_all = FALSE)) { + dbW_weatherData_to_dataframe(subData) + } else { + upgrade_weatherDF(subData) + } + + stopifnot( + c(vars_time, weather_dataColumns()) %in% colnames(subData) + ) + + subData1 <- dbW_convert_to_GregorianYears( + weatherData = subData, + new_startYear = new_startYear, + new_endYear = new_endYear, + type = "asis" + ) + + weatherData3 <- dbW_substituteWeather( + weatherData = weatherData2, + subData = subData1, + return_weatherDF = TRUE + ) + + is_miss3 <- is_missing_weather(weatherData3[, weather_dataColumns()]) + meta[!is_miss3 & is_miss2] <- "substituteData" + } + + + #--- Use long-term daily means to impute the rest + # - for variables not in subData before first observed value in weatherData + # - values after end of observed values in weatherData + dif_wd3 <- rSOILWAT2::calc_dailyInputFlags(weatherData3) + vars_wd3 <- names(dif_wd3)[dif_wd3] + + if (!any(is_missing_weather(weatherData3[, vars_wd3]))) { + weatherData4 <- weatherData3 + + } else { + daymeans <- data.frame( + Year = NA, + aggregate( + weatherData1[, weather_dataColumns()], + by = weatherData1["DOY"], + FUN = mean, + na.rm = TRUE + ) + ) + + + if (!is.null(subData)) { + dif_wd <- rSOILWAT2::calc_dailyInputFlags(weatherData1) + dif_sd <- rSOILWAT2::calc_dailyInputFlags(subData) + + tmp_vars <- setdiff(names(dif_sd)[dif_sd], names(dif_wd)[dif_wd]) + + if (length(tmp_vars) > 0L) { + sd_daymeans <- data.frame( + Year = NA, + aggregate( + subData[, weather_dataColumns()], + by = subData["DOY"], + FUN = mean, + na.rm = TRUE + ) + ) + + daymeans[, tmp_vars] <- sd_daymeans[, tmp_vars] + } + } + + # Linear interpolate any missing long-term daily means + daymeans2 <- suppressWarnings( + dbW_imputeWeather( + daymeans, + use_wg = FALSE, + method_after_wg = "interp", + nmax_run = Inf, + return_weatherDF = TRUE + ) + ) + + # Create object with long-term daily means repeated for each requested year + daymeans_years <- do.call( + rbind, + args = lapply( + seq(new_startYear, new_endYear), + function(year) { + tmp <- daymeans2 + tmp[["Year"]] <- year + tmp + } + ) + ) + + weatherData4 <- dbW_substituteWeather( + weatherData = weatherData3, + subData = daymeans_years, + return_weatherDF = TRUE + ) + + is_miss4 <- is_missing_weather(weatherData4[, weather_dataColumns()]) + meta[!is_miss4 & is_miss3] <- "longTermDailyMean" + } + + + #--- Return + list( + weatherData = if (isTRUE(as.logical(return_weatherDF[[1L]]))) { + weatherData4 + } else { + dbW_dataframe_to_weatherData(weatherData4) + }, + meta = meta + ) +} diff --git a/man/dbW_fixWeather.Rd b/man/dbW_fixWeather.Rd new file mode 100644 index 00000000..17933f1e --- /dev/null +++ b/man/dbW_fixWeather.Rd @@ -0,0 +1,111 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/swWeatherGenerator.R +\name{dbW_fixWeather} +\alias{dbW_fixWeather} +\title{Fix weather data} +\usage{ +dbW_fixWeather( + weatherData, + subData = NULL, + new_startYear = NULL, + new_endYear = NULL, + nmax_interp = 7L, + precip_lt_nmax = 0, + return_weatherDF = FALSE +) +} +\arguments{ +\item{weatherData}{A list of elements of class \code{\link{swWeatherData}} +or a data frame as returned by \code{\link[=dbW_weatherData_to_dataframe]{dbW_weatherData_to_dataframe()}}.} + +\item{subData}{A weather data object.} + +\item{new_startYear}{An integer value. The first Calendar year of the new +time period. If \code{NULL}, then the first year of \code{weatherData}.} + +\item{new_endYear}{An integer value. The last Calendar year of the new +time period. If \code{NULL}, then the last year of \code{weatherData}.} + +\item{nmax_interp}{An integer value. Maximum spell length of missing values +for which linear interpolation is applied.} + +\item{precip_lt_nmax}{A numeric value. Should short spells of +missing precipitation values be +linearly interpolated (if \code{NA}), +substituted with values from \code{subData} (if \code{Inf}), or +replaced by a fixed numeric value (default is 0)?} + +\item{return_weatherDF}{A logical value. See section "Value".} +} +\value{ +A list with two named elements +\itemize{ +\item \code{"weatherData"}: An updated copy of the input \code{weatherData} +where missing values have been replaced. +If \code{return_weatherDF} is \code{TRUE}, then the result is converted to a +data frame where columns represent weather variables. +If \code{return_weatherDF} is \code{FALSE}, then the result is +a list of elements of class \code{\link{swWeatherData}}. +\item \code{"meta"}: a data frame with the same dimensions as \code{"weatherData"} +with tags indicating which approach was used to replaced missing values +in corresponding cells of \code{weatherData} (see section \code{Details}) +} +} +\description{ +Missing values are \code{"fixed"} with the following approach: +\enumerate{ +\item \code{weatherData} is formatted for \code{rSOILWAT2}, i.e., converted to a +Gregorian calendar and required but missing variables added. +\item Short spells of missing values +(consecutive days shorter than \code{nmax_interp}) are linearly interpolated +from adjacent non-missing values +(meta data tag \code{"interpolateLinear (<= X days)"}) +\item Short spells of missing precipitation values are +\itemize{ +\item Linearly interpolated if \code{precip_lt_nmax} is \code{NA} +\item Set to a fixed numeric value of \code{precip_lt_nmax} +(meta data tag \code{"fixedValue"}) +\item Substituted with values from \code{subData} if \code{precip_lt_nmax} is \code{Inf} +(see next point) +} +\item Values from a second weather data object \code{subData} are used to replace +(meta data tag \verb{substituteData"}): +\itemize{ +\item Missing precipitation values (if \code{precip_lt_nmax} is \code{Inf}) +\item Values before first day with any non-missing values +\item Variables absent in \code{weatherData} and present in \code{subData} +} +\item Long-term daily means are used to replace any remaining missing values +(meta data tag \code{"longTermDailyMean"}); +for instance, this approach may be applied for +\itemize{ +\item Values of variables that are present in \code{weatherData} and +absent in \code{subData}, before first day with any non-missing values +\item Values after end of available values in both \code{weatherData} and +\code{subData} +} +} +} +\examples{ +x0 <- x <- dbW_weatherData_to_dataframe(rSOILWAT2::weatherData) + +tmp <- x[, "Year"] == 1981 +ids_to_interp <- tmp & x[, "DOY"] >= 144 & x[, "DOY"] <= 145 +x[ids_to_interp, -(1:2)] <- NA + +tmp <- x[, "Year"] == 1980 +ids_to_sub <- tmp & x[, "DOY"] >= 153 & x[, "DOY"] <= 244 +x[ids_to_sub, -(1:2)] <- NA + +xf <- dbW_fixWeather(x, x0, return_weatherDF = TRUE) +all.equal( + xf[["weatherData"]][!ids_to_interp, ], + as.data.frame(x0)[!ids_to_interp, ] +) +table(xf[["meta"]]) + +} +\seealso{ +\code{\link[=dbW_imputeWeather]{dbW_imputeWeather()}}, \code{\link[=dbW_substituteWeather]{dbW_substituteWeather()}}, +\code{\link[=dbW_generateWeather]{dbW_generateWeather()}} +} diff --git a/tests/testthat/test_WeatherData.R b/tests/testthat/test_WeatherData.R index 036b02a7..c1ec7a28 100644 --- a/tests/testthat/test_WeatherData.R +++ b/tests/testthat/test_WeatherData.R @@ -344,3 +344,66 @@ test_that("Weather data substitution", { ) }) + + +test_that("Weather data fixing", { + x0 <- x <- as.data.frame(dbW_weatherData_to_dataframe(rSOILWAT2::weatherData)) + dif <- calc_dailyInputFlags(x0) + vars <- names(dif)[dif] + + + #--- * Check no change to no missing values ------ + xf <- dbW_fixWeather(x0, return_weatherDF = TRUE) + expect_identical(xf[["weatherData"]], x0) + expect_true(all(is.na(xf[["meta"]]))) + + + #--- * Check interpolation and substitution ------ + # * Expect short missing spell to interpolate (except precipitation) + # Set May 23-24 of 1981 as missing + tmp <- x[, "Year"] == 1981 + ids_to_interp <- tmp & x[, "DOY"] >= 144 & x[, "DOY"] <= 145 + x[ids_to_interp, -(1:2)] <- NA + + # * Expect long missing spell to substitute + # Set June-August of 1980 as missing + tmp <- x[, "Year"] == 1980 + ids_to_sub <- tmp & x[, "DOY"] >= 153 & x[, "DOY"] <= 244 + x[ids_to_sub, -(1:2)] <- NA + + + xf <- dbW_fixWeather( + weatherData = x, + subData = x0, + new_endYear = max(x[["Year"]]) + 1L, # expect long term daily mean + nmax_interp = 5L, + return_weatherDF = TRUE + ) + + expect_false(anyNA(xf[["weatherData"]][, vars])) + + ids_has <- seq_len(nrow(x0)) + expect_identical( + xf[["weatherData"]][ids_has[!ids_to_interp], vars], + x0[!ids_to_interp, vars] + ) + + tmpc <- table(xf[["meta"]]) + expect_identical( + tmpc[["interpolateLinear (<= 5 days)"]], + sum(ids_to_interp) * length(setdiff(vars, "PPT_cm")) + ) + expect_identical( + tmpc[["fixedValue"]], + sum(ids_to_interp) # precipitation + ) + expect_identical( + tmpc[["substituteData"]], + sum(ids_to_sub) * length(vars) + ) + expect_identical( + tmpc[["longTermDailyMean"]], + 365L * length(vars) + ) + +}) From e0017bc0ae1d08ccfae7f83eeda8366fca085cb5 Mon Sep 17 00:00:00 2001 From: Daniel Schlaepfer Date: Fri, 1 Dec 2023 18:07:18 -0500 Subject: [PATCH 7/7] Obtain external weather data and format for rSOILWAT2 New family of functions `sw_meteo_obtain` that obtain (download) weather data from external sources and prepare for use by `"rSOILWAT2"` * `sw_meteo_obtain_DayMet()` obtains and formats data from `"Daymet"` * `sw_meteo_obtain_SCAN()` obtains and formats data from `"SCAN"` - vignette "demo" is simplified thanks to `sw_meteo_obtain_DayMet()` --- NAMESPACE | 2 + NEWS.md | 19 +- R/sw_WeatherExtract.R | 316 ++++++++++++++++++++++++ man/sw_meteo_obtain.Rd | 119 +++++++++ tests/testthat/test_WeatherExtraction.R | 60 +++++ vignettes/rSOILWAT2_demo.Rmd | 61 +---- 6 files changed, 521 insertions(+), 56 deletions(-) create mode 100644 R/sw_WeatherExtract.R create mode 100644 man/sw_meteo_obtain.Rd create mode 100644 tests/testthat/test_WeatherExtraction.R diff --git a/NAMESPACE b/NAMESPACE index 2fdc1aa3..bd73f88f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -141,6 +141,8 @@ export(sw_dailyC4_TempVar) export(sw_exec) export(sw_inputData) export(sw_inputDataFromFiles) +export(sw_meteo_obtain_DayMet) +export(sw_meteo_obtain_SCAN) export(sw_out_flags) export(sw_outputData) export(sw_verbosity) diff --git a/NEWS.md b/NEWS.md index baba9383..6e5b5a80 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,13 +1,5 @@ # rSOILWAT2 v6.0.4-9000 - - -# rSOILWAT2 v6.0.3 * This version produces the same output as the previous version. -* Update `SOILWAT2` to v7.2.0 which improves error handling and fixes - memory leaks on error (#239; @dschlaep, @N1ckP3rsl3y). - -## Bug fixes -* Code no longer requires (unused) `pcg` header (@dschlaep). ## New features * New `upgrade_weatherDF()` adds requested weather columns to a data frame @@ -37,6 +29,17 @@ a fixed value for short spells of missing precipitation (optionally), substitution from a second weather data object, and replacement with long term daily mean values (@dschlaep). +* New family of functions `sw_meteo_obtain` that obtain (download) weather + data from external sources and prepare for use by `"rSOILWAT2"` (@dschlaep): + * New `sw_meteo_obtain_DayMet()` obtains and formats data from `"Daymet"` + * New `sw_meteo_obtain_SCAN()` obtains and formats data from `"SCAN"` + + +# rSOILWAT2 v6.0.3 +* This version produces the same output as the previous version. +* Update `SOILWAT2` to v7.2.0 which improves error handling and fixes + memory leaks on error (#239; @dschlaep, @N1ckP3rsl3y). +* Code no longer requires (unused) `pcg` header (@dschlaep). # rSOILWAT2 v6.0.2 diff --git a/R/sw_WeatherExtract.R b/R/sw_WeatherExtract.R new file mode 100644 index 00000000..4f597a6f --- /dev/null +++ b/R/sw_WeatherExtract.R @@ -0,0 +1,316 @@ +#--- sw_meteo_obtain ------ + +#' Extract meteorological data from external source and format for `rSOILWAT2` +#' +#' @param x Identifying information for the station/location for which +#' meteorological data is requested, see details. +#' @param start_year A integer value. The first calendar year of the simulation. +#' @param end_year A integer value. The last calendar year of the simulation. +#' @param rawdata A data object retrieved previously. +#' @param ... Additional arguments are ignored. +#' +#' @return A list with the following elements +#' * `"metadata"`: site metadata (if available) +#' * `"rawdata"`: data as downloaded (or argument `rawdata` if provided) +#' * `"weatherDF"`: data frame with weather data formatted for `rSOILWAT2` +#' * `"vals_missing"`: logical matrix indicating which weather values in +#' `"weatherDF"` are missing +#' * `"desc_rsds"`: solar radiation descriptor (if available) +#' * `"use_cloudCoverMonthly"`: flag indicating need for monthly cloud cover +#' * `"use_windSpeedMonthly"`: flag indicating need for monthly wind speed +#' * `"use_humidityMonthly"`: flag indicating need for monthly humidity +#' * `"dailyInputFlags"`: logical vector indicating which weather variables +#' contain at least some values (see [calc_dailyInputFlags()]) +#' +#' @md +#' @name sw_meteo_obtain +NULL + + + +sw_download_DayMet <- function(longitude, latitude, years) { + stopifnot( + requireNamespace("daymetr") + ) + + res <- try( + daymetr::download_daymet( + lat = latitude, + lon = longitude, + start = years[[1L]], + end = years[[length(years)]], + internal = TRUE, + force = TRUE, + simplify = FALSE, + silent = TRUE + ), + silent = TRUE + ) + + if (inherits(res, "try-error")) { + stop("Download DayMet data failed: ", res) + } + + res +} + + +#' @rdname sw_meteo_obtain +#' +#' @section Details: +#' `sw_meteo_obtain_DayMet()` uses data from +#' [`DayMet ORNL DAAC`](https://daymet.ornl.gov/single-pixel/) +#' via [daymetr::download_daymet()]. +#' The argument `x` is a named vector with `"longitude"` and `"latitude"` in +#' decimal degrees. +#' +#' @examples +#' ## Example: Daymet weather for "Mccracken Mesa" location +#' ## (see `mm_scan[["metadata"]]`) +#' if (requireNamespace("curl") && curl::has_internet()) { +#' mm_dm <- rSOILWAT2::sw_meteo_obtain_DayMet( +#' x = c(longitude = -109.3378, latitude = 37.44671), +#' start_year = 2015, +#' end_year = 2023 +#' ) +#' +#' # Fill in missing values +#' mm_dm_wdata <- rSOILWAT2::dbW_fixWeather(mm_dm[["weatherDF"]]) +#' +#' # Prepare weather setup for a SOILWAT2 simulation +#' swin <- rSOILWAT2::sw_exampleData +#' rSOILWAT2::swYears_EndYear(swin) <- 2023 +#' rSOILWAT2::swYears_StartYear(swin) <- 2015 +#' swin@weather@desc_rsds <- mm_dm[["desc_rsds"]] +#' swin@weather@use_cloudCoverMonthly <- mm_dm[["use_cloudCoverMonthly"]] +#' swin@weather@use_windSpeedMonthly <- mm_dm[["use_windSpeedMonthly"]] +#' swin@weather@use_humidityMonthly <- mm_dm[["use_humidityMonthly"]] +#' swin@weather@dailyInputFlags <- mm_dm[["dailyInputFlags"]] +#' +#' # Run simulation (after providing inputs for CO2, etc.) +#' swout <- try( +#' rSOILWAT2::sw_exec( +#' inputData = swin, +#' weatherList = mm_dm_wdata[["weatherData"]], +#' quiet = TRUE +#' ), +#' silent = TRUE +#' ) +#' } +#' +#' @md +#' @export +sw_meteo_obtain_DayMet <- function( + x, + start_year, + end_year, + rawdata = NULL, + ... +) { + + #--- Download NRCS station data (if needed) ------ + if (is.null(rawdata)) { + rawdata <- sw_download_DayMet( + longitude = x[["longitude"]], + latitude = x[["latitude"]], + years = start_year:end_year + ) + } + + #--- Prepare requested station data ------ + + #--- * Daily weather data (update with additional variables) ------ + stopifnot( + length(rawdata[["data"]][["tmax..deg.c."]]) > 0L + ) + + tmpm <- data.frame( + Year = rawdata[["data"]][["year"]], + DOY = rawdata[["data"]][["yday"]], + Tmax_C = rawdata[["data"]][["tmax..deg.c."]], + Tmin_C = rawdata[["data"]][["tmin..deg.c."]], + PPT_cm = rawdata[["data"]][["prcp..mm.day."]] / 10, # convert mm -> cm + actVP_kPa = 1e-3 * rawdata[["data"]][["vp..Pa."]], # convert Pa -> kPa + shortWR = + 1e-6 * rawdata[["data"]][["srad..W.m.2."]] * + rawdata[["data"]][["dayl..s."]] # this will be desc_rsds = 1 + ) + + + # Add missing variables and missing days to complete full calendar years + dm_weather <- dbW_convert_to_GregorianYears( + weatherData = upgrade_weatherDF(tmpm), + new_startYear = start_year, + new_endYear = end_year, + type = "asis" + ) + + vars_meteo <- weather_dataColumns() + dif <- calc_dailyInputFlags(dm_weather, name_data = vars_meteo) + + list( + metadata = rawdata[c("site", "tile", "latitude", "longitude", "altitude")], + rawdata = rawdata, + weatherDF = dm_weather, + vals_missing = is_missing_weather(dm_weather[, vars_meteo, drop = FALSE]), + desc_rsds = 1L, + use_cloudCoverMonthly = FALSE, # use radiation instead + use_windSpeedMonthly = TRUE, + use_humidityMonthly = FALSE, # use vapor pressure instead + dailyInputFlags = dif + ) +} + + + +sw_download_SCAN <- function(nrcs_site_code, years) { + stopifnot(requireNamespace("soilDB")) + + res <- try( + soilDB::fetchSCAN( + site.code = nrcs_site_code, + year = years, + report = "SCAN", + timeseries = "Daily" + ), + silent = TRUE + ) + + if (inherits(res, "try-error")) { + stop("Download NRCS station data failed: ", res) + } + + res +} + + + +#' @rdname sw_meteo_obtain +#' +#' @section Details: +#' `sw_meteo_obtain_SCAN()` uses data from +# nolint start: line_length_linter. +#' [`USDA-NRCS` `SCAN/SNOTEL` stations](https://www.nrcs.usda.gov/resources/data-and-reports/soil-climate-analysis-network) +# nolint end: line_length_linter. +#' via [soilDB::fetchSCAN()]. +#' The argument `x` takes a `NRCS` `SCAN/SNOTEL` station code. +#' +#' @examples +#' ## Example: SCAN station "Mccracken Mesa" +#' if (requireNamespace("curl") && curl::has_internet()) { +#' mm_scan <- rSOILWAT2::sw_meteo_obtain_SCAN( +#' x = 2140, # SCAN station code +#' start_year = 2015, +#' end_year = 2023 +#' ) +#' } +#' +#' if (exists("mm_scan") && exists("mm_dm") && requireNamespace("graphics")) { +#' vars <- c("Tmax_C", "Tmin_C", "PPT_cm") +#' par_prev <- graphics::par(mfrow = grDevices::n2mfrow(length(vars))) +#' +#' for (k in seq_along(vars)) { +#' graphics::plot( +#' x = mm_scan[["weatherDF"]][[vars[[k]]]], +#' y = mm_dm[["weatherDF"]][[vars[[k]]]], +#' xlab = paste("SCAN", vars[[k]]), +#' ylab = paste("DayMet", vars[[k]]) +#' ) +#' graphics::abline(a = 0, b = 1, col = "red") +#' } +#' +#' graphics::par(par_prev) +#' } +#' +#' @md +#' @export +sw_meteo_obtain_SCAN <- function( + x, + start_year, + end_year, + rawdata = NULL, + ... +) { + + #--- Download NRCS station data (if needed) ------ + if (is.null(rawdata)) { + rawdata <- sw_download_SCAN(x, years = start_year:end_year) + } + + #--- Prepare requested station data ------ + + #--- * Daily weather data (update with additional variables) ------ + stopifnot( + nrow(rawdata[["TMAX"]]) > 0L, + nrow(rawdata[["TMIN"]]) > 0L, + nrow(rawdata[["PRCP"]]) > 0L + ) + + + tmp <- merge( + # Daily air temperature maximum [C] + rawdata[["TMAX"]][, c("Date", "value"), drop = FALSE], + # Daily air temperature minimum [C] + rawdata[["TMIN"]][, c("Date", "value"), drop = FALSE], + by = "Date", + suffixes = c("_tmaxC", "_tminC"), + all = TRUE + ) + + tmp <- merge( + tmp, + # Daily precipitation amount [in] + rawdata[["PRCP"]][, c("Date", "value"), drop = FALSE], + by = "Date", + all = TRUE + ) + + tmp_dates <- as.POSIXlt(tmp[["Date"]]) + + tmpm <- data.frame( + Date = tmp[["Date"]], + Year = 1900L + tmp_dates$year, + DOY = 1L + tmp_dates$yday, + Tmax_C = tmp[["value_tmaxC"]], + Tmin_C = tmp[["value_tminC"]], + PPT_cm = 1 / 2.54 * tmp[["value"]], # convert [in] -> [cm] + stringsAsFactors = FALSE + ) + + + if (nrow(rawdata[["WSPDV"]]) > 0L) { + stopifnot("WSPDV is not yet implemented") + + tmp2 <- merge( + tmpm, + # Wind speed + rawdata[["WSPDV"]][, c("Date", "value"), drop = FALSE], + by = "Date", + all = TRUE + ) + } + + + # Add missing variables and missing days to complete full calendar years + scan_weather <- dbW_convert_to_GregorianYears( + weatherData = upgrade_weatherDF(tmpm), + new_startYear = start_year, + new_endYear = end_year, + type = "asis" + ) + + vars_meteo <- weather_dataColumns() + dif <- calc_dailyInputFlags(scan_weather, name_data = vars_meteo) + + list( + metadata = rawdata[["metadata"]], + rawdata = rawdata, + weatherDF = scan_weather, + vals_missing = is_missing_weather(scan_weather[, vars_meteo, drop = FALSE]), + desc_rsds = NA_integer_, + use_cloudCoverMonthly = TRUE, + use_windSpeedMonthly = unname(!dif[which(vars_meteo == "windSpeed_mPERs")]), + use_humidityMonthly = TRUE, + dailyInputFlags = dif + ) +} diff --git a/man/sw_meteo_obtain.Rd b/man/sw_meteo_obtain.Rd new file mode 100644 index 00000000..6e2a816b --- /dev/null +++ b/man/sw_meteo_obtain.Rd @@ -0,0 +1,119 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sw_WeatherExtract.R +\name{sw_meteo_obtain} +\alias{sw_meteo_obtain} +\alias{sw_meteo_obtain_DayMet} +\alias{sw_meteo_obtain_SCAN} +\title{Extract meteorological data from external source and format for \code{rSOILWAT2}} +\usage{ +sw_meteo_obtain_DayMet(x, start_year, end_year, rawdata = NULL, ...) + +sw_meteo_obtain_SCAN(x, start_year, end_year, rawdata = NULL, ...) +} +\arguments{ +\item{x}{Identifying information for the station/location for which +meteorological data is requested, see details.} + +\item{start_year}{A integer value. The first calendar year of the simulation.} + +\item{end_year}{A integer value. The last calendar year of the simulation.} + +\item{rawdata}{A data object retrieved previously.} + +\item{...}{Additional arguments are ignored.} +} +\value{ +A list with the following elements +\itemize{ +\item \code{"metadata"}: site metadata (if available) +\item \code{"rawdata"}: data as downloaded (or argument \code{rawdata} if provided) +\item \code{"weatherDF"}: data frame with weather data formatted for \code{rSOILWAT2} +\item \code{"vals_missing"}: logical matrix indicating which weather values in +\code{"weatherDF"} are missing +\item \code{"desc_rsds"}: solar radiation descriptor (if available) +\item \code{"use_cloudCoverMonthly"}: flag indicating need for monthly cloud cover +\item \code{"use_windSpeedMonthly"}: flag indicating need for monthly wind speed +\item \code{"use_humidityMonthly"}: flag indicating need for monthly humidity +\item \code{"dailyInputFlags"}: logical vector indicating which weather variables +contain at least some values (see \code{\link[=calc_dailyInputFlags]{calc_dailyInputFlags()}}) +} +} +\description{ +Extract meteorological data from external source and format for \code{rSOILWAT2} +} +\section{Details}{ + +\code{sw_meteo_obtain_DayMet()} uses data from +\href{https://daymet.ornl.gov/single-pixel/}{\verb{DayMet ORNL DAAC}} +via \code{\link[daymetr:download_daymet]{daymetr::download_daymet()}}. +The argument \code{x} is a named vector with \code{"longitude"} and \code{"latitude"} in +decimal degrees. + + +\code{sw_meteo_obtain_SCAN()} uses data from +\href{https://www.nrcs.usda.gov/resources/data-and-reports/soil-climate-analysis-network}{\code{USDA-NRCS} \code{SCAN/SNOTEL} stations} +via \code{\link[soilDB:fetchSCAN]{soilDB::fetchSCAN()}}. +The argument \code{x} takes a \code{NRCS} \code{SCAN/SNOTEL} station code. +} + +\examples{ +## Example: Daymet weather for "Mccracken Mesa" location +## (see `mm_scan[["metadata"]]`) +if (requireNamespace("curl") && curl::has_internet()) { + mm_dm <- rSOILWAT2::sw_meteo_obtain_DayMet( + x = c(longitude = -109.3378, latitude = 37.44671), + start_year = 2015, + end_year = 2023 + ) + + # Fill in missing values + mm_dm_wdata <- rSOILWAT2::dbW_fixWeather(mm_dm[["weatherDF"]]) + + # Prepare weather setup for a SOILWAT2 simulation + swin <- rSOILWAT2::sw_exampleData + rSOILWAT2::swYears_EndYear(swin) <- 2023 + rSOILWAT2::swYears_StartYear(swin) <- 2015 + swin@weather@desc_rsds <- mm_dm[["desc_rsds"]] + swin@weather@use_cloudCoverMonthly <- mm_dm[["use_cloudCoverMonthly"]] + swin@weather@use_windSpeedMonthly <- mm_dm[["use_windSpeedMonthly"]] + swin@weather@use_humidityMonthly <- mm_dm[["use_humidityMonthly"]] + swin@weather@dailyInputFlags <- mm_dm[["dailyInputFlags"]] + + # Run simulation (after providing inputs for CO2, etc.) + swout <- try( + rSOILWAT2::sw_exec( + inputData = swin, + weatherList = mm_dm_wdata[["weatherData"]], + quiet = TRUE + ), + silent = TRUE + ) +} + +## Example: SCAN station "Mccracken Mesa" +if (requireNamespace("curl") && curl::has_internet()) { + mm_scan <- rSOILWAT2::sw_meteo_obtain_SCAN( + x = 2140, # SCAN station code + start_year = 2015, + end_year = 2023 + ) +} + +if (exists("mm_scan") && exists("mm_dm") && requireNamespace("graphics")) { + vars <- c("Tmax_C", "Tmin_C", "PPT_cm") + par_prev <- graphics::par(mfrow = grDevices::n2mfrow(length(vars))) + + for (k in seq_along(vars)) { + graphics::plot( + x = mm_scan[["weatherDF"]][[vars[[k]]]], + y = mm_dm[["weatherDF"]][[vars[[k]]]], + xlab = paste("SCAN", vars[[k]]), + ylab = paste("DayMet", vars[[k]]) + ) + graphics::abline(a = 0, b = 1, col = "red") + } + + graphics::par(par_prev) +} + +} diff --git a/tests/testthat/test_WeatherExtraction.R b/tests/testthat/test_WeatherExtraction.R new file mode 100644 index 00000000..e20bec02 --- /dev/null +++ b/tests/testthat/test_WeatherExtraction.R @@ -0,0 +1,60 @@ + +# see sw_meteo_obtain +vars_mm <- c( + "metadata", + "rawdata", + "weatherDF", + "vals_missing", + "desc_rsds", + "use_cloudCoverMonthly", + "use_windSpeedMonthly", + "use_humidityMonthly", + "dailyInputFlags" +) + +expect_obtained_meteo <- function(x) { + expect_false(inherits(!!x, "try-error")) + + expect_named(!!x, expected = vars_mm, ignore.order = TRUE) + + wd <- dbW_dataframe_to_weatherData(x[["weatherDF"]]) + expect_true(dbW_check_weatherData(wd)) + expect_identical(nrow(x[["weatherDF"]]), nrow(x[["vals_missing"]])) +} + + +test_that("Weather data extraction", { + skip_on_ci() + skip_if_offline() + + #--- Daymet weather for "Mccracken Mesa" location + if (requireNamespace("daymetr")) { + mm_dm <- suppressMessages( + try( + rSOILWAT2::sw_meteo_obtain_DayMet( + x = c(longitude = -109.3378, latitude = 37.44671), + start_year = 2015, + end_year = 2023 + ) + ) + ) + + expect_obtained_meteo(mm_dm) + } + + + #--- SCAN station "Mccracken Mesa" + if (requireNamespace("soilDB")) { + mm_scan <- suppressMessages( + try( + rSOILWAT2::sw_meteo_obtain_SCAN( + x = 2140, # SCAN station code + start_year = 2015, + end_year = 2023 + ) + ) + ) + + expect_obtained_meteo(mm_scan) + } +}) diff --git a/vignettes/rSOILWAT2_demo.Rmd b/vignettes/rSOILWAT2_demo.Rmd index 95f37e8c..57e263dd 100644 --- a/vignettes/rSOILWAT2_demo.Rmd +++ b/vignettes/rSOILWAT2_demo.Rmd @@ -247,55 +247,20 @@ You may organize weather data in a variety of ways: curl::has_internet() ) { - # Download from `DayMet` - dm_Laramie <- try(daymetr::download_daymet( - site = "testsite_name", - lat = 41.3167, - lon = -105.6833, - start = rSOILWAT2::swYears_StartYear(sw_in), - end = rSOILWAT2::swYears_EndYear(sw_in), - internal = TRUE, - simplify = FALSE - )) - - if (!inherits(dm_Laramie, "try-error")) { - # Convert data to a `rSOILWAT2`-formatted weather object - dif <- c(rep(TRUE, 3L), rep(FALSE, 11L)) # Tmax, Tmin, PPT - dif[13L] <- TRUE # ACTUAL_VP - dif[14L] <- TRUE # SHORT_WR, desc_rsds = 2 - - vars <- c( - "year", "yday", - "tmax..deg.c.", "tmin..deg.c.", "prcp..mm.day.", - "vp..Pa.", - "srad..W.m.2." - ) - xdf <- dm_Laramie[["data"]][, vars] - xdf[, "prcp..mm.day."] <- xdf[, "prcp..mm.day."] / 10 # convert mm -> cm - xdf[, "vp..Pa."] <- xdf[, "vp..Pa."] / 1000 # convert Pa -> kPa - colnames(xdf) <- c( - "Year", "DOY", - "Tmax_C", "Tmin_C", "PPT_cm", "actVP_kPa", "shortWR" - ) - - xdf2 <- array( - dim = c(nrow(xdf), 2L + length(rSOILWAT2::weather_dataColumns())), - dimnames = list( - NULL, - c("Year", "DOY", rSOILWAT2::weather_dataColumns()) - ) + mm_dm <- try( + rSOILWAT2::sw_meteo_obtain_DayMet( + x = c(longitude = -105.6833, latitude = 41.3167), # Laramie WY + start_year = rSOILWAT2::swYears_StartYear(sw_in), + end_year = rSOILWAT2::swYears_EndYear(sw_in) ) + ) - xdf2[, colnames(xdf)] <- as.matrix(xdf) - - - # Convert `DayMet`'s `noleap` calendar to proleptic Gregorian calendar - xdf3 <- rSOILWAT2::dbW_convert_to_GregorianYears(xdf2, type = "asis") + if (!inherits(mm_dm, "try-error")) { # Impute values for added leap days # Use weather generator for available variables, otherwise interpolate wdata <- rSOILWAT2::dbW_imputeWeather( - weatherData = xdf3, + weatherData = mm_dm[["weatherDF"]], use_wg = TRUE, method_after_wg = "interp", seed = 123L @@ -305,11 +270,11 @@ You may organize weather data in a variety of ways: stopifnot(rSOILWAT2::dbW_check_weatherData(wdata)) # Set use flags - sw_in@weather@desc_rsds <- 2L # flux density over the daylight period - sw_in@weather@use_cloudCoverMonthly <- FALSE # use radiation instead - sw_in@weather@use_windSpeedMonthly <- TRUE - sw_in@weather@use_humidityMonthly <- FALSE # use vapor pressure instead - sw_in@weather@dailyInputFlags <- dif + sw_in@weather@desc_rsds <- mm_dm[["desc_rsds"]] + sw_in@weather@use_cloudCoverMonthly <- mm_dm[["use_cloudCoverMonthly"]] + sw_in@weather@use_windSpeedMonthly <- mm_dm[["use_windSpeedMonthly"]] + sw_in@weather@use_humidityMonthly <- mm_dm[["use_humidityMonthly"]] + sw_in@weather@dailyInputFlags <- mm_dm[["dailyInputFlags"]] has_daymet <- TRUE }