diff --git a/NAMESPACE b/NAMESPACE index 155eeb5..f065fff 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,6 +31,7 @@ importFrom(lubridate,ymd_hms) importFrom(stats,approx) importFrom(stats,lm) importFrom(stats,na.omit) +importFrom(stats,predict) importFrom(utils,browseURL) importFrom(utils,head) importFrom(utils,read.csv) diff --git a/R/models.R b/R/models.R index f5c294e..049af7d 100644 --- a/R/models.R +++ b/R/models.R @@ -14,8 +14,8 @@ #' diffusion theory; see Hutchinson and Mosier (1981) and Nakano et al. (2004). #' #' For each model type, the following columns are returned: -#' * Model statistics \code{AIC}, \code{r.squared}, \code{sigma}, -#' and \code{p.value}; +#' * Model statistics \code{AIC}, \code{r.squared}, \code{RMSE}, +#' and \code{p.value}; #' * Flux (slope) statistics \code{flux.estimate} and \code{flux.std.error}; #' * Intercept statistics \code{int.estimate} and \code{int.std.error}; #' * For the robust linear regression model only, @@ -38,7 +38,7 @@ #' 1981. \url{http://dx.doi.org/10.2136/sssaj1981.03615995004500020017x} #' @importFrom broom glance tidy #' @importFrom MASS rlm -#' @importFrom stats lm +#' @importFrom stats lm predict #' @export #' @examples #' # Toy data - linear @@ -64,9 +64,10 @@ ffi_fit_models <- function(time, conc, area, volume) { # Linear model overall metrics. 'glance' produces 12 different ones; # we keep the first 5 (adjR2, R2, sigma, statistic, p-value) lin_model_stats <- glance(mod)[c("r.squared", "sigma", "p.value", "AIC")] + names(lin_model_stats) <- paste0("lin_", names(lin_model_stats)) - # Slope and intercept statistics + # Slope and intercept statistics tmod <- tidy(mod) lin_slope_stats <- tmod[2, c("estimate", "std.error")] names(lin_slope_stats) <- paste0("lin_flux.", names(lin_slope_stats)) @@ -84,11 +85,13 @@ ffi_fit_models <- function(time, conc, area, volume) { rob_model_stats <- glance(robust)[c("sigma", "converged", "AIC")] tmod <- tidy(robust) rob_slope_stats <- tmod[2, c("estimate", "std.error")] + # rob_int_stats <- tmod[1, c("estimate", "std.error")] }, error = function(e) { warning("Could not fit robust linear model") - rob_model_stats <- data.frame(sigma = NA_real_, converged = FALSE, AIC = NA_real_) + rob_model_stats <- data.frame(sigma = NA_real_, converged = FALSE, + AIC = NA_real_, RMSE = NA_real_) rob_slope_stats <- data.frame(estimate = NA_real_, std.error = NA_real_) # rob_int_stats <- data.frame(estimate = NA_real_, std.error = NA_real_) }) @@ -101,11 +104,11 @@ ffi_fit_models <- function(time, conc, area, volume) { # int_stats <- cbind(int_stats, rob_int_stats) # Add polynomial regression as a QA/QC check - poly_model_stats <- data.frame(r.squared = NA_real_, AIC = NA_real_) + poly_model_stats <- data.frame(r.squared = NA_real_, AIC = NA_real_, RMSE = NA_real_) if(length(time) > 3) { try({ poly <- lm(conc ~ poly(time, 3)) - poly_model_stats <- glance(poly)[c("r.squared", "AIC")] + poly_model_stats <- glance(poly)[c("r.squared", "sigma", "AIC")] }) } @@ -117,11 +120,11 @@ ffi_fit_models <- function(time, conc, area, volume) { # The HM1981 approach is based on an exponential model, so derive fit # statistics by log-transforming the data - if(!is.na(slope_stats$HM81_flux.estimate)) { - ffi_message("NOTE: HM81_flux.estimate is not NA, implying nonlinear data") + if(is.na(slope_stats$HM81_flux.estimate)) { hm81_model_stats <- data.frame(r.squared = NA_real_, sigma = NA_real_, p.value = NA_real_, AIC = NA_real_) } else { + ffi_message("NOTE: HM81_flux.estimate is not NA, implying nonlinear data") # The time values are probably normalized, i.e. starting at zero # Add a (presumably) tiny offset so we don't get log(0) errors mod <- lm(conc ~ log(time + 0.01)) @@ -131,6 +134,9 @@ ffi_fit_models <- function(time, conc, area, volume) { names(hm81_model_stats) <- paste0("HM81_", names(hm81_model_stats)) model_stats <- cbind(model_stats, hm81_model_stats) + # Change "sigma" to "RMSE"; more intuitive for most users + names(model_stats) <- gsub("sigma", "RMSE", names(model_stats)) + # Combine, sort columns, and return out <- cbind(model_stats, slope_stats, int_stats) return(out[sort(names(out))]) diff --git a/man/ffi_fit_models.Rd b/man/ffi_fit_models.Rd index 82b6d77..72451b8 100644 --- a/man/ffi_fit_models.Rd +++ b/man/ffi_fit_models.Rd @@ -25,7 +25,7 @@ diffusion theory; see Hutchinson and Mosier (1981) and Nakano et al. (2004). For each model type, the following columns are returned: \itemize{ -\item Model statistics \code{AIC}, \code{r.squared}, \code{sigma}, +\item Model statistics \code{AIC}, \code{r.squared}, \code{RMSE}, and \code{p.value}; \item Flux (slope) statistics \code{flux.estimate} and \code{flux.std.error}; \item Intercept statistics \code{int.estimate} and \code{int.std.error}; diff --git a/vignettes/intro-to-fluxfinder.Rmd b/vignettes/intro-to-fluxfinder.Rmd index 7177d59..8a5dd97 100644 --- a/vignettes/intro-to-fluxfinder.Rmd +++ b/vignettes/intro-to-fluxfinder.Rmd @@ -275,8 +275,9 @@ x <- round(x, 3) From the diagnostics returned by `ffi_compute_fluxes`: * `HM81_flux.estimate` is not `NA`, which only occurs with saturating behavior; -* The `lin_AIC` (`r x$lin_AIC`) and `rob_AIC` (`r x$rob_AIC`) values are similar, so no indication of influential outliers; +* The `lin_AIC` (`r x$lin_AIC`) and `rob_AIC` (`r x$rob_AIC`) [Akaike information criterion](https://en.wikipedia.org/wiki/Akaike_information_criterion) values are similar, so no indication of influential outliers; * The `lin_r.squared` (`r x$lin_r.squared`) and `poly_r.squared` (`r x$poly_r.squared`) values are _very_ different, suggesting a failure of the linear model; +* The [root mean square error](https://en.wikipedia.org/wiki/Root_mean_square_deviation) (RMSE) of the linear model is much higher than the other models' values; * The `HM81_r.squared` (`r x$HM81_r.squared`) and `HM81_AIC` (`r x$HM81_AIC`) are considerably higher and lower, respectively, than the linear model. All of these metrics point to a common conclusion: a linear model is _not_