diff --git a/DESCRIPTION b/DESCRIPTION index d9196b2a0..dc6ea9590 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: sdmTMB Title: Spatial and Spatiotemporal SPDE-Based GLMMs with 'TMB' -Version: 0.4.3.9002 +Version: 0.4.2.9003 Authors@R: c( person(c("Sean", "C."), "Anderson", , "sean@seananderson.ca", role = c("aut", "cre"), diff --git a/NAMESPACE b/NAMESPACE index b97077bdc..6627deb3a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,6 +28,7 @@ export(censored_poisson) export(delta_beta) export(delta_gamma) export(delta_gamma_mix) +export(delta_gengamma) export(delta_lognormal) export(delta_lognormal_mix) export(delta_poisson_link_gamma) @@ -38,6 +39,7 @@ export(dharma_residuals) export(extract_mcmc) export(gamma_mix) export(gather_sims) +export(gengamma) export(get_cog) export(get_crs) export(get_index) diff --git a/NEWS.md b/NEWS.md index ad867864c..2f2cc5852 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # sdmTMB (development version) +* Add the generalized gamma distribution (thanks to J.T. Thorson with additional + work by J.C. Dunic.) See `gengamma()`. This distribution is still in a testing + phase and is not recommended for applied use yet. #286 + * Detect possible issue with factor(time) in formula if same column name is used for `time` and `extra_time` is specified. #320 diff --git a/R/enum.R b/R/enum.R index 667920217..59affbf1e 100644 --- a/R/enum.R +++ b/R/enum.R @@ -16,7 +16,8 @@ censored_poisson = 12, gamma_mix = 13, lognormal_mix = 14, - nbinom2_mix = 15 + nbinom2_mix = 15, + gengamma = 16 ) .valid_link <- c( identity = 0, diff --git a/R/families.R b/R/families.R index 4dd5b2807..8c46d34ab 100644 --- a/R/families.R +++ b/R/families.R @@ -72,6 +72,40 @@ lognormal <- function(link = "log") { add_to_family(x) } +#' @export +#' @rdname families +#' @examples +#' gengamma(link = "log") +#' @details +#' The `gengamma()` family was implemented by J.T. Thorson and uses the Prentice +#' (1974) parameterization such that the lognormal occurs as the internal +#' parameter `gengamma_Q` (reported in `print()` as "Generalized gamma lambda") +#' approaches 0. +#' +#' @references +#' +#' *Generalized gamma family*: +#' +#' Prentice, R.L. 1974. A log gamma model and its maximum likelihood estimation. +#' Biometrika 61(3): 539–544. \doi{10.1093/biomet/61.3.539} +#' +#' Stacy, E.W. 1962. A Generalization of the Gamma Distribution. The Annals of +#' Mathematical Statistics 33(3): 1187–1192. Institute of Mathematical +#' Statistics. + +gengamma <- function(link = "log") { + linktemp <- substitute(link) + if (!is.character(linktemp)) + linktemp <- deparse(linktemp) + okLinks <- c("identity", "log", "inverse") + if (linktemp %in% okLinks) + stats <- stats::make.link(linktemp) + else if (is.character(link)) + stats <- stats::make.link(link) + x <- c(list(family = "gengamma", link = linktemp), stats) + add_to_family(x) +} + #' @details The families ending in `_mix()` are 2-component mixtures where each #' distribution has its own mean but a shared scale parameter. #' (Thorson et al. 2011). See the model-description vignette for details. @@ -328,6 +362,30 @@ delta_gamma_mix <- function(link1 = "logit", link2 = "log") { clean_name = "delta_gamma_mix(link1 = 'logit', link2 = 'log')"), class = "family") } +#' @export +#' @examples +#' delta_gengamma() +#' @rdname families +delta_gengamma <- function(link1 = "logit", link2 = "log", type = c("standard", "poisson-link")) { + type <- match.arg(type) + l1 <- substitute(link1) + if (!is.character(l1)) l1 <- deparse(l1) + l2 <- substitute(link2) + if (!is.character(l2)) l2 <- deparse(l2) + f1 <- binomial(link = l1) + f2 <- gengamma(link = l2) + if (type == "poisson-link") { + .type <- "poisson_link_delta" + clean_name <- paste0("delta_gengamma(link1 = '", l1, "', link2 = '", l2, "', type = 'poisson-link')") + } else { + .type <- "standard" + clean_name <- paste0("delta_gengamma(link1 = '", l1, "', link2 = '", l2, "')") + } + structure(list(f1, f2, delta = TRUE, link = c("logit", "log"), + type = .type, family = c("binomial", "gengamma"), + clean_name = clean_name), class = "family") +} + #' @export #' @examples #' delta_lognormal() diff --git a/R/fit.R b/R/fit.R index 07481d9ee..dc47fb93f 100644 --- a/R/fit.R +++ b/R/fit.R @@ -27,8 +27,10 @@ NULL #' \code{\link[sdmTMB:families]{censored_poisson()}}, #' \code{\link[sdmTMB:families]{gamma_mix()}}, #' \code{\link[sdmTMB:families]{lognormal_mix()}}, -#' \code{\link[sdmTMB:families]{student()}}, and -#' \code{\link[sdmTMB:families]{tweedie()}}. Supports the delta/hurdle models: +#' \code{\link[sdmTMB:families]{student()}}, +#' \code{\link[sdmTMB:families]{tweedie()}}, and +#' \code{\link[sdmTMB:families]{gengamma()}}. +#' Supports the delta/hurdle models: #' \code{\link[sdmTMB:families]{delta_beta()}}, #' \code{\link[sdmTMB:families]{delta_gamma()}}, #' \code{\link[sdmTMB:families]{delta_gamma_mix()}}, @@ -1151,6 +1153,7 @@ sdmTMB <- function( ln_kappa = matrix(0, 2L, n_m), # ln_kappa = rep(log(sqrt(8) / median(stats::dist(spde$mesh$loc))), 2), thetaf = 0, + gengamma_Q = 1, # Not defined at exactly 0 logit_p_mix = 0, log_ratio_mix = 0, ln_phi = rep(0, n_m), @@ -1182,6 +1185,7 @@ sdmTMB <- function( tmb_map$b_j <- NULL if (delta) tmb_map$b_j2 <- NULL if (family$family[[1]] == "tweedie") tmb_map$thetaf <- NULL + if ("gengamma" %in% family$family) tmb_map$gengamma_Q <- NULL if (family$family[[1]] %in% c("gamma_mix", "lognormal_mix", "nbinom2_mix")) { tmb_map$log_ratio_mix <- NULL tmb_map$logit_p_mix <- NULL diff --git a/R/print.R b/R/print.R index 02adcab4f..c70104d6c 100644 --- a/R/print.R +++ b/R/print.R @@ -261,9 +261,13 @@ print_other_parameters <- function(x, m = 1L) { b <- tidy(x, "ran_pars", model = m, silent = TRUE) get_term_text <- function(term_name = "", pretext = "") { + b2 <- as.list(x$sd_report, what = "Estimate") if (term_name %in% b$term) { a <- mround(b$estimate[b$term == term_name], 2L) a <- paste0(pretext, ": ", a, "\n") + } else if (term_name %in% names(b2)) { + a <- mround(b2[[term_name]], 2L) + a <- paste0(pretext, ": ", a, "\n") } else { a <- "" } @@ -272,6 +276,9 @@ print_other_parameters <- function(x, m = 1L) { phi <- get_term_text("phi", "Dispersion parameter") tweedie_p <- get_term_text("tweedie_p", "Tweedie p") + gengamma_par <- if ('gengamma' %in% family(x)[[m]]) { + get_term_text("gengamma_Q", "Generalized gamma lambda") + } else "" sigma_O <- get_term_text("sigma_O", "Spatial SD") xtra <- if (x$spatiotemporal[m] == "ar1") "marginal " else "" sigma_E <- get_term_text("sigma_E", @@ -292,7 +299,7 @@ print_other_parameters <- function(x, m = 1L) { sigma_Z <- "" } - named_list(phi, tweedie_p, sigma_O, sigma_E, sigma_Z, rho) + named_list(phi, tweedie_p, sigma_O, sigma_E, sigma_Z, rho, gengamma_par) } print_header <- function(x) { @@ -340,6 +347,7 @@ print_one_model <- function(x, m = 1) { cat(other$phi) cat(other$tweedie_p) + cat(other$gengamma_par) cat(other$rho) cat(range) cat(other$sigma_O) diff --git a/man/families.Rd b/man/families.Rd index 9402824ba..355d3e57f 100644 --- a/man/families.Rd +++ b/man/families.Rd @@ -4,6 +4,7 @@ \alias{Families} \alias{Beta} \alias{lognormal} +\alias{gengamma} \alias{gamma_mix} \alias{lognormal_mix} \alias{nbinom2_mix} @@ -16,6 +17,7 @@ \alias{censored_poisson} \alias{delta_gamma} \alias{delta_gamma_mix} +\alias{delta_gengamma} \alias{delta_lognormal} \alias{delta_lognormal_mix} \alias{delta_truncated_nbinom2} @@ -29,6 +31,8 @@ Beta(link = "logit") lognormal(link = "log") +gengamma(link = "log") + gamma_mix(link = "log") lognormal_mix(link = "log") @@ -57,6 +61,12 @@ delta_gamma( delta_gamma_mix(link1 = "logit", link2 = "log") +delta_gengamma( + link1 = "logit", + link2 = "log", + type = c("standard", "poisson-link") +) + delta_lognormal( link1 = "logit", link2 = "log", @@ -100,6 +110,11 @@ Additional families compatible with \code{\link[=sdmTMB]{sdmTMB()}}. deprecated in favour of \code{delta_gamma(type = "poisson-link")} and \code{delta_lognormal(type = "poisson-link")}. +The \code{gengamma()} family was implemented by J.T. Thorson and uses the Prentice +(1974) parameterization such that the lognormal occurs as the internal +parameter \code{gengamma_Q} (reported in \code{print()} as "Generalized gamma lambda") +approaches 0. + The families ending in \verb{_mix()} are 2-component mixtures where each distribution has its own mean but a shared scale parameter. (Thorson et al. 2011). See the model-description vignette for details. @@ -119,6 +134,7 @@ For \code{student()}, the degrees of freedom parameter is currently not estimate \examples{ Beta(link = "logit") lognormal(link = "log") +gengamma(link = "log") gamma_mix(link = "log") lognormal_mix(link = "log") nbinom2_mix(link = "log") @@ -131,6 +147,7 @@ tweedie(link = "log") censored_poisson(link = "log") delta_gamma() delta_gamma_mix() +delta_gengamma() delta_lognormal() delta_lognormal_mix() delta_truncated_nbinom2() @@ -138,6 +155,15 @@ delta_truncated_nbinom1() delta_beta() } \references{ +\emph{Generalized gamma family}: + +Prentice, R.L. 1974. A log gamma model and its maximum likelihood estimation. +Biometrika 61(3): 539–544. \doi{10.1093/biomet/61.3.539} + +Stacy, E.W. 1962. A Generalization of the Gamma Distribution. The Annals of +Mathematical Statistics 33(3): 1187–1192. Institute of Mathematical +Statistics. + \emph{Families ending in \verb{_mix()}}: Thorson, J.T., Stewart, I.J., and Punt, A.E. 2011. Accounting for fish shoals diff --git a/man/sdmTMB.Rd b/man/sdmTMB.Rd index 9fe0ae17a..a3a2bcde9 100644 --- a/man/sdmTMB.Rd +++ b/man/sdmTMB.Rd @@ -59,8 +59,10 @@ downstream, supply the time argument.} \code{\link[sdmTMB:families]{censored_poisson()}}, \code{\link[sdmTMB:families]{gamma_mix()}}, \code{\link[sdmTMB:families]{lognormal_mix()}}, -\code{\link[sdmTMB:families]{student()}}, and -\code{\link[sdmTMB:families]{tweedie()}}. Supports the delta/hurdle models: +\code{\link[sdmTMB:families]{student()}}, +\code{\link[sdmTMB:families]{tweedie()}}, and +\code{\link[sdmTMB:families]{gengamma()}}. +Supports the delta/hurdle models: \code{\link[sdmTMB:families]{delta_beta()}}, \code{\link[sdmTMB:families]{delta_gamma()}}, \code{\link[sdmTMB:families]{delta_gamma_mix()}}, diff --git a/src/sdmTMB.cpp b/src/sdmTMB.cpp index 1940e55db..8c7ed1a82 100644 --- a/src/sdmTMB.cpp +++ b/src/sdmTMB.cpp @@ -20,7 +20,8 @@ enum valid_family { censored_poisson_family = 12, gamma_mix_family = 13, lognormal_mix_family = 14, - nbinom2_mix_family = 15 + nbinom2_mix_family = 15, + gengamma_family = 16 }; enum valid_link { @@ -233,6 +234,7 @@ Type objective_function::operator()() PARAMETER_ARRAY(ln_kappa); // Matern parameter PARAMETER(thetaf); // tweedie only + PARAMETER(gengamma_Q); // gengamma only PARAMETER(logit_p_mix); // ECE / positive mixture only PARAMETER(log_ratio_mix); // ECE / positive mixture only @@ -934,6 +936,11 @@ Type objective_function::operator()() } break; } + case gengamma_family: { + tmp_ll = sdmTMB::dgengamma(y_i(i,m), mu_i(i,m), phi(m), gengamma_Q, true); + SIMULATE{y_i(i,m) = sdmTMB::rgengamma(mu_i(i,m), phi(m), gengamma_Q);} + break; + } default: error("Family not implemented."); } diff --git a/src/utils.h b/src/utils.h index 209a644cb..7218dc069 100644 --- a/src/utils.h +++ b/src/utils.h @@ -4,6 +4,61 @@ bool isNA(Type x) { return R_IsNA(asDouble(x)); } +// dgengamma +// Written by J. Thorson based on scripts listed below +// using Prentice-1974 parameterization for lambda instead of k, so that lognormal occurs as lambda -> 0 +// using mean parameterization to back out theta +// CV is a function of sigma and lambda and NOT mean (i.e., CV is fixed for all values of mean) +// See: C:\Users\James.Thorson\Desktop\Work files\AFSC\2021-10 -- Generalized gamma-lognormal\Explore gengamma.R +template +Type dgengamma( Type x, + Type mean, + Type sigma, + Type Q, + int give_log=0){ + + + // First convert from mean to mu + // Rename based on flexdist + Type lambda = Q; + // Using https://stats.stackexchange.com/questions/345564/generalized-gamma-log-normal-as-limiting-special-case + Type k = pow( lambda, -2); + Type beta = pow( sigma, -1) * lambda; + // Use wikipedia expression for the mean: mean = a * gamma((d+1)/p) / gamma(d/p) where d/p = k, theta = a, and beta = p + // https://en.wikipedia.org/wiki/Generalized_gamma_distribution#Software_implementation + Type log_theta = log(mean) - lgamma( (k*beta+1)/beta ) + lgamma( k ); + // Using https://stats.stackexchange.com/questions/345564/generalized-gamma-log-normal-as-limiting-special-case + Type mu = log_theta + log(k) / beta; + + // Next evaluate PDF + // from https://github.com/chjackson/flexsurv-dev/blob/master/src/gengamma.h#L54-L56 + Type y = log(x); + Type w = (y - mu) / sigma; + Type qi = pow(Q, -2); + Type qw = Q * w; // 0.5*log(pow(x,2)) as trick for abs(log(x)) + Type logres = -log(sigma*x) + 0.5*log(pow(lambda,2)) * (1 - 2 * qi) + qi * (qw - exp(qw)) - lgamma(qi); + + // return stuff + if(give_log) return logres; else return exp(logres); +} + +// rgengamma +// Written by J. Thorson based on scripts listed below +template +Type rgengamma( Type mean, + Type sigma, + Type Q){ + + // See: C:\Users\James.Thorson\Desktop\Work files\AFSC\2021-10 -- Generalized gamma-lognormal\Explore gengamma.R + Type lambda = Q; + Type k = pow( lambda, -2 ); + Type beta = pow( sigma, -1 ) * lambda; + Type log_theta = log(mean) - lgamma( (k*beta+1)/beta ) + lgamma( k ); + Type w = log(rgamma(k, Type(1.0))); + Type y = w/beta + log_theta; + return exp(y); +} + template Type ppois_log(Type x, Type lambda) { return atomic::Rmath::Rf_ppois(asDouble(x), asDouble(lambda), true, true); diff --git a/tests/testthat/test-3-families.R b/tests/testthat/test-3-families.R index 7d295cf86..642460d5b 100644 --- a/tests/testthat/test-3-families.R +++ b/tests/testthat/test-3-families.R @@ -431,3 +431,117 @@ test_that("Binomial simulation/residuals works with weights argument or cbind()" expect_equal(mean(dat$y), mean(s2), tolerance = 0.1) expect_equal(mean(apply(s2, 1, mean) - dat$y), 0, tolerance = 0.01) }) + +test_that("Generalized gamma works", { + skip_on_cran() + skip_on_ci() + d <- subset(pcod_2011, density > 0) + fit1 <- sdmTMB( + density ~ 1 + depth_scaled, + data = d, + spatial = "off", + family = lognormal(link = "log") + ) + fit2 <- sdmTMB( + density ~ 1 + depth_scaled, + data = d, + spatial = "off", + family = Gamma(link = "log") + ) + fit3 <- sdmTMB( + density ~ 1 + depth_scaled, + data = d, + spatial = "off", + family = gengamma(link = "log") + ) + expect_s3_class(fit2, "sdmTMB") + logLik(fit1) + logLik(fit2) + logLik(fit3) + get_df <- function(x) { + L <- logLik(x) + attr(L, "df") + } + df1 <- get_df(fit1) + df2 <- get_df(fit2) + df3 <- get_df(fit3) + expect_identical(df1, 3L) + expect_identical(df3, 4L) + + b <- as.list(fit3$sd_report, "Estimate") + expect_equal(b$gengamma_Q, 0.04212623, tolerance = 0.001) + AIC(fit1) + AIC(fit2) + AIC(fit3) + + expect_error(residuals(fit3), regexp = "supported") +}) + + +test_that("Generalized gamma matches Gamma when Q = sigma", { + skip_on_cran() + skip_on_ci() + + # Generate values drawn from generaliased gamma distribution given the mean of those values + rgengamma <- function(n, mean, sigma, Q) { + # Get mu from mean + k <- Q^-2 + beta <- Q / sigma + log_theta <- log(mean) - lgamma( (k*beta+1)/beta ) + lgamma( k ) + mu <- log_theta + log(k) / beta + + if (Q != 0) { + w <- log(Q^2 * rgamma(n, 1 / Q^(2), 1)) / Q + y <- exp(mu + (sigma * w)) + + } else { + y <- rlnorm(n, mu, sigma) + } + return(y) + } + + sigma <- 0.5 + Q <- sigma + mean <- 5 + n <- 10000 + + # Regression coefficients (effects) + intercept <- 1 + b1 <- 1.8 + # Generate covariate values + set.seed(1) + x <- runif(n, min = 0, max = 2) + + # Compute mu's + coefs_true <- matrix(c(intercept, b1)) + X <- matrix(cbind(1, x), ncol = 2) + y_mean <- exp(X %*% coefs_true) + + set.seed(10) + y <- rgengamma(n = n, mean = y_mean, sigma = sigma, Q = Q) + # Should get the same answers with flexsurv::rgengamma + # set.seed(10) + # y_flex <- flexsurv::rgengamma(n = n, mu = y_mu, sigma = sigma, Q = Q) + + d <- data.frame(x = x, y = y) + + fit1 <- sdmTMB( + y ~ x, + data = d, + spatial = "off", + family = gengamma(link = "log") + ) + + fit2 <- sdmTMB( + y ~ x, + data = d, + spatial = "off", + family = Gamma(link = "log") + ) + + b <- as.list(fit1$sd_report, "Estimate") + expect_equal(b$gengamma_Q, 0.5, tolerance = 0.1) + expect_equal(b$b_j[1], 1, tolerance = 0.01) + expect_equal(b$b_j[2], 1.8, tolerance = 0.01) + +}) diff --git a/tests/testthat/test-delta.R b/tests/testthat/test-delta.R index 996d13068..823f3d0b8 100644 --- a/tests/testthat/test-delta.R +++ b/tests/testthat/test-delta.R @@ -12,7 +12,6 @@ test_that("Delta-Gamma family fits", { ) fit_dg$sd_report nd <- replicate_df(qcs_grid, "year", unique(pcod$year)) - p <- predict(fit_dg, newdata = nd) expect_equal( round(tidy(fit_dg, "ran_pars", model = 1)$estimate, 3), @@ -180,3 +179,63 @@ test_that("Anisotropy with delta model", { expect_equal(sr_gamma$ln_tau_E, sr_dg$ln_tau_E[2], tolerance = 1e-4) expect_equal(sr_gamma$ln_kappa[1,1], sr_dg$ln_kappa[1,2], tolerance = 1e-4) }) + +test_that("Delta-Gengamma family fits", { + skip_on_cran() + skip_on_ci() + + fit_dgg <- sdmTMB(density ~ 1, + data = pcod, mesh = pcod_spde, + spatial = 'off', + family = delta_gengamma(link1 = 'logit', link2 = 'log', type = 'standard') + ) + fit_dgg$sd_report + + expect_equal( + round(tidy(fit_dgg, "fixed", model = 1)$estimate, 3), + -0.152 + ) + expect_equal( + round(tidy(fit_dgg, "fixed", model = 2)$estimate, 3), + c(4.418) + ) + + nd <- replicate_df(qcs_grid, "year", unique(pcod$year)) + p <- predict(fit_dgg, newdata = nd, return_tmb_object = TRUE) + + # check + fit_bin <- sdmTMB(present ~ 1, + data = pcod, mesh = pcod_spde, + spatial = "off", + family = binomial() + ) + fit_gengamma <- sdmTMB(density ~ 1, + data = pcod_pos, mesh = pcod_spde_pos, + spatial = "off", + family = gengamma(link = "log") + ) + sr_bin <- as.list(fit_bin$sd_report, "Estimate") + sr_gengamma <- as.list(fit_gengamma$sd_report, "Estimate") + sr_dgg <- as.list(fit_dgg$sd_report, "Estimate") + expect_equal(sr_bin$b_j[1], sr_dgg$b_j[1], tolerance = 1e-4) + expect_equal(sr_gengamma$b_j[1], sr_dgg$b_j2[1], tolerance = 1e-4) + expect_equal(sr_gengamma$ln_phi, sr_dgg$ln_phi[2], tolerance = 1e-4) + expect_equal(sr_gengamma$ln_tau_O, sr_dgg$ln_tau_O[2], tolerance = 1e-4) + expect_equal(sr_gengamma$ln_tau_E, sr_dgg$ln_tau_E[2], tolerance = 1e-4) + expect_equal(sr_gengamma$ln_kappa[1,1], sr_dgg$ln_kappa[1,2], tolerance = 1e-4) + expect_equal(sr_bin$ln_kappa[1,1], sr_dgg$ln_kappa[1,1], tolerance = 1e-4) +}) + +test_that("delta_gengamma() Poisson-link family fits", { + skip_on_cran() + skip_on_ci() + + fit_pgg <- sdmTMB(density ~ 1, + data = pcod, mesh = pcod_spde, + spatial = "off", + family = delta_gengamma(link1 = 'log', link2 = 'log', type = "poisson-link") + ) + fit_pgg$sd_report + s <- as.list(fit_pgg$sd_report, "Std. Error") + expect_true(sum(is.na(s$b_j)) == 0L) +})