From b66d1e79576787f157f555fe74e380a3f30a3c1f Mon Sep 17 00:00:00 2001 From: Ken Kellner Date: Fri, 1 Mar 2024 12:22:38 -0500 Subject: [PATCH] Better automatic value of K in multinomial models. Fixes #275 --- DESCRIPTION | 4 ++-- R/distsampOpen.R | 18 +----------------- R/gmultmix.R | 2 +- R/multmixOpen.R | 7 +------ R/utils.R | 30 ++++++++++++++++++++++++++++++ tests/testthat/test_distsampOpen.R | 14 ++++++++++++++ tests/testthat/test_gdistremoval.R | 15 +++++++++++++++ tests/testthat/test_gdistsamp.R | 10 ++++++++++ tests/testthat/test_gmultmix.R | 29 ++++++++++++++++++++--------- tests/testthat/test_multmixOpen.R | 9 +++++++++ tests/testthat/test_simulate.R | 2 +- 11 files changed, 104 insertions(+), 36 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c7e57999..33c35633 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: unmarked -Version: 1.4.1.9001 -Date: 2024-01-23 +Version: 1.4.1.9002 +Date: 2024-03-01 Type: Package Title: Models for Data from Unmarked Animals Authors@R: c( diff --git a/R/distsampOpen.R b/R/distsampOpen.R index 92588a4c..c062a0f1 100644 --- a/R/distsampOpen.R +++ b/R/distsampOpen.R @@ -85,23 +85,7 @@ distsampOpen <- function(lambdaformula, gammaformula, omegaformula, pformula, last <- apply(!ytna, 1, function(x) max(which(x))) first1 <- which(first==1)[1] - #K stuff - if(missing(K)) { - K <- max(y, na.rm=T) + 20 - warning("K was not specified and was set to ", K, ".") - } - - J <- ncol(data@y) / data@numPrimary - inds <- split(1:ncol(data@y), ceiling(1:ncol(data@y)/J)) - Tobs <- sapply(1:length(inds), function(i){ - rowSums(data@y[,inds[[i]], drop=FALSE], na.rm=TRUE) - }) - Kmin <- max(Tobs, na.rm=TRUE) - - if(K < Kmin){ - stop("Specified K is too small, must be larger than the max total count in a primary period", - call.=FALSE) - } + K <- check_K_multinomial(K, K_adjust = 20, D$y, T) # in utils.R k <- 0:K lk <- length(k) #Some k-related indices to avoid repeated calculations in likelihood diff --git a/R/gmultmix.R b/R/gmultmix.R index a6da9d5c..37426096 100644 --- a/R/gmultmix.R +++ b/R/gmultmix.R @@ -29,7 +29,7 @@ if(is.null(Xlam.offset)) Xlam.offset <- rep(0, nrow(Xlam)) if(is.null(Xphi.offset)) Xphi.offset <- rep(0, nrow(Xphi)) if(is.null(Xdet.offset)) Xdet.offset <- rep(0, nrow(Xdet)) -if(missing(K) || is.null(K)) K <- max(y, na.rm=TRUE) + 100 +K <- check_K_multinomial(K, K_adjust = 100, y, data@numPrimary) k <- 0:K lk <- length(k) M <- nrow(y) diff --git a/R/multmixOpen.R b/R/multmixOpen.R index 3d8eede8..56bec1a8 100644 --- a/R/multmixOpen.R +++ b/R/multmixOpen.R @@ -64,12 +64,7 @@ multmixOpen <- function(lambdaformula, gammaformula, omegaformula, pformula, if(is.null(Xiota.offset)) Xiota.offset <- rep(0, M*(T-1)) #K stuff - if(missing(K)) { - K <- max(y, na.rm=T) + 20 - warning("K was not specified and was set to ", K, ".") - } - if(K <= max(y, na.rm = TRUE)) - stop("specified K is too small. Try a value larger than any observation") + K <- check_K_multinomial(K, K_adjust = 20, D$y, T) k <- 0:K lk <- length(k) #Some k-related indices to avoid repeated calculations in likelihood diff --git a/R/utils.R b/R/utils.R index 8b977eb9..f5d8bfdc 100644 --- a/R/utils.R +++ b/R/utils.R @@ -927,3 +927,33 @@ lapply2 <- function(X, FUN, ..., cl = NULL){ } lapply(X=X, FUN=FUN, ...) } + +# Determine automatic K or check provided K for multinomial-type models +# (gdistsamp, gmultmix, distsampOpen, multmixOpen, gdistremoval) +check_K_multinomial <- function(K, K_adjust = 0, y, T = 1){ + + safe_sum <- function(x){ + if(all(is.na(x))) return(NA) else return(sum(x, na.rm=TRUE)) + } + + if(T == 1){ + yt <- apply(y, 1, safe_sum) + } else { + M <- nrow(y) + J <- ncol(y) / T + ya <- array(y, c(M, J, T)) + ya <- aperm(ya, c(1,3,2)) + yt <- apply(ya, 1:2, safe_sum) + } + Kmin <- max(yt, na.rm = TRUE) + if(missing(K)){ + Kout <- Kmin + K_adjust + warning("K was not specified and was set to ", Kout, ".", call.=FALSE) + } else { + if(K <= Kmin){ + stop("specified K is too small. Try a value larger than the max count at any site", call.=FALSE) + } + Kout <- K + } + Kout +} diff --git a/tests/testthat/test_distsampOpen.R b/tests/testthat/test_distsampOpen.R index 2797b803..fe915f44 100644 --- a/tests/testthat/test_distsampOpen.R +++ b/tests/testthat/test_distsampOpen.R @@ -132,6 +132,20 @@ test_that("dso halfnorm key function works",{ # Check K that is too small expect_error(distsampOpen(~1, ~1, ~1, ~x1, data = umf, K=5,keyfun="halfnorm")) + # Check auto-K + fm <- expect_warning(distsampOpen(~1, ~1, ~1, ~x1, data = umf, keyfun="halfnorm")) + expect_false(fm@K == max(umf@y)+20) # the wrong way + + # Check that automatic K value is correct + ya <- array(umf@y, c(50, 4, 7)) + ya <- aperm(ya, c(1,3,2)) + yt <- apply(ya, 1:2, function(x) { + if(all(is.na(x))) + return(NA) + else return(sum(x, na.rm=TRUE)) + }) + expect_equal(max(yt)+20, fm@K) # the correct way + fm <- distsampOpen(~1, ~1, ~1, ~x1, data = umf, K=10,keyfun="halfnorm") expect_equivalent(coef(fm), c(1.4185,1.0471,-0.8275,3.1969,-0.0790), diff --git a/tests/testthat/test_gdistremoval.R b/tests/testthat/test_gdistremoval.R index fdc2ca17..600d84be 100644 --- a/tests/testthat/test_gdistremoval.R +++ b/tests/testthat/test_gdistremoval.R @@ -282,6 +282,10 @@ test_that("gdistremoval can fit models",{ expect_is(fit, "unmarkedFitGDR") expect_equivalent(coef(fit), c(1.4571,0.3374,4.0404,-1.65389,0.16789), tol=1e-3) + # Check automatic setting of K + yt <- apply(umf@yDistance, 1, sum) + expect_equal(max(yt)+40, fit@K) + # With unequal period lengths umf2 <- unmarkedFrameGDR(dat$y, dat$yRem, siteCovs=sc, obsCovs=oc, dist.breaks=c(0,25,50,75,100), unitsIn='m', @@ -445,6 +449,17 @@ test_that("multi-period data works with gdistremoval",{ c(2.1013,-0.1142,-1.3187,-0.1483,3.3981,-0.5142,0.233678), tol=1e-3) + # Check that automatic K value is correct + ya <- array(umf@y, c(30, 4, 5)) + ya <- aperm(ya, c(1,3,2)) + yt <- apply(ya, 1:2, function(x) { + if(all(is.na(x))) + return(NA) + else return(sum(x, na.rm=TRUE)) + }) + expect_false(max(umf@y)+40 == fit@K) # the incorrect way + expect_equal(max(yt)+40, fit@K) # the correct way + # Predict pr <- predict(fit, 'phi') expect_equal(dim(pr), c(30*5,4)) diff --git a/tests/testthat/test_gdistsamp.R b/tests/testthat/test_gdistsamp.R index 96636d00..1b232b2f 100644 --- a/tests/testthat/test_gdistsamp.R +++ b/tests/testthat/test_gdistsamp.R @@ -96,6 +96,16 @@ test_that("gdistsamp with halfnorm keyfunction works",{ control=list(maxit=1)) expect_equal(coef(fm_R), coef(fm_C)) + # Check that automatic K value is correct + ya <- array(umf@y, c(R, J, T)) + ya <- aperm(ya, c(1,3,2)) + yt <- apply(ya, 1:2, function(x) { + if(all(is.na(x))) + return(NA) + else return(sum(x, na.rm=TRUE)) + }) + expect_equal(max(yt)+100, fm_C@K) + #When output = density #fm_R <- gdistsamp(~1, ~1, ~1, umf, output="density", se=FALSE, engine="R") fm_C <- gdistsamp(~1, ~1, ~1, umf, output="density", se=FALSE, engine="C") diff --git a/tests/testthat/test_gmultmix.R b/tests/testthat/test_gmultmix.R index 5caafa6b..f8e73b6f 100644 --- a/tests/testthat/test_gmultmix.R +++ b/tests/testthat/test_gmultmix.R @@ -76,8 +76,19 @@ test_that("gmultmix removal model works",{ umf <- unmarkedFrameGMM(y = y, siteCovs = siteCovs, obsCovs = obsCovs, yearlySiteCovs = yrSiteCovs, type="removal", numPrimary=2) #fm_R <- gmultmix(~x, ~yr, ~o1 + o2, data = umf, K=23, engine="R") - expect_warning(fm_C <- gmultmix(~x, ~yr, ~o1 + o2, data = umf, K=23, engine="C")) - + expect_warning(fm_C <- gmultmix(~x, ~yr, ~o1 + o2, data = umf, engine="C")) + + # Check K calculation + expect_false(fm_C@K == max(y) + 100) # wrong way to do it + # Correct way + ya <- array(y, c(5, 2, 2)) + yt <- apply(ya, c(1,3), sum, na.rm=TRUE) + expect_true(fm_C@K == max(yt) + 100) + # Throw error when K is too low + expect_error(expect_warning(gmultmix(~x, ~yr, ~o1+o2, data=umf, K = 5))) + + # Fit with the old K to keep tests correct + expect_warning(fm_C <- gmultmix(~x, ~yr, ~o1 + o2, data = umf, K = 23, engine="C")) expect_equal(fm_C@sitesRemoved, 3) coef_truth <- c(2.50638554, 0.06226627, 0.21787839, 6.46029769, -1.51885928, -0.03409375, 0.43424295) @@ -164,7 +175,7 @@ test_that("gmultmix double model works",{ umf2 <- umf[1:5,] expect_equal(numSites(umf2), 5) - fm <- gmultmix(~1,~1,~observer, umf) + fm <- expect_warning(gmultmix(~1,~1,~observer, umf)) expect_equivalent(coef(fm), c(2.2586,0.17385,-0.7425), tol=1e-4) gp <- getP(fm) @@ -195,7 +206,7 @@ test_that("gmultmix dependent double model works",{ expect_warning(umf <- unmarkedFrameGMM(y=y[,1:2], obsCovs=list(observer=observer), type="depDouble",numPrimary=1)) - fm <- gmultmix(~1,~1,~observer, umf) + fm <- expect_warning(gmultmix(~1,~1,~observer, umf)) expect_equivalent(coef(fm), c(1.7762,0.2493,0.2008), tol=1e-4) gp <- getP(fm) @@ -309,14 +320,14 @@ test_that("gmultmix ZIP model works",{ type="double",numPrimary=1)) # Compare R and C engines - fmR <- gmultmix(~1, ~1, ~observer, umf, mixture="ZIP", engine="R", se=FALSE, - control=list(maxit=1)) - fmC <- gmultmix(~1, ~1, ~observer, umf, mixture="ZIP", engine="C", se=FALSE, - control=list(maxit=1)) + fmR <- expect_warning(gmultmix(~1, ~1, ~observer, umf, mixture="ZIP", engine="R", se=FALSE, + control=list(maxit=1))) + fmC <- expect_warning(gmultmix(~1, ~1, ~observer, umf, mixture="ZIP", engine="C", se=FALSE, + control=list(maxit=1))) expect_equal(coef(fmR), coef(fmC)) # Fit model full - fm <- gmultmix(~1,~1,~observer, umf, mixture="ZIP") + fm <- expect_warning(gmultmix(~1,~1,~observer, umf, mixture="ZIP")) expect_equivalent(coef(fm), c(2.2289,0.1858,-0.9285,-0.9501), tol=1e-4) # Check methods diff --git a/tests/testthat/test_multmixOpen.R b/tests/testthat/test_multmixOpen.R index c66d3f93..b0f162a5 100644 --- a/tests/testthat/test_multmixOpen.R +++ b/tests/testthat/test_multmixOpen.R @@ -64,6 +64,15 @@ test_that("multmixOpen can fit removal models",{ expect_equivalent(coef(fit), c(1.38860,0.043406,-0.68448, 1.40703,0.03174,-0.00235), tol=1e-5) + # Check auto K setting + fit <- expect_warning(multmixOpen(~x1, ~1, ~1, ~x1, data=umf)) + expect_false(fit@K == max(umf@y) + 20) # Wrong way + + ya <- array(umf@y, c(100, 3, 5)) + yt <- apply(ya, c(1,3), sum, na.rm=TRUE) + + expect_true(fit@K == max(yt) + 20) # correct way + #Check predict pr <- predict(fit, type='lambda') expect_equivalent(as.numeric(pr[1,]), diff --git a/tests/testthat/test_simulate.R b/tests/testthat/test_simulate.R index be8a3566..32885f14 100644 --- a/tests/testthat/test_simulate.R +++ b/tests/testthat/test_simulate.R @@ -111,7 +111,7 @@ test_that("simulate can generate new datasets from scratch",{ forms_gmm <- list(lambda=~elev, phi=~1, det=~1) umf9 <- simulate("gmultmix", formulas=forms_gmm, design=design_colext, coefs=cf_gmm, type='removal') - fm <- gmultmix(~elev,~1,~1, umf9) + fm <- expect_warning(gmultmix(~elev,~1,~1, umf9)) expect_equivalent(coef(fm), c(1.9529,0.5321,0.0529,-0.0373), tol=1e-4) #gpcount