Skip to content

Commit

Permalink
Better automatic value of K in multinomial models. Fixes #275
Browse files Browse the repository at this point in the history
  • Loading branch information
kenkellner committed Mar 1, 2024
1 parent 85603a5 commit b66d1e7
Show file tree
Hide file tree
Showing 11 changed files with 104 additions and 36 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: unmarked
Version: 1.4.1.9001
Date: 2024-01-23
Version: 1.4.1.9002
Date: 2024-03-01
Type: Package
Title: Models for Data from Unmarked Animals
Authors@R: c(
Expand Down
18 changes: 1 addition & 17 deletions R/distsampOpen.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,23 +85,7 @@ distsampOpen <- function(lambdaformula, gammaformula, omegaformula, pformula,
last <- apply(!ytna, 1, function(x) max(which(x)))
first1 <- which(first==1)[1]

#K stuff
if(missing(K)) {
K <- max(y, na.rm=T) + 20
warning("K was not specified and was set to ", K, ".")
}

J <- ncol(data@y) / data@numPrimary
inds <- split(1:ncol(data@y), ceiling(1:ncol(data@y)/J))
Tobs <- sapply(1:length(inds), function(i){
rowSums(data@y[,inds[[i]], drop=FALSE], na.rm=TRUE)
})
Kmin <- max(Tobs, na.rm=TRUE)

if(K < Kmin){
stop("Specified K is too small, must be larger than the max total count in a primary period",
call.=FALSE)
}
K <- check_K_multinomial(K, K_adjust = 20, D$y, T) # in utils.R
k <- 0:K
lk <- length(k)
#Some k-related indices to avoid repeated calculations in likelihood
Expand Down
2 changes: 1 addition & 1 deletion R/gmultmix.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ if(is.null(Xlam.offset)) Xlam.offset <- rep(0, nrow(Xlam))
if(is.null(Xphi.offset)) Xphi.offset <- rep(0, nrow(Xphi))
if(is.null(Xdet.offset)) Xdet.offset <- rep(0, nrow(Xdet))

if(missing(K) || is.null(K)) K <- max(y, na.rm=TRUE) + 100
K <- check_K_multinomial(K, K_adjust = 100, y, data@numPrimary)
k <- 0:K
lk <- length(k)
M <- nrow(y)
Expand Down
7 changes: 1 addition & 6 deletions R/multmixOpen.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,7 @@ multmixOpen <- function(lambdaformula, gammaformula, omegaformula, pformula,
if(is.null(Xiota.offset)) Xiota.offset <- rep(0, M*(T-1))

#K stuff
if(missing(K)) {
K <- max(y, na.rm=T) + 20
warning("K was not specified and was set to ", K, ".")
}
if(K <= max(y, na.rm = TRUE))
stop("specified K is too small. Try a value larger than any observation")
K <- check_K_multinomial(K, K_adjust = 20, D$y, T)
k <- 0:K
lk <- length(k)
#Some k-related indices to avoid repeated calculations in likelihood
Expand Down
30 changes: 30 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -927,3 +927,33 @@ lapply2 <- function(X, FUN, ..., cl = NULL){
}
lapply(X=X, FUN=FUN, ...)
}

# Determine automatic K or check provided K for multinomial-type models
# (gdistsamp, gmultmix, distsampOpen, multmixOpen, gdistremoval)
check_K_multinomial <- function(K, K_adjust = 0, y, T = 1){

safe_sum <- function(x){
if(all(is.na(x))) return(NA) else return(sum(x, na.rm=TRUE))
}

if(T == 1){
yt <- apply(y, 1, safe_sum)
} else {
M <- nrow(y)
J <- ncol(y) / T
ya <- array(y, c(M, J, T))
ya <- aperm(ya, c(1,3,2))
yt <- apply(ya, 1:2, safe_sum)
}
Kmin <- max(yt, na.rm = TRUE)
if(missing(K)){
Kout <- Kmin + K_adjust
warning("K was not specified and was set to ", Kout, ".", call.=FALSE)
} else {
if(K <= Kmin){
stop("specified K is too small. Try a value larger than the max count at any site", call.=FALSE)
}
Kout <- K
}
Kout
}
14 changes: 14 additions & 0 deletions tests/testthat/test_distsampOpen.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,20 @@ test_that("dso halfnorm key function works",{
# Check K that is too small
expect_error(distsampOpen(~1, ~1, ~1, ~x1, data = umf, K=5,keyfun="halfnorm"))

# Check auto-K
fm <- expect_warning(distsampOpen(~1, ~1, ~1, ~x1, data = umf, keyfun="halfnorm"))
expect_false(fm@K == max(umf@y)+20) # the wrong way

# Check that automatic K value is correct
ya <- array(umf@y, c(50, 4, 7))
ya <- aperm(ya, c(1,3,2))
yt <- apply(ya, 1:2, function(x) {
if(all(is.na(x)))
return(NA)
else return(sum(x, na.rm=TRUE))
})
expect_equal(max(yt)+20, fm@K) # the correct way

fm <- distsampOpen(~1, ~1, ~1, ~x1, data = umf, K=10,keyfun="halfnorm")

expect_equivalent(coef(fm), c(1.4185,1.0471,-0.8275,3.1969,-0.0790),
Expand Down
15 changes: 15 additions & 0 deletions tests/testthat/test_gdistremoval.R
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,10 @@ test_that("gdistremoval can fit models",{
expect_is(fit, "unmarkedFitGDR")
expect_equivalent(coef(fit), c(1.4571,0.3374,4.0404,-1.65389,0.16789), tol=1e-3)

# Check automatic setting of K
yt <- apply(umf@yDistance, 1, sum)
expect_equal(max(yt)+40, fit@K)

# With unequal period lengths
umf2 <- unmarkedFrameGDR(dat$y, dat$yRem, siteCovs=sc, obsCovs=oc,
dist.breaks=c(0,25,50,75,100), unitsIn='m',
Expand Down Expand Up @@ -445,6 +449,17 @@ test_that("multi-period data works with gdistremoval",{
c(2.1013,-0.1142,-1.3187,-0.1483,3.3981,-0.5142,0.233678),
tol=1e-3)

# Check that automatic K value is correct
ya <- array(umf@y, c(30, 4, 5))
ya <- aperm(ya, c(1,3,2))
yt <- apply(ya, 1:2, function(x) {
if(all(is.na(x)))
return(NA)
else return(sum(x, na.rm=TRUE))
})
expect_false(max(umf@y)+40 == fit@K) # the incorrect way
expect_equal(max(yt)+40, fit@K) # the correct way

# Predict
pr <- predict(fit, 'phi')
expect_equal(dim(pr), c(30*5,4))
Expand Down
10 changes: 10 additions & 0 deletions tests/testthat/test_gdistsamp.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,16 @@ test_that("gdistsamp with halfnorm keyfunction works",{
control=list(maxit=1))
expect_equal(coef(fm_R), coef(fm_C))

# Check that automatic K value is correct
ya <- array(umf@y, c(R, J, T))
ya <- aperm(ya, c(1,3,2))
yt <- apply(ya, 1:2, function(x) {
if(all(is.na(x)))
return(NA)
else return(sum(x, na.rm=TRUE))
})
expect_equal(max(yt)+100, fm_C@K)

#When output = density
#fm_R <- gdistsamp(~1, ~1, ~1, umf, output="density", se=FALSE, engine="R")
fm_C <- gdistsamp(~1, ~1, ~1, umf, output="density", se=FALSE, engine="C")
Expand Down
29 changes: 20 additions & 9 deletions tests/testthat/test_gmultmix.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,19 @@ test_that("gmultmix removal model works",{
umf <- unmarkedFrameGMM(y = y, siteCovs = siteCovs, obsCovs = obsCovs,
yearlySiteCovs = yrSiteCovs, type="removal", numPrimary=2)
#fm_R <- gmultmix(~x, ~yr, ~o1 + o2, data = umf, K=23, engine="R")
expect_warning(fm_C <- gmultmix(~x, ~yr, ~o1 + o2, data = umf, K=23, engine="C"))

expect_warning(fm_C <- gmultmix(~x, ~yr, ~o1 + o2, data = umf, engine="C"))

# Check K calculation
expect_false(fm_C@K == max(y) + 100) # wrong way to do it
# Correct way
ya <- array(y, c(5, 2, 2))
yt <- apply(ya, c(1,3), sum, na.rm=TRUE)
expect_true(fm_C@K == max(yt) + 100)
# Throw error when K is too low
expect_error(expect_warning(gmultmix(~x, ~yr, ~o1+o2, data=umf, K = 5)))

# Fit with the old K to keep tests correct
expect_warning(fm_C <- gmultmix(~x, ~yr, ~o1 + o2, data = umf, K = 23, engine="C"))
expect_equal(fm_C@sitesRemoved, 3)
coef_truth <- c(2.50638554, 0.06226627, 0.21787839, 6.46029769, -1.51885928,
-0.03409375, 0.43424295)
Expand Down Expand Up @@ -164,7 +175,7 @@ test_that("gmultmix double model works",{
umf2 <- umf[1:5,]
expect_equal(numSites(umf2), 5)

fm <- gmultmix(~1,~1,~observer, umf)
fm <- expect_warning(gmultmix(~1,~1,~observer, umf))
expect_equivalent(coef(fm), c(2.2586,0.17385,-0.7425), tol=1e-4)

gp <- getP(fm)
Expand Down Expand Up @@ -195,7 +206,7 @@ test_that("gmultmix dependent double model works",{
expect_warning(umf <- unmarkedFrameGMM(y=y[,1:2], obsCovs=list(observer=observer),
type="depDouble",numPrimary=1))

fm <- gmultmix(~1,~1,~observer, umf)
fm <- expect_warning(gmultmix(~1,~1,~observer, umf))
expect_equivalent(coef(fm), c(1.7762,0.2493,0.2008), tol=1e-4)

gp <- getP(fm)
Expand Down Expand Up @@ -309,14 +320,14 @@ test_that("gmultmix ZIP model works",{
type="double",numPrimary=1))

# Compare R and C engines
fmR <- gmultmix(~1, ~1, ~observer, umf, mixture="ZIP", engine="R", se=FALSE,
control=list(maxit=1))
fmC <- gmultmix(~1, ~1, ~observer, umf, mixture="ZIP", engine="C", se=FALSE,
control=list(maxit=1))
fmR <- expect_warning(gmultmix(~1, ~1, ~observer, umf, mixture="ZIP", engine="R", se=FALSE,
control=list(maxit=1)))
fmC <- expect_warning(gmultmix(~1, ~1, ~observer, umf, mixture="ZIP", engine="C", se=FALSE,
control=list(maxit=1)))
expect_equal(coef(fmR), coef(fmC))

# Fit model full
fm <- gmultmix(~1,~1,~observer, umf, mixture="ZIP")
fm <- expect_warning(gmultmix(~1,~1,~observer, umf, mixture="ZIP"))
expect_equivalent(coef(fm), c(2.2289,0.1858,-0.9285,-0.9501), tol=1e-4)

# Check methods
Expand Down
9 changes: 9 additions & 0 deletions tests/testthat/test_multmixOpen.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,15 @@ test_that("multmixOpen can fit removal models",{
expect_equivalent(coef(fit), c(1.38860,0.043406,-0.68448,
1.40703,0.03174,-0.00235), tol=1e-5)

# Check auto K setting
fit <- expect_warning(multmixOpen(~x1, ~1, ~1, ~x1, data=umf))
expect_false(fit@K == max(umf@y) + 20) # Wrong way

ya <- array(umf@y, c(100, 3, 5))
yt <- apply(ya, c(1,3), sum, na.rm=TRUE)

expect_true(fit@K == max(yt) + 20) # correct way

#Check predict
pr <- predict(fit, type='lambda')
expect_equivalent(as.numeric(pr[1,]),
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_simulate.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ test_that("simulate can generate new datasets from scratch",{
forms_gmm <- list(lambda=~elev, phi=~1, det=~1)
umf9 <- simulate("gmultmix", formulas=forms_gmm, design=design_colext, coefs=cf_gmm,
type='removal')
fm <- gmultmix(~elev,~1,~1, umf9)
fm <- expect_warning(gmultmix(~elev,~1,~1, umf9))
expect_equivalent(coef(fm), c(1.9529,0.5321,0.0529,-0.0373), tol=1e-4)

#gpcount
Expand Down

0 comments on commit b66d1e7

Please sign in to comment.