diff --git a/.Rbuildignore b/.Rbuildignore index 70e89583..1ac88473 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,3 +12,5 @@ README.Rmd ^\.github$ ^_pkgdown\.yml$ ^vignettes/colext.Rmd.orig +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.gitignore b/.gitignore index fe18f2a1..eaf91ee0 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ *.Rd~ *.R~ NAMESPACE~ +.RData # TeX @@ -28,3 +29,6 @@ bc .Rhistory symbols.rds +# Rproj +.Rproj.user +*.Rproj diff --git a/DESCRIPTION b/DESCRIPTION index ecfedd14..d42b5b99 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: unmarked -Version: 1.3.2.9005 -Date: 2023-12-10 +Version: 1.4.1.9001 +Date: 2024-01-23 Type: Package Title: Models for Data from Unmarked Animals Authors@R: c( @@ -12,6 +12,7 @@ Authors@R: c( person("Jeff", "Hostetler", role="aut"), person("Rebecca", "Hutchinson", role="aut"), person("Adam", "Smith", role="aut"), + person("Lea", "Pautrel", role="aut"), person("Marc", "Kery", role="ctb"), person("Mike", "Meredith", role="ctb"), person("Auriel", "Fournier", role="ctb"), @@ -51,12 +52,14 @@ Collate: 'classes.R' 'unmarkedEstimate.R' 'mapInfo.R' 'unmarkedFrame.R' 'unmarkedCrossVal.R' 'piFun.R' 'vif.R' 'makePiFun.R' 'posteriorSamples.R' 'nmixTTD.R' 'gdistremoval.R' + 'IDS.R' 'plotEffects.R' 'mixedModelTools.R' 'power.R' 'simulate.R' 'predict.R' 'goccu.R' + 'occuCOP.R' 'RcppExports.R' 'zzz.R' LinkingTo: diff --git a/NAMESPACE b/NAMESPACE index a91651a8..c4bbc636 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,7 +7,7 @@ importFrom(stats, confint, fitted, coef, vcov, predict, update, profile, pnorm, qchisq, qnorm, quantile, rbinom, reshape, rmultinom, rnbinom, rpois, runif, sd, uniroot, update.formula, sigma) -importFrom(graphics, plot, hist, abline, axis, lines, points, polygon, segments) +importFrom(graphics, plot, hist, abline, axis, lines, points, polygon, segments, title) importFrom(utils, head, read.csv) importFrom(grDevices, devAskNewPage, dev.interactive, palette.colors) importFrom(MASS, mvrnorm) @@ -23,7 +23,8 @@ importFrom(Rcpp, evalCpp) export(occu, occuFP, occuRN, pcount, pcountOpen, multinomPois, distsamp, colext, gmultmix, gdistsamp, gpcount, occuPEN, occuPEN_CV, occuMulti, occuMS, computeMPLElambda, pcount.spHDS, occuTTD, distsampOpen, - multmixOpen, nmixTTD, gdistremoval, goccu) + multmixOpen, nmixTTD, gdistremoval, goccu, occuCOP, IDS) + export(removalPiFun, doublePiFun) export(makeRemPiFun, makeCrPiFun, makeCrPiFunMb, makeCrPiFunMh) @@ -32,7 +33,7 @@ exportClasses(unmarkedFit, unmarkedFitOccu, unmarkedFitOccuFP, unmarkedFitDS, unmarkedFitPCount, unmarkedFitMPois, unmarkedFitPCO, unmarkedFrameDSO, unmarkedFitDSO, unmarkedFrameMMO, unmarkedFitMMO, - unmarkedFitOccuMulti, + unmarkedFitOccuMulti, unmarkedFitIDS, unmarkedFrame, unmarkedFrameOccu, unmarkedFramePCount, unmarkedFrameMPois, unmarkedFrameDS, unmarkedMultFrame, unmarkedFrameGMM, unmarkedFrameGDS, unmarkedFramePCO, @@ -50,7 +51,8 @@ exportMethods(backTransform, coef, confint, coordinates, fitted, getData, "siteCovs<-", summary, update, vcov, yearlySiteCovs, "yearlySiteCovs<-", "[", smoothed, projected, nonparboot, logLik, LRT, ranef, bup, crossVal, posteriorSamples, sigma, randomTerms, - optimizePenalty, unmarkedPowerList, plotEffectsData, plotEffects) + optimizePenalty, unmarkedPowerList, plotEffectsData, plotEffects, + getL) S3method("print", "unmarkedPostSamples") @@ -60,7 +62,8 @@ export(unmarkedEstimate, fitList, mapInfo, unmarkedFrame, unmarkedFrameDS, unmarkedMultFrame, unmarkedFrameGMM, unmarkedFramePCO, unmarkedFrameGDS, unmarkedFrameGPC, unmarkedFrameOccuMulti, unmarkedFrameOccuMS, unmarkedFrameOccuTTD, unmarkedFrameDSO, - unmarkedFrameMMO, unmarkedFrameGDR, unmarkedFrameGOccu) + unmarkedFrameMMO, unmarkedFrameGDR, unmarkedFrameGOccu, + unmarkedFrameOccuCOP) # Formatting export(csvToUMF, formatLong, formatWide, formatMult, formatDistData) diff --git a/NEWS.md b/NEWS.md index 1ae93877..14b00fd2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# unmarked 1.4.0 + +* Added count-data occupancy model (occuCOP) +* Added multi-scale occupancy model (goccu) +* Added ZIP support to gdistsamp, gmultmix, and gpcount +* Fixed bug in TMB engine for occu that resulted in incorrect detection coefficient estimates when there were many interspersed NAs in the encounter history + # unmarked 1.3.3 * Increase required R version to 4.0 diff --git a/R/IDS.R b/R/IDS.R new file mode 100644 index 00000000..38338b12 --- /dev/null +++ b/R/IDS.R @@ -0,0 +1,684 @@ +setClassUnion("unmarkedFrameOrNULL", members=c("unmarkedFrame", "NULL")) + +setClass("unmarkedFitIDS", + representation( + formlist = "list", + keyfun = "character", + K = "numeric", + dataPC = "unmarkedFrameOrNULL", + dataOC = "unmarkedFrameOrNULL", + maxDist = "list", + surveyDurations = "list", + unitsOut = "character"), + contains = "unmarkedFit") + +get_ds_info <- function(db){ + J <- length(db) - 1 + a <- u <- rep(NA, J) + a[1] <- pi*db[2]^2 + if(J > 1){ + for (j in 2:J){ + a[j] <- pi*db[j+1]^2 - sum(a[1:(j-1)]) + } + } + + total_area <- sum(a) + u <- a/total_area + w <- diff(db) + list(total_area=total_area, a=a, u=u, w=w) +} + +IDS <- function(lambdaformula = ~1, + detformulaDS = ~1, + detformulaPC = NULL, + detformulaOC = NULL, + dataDS, + dataPC = NULL, + dataOC = NULL, + availformula = NULL, + durationDS = NULL, + durationPC = NULL, + durationOC = NULL, + keyfun = "halfnorm", + maxDistPC, + maxDistOC, + K = 100, + unitsOut = "ha", + starts = NULL, + method = "BFGS", + ... + ){ + + # Process inputs------------------------------------------------------------- + + form_hds <- as.formula(paste(c(as.character(detformulaDS), + as.character(lambdaformula)), collapse="")) + + if(is.null(detformulaPC)){ + form_pc <- as.formula(paste(c(as.character(detformulaDS), + as.character(lambdaformula)), collapse="")) + } else{ + form_pc <- as.formula(paste(c(as.character(detformulaPC), + as.character(lambdaformula)), collapse="")) + } + + if(is.null(detformulaOC)){ + form_oc <- as.formula(paste(c(as.character(detformulaDS), + as.character(lambdaformula)), collapse="")) + } else{ + form_oc <- as.formula(paste(c(as.character(detformulaOC), + as.character(lambdaformula)), collapse="")) + } + + formlist <- list(lam=lambdaformula, ds=form_hds, pc=form_pc, oc=form_oc, + phi=availformula) + + stopifnot(inherits(dataDS, "unmarkedFrameDS")) + stopifnot(inherits(dataPC, c("unmarkedFramePCount", "NULL"))) + stopifnot(inherits(dataOC, c("unmarkedFrameOccu", "NULL"))) + #stopifnot(!is.null(dataPC) | !is.null(dataOC)) + + has_avail <- FALSE + if(!is.null(durationDS) | !is.null(durationPC) | !is.null(durationOC)){ + has_avail <- TRUE + if(is.null(availformula)) availformula <- ~1 + form_avail <- as.formula(paste("~1", paste(as.character(availformula), collapse=""))) + stopifnot(!is.null(durationDS)) + if(!is.null(dataPC)) stopifnot(!is.null(durationPC)) + if(!is.null(dataOC)) stopifnot(!is.null(durationOC)) + } + + if(has_avail & !is.null(dataOC)){ + stop("Availability estimation doesn't work with detection-nondetection data", call.=FALSE) + } + + stopifnot(is.null(durationDS) || (length(durationDS) == numSites(dataDS))) + stopifnot(is.null(durationPC) || (length(durationPC) == numSites(dataPC))) + stopifnot(is.null(durationOC) || (length(durationOC) == numSites(dataOC))) + surveyDurations <- list(ds=durationDS, pc=durationPC, oc=durationOC) + + stopifnot(keyfun %in% c("halfnorm", "exp")) + keyidx <- switch(keyfun, "halfnorm"={1}, "exp"={2}) + + if(missing(maxDistPC)) maxDistPC <- max(dataDS@dist.breaks) + if(missing(maxDistOC)) maxDistOC <- max(dataDS@dist.breaks) + + + # Design matrices------------------------------------------------------------ + + # Need to add offset support here eventually + gd_hds <- getDesign(dataDS, form_hds) + if(any(is.na(gd_hds$y))){ + stop("Missing values in only some distance bins is not supported", call.=FALSE) + } + ds_hds <- get_ds_info(dataDS@dist.breaks) + Xavail_ds <- matrix(0,0,0) + if(has_avail) Xavail_ds <- getDesign(dataDS, form_avail)$X + if(is.null(durationDS)) durationDS <- rep(0,0) + + gd_pc <- list(y=matrix(0,0,0), X=matrix(0,0,0), V=matrix(0,0,0)) + ds_pc <- list(total_area=0, db=c(0,0), a=0, w=0, u=0) + Xavail_pc <- matrix(0,0,0) + if(is.null(durationPC)) durationPC <- rep(0,0) + if(!is.null(dataPC)){ + gd_pc <- getDesign(dataPC, form_pc) + ds_pc <- get_ds_info(c(0, maxDistPC)) + if(has_avail) Xavail_pc <- getDesign(dataPC, form_avail)$X + } + + gd_oc <- list(y=matrix(0,0,0), X=matrix(0,0,0), V=matrix(0,0,0)) + ds_oc <- list(total_area=0, db=c(0,0), a=0, w=0, u=0) + Kmin_oc <- rep(0,0) + Xavail_oc <- matrix(0,0,0) + if(is.null(durationOC)) durationOC <- rep(0,0) + if(!is.null(dataOC)){ + gd_oc <- getDesign(dataOC, form_oc) + ds_oc <- get_ds_info(c(0, maxDistOC)) + Kmin_oc <- apply(gd_oc$y, 1, max, na.rm=T) + if(has_avail) Xavail_oc <- getDesign(dataOC, form_avail)$X + } + + # Density conversion and unequal area correction + lam_adjust <- c(ds_hds$total_area, ds_pc$total_area, ds_oc$total_area) + names(lam_adjust) <- c("hds", "pc", "oc") + + switch(dataDS@unitsIn, + m = lam_adjust <- lam_adjust / 1e6, + km = lam_adjust <- lam_adjust) + + stopifnot(unitsOut %in% c("m","ha","kmsq")) + switch(unitsOut, + m = lam_adjust <- lam_adjust * 1e6, + ha = lam_adjust <- lam_adjust * 100, + kmsq = lam_adjust <- lam_adjust) + + # Parameter stuff------------------------------------------------------------ + # Doesn't support hazard + pind_mat <- matrix(0, nrow=5, ncol=2) + pind_mat[1,] <- c(1, ncol(gd_hds$X)) + pind_mat[2,] <- max(pind_mat) + c(1, ncol(gd_hds$V)) + if(!is.null(detformulaPC) & !is.null(dataPC)){ + pind_mat[3,] <- max(pind_mat) + c(1, ncol(gd_pc$V)) + } + if(!is.null(detformulaOC) & !is.null(dataOC)){ + pind_mat[4,] <- max(pind_mat) + c(1, ncol(gd_oc$V)) + } + if(has_avail){ + pind_mat[5,] <- max(pind_mat) + c(1, ncol(Xavail_ds)) + } + + if(is.null(starts)){ + lam_init <- log(mean(apply(dataDS@y, 1, sum, na.rm=TRUE)) / lam_adjust[1]) + params_tmb <- list(beta_lam = c(lam_init, rep(0, ncol(gd_hds$X)-1)), + beta_hds = c(log(median(dataDS@dist.breaks)),rep(0, ncol(gd_hds$V)-1)), + beta_pc = rep(0,0), + beta_oc = rep(0,0), + beta_avail = rep(0,0)) + + if(!is.null(detformulaPC) & !is.null(dataPC)){ + params_tmb$beta_pc <- c(log(maxDistPC/2), rep(0, ncol(gd_pc$V)-1)) + } + if(!is.null(detformulaOC) & !is.null(dataOC)){ + params_tmb$beta_oc <- c(log(maxDistOC/2), rep(0, ncol(gd_oc$V)-1)) + } + if(has_avail){ + params_tmb$beta_avail <- rep(0, ncol(Xavail_ds)) + } + starts <- unlist(params_tmb) + } else { + if(length(starts) != max(pind_mat)){ + stop("Length of starts should be ", max(pind_mat), call.=FALSE) + } + params_tmb <- list(beta_lam = starts[pind_mat[1,1]:pind_mat[1,2]], + beta_hds = starts[pind_mat[2,1]:pind_mat[2,2]], + beta_pc = rep(0,0), + beta_oc = rep(0,0), + beta_avail = rep(0,0)) + + if(!is.null(detformulaPC) & !is.null(dataPC)){ + params_tmb$beta_pc <- starts[pind_mat[3,1]:pind_mat[3,2]] + } + if(!is.null(detformulaOC) & !is.null(dataOC)){ + params_tmb$beta_oc <- starts[pind_mat[4,1]:pind_mat[4,2]] + } + if(has_avail){ + params_tmb$beta_avail <- starts[pind_mat[5,1]:pind_mat[5,2]] + } + } + + # Construct TMB data list---------------------------------------------------- + tmb_dat <- list( + # Common + pind = pind_mat, lam_adjust = lam_adjust, + + # HDS data + y_hds = gd_hds$y, X_hds = gd_hds$X, V_hds = gd_hds$V, key_hds = keyidx, + db_hds = dataDS@dist.breaks, a_hds = ds_hds$a, w_hds = ds_hds$w, + u_hds = ds_hds$u, + + # PC data + y_pc = gd_pc$y, X_pc = gd_pc$X, V_pc = gd_pc$V, key_pc = keyidx, + db_pc = c(0, maxDistPC), a_pc = ds_pc$a, w_pc = ds_pc$w, u_pc = ds_pc$u, + + # occ data + y_oc = gd_oc$y, X_oc = gd_oc$X, V_oc = gd_oc$V, key_oc = keyidx, + db_oc = c(0, maxDistOC), a_oc = ds_oc$a, w_oc = ds_oc$w, u_oc = ds_oc$u, + K_oc = K, Kmin_oc = Kmin_oc, + + # avail data + durationDS = durationDS, durationPC = durationPC, durationOC = durationOC, + Xa_hds = Xavail_ds, Xa_pc = Xavail_pc, Xa_oc = Xavail_oc + ) + + tmb_obj <- TMB::MakeADFun(data = c(model = "tmb_IDS", tmb_dat), parameters = params_tmb, + DLL = "unmarked_TMBExports", silent=TRUE) + + opt <- optim(unlist(params_tmb), fn=tmb_obj$fn, gr=tmb_obj$gr, method=method, ...) + + fmAIC <- 2 * opt$value + 2 * length(unlist(params_tmb)) + + sdr <- TMB::sdreport(tmb_obj) + + lam_coef <- get_coef_info(sdr, "lam", colnames(gd_hds$X), + pind_mat[1,1]:pind_mat[1,2]) + + lam_est <- unmarkedEstimate(name="Density", short.name="lam", + estimates = lam_coef$ests, covMat = lam_coef$cov, fixed=1:ncol(gd_hds$X), + invlink = "exp", invlinkGrad = "exp") + + dist_coef <- get_coef_info(sdr, "hds", colnames(gd_hds$V), + pind_mat[2,1]:pind_mat[2,2]) + + dist_est <- unmarkedEstimate(name="Distance sampling detection", short.name="ds", + estimates = dist_coef$ests, covMat = dist_coef$cov, fixed=1:ncol(gd_hds$V), + invlink = "exp", invlinkGrad = "exp") + + est_list <- list(lam=lam_est, ds=dist_est) + + if(!is.null(detformulaPC) & !is.null(dataPC)){ + pc_coef <- get_coef_info(sdr, "pc", colnames(gd_pc$V), + pind_mat[3,1]:pind_mat[3,2]) + + pc_est <- unmarkedEstimate(name="Point count detection", short.name="pc", + estimates = pc_coef$ests, covMat = pc_coef$cov, fixed=1:ncol(gd_pc$V), + invlink = "exp", invlinkGrad = "exp") + est_list <- c(est_list, list(pc=pc_est)) + } + + if(!is.null(detformulaOC) & !is.null(dataOC)){ + oc_coef <- get_coef_info(sdr, "oc", colnames(gd_oc$V), + pind_mat[4,1]:pind_mat[4,2]) + oc_est <- unmarkedEstimate(name="Presence/absence detection", short.name="oc", + estimates = oc_coef$ests, covMat = oc_coef$cov, fixed=1:ncol(gd_oc$V), + invlink = "exp", invlinkGrad = "exp") + est_list <- c(est_list, list(oc=oc_est)) + } + + if(has_avail){ + avail_coef <- get_coef_info(sdr, "avail", colnames(Xavail_ds), + pind_mat[5,1]:pind_mat[5,2]) + avail_est <- unmarkedEstimate(name="Availability", short.name="phi", + estimates=avail_coef$ests, covMat=avail_coef$cov, fixed=1:ncol(Xavail_ds), + invlink="exp", invlinkGrad="exp") + est_list <- c(est_list, list(phi=avail_est)) + } + + est_list <- unmarkedEstimateList(est_list) + + new("unmarkedFitIDS", fitType = "IDS", call = match.call(), + opt = opt, formula = lambdaformula, formlist=formlist, + data = dataDS, dataPC=dataPC, dataOC=dataOC, + surveyDurations=surveyDurations, + maxDist = list(pc=maxDistPC, oc=maxDistOC), + keyfun=keyfun, + # this needs to be fixed + sitesRemoved = gd_hds$removed.sites, + unitsOut=unitsOut, + estimates = est_list, AIC = fmAIC, negLogLike = opt$value, + nllFun = tmb_obj$fn, TMB=tmb_obj) + +} + +setMethod("summary", "unmarkedFitIDS", function(object) +{ + cat("\nCall:\n") + print(object@call) + cat("\n") + summaryOut <- summary(object@estimates) + cat("AIC:", object@AIC,"\n") + cat("Number of distance sampling sites:", numSites(object@data)) + if(!is.null(object@dataPC)){ + cat("\nNumber of point count sites:", numSites(object@dataPC)) + } + if(!is.null(object@dataOC)){ + cat("\nNumber of presence/absence sites:", numSites(object@dataOC)) + } + cat("\noptim convergence code:", object@opt$convergence) + cat("\noptim iterations:", object@opt$counts[1], "\n") + if(!identical(object@opt$convergence, 0L)) + warning("Model did not converge. Try providing starting values or increasing maxit control argment.") + cat("Bootstrap iterations:", length(object@bootstrapSamples), "\n\n") + invisible(summaryOut) +}) + +# Need special method since an included dataset may not have a corresponding +# unique submodel in the unmarked estimates +setMethod("names", "unmarkedFitIDS", function(x){ + out <- c("lam","ds") + if(!is.null(x@dataPC)) out <- c(out, "pc") + if(!is.null(x@dataOC)) out <- c(out, "oc") + if("phi" %in% names(x@estimates)) out <- c(out, "phi") + out +}) + +# This function converts IDS objects into simpler distsamp objects +# so we can re-use distsamp methods +# the abundance model (lam) is combined with one of the detection models +# to yield a distsamp model +IDS_convert_class <- function(inp, type, ds_type=NULL){ + stopifnot(type %in% names(inp)) + if(is.null(ds_type)) ds_type <- type + if(type == "lam") type <- "ds" + data <- inp@data + if(ds_type == "pc"){ + tempdat <- inp@dataPC + maxDist <- inp@maxDist$pc + } + if(ds_type == "oc"){ + tempdat <- inp@dataOC + maxDist <- inp@maxDist$oc + } + if(ds_type %in% c("pc", "oc")){ + if(is.null(maxDist)) maxDist <- max(data@dist.breaks) + data <- unmarkedFrameDS(y=tempdat@y, siteCovs=siteCovs(tempdat), + dist.breaks=c(0, maxDist), survey="point", + unitsIn=data@unitsIn) + } + + # Select appropriate model estimates; if sigma was not estimated + # separately for this model, pick the DS model for detection + det_mod <- type + if(! det_mod %in% names(inp@estimates)) det_mod <- "ds" + est <- inp@estimates@estimates[c("lam", det_mod)] + names(est) <- c("state", "det") + + form <- inp@formlist[[type]] + if(type=="phi") form <- as.formula(paste(c(as.character(form), "~1"), collapse="")) + + new("unmarkedFitDS", fitType="IDS", opt=inp@opt, formula=form, + data=data, keyfun=inp@keyfun, unitsOut=inp@unitsOut, + estimates=unmarkedEstimateList(est), + AIC=inp@AIC, output="density", TMB=inp@TMB) +} + +# This predict method uses IDS_convert_class to allow pass-through to +# distsamp predict method +setMethod("predict", "unmarkedFitIDS", function(object, type, newdata, + backTransform=TRUE, appendData=FALSE, level=0.95, ...){ + stopifnot(type %in% names(object)) + + # Special case of phi and no newdata + # We need a separate prediction for each detection dataset + if(type == "phi" & missing(newdata)){ + + dists <- names(object)[names(object) %in% c("ds", "pc", "oc")] + out <- lapply(dists, function(x){ + conv <- IDS_convert_class(object, "phi", ds_type=x) + predict(conv, "det", backTransform=backTransform, appendData=appendData, + level=level, ...) + }) + names(out) <- dists + + } else { # Regular situation + conv <- IDS_convert_class(object, type) + type <- switch(type, lam="state", ds="det", pc="det", oc="det", phi="det") + out <- predict(conv, type, newdata, backTransform=backTransform, appendData=appendData, + level=level, ...) + } + out +}) + +# Get availability probability +setGeneric("getAvail", function(object, ...) standardGeneric("getAvail")) + +# Get availability for each data type and site as a probability +setMethod("getAvail", "unmarkedFitIDS", function(object, ...){ + stopifnot("phi" %in% names(object)) + phi <- predict(object, "phi") + dur <- object@surveyDurations + out <- lapply(names(phi), function(x){ + 1 - exp(-1 * dur[[x]] * phi[[x]]$Predicted) + }) + names(out) <- names(phi) + out +}) + +# Fitted method returns a list of matrices, one per data type +setMethod("fitted", "unmarkedFitIDS", function(object, na.rm=FALSE){ + + dists <- names(object)[names(object) %in% c("ds", "pc")] + + # If there is an availability model, get availability + # Otherwise set it to 1 + avail <- list(ds=1, pc=1, oc=1) + if("phi" %in% names(object)){ + avail <- getAvail(object) + } + + # fitted for distance and N-mix data components + out <- lapply(dists, function(x){ + conv <- IDS_convert_class(object, type=x) + fitted(conv) * avail[[x]] + }) + names(out) <- dists + + # fitted for occupancy data + if("oc" %in% names(object)){ + conv <- IDS_convert_class(object, type="oc") + lam <- predict(conv, 'state')$Predicted + A <- pi*max(conv@data@dist.breaks)^2 + switch(conv@data@unitsIn, + m = A <- A / 1e6, + km = A <- A) + switch(conv@unitsOut, + m = A <- A * 1e6, + ha = A <- A * 100, + kmsq = A <- A) + lam <- lam * A + + p <- getP(conv) * avail$oc + out$oc <- 1 - exp(-lam*p) ## analytical integration. + } + + out +}) + +# getP returns detection probability WITHOUT availability +setMethod("getP", "unmarkedFitIDS", function(object, ...){ + + dets <- names(object)[! names(object) %in% c("lam","phi")] + + out <- lapply(dets, function(x){ + conv <- IDS_convert_class(object, type=x) + getP(conv) + }) + names(out) <- dets + out +}) + + +setMethod("residuals", "unmarkedFitIDS", function(object, ...){ + + dists <- names(object)[names(object) %in% c("ds", "pc")] + + # distance and N-mix data + out <- lapply(dists, function(x){ + conv <- IDS_convert_class(object, type=x) + residuals(conv) + }) + names(out) <- dists + + # occupancy data + if("oc" %in% names(object)){ + y <- object@dataOC@y + ft <- fitted(object)$oc + out$oc <- y - ft + } + + out +}) + +setMethod("hist", "unmarkedFitIDS", function(x, lwd=1, lty=1, ...){ + + conv <- IDS_convert_class(x, type='ds') + hist(conv, lwd=lwd, lty=lty, ...) + +}) + +setMethod("plot", c(x="unmarkedFitIDS", y="missing"), function(x, y, ...){ + + r <- residuals(x) + f <- fitted(x) + nr <- length(r) + long_names <- sapply(x@estimates@estimates, function(x) x@name) + long_names <- long_names[long_names != "Density"] + + old_par <- graphics::par()$mfrow + graphics::par(mfrow=c(nr,1)) + + for (i in 1:nr){ + plot(f[[i]], r[[i]], ylab = "Residuals", xlab = "Predicted values", + main=long_names[i]) + abline(h = 0, lty = 3, col = "gray") + } + graphics::par(mfrow=old_par) + +}) + + +setMethod("simulate", "unmarkedFitIDS", + function(object, nsim = 1, seed = NULL, na.rm = FALSE){ + + dets <- c("ds","pc","oc") + + if("phi" %in% names(object)) avail <- getAvail(object) + + temp <- lapply(dets, function(x){ + if(! x %in% names(object)) return(NULL) + conv <- IDS_convert_class(object, type=x) + sims <- simulate(conv, nsim=nsim, na.rm=na.rm) + # availability process + if("phi" %in% names(object)){ + sims <- lapply(sims, function(z){ + avail_expand <- matrix(rep(avail[[x]], ncol(z)), ncol=ncol(z)) + out <- rbinom(length(z), z, avail_expand) + matrix(out, nrow=nrow(z), ncol=ncol(z)) + }) + } + if(x=="oc"){ + sims <- lapply(sims, function(z){ + z[z>1] <- 1 + z + }) + } + sims + }) + + #"permute" + lapply(1:nsim, function(i){ + sim <- lapply(temp, function(x) x[[i]]) + names(sim) <- c("ds","pc","oc") + sim + }) + +}) + +setMethod("update", "unmarkedFitIDS", + function(object, lambdaformula, detformulaDS, detformulaPC, detformulaOC, + dataDS, dataPC, dataOC, ...){ + call <- object@call + + if(!missing(lambdaformula)){ + call[["lambdaformula"]] <- lambdaformula + } else { + call[["lambdaformula"]] <- object@formlist$lam + } + + if(!missing(detformulaDS)){ + call[["detformulaDS"]] <- detformulaDS + } else { + call[["detformulaDS"]] <- split_formula(object@formlist$ds)[[1]] + } + + if(!missing(detformulaPC)){ + call[["detformulaPC"]] <- detformulaPC + } else if(!is.null(object@dataPC) & !is.null(call$detformulaPC)){ + call[["detformulaPC"]] <- split_formula(object@formlist$pc)[[1]] + } + + if(!missing(detformulaOC)){ + call[["detformulaOC"]] <- detformulaOC + } else if(!is.null(object@dataOC) & !is.null(call$detformulaOC)){ + call[["detformulaOC"]] <- split_formula(object@formlist$oc)[[1]] + } + + if(!missing(dataDS)){ + call$dataDS <- dataDS + } else { + call$dataDS <- object@data + } + + if(!missing(dataPC)){ + call$dataPC <- dataPC + } else { + call$dataPC <- object@dataPC + } + + if(!missing(dataOC)){ + call$dataOC <- dataOC + } else { + call$dataOC <- object@dataOC + } + + extras <- match.call(call=sys.call(-1), + expand.dots = FALSE)$... + if (length(extras) > 0) { + existing <- !is.na(match(names(extras), names(call))) + for (a in names(extras)[existing]) + call[[a]] <- extras[[a]] + if (any(!existing)) { + call <- c(as.list(call), extras[!existing]) + call <- as.call(call) + } + } + + eval(call, parent.frame(2)) + +}) + + +setMethod("parboot", "unmarkedFitIDS", + function(object, statistic=SSE, nsim=10, ...) +{ + dots <- list(...) + statistic <- match.fun(statistic) + call <- match.call(call = sys.call(-1)) + starts <- as.numeric(coef(object)) + + t0 <- statistic(object, ...) + lt0 <- length(t0) + t.star <- matrix(NA, nsim, lt0) + #if(!missing(report)) + # cat("t0 =", t0, "\n") + + simList <- simulate(object, nsim = nsim, na.rm = FALSE) + + dataDS <- object@data + dataPC <- object@dataPC + has_pc <- "pc" %in% names(object) + dataOC <- object@dataOC + has_oc <- "oc" %in% names(object) + + t.star <- lapply(1:nsim, function(i){ + dataDS@y <- simList[[i]]$ds + if(has_pc) dataPC@y <- simList[[i]]$pc + if(has_oc) dataOC@y <- simList[[i]]$oc + fit <- update(object, dataDS=dataDS, dataPC=dataPC, dataOC=dataOC, + durationDS = object@surveyDurations$ds, + durationPC = object@surveyDurations$pc, + durationOC = object@surveyDurations$oc, + starts=starts) + statistic(fit) + }) + if(lt0 > 1){ + t.star <- t(t.star) + } else { + t.star <- matrix(t.star, ncol=lt0) + } + + if (!is.null(names(t0))){ + colnames(t.star) <- names(t0) + } else{ + colnames(t.star) <- paste("t*", 1:lt0, sep="") + } + + out <- new("parboot", call = call, t0 = t0, t.star = t.star) + return(out) +}) + +setMethod("SSE", "unmarkedFitIDS", function(fit, ...){ + r <- unlist(residuals(fit)) + return(c(SSE = sum(r^2, na.rm=T))) +}) + +setMethod("nonparboot", "unmarkedFitIDS", + function(object, B = 0, keepOldSamples = TRUE, ...) +{ + stop("Not currently supported for unmarkedFitIDS", call.=FALSE) +}) + +setMethod("ranef", "unmarkedFitIDS", + function(object, B = 0, keepOldSamples = TRUE, ...) +{ + stop("Not currently supported for unmarkedFitIDS", call.=FALSE) +}) diff --git a/R/RcppExports.R b/R/RcppExports.R index 79ab9671..70b57ac2 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -49,6 +49,10 @@ nll_occu <- function(y, X, V, beta_psi, beta_p, nd, knownOcc, navec, X_offset, V .Call(`_unmarked_nll_occu`, y, X, V, beta_psi, beta_p, nd, knownOcc, navec, X_offset, V_offset, link_psi) } +nll_occuCOP <- function(y, L, Xpsi, Xlambda, beta_psi, beta_lambda, removed) { + .Call(`_unmarked_nll_occuCOP`, y, L, Xpsi, Xlambda, beta_psi, beta_lambda, removed) +} + nll_occuMS <- function(beta, y, dm_state, dm_phi, dm_det, sind, pind, dind, prm, S, T, J, N, naflag, guide) { .Call(`_unmarked_nll_occuMS`, beta, y, dm_state, dm_phi, dm_det, sind, pind, dind, prm, S, T, J, N, naflag, guide) } diff --git a/R/boot.R b/R/boot.R index 8f854c87..3da4fd08 100644 --- a/R/boot.R +++ b/R/boot.R @@ -69,7 +69,7 @@ setMethod("parboot", "unmarkedFit", function(object, statistic=SSE, nsim=10, simdata <- replaceY(object@data, x) tryCatch({ #if(runif(1,0,1) < 0.5) stop("fail") # for testing error trapping - fit <- update(object, data=simdata, starts=starts, se=FALSE) + fit <- update(object, data = simdata, starts = starts, se = FALSE) statistic(fit, ...) }, error=function(e){ t0[] <- NA diff --git a/R/occuCOP.R b/R/occuCOP.R new file mode 100644 index 00000000..f22c2354 --- /dev/null +++ b/R/occuCOP.R @@ -0,0 +1,964 @@ +# Fit the occupancy model COP +# (Counting Occurrences Process) + +# Occupancy +# z_i ~ Bernoulli(psi_i) +# +# with: +# z_i = Occupancy state of site i +# = 1 if the site i is occupied +# = 0 else +# psi_i = Occupancy probability of site i + +# Detection +# N_ij | z_i = 1 ~ Poisson(lambda_ij*L_ij) +# N_ij | z_i = 0 ~ 0 +# +# with: +# N_ij = Number of detections of site i during observation j +# z_i = Occupancy state of site i +# lambda_ij = Detection rate of the observation j in site i +# L_ij = Length/Duration of the observation j in site i + +# CLASSES ---------------------------------------------------------------------- + +## unmarkedFrameOccuCOP class ---- +setClass( + "unmarkedFrameOccuCOP", + representation(L = "matrix"), + contains = "unmarkedFrame", + validity = function(object) { + errors <- character(0) + M <- nrow(object@y) + J <- ncol(object@y) + y_integers = sapply(object@y, check.integer, na.ignore = T) + if (!all(y_integers)) { + errors <- c(errors, + paste( + "Count detection should be integers. Non-integer values:", + paste(object@y[which(!y_integers)], collapse = ', ') + )) + } + if (!all(all(dim(object@L) == dim(object@y)))){ + errors <- c( errors, paste( + "L should be a matrix of the same dimension as y, with M =", M, + "rows (sites) and J =", J, "columns (sampling occasions)." + ))} + if (length(errors) == 0) TRUE + else errors + } +) + +## unmarkedFitOccuCOP class ---- +setClass("unmarkedFitOccuCOP", + representation(removed_obs = "matrix", + formlist = "list"), + contains = "unmarkedFit") + + +# Methods ---------------------------------------------------------------------- + +## getDesign method ---- +setMethod( + "getDesign", "unmarkedFrameOccuCOP", + function(umf, formlist, na.rm = TRUE) { + + " + getDesign convert the information in the unmarkedFrame to a format + usable by the likelihood function: + - It creates model design matrices for fixed effects (X) for each parameter (here lambda, psi) + - It creates model design matrices for random effects (Z) for each parameter (here lambda, psi) + - It handles missing data + " + + # Retrieve useful informations from umf + M <- numSites(umf) + J <- obsNum(umf) + y <- getY(umf) + L <- getL(umf) + + # Occupancy submodel ------------------------------------------------------- + # Retrieve the fixed-effects part of the formula + psiformula <- lme4::nobars(as.formula(formlist$psiformula)) + psiVars <- all.vars(psiformula) + + # Retrieve the site covariates + sc <- siteCovs(umf) + if(is.null(sc)) { + sc <- data.frame(.dummy = rep(0, M)) + } + + # Check for missing variables + psiMissingVars <- psiVars[!(psiVars %in% names(sc))] + if (length(psiMissingVars) > 0) { + stop(paste0( + "Variable(s) '", + paste(psiMissingVars, collapse = "', '"), + "' not found in siteCovs" + ), call. = FALSE) + } + + # State model matrix for fixed effects + Xpsi <- model.matrix( + psiformula, + model.frame(psiformula, sc, na.action = NULL) + ) + # State model matrix for random effects + Zpsi <- get_Z(formlist$psiformula, sc) + + # Detection submodel ------------------------------------------------------- + + # Retrieve the fixed-effects part of the formula + lambdaformula <- lme4::nobars(as.formula(formlist$lambdaformula)) + lambdaVars <- all.vars(lambdaformula) + + # Retrieve the observation covariates + oc <- obsCovs(umf) + if(is.null(oc)) { + oc <- data.frame(.dummy = rep(0, M*J)) + } + + # Check for missing variables + lambdaMissingVars <- lambdaVars[!(lambdaVars %in% names(oc))] + if (length(lambdaMissingVars) > 0) { + stop(paste( + "Variable(s)", + paste(lambdaMissingVars, collapse = ", "), + "not found in obsCovs" + ), call. = FALSE) + } + + # Detection model matrix for fixed effects + Xlambda <- model.matrix( + lambdaformula, + model.frame(lambdaformula, oc, na.action = NULL) + ) + # Detection model matrix for random effects + Zlambda <- get_Z(formlist$lambdaformula, oc) + + # Missing data ------------------------------------------------------------- + + # Missing count data + missing_y <- is.na(y) + + # Missing site covariates + # (TRUE if at least one site covariate is missing in a site) + missing_sc <- apply(Xpsi, 1, function(x) any(is.na(x))) + + # Missing observation covariates + # (TRUE if at least one observation covariate is missing in a sampling occasion in a site) + missing_oc <- apply(Xlambda, 1, function(x) any(is.na(x))) + + # Matrix MxJ of values to not use because there is some data missing + removed_obs <- + # If there is count data missing in site i and obs j + missing_y | + # If there is any site covariate missing in site i + replicate(n = J, missing_sc) | + # If there is any observation covariate missing in site i and obs j + matrix(missing_oc, M, J, byrow = T) + + if (any(removed_obs)) { + if (na.rm) { + nb_missing_sites <- sum(rowSums(!removed_obs) == 0) + nb_missing_observations <- sum(is.na(removed_obs)) + warning("There is missing data: ", + sum(missing_y), " missing count data, ", + sum(missing_sc), " missing site covariate(s), ", + sum(missing_oc), " missing observation covariate(s). ", + "Data from only ", (M*J)-sum(removed_obs), " observations out of ", (M*J), " are used, ", + "from ", M-nb_missing_sites, " sites out of ", M, ".\n\t" + ) + } else { + stop("na.rm=FALSE and there is missing data :\n\t", + sum(missing_y), " missing count data (y)\n\t", + sum(missing_sc), " missing site covariates (siteCovs)\n\t", + sum(missing_oc), " missing observation covariates (obsCovs)") + } + } + + # Output ------------------------------------------------------------------- + return(list( + y = y, + Xpsi = Xpsi, + Zpsi = Zpsi, + Xlambda = Xlambda, + Zlambda = Zlambda, + removed_obs = removed_obs + )) + }) + + +## getL method ---- +setGeneric("getL", function(object) standardGeneric("getL")) +setMethod("getL", "unmarkedFrameOccuCOP", function(object) { + return(object@L) +}) + + +## show method ---- +setMethod("show", "unmarkedFrameOccuCOP", function(object) { + J <- ncol(object@L) + df_unmarkedFrame <- as(object, "data.frame") + df_L <- data.frame(object@L) + colnames(df_L) <- paste0("L.", 1:J) + if (ncol(df_unmarkedFrame) > J) { + df <- cbind(df_unmarkedFrame[, 1:J, drop = FALSE], + df_L, + df_unmarkedFrame[, (J + 1):ncol(df_unmarkedFrame), drop = FALSE]) + } else { + df <- cbind(df_unmarkedFrame[, 1:J], + df_L) + } + cat("Data frame representation of unmarkedFrame object.\n") + print(df) +}) +# LP note: as is defined in unmarkedFrame.R part "COERCION" + + +## summary method ---- +setMethod("summary", "unmarkedFrameOccuCOP", function(object,...) { + cat("unmarkedFrameOccuCOP Object\n\n") + + cat(nrow(object@y), "sites\n") + cat("Maximum number of sampling occasions per site:",obsNum(object),"\n") + mean.obs <- mean(rowSums(!is.na(getY(object)))) + cat("Mean number of sampling occasions per site:",round(mean.obs,2),"\n") + cat("Sites with at least one detection:", + sum(apply(getY(object), 1, function(x) any(x > 0, na.rm=TRUE))), + "\n\n") + + cat("Tabulation of y observations:") + print(table(object@y, exclude=NULL)) + + if(!is.null(object@L)) { + cat("\nTabulation of sampling occasions length:") + print(table(object@L)) + } + + if(!is.null(object@siteCovs)) { + cat("\nSite-level covariates:\n") + print(summary(object@siteCovs)) + } + + if(!is.null(object@obsCovs)) { + cat("\nObservation-level covariates:\n") + print(summary(object@obsCovs)) + } +}) + + +## umf[i, j] ---- +setMethod("[", c("unmarkedFrameOccuCOP", "numeric", "numeric", "missing"), + function(x, i, j) { + # Gey dimensions of x + M <- numSites(x) + J <- obsNum(x) + + if (length(i) == 0 & length(j) == 0) { + return(x) + } + + # Check i + if (any(i < 0) && + any(i > 0)) { + stop("i must be all positive or all negative indices.") + } + if (all(i < 0)) { + i <- (1:M)[i] + } + + # Check j + if (any(j < 0) && + any(j > 0)) { + stop("j must be all positive or all negative indices.") + } + if (all(j < 0)) { + j <- (1:J)[j] + } + + # y observation count data subset + y <- getY(x)[i, j, drop = FALSE] + if (min(length(i), length(j)) == 1) { + y <- t(y) + } + + # L subset + L <- x@L[i, j, drop = FALSE] + if (min(length(i), length(j)) == 1) { + L <- t(L) + } + + # siteCovs subset + siteCovs <- siteCovs(x) + if (!is.null(siteCovs)) { + siteCovs <- siteCovs(x)[i, , drop = FALSE] + } + + # obsCovs subset + obsCovs <- obsCovs(x) + if (!is.null(obsCovs)) { + MJ_site <- rep(1:M, each = J) + MJ_obs <- rep(1:J, times = M) + obsCovs <- obsCovs[((MJ_obs %in% j) & (MJ_site %in% i)), , drop = FALSE] + rownames(obsCovs) <- NULL + } + + # Recreate umf + new( + Class = "unmarkedFrameOccuCOP", + y = y, + L = L, + siteCovs = siteCovs, + obsCovs = obsCovs, + obsToY = diag(length(j)), + mapInfo = x@mapInfo + ) + }) + + +## umf[i, ] ---- +setMethod("[", c("unmarkedFrameOccuCOP", "numeric", "missing", "missing"), + function(x, i) { + x[i, 1:obsNum(x)] + }) + +## umf[, j] ---- +setMethod("[", c("unmarkedFrameOccuCOP", "missing", "numeric", "missing"), + function(x, j) { + x[1:numSites(x), j] + }) + + +## fl_getY ---- +setMethod("fl_getY", "unmarkedFitOccuCOP", function(fit, ...){ + getDesign(getData(fit), fit@formlist)$y +}) + + +## predict_inputs_from_umf ---- +setMethod("predict_inputs_from_umf", "unmarkedFitOccuCOP", + function(object, type, newdata, na.rm, re.form = NULL) { + designMats = getDesign(umf = newdata, + formlist = object@formlist, + na.rm = na.rm) + if (type == "psi") list_els <- c("Xpsi", "Zpsi") + if (type == "lambda") list_els <- c("Xlambda", "Zlambda") + X <- designMats[[list_els[1]]] + if (is.null(re.form)) X <- cbind(X, designMats[[list_els[2]]]) + return(list(X = X, offset = NULL)) + }) + + +## get_formula ---- +setMethod("get_formula", "unmarkedFitOccuCOP", function(object, type, ...) { + fl <- object@formlist + switch(type, psi = fl$psiformula, lambda = fl$lambdaformula) +}) + + +## get_orig_data ---- +setMethod("get_orig_data", "unmarkedFitOccuCOP", function(object, type, ...){ + clean_covs <- clean_up_covs(object@data, drop_final=FALSE) + datatype <- switch(type, psi = 'site_covs', lambda = 'obs_covs') + clean_covs[[datatype]] +}) + + +## getP ---- +setMethod("getP", "unmarkedFitOccuCOP", function(object, na.rm = TRUE) { + data <- object@data + M = nrow(getY(data)) + J = ncol(getY(data)) + des <- getDesign(data, object@formlist, na.rm = na.rm) + matLambda = do.call(object["lambda"]@invlink, + list(matrix( + as.numeric(des$Xlambda %*% coef(object, 'lambda')), + nrow = M, ncol = J, byrow = T))) + return(matLambda) +}) + + +## fitted ---- +setMethod("fitted", "unmarkedFitOccuCOP", function(object, na.rm = FALSE) { + data <- object@data + M = nrow(getY(data)) + J = ncol(getY(data)) + des <- getDesign(data, object@formlist, na.rm = na.rm) + estim_psi = as.numeric(do.call(object["psi"]@invlink, + list(as.matrix(des$Xpsi %*% coef(object, 'psi'))))) + estim_lambda = do.call(object["lambda"]@invlink, + list(matrix( + as.numeric(des$Xlambda %*% coef(object, 'lambda')), + nrow = M, ncol = J, byrow = T))) + return(estim_psi * estim_lambda) +}) + + +## residuals ---- +setMethod("residuals", "unmarkedFitOccuCOP", function(object) { + y <- getY(object@data) + e <- fitted(object) + r <- y - e + return(r) +}) + + +## plot ---- +setMethod("plot", c(x = "unmarkedFitOccuCOP", y = "missing"), function(x, y, ...) { + y <- getY(x) + r <- residuals(x) + e <- fitted(x) + + old_graph <- graphics::par("mfrow", "mar") + on.exit(graphics::par(mfrow = old_graph$mfrow, mar = old_graph$mar)) + + { + graphics::par(mfrow = c(2, 1)) + graphics::par(mar = c(0, 5, 2, 2)) + plot(e, y, + ylab = "Observed data", + xlab = "Predicted data", + xaxt = 'n') + abline(a = 0, b = 1, lty = 3, col = "red") + title(main = "COP model - detection events count", outer = F) + + graphics::par(mar = c(4, 5, 0.5, 2)) + plot(e, r, + ylab = "Residuals", + xlab = "Predicted data") + abline(h = 0, lty = 3, col = "red") + } +}) + + +## get_umf_components ---- +setMethod("get_umf_components", "unmarkedFitOccuCOP", + function(object, formulas, guide, design, ...){ + sc <- generate_data(formulas$psi, guide, design$M) + oc <- generate_data(formulas$lambda, guide, design$J*design$M) + yblank <- matrix(0, design$M, design$J) + list(y=yblank, siteCovs=sc, obsCovs=oc) +}) + + +## simulate_fit ---- +setMethod("simulate_fit", "unmarkedFitOccuCOP", + function(object, formulas, guide, design, ...){ + # Generate covariates and create a y matrix of zeros + parts <- get_umf_components(object, formulas, guide, design, ...) + umf <- unmarkedFrameOccuCOP(y = parts$y, siteCovs = parts$siteCovs, obsCovs=parts$obsCovs) + fit <- suppressMessages( + occuCOP( + data = umf, + psiformula = formula(formulas$psi), + lambdaformula = formula(formulas$lambda), + se = FALSE, + control = list(maxit = 1) + ) + ) + return(fit) +}) + + +## simulate ---- +setMethod("simulate", "unmarkedFitOccuCOP", + function(object, nsim = 1, seed = NULL, na.rm = TRUE){ + # set.seed(seed) + # Purposefully not implemented + formula <- object@formula + umf <- object@data + designMats <- getDesign(umf = umf, formlist = object@formlist, na.rm = na.rm) + y <- designMats$y + M <- nrow(y) + J <- ncol(y) + + # Occupancy probability psi depending on the site covariates + psiParms = coef(object, type = "psi", fixedOnly = FALSE) + psi <- as.numeric(plogis(as.matrix(designMats$Xpsi %*% psiParms))) + + # Detection rate lambda depending on the observation covariates + lambda = getP(object = object) + + # Simulations + simList <- vector("list", nsim) + for(i in 1:nsim) { + Z <- rbinom(M, 1, psi) + # Z[object@knownOcc] <- 1 + y = matrix(rpois(n = M * J, lambda = as.numeric(t(lambda))), + nrow = M, ncol = J, byrow = T) * Z + simList[[i]] <- y + } + return(simList) +}) + + +## nonparboot ---- +setMethod("nonparboot", "unmarkedFitOccuCOP", + function(object, B = 0, keepOldSamples = TRUE, ...) { + stop("Not currently supported for unmarkedFitOccuCOP", call.=FALSE) +}) + + +## ranef ---- +setMethod("ranef", "unmarkedFitOccuCOP", function(object, ...) { + # Sites removed (srm) and sites kept (sk) + srm <- object@sitesRemoved + if (length(srm) > 0) { + sk = 1:numSites(getData(object))[-srm] + } else{ + sk = 1:numSites(getData(object)) + } + + # unmarkedFrame informations + M <- length(sk) + J <- obsNum(getData(object)) + y <- getY(getData(object))[sk,] + + # Estimated parameters + psi <- predict(object, type = "psi")[sk, 1] + lambda <- getP(object)[sk,] + + # Estimate posterior distributions + z = c(0, 1) + post <- array(0, c(M, 2, 1), dimnames = list(NULL, z)) + for (i in 1:M) { + # psi density + f <- dbinom(x = z, + size = 1, + prob = psi[i]) + + # lambda density + g <- c(1, 1) + for (j in 1:J) { + if (is.na(y[i, j]) | is.na(lambda[i, j])) { + next + } + g = g * dpois(x = y[i, j], lambda = lambda[i, j] * z) + } + + # psi*lambda density + fudge <- f * g + post[i, , 1] <- fudge / sum(fudge) + } + + new("unmarkedRanef", post = post) +}) + + +# Useful functions ------------------------------------------------------------- + +check.integer <- function(x, na.ignore = F) { + if (is.na(x) & na.ignore) { + return(T) + } + x %% 1 == 0 +} + +# unmarkedFrame ---------------------------------------------------------------- + +unmarkedFrameOccuCOP <- function(y, L, siteCovs = NULL, obsCovs = NULL) { + + # Verification that they are non-NA data in y + if (all(is.na(y))) { + stop("y only contains NA. y needs to contain non-NA integers.") + } + + # Verification that these are count data (and not detection/non-detection) + if (max(y, na.rm = T) == 1) { + warning("unmarkedFrameOccuCOP is for count data. ", + "The data furnished appear to be detection/non-detection.") + } + + # Number of sampling occasions + J <- ncol(y) + + # If missing L: replace by matrix of 1 + # and the lambda will be the detection rate per observation length + if (missing(L)) { + L <- y * 0 + 1 + warning("L is missing, replacing it by a matrix of 1.") + } else if (is.null(L)) { + L <- y * 0 + 1 + warning("L is missing, replacing it by a matrix of 1.") + } + + # Transformation observation covariates + obsCovs <- covsToDF( + covs = obsCovs, + name = "obsCovs", + obsNum = J, + numSites = nrow(y) + ) + + # Create S4 object of class unmarkedFrameOccuCOP + umf <- new( + Class = "unmarkedFrameOccuCOP", + y = y, + L = L, + siteCovs = siteCovs, + obsCovs = obsCovs, + obsToY = diag(J) + ) + + return(umf) +} + + +# occuCOP ---------------------------------------------------------------------- + +occuCOP <- function(data, + psiformula = ~1, + lambdaformula = ~1, + psistarts, + lambdastarts, + starts, + method = "BFGS", + se = TRUE, + engine = c("C", "R"), + na.rm = TRUE, + return.negloglik = NULL, + L1 = FALSE, + ...) { + #TODO: random effects + + # Neg loglikelihood COP ------------------------------------------------------ + R_nll_occuCOP <- function(params) { + + # Reading and transforming params + # Taking into account the covariates + + # psi as a function of covariates + # psi in params are transformed with a logit transformation (qlogis) + # so they're back-transformed to a proba with inverse logit (plogis) + # Xpsi is the matrix with occupancy covariates + # params is the vector with all the parameters + # psiIdx is the index of Occupancy Parameters in params + psi <- plogis(Xpsi %*% params[psiIdx]) + + # lambda as a function of covariates + # lambda in params are transformed with a log-transformation + # so they're back-transformed to a rate with exp here + # Xlambda is the matrix with detection covariates + # params is the vector with all the parameters + # lambdaIdx is the index of Occupancy Parameters in params + lambda <- exp(Xlambda %*% params[lambdaIdx]) + + # Listing sites analysed = sites not removed (due to NAs) + if (length(sitesRemoved) > 0) { + siteAnalysed = (1:M)[-sitesRemoved] + } else { + siteAnalysed = (1:M) + } + + # Probability for each site (i) + iProb <- rep(NA, M) + + for (i in siteAnalysed) { + # iIdx is the index to access the vectorised vectors of all obs in site i + iIdxall <- ((i - 1) * J + 1):(i * J) + + # Removing NAs + iIdx = iIdxall[!removed_obsvec[iIdxall]] + + if (SitesWithDetec[i]) { + # If there is at least one detection in site i + iProb[i] = psi[i] * ( + (sum(lambda[iIdx] * Lvec[iIdx])) ^ sum(yvec[iIdx]) / + factorial(sum(yvec[iIdx])) * + exp(-sum(lambda[iIdx] * Lvec[iIdx])) + ) + + } else { + # If there is zero detection in site i + iProb[i] = psi[i] * exp(-sum(lambda[iIdx] * Lvec[iIdx])) + (1 - psi[i]) + + } + } + + # log-likelihood + ll = sum(log(iProb[siteAnalysed])) + return(-ll) + } + + # Check arguments ------------------------------------------------------------ + if (!is(data, "unmarkedFrameOccuCOP")) { + stop("Data is not an unmarkedFrameOccuCOP object. See ?unmarkedFrameOccuCOP if necessary.") + } + stopifnot(class(psiformula) == "formula") + stopifnot(class(lambdaformula) == "formula") + if(!missing(psistarts)){stopifnot(class(psistarts) %in% c("numeric", "double", "integer"))} + if(!missing(lambdastarts)){stopifnot(class(lambdastarts) %in% c("numeric", "double", "integer"))} + se = as.logical(match.arg( + arg = as.character(se), + choices = c("TRUE", "FALSE", "0", "1") + )) + na.rm = as.logical(match.arg( + arg = as.character(na.rm), + choices = c("TRUE", "FALSE", "0", "1") + )) + engine <- match.arg(engine, c("C", "R")) + L1 = as.logical(match.arg( + arg = as.character(L1), + choices = c("TRUE", "FALSE", "0", "1") + )) + + + # Do not yet manage random effects!!! + if (has_random(psiformula) | has_random(lambdaformula)) { + stop("occuCOP does not currently handle random effects.") + } + + # Format input data ---------------------------------------------------------- + + # Retrieve formlist + formlist <- mget(c("psiformula", "lambdaformula")) + + # Get the design matrix (calling the getDesign method for unmarkedFrame) + # For more informations, see: getMethod("getDesign", "unmarkedFrameOccuCOP") + designMats <- getDesign(umf = data, formlist = formlist, na.rm = na.rm) + + # y is the count detection data (matrix of size M sites x J observations) + y <- getY(data) + + # L is the length of observations (matrix of size M sites x J observations) + L <- getL(data) + if (!L1) { + if (!any(is.na(L))) { + if (all(L == 1)) { + warning( + "All observations lengths (L) are set to 1. ", + "If they were not user-defined, lambda corresponds to the ", + "detection rate multiplied by the observation length, ", + "not just the detection rate per time-unit or space-unit.\n", + "You can remove this warning by adding 'L1=TRUE' in the function inputs." + ) + } + } + } + + # Xpsi is the fixed effects design matrix for occupancy + Xpsi <- designMats$Xpsi + + # Xlambda is the fixed effects design matrix for detection rate + Xlambda <- designMats$Xlambda + + # Zpsi is the random effects design matrix for occupancy + Zpsi <- designMats$Zpsi + + # Zlambda is the random effects design matrix for detection rate + Zlambda <- designMats$Zlambda + + # removed_obs is a M x J matrix of the observations removed from the analysis + removed_obs <- designMats$removed_obs + sitesRemoved <- unname(which(apply(removed_obs, 1, function(x) all(x)))) + + # Number of sites + M <- nrow(y) + + # Number of sampling occasions + J <- ncol(y) + + # Total number of detection per site + NbDetecPerSite = rowSums(y, na.rm=T) + + # Sites where there was at least one detection + SitesWithDetec = NbDetecPerSite > 0 + + # Number of sites where there was at least one detection + NbSitesWithDetec = sum(SitesWithDetec) + + # Set up parameter names and indices----------------------------------------- + + # ParamPsi Occupancy parameter names + ParamPsi <- colnames(Xpsi) + + # ParamLambda Detection parameter names + ParamLambda <- colnames(Xlambda) + + # NbParamPsi Number of occupancy parameters + NbParamPsi <- ncol(Xpsi) + + # NbParamLambda Number of detection parameters + NbParamLambda <- ncol(Xlambda) + + # nP Total number of parameters + nP <- NbParamPsi + NbParamLambda + + # psiIdx Index of the occupancy parameters + psiIdx <- 1:NbParamPsi + + # lambdaIdx Index of the detection parameters + lambdaIdx <- (NbParamPsi+1):nP + + # Re-format some variables for C and R engines + # From Matrix of dim MxJ to vector of length MxJ: + # c(ySite1Obs1, ySite1Obs2, ..., ySite1ObsJ, ysite2Obs1, ...) + yvec <- as.numeric(t(y)) + Lvec <- as.numeric(t(L)) + removed_obsvec <- as.logical(t(removed_obs)) + + # return.negloglik ----------------------------------------------------------- + if (!is.null(return.negloglik)) { + df_NLL = data.frame(t(as.data.frame(return.negloglik))) + rownames(df_NLL) = NULL + colnames(df_NLL) = c(paste0("logit(psi).", ParamPsi), + paste0("log(lambda).", ParamLambda)) + df_NLL$nll = NA + for (i in 1:nrow(df_NLL)) { + df_NLL$nll[i] = R_nll_occuCOP(params = as.numeric(as.vector(df_NLL[i, -ncol(df_NLL)]))) + } + return(df_NLL) + } + + # nll function depending on engine ------------------------------------------- + if (identical(engine, "C")) { + nll <- function(params) { + nll_occuCOP( + y = yvec, + L = Lvec, + Xpsi = Xpsi, + Xlambda = Xlambda, + beta_psi = params[psiIdx], + beta_lambda = params[lambdaIdx], + removed = removed_obsvec + ) + } + } else if (identical(engine, "R")) { + nll <- R_nll_occuCOP + } + + + # Optimisation --------------------------------------------------------------- + + ## Checking the starting point for optim ---- + # Check if either (psistarts AND lambdastarts) OR starts is provided + if (!missing(psistarts) & !missing(lambdastarts)) { + # Both psistarts and lambdastarts provided + if (!missing(starts)){ + if (!all(c(psistarts, lambdastarts) == starts)) { + warning( + "You provided psistarts, lambdastarts and starts. ", + "Please provide either (psistarts AND lambdastarts) OR starts. ", + "Using psistarts and lambdastarts." + ) + } + } + if (length(lambdastarts) != NbParamLambda) { + stop("lambdastarts (", paste(lambdastarts, collapse = ", "), ") ", + "should be of length ", NbParamLambda, " with lambdaformula ", lambdaformula) + } + if (length(psistarts) != NbParamPsi) { + stop("psistarts (", paste(psistarts, collapse = ", "), ") ", + "should be of length ", NbParamPsi, " with psiformula ", psiformula) + } + starts <- c(psistarts, lambdastarts) + } else if (!missing(starts)) { + # starts provided + if (length(starts) != nP) { + stop("starts (", paste(starts, collapse = ", "), ") ", + "should be of length ", nP, + " with psiformula ", psiformula, + " and lambdaformula ", lambdaformula) + } + + psistarts <- starts[1:NbParamPsi] + lambdastarts <- starts[(NbParamPsi + 1):(NbParamPsi + NbParamLambda)] + + } else { + # No arguments provided, apply default values + + if (missing(lambdastarts)) { + # If lambda starts argument was not given: + # 0 by default + # so lambda = exp(0) = 1 by default + lambdastarts = rep(0, NbParamLambda) + } else if (length(lambdastarts) != NbParamLambda) { + stop("lambdastarts (", paste(lambdastarts, collapse = ", "), ") ", + "should be of length ", NbParamLambda, " with lambdaformula ", lambdaformula) + } + + if (missing(psistarts)) { + # If psi starts argument was not given + # 0 by default + # so psi = plogis(0) = 0.5 by default + psistarts = rep(0, NbParamPsi) + } else if (length(psistarts) != NbParamPsi) { + stop("psistarts (", paste(psistarts, collapse = ", "), ") ", + "should be of length ", NbParamPsi, " with psiformula ", psiformula) + } + + starts <- c(psistarts, lambdastarts) + } + + ## Run optim ---- + opt <- optim( + starts, + nll, + method = method, + hessian = se, + ... + ) + + # Get output ----------------------------------------------------------------- + covMat <- invertHessian(opt, nP, se) + ests <- opt$par + tmb_mod <- NULL + fmAIC <- 2 * opt$value + 2 * nP + + # Organize effect estimates + names(ests) <- c(ParamPsi, ParamLambda) + psi_coef <- list(ests = ests[psiIdx], cov = as.matrix(covMat[psiIdx, psiIdx])) + lambda_coef <- list(ests = ests[lambdaIdx], + cov = as.matrix(covMat[lambdaIdx, lambdaIdx])) + + # No random effects + psi_rand_info <- lambda_rand_info <- list() + + # Create unmarkedEstimates --------------------------------------------------- + psi_uE <- unmarkedEstimate( + name = "Occupancy probability", + short.name = "psi", + estimates = psi_coef$ests, + covMat = psi_coef$cov, + fixed = 1:NbParamPsi, + invlink = "logistic", + invlinkGrad = "logistic.grad", + randomVarInfo = psi_rand_info + ) + + lambda_uE <- unmarkedEstimate( + name = "Detection rate", + short.name = "lambda", + estimates = lambda_coef$ests, + covMat = lambda_coef$cov, + fixed = 1:NbParamLambda, + invlink = "exp", + invlinkGrad = "exp", + randomVarInfo = lambda_rand_info + ) + + estimateList <- unmarkedEstimateList(list(psi = psi_uE, lambda = lambda_uE)) + + # Create unmarkedFit object-------------------------------------------------- + umfit <- new( + "unmarkedFitOccuCOP", + fitType = "occuCOP", + call = match.call(), + formula = as.formula(paste( + formlist["lambdaformula"], formlist["psiformula"], collapse = "" + )), + formlist = formlist, + data = data, + estimates = estimateList, + sitesRemoved = sitesRemoved, + removed_obs = removed_obs, + AIC = fmAIC, + opt = opt, + negLogLike = opt$value, + nllFun = nll, + TMB = tmb_mod + ) + + return(umfit) +} diff --git a/R/simulate.R b/R/simulate.R index efc372a4..b73f3fdd 100644 --- a/R/simulate.R +++ b/R/simulate.R @@ -76,6 +76,19 @@ setMethod("simulate", "character", } else if(object=="gdistremoval"){ umf@yDistance=x$yDistance umf@yRemoval=x$yRemoval + } else if(object == "IDS"){ + out <- list() + out$ds <- fit@data + out$ds@y <- x$ds + if("pc" %in% names(fit)){ + out$pc <- fit@dataPC + out$pc@y <- x$pc + } + if("oc" %in% names(fit)){ + out$oc <- fit@dataOC + out$oc@y <- x$oc + } + umf <- out } else { umf@y <- x } @@ -185,7 +198,6 @@ setMethod("simulate_fit", "unmarkedFitOccuRN", data=umf, se=FALSE, control=list(maxit=1)) }) - setMethod("get_umf_components", "unmarkedFitMPois", function(object, formulas, guide, design, ...){ args <- list(...) @@ -556,3 +568,107 @@ setMethod("simulate_fit", "unmarkedFitGDR", data=umf, keyfun=keyfun, output=output, unitsOut=unitsOut, mixture=mixture, K=K, se=FALSE, control=list(maxit=1), method='L-BFGS-B') }) + +# For simulating entirely new datasets +setMethod("get_umf_components", "unmarkedFitIDS", + function(object, formulas, guide, design, ...){ + + # Distance sampling dataset + sc_ds_lam <- generate_data(formulas$lam, guide, design$Mds) + sc_ds_det <- generate_data(formulas$ds, guide, design$Mds) + dat_ds <- list(sc_ds_lam, sc_ds_det) + if(!is.null(formulas$phi)){ + sc_ds_phi <- generate_data(formulas$phi, guide, design$Mds) + dat_ds <- c(dat_ds, list(sc_ds_phi)) + } + keep <- sapply(dat_ds, function(x) !is.null(x)) + dat_ds <- dat_ds[keep] + sc_ds <- do.call(cbind, dat_ds) + yblank_ds <- matrix(1, design$Mds, design$J) + + # Point count dataset + sc_pc <- yblank_pc <- NULL + if(!is.null(design$Mpc) && design$Mpc > 0){ + if(is.null(formulas$pc)) form_pc <- formulas$ds + sc_pc_lam <- generate_data(formulas$lam, guide, design$Mpc) + sc_pc_det <- generate_data(form_pc, guide, design$Mpc) + sc_pc <- list(sc_pc_lam, sc_pc_det) + if(!is.null(formulas$phi)){ + sc_pc_phi <- generate_data(formulas$phi, guide, design$Mpc) + sc_pc <- c(sc_pc, list(sc_pc_phi)) + } + keep <- sapply(sc_pc, function(x) !is.null(x)) + sc_pc <- sc_pc[keep] + sc_pc <- do.call(cbind, sc_pc) + yblank_pc <- matrix(1, design$Mpc, 1) + } + + # Presence/absence dataset + sc_oc <- yblank_oc <- NULL + if(!is.null(design$Moc) && design$Moc > 0){ + if(is.null(formulas$oc)){ + form_oc <- formulas$ds + } else { + form_oc <- formulas$oc + } + sc_oc_lam <- generate_data(formulas$lam, guide, design$Moc) + sc_oc_det <- generate_data(form_oc, guide, design$Moc) + sc_oc <- list(sc_oc_lam, sc_oc_det) + if(!is.null(formulas$phi)){ + sc_oc_phi <- generate_data(formulas$phi, guide, design$Moc) + sc_oc <- c(sc_oc, list(sc_oc_phi)) + } + keep <- sapply(sc_oc, function(x) !is.null(x)) + sc_oc <- sc_oc[keep] + sc_oc <- do.call(cbind, sc_oc) + yblank_oc <- matrix(1, design$Moc, 1) + } + + mget(c("yblank_ds", "sc_ds", "yblank_pc", "sc_pc", "yblank_oc", "sc_oc")) +}) + + +setMethod("simulate_fit", "unmarkedFitIDS", + function(object, formulas, guide, design, ...){ + parts <- get_umf_components(object, formulas, guide, design, ...) + args <- list(...) + + args$tlength <- 0 + args$survey <- "point" + + # Distance sampling dataset + umf_ds <- unmarkedFrameDS(y=parts$yblank_ds, siteCovs=parts$sc_ds, + tlength=args$tlength, survey=args$survey, + unitsIn=args$unitsIn, + dist.breaks=args$dist.breaks) + # Point count dataset + umf_pc <- NULL + if(!is.null(design$Mpc) && design$Mpc > 0){ + umf_pc <- unmarkedFramePCount(y=parts$yblank_pc, siteCovs=parts$sc_pc) + } + + # Occupancy dataset + umf_oc <- NULL + if(!is.null(design$Moc) && design$Moc > 0){ + umf_oc <- unmarkedFrameOccu(y=parts$yblank_oc, siteCovs=parts$sc_oc) + } + + keyfun <- ifelse(is.null(args$keyfun), "halfnorm", args$keyfun) + unitsOut <- ifelse(is.null(args$unitsOut), "ha", args$unitsOut) + K <- ifelse(is.null(args$K), 300, args$K) + if(is.null(args$maxDistPC)) args$maxDistPC <- max(args$dist.breaks) + if(is.null(args$maxDistOC)) args$maxDistOC <- max(args$dist.breaks) + + IDS(lambdaformula = formulas$lam, + detformulaDS = formulas$ds, + detformulaPC = formulas$pc, detformulaOC = formulas$oc, + dataDS = umf_ds, dataPC = umf_pc, dataOC = umf_oc, + availformula = formulas$phi, + durationDS = args$durationDS, durationPC = args$durationPC, + durationOC = args$durationOC, + maxDistPC = args$maxDistPC, maxDistOC = args$maxDistOC, + keyfun=keyfun, unitsOut=unitsOut, K=K ,control=list(maxit=1)) +}) + + + diff --git a/R/unmarkedEstimate.R b/R/unmarkedEstimate.R index 86caf409..71f87168 100644 --- a/R/unmarkedEstimate.R +++ b/R/unmarkedEstimate.R @@ -294,7 +294,7 @@ setMethod("vcov", "unmarkedEstimate", setMethod("confint", "unmarkedEstimate", function(object, parm, level = 0.95) { - if(missing(parm)) parm <- 1:length(object@estimates) + if(missing(parm)) parm <- object@fixed ests <- object@estimates[parm] ses <- SE(object)[parm] z <- qnorm((1-level)/2, lower.tail = FALSE) diff --git a/R/unmarkedFit.R b/R/unmarkedFit.R index 95c832ce..84740b39 100644 --- a/R/unmarkedFit.R +++ b/R/unmarkedFit.R @@ -363,7 +363,7 @@ setMethod("confint", "unmarkedFit", function(object, parm, level = 0.95, if(missing(type)) stop(paste("Must specify type as one of (", paste(names(object), collapse=", "),").",sep="")) if(missing(parm)) - parm <- 1:length(object[type]@estimates) + parm <- object[type]@fixed if(method == "normal") { callGeneric(object[type],parm = parm, level = level) } else { @@ -453,6 +453,7 @@ setMethod("fitted", "unmarkedFitDS", function(object, na.rm = FALSE) m = A <- A / 1e6, km = A <- A) switch(object@unitsOut, + m = A <- A * 1e6, ha = A <- A * 100, kmsq = A <- A) lambda <- lambda * A @@ -1660,11 +1661,13 @@ setMethod("getP", "unmarkedFitDS", point = { for(i in 1:M) { a[i, 1] <- pi*db[2]^2 - for(j in 2:J) + if(J > 1){ + for(j in 2:J) a[i, j] <- pi*db[j+1]^2 - sum(a[i, 1:(j-1)]) - u[i,] <- a[i,] / sum(a[i,]) } - }) + u[i,] <- a[i,] / sum(a[i,]) + } + }) switch(key, @@ -2089,6 +2092,7 @@ setMethod("simulate", "unmarkedFitDS", m = A <- A / 1e6, km = A <- A) switch(object@unitsOut, + m = A <- A * 1e6, ha = A <- A * 100, kmsq = A <- A) lambda <- lambda * A @@ -2104,7 +2108,6 @@ setMethod("simulate", "unmarkedFitDS", - setMethod("simulate", "unmarkedFitPCount", function(object, nsim = 1, seed = NULL, na.rm = TRUE) { diff --git a/R/unmarkedFrame.R b/R/unmarkedFrame.R index c936bdff..b97f23bb 100644 --- a/R/unmarkedFrame.R +++ b/R/unmarkedFrame.R @@ -1133,10 +1133,7 @@ setMethod("[", c("unmarkedFrame", "numeric", "missing", "missing"), if(all(i < 0)) { # if i is negative, then convert to positive i <- (1:M)[i] } - y <- getY(x)[i,] - if (length(i) == 1) { - y <- t(y) - } + y <- getY(x)[i,,drop=FALSE] siteCovs <- siteCovs(x) obsCovs <- obsCovs(x) if (!is.null(siteCovs)) { diff --git a/_pkgdown.yml b/_pkgdown.yml index 25e44c87..831489f7 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -19,6 +19,7 @@ reference: - occuMulti - occuPEN - occuTTD + - occuCOP - title: Abundance models contents: - occuRN @@ -48,6 +49,28 @@ reference: - fitList - modSel - crossVal,unmarkedFit-method + - title: Model data + - unmarkedFrame + - unmarkedFrame-class + - unmarkedMultFrame + - unmarkedFrameDS + - unmarkedFrameOccu + - unmarkedFrameOccuFP + - unmarkedFrameOccuMulti + - unmarkedFramePCount + - unmarkedFrameMPois + - unmarkedFrameOccuCOP + - unmarkedFrameOccuMS + - unmarkedFrameOccuTTD + - unmarkedFrameG3 + - unmarkedFramePCO + - unmarkedFrameGDR + - unmarkedFrameGMM + - unmarkedFrameGDS + - unmarkedFrameGPC + - unmarkedFrameGOccu + - unmarkedFrameMMO + - unmarkedFrameDSO - title: Model results contents: - coef,unmarkedFit-method diff --git a/man/IDS.Rd b/man/IDS.Rd new file mode 100644 index 00000000..5ec3923b --- /dev/null +++ b/man/IDS.Rd @@ -0,0 +1,142 @@ +\name{IDS} +\alias{IDS} +\alias{hist,unmarkedFitIDS-method} +\alias{names,unmarkedFitIDS-method} + +\title{ +Fit the integrated distance sampling model of Kery et al. (2022). +} + +\description{Model abundance using a combination of distance sampling data (DS) + and other similar data types, including simple point counts (PC) and + occupancy/detection-nondetection (OC/DND) data.} + +\usage{ +IDS(lambdaformula = ~1, detformulaDS = ~1, detformulaPC = NULL, detformulaOC = NULL, + dataDS, dataPC = NULL, dataOC = NULL, availformula = NULL, + durationDS = NULL, durationPC = NULL, durationOC = NULL, keyfun = "halfnorm", + maxDistPC, maxDistOC, K = 100, unitsOut = "ha", + starts = NULL, method = "BFGS", ...) +} + +\arguments{ + \item{lambdaformula}{Formula for abundance} + \item{detformulaDS}{Formula for distance-based (DS) detection probability} + \item{detformulaPC}{Formula for point count detection probability. If + \code{NULL}, will share a model with DS detection probability} + \item{detformulaOC}{Formula for occupancy/detection-nondetection detection + probability. If \code{NULL}, will share a model with DS detection probability} + \item{dataDS}{An object of class \code{unmarkedFrameDS}. Required} + \item{dataPC}{An object of class \code{unmarkedFramePCount}. If \code{NULL}, + no PC data will be used in the model} + \item{dataOC}{An object of class \code{unmarkedFrameOccu}. If \code{NULL}, + no OC/DND data will be used in the model} + \item{availformula}{Optional. If specified, formula for availability. Only possible to + use if you have variable detection survey lengths (see below)} + \item{durationDS}{Optional. Vector of survey durations at each distance sampling site} + \item{durationPC}{Optional. Vector of survey durations at each PC site} + \item{durationOC}{Optional. Vector of survey durations at each OC/DND site} + \item{keyfun}{Distance sampling key function; either "halfnorm" or "exp"} + \item{maxDistPC}{Maximum observation distance for PC surveys; defaults to + maximum of distance bins from the distance sampling data} + \item{maxDistOC}{Maximum observation distance for OC/DND surveys; defaults to + maximum of distance bins from the distance sampling data} + \item{K}{Integer, upper bound for integrating out latent abundance. Only used if + you have included OC/DND data} + \item{unitsOut}{Units of density for output. Either "ha" or "kmsq" for + hectares and square kilometers, respectively} + \item{starts}{A numeric vector of starting values for the model parameters} + \item{method}{Optimization method used by \code{\link{optim}}} + \item{\dots}{Additional arguments to optim, such as lower and upper + bounds} +} + +\value{An object of class unmarkedFitIDS} + +\details{ +This function facilitates a combined analysis of distance sampling data (DS) with other similar +data types, including simple point counts (PC) and occupancy/detection-nondetection (OC/DND) data. +The combined approach capitalizes on the strengths and minimizes the weaknesses +of each type. The PC and OC/DND data are viewed as latent distance sampling surveys +with an underlying abundance model shared by all data types. All analyses +must include some distance sampling data, but can include either PC or DND data +or both. If surveys are of variable duration, it is also possible to estimate +availability. + +Input data must be provided as a series of separate \code{unmarkedFrame}s: +\code{unmarkedFrameDS} for the distance sampling data, \code{unmarkedFramePCount} +for the point count data, and \code{unmarkedFrameOccu} for OC/DND data. +See the help files for these objects for guidance on how to organize the data. +} + +\note{ + Simulations indicated estimates of availability were very unreliable when + including detection/non-detection data, so the function will not allow you + to use DND data and estimate availability at the same time. + In general estimation of availability can be difficult; use simulations + to see how well it works for your specific situation. +} + +\references{ + Kery M, Royle JA, Hallman T, Robinson WD, Strebel N, Kellner KF. 2024. + Integrated distance sampling models for simple point counts. Ecology. +} +\author{Ken Kellner \email{contact@kenkellner.com}} +\seealso{\code{\link{distsamp}}} + +\examples{ + +\dontrun{ + +# Simulate data based on a real dataset + +# Formulas for each model +formulas <- list(lam=~elev, ds=~1, phi=~1) + +# Sample sizes +design <- list(Mds=2912, J=6, Mpc=506) + +# Model parameters +coefs <- list(lam = c(intercept=3, elev=-0.5), + ds = c(intercept=-2.5), + phi = c(intercept=-1.3)) + +# Survey durations +durs <- list(ds = rep(5, design$Mds), pc=runif(design$Mpc, 3, 30)) + +set.seed(456) +sim_umf <- simulate("IDS", # name of model we are simulating for + nsim=1, # number of replicates + formulas=formulas, + coefs=coefs, + design=design, + # arguments used by unmarkedFrameDS + dist.breaks = seq(0, 0.30, length.out=7), + unitsIn="km", + # arguments used by IDS + # could also have e.g. keyfun here + durationDS=durs$ds, durationPC=durs$pc, durationOC=durs$oc, + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + +# Look at the results +lapply(sim_umf, head) + +# Fit a model +(mod_sim <- IDS(lambdaformula = ~elev, detformulaDS = ~1, + dataDS=sim_umf$ds, dataPC=sim_umf$pc, + availformula = ~1, durationDS=durs$ds, durationPC=durs$pc, + maxDistPC=0.5, + unitsOut="kmsq")) + +# Compare with known parameter values +# Note: this is an unusually good estimate of availability +# It is hard to estimate in most cases +cbind(truth=unlist(coefs), est=coef(mod_sim)) + +# Predict density at each distance sampling site +head(predict(mod_sim, 'lam')) + +} + +} diff --git a/man/MesoCarnivores.Rd b/man/MesoCarnivores.Rd index c51e06fd..7a580774 100644 --- a/man/MesoCarnivores.Rd +++ b/man/MesoCarnivores.Rd @@ -17,13 +17,13 @@ \item{\code{coyote}}{A 1437x3 occupancy matrix for coyote} \item{\code{redfox}}{A 1437x3 occupancy matrix for red fox} \item{\code{sitecovs}}{A data frame containing covariates for the 1437 sites, with the following columns: - \itemize{ - \item{\code{Dist_5km}{Proportion of disturbed land in 5 km radius}} - \item{\code{HDens_5km}{Housing density in 5 km radius}} - \item{\code{Latitude}{Latitude / 100}} - \item{\code{Longitude}{Longitude / 100}} - \item{\code{People_site}{Number of photos of people at site / 1000}} - \item{\code{Trail}{1 if camera was on trail, 0 if not}} + \describe{ + \item{\code{Dist_5km}}{Proportion of disturbed land in 5 km radius} + \item{\code{HDens_5km}}{Housing density in 5 km radius} + \item{\code{Latitude}}{Latitude / 100} + \item{\code{Longitude}}{Longitude / 100} + \item{\code{People_site}}{Number of photos of people at site / 1000} + \item{\code{Trail}}{1 if camera was on trail, 0 if not} } } } diff --git a/man/SSE.Rd b/man/SSE.Rd index e5354ee2..e3204f2a 100644 --- a/man/SSE.Rd +++ b/man/SSE.Rd @@ -4,6 +4,7 @@ \alias{SSE,unmarkedFit-method} \alias{SSE,unmarkedFitOccuMulti-method} \alias{SSE,unmarkedFitGDR-method} +\alias{SSE,unmarkedFitIDS-method} \title{Compute Sum of Squared Residuals for a Model Fit.} \description{ Compute the sum of squared residuals for an unmarked fit object. This diff --git a/man/fitted-methods.Rd b/man/fitted-methods.Rd index 88685652..9ead41cd 100644 --- a/man/fitted-methods.Rd +++ b/man/fitted-methods.Rd @@ -14,8 +14,11 @@ \alias{fitted,unmarkedFitDS-method} \alias{fitted,unmarkedFitGMM-method} \alias{fitted,unmarkedFitGDR-method} +\alias{fitted,unmarkedFitIDS-method} \alias{fitted,unmarkedFitDailMadsen-method} \alias{fitted,unmarkedFitGOccu-method} +\alias{fitted,unmarkedFitOccuCOP-method} + \title{Methods for Function fitted in Package `unmarked'} \description{Extracted fitted values from a fitted model. } diff --git a/man/getP-methods.Rd b/man/getP-methods.Rd index c554ebe5..32028ab4 100644 --- a/man/getP-methods.Rd +++ b/man/getP-methods.Rd @@ -17,17 +17,20 @@ \alias{getP,unmarkedFitDSO-method} \alias{getP,unmarkedFitMMO-method} \alias{getP,unmarkedFitGDR-method} +\alias{getP,unmarkedFitIDS-method} \alias{getP,unmarkedFitGOccu-method} +\alias{getP,unmarkedFitOccuCOP-method} + \title{Methods for Function getP in Package `unmarked'} \description{ -Methods for function \code{getP} in Package `unmarked'. These methods -return a matrix of detection probabilities. +Methods for function \code{getP} in Package `unmarked'. These methods return a matrix of the back-transformed detection parameter (\eqn{p} the detection probability or \eqn{\lambda} the detection rate, depending on the model). The matrix is of dimension MxJ, with M the number of sites and J the number of sampling periods; or of dimension MxJT for models with multiple primary periods T. } \section{Methods}{ \describe{ -\item{object = "unmarkedFit"}{A fitted model object} -\item{object = "unmarkedFitDS"}{A fitted model object} -\item{object = "unmarkedFitMPois"}{A fitted model object} -\item{object = "unmarkedFitGMM"}{A fitted model object} +\item{\code{signature(object = "unmarkedFit")}}{A fitted model object} +\item{\code{signature(object = "unmarkedFitDS")}}{A fitted model object} +\item{\code{signature(object = "unmarkedFitMPois")}}{A fitted model object} +\item{\code{signature(object = "unmarkedFitGMM")}}{A fitted model object} +\item{\code{signature(object = "unmarkedFitOccuCOP")}}{With \code{unmarkedFitOccuCOP} the object of a model fitted with \code{occuCOP}. Returns a matrix of \eqn{\lambda} the detection rate.} }} \keyword{methods} diff --git a/man/nonparboot-methods.Rd b/man/nonparboot-methods.Rd index 720f2413..53ff723f 100644 --- a/man/nonparboot-methods.Rd +++ b/man/nonparboot-methods.Rd @@ -17,7 +17,10 @@ \alias{nonparboot,unmarkedFitOccuMulti-method} \alias{nonparboot,unmarkedFitNmixTTD-method} \alias{nonparboot,unmarkedFitGDR-method} +\alias{nonparboot,unmarkedFitIDS-method} \alias{nonparboot,unmarkedFitDailMadsen-method} +\alias{nonparboot,unmarkedFitOccuCOP-method} + \title{ Nonparametric bootstrapping in unmarked } \description{ diff --git a/man/occuCOP.Rd b/man/occuCOP.Rd new file mode 100644 index 00000000..7549ca38 --- /dev/null +++ b/man/occuCOP.Rd @@ -0,0 +1,249 @@ +\name{occuCOP} + +\alias{occuCOP} + +\encoding{UTF-8} + +\title{Fit the occupancy model using count dta} + +\usage{ +occuCOP(data, + psiformula = ~1, lambdaformula = ~1, + psistarts, lambdastarts, starts, + method = "BFGS", se = TRUE, + engine = c("C", "R"), na.rm = TRUE, + return.negloglik = NULL, L1 = FALSE, ...)} + +\arguments{ + + \item{data}{An \code{\link{unmarkedFrameOccuCOP}} object created with the \code{\link{unmarkedFrameOccuCOP}} function.} + + \item{psiformula}{Formula describing the occupancy covariates.} + + \item{lambdaformula}{Formula describing the detection covariates.} + + \item{psistarts}{Vector of starting values for likelihood maximisation with \code{\link{optim}} for occupancy probability \eqn{\psi}{psi}. These values must be logit-transformed (with \code{\link{qlogis}}) (see details). By default, optimisation will start at 0, corresponding to an occupancy probability of 0.5 (\code{plogis(0)} is 0.5).} + + \item{lambdastarts}{Vector of starting values for likelihood maximisation with \code{\link{optim}} for detection rate \eqn{\lambda}{lambda}. These values must be log-transformed (with \code{\link{log}}) (see details). By default, optimisation will start at 0, corresponding to detection rate of 1 (\code{exp(0)} is 1).} + + \item{starts}{Vector of starting values for likelihood maximisation with \code{\link{optim}}. If \code{psistarts} and \code{lambdastarts} are provided, \code{starts = c(psistarts, lambdastarts)}.} + + \item{method}{Optimisation method used by \code{\link{optim}}.} + + \item{se}{Logical specifying whether to compute (\code{se=TRUE}) standard errors or not (\code{se=FALSE}).} + + \item{engine}{Code to use for optimisation. Either \code{"C"} for fast C++ code, or \code{"R"} for native R code.} + + \item{na.rm}{Logical specifying whether to fit the model (\code{na.rm=TRUE}) or not (\code{na.rm=FALSE}) if there are NAs in the \code{\link{unmarkedFrameOccuCOP}} object.} + + \item{return.negloglik}{A list of vectors of parameters (\code{c(psiparams, lambdaparams)}). If specified, the function will not maximise likelihood but return the negative log-likelihood for the those parameters in the \code{nll} column of a dataframe. See an example below.} + + \item{L1}{Logical specifying whether the length of observations (\code{L}) are purposefully set to 1 (\code{L1=TRUE}) or not (\code{L1=FALSE}).} + + \item{\dots}{Additional arguments to pass to \code{\link{optim}}, such as lower and upper bounds or a list of control parameters.} + } + +\description{This function fits a single season occupancy model using count data.} + +\details{ + + See \code{\link{unmarkedFrameOccuCOP}} for a description of how to supply data to the \code{data} argument. See \code{\link{unmarkedFrame}} for a more general documentation of \code{unmarkedFrame} objects for the different models implemented in \pkg{unmarked}. + + \subsection{The COP occupancy model}{ + + \code{occuCOP} fits a single season occupancy model using count data, as described in Pautrel et al. (2023). + + The \strong{occupancy sub-model} is: + + \deqn{z_i \sim \text{Bernoulli}(\psi_i)}{z_i ~ Bernoulli(psi_i)} + + \itemize{ + \item With \eqn{z_i}{z_i} the occupany state of site \eqn{i}{i}. \eqn{z_i=1}{z_i = 1} if site \eqn{i}{i} is occupied by the species, \emph{i.e.} if the species is present in site \eqn{i}{i}. \eqn{z_i=0}{z_i = 0} if site \eqn{i}{i} is not occupied. + \item With \eqn{\psi_i}{psi_i} the occupancy probability of site \eqn{i}{i}. + } + + The \strong{observation sub-model} is: + + \deqn{ + N_{ij} | z_i = 1 \sim \text{Poisson}(\lambda_{ij} L_{ij}) \\ + N_{ij} | z_i = 0 \sim 0 + }{ + N_ij | z_i = 1 ~ Poisson(lambda_is*L_is) + N_ij | z_i = 0 ~ 0 + } + + \itemize{ + \item With \eqn{N_{ij}}{N_ij} the count of detection events in site \eqn{i}{i} during observation \eqn{j}{j}. + \item With \eqn{\lambda_{ij}}{lambda_ij} the detection rate in site \eqn{i}{i} during observation \eqn{j}{j} (\emph{for example, 1 detection per day.}). + \item With \eqn{L_{ij}}{L_ij} the length of observation \eqn{j}{j} in site \eqn{i}{i} (\emph{for example, 7 days.}). + } + + What we call "observation" (\eqn{j}{j}) here can be a sampling occasion, a transect, a discretised session. Consequently, the unit of \eqn{\lambda_{ij}}{lambda_ij} and \eqn{L_{ij}}{L_ij} can be either a time-unit (day, hour, ...) or a space-unit (kilometer, meter, ...). + } + + \subsection{The transformation of parameters \eqn{\psi} and \eqn{\lambda}}{ + In order to perform unconstrained optimisation, parameters are transformed. + + The occupancy probability (\eqn{\psi}) is transformed with the logit function (\code{psi_transformed = qlogis(psi)}). It can be back-transformed with the "inverse logit" function (\code{psi = plogis(psi_transformed)}). + + The detection rate (\eqn{\lambda}) is transformed with the log function (\code{lambda_transformed = log(lambda)}). It can be back-transformed with the exponential function (\code{lambda = exp(lambda_transformed)}). + } + +} + +\value{\code{unmarkedFitOccuCOP} object describing the model fit. See the \code{\linkS4class{unmarkedFit}} classes.} + +\references{ + +Pautrel, L., Moulherat, S., Gimenez, O. & Etienne, M.-P. Submitted. \emph{Analysing biodiversity observation data collected in continuous time: Should we use discrete or continuous-time occupancy models?} Preprint at \doi{10.1101/2023.11.17.567350}. + +} + +\author{Léa Pautrel} + +\seealso{ + \code{\link{unmarked}}, + \code{\link{unmarkedFrameOccuCOP}}, + \code{\link{unmarkedFit-class}} +} + + +\examples{ +set.seed(123) +options(max.print = 50) + +# We simulate data in 100 sites with 3 observations of 7 days per site. +nSites <- 100 +nObs <- 3 + +# For an occupancy covariate, we associate each site to a land-use category. +landuse <- sample(factor(c("Forest", "Grassland", "City"), ordered = TRUE), + size = nSites, replace = TRUE) +simul_psi <- ifelse(landuse == "Forest", 0.8, + ifelse(landuse == "Grassland", 0.4, 0.1)) +z <- rbinom(n = nSites, size = 1, prob = simul_psi) + +# For a detection covariate, we create a fake wind variable. +wind <- matrix(rexp(n = nSites * nObs), nrow = nSites, ncol = nObs) +simul_lambda <- wind / 5 +L = matrix(7, nrow = nSites, ncol = nObs) + +# We now simulate count detection data +y <- matrix(rpois(n = nSites * nObs, lambda = simul_lambda * L), + nrow = nSites, ncol = nObs) * z + +# We create our unmarkedFrameOccuCOP object +umf <- unmarkedFrameOccuCOP( + y = y, + L = L, + siteCovs = data.frame("landuse" = landuse), + obsCovs = list("wind" = wind) +) +print(umf) + +# We fit our model without covariates +fitNull <- occuCOP(data = umf) +print(fitNull) + +# We fit our model with covariates +fitCov <- occuCOP(data = umf, psiformula = ~ landuse, lambdaformula = ~ wind) +print(fitCov) + +# We back-transform the parameter's estimates +## Back-transformed occupancy probability with no covariates +backTransform(fitNull, "psi") + +## Back-transformed occupancy probability depending on habitat use +predict(fitCov, + "psi", + newdata = data.frame("landuse" = c("Forest", "Grassland", "City")), + appendData = TRUE) + +## Back-transformed detection rate with no covariates +backTransform(fitNull, "lambda") + +## Back-transformed detection rate depending on wind +predict(fitCov, + "lambda", + appendData = TRUE) + +## This is not easily readable. We can show the results in a clearer way, by: +## - adding the site and observation +## - printing only the wind covariate used to get the predicted lambda +cbind( + data.frame( + "site" = rep(1:nSites, each = nObs), + "observation" = rep(1:nObs, times = nSites), + "wind" = getData(fitCov)@obsCovs + ), + predict(fitCov, "lambda", appendData = FALSE) +) + +# We can choose the initial parameters when fitting our model. +# For psi, intituively, the initial value can be the proportion of sites +# in which we have observations. +(psi_init <- mean(rowSums(y) > 0)) + +# For lambda, the initial value can be the mean count of detection events +# in sites in which there was at least one observation. +(lambda_init <- mean(y[rowSums(y) > 0, ])) + +# We have to transform them. +occuCOP( + data = umf, + psiformula = ~ 1, + lambdaformula = ~ 1, + psistarts = qlogis(psi_init), + lambdastarts = log(lambda_init) +) + +# If we have covariates, we need to have the right length for the start vectors. +# psi ~ landuse --> 3 param to estimate: Intercept, landuseForest, landuseGrassland +# lambda ~ wind --> 2 param to estimate: Intercept, wind +occuCOP( + data = umf, + psiformula = ~ landuse, + lambdaformula = ~ wind, + psistarts = rep(qlogis(psi_init), 3), + lambdastarts = rep(log(lambda_init), 2) +) + +# And with covariates, we could have chosen better initial values, such as the +# proportion of sites in which we have observations per land-use category. +(psi_init_covs <- c( + "City" = mean(rowSums(y[landuse == "City", ]) > 0), + "Forest" = mean(rowSums(y[landuse == "Forest", ]) > 0), + "Grassland" = mean(rowSums(y[landuse == "Grassland", ]) > 0) +)) +occuCOP( + data = umf, + psiformula = ~ landuse, + lambdaformula = ~ wind, + psistarts = qlogis(psi_init_covs)) + +# We can fit our model with a different optimisation algorithm. +occuCOP(data = umf, method = "Nelder-Mead") + +# We can run our model with a C++ or with a R likelihood function. +## They give the same result. +occuCOP(data = umf, engine = "C", psistarts = 0, lambdastarts = 0) +occuCOP(data = umf, engine = "R", psistarts = 0, lambdastarts = 0) + +## The C++ (the default) is faster. +system.time(occuCOP(data = umf, engine = "C", psistarts = 0, lambdastarts = 0)) +system.time(occuCOP(data = umf, engine = "R", psistarts = 0, lambdastarts = 0)) + +## However, if you want to understand how the likelihood is calculated, +## you can easily access the R likelihood function. +print(occuCOP(data = umf, engine = "R", psistarts = 0, lambdastarts = 0)@nllFun) + +# Finally, if you do not want to fit your model but only get the likelihood, +# you can get the negative log-likelihood for a given set of parameters. +occuCOP(data = umf, return.negloglik = list( + c("psi" = qlogis(0.25), "lambda" = log(2)), + c("psi" = qlogis(0.5), "lambda" = log(1)), + c("psi" = qlogis(0.75), "lambda" = log(0.5)) +)) +} + +\keyword{models} diff --git a/man/occuFP.Rd b/man/occuFP.Rd index 64feff23..daadb4ed 100644 --- a/man/occuFP.Rd +++ b/man/occuFP.Rd @@ -45,7 +45,7 @@ are specified to belong to 1 of the 3 data types and all or a subset of the data For type 1 data, the detection process is assumed to fit the assumptions of the standard MacKenzie model where false negative probabilities are estimated but false positive detections are assumed not to occur. If all of your -data is of this type you should use code{occu} to analyze data. The detection parameter p, which is modeled using the +data is of this type you should use \code{occu} to analyze data. The detection parameter p, which is modeled using the detformula is the only observation parameter for these data. For type 2 data, both false negative and false positive detection probabilities are estimated. If all data is of this diff --git a/man/parboot.Rd b/man/parboot.Rd index 6b6493f0..597eb54a 100644 --- a/man/parboot.Rd +++ b/man/parboot.Rd @@ -1,5 +1,6 @@ \name{parboot} \alias{parboot} +\alias{parboot,unmarkedFitIDS-method} \alias{plot,parboot,missing-method} \alias{show,parboot-method} \title{Parametric bootstrap method for fitted models inheriting class.} diff --git a/man/predict-methods.Rd b/man/predict-methods.Rd index 8b1fb380..79fa0196 100644 --- a/man/predict-methods.Rd +++ b/man/predict-methods.Rd @@ -16,6 +16,7 @@ \alias{predict,unmarkedFitPCO-method} \alias{predict,unmarkedFitDSO-method} \alias{predict,unmarkedFitGDR-method} +\alias{predict,unmarkedFitIDS-method} \alias{predict,unmarkedFitList-method} \alias{predict,unmarkedRanef-method} \title{ Methods for Function predict in Package `unmarked' } diff --git a/man/ranef-methods.Rd b/man/ranef-methods.Rd index c38e82e9..66452d79 100644 --- a/man/ranef-methods.Rd +++ b/man/ranef-methods.Rd @@ -21,6 +21,8 @@ \alias{ranef,unmarkedFitGDR-method} \alias{ranef,unmarkedFitDailMadsen-method} \alias{ranef,unmarkedFitGOccu-method} +\alias{ranef,unmarkedFitOccuCOP-method} +\alias{ranef,unmarkedFitIDS-method} \title{ Methods for Function \code{ranef} in Package \pkg{unmarked} } \description{ Estimate posterior distributions of the random variables (latent diff --git a/man/simulate-methods.Rd b/man/simulate-methods.Rd index fc8a008a..5d37dad3 100644 --- a/man/simulate-methods.Rd +++ b/man/simulate-methods.Rd @@ -17,9 +17,12 @@ \alias{simulate,unmarkedFitGDS-method} \alias{simulate,unmarkedFitGPC-method} \alias{simulate,unmarkedFitGDR-method} +\alias{simulate,unmarkedFitIDS-method} \alias{simulate,unmarkedFitDailMadsen-method} \alias{simulate,unmarkedFitGOccu-method} +\alias{simulate,unmarkedFitOccuCOP-method} \alias{simulate,character-method} + \title{Methods for Function simulate in Package `unmarked'} \description{ Simulate data from a fitted model. diff --git a/man/unmarked-package.Rd b/man/unmarked-package.Rd index 3bf8db0e..c0595a34 100644 --- a/man/unmarked-package.Rd +++ b/man/unmarked-package.Rd @@ -260,6 +260,6 @@ Sillett, S. and Chandler, R.B. and Royle, J.A. and Kery, M. and \docType{package} -\author{Ian Fiske, Richard Chandler, Andy Royle, Marc K\'{e}ry, David +\author{Ian Fiske, Richard Chandler, Andy Royle, Marc Kery, David Miller, and Rebecca Hutchinson} \keyword{package} diff --git a/man/unmarkedFit-class.Rd b/man/unmarkedFit-class.Rd index 4d7aa8fd..8237311d 100644 --- a/man/unmarkedFit-class.Rd +++ b/man/unmarkedFit-class.Rd @@ -18,6 +18,8 @@ \alias{plot,unmarkedFit,missing-method} \alias{plot,unmarkedFitOccuMulti,missing-method} \alias{plot,unmarkedFitGDR,missing-method} +\alias{plot,unmarkedFitIDS,missing-method} +\alias{plot,unmarkedFitOccuCOP,missing-method} \alias{profile,unmarkedFit-method} \alias{residuals,unmarkedFit-method} \alias{residuals,unmarkedFitOccu-method} @@ -26,6 +28,8 @@ \alias{residuals,unmarkedFitOccuMulti-method} \alias{residuals,unmarkedFitOccuTTD-method} \alias{residuals,unmarkedFitGDR-method} +\alias{residuals,unmarkedFitIDS-method} +\alias{residuals,unmarkedFitOccuCOP-method} \alias{update,unmarkedFit-method} \alias{update,unmarkedFitColExt-method} \alias{update,unmarkedFitGMM-method} @@ -34,6 +38,7 @@ \alias{update,unmarkedFitOccuTTD-method} \alias{update,unmarkedFitNmixTTD-method} \alias{update,unmarkedFitGDR-method} +\alias{update,unmarkedFitIDS-method} \alias{update,unmarkedFitDailMadsen-method} \alias{update,unmarkedFitGOccu-method} \alias{sampleSize} @@ -53,10 +58,12 @@ \alias{unmarkedFitNmixTTD-class} \alias{unmarkedFitDSO-class} \alias{unmarkedFitMMO-class} +\alias{unmarkedFitIDS-class} \alias{plot,profile,missing-method} \alias{show,unmarkedFit-method} \alias{summary,unmarkedFit-method} \alias{summary,unmarkedFitDS-method} +\alias{summary,unmarkedFitIDS-method} \alias{smoothed} \alias{smoothed,unmarkedFitColExt-method} \alias{projected} diff --git a/man/unmarkedFrame-class.Rd b/man/unmarkedFrame-class.Rd index 9df41577..ab41506a 100644 --- a/man/unmarkedFrame-class.Rd +++ b/man/unmarkedFrame-class.Rd @@ -51,11 +51,13 @@ \alias{show,unmarkedFrameOccuMulti-method} \alias{show,unmarkedFrameOccuTTD-method} \alias{show,unmarkedMultFrame-method} +\alias{show,unmarkedFrameOccuCOP-method} \alias{summary,unmarkedFrame-method} \alias{summary,unmarkedFrameDS-method} \alias{summary,unmarkedMultFrame-method} \alias{summary,unmarkedFrameOccuMulti-method} \alias{summary,unmarkedFrameOccuTTD-method} +\alias{summary,unmarkedFrameOccuCOP-method} \alias{[,unmarkedFrameOccuMulti,missing,numeric,missing-method} \alias{[,unmarkedFrameOccuTTD,missing,numeric,missing-method} \alias{[,unmarkedFrameGDR,missing,numeric,missing-method} @@ -65,6 +67,9 @@ \alias{[,unmarkedFrameDSO,numeric,missing,missing-method} \alias{[,unmarkedFrameGDR,numeric,missing,missing-method} \alias{[,unmarkedFrameGDR,logical,missing,missing-method} +\alias{[,unmarkedFrameOccuCOP,missing,numeric,missing-method} +\alias{[,unmarkedFrameOccuCOP,numeric,missing,missing-method} +\alias{[,unmarkedFrameOccuCOP,numeric,numeric,missing-method} \title{Class "unmarkedFrame" } \description{Methods for manipulating, summarizing and viewing @@ -120,15 +125,19 @@ argument of the fitting functions. modify site-level covariates } \item{summary}{\code{signature(object = "unmarkedFrame")}: summarize data } + \item{getL}{\code{signature(object = "unmarkedFrameOccuCOP")}: extract L } } } -\note{ This is a superclass with child classes for each fitting function } +\note{ This is a superclass with child classes for each fitting function.} \seealso{\code{\link{unmarkedFrame}}, \code{\linkS4class{unmarkedFit}}, \code{\link{unmarked-package}} } \examples{ +# List all the child classes of unmarkedFrame +showClass("unmarkedFrame") + # Organize data for pcount() data(mallard) mallardUMF <- unmarkedFramePCount(mallard.y, siteCovs = mallard.site, diff --git a/man/unmarkedFrameOccuCOP.Rd b/man/unmarkedFrameOccuCOP.Rd new file mode 100644 index 00000000..3bc1cb00 --- /dev/null +++ b/man/unmarkedFrameOccuCOP.Rd @@ -0,0 +1,105 @@ +\name{unmarkedFrameOccuCOP} +\alias{unmarkedFrameOccuCOP} +\alias{getL} +\alias{getL,unmarkedFrameOccuCOP-method} + +\title{Organize data for the occupancy model using count data fit by \code{occuCOP}} + +\usage{unmarkedFrameOccuCOP(y, L, siteCovs = NULL, obsCovs = NULL)} + +\description{Organizes count data along with the covariates. The \linkS4class{unmarkedFrame} S4 class required by the \code{data} argument of \code{\link{occuCOP}}.} + +\arguments{ + + \item{y}{An MxJ matrix of the count data, where M is the number of sites, J is the maximum number of observation periods (sampling occasions, transects, discretised sessions...) per site.} + + \item{L}{An MxJ matrix of the length of the observation periods. For example, duration of the sampling occasion in hours, duration of the discretised session in days, or length of the transect in meters.} + + \item{siteCovs}{A \code{\link{data.frame}} of covariates that vary at the site level. This should have M rows and one column per covariate} + + \item{obsCovs}{A named list of dataframes of dimension MxJ, with one dataframe per covariate that varies between sites and observation periods} + + %% \item{mapInfo}{Currently ignored} +} + +\details{ + unmarkedFrameOccuCOP is the \linkS4class{unmarkedFrame} S4 class that holds data to be passed to the \code{\link{occuCOP}} model-fitting function. +} + +\value{an object of class \code{unmarkedFrameOccuCOP}} + +\seealso{ + \code{\link{unmarkedFrame-class}}, + \code{\link{unmarkedFrame}}, + \code{\link{occuCOP}} +} + +\examples{ +# Fake data +M <- 4 # Number of sites +J <- 3 # Number of observation periods + +# Count data +(y <- matrix( + c(1, 3, 0, + 0, 0, 0, + 2, 0, 5, + 1, NA, 0), + nrow = M, + ncol = J, + byrow = TRUE +)) + +# Length of observation periods +(L <- matrix( + c(1, 3, NA, + 2, 2, 2, + 1, 2, 1, + 7, 1, 3), + nrow = M, + ncol = J, + byrow = TRUE +)) + +# Site covariates +(site.covs <- data.frame( + "elev" = rexp(4), + "habitat" = factor(c("forest", "forest", "grassland", "grassland")) +)) + +# Observation covariates (as a list) +(obs.covs.list <- list( + "rain" = matrix(rexp(M * J), nrow = M, ncol = J), + "wind" = matrix( + sample(letters[1:3], replace = TRUE, size = M * J), + nrow = M, ncol = J) +)) + +# Organise data in a unmarkedFrameOccuCOP object +umf <- unmarkedFrameOccuCOP( + y = y, + L = L, + siteCovs = site.covs, + obsCovs = obs.covs.list +) + +# Extract L +getL(umf) + +# Look at data +print(umf) # Print the whole data set +print(umf[1, 2]) # Print the data of the 1st site, 2nd observation +summary(umf) # Summarise the data set +plot(umf) # Plot the count of detection events + + +# L is optional, if absent, it will be replaced by a MxJ matrix of 1 +unmarkedFrameOccuCOP( + y = y, + siteCovs = site.covs, + obsCovs = obs.covs.list +) + +# Covariates are optional +unmarkedFrameOccuCOP(y = y) +} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 6d89809a..0fe45a8a 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -338,6 +338,23 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// nll_occuCOP +double nll_occuCOP(arma::icolvec y, arma::icolvec L, arma::mat Xpsi, arma::mat Xlambda, arma::colvec beta_psi, arma::colvec beta_lambda, Rcpp::LogicalVector removed); +RcppExport SEXP _unmarked_nll_occuCOP(SEXP ySEXP, SEXP LSEXP, SEXP XpsiSEXP, SEXP XlambdaSEXP, SEXP beta_psiSEXP, SEXP beta_lambdaSEXP, SEXP removedSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< arma::icolvec >::type y(ySEXP); + Rcpp::traits::input_parameter< arma::icolvec >::type L(LSEXP); + Rcpp::traits::input_parameter< arma::mat >::type Xpsi(XpsiSEXP); + Rcpp::traits::input_parameter< arma::mat >::type Xlambda(XlambdaSEXP); + Rcpp::traits::input_parameter< arma::colvec >::type beta_psi(beta_psiSEXP); + Rcpp::traits::input_parameter< arma::colvec >::type beta_lambda(beta_lambdaSEXP); + Rcpp::traits::input_parameter< Rcpp::LogicalVector >::type removed(removedSEXP); + rcpp_result_gen = Rcpp::wrap(nll_occuCOP(y, L, Xpsi, Xlambda, beta_psi, beta_lambda, removed)); + return rcpp_result_gen; +END_RCPP +} // nll_occuMS double nll_occuMS(arma::vec beta, arma::mat y, Rcpp::List dm_state, Rcpp::List dm_phi, Rcpp::List dm_det, arma::mat sind, arma::mat pind, arma::mat dind, std::string prm, int S, int T, int J, int N, arma::mat naflag, arma::mat guide); RcppExport SEXP _unmarked_nll_occuMS(SEXP betaSEXP, SEXP ySEXP, SEXP dm_stateSEXP, SEXP dm_phiSEXP, SEXP dm_detSEXP, SEXP sindSEXP, SEXP pindSEXP, SEXP dindSEXP, SEXP prmSEXP, SEXP SSEXP, SEXP TSEXP, SEXP JSEXP, SEXP NSEXP, SEXP naflagSEXP, SEXP guideSEXP) { @@ -563,6 +580,7 @@ static const R_CallMethodDef CallEntries[] = { {"_unmarked_nll_multmixOpen", (DL_FUNC) &_unmarked_nll_multmixOpen, 39}, {"_unmarked_nll_nmixTTD", (DL_FUNC) &_unmarked_nll_nmixTTD, 13}, {"_unmarked_nll_occu", (DL_FUNC) &_unmarked_nll_occu, 11}, + {"_unmarked_nll_occuCOP", (DL_FUNC) &_unmarked_nll_occuCOP, 7}, {"_unmarked_nll_occuMS", (DL_FUNC) &_unmarked_nll_occuMS, 15}, {"_unmarked_nll_occuMulti_loglik", (DL_FUNC) &_unmarked_nll_occuMulti_loglik, 14}, {"_unmarked_nll_occuMulti", (DL_FUNC) &_unmarked_nll_occuMulti, 15}, diff --git a/src/TMB/tmb_IDS.hpp b/src/TMB/tmb_IDS.hpp new file mode 100644 index 00000000..cf1c5b4e --- /dev/null +++ b/src/TMB/tmb_IDS.hpp @@ -0,0 +1,179 @@ +#undef TMB_OBJECTIVE_PTR +#define TMB_OBJECTIVE_PTR obj + +// name of function below **MUST** match filename +template +Type tmb_IDS(objective_function* obj) { + DATA_MATRIX(pind); + DATA_VECTOR(lam_adjust); + + DATA_MATRIX(y_hds); + DATA_MATRIX(X_hds); + DATA_MATRIX(V_hds); + DATA_INTEGER(key_hds); + DATA_VECTOR(db_hds); + DATA_VECTOR(a_hds); + DATA_VECTOR(w_hds); + DATA_VECTOR(u_hds); + + DATA_MATRIX(y_pc); + DATA_MATRIX(X_pc); + DATA_MATRIX(V_pc); + DATA_INTEGER(key_pc); + DATA_VECTOR(db_pc); + DATA_VECTOR(a_pc); + DATA_VECTOR(w_pc); + DATA_VECTOR(u_pc); + + DATA_MATRIX(y_oc); + DATA_MATRIX(X_oc); + DATA_MATRIX(V_oc); + DATA_INTEGER(key_oc); + DATA_VECTOR(db_oc); + DATA_VECTOR(a_oc); + DATA_VECTOR(w_oc); + DATA_VECTOR(u_oc); + DATA_INTEGER(K_oc); + DATA_IVECTOR(Kmin_oc); + + DATA_VECTOR(durationDS); + DATA_VECTOR(durationPC); + DATA_VECTOR(durationOC); + DATA_MATRIX(Xa_hds); + DATA_MATRIX(Xa_pc); + DATA_MATRIX(Xa_oc); + + PARAMETER_VECTOR(beta_lam); + PARAMETER_VECTOR(beta_hds); + PARAMETER_VECTOR(beta_pc); + PARAMETER_VECTOR(beta_oc); + PARAMETER_VECTOR(beta_avail); + + int survey = 1; // Only point counts supported + + Type loglik = 0; + + // HDS + int M = y_hds.rows(); + int J = y_hds.cols(); + + vector lam_hds = X_hds * beta_lam; + lam_hds = exp(lam_hds); + lam_hds = lam_hds * lam_adjust(0); + + vector sigma_hds = V_hds * beta_hds; + sigma_hds = exp(sigma_hds); + + vector p_avail(M); + p_avail.setOnes(); + if(beta_avail.size() > 0){ + vector phi = Xa_hds * beta_avail; + phi = exp(phi); + p_avail = 1 - exp(-1 * durationDS.array() * phi.array()); + } + + for (int i=0; i cp = distance_prob(key_hds, sigma_hds(i), Type(0), survey, + db_hds, w_hds, a_hds, u_hds); + + for (int j=0; j 0){ + + vector lam_pc = X_pc * beta_lam; + lam_pc = exp(lam_pc); + lam_pc = lam_pc * lam_adjust(1); + + //vector sigma_pc = V_pc * beta_pc; + vector sigma_pc; + if(beta_pc.size() > 0){ + sigma_pc = V_pc * beta_pc; + } else{ + sigma_pc = V_pc * beta_hds; + } + sigma_pc = exp(sigma_pc); + + vector p_avail_pc(M); + p_avail_pc.setOnes(); + if(beta_avail.size() > 0){ + vector phi = Xa_pc * beta_avail; + phi = exp(phi); + p_avail_pc = 1 - exp(-1 * durationPC.array() * phi.array()); + } + + for (int i=0; i cp = distance_prob(key_pc, sigma_pc(i), Type(0), survey, + db_pc, w_pc, a_pc, u_pc); + loglik -= dpois(y_pc(i,0), lam_pc(i) * cp(0) * p_avail_pc(i), true); + } + + } + + //printf("pc done"); + + // R-N occupancy + M = y_oc.rows(); + + if(M > 0){ + + vector lam_oc = X_oc * beta_lam; + lam_oc = exp(lam_oc); + lam_oc = lam_oc * lam_adjust(2); + + //vector sigma_oc = V_oc * beta_oc; + vector sigma_oc; + if(beta_oc.size() > 0){ + sigma_oc = V_oc * beta_oc; + } else{ + sigma_oc = V_oc * beta_hds; + } + sigma_oc = exp(sigma_oc); + + vector p_avail_oc(M); + p_avail_oc.setOnes(); + if(beta_avail.size() > 0){ + vector phi = Xa_oc * beta_avail; + phi = exp(phi); + p_avail_oc = 1 - exp(-1 * durationOC.array() * phi.array()); + } + + Type f; + Type g; + Type p; + Type site_lp; + + for (int i=0; i q = 1 - distance_prob(key_oc, sigma_oc(i), Type(0), survey, + db_oc, w_oc, a_oc, u_oc) * p_avail_oc(i); + //vector q = 1 - distance_prob(key_oc, sigma_oc(i), Type(0), survey, + // db_oc, w_oc, a_oc, u_oc); + + site_lp = 0.0; + for (int k=Kmin_oc(i); k<(K_oc+1); k++){ + f = dpois(Type(k), lam_oc(i), false); + p = 1 - pow(q(0), Type(k)); + g = dbinom(y_oc(i,0), Type(1), p, false); + site_lp += f * g; + } + loglik -= log(site_lp); + + } + + } + + return loglik; + +} + +#undef TMB_OBJECTIVE_PTR +#define TMB_OBJECTIVE_PTR this diff --git a/src/TMB/tmb_occu.hpp b/src/TMB/tmb_occu.hpp index 0fef7150..c2825422 100644 --- a/src/TMB/tmb_occu.hpp +++ b/src/TMB/tmb_occu.hpp @@ -56,7 +56,10 @@ Type tmb_occu(objective_function* obj) { int pind = i * J; Type cp = 1.0; for (int j=0; j @@ -24,6 +25,8 @@ Type objective_function::operator() () { return tmb_distsamp(this); } else if(model == "tmb_gdistremoval"){ return tmb_gdistremoval(this); + } else if(model == "tmb_IDS"){ + return tmb_IDS(this); } else if(model == "tmb_goccu"){ return tmb_goccu(this); } else { diff --git a/src/nll_occuCOP.cpp b/src/nll_occuCOP.cpp new file mode 100644 index 00000000..8c18bbff --- /dev/null +++ b/src/nll_occuCOP.cpp @@ -0,0 +1,48 @@ +#include +#include + +using namespace Rcpp ; + +// [[Rcpp::export]] +double nll_occuCOP(arma::icolvec y, arma::icolvec L, + arma::mat Xpsi, arma::mat Xlambda, + arma::colvec beta_psi, arma::colvec beta_lambda, + Rcpp::LogicalVector removed) { + + // Number of sites M and obs J + int M = Xpsi.n_rows; + int J = y.n_elem / M; + + //Calculate psi back-transformed from logit + arma::colvec psi = 1.0/(1.0+exp(-Xpsi*beta_psi)); + + //Calculate lambda back-transformed from log + arma::colvec lambda = exp(Xlambda*beta_lambda); + + double ll=0.0; + int k=0; // counter + // for each site i in 1:M + for(int i=0; i0) { + ll += log(psi(i) * pow(iLambdaL, iN) / tgamma(iN + 1) * exp(-iLambdaL)); + } else { + ll += log(psi(i) * exp(-iLambdaL) + 1-psi(i)); + } + } + } + return -ll; +} diff --git a/tests/testthat/test_IDS.R b/tests/testthat/test_IDS.R new file mode 100644 index 00000000..61d348db --- /dev/null +++ b/tests/testthat/test_IDS.R @@ -0,0 +1,190 @@ +context("IDS fitting function") +skip_on_cran() + +test_that("IDS can fit models with covariates", { + set.seed(123) + formulas <- list(lam=~elev, ds=~1, phi=~1) + # Based on values from real data + design <- list(Mds=1000, J=6, Mpc=300) + # Based on values from real data + coefs <- list(lam = c(intercept=3, elev=-0.5), + ds = c(intercept=-2.5), + phi = c(intercept=-1.3)) + # Survey durations, loosely based on real data + durs <- list(ds = rep(5, design$Mds), pc=runif(design$Mpc, 3, 30)) + + sim_umf <- simulate("IDS", # name of model we are simulating for + nsim=1, # number of replicates + formulas=formulas, + coefs=coefs, + design=design, + # arguments used by unmarkedFrameDS + dist.breaks = seq(0, 0.30, length.out=7), + unitsIn="km", + # arguments used by IDS + # could also have e.g. keyfun here + durationDS=durs$ds, durationPC=durs$pc, durationOC=durs$oc, + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + set.seed(123) + mod_sim <- IDS(lambdaformula = ~elev, detformulaDS = ~1, + dataDS=sim_umf$ds, dataPC=sim_umf$pc, + availformula = ~1, durationDS=durs$ds, durationPC=durs$pc, + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + + expect_equivalent(coef(mod_sim), c(3.0271179,-0.4858101,-2.5050244,-1.3729505), tol=1e-5) + + # Predict + pr <- predict(mod_sim, type = 'lam') + expect_equal(nrow(pr), 1000) # predicts only for the distance sampled sites + pr <- predict(mod_sim, type = 'lam', newdata=sim_umf$pc) + + pr <- predict(mod_sim, type = 'ds') + expect_equal(nrow(pr), 1000) + expect_equal(pr$Predicted[1], exp(-2.5), tol=0.05) + pr <- predict(mod_sim, type = 'pc') + expect_equal(nrow(pr), 300) + expect_equal(pr$Predicted[1], exp(-2.5), tol=0.05) + pr <- predict(mod_sim, type = 'phi') + expect_equal(pr$ds$Predicted[1], exp(-1.37), tol=0.05) + + # residuals + r <- residuals(mod_sim) + expect_equal(lapply(r, dim), list(ds=c(1000,6), pc = c(300,1))) + + # parboot + pb <- parboot(mod_sim, nsim=2) + + pdf(NULL) + plot(mod_sim) + hist(mod_sim) + dev.off() + + expect_error(nonparboot(mod_sim)) + expect_error(ranef(mod_sim)) + + # Separate detection models + mod_sep <- IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaPC = ~1, + dataDS=sim_umf$ds[1:100,], dataPC=sim_umf$pc[1:100,], + availformula = ~1, durationDS=durs$ds[1:100], durationPC=durs$pc[1:100], + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + expect_equal(length(coef(mod_sim)), 4) + expect_equal(length(coef(mod_sep)), 5) +}) + +test_that("IDS can fit models with occupancy data", { + + set.seed(123) + formulas <- list(lam=~elev, ds=~1, oc=~1) + # Based on values from real data + design <- list(Mds=100, J=6, Mpc=100, Moc=100) + # Based on values from real data + coefs <- list(lam = c(intercept=3, elev=-0.5), + ds = c(intercept=-2.5), + oc = c(intercept = -2)) + + sim_umf <- simulate("IDS", # name of model we are simulating for + nsim=1, # number of replicates + formulas=formulas, + coefs=coefs, + design=design, + # arguments used by unmarkedFrameDS + dist.breaks = seq(0, 0.30, length.out=7), + unitsIn="km", + # arguments used by IDS + # could also have e.g. keyfun here + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + + mod_oc <- IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaOC = ~1, + dataDS=sim_umf$ds, dataPC=sim_umf$pc, dataOC=sim_umf$oc, + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + + expect_equivalent(coef(mod_oc), c(3.0557091, -0.4200719, -2.5384331, -2.0610341), + tol=1e-5) + + pr <- predict(mod_oc, type='oc') + expect_equal(pr$Predicted[1], 0.1273222, tol=1e-5) + + res <- residuals(mod_oc) + expect_equal(lapply(res, dim), list(ds=c(100,6), pc = c(100,1), oc=c(100,1))) + + pb <- parboot(mod_oc, nsim=1) + + # Don't estimate availability if OC data + expect_error( + mod_oc <- IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaOC = ~1, + availformula = ~1, + dataDS=sim_umf$ds, dataPC=sim_umf$pc, dataOC=sim_umf$oc, + maxDistPC=0.5, maxDistOC=0.5, + durationDS=durs$ds, durationPC=durs$pc, durationOC=durs$pc, + unitsOut="kmsq") + ) + + # Just occupancy data + mod_oc <- IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaOC = ~1, + dataDS=sim_umf$ds, dataOC=sim_umf$oc, + maxDistOC=0.5, + unitsOut="kmsq") + + expect_equal(names(mod_oc), c("lam","ds","oc")) + +}) + +test_that("IDS handles missing values", { + + set.seed(123) + design <- list(Mds=100, J=6, Mpc=100, Moc=100) + formulas <- list(lam=~elev, ds=~1, phi=~1) + # Based on values from real data + coefs <- list(lam = c(intercept=3, elev=-0.5), + ds = c(intercept=-2.5), + phi = c(intercept=-1.3)) + # Survey durations, loosely based on real data + durs <- list(ds = rep(5, design$Mds), pc=runif(design$Mpc, 3, 30)) + + sim_umf <- simulate("IDS", # name of model we are simulating for + nsim=1, # number of replicates + formulas=formulas, + coefs=coefs, + design=design, + # arguments used by unmarkedFrameDS + dist.breaks = seq(0, 0.30, length.out=7), + unitsIn="km", + # arguments used by IDS + # could also have e.g. keyfun here + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + + sim_umf$pc@y[1,1] <- NA + sim_umf$pc@y[2,] <- NA + + sim_umf$oc@y[1,1] <- NA + sim_umf$oc@y[2,] <- NA + + expect_warning( + mod_sim <- IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaOC = ~1, + dataDS=sim_umf$ds, dataPC=sim_umf$pc, dataOC=sim_umf$oc, + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + ) + + expect_equivalent(coef(mod_sim), c(2.9354934,-0.4759405,-2.5314594,-2.3259133), + tol=1e-5) + + + sim_umf$ds@y[1,1] <- NA + sim_umf$ds@y[2,] <- NA + + expect_error( + expect_warning( + mod_sim <- IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaOC = ~1, + dataDS=sim_umf$ds, dataPC=sim_umf$pc, dataOC=sim_umf$oc, + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + )) + +}) diff --git a/tests/testthat/test_distsampOpen.R b/tests/testthat/test_distsampOpen.R index bd25e8bf..2797b803 100644 --- a/tests/testthat/test_distsampOpen.R +++ b/tests/testthat/test_distsampOpen.R @@ -325,7 +325,7 @@ test_that("distsampOpen dynamics models work",{ fm <- distsampOpen(~1, ~1, ~1, data = umf, K=25, keyfun="unif", dynamics="autoreg") - expect_equivalent(coef(fm), c(1.518686, -0.018026, -5.628779), tol=1e-5) + expect_equivalent(coef(fm), c(1.518686, -0.018026, -5.628779), tol=1e-4) #Sketchy estimates #Maybe just because data were simulated using a different process? diff --git a/tests/testthat/test_occu.R b/tests/testthat/test_occu.R index 043bc590..2538e681 100644 --- a/tests/testthat/test_occu.R +++ b/tests/testthat/test_occu.R @@ -374,6 +374,13 @@ test_that("occu can handle random effects",{ pb <- parboot(fm, nsim=1) expect_is(pb, "parboot") + # confint should only show fixed effects + ci <- confint(fm, type = 'state') + expect_equal(nrow(ci), 2) + + ci <- confint(fm['state']) + expect_equal(nrow(ci), 2) + # Check custom initial values expect_equal(fm@TMB$starts_order[1], "beta_det") fmi <- occu(~1~cov1 + (1|site_id), umf, starts=c(10,0,0,0)) @@ -407,3 +414,30 @@ test_that("occu can handle random effects",{ test <- modSel(fl) # shouldn't warn #options(warn=0) }) + +test_that("TMB engine gives correct det estimates when there are lots of NAs", { + + skip_on_cran() + set.seed(123) + M <- 200 + J <- 10 + psi <- 0.5 + + z <- rbinom(M, 1, psi) + + x <- matrix(rnorm(M*J), M, J) + + p <- plogis(0 + 0.3*x) + + y <- matrix(NA, M, J) + for (i in 1:M){ + y[i,] <- rbinom(J, 1, p[i,]) * z[i] + } + y[sample(1:(M*J), (M*J/2), replace=FALSE)] <- NA + + umf <- unmarkedFrameOccu(y=y, obsCovs=list(x=x)) + + fit <- occu(~x~1, umf) + fitT <- occu(~x~1, umf, engine="TMB") + expect_equal(coef(fit), coef(fitT), tol=1e-5) +}) diff --git a/tests/testthat/test_occuCOP.R b/tests/testthat/test_occuCOP.R new file mode 100644 index 00000000..ca880bda --- /dev/null +++ b/tests/testthat/test_occuCOP.R @@ -0,0 +1,485 @@ +context("occuCOP fitting function") +skip_on_cran() + +COPsimul <- function(psi = 0.5, + lambda = 1, + M = 100, + J = 5) { + + z_i <- sample( + x = c(0, 1), + size = M, + prob = c(1 - psi, psi), + replace = T + ) + + y = matrix(rpois(n = M * J, lambda = lambda), nrow = M, ncol = J) * z_i + + return(y) +} + + +test_that("unmarkedFrameOccuCOP is constructed correctly", { + set.seed(123) + + # Simulate data + M = 100 + J = 5 + y = COPsimul(psi = .5, + lambda = 1, + M = M, + J = J) + L = y * 0 + 1 + + psiCovs = data.frame( + "psiNum" = rnorm(M), + "psiInt" = as.integer(rpois(n = M, lambda = 5)), + "psiBool" = sample(c(T, F), size = M, replace = T), + "psiChar" = sample(letters[1:5], size = M, replace = T), + "psiFactUnord" = factor(sample( + letters[5:10], size = M, replace = T + )), + "psiFactOrd" = sample(factor(c("A", "B", "C"), ordered = T), size = + M, replace = T) + ) + + lambdaCovs = list( + "lambdaNum" = matrix( + rnorm(M * J), + nrow = M, ncol = J + ), + "lambdaInt" = matrix( + as.integer(rpois(n = M * J, lambda = 1e5)), + nrow = M, ncol = J + ), + "lambdaBool" = matrix( + sample(c(T, F), size = M * J, replace = T), + nrow = M, ncol = J + ), + "lambdaChar" = matrix( + sample(letters[1:5], size = M * J, replace = T), + nrow = M, ncol = J + ), + "lambdaFactUnord" = matrix( + factor(sample(letters[5:10], size = M * J, replace = T)), + nrow = M, ncol = J + ), + "lambdaFactOrd" = matrix( + sample(factor(c("A", "B", "C"), ordered = T), size = M * J, replace = T), + nrow = M, ncol = J + ) + ) + + + # Creating a unmarkedFrameOccuCOP object + expect_warning(umf <- unmarkedFrameOccuCOP(y = y)) + expect_no_error(umf <- unmarkedFrameOccuCOP(y = y, L = L)) + + + # Create subsets + expect_no_error(umf_sub_i <- umf[1:3, ]) + expect_no_error(umf_sub_j <- umf[, 1:2]) + expect_no_error(umf_sub_ij <- umf[1:3, 1:2]) + + # unmarkedFrameOccuCOP organisation ---------------------------------------------- + expect_true(inherits(umf, "unmarkedFrameOccuCOP")) + expect_equivalent(numSites(umf_sub_i), 3) + expect_equivalent(obsNum(umf_sub_j), 2) + expect_equivalent(numSites(umf_sub_ij), 3) + expect_equivalent(obsNum(umf_sub_ij), 2) + + # unmarkedFrameOccuCOP display --------------------------------------------------- + + # print method + expect_output(print(umf), "Data frame representation of unmarkedFrame object") + expect_output(print(umf_sub_i), "Data frame representation of unmarkedFrame object") + expect_output(print(umf[1,]), "Data frame representation of unmarkedFrame object") + expect_output(print(umf[,1]), "Data frame representation of unmarkedFrame object") + expect_output(print(umf[1,1]), "Data frame representation of unmarkedFrame object") + + # summary method for unmarkedFrameOccuCOP + expect_output(summary(umf), "unmarkedFrameOccuCOP Object") + expect_output(summary(umf_sub_ij), "unmarkedFrameOccuCOP Object") + + # plot method for unmarkedFrameOccuCOP + expect_no_error(plot(umf)) + expect_no_error(plot(umf_sub_ij)) + + + # Input handling: covariates ------------------------------------------------- + expect_no_error(umfCovs <- unmarkedFrameOccuCOP( + y = y, + L = L, + siteCovs = psiCovs, + obsCovs = lambdaCovs + )) + expect_output(print(umfCovs), "Data frame representation of unmarkedFrame object") + expect_output(summary(umfCovs), "unmarkedFrameOccuCOP Object") + expect_no_error(plot(umfCovs)) + + # Input handling: NA --------------------------------------------------------- + + # NA should not pose problems when creating the unmarkedFrameOccuCOP object. + # The warnings and potential errors will be displayed when fitting the model. + # Except when y only contains NA: then there's an error. + + ## NA in y + yNA <- y + yNA[1:2,] <- NA + yNA[3:5, 3:4] <- NA + yNA[,3] <- NA + expect_no_error(umfNA <- unmarkedFrameOccuCOP(y = yNA, L = L)) + expect_output(print(umfNA), "Data frame representation of unmarkedFrame object") + expect_output(summary(umfNA), "unmarkedFrameOccuCOP Object") + expect_no_error(plot(umfNA)) + + ## NA in L + obsLengthNA <- L + obsLengthNA[1:2,] <- NA + obsLengthNA[3:5, 3:4] <- NA + obsLengthNA[,5] <- NA + expect_no_error(umfNA <- unmarkedFrameOccuCOP(y = y, L = obsLengthNA)) + expect_output(print(umfNA), "Data frame representation of unmarkedFrame object") + expect_output(summary(umfNA), "unmarkedFrameOccuCOP Object") + + expect_no_error(plot(umfNA)) + + ## NA also in covariates + psiCovsNA <- psiCovs + psiCovsNA[1:5,] <- NA + psiCovsNA[c(8,10,12), 3] <- NA + psiCovsNA[,1] <- NA + lambdaCovsNA <- lambdaCovs + lambdaCovsNA[[1]][1:5,] <- NA + lambdaCovsNA[[1]][,3] <- NA + lambdaCovsNA[[3]][,5] <- NA + expect_no_error(umfCovsNA <- unmarkedFrameOccuCOP( + y = yNA, + L = obsLengthNA, + siteCovs = psiCovsNA, + obsCovs = lambdaCovsNA + )) + expect_output(print(umfCovsNA), "Data frame representation of unmarkedFrame object") + expect_output(summary(umfCovsNA), "unmarkedFrameOccuCOP Object") + expect_no_error(plot(umfCovsNA)) + + ## all NA in y + yallNA <- y + yallNA[1:M, 1:J] <- NA + expect_error(unmarkedFrameOccuCOP(y = yallNA, L = L)) + + # Input handling: error and warnings ----------------------------------------- + # Error when there are decimals in y + y_with_decimals = y + y_with_decimals[1, 1] = .5 + expect_error(unmarkedFrameOccuCOP(y = y_with_decimals, L = L)) + + # Warning if y is detection/non-detection instead of count + y_detec_nodetec = (y > 0) * 1 + expect_warning(unmarkedFrameOccuCOP(y = y_detec_nodetec, L = L)) + + # Error if the dimension of L is different than that of y + expect_error(unmarkedFrameOccuCOP(y = y, L = L[1:2, 1:2])) +}) + + +test_that("occuCOP can fit simple models", { + # Simulate data + set.seed(123) + M = 100 + J = 5 + y = COPsimul(psi = .5, + lambda = 1, + M = M, + J = J) + L = y * 0 + 1 + + # Create umf + umf <- unmarkedFrameOccuCOP(y = y, L = L) + + # Fitting options ---- + + ## With default parameters ---- + expect_no_error(fit_default <- occuCOP(data = umf, L1 = TRUE)) + expect_warning(occuCOP(data = umf, psiformula = ~ 1, lambdaformula = ~ 1, psistarts = 0, lambdastarts = 0)) + + ## With chosen starting points ---- + expect_no_error(occuCOP(data = umf, + psiformula = ~ 1, lambdaformula = ~ 1, + psistarts = qlogis(.7), + lambdastarts = log(0.1), L1=T)) + expect_no_error(occuCOP(data = umf, + psiformula = ~ 1, lambdaformula = ~ 1, + starts = c(qlogis(.7), log(0.1)), L1 = T)) + # warning if all starts and psistarts and lambdastarts were furnished + # and starts != c(psistarts, lambdastarts) + expect_no_error(occuCOP(data = umf, starts = c(0, 0), + psistarts = c(0), lambdastarts = c(0), L1 = T)) + expect_warning(occuCOP(data = umf, starts = c(0, 1), + psistarts = c(0), lambdastarts = c(0), L1 = T)) + # errors if starting vectors of the wrong length + expect_error(occuCOP(data = umf, starts = c(0), L1 = T)) + expect_error(occuCOP(data = umf, psistarts = c(0, 0), lambdastarts = 0, L1 = T)) + expect_error(occuCOP(data = umf, lambdastarts = c(0, 0), L1 = T)) + + # With different options + expect_no_error(occuCOP(data = umf, method = "Nelder-Mead", psistarts = 0, lambdastarts = 0, L1=T)) + expect_error(occuCOP(data = umf, method = "ABC", psistarts = 0, lambdastarts = 0, L1=T)) + + expect_no_error(occuCOP(data = umf, se = F, psistarts = 0, lambdastarts = 0, L1=T)) + expect_error(occuCOP(data = umf, se = "ABC")) + + expect_no_error(occuCOP(data = umf, engine = "R", psistarts = 0, lambdastarts = 0, L1=T)) + expect_error(occuCOP(data = umf, engine = "julia", psistarts = 0, lambdastarts = 0, L1=T)) + + expect_no_error(occuCOP(data = umf, na.rm = F, psistarts = 0, lambdastarts = 0, L1=T)) + expect_error(occuCOP(data = umf, na.rm = "no", psistarts = 0, lambdastarts = 0, L1=T)) + + # Looking at at COP model outputs ---- + expect_is(fit_default, "unmarkedFitOccuCOP") + expect_equivalent(coef(fit_default), c(0.13067954, 0.06077929), tol = 1e-5) + + ## backTransform + expect_no_error(backTransform(fit_default, type = "psi")) + expect_no_error(backTransform(fit_default, type = "lambda")) + expect_error(backTransform(fit_default, type = "state")) + expect_error(backTransform(fit_default, type = "det")) + expect_is(backTransform(fit_default, type = "psi"), "unmarkedBackTrans") + + ## predict with newdata = fit@data + expect_no_error(umpredpsi <- predict(object = fit_default, type = "psi")) + expect_equal(umpredpsi$Predicted[1], 0.5326235, tol = 1e-5) + expect_no_error(umpredlambda <- predict(object = fit_default, type = "lambda")) + expect_equal(umpredlambda$Predicted[1], 1.062664, tol = 1e-5) + expect_error(predict(object = fit_default, type = "state")) + + ## predict with newdata = 1 + expect_no_error(predict( + object = fit_default, + newdata = data.frame(1), + type = "psi" + )) + expect_no_error(predict( + object = fit_default, + newdata = data.frame(1), + type = "lambda" + )) + expect_no_error(predict( + object = fit_default, + newdata = data.frame("a"=1:5,"b"=10:14), + type = "psi" + )) + + # Fitting accurately ---- + ## psi = 0.50, lambda = 1 ---- + psi_test = .5 + lambda_test = 1 + fit_accur <- occuCOP(data = unmarkedFrameOccuCOP( + y = COPsimul( + psi = psi_test, + lambda = lambda_test, + M = 1000, + J = 10 + ), + L = matrix(1, nrow = 1000, ncol = 10) + ), psistarts = 0, lambdastarts = 0, L1=T) + psi_estimate = backTransform(fit_accur, type = "psi")@estimate + lambda_estimate = backTransform(fit_accur, type = "lambda")@estimate + expect_equivalent( + psi_estimate, + psi_test, + tol = 0.05 + ) + expect_equivalent( + lambda_estimate, + lambda_test, + tol = 0.05 + ) + + ## psi = 0.25, lambda = 5 ---- + psi_test = 0.25 + lambda_test = 5 + fit_accur <- occuCOP(data = unmarkedFrameOccuCOP( + y = COPsimul( + psi = psi_test, + lambda = lambda_test, + M = 1000, + J = 10 + ), + L = matrix(1, nrow = 1000, ncol = 10) + ), psistarts = 0, lambdastarts = 0, L1=T) + psi_estimate = backTransform(fit_accur, type = "psi")@estimate + lambda_estimate = backTransform(fit_accur, type = "lambda")@estimate + expect_equivalent( + psi_estimate, + psi_test, + tol = 0.05 + ) + expect_equivalent( + lambda_estimate, + lambda_test, + tol = 0.05 + ) + + ## psi = 0.75, lambda = 0.5 ---- + psi_test = 0.75 + lambda_test = 0.5 + fit_accur <- occuCOP(data = unmarkedFrameOccuCOP( + y = COPsimul( + psi = psi_test, + lambda = lambda_test, + M = 1000, + J = 10 + ), + L = matrix(1, nrow = 1000, ncol = 10) + ), psistarts = 0, lambdastarts = 0, L1=T) + psi_estimate = backTransform(fit_accur, type = "psi")@estimate + lambda_estimate = backTransform(fit_accur, type = "lambda")@estimate + expect_equivalent( + psi_estimate, + psi_test, + tol = 0.05 + ) + expect_equivalent( + lambda_estimate, + lambda_test, + tol = 0.05 + ) + + # With NAs ---- + yNA <- y + yNA[1,] <- NA + yNA[3, 1] <- NA + yNA[4, 3] <- NA + yNA[, 5] <- NA + expect_no_error(umfNA <- unmarkedFrameOccuCOP(y = yNA, L = L)) + + expect_warning(fit_NA <- occuCOP(data = umfNA, psistarts = 0, lambdastarts = 0, L1=T)) + expect_error(occuCOP(data = umfNA, psistarts = 0, lambdastarts = 0, na.rm = F)) +}) + +test_that("We can simulate COP data", { + + # From scratch ---- + + # With no covariates + expect_no_error(simulate( + "occuCOP", + formulas = list(psi = ~ 1, lambda = ~ 1), + coefs = list( + psi = c(intercept = 0), + lambda = c(intercept = 0) + ), + design = list(M = 100, J = 100) + )) + + # With quantitative covariates + expect_no_error(simulate( + "occuCOP", + formulas = list(psi = ~ elev, lambda = ~ rain), + coefs = list( + psi = c(intercept = qlogis(.5), elev = -0.5), + lambda = c(intercept = log(3), rain = -1) + ), + design = list(M = 100, J = 5) + )) + + # With guides + expect_no_error(simulate( + "occuCOP", + formulas = list(psi = ~ elev, lambda = ~ rain), + coefs = list( + psi = c(intercept = qlogis(.5), elev = -0.5), + lambda = c(intercept = log(3), rain = -1) + ), + design = list(M = 100, J = 5), + guide = list(elev=list(dist=rnorm, mean=12, sd=0.5)) + )) + + # With qualitative covariates + expect_no_error(umf <- simulate( + "occuCOP", + formulas = list(psi = ~ elev + habitat, lambda = ~ 1), + coefs = list( + psi = c( + intercept = qlogis(.2), + elev = -0.5, + habitatB = .5, + habitatC = .8 + ), + lambda = c(intercept = log(3)) + ), + design = list(M = 100, J = 5), + guide = list(habitat = factor(levels = c("A", "B", "C"))) + )) + + # From unmarkedFitOccuCOP ---- + expect_no_error(umfit <- occuCOP( + umf, + psiformula = ~ habitat, + L1 = T, + psistarts = c(0,0,0), + lambdastarts = 0 + )) + expect_no_error(simulate(umfit)) +}) + +test_that("occuCOP can fit and predict models with covariates", { + # Simulate data with covariates ---- + set.seed(123) + expect_no_error(umf <- simulate( + "occuCOP", + formulas = list(psi = ~ elev + habitat, lambda = ~ rain), + coefs = list( + psi = c( + intercept = qlogis(.2), + elev = -0.5, + habitatB = .5, + habitatC = .8 + ), + lambda = c(intercept = log(3), rain = -1) + ), + design = list(M = 100, J = 5), + guide = list(habitat = factor(levels = c("A", "B", "C"))) + )) + + # Fit ---- + expect_no_error(umfit <- occuCOP( + umf, + psiformula = ~ habitat + elev, + lambdaformula = ~ rain, + L1 = T, + psistarts = c(0,0,0,0), + lambdastarts = c(0,0) + )) + + expect_error(occuCOP( + umf, + psiformula = ~ habitat+elev, + lambdaformula = ~ rain, + L1 = T, + psistarts = c(0), + lambdastarts = c(0,0) + )) + + expect_equivalent( + coef(umfit), + c(-1.5350679, 0.4229763, 0.7398768, -1.0456397, 1.2333424, -0.8344109), + tol = 1e-5 + ) + + # Predict ---- + expect_no_error(predict(umfit, type = "psi")) + expect_no_error(umpredpsi <- predict( + umfit, + type = "psi", + newdata = data.frame("habitat" = c("A", "B", "C"), "elev" = c(0, 0, 0)), + appendData = TRUE + )) + expect_equivalent(umpredpsi$Predicted, c(0.1772534, 0.2474811, 0.3110551), tol = 1e-5) + + expect_no_error(umpredlambda <- predict(umfit, type = "lambda", appendData = TRUE)) + expect_no_error(predict(umfit, type = "lambda", level = 0.5)) + expect_equal(umpredlambda$Predicted[1], 1.092008, tol = 1e-5) +}) + diff --git a/vignettes/contributing_to_unmarked.Rmd b/vignettes/contributing_to_unmarked.Rmd new file mode 100644 index 00000000..8ffab04a --- /dev/null +++ b/vignettes/contributing_to_unmarked.Rmd @@ -0,0 +1,318 @@ +--- +title: "Contributing to unmarked: guide to adding a new model to `unmarked`" +author: + - name: Ken Kellner + - name: Léa Pautrel +date: "December 08, 2023" +output: + rmarkdown::html_vignette: + fig_width: 5 + fig_height: 3.5 + number_sections: true + toc: true + toc_depth: 2 +vignette: > + %\VignetteIndexEntry{Contributing to unmarked} + %\VignetteEngine{knitr::rmarkdown} + \usepackage[utf8]{inputenc} +--- + +```{r setup, include = FALSE} +options(rmarkdown.html_vignette.check_title = FALSE) +library(unmarked) +``` + + +Follow the steps in this guide to add a new model to the `unmarked` package. Note that the order can be adjusted based on your preferences. For instance, you can start with [the likelihood function](#the-likelihood-function), as it forms the core of adding a model to unmarked, and then build the rest of the code around it. In this document, the steps are ordered as they would occur in an `unmarked` analysis workflow. + +This guide uses the recently developed `gdistremoval` function for examples, mainly because most of the relevant code is in a single file instead of spread around. It also uses `occu` functions to show simpler examples that may be easier to understand. + +# Prerequisites and advices {-} + +- Before you start coding, you should use git to version your code: + - Fork the `unmarked` [repository](https://github.com/rbchan/unmarked) on Github + - Make a new branch with your new function as the name + - Add the new code + +- `unmarked` uses S4 for objects and methods - if you aren't familiar with S4 you may want to consult a book or tutorial such as [this one](https://kasperdanielhansen.github.io/genbioconductor/html/R_S4.html). + +- If you are unfamiliar with building a package in R, here are two tutorials that may help you: [Karl Broman's guide to building packages](https://kbroman.org/pkg_primer/) and [the official R-project guide](https://cran.r-project.org/doc/manuals/R-exts.html). If you are using RStudio, their [documentation on writing package](https://docs.posit.co/ide/user/ide/guide/pkg-devel/writing-packages.html) could also be useful, especially to understand how to use the **Build** pane. + +- To avoid complex debugging in the end, I suggest you to regularly install and load the package as you add new code. You can easily do so in RStudio in the Build pane, by clicking on "Install > Clean and install". This will also allow you to test your functions cleanly. + +- Write [tests](#write-tests) and [documentation](#write-documentation) as you add new functions, classes, and methods. This eases the task, avoiding the need to write everything at the end. + +# Organise the input data: design the `unmarkedFrame` object + +Most model types in unmarked have their own `unmarkedFrame`, a specialized kind of data frame. This is an S4 object which contains, at a minimum, the response (y). It may also include site covariates, observation covariates, primary period covariates, and other info related to study design (such as distance breaks). + +In some cases you may be able to use an existing `unmarkedFrame` subclass. You can list all the existing `unmarkedFrame` subclasses by running the following code: + +```{r unmarkedFrame-subclasses} +showClass("unmarkedFrame") +``` +You can have more information about each `unmarkedFrame` subclass by looking at the documentation of the function that was written to create the `unmarkedFrame` object of this subclass, for example with `?unmarkedFrameGDR`, or on the [package's website](https://rbchan.github.io/unmarked/reference/unmarkedFrameGDR.html). + +## Define the `unmarkedFrame` subclass for this model + +- All `unmarkedFrame` subclasses are children of the `umarkedFrame` class, defined [here](https://github.com/rbchan/unmarked/blob/master/R/unmarkedFrame.R#L24-L30). +- [Example with `occu`](https://github.com/rbchan/unmarked/blob/master/R/unmarkedFrame.R#L65-L66) +- [Example with `gdistremoval`](https://github.com/rbchan/unmarked/blob/master/R/gdistremoval.R#L1-L11) +- All `unmarkedFrame` subclasses need to pass the [validunmarkedFrame](https://github.com/rbchan/unmarked/blob/master/R/unmarkedFrame.R#L4-L19) validity check. You may want to add complementary validity check, like, for example, the [`unmarkedFrameDS subclass](https://github.com/rbchan/unmarked/blob/master/R/unmarkedFrame.R#L43-L62). + +## Write the function that creates the `unmarkedFrame` object + +- [Example with `occu`](https://github.com/rbchan/unmarked/blob/master/R/unmarkedFrame.R#L232-L239) +- [Example with `gdistremoval`](https://github.com/rbchan/unmarked/blob/master/R/gdistremoval.R#L13-L50) + +## Write the S4 methods associated with the `unmarkedFrame` object {#methods-unmarkedFrame} + +Note that you may not have to write all of the S4 methods below. Most of them will work without having to re-write them, but you should test it to verify it. All the methods associated with `unmarkedFrame` objects are listed in the [`unmarkedFrame` class documentation](https://rbchan.github.io/unmarked/reference/unmarkedFrame-class.html) accessible with `help("unmarkedFrame-class")`. + +### Specific methods {-} + +Here are methods you probably will have to rewrite. + +- Subsetting the `unmarkedFrame` object: `umf[i, ]`, `umf[, j]` and `umf[i, j]` + - Example with `occu`: [code for `unmarkedFrame` mother class](https://github.com/rbchan/unmarked/blob/master/R/unmarkedFrame.R#L1126-L1235), as used to subset an `unmarkedFrameOccu` object. + - Example with `gdistremoval`: [`umf[i, ]` when `i` is numeric](https://github.com/rbchan/unmarked/blob/master/R/gdistremoval.R#L67-L120), [`umf[i, ]` when `i` is logical](https://github.com/rbchan/unmarked/blob/master/R/gdistremoval.R#L122-L126), [`umf[i, j]`](https://github.com/rbchan/unmarked/blob/master/R/gdistremoval.R#L128-L174) + +### Generic methods {-} + +Here are methods that you should test but probably will not have to rewrite. They are defined in the [`unmarkedFrame.R`](https://github.com/rbchan/unmarked/blob/master/R/unmarkedFrame.R) file, for the `unmarkedFrame` mother class. + +- `coordinates` +- `getY` +- `numSites` +- `numY` +- `obsCovs` +- `obsCovs<-` +- `obsNum` +- `obsToY` +- `obsToY<-` +- `plot` +- `projection` +- `show` +- `siteCovs` +- `siteCovs<-` +- `summary` + +### Methods to access new attributes {-} + +You may also need to add specific methods to allow users to access an attribute you added to your `unmarkedFrame` subclass. + +- For example, `getL` for `unmarkedFrameOccuCOP` + +# Fitting the model + +The fitting function can be declined into three main steps: reading the `unmarkedFrame` object, maximising the likelihood, and formatting the outputs. + +- [Example: the `occu()` function](https://github.com/rbchan/unmarked/blob/master/R/occu.R#L4-L161) +- [Example: the `gdistremoval()` function](https://github.com/rbchan/unmarked/blob/master/R/gdistremoval.R#L257-L472) + +## Inputs of the fitting function + +- R formulas for each submodel (e.g. state, detection). We have found over time it is better to have separate arguments per formula (*e.g.* the way `gdistremoval` does it) instead of a combined formula (*e.g.* the way `occu` does it). +- `data` for the `unmarkedFrame` +- Parameters for `optim`: optimisation algorithm (`method`), initial parameters, and other parameters (`...`) +- `engine` parameter to call one of the implemented likelihood functions +- Other model-specific settings, such as key functions or parameterizations to use + +## Read the `unmarkedFrame` object: write the `getDesign` method + +Most models have their own `getDesign` function, an S4 method. The purpose of this method is to convert the information in the `unmarkedFrame` into a format usable by the likelihood function. + +- It generates **design matrices** from formulas and components of the `unmarkedFrame`. +- It often also has code to handle **missing values**, such as by dropping sites that don't have measurements, or giving the user warnings if covariates are missing, etc. + +Writing the `getDesign` method is frequently the most tedious and difficult part of the work adding a new function. + +- [Example for `occu`](https://github.com/rbchan/unmarked/blob/master/R/getDesign.R#L10-L153), as used for `occu` +- [Example for `gdistremoval`](https://github.com/rbchan/unmarked/blob/master/R/gdistremoval.R#L177-L253) + +## The likelihood function + +- **Inputs**: a vector of parameter values, the response variable, design matrices, and other settings/required data +- **Outputs**: a numeric, the negative log-likelihood +- Should be written so it can be used with the `optim()` function +- Models can have three likelihood functions : coded in R, in C++ and with TMB (*which is technically in C++ too*). Users can specify which likelihood function to use in the `engine` argument of the fitting function. + +### The R likelihood function: easily understandable + +If you are mainly used to coding in R, you should probably start here. If users want to dig deeper into the likelihood of a model, it may be useful for them to be able to read the R code to calculate likelihood, as they may not be familiar with other languages. This likelihood function can be used only for **fixed-effects models**. + +- [Example for `occu`](https://github.com/rbchan/unmarked/blob/master/R/occu.R#L65-L74) +- `gdistremoval` doesn't have an R version of the likelihood function + +### The C++ likelihood function: faster + +The C++ likelihood function is essentially a C++ version of the R likelihood function, also designed exclusively for **fixed-effects models**. This function uses the `RcppArmadillo` R package, [presented here](https://github.com/RcppCore/RcppArmadillo). In the C++ code, you can use functions of the `Armadillo` C++ library, [documented here](https://arma.sourceforge.net/docs.html). + +Your C++ function should be in a `.cpp` file in the `./src/` folder of the package. You do not need to write a header file (`.hpp`), nor do you need to compile the code by yourself as it is all handled by the `RcppArmadillo` package. To test if your C++ function runs and gives you the expected result, you can compile and load the function with ` Rcpp::sourceCpp(./src/nll_yourmodel.cpp)`, and then use it like you would use a R function: `nll_yourmodel(params=params, arg1=arg1)`. + +- [Example for `occu`](https://github.com/rbchan/unmarked/blob/master/src/nll_occu.cpp) +- [Example for `gdistremoval`](https://github.com/rbchan/unmarked/blob/master/src/nll_gdistremoval.cpp) + +### The TMB likelihood function: for random effects + +> #TODO + +- [Example for `gdistremoval`](https://github.com/rbchan/unmarked/blob/master/src/TMB/tmb_gdistremoval.hpp) + +## Organise the output data + +### `unmarkedEstimate` objects per submodel + +Outputs from `optim` should be organized unto `unmarkedEstimate` (S4) objects, with one `unmarkedEstimate` per submodel (*e.g.* state, detection). These objects include the parameter estimates and other information about link functions etc. + +The `unmarkedEstimate` class is defined [here](https://github.com/rbchan/unmarked/blob/master/R/unmarkedEstimate.R#L5-L26) in the `unmarkedEstimate.R` file, and the `unmarkedEstimate` function is defined [here](https://github.com/rbchan/unmarked/blob/master/R/unmarkedEstimate.R#L86-L100), and is used to create new `unmarkedEstimate` objects. You normally will not need to create `unmarkedEstimate` subclass. + +- [Example for the state estimate for `occu`](https://github.com/rbchan/unmarked/blob/master/R/occu.R#L132-L139) +- [Example for the lambda estimate for `gdistremoval`](https://github.com/rbchan/unmarked/blob/master/R/gdistremoval.R#L429-L431C72) + + +### Design the `unmarkedFit` object + +You'll need to create a new `unmarkedFit` subclass for your model. The main component of `unmarkedFit` objects is a list of the `unmarkedEstimates` described above. + +- [Definition of the `unmarkedFit` mother class](https://github.com/rbchan/unmarked/blob/master/R/unmarkedFit.R#L1-L14) +- [Example of the `unmarkedFitOccu` subclass definition](https://github.com/rbchan/unmarked/blob/master/R/unmarkedFit.R#L68-L70) +- [Example of the `unmarkedFitGDR` subclass definition](https://github.com/rbchan/unmarked/blob/master/R/gdistremoval.R#L255) + +After you defined your `unmarkedFit` subclass, you can create the object in your fitting function. + +- [Example of the `unmarkedFitOccu` object creation](https://github.com/rbchan/unmarked/blob/master/R/occu.R#L153-L158) +- [Example of the `unmarkedFitGDR` object creation](https://github.com/rbchan/unmarked/blob/master/R/gdistremoval.R#L466-L470) + +The fitting function return this `unmarkedFit` object. + +## Test the complete fitting function process + +- Simulate some data using your model +- Construct the `unmarkedFrame` +- Provide formulas, `unmarkedFrame`, other options to your draft fitting function +- Process them with `getDesign` +- Pass results from `getDesign` as inputs to your likelihood function +- Optimize the likelihood function +- Check the resulting parameter estimates for accuracy + +# Write the methods associated with the `unmarkedFit` object + +Develop methods specific to your `unmarkedFit` type for operating on the output of your model. Like for the methods associated with an `unmarkedFrame` object [above](#methods-unmarkedFrame), you probably will not have to re-write all of them, but you should test them to see if they work. All the methods associated with `unmarkedFit` objects are listed in the [`unmarkedFit` class documentation](https://rbchan.github.io/unmarked/reference/unmarkedFit-class.html) accessible with `help("unmarkedFit-class")`. + +### Specific methods {-} + +Those are methods you will want to rewrite, adjusting them for your model. + +#### `getP` {-} + +The `getP` method ([defined here](https://github.com/rbchan/unmarked/blob/master/R/unmarkedFit.R#L1475-L1492)) "back-transforms" the detection parameter ($p$ the detection probability or $\lambda$ the detection rate, depending on the model). It returns a matrix of the estimated detection parameters. It is called by several other methods that are useful to extract information from the `unmarkedFit` object. + +- For `occu`, the generic method for `unmarkedFit` objects is called. +- [Example for `gdistremoval`](https://github.com/rbchan/unmarked/blob/master/R/gdistremoval.R#L476-L537) + +#### `simulate` {-} + +The generic `simulate` method ([defined here](https://github.com/rbchan/unmarked/blob/master/R/simulate.R#L62C33-L86)) calls the `simulate_fit` method that depends on the class of the `unmarkedFit` object, which depends on the model. + +- [Example of `simulate_fit` method for `occu`](https://github.com/rbchan/unmarked/blob/master/R/simulate.R#L158-L165) +- [Example of `simulate_fit` method for `gdistremoval`](https://github.com/rbchan/unmarked/blob/master/R/simulate.R#L536-L558) + +The `simulate` method can be used in two ways: + +- to generate datasets from scratch ([see the "Simulating datasets" vignette](https://rbchan.github.io/unmarked/articles/simulate.html)) +- to generate datasets from a fitted model (with `simulate(object = my_unmarkedFit_object)`). + +You should test both ways with your model. + +#### `plot` {-} + +This method plots the results of your model. The generic `plot` method for `unmarkedFit` ([defined here](https://github.com/rbchan/unmarked/blob/master/R/unmarkedFit.R#L1346-L1352)) plot the residuals of the model. + +- For `occu`, the generic method for `unmarkedFit` objects is called. +- [Example for `gdistremoval`](https://github.com/rbchan/unmarked/blob/master/R/gdistremoval.R#L837-L853) + +### Generic methods {-} + +Here are methods that you should test but probably will not have to rewrite. They are defined in the [`unmarkedFit.R`](https://github.com/rbchan/unmarked/blob/master/R/unmarkedFit.R) file, for the `unmarkedFit` mother class. + +- `[` +- `backTransform` +- `coef` +- `confint` +- `fitted` +- `getData` +- `hessian` +- `linearComb` +- `mle` +- `names` +- `nllFun` +- `parboot` +- `nonparboot` +- `predict` +- `profile` +- `residuals` +- `sampleSize` +- `SE` +- `show` +- `summary` +- `update` +- `vcov` +- `logLik` +- `LRT` + +### Methods to access new attributes {-} + +You may also need to add specific methods to allow users to access an attribute you added to your `unmarkedFit` subclass. + +For example, some methods are relevant for some type of models only: + +- `getFP` for occupancy models that account for false positives +- `getB` for occupancy models that account for false positives +- `smoothed` for colonization-extinction models +- `projected` for colonization-extinction models + +# Update the `NAMESPACE` file + +- Add your fitting function to the functions export [here](https://github.com/rbchan/unmarked/blob/master/NAMESPACE#L23-L27) +- Add the new subclasses (`unmarkedFrame`, `unmarkedFit`) to the classes export [here](https://github.com/rbchan/unmarked/blob/master/NAMESPACE#L31-L43) +- Add the function you wrote to create your `unmarkedFrame` object to the functions export [here](https://github.com/rbchan/unmarked/blob/master/NAMESPACE#L58-L64) +- If you wrote new methods, for example to [access new attributes for objects of a subclass](#Methods-to-access-new-attributes), add them to the methods export [here](https://github.com/rbchan/unmarked/blob/master/NAMESPACE#L45-L54) +- If required, export other functions you created that may be called by users of the package + +# Write tests + +Using `testthat` package, you need to write tests for your `unmarkedFrame` function, your fitting function, and methods described above. The tests should be fast, but cover all the key configurations. + +Write your tests in the `./tests/testthat/` folder, creating a R file for your model. If you are using RStudio, you can run the tests of your file easily by clicking on the "Run tests" button. You can run all the tests by clicking on the "Test" button in the Build pane. + +* [Example for `occu`](https://github.com/rbchan/unmarked/blob/master/tests/testthat/test_occu.R) +* [Example for `gdistremoval`](https://github.com/rbchan/unmarked/blob/master/tests/testthat/test_gdistremoval.R) + + +# Write documentation + +You need to write the documentation files for the new classes and functions you added. Documentation `.Rd` files are stored in the `man` folder. [Here](https://r-pkgs.org/man.html) is a documentation on how to format your documentation. + +- The most important, your fitting function! + - [Example for `occu`](https://github.com/rbchan/unmarked/blob/master/man/occu.Rd) + - [Example for `gdistremoval`](https://github.com/rbchan/unmarked/blob/master/man/gdistremoval.Rd) +- Your `unmarkedFrame` constructor function + - [Example for `occu`](https://github.com/rbchan/unmarked/blob/master/man/occu.Rd) + - [Example for `gdistremoval`](https://github.com/rbchan/unmarked/blob/master/man/gdistremoval.Rd) +- Add your fitting function to the reference of all functions to [_pkgdown.yml](https://github.com/rbchan/unmarked/blob/master/_pkgdown.yml) +- Add the specific "type" for the predict methods of your `unmarkedFit` class to [predict-methods.Rd](https://github.com/rbchan/unmarked/blob/master/man/predict-methods.Rd) +- Add your `getP` method for the signature of you `unmarkedFitList` object in [getP-methods.Rd](https://github.com/rbchan/unmarked/blob/master/man/getP-methods.Rd). + +Depending on how much you had to add, you may also need to update existing files: + +- If you added specific methods for your new `unmarkedFrame` class: add them to [unmarkedFrame-class.Rd](https://github.com/rbchan/unmarked/blob/master/man/unmarkedFrame-class.Rd) +- If you added specific methods for your new `unmarkedFit` class: add them to [unmarkedFit-class.Rd](https://github.com/rbchan/unmarked/blob/master/man/unmarkedFit-class.Rd). The same goes for your new `unmarkedFitList` class in [unmarkedFitList-class.Rd](https://github.com/rbchan/unmarked/blob/master/man/unmarkedFitList-class.rd). +- Add any specific function, method or class you created. For example, specific distance-sampling functions are documented in [detFuns.Rd](https://github.com/rbchan/unmarked/blob/master/man/detFuns.Rd). + + +# Add to `unmarked` + +- Send a pull request on Github +- Probably fix a few things +- Merged and done! diff --git a/vignettes/figures/COP-model.png b/vignettes/figures/COP-model.png new file mode 100644 index 00000000..5728dc98 Binary files /dev/null and b/vignettes/figures/COP-model.png differ diff --git a/vignettes/unmarked.bib b/vignettes/unmarked.bib index c6091c6f..82148aa4 100644 --- a/vignettes/unmarked.bib +++ b/vignettes/unmarked.bib @@ -468,3 +468,18 @@ @article{MacKenzie_2004 volume = {9}, pages = {300-318} } + + +@misc{pautrel2023, + title = {Analysing Biodiversity Observation Data Collected in Continuous Time: Should We Use Discrete or Continuous-Time Occupancy Models?}, + shorttitle = {Analysing Biodiversity Observation Data Collected in Continuous Time}, + author = {Pautrel, L{\'e}a and Moulherat, Sylvain and Gimenez, Olivier and Etienne, Marie-Pierre}, + year = {2023}, + month = nov, + pages = {2023.11.17.567350}, + publisher = {{bioRxiv}}, + doi = {10.1101/2023.11.17.567350}, + archiveprefix = {bioRxiv}, + copyright = {{\textcopyright} 2023, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/}, + langid = {english}, +}