adding cAIC ...

... which only works when using models with measurement errors
James-Thorson-NOAA · Dec 11, 2024 · f2b5d50 · f2b5d50
1 parent 9df66d0
commit f2b5d50
Show file tree

Hide file tree

Showing 6 changed files with 313 additions and 1 deletion.
diff --git a/NAMESPACE b/NAMESPACE
@@ -11,6 +11,7 @@ S3method(vcov,dsem)
 export(TMBAIC)
 export(as_fitted_DAG)
 export(as_sem)
+export(cAIC)
 export(classify_variables)
 export(convert_equations)
 export(dsem)

diff --git a/R/cAIC.R b/R/cAIC.R
@@ -0,0 +1,122 @@
+
+#' @title Calculate conditional AIC
+#'
+#' @description
+#' Calculates the conditional Akaike Information criterion (cAIC).
+#'
+#' @param object Output from \code{\link{dsem}}
+#' @param what Whether to return the cAIC or the effective degrees of freedom
+#'        (EDF) for each group of random effects.
+#'
+#' @details
+#' cAIC is designed to optimize the expected out-of-sample predictive
+#' performance for new data that share the same random effects as the
+#' in-sample (fitted) data, e.g., spatial interpolation.  In this sense,
+#' it should be a fast approximation to optimizing the model structure
+#' based on k-fold crossvalidation.
+#' By contrast, \code{AIC} calculates the
+#' marginal Akaike Information Criterion, which is designed to optimize
+#' expected predictive performance for new data that have new random effects,
+#' e.g., extrapolation, or inference about generative parameters.
+#'
+#' cAIC also calculates as a byproduct the effective degrees of freedom,
+#' i.e., the number of fixed effects that would have an equivalent impact on
+#' model flexibility as a given random effect.
+#'
+#' Both cAIC and EDF are calculated using Eq. 6 of Zheng Cadigan Thorson 2024.
+#'
+#' Note that, for models that include profiled fixed effects, these profiles
+#' are turned off.
+#'
+#' @return
+#' Either the cAIC, or the effective degrees of freedom (EDF) by group
+#' of random effects
+#'
+#' @references
+#'
+#' **Deriving the general approximation to cAIC used here**
+#'
+#' Zheng, N., Cadigan, N., & Thorson, J. T. (2024).
+#' A note on numerical evaluation of conditional Akaike information for
+#' nonlinear mixed-effects models (arXiv:2411.14185). arXiv.
+#' \doi{10.48550/arXiv.2411.14185}
+#'
+#' **The utility of EDF to diagnose hierarchical model behavior**
+#'
+#' Thorson, J. T. (2024). Measuring complexity for hierarchical
+#' models using effective degrees of freedom. Ecology,
+#' 105(7), e4327 \doi{10.1002/ecy.4327}
+#'
+#' @export
+cAIC <-
+function( object,
+          what = c("cAIC","EDF") ){
+
+  what = match.arg(what)
+  data = object$tmb_inputs$data
+
+  # Error checks
+  if(any(is.na(object$tmb_inputs$map$x_tj))){
+    stop("cAIC is not implemented when fixing states at data using family=`fixed`")
+  }
+
+  # Turn on all GMRF parameters
+  map = object$tmb_inputs$map
+  map$x_tj = factor(seq_len(prod(dim(data$y_tj))))
+
+  # Make sure profile = NULL
+  #if( is.null(object$internal$control$profile) ){
+    obj = object$obj
+  #}else{
+    obj = TMB::MakeADFun( data = data,
+                     parameters = object$internal$parhat,
+                     random = object$tmb_inputs$random,
+                     map = map,
+                     profile = NULL,
+                     DLL="dsem",
+                     silent = TRUE )
+  #}
+
+  # Weights = 0 is equivalent to data = NA
+  data$y_tj[] = NA
+  # Make obj_new
+  obj_new = TMB::MakeADFun( data = data,
+                      parameters = object$internal$parhat,
+                      map = map,
+                      random = object$tmb_inputs$random,
+                      DLL = "dsem",
+                      profile = NULL )
+
+  #
+  par = obj$env$parList()
+  parDataMode <- obj$env$last.par
+  indx = obj$env$lrandom()
+  q = sum(indx)
+  p = length(object$opt$par)
+
+  ## use - for Hess because model returns negative loglikelihood;
+  Hess_new = -Matrix::Matrix(obj_new$env$f(parDataMode,order=1,type="ADGrad"),sparse = TRUE)
+  #cov_Psi_inv = -Hess_new[indx,indx]; ## this is the marginal prec mat of REs;
+  Hess_new = Hess_new[indx,indx]
+
+  ## Joint hessian etc
+  Hess = -Matrix::Matrix(obj$env$f(parDataMode,order=1,type="ADGrad"),sparse = TRUE)
+  Hess = Hess[indx,indx]
+  #negEDF = diag(as.matrix(solve(ddlj.r)) %*% ddlr.r)
+  negEDF = Matrix::diag(Matrix::solve(Hess, Hess_new))
+  #
+
+  if(what=="cAIC"){
+    jnll = obj$env$f(parDataMode)
+    cnll = jnll - obj_new$env$f(parDataMode)
+    cAIC = 2*cnll + 2*(p+q) - 2*sum(negEDF)
+    return(cAIC)
+  }
+  if(what=="EDF"){
+    #Sdims = object$tmb_inputs$tmb_data$Sdims
+    #group = rep.int( seq_along(Sdims), times=Sdims )
+    #names(negEDF) = names(obj$env$last.par)[indx]
+    EDF = length(negEDF) - sum(negEDF)
+    return(EDF)
+  }
+}
diff --git a/R/dsem.R b/R/dsem.R
@@ -287,6 +287,7 @@ function( sem,
     out$opt$par = out$opt$par - solve(h, g)
     out$opt$objective = obj$fn(out$opt$par)
   }
+  out$internal$parhat = obj$env$parList()
 
   if( isTRUE(control$extra_convergence_checks) ){
     # Gradient checks

diff --git a/man/cAIC.Rd b/man/cAIC.Rd
diff --git a/man/dsem.Rd b/man/dsem.Rd
diff --git a/scratch/test_cAIC.R b/scratch/test_cAIC.R
@@ -0,0 +1,132 @@
+
+if( FALSE ){
+  setwd( R'(C:\Users\James.Thorson\Desktop\Git\dsem\src\)' )
+  TMB::compile( 'dsem.cpp' )
+  devtools::document( R'(C:\Users\James.Thorson\Desktop\Git\dsem)' )
+  devtools::install_local( R'(C:\Users\James.Thorson\Desktop\Git\dsem)', dep=FALSE, force=TRUE )
+}
+
+#################
+# DFA example
+#################
+
+library(dsem)
+library(MARSS)
+library(ggplot2)
+data( harborSealWA, package="MARSS")
+
+# Define helper function
+grab = \(x,name) x[which(names(x)==name)]
+
+# Define number of factors
+# n_factors >= 3 doesn't seem to converge using DSEM or MARSS without penalties
+n_factors = 2
+
+# Add factors to data
+tsdata = harborSealWA[,c("SJI","EBays","SJF","PSnd","HC")]
+newcols = array( NA,
+                 dim = c(nrow(tsdata),n_factors),
+                 dimnames = list(NULL,paste0("F",seq_len(n_factors))) )
+tsdata = ts( cbind(tsdata, newcols), start=1978)
+
+# Scale and center (matches with MARSS does when fitting a DFA)
+tsdata = scale( tsdata, center=TRUE, scale=TRUE )
+
+#
+sem = make_dfa( variables = c("SJI","EBays","SJF","PSnd","HC"),
+                n_factors = n_factors )
+
+# Initial fit
+mydsem0 = dsem( tsdata = tsdata,
+               sem = sem,
+               family = c( rep("normal",5), rep("fixed",n_factors) ),
+               estimate_delta0 = TRUE,
+               control = dsem_control( quiet = TRUE,
+                                       run_model = FALSE,
+                                       gmrf_parameterization = "projection" ) )
+
+# fix all measurement errors at diagonal and equal
+map = mydsem0$tmb_inputs$map
+map$lnsigma_j = factor( rep(1,ncol(tsdata)) )
+
+# Fix factors to have initial value, and variables to not
+map$delta0_j = factor( c(rep(NA,ncol(harborSealWA)-1), 1:n_factors) )
+
+# Fix variables to have no stationary mean except what's predicted by initial value
+map$mu_j = factor( rep(NA,ncol(tsdata)) )
+
+# profile "delta0_j" to match MARSS (which treats initial condition as unpenalized random effect)
+mydfa = dsem( tsdata = tsdata,
+               sem = sem,
+               family = c( rep("normal",5), rep("fixed",n_factors) ),
+               estimate_delta0 = TRUE,
+               control = dsem_control( quiet = TRUE,
+                                       map = map,
+                                       use_REML = TRUE,
+                                       #profile = "delta0_j",
+                                       gmrf_parameterization = "projection" ) )
+
+cAIC(mydfa)
+cAIC(mydfa, what="EDF")
+
+###############
+# Klein example
+###############
+
+# Define model
+sem = "
+  # Link, lag, param_name
+  cprofits -> consumption, 0, a1
+  cprofits -> consumption, 1, a2
+  pwage -> consumption, 0, a3
+  gwage -> consumption, 0, a3
+  cprofits -> invest, 0, b1
+  cprofits -> invest, 1, b2
+  capital -> invest, 0, b3
+  gnp -> pwage, 0, c2
+  gnp -> pwage, 1, c3
+  time -> pwage, 0, c1
+"
+
+# Load data
+data(KleinI, package="AER")
+TS = ts(data.frame(KleinI, "time"=time(KleinI) - 1931))
+tsdata = TS[,c("time","gnp","pwage","cprofits",'consumption',
+               "gwage","invest","capital")]
+
+#
+n_missing = 20
+df_missing = expand.grid( seq_len(nrow(tsdata)), seq_len(ncol(tsdata)) )
+df_missing = df_missing[ sample(seq_len(nrow(df_missing)), size=n_missing, replace=FALSE), ]
+tsdata[ as.matrix(df_missing) ] = NA
+
+# Fit model
+fit = dsem( sem=sem,
+            tsdata = tsdata,
+            estimate_delta0 = TRUE,
+            control = dsem_control(quiet=TRUE,
+                                   getsd = FALSE,
+                                   extra_convergence_checks = FALSE,
+                                   newton_loops = 0) )
+
+cAIC(fit)
+cAIC(fit, what="EDF")
+
+###################
+# Linear model
+###################
+
+# simulate normal distribution
+x = rnorm(100)
+y = 1 + 0.5 * x + rnorm(100)
+data = data.frame(x=x, y=y)
+
+sem = "
+  x -> y, 0, beta
+"
+
+# Fit as DSEM
+fit = dsem( sem = sem,
+            tsdata = ts(data),
+            #family = c("fixed","normal"),
+            control = dsem_control(quiet=TRUE) ) # gmrf_parameterization = "projection",