From b35e95d3ef16cc24a573f8e28e642a7064d93fd0 Mon Sep 17 00:00:00 2001
From: Jim Thorson <James.T.Thorson@gmail.com>
Date: Sun, 15 Sep 2024 17:41:18 -0700
Subject: [PATCH] attempt at residuals for family="fixed"

---
 NAMESPACE            |   1 +
 R/utility.R          | 109 +++++++++++++++++++++++++++++++++++++++++++
 man/logLik.dsem.Rd   |   2 +-
 man/loo_residuals.Rd |  31 ++++++++++++
 4 files changed, 142 insertions(+), 1 deletion(-)
 create mode 100644 R/utility.R
 create mode 100644 man/loo_residuals.Rd

diff --git a/NAMESPACE b/NAMESPACE
index aedd77b..2a93ed7 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -15,6 +15,7 @@ export(classify_variables)
 export(dsem)
 export(dsem_control)
 export(list_parameters)
+export(loo_residuals)
 export(make_dfa)
 export(make_dsem_ram)
 export(parse_path)
diff --git a/R/utility.R b/R/utility.R
new file mode 100644
index 0000000..59a3646
--- /dev/null
+++ b/R/utility.R
@@ -0,0 +1,109 @@
+
+#' @title Calculate leave-one-out residuals
+#'
+#' @description Calculates quantile residuals using the predictive distribution from
+#'              a jacknife (i.e., leave-one-out predictive distribution)
+#'
+#' @param object Output from \code{\link{dsem}}
+#' @param ... Not used
+#'
+#' @details
+#' Conditional quantile residuals cannot be calculated when using \code{family = "fixed"}, because
+#' state-variables are fixed at available measurements and hence the conditional distribution is a Dirac
+#' delta function.  One alternative is to use leave-one-out residuals, where we calculate the predictive distribution
+#' for each state value when dropping the associated observation, and then either use that as the
+#' predictive distribution, or sample from that predictive distribution and then calculate
+#' a standard quantile distribution for a given non-fixed family.  This appraoch is followed here.
+#' It is currently only implemented when  all variables follow \code{family = "fixed"}, but
+#' could be generalized to a mix of families upon request.
+#'
+#' @return
+#' A matrix of residuals, with same order and dimensions as argument \code{tsdata}
+#' that was passed to \code{dsem}.
+#'
+#' @export
+loo_residuals <-
+function( object,
+          nsim = 100,
+          ... ){
+
+  # Extract and make object
+  tsdata = object$internal$tsdata
+  parlist = object$obj$env$parList()
+  df = expand.grid( time(tsdata), colnames(tsdata) )
+  df$obs = as.vector(tsdata)
+  df = na.omit(df)
+  df = cbind( df, est=NA, se=NA )
+
+  # Loop through observations
+  for(r in 1:nrow(df) ){
+    ts_r = tsdata
+    ts_r[df[r,1],df[r,2]] = NA
+
+    #
+    control = object$internal$control
+    control$quiet = TRUE
+    control$run_model = FALSE
+
+    # Build inputs
+    #fit_r = dsem( sem = object$internal$sem,
+    #                   tsdata = ts_r,
+    #                   family = object$internal$family,
+    #                   estimate_delta0 = object$internal$estimate_delta0,
+    #                   control = control )
+    fit_r = list( tmb_inputs = object$tmb_inputs )
+    Data = fit_r$tmb_inputs$data
+    fit_r$tmb_inputs$map$x_tj = factor(ifelse( is.na(as.vector(ts_r)) | (Data$familycode_j[col(tsdata)] %in% c(1,2,3,4)), seq_len(prod(dim(ts_r))), NA ))
+
+    # Modify inputs
+    map = fit_r$tmb_inputs$map
+    parameters = fit_r$tmb_inputs$parameters
+    parameters[c("beta_z","lnsigma_j","mu_j","delta0_j")] = parlist[c("beta_z","lnsigma_j","mu_j","delta0_j")]
+    for( v in c("beta_z","lnsigma_j","mu_j","delta0_j") ){
+      map[[v]] = factor( rep(NA,length(as.vector(parameters[[v]]))))
+    }
+
+    # Build object
+    obj = TMB::MakeADFun( data = fit_r$tmb_inputs$data,
+                     parameters = parameters,
+                     random = NULL,
+                     map = map,
+                     profile = control$profile,
+                     DLL="dsem",
+                     silent = TRUE )
+
+    # Rerun and record
+    opt = nlminb( start = obj$par,
+                  objective = obj$fn,
+                  gradient = obj$gr )
+    sdrep = TMB::sdreport( obj )
+    df[r,'est'] = as.list(sdrep, what="Estimate")$x_tj[df[r,1],df[r,2]]
+    df[r,'se'] = as.list(sdrep, what="Std. Error")$x_tj[df[r,1],df[r,2]]
+  }
+
+  # Compute quantile residuals
+  if( all(object$internal$family == "fixed") ){
+    resid_tj = array( NA, dim=dim(tsdata) )
+    resid_tj[cbind(df[,1],df[,2])] = pnorm( q=df$obs, mean=df$est, sd=df$se )
+  }else{
+    # Sample from leave-one-out predictive distribution for states
+    resid_rz = apply( df, MARGIN=1, FUN=\(vec){rnorm(n=nsim, mean=as.numeric(vec['est']), sd=as.numeric(vec['se']))} )
+    resid_tjr = array( NA, dim=c(dim(tsdata),nsim) )
+    # Sample from predictive distribution of data given states
+    for(r in 1:nrow(resid_rz) ){
+      parameters = object$obj$env$parList()
+      parameters$x_tj[which(!is.na(tsdata))] = resid_rz[r,]
+      obj = TMB::MakeADFun( data = object$tmb_inputs$data,
+                       parameters = parameters,
+                       random = NULL,
+                       map = object$tmb_inputs$map,
+                       profile = NULL,
+                       DLL="dsem",
+                       silent = TRUE )
+      resid_tjr[,,r] = obj$simulate()$y_tj
+    }
+    # Calculate quantile residuals
+    resid_tj = apply( resid_tjr > outer(tsdata,rep(1,nsim)), MARGIN=1:2, FUN=mean )
+  }
+  return( resid_tj )
+}
diff --git a/man/logLik.dsem.Rd b/man/logLik.dsem.Rd
index 58a320c..4da24bc 100644
--- a/man/logLik.dsem.Rd
+++ b/man/logLik.dsem.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/dsem.R
 \name{logLik.dsem}
 \alias{logLik.dsem}
-\title{Marglinal log-likelihood}
+\title{Marginal log-likelihood}
 \usage{
 \method{logLik}{dsem}(object, ...)
 }
diff --git a/man/loo_residuals.Rd b/man/loo_residuals.Rd
new file mode 100644
index 0000000..1e22152
--- /dev/null
+++ b/man/loo_residuals.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utility.R
+\name{loo_residuals}
+\alias{loo_residuals}
+\title{Calculate leave-one-out residuals}
+\usage{
+loo_residuals(object, ...)
+}
+\arguments{
+\item{object}{Output from \code{\link{dsem}}}
+
+\item{...}{Not used}
+}
+\value{
+A matrix of residuals, with same order and dimensions as argument \code{tsdata}
+that was passed to \code{dsem}.
+}
+\description{
+Calculates quantile residuals using the predictive distribution from
+             a jacknife (i.e., leave-one-out predictive distribution)
+}
+\details{
+Conditional quantile residuals cannot be calculated when using \code{family = "fixed"}, because
+state-variables are fixed at available measurements and hence the conditional distribution is a Dirac
+delta function.  One alternative is to use leave-one-out residuals, where we calculate the predictive distribution
+for each state value when dropping the associated observation, and then either use that as the
+predictive distribution, or sample from that predictive distribution and then calculate
+a standard quantile distribution for a given non-fixed family.  This appraoch is followed here.
+It is currently only implemented when  all variables follow \code{family = "fixed"}, but
+could be generalized to a mix of families upon request.
+}