From b35e95d3ef16cc24a573f8e28e642a7064d93fd0 Mon Sep 17 00:00:00 2001 From: Jim Thorson Date: Sun, 15 Sep 2024 17:41:18 -0700 Subject: [PATCH] attempt at residuals for family="fixed" --- NAMESPACE | 1 + R/utility.R | 109 +++++++++++++++++++++++++++++++++++++++++++ man/logLik.dsem.Rd | 2 +- man/loo_residuals.Rd | 31 ++++++++++++ 4 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 R/utility.R create mode 100644 man/loo_residuals.Rd diff --git a/NAMESPACE b/NAMESPACE index aedd77b..2a93ed7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ export(classify_variables) export(dsem) export(dsem_control) export(list_parameters) +export(loo_residuals) export(make_dfa) export(make_dsem_ram) export(parse_path) diff --git a/R/utility.R b/R/utility.R new file mode 100644 index 0000000..59a3646 --- /dev/null +++ b/R/utility.R @@ -0,0 +1,109 @@ + +#' @title Calculate leave-one-out residuals +#' +#' @description Calculates quantile residuals using the predictive distribution from +#' a jacknife (i.e., leave-one-out predictive distribution) +#' +#' @param object Output from \code{\link{dsem}} +#' @param ... Not used +#' +#' @details +#' Conditional quantile residuals cannot be calculated when using \code{family = "fixed"}, because +#' state-variables are fixed at available measurements and hence the conditional distribution is a Dirac +#' delta function. One alternative is to use leave-one-out residuals, where we calculate the predictive distribution +#' for each state value when dropping the associated observation, and then either use that as the +#' predictive distribution, or sample from that predictive distribution and then calculate +#' a standard quantile distribution for a given non-fixed family. This appraoch is followed here. +#' It is currently only implemented when all variables follow \code{family = "fixed"}, but +#' could be generalized to a mix of families upon request. +#' +#' @return +#' A matrix of residuals, with same order and dimensions as argument \code{tsdata} +#' that was passed to \code{dsem}. +#' +#' @export +loo_residuals <- +function( object, + nsim = 100, + ... ){ + + # Extract and make object + tsdata = object$internal$tsdata + parlist = object$obj$env$parList() + df = expand.grid( time(tsdata), colnames(tsdata) ) + df$obs = as.vector(tsdata) + df = na.omit(df) + df = cbind( df, est=NA, se=NA ) + + # Loop through observations + for(r in 1:nrow(df) ){ + ts_r = tsdata + ts_r[df[r,1],df[r,2]] = NA + + # + control = object$internal$control + control$quiet = TRUE + control$run_model = FALSE + + # Build inputs + #fit_r = dsem( sem = object$internal$sem, + # tsdata = ts_r, + # family = object$internal$family, + # estimate_delta0 = object$internal$estimate_delta0, + # control = control ) + fit_r = list( tmb_inputs = object$tmb_inputs ) + Data = fit_r$tmb_inputs$data + fit_r$tmb_inputs$map$x_tj = factor(ifelse( is.na(as.vector(ts_r)) | (Data$familycode_j[col(tsdata)] %in% c(1,2,3,4)), seq_len(prod(dim(ts_r))), NA )) + + # Modify inputs + map = fit_r$tmb_inputs$map + parameters = fit_r$tmb_inputs$parameters + parameters[c("beta_z","lnsigma_j","mu_j","delta0_j")] = parlist[c("beta_z","lnsigma_j","mu_j","delta0_j")] + for( v in c("beta_z","lnsigma_j","mu_j","delta0_j") ){ + map[[v]] = factor( rep(NA,length(as.vector(parameters[[v]])))) + } + + # Build object + obj = TMB::MakeADFun( data = fit_r$tmb_inputs$data, + parameters = parameters, + random = NULL, + map = map, + profile = control$profile, + DLL="dsem", + silent = TRUE ) + + # Rerun and record + opt = nlminb( start = obj$par, + objective = obj$fn, + gradient = obj$gr ) + sdrep = TMB::sdreport( obj ) + df[r,'est'] = as.list(sdrep, what="Estimate")$x_tj[df[r,1],df[r,2]] + df[r,'se'] = as.list(sdrep, what="Std. Error")$x_tj[df[r,1],df[r,2]] + } + + # Compute quantile residuals + if( all(object$internal$family == "fixed") ){ + resid_tj = array( NA, dim=dim(tsdata) ) + resid_tj[cbind(df[,1],df[,2])] = pnorm( q=df$obs, mean=df$est, sd=df$se ) + }else{ + # Sample from leave-one-out predictive distribution for states + resid_rz = apply( df, MARGIN=1, FUN=\(vec){rnorm(n=nsim, mean=as.numeric(vec['est']), sd=as.numeric(vec['se']))} ) + resid_tjr = array( NA, dim=c(dim(tsdata),nsim) ) + # Sample from predictive distribution of data given states + for(r in 1:nrow(resid_rz) ){ + parameters = object$obj$env$parList() + parameters$x_tj[which(!is.na(tsdata))] = resid_rz[r,] + obj = TMB::MakeADFun( data = object$tmb_inputs$data, + parameters = parameters, + random = NULL, + map = object$tmb_inputs$map, + profile = NULL, + DLL="dsem", + silent = TRUE ) + resid_tjr[,,r] = obj$simulate()$y_tj + } + # Calculate quantile residuals + resid_tj = apply( resid_tjr > outer(tsdata,rep(1,nsim)), MARGIN=1:2, FUN=mean ) + } + return( resid_tj ) +} diff --git a/man/logLik.dsem.Rd b/man/logLik.dsem.Rd index 58a320c..4da24bc 100644 --- a/man/logLik.dsem.Rd +++ b/man/logLik.dsem.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/dsem.R \name{logLik.dsem} \alias{logLik.dsem} -\title{Marglinal log-likelihood} +\title{Marginal log-likelihood} \usage{ \method{logLik}{dsem}(object, ...) } diff --git a/man/loo_residuals.Rd b/man/loo_residuals.Rd new file mode 100644 index 0000000..1e22152 --- /dev/null +++ b/man/loo_residuals.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utility.R +\name{loo_residuals} +\alias{loo_residuals} +\title{Calculate leave-one-out residuals} +\usage{ +loo_residuals(object, ...) +} +\arguments{ +\item{object}{Output from \code{\link{dsem}}} + +\item{...}{Not used} +} +\value{ +A matrix of residuals, with same order and dimensions as argument \code{tsdata} +that was passed to \code{dsem}. +} +\description{ +Calculates quantile residuals using the predictive distribution from + a jacknife (i.e., leave-one-out predictive distribution) +} +\details{ +Conditional quantile residuals cannot be calculated when using \code{family = "fixed"}, because +state-variables are fixed at available measurements and hence the conditional distribution is a Dirac +delta function. One alternative is to use leave-one-out residuals, where we calculate the predictive distribution +for each state value when dropping the associated observation, and then either use that as the +predictive distribution, or sample from that predictive distribution and then calculate +a standard quantile distribution for a given non-fixed family. This appraoch is followed here. +It is currently only implemented when all variables follow \code{family = "fixed"}, but +could be generalized to a mix of families upon request. +}