From ff2d3d90ff36ad5a5d5f4d40470967c1420f58e9 Mon Sep 17 00:00:00 2001
From: Jim Thorson <50178738+James-Thorson-NOAA@users.noreply.github.com>
Date: Mon, 22 Jul 2024 11:17:21 -0700
Subject: [PATCH] adding option for constant marginal variance ... (#23)

* adding option for constant marginal variance ...

... which is still experimental

* add alternative options for constant variance

* update docs

* further update docs

* adding constant_variance integrated tests

* fix bug in test

* fix Debian CRAN check warning

---------

Co-authored-by: Jim Thorson <James.T.Thorson@gmail.com>
---
 DESCRIPTION                         |  6 +--
 NEWS.md                             |  6 +++
 R/dsem.R                            | 27 ++++++++++-
 man/dsem_control.Rd                 | 16 ++++++
 src/dsem.cpp                        | 61 +++++++++++++++++++++--
 tests/testthat/test-gmrf-versions.R | 75 +++++++++++++++++++++++++++++
 6 files changed, 183 insertions(+), 8 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index eb08a06..4035e31 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: dsem
 Type: Package
 Title: Fit Dynamic Structural Equation Models
-Version: 1.2.1
-Date: 2024-04-02
+Version: 1.3.0
+Date: 2024-07-18
 Authors@R: 
     c(person(given = "James",
              family = "Thorson",
@@ -45,7 +45,7 @@ Description: Applies dynamic structural equation models to time-series data
 	constrained by ecological mechanisms."  
 License: GPL-3
 Encoding: UTF-8
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 VignetteBuilder: knitr
 LazyData: true
 URL: https://james-thorson-noaa.github.io/dsem/
diff --git a/NEWS.md b/NEWS.md
index 9350012..681fc75 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,9 @@
+# dsem 1.3.0
+
+* Adding option to specify constant marginal variance, as alternative to existing
+  calculation which results in a constant conditional variance but a changing marginal 
+  variance along the causal graph
+
 # dsem 1.2.1
 
 * removing `checkDepPackageVersion(dep_pkg="Matrix", this_pkg="TMB")` from `.onLoad()`
diff --git a/R/dsem.R b/R/dsem.R
index b3c9739..f3a197d 100644
--- a/R/dsem.R
+++ b/R/dsem.R
@@ -160,7 +160,13 @@ function( sem,
   }
 
   #
-  Data = list( "options" = ifelse(control$gmrf_parameterization=="separable",0,1),
+  options = c(
+    ifelse(control$gmrf_parameterization=="separable", 0, 1),
+    switch(control$constant_variance, "conditional"=0, "marginal"=1, "diagonal"=2)
+  )
+  
+  #
+  Data = list( "options" = options,
                "RAM" = as.matrix(na.omit(ram[,1:4])),
                "RAMstart" = as.numeric(ram[,5]),
                "familycode_j" = sapply(family, FUN=switch, "fixed"=0, "normal"=1, "gamma"=4 ),
@@ -337,6 +343,20 @@ function( sem,
 #'        a full-rank and IID precision for variables over time, and then projects
 #'        this using the inverse-cholesky of the precision, where this projection
 #'        can be rank-deficient.
+#' @param constant_variance Whether to specify a constant conditional variance 
+#'        \eqn{ \mathbf{\Gamma \Gamma}^t} using the default \code{constant_variance="conditional"}, 
+#'        which results in a changing marginal variance      
+#'        along the specified causal graph when lagged paths are present. Alternatively, the user can
+#'        specify a constant marginal variance using \code{constant_variance="diagonal"}
+#'        or \code{constant_variance="marginal"},
+#'        such that \eqn{ \mathbf{\Gamma}} and \eqn{\mathbf{I-P}} are rescaled to achieve this constraint.  
+#'        All options
+#'        are equivalent when the model includes no lags (only simultaneous effects) and
+#'        no covariances (no two-headed arrows).  \code{"diagonal"} and \code{"marginal"}
+#'        are equivalent when the model includes no covariances. Given some exogenous covariance, 
+#'        \code{constant_variance = "marginal"} preserves the conditional correlation and has
+#'        changing conditional variance, while \code{constant_variance = "marginal"} has changing
+#'        conditional correlation along the causal graph.  
 #' @param quiet Boolean indicating whether to run model printing messages to terminal or not;
 #' @param use_REML Boolean indicating whether to treat non-variance fixed effects as random,
 #'        either to motigate bias in estimated variance parameters or improve efficiency for
@@ -364,7 +384,8 @@ function( nlminb_loops = 1,
           getsd = TRUE,
           quiet = FALSE,
           run_model = TRUE,
-          gmrf_parameterization = c("separable","projection"),
+          gmrf_parameterization = c("separable", "projection"),
+          constant_variance = c("conditional", "marginal", "diagonal"),
           use_REML = TRUE,
           profile = NULL,
           parameters = NULL,
@@ -373,6 +394,7 @@ function( nlminb_loops = 1,
           extra_convergence_checks = TRUE ){
 
   gmrf_parameterization = match.arg(gmrf_parameterization)
+  constant_variance = match.arg(constant_variance)
 
   # Return
   structure( list(
@@ -385,6 +407,7 @@ function( nlminb_loops = 1,
     quiet = quiet,
     run_model = run_model,
     gmrf_parameterization = gmrf_parameterization,
+    constant_variance = constant_variance,
     use_REML = use_REML,
     profile = profile,
     parameters = parameters,
diff --git a/man/dsem_control.Rd b/man/dsem_control.Rd
index a9c36d7..20b2a9b 100644
--- a/man/dsem_control.Rd
+++ b/man/dsem_control.Rd
@@ -14,6 +14,7 @@ dsem_control(
   quiet = FALSE,
   run_model = TRUE,
   gmrf_parameterization = c("separable", "projection"),
+  constant_variance = c("conditional", "marginal", "diagonal"),
   use_REML = TRUE,
   profile = NULL,
   parameters = NULL,
@@ -52,6 +53,21 @@ a full-rank and IID precision for variables over time, and then projects
 this using the inverse-cholesky of the precision, where this projection
 can be rank-deficient.}
 
+\item{constant_variance}{Whether to specify a constant conditional variance 
+\eqn{ \mathbf{\Gamma \Gamma}^t} using the default \code{constant_variance="conditional"}, 
+which results in a changing marginal variance      
+along the specified causal graph when lagged paths are present. Alternatively, the user can
+specify a constant marginal variance using \code{constant_variance="diagonal"}
+or \code{constant_variance="marginal"},
+such that \eqn{ \mathbf{\Gamma}} and \eqn{\mathbf{I-P}} are rescaled to achieve this constraint.  
+All options
+are equivalent when the model includes no lags (only simultaneous effects) and
+no covariances (no two-headed arrows).  \code{"diagonal"} and \code{"marginal"}
+are equivalent when the model includes no covariances. Given some exogenous covariance, 
+\code{constant_variance = "marginal"} preserves the conditional correlation and has
+changing conditional variance, while \code{constant_variance = "marginal"} has changing
+conditional correlation along the causal graph.}
+
 \item{use_REML}{Boolean indicating whether to treat non-variance fixed effects as random,
 either to motigate bias in estimated variance parameters or improve efficiency for
 parameter estimation given correlated fixed and random effects}
diff --git a/src/dsem.cpp b/src/dsem.cpp
index a8d4c77..3b33ba4 100644
--- a/src/dsem.cpp
+++ b/src/dsem.cpp
@@ -16,7 +16,9 @@ Type objective_function<Type>::operator() ()
   using namespace density;
 
   // Data
-  DATA_IVECTOR( options ); // options(0) -> full rank or rank-reduced GMRF
+  DATA_IVECTOR( options ); 
+  // options(0) -> 0: full rank;  1: rank-reduced GMRF
+  // options(1) -> 0: constant conditional variance;  1: constant marginal variance
   //DATA_INTEGER( resimulate_gmrf );
   DATA_IMATRIX( RAM );
   DATA_VECTOR( RAMstart );
@@ -68,11 +70,64 @@ Type objective_function<Type>::operator() ()
     }
     //if(RAM(r,0)==2) Gammainv_kk.coeffRef( RAM(r,1)-1, RAM(r,2)-1 ) = 1 / tmp;
   }
-
-  // solve(I - Rho) %*% x
   Eigen::SparseMatrix<Type> IminusRho_kk = I_kk - Rho_kk;
+  
+  // Compute inverse LU-decomposition
   Eigen::SparseLU< Eigen::SparseMatrix<Type>, Eigen::COLAMDOrdering<int> > inverseIminusRho_kk;
   inverseIminusRho_kk.compute(IminusRho_kk);
+
+  // Rescale I-Rho and Gamma if using constant marginal variance options
+  if( (options(1)==1) || (options(1)==2) ){
+    Eigen::SparseMatrix<Type> invIminusRho_kk;
+    
+    // WORKS:  Based on: https://github.com/kaskr/adcomp/issues/74
+    invIminusRho_kk = inverseIminusRho_kk.solve(I_kk);
+    
+    // Hadamard squared LU-decomposition
+    // See: https://eigen.tuxfamily.org/dox/group__QuickRefPage.html
+    Eigen::SparseMatrix<Type> squared_invIminusRho_kk(n_k, n_k);
+    squared_invIminusRho_kk = invIminusRho_kk.cwiseProduct(invIminusRho_kk);
+    Eigen::SparseLU< Eigen::SparseMatrix<Type>, Eigen::COLAMDOrdering<int> > invsquared_invIminusRho_kk;
+    invsquared_invIminusRho_kk.compute(squared_invIminusRho_kk);
+    
+    if( options(1) == 1 ){
+      // 1-matrix
+      matrix<Type> ones_k1( n_k, 1 );
+      ones_k1.setOnes();
+
+      // Calculate diag( t(Gamma) * Gamma )
+      Eigen::SparseMatrix<Type> squared_Gamma_kk = Gamma_kk.cwiseProduct(Gamma_kk);
+      matrix<Type> sigma2_k1 = squared_Gamma_kk.transpose() * ones_k1;
+
+      // Rowsums
+      matrix<Type> margvar_k1 = invsquared_invIminusRho_kk.solve(sigma2_k1);
+      
+      // Rescale IminusRho_kk and Gamma
+      Eigen::SparseMatrix<Type> invmargsd_kk(n_k, n_k);
+      Eigen::SparseMatrix<Type> invsigma_kk(n_k, n_k);
+      for( int k=0; k<n_k; k++ ){
+        invmargsd_kk.coeffRef(k,k) = pow( margvar_k1(k,0), -0.5 );
+        invsigma_kk.coeffRef(k,k) = pow( sigma2_k1(k,0), -0.5 );
+      }
+      IminusRho_kk = invmargsd_kk * IminusRho_kk;
+      Gamma_kk = invsigma_kk * Gamma_kk;
+      
+      // Recompute inverse LU-decomposition
+      inverseIminusRho_kk.compute(IminusRho_kk);
+    }else{
+      // calculate diag(Gamma)^2
+      matrix<Type> targetvar_k1( n_k, 1 );
+      for( int k=0; k<n_k; k++ ){
+        targetvar_k1(k,0) = Gamma_kk.coeffRef(k,k) * Gamma_kk.coeffRef(k,k);
+      }
+      
+      // Rescale Gamma
+      matrix<Type> margvar_k1 = invsquared_invIminusRho_kk.solve(targetvar_k1);
+      for( int k=0; k<n_k; k++ ){
+        Gamma_kk.coeffRef(k,k) = pow( margvar_k1(k,0), 0.5 );
+      }
+    }
+  }
   
   // Calculate effect of initial condition -- SPARSE version
   vector<Type> delta_k( n_k );
diff --git a/tests/testthat/test-gmrf-versions.R b/tests/testthat/test-gmrf-versions.R
index 3a9f47e..5047280 100644
--- a/tests/testthat/test-gmrf-versions.R
+++ b/tests/testthat/test-gmrf-versions.R
@@ -49,6 +49,81 @@ test_that("dsem gmrf-parameterization options ", {
   expect_equal( as.numeric(fit1$opt$obj), as.numeric(fit2$opt$obj), tolerance=1e-2 )
 })
 
+test_that("dsem constant-variance options ", {
+  data(isle_royale)
+  data = ts( log(isle_royale[,2:3]), start=1959)
+  
+  # Show that constant_variance = "marginal" has constant marginal variance *with* crosscorrelation
+  sem = "
+    # Link, lag, param_name
+    wolves -> wolves, 1, arW
+    moose -> wolves, 1, MtoW
+    wolves -> moose, 1, WtoM
+    moose -> moose, 1, arM
+    moose <-> wolves, 0, NA, 0.2
+  "
+  # initial build of object
+  fit = dsem( sem = sem,
+               tsdata = data,
+               estimate_delta0 = TRUE,
+               control = dsem_control(
+                 nlminb_loops = 1,
+                 newton_loops = 0,
+                 getsd = FALSE,
+                 constant_variance = "marginal") )
+  margvar = array( diag(as.matrix(solve(fit$obj$report()$Q_kk))), dim=dim(data))
+  expect_equal( apply(margvar,MARGIN=2,FUN=sd), c(0,0), tolerance=0.05 )
+
+  # Show that constant_variance = "diagonal" has constant marginal variance *without* crosscorrelation 
+  sem = "
+    # Link, lag, param_name
+    wolves -> wolves, 1, arW
+    moose -> wolves, 1, MtoW
+    wolves -> moose, 1, WtoM
+    moose -> moose, 1, arM
+  "
+  fit = dsem( sem = sem,
+               tsdata = data,
+               estimate_delta0 = TRUE,
+               control = dsem_control(
+                 nlminb_loops = 1,
+                 newton_loops = 0,
+                 getsd = FALSE,
+                 constant_variance = "diagonal") )
+  margvar = array( diag(as.matrix(solve(fit$obj$report()$Q_kk))), dim=dim(data))
+  expect_equal( apply(margvar,MARGIN=2,FUN=sd), c(0,0), tolerance=0.01 )
+
+  # Show that marginal variance increases
+  sem = "
+    # Link, lag, param_name
+    wolves -> wolves, 1, arW
+    moose -> wolves, 1, MtoW
+    wolves -> moose, 1, WtoM
+    moose -> moose, 1, arM
+  "
+  fit0 = dsem( sem = sem,
+               tsdata = data,
+               estimate_delta0 = FALSE,
+               control = dsem_control(
+                 nlminb_loops = 1,
+                 newton_loops = 0,
+                 getsd = FALSE,
+                 constant_variance = "conditional") )
+  parameters = fit0$obj$env$parList()
+    parameters$delta0_j = rep( 0, ncol(data) )
+  fit = dsem( sem = sem,
+               tsdata = data,
+               estimate_delta0 = TRUE,
+               control = dsem_control(
+                 nlminb_loops = 1,
+                 newton_loops = 0,
+                 getsd = FALSE,
+                 constant_variance = "conditional",
+                 parameters = parameters) )
+  margvar = array( diag(as.matrix(solve(fit$obj$report()$Q_kk))), dim=dim(data))
+})
+
+
 #test_that("dfa using dsem is working ", {
 #  data(isle_royale)
 #  data = ts( cbind(log(isle_royale[,2:3]), "F"=NA), start=1959)