From 1111500493e3df5d6216d8dffdf75dba306e8292 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Paul=20Li=C3=A9tar?= <pl2113@ic.ac.uk>
Date: Tue, 30 Apr 2024 15:20:12 +0100
Subject: [PATCH] Allow correlations to be extended when resuming. (#293)

When resuming a simulation, it is possible to add new intervention.
The correlation of the new intervention may need configuring, both
relative to itself and to existing interventions.

The correlation parameters work by sampling, at the start of the
simulation, relative weights for each individual and intervention. The
weight are later used to select which individuals are targeted by the a
given intervention. There weights are drawn from a multivariate normal
distribution, whose variance-covariance matrix depends on the configured
correlation.

When adding interventions, new corresponding columns need to be added to
the weights matrix. A fresh mvnorm distribution cannot be used, since it
would override the already drawn weigths for the existing interventions.
The solution instead is to use a conditional mvnorm distribution on the
new columns, using the existing values as the conditions. This yields
new weigths which follow the determined variance-covariance matrix while
preserving the existing values.
---
 R/correlation.R                   | 162 +++++++++++++++++++---
 man/CorrelationParameters.Rd      |  31 ++++-
 tests/testthat/test-correlation.R | 218 ++++++++++++++++++++++++++++++
 tests/testthat/test-resume.R      | 150 +++++++++++++++++++-
 4 files changed, 535 insertions(+), 26 deletions(-)

diff --git a/R/correlation.R b/R/correlation.R
index 2a59fdbc..bb98fb3b 100644
--- a/R/correlation.R
+++ b/R/correlation.R
@@ -9,7 +9,26 @@ INTS <- c(
 )
 
 #' Class: Correlation parameters
-#' Describes an event in the simulation
+#'
+#' This class implements functionality that allows interventions to be
+#' correlated, positively or negatively. By default, interventions are applied
+#' independently and an individual's probability of receiving two interventions
+#' (either two separate interventions or two rounds of the same one) is the
+#' product of the probability of receiving each one.
+#'
+#' By setting a positive correlation between two interventions, we can make it
+#' so that the individuals that receive intervention A are more likely to
+#' receive intervention B. Conversely, a negative correlation will make it such
+#' that individuals that receive intervention A are less likely to also receive
+#' intervention B.
+#'
+#' Broadly speaking, the implementation works by assigning at startup a weight
+#' to each individual and intervention pair, reflecting how likely an individual
+#' is to receive that intervention. Those weights are derived stochastically
+#' from the configured correlation parameters.
+#'
+#' For a detailed breakdown of the calculations, see Protocol S2 of
+#' Griffin et al. (2010).
 CorrelationParameters <- R6::R6Class(
   'CorrelationParameters',
   private = list(
@@ -19,7 +38,40 @@ CorrelationParameters <- R6::R6Class(
     rho_matrix = NULL,
     rho = function() diag(private$rho_matrix),
     .sigma = NULL,
-    .mvnorm = NULL
+    .mvnorm = NULL,
+
+    #' Derive the mvnorm from the configured correlations.
+    #'
+    #' If a \code{restored_mvnorm} is specified, its columns (corresponding to
+    #' restored interventions) will be re-used as is. Missing columns (for new
+    #' interventions) are derived in accordance with the restored data.
+    calculate_mvnorm = function(restored_mvnorm = matrix(ncol=0, nrow=private$population)) {
+      sigma <- self$sigma()
+      V <- outer(sigma, sigma) * private$rho_matrix
+      diag(V) <- sigma ^ 2
+
+      restored_interventions <- match(colnames(restored_mvnorm), private$interventions)
+      new_interventions <- setdiff(seq_along(private$interventions), restored_interventions)
+
+      mvnorm <- matrix(
+        nrow = private$population,
+        ncol = length(private$interventions),
+        dimnames = list(NULL, private$interventions)
+      )
+      mvnorm[,restored_interventions] <- restored_mvnorm
+      if (length(new_interventions) > 0) {
+        mvnorm[,new_interventions] <- rcondmvnorm(
+          private$population,
+          mean = rep(0, length(private$interventions)),
+          sigma = V,
+          given = restored_mvnorm,
+          dependent.ind = new_interventions,
+          given.ind = restored_interventions
+        )
+      }
+
+      mvnorm
+    }
   ),
   public = list(
 
@@ -45,6 +97,8 @@ CorrelationParameters <- R6::R6Class(
     #' @param rho value between 0 and 1 representing the correlation between rounds of
     #' the intervention
     inter_round_rho = function(int, rho) {
+      stopifnot(is.null(private$.sigma) && is.null(private$.mvnorm))
+
       if (!(int %in% private$interventions)) {
         stop(paste0('invalid intervention name: ', int))
       }
@@ -55,8 +109,6 @@ CorrelationParameters <- R6::R6Class(
         rho <- 1 - .Machine$double.eps
       }
       private$rho_matrix[[int, int]] <- rho
-      private$.sigma <- NULL
-      private$.mvnorm <- NULL
     },
 
     #' @description Add rho between interventions
@@ -66,6 +118,8 @@ CorrelationParameters <- R6::R6Class(
     #' @param rho value between -1 and 1 representing the correlation between rounds of
     #' the intervention
     inter_intervention_rho = function(int_1, int_2, rho) {
+      stopifnot(is.null(private$.sigma) && is.null(private$.mvnorm))
+
       if (!(int_1 %in% private$interventions)) {
         stop(paste0('invalid intervention name: ', int_1))
       }
@@ -83,8 +137,6 @@ CorrelationParameters <- R6::R6Class(
       }
       private$rho_matrix[[int_1, int_2]] <- rho
       private$rho_matrix[[int_2, int_1]] <- rho
-      private$.sigma <- NULL
-      private$.mvnorm <- NULL
     },
 
     #' @description Standard deviation of each intervention between rounds
@@ -98,18 +150,9 @@ CorrelationParameters <- R6::R6Class(
     },
 
     #' @description multivariate norm draws for these parameters
-    #' @importFrom MASS mvrnorm
     mvnorm = function() {
       if (is.null(private$.mvnorm)) {
-        sigma <- self$sigma()
-        V <- outer(sigma, sigma) * private$rho_matrix
-        diag(V) <- sigma ^ 2
-        private$.mvnorm <- mvrnorm(
-          private$population,
-          rep(0, length(private$interventions)),
-          V
-        )
-        dimnames(private$.mvnorm)[[2]] <- private$interventions
+        private$.mvnorm <- private$calculate_mvnorm()
       }
       private$.mvnorm
     },
@@ -121,7 +164,7 @@ CorrelationParameters <- R6::R6Class(
       # otherwise we would be drawing a new, probably different, value.
       # The rest of the object is derived deterministically from the parameters
       # and does not need saving.
-      list(mvnorm=private$.mvnorm)
+      list(mvnorm=self$mvnorm())
     },
 
     #' @description Restore the correlation state.
@@ -135,7 +178,8 @@ CorrelationParameters <- R6::R6Class(
     #' @param state a previously saved correlation state, as returned by the
     #' save_state method.
     restore_state = function(timestep, state) {
-      private$.mvnorm <- state$mvnorm
+      stopifnot(is.null(private$.sigma) && is.null(private$.mvnorm))
+      private$.mvnorm <- private$calculate_mvnorm(state$mvnorm)
     }
   )
 )
@@ -205,3 +249,85 @@ sample_intervention <- function(target, intervention, p, correlations) {
   z <- rnorm(length(target))
   u0 + correlations$mvnorm()[target, intervention] + z < 0
 }
+
+#' Simulate from a conditional multivariate normal distribution.
+#'
+#' Given a multidimensional variable Z which follows a multivariate normal
+#' distribution, this function allows one to draw samples for a subset of Z,
+#' while putting conditions on the values of the rest of Z.
+#'
+#' This effectively allows one to grow a MVN distributed matrix (with columns as
+#' the dimensions and a row per sampled vector), adding new dimensions after the
+#' fact. The existing columns are used as the condition set on the distribution,
+#' and the values returned by this function are used as the new dimensions.
+#'
+#' The maths behind the implementation are described in various online sources:
+#' - https://statproofbook.github.io/P/mvn-cond.html
+#' - https://www.stats.ox.ac.uk/~doucet/doucet_simulationconditionalgaussian.pdf
+#' - https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Conditional_distributions
+#'
+#' @param n the number of samples to simulate
+#' @param mean the mean vector of the distribution, including both given and
+#' dependent variables
+#' @param sigma the variance-covariance matrix of the distribution, including
+#' both given and dependent variables
+#' @param given a matrix of given values used as conditions when simulating the
+#' distribution. The matrix should have \code{n} rows, each one specifying a
+#' different set of values for the given variables.
+#' @param dependent.ind the indices within \code{mean} and \code{sigma} of the
+#' variables to simulate.
+#' @param given.ind the indices within \code{mean} and \code{sigma} of the
+#' variables for which conditions are given. The length of this vector must be
+#' equal to the number of columns of the \code{given} matrix. If empty or NULL,
+#' this function is equivalent to simulating from an unconditional multivariate
+#' normal distribution.
+#' @return a matrix with \code{n} rows and \code{length(dependent.ind)} columns,
+#' containing the simulated value.
+#' @importFrom MASS mvrnorm
+#' @noRd
+rcondmvnorm <- function(n, mean, sigma, given, dependent.ind, given.ind) {
+  stopifnot(length(mean) == nrow(sigma))
+  stopifnot(length(mean) == ncol(sigma))
+  stopifnot(nrow(given) == n)
+  stopifnot(ncol(given) == length(given.ind))
+
+  sigma11 <- sigma[dependent.ind, dependent.ind, drop=FALSE]
+  sigma12 <- sigma[dependent.ind, given.ind, drop=FALSE]
+  sigma21 <- sigma[given.ind, dependent.ind, drop=FALSE]
+  sigma22 <- sigma[given.ind, given.ind, drop=FALSE]
+
+  if (all(sigma22 == 0)) {
+    # This covers two cases: there were no given variables and therefore their
+    # variance-covariance matrix is empty, or there were given variables but
+    # they had a variance of zero. The general formula can't support the latter
+    # case since it tries to invert the matrix, but we can safely ignore the
+    # values since they are all equal to their mean and don't influence the
+    # dependent variables.
+    #
+    # In both cases we revert to a standard MVN with no condition.
+    mvrnorm(n, mean[dependent.ind], sigma11)
+  } else {
+    # Available implementations of the conditional multivariate normal assume
+    # every sample is drawn using the same condition on the given variables.
+    # This is not true in our usecase, where every individual has already had an
+    # independent vector of values drawn for the given variable. We are
+    # effectively drawing from as many different distributions as there are
+    # individuals. Thankfully the same conditional covariance matrix can be
+    # used for all the distributions, only the mean vector needs to be
+    # different. We draw the underlying samples from the MVN at mean 0, and
+    # offset that later on a per-individual basis.
+    #
+    # To work over all the vectors directly they need to be as columns, which
+    # is why we start by transposing `given`. R will recycle the `m` matrix and
+    # `mean` vectors across all the columns. The last step is to transpose the
+    # result back into the expected configuration.
+
+    m <- sigma12 %*% solve(sigma22)
+    residual <- t(given) - mean[given.ind]
+    cond_mu <- t(m %*% residual + mean[dependent.ind])
+    cond_sigma <- sigma11 - m %*% sigma21
+
+    samples <- mvrnorm(n, rep(0, length(dependent.ind)), cond_sigma)
+    samples + cond_mu
+  }
+}
diff --git a/man/CorrelationParameters.Rd b/man/CorrelationParameters.Rd
index 8bb43d70..0aebf688 100644
--- a/man/CorrelationParameters.Rd
+++ b/man/CorrelationParameters.Rd
@@ -2,14 +2,37 @@
 % Please edit documentation in R/correlation.R
 \name{CorrelationParameters}
 \alias{CorrelationParameters}
-\title{Class: Correlation parameters
-Describes an event in the simulation}
+\title{Class: Correlation parameters}
 \description{
 Class: Correlation parameters
-Describes an event in the simulation
 
 Class: Correlation parameters
-Describes an event in the simulation
+}
+\details{
+This class implements functionality that allows interventions to be
+correlated, positively or negatively. By default, interventions are applied
+independently and an individual's probability of receiving two interventions
+(either two separate interventions or two rounds of the same one) is the
+product of the probability of receiving each one.
+
+By setting a positive correlation between two interventions, we can make it
+so that the individuals that receive intervention A are more likely to
+receive intervention B. Conversely, a negative correlation will make it such
+that individuals that receive intervention A are less likely to also receive
+intervention B.
+
+Broadly speaking, the implementation works by assigning at startup a weight
+to each individual and intervention pair, reflecting how likely an individual
+is to receive that intervention. Those weights are derived stochastically
+from the configured correlation parameters.
+
+For a detailed breakdown of the calculations, see Protocol S2 of
+Griffin et al. (2010).
+Derive the mvnorm from the configured correlations.
+
+If a \code{restored_mvnorm} is specified, its columns (corresponding to
+restored interventions) will be re-used as is. Missing columns (for new
+interventions) are derived in accordance with the restored data.
 }
 \section{Methods}{
 \subsection{Public methods}{
diff --git a/tests/testthat/test-correlation.R b/tests/testthat/test-correlation.R
index 882831f8..5653daaa 100644
--- a/tests/testthat/test-correlation.R
+++ b/tests/testthat/test-correlation.R
@@ -134,3 +134,221 @@ test_that('-1 correlation between interventions gives sensible samples', {
     pop * (pev_coverage + mda_coverage),
     tolerance=.1)
 })
+
+test_that('correlation between rounds is preserved when adding interventions', {
+  pop <- 1e6
+  target <- seq(pop)
+
+  pev_coverage_1 <- .2
+  pev_coverage_2 <- .4
+
+  initial <- CorrelationParameters$new(pop, c('pev'))
+  initial$inter_round_rho('pev', 1)
+
+  restored <- CorrelationParameters$new(pop, c('pev', 'mda'))
+  restored$inter_round_rho('pev', 1)
+  restored$inter_round_rho('mda', 1)
+  restored$inter_intervention_rho('pev', 'mda', 1)
+  restored$restore_state(0, initial$save_state())
+
+  round_1 <- sample_intervention(target, 'pev', pev_coverage_1, initial)
+  round_2 <- sample_intervention(target, 'pev', pev_coverage_2, restored)
+
+  expect_equal(sum(round_1), pop * pev_coverage_1, tolerance=.1)
+  expect_equal(sum(round_2), pop * pev_coverage_2, tolerance=.1)
+
+  expect_equal(
+    sum(round_1 & round_2),
+    pop * min(pev_coverage_1, pev_coverage_2),
+    tolerance=.1)
+
+  expect_equal(
+    sum(round_1 | round_2),
+    pop * max(pev_coverage_1, pev_coverage_2),
+    tolerance=.1)
+})
+
+test_that('1 correlation between interventions gives sensible samples when restored', {
+  pop <- 1e6
+  target <- seq(pop)
+
+  pev_coverage <- .2
+  mda_coverage <- .2
+
+  initial <- CorrelationParameters$new(pop, c('pev'))
+  initial$inter_round_rho('pev', 1)
+
+  restored <- CorrelationParameters$new(pop, c('pev', 'mda'))
+  restored$inter_round_rho('pev', 1)
+  restored$inter_round_rho('mda', 1)
+  restored$inter_intervention_rho('pev', 'mda', 1)
+  restored$restore_state(0, initial$save_state())
+
+  pev_sample <- sample_intervention(target, 'pev', pev_coverage, initial)
+  mda_sample <- sample_intervention(target, 'mda', mda_coverage, restored)
+
+  expect_equal(sum(pev_sample), pop * pev_coverage, tolerance=.1)
+  expect_equal(sum(mda_sample), pop * mda_coverage, tolerance=.1)
+
+  expect_equal(
+    sum(pev_sample & mda_sample),
+    pop * min(pev_coverage, mda_coverage),
+    tolerance=.1)
+
+  expect_equal(
+    sum(pev_sample | mda_sample),
+    pop * max(pev_coverage, mda_coverage),
+    tolerance=.1)
+})
+
+test_that('0 correlation between interventions gives sensible samples when restored', {
+  pop <- 1e6
+  target <- seq(pop)
+
+  pev_coverage <- .2
+  mda_coverage <- .2
+
+  initial <- CorrelationParameters$new(pop, c('pev'))
+  initial$inter_round_rho('pev', 1)
+
+  restored <- CorrelationParameters$new(pop, c('pev', 'mda'))
+  restored$inter_round_rho('pev', 1)
+  restored$inter_round_rho('mda', 1)
+  restored$inter_intervention_rho('pev', 'mda', 0)
+  restored$restore_state(0, initial$save_state())
+
+  pev_sample <- sample_intervention(target, 'pev', pev_coverage, initial)
+  mda_sample <- sample_intervention(target, 'mda', mda_coverage, restored)
+
+  expect_equal(sum(pev_sample), pop * pev_coverage, tolerance=.1)
+  expect_equal(sum(mda_sample), pop * mda_coverage, tolerance=.1)
+
+  expect_equal(
+    sum(pev_sample & mda_sample),
+    pop * pev_coverage * mda_coverage,
+    tolerance=.1)
+
+  expect_equal(
+    sum(pev_sample | mda_sample),
+    pop * (pev_coverage + mda_coverage - (pev_coverage * mda_coverage)),
+    tolerance=.1)
+})
+
+test_that('-1 correlation between interventions gives sensible samples when restored', {
+  pop <- 1e6
+  target <- seq(pop)
+  pev_coverage <- .2
+  mda_coverage <- .2
+
+  initial <- CorrelationParameters$new(pop, c('pev'))
+  initial$inter_round_rho('pev', 1)
+
+  restored <- CorrelationParameters$new(pop, c('pev', 'mda'))
+  restored$inter_round_rho('pev', 1)
+  restored$inter_round_rho('mda', 1)
+  restored$inter_intervention_rho('pev', 'mda', -1)
+  restored$restore_state(0, initial$save_state())
+
+  pev_sample <- sample_intervention(target, 'pev', pev_coverage, initial)
+  mda_sample <- sample_intervention(target, 'mda', mda_coverage, restored)
+
+  expect_equal(sum(pev_sample), pop * pev_coverage, tolerance=.1)
+  expect_equal(sum(mda_sample), pop * mda_coverage, tolerance=.1)
+
+  expect_equal(sum(pev_sample & mda_sample), 0, tolerance=.1)
+  expect_equal(
+    sum(pev_sample | mda_sample),
+    pop * (pev_coverage + mda_coverage),
+    tolerance=.1)
+})
+
+test_that("rcondmvnorm has correct distribution", {
+  set.seed(123)
+
+  pop <- 1e6
+
+  # These are completely arbitrary values. The statistics from the simulated
+  # sample will get compared back to this.
+  means <- c(-5, 7, 0, 0.3)
+  variance <- c(0.3, 0.6, 0.9, 0.2)
+  correlation <- matrix(0, ncol=4, nrow=4)
+  correlation[1,2] <- correlation[2,1] <- -0.6
+  correlation[1,3] <- correlation[3,1] <- 0.4
+  correlation[1,4] <- correlation[4,1] <- 1
+  correlation[2,3] <- correlation[3,2] <- 0
+  correlation[2,4] <- correlation[4,2] <- 0.1
+  correlation[3,4] <- correlation[4,3] <- 0.5
+
+  covariance <- outer(variance, variance) * correlation
+  diag(covariance) <- variance
+
+  # These are indices of variables that get simulated together.
+  wy.ind <- c(1, 3)
+  xz.ind <- c(2, 4)
+
+  # Simulate from the MVN for a subset of the dimensions. We intentionally pass
+  # in a subset of the mean and covariance matrices, as the rest of the
+  # parameters are not needed and may not be known. `dependent.ind` is relative
+  # to the subsetted mean and covariance, and therefore is set the first two
+  # indices as opposed to `wy.ind`.
+  wy <- rcondmvnorm(pop, means[wy.ind], covariance[wy.ind, wy.ind], given=NULL,
+                    dependent.ind=c(1,2), given.ind=NULL)
+  expect_equal(dim(wy), c(pop, 2))
+  expect_equal(apply(wy, 2, mean), means[wy.ind], tolerance=0.001)
+  expect_equal(cov(wy), covariance[wy.ind, wy.ind], tolerance=0.001)
+
+  # Now simulate some more, but conditional on the existing values.
+  # The call to rcondmvnorm needs all the mean and covariance parameters,
+  # including the ones that have already been simulated.
+  xz <- rcondmvnorm(pop, means, covariance, given=wy,
+                    dependent.ind=xz.ind, given.ind=wy.ind)
+  expect_equal(dim(xz), c(pop, 2))
+  expect_equal(apply(xz, 2, mean), means[xz.ind], tolerance=0.001)
+  expect_equal(cov(xz), covariance[xz.ind, xz.ind], tolerance=0.001)
+
+  # Stitch the variables back together and make sure the covariance across the
+  # separately simulated values match the expected one.
+  values <- cbind(wy[,1], xz[,1], wy[,2], xz[,2])
+  expect_equal(apply(values, 2, mean), means, tolerance=0.001)
+  expect_equal(cov(values), covariance, tolerance=0.001)
+})
+
+# This would usually be considered an uninteresting edge case, since null
+# variance just yields a constant vector. The way we use the function in the
+# simulation though, a null variance is actually the default and most common
+# case, as it is used where the different rounds of an intervention have no
+# correlation.
+#
+# We need to make sure the other variables are still correctly simulated.
+test_that("rcondmvnorm allows null variance of given variables", {
+  set.seed(123)
+
+  pop <- 1e6
+
+  means <- c(-5, 7, 0)
+  variance <- c(0.3, 0, 0.9)
+  correlation <- matrix(0, ncol=3, nrow=3)
+  correlation[1,3] <- 0.4
+  correlation[3,1] <- 0.4
+
+  covariance <- outer(variance, variance) * correlation
+  diag(covariance) <- variance
+
+  # These are indices of variables that get simulated together.
+  y.ind <- 2
+  xz.ind <- c(1,3)
+
+  # Simulate one dimension. Because its variance was null, the result is a
+  # constant vector.
+  y <- rcondmvnorm(pop, means, covariance, given=NULL,
+                   dependent.ind=y.ind, given.ind=NULL)
+  expect_equal(as.vector(y), rep(means[y.ind], pop))
+
+  # Simulate the rest. The original dimension has no influence on the result.
+  xz <- rcondmvnorm(pop, means, covariance, given=y,
+                    dependent.ind=xz.ind, given.ind=y.ind)
+
+  xyz <- cbind(xz[,1], y, xz[,2])
+  expect_equal(apply(xyz, 2, mean), means, tolerance=0.001)
+  expect_equal(cov(xyz), covariance, tolerance=0.001)
+})
diff --git a/tests/testthat/test-resume.R b/tests/testthat/test-resume.R
index 4d4a572d..1fb7ff87 100644
--- a/tests/testthat/test-resume.R
+++ b/tests/testthat/test-resume.R
@@ -11,20 +11,39 @@ test_resume <- function(
   warmup_parameters = parameters,
   warmup_timesteps = 50
   ) {
+
+  # This function is only used with null correlations. However a null
+  # correlation involves sampling random numbers during initialization, which
+  # disrupts the global RNG and affects the reproducibility if the size of the
+  # matrix is not always the same.
+  #
+  # We use a single correlation object, that we initialize eagerly, such that
+  # the simulation can run undisturbed.
+  correlations <- get_correlation_parameters(parameters)
+  correlations$mvnorm()
+
   set.seed(123)
-  uninterrupted_run <- run_simulation(timesteps, parameters=parameters)
+  uninterrupted_run <- run_simulation(
+    timesteps,
+    parameters = parameters,
+    correlations = correlations)
 
   set.seed(123)
-  first_phase <- run_resumable_simulation(warmup_timesteps, warmup_parameters)
+  first_phase <- run_resumable_simulation(
+    warmup_timesteps,
+    warmup_parameters,
+    correlations = correlations)
   second_phase <- run_resumable_simulation(
     timesteps,
     parameters,
-    initial_state=first_phase$state,
-    restore_random_state=TRUE)
+    initial_state = first_phase$state,
+    restore_random_state = TRUE)
 
   expect_equal(nrow(first_phase$data), warmup_timesteps)
   expect_equal(nrow(second_phase$data), timesteps - warmup_timesteps)
 
+  # The order of columns isn't always identical, hence why mapequal needs to be
+  # used.
   expect_mapequal(
     second_phase$data,
     uninterrupted_run[-(1:warmup_timesteps),])
@@ -159,3 +178,126 @@ test_that("Bednets intervention can be added when resuming", {
     parameters = base %>% set_default_bednets(c(25, 100)))
   expect_equal(data[diff(data$n_use_net) > 0, "timestep"], 100)
 })
+
+test_that("Correlations can be set when resuming with new interventions", {
+  set.seed(123)
+
+  # When adding a new intervention with a non-zero correlation, we cannot
+  # ensure that an uninterrupted run matches the stopped-and-resumed simulation
+  # exactly, as the correlation matrix ends up being randomly sampled in a
+  # different order. This stops us from using the `test_resume` used throughout
+  # the rest of this file. Instead we'll only do stopped-and-resumed simulations
+  #' and check its behaviour.
+  #
+  # We first do a warmup phase with only TBV enabled. We then resume that
+  # simulation three times, each time with MDA enabled. Each time we resume the
+  # simulation, we set a different correlation parameter between the TBV and
+  # MDA interventions, with values -1, 0 and 1.
+  #
+  # We look at the output data and confirm that the correlation worked as
+  # expected. For this we need not only how many people got each intervention,
+  # but also how many received both and how many received at least one. This is
+  # not normally exposed, so we add an extra process to render these values.
+  #
+  # For simplicity, for each intervention, we remove any selection process other
+  # than overall coverage, such as age range (set to 0-200years) and drug
+  # efficacy (set to 100%).
+  #
+  # We need a large population to make the statistical assertions succeed. We'll
+  # only simulate 3 timesteps to keep execution time down: one timestep for
+  # warmup during which TBV takes place, one in which MDA takes place and one
+  # final timestep to collect the updated variables.
+  population <- 10000
+  tbv_coverage <- 0.2
+  mda_coverage <- 0.4
+
+  warmup_parameters <- get_parameters(overrides=list(human_population=population)) %>%
+    set_tbv(
+      timesteps=1,
+      coverage=tbv_coverage,
+      ages=0:200)
+
+  drug <- SP_AQ_params
+  drug[1] <- 1. # Override the drug efficacy to 100%
+  parameters <- warmup_parameters %>%
+    set_drugs(list(drug)) %>%
+    set_mda(
+      drug = 1,
+      timesteps = 2,
+      coverages = mda_coverage,
+      min_ages = 0,
+      max_ages = 200*365)
+
+  create_processes_stub <- function(renderer, variables, events, parameters, ...) {
+    p <- function(t) {
+      pop <- parameters$human_population
+      tbv <- variables$tbv_vaccinated$get_index_of(a=-1, b=0)$not()
+      mda <- variables$drug_time$get_index_of(-1)$not()
+
+      renderer$render("total_tbv", tbv$size(), t)
+      renderer$render("total_mda", mda$size(), t)
+      renderer$render("total_tbv_and_mda", tbv$copy()$and(mda)$size(), t)
+      renderer$render("total_tbv_or_mda", tbv$copy()$or(mda)$size(), t)
+    }
+    c(create_processes(renderer, variables, events, parameters, ...), p)
+  }
+
+  mockery::stub(run_resumable_simulation, 'create_processes', create_processes_stub)
+
+  warmup_correlations <- get_correlation_parameters(warmup_parameters)
+  warmup_correlations$inter_round_rho('tbv', 1)
+
+  warmup <- run_resumable_simulation(1,
+    parameters=warmup_parameters,
+    correlations=warmup_correlations)
+
+  zero_correlation <- get_correlation_parameters(parameters)
+  zero_correlation$inter_round_rho('tbv', 1)
+  zero_correlation$inter_round_rho('mda', 1)
+
+  positive_correlation <- get_correlation_parameters(parameters)
+  positive_correlation$inter_round_rho('tbv', 1)
+  positive_correlation$inter_round_rho('mda', 1)
+  positive_correlation$inter_intervention_rho('tbv', 'mda', 1)
+
+  negative_correlation <- get_correlation_parameters(parameters)
+  negative_correlation$inter_round_rho('tbv', 1)
+  negative_correlation$inter_round_rho('mda', 1)
+  negative_correlation$inter_intervention_rho('tbv', 'mda', -1)
+
+  data <- run_resumable_simulation(
+    3,
+    initial_state=warmup$state,
+    parameters=parameters,
+    correlations=zero_correlation)$data %>% tail(1)
+  expect_equal(data$total_tbv, population * tbv_coverage, tolerance = 0.1)
+  expect_equal(data$total_mda, population * mda_coverage, tolerance = 0.1)
+  expect_equal(
+    data$total_tbv_and_mda,
+    population * (tbv_coverage * mda_coverage),
+    tolerance = 0.1)
+  expect_equal(
+    data$total_tbv_or_mda,
+    population * (tbv_coverage + mda_coverage - tbv_coverage * mda_coverage),
+    tolerance = 0.1)
+
+  data <- run_resumable_simulation(
+    3,
+    initial_state=warmup$state,
+    parameters=parameters,
+    correlations=positive_correlation)$data %>% tail(1)
+  expect_equal(data$total_tbv, population * tbv_coverage, tolerance = 0.1)
+  expect_equal(data$total_mda, population * mda_coverage, tolerance = 0.1)
+  expect_equal(data$total_tbv_and_mda, min(data$total_tbv, data$total_mda))
+  expect_equal(data$total_tbv_or_mda, max(data$total_tbv, data$total_mda))
+
+  data <- run_resumable_simulation(
+    3,
+    initial_state=warmup$state,
+    parameters=parameters,
+    correlations=negative_correlation)$data %>% tail(1)
+  expect_equal(data$total_tbv, population * tbv_coverage, tolerance = 0.1)
+  expect_equal(data$total_mda, population * mda_coverage, tolerance = 0.1)
+  expect_equal(data$total_tbv_and_mda, 0)
+  expect_equal(data$total_tbv_or_mda, data$total_tbv + data$total_mda)
+})