From e1b6891b63ccbba0b48bc371ffd125aa32e504c1 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 23 Aug 2024 11:24:17 +0100 Subject: [PATCH 1/8] add simulate_scenarios function --- DESCRIPTION | 2 +- NAMESPACE | 1 + R/simulate.R | 104 ++++++++++++++++++++++++++++++++++++++ man/simulate_scenarios.Rd | 75 +++++++++++++++++++++++++++ 4 files changed, 181 insertions(+), 1 deletion(-) create mode 100644 man/simulate_scenarios.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 969c6bbe..00e247a1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -54,4 +54,4 @@ Encoding: UTF-8 Language: en-GB LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 diff --git a/NAMESPACE b/NAMESPACE index 8101f663..477c1c4a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,7 @@ export(rborel) export(rgborel) export(simulate_chain_stats) export(simulate_chains) +export(simulate_scenarios) importFrom(stats,aggregate) importFrom(stats,rgamma) importFrom(stats,rpois) diff --git a/R/simulate.R b/R/simulate.R index 22a2c017..a4100d51 100644 --- a/R/simulate.R +++ b/R/simulate.R @@ -477,3 +477,107 @@ simulate_chain_stats <- function(n_chains, return(out) } + +#' Calculate transmission chain statistics across a range of scenarios and +#' divide into intervals +#' +#' @details +#' The `offspring_dist` argument is not equivalent to the `offspring_dist` +#' argument in [simulate_chains()] and [simulate_chain_stats()], in those +#' functions `offspring_dist` should be a ``, whereas in +#' `simulate_scenarios()` `offspring_dist` should be a vector of `character` +#' strings which match function names. +#' +#' @inheritParams simulate_chains +#' @param offspring_dist Offspring distribution: a `character` string with the +#' name of the random number generator function. For example those +#' provided by R to generate random numbers from given distributions (e.g., +#' [rpois()] for Poisson). If the `character` string does not match +#' a function then the function will error (see [match.fun()] for details). +#' Examples that can be provided here are `"rpois"` for Poisson distributed +#' offspring, `"rnbinom"` for negative binomial offspring. +#' @param R_seq A `numeric` vector of reproduction number values to simulate +#' the branching process for. +#' @param k_seq A `numeric` vector of dispersion values to simulate the +#' branching process for. Only applicable for `offspring_dist = "rnbinom"`. +#' @inheritParams base::cut +#' @param ... [dots] Extra arguments to be passed to [simulate_chain_stats()]. +#' If argument does not match one from [simulate_chain_stats()] the function +#' will error. +#' +#' @return A ``. +#' @export +#' +#' @examples +#' simulate_scenarios( +#' statistic = c("size", "length"), +#' offspring_dist = c("rpois", "rnbinom"), +#' R_seq = seq(0.1, 0.5, 0.1), +#' k_seq = seq(0.1, 0.5, 0.1), +#' breaks = c(0, 2, 5, 10, 20, 50, 100, Inf) +#' ) +#' +#' # when R > 1 pass a stopping criteria to simulate_chain_stats() +#' simulate_scenarios( +#' statistic = c("size", "length"), +#' offspring_dist = c("rpois", "rnbinom"), +#' R_seq = seq(0.5, 2.0, 0.5), +#' k_seq = seq(0.1, 0.5, 0.1), +#' breaks = c(0, 2, 5, 10, 20, 50, 100, Inf), +#' stat_threshold = 10 +#' ) +simulate_scenarios <- function(statistic, + offspring_dist, + R_seq, + k_seq, + breaks, + ...) { + scenarios <- expand.grid( + offspring_dist = offspring_dist, + statistic = statistic, + R = R_seq, + k = k_seq, + stringsAsFactors = FALSE + ) + + # remove any variation in k for rpois as parameter not used + if ("rpois" %in% offspring_dist && length(unique(scenarios$k)) > 1) { + drop_rows <- c( + scenarios$offspring_dist == "rpois" & + scenarios$k != unique(scenarios$k)[1] + ) + scenarios <- scenarios[!drop_rows, ] + # convert rpois k to NA to make clear it isn't used + scenarios$k[scenarios$offspring_dist == "rpois"] <- NA_real_ + } + + df_list <- vector(mode = "list", length = nrow(scenarios)) + for (i in seq_len(nrow(scenarios))) { + args <- list( + n_chains = 1000, + offspring_dist = match.fun(scenarios[i, "offspring_dist"]), + statistic = scenarios[i, "statistic"] + ) + if (scenarios[i, "offspring_dist"] == "rpois") { + args$lambda <- scenarios[i, "R"] + } else { + # TODO: work out how to handle cases of functions that can be matched from + # base or in the users global environment but are parameterised + # differently + args_ <- list(mu = scenarios[i, "R"], size = scenarios[i, "k"]) + args <- c(args, args_) + } + args <- utils::modifyList(x = args, val = list(...)) + x <- do.call(epichains::simulate_chain_stats, args = args) + interval <- cut(x, breaks = breaks) + prop <- table(interval) / sum(table(interval)) + df_ <- as.data.frame(prop) + df_$R <- scenarios[i, "R"] + df_$k <- scenarios[i, "k"] + df_$offspring_dist <- scenarios[i, "offspring_dist"] + df_$statistic <- scenarios[i, "statistic"] + df_list[[i]] <- df_ + } + df <- do.call(rbind, df_list) + return(df) +} diff --git a/man/simulate_scenarios.Rd b/man/simulate_scenarios.Rd new file mode 100644 index 00000000..3f99b0df --- /dev/null +++ b/man/simulate_scenarios.Rd @@ -0,0 +1,75 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulate.R +\name{simulate_scenarios} +\alias{simulate_scenarios} +\title{Calculate transmission chain statistics across a range of scenarios and +divide into intervals} +\usage{ +simulate_scenarios(statistic, offspring_dist, R_seq, k_seq, breaks, ...) +} +\arguments{ +\item{statistic}{The chain statistic to track as the +stopping criteria for each chain being simulated when \code{stat_threshold} is not +\code{Inf}; A \verb{}. It can be one of: +\itemize{ +\item "size": the total number of cases produced by a chain before it goes +extinct. +\item "length": the total number of generations reached by a chain before +it goes extinct. +}} + +\item{offspring_dist}{Offspring distribution: a \code{character} string with the +name of the random number generator function. For example those +provided by R to generate random numbers from given distributions (e.g., +\code{\link[=rpois]{rpois()}} for Poisson). If the \code{character} string does not match +a function then the function will error (see \code{\link[=match.fun]{match.fun()}} for details). +Examples that can be provided here are \code{"rpois"} for Poisson distributed +offspring, \code{"rnbinom"} for negative binomial offspring.} + +\item{R_seq}{A \code{numeric} vector of reproduction number values to simulate +the branching process for.} + +\item{k_seq}{A \code{numeric} vector of dispersion values to simulate the +branching process for. Only applicable for \code{offspring_dist = "rnbinom"}.} + +\item{breaks}{either a numeric vector of two or more unique cut points or a + single number (greater than or equal to 2) giving the number of + intervals into which \code{x} is to be cut.} + +\item{...}{\link{dots} Extra arguments to be passed to \code{\link[=simulate_chain_stats]{simulate_chain_stats()}}. +If argument does not match one from \code{\link[=simulate_chain_stats]{simulate_chain_stats()}} the function +will error.} +} +\value{ +A \verb{}. +} +\description{ +Calculate transmission chain statistics across a range of scenarios and +divide into intervals +} +\details{ +The \code{offspring_dist} argument is not equivalent to the \code{offspring_dist} +argument in \code{\link[=simulate_chains]{simulate_chains()}} and \code{\link[=simulate_chain_stats]{simulate_chain_stats()}}, in those +functions \code{offspring_dist} should be a \verb{}, whereas in +\code{simulate_scenarios()} \code{offspring_dist} should be a vector of \code{character} +strings which match function names. +} +\examples{ +simulate_scenarios( + statistic = c("size", "length"), + offspring_dist = c("rpois", "rnbinom"), + R_seq = seq(0.1, 0.5, 0.1), + k_seq = seq(0.1, 0.5, 0.1), + breaks = c(0, 2, 5, 10, 20, 50, 100, Inf) +) + +# when R > 1 pass a stopping criteria to simulate_chain_stats() +simulate_scenarios( + statistic = c("size", "length"), + offspring_dist = c("rpois", "rnbinom"), + R_seq = seq(0.5, 2.0, 0.5), + k_seq = seq(0.1, 0.5, 0.1), + breaks = c(0, 2, 5, 10, 20, 50, 100, Inf), + stat_threshold = 10 +) +} From 94674e6e6e84d2ee1e4742f800f34b2970024d61 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 23 Aug 2024 11:24:28 +0100 Subject: [PATCH 2/8] add unit tests for simulate_scenarios --- tests/testthat/test-simulate.R | 39 ++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/testthat/test-simulate.R b/tests/testthat/test-simulate.R index 72940556..c2ce483a 100644 --- a/tests/testthat/test-simulate.R +++ b/tests/testthat/test-simulate.R @@ -379,3 +379,42 @@ test_that("simulate_chain_stats is numerically correct", { c(1.00, 3.00) ) }) + +test_that("simulate_scenarios work as expected", { + df <- simulate_scenarios( + offspring_dist = c("rpois", "rnbinom"), + statistic = c("size", "length"), + R_seq = seq(0.1, 0.5, 0.1), + k_seq = seq(0.1, 0.5, 0.1), + breaks = c(0, 5, 10, 20, Inf) + ) + expect_s3_class(df, class = "data.frame") + expect_identical(dim(df), c(240L, 6L)) + expect_identical( + colnames(df), + c("interval", "Freq", "R", "k", "offspring_dist", "statistic") + ) + expect_identical(unique(df$statistic), c("size", "length")) + expect_identical(unique(df$offspring_dist), c("rpois", "rnbinom")) + expect_s3_class(df$interval, class = "factor") +}) + +test_that("simulate_scenarios work as expected using `...`", { + df <- simulate_scenarios( + offspring_dist = c("rpois", "rnbinom"), + statistic = c("size", "length"), + R_seq = seq(0.1, 0.5, 0.1), + k_seq = seq(0.1, 0.5, 0.1), + breaks = c(0, 5, 10, 20, Inf), + stat_threshold = 15 + ) + expect_s3_class(df, class = "data.frame") + expect_identical(dim(df), c(240L, 6L)) + expect_identical( + colnames(df), + c("interval", "Freq", "R", "k", "offspring_dist", "statistic") + ) + expect_identical(unique(df$statistic), c("size", "length")) + expect_identical(unique(df$offspring_dist), c("rpois", "rnbinom")) + expect_s3_class(df$interval, class = "factor") +}) \ No newline at end of file From 0e87e5e19fa510bd2bd46b074235f7a24dd2f784 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 6 Sep 2024 15:43:29 +0100 Subject: [PATCH 3/8] Reduce parameter space in simulate_scenarios() tests Co-authored-by: James Azam --- tests/testthat/test-simulate.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-simulate.R b/tests/testthat/test-simulate.R index c2ce483a..dd4e8a55 100644 --- a/tests/testthat/test-simulate.R +++ b/tests/testthat/test-simulate.R @@ -384,9 +384,9 @@ test_that("simulate_scenarios work as expected", { df <- simulate_scenarios( offspring_dist = c("rpois", "rnbinom"), statistic = c("size", "length"), - R_seq = seq(0.1, 0.5, 0.1), - k_seq = seq(0.1, 0.5, 0.1), - breaks = c(0, 5, 10, 20, Inf) + R_seq = seq(0.1, 0.2, 0.1), + k_seq = seq(0.1, 0.2, 0.1), + breaks = c(0, 20, Inf) ) expect_s3_class(df, class = "data.frame") expect_identical(dim(df), c(240L, 6L)) From 6237c9bc5ace35cdeb12268b544739dbeead4aa2 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 6 Sep 2024 17:07:27 +0100 Subject: [PATCH 4/8] add include_index_case argument in simulate_scenarios --- R/simulate.R | 10 +++++++++- man/simulate_scenarios.Rd | 13 ++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/R/simulate.R b/R/simulate.R index a4100d51..87919168 100644 --- a/R/simulate.R +++ b/R/simulate.R @@ -504,6 +504,8 @@ simulate_chain_stats <- function(n_chains, #' @param ... [dots] Extra arguments to be passed to [simulate_chain_stats()]. #' If argument does not match one from [simulate_chain_stats()] the function #' will error. +#' @param include_index_case A `logical` determining whether the index case +#' that seeds the outbreak is counted in the transmission chain statistic. #' #' @return A ``. #' @export @@ -531,7 +533,10 @@ simulate_scenarios <- function(statistic, R_seq, k_seq, breaks, - ...) { + ..., + include_index_case = TRUE) { + browser() + checkmate::assert_logical(include_index_case, any.missing = FALSE, len = 1) scenarios <- expand.grid( offspring_dist = offspring_dist, statistic = statistic, @@ -569,6 +574,9 @@ simulate_scenarios <- function(statistic, } args <- utils::modifyList(x = args, val = list(...)) x <- do.call(epichains::simulate_chain_stats, args = args) + if (!include_index_case) { + x <- x - 1 + } interval <- cut(x, breaks = breaks) prop <- table(interval) / sum(table(interval)) df_ <- as.data.frame(prop) diff --git a/man/simulate_scenarios.Rd b/man/simulate_scenarios.Rd index 3f99b0df..5bca2e90 100644 --- a/man/simulate_scenarios.Rd +++ b/man/simulate_scenarios.Rd @@ -5,7 +5,15 @@ \title{Calculate transmission chain statistics across a range of scenarios and divide into intervals} \usage{ -simulate_scenarios(statistic, offspring_dist, R_seq, k_seq, breaks, ...) +simulate_scenarios( + statistic, + offspring_dist, + R_seq, + k_seq, + breaks, + ..., + include_index_case = TRUE +) } \arguments{ \item{statistic}{The chain statistic to track as the @@ -39,6 +47,9 @@ branching process for. Only applicable for \code{offspring_dist = "rnbinom"}.} \item{...}{\link{dots} Extra arguments to be passed to \code{\link[=simulate_chain_stats]{simulate_chain_stats()}}. If argument does not match one from \code{\link[=simulate_chain_stats]{simulate_chain_stats()}} the function will error.} + +\item{include_index_case}{A \code{logical} determining whether the index case +that seeds the outbreak is counted in the transmission chain statistic.} } \value{ A \verb{}. From ce7c1c9e378f298f09e0217a1637c380773f10ed Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 6 Sep 2024 17:11:04 +0100 Subject: [PATCH 5/8] rm browser() --- R/simulate.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/simulate.R b/R/simulate.R index 87919168..267d8bd0 100644 --- a/R/simulate.R +++ b/R/simulate.R @@ -535,7 +535,6 @@ simulate_scenarios <- function(statistic, breaks, ..., include_index_case = TRUE) { - browser() checkmate::assert_logical(include_index_case, any.missing = FALSE, len = 1) scenarios <- expand.grid( offspring_dist = offspring_dist, From 4811ac34fa05733b3f5bc370da892173d83e2e6a Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Mon, 9 Sep 2024 11:31:39 +0100 Subject: [PATCH 6/8] allow arguments to be passed to cut() via ... in simulate_scenarios --- R/simulate.R | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/R/simulate.R b/R/simulate.R index 267d8bd0..40c712fd 100644 --- a/R/simulate.R +++ b/R/simulate.R @@ -571,12 +571,23 @@ simulate_scenarios <- function(statistic, args_ <- list(mu = scenarios[i, "R"], size = scenarios[i, "k"]) args <- c(args, args_) } - args <- utils::modifyList(x = args, val = list(...)) + epichain_arg_names <- names(formals(epichains::simulate_chain_stats)) + dots <- list(...) + epichain_args <- dots[names(dots) %in% epichain_arg_names] + args <- utils::modifyList(x = args, val = epichain_args) x <- do.call(epichains::simulate_chain_stats, args = args) if (!include_index_case) { x <- x - 1 } - interval <- cut(x, breaks = breaks) + args <- list( + x = x, + breaks = breaks + ) + # the line below assumes the default method is called which is not reliable + cut_arg_names <- names(formals(cut.default)) + cut_args <- dots[names(dots) %in% cut_arg_names] + args <- utils::modifyList(x = args, val = cut_args) + interval <- do.call(cut, args = args) prop <- table(interval) / sum(table(interval)) df_ <- as.data.frame(prop) df_$R <- scenarios[i, "R"] From d5d1d380a18d262f15cf226c11918ae55694bdfd Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Mon, 9 Sep 2024 11:31:49 +0100 Subject: [PATCH 7/8] update test expectation --- tests/testthat/test-simulate.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-simulate.R b/tests/testthat/test-simulate.R index dd4e8a55..8d69d5cd 100644 --- a/tests/testthat/test-simulate.R +++ b/tests/testthat/test-simulate.R @@ -389,7 +389,7 @@ test_that("simulate_scenarios work as expected", { breaks = c(0, 20, Inf) ) expect_s3_class(df, class = "data.frame") - expect_identical(dim(df), c(240L, 6L)) + expect_identical(dim(df), c(24L, 6L)) expect_identical( colnames(df), c("interval", "Freq", "R", "k", "offspring_dist", "statistic") From 9348ed782429afc98e4d1aee5c93753931a6bfff Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Mon, 9 Sep 2024 14:43:10 +0100 Subject: [PATCH 8/8] calculate and set stat_threshold and add progress bar to simulate_scenarios --- R/simulate.R | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/R/simulate.R b/R/simulate.R index 40c712fd..c610e797 100644 --- a/R/simulate.R +++ b/R/simulate.R @@ -535,6 +535,12 @@ simulate_scenarios <- function(statistic, breaks, ..., include_index_case = TRUE) { + # this is to ensure the trick to set the stat_threshold works + stopifnot( + "The last element in `breaks` needs to be at least 1 greater than + the second to last element" = + (breaks[length(breaks) - 1] + 1) < breaks[length(breaks)] + ) checkmate::assert_logical(include_index_case, any.missing = FALSE, len = 1) scenarios <- expand.grid( offspring_dist = offspring_dist, @@ -555,12 +561,15 @@ simulate_scenarios <- function(statistic, scenarios$k[scenarios$offspring_dist == "rpois"] <- NA_real_ } + pb <- utils::txtProgressBar(max = nrow(scenarios), style = 3) df_list <- vector(mode = "list", length = nrow(scenarios)) for (i in seq_len(nrow(scenarios))) { + utils::setTxtProgressBar(pb, i) args <- list( - n_chains = 1000, + n_chains = 10, # 1000 offspring_dist = match.fun(scenarios[i, "offspring_dist"]), - statistic = scenarios[i, "statistic"] + statistic = scenarios[i, "statistic"], + stat_threshold = breaks[length(breaks) - 1] + 1 ) if (scenarios[i, "offspring_dist"] == "rpois") { args$lambda <- scenarios[i, "R"] @@ -596,6 +605,7 @@ simulate_scenarios <- function(statistic, df_$statistic <- scenarios[i, "statistic"] df_list[[i]] <- df_ } + close(pb) df <- do.call(rbind, df_list) return(df) }