diff --git a/DESCRIPTION b/DESCRIPTION index 969c6bbe..00e247a1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -54,4 +54,4 @@ Encoding: UTF-8 Language: en-GB LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 diff --git a/NAMESPACE b/NAMESPACE index 8101f663..477c1c4a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,7 @@ export(rborel) export(rgborel) export(simulate_chain_stats) export(simulate_chains) +export(simulate_scenarios) importFrom(stats,aggregate) importFrom(stats,rgamma) importFrom(stats,rpois) diff --git a/R/simulate.R b/R/simulate.R index 22a2c017..c610e797 100644 --- a/R/simulate.R +++ b/R/simulate.R @@ -477,3 +477,135 @@ simulate_chain_stats <- function(n_chains, return(out) } + +#' Calculate transmission chain statistics across a range of scenarios and +#' divide into intervals +#' +#' @details +#' The `offspring_dist` argument is not equivalent to the `offspring_dist` +#' argument in [simulate_chains()] and [simulate_chain_stats()], in those +#' functions `offspring_dist` should be a ``, whereas in +#' `simulate_scenarios()` `offspring_dist` should be a vector of `character` +#' strings which match function names. +#' +#' @inheritParams simulate_chains +#' @param offspring_dist Offspring distribution: a `character` string with the +#' name of the random number generator function. For example those +#' provided by R to generate random numbers from given distributions (e.g., +#' [rpois()] for Poisson). If the `character` string does not match +#' a function then the function will error (see [match.fun()] for details). +#' Examples that can be provided here are `"rpois"` for Poisson distributed +#' offspring, `"rnbinom"` for negative binomial offspring. +#' @param R_seq A `numeric` vector of reproduction number values to simulate +#' the branching process for. +#' @param k_seq A `numeric` vector of dispersion values to simulate the +#' branching process for. Only applicable for `offspring_dist = "rnbinom"`. +#' @inheritParams base::cut +#' @param ... [dots] Extra arguments to be passed to [simulate_chain_stats()]. +#' If argument does not match one from [simulate_chain_stats()] the function +#' will error. +#' @param include_index_case A `logical` determining whether the index case +#' that seeds the outbreak is counted in the transmission chain statistic. +#' +#' @return A ``. +#' @export +#' +#' @examples +#' simulate_scenarios( +#' statistic = c("size", "length"), +#' offspring_dist = c("rpois", "rnbinom"), +#' R_seq = seq(0.1, 0.5, 0.1), +#' k_seq = seq(0.1, 0.5, 0.1), +#' breaks = c(0, 2, 5, 10, 20, 50, 100, Inf) +#' ) +#' +#' # when R > 1 pass a stopping criteria to simulate_chain_stats() +#' simulate_scenarios( +#' statistic = c("size", "length"), +#' offspring_dist = c("rpois", "rnbinom"), +#' R_seq = seq(0.5, 2.0, 0.5), +#' k_seq = seq(0.1, 0.5, 0.1), +#' breaks = c(0, 2, 5, 10, 20, 50, 100, Inf), +#' stat_threshold = 10 +#' ) +simulate_scenarios <- function(statistic, + offspring_dist, + R_seq, + k_seq, + breaks, + ..., + include_index_case = TRUE) { + # this is to ensure the trick to set the stat_threshold works + stopifnot( + "The last element in `breaks` needs to be at least 1 greater than + the second to last element" = + (breaks[length(breaks) - 1] + 1) < breaks[length(breaks)] + ) + checkmate::assert_logical(include_index_case, any.missing = FALSE, len = 1) + scenarios <- expand.grid( + offspring_dist = offspring_dist, + statistic = statistic, + R = R_seq, + k = k_seq, + stringsAsFactors = FALSE + ) + + # remove any variation in k for rpois as parameter not used + if ("rpois" %in% offspring_dist && length(unique(scenarios$k)) > 1) { + drop_rows <- c( + scenarios$offspring_dist == "rpois" & + scenarios$k != unique(scenarios$k)[1] + ) + scenarios <- scenarios[!drop_rows, ] + # convert rpois k to NA to make clear it isn't used + scenarios$k[scenarios$offspring_dist == "rpois"] <- NA_real_ + } + + pb <- utils::txtProgressBar(max = nrow(scenarios), style = 3) + df_list <- vector(mode = "list", length = nrow(scenarios)) + for (i in seq_len(nrow(scenarios))) { + utils::setTxtProgressBar(pb, i) + args <- list( + n_chains = 10, # 1000 + offspring_dist = match.fun(scenarios[i, "offspring_dist"]), + statistic = scenarios[i, "statistic"], + stat_threshold = breaks[length(breaks) - 1] + 1 + ) + if (scenarios[i, "offspring_dist"] == "rpois") { + args$lambda <- scenarios[i, "R"] + } else { + # TODO: work out how to handle cases of functions that can be matched from + # base or in the users global environment but are parameterised + # differently + args_ <- list(mu = scenarios[i, "R"], size = scenarios[i, "k"]) + args <- c(args, args_) + } + epichain_arg_names <- names(formals(epichains::simulate_chain_stats)) + dots <- list(...) + epichain_args <- dots[names(dots) %in% epichain_arg_names] + args <- utils::modifyList(x = args, val = epichain_args) + x <- do.call(epichains::simulate_chain_stats, args = args) + if (!include_index_case) { + x <- x - 1 + } + args <- list( + x = x, + breaks = breaks + ) + # the line below assumes the default method is called which is not reliable + cut_arg_names <- names(formals(cut.default)) + cut_args <- dots[names(dots) %in% cut_arg_names] + args <- utils::modifyList(x = args, val = cut_args) + interval <- do.call(cut, args = args) + prop <- table(interval) / sum(table(interval)) + df_ <- as.data.frame(prop) + df_$R <- scenarios[i, "R"] + df_$k <- scenarios[i, "k"] + df_$offspring_dist <- scenarios[i, "offspring_dist"] + df_$statistic <- scenarios[i, "statistic"] + df_list[[i]] <- df_ + } + close(pb) + df <- do.call(rbind, df_list) + return(df) +} diff --git a/man/simulate_scenarios.Rd b/man/simulate_scenarios.Rd new file mode 100644 index 00000000..5bca2e90 --- /dev/null +++ b/man/simulate_scenarios.Rd @@ -0,0 +1,86 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulate.R +\name{simulate_scenarios} +\alias{simulate_scenarios} +\title{Calculate transmission chain statistics across a range of scenarios and +divide into intervals} +\usage{ +simulate_scenarios( + statistic, + offspring_dist, + R_seq, + k_seq, + breaks, + ..., + include_index_case = TRUE +) +} +\arguments{ +\item{statistic}{The chain statistic to track as the +stopping criteria for each chain being simulated when \code{stat_threshold} is not +\code{Inf}; A \verb{}. It can be one of: +\itemize{ +\item "size": the total number of cases produced by a chain before it goes +extinct. +\item "length": the total number of generations reached by a chain before +it goes extinct. +}} + +\item{offspring_dist}{Offspring distribution: a \code{character} string with the +name of the random number generator function. For example those +provided by R to generate random numbers from given distributions (e.g., +\code{\link[=rpois]{rpois()}} for Poisson). If the \code{character} string does not match +a function then the function will error (see \code{\link[=match.fun]{match.fun()}} for details). +Examples that can be provided here are \code{"rpois"} for Poisson distributed +offspring, \code{"rnbinom"} for negative binomial offspring.} + +\item{R_seq}{A \code{numeric} vector of reproduction number values to simulate +the branching process for.} + +\item{k_seq}{A \code{numeric} vector of dispersion values to simulate the +branching process for. Only applicable for \code{offspring_dist = "rnbinom"}.} + +\item{breaks}{either a numeric vector of two or more unique cut points or a + single number (greater than or equal to 2) giving the number of + intervals into which \code{x} is to be cut.} + +\item{...}{\link{dots} Extra arguments to be passed to \code{\link[=simulate_chain_stats]{simulate_chain_stats()}}. +If argument does not match one from \code{\link[=simulate_chain_stats]{simulate_chain_stats()}} the function +will error.} + +\item{include_index_case}{A \code{logical} determining whether the index case +that seeds the outbreak is counted in the transmission chain statistic.} +} +\value{ +A \verb{}. +} +\description{ +Calculate transmission chain statistics across a range of scenarios and +divide into intervals +} +\details{ +The \code{offspring_dist} argument is not equivalent to the \code{offspring_dist} +argument in \code{\link[=simulate_chains]{simulate_chains()}} and \code{\link[=simulate_chain_stats]{simulate_chain_stats()}}, in those +functions \code{offspring_dist} should be a \verb{}, whereas in +\code{simulate_scenarios()} \code{offspring_dist} should be a vector of \code{character} +strings which match function names. +} +\examples{ +simulate_scenarios( + statistic = c("size", "length"), + offspring_dist = c("rpois", "rnbinom"), + R_seq = seq(0.1, 0.5, 0.1), + k_seq = seq(0.1, 0.5, 0.1), + breaks = c(0, 2, 5, 10, 20, 50, 100, Inf) +) + +# when R > 1 pass a stopping criteria to simulate_chain_stats() +simulate_scenarios( + statistic = c("size", "length"), + offspring_dist = c("rpois", "rnbinom"), + R_seq = seq(0.5, 2.0, 0.5), + k_seq = seq(0.1, 0.5, 0.1), + breaks = c(0, 2, 5, 10, 20, 50, 100, Inf), + stat_threshold = 10 +) +} diff --git a/tests/testthat/test-simulate.R b/tests/testthat/test-simulate.R index 72940556..8d69d5cd 100644 --- a/tests/testthat/test-simulate.R +++ b/tests/testthat/test-simulate.R @@ -379,3 +379,42 @@ test_that("simulate_chain_stats is numerically correct", { c(1.00, 3.00) ) }) + +test_that("simulate_scenarios work as expected", { + df <- simulate_scenarios( + offspring_dist = c("rpois", "rnbinom"), + statistic = c("size", "length"), + R_seq = seq(0.1, 0.2, 0.1), + k_seq = seq(0.1, 0.2, 0.1), + breaks = c(0, 20, Inf) + ) + expect_s3_class(df, class = "data.frame") + expect_identical(dim(df), c(24L, 6L)) + expect_identical( + colnames(df), + c("interval", "Freq", "R", "k", "offspring_dist", "statistic") + ) + expect_identical(unique(df$statistic), c("size", "length")) + expect_identical(unique(df$offspring_dist), c("rpois", "rnbinom")) + expect_s3_class(df$interval, class = "factor") +}) + +test_that("simulate_scenarios work as expected using `...`", { + df <- simulate_scenarios( + offspring_dist = c("rpois", "rnbinom"), + statistic = c("size", "length"), + R_seq = seq(0.1, 0.5, 0.1), + k_seq = seq(0.1, 0.5, 0.1), + breaks = c(0, 5, 10, 20, Inf), + stat_threshold = 15 + ) + expect_s3_class(df, class = "data.frame") + expect_identical(dim(df), c(240L, 6L)) + expect_identical( + colnames(df), + c("interval", "Freq", "R", "k", "offspring_dist", "statistic") + ) + expect_identical(unique(df$statistic), c("size", "length")) + expect_identical(unique(df$offspring_dist), c("rpois", "rnbinom")) + expect_s3_class(df$interval, class = "factor") +}) \ No newline at end of file