diff --git a/R/checkers.R b/R/checkers.R index 749e66cd..5de2eaa1 100644 --- a/R/checkers.R +++ b/R/checkers.R @@ -7,6 +7,19 @@ #' invalid. #' @keywords internal .check_rate_df <- function(x, age_range) { + + # get vector of all age groups + age_groups <- apply(x, 1, function(y) y[1]:y[2]) + + # remove last integer from bracket due to exclusive upper bound + # oldest age bracket has inclusive upper bound + age_groups_ <- lapply( + age_groups, + FUN = function(x) x[-length(x)] + ) + age_groups_[length(age_groups)] <- age_groups[length(age_groups)] + age_vec <- unlist(age_groups_) + stopifnot( "column names should be 'min_age', 'max_age' & 'rate'" = c("min_age", "max_age", "rate") %in% colnames(x), @@ -15,7 +28,11 @@ "maximum age of the highest age group should match upper age range" = age_range[2] == max(x$max_age), "rate should be between 0 and 1" = - min(x$rate) >= 0 & max(x$rate) <= 1 + min(x$rate) >= 0 & max(x$rate) <= 1, + "age groups should be non-overlapping" = + !anyDuplicated(age_vec) > 0, + "age groups should be contiguous" = + all(age_range[1]:age_range[2] %in% unique(age_vec)) ) rownames(x) <- paste0( diff --git a/tests/testthat/test-checkers.R b/tests/testthat/test-checkers.R index b634d522..3fc89597 100644 --- a/tests/testthat/test-checkers.R +++ b/tests/testthat/test-checkers.R @@ -51,4 +51,24 @@ test_that(".check_rate_df fails as expected", { .check_rate_df(age_dep_hosp_rate, age_range = c(1, 90)), regexp = "rate should be between 0 and 1" ) + + age_dep_hosp_rate <- data.frame( + min_age = c(1, 5, 60), + max_age = c(10, 70, 90), + rate = c(0.1, 0.05, 0.2) + ) + expect_error( + .check_rate_df(age_dep_hosp_rate, age_range = c(1, 90)), + regexp = "age groups should be non-overlapping" + ) + + age_dep_hosp_rate <- data.frame( + min_age = c(1, 10, 80), + max_age = c(3, 60, 90), + rate = c(0.1, 0.05, 0.2) + ) + expect_error( + .check_rate_df(age_dep_hosp_rate, age_range = c(1, 90)), + regexp = "age groups should be contiguous" + ) }) diff --git a/vignettes/age-strat-rates.Rmd b/vignettes/age-strat-rates.Rmd new file mode 100644 index 00000000..11a464ec --- /dev/null +++ b/vignettes/age-strat-rates.Rmd @@ -0,0 +1,260 @@ +--- +title: "Age-stratified hospitalisation and death rates" +output: + bookdown::html_vignette2: + code_folding: show +pkgdown: + as_is: true +vignette: > + %\VignetteIndexEntry{Age-stratified hospitalisation and death rates} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +::: {.alert .alert-info} +If you are unfamiliar with the {simulist} package or the `sim_linelist()` function [Get Started vignette](simulist.html) is a great place to start. +::: + +This vignette will describe how to simulate a linelist with age-stratified (or age-dependent) hospitalisation and death rates. + +There are three rates in {simulist}, specifically in the `sim_linelist()` function, that relate to hospitalisations and deaths: + +1. Hospitalisation rate (`hosp_rate`) +2. Death rate in hospitals (`hosp_death_rate`) +3. Death rate outside of hospitals (`non_hosp_death_rate`) + +The default for `sim_lineslist()` is a `0.2` (or 20%) hospitalisation rate for individuals infected, `0.5` (or 50%) death rate for hospitalised individuals, and `0.05` (or 5%) death rate for infected individuals outside of hospitals. These default rates are applied to all age groups in the populations. + +However, it may be unrealistic to assume that the probability an infected person is admitted to hospital or dies is independent of their age. For many diseases, young children or elderly individuals are at higher risk of being hospitalised and having adverse outcomes. + +The `sim_linelist()` function can accommodate age-stratified rates by accepting a `` instead of a single rate for the entire population. + +```{r setup} +library(simulist) +library(epiparameter) +``` + +Here is an example that uses the default hospitalisation and death rates (inside and outside of hospital). First we load the requested delay distributions using the {epiparameter} package. + +```{r, read-delay-dists} +serial_interval <- epidist( + disease = "COVID-19", + epi_dist = "serial interval", + prob_distribution = "gamma", + prob_distribution_params = c(shape = 1, scale = 1) +) + +# get onset to hospital admission from {epiparameter} database +onset_to_hosp <- epidist_db( + disease = "COVID-19", + epi_dist = "onset to hospitalisation", + single_epidist = TRUE +) + +# get onset to death from {epiparameter} database +onset_to_death <- epidist_db( + disease = "COVID-19", + epi_dist = "onset to death", + single_epidist = TRUE +) +``` + +We set the seed to ensure we have the same output each time the vignette is rendered. When using {simulist}, setting the seed is not required unless you need to simulate the same linelist multiple times. + +```{r, set-seed} +set.seed(1) +``` + +## Population-wide rates + +Simulate a linelist with population-wide default rates: + +* hospitalisation rate: `0.2` +* death rate in hospitals `0.5` +* death rate outside of hospitals `0.05` + +```{r, sim-linelist} +linelist <- sim_linelist( + R = 1.1, + serial_interval = serial_interval, + onset_to_hosp = onset_to_hosp, + onset_to_death = onset_to_death +) + +# first 6 rows of linelist +head(linelist) +``` + +We can run another simulation and change these rates. Still the rates are applied to the entire population. In this scenario the probability of being hospitalised if infected is higher, but the mortality rate for both hospitalised and non-hospitalised groups is lower. + +```{r, sim-linelist-diff-rates} +linelist <- sim_linelist( + R = 1.1, + serial_interval = serial_interval, + onset_to_hosp = onset_to_hosp, + onset_to_death = onset_to_death, + hosp_rate = 0.4, + hosp_death_rate = 0.2, + non_hosp_death_rate = 0.01 +) + +head(linelist) +``` + +## Age-stratified rates + +Now we can run age-stratified rates by creating a table (``) which contains the lower and upper limits of the age groups and their respective rates. + +In this example the hospitalisation rate will be: + +* `0.2` (or 20%) for over 80s +* `0.1` (or 10%) for under 5s +* `0.05` (or 5%) for the rest + +The minimum age of each age group is inclusive, and the maximum age of each age group is exclusive, except the oldest age group which is inclusive of the minimum and maximum age. The age groups are formed by their position in the input vectors. In this example the first age group is the first element of each vector, so the minimum age is 1, maximum age is five and the hospitalisation rate for that group is 0.1. Each age group forms a row in the table. + +```{r, make-hosp-rate-df} +age_dep_hosp_rate <- data.frame( + min_age = c(1, 5, 80), + max_age = c(5, 80, 90), + rate = c(0.1, 0.05, 0.2) +) +age_dep_hosp_rate +``` + +```{r, sim-age-strat-linelist} +linelist <- sim_linelist( + R = 1.1, + serial_interval = serial_interval, + onset_to_hosp = onset_to_hosp, + onset_to_death = onset_to_death, + hosp_rate = age_dep_hosp_rate +) + +head(linelist) +``` + +::: {.alert .alert-warning} + +The minimum age of the youngest age group and the maximum age of the oldest age group must match the age range specified in the `age_range` argument of `sim_linelist()`. If it does not the function will error. + +If the age-stratified rate table does not match the default (`c(1, 90)`), the `age_range` argument will need to be set to match. + +::: + +For example, the default age range of the population is 1 to 90 (inclusive). In our example above, the lowest age group started at 1 and the oldest age group stopped at 90. This matches the default `age_range = c(1, 90)`. However, see here that if the oldest age bracket was up to 95 the function will not run. + +```{r, sim-age_strat_linelist-error, error=TRUE} +age_dep_hosp_rate <- data.frame( + min_age = c(1, 5, 80), + max_age = c(5, 80, 95), + rate = c(0.1, 0.05, 0.2) +) +age_dep_hosp_rate + +linelist <- sim_linelist( + R = 1.1, + serial_interval = serial_interval, + onset_to_hosp = onset_to_hosp, + onset_to_death = onset_to_death, + hosp_rate = age_dep_hosp_rate +) +``` + +In order to make this code run with the age-stratified hospitalisation rate given, the `age_range` can be adjusted. + +```{r, sim-age_strat-linelist-diff-age-range} +age_dep_hosp_rate <- data.frame( + min_age = c(1, 5, 80), + max_age = c(5, 80, 95), + rate = c(0.1, 0.05, 0.2) +) + +linelist <- sim_linelist( + R = 1.1, + serial_interval = serial_interval, + onset_to_hosp = onset_to_hosp, + onset_to_death = onset_to_death, + hosp_rate = age_dep_hosp_rate, + age_range = c(1, 95) +) + +head(linelist) +``` + +Exactly the same method of age-stratified rates applies to death rates. First create the `` with the age groups and their respective, in this case, death rates, and then supply this to either the `hosp_death_rate` or `non_hosp_death_rate` arguments, or both. + +Here are a couple of examples: + +```{r, sim-age_strat_linelist-hosp-death-rate} +age_dep_hosp_death_rate <- data.frame( + min_age = c(1, 5, 80), + max_age = c(5, 80, 90), + rate = c(0.3, 0.1, 0.6) +) +age_dep_hosp_death_rate + +linelist <- sim_linelist( + R = 1.1, + serial_interval = serial_interval, + onset_to_hosp = onset_to_hosp, + onset_to_death = onset_to_death, + hosp_death_rate = age_dep_hosp_death_rate +) +``` + +```{r, sim-age_strat_linelist-non-hosp-death-rate} +age_dep_non_hosp_death_rate <- data.frame( + min_age = c(1, 5, 80), + max_age = c(5, 80, 90), + rate = c(0.1, 0.05, 0.1) +) +age_dep_non_hosp_death_rate + +linelist <- sim_linelist( + R = 1.1, + serial_interval = serial_interval, + onset_to_hosp = onset_to_hosp, + onset_to_death = onset_to_death, + non_hosp_death_rate = age_dep_non_hosp_death_rate +) +``` + +Up until now these age-stratified tables have only been supplied to one rate. However, these can be supplied in the same simulation. In this case the hospitalisation rate, and death rates inside and outside of hospital, are all specified. + +```{r, sim-age-strat-linelist-all} +age_dep_hosp_rate <- data.frame( + min_age = c(1, 5, 80), + max_age = c(5, 80, 90), + rate = c(0.1, 0.05, 0.2) +) +age_dep_hosp_death_rate <- data.frame( + min_age = c(1, 5, 80), + max_age = c(5, 80, 90), + rate = c(0.3, 0.1, 0.6) +) +age_dep_non_hosp_death_rate <- data.frame( + min_age = c(1, 5, 80), + max_age = c(5, 80, 90), + rate = c(0.1, 0.05, 0.1) +) + +linelist <- sim_linelist( + R = 1.1, + serial_interval = serial_interval, + onset_to_hosp = onset_to_hosp, + onset_to_death = onset_to_death, + hosp_rate = age_dep_hosp_rate, + hosp_death_rate = age_dep_hosp_death_rate, + non_hosp_death_rate = age_dep_non_hosp_death_rate +) + +head(linelist) +``` diff --git a/vignettes/simulist.Rmd b/vignettes/simulist.Rmd index 82f620cb..934ad791 100644 --- a/vignettes/simulist.Rmd +++ b/vignettes/simulist.Rmd @@ -157,3 +157,7 @@ head(linelist) ``` It is currently not possible to simulate with a non-uniform population age structure in {simulist}. + +## Age-stratified hospitalisation and death rates + +For an overview of how a linelist can be simulated with age-stratified (or age-dependent) hospitalisation and death rates see the [vignette dedicated to this topic](age-strat-rates.html).