From 00e3b9bd58f7209c1ea51de9aaaf7f800fe2457c Mon Sep 17 00:00:00 2001 From: Kelly McCain Date: Wed, 15 Feb 2023 11:22:05 +0000 Subject: [PATCH 01/20] First edits to add plotting functions in base R --- vignettes/Variation.Rmd | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/vignettes/Variation.Rmd b/vignettes/Variation.Rmd index e8f39746..563d4f01 100644 --- a/vignettes/Variation.Rmd +++ b/vignettes/Variation.Rmd @@ -10,29 +10,47 @@ vignette: > ```{r setup, include=FALSE} library(malariasimulation) library(cowplot) -library(ggplot2) ``` +First we will create a few plotting functions to visualise outputs. +```{r} +plot_n_detect <- function(output){ + plot(x = output$timestep, y = output$n_detect_730_3650, + type = 'l', col = cols[4], + xlab = 'Time (days)', ylab = "Cases detected in children aged 2-10 years", + xaxs = "i", yaxs = "i", bty = "l") +} + +plot_p_detect <- function(output){ + plot(x = output$timestep, y = output$p_detect_730_3650, + type = 'l', col = cols[4], + xlab = 'Time (days)', ylab = "Sum of probabilities of case detection in children aged 2-10 years", + xaxs = "i", yaxs = "i", bty = "l") +} +``` ## Variation in outputs -Malariasimulation is a stochastic model, so there will be be variation in model outputs. For example, we could look at detected malaria cases over a year... +Malariasimulation is a stochastic model, so there will be be variation in model outputs. To illustrate this, we could compare the prevalence of malaria cases over a year in simulations with a small and a larger population. ```{r} # A small population -p <- get_parameters(list( +params <- get_parameters(list( human_population = 1000, individual_mosquitoes = FALSE )) -p <- set_equilibrium(p, 2) -small_o <- run_simulation(365, p) -p <- get_parameters(list( +params <- set_equilibrium(params, 2) +small_pop <- run_simulation(365, params) + +# A larger population +params <- get_parameters(list( human_population = 10000, individual_mosquitoes = FALSE )) -p <- set_equilibrium(p, 2) -big_o <- run_simulation(365, p) +params <- set_equilibrium(params, 2) +big_pop <- run_simulation(365, params) plot_grid( + ggplot(small_o) + geom_line(aes(timestep, n_detect_730_3650)) + ggtitle('n_detect at n = 1,000'), ggplot(small_o) + geom_line(aes(timestep, p_detect_730_3650)) + ggtitle('p_detect at n = 1,000'), ggplot(big_o) + geom_line(aes(timestep, n_detect_730_3650)) + ggtitle('n_detect at n = 10,000'), From 71e78f80269e6b04381989a53e684dc1bd4c27eb Mon Sep 17 00:00:00 2001 From: Kelly McCain Date: Wed, 15 Feb 2023 12:20:24 +0000 Subject: [PATCH 02/20] Updated some plots to base R --- vignettes/Variation.Rmd | 56 ++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/vignettes/Variation.Rmd b/vignettes/Variation.Rmd index 563d4f01..ab44e644 100644 --- a/vignettes/Variation.Rmd +++ b/vignettes/Variation.Rmd @@ -14,6 +14,9 @@ library(cowplot) First we will create a few plotting functions to visualise outputs. ```{r} +cols <- c("#000000", "#E69F00", "#56B4E9", "#009E73", + "#F0E442", "#0072B2", "#D55E00", "#CC79A7") + plot_n_detect <- function(output){ plot(x = output$timestep, y = output$n_detect_730_3650, type = 'l', col = cols[4], @@ -49,13 +52,15 @@ params <- get_parameters(list( params <- set_equilibrium(params, 2) big_pop <- run_simulation(365, params) -plot_grid( - - ggplot(small_o) + geom_line(aes(timestep, n_detect_730_3650)) + ggtitle('n_detect at n = 1,000'), - ggplot(small_o) + geom_line(aes(timestep, p_detect_730_3650)) + ggtitle('p_detect at n = 1,000'), - ggplot(big_o) + geom_line(aes(timestep, n_detect_730_3650)) + ggtitle('n_detect at n = 10,000'), - ggplot(big_o) + geom_line(aes(timestep, p_detect_730_3650)) + ggtitle('p_detect at n = 10,000') -) +par(mfrow = c(2,2)) +plot_n_detect(small_pop) +title('n_detect at n = 1,000') +plot_p_detect(small_pop) +title('p_detect at n = 1,000') +plot_n_detect(big_pop) +title('n_detect at n = 10,000') +plot_p_detect(big_pop) +title('p_detect at n = 10,000') ``` The n_detect output shows the result of sampling individuals who would be detected by microscopy. @@ -66,19 +71,20 @@ Notice that the p_detect output is slightly smoother. That's because it forgoes ## Estimating variation -We can estimate the variation in the number of detectable cases by repeating the simulation several times... +We can estimate the variation in the number of detectable cases by repeating the simulation several times and plotting the IQR and 95% confidence intervals. ```{r} outputs <- run_simulation_with_repetitions( timesteps = 365, repetitions = 10, - overrides = p, + overrides = params, parallel=TRUE ) +# Calculate IQR and 95% confidence intervals for each of the repetitions df <- aggregate( outputs$n_detect_730_3650, - by=list(outputs$timestep), + by = list(outputs$timestep), FUN = function(x) { c( median = median(x), @@ -91,19 +97,23 @@ df <- aggregate( } ) -df <- data.frame(cbind(t = df$Group.1, df$x)) - -plot_grid( - ggplot(df) - + geom_line(aes(x = t, y = median, group = 1)) - + geom_ribbon(aes(x = t, ymin = lowq, ymax = highq), alpha = .2) - + xlab('timestep') + ylab('n_detect') + ggtitle('IQR spread'), - ggplot(df) - + geom_line(aes(x = t, y = mean, group = 1)) - + geom_ribbon(aes(x = t, ymin = lowci, ymax = highci), alpha = .2) - + xlab('timestep') + ylab('n_detect') + ggtitle('95% confidence interval') -) +df <- data.frame(cbind(timestep = df$Group.1, df$x)) + +par(mfrow = c(1,2)) +plot(x = df$timestep, y = df$median, + type = 'l', col = cols[4], + xlab = 'Time (days)', ylab = "Cases detected in children aged 2-10 years", + xaxs = "i", yaxs = "i", bty = "l") + + ggplot(df)+ + geom_line(aes(x = timestep, y = median, group = 1))+ + geom_ribbon(aes(x = timestep, ymin = lowq, ymax = highq), alpha = .2)+ + xlab('timestep') + ylab('Cases detected in children aged 2-10 years') + ggtitle('IQR spread') + ggplot(df)+ + geom_line(aes(x = timestep, y = mean, group = 1))+ + geom_ribbon(aes(x = timestep, ymin = lowci, ymax = highci), alpha = .2)+ + xlab('timestep') + ylab('Cases detected in children aged 2-10 years') + ggtitle('95% confidence interval') ``` -These are useful techniques for comparing the expected variance from model outputs to outcomes from trials with lower population sizes. \ No newline at end of file +These are useful techniques for comparing the expected variance from model outputs to outcomes from trials that had lower population sizes. \ No newline at end of file From ff529e6919161d2b79efcbf569ed23886dc325a1 Mon Sep 17 00:00:00 2001 From: Kelly McCain Date: Wed, 15 Feb 2023 12:22:12 +0000 Subject: [PATCH 03/20] More plot modifications to avoid ggplot2 --- vignettes/Variation.Rmd | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/vignettes/Variation.Rmd b/vignettes/Variation.Rmd index ab44e644..a566eb99 100644 --- a/vignettes/Variation.Rmd +++ b/vignettes/Variation.Rmd @@ -104,15 +104,17 @@ plot(x = df$timestep, y = df$median, type = 'l', col = cols[4], xlab = 'Time (days)', ylab = "Cases detected in children aged 2-10 years", xaxs = "i", yaxs = "i", bty = "l") +title('IQR spread') +plot(x = df$timestep, y = df$mean, + type = 'l', col = cols[4], + xlab = 'Time (days)', ylab = "Cases detected in children aged 2-10 years", + xaxs = "i", yaxs = "i", bty = "l") +title('95% Confidence interval') ggplot(df)+ - geom_line(aes(x = timestep, y = median, group = 1))+ - geom_ribbon(aes(x = timestep, ymin = lowq, ymax = highq), alpha = .2)+ - xlab('timestep') + ylab('Cases detected in children aged 2-10 years') + ggtitle('IQR spread') + geom_ribbon(aes(x = timestep, ymin = lowq, ymax = highq), alpha = .2) ggplot(df)+ - geom_line(aes(x = timestep, y = mean, group = 1))+ - geom_ribbon(aes(x = timestep, ymin = lowci, ymax = highci), alpha = .2)+ - xlab('timestep') + ylab('Cases detected in children aged 2-10 years') + ggtitle('95% confidence interval') + geom_ribbon(aes(x = timestep, ymin = lowci, ymax = highci), alpha = .2) ``` From db6a683ae4c8906beddf6f5a1add7d8772668535 Mon Sep 17 00:00:00 2001 From: Kelly McCain Date: Wed, 15 Feb 2023 13:15:14 +0000 Subject: [PATCH 04/20] Removing cowplot dependency and changing last plot to base r --- vignettes/Variation.Rmd | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/vignettes/Variation.Rmd b/vignettes/Variation.Rmd index a566eb99..1a68eaf5 100644 --- a/vignettes/Variation.Rmd +++ b/vignettes/Variation.Rmd @@ -9,7 +9,6 @@ vignette: > ```{r setup, include=FALSE} library(malariasimulation) -library(cowplot) ``` First we will create a few plotting functions to visualise outputs. @@ -100,22 +99,27 @@ df <- aggregate( df <- data.frame(cbind(timestep = df$Group.1, df$x)) par(mfrow = c(1,2)) +xx <- c(df$timestep, rev(df$timestep)) +yy <- c(df$lowq, rev(df$highq)) plot(x = df$timestep, y = df$median, - type = 'l', col = cols[4], + type = 'n', xlim = c(-1,365),ylim = c(510,565), xlab = 'Time (days)', ylab = "Cases detected in children aged 2-10 years", xaxs = "i", yaxs = "i", bty = "l") +polygon(xx, yy, + col = 'grey90', border= 'grey95') +lines(x = df$timestep, y = df$median, col = cols[4], lwd = 2) title('IQR spread') + +xx <- c(df$timestep, rev(df$timestep)) +yy <- c(df$lowci, rev(df$highci)) plot(x = df$timestep, y = df$mean, - type = 'l', col = cols[4], + type = 'n', xlim = c(-1,365),ylim = c(490,590), xlab = 'Time (days)', ylab = "Cases detected in children aged 2-10 years", xaxs = "i", yaxs = "i", bty = "l") +polygon(xx, yy, + col = 'grey90', border= 'grey95') +lines(x = df$timestep, y = df$mean, col = cols[4], lwd = 2) title('95% Confidence interval') - - ggplot(df)+ - geom_ribbon(aes(x = timestep, ymin = lowq, ymax = highq), alpha = .2) - ggplot(df)+ - geom_ribbon(aes(x = timestep, ymin = lowci, ymax = highci), alpha = .2) - ``` These are useful techniques for comparing the expected variance from model outputs to outcomes from trials that had lower population sizes. \ No newline at end of file From 8095e0796939ecc2e8c4cdcc3265309ed93322a9 Mon Sep 17 00:00:00 2001 From: Kelly McCain Date: Tue, 21 Feb 2023 10:06:47 +0000 Subject: [PATCH 05/20] Updating plot rendering --- vignettes/Variation.Rmd | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/vignettes/Variation.Rmd b/vignettes/Variation.Rmd index 1a68eaf5..a6213471 100644 --- a/vignettes/Variation.Rmd +++ b/vignettes/Variation.Rmd @@ -19,14 +19,14 @@ cols <- c("#000000", "#E69F00", "#56B4E9", "#009E73", plot_n_detect <- function(output){ plot(x = output$timestep, y = output$n_detect_730_3650, type = 'l', col = cols[4], - xlab = 'Time (days)', ylab = "Cases detected in children aged 2-10 years", + xlab = 'Time (days)', ylab = "Cases detected in children \naged 2-10 years", xaxs = "i", yaxs = "i", bty = "l") } plot_p_detect <- function(output){ plot(x = output$timestep, y = output$p_detect_730_3650, type = 'l', col = cols[4], - xlab = 'Time (days)', ylab = "Sum of probabilities of case detection in children aged 2-10 years", + xlab = 'Time (days)', ylab = "Sum of probabilities of case detection\nin children aged 2-10 years", xaxs = "i", yaxs = "i", bty = "l") } ``` @@ -34,7 +34,7 @@ plot_p_detect <- function(output){ Malariasimulation is a stochastic model, so there will be be variation in model outputs. To illustrate this, we could compare the prevalence of malaria cases over a year in simulations with a small and a larger population. -```{r} +```{r, fig.align='center',fig.height=6, fig.width=9} # A small population params <- get_parameters(list( human_population = 1000, @@ -62,17 +62,15 @@ plot_p_detect(big_pop) title('p_detect at n = 10,000') ``` -The n_detect output shows the result of sampling individuals who would be detected by microscopy. +The `n_detect` output shows the result of sampling individuals who would be detected by microscopy, while the `p_detect` output shows the sum of probabilities of detection in the population. -While the p_detect output shows the sum of probabilities of detection in the population. - -Notice that the p_detect output is slightly smoother. That's because it forgoes the sampling step. At low population sizes, p_detect will be smoother than the n_detect counterpart. +Notice that the `p_detect` output is slightly smoother. This is because it forgoes the sampling step, so at low population sizes, `p_detect` will be smoother than its `n_detect` counterpart. ## Estimating variation We can estimate the variation in the number of detectable cases by repeating the simulation several times and plotting the IQR and 95% confidence intervals. -```{r} +```{r, fig.align='center',fig.height=4, fig.width=8} outputs <- run_simulation_with_repetitions( timesteps = 365, repetitions = 10, @@ -98,28 +96,33 @@ df <- aggregate( df <- data.frame(cbind(timestep = df$Group.1, df$x)) +# Plot the IQR and 95% CI par(mfrow = c(1,2)) xx <- c(df$timestep, rev(df$timestep)) yy <- c(df$lowq, rev(df$highq)) plot(x = df$timestep, y = df$median, - type = 'n', xlim = c(-1,365),ylim = c(510,565), + type = 'n', xlim = c(-1,365),ylim = c(500,565), xlab = 'Time (days)', ylab = "Cases detected in children aged 2-10 years", xaxs = "i", yaxs = "i", bty = "l") polygon(xx, yy, col = 'grey90', border= 'grey95') lines(x = df$timestep, y = df$median, col = cols[4], lwd = 2) title('IQR spread') +legend('bottom', legend = c('Median','IQR Spread'), col = c(cols[4],'grey90'), + lwd = 5, xpd = TRUE, horiz = TRUE, cex = 1, seg.len=1, bty = 'n') xx <- c(df$timestep, rev(df$timestep)) yy <- c(df$lowci, rev(df$highci)) plot(x = df$timestep, y = df$mean, - type = 'n', xlim = c(-1,365),ylim = c(490,590), + type = 'n', xlim = c(-1,365),ylim = c(460,600), xlab = 'Time (days)', ylab = "Cases detected in children aged 2-10 years", xaxs = "i", yaxs = "i", bty = "l") polygon(xx, yy, col = 'grey90', border= 'grey95') lines(x = df$timestep, y = df$mean, col = cols[4], lwd = 2) title('95% Confidence interval') +legend('bottom', inset = 0, legend = c('Mean','95% CI'), col = c(cols[4],'grey90'), + lwd = 5, xpd = TRUE, horiz = TRUE, cex = 1, seg.len=1, bty = 'n') ``` -These are useful techniques for comparing the expected variance from model outputs to outcomes from trials that had lower population sizes. \ No newline at end of file +These are useful techniques for comparing the expected variance from model outputs to outcomes from trials that might have had lower population sizes. \ No newline at end of file From b212c2858b32617df24b4527dfd658411485b6d4 Mon Sep 17 00:00:00 2001 From: Kelly McCain Date: Thu, 23 Feb 2023 11:41:08 +0000 Subject: [PATCH 06/20] Improved explanations and plots --- vignettes/Variation.Rmd | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/vignettes/Variation.Rmd b/vignettes/Variation.Rmd index a6213471..6e16b845 100644 --- a/vignettes/Variation.Rmd +++ b/vignettes/Variation.Rmd @@ -11,7 +11,11 @@ vignette: > library(malariasimulation) ``` -First we will create a few plotting functions to visualise outputs. +## Variation in outputs + +Malariasimulation is a stochastic model, meaning that randomness is incorporated, so there will be be variation in model outputs. To illustrate this, we could compare the prevalence of malaria over a year in simulations with a small and a larger population. + +### First we will create a few plotting functions to visualise outputs. ```{r} cols <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") @@ -19,21 +23,23 @@ cols <- c("#000000", "#E69F00", "#56B4E9", "#009E73", plot_n_detect <- function(output){ plot(x = output$timestep, y = output$n_detect_730_3650, type = 'l', col = cols[4], - xlab = 'Time (days)', ylab = "Cases detected in children \naged 2-10 years", + xlab = 'Time (days)', ylab = "Cases detected in children aged 2-10 years", xaxs = "i", yaxs = "i", bty = "l") } plot_p_detect <- function(output){ plot(x = output$timestep, y = output$p_detect_730_3650, type = 'l', col = cols[4], - xlab = 'Time (days)', ylab = "Sum of probabilities of case detection\nin children aged 2-10 years", + xlab = 'Time (days)', ylab = "Sum of probabilities of case detection \nin children aged 2-10 years", xaxs = "i", yaxs = "i", bty = "l") } ``` -## Variation in outputs -Malariasimulation is a stochastic model, so there will be be variation in model outputs. To illustrate this, we could compare the prevalence of malaria cases over a year in simulations with a small and a larger population. +### Simulations +The `n_detect` output below shows the result of sampling individuals who would have cases of malaria that were detected by microscopy, while the `p_detect` output shows the sum of individual probabilities of detection of malaria in the population. + +Notice that the `p_detect` output is slightly smoother. This is because it forgoes the sampling step, so at low population sizes, `p_detect` will be smoother than its `n_detect` counterpart. ```{r, fig.align='center',fig.height=6, fig.width=9} # A small population params <- get_parameters(list( @@ -62,9 +68,6 @@ plot_p_detect(big_pop) title('p_detect at n = 10,000') ``` -The `n_detect` output shows the result of sampling individuals who would be detected by microscopy, while the `p_detect` output shows the sum of probabilities of detection in the population. - -Notice that the `p_detect` output is slightly smoother. This is because it forgoes the sampling step, so at low population sizes, `p_detect` will be smoother than its `n_detect` counterpart. ## Estimating variation From 45b0d87e2eda7668cb30f9c9391e78034b3f0b2a Mon Sep 17 00:00:00 2001 From: Kelly McCain Date: Mon, 27 Feb 2023 15:57:21 +0000 Subject: [PATCH 07/20] Fixing formatting, edits to explanations --- vignettes/Variation.Rmd | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/vignettes/Variation.Rmd b/vignettes/Variation.Rmd index 6e16b845..ee1275d8 100644 --- a/vignettes/Variation.Rmd +++ b/vignettes/Variation.Rmd @@ -11,11 +11,12 @@ vignette: > library(malariasimulation) ``` -## Variation in outputs +### Variation in outputs Malariasimulation is a stochastic model, meaning that randomness is incorporated, so there will be be variation in model outputs. To illustrate this, we could compare the prevalence of malaria over a year in simulations with a small and a larger population. -### First we will create a few plotting functions to visualise outputs. +### Plotting functions +First, we will create a few plotting functions to visualise outputs. ```{r} cols <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") @@ -35,7 +36,7 @@ plot_p_detect <- function(output){ } ``` -### Simulations +## Simulations The `n_detect` output below shows the result of sampling individuals who would have cases of malaria that were detected by microscopy, while the `p_detect` output shows the sum of individual probabilities of detection of malaria in the population. @@ -71,7 +72,7 @@ title('p_detect at n = 10,000') ## Estimating variation -We can estimate the variation in the number of detectable cases by repeating the simulation several times and plotting the IQR and 95% confidence intervals. +We can estimate the variation in the number of detectable cases by repeating the simulation several times and plotting the IQR and 95% confidence intervals. These are useful techniques for comparing the expected variance from model outputs to outcomes from trials that might have had lower population sizes. ```{r, fig.align='center',fig.height=4, fig.width=8} outputs <- run_simulation_with_repetitions( @@ -127,5 +128,3 @@ title('95% Confidence interval') legend('bottom', inset = 0, legend = c('Mean','95% CI'), col = c(cols[4],'grey90'), lwd = 5, xpd = TRUE, horiz = TRUE, cex = 1, seg.len=1, bty = 'n') ``` - -These are useful techniques for comparing the expected variance from model outputs to outcomes from trials that might have had lower population sizes. \ No newline at end of file From 557c0335d0a8ae837f3f69690191e98bac591e9e Mon Sep 17 00:00:00 2001 From: Kelly McCain Date: Tue, 28 Feb 2023 16:04:39 +0000 Subject: [PATCH 08/20] adding clarification/explanation --- vignettes/Variation.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/Variation.Rmd b/vignettes/Variation.Rmd index ee1275d8..a89bd46b 100644 --- a/vignettes/Variation.Rmd +++ b/vignettes/Variation.Rmd @@ -13,7 +13,7 @@ library(malariasimulation) ### Variation in outputs -Malariasimulation is a stochastic model, meaning that randomness is incorporated, so there will be be variation in model outputs. To illustrate this, we could compare the prevalence of malaria over a year in simulations with a small and a larger population. +`malariasimulation` is a stochastic model, meaning that randomness is incorporated, creating random variation in model outputs. To illustrate this, we could compare the prevalence of malaria over a year in simulations with a small and a larger population. A smaller population would be more likely to have wide variations in the output compared to a larger one. ### Plotting functions First, we will create a few plotting functions to visualise outputs. @@ -40,7 +40,7 @@ plot_p_detect <- function(output){ The `n_detect` output below shows the result of sampling individuals who would have cases of malaria that were detected by microscopy, while the `p_detect` output shows the sum of individual probabilities of detection of malaria in the population. -Notice that the `p_detect` output is slightly smoother. This is because it forgoes the sampling step, so at low population sizes, `p_detect` will be smoother than its `n_detect` counterpart. +Notice that the `p_detect` output is slightly smoother. This is because it forgoes the sampling step, so especially at low population sizes, `p_detect` will be smoother than its `n_detect` counterpart. ```{r, fig.align='center',fig.height=6, fig.width=9} # A small population params <- get_parameters(list( From ea354a6cbd28882a80b4d0855b24ce92a8a42f24 Mon Sep 17 00:00:00 2001 From: Kelly McCain Date: Tue, 28 Feb 2023 16:34:24 +0000 Subject: [PATCH 09/20] fix y axis label --- vignettes/Variation.Rmd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vignettes/Variation.Rmd b/vignettes/Variation.Rmd index a89bd46b..ceed89ff 100644 --- a/vignettes/Variation.Rmd +++ b/vignettes/Variation.Rmd @@ -29,9 +29,10 @@ plot_n_detect <- function(output){ } plot_p_detect <- function(output){ + par(mar = c(5, 6, 4, 2) + 0.1) plot(x = output$timestep, y = output$p_detect_730_3650, type = 'l', col = cols[4], - xlab = 'Time (days)', ylab = "Sum of probabilities of case detection \nin children aged 2-10 years", + xlab = 'Time (days)', ylab = "Sum of probabilities of case detection\nin children aged 2-10 years", xaxs = "i", yaxs = "i", bty = "l") } ``` From 5283b19c0cc82b71229fca58d100ceec51c56076 Mon Sep 17 00:00:00 2001 From: Kelly McCain Date: Thu, 2 Mar 2023 10:46:52 +0000 Subject: [PATCH 10/20] Updating plots, editing text, removed p_detect references --- vignettes/Variation.Rmd | 124 ++++++++++++++++++++-------------------- 1 file changed, 62 insertions(+), 62 deletions(-) diff --git a/vignettes/Variation.Rmd b/vignettes/Variation.Rmd index ceed89ff..b1303514 100644 --- a/vignettes/Variation.Rmd +++ b/vignettes/Variation.Rmd @@ -7,84 +7,82 @@ vignette: > %\VignetteEncoding{UTF-8} --- -```{r setup, include=FALSE} +```{r setup} library(malariasimulation) ``` -### Variation in outputs - -`malariasimulation` is a stochastic model, meaning that randomness is incorporated, creating random variation in model outputs. To illustrate this, we could compare the prevalence of malaria over a year in simulations with a small and a larger population. A smaller population would be more likely to have wide variations in the output compared to a larger one. +`malariasimulation` is a stochastic, individual-based model and, as such, simulations run with identical parameterisations will generate non-identical, sometimes highly variable, outputs. To illustrate this, we could compare the prevalence of malaria over a year in simulations with a small and a larger population. A smaller population would be more likely to have wide variations in the output compared to a larger one. ### Plotting functions First, we will create a few plotting functions to visualise outputs. ```{r} -cols <- c("#000000", "#E69F00", "#56B4E9", "#009E73", +cols <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") -plot_n_detect <- function(output){ +plot_n_detect <- function(output, ylab=TRUE){ + if (ylab==TRUE) { + ylab = "Cases detected in children aged 2-10 years" + } else {ylab=''} plot(x = output$timestep, y = output$n_detect_730_3650, - type = 'l', col = cols[4], - xlab = 'Time (days)', ylab = "Cases detected in children aged 2-10 years", - xaxs = "i", yaxs = "i", bty = "l") -} - -plot_p_detect <- function(output){ - par(mar = c(5, 6, 4, 2) + 0.1) - plot(x = output$timestep, y = output$p_detect_730_3650, - type = 'l', col = cols[4], - xlab = 'Time (days)', ylab = "Sum of probabilities of case detection\nin children aged 2-10 years", + type = 'l', col = cols[3], + xlab = 'Time (days)', ylab = ylab, xaxs = "i", yaxs = "i", bty = "l") + grid(lty = 2, col = 'grey80', lwd = 1) } ``` -## Simulations - -The `n_detect` output below shows the result of sampling individuals who would have cases of malaria that were detected by microscopy, while the `p_detect` output shows the sum of individual probabilities of detection of malaria in the population. - -Notice that the `p_detect` output is slightly smoother. This is because it forgoes the sampling step, so especially at low population sizes, `p_detect` will be smoother than its `n_detect` counterpart. -```{r, fig.align='center',fig.height=6, fig.width=9} -# A small population -params <- get_parameters(list( - human_population = 1000, +## Parameterisation +First, we will use the `get_parameters()` function to generate a list of parameters, accepting the default values, for two different population sizes. Then we will update the parameters to match equilibrium parameters and to achieve the initial EIR using `set_equilibrium()` +```{r} +# A small population +simparams <- get_parameters(list( + human_population = 100, individual_mosquitoes = FALSE )) -params <- set_equilibrium(params, 2) -small_pop <- run_simulation(365, params) -# A larger population -params <- get_parameters(list( +simparams <- set_equilibrium(simparams, init_EIR = 20) + +# A larger population +simparams <- get_parameters(list( human_population = 10000, individual_mosquitoes = FALSE )) -params <- set_equilibrium(params, 2) -big_pop <- run_simulation(365, params) - -par(mfrow = c(2,2)) -plot_n_detect(small_pop) -title('n_detect at n = 1,000') -plot_p_detect(small_pop) -title('p_detect at n = 1,000') -plot_n_detect(big_pop) -title('n_detect at n = 10,000') -plot_p_detect(big_pop) -title('p_detect at n = 10,000') + +simparams <- set_equilibrium(simparams, init_EIR = 20) ``` +## Simulations + +The `n_detect_730_3650` output below shows the total number of individuals in the age group rendered (here, 730-3650 timesteps or 2-10 years) who have microscopy-detectable malaria. Notice that the output is smoother at a higher population. + +```{r, fig.align='center',fig.height=4, fig.width=10} +# A small population +output_small_pop <- run_simulation(365, simparams) + +# A larger population +output_big_pop <- run_simulation(365, simparams) + +# Plotting +par(mfrow = c(1,2)) +plot_n_detect(output_small_pop); title('n_detect at n = 100') +plot_n_detect(output_big_pop, ylab = FALSE); title('n_detect at n = 10,000') +``` + ## Estimating variation -We can estimate the variation in the number of detectable cases by repeating the simulation several times and plotting the IQR and 95% confidence intervals. These are useful techniques for comparing the expected variance from model outputs to outcomes from trials that might have had lower population sizes. +We can estimate the variation in the number of detectable cases by repeating the simulation several times with the function `run_simulation_with_repetitions()`, then plotting the IQR and 95% confidence intervals. These are useful techniques for comparing the expected variance from model outputs to outcomes from trials that might have had lower population sizes. -```{r, fig.align='center',fig.height=4, fig.width=8} +```{r, fig.align='center',fig.height=4, fig.width=10} outputs <- run_simulation_with_repetitions( - timesteps = 365, - repetitions = 10, - overrides = params, - parallel=TRUE + timesteps = 365, # We will run each simulation for 365 timesteps. + repetitions = 10, # The simulation will be run 10 times + overrides = simparams, # using the parameter list with the larger population that we created earlier. + parallel = TRUE # The runs will be done in parallel. ) # Calculate IQR and 95% confidence intervals for each of the repetitions -df <- aggregate( +output <- aggregate( outputs$n_detect_730_3650, by = list(outputs$timestep), FUN = function(x) { @@ -99,33 +97,35 @@ df <- aggregate( } ) -df <- data.frame(cbind(timestep = df$Group.1, df$x)) +output <- data.frame(cbind(timestep = output$Group.1, output$x)) # Plot the IQR and 95% CI par(mfrow = c(1,2)) -xx <- c(df$timestep, rev(df$timestep)) -yy <- c(df$lowq, rev(df$highq)) -plot(x = df$timestep, y = df$median, - type = 'n', xlim = c(-1,365),ylim = c(500,565), +xx <- c(output$timestep, rev(output$timestep)) +yy <- c(output$lowq, rev(output$highq)) +plot(x = output$timestep, y = output$median, + type = 'n', xlim = c(-1,365),ylim = c(1400,1700), xlab = 'Time (days)', ylab = "Cases detected in children aged 2-10 years", xaxs = "i", yaxs = "i", bty = "l") polygon(xx, yy, col = 'grey90', border= 'grey95') -lines(x = df$timestep, y = df$median, col = cols[4], lwd = 2) +lines(x = output$timestep, y = output$median, col = cols[3], lwd = 2) +grid(lty = 2, col = 'grey80', lwd = 1) title('IQR spread') -legend('bottom', legend = c('Median','IQR Spread'), col = c(cols[4],'grey90'), +legend('bottomleft', legend = c('Median','IQR Spread'), col = c(cols[3],'grey90'), lwd = 5, xpd = TRUE, horiz = TRUE, cex = 1, seg.len=1, bty = 'n') -xx <- c(df$timestep, rev(df$timestep)) -yy <- c(df$lowci, rev(df$highci)) -plot(x = df$timestep, y = df$mean, - type = 'n', xlim = c(-1,365),ylim = c(460,600), - xlab = 'Time (days)', ylab = "Cases detected in children aged 2-10 years", +xx <- c(output$timestep, rev(output$timestep)) +yy <- c(output$lowci, rev(output$highci)) +plot(x = output$timestep, y = output$mean, + type = 'n', xlim = c(-1,365),ylim = c(1400,1700), + xlab = 'Time (days)', ylab = "", xaxs = "i", yaxs = "i", bty = "l") polygon(xx, yy, col = 'grey90', border= 'grey95') -lines(x = df$timestep, y = df$mean, col = cols[4], lwd = 2) +lines(x = output$timestep, y = output$mean, col = cols[3], lwd = 2) +grid(lty = 2, col = 'grey80', lwd = 1) title('95% Confidence interval') -legend('bottom', inset = 0, legend = c('Mean','95% CI'), col = c(cols[4],'grey90'), +legend('bottomleft', inset = 0, legend = c('Mean','95% CI'), col = c(cols[3],'grey90'), lwd = 5, xpd = TRUE, horiz = TRUE, cex = 1, seg.len=1, bty = 'n') ``` From 0000e1204767726aeab0fa8bf7094aa1943355d7 Mon Sep 17 00:00:00 2001 From: Kelly McCain Date: Thu, 2 Mar 2023 10:53:58 +0000 Subject: [PATCH 11/20] Last minutes updates to the text --- vignettes/Variation.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/Variation.Rmd b/vignettes/Variation.Rmd index b1303514..a3228307 100644 --- a/vignettes/Variation.Rmd +++ b/vignettes/Variation.Rmd @@ -11,7 +11,7 @@ vignette: > library(malariasimulation) ``` -`malariasimulation` is a stochastic, individual-based model and, as such, simulations run with identical parameterisations will generate non-identical, sometimes highly variable, outputs. To illustrate this, we could compare the prevalence of malaria over a year in simulations with a small and a larger population. A smaller population would be more likely to have wide variations in the output compared to a larger one. +`malariasimulation` is a stochastic, individual-based model and, as such, simulations run with identical parameterisations will generate non-identical, sometimes highly variable, outputs. To illustrate this, we will compare the prevalence of malaria over a year in simulations with a small and a larger population, where a smaller population is more likely to have wide variations between simulation runs. ### Plotting functions First, we will create a few plotting functions to visualise outputs. @@ -71,7 +71,7 @@ plot_n_detect(output_big_pop, ylab = FALSE); title('n_detect at n = 10,000') ## Estimating variation -We can estimate the variation in the number of detectable cases by repeating the simulation several times with the function `run_simulation_with_repetitions()`, then plotting the IQR and 95% confidence intervals. These are useful techniques for comparing the expected variance from model outputs to outcomes from trials that might have had lower population sizes. +We can estimate the variation in the number of detectable cases by repeating the simulation several times with the function `run_simulation_with_repetitions()`, then plotting the IQR and 95% confidence intervals. Running multiple simulations is particularly important for simulations of a small population size, due to the disproportionate impact of stochasticity in small populations. ```{r, fig.align='center',fig.height=4, fig.width=10} outputs <- run_simulation_with_repetitions( From edb933643f5c466dbbf9b5cf686a859b0e2d3094 Mon Sep 17 00:00:00 2001 From: Kelly McCain Date: Thu, 2 Mar 2023 11:44:00 +0000 Subject: [PATCH 12/20] updating to rmarkdown::html_vignette to match other vignettes --- vignettes/Variation.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/Variation.Rmd b/vignettes/Variation.Rmd index a3228307..e965ee31 100644 --- a/vignettes/Variation.Rmd +++ b/vignettes/Variation.Rmd @@ -1,6 +1,6 @@ --- title: "Variation" -output: html_document +output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Variation} %\VignetteEngine{knitr::rmarkdown} From bb25014ae787af60458323182da4289cf5c5358b Mon Sep 17 00:00:00 2001 From: lhaile Date: Thu, 9 Mar 2023 14:21:32 +0000 Subject: [PATCH 13/20] added section on parameter variation --- vignettes/Variation.Rmd | 51 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/vignettes/Variation.Rmd b/vignettes/Variation.Rmd index e965ee31..ebf1a4ab 100644 --- a/vignettes/Variation.Rmd +++ b/vignettes/Variation.Rmd @@ -129,3 +129,54 @@ title('95% Confidence interval') legend('bottomleft', inset = 0, legend = c('Mean','95% CI'), col = c(cols[3],'grey90'), lwd = 5, xpd = TRUE, horiz = TRUE, cex = 1, seg.len=1, bty = 'n') ``` + + + +## Estimating variation in parameters + +Default parameters for the malariasimulation model were obtained from the joint posterior of MCMC fitting. + +```{r plot parasite prevalence with default model parameters} + +simparams <- get_parameters(list( + human_population = 100, + individual_mosquitoes = FALSE +)) + +# obtain 50 random draws from the model parameter posterior distribution + +# Default (mdedian) model parameters +sim <- run_simulation(timesteps= 1000, simparams) + +# plot the default median parameter +plot(sim$timestep, sim$n_detect_730_3650 / sim$n_730_3650, t = "l", ylim = c(0, 1), ylab = "PfPr", xlab = "Time in days") + +``` + +If needed, we can estimate the variation in model parameters using the draws from this joint posterior using `set_parameter_draw`. This function will override the default model parameters with a sample from one of 1000 draws from the joint posterior. + +Keep in mind that `set_parameter_draw` must be called prior to `set_equilibrium`, as the model must be calibrated to new model parameters. + +```{r run simulation on different samples of the joint posterior distribution} + +# Default (mdedian) model parameters +sim <- run_simulation(timesteps= 1000, simparams) + +# plot the default median parameter +plot(sim$timestep, sim$n_detect_730_3650 / sim$n_730_3650, t = "l", ylim = c(0, 1), ylab = "PfPr", xlab = "Time in days") + +cols<- rainbow(50) +for (i in 1:50){ + +message(paste0('obtaining parameter draw ', i)) +param_draw<- simparams |> + set_parameter_draw(sample(1:1000, 1)) |> + set_equilibrium(init_EIR= 5) + +sim<- run_simulation(timesteps= 1000, param_draw) + +lines(sim$timestep, sim$n_detect_730_3650 / sim$n_730_3650, col = cols[i]) + +} + +``` From 3a5af92caabb358ca9ab6ff20f26c066ca3a08fa Mon Sep 17 00:00:00 2001 From: Kelly McCain Date: Fri, 10 Mar 2023 12:18:25 +0000 Subject: [PATCH 14/20] Edits from pete/hillary's comments; added incidence plot, cleaned up " vs. ', etc --- vignettes/Variation.Rmd | 116 +++++++++++++++++++++++----------------- 1 file changed, 68 insertions(+), 48 deletions(-) diff --git a/vignettes/Variation.Rmd b/vignettes/Variation.Rmd index e965ee31..bc683fca 100644 --- a/vignettes/Variation.Rmd +++ b/vignettes/Variation.Rmd @@ -1,5 +1,5 @@ --- -title: "Variation" +title: "Stochastic Variation" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Variation} @@ -11,7 +11,7 @@ vignette: > library(malariasimulation) ``` -`malariasimulation` is a stochastic, individual-based model and, as such, simulations run with identical parameterisations will generate non-identical, sometimes highly variable, outputs. To illustrate this, we will compare the prevalence of malaria over a year in simulations with a small and a larger population, where a smaller population is more likely to have wide variations between simulation runs. +`malariasimulation` is a stochastic, individual-based model and, as such, simulations run with identical parameterisations will generate non-identical, sometimes highly variable, outputs. To illustrate this, we will compare the prevalence and incidence of malaria over a year in simulations with a small and a larger population, where a smaller population and the incidence measure are both more likely to have wide variations between simulation runs. ### Plotting functions First, we will create a few plotting functions to visualise outputs. @@ -19,65 +19,86 @@ First, we will create a few plotting functions to visualise outputs. cols <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") -plot_n_detect <- function(output, ylab=TRUE){ - if (ylab==TRUE) { - ylab = "Cases detected in children aged 2-10 years" - } else {ylab=''} - plot(x = output$timestep, y = output$n_detect_730_3650, - type = 'l', col = cols[3], - xlab = 'Time (days)', ylab = ylab, +plot_prev <- function(output, ylab = TRUE){ + if (ylab == TRUE) { + ylab = "Prevalence in children aged 2-10 years" + } else {ylab = ""} + plot(x = output$timestep, y = output$n_detect_730_3650 / output$n_730_3650, + type = "l", col = cols[3], ylim = c(0, 1), + xlab = "Time (days)", ylab = ylab, lwd = 2, xaxs = "i", yaxs = "i", bty = "l") - grid(lty = 2, col = 'grey80', lwd = 1) + grid(lty = 2, col = "grey80", lwd = 1) +} + +plot_inci <- function(output, ylab = TRUE){ + if (ylab == TRUE) { + ylab = "Incidence in children aged 0-5 years" + } else {ylab = ""} + plot(x = output$timestep, y = output$n_inc_clinical_0_1825 / output$n_0_1825, + type = "l", col = cols[5], ylim = c(0, 0.1), + xlab = "Time (days)", ylab = ylab, lwd = 2, + xaxs = "i", yaxs = "i", bty = "l") + grid(lty = 2, col = "grey80", lwd = 1) } ``` -## Parameterisation -First, we will use the `get_parameters()` function to generate a list of parameters, accepting the default values, for two different population sizes. Then we will update the parameters to match equilibrium parameters and to achieve the initial EIR using `set_equilibrium()` +## Parameterisation +First, we will use the `get_parameters()` function to generate a list of parameters, accepting the default values, for two different population sizes and use the `set_equilibrium()` function to initialise the model at a given entomological inoculation rate (EIR). The only parameter which changes between the two parameter sets is the argument for `human_population`. ```{r} # A small population -simparams <- get_parameters(list( - human_population = 100, +simparams_small <- get_parameters(list( + human_population = 1000, + clinical_incidence_rendering_min_ages = 0, + clinical_incidence_rendering_max_ages = 5 * 365, individual_mosquitoes = FALSE )) -simparams <- set_equilibrium(simparams, init_EIR = 20) +simparams_small <- set_equilibrium(parameters = simparams_small, init_EIR = 50) # A larger population -simparams <- get_parameters(list( +simparams_big <- get_parameters(list( human_population = 10000, + clinical_incidence_rendering_min_ages = 0, + clinical_incidence_rendering_max_ages = 5 * 365, individual_mosquitoes = FALSE )) -simparams <- set_equilibrium(simparams, init_EIR = 20) +simparams_big <- set_equilibrium(parameters = simparams_big, init_EIR = 50) ``` ## Simulations -The `n_detect_730_3650` output below shows the total number of individuals in the age group rendered (here, 730-3650 timesteps or 2-10 years) who have microscopy-detectable malaria. Notice that the output is smoother at a higher population. +The `n_detect_730_3650` output below shows the total number of individuals in the age group rendered (here, 730-3650 timesteps or 2-10 years) who have microscopy-detectable malaria. Notice that the output is smoother at a higher population. -```{r, fig.align='center',fig.height=4, fig.width=10} +Some outcomes will be more sensitive than others to stochastic variation. In the plots below, prevalence is smoother than incidence even at the same population. + +```{r, fig.align="center",fig.height=6, fig.width=8} # A small population -output_small_pop <- run_simulation(365, simparams) +output_small_pop <- run_simulation(timesteps = 365, parameters = simparams_small) # A larger population -output_big_pop <- run_simulation(365, simparams) +output_big_pop <- run_simulation(timesteps = 365, parameters = simparams_big) # Plotting -par(mfrow = c(1,2)) -plot_n_detect(output_small_pop); title('n_detect at n = 100') -plot_n_detect(output_big_pop, ylab = FALSE); title('n_detect at n = 10,000') +par(mfrow = c(2,2)) +plot_prev(output_small_pop); title("Prevalence at n = 1,000") +plot_inci(output_small_pop); title("Incidence at n = 1,000") +plot_prev(output_big_pop, ylab = FALSE); title("Prevalence at n = 10,000") +plot_inci(output_big_pop); title("Incidence at n = 10,000") ``` -## Estimating variation +## Estimating stochastic variation -We can estimate the variation in the number of detectable cases by repeating the simulation several times with the function `run_simulation_with_repetitions()`, then plotting the IQR and 95% confidence intervals. Running multiple simulations is particularly important for simulations of a small population size, due to the disproportionate impact of stochasticity in small populations. +One way to estimate the variation in prevalence is by repeating the simulation several times with the function `run_simulation_with_repetitions()`, then plotting the IQR and 95% confidence intervals. Running multiple simulations is particularly important for simulations of a small population size, due to the disproportionate impact of stochasticity in small populations. -```{r, fig.align='center',fig.height=4, fig.width=10} +__However, it is important to note that while running a simulation with repetitions is an option that we illustrate in this example, for the majority of use cases, it is much preferable to increase population size to minimise stochastic variation or to use parameter draws, while also using a large enough population, to estimate uncertainty (see the Parameter Draws vignette for more information). The required population size might depend on the outcome of interest. For example, a larger population size would be needed if measuring the impact of an intervention given only to children, versus an intervention given to the entire population. Additionally, as demonstrated above, some outcomes will be more sensitive than others to stochastic variation.__ + +```{r, fig.align="center",fig.height=4, fig.width=10} outputs <- run_simulation_with_repetitions( - timesteps = 365, # We will run each simulation for 365 timesteps. - repetitions = 10, # The simulation will be run 10 times - overrides = simparams, # using the parameter list with the larger population that we created earlier. + timesteps = 365, # Each repetition will be run for 365 days. + repetitions = 10, # The simulation will be run 10 times. + overrides = simparams_big, # Using the parameter list with the larger population that we created earlier. parallel = TRUE # The runs will be done in parallel. ) @@ -91,8 +112,8 @@ output <- aggregate( lowq = unname(quantile(x, probs = .25)), highq = unname(quantile(x, probs = .75)), mean = mean(x), - lowci = mean(x) + 1.96*sd(x), - highci = mean(x) - 1.96*sd(x) + lowci = mean(x) + 1.96 * sd(x), + highci = mean(x) - 1.96 * sd(x) ) } ) @@ -100,32 +121,31 @@ output <- aggregate( output <- data.frame(cbind(timestep = output$Group.1, output$x)) # Plot the IQR and 95% CI -par(mfrow = c(1,2)) +par(mfrow = c(1, 2)) xx <- c(output$timestep, rev(output$timestep)) yy <- c(output$lowq, rev(output$highq)) plot(x = output$timestep, y = output$median, - type = 'n', xlim = c(-1,365),ylim = c(1400,1700), - xlab = 'Time (days)', ylab = "Cases detected in children aged 2-10 years", + type = "n", xlim = c(-1, 365), ylim = c(1400, 1700), + xlab = "Time (days)", ylab = "Cases detected in children aged 2-10 years", xaxs = "i", yaxs = "i", bty = "l") polygon(xx, yy, - col = 'grey90', border= 'grey95') + col = "grey90", border= "grey95") lines(x = output$timestep, y = output$median, col = cols[3], lwd = 2) -grid(lty = 2, col = 'grey80', lwd = 1) -title('IQR spread') -legend('bottomleft', legend = c('Median','IQR Spread'), col = c(cols[3],'grey90'), - lwd = 5, xpd = TRUE, horiz = TRUE, cex = 1, seg.len=1, bty = 'n') +grid(lty = 2, col = "grey80", lwd = 1) +title("IQR spread") +legend("bottomleft", legend = c("Median","IQR Spread"), col = c(cols[3], "grey90"), + lwd = 5, xpd = TRUE, horiz = TRUE, cex = 1, seg.len = 1, bty = "n") xx <- c(output$timestep, rev(output$timestep)) yy <- c(output$lowci, rev(output$highci)) plot(x = output$timestep, y = output$mean, - type = 'n', xlim = c(-1,365),ylim = c(1400,1700), - xlab = 'Time (days)', ylab = "", + type = "n", xlim = c(-1, 365), ylim = c(1400, 1700), + xlab = "Time (days)", ylab = "", xaxs = "i", yaxs = "i", bty = "l") -polygon(xx, yy, - col = 'grey90', border= 'grey95') +polygon(xx, yy, col = "grey90", border= "grey95") lines(x = output$timestep, y = output$mean, col = cols[3], lwd = 2) -grid(lty = 2, col = 'grey80', lwd = 1) -title('95% Confidence interval') -legend('bottomleft', inset = 0, legend = c('Mean','95% CI'), col = c(cols[3],'grey90'), - lwd = 5, xpd = TRUE, horiz = TRUE, cex = 1, seg.len=1, bty = 'n') +grid(lty = 2, col = "grey80", lwd = 1) +title("95% Confidence interval") +legend("bottomleft", inset = 0, legend = c("Mean", "95% CI"), col = c(cols[3], "grey90"), + lwd = 5, xpd = TRUE, horiz = TRUE, cex = 1, seg.len = 1, bty = "n") ``` From fd42b123884bf984bea6cb54c72b41d6c1350196 Mon Sep 17 00:00:00 2001 From: Kelly McCain Date: Fri, 10 Mar 2023 12:20:11 +0000 Subject: [PATCH 15/20] Removed Lydia's section - she will make new vignette --- vignettes/Variation.Rmd | 53 +---------------------------------------- 1 file changed, 1 insertion(+), 52 deletions(-) diff --git a/vignettes/Variation.Rmd b/vignettes/Variation.Rmd index 4eba72c5..a1ae13c7 100644 --- a/vignettes/Variation.Rmd +++ b/vignettes/Variation.Rmd @@ -148,55 +148,4 @@ grid(lty = 2, col = "grey80", lwd = 1) title("95% Confidence interval") legend("bottomleft", inset = 0, legend = c("Mean", "95% CI"), col = c(cols[3], "grey90"), lwd = 5, xpd = TRUE, horiz = TRUE, cex = 1, seg.len = 1, bty = "n") -``` - - - -## Estimating variation in parameters - -Default parameters for the malariasimulation model were obtained from the joint posterior of MCMC fitting. - -```{r plot parasite prevalence with default model parameters} - -simparams <- get_parameters(list( - human_population = 100, - individual_mosquitoes = FALSE -)) - -# obtain 50 random draws from the model parameter posterior distribution - -# Default (mdedian) model parameters -sim <- run_simulation(timesteps= 1000, simparams) - -# plot the default median parameter -plot(sim$timestep, sim$n_detect_730_3650 / sim$n_730_3650, t = "l", ylim = c(0, 1), ylab = "PfPr", xlab = "Time in days") - -``` - -If needed, we can estimate the variation in model parameters using the draws from this joint posterior using `set_parameter_draw`. This function will override the default model parameters with a sample from one of 1000 draws from the joint posterior. - -Keep in mind that `set_parameter_draw` must be called prior to `set_equilibrium`, as the model must be calibrated to new model parameters. - -```{r run simulation on different samples of the joint posterior distribution} - -# Default (mdedian) model parameters -sim <- run_simulation(timesteps= 1000, simparams) - -# plot the default median parameter -plot(sim$timestep, sim$n_detect_730_3650 / sim$n_730_3650, t = "l", ylim = c(0, 1), ylab = "PfPr", xlab = "Time in days") - -cols<- rainbow(50) -for (i in 1:50){ - -message(paste0('obtaining parameter draw ', i)) -param_draw<- simparams |> - set_parameter_draw(sample(1:1000, 1)) |> - set_equilibrium(init_EIR= 5) - -sim<- run_simulation(timesteps= 1000, param_draw) - -lines(sim$timestep, sim$n_detect_730_3650 / sim$n_730_3650, col = cols[i]) - -} - -``` +``` \ No newline at end of file From 2b057d097b7ebe0c5bf2da8d91d311b7e1d06d02 Mon Sep 17 00:00:00 2001 From: Kelly McCain Date: Fri, 10 Mar 2023 13:25:40 +0000 Subject: [PATCH 16/20] changed last plots to plot prevalence / fixed y axes --- vignettes/Variation.Rmd | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vignettes/Variation.Rmd b/vignettes/Variation.Rmd index a1ae13c7..716ab97c 100644 --- a/vignettes/Variation.Rmd +++ b/vignettes/Variation.Rmd @@ -94,7 +94,7 @@ One way to estimate the variation in prevalence is by repeating the simulation s __However, it is important to note that while running a simulation with repetitions is an option that we illustrate in this example, for the majority of use cases, it is much preferable to increase population size to minimise stochastic variation or to use parameter draws, while also using a large enough population, to estimate uncertainty (see the Parameter Draws vignette for more information). The required population size might depend on the outcome of interest. For example, a larger population size would be needed if measuring the impact of an intervention given only to children, versus an intervention given to the entire population. Additionally, as demonstrated above, some outcomes will be more sensitive than others to stochastic variation.__ -```{r, fig.align="center",fig.height=4, fig.width=10} +```{r, fig.align="center",fig.height=5, fig.width=10} outputs <- run_simulation_with_repetitions( timesteps = 365, # Each repetition will be run for 365 days. repetitions = 10, # The simulation will be run 10 times. @@ -104,7 +104,7 @@ outputs <- run_simulation_with_repetitions( # Calculate IQR and 95% confidence intervals for each of the repetitions output <- aggregate( - outputs$n_detect_730_3650, + outputs$n_detect_730_3650 / outputs$n_730_3650, by = list(outputs$timestep), FUN = function(x) { c( @@ -125,8 +125,8 @@ par(mfrow = c(1, 2)) xx <- c(output$timestep, rev(output$timestep)) yy <- c(output$lowq, rev(output$highq)) plot(x = output$timestep, y = output$median, - type = "n", xlim = c(-1, 365), ylim = c(1400, 1700), - xlab = "Time (days)", ylab = "Cases detected in children aged 2-10 years", + type = "n", xlim = c(-1, 365), ylim = c(0.75, 0.85), + xlab = "Time (days)", ylab = "Prevalence in children aged 2-10 years", xaxs = "i", yaxs = "i", bty = "l") polygon(xx, yy, col = "grey90", border= "grey95") @@ -139,7 +139,7 @@ legend("bottomleft", legend = c("Median","IQR Spread"), col = c(cols[3], "grey90 xx <- c(output$timestep, rev(output$timestep)) yy <- c(output$lowci, rev(output$highci)) plot(x = output$timestep, y = output$mean, - type = "n", xlim = c(-1, 365), ylim = c(1400, 1700), + type = "n", xlim = c(-1, 365), ylim = c(0.75, 0.85), xlab = "Time (days)", ylab = "", xaxs = "i", yaxs = "i", bty = "l") polygon(xx, yy, col = "grey90", border= "grey95") From 1583772744d890964591390ffe1de8b9d074e3a2 Mon Sep 17 00:00:00 2001 From: Kelly McCain Date: Tue, 28 Mar 2023 18:08:39 +0100 Subject: [PATCH 17/20] remove Estimating stochastic variation, add stochastic elimination --- vignettes/Variation.Rmd | 119 +++++++++++++++++----------------------- 1 file changed, 49 insertions(+), 70 deletions(-) diff --git a/vignettes/Variation.Rmd b/vignettes/Variation.Rmd index 716ab97c..75b94f64 100644 --- a/vignettes/Variation.Rmd +++ b/vignettes/Variation.Rmd @@ -9,9 +9,10 @@ vignette: > ```{r setup} library(malariasimulation) +set.seed(123) ``` -`malariasimulation` is a stochastic, individual-based model and, as such, simulations run with identical parameterisations will generate non-identical, sometimes highly variable, outputs. To illustrate this, we will compare the prevalence and incidence of malaria over a year in simulations with a small and a larger population, where a smaller population and the incidence measure are both more likely to have wide variations between simulation runs. +`malariasimulation` is a stochastic, individual-based model and, as such, simulations run with identical parameterisations will generate non-identical, sometimes highly variable, outputs. To illustrate this, we will compare the prevalence and incidence of malaria over a year in simulations with a small and a larger population. The simulation with a smaller population compared to the larger population, and the incidence measure compared to the prevalence measure will have larger variation between simulation runs. ### Plotting functions First, we will create a few plotting functions to visualise outputs. @@ -19,23 +20,23 @@ First, we will create a few plotting functions to visualise outputs. cols <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") -plot_prev <- function(output, ylab = TRUE){ +plot_prev <- function(output, ylab = TRUE, ylim = c(0,1)){ if (ylab == TRUE) { ylab = "Prevalence in children aged 2-10 years" } else {ylab = ""} plot(x = output$timestep, y = output$n_detect_730_3650 / output$n_730_3650, - type = "l", col = cols[3], ylim = c(0, 1), + type = "l", col = cols[3], ylim = ylim, xlab = "Time (days)", ylab = ylab, lwd = 2, xaxs = "i", yaxs = "i", bty = "l") grid(lty = 2, col = "grey80", lwd = 1) } -plot_inci <- function(output, ylab = TRUE){ +plot_inci <- function(output, ylab = TRUE, ylim){ if (ylab == TRUE) { - ylab = "Incidence in children aged 0-5 years" + ylab = "Incidence per 1000 children aged 0-5 years" } else {ylab = ""} - plot(x = output$timestep, y = output$n_inc_clinical_0_1825 / output$n_0_1825, - type = "l", col = cols[5], ylim = c(0, 0.1), + plot(x = output$timestep, y = output$n_inc_clinical_0_1825 / output$n_0_1825 * 1000, + type = "l", col = cols[5], ylim = ylim, xlab = "Time (days)", ylab = ylab, lwd = 2, xaxs = "i", yaxs = "i", bty = "l") grid(lty = 2, col = "grey80", lwd = 1) @@ -82,70 +83,48 @@ output_big_pop <- run_simulation(timesteps = 365, parameters = simparams_big) # Plotting par(mfrow = c(2,2)) -plot_prev(output_small_pop); title("Prevalence at n = 1,000") -plot_inci(output_small_pop); title("Incidence at n = 1,000") -plot_prev(output_big_pop, ylab = FALSE); title("Prevalence at n = 10,000") -plot_inci(output_big_pop); title("Incidence at n = 10,000") +plot_prev(output_small_pop, ylim = c(0.6, 0.8)); title(expression(paste(italic(Pf),"PR"[2-10], " at n = 1,000"))) +plot_inci(output_small_pop, ylim = c(0, 25)); title("Incidence per 1000 children at n = 1,000") +plot_prev(output_big_pop, ylim = c(0.6, 0.8)); title(expression(paste(italic(Pf),"PR"[2-10], " at n = 10,000"))) +plot_inci(output_big_pop, ylim = c(0, 25)); title("Incidence per 1000 children at n = 10,000") ``` -## Estimating stochastic variation - -One way to estimate the variation in prevalence is by repeating the simulation several times with the function `run_simulation_with_repetitions()`, then plotting the IQR and 95% confidence intervals. Running multiple simulations is particularly important for simulations of a small population size, due to the disproportionate impact of stochasticity in small populations. - -__However, it is important to note that while running a simulation with repetitions is an option that we illustrate in this example, for the majority of use cases, it is much preferable to increase population size to minimise stochastic variation or to use parameter draws, while also using a large enough population, to estimate uncertainty (see the Parameter Draws vignette for more information). The required population size might depend on the outcome of interest. For example, a larger population size would be needed if measuring the impact of an intervention given only to children, versus an intervention given to the entire population. Additionally, as demonstrated above, some outcomes will be more sensitive than others to stochastic variation.__ - -```{r, fig.align="center",fig.height=5, fig.width=10} -outputs <- run_simulation_with_repetitions( - timesteps = 365, # Each repetition will be run for 365 days. - repetitions = 10, # The simulation will be run 10 times. - overrides = simparams_big, # Using the parameter list with the larger population that we created earlier. - parallel = TRUE # The runs will be done in parallel. -) - -# Calculate IQR and 95% confidence intervals for each of the repetitions -output <- aggregate( - outputs$n_detect_730_3650 / outputs$n_730_3650, - by = list(outputs$timestep), - FUN = function(x) { - c( - median = median(x), - lowq = unname(quantile(x, probs = .25)), - highq = unname(quantile(x, probs = .75)), - mean = mean(x), - lowci = mean(x) + 1.96 * sd(x), - highci = mean(x) - 1.96 * sd(x) - ) - } -) - -output <- data.frame(cbind(timestep = output$Group.1, output$x)) - -# Plot the IQR and 95% CI +## Stochastic elimination +With stochastic models, random elimination of malaria in a small population with low transmisison may happen. In the example below, we run two simulations: one with a very small population, and one with a larger population. There is stochastic fade out (elimination) in the smaller population, while the larger population has stable transmission over time. For this reason, it is important to run models with large-enough populations to avoid stochastic elimination or trends that are not realistic. +```{r} +# A small population +simparams_small <- get_parameters(list( + human_population = 50, + clinical_incidence_rendering_min_ages = 0, + clinical_incidence_rendering_max_ages = 5 * 365, + individual_mosquitoes = FALSE +)) + +simparams_small <- set_equilibrium(parameters = simparams_small, init_EIR = 1) + +# A larger population +simparams_big <- get_parameters(list( + human_population = 1000, + clinical_incidence_rendering_min_ages = 0, + clinical_incidence_rendering_max_ages = 5 * 365, + individual_mosquitoes = FALSE +)) + +simparams_big <- set_equilibrium(parameters = simparams_big, init_EIR = 1) +``` + +## Simulations + +```{r, fig.align="center",fig.height=6, fig.width=8} +set.seed(555) +# A small population +output_small_pop <- run_simulation(timesteps = 365 * 2, parameters = simparams_small) + +# A larger population +output_big_pop <- run_simulation(timesteps = 365 * 2, parameters = simparams_big) + +# Plotting par(mfrow = c(1, 2)) -xx <- c(output$timestep, rev(output$timestep)) -yy <- c(output$lowq, rev(output$highq)) -plot(x = output$timestep, y = output$median, - type = "n", xlim = c(-1, 365), ylim = c(0.75, 0.85), - xlab = "Time (days)", ylab = "Prevalence in children aged 2-10 years", - xaxs = "i", yaxs = "i", bty = "l") -polygon(xx, yy, - col = "grey90", border= "grey95") -lines(x = output$timestep, y = output$median, col = cols[3], lwd = 2) -grid(lty = 2, col = "grey80", lwd = 1) -title("IQR spread") -legend("bottomleft", legend = c("Median","IQR Spread"), col = c(cols[3], "grey90"), - lwd = 5, xpd = TRUE, horiz = TRUE, cex = 1, seg.len = 1, bty = "n") - -xx <- c(output$timestep, rev(output$timestep)) -yy <- c(output$lowci, rev(output$highci)) -plot(x = output$timestep, y = output$mean, - type = "n", xlim = c(-1, 365), ylim = c(0.75, 0.85), - xlab = "Time (days)", ylab = "", - xaxs = "i", yaxs = "i", bty = "l") -polygon(xx, yy, col = "grey90", border= "grey95") -lines(x = output$timestep, y = output$mean, col = cols[3], lwd = 2) -grid(lty = 2, col = "grey80", lwd = 1) -title("95% Confidence interval") -legend("bottomleft", inset = 0, legend = c("Mean", "95% CI"), col = c(cols[3], "grey90"), - lwd = 5, xpd = TRUE, horiz = TRUE, cex = 1, seg.len = 1, bty = "n") +plot_prev(output_small_pop, ylim = c(0, 1)); title(expression(paste(italic(Pf),"PR"[2-10], " at n = 50"))) +plot_prev(output_big_pop, ylab = FALSE, ylim = c(0, 1)); title(expression(paste(italic(Pf),"PR"[2-10], " at n = 1,000"))) ``` \ No newline at end of file From 77b371c12daf1d247542e2dea053de7b0094b725 Mon Sep 17 00:00:00 2001 From: Kelly McCain Date: Thu, 30 Mar 2023 14:49:53 +0100 Subject: [PATCH 18/20] Removing unclear sentence --- vignettes/Variation.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/Variation.Rmd b/vignettes/Variation.Rmd index 75b94f64..c0385587 100644 --- a/vignettes/Variation.Rmd +++ b/vignettes/Variation.Rmd @@ -12,7 +12,7 @@ library(malariasimulation) set.seed(123) ``` -`malariasimulation` is a stochastic, individual-based model and, as such, simulations run with identical parameterisations will generate non-identical, sometimes highly variable, outputs. To illustrate this, we will compare the prevalence and incidence of malaria over a year in simulations with a small and a larger population. The simulation with a smaller population compared to the larger population, and the incidence measure compared to the prevalence measure will have larger variation between simulation runs. +`malariasimulation` is a stochastic, individual-based model and, as such, simulations run with identical parameterisations will generate non-identical, sometimes highly variable, outputs. To illustrate this, we will compare the prevalence and incidence of malaria over a year in simulations with a small and a larger population. ### Plotting functions First, we will create a few plotting functions to visualise outputs. From dbd23230bf290755bd392dfe78f8e04af0a70cd6 Mon Sep 17 00:00:00 2001 From: Kelly McCain Date: Thu, 30 Mar 2023 15:51:33 +0100 Subject: [PATCH 19/20] last edits --- vignettes/Variation.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/Variation.Rmd b/vignettes/Variation.Rmd index c0385587..0f9c46da 100644 --- a/vignettes/Variation.Rmd +++ b/vignettes/Variation.Rmd @@ -72,7 +72,7 @@ simparams_big <- set_equilibrium(parameters = simparams_big, init_EIR = 50) The `n_detect_730_3650` output below shows the total number of individuals in the age group rendered (here, 730-3650 timesteps or 2-10 years) who have microscopy-detectable malaria. Notice that the output is smoother at a higher population. -Some outcomes will be more sensitive than others to stochastic variation. In the plots below, prevalence is smoother than incidence even at the same population. +Some outcomes will be more sensitive than others to stochastic variation even with the same population size. In the plots below, prevalence is smoother than incidence even at the same population. This is because prevalence is a measure of existing infection, while incidence is recording new cases per timestep. ```{r, fig.align="center",fig.height=6, fig.width=8} # A small population @@ -90,7 +90,7 @@ plot_inci(output_big_pop, ylim = c(0, 25)); title("Incidence per 1000 children a ``` ## Stochastic elimination -With stochastic models, random elimination of malaria in a small population with low transmisison may happen. In the example below, we run two simulations: one with a very small population, and one with a larger population. There is stochastic fade out (elimination) in the smaller population, while the larger population has stable transmission over time. For this reason, it is important to run models with large-enough populations to avoid stochastic elimination or trends that are not realistic. +With stochastic models, random elimination of malaria in a small population with low transmisison may happen. In the example below, we run two simulations: one with a very small population, and one with a larger population. There is stochastic fade out (elimination) in the smaller population, while the larger population has stable transmission over time. For this reason, it is important to run models with large-enough populations to avoid stochastic elimination. ```{r} # A small population simparams_small <- get_parameters(list( From 05da94cfc9e27a9acfb09289d8d7ef8588472814 Mon Sep 17 00:00:00 2001 From: Kelly McCain Date: Fri, 21 Apr 2023 11:23:28 +0100 Subject: [PATCH 20/20] Changed plot so that beginning of sim is not 0% prev --- vignettes/Variation.Rmd | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vignettes/Variation.Rmd b/vignettes/Variation.Rmd index 0f9c46da..01ac82d3 100644 --- a/vignettes/Variation.Rmd +++ b/vignettes/Variation.Rmd @@ -9,7 +9,7 @@ vignette: > ```{r setup} library(malariasimulation) -set.seed(123) +set.seed(555) ``` `malariasimulation` is a stochastic, individual-based model and, as such, simulations run with identical parameterisations will generate non-identical, sometimes highly variable, outputs. To illustrate this, we will compare the prevalence and incidence of malaria over a year in simulations with a small and a larger population. @@ -116,7 +116,7 @@ simparams_big <- set_equilibrium(parameters = simparams_big, init_EIR = 1) ## Simulations ```{r, fig.align="center",fig.height=6, fig.width=8} -set.seed(555) +set.seed(444) # A small population output_small_pop <- run_simulation(timesteps = 365 * 2, parameters = simparams_small) @@ -125,6 +125,6 @@ output_big_pop <- run_simulation(timesteps = 365 * 2, parameters = simparams_big # Plotting par(mfrow = c(1, 2)) -plot_prev(output_small_pop, ylim = c(0, 1)); title(expression(paste(italic(Pf),"PR"[2-10], " at n = 50"))) -plot_prev(output_big_pop, ylab = FALSE, ylim = c(0, 1)); title(expression(paste(italic(Pf),"PR"[2-10], " at n = 1,000"))) +plot_prev(output_small_pop, ylim = c(0, 0.2)); title(expression(paste(italic(Pf),"PR"[2-10], " at n = 50"))) +plot_prev(output_big_pop, ylab = FALSE, ylim = c(0, 0.2)); title(expression(paste(italic(Pf),"PR"[2-10], " at n = 1,000"))) ``` \ No newline at end of file