diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml
index 4c1f48a..c1f7a96 100644
--- a/.github/workflows/deploy.yaml
+++ b/.github/workflows/deploy.yaml
@@ -31,11 +31,11 @@ jobs:
- name: Install dependencies
uses: r-lib/actions/setup-r-dependencies@v2
with:
- extra-packages:
+ packages:
any::gh,
any::knitr,
any::rmarkdown,
- local::.,
+ nfidd/nfidd
- name: Install cmdstan
uses: epinowcast/actions/install-cmdstan@v1
diff --git a/.github/workflows/render-readme.yaml b/.github/workflows/render-readme.yaml
index 05ec6aa..e00d225 100644
--- a/.github/workflows/render-readme.yaml
+++ b/.github/workflows/render-readme.yaml
@@ -31,7 +31,7 @@ jobs:
- name: Install dependencies
uses: r-lib/actions/setup-r-dependencies@v2
with:
- extra-packages: any::rmarkdown, any::allcontributors
+ packages: any::rmarkdown, any::allcontributors, nfidd/nfidd
- name: Update contributors
if: github.ref == 'refs/heads/main'
diff --git a/DESCRIPTION b/DESCRIPTION
deleted file mode 100644
index e258c46..0000000
--- a/DESCRIPTION
+++ /dev/null
@@ -1,42 +0,0 @@
-Package: nfidd
-Title: Material to support course on nowcasting and forecasting of infectious
- disease dynamics
-Version: 0.1.0.9000
-Authors@R: c(
- person(
- given = "NFIDD course contributors",
- family = "",
- role = c("cre", "aut"),
- email = "epiforecasts@gmail.com"
- )
- )
-Description: Resources to support a short course on nowcasting and forecasting
- of infectious disease dynamics.
-License: MIT + file LICENSE
-URL: https://github.com/nfidd/nfidd
-BugReports: https://github.com/nfidd/nfidd/issues
-Depends:
- R (>= 4.2.0)
-Imports:
- dplyr,
- tidyr
-Suggests:
- bayesplot,
- cmdstanr,
- ggplot2,
- lubridate,
- posterior,
- purrr,
- rmarkdown,
- scoringutils,
- tidybayes,
- qra
-Remotes:
- epiforecasts/scoringutils@b370557,
- epiforecasts/qra@v0.1.0
-Additional_repositories:
- https://stan-dev.r-universe.dev
-Encoding: UTF-8
-Language: en-GB
-RoxygenNote: 7.3.2
-LazyData: true
diff --git a/NAMESPACE b/NAMESPACE
deleted file mode 100644
index b87ba6c..0000000
--- a/NAMESPACE
+++ /dev/null
@@ -1,34 +0,0 @@
-# Generated by roxygen2: do not edit by hand
-
-export(add_delays)
-export(censored_delay_pmf)
-export(convolve_with_delay)
-export(geometric_diff_ar)
-export(geometric_random_walk)
-export(make_daily_infections)
-export(make_gen_time_pmf)
-export(make_ip_pmf)
-export(nfidd_cmdstan_model)
-export(nfidd_load_stan_functions)
-export(nfidd_stan_function_files)
-export(nfidd_stan_functions)
-export(nfidd_stan_models)
-export(nfidd_stan_path)
-export(renewal)
-export(simulate_onsets)
-importFrom(dplyr,count)
-importFrom(dplyr,full_join)
-importFrom(dplyr,if_else)
-importFrom(dplyr,join_by)
-importFrom(dplyr,left_join)
-importFrom(dplyr,mutate)
-importFrom(dplyr,n)
-importFrom(dplyr,select)
-importFrom(dplyr,tibble)
-importFrom(dplyr,transmute)
-importFrom(stats,rbinom)
-importFrom(stats,rgamma)
-importFrom(stats,rlnorm)
-importFrom(stats,rpois)
-importFrom(tidyr,expand)
-importFrom(tidyr,replace_na)
diff --git a/NEWS.md b/NEWS.md
deleted file mode 100644
index 12ab8fc..0000000
--- a/NEWS.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# nfidd 0.1.0.9000
-
-In development version of the package and teaching material for teaching in Bangkok in November 2024.
-
-# nfidd 0.1.0
-
-Initial release of the `nfidd` package and teaching material for teaching in June 2024 in Stockholm.
\ No newline at end of file
diff --git a/R/add_delays.r b/R/add_delays.r
deleted file mode 100644
index 05db107..0000000
--- a/R/add_delays.r
+++ /dev/null
@@ -1,37 +0,0 @@
-#' Simulate symptom onset and hospitalization times from infection times
-#'
-#' @param infection_times A data frame containing a column of infection times
-#'
-#' @return A data frame with columns for infection time, onset time, and
-#' hospitalization time (with 70% of hospitalizations set to NA)
-#'
-#' @importFrom dplyr mutate n if_else
-#' @importFrom stats rgamma rlnorm rbinom
-#'
-#' @export
-#'
-#' @examples
-#' add_delays(infection_times)
-add_delays <- function(infection_times) {
- # Add random delays from appropriate probability distributions
- df <- infection_times |>
- mutate(
- # Gamma distribution for onset times
- onset_time = infection_time + rgamma(n(), shape = 5, rate = 1),
- # Lognormal distribution for hospitalisation times
- hosp_time = onset_time + rlnorm(n(), meanlog = 1.75, sdlog = 0.5)
- )
-
- # Set random 70% of hospitalization dates to NA (30% hospitalized)
- df <- df |>
- mutate(
- hosp_time = if_else(
- # use the binomial distribution for random binary outcomes
- rbinom(n = n(), size = 1, prob = 0.3) == 1,
- hosp_time,
- NA_real_
- )
- )
-
- return(df)
-}
diff --git a/R/censored-delay-pmf.r b/R/censored-delay-pmf.r
deleted file mode 100644
index debe94f..0000000
--- a/R/censored-delay-pmf.r
+++ /dev/null
@@ -1,33 +0,0 @@
-#' Discretise a Continuous Delay Distribution
-#'
-#' This function discretises a continuous delay distribution into a probability
-#' mass function (PMF) over discrete days.
-#'
-#' @param rgen A function that generates random delays, e.g., `rgamma`,
-#' `rlnorm`.
-#' @param max The maximum delay.
-#' @param n The number of replicates to simulate. Defaults to `1e+6`.
-#' @param ... Additional parameters of the delay distribution.
-#'
-#' @return A vector of probabilities corresponding to discrete indices from
-#' `0` to `max`, representing the discretised delay distribution.
-#'
-#' @export
-#'
-#' @examples
-#' censored_delay_pmf(rgen = rgamma, max = 14, shape = 5, rate = 1)
-censored_delay_pmf <- function(rgen, max, n = 1e+6, ...) {
- ## first, simulate exact time of first event (given by day), uniformly
- ## between 0 and 1
- first <- runif(n, min = 0, max = 1)
- ## now, simulate the exact time of the second event
- second <- first + rgen(n, ...)
- ## round down to get the delay in days
- delay <- floor(second)
- ## get vector of counts
- counts <- table(factor(delay, levels = seq(0, max)))
- ## normalise to get pmf
- pmf <- counts / sum(counts)
- ## return
- return(pmf)
-}
diff --git a/R/convolve-with-delay.r b/R/convolve-with-delay.r
deleted file mode 100644
index ffca294..0000000
--- a/R/convolve-with-delay.r
+++ /dev/null
@@ -1,30 +0,0 @@
-#' Convolve a Time Series with a Delay Distribution
-#'
-#' This function convolves a time series with a delay distribution given as a
-#' probability mass function (PMF).
-#'
-#' @param ts A vector of the time series to convolve.
-#' @param delay_pmf The probability mass function of the delay, given as a
-#' vector of probabilities, corresponding to discrete indices 0, 1, 2 of the
-#' discretised delay distribution.
-#'
-#' @return A vector of the convolved time series.
-#'
-#' @export
-#'
-#' @examples
-#' convolve_with_delay(ts = c(10, 14, 10, 10), delay_pmf = c(0.1, 0.6, 0.3))
-convolve_with_delay <- function(ts, delay_pmf) {
- max_delay <- length(delay_pmf) - 1 ## subtract one because zero-indexed
- convolved <- vapply(seq_along(ts), \(i) {
- ## get vector of infections over the possible window of the delay period
- first_index <- max(1, i - max_delay)
- ts_segment <- ts[seq(first_index, i)]
- ## take reverse of pmf and cut if needed
- pmf <- rev(delay_pmf)[seq_len(i - first_index + 1)]
- ## convolve with delay distribution
- ret <- sum(ts_segment * pmf)
- return(ret)
- }, numeric(1))
- return(convolved)
-}
diff --git a/R/geometric-diff-ar.r b/R/geometric-diff-ar.r
deleted file mode 100644
index 1c15032..0000000
--- a/R/geometric-diff-ar.r
+++ /dev/null
@@ -1,31 +0,0 @@
-#' Geometric Differenced Autoregressive Process
-#'
-#' This function generates a geometric differenced autoregressive process.
-#'
-#' @param init The initial value.
-#' @param noise A vector of steps (on the standard normal scale).
-#' @param std The step size of the random walk.
-#' @param damp A damping parameter.
-#'
-#' @return A vector of the generated geometric differenced autoregressive
-#' process.
-#'
-#' @export
-#'
-#' @examples
-#' geometric_diff_ar(init = 1, noise = rnorm(100), phi = 0.1, std = 0.1)
-geometric_diff_ar <- function(init, noise, std, damp) {
- ## number of values: initial and each of the steps
- n <- length(noise) + 1
- ## declare vector
- x <- numeric(n)
- ## initial value
- x[1] <- init
- ## second value (no difference yet available for use)
- x[2] <- x[1] + noise[1] * std
- ## random walk
- for (i in 3:n) {
- x[i] <- x[i - 1] + damp * (x[i - 1] - x[i - 2]) + noise[i - 1] * std
- }
- return(exp(x))
-}
diff --git a/R/geometric-random-walk.r b/R/geometric-random-walk.r
deleted file mode 100644
index b2ec343..0000000
--- a/R/geometric-random-walk.r
+++ /dev/null
@@ -1,27 +0,0 @@
-#' Geometric Random Walk
-#'
-#' This function generates a geometric random walk.
-#'
-#' @param init The initial value.
-#' @param noise A vector of steps (on the standard normal scale).
-#' @param std The step size of the random walk.
-#'
-#' @return A vector of the generated geometric random walk.
-#'
-#' @export
-#'
-#' @examples
-#' geometric_random_walk(init = 1, noise = rnorm(100), std = 0.1)
-geometric_random_walk <- function(init, noise, std) {
- ## number of values: initial and each of the steps
- n <- length(noise) + 1
- ## declare vector
- x <- numeric(n)
- ## initial value
- x[1] <- init
- ## random walk
- for (i in 2:n) {
- x[i] <- x[i - 1] + noise[i - 1] * std
- }
- return(exp(x))
-}
diff --git a/R/make-daily-infections.r b/R/make-daily-infections.r
deleted file mode 100644
index 96158b0..0000000
--- a/R/make-daily-infections.r
+++ /dev/null
@@ -1,33 +0,0 @@
-#' Convert infection times to a daily time series
-#'
-#' @param infection_times A data frame containing a column of infection times
-#'
-#' @return A data frame with columns infection_day and infections, containing
-#' the daily count of infections
-#'
-#' @importFrom dplyr transmute count full_join join_by
-#' @importFrom tidyr expand replace_na
-#'
-#' @export
-#'
-#' @examples
-#' make_daily_infections(infection_times)
-make_daily_infections <- function(infection_times) {
- ## Censor infection times to days
- df <- infection_times |>
- transmute(infection_day = floor(infection_time))
-
- ## Summarise infections as a time series by day
- inf_ts <- df |>
- count(infection_day, name = "infections")
-
- ## fill data frame with zeroes for days without infection
- all_days <- expand(
- df, infection_day = seq(min(infection_day), max(infection_day))
- )
- inf_ts <- all_days |>
- full_join(inf_ts, by = join_by(infection_day)) |>
- replace_na(list(infections = 0))
-
- return(inf_ts)
-}
diff --git a/R/nfidd-stan-tools.r b/R/nfidd-stan-tools.r
deleted file mode 100644
index f1b048a..0000000
--- a/R/nfidd-stan-tools.r
+++ /dev/null
@@ -1,304 +0,0 @@
-#' Get the path to Stan code
-#'
-#' @return A character string with the path to the Stan code
-#'
-#' @family stantools
-#'
-#' @export
-nfidd_stan_path <- function() {
- system.file("stan", package = "nfidd")
-}
-
-#' Count the number of unmatched braces in a line
-#' @noRd
-.unmatched_braces <- function(line) {
- ifelse(
- grepl("{", line, fixed = TRUE),
- length(gregexpr("{", line, fixed = TRUE)), 0
- ) -
- ifelse(
- grepl("}", line, fixed = TRUE),
- length(gregexpr("}", line, fixed = TRUE)), 0
- )
-}
-
-#' Extract function names or content from Stan code
-#'
-#' @param content Character vector containing Stan code
-#'
-#' @param names_only Logical, if TRUE extract function names, otherwise
-#' extract function content.
-#'
-#' @param functions Optional, character vector of function names to extract
-#' content for.
-#' @return Character vector of function names or content
-#' @keywords internal
-.extract_stan_functions <- function(
- content, names_only = FALSE, functions = NULL) {
- def_pattern <- "^(real|vector|matrix|void|int|array\\s*<\\s*(real|vector|matrix|int)\\s*>|tuple\\s*<\\s*.*\\s*>)\\s+" # nolint
- func_pattern <- paste0(
- def_pattern, "(\\w+)\\s*\\("
- )
- func_lines <- grep(func_pattern, content, value = TRUE)
- # remove the func_pattern
- func_lines <- sub(def_pattern, "", func_lines)
- # get the next complete word after the pattern until the first (
- func_names <- sub("\\s*\\(.*$", "", func_lines)
- if (!is.null(functions)) {
- func_names <- intersect(func_names, functions)
- }
- if (names_only) {
- return(func_names)
- } else {
- func_content <- character(0)
- for (func_name in func_names) {
- start_line <- grep(paste0(def_pattern, func_name, "\\("), content)
- if (length(start_line) == 0) next
- end_line <- start_line
- brace_count <- 0
- # Ensure we find the first opening brace
- repeat {
- line <- content[end_line]
- brace_count <- brace_count + .unmatched_braces(line)
- end_line <- end_line + 1
- if (brace_count > 0) break
- }
- # Continue until all braces are closed
- repeat {
- line <- content[end_line]
- brace_count <- brace_count + .unmatched_braces(line)
- if (brace_count == 0) break
- end_line <- end_line + 1
- }
- func_content <- c(
- func_content, paste(content[start_line:end_line], collapse = "\n")
- )
- }
- return(func_content)
- }
-}
-
-#' Get Stan function names from Stan files
-#'
-#' This function reads all Stan files in the specified directory and extracts
-#' the names of all functions defined in those files.
-#'
-#' @param stan_path Character string specifying the path to the directory
-#' containing Stan files. Defaults to the Stan path of the nfidd
-#' package.
-#'
-#' @return A character vector containing unique names of all functions found in
-#' the Stan files.
-#'
-#' @export
-#'
-#' @family stantools
-nfidd_stan_functions <- function(
- stan_path = nfidd::nfidd_stan_path()) {
- stan_files <- list.files(
- file.path(stan_path, "functions"),
- pattern = "\\.stan$", full.names = TRUE,
- recursive = TRUE
- )
- functions <- character(0)
- for (file in stan_files) {
- content <- readLines(file)
- functions <- c(
- functions, .extract_stan_functions(content, names_only = TRUE)
- )
- }
- unique(functions)
-}
-
-#' Get Stan files containing specified functions
-#'
-#' This function retrieves Stan files from a specified directory, optionally
-#' filtering for files that contain specific functions.
-#'
-#' @param functions Character vector of function names to search for. If NULL,
-#' all Stan files are returned.
-#' @inheritParams nfidd_stan_functions
-#'
-#' @return A character vector of file paths to Stan files.
-#'
-#' @export
-#'
-#' @family stantools
-nfidd_stan_function_files <- function(
- functions = NULL,
- stan_path = nfidd::nfidd_stan_path()) {
- # List all Stan files in the directory
- all_files <- list.files(
- file.path(stan_path, "functions"),
- pattern = "\\.stan$",
- full.names = TRUE,
- recursive = TRUE
- )
-
- if (is.null(functions)) {
- return(all_files)
- } else {
- # Initialize an empty vector to store matching files
- matching_files <- character(0)
-
- for (file in all_files) {
- content <- readLines(file)
- extracted_functions <- .extract_stan_functions(content, names_only = TRUE)
-
- if (any(functions %in% extracted_functions)) {
- matching_files <- c(matching_files, file)
- }
- }
-
- # remove the path from the file names
- matching_files <- sub(
- paste0(stan_path, "/"), "", matching_files
- )
- return(matching_files)
- }
-}
-
-#' Load Stan functions as a string
-#'
-#' @param functions Character vector of function names to load. Defaults to all
-#' functions.
-#'
-#' @param stan_path Character string, the path to the Stan code. Defaults to the
-#' path to the Stan code in the nfidd package.
-#'
-#' @param wrap_in_block Logical, whether to wrap the functions in a
-#' `functions{}` block. Default is FALSE.
-#'
-#' @param write_to_file Logical, whether to write the output to a file. Default
-#' is FALSE.
-#'
-#' @param output_file Character string, the path to write the output file if
-#' write_to_file is TRUE. Defaults to "nfidd_functions.stan".
-#'
-#' @return A character string containing the requested Stan functions
-#'
-#' @family stantools
-#'
-#' @export
-nfidd_load_stan_functions <- function(
- functions = NULL, stan_path = nfidd::nfidd_stan_path(),
- wrap_in_block = FALSE, write_to_file = FALSE,
- output_file = "nfidd_functions.stan") {
- stan_files <- list.files(
- file.path(stan_path, "functions"),
- pattern = "\\.stan$", full.names = TRUE,
- recursive = TRUE
- )
- all_content <- character(0)
-
- for (file in stan_files) {
- content <- readLines(file)
- if (is.null(functions)) {
- all_content <- c(all_content, content)
- } else {
- for (func in functions) {
- func_content <- .extract_stan_functions(
- content,
- names_only = FALSE,
- functions = func
- )
- all_content <- c(all_content, func_content)
- }
- }
- }
-
- # Add version comment
- version_comment <- paste(
- "// Stan functions from nfidd version",
- utils::packageVersion("nfidd")
- )
- all_content <- c(version_comment, all_content)
-
- if (wrap_in_block) {
- all_content <- c("functions {", all_content, "}")
- }
-
- result <- paste(all_content, collapse = "\n")
-
- if (write_to_file) {
- writeLines(result, output_file)
- message("Stan functions written to: ", output_file, "\n")
- }
-
- return(result)
-}
-
-#' List Available Stan Models in NFIDD
-#'
-#' This function finds all available Stan models in the NFIDD package and
-#' returns their names without the .stan extension.
-#'
-#' @param stan_path Character string specifying the path to Stan files. Defaults
-#' to the result of `nfidd_stan_path()`.
-#'
-#' @return A character vector of available Stan model names.
-#'
-#' @export
-#'
-#' @examples
-#' nfidd_list_stan_models()
-nfidd_stan_models <- function(
- stan_path = nfidd::nfidd_stan_path()
- ) {
- stan_files <- list.files(
- stan_path,
- pattern = "\\.stan$", full.names = FALSE,
- recursive = FALSE
- )
-
- # Remove .stan extension
- model_names <- tools::file_path_sans_ext(stan_files)
-
- return(model_names)
-}
-
-#' Create a CmdStanModel with NFIDD Stan functions
-#'
-#' This function creates a CmdStanModel object using a specified Stan model from
-#' the NFIDD package and optionally includes additional user-specified Stan
-#' files.
-#'
-#' @param model_name Character string specifying which Stan model to use.
-#' @param include_paths Character vector of paths to include for Stan
-#' compilation. Defaults to the result of `nfidd_stan_path()`.
-#' @param ... Additional arguments passed to cmdstanr::cmdstan_model().
-#'
-#' @return A CmdStanModel object.
-#'
-#' @export
-#'
-#' @family modelhelpers
-#'
-#' @examplesIf requireNamespace("cmdstanr", quietly = TRUE)
-#' if (!is.null(cmdstanr::cmdstan_version(error_on_NA = FALSE))) {
-#' model <- nfidd_cmdstan_model("simple-nowcast", compile = FALSE)
-#' model
-#' }
-nfidd_cmdstan_model <- function(
- model_name,
- include_paths = nfidd::nfidd_stan_path(),
- ...) {
- if (!requireNamespace("cmdstanr", quietly = TRUE)) {
- stop("Package 'cmdstanr' is required but not installed for this function.")
- }
-
- stan_model <- system.file(
- "stan", paste0(model_name, ".stan"),
- package = "nfidd"
- )
-
- if (stan_model == "") {
- stop(sprintf("Model '%s.stan' not found in NFIDD package", model_name))
- }
-
- cmdstanr::cmdstan_model(
- stan_model,
- include_paths = include_paths,
- ...
- )
-}
\ No newline at end of file
diff --git a/R/renewal.r b/R/renewal.r
deleted file mode 100644
index bc29f89..0000000
--- a/R/renewal.r
+++ /dev/null
@@ -1,40 +0,0 @@
-#' Simulate Infections using the Renewal Equation
-#'
-#' This function simulates infections using the renewal equation.
-#'
-#' @param I0 The initial number of infections.
-#' @param R The reproduction number, given as a vector with one entry per time
-#' point.
-#' @param gen_time The generation time distribution, given as a vector with one
-#' entry per day after infection (the first element corresponding to one day
-#' after infection).
-#'
-#' @return A vector of simulated infections over time.
-#'
-#' @export
-#'
-#' @examples
-#' renewal(
-#' I0 = 5,
-#' R = c(rep(3, 4), rep(0.5, 5)),
-#' gen_time = c(0.1, 0.2, 0.3, 0.2, 0.1)
-#' )
-renewal <- function(I0, R, gen_time) {
- ## set the maximum generation time
- max_gen_time <- length(gen_time)
- ## number of time points
- times <- length(R)
- I <- c(I0, rep(0, times)) ## set up vector holding number of infected
- ## iterate over time points
- for (t in 1:times) {
- ## calculate convolution
- first_index <- max(1, t - max_gen_time + 1)
- I_segment <- I[seq(first_index, t)]
- ## iterate over generation times
- ## take reverse of pmf and reverse if needed
- gen_pmf <- rev(gen_time[seq_len(t - first_index + 1)])
- ## convolve infections with generation time
- I[t + 1] <- sum(I_segment * gen_pmf) * R[t]
- }
- return(I[-1]) ## remove I0 from time series
-}
diff --git a/R/simulate_onsets.r b/R/simulate_onsets.r
deleted file mode 100644
index 9f715db..0000000
--- a/R/simulate_onsets.r
+++ /dev/null
@@ -1,70 +0,0 @@
-#' Generate a probability mass function for the generation time
-#'
-#' @param max Maximum delay to consider
-#' @param shape Shape parameter for the gamma distribution
-#' @param rate Rate parameter for the gamma distribution
-#'
-#' @return A vector of probabilities representing the generation time PMF
-#'
-#' @export
-#'
-#' @importFrom stats rgamma
-make_gen_time_pmf <- function(max = 14, shape = 4, rate = 1) {
- pmf <- censored_delay_pmf(rgamma, max = max, shape = shape, rate = rate)
- pmf <- pmf[-1] # remove first element
- pmf <- pmf / sum(pmf) # renormalise
- return(pmf)
-}
-
-#' Generate a probability mass function for the incubation period
-#'
-#' @param max Maximum delay to consider
-#' @param shape Shape parameter for the gamma distribution
-#' @param rate Rate parameter for the gamma distribution
-#'
-#' @return A vector of probabilities representing the incubation period PMF
-#'
-#' @export
-#'
-#' @importFrom stats rgamma
-make_ip_pmf <- function(max = 14, shape = 5, rate = 1) {
- censored_delay_pmf(rgamma, max = max, shape = shape, rate = rate)
-}
-
-#' Simulate symptom onsets from daily infection counts
-#'
-#' @param inf_ts A data frame containing columns infection_day and infections,
-#' as returned by `make_daily_infections()`.
-#'
-#' @param gen_time_pmf A vector of probabilities representing the generation
-#' time PMF, as returned by `make_gen_time_pmf()`.
-#'
-#' @param ip_pmf A vector of probabilities representing the incubation period
-#' PMF, as returned by `make_ip_pmf()`.
-#'
-#' @return A data frame with columns day, onsets, and infections containing
-#' the daily count of symptom onsets and infections
-#'
-#' @importFrom stats rgamma rpois
-#' @importFrom dplyr tibble left_join select
-#' @importFrom tidyr replace_na
-#'
-#' @export
-#'
-#' @examples
-#' gt_pmf <- make_gen_time_pmf()
-#' ip_pmf <- make_ip_pmf()
-#' simulate_onsets(make_daily_infections(infection_times), gt_pmf, ip_pmf)
-simulate_onsets <- function(inf_ts, gen_time_pmf = make_gen_time_pmf(),
- ip_pmf = make_ip_pmf()) {
- onsets <- convolve_with_delay(inf_ts$infections, ip_pmf)
- onsets <- rpois(n = length(onsets), lambda = onsets)
- onset_df <- tibble(day = seq_along(onsets), onsets = onsets) |>
- left_join(
- inf_ts |> select(day = infection_day, infections),
- by = "day"
- ) |>
- replace_na(list(infections = 0))
-
- return(onset_df)
-}
diff --git a/README.Rmd b/README.Rmd
index 9559ab4..d8528d5 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -2,9 +2,9 @@
output: github_document
---
-# Nowcasting and forecasting infectious disease dynamics
+# Understanding, evaluating, and improving forecasts of infectious disease burden,
-This repository contains the material to create the nfidd course page.
+This repository contains the material to create the course page.
All the raw material is in the folder `sessions/` and is written in
`quarto`. Any changes to the quarto files are automatically
diff --git a/_quarto.yml b/_quarto.yml
index a998455..762d063 100644
--- a/_quarto.yml
+++ b/_quarto.yml
@@ -4,9 +4,9 @@ project:
- "*.qmd"
website:
- title: "NFIDD"
+ title: "UEIFID"
site-url: https://nfidd.github.io/nfidd/
- description: "Nowcasting and forecasting infectious disease dynamics"
+ description: "Understanding, evaluating, and improving forecasts of infectious disease burden"
body-footer: ''
open-graph: true
navbar:
diff --git a/data-raw/epicurve.r b/data-raw/epicurve.r
deleted file mode 100644
index 329fe41..0000000
--- a/data-raw/epicurve.r
+++ /dev/null
@@ -1,14 +0,0 @@
-library("epichains")
-library("usethis")
-library("dplyr")
-library("ggplot2")
-
-set.seed(12345)
-infection_times <- simulate_chains(
- index_cases = 1, offspring_dist = rpois, statistic = "size", lambda = 1.5,
- pop = 10000, generation_time = function(n) rgamma(n, shape = 4)
-) |>
- select(infection_time = time) |>
- as.data.frame()
-
-usethis::use_data(infection_times, overwrite = TRUE)
diff --git a/data-raw/generate-example-forecasts.r b/data-raw/generate-example-forecasts.r
deleted file mode 100644
index 9c35a72..0000000
--- a/data-raw/generate-example-forecasts.r
+++ /dev/null
@@ -1,120 +0,0 @@
-library("dplyr")
-library("tidyr")
-library("cmdstanr")
-library("tidybayes")
-library("purrr")
-library("usethis")
-library("nfidd")
-
-set.seed(12345)
-
-# simulate data
-gen_time_pmf <- make_gen_time_pmf()
-ip_pmf <- make_ip_pmf()
-onset_df <- simulate_onsets(
- make_daily_infections(infection_times), gen_time_pmf, ip_pmf
-)
-
-# define a function to fit and forecast for a single date
-forecast_target_day <- function(
- mod, onset_df, target_day, horizon, gen_time_pmf, ip_pmf, data_to_list, ...
-) {
-
- message("Fitting and forecasting for target day: ", target_day)
-
- train_df <- onset_df |>
- filter(day <= target_day)
-
- test_df <- onset_df |>
- filter(day > target_day, day <= target_day + horizon)
-
- data <- data_to_list(train_df, horizon, gen_time_pmf, ip_pmf)
-
- fit <- mod$sample(
- data = data, chains = 4, parallel_chains = 4, adapt_delta = 0.95,
- max_treedepth = 15, iter_warmup = 1000, iter_sampling = 500, ...)
-
- forecast <- fit |>
- gather_draws(forecast[day]) |>
- filter(!is.na(.value)) |>
- slice_head(n = 1000) |>
- mutate(horizon = day) |>
- mutate(day = day + target_day) |>
- mutate(target_day = target_day) |>
- mutate(.draw = 1:n()) |>
- select(-.chain, -.iteration)
- return(forecast)
-}
-
-# define a forecast horizon
-horizon <- 14
-
-target_days <- onset_df |>
- filter(day <= max(day) - horizon) |>
- filter(day > 21) |>
- pull(day)
-
-# create forecasts every 7 days
-target_days <- target_days[seq(1, length(target_days), 7)]
-
-# load the model
-rw_mod <- nfidd_cmdstan_model("estimate-inf-and-r-rw-forecast")
-
-data_to_list_rw <- function(train_df, horizon, gen_time_pmf, ip_pmf) {
- data <- list(
- n = nrow(train_df),
- I0 = 1,
- obs = train_df$onsets,
- gen_time_max = length(gen_time_pmf),
- gen_time_pmf = gen_time_pmf,
- ip_max = length(ip_pmf) - 1,
- ip_pmf = ip_pmf,
- h = horizon
- )
- return(data)
-}
-
-rw_forecasts <- target_days |>
- map_dfr(
- \(x) forecast_target_day(
- rw_mod, onset_df, x, horizon, gen_time_pmf, ip_pmf,
- data_to_list_rw, init = \() list(init_R = 0, rw_sd = 0.01)
- )
- ) |>
- mutate(model = "Random walk")
-
-usethis::use_data(rw_forecasts, overwrite = TRUE)
-
-# AR model forecass
-stat_mod <- nfidd_cmdstan_model("statistical-r")
-
-stat_forecasts <- target_days |>
- map_dfr(
- \(x) forecast_target_day(
- stat_mod, onset_df, x, horizon, gen_time_pmf, ip_pmf, data_to_list_rw,
- init = \() list(init_R = 0, rw_sd = 0.01)
- )
- ) |>
- mutate(model = "Statistical")
-
-usethis::use_data(stat_forecasts, overwrite = TRUE)
-
-# Mechanistic model forecasts
-
-mech_mod <- nfidd_cmdstan_model("mechanistic-r")
-
-data_to_list_mech <- function(train_df, horizon, gen_time_pmf, ip_pmf) {
- data <- data_to_list_rw(train_df, horizon, gen_time_pmf, ip_pmf)
- data$N_prior <- c(10000, 2000)
- return(data)
-}
-
-mech_forecasts <- target_days |>
- map_dfr(
- \(x) forecast_target_day(
- mech_mod, onset_df, x, horizon, gen_time_pmf, ip_pmf, data_to_list_mech
- )
- ) |>
- mutate(model = "Mechanistic")
-
-usethis::use_data(mech_forecasts, overwrite = TRUE)
diff --git a/data/infection_times.rda b/data/infection_times.rda
deleted file mode 100644
index a4d31c5..0000000
Binary files a/data/infection_times.rda and /dev/null differ
diff --git a/data/mech_forecasts.rda b/data/mech_forecasts.rda
deleted file mode 100644
index 6e44a5e..0000000
Binary files a/data/mech_forecasts.rda and /dev/null differ
diff --git a/data/rw_forecasts.rda b/data/rw_forecasts.rda
deleted file mode 100644
index feb7b99..0000000
Binary files a/data/rw_forecasts.rda and /dev/null differ
diff --git a/data/stat_forecasts.rda b/data/stat_forecasts.rda
deleted file mode 100644
index 1d83e5b..0000000
Binary files a/data/stat_forecasts.rda and /dev/null differ
diff --git a/footer.html b/footer.html
index 01d2ee9..1f381bf 100644
--- a/footer.html
+++ b/footer.html
@@ -2,14 +2,10 @@
This web site and the material contained in it were originally created in support of a
short course
- on
- Nowcasting and Forecasting Infectious Disease Dynamics at
- the
- European Summer Program in Infectious Disease Analysis and Modelling run
- at Stochkolm University, Sweden. All material is under
- a MIT
+ on Understanding, evaluating, and improving forecasts of infectious disease burden, at the in Bangkok, November 2024. All material is under
+ a MIT
license. Please report any issues or suggestions for improvement on the
- corresponding GitHub
+ corresponding GitHub
issue tracker. We are always keen to hear about any uses of the material
here, so please do get in touch using the Discussion
board if you have any questions
diff --git a/getting-set-up.qmd b/getting-set-up.qmd
index 7d86e39..4e29911 100644
--- a/getting-set-up.qmd
+++ b/getting-set-up.qmd
@@ -26,34 +26,6 @@ Then you can check that the installation completed successfully by loading the p
library("nfidd")
```
-## Installing `cmdstan`
-
-The course relies on running stan through the `cmdstanr` **R** package, which itself uses the `cmdstan` software.
-This requires a separate installation step:
-
-```{r cmdstan_install, eval = FALSE}
-cmdstanr::install_cmdstan()
-```
-
-::: callout-note
-This may take a few minutes.
-Also you're likely to see lots of warnings and other messages printed to your screen - don't worry, this is normal and doesn't mean there is a problem.
-:::
-
-If there are any problems with this, you can try (on Windows) to fix them using
-
-```{r cmdstan_toolchain, eval = FALSE}
-cmdstanr::check_cmdstan_toolchain(fix = TRUE)
-```
-
-You can test that you have a working `cmdstanr` setup using
-
-```{r cmdstan_test}
-cmdstanr::cmdstan_version()
-```
-
-For more details, and for links to resources in case something goes wrong, see the [Getting Started with CmdStanr](https://mc-stan.org/cmdstanr/articles/cmdstanr.html) vignette of the package.
-
# Accessing the course
To be able to use the code in each session, you will need a local copy of the course material.
@@ -61,10 +33,10 @@ To be able to use the code in each session, you will need a local copy of the co
- Directly download the course material:
::: callout-tip
- [[**Download**]{.underline}](https://github.com/nfidd/nfidd/archive/refs/heads/main.zip)
+ [[**Download**]{.underline}](https://github.com/nfidd/ueifid/archive/refs/heads/main.zip)
:::
-- Alternatively, if you are familiar with git you can clone the [repo](https://github.com/nfidd/nfidd).
+- Alternatively, if you are familiar with git you can clone the [repo](https://github.com/nfidd/ueifid).
### Interacting with the course
@@ -72,13 +44,13 @@ In this course, all content is written using [R Notebooks](https://bookdown.org/
This means that we can combine text with code and see the output directly.
The notebooks are then directly reproduced on the course website (for example, this page).
-To interact with each session in the course, we recommend opening the RStudio Project file (`nfidd.RProj`) in the `nfidd` folder you have just downloaded.
+To interact with each session in the course, we recommend opening the RStudio Project file (`ueifid.RProj`) in the `ueifid` folder you have just downloaded.
Then you can choose to:
- View each session on the website, and copy-paste the code into your own R script.
- Tip: if you hover over each code chunk on the website you can use a "Copy" button at the top right corner.
- Open the R Notebook for each session.
- - Each notebook is saved in `nfidd/sessions/` as a `.qmd` file.
+ - Each notebook is saved in `ueifid/sessions/` as a `.qmd` file.
- Execute code with the single green "play" button at the top-right corner of each code chunk ("Run current chunk"). You can also execute code line-by-line using `Ctrl/Cmd + Enter`.
- We suggest using "Visual" view for a better experience (top-left of the RStudio pane).
diff --git a/index.qmd b/index.qmd
index d1bed31..20f0de2 100644
--- a/index.qmd
+++ b/index.qmd
@@ -1,24 +1,22 @@
---
-title: "Nowcasting and forecasting infectious disease dynamics"
+title: "Understanding, evaluating, and improving forecasts of infectious disease burden"
site: distill::distill_website
comments: false
about:
template: jolla
links:
- icon: chat-fill
- text: Dive into the course
- href: nfidd/sessions/introduction-and-course-overview.html
+ text: Go to the course
+ href: ueifid/sessions/introduction-and-course-overview.html
- icon: github
text: Visit us on GitHub
- href: https://github.com/nfidd/nfidd
+ href: https://github.com/nfidd/ueifid
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE)
```
-A course and living resource for learning about nowcasting and forecasting infectious disease dynamics.
+This course is a shorter version of a course on [Nowcasting and forecasting infectious disease dynamics](https://nfidd.github.io/nfidd/).
-We invite anyone to use the materials here in their own teaching and learning. For questions and discussion of the course or its content, we welcome all users to the [Discussion board](https://github.com/nfidd/nfidd/discussions). We welcome all forms of [contributions, additions and suggestions](https://github.com/nfidd/nfidd/issues) for improving the materials.
-
-All materials here are provided under the permissive [MIT License](https://github.com/nfidd/nfidd/blob/main/LICENSE).
+All materials here are provided under the permissive [MIT License](https://github.com/nfidd/ueifid/blob/main/LICENSE).
diff --git a/inst/stan/censored-delay-model.stan b/inst/stan/censored-delay-model.stan
deleted file mode 100644
index acebd23..0000000
--- a/inst/stan/censored-delay-model.stan
+++ /dev/null
@@ -1,28 +0,0 @@
-data {
- int n;
- array[n] int onset_to_hosp;
-}
-
-parameters {
- real meanlog;
- real sdlog;
- array[n] real onset_day_time;
- array[n] real hosp_day_time;
-}
-
-transformed parameters {
- array[n] real true_onset_to_hosp;
- for (i in 1:n) {
- true_onset_to_hosp[i] =
- onset_to_hosp[i] + hosp_day_time[i] - onset_day_time[i];
- }
-}
-
-model {
- meanlog ~ normal(0, 10);
- sdlog ~ normal(0, 10) T[0, ];
- onset_day_time ~ uniform(0, 1);
- hosp_day_time ~ uniform(0, 1);
-
- true_onset_to_hosp ~ lognormal(meanlog, sdlog);
-}
diff --git a/inst/stan/coin.stan b/inst/stan/coin.stan
deleted file mode 100644
index bfde5e3..0000000
--- a/inst/stan/coin.stan
+++ /dev/null
@@ -1,15 +0,0 @@
-data {
- int N; // integer, minimum 1
- int x; // integer, minimum 0
-}
-
-parameters {
- real theta; // real, between 0 and 1
-}
-
-model {
- // Uniform prior
- theta ~ uniform(0, 1);
- // Binomial likelihood
- x ~ binomial(N, theta);
-}
diff --git a/inst/stan/estimate-inf-and-r-rw-forecast.stan b/inst/stan/estimate-inf-and-r-rw-forecast.stan
deleted file mode 100644
index 66fbf3f..0000000
--- a/inst/stan/estimate-inf-and-r-rw-forecast.stan
+++ /dev/null
@@ -1,49 +0,0 @@
-functions {
- #include "functions/convolve_with_delay.stan"
- #include "functions/renewal.stan"
- #include "functions/geometric_random_walk.stan"
-}
-
-data {
- int n; // number of days
- int I0; // number initially infected
- array[n] int obs; // observed symptom onsets
- int gen_time_max; // maximum generation time
- array[gen_time_max] real gen_time_pmf; // pmf of generation time distribution
- int ip_max; // max incubation period
- array[ip_max + 1] real ip_pmf;
- int h; // number of days to forecast
-}
-
-transformed data {
- int m = n + h;
-}
-
-parameters {
- real init_R; // initial reproduction number
- array[m-1] real rw_noise; // random walk noise
- real rw_sd; // random walk standard deviation
-}
-
-transformed parameters {
- array[m] real R = geometric_random_walk(init_R, rw_noise, rw_sd);
- array[m] real infections = renewal(I0, R, gen_time_pmf);
- array[m] real onsets = convolve_with_delay(infections, ip_pmf);
-}
-
-model {
- // priors
- init_R ~ normal(-.1, 0.5); // Approximately Normal(1, 0.5)
- rw_noise ~ std_normal();
- rw_sd ~ normal(0, 0.05) T[0,];
- obs ~ poisson(onsets[1:n]);
-}
-
-generated quantities {
- array[h] real forecast;
- if (h > 0) {
- for (i in 1:h) {
- forecast[i] = poisson_rng(onsets[n + i]);
- }
- }
-}
diff --git a/inst/stan/estimate-inf-and-r-rw.stan b/inst/stan/estimate-inf-and-r-rw.stan
deleted file mode 100644
index 8db6868..0000000
--- a/inst/stan/estimate-inf-and-r-rw.stan
+++ /dev/null
@@ -1,35 +0,0 @@
-functions {
- #include "functions/convolve_with_delay.stan"
- #include "functions/renewal.stan"
- #include "functions/geometric_random_walk.stan"
-}
-
-data {
- int n; // number of days
- int I0; // number initially infected
- array[n] int obs; // observed symptom onsets
- int gen_time_max; // maximum generation time
- array[gen_time_max] real gen_time_pmf; // pmf of generation time distribution
- int ip_max; // max incubation period
- array[ip_max + 1] real ip_pmf;
-}
-
-parameters {
- real init_R; // initial reproduction number
- array[n-1] real rw_noise; // random walk noise
- real rw_sd; // random walk standard deviation
-}
-
-transformed parameters {
- array[n] real R = geometric_random_walk(init_R, rw_noise, rw_sd);
- array[n] real infections = renewal(I0, R, gen_time_pmf);
- array[n] real onsets = convolve_with_delay(infections, ip_pmf);
-}
-
-model {
- // priors
- init_R ~ normal(-.1, 0.5); // Approximately Normal(1, 0.5)
- rw_noise ~ std_normal();
- rw_sd ~ normal(0, 0.05) T[0,];
- obs ~ poisson(onsets);
-}
diff --git a/inst/stan/estimate-inf-and-r.stan b/inst/stan/estimate-inf-and-r.stan
deleted file mode 100644
index 0691cd0..0000000
--- a/inst/stan/estimate-inf-and-r.stan
+++ /dev/null
@@ -1,29 +0,0 @@
-functions {
- #include "functions/convolve_with_delay.stan"
- #include "functions/renewal.stan"
-}
-
-data {
- int n; // number of days
- int I0; // number initially infected
- array[n] int obs; // observed symptom onsets
- int gen_time_max; // maximum generation time
- array[gen_time_max] real gen_time_pmf; // pmf of generation time distribution
- int ip_max; // max incubation period
- array[ip_max + 1] real ip_pmf;
-}
-
-parameters {
- array[n] real R;
-}
-
-transformed parameters {
- array[n] real infections = renewal(I0, R, gen_time_pmf);
- array[n] real onsets = convolve_with_delay(infections, ip_pmf);
-}
-
-model {
- // priors
- R ~ normal(1, 1) T[0, ];
- obs ~ poisson(onsets);
-}
diff --git a/inst/stan/estimate-infections.stan b/inst/stan/estimate-infections.stan
deleted file mode 100644
index 11fd78f..0000000
--- a/inst/stan/estimate-infections.stan
+++ /dev/null
@@ -1,25 +0,0 @@
-functions {
- #include "functions/convolve_with_delay.stan"
-}
-
-data {
- int n; // number of time days
- array[n] int obs; // observed onsets
- int ip_max; // max incubation period
- // probability mass function of incubation period distribution (first index zero)
- array[ip_max + 1] real ip_pmf;
-}
-
-parameters {
- array[n] real infections;
-}
-
-transformed parameters {
- array[n] real onsets = convolve_with_delay(infections, ip_pmf);
-}
-
-model {
- // priors
- infections ~ normal(0, 10) T[0, ];
- obs ~ poisson(onsets);
-}
diff --git a/inst/stan/estimate-r.stan b/inst/stan/estimate-r.stan
deleted file mode 100644
index f576326..0000000
--- a/inst/stan/estimate-r.stan
+++ /dev/null
@@ -1,25 +0,0 @@
-functions {
- #include "functions/renewal.stan"
-}
-
-data {
- int n; // number of days
- int I0; // number initially infected
- array[n] int obs; // observed infections
- int gen_time_max; // maximum generation time
- array[gen_time_max] real gen_time_pmf; // pmf of generation time distribution
-}
-
-parameters {
- array[n] real R;
-}
-
-transformed parameters {
- array[n] real infections = renewal(I0, R, gen_time_pmf);
-}
-
-model {
- // priors
- R ~ normal(1, 1) T[0, ];
- obs ~ poisson(infections);
-}
diff --git a/inst/stan/functions/combine_obs_with_predicted_obs_rng.stan b/inst/stan/functions/combine_obs_with_predicted_obs_rng.stan
deleted file mode 100644
index 7ae78be..0000000
--- a/inst/stan/functions/combine_obs_with_predicted_obs_rng.stan
+++ /dev/null
@@ -1,25 +0,0 @@
-array[] int combine_obs_with_predicted_obs_rng(array[] int obs, array[] real complete_onsets_by_report, array[] int P, array[] int p, int d, array[] int D) {
- int n = num_elements(p);
- array[n] int reported_onsets = rep_array(0, n);
- for (i in 1:n) {
- int missing_reports = d - p[i];
- int P_index = 0;
- int D_index = 0;
- if (i != 1) {
- P_index = P[i - 1];
- D_index = D[i - 1];
- }
- if (missing_reports != d) {
- for (j in 1:p[i]) {
- reported_onsets[i] += obs[P_index + j];
- }
- }
- if (missing_reports > 0) {
- for (j in 1:missing_reports) {
- reported_onsets[i] += poisson_rng(complete_onsets_by_report[D_index + p[i] + j]);
- }
- }
- }
- return reported_onsets;
- }
-
\ No newline at end of file
diff --git a/inst/stan/functions/condition_onsets_by_report.stan b/inst/stan/functions/condition_onsets_by_report.stan
deleted file mode 100644
index 4713a4d..0000000
--- a/inst/stan/functions/condition_onsets_by_report.stan
+++ /dev/null
@@ -1,13 +0,0 @@
-array[] real condition_onsets_by_report(array[] real onsets, array[] real delay) {
- // length of time series
- int n = num_elements(onsets);
- // length of delay
- int p = num_elements(delay);
-
- array[n] real onsets_with_delay = onsets;
-
- for (i in 1:min(p, n)) {
- onsets_with_delay[n - i + 1] *= delay[i];
- }
- return onsets_with_delay;
-}
\ No newline at end of file
diff --git a/inst/stan/functions/convolve_with_delay.stan b/inst/stan/functions/convolve_with_delay.stan
deleted file mode 100644
index 5599b5a..0000000
--- a/inst/stan/functions/convolve_with_delay.stan
+++ /dev/null
@@ -1,14 +0,0 @@
-array[] real convolve_with_delay(array[] real ts, array[] real delay) {
- // length of time series
- int n = num_elements(ts);
- int max_delay = num_elements(delay) - 1;
- array[n] real ret;
- for (i in 1:n) {
- int first_index = max(1, i - max_delay);
- int len = i - first_index + 1;
- array[len] real pmf = reverse(delay)[1:len];
- array[i - first_index + 1] real ts_segment = ts[first_index:i];
- ret[i] = dot_product(ts_segment, pmf);
- }
- return(ret);
-}
diff --git a/inst/stan/functions/geometric_diff_ar.stan b/inst/stan/functions/geometric_diff_ar.stan
deleted file mode 100644
index 7ad449e..0000000
--- a/inst/stan/functions/geometric_diff_ar.stan
+++ /dev/null
@@ -1,10 +0,0 @@
-array[] real geometric_diff_ar(real init, array[] real noise, real std, real damp) {
- int n = num_elements(noise) + 1;
- array[n] real x;
- x[1] = init;
- x[2] = init + noise[1] * std;
- for (i in 3:n) {
- x[i] = x[i - 1] + damp * (x[i -1] - x[i- 2]) + noise[i - 1] * std;
- }
- return exp(x);
-}
diff --git a/inst/stan/functions/geometric_random_walk.stan b/inst/stan/functions/geometric_random_walk.stan
deleted file mode 100644
index 3ae4d84..0000000
--- a/inst/stan/functions/geometric_random_walk.stan
+++ /dev/null
@@ -1,9 +0,0 @@
-array[] real geometric_random_walk(real init, array[] real noise, real std) {
- int n = num_elements(noise) + 1;
- array[n] real x;
- x[1] = init;
- for (i in 2:n) {
- x[i] = x[i-1] + noise[i-1] * std;
- }
- return exp(x);
-}
diff --git a/inst/stan/functions/observe_onsets_with_delay.stan b/inst/stan/functions/observe_onsets_with_delay.stan
deleted file mode 100644
index 5d7b7a5..0000000
--- a/inst/stan/functions/observe_onsets_with_delay.stan
+++ /dev/null
@@ -1,17 +0,0 @@
-array[] real observe_onsets_with_delay(array[] real onsets, vector reporting_delay, array[] int P, array[] int p) {
- int n = num_elements(onsets);
- int m = sum(p);
- array[m] real onsets_by_report;
- for (i in 1:n) {
- int obs_index;
- if (i == 1) {
- obs_index = 0;
- } else{
- obs_index = P[i - 1];
- }
- for (j in 1:p[i]) {
- onsets_by_report[obs_index + j] = onsets[i] * reporting_delay[j];
- }
- }
- return onsets_by_report;
-}
diff --git a/inst/stan/functions/pop_bounded_renewal.stan b/inst/stan/functions/pop_bounded_renewal.stan
deleted file mode 100644
index 92c18ee..0000000
--- a/inst/stan/functions/pop_bounded_renewal.stan
+++ /dev/null
@@ -1,16 +0,0 @@
-array[] real pop_bounded_renewal(real I0, real R, array[] real gen_time, real N, int n) {
- int max_gen_time = num_elements(gen_time);
- array[n + 1] real I;
- I[1] = I0;
- real S = N - I0;
-
- for (i in 1:n) {
- int first_index = max(1, i - max_gen_time + 1);
- int len = i - first_index + 1;
- array[len] real I_segment = I[first_index:i];
- array[len] real gen_pmf = reverse(gen_time[1:len]);
- I[i + 1] = S / N * dot_product(I_segment, gen_pmf) * R;
- S = S - I[i + 1];
- }
- return(I[2:(n + 1)]);
-}
diff --git a/inst/stan/functions/renewal.stan b/inst/stan/functions/renewal.stan
deleted file mode 100644
index c1ff7ce..0000000
--- a/inst/stan/functions/renewal.stan
+++ /dev/null
@@ -1,15 +0,0 @@
-array[] real renewal(real I0, array[] real R, array[] real gen_time) {
- // length of time series
- int n = num_elements(R);
- int max_gen_time = num_elements(gen_time);
- array[n + 1] real I;
- I[1] = I0;
- for (i in 1:n) {
- int first_index = max(1, i - max_gen_time + 1);
- int len = i - first_index + 1;
- array[len] real I_segment = I[first_index:i];
- array[len] real gen_pmf = reverse(gen_time[1:len]);
- I[i + 1] = dot_product(I_segment, gen_pmf) * R[i];
- }
- return(I[2:(n + 1)]);
-}
diff --git a/inst/stan/gamma.stan b/inst/stan/gamma.stan
deleted file mode 100644
index 6acd54f..0000000
--- a/inst/stan/gamma.stan
+++ /dev/null
@@ -1,16 +0,0 @@
-// gamma_model.stan
-data {
- int N;
- array[N] real y;
-}
-
-parameters {
- real alpha;
- real beta;
-}
-
-model {
- alpha ~ normal(0, 10) T[0,];
- beta ~ normal(0, 10) T[0,];
- y ~ gamma(alpha, beta);
-}
diff --git a/inst/stan/joint-nowcast-with-r.stan b/inst/stan/joint-nowcast-with-r.stan
deleted file mode 100644
index 7dd11b3..0000000
--- a/inst/stan/joint-nowcast-with-r.stan
+++ /dev/null
@@ -1,56 +0,0 @@
-functions {
- #include "functions/geometric_random_walk.stan"
- #include "functions/renewal.stan"
- #include "functions/convolve_with_delay.stan"
- #include "functions/observe_onsets_with_delay.stan"
- #include "functions/combine_obs_with_predicted_obs_rng.stan"
-}
-
-data {
- int n; // number of days
- int m; // number of reports
- array[n] int p; // number of observations per day
- array[m] int obs; // observed symptom onsets
- int d; // number of reporting delays
- int gen_time_max; // maximum generation time
- array[gen_time_max] real gen_time_pmf; // pmf of generation time distribution
- int ip_max; // max incubation period
- array[ip_max + 1] real ip_pmf;
-}
-
-transformed data{
- array[n] int P = to_int(cumulative_sum(p));
- array[n] int D = to_int(cumulative_sum(rep_array(d, n)));
-}
-
-parameters {
- real init_I; // initial number of infected
- real init_R; // initial reproduction number
- array[n-1] real rw_noise; // random walk noise
- real rw_sd; // random walk standard deviation
- simplex[d] reporting_delay; // reporting delay distribution
-}
-
-transformed parameters {
- array[n] real R = geometric_random_walk(init_R, rw_noise, rw_sd);
- array[n] real infections = renewal(init_I, R, gen_time_pmf);
- array[n] real onsets = convolve_with_delay(infections, ip_pmf);
- array[m] real onsets_by_report = observe_onsets_with_delay(onsets, reporting_delay, P, p);
-}
-
-model {
- // Prior
- init_I ~ lognormal(-1, 1);
- init_R ~ normal(-.1, 0.5); // Approximately Normal(1, 0.5)
- rw_noise ~ std_normal();
- rw_sd ~ normal(0, 0.05) T[0,];
- reporting_delay ~ dirichlet(rep_vector(1, d));
- // Likelihood
- obs ~ poisson(onsets_by_report);
-}
-
-generated quantities {
- array[d*n] real complete_onsets_by_report = observe_onsets_with_delay(onsets, reporting_delay, D, rep_array(d, n));
- array[n] int nowcast = combine_obs_with_predicted_obs_rng(obs, complete_onsets_by_report, P, p, d, D);
-}
-
diff --git a/inst/stan/joint-nowcast.stan b/inst/stan/joint-nowcast.stan
deleted file mode 100644
index b5c91eb..0000000
--- a/inst/stan/joint-nowcast.stan
+++ /dev/null
@@ -1,46 +0,0 @@
-functions {
- #include "functions/geometric_random_walk.stan"
- #include "functions/observe_onsets_with_delay.stan"
- #include "functions/combine_obs_with_predicted_obs_rng.stan"
-}
-
-data {
- int n; // number of days
- int m; // number of reports
- array[n] int p; // number of observations per day
- array[m] int obs; // observed symptom onsets
- int d; // number of reporting delays
-}
-
-transformed data{
- array[n] int P = to_int(cumulative_sum(p));
- array[n] int D = to_int(cumulative_sum(rep_array(d, n)));
-}
-
-parameters {
- real init_onsets;
- array[n-1] real rw_noise;
- real rw_sd;
- simplex[d] reporting_delay; // reporting delay distribution
-}
-
-transformed parameters {
- array[n] real onsets = geometric_random_walk(init_onsets, rw_noise, rw_sd);
- array[m] real onsets_by_report = observe_onsets_with_delay(onsets, reporting_delay, P, p);
-}
-
-model {
- // Prior
- init_onsets ~ normal(1, 1) T[0,];
- rw_noise ~ std_normal();
- rw_sd ~ normal(0, 0.1) T[0,];
- reporting_delay ~ dirichlet(rep_vector(1, d));
- // Likelihood
- obs ~ poisson(onsets_by_report);
-}
-
-generated quantities {
- array[d*n] real complete_onsets_by_report = observe_onsets_with_delay(onsets, reporting_delay, D, rep_array(d, n));
- array[n] int nowcast = combine_obs_with_predicted_obs_rng(obs, complete_onsets_by_report, P, p, d, D);
-}
-
diff --git a/inst/stan/lognormal.stan b/inst/stan/lognormal.stan
deleted file mode 100644
index 58f83e3..0000000
--- a/inst/stan/lognormal.stan
+++ /dev/null
@@ -1,17 +0,0 @@
-// lognormal_model.stan
-data {
- int n; // number of data points
- array[n] real y; // data
-}
-
-parameters {
- real meanlog;
- real sdlog;
-}
-
-model {
- meanlog ~ normal(0, 10); // prior distribution
- sdlog ~ normal(0, 10) T[0, ]; // prior distribution
-
- y ~ lognormal(meanlog, sdlog);
-}
diff --git a/inst/stan/mechanistic-r.stan b/inst/stan/mechanistic-r.stan
deleted file mode 100644
index 076d2b9..0000000
--- a/inst/stan/mechanistic-r.stan
+++ /dev/null
@@ -1,46 +0,0 @@
-functions {
- #include "functions/convolve_with_delay.stan"
- #include "functions/pop_bounded_renewal.stan"
-}
-
-data {
- int n; // number of days
- int I0; // number initially infected
- array[n] int obs; // observed symptom onsets
- int gen_time_max; // maximum generation time
- array[gen_time_max] real gen_time_pmf; // pmf of generation time distribution
- int ip_max; // max incubation period
- array[ip_max + 1] real ip_pmf;
- int h; // number of days to forecast
- array[2] real N_prior; // prior for total population
-}
-
-transformed data {
- int m = n + h;
-}
-
-parameters {
- real R; // initial reproduction number
- real N; // total population
-}
-
-transformed parameters {
- array[m] real infections = pop_bounded_renewal(I0, R, gen_time_pmf, N, m);
- array[m] real onsets = convolve_with_delay(infections, ip_pmf);
-}
-
-model {
- // priors
- R ~ normal(1, 0.5) T[0,];
- N ~ normal(N_prior[1], N_prior[2]) T[0,];
- obs ~ poisson(onsets[1:n]);
-}
-
-generated quantities {
- array[h] real forecast;
- if (h > 0) {
- for (i in 1:h) {
- forecast[i] = poisson_rng(onsets[n + i]);
- }
- }
-}
diff --git a/inst/stan/simple-nowcast-rw.stan b/inst/stan/simple-nowcast-rw.stan
deleted file mode 100644
index 18bdc58..0000000
--- a/inst/stan/simple-nowcast-rw.stan
+++ /dev/null
@@ -1,30 +0,0 @@
-functions {
- #include "functions/geometric_random_walk.stan"
- #include "functions/condition_onsets_by_report.stan"
-}
-
-data {
- int n; // number of days
- array[n] int obs; // observed symptom onsets
- int report_max; // max reporting delay
- array[report_max + 1] real report_cdf;
-}
-
-parameters {
- real init_onsets;
- array[n-1] real rw_noise;
- real rw_sd;
-}
-
-transformed parameters {
- array[n] real onsets = geometric_random_walk(init_onsets, rw_noise, rw_sd);
- array[n] real reported_onsets = condition_onsets_by_report(onsets, report_cdf);
-}
-
-model {
- init_onsets ~ lognormal(0, 1) T[0,];
- rw_noise ~ std_normal();
- rw_sd ~ normal(0, 5) T[0,];
- //Likelihood
- obs ~ poisson(reported_onsets);
-}
diff --git a/inst/stan/simple-nowcast.stan b/inst/stan/simple-nowcast.stan
deleted file mode 100644
index 8e00c99..0000000
--- a/inst/stan/simple-nowcast.stan
+++ /dev/null
@@ -1,24 +0,0 @@
-functions {
- #include "functions/condition_onsets_by_report.stan"
-}
-
-data {
- int n; // number of days
- array[n] int obs; // observed symptom onsets
- int report_max; // max reporting delay
- array[report_max + 1] real report_cdf;
-}
-
-parameters {
- array[n] real onsets;
-}
-
-transformed parameters {
- array[n] real reported_onsets = condition_onsets_by_report(onsets, report_cdf);
-}
-
-model {
- onsets ~ normal(5, 20) T[0,];
- // Likelihood
- obs ~ poisson(reported_onsets);
-}
diff --git a/inst/stan/statistical-r.stan b/inst/stan/statistical-r.stan
deleted file mode 100644
index a90fda6..0000000
--- a/inst/stan/statistical-r.stan
+++ /dev/null
@@ -1,51 +0,0 @@
-functions {
- #include "functions/convolve_with_delay.stan"
- #include "functions/renewal.stan"
- #include "functions/geometric_diff_ar.stan"
-}
-
-data {
- int n; // number of days
- int I0; // number initially infected
- array[n] int obs; // observed symptom onsets
- int gen_time_max; // maximum generation time
- array[gen_time_max] real gen_time_pmf; // pmf of generation time distribution
- int ip_max; // max incubation period
- array[ip_max + 1] real ip_pmf;
- int h; // number of days to forecast
-}
-
-transformed data {
- int m = n + h;
-}
-
-parameters {
- real init_R; // initial reproduction number
- array[m-1] real rw_noise; // random walk noise
- real rw_sd; // random walk standard deviation
- real damp; // damping
-}
-
-transformed parameters {
- array[m] real R = geometric_diff_ar(init_R, rw_noise, rw_sd, damp);
- array[m] real infections = renewal(I0, R, gen_time_pmf);
- array[m] real onsets = convolve_with_delay(infections, ip_pmf);
-}
-
-model {
- // priors
- init_R ~ normal(-.1, 0.5); // Approximately Normal(1, 0.5)
- rw_noise ~ std_normal();
- rw_sd ~ normal(0, 0.01) T[0,];
- damp ~ normal(1, 0.05) T[0, 1];
- obs ~ poisson(onsets[1:n]);
-}
-
-generated quantities {
- array[h] real forecast;
- if (h > 0) {
- for (i in 1:h) {
- forecast[i] = poisson_rng(onsets[n + i]);
- }
- }
-}
diff --git a/inst/stan/truncated-delay-model.stan b/inst/stan/truncated-delay-model.stan
deleted file mode 100644
index 8f13558..0000000
--- a/inst/stan/truncated-delay-model.stan
+++ /dev/null
@@ -1,19 +0,0 @@
-data {
- int n;
- array[n] real onset_to_hosp;
- array[n] real time_since_onset;
-}
-
-parameters {
- real meanlog;
- real sdlog;
-}
-
-model {
- meanlog ~ normal(0, 10);
- sdlog ~ normal(0, 10) T[0, ];
-
- for (i in 1:n) {
- onset_to_hosp[i] ~ lognormal(meanlog, sdlog) T[0, time_since_onset[i]];
- }
-}
diff --git a/learning_objectives.qmd b/learning_objectives.qmd
index 1030ee7..1db341c 100644
--- a/learning_objectives.qmd
+++ b/learning_objectives.qmd
@@ -3,42 +3,6 @@ title: "Independent learning outcomes"
comments: false
---
-## R and statistical concepts used
-
-- familiarity with R concepts used in the course
- - to be completed once the course has been fully written but likely includes functions, accessing documentation, etc.
-- understanding of statistical concepts used in the course
- - to be completed once the course has been fully written but likely includes discrete and continuous probability distributions
-
-## Delay distributions
-
-- understanding of the ubiquity of delays in epidemiological data
-- understanding of how delays affect population-level epidemiological data via discrete convolutions
-- ability to apply convolutions of discrete probability distributions to epidemiological data in R
-
-## Biases in delay distributions
-
-- understanding of how censoring affects the estimation and interpretation of epidemiological delay distributions
-- ability to estimate parameters of probability distributions from observed delays, taking into account censoring, using R
-- understanding of right truncation in epidemiolgical data
-- ability to estimate parameters of probability distributions from observed delays, taking into account truncation, in R
-
-## $R_t$ estimation and the renewal equation
-
-- understanding of the reproduction number and challenges in its estimation
-- awareness of broad categories of methods for estimating the reproduction number, including estimation from population-level data
-- understanding of the renewal equation as an epidemiological model
-- awareness of connections of the renewal equation with other epidemiological models
-- familiarity with the generation time as a particular type of delay distributions
-- ability to estimate static and time-varying reproduction numbers from time-series data in R
-
-## Nowcasting
-
-- understanding of nowcasting as a particular right truncation problem
-- Ability to perform a simple nowcast in R
-- awareness of the breadth of methods to perform nowcasting
-- $R_t$ estimation as a nowcasting problem
-
## Forecasting
- understanding of forecasting as an epidemiological problem, and its relationship with nowcasting and $R_t$ estimation
@@ -47,12 +11,14 @@ comments: false
- ability to use a common forecasting model on an epidemiological time series in R
- ability to use a semi-mechanistic model for forecasting an epidemiological time series in R
+## Evaluating forecasts
+
+- familiarity with metrics for evaluating probabilistic forecasts and their properties
+- ability to score probabilistic forecasts in R
+
## Ensemble models
- understanding of predictive ensembles and their properties
- ability to create a predictive ensemble of forecasts in R
+- understanding the concept of weighted forecast ensembles
-## Evaluating forecasts (and nowcasts)
-
-- familiarity with metrics for evaluating probabilistic forecasts and their properties
-- ability to score probabilistic forecasts in R
diff --git a/man/add_delays.Rd b/man/add_delays.Rd
deleted file mode 100644
index e3f46bd..0000000
--- a/man/add_delays.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/add_delays.r
-\name{add_delays}
-\alias{add_delays}
-\title{Simulate symptom onset and hospitalization times from infection times}
-\usage{
-add_delays(infection_times)
-}
-\arguments{
-\item{infection_times}{A data frame containing a column of infection times}
-}
-\value{
-A data frame with columns for infection time, onset time, and
- hospitalization time (with 70% of hospitalizations set to NA)
-}
-\description{
-Simulate symptom onset and hospitalization times from infection times
-}
-\examples{
-add_delays(infection_times)
-}
diff --git a/man/censored_delay_pmf.Rd b/man/censored_delay_pmf.Rd
deleted file mode 100644
index 92d63ee..0000000
--- a/man/censored_delay_pmf.Rd
+++ /dev/null
@@ -1,29 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/censored-delay-pmf.r
-\name{censored_delay_pmf}
-\alias{censored_delay_pmf}
-\title{Discretise a Continuous Delay Distribution}
-\usage{
-censored_delay_pmf(rgen, max, n = 1e+06, ...)
-}
-\arguments{
-\item{rgen}{A function that generates random delays, e.g., `rgamma`,
-`rlnorm`.}
-
-\item{max}{The maximum delay.}
-
-\item{n}{The number of replicates to simulate. Defaults to `1e+6`.}
-
-\item{...}{Additional parameters of the delay distribution.}
-}
-\value{
-A vector of probabilities corresponding to discrete indices from
- `0` to `max`, representing the discretised delay distribution.
-}
-\description{
-This function discretises a continuous delay distribution into a probability
-mass function (PMF) over discrete days.
-}
-\examples{
-censored_delay_pmf(rgen = rgamma, max = 14, shape = 5, rate = 1)
-}
diff --git a/man/convolve_with_delay.Rd b/man/convolve_with_delay.Rd
deleted file mode 100644
index 7e63e6a..0000000
--- a/man/convolve_with_delay.Rd
+++ /dev/null
@@ -1,25 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/convolve-with-delay.r
-\name{convolve_with_delay}
-\alias{convolve_with_delay}
-\title{Convolve a Time Series with a Delay Distribution}
-\usage{
-convolve_with_delay(ts, delay_pmf)
-}
-\arguments{
-\item{ts}{A vector of the time series to convolve.}
-
-\item{delay_pmf}{The probability mass function of the delay, given as a
-vector of probabilities, corresponding to discrete indices 0, 1, 2 of the
-discretised delay distribution.}
-}
-\value{
-A vector of the convolved time series.
-}
-\description{
-This function convolves a time series with a delay distribution given as a
-probability mass function (PMF).
-}
-\examples{
-convolve_with_delay(ts = c(10, 14, 10, 10), delay_pmf = c(0.1, 0.6, 0.3))
-}
diff --git a/man/dot-extract_stan_functions.Rd b/man/dot-extract_stan_functions.Rd
deleted file mode 100644
index d43411f..0000000
--- a/man/dot-extract_stan_functions.Rd
+++ /dev/null
@@ -1,24 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/nfidd-stan-tools.r
-\name{.extract_stan_functions}
-\alias{.extract_stan_functions}
-\title{Extract function names or content from Stan code}
-\usage{
-.extract_stan_functions(content, names_only = FALSE, functions = NULL)
-}
-\arguments{
-\item{content}{Character vector containing Stan code}
-
-\item{names_only}{Logical, if TRUE extract function names, otherwise
-extract function content.}
-
-\item{functions}{Optional, character vector of function names to extract
-content for.}
-}
-\value{
-Character vector of function names or content
-}
-\description{
-Extract function names or content from Stan code
-}
-\keyword{internal}
diff --git a/man/geometric_diff_ar.Rd b/man/geometric_diff_ar.Rd
deleted file mode 100644
index 647204a..0000000
--- a/man/geometric_diff_ar.Rd
+++ /dev/null
@@ -1,27 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/geometric-diff-ar.r
-\name{geometric_diff_ar}
-\alias{geometric_diff_ar}
-\title{Geometric Differenced Autoregressive Process}
-\usage{
-geometric_diff_ar(init, noise, std, damp)
-}
-\arguments{
-\item{init}{The initial value.}
-
-\item{noise}{A vector of steps (on the standard normal scale).}
-
-\item{std}{The step size of the random walk.}
-
-\item{damp}{A damping parameter.}
-}
-\value{
-A vector of the generated geometric differenced autoregressive
-process.
-}
-\description{
-This function generates a geometric differenced autoregressive process.
-}
-\examples{
-geometric_diff_ar(init = 1, noise = rnorm(100), phi = 0.1, std = 0.1)
-}
diff --git a/man/geometric_random_walk.Rd b/man/geometric_random_walk.Rd
deleted file mode 100644
index cb6bf2c..0000000
--- a/man/geometric_random_walk.Rd
+++ /dev/null
@@ -1,24 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/geometric-random-walk.r
-\name{geometric_random_walk}
-\alias{geometric_random_walk}
-\title{Geometric Random Walk}
-\usage{
-geometric_random_walk(init, noise, std)
-}
-\arguments{
-\item{init}{The initial value.}
-
-\item{noise}{A vector of steps (on the standard normal scale).}
-
-\item{std}{The step size of the random walk.}
-}
-\value{
-A vector of the generated geometric random walk.
-}
-\description{
-This function generates a geometric random walk.
-}
-\examples{
-geometric_random_walk(init = 1, noise = rnorm(100), std = 0.1)
-}
diff --git a/man/make_daily_infections.Rd b/man/make_daily_infections.Rd
deleted file mode 100644
index 0ccfb24..0000000
--- a/man/make_daily_infections.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/make-daily-infections.r
-\name{make_daily_infections}
-\alias{make_daily_infections}
-\title{Convert infection times to a daily time series}
-\usage{
-make_daily_infections(infection_times)
-}
-\arguments{
-\item{infection_times}{A data frame containing a column of infection times}
-}
-\value{
-A data frame with columns infection_day and infections, containing
- the daily count of infections
-}
-\description{
-Convert infection times to a daily time series
-}
-\examples{
-make_daily_infections(infection_times)
-}
diff --git a/man/make_gen_time_pmf.Rd b/man/make_gen_time_pmf.Rd
deleted file mode 100644
index 493d8c1..0000000
--- a/man/make_gen_time_pmf.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/simulate_onsets.r
-\name{make_gen_time_pmf}
-\alias{make_gen_time_pmf}
-\title{Generate a probability mass function for the generation time}
-\usage{
-make_gen_time_pmf(max = 14, shape = 4, rate = 1)
-}
-\arguments{
-\item{max}{Maximum delay to consider}
-
-\item{shape}{Shape parameter for the gamma distribution}
-
-\item{rate}{Rate parameter for the gamma distribution}
-}
-\value{
-A vector of probabilities representing the generation time PMF
-}
-\description{
-Generate a probability mass function for the generation time
-}
diff --git a/man/make_ip_pmf.Rd b/man/make_ip_pmf.Rd
deleted file mode 100644
index 6d85d27..0000000
--- a/man/make_ip_pmf.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/simulate_onsets.r
-\name{make_ip_pmf}
-\alias{make_ip_pmf}
-\title{Generate a probability mass function for the incubation period}
-\usage{
-make_ip_pmf(max = 14, shape = 5, rate = 1)
-}
-\arguments{
-\item{max}{Maximum delay to consider}
-
-\item{shape}{Shape parameter for the gamma distribution}
-
-\item{rate}{Rate parameter for the gamma distribution}
-}
-\value{
-A vector of probabilities representing the incubation period PMF
-}
-\description{
-Generate a probability mass function for the incubation period
-}
diff --git a/man/nfidd_cmdstan_model.Rd b/man/nfidd_cmdstan_model.Rd
deleted file mode 100644
index d255261..0000000
--- a/man/nfidd_cmdstan_model.Rd
+++ /dev/null
@@ -1,33 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/nfidd-stan-tools.r
-\name{nfidd_cmdstan_model}
-\alias{nfidd_cmdstan_model}
-\title{Create a CmdStanModel with NFIDD Stan functions}
-\usage{
-nfidd_cmdstan_model(model_name, include_paths = nfidd::nfidd_stan_path(), ...)
-}
-\arguments{
-\item{model_name}{Character string specifying which Stan model to use.}
-
-\item{include_paths}{Character vector of paths to include for Stan
-compilation. Defaults to the result of `nfidd_stan_path()`.}
-
-\item{...}{Additional arguments passed to cmdstanr::cmdstan_model().}
-}
-\value{
-A CmdStanModel object.
-}
-\description{
-This function creates a CmdStanModel object using a specified Stan model from
-the NFIDD package and optionally includes additional user-specified Stan
-files.
-}
-\examples{
-\dontshow{if (requireNamespace("cmdstanr", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-if (!is.null(cmdstanr::cmdstan_version(error_on_NA = FALSE))) {
- model <- nfidd_cmdstan_model("renewal", compile = FALSE)
- model
-}
-\dontshow{\}) # examplesIf}
-}
-\concept{modelhelpers}
diff --git a/man/nfidd_load_stan_functions.Rd b/man/nfidd_load_stan_functions.Rd
deleted file mode 100644
index 6e8ec1b..0000000
--- a/man/nfidd_load_stan_functions.Rd
+++ /dev/null
@@ -1,43 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/nfidd-stan-tools.r
-\name{nfidd_load_stan_functions}
-\alias{nfidd_load_stan_functions}
-\title{Load Stan functions as a string}
-\usage{
-nfidd_load_stan_functions(
- functions = NULL,
- stan_path = nfidd::nfidd_stan_path(),
- wrap_in_block = FALSE,
- write_to_file = FALSE,
- output_file = "nfidd_functions.stan"
-)
-}
-\arguments{
-\item{functions}{Character vector of function names to load. Defaults to all
-functions.}
-
-\item{stan_path}{Character string, the path to the Stan code. Defaults to the
-path to the Stan code in the nfidd package.}
-
-\item{wrap_in_block}{Logical, whether to wrap the functions in a
-`functions{}` block. Default is FALSE.}
-
-\item{write_to_file}{Logical, whether to write the output to a file. Default
-is FALSE.}
-
-\item{output_file}{Character string, the path to write the output file if
-write_to_file is TRUE. Defaults to "nfidd_functions.stan".}
-}
-\value{
-A character string containing the requested Stan functions
-}
-\description{
-Load Stan functions as a string
-}
-\seealso{
-Other stantools:
-\code{\link{nfidd_stan_function_files}()},
-\code{\link{nfidd_stan_functions}()},
-\code{\link{nfidd_stan_path}()}
-}
-\concept{stantools}
diff --git a/man/nfidd_stan_function_files.Rd b/man/nfidd_stan_function_files.Rd
deleted file mode 100644
index 5c16713..0000000
--- a/man/nfidd_stan_function_files.Rd
+++ /dev/null
@@ -1,33 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/nfidd-stan-tools.r
-\name{nfidd_stan_function_files}
-\alias{nfidd_stan_function_files}
-\title{Get Stan files containing specified functions}
-\usage{
-nfidd_stan_function_files(
- functions = NULL,
- stan_path = nfidd::nfidd_stan_path()
-)
-}
-\arguments{
-\item{functions}{Character vector of function names to search for. If NULL,
-all Stan files are returned.}
-
-\item{stan_path}{Character string specifying the path to the directory
-containing Stan files. Defaults to the Stan path of the nfidd
-package.}
-}
-\value{
-A character vector of file paths to Stan files.
-}
-\description{
-This function retrieves Stan files from a specified directory, optionally
-filtering for files that contain specific functions.
-}
-\seealso{
-Other stantools:
-\code{\link{nfidd_load_stan_functions}()},
-\code{\link{nfidd_stan_functions}()},
-\code{\link{nfidd_stan_path}()}
-}
-\concept{stantools}
diff --git a/man/nfidd_stan_functions.Rd b/man/nfidd_stan_functions.Rd
deleted file mode 100644
index e873bb5..0000000
--- a/man/nfidd_stan_functions.Rd
+++ /dev/null
@@ -1,28 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/nfidd-stan-tools.r
-\name{nfidd_stan_functions}
-\alias{nfidd_stan_functions}
-\title{Get Stan function names from Stan files}
-\usage{
-nfidd_stan_functions(stan_path = nfidd::nfidd_stan_path())
-}
-\arguments{
-\item{stan_path}{Character string specifying the path to the directory
-containing Stan files. Defaults to the Stan path of the nfidd
-package.}
-}
-\value{
-A character vector containing unique names of all functions found in
-the Stan files.
-}
-\description{
-This function reads all Stan files in the specified directory and extracts
-the names of all functions defined in those files.
-}
-\seealso{
-Other stantools:
-\code{\link{nfidd_load_stan_functions}()},
-\code{\link{nfidd_stan_function_files}()},
-\code{\link{nfidd_stan_path}()}
-}
-\concept{stantools}
diff --git a/man/nfidd_stan_models.Rd b/man/nfidd_stan_models.Rd
deleted file mode 100644
index 4fe5f17..0000000
--- a/man/nfidd_stan_models.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/nfidd-stan-tools.r
-\name{nfidd_stan_models}
-\alias{nfidd_stan_models}
-\title{List Available Stan Models in NFIDD}
-\usage{
-nfidd_stan_models(stan_path = nfidd::nfidd_stan_path())
-}
-\arguments{
-\item{stan_path}{Character string specifying the path to Stan files. Defaults
-to the result of `nfidd_stan_path()`.}
-}
-\value{
-A character vector of available Stan model names.
-}
-\description{
-This function finds all available Stan models in the NFIDD package and
-returns their names without the .stan extension.
-}
-\examples{
-nfidd_list_stan_models()
-}
diff --git a/man/nfidd_stan_path.Rd b/man/nfidd_stan_path.Rd
deleted file mode 100644
index 6d242bc..0000000
--- a/man/nfidd_stan_path.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/nfidd-stan-tools.r
-\name{nfidd_stan_path}
-\alias{nfidd_stan_path}
-\title{Get the path to Stan code}
-\usage{
-nfidd_stan_path()
-}
-\value{
-A character string with the path to the Stan code
-}
-\description{
-Get the path to Stan code
-}
-\seealso{
-Other stantools:
-\code{\link{nfidd_load_stan_functions}()},
-\code{\link{nfidd_stan_function_files}()},
-\code{\link{nfidd_stan_functions}()}
-}
-\concept{stantools}
diff --git a/man/renewal.Rd b/man/renewal.Rd
deleted file mode 100644
index e7447fa..0000000
--- a/man/renewal.Rd
+++ /dev/null
@@ -1,31 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/renewal.r
-\name{renewal}
-\alias{renewal}
-\title{Simulate Infections using the Renewal Equation}
-\usage{
-renewal(I0, R, gen_time)
-}
-\arguments{
-\item{I0}{The initial number of infections.}
-
-\item{R}{The reproduction number, given as a vector with one entry per time
-point.}
-
-\item{gen_time}{The generation time distribution, given as a vector with one
-entry per day after infection (the first element corresponding to one day
-after infection).}
-}
-\value{
-A vector of simulated infections over time.
-}
-\description{
-This function simulates infections using the renewal equation.
-}
-\examples{
-renewal(
- I0 = 5,
- R = c(rep(3, 4), rep(0.5, 5)),
- gen_time = c(0.1, 0.2, 0.3, 0.2, 0.1)
-)
-}
diff --git a/man/simulate_onsets.Rd b/man/simulate_onsets.Rd
deleted file mode 100644
index 64769d6..0000000
--- a/man/simulate_onsets.Rd
+++ /dev/null
@@ -1,34 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/simulate_onsets.r
-\name{simulate_onsets}
-\alias{simulate_onsets}
-\title{Simulate symptom onsets from daily infection counts}
-\usage{
-simulate_onsets(
- inf_ts,
- gen_time_pmf = make_gen_time_pmf(),
- ip_pmf = make_ip_pmf()
-)
-}
-\arguments{
-\item{inf_ts}{A data frame containing columns infection_day and infections,
-as returned by `make_daily_infections()`.}
-
-\item{gen_time_pmf}{A vector of probabilities representing the generation
-time PMF, as returned by `make_gen_time_pmf()`.}
-
-\item{ip_pmf}{A vector of probabilities representing the incubation period
-PMF, as returned by `make_ip_pmf()`.}
-}
-\value{
-A data frame with columns day, onsets, and infections containing
- the daily count of symptom onsets and infections
-}
-\description{
-Simulate symptom onsets from daily infection counts
-}
-\examples{
-gt_pmf <- make_gen_time_pmf()
-ip_pmf <- make_ip_pmf()
-simulate_onsets(make_daily_infections(infection_times), gt_pmf, ip_pmf)
-}
diff --git a/sessions.qmd b/sessions.qmd
index eef7a29..12306df 100644
--- a/sessions.qmd
+++ b/sessions.qmd
@@ -5,106 +5,39 @@ comments: false
## Session 0: Introduction and course overview
-Monday June 24: 9.00-9.30
+13:30-13:35
-- Introduction to the course and the instructors (10 mins)
-- Motivating the course: From an epidemiological line list to informing decisions in real-time (20 mins)
+- Introduction to the course and the instructors (5 mins)
-## Session 1: R, Stan, and statistical concept background
+## Session 1: Forecasting
-Monday June 24: 9.30-10.30
+13:35-14:10
-- Introduction to statistical concepts used in the course (15 mins)
-- Introduction to stan concepts used in the course (15 mins)
-- Practice session: introduction to estimation in stan (30 mins)
+- Introduction to forecasting as an epidemiological problem (10 mins)
+- Practice session: Visualising infectious disease forecasts (25 minutes)
-## Session 2: Delay distributions
+## Session 2: Forecast evaluation
-Monday June 24: 11.00-11.45
-
-- Introduction to epidemiological delays and how to represent them with probability distributions (10 mins)
-- Practice session: simulate and estimate epidemiological delays (30 mins)
-- Wrap up (5 mins)
-
-## Session 3: Biases in delay distributions
-
-Monday June 24: 11.45-12.30 and 14.00-14.45
-
-- Introduction to biases in delay distributions (10 mins)
-- Practice session: Simulating biases in delay distributions and estimating delays without adjustment on these data (35 mins)
-- Practice session: estimating delay distributions with adjustments for bias (35 mins)
-- Wrap up (10 mins)
-
-## Session 4: Using delay distributions to model the data generating process of an epidemic
-
-Monday June 24: 14.45-15.30
-
-- Using delay distributions to model the data generating process of an epidemic (15 mins)
-- Practice session: implementing a convolution model and identifying potential problems (30 mins)
-
-## Session 5: $R_t$ estimation and the renewal equation
-
-Monday June 24: 16.00-17.30
-
-- Introduction to the time-varying reproduction number (10 mins)
-- Practice session: using the renewal equation to estimate R (35 mins)
-- Practice session: combining $R_t$ estimation with delay distribution convolutions (35 mins)
-- Wrap up (10 mins)
-
-## Session 6: Nowcasting concepts
-
-Tuesday June 25: 9.00-10.30
-
-- Introduction to nowcasting as a right-truncation problem (10 mins)
-- Practice session: Simulating the delay distribution (35 mins)
-- Practice session: Nowcasting using pre-estimated delay distributions (35 mins)
-- Wrap up (10 mins)
-
-## Session 7: Nowcasting with an unknown reporting delay
-
-Tuesday June 25: 11.00-12.30
-
-- Introduction to joint estimation of delays and nowcasts (10 mins)
-- Practice session: Joint estimation of delays and nowcasts (35 mins)
-- Practice session: Joint estimation of delays, nowcasts and reproduction numbers (35 mins)
-- Wrap up (10 mins)
-
-## Session 8: Forecasting concepts
-
-Tuesday June 25: 14.00-15.30
-
-- Introduction to forecasting as an epidemiological problem, and its relationship with nowcasting and $R_t$ estimation (10 mins)
-- Practice session: extending a model into the future (35 minutes)
-- Practice session: evaluate your forecast (35 mins)
-- Wrap up (10 mins)
-
-## Session 9: Forecasting models
-
-Tuesday June 25: 16.00-17.30
+14:10-15:20
- An overview of forecasting models (10 mins)
- Practice session: Evaluating forecasts from a range of models (60 mins)
-- Wrap up and discussion (20 mins)
-## Session 10: Examples of available tools
+15:30-15:50
-Tuesday June 25: 9.00-10.30
-
-- Introduction to tools for $R_t$ estimation, nowcasting and forecasting (5 mins)
-- Group work: explore a tool and relate it to key concepts in the course (30 mins)
-- Discussion (15 mins)
-- Individual exploration of all packages (35 mins)
-- Wrap up (5 mins)
+- Wrap up and discussion (20 mins)
-## Session 11: Methods in the real world
+## Session 3: Forecast ensembles
-Wednesday June 26: 11.00-12.00
+15:50-17:10
-- Presentations and Q&A on uses of nowcasts & forecasts in the real world (60 mins)
+- Introduction to forecast ensembles (10 mins)
+- Practice session: Creating forecast ensembles (60 mins)
+- Wrap up (10 mins)
-## Session 12: End of course summary
+## Session 4: End of course summary
-Wednesday June 26: 12.00-12.30
+17:10-17:30
- Summary of the course (10 mins)
-- Final discussion and closing (20 mins)
+- Final discussion and closing (10 mins)
diff --git a/sessions/R-Stan-and-statistical-concepts.qmd b/sessions/R-Stan-and-statistical-concepts.qmd
deleted file mode 100644
index d80542f..0000000
--- a/sessions/R-Stan-and-statistical-concepts.qmd
+++ /dev/null
@@ -1,208 +0,0 @@
----
-title: "Probability distributions and parameter estimation"
-order: 1
----
-
-# Introduction
-
-Many important characteristics, or parameters, of epidemiological processes are not fully observed - and are therefore uncertain. For example, in this course this might include time delays, reproduction numbers, or case numbers now and in the future. We can specify the shape of uncertainty around a specific parameter using a probability distribution.
-
-We'll want to correctly specify this uncertainty. We can do this best by combining our own prior understanding with the data that we have available. In this course, we'll use this Bayesian approach to modelling. A useful tool for creating these models is the `stan` probabilistic programming language.
-
-## Slides
-
-- [Introduction to statistical concepts used in the course](slides/introduction-to-statistical-concepts)
-- [Introduction to stan concepts used in the course](slides/introduction-to-stan)
-
-## Objectives
-
-The aim of this session is to introduce the concept of probability distributions and how to estimate their parameters using Bayesian inference with `stan`.
-
-::: {.callout-note collapse="true"}
-
-# Setup
-
-## Source file
-
-The source file of this session is located at `sessions/R-Stan-and-statistical-concepts.qmd`.
-
-## Libraries used
-
-In this session we will use the `ggplot2` library for plotting and the `nfidd` package to load the stan models.
-We will also use the `bayesplot` and `posterior` packages to investigate the results of the inference conducted with stan.
-
-```{r libraries, message = FALSE}
-library("nfidd")
-library("ggplot2")
-library("bayesplot")
-library("posterior")
-```
-
-::: {.callout-tip}
-The best way to interact with the material is via the [Visual Editor](https://docs.posit.co/ide/user/ide/guide/documents/visual-editor.html) of RStudio.
-:::
-
-## Initialisation
-
-We set a random seed for reproducibility.
-Setting this ensures that you should get exactly the same results on your computer as we do.
-We also set an option that makes `cmdstanr` show line numbers when printing model code.
-This is not strictly necessary but will help us talk about the models.
-
-```{r}
-set.seed(123)
-options(cmdstanr_print_line_numbers = TRUE)
-```
-
-:::
-
-# Simulating data from a probability distribution
-
-First let us simulate some data from a probability distribution.
-In R, this is usually done using a family of random number generation functions that start with `r`.
-For example, to simulate random numbers from a normal distribution you would use the `rnorm()` function.
-All these functions have a first argument `n`, the number of random replicates to generate, and then some further arguments that parameterise the probability distribution.
-
-```{r rnorm}
-rnorm(n = 10, mean = 0, sd = 1)
-```
-
-We will use probability distributions to characterise epidemiological delays.
-These are usually strictly positive because e.g. one cannot develop symptoms before becoming infected, and therefore the incubation period cannot be less than zero.
-Note that this does not necessarily apply to all distributions, e.g. serial intervals can be negative if person X infects Y but Y develops symptoms first.
-
-Probability distribution that are commonly used in this situation are the gamma or lognormal distributions.
-These are fairly similar, with one difference being that the lognormal commonly has a "heavier tail" on the right, i.e., gives more probability to occasional very large values, whereas the gamma distribution has more of a tail on the left, i.e., gives more probability to values lower than the mean.
-
-The gamma distribution is characterised by the `shape` ($\alpha$) and `rate` ($\beta$) parameters, with a mean of $\alpha/\beta$ and variance $\alpha/\beta^2$.
-A gamma distribution with mean 5 and variance 2, for example, has $\alpha = 12.5$ and $\beta = 2.5$.
-To simulate from such a distribution, we can use the following R code.
-
-```{r gammas}
-### simulate gamma with mean 5, variance 2
-gammas <- rgamma(100, shape = 12.5, rate = 2.5)
-head(gammas)
-mean(gammas)
-var(gammas)
-```
-
-The lognormal distribution is characterised by the `meanlog` ($\mu$) and `sdlog` ($\sigma$) parameters, with a mean of $e^{\mu + 0.5\sigma^2}$ and variance $(e^{\sigma^2} - 1) e^{2\mu + \sigma^2}$.
-A lognormal distribution with mean 5 and variance 2, for example, has (after a bit of calculation) $\mu = 1.57$ and $\sigma = 0.28$.
-
-```{r lognormals}
-### simulate lognormals with mean 5, variance 2
-lognormals <- rlnorm(100, meanlog = 1.57, sdlog = 0.28)
-head(lognormals)
-mean(lognormals)
-var(lognormals)
-```
-
-We can now plot the two distributions.
-
-```{r plot_randoms}
-df <- rbind(
- data.frame(dist = "lognormal", randoms = lognormals),
- data.frame(dist = "gamma", randoms = gammas)
-)
-ggplot(df, aes(x = randoms, fill = dist)) +
- geom_density(alpha = 0.5)
-```
-
-We have used `geom_density()` to get smooth lines.
-Alternatively we could have used, e.g., `geom_histogram()` to plot the raw data.
-
-## Estimating the parameters of probability distributions
-
-We will now use stan to estimate the parameters of the probability distribution.
-To do so, we first load in the stan model.
-Normally, you would do this by saving the model code in a text file called `gamma_model.stan` and then loading this using `cmdstanr::cmdstan_model()`.
-For this course, we included all the models in the `nfidd` package, and they can be accessed using `nfidd_cmdstan_model()` which uses `cmdstanr::cmdstan_model()` to load the specified model included with the package.
-Afterwards, we can use this model exactly how we would if we had loaded it using `cmdstanr` directly.
-
-```{r load_gamma_model}
-### load gamma model from the nfidd package
-mod <- nfidd_cmdstan_model("gamma")
-### show model code
-mod
-```
-
-::: {.callout-tip}
-### Arrays in stan
-On line 4 there is a data declaration starting with `array[n]`.
-This declares an array of size `n` of the type given afterwards (here: `real`).
-Arrays work in a similar way as arrays or vectors in R and its elements be accessed with the bracket operator `[`.
-For example, to get the third element of the array `y` you would write `y[3]`.
-:::
-
-::: {.callout-tip}
-### Take 5 minutes
-Familarise yourself with the model above.
-Do you understand all the lines?
-Which line(s) define the parameter prior distribution(s), which one(s) the likelihood, and which one(s) the data that has to be supplied to the model?
-:::
-
-::: {.callout-note collapse="true"}
-### Solution
-Lines 13 and 14 define the parametric prior distributions (for parameters alpha and beta).
-Line 15 defines the likelihood.
-Lines 3 and 4 define the data that has to be supplied to the model.
-:::
-
-We use the model we have defined in conjunction with the gamma distributed random numbers generated earlier to see if we can recover the parameters of the gamma distribution used.
-Once you have familiarised yourself with the model, use the `sample()` function to fit the model.
-
-```{r gamma_inference, results='hide', message=FALSE}
-stan_data <- list(
- N = length(gammas),
- y = gammas
-)
-gamma_fit <- mod$sample(data = stan_data)
-```
-
-::: {.callout-note}
-### Passing data to the stan model
-The `stan_data` object is a list with elements that will is passed to the stan model as the `data` argument to `sample`.
-The names and types of the elements need to correspond to the data block in the model (see lines 3 and 4 in the model).
-Here, we pass the length of `gammas` as `N` and the vector `gammas` itself as `y`.
-:::
-
-::: {.callout-note}
-### Stan messages
-The`mod$sample` command will produce a lot of messages which we have suppressed above.
-This is fine and intended to keep the user informed about any issues as well as general progress with the inference.
-This will come in handy later in the course when we fit more complicated models that can take a little while to run.
-:::
-
-In order to view a summary of the posterior samples generated, use
-
-```{r gamma_summary}
-gamma_fit
-```
-
-You can see that the estimates are broadly consistent with the parameters we specified.
-To investigate this further, we will conduct a so-called *posterior predictive check* by comparing random numbers simulated using the estimated parameters to the ones we simulated earlier.
-
-```{r gamma_ppc}
-## Extract posterior draws
-gamma_posterior <- as_draws_df(gamma_fit$draws())
-head(gamma_posterior)
-
-## Generate posterior predictive samples
-gamma_ppc <- sapply(seq_along(gammas), function(i) {
- rgamma(n = length(gammas),
- shape = gamma_posterior$alpha[i],
- rate = gamma_posterior$beta[i])
-})
-
-## Plot posterior predictive check
-ppc_dens_overlay(y = gammas, yrep = gamma_ppc)
-```
-
-We can see that the random numbers generated from the posterior samples are distributed relatively evenly around the data (in black), i.e., the samples generated earlier that we fitted to.
-
-## Going further
-
-- For the model above we chose truncated normal priors with a mode at 0 and standard deviation 10. If you change the parameters of the prior distributions, does it affect the results?
-- You could try the model included in `lognormal.stan` to estimate parameters of the lognormal distribution.
-
-## Wrap up
diff --git a/sessions/R-estimation-and-the-renewal-equation.qmd b/sessions/R-estimation-and-the-renewal-equation.qmd
deleted file mode 100644
index 81bf507..0000000
--- a/sessions/R-estimation-and-the-renewal-equation.qmd
+++ /dev/null
@@ -1,497 +0,0 @@
----
-title: "$R_t$ estimation and the renewal equation"
-order: 5
----
-
-# Introduction
-
-In the last session we used the idea of convolutions as a way to interpret individual time delays at a population level. In that session, we linked symptom onsets back to infections. Now we want to link infections themselves together over time, knowing that current infections were infected by past infections. Correctly capturing this transmission process is crucial to modelling infections in the present and future.
-
-## Slides
-
-- [Introduction to the reproduction number](slides/introduction-to-reproduction-number)
-
-## Objectives
-
-The aim of this session is to introduce the renewal equation as an infection generating process, and to show how it can be used to estimate a time-varying reproduction number.
-
-::: {.callout-note collapse="true"}
-
-# Setup
-
-## Source file
-
-The source file of this session is located at `sessions/R-estimation-and-the-renewal-equation.qmd`.
-
-## Libraries used
-
-In this session we will use the `nfidd` package to load a data set of infection times and access stan models and helper functions, the `dplyr` and `tidyr` packages for data wrangling, `ggplot2` library for plotting, and the `tidybayes` package for extracting results of the inference.
-
-```{r libraries, message = FALSE}
-library("nfidd")
-library("dplyr")
-library("tidyr")
-library("ggplot2")
-library("tidybayes")
-```
-
-::: {.callout-tip}
-The best way to interact with the material is via the [Visual Editor](https://docs.posit.co/ide/user/ide/guide/documents/visual-editor.html) of RStudio.
-:::
-
-## Initialisation
-
-We set a random seed for reproducibility.
-Setting this ensures that you should get exactly the same results on your computer as we do.
-We also set an option that makes `cmdstanr` show line numbers when printing model code.
-This is not strictly necessary but will help us talk about the models.
-
-```{r}
-set.seed(123)
-options(cmdstanr_print_line_numbers = TRUE)
-```
-
-:::
-
-# The renewal equation as a process model for infectious diseases
-
-In this session we introduce modelling the infection process itself, in addition to modelling observation processes.
-
-Recall that in the [session on convolutions](using-delay-distributions-to-model-the-data-generating-process-of-an-epidemic#estimating-a-time-series-of-infections) we tried to estimate the number of infections.
-In doing so we assumed that infections every day were independently identically distributed and determined only by the number of symptom onsets that they caused.
-In reality, however, we know that infections are not independent.
-Because infection is dependent on a pathogen being transmitted from one individual to another, we expect infections on any day to depend on existing infections, that is the number of individuals that became infectious in the recent past.
-We now express this relationship via the renewal equation, which links these recent infections to the number of new infections expected on any day via the reproduction number $R_t$.
-
-Remember that this is a more general concept than the _basic_ reproduction number $R_0$ which represents the average number of secondary infections caused by a single infectious individual in a completely susceptible population.
-
-The reproduction number $R_t$ (sometimes called the _effective_ reproduction number) more generally describes the average number of secondary infections caused by a single infectious individual and can vary in time and space as a function of differences in population level susceptibility, changes in behaviour, policy, seasonality etc.
-
-In most mechanistic models of infectious diseases (starting with the simplest SIR model), $R_t$ arises out of a combination of parameters and variables representing the system state, for example in a simple SIR model it can be calculated as $R_0 S/N$ where $S$ is the current number of susceptibles in the population of size $N$.
-By fitting such models to data it is then possible to calculate the value of $R_t$ at any point in time.
-
-The _renewal equation_ represents a more general model which includes the SIR model as a special case.
-In its basic form it makes no assumption about the specific processes that cause $R_t$ to have a certain value and/or change over time, but instead it only relates the number of infected people in the population, the current value of the reproduction number and a delay distribution that represents the timings of when individuals infect others relative to when they themselves became infected, the so-called generation time.
-Mathematically, it can be written as
-
-$$
-I_t = R_t \sum_{i=1}^{g_\mathrm{max}} I_{t-i} g_i
-$$
-
-Here, $I_t$ is the number of infected individuals on day $t$, $R_t$ is the current value of the reproduction number and $g_i$ is the probability of a secondary infection occurring $i$ days after the infector became infected themselves, with a maximum $g_\mathrm{max}$.
-Remembering the [session on convolutions](using-delay-distributions-to-model-the-data-generating-process-of-an-epidemic#delay-distributions-and-convolutions) you will be able to identify that the renewal equation represents a convolution of the infection time series with itself, with the delay distribution given by $g_i$ and $R_t$ representing a scaling that is being applied.
-
-::: {.callout-tip}
-## Discrete vs. continuous renewal equation
-The equation shown above represents the discrete version of the reproduction number.
-Similar to discussions in the [session on convolutions](using-delay-distributions-to-model-the-data-generating-process-of-an-epidemic#delay-distributions-and-convolutions) this can be interpreted as a discretised version of a continuous one where the sum is replaced by an integral and the generation time distribution is continuous.
-Note that in the discrete version we have started the sum at 1 and thus set $g_0=0$ which will make calculations easier.
-:::
-
-::: {.callout-tip}
-## Instantaneous vs. case reproduction number
-There are different definitions of the reproduction number that can be applied to the situation where it changes in time.
-As it is defined above it is also called the _instantaneous_ reproduction number because any change affects all currently infectious individuals instantaneously.
-Another example of a definition is the _case_ reproduction number, where changes affect individuals at the time that they are infected but then they have a constant reproduction number throughout their infectious period.
-:::
-
-::: {.callout-tip}
-## Stochastic vs. deterministic renewal equation
-The version of the discrete renewal equation we wrote above is deterministic, i.e. knowing the number of infections up to a certain time point and the reproduction number we can work out exactly how many new infections we will see.
-Sometimes stochasticity is added where the equation above gives the _expectation_ of $I_t$ but there exists random variation around it.
-In this course we will only deal with the deterministic renewal equation.
-:::
-
-# Simulating an epidemic using the renewal equation
-
-With the theory out of the way we now turn to simulating an epidemic using the renewal equation.
-We can use function included in the `nfidd` package to simulate the epidemic using the discrete renewal equation.
-
-```{r renewal_equation}
-renewal
-```
-
-::: {.callout-note}
-## Take 10 minutes
-Try to understand the `renewal()` function above.
-Compare it to the `convolve_with_delay()` function from the [session on convolutions](using-delay-distributions-to-model-the-data-generating-process-of-an-epidemic#delay-distributions-and-convolutions).
-How are they similar?
-Can you explain the key differences between the two?
-Try calling the function with a few different probability distributions and parameters.
-What kind of behaviours do you see depending on the values you put in?
-:::
-
-# Estimating $R_t$ from a time series of infections
-
-We now return to the time series of infections we used in the [session on convolutions](using-delay-distributions-to-model-the-data-generating-process-of-an-epidemic#delay-distributions-and-convolutions).
-
-```{r load_ts}
-inf_ts <- make_daily_infections(infection_times)
-head(inf_ts)
-```
-
-This creates the `inf_ts` data set which we can look at e.g. using
-
-```{r load_ts_inspect}
-head(inf_ts)
-```
-
-We use a renewal equation model in _stan_ to estimate the effective reproduction number throughout the outbreak.
-We assume that the generation time is gamma-distributed with mean 4 and standard deviation 2, with a maximum of 2 weeks (14 days).
-From this we can calculate that the parameters of the distribution are shape 4 and rate 1.
-We can use the `censored_delay_pmf()` function defined in the [session on convolutions](using-delay-distributions-to-model-the-data-generating-process-of-an-epidemic#delay-distributions-and-convolutions) to use this continuous distribution with the discrete renewal equation.
-
-To approximate the generation time PMF using random draws from the underlying continuous distribution use
-```{r gen_time_pmf}
-gen_time_pmf <- censored_delay_pmf(rgamma, max = 14, shape = 4, rate = 1)
-```
-
-The discrete renewal equation is only valid for generation times greater than 0 so we remove the first element of the pmf and re-normalise:
-
-```{r gen_time_renorm}
-gen_time_pmf <- gen_time_pmf[-1] ## remove first element
-gen_time_pmf <- gen_time_pmf / sum(gen_time_pmf) ## renormalise
-```
-
-As always we first load the stan model and spend some time trying to understand it.
-
-```{r stan_estimate_r}
-r_mod <- nfidd_cmdstan_model("estimate-r")
-r_mod
-```
-
-::: {.callout-tip}
-## Take 5 minutes
-Familiarise yourself with the model above.
-Again there is a `functions` block at the beginning of the model (lines 1-3), where we load a function called `renewal()` (line 2) from a file of the same name which can be found in the subdirectory `functions` of the `stan` directory or [viewed on the github repo](https://github.com/nfidd/nfidd/blob/main/stan/functions/renewal.stan).
-The functions correspond exactly to our earlier **R** function of the same name.
-Later, this function is called in the `model` block, to generate the time series of infections using the discretised renewal model (line 19).
-Which line defines priors, and which the likelihood?
-:::
-
-::: {.callout-note collapse="true"}
-## Solution
-Line 24 defines the prior distribution of $R_t$ at each time point, and Line 25 defines the likelihood using Poisson observation uncertainty.
-:::
-
-Once again we can generate estimates from this model:
-
-```{r r_fit, results = 'hide', message = FALSE}
-data <- list(
- n = nrow(inf_ts) - 1,
- obs = inf_ts$infections[-1],
- I0 = inf_ts$infections[1],
- gen_time_max = length(gen_time_pmf),
- gen_time_pmf = gen_time_pmf
-)
-r_fit <- r_mod$sample(data = data, parallel_chains = 4)
-```
-
-```{r r_fit_summary}
-r_fit
-```
-
-Once stan has run its chains, we can visualise the posterior estimates.
-
-```{r r_plot}
-# Extract posterior draws
-r_posterior <- r_fit |>
- gather_draws(R[infection_day]) |>
- ungroup() |>
- mutate(infection_day = infection_day - 1) |>
- filter(.draw %in% sample(.draw, 100))
-
-ggplot(
- data = r_posterior,
- aes(x = infection_day, y = .value, group = .draw)) +
- geom_line(alpha = 0.1) +
- labs(title = "Estimated Rt",
- subtitle = "Model 1: renewal equation from infections")
-```
-
-::: {.callout-tip}
-## Take 10 minutes
-Simulate from the renewal equation using the `renewal()` R function we defined above with a given $R_t$ trajectory.
-For example, you could look at $R_t$ increasing steadily, or suddenly, or having any other trajectory you might imagine.
-Use the stan model to infer the trajectory of the reproduction number from the resulting time series of infection.
-Does the model reproduce the simulated $R_t$ trajectories?
-:::
-
-# Estimating $R_t$ from a time series of symptom onsets
-
-Epidemiological data is rarely, perhaps never, available as a time series of infection events.
-Instead, we usually observe outcomes such as symptom onsets when individuals interact with the health system, e.g. by presenting to a hospital.
-In the [session on convolutions](using-delay-distributions-to-model-the-data-generating-process-of-an-epidemic#delay-distributions-and-convolutions) we simulated symptom onsets from a time series of infections by convolving with a delay and then sampling from a Poisson distribution:
-For this we used the `convolved_with_delay()` function.
-
-We then simulate observations again using:
-
-```{r generate_obs}
-ip_pmf <- censored_delay_pmf(rgamma, max = 14, shape = 5, rate = 1)
-onsets <- convolve_with_delay(inf_ts$infections, ip_pmf)
-obs <- rpois(length(onsets), onsets)
-```
-
-We now add this to our renewal equation model and use this to estimate infections as well as the reproduction number:
-
-```{r stan_estimate_inf_and_r}
-r_inf_mod <- nfidd_cmdstan_model("estimate-inf-and-r")
-r_inf_mod
-```
-::: {.callout-tip}
-## Take 5 minutes
-Familiarise yourself with the model above.
-Compare it to the model used earlier in this session, and the one used in the [session on convolutions](using-delay-distributions-to-model-the-data-generating-process-of-an-epidemic#estimating-a-time-series-of-infections).
-Does this model have more parameters?
-How do the assumptions about the infections time series differ between the models?
-:::
-
-We then generate estimates from this model:
-
-```{r r_inf_fit, results = 'hide', message = FALSE}
-data <- list(
- n = length(obs) - 1,
- obs = obs[-1],
- I0 = inf_ts$infections[1],
- gen_time_max = length(gen_time_pmf),
- gen_time_pmf = gen_time_pmf,
- ip_max = length(ip_pmf) - 1,
- ip_pmf = ip_pmf
-)
-r_inf_fit <- r_inf_mod$sample(
- data = data, parallel_chains = 4, init = \() list(init_R = 0)
-)
-```
-
-::: {.callout-note}
-Generally one should start MCMC samplers with multiple different starting values to make sure the whole posterior distribution is explored.
-When testing this model we noticed that sometimes the model failed to fit the data at all.
-Because of this we added an argument to start sampling with `init_R` set to 0.
-This makes sure the sampler starts fitting the model from a sensible value and avoids fitting failiures.
-:::
-
-```{r r_inf_fit_summary}
-r_inf_fit
-```
-
-This time we can extract both the `infections` and `R` variables by infection day.
-
-```{r r_inf_posterior}
-r_inf_posteriors <- r_inf_fit |>
- gather_draws(infections[infection_day], R[infection_day]) |>
- ungroup() |>
- mutate(infection_day = infection_day - 1) |>
- filter(.draw %in% sample(.draw, 100))
-```
-
-We can visualise infections compared to the data used to generate the time series of onsets
-
-```{r plot_infections}
-inf_posterior <- r_inf_posteriors |>
- filter(.variable == "infections")
-ggplot(inf_posterior, mapping = aes(x = infection_day)) +
- geom_line(mapping = aes(y = .value, group = .draw), alpha = 0.1) +
- geom_line(
- data = inf_ts, mapping = aes(y = infections), colour = "red"
- ) +
- labs(title = "Infections, estimated (grey) and observed (red)",
- subtitle = "Model 2: renewal equation from symptom onsets")
-```
-
-and reproduction numbers
-
-```{r plot_rt}
-r_inf_posterior <- r_inf_posteriors |>
- filter(.variable == "R")
-ggplot(
- r_inf_posterior, mapping = aes(x = infection_day, y = .value, group = .draw)
-) +
- geom_line(alpha = 0.1)
-```
-
-# Improving the generative model for the reproduction number
-
-In the model so far we have assumed that the reproduction number at any time point is independent of the reproduction number at any other time point. This assumption has resulted in the quite noisy estimates of the reproduction number that we have seen in the plots above.
-
-In reality, we might expect the reproduction number to change more smoothly over time (except in situations of drastic change such as a very effective intervention) and to be more similar at adjacent time points. We can model this by assuming that the reproduction number at time $t$ is a random draw from a normal distribution with mean equal to the reproduction number at time $t-1$ and some standard deviation $\sigma$. This can be described as a random walk model for the reproduction number. In fact, rather than using this model directly, a better choice might be to use a model where the logarithm of the reproduction number does a random walk, as this will ensure that the reproduction number is always positive and that changes are multiplicative rather than additive (i.e as otherwise the same absolute change in the reproduction number would have a larger effect when the reproduction number is small which likely doesn't match your intuition for how outbreaks evolve over time). We can write this model as
-
-$$
-\sigma \sim HalfNormal(0, 0.05) \\
-$$
-$$
-\log(R_0) \sim \mathcal{Lognormal}(-0.1, 0.5)
-$$
-$$
-\log(R_t) \sim \mathcal{N}(\log(R_{t-1}), \sigma)
-$$
-
-Here we have placed a prior on the standard deviation of the random walk, which we have assumed to be half-normal (i.e., normal but restricted to being non-negative) with a mean of 0 and a standard deviation of 0.05. This is a so-called _weakly informative prior_ that allows for some variation in the reproduction number over time but not an unrealistic amount. We have also placed a prior on the initial reproduction number, which we have assumed to be log-normally distributed with a mean of -0.1 and a standard deviation of 0.5. This is a weakly informative prior that allows for a wide range of initial reproduction numbers but has a mean of approximately 1.
-The last line is the geometric random walk.
-
-## Simulating a geometric random walk
-
-You can have a look at an R function for performing the geometric random walk:
-
-```{r geometric-random-walk}
-geometric_random_walk
-```
-
-::: {.callout-tip}
-## Take 5 minutes
-Look at this function and try to understand what it does.
-Note that we use the fact that we can generate a random normally distributed variable $X$ with mean 0 and standard deviation $\sigma$ by mutiplying a standard normally distributed variable (i.e., mean 0 and standard deviation 1) $Y$ with $\sigma$.
-Using this [non-centred parameterisation](https://mc-stan.org/docs/2_18/stan-users-guide/reparameterization-section.html) for efficiency) will improve our computational efficency later when using an equivalent function in stan
-:::
-
-We can use this function to simulate a random walk:
-
-```{r simulate-geometric-walk}
-R <- geometric_random_walk(init = 1, noise = rnorm(100), std = 0.1)
-data <- tibble(t = seq_along(R), R = exp(R))
-
-ggplot(data, aes(x = t, y = R)) +
- geom_line() +
- labs(title = "Simulated data from a random walk model",
- x = "Time",
- y = "R")
-```
-
-You can repeat this multiple times either with the same parameters or changing some to get a feeling for what this does.
-
-## Estimating $R_t$ with a geometric random walk prior
-
-We can now include this in a stan model,
-
-```{r stan_estimate_inf_and_r_rw}
-rw_mod <- nfidd_cmdstan_model("estimate-inf-and-r-rw")
-rw_mod
-```
-
-Note that the model is very similar to the one we used earlier, but with the addition of the random walk model for the reproduction number using a function in stan that does the same as our R function of the same name we defined.
-
-We can now generate estimates from this model:
-
-```{r r_inf_rw_fit, results = 'hide', message = FALSE}
-data <- list(
- n = length(obs) - 1,
- obs = obs[-1],
- I0 = inf_ts$infections[1],
- gen_time_max = length(gen_time_pmf),
- gen_time_pmf = gen_time_pmf,
- ip_max = length(ip_pmf) - 1,
- ip_pmf = ip_pmf
-)
-r_rw_inf_fit <- rw_mod$sample(
- data = data, parallel_chains = 4, max_treedepth = 12,
- init = \() list(init_R = 0, rw_sd = 0.01)
-)
-```
-
-```{r r_inf_rw_fit_summary}
-r_rw_inf_fit
-```
-
-::: {.callout-note}
-As this is a more complex model we have increased the `max_treedepth` parameter to 12 to allow for more complex posterior distributions and we have also provided an initialisation for the `init_R` and `rw_sd` parameters to help the sampler find the right region of parameter space. This is a common technique when fitting more complex models and is needed as it is hard a priori to know where the sampler should start.
-:::
-
-We can again extract and visualise the posteriors in the same way as earlier.
-
-```{r r_rw_posterior}
-rw_posteriors <- r_rw_inf_fit |>
- gather_draws(infections[infection_day], R[infection_day]) |>
- ungroup() |>
- mutate(infection_day = infection_day - 1) |>
- filter(.draw %in% sample(.draw, 100))
-```
-
-```{r plot_infections_rw}
-rw_inf_posterior <- rw_posteriors |>
- filter(.variable == "infections")
-ggplot(mapping = aes(x = infection_day)) +
- geom_line(
- data = rw_inf_posterior, mapping = aes(y = .value, group = .draw), alpha = 0.1
- ) +
- geom_line(data = inf_ts, mapping = aes(y = infections), colour = "red") +
- labs(title = "Infections, estimated (grey) and observed (red)",
- subtitle = "Model 3: renewal equation with random walk")
-```
-
-and reproduction numbers
-
-```{r plot_rt_rw}
-rw_r_inf_posterior <- rw_posteriors |>
- filter(.variable == "R") |>
- filter(.draw %in% sample(.draw, 100))
-ggplot(
- data = rw_r_inf_posterior,
- mapping = aes(x = infection_day, y = .value, group = .draw)
-) +
- geom_line(alpha = 0.1) +
- labs(title = "Estimated R",
- subtitle = "Model 3: renewal equation with random walk")
-```
-
-::: {.callout-tip}
-## Take 10 minutes
-Compare the results across the models used in this session, and the one used in the [session on convolutions](using-delay-distributions-to-model-the-data-generating-process-of-an-epidemic).
-How do the models vary in the number of parameters that need to be estimated?
-How do the assumptions about the infections time series differ between the models?
-What do you notice about the level of uncertainty in the estimates of infections and $R_t$ over the course of the time series?
-If you have time you could try re-running the experiment with different $R_t$ trajectories and delay distributions to see whether results change.
-
-::: {.callout-note collapse="true"}
-## Solution
-We can see that using the renewal model as generative model we recover the time series of infections more accurately compared to previously when we assumed independent numbers of infections each day and that using a more believable model (i.e the geometric random walk) for the reproduction number improves things even more.
-Of course, this is helped by the fact that the data was generated by a model similar to the renewal model used for inference.
-
-# Comparing the $R_t$ trajectories
-
-```{r plot_r_posteriors}
-## earlier posteriors
-r_posterior <- r_posterior |>
- mutate(data = "infections")
-r_inf_posterior <- r_inf_posterior |>
- mutate(data = "onsets (normal)")
-rw_r_inf_posterior <- rw_r_inf_posterior |>
- mutate(data = "onsets (random walk)")
-
-all_posteriors <- rbind(
- r_inf_posterior,
- rw_r_inf_posterior,
- r_posterior
-)
-
-ggplot(
- all_posteriors,
- mapping = aes(x = infection_day, y = .value, group = .draw,
- colour = data)
-) +
- geom_line(alpha = 0.1) +
- scale_fill_brewer(palette = "Set1") +
- labs(
- title = "Rt estimates from renewal equation models",
- subtitle = paste(
- "Estimates from infections, from symptom onsets, and from onsets with a",
- "random walk"
- )
- ) +
- guides(colour = guide_legend(override.aes = list(alpha = 1)))
-```
-
-We can see that the estimates are smoother when using the random walk model for the reproduction number, compared to the normal model. The model that fits directly to infections has the lowest uncertainty, which we would expect as it doesn't have to infer the number of infections from symptom onsets but even here the reproduction number estimates are unrealistically noisy due to the assumption of independence between infections each day.
-
-:::
-:::
-
-# Going further
-
-- We have used symptom onsets under the assumption that every infected person develops symptoms.
-Earlier we also created a time series of hospitalisation under the assumption that only a proportion (e.g., 30%) of symptomatic individuals get hospitalised.
-How would you change the model in this case?
-What are the implications for inference?
-- The [`EpiNow2`](https://epiforecasts.io/EpiNow2/) package implements a more complex version of the model we have used here. You will have the opportunity to try it in a later session.
-
-
-
-
diff --git a/sessions/biases-in-delay-distributions.qmd b/sessions/biases-in-delay-distributions.qmd
deleted file mode 100644
index f4a6cf5..0000000
--- a/sessions/biases-in-delay-distributions.qmd
+++ /dev/null
@@ -1,304 +0,0 @@
----
-title: "Biases in delay distributions"
-order: 3
----
-
-# Introduction
-
-So far, we've looked at the uncertainty of the time delays between epidemiological events. The next challenge is that our information on these delays is usually biased, especially when we're analysing data in real time. We'll consider two types of biases that commonly occur in reported infectious disease data:
-
-- *Censoring*: when we know an event occurred at some time, but not exactly when.
-- *Truncation*: when not enough time has passed for all the relevant epidemiological events to occur or be observed.
-
-We can again handle these by including them as uncertain parameters in the modelling process.
-
-## Slides
-
-[Introduction to biases in epidemiological delays](slides/introduction-to-biases-in-epidemiological-delays)
-
-## Objectives
-
-In this session, we'll introduce censoring and right truncation as typical properties of infectious disease data sets, using the delay from symptom onset to hospitalisation as an example.
-
-::: {.callout-note collapse="true"}
-
-# Setup
-
-## Source file
-
-The source file of this session is located at `sessions/biases-in-delay-distributions.qmd`.
-
-## Libraries used
-
-In this session we will use the `nfidd` package to load a data set of infection times and access stan models and helper functions, the `ggplot2` package for plotting, the `dplyr` and `tidyr` packages to wrangle data, the `lubridate` package to deal with dates.
-
-```{r libraries, message = FALSE}
-library("nfidd")
-library("ggplot2")
-library("dplyr")
-library("tidyr")
-library("lubridate")
-```
-
-::: callout-tip
-The best way to interact with the material is via the [Visual Editor](https://docs.posit.co/ide/user/ide/guide/documents/visual-editor.html) of RStudio.
-:::
-
-## Initialisation
-
-We set a random seed for reproducibility.
-Setting this ensures that you should get exactly the same results on your computer as we do.
-We also set an option that makes `cmdstanr` show line numbers when printing model code.
-This is not strictly necessary but will help us talk about the models.
-
-```{r}
-set.seed(123)
-options(cmdstanr_print_line_numbers = TRUE)
-```
-
-:::
-
-# Load data
-
-We will use the same simulated data set as in the [session on delay distributions](delay-distributions#simulating-delayed-epidemiological-data).
-
-::: callout-note
-Remember, in this outbreak we are assuming:
-
-- the incubation period is **gamma**-distributed with **shape 5** and **rate 1**, i.e. a mean of 5 days
-- the time from onset to hospital admission is **lognormally**-distributed, with **meanlog 1.75** and **sdlog 0.5**, i.e. a mean delay of about a week
-:::
-
-We use the same function we used in that session to simulate symptom onset and hospitalisation data.
-
-```{r inf_hosp_solution}
-df <- add_delays(infection_times)
-```
-
-This creates the `df` data frame that we can inspect e.g. using
-
-```{r show_df}
-head(df)
-```
-
-# Dates, not days: censoring
-
-Data on health outcomes are usually not recorded in the way that we have used so far in this session: as a numeric time since a given start date.
-Instead, we usually deal with *dates*.
-
-We can make our simulated dataset a bit more realistic by rounding down the infection times to an integer number.
-
-```{r censored_times}
-# Use the floor() function to round down to integers
-df_dates <- df |>
- mutate(
- infection_time = floor(infection_time),
- onset_time = floor(onset_time),
- hosp_time = floor(hosp_time)
- )
-head(df_dates)
-```
-
-::: {.callout-note}
-As before we are still not working with dates but numbers.
-This makes handling the data easier - we don't have to make any conversions before using the data in stan.
-:::
-
-Each of the numbers now represent the number of days that have passed since the start of the outbreak.
-That is, each of the numbers correspond to a day.
-In that sense, the data is more like typical data we get from infectious disease outbreaks, where we would usually have a line list with key events such as symptom onset or death reported by a *date*.
-In statistical terms, we call the delay *double interval censored*: "double" because the delays represent the time between two events that are both censored; and "interval" because all we know about the timings of the events is that they happened in a certain time interval (between 0:00 and 23:59 on the recorded day).
-
-## Estimating delay distributions accounting for censoring
-
-Let's estimate the time from symptom onset to hospitalisation with the censored data.
-
-A naïve approach to estimating the delay would be to ignore the fact that the data are censored.
-To estimate the delay from onset to hospitalisation, we could just use the difference between the censored times, which is an integer (the number of days).
-
-```{r integer-delays}
-df_dates <- df_dates |>
- mutate(
- incubation_period = onset_time - infection_time,
- onset_to_hosp = hosp_time - onset_time
- )
-```
-
-::: callout-tip
-## Take 5 minutes
-
-Fit the lognormal model used in the [session on delay distributions](delay-distributions#estimating-delay-distributions) to the estimates from the rounded data, i.e. using the `df_dates` data set.
-Do you still recover the parameters that we put in?
-:::
-
-::: {.callout-note collapse="true"}
-## Solution
-
-```{r df_dates_solution, results = 'hide', message = FALSE}
-mod <- nfidd_cmdstan_model("lognormal")
-res <- mod$sample(
- data = list(
- n = nrow(na.omit(df_dates)),
- y = na.omit(df_dates)$onset_to_hosp
- )
-)
-```
-
-```{r df_dates_solution_summary}
-res
-```
-
-Usually the estimates will be further from the "true" parameters than before when we worked with the unrounded data.
-:::
-
-To account for double interval censoring, we need to modify the model to include the fact that we don't know when exactly on any given day the event happened.
-For example, if we know that symptom onset of an individual occurred on 20 June, 2024, and they were admitted to hospital on 22 June, 2024, this could mean an onset-to-hospitalisation delay from 1 day (onset at 23:59 on the 20th, admitted at 0:01 on the 22nd) to 3 days (onset at 0:01 on the 20th, admitted at 23:59 on the 22nd).
-
-We can use this in our delay estimation by making the exact time of the events based on the dates given part of the estimation procedure:
-
-```{r censoring_adjusted_delay_model}
-cmod <- nfidd_cmdstan_model("censored-delay-model")
-cmod
-```
-
-::: callout-tip
-## Take 5 minutes
-
-Familiarise yourself with the model above.
-Do you understand all the lines?
-Which line(s) define the parameter prior distribution(s), which one(s) the likelihood, and which one(s) reflect that we have now provided the delay as the difference in integer days?
-:::
-
-::: {.callout-note collapse="true"}
-## Solution
-
-Lines 21-24 define the parametric prior distributions (for parameters meanlog and sdlog, and the estimates of exact times of events).
-Line 27 defines the likelihood.
-Lines 15-17 reflect the integer delays, adjusted by the estimated times of day.
-:::
-
-Now we can use this model to re-estimate the parameters of the delay distribution:
-
-```{r censored_estimate, results = 'hide', message = FALSE}
-cres <- cmod$sample(
- data = list(
- n = nrow(na.omit(df_dates)),
- onset_to_hosp = na.omit(df_dates)$onset_to_hosp
- )
-)
-```
-
-```{r censored_estimate_summary}
-cres
-```
-
-::: callout-tip
-## Take 10 minutes
-
-Try re-simulating the delays using different parameters of the delay distribution.
-Can you establish under which conditions the bias in estimation gets worse?
-:::
-
-# Real-time estimation: truncation
-
-The data set we have looked at so far in this session is a "final" data set representing an outbreak that has come and gone.
-However, information on delay distribution is often important during ongoing outbreaks as they can inform nowcasts and forecasts and help with broader interpretation of data.
-
-Estimating delays in real time comes with particular challenges, as the timing of the cut-off might introduce a bias.
-If, for example, infections are exponentially increasing then there will be disproportionately more people with recent symptom onset.
-Without adjustment, this would artificially decrease the estimate of the mean delay compared to its true value for all infections.
-This happens because most infections are recent (due to the exponential increase), but later symptom onsets amongst these have not had a chance to happen yet.
-
-Once again, we can simulate this effect, for example by imagining we would like to make an estimate on day 70 of our outbreak.
-Let us work with the original, un-censored data for the time from onset to hospitalisation so as to look at the issue of truncation in isolation:
-
-```{r truncated_df}
-df_realtime <- df |>
- mutate(onset_to_hosp = hosp_time - onset_time) |>
- filter(hosp_time <= 70)
-```
-
-## Estimating delay distributions accounting for truncation
-
-If we take the naïve mean of delays we get an underestimate as expected:
-
-```{r mean_truncated}
-# truncated mean delay
-mean(df_realtime$onset_to_hosp)
-# compare with the mean delay over the full outbreak
-mean(df$hosp_time - df$onset_time, na.rm=TRUE)
-```
-
-::: callout-tip
-## Take 5 minutes
-
-Fit the lognormal model used above to the estimates from the truncated data, i.e. using the `df_realtime` data set.
-How far away from the "true" parameters do you end up?
-:::
-
-::: {.callout-note collapse="true"}
-## Solution
-
-```{r df_realtime_solution, results = 'hide', message = FALSE}
-res <- mod$sample(
- data = list(
- n = nrow(na.omit(df_realtime)),
- y = na.omit(df_realtime)$onset_to_hosp
- )
-)
-```
-
-```{r df_realtime_solution_summary}
-res
-```
-:::
-
-Once again, we can write a model that adjusts for truncation, by re-creating the simulated truncation effect in the stan model:
-
-```{r truncation_adjusted_delay_model}
-tmod <- nfidd_cmdstan_model("truncated-delay-model")
-tmod
-```
-
-::: callout-tip
-## Take 5 minutes
-
-Familiarise yourself with the model above.
-Which line introduces the truncation, i.e. the fact that we have not been able to observe hospitalisation times beyond the cutoff of (here) 70 days?
-:::
-
-::: {.callout-note collapse="true"}
-## Solution
-
-Line 17 defines the upper limit of `onset_to_hosp` as `time_since_onset`.
-:::
-
-Now we can use this model to re-estimate the parameters of the delay distribution:
-
-```{r truncated_estimate, results = 'hide', message = FALSE}
-tres <- tmod$sample(
- data = list(
- n = nrow(df_realtime),
- onset_to_hosp = df_realtime$onset_to_hosp,
- time_since_onset = 70 - df_realtime$onset_time
- )
-)
-```
-
-```{r truncated_estimate_summary}
-tres
-```
-
-::: callout-tip
-## Take 10 minutes
-
-Try re-simulating the delays using different parameters of the delay distribution.
-Can you establish under which conditions the bias in estimation gets worse?
-:::
-
-# Going further
-
-- We have looked at censoring and truncation separately, but in reality often both are present. Can you combine the two in a model?
-- The solutions we introduced for addressing censoring and truncation are only some possible ones for the censoring problem. There are other solutions that reduce the biases from estimation even further. For a full overview, the [review by Park et al.](https://doi.org/10.1101/2024.01.12.24301247) might be worth a read. If you are feeling adventurous, try to implement one or more of them in the stan model above - with a warning that this can get quite involved very quickly.
-
-# Wrap up
diff --git a/sessions/delay-distributions.qmd b/sessions/delay-distributions.qmd
deleted file mode 100644
index 0badf82..0000000
--- a/sessions/delay-distributions.qmd
+++ /dev/null
@@ -1,220 +0,0 @@
----
-title: "Delay distributions"
-order: 2
----
-
-# Introduction
-
-Every outbreak starts with an infection, but we rarely observe these directly. Instead, there are many types of epidemiological events that we might observe that occur after the time of infection: symptom onsets, hospitalisations and discharge, etc. The length of time between any of these events might be highly variable and uncertain. However, we often need to have some estimate for these delays in order to understand what's happening now and predict what might happen in the future. We capture this variability using probability distributions, which is the focus of this session.
-
-## Slides
-
-- [Introduction to epidemiological delays](slides/introduction-to-epidemiological-delays)
-
-## Objectives
-
-The aim of this session is for you to familiarise yourself with the concept of delay distributions used to describe reporting in infectious disease epidemiology.
-First we'll look at this working forwards from infections in an outbreak. We'll use `R` to probabilistically simulate delays from infection to reporting symptoms and hospitalisations. Then we'll work only from this set of outcome data and use a stan model to estimate the parameters of one specific delay (in this example, symptom onset to hospitalisation).
-
-::: {.callout-note collapse="true"}
-
-# Setup
-
-## Source file
-
-The source file of this session is located at `sessions/delay-distributions.qmd`.
-
-## Libraries used
-
-In this session we will use the `nfidd` package to load a data set of infection times and access stan models and helper functions, the `ggplot2` package for plotting, the `dplyr` and `tidyr` packages to wrangle data, the `lubridate` package to deal with dates, and the `posterior` packages for investigating the results of the inference conducted with stan.
-
-```{r libraries, message = FALSE}
-library("nfidd")
-library("ggplot2")
-library("dplyr")
-library("tidyr")
-library("lubridate")
-library("posterior")
-```
-
-::: {.callout-tip}
-The best way to interact with the material is via the [Visual Editor](https://docs.posit.co/ide/user/ide/guide/documents/visual-editor.html) of RStudio.
-:::
-
-## Initialisation
-
-We set a random seed for reproducibility.
-Setting this ensures that you should get exactly the same results on your computer as we do.
-We also set an option that makes `cmdstanr` show line numbers when printing model code.
-This is not strictly necessary but will help us talk about the models.
-
-```{r}
-set.seed(1234)
-options(cmdstanr_print_line_numbers = TRUE)
-```
-
-:::
-
-# Simulating delayed epidemiological data
-
-We will start this session by working with a simulated data set of infections from a disease that has caused an outbreak which subsequently ended.
-In our example outbreak, there were 6383 infections.
-The outbreak lasted 140 days after the first infection, with new infections peaking roughly around day 80.
-
-::: callout-note
-For now we will not concern ourselves with the model used to generate the epidemic.
-This represents a typical situation in the real world, where we may have a model of how an infection has spread, but we don't necessarily know how well this corresponds to what really happened.
-:::
-
-We will later deal with modelling the infectious process.
-For now, we will focus on modelling how infections get reported in data - the *observation* process.
-Using infectious disease data for analysis comes with three common challenges:
-
-1. We don't normally observe infections directly, but their *outcomes* as symptomatic cases, hospitalisations or other realisations.
-2. These observations are *incomplete* (e.g. not every infection leads to hospitalisation, and so focusing on hospitalisations will leave infections unobserved).
-3. These observations happen with a *delay* after the infection occurs (e.g. from infection to symptoms).
-
-## Load the outbreak data
-
-We will work with a data set that is included in the `nfidd` R package that you installed initially.
-The column `infection_time` is a linelist of infections from our example outbreak, given as a decimal number of days that have passed since the initial infection of the outbreak.
-It can be loaded with the `data` command.
-
-```{r}
-data(infection_times)
-head(infection_times)
-
-### visualise the infection curve
-ggplot(infection_times, aes(x = infection_time)) +
- geom_histogram(binwidth = 1) +
- scale_x_continuous(n.breaks = 10) +
- labs(x = "Infection time (in days)", y = "Number of infections",
- title = "Infections during an outbreak")
-```
-
-::: callout-note
-In reality, data from an outbreak will usually be given as dates, not decimals; and those will usually not represent infection, but an observed outcome such as symptom onset or hospital admission.
-For now we don't want to spend too much time manipulating dates in R, but we will get back to working with more realistic outbreak data later.
-:::
-
-## Working forwards: simulating a sequence of delays after infection
-
-We would now like to simulate hospitalisations arising from this outbreak.
-We will start with our data on infection times, and work forwards to symptom onsets and then hospitalisations. We'll make the following assumptions about the process from infection to hospital admission:
-
-- Infection to symptoms:
- - We'll assume all infections cause symptoms.
- - *Time from infection to symptom onset (incubation period):* We assume that the incubation period is **gamma**-distributed with **shape 5** and **rate 1**, i.e. a mean of 5 days.
-- Symptoms to hospital admission:
- - We'll assume that 30% of symptomatic cases become hospitalised.
- - *Time from symptom onset to hospital admission*: We assume that the onset-to-hospitalisation period is **lognormally** distributed, with **meanlog 1.75** and **sdlog 0.5**, corresponding to a mean delay of about a week.
-
-Let's put these assumptions into practice by simulating some data, adding onset and hospitalisation times (in decimal number of days after the first infection) to the infection times. We'll use random values for each infection from the probability distributions we've assumed above.
-
-::: {.callout-note}
-Throughout the course, we repeatedly use some small chunks of code.
-Instead of copying these between sessions, we've put them into functions in the `nfidd` R package that we can use when needed.
-To see exactly what the function does, just type the function name, e.g. `add_delays` for the function below.
-:::
-
-```{r inf_hosp_solution}
-df <- add_delays(infection_times)
-```
-
-::: {.callout-tip}
-## Take 2 minutes
-Have a look at the `add_delays()` function and try to understand its inner workings.
-You can also access a help file for this function, just like any other R function, using `?add_delays`.
-:::
-
-Now we can plot infections, hospitalisations and onsets.
-
-```{r plot_distributions, messages = FALSE}
-# convert our data frame to long format
-dfl <- df |>
- pivot_longer(
- cols = c(infection_time, onset_time, hosp_time),
- names_to = "type", values_to = "time"
- )
-# plot
-ggplot(dfl, aes(x = time)) +
- geom_histogram(position = "dodge", binwidth = 1) +
- facet_wrap(~ type, ncol = 1) +
- xlab("Time (in days)") +
- ylab("Count")
-```
-
-## Estimating delay distributions from outcome data
-
-As mentioned above, our data set of infection, symptom onset and hospitalisation times is not the typical data set we encounter in outbreaks.
-In reality, we don't have infection dates, and we also have to deal with missing data, incomplete observations, data entry errors etc.
-For now, let us just assume we have a data set only including symptom onset times and some hospitalisation times.
-
-Let's look at a specific problem: we would like to estimate how long it takes for people to become hospitalised after becoming symptomatic.
-This might be an important delay to know about, for example when modelling and forecasting hospitalisations, or more generally for estimating required hospital capacity.
-
-To do this we can use our data with *stan* to model the delay from symptom onset to hospitalisation, with the same assumption that it follows a lognormal distribution.
-
-```{r naive_delay_model}
-mod <- nfidd_cmdstan_model("lognormal")
-mod
-```
-
-Let's estimate the onset-to-hospitalisation delay using the simulated data set we created above. Do we recover the parameters used for simulation?
-
-We can do this by *sampling* from the model's posterior distribution by feeding it our simulated data set.
-
-```{r sample_naive_delay_model, results = 'hide', message = FALSE}
-## Specify the time from onset to hospitalisation
-df_onset_to_hosp <- df |>
- mutate(onset_to_hosp = hosp_time - onset_time) |>
- # exclude infections that didn't result in hospitalisation
- drop_na(onset_to_hosp)
-## Use the data to sample from the model posterior
-res <- mod$sample(
- data = list(
- n = nrow(df_onset_to_hosp),
- y = df_onset_to_hosp$onset_to_hosp
- )
-)
-```
-
-To find out more about the `sample` function we used here, you can use `?cmdstanr::sample`.
-To see the estimates, we can use:
-
-```{r summarise_naive_delay_model}
-res$summary() ## could also simply type `res`
-```
-
-These estimates should look similar to what we used in the simulations.
-We can plot the resulting probability density functions.
-
-::: {.callout-note}
-Every time we sample from the model posterior, we create 4000 iterations. That is a lot of data to plot, so instead, when we produce plots we'll use a random sample of 100 iterations. You'll see this throughout the course.
-:::
-
-```{r plot_onset_hospitalisation}
-## get shape and rate samples from the posterior
-res_df <- res |>
- as_draws_df() |>
- filter(.draw %in% sample(.draw, 100)) # sample 100 draws
-
-## find the value (x) that includes 99% of the cumulative density
-max_x <- max(qlnorm(0.99, meanlog = res_df$meanlog, sdlog = res_df$sdlog))
-
-## calculate density on grid of x values
-x <- seq(0, max_x, length.out = 100)
-res_df <- res_df |>
- crossing(x = x) |> ## add grid to data frame
- mutate(density = dlnorm(x, meanlog, sdlog))
-
-## plot
-ggplot(res_df, aes(x = x, y = density, group = .draw)) +
- geom_line(alpha = 0.3)
-```
-
-# Going further
-
-- In this session we were in the enviable situation of knowing which distribution was used to generate the data. With real data, of course, we don't have this information available. Try using a different distribution for inference (e.g. normal, or gamma). Do you get a good fit?
-
-# Wrap up
diff --git a/sessions/examples-of-available-tools.qmd b/sessions/examples-of-available-tools.qmd
deleted file mode 100644
index ffb6f70..0000000
--- a/sessions/examples-of-available-tools.qmd
+++ /dev/null
@@ -1,65 +0,0 @@
----
-title: "Examples of available tools"
-order: 11
----
-
-# Introduction
-
-In the course so far we introduced key concepts and methods in nowcasting and forecasting of infectious disease dynamics, using a combination of R and (mostly) stan.
-Being able to understand these concepts and implement them in stan comes with the ability to create and adapt models that are tailor made to any given situation and specific characteristics of any given data set.
-At the same time, there is value in using and contributing to open-source **tools** that implement some or all of the methods we have encountered in the course.
-These tools have varying levels of flexibility to deal with different kinds of epidemiological problems and data.
-Whilst they will never have the flexibility of completely custom approaches, using and contributing to them avoids duplication, improves the chances of finding errors, and helps discussing and ultimately enforcing best practice.
-
-## Slides
-
-- [Examples of tools](slides/tools)
-
-## Objective
-
-The aim of this session is to try out some of the tools that are available that use some of the ideas we have discussed in this course.
-
-::: {.callout-note collapse="true"}
-
-# Setup
-
-## Source file
-
-The source file of this session is located at `sessions/examples-of-available-tools.qmd`.
-
-## Installing tools
-
-In order to install the tools mentioned below, refer to the installation instructions supplied with each of the packages.
-
-:::
-
-# Trying out available tools
-
-We are going to suggest trying three tools that implement several or all the concepts introduced in this course:
-
-## EpiEstim
-
-[EpiEstim](https://mrc-ide.github.io/EpiEstim/) implements the renewal equation on a time series of infections in a Bayesian framework, i.e. the model in `estimate-r.stan` in the [session on $R_t$ estimation](R-estimation-and-the-renewal-equation#estimating-R-from-a-time-series-of-infections).
-In combination with the [projections](https://www.repidemicsconsortium.org/projections/) package in can be used for forecasting.
-
-The [EpiEstim vignette](https://mrc-ide.github.io/EpiEstim/articles/full_EpiEstim_vignette.html) is a good starting point for a walkthrough of $R_t$ estimation and forecasting.
-
-## EpiNow2
-
-[EpiNow2](https://epiforecasts.io/EpiNow2/dev/) implements the renewal equation on a time series of delayed outcomes including nowcasts with a known reporting delay distribution, as well as forecasts using a Gaussian Process or random walk model, i.e. the models introduced here except the joint inference of nowcasts and delays and/or reproduction numbers.
-
-The [example of nowcasting, $R_t$ estimation and forecasting with EpiNow2](https://github.com/epiforecasts/nowcasting.example/blob/main/inst/reports/epinow2.md) is a good place to find out more about how to use EpiNow2 with linelist data.
-
-## Epinowcast
-
-[Epinowcast](https://package.epinowcast.org/) was created to replace EpiNow2.
-It implements all the models discussed in the course with a frontend to work with line lists, data by reference and report date or cumulative data snapshots.
-Whilst this model can also be used for forecasts (as we have seen) this does not currently have a user-facing interface in the package.
-
-The [vignette on estimating the effective reproduction number in real-time for a single timeseries with reporting delays](https://package.epinowcast.org/articles/single-timeseries-rt-estimation.html) is a good example of joint nowcasting and reproduction number estimation from an aggregated time series of reference (outcome) and reporting counts.
-This example does not start from a line list but does contain information about using the package for transforming between line list and appropriately aggregated data.
-
-## Going further
-
-There are many other R packages that can be used for $R_t$ estimation, nowcasting and forecasting.
-We invite readers to suggest further additions on our [discussion board](https://github.com/nfidd/nfidd/discussions).
diff --git a/sessions/joint-nowcasting.qmd b/sessions/joint-nowcasting.qmd
deleted file mode 100644
index 76ad573..0000000
--- a/sessions/joint-nowcasting.qmd
+++ /dev/null
@@ -1,365 +0,0 @@
----
-title: "Nowcasting with an unknown reporting delay"
-order: 6.5
----
-
-# Introduction
-
-In the last session we introduced the idea of nowcasting using a simple model. However, this approach had problems: we didn't fully account for uncertainty, or for example observation error in the primary events, and it's not a fully generative model of the data reporting process. And as we saw, if we get the delay distribution wrong, we can get the nowcast very wrong.
-
-A better approach is to jointly estimate the delay distribution together with the nowcast. We can do this by using information from multiple snapshots of the data as it changes over time (using a data structure called the "reporting triangle"). In this session, we'll introduce this approach to joint estimation in nowcasting. At the end we'll then demonstrate a way to combine this with our previous work estimating the reproduction number, steadily improving our real time outbreak model.
-
-## Slides
-
-- [Introduction to joint estimation of delays and nowcasts](slides/introduction-to-joint-estimation-of-nowcasting-and-reporting-delays)
-
-## Objectives
-
-This session aims to introduce how to do nowcasting if the reporting delay distribution is unknown.
-
-::: {.callout-note collapse="true"}
-
-# Setup
-
-## Source file
-
-The source file of this session is located at `sessions/joint-nowcasting.qmd`.
-
-## Libraries used
-
-In this session we will use the `nfidd` package to load a data set of infection times and access stan models and helper functions, the `dplyr` and `tidyr` packages for data wrangling, `ggplot2` library for plotting, and the `tidybayes` package for extracting results of the inference.
-
-```{r libraries, message = FALSE}
-library("nfidd")
-library("dplyr")
-library("tidyr")
-library("ggplot2")
-library("tidybayes")
-```
-
-::: {.callout-tip}
-The best way to interact with the material is via the [Visual Editor](https://docs.posit.co/ide/user/ide/guide/documents/visual-editor.html) of RStudio.
-:::
-
-## Initialisation
-
-We set a random seed for reproducibility.
-Setting this ensures that you should get exactly the same results on your computer as we do.
-We also set an option that makes `cmdstanr` show line numbers when printing model code.
-This is not strictly necessary but will help us talk about the models.
-
-```{r}
-set.seed(123)
-options(cmdstanr_print_line_numbers = TRUE)
-```
-
-:::
-
-# Joint estimation of delay distributions and nowcasting
-
-## Motivation
-
-So far we have assumed that the delay distribution is known. In practice, this is often not the case and we need to estimate it from the data.
-As we discussed in the [session on biases in delay distributions](sessions/biases-in-delay-distributions), this can be done using individual data and then passing this estimate to a simple nowcasting model like those above.
-However, this has the disadvantage that the nowcasting model does not take into account the uncertainty in the delay distribution or observation error of the primary events.
-We can instead estimate the delay distribution and nowcast the data jointly.
-
-## The reporting triangle
-
-To jointly estimate we need to decompose observations into what is known as the reporting triangle.
-This is a matrix where the rows are the days of onset and the columns are the days of report.
-The entries are the number of onsets on day $i$ that are reported on day $j$.
-We can then use this matrix to estimate the delay distribution and nowcast the data.
-It is referred to as a triangle because the data for the more recent data entries are incomplete which gives the matrix a triangular shape.
-
-We can construct the reporting triangle from onsets ($N_{t}$) as follows:
-$$
-N_{t} = \sum_{d=0}^{D} n_{t,d}
-$$
-
-Where $n_{t,d}$ is the number of onsets on day $t$ that are reported on day $t-d$ and $D$ represents the maximum delay between date of reference and time of report which in theory could be infinite but in practice we set to a finite value to make the model identifiable and computationally feasible.
-We can now construct a model to estimate $n_{t,d}$,
-
-$$
- n_{t,d} \mid \lambda_{t},p_{t,d} \sim \text{Poisson} \left(\lambda_{t} \times p_{t,d} \right),\ t=1,...,T.
-$$
-
-where $\lambda_{t}$ is the expected number of onsets on day $t$ and $p_{t,d}$ is the probability that an onset on day $t$ is reported on day $t-d$.
-Here $\lambda_{t}$ is the same as the expected number of onsets on day $t$ in the simple nowcasting model above so we again modelled it using a geometric random walk for now.
-We model $p_{t,d}$ as a [Dirichlet distribution](https://distribution-explorer.github.io/multivariate_continuous/dirichlet.html) as it is a distribution over probabilities.
-$p_{t,d}$ is equivalent to the reporting delays we have been using as fixed quantities so far but now estimated within the model.
-In most real-world settings we would want to use our domain expertise to inform the prior distribution of $p_{t,d}$.
-
-## Simulating the reporting triangle
-
-Now that we are aiming to jointly estimate the delay distribution we need additional data.
-We can simulate this data by using the same generative process as above but now also simulating the reporting delays.
-
-Once again we generate our simulated onset dataset:
-
-```{r, load-simulated-onset}
-gen_time_pmf <- make_gen_time_pmf()
-ip_pmf <- make_ip_pmf()
-onset_df <- simulate_onsets(
- make_daily_infections(infection_times), gen_time_pmf, ip_pmf
-)
-head(onset_df)
-cutoff <- 71
-```
-
-We also need to simulate the reporting delays:
-
-```{r simulate-reporting-delays}
-reporting_delay_pmf <- censored_delay_pmf(
- rlnorm, max = 15, meanlog = 1, sdlog = 0.5
-)
-plot(reporting_delay_pmf)
-```
-
-We can then simulate the reporting triangle:
-
-```{r simulate-reporting-triangle}
-reporting_triangle <- onset_df |>
- filter(day < cutoff) |>
- mutate(
- reporting_delay = list(
- tibble(d = 0:15, reporting_delay = reporting_delay_pmf)
- )
- ) |>
- unnest(reporting_delay) |>
- mutate(
- reported_onsets = rpois(n(), onsets * reporting_delay)
- ) |>
- mutate(reported_day = day + d)
-```
-
-We also need to update our simulated truth data to include the Poisson observation error we are assuming is part of the observation process.
-
-```{r update-simulated-onset}
-noisy_onsets_df <- reporting_triangle |>
- summarise(noisy_onsets = sum(reported_onsets), .by = day)
-```
-
-As we only partially observe the reporting triangle we need to filter it to only include the data we have observed:
-
-```{r filter-reporting-triangle}
-filtered_reporting_triangle <- reporting_triangle |>
- filter(reported_day <= max(day))
-```
-
-Finally, we sum the filtered reporting triangle to get the counts we actually observe.
-
-```{r counts-observed}
-available_onsets <- filtered_reporting_triangle |>
- summarise(available_onsets = sum(reported_onsets), .by = day)
-```
-
-## Fitting the joint model
-
-As usual we start by loading the model:
-
-```{r stan-joint-nowcast}
-joint_mod <- nfidd_cmdstan_model("joint-nowcast")
-joint_mod
-```
-
-::: {.callout-tip collapse="true"}
-## Model details
-This time we won't go into details of the model.
-For now, it is important that you understand the concept but as the models get more complex we hope that you trust us that the model does what we describe above.
-
-Once thing to note is that we are now fitting the initial number of symptom onsets (`init_onsets`).
-This is different from earlier when we had to pass the initial number of infections (`I0`) as data.
-In most situations this number would be unknown so what we do here is closer to what one would do in the real world.
-:::
-
-We then fit it do data:
-
-```{r joint-nowcast-fit, results = 'hide', message = FALSE}
-joint_data <- list(
- n = length(unique(filtered_reporting_triangle$day)), # number of days
- m = nrow(filtered_reporting_triangle), # number of reports
- p = filtered_reporting_triangle |>
- group_by(day) |>
- filter(d == max(d)) |>
- mutate(d = d + 1) |>
- pull(d), # number of observations per day
- obs = filtered_reporting_triangle$reported_onsets, # observed symptom onsets
- d = 16 # number of reporting delays
-)
-joint_nowcast_fit <- joint_mod$sample(data = joint_data, parallel_chains = 4)
-```
-
-```{r joint-nowcast-fit-summary}
-joint_nowcast_fit
-```
-
-::: {.callout-important}
-One benefit of this model is that because we have decomposed the data into the reporting triangle we can make a nowcast that uses the data we have available augmented with predictions from the model.
-This should give us far more accurate uncertainty estimates than the simple nowcasting models above (see `stan/functions/combine_obs_with_predicted_obs_rng.stan` but note the code is fairly involved).
-:::
-
-We now extract this nowcast:
-
-```{r joint-nowcast}
-joint_nowcast_onsets <- joint_nowcast_fit |>
- gather_draws(nowcast[day]) |>
- ungroup() |>
- filter(.draw %in% sample(.draw, 100))
-```
-
-Finally, we can plot the nowcast alongside the observed data:
-
-```{r plot-joint-nowcast}
-ggplot(joint_nowcast_onsets, aes(x = day)) +
- geom_col(
- data = noisy_onsets_df, mapping = aes(y = noisy_onsets), alpha = 0.6
- ) +
- geom_line(mapping = aes(y = .value, group = .draw), alpha = 0.1) +
- geom_point(data = available_onsets, mapping = aes(y = available_onsets))
-```
-
-:::{.callout-tip}
-Reminder: The points in this plot represent the data available when the nowcast was made (and so are truncated) whilst the bars represent the finally reported data (a perfect nowcast would exactly reproduce these).
-:::
-
-::: {.callout-tip}
-## Take 5 minutes
-Look back at the last three nowcasts. How do they compare? What are the advantages and disadvantages of each? Could we improve the nowcasts further?
-:::
-
-::: {.callout-note collapse="true"}
-## Solution
-- The simple nowcast struggled to capture the generative process of the data and so produced poor nowcasts.
-The nowcast with the geometric random walk was better but still struggled to capture the generative process of the data.
-The joint nowcast was the best of the three as it properly handled the uncertainty and allowed us to fit the delay distribution versus relying on known delays.
-- However, the joint nowcast is still quite simple (in the sense that no detailed mechanism or reporting process is being modelled) and so may struggle to capture more complex patterns in the data.
-In particular the prior model for the geometric random walk assumes that onsets are the same as the previous day with some statistical noise.
-This may not be a good assumption in a rapidly changing epidemic (where the reproduction number is not near 1).
-- In addition, whilst we say it is "quite simple" as should be clear from the code it is quite complex and computationally intensive.
-This is because we are fitting a model to the reporting triangle which is a much larger data set and so the model is relatively quite slow to fit.
-:::
-
-
-# Putting it all together: Estimating the reproduction number, nowcasting, and joint estimation of delay distributions
-
-::: {.callout-note}
-This section contains a lot of code and is quite complex. It is not necessary to understand all of it to get the main points of the session. We recommend reading through it to get a sense of how all the pieces fit together but don't worry if you don't understand all of it.
-:::
-
-In the previous sessions, we have seen how to estimate the reproduction number and how to nowcast the data.
-We can now put these two pieces together to estimate the reproduction number and nowcast the data jointly.
-This should allow us to produce more accurate nowcasts as we can use the information from the reproduction number to inform the nowcast and vice versa.
-
-As in the [renewal sesssion](R-estimation-and-the-renewal-equation) we need to define the generation time distribution and a incubation period distribution.
-We will use the same distributions as in the [renewal session](R-estimation-and-the-renewal-equation) for simplicity.
-These are:
-
-```{r gt}
-plot(gen_time_pmf)
-```
-
-and
-
-```{r ip}
-plot(ip_pmf)
-```
-
-We now load in the model:
-
-```{r stan-joint-nowcast-rt}
-joint_rt_mod <- nfidd_cmdstan_model("joint-nowcast-with-r")
-joint_rt_mod
-```
-
-::: {.callout-tip}
-## Take 2 minutes
-Familiarise yourself with the model above.
-Can you see how it combines the nowcasting and the estimation of the reproduction number?
-Can you suggest how you swap in the simple nowcasting model whilst keeping the estimation of the reproduction number?
-:::
-
-::: {.callout-note collapse="true"}
-## Solution
-Essentially rather than using `observe_onsets_with_delay.stan` we would use `condition_onsets_by_report.stan` and pass in the proportion reported as a data.
-This would allow us to use the simple nowcasting model whilst still estimating the reproduction number.
-We would also remove the `generated quantities` block as we are not nowcasting the data and simplify the observations to just the number of onsets.
-:::
-
-Now let's fit the final model for this session!
-
-```{r joint-nowcast-rt-fit, results = 'hide', message = FALSE}
-joint_rt_data <- c(joint_data,
- list(
- gen_time_max = length(gen_time_pmf),
- gen_time_pmf = gen_time_pmf,
- ip_max = length(ip_pmf) - 1,
- ip_pmf = ip_pmf,
- h = 0 # this is a small easter egg for the attentive reader
- )
-)
-joint_rt_fit <- joint_rt_mod$sample(
- data = joint_rt_data, parallel_chains = 4,
- adapt_delta = 0.95,
- init = \() list(init_R = 0, rw_sd = 0.01)
-)
-```
-
-::: {.callout-tip}
-We use `adapt_delta = 0.95` here as this is a relatively complex model with a difficult to explore posterior. Increasing this setting decreases the step size of the sampler and so makes it easier for it to explore the posterior. The downside is that fitting then takes longer.
-:::
-
-```{r joint-nowcast-rt-fit-summary}
-joint_rt_fit
-```
-
-First we can extract the nowcast and plot the nowcast alongside the observed data:
-```{r joint-nowcast-with-r}
-joint_nowcast_with_r_onsets <- joint_rt_fit |>
- gather_draws(nowcast[day]) |>
- ungroup() |>
- filter(.draw %in% sample(.draw, 100))
-```
-
-```{r plot-joint-nowcast-with-r}
-ggplot(joint_nowcast_with_r_onsets, aes(x = day)) +
- geom_col(
- data = noisy_onsets_df, mapping = aes(y = noisy_onsets), alpha = 0.6
- ) +
- geom_line(mapping = aes(y = .value, group = .draw), alpha = 0.1) +
- geom_point(data = available_onsets, mapping = aes(y = available_onsets))
-```
-
-We can also extract the reproduction number and plot it:
-
-```{r joint-rt}
-joint_rt <- joint_rt_fit |>
- gather_draws(R[day]) |>
- ungroup() |>
- filter(.draw %in% sample(.draw, 100))
-
-ggplot(joint_rt, aes(x = day, y = .value, group = .draw)) +
- geom_line(alpha = 0.1)
-```
-
-::: {.callout-tip}
-## Take 2 minutes
-What do you think of the nowcast now?
-Does it look better than the previous one?
-What about the reproduction number?
-:::
-
-::: {.callout-note collapse="true"}
-## Solution
-- Whilst the majority of the nowcast is similar we see that the nowcast for days nearer to the present is more accurate as this model can capture the trend in infections and account for delays from infection to onset and onset to report.
-- The key takeaway from the reproduction number plot is that it looks similar to the one we estimated in the [renewal session](R-estimation-and-the-renewal-equation).
-This is because we have accounted for the truncation (otherwise it would be spuriously decreasing towards the end of the timeseries).
-:::
-
-# Going further
-
-- The simple nowcast models we showed here assumed perfect knowledge of the delay distribution. What happens when you instead use an estimate of the delay distribution from the data? Try and do this using methods from [session on biases in delay distributions](sessions/biases-in-delay-distributions) and see how it affects the simple nowcast models.
-- Despite being fairly involved the joint nowcast model we used here is still quite simple and may struggle to capture more complex patterns in the data.
-In practice more complex methods are often needed to account for structure in the reporting process, time-varying delay distributions, or delays that vary by other factors (such as the age of cases).
-- The [`epinowcast`](https://package.epinowcast.org/) package implements a more complex version of the model we have used here. It is designed to be highly flexible and so can be used to model a wide range of different data sets. You will have the opportunity to try it in a later session.
-- This session focussed on the role of the generative process in nowcasting. This is an area of active research but [this paper](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1012021) gives a good overview of the current state of the art.
diff --git a/sessions/methods-in-the-real-world.qmd b/sessions/methods-in-the-real-world.qmd
deleted file mode 100644
index bd7bf5a..0000000
--- a/sessions/methods-in-the-real-world.qmd
+++ /dev/null
@@ -1,8 +0,0 @@
----
-title: "Methods in the real world"
-order: 12
----
-
-# Objectives
-
-The aim of this session is to provide an overview of the different methods that are used in the real world from the perspective of the particpants and course instructors.
diff --git a/sessions/nowcasting.qmd b/sessions/nowcasting.qmd
deleted file mode 100644
index 5bbe93a..0000000
--- a/sessions/nowcasting.qmd
+++ /dev/null
@@ -1,397 +0,0 @@
----
-title: "Nowcasting concepts"
-order: 6
----
-
-# Introduction
-
-So far we've explored the delays and biases of real-time outbreak data, started to correct for these, and considered the underlying process that drives the evolution of an outbreak (looking at the reproduction number and renewal equation). Next, we'll focus on predicting new information about how an outbreak is evolving in the present and future.
-
-We know that we have incomplete information in the present because of delays in the observation process (reporting delays). The aim of nowcasting is to predict what an epidemiological time series will look like *after all delayed reports* are in, for which we need to account for the delays and biases we've already considered.
-
-## Slides
-
-- [Introduction to nowcasting](slides/introduction-to-nowcasting)
-
-## Objectives
-
-This session aims to introduce the concept of _nowcasting_, and see how we can perform a nowcast if we know the underlying delay distribution.
-
-::: {.callout-note collapse="true"}
-
-# Setup
-
-## Source file
-
-The source file of this session is located at `sessions/nowcasting.qmd`.
-
-## Libraries used
-
-In this session we will use the `nfidd` package to load a data set of infection times and access stan models and helper functions, the `dplyr` and `tidyr` packages for data wrangling, `ggplot2` library for plotting, and the `tidybayes` package for extracting results of the inference.
-
-```{r libraries, message = FALSE}
-library("nfidd")
-library("dplyr")
-library("tidyr")
-library("ggplot2")
-library("tidybayes")
-```
-
-::: {.callout-tip}
-The best way to interact with the material is via the [Visual Editor](https://docs.posit.co/ide/user/ide/guide/documents/visual-editor.html) of RStudio.
-:::
-
-## Initialisation
-
-We set a random seed for reproducibility.
-Setting this ensures that you should get exactly the same results on your computer as we do.
-We also set an option that makes `cmdstanr` show line numbers when printing model code.
-This is not strictly necessary but will help us talk about the models.
-
-```{r}
-set.seed(123)
-options(cmdstanr_print_line_numbers = TRUE)
-```
-
-:::
-
-# Simulating delayed reporting
-
-Epidemiological data is not usually available immediately for analysis.
-Instead, data usually gets collated at different levels of a healthcare or health surveillance system, cleaned, checked before being aggregated and/or anonymised and ultimately shared with an analyst.
-We call the _reporting time_ the time a data point (e.g. a time or day of symptom onset or a time or day of hospitalisation) has entered the data set used for some analysis.
-Similar to the data discussed in the preceding session, this time is often only available as a date, i.e. censored at the scale of a day.
-
-We can simulate this reporting process.
-Let us assume that the symptom onsets are reported with a delay and that this delay is characterised by a lognormal distribution with meanlog 1 and sdlog 0.5:
-To do so, we perform a very similar simulation to what we did in the [session on delay distributions](delay-distributions#simulating-delayed-epidemiological-data), except now we don't simulate hospitalisations but reports of symptom onsets:
-
-```{r onset_report}
-data(infection_times)
-df <- infection_times |>
- mutate(
- onset_time = infection_time + rgamma(n(), shape = 5, rate = 1),
- report_time = onset_time + rlnorm(n(), meanlog = 1, sdlog = 0.5)
- )
-```
-
-We then assume that we're 70 days into the outbreak, i.e. we only consider observations with a reporting time less than 71 - other symptom onset may have already happened, but we have not observed them yet.
-
-
-```{r truncate_reports}
-cutoff <- 71
-df_co <- df |>
- filter(report_time < cutoff)
-```
-
-We can now convert this to a time series of symptom onsets and reports:
-
-```{r aggregate}
-## create time series of infections, onsets, and reports
-df_co <- df_co |>
- transmute(
- infection_day = floor(infection_time),
- onset_day = floor(onset_time),
- report_day = floor(report_time)
- )
-
-infection_ts <- df_co |>
- count(day = infection_day, name = "infections")
-onset_ts <- df_co |>
- count(day = onset_day, name = "onsets")
-reports_ts <- df_co |>
- count(day = report_day, name = "reports")
-
-all_days <- expand_grid(day = seq(0, cutoff - 1)) |>
- full_join(infection_ts, by = "day") |>
- full_join(onset_ts, by = "day") |>
- full_join(reports_ts, by = "day") |>
- replace_na(list(onsets = 0, reports = 0))
-```
-
-Plotting these, we get
-
-```{r ts_plot, fig.height = 10}
-combined <- all_days |>
- pivot_longer(c(onsets, reports, infections), names_to = "variable")
- ggplot(combined, aes(x = day, y = value)) +
- facet_grid(variable ~ .) +
- geom_col()
-```
-
-Looking at the three plots in isolation we would conclude very different things about the epidemic: symptom onsets seem to have flattened off and perhaps are going down, whereas reports are increasing rapidly.
-
-This apparent contradiction appears because onsets are reported with a delay.
-By cutting off at a certain _reporting_ date, we do not observe many of the recent symptom onsets that are yet to be be reported.
-We can see that if we plot the final data set alongside the cut-off one:
-
-```{r plot_cut_final}
-# Use full outbreak dataset
-final <- df |>
- transmute(onset_day = floor(onset_time))
-final_onset_ts <- final |>
- count(day = onset_day, name = "onsets")
-final_all_days <- expand_grid(day = seq(0, max(final_onset_ts$day))) |>
- full_join(final_onset_ts, by = "day") |>
- replace_na(list(onsets = 0)) |>
- mutate(cutoff = "final")
-intermediate <- combined |>
- filter(variable == "onsets") |>
- select(-variable) |>
- rename(onsets = value) |>
- mutate(cutoff = "70 days")
-combined_cutoffs <- rbind(
- intermediate,
- final_all_days
-)
-ggplot(combined_cutoffs, aes(x = day, y = onsets, colour = cutoff)) +
- geom_line() +
- scale_colour_brewer(palette = "Dark2") +
- geom_vline(xintercept = cutoff, linetype = "dashed")
-```
-
-As we can see, even though it may much seem like the epidemic curve is going down, in fact in the final data set one can see that at the time symptom onsets were still increasing.
-The apparent decline towards the present (indicated by a dashed vertical line) was caused by the delay in reporting.
-
-::: {.callout-note}
-## Plotting and analysing data by report date
-Looking at the plots one might suggest plotting and analysing the data by date of reporting which correctly showed the data to be still increasing and which should, by definition, not be subject to future changes.
-
-This can sometimes be a sensible way to visualise the data.
-However, reporting might itself be subject to biases such as breaks during the weekend, holidays etc or reporting delays may vary over time.
-At the same time, when it comes to capacity or intervention planning we may need to know how many people e.g. become sick on any given day and will thus present to the healthcare system rather than how many will be reported.
-:::
-
-::: {.callout-tip}
-Estimating the "true curve" (i.e. what we expect to see once the data are complete at a future date) of the time series of _epidemiologically relevant events_ from a potentially truncated epidemiological curve and information about the delays is what is usually called "nowcasting".
-:::
-
-# Nowcasting with a known delay
-
-## The simplest possible nowcasting model
-
-Here we assume that the delay distribution is known and that we can use it to nowcast the most recent data. In practice, the delay distribution is often not known and needs to be estimated from the data. We could do this using methods from [the session on biases in delay distributions](sessions/biases-in-delay-distributions).
-
-In the [session on convolutions](using-delay-distributions-to-model-the-data-generating-process-of-an-epidemic#estimating-a-time-series-of-infections) we used delay distributions convolved with the infection times to estimate the time series of symptom onsets. A simple way to nowcast is to use the same approach but using the cumulative distribution function of the delay distribution rather than the probability density function and only apply it to the most recent data as this is the only data that can be subject to change (due to delays in reporting).
-
-We will build intuition for this as usual using simulation. First we define the proportion reported using a delay distribution up to 15 days, again using a lognormal distribution with meanlog 1 and sdlog 0.5:
-
-```{r}
-proportion_reported <- plnorm(1:15, 1, 0.5)
-plot(proportion_reported)
-```
-
-::: {.callout-tip}
-## The `plnorm` function
-The `plnorm()` function is related to the `rlnorm()` function we used earlier to simulate the individual level reporting delay, but instead it gives the cumulative distribution function rather than random samples. That is, it gives us the probability that a report is made on day 1 or earlier, day 2 or earlier, etc.
-:::
-
-We can now use this delay distribution to nowcast the most recent data. Here we use the same simulation approach as in the [renewal session](R-estimation-and-the-renewal-equation) (here we have created helper functions `make_gen_time_pmf()` and `make_ip_pmf()` to make it easier to re-use; we recommend to have a look at what these functions do), and apply the `reporting_delay` to the last 15 days of data.
-
-```{r, load-simulated-onset}
-gen_time_pmf <- make_gen_time_pmf()
-ip_pmf <- make_ip_pmf()
-onset_df <- simulate_onsets(
- make_daily_infections(infection_times), gen_time_pmf, ip_pmf
-)
-reported_onset_df <- onset_df |>
- filter(day < cutoff) |>
- mutate(proportion_reported = c(rep(1, n() - 15), rev(proportion_reported)),
- reported_onsets = rpois(n(), onsets * proportion_reported)
- )
-tail(reported_onset_df)
-```
-
-::: {.callout-tip}
-## Take 5 minutes
-Spend a few minutes trying to understand the code above. What is the `proportion_reported`? What is the `reported_onsets`?
-:::
-
-::: {.callout-note collapse="true"}
-## Solution
-- The `proportion_reported` is the cumulative distribution function of the delay distribution. It gives the probability that a report is made on day 1 or earlier, day 2 or earlier, etc. Note that for days more that 15 days into the past
-- The `reported_onsets` are the number of onsets that are reported on each day. This is calculated by multiplying the number of onsets by the proportion of onsets that are reported on each day. It has Poisson noise added to it to simulate the stochasticity in the reporting process.
-
-```{r plot-proportion-reported}
-reported_onset_df |>
- ggplot(aes(x = day, y = reported_onsets)) +
- geom_col()
-```
-:::
-
-We can now fit our first nowcasting model. Here we assume exactly the same generative process as we used for simulation and model the number of onsets as independent draws from a normal distribution.
-
-```{r stan-simple-nowcast}
-mod <- nfidd_cmdstan_model("simple-nowcast")
-mod
-```
-
-::: {.callout-tip}
-## Take 5 minutes
-Familiarise yourself with the model above. What does it do?
-:::
-
-::: {.callout-note collapse="true"}
-## Solution
-- On line 2 we define a new function `condition_onsets_by_report.stan` which takes the number of onsets and reports and the delay distribution as input and returns the nowcasted number of onsets.
-- On line 17, this function is used to calculate the nowcasted number of onsets and this is then used in the likelihood.
-- On line 21, we define the generative process for the number of onsets. Here we assume that onsets are independent with each drawn from a normal distribution.
-:::
-
-Once again we can generate estimates from this model:
-
-```{r nowcast_fit, results = 'hide', message = FALSE}
-data <- list(
- n = nrow(reported_onset_df) - 1,
- obs = reported_onset_df$reported_onsets[-1],
- report_max = length(proportion_reported) - 1,
- report_cdf = proportion_reported
-)
-simple_nowcast_fit <- mod$sample(data = data, parallel_chains = 4)
-```
-
-```{r nowcast_fit_summary}
-simple_nowcast_fit
-```
-
-We can now plot onsets alongside those nowcasted by the model:
-
-```{r simple-nowcast-onsets}
-nowcast_onsets <- simple_nowcast_fit |>
- gather_draws(onsets[day]) |>
- ungroup() |>
- filter(.draw %in% sample(.draw, 100)) |>
- mutate(day = day + 1)
-```
-
-```{r plot_nowcast}
-ggplot(nowcast_onsets, aes(x = day)) +
- geom_line(mapping = aes(y = .value, group = .draw), alpha = 0.1) +
- geom_col(data = reported_onset_df, mapping = aes(y = onsets), alpha = 0.6) +
- geom_point(data = reported_onset_df, mapping = aes(y = reported_onsets))
-```
-
-:::{.callout-tip}
-The points in this plot represent the data available when the nowcast was made (and so are truncated) whilst the bars represent the finally reported data (a perfect nowcast would exactly reproduce these).
-:::
-
-::: {.callout-tip}
-As we found in the [using delay distributions to model the data generating process of an epidemic session](using-delay-distributions-to-model-the-data-generating-process-of-an-epidemic#estimating-a-time-series-of-infections), this simple model struggles to recreate the true number of onsets. This is because it does not capture the generative process of the data (i.e. the transmission process and delays from infection to onset). In the next section we will see how we can use a model that does capture this generative process to improve our nowcasts.
-:::
-
-## Adding in a geometric random walk to the nowcasting model
-
-As we saw in the [session on the renewal equation](R-estimation-and-the-renewal-equation), a geometric random walk is a simple way to model multiplicative growth. Adding this into our simple nowcasting model may help us to better capture the generative process of the data and so produce a better nowcast.
-
-We first load the model
-
-```{r stan-nowcast-with-rw}
-rw_mod <- nfidd_cmdstan_model("simple-nowcast-rw")
-rw_mod
-```
-
-and then fit it
-
-```{r rw-nowcast-fit, results = 'hide', message = FALSE}
-rw_nowcast_fit <- rw_mod$sample(data = data, parallel_chains = 4)
-```
-
-```{r rw-nowcast-fit-summary}
-rw_nowcast_fit
-```
-
-
-Again we can extract the nowcasted onsets and plot them alongside the observed data:
-
-```{r rw-nowcast-onsets}
-rw_nowcast_onsets <- rw_nowcast_fit |>
- gather_draws(onsets[day]) |>
- ungroup() |>
- filter(.draw %in% sample(.draw, 100)) |> ## sample 100 iterations randomly
- mutate(day = day + 1)
-```
-
-```{r rw-plot_nowcast}
-ggplot(rw_nowcast_onsets, aes(x = day)) +
- geom_col(data = reported_onset_df, mapping = aes(y = onsets), alpha = 0.6) +
- geom_line(mapping = aes(y = .value, group = .draw), alpha = 0.1) +
- geom_point(data = reported_onset_df, mapping = aes(y = reported_onsets))
-```
-
-::: {.callout-tip}
-## Take 2 minutes
-What do you think of the nowcast now? Does it look better than the previous one?
-:::
-
-::: {.callout-note collapse="true"}
-## Solution
-- The nowcast better matches the ultimately observed data.
-The geometric random walk allows the model to capture the multiplicative growth in the data and so better capture that current indidence is related to past incidence.
-- This should be particularly true when the data is more truncated (i.e nearer to the date of the nowcast) as the geometric random walk allows the model to extrapolate incidence based on previous incidence rather than relying on the prior distribution as the simpler model did.
-- However, the model is still quite simple and so may struggle to capture more complex patterns in the data.
-In particular, the prior model for the geometric random walk assumes that onsets are the same as the previous day with statistical noise.
-This may not be a good assumption in a rapidly changing epidemic (where the reproduction number is not near 1).
-:::
-
-## What happens if we get the delay distribution wrong?
-
-In practice, we often do not know the delay distribution and so need to estimate it using the data at hand.
-In the [session on biases in delay distributions](sessions/biases-in-delay-distributions) we saw how we could do this using individual-level records.
-We will now look at what happens if we get the delay distribution wrong.
-
-We use the same data as before but now assume that the delay distribution is a gamma distribution with shape 2 and rate 3.
-This is a very different distribution to the lognormal distribution we used to simulate the data.
-
-```{r gamma-dist}
-wrong_proportion_reported <- pgamma(1:15, 2, 3)
-plot(wrong_proportion_reported)
-```
-
-We first need to update the data to use this new delay distribution:
-
-```{r wrong-delay-data}
-wrong_delay_data <- data
-wrong_delay_data$report_cdf <- wrong_proportion_reported
-```
-
-We now fit the nowcasting model with the wrong delay distribution:
-
-```{r gamma-nowcast-fit, results = 'hide', message = FALSE}
-gamma_nowcast_fit <- rw_mod$sample(data = wrong_delay_data, parallel_chains = 4)
-```
-
-```{r gamma-nowcast-fit-summary}
-gamma_nowcast_fit
-```
-
-
-Again we can extract the nowcast of symptom onsets and plot it alongside the observed data:
-
-```{r gamma-nowcast-onsets}
-gamma_nowcast_onsets <- gamma_nowcast_fit |>
- gather_draws(onsets[day]) |>
- ungroup() |>
- filter(.draw %in% sample(.draw, 100)) |>
- mutate(day = day + 1)
-```
-
-```{r gamma-plot_nowcast}
-ggplot(gamma_nowcast_onsets, aes(x = day)) +
- geom_col(data = reported_onset_df, mapping = aes(y = onsets), alpha = 0.6) +
- geom_line(mapping = aes(y = .value, group = .draw), alpha = 0.1) +
- geom_point(data = reported_onset_df, mapping = aes(y = reported_onsets))
-```
-
-::: {.callout-tip}
-## Take 2 minutes
-What do you think of the nowcast now? How would you know you had the wrong delay if you didn't have the true delay distribution?
-:::
-
-::: {.callout-note collapse="true"}
-## Solution
-- The nowcast now looks very different to the observed data. This is because the model is using the wrong delay distribution to nowcast the data.
-- If you didn't have the true delay distribution you would not know that you had the wrong delay distribution. This is why it is important to estimate the delay distribution from the data.
-- In practice, you would likely compare the nowcast to the observed data and if they did not match you would consider whether the delay distribution was the cause.
-:::
-
-# Wrap up
diff --git a/sessions/slides/convolutions.qmd b/sessions/slides/convolutions.qmd
deleted file mode 100644
index 89ae3d6..0000000
--- a/sessions/slides/convolutions.qmd
+++ /dev/null
@@ -1,75 +0,0 @@
----
-title: "Delay distributions at the population level"
-author: "Nowcasting and forecasting of infectious disease dynamics"
-engine: knitr
-format:
- revealjs:
- output: slides/introduction-to-biases-in-epidemiological-delays.html
- footer: "Delay distributions at the population level"
----
-
-## Individual delays
-
-If $f(t)$ is our delay distribution then
-
-$$
-p(y_i) = f(y_i - x_i)
-$$
-
-is the probability that *secondary* event of individual $i$ happens at time $y_i$ given its primary event happened at $x_i$.
-
-## Population level counts
-
-The expected number of individuals $S_t$ that have their secondary event at time $t$ can then be calculated as the sum of these probabilities
-
-$$
-S_t = \sum_i f_{t - x_i}
-$$
-
-. . .
-
-**Note:** If $S_t$ is in discrete time steps then $f_t$ needs to be a discrete probability distribution.
-
-## Population level counts
-
-If the number of individuals $P_{t'}$ that have their primary event at time $t'$ then we can rewrite this as
-
-$$
-S_t = \sum_{t'} P_{t'} f_{t - t'}
-$$
-
-This operation is called a (discrete) **convolution** of $P$ with $f$.
-
-We can use convolutions with the delay distribution that applies at the *individual* level to determine *population-level* counts.
-
-## Example: infections to symptom onsets
-
-![](figures/infections_onset_sketch.png)
-
-## Why use a convolution, not individual delays?
-
-- we don't always have individual data available
-- we often model other processes at the population level (such as transmission) and so being able to model delays on the same scale is useful
-- doing the computation at the population level requires fewer calculations (i.e. is faster)
-
-::: {.fragment .fade-in}
-- **however, a downside is that we won't have realistic uncertainty, especially if the number of individuals is small**
-:::
-
-## What if $f$ is continuous?
-
-Having moved to the population level, we can't estimate individual-level event times any more.
-
-Instead, we *discretise* the distribution (remembering that it is **double censored** - as both events are censored).
-
-This can be solved mathematically but in the session we will use simulation.
-
-## `r fontawesome::fa("laptop-code", "white")` Your Turn {background-color="#447099" transition="fade-in"}
-
-- Simulate convolutions with infection counts
-- Discretise continuous distributions
-- Estimate parameters numbers of infections from number of symptom onsets, using a convolution model
-
-#
-
-[Return to the session](../biases-in-delay-distributions)
diff --git a/sessions/slides/figures/bayesian_model.png b/sessions/slides/figures/bayesian_model.png
deleted file mode 100644
index 5a78d11..0000000
Binary files a/sessions/slides/figures/bayesian_model.png and /dev/null differ
diff --git a/sessions/slides/figures/bayesian_model_without_distributions.png b/sessions/slides/figures/bayesian_model_without_distributions.png
deleted file mode 100644
index e0a0786..0000000
Binary files a/sessions/slides/figures/bayesian_model_without_distributions.png and /dev/null differ
diff --git a/sessions/slides/figures/censoring_1.png b/sessions/slides/figures/censoring_1.png
deleted file mode 100644
index 93dded6..0000000
Binary files a/sessions/slides/figures/censoring_1.png and /dev/null differ
diff --git a/sessions/slides/figures/censoring_2.png b/sessions/slides/figures/censoring_2.png
deleted file mode 100644
index 09b801f..0000000
Binary files a/sessions/slides/figures/censoring_2.png and /dev/null differ
diff --git a/sessions/slides/figures/censoring_final.png b/sessions/slides/figures/censoring_final.png
deleted file mode 100644
index 05f6b9d..0000000
Binary files a/sessions/slides/figures/censoring_final.png and /dev/null differ
diff --git a/sessions/slides/figures/complete_reporting_triangle.png b/sessions/slides/figures/complete_reporting_triangle.png
deleted file mode 100644
index 6e3483c..0000000
Binary files a/sessions/slides/figures/complete_reporting_triangle.png and /dev/null differ
diff --git a/sessions/slides/figures/ferguson2020.png b/sessions/slides/figures/ferguson2020.png
deleted file mode 100644
index 5367af3..0000000
Binary files a/sessions/slides/figures/ferguson2020.png and /dev/null differ
diff --git a/sessions/slides/figures/germany_early.png b/sessions/slides/figures/germany_early.png
deleted file mode 100644
index 4bf1539..0000000
Binary files a/sessions/slides/figures/germany_early.png and /dev/null differ
diff --git a/sessions/slides/figures/germany_historical.png b/sessions/slides/figures/germany_historical.png
deleted file mode 100644
index 763c3c2..0000000
Binary files a/sessions/slides/figures/germany_historical.png and /dev/null differ
diff --git a/sessions/slides/figures/hub-ensemble-good.png b/sessions/slides/figures/hub-ensemble-good.png
deleted file mode 100644
index a97824b..0000000
Binary files a/sessions/slides/figures/hub-ensemble-good.png and /dev/null differ
diff --git a/sessions/slides/figures/hub-ensemble-less-good.png b/sessions/slides/figures/hub-ensemble-less-good.png
deleted file mode 100644
index f5d3dce..0000000
Binary files a/sessions/slides/figures/hub-ensemble-less-good.png and /dev/null differ
diff --git a/sessions/slides/figures/hub-metadata.png b/sessions/slides/figures/hub-metadata.png
deleted file mode 100644
index b5ddbd3..0000000
Binary files a/sessions/slides/figures/hub-metadata.png and /dev/null differ
diff --git a/sessions/slides/figures/infections_onset_sketch.png b/sessions/slides/figures/infections_onset_sketch.png
deleted file mode 100644
index 0166243..0000000
Binary files a/sessions/slides/figures/infections_onset_sketch.png and /dev/null differ
diff --git a/sessions/slides/figures/linelist.png b/sessions/slides/figures/linelist.png
deleted file mode 100644
index eb84d93..0000000
Binary files a/sessions/slides/figures/linelist.png and /dev/null differ
diff --git a/sessions/slides/figures/monkeypox_delayed.png b/sessions/slides/figures/monkeypox_delayed.png
deleted file mode 100644
index 6f7df5f..0000000
Binary files a/sessions/slides/figures/monkeypox_delayed.png and /dev/null differ
diff --git a/sessions/slides/figures/nishiura2007.png b/sessions/slides/figures/nishiura2007.png
deleted file mode 100644
index 4349d8e..0000000
Binary files a/sessions/slides/figures/nishiura2007.png and /dev/null differ
diff --git a/sessions/slides/figures/nowcasting.png b/sessions/slides/figures/nowcasting.png
deleted file mode 100644
index 24d5689..0000000
Binary files a/sessions/slides/figures/nowcasting.png and /dev/null differ
diff --git a/sessions/slides/figures/r_convolution_sketch.png b/sessions/slides/figures/r_convolution_sketch.png
deleted file mode 100644
index b6739e6..0000000
Binary files a/sessions/slides/figures/r_convolution_sketch.png and /dev/null differ
diff --git a/sessions/slides/figures/reporting_triangle.png b/sessions/slides/figures/reporting_triangle.png
deleted file mode 100644
index d8bb509..0000000
Binary files a/sessions/slides/figures/reporting_triangle.png and /dev/null differ
diff --git a/sessions/slides/figures/respicast.png b/sessions/slides/figures/respicast.png
deleted file mode 100644
index ec7896f..0000000
Binary files a/sessions/slides/figures/respicast.png and /dev/null differ
diff --git a/sessions/slides/figures/truncation_1.png b/sessions/slides/figures/truncation_1.png
deleted file mode 100644
index e018358..0000000
Binary files a/sessions/slides/figures/truncation_1.png and /dev/null differ
diff --git a/sessions/slides/figures/truncation_final.png b/sessions/slides/figures/truncation_final.png
deleted file mode 100644
index db21ad1..0000000
Binary files a/sessions/slides/figures/truncation_final.png and /dev/null differ
diff --git a/sessions/slides/figures/truncation_prefinal.png b/sessions/slides/figures/truncation_prefinal.png
deleted file mode 100644
index 39529e3..0000000
Binary files a/sessions/slides/figures/truncation_prefinal.png and /dev/null differ
diff --git a/sessions/slides/figures/ward2022.png b/sessions/slides/figures/ward2022.png
deleted file mode 100644
index 84ea75a..0000000
Binary files a/sessions/slides/figures/ward2022.png and /dev/null differ
diff --git a/sessions/slides/from-line-list-to-decisions.qmd b/sessions/slides/from-line-list-to-decisions.qmd
deleted file mode 100644
index 5ff0ab3..0000000
--- a/sessions/slides/from-line-list-to-decisions.qmd
+++ /dev/null
@@ -1,71 +0,0 @@
----
-title: "From an epidemiological line list to informing decisions in real-time"
-author: "Nowcasting and forecasting of infectious disease dynamics"
-engine: knitr
-format:
- revealjs:
- output: slides/from-line-list-to-decisions.html
- footer: "From an epidemiological line list to informing decisions in real-time"
- slide-level: 3
----
-
-### "We were losing ourselves in details [...] all we needed to know is, are the number of cases rising, falling or levelling off?"
-
-Hans Rosling, Liberia, 2014
-
-. . .
-
-- what **is** the number of cases now?
-- is it rising/falling and by how much?
-- what does this mean for the near future?
-
-### Data usually looks like this
-
-![](figures/linelist.png)
-
-### Aggregated data can look like this {.smaller}
-
-![](figures/monkeypox_delayed.png)
-
-[UKHSA, 2022](https://www.gov.uk/government/publications/monkeypox-outbreak-technical-briefings/investigation-into-monkeypox-outbreak-in-england-technical-briefing-1)
-[Overton et al., *PLOS Comp Biol*, 2023](https://doi.org/10.1371/journal.pcbi.1011463)
-
-### Aim of this course:
-
-How can we use data typically collected in an outbreak to answer questions like
-
-- what **is** the number of cases now? (*nowcasting*)
-- is it rising/falling and by how much? (*$R_t$ estimation*)
-- what does this mean for the near future (*forecasting*)
-
-in real time.
-
-### Approach
-
-Throughout the course we will
-
-1. simulate typical infectious disease data in **R**
-(the *generative model*)
-2. apply the generative model to data in **stan** to
- - learn about the system (conduct inference)
- - make predictions (nowcasting/forecasting)
-
-### Timeline
-
-::: {.incremental}
-- delay distributions and how to estimate them (day 1)
-- $R_t$ estimation and the generation interval (day 1)
-- nowcasting (day 2)
-- forecasting and evaluation, ensemble methods (day 2)
-- applications (day 3)
-:::
-
-#
-
-To start the course go to:
-[https://nfidd.github.io/nfidd/](https://nfidd.github.io/nfidd/)
-and get started on the first sesion (*Probability distributions and parameter estimation*)
-
-#
-
-[Return to the session](../introduction-and-course-overview)
diff --git a/sessions/slides/introduction-to-biases-in-epidemiological-delays.qmd b/sessions/slides/introduction-to-biases-in-epidemiological-delays.qmd
deleted file mode 100644
index 2429bfb..0000000
--- a/sessions/slides/introduction-to-biases-in-epidemiological-delays.qmd
+++ /dev/null
@@ -1,102 +0,0 @@
----
-title: "Introduction to biases in epidemiological delays"
-author: "Nowcasting and forecasting of infectious disease dynamics"
-engine: knitr
-format:
- revealjs:
- output: slides/introduction-to-biases-in-epidemiological-delays.html
- footer: "Introduction to biases in epidemiological delays"
----
-
-```{r seed}
-set.seed(123)
-```
-## Biases in epidemiological delays
-
-Why might our estimates of epidemiological delays be biased?
-
-. . .
-
-::: {.fragment .fade-out}
-- data reliability and representativeness
-:::
-- intrinsic issues with data collection and recording
-
-## Issue #1: Double censoring
-
-- reporting of events usually as a **date** (not date + precise time)
-- for short delays this can make quite a difference
-- accounting for it incorrectly can introduce more bias than doing nothing
-
-## Double censoring: example {.smaller}
-
-We are trying to estimate an incubation period. For person A we know exposure happened on day 1 and symptom onset on day 3.
-
-![](figures/censoring_1.png)
-
-## Double censoring: example {.smaller}
-
-We are trying to estimate an incubation period. For person A we know exposure happened on day 1 and symptom onset on day 3.
-
-![](figures/censoring_2.png)
-
-## Double censoring: example {.smaller}
-
-We are trying to estimate an incubation period. For person A we know exposure happened on day 1 and symptom onset on day 3.
-
-![](figures/censoring_final.png)
-
-## Double censoring: example {.smaller}
-
-We are trying to estimate an incubation period. For person A we know exposure happened on day 1 and symptom onset on day 3.
-
-![](figures/censoring_final.png)
-
-The *true* incubation period of A could be anywhere between 1 and 3 days (but not all equally likely).
-
-## Issue #2: right truncation
-
-- reporting of events can be triggered by the **secondary** event
-- in that case, longer delays might be missing because whilst the *primary events* have occurred the *secondary events* **have not occurred yet**
-
-## Example: right truncation {.smaller}
-
-We are trying to estimate an incubation period. Each arrow represents one person with an associated pair of events (infection and symptom onset).
-
-![](figures/truncation_1.png)
-
-## Example: right truncation {.smaller}
-
-We are trying to estimate an incubation period. Each arrow represents one person with an associated pair of events (infection and symptom onset).
-
-![](figures/truncation_prefinal.png)
-
-## Example: right truncation {.smaller}
-
-We are trying to estimate an incubation period. Each arrow represents one person with an associated pair of events (infection and symptom onset).
-
-![](figures/truncation_final.png)
-
-## Example: right truncation {.smaller}
-
-We are trying to estimate an incubation period. Each arrow represents one person with an associated pair of events (infection and symptom onset)
-
-![](figures/truncation_final.png)
-
-On the day of analysis we have not observed some delays yet, and these tended to be **longer**. This is made worse during periods of **exponential growth**.
-
-## Censoring and right truncation {.smaller}
-
-- When analysing data from an outbreak in real time, we are likely to have double censoring **and** right truncation, making things worse
-- In the practical we will only look at the two separately to keep things simple
-
-## `r fontawesome::fa("laptop-code", "white")` Your Turn {background-color="#447099" transition="fade-in"}
-
-1. Simulate epidemiological delays with biases
-2. Estimate parameters of a delay distribution, correcting for biases
-
-
-#
-
-[Return to the session](../biases-in-delay-distributions)
-
diff --git a/sessions/slides/introduction-to-epidemiological-delays.qmd b/sessions/slides/introduction-to-epidemiological-delays.qmd
deleted file mode 100644
index ecbf3e6..0000000
--- a/sessions/slides/introduction-to-epidemiological-delays.qmd
+++ /dev/null
@@ -1,171 +0,0 @@
----
-title: "Introduction to epidemiological delays"
-author: "Nowcasting and forecasting of infectious disease dynamics"
-engine: knitr
-format:
- revealjs:
- output: slides/introduction-to-epidemiological-delay-distributions.html
- footer: "Introduction to epidemiological delays"
----
-
-```{r seed}
-set.seed(123)
-library("dplyr")
-library("tidyr")
-library("ggplot2")
-```
-# Epidemiological delay
-
-Time between two epidemiological events
-
-## Epidemiological events: disease progression
-- infection
-- symptom onset
-- becoming infectious
-- hospital admission
-- death
-
-## Epidemiological events: recovery
-- pathogen clearance
-- symptoms clearance
-- end of infectiousness
-- discharge from hospital
-
-## Epidemiological events: control
-
-- quarantine
-- isolation
-- treatment
-
-## Epidemiological events: reporting
-
-- specimen taken
-- report added to database
-
-## Some delays have names
-
-*infection* to *symptom onset*
-
-. . .
-
-**Incubation period**
-
-## Some delays have names
-
-*infection* to *becoming infectious*
-
-. . .
-
-**Latent period**
-
-## Some delays have names
-
-*becoming infectious* to *end of infectiousness*
-
-. . .
-
-**Infectious period**
-
-## Some delays have names
-
-hospital *admission* to *discharge*
-
-. . .
-
-**Length of stay**
-
-## Some delays have names
-
-*symptom onset* (person A) to *symptom onset* (person B, infected by A)
-
-. . .
-
-**Serial interval**
-
-## Some delays have names
-
-*infection* (person A) to *infection* (person B, infected by A)
-
-. . .
-
-**Generation interval**
-
-## Why do we want to know these?
-
-## Key parameters in mathematical models {.smaller}
-
-![](figures/ferguson2020.png)
-
-[Ferguson et al., 2020](https://doi.org/10.25561/77482)
-
-## Key parameters in mathematical models {.smaller}
-
-![](figures/ward2022.png)
-
-[Ward et al., 2020](https://doi.org/10.25561/77482)
-
-## Key elements of infectious disease epidemiology {.smaller}
-
-![](figures/nishiura2007.png)
-[Nishiura et al., 2007](https://doi.org/10.1186/1742-7622-4-2)
-
-## Why do we want to know these?
-
-- Key elements of infectious disease epidemiology
-- Intricate relationship with nowcasting/forecasting
-
-## Quantifying delays
-
-- Epidemiological delays are *variable*
-- We can capture their variability using probability distributions
-
-## Warning: Two levels of uncertainty {background-color="#000000" transition="fade-in" .smaller}
-
-- Probability distributions characterise variability in the delays *between individuals*
-- *Parameters* of the probability distribution can be uncertain
-
-:::: {.columns}
-
-::: {.column width="40%"}
-$$
-\alpha \sim \mathrm{Normal}(mean = 5, sd = 0.1) \\
-\beta \sim \mathrm{Normal}(mean = 1, sd = 0.1) \\
-$$
-
-Showing probability density functions of lognormal distributions with shape $\alpha$ and rate $\beta$.
-
-:::
-
-::: {.column width="60%"}
-```{r plot_multiples, fig.width=5, fig.height=3}
-n <- 20
-df <- tibble(
- id = seq_len(n),
- shape = rnorm(n, mean = 5, sd = 0.1),
- rate = rlnorm(n, meanlog = 0, sdlog = 0.1)
-)
-
-## plot 99% of PMF
-max_x <- max(qgamma(0.99, shape = df$shape, rate = df$rate))
-
-## calculate density on grid of x values
-x <- seq(0.1, max_x, length.out = 100)
-df <- df |>
- crossing(x = x) |> ## add grid to data frame
- mutate(density = dgamma(x, shape, rate))
-
-ggplot(df, aes(x = x, y = density, group = id)) +
- geom_line(alpha = 0.3)
-```
-:::
-
-::::
-
-## `r fontawesome::fa("laptop-code", "white")` Your Turn {background-color="#447099" transition="fade-in"}
-
-- Simulate epidemiological delays
-- Estimate parameters of a delay distribution
-
-#
-
-[Return to the session](../delay-distributions)
diff --git a/sessions/slides/introduction-to-joint-estimation-of-nowcasting-and-reporting-delays.qmd b/sessions/slides/introduction-to-joint-estimation-of-nowcasting-and-reporting-delays.qmd
deleted file mode 100644
index 48404da..0000000
--- a/sessions/slides/introduction-to-joint-estimation-of-nowcasting-and-reporting-delays.qmd
+++ /dev/null
@@ -1,71 +0,0 @@
----
-title: "Introduction to joint estimation of nowcasting and reporting delays"
-author: "Nowcasting and forecasting of infectious disease dynamics"
-engine: knitr
-format:
- revealjs:
- output: slides/introduction-to-joint-estimation-of-nowcasting-and-reporting-delays.html
- footer: "Introduction to joint estimation of nowcasting and reporting delays"
- slide-level: 3
- chalkboard: true
----
-
-### Motivating example
-
-Often we see data like this
-
-![](figures/germany_historical.png)
-
-
-(all figures in this deck are courtesy of Johannes Bracher)
-
-### The aim of nowcasting
-
-Predict what an epidemiological time series will look like *after all delayed reports* are in.
-
-![](figures/nowcasting.png)
-
-
-### The limitations of simple nowcasting methods
-
- - Hard to propagate uncertainty
- - Doesn't account for observation error in the primary events
- - Not a generative model of the data reporting process (hard to add complex reporting patterns)
- - If we get the delay wrong, we can get the nowcast wrong
-
-### Jointly estimating reporting delays and nowcasting
-
-One potential solution is to jointly estimate the reporting delays and the nowcast. We can do this if we have multiple snapshots of the data.
-
-![](figures/germany_historical.png)
-
-### The reporting triangle
-
-The reporting triangle is the name given to the data structure that arises when we have multiple snapshots of the data.
-
-![](figures/reporting_triangle.png)
-
-### The reporting triangle
-
-Here the rows represent the time of the primary event, and the columns represent the time of the report.
-Some events are missing because they are yet to happen.
-
-![](figures/reporting_triangle.png)
-
-### Completing the reporting triangle
-
-The aim of nowcasting is to complete the reporting triangle. This means filling in the missing entries in the lower triangle and then summing the rows to get the nowcast.
-
-![](figures/complete_reporting_triangle.png)
-
-## `r fontawesome::fa("laptop-code", "white")` Your Turn {background-color="#447099" transition="fade-in"}
-
-1. Simulate the reporting triangle
-2. Perform a joint estimation of the delay and nowcast
-3. Understand the limitations of the data generating process
-4. Perform a joint estimation of the delay, nowcast, and reproduction number
-
-###
-
-[Return to the session](../joint-nowcasting)
-
diff --git a/sessions/slides/introduction-to-nowcasting.qmd b/sessions/slides/introduction-to-nowcasting.qmd
deleted file mode 100644
index b5a9ba8..0000000
--- a/sessions/slides/introduction-to-nowcasting.qmd
+++ /dev/null
@@ -1,80 +0,0 @@
----
-title: "Introduction to nowcasting"
-author: "Nowcasting and forecasting of infectious disease dynamics"
-engine: knitr
-format:
- revealjs:
- output: slides/introduction-to-nowcasting.html
- footer: "Introduction to nowcasting"
- chalkboard: true
- slide-level: 3
----
-
-### Motivating example {.smaller}
-
-Often we see data like this
-
-![](figures/monkeypox_delayed.png)
-
-Data after the dashed line are marked as uncertain. What, if anything, do they tell us about current trends?
-
-### second example {.smaller}
-
-... or like this
-
-![](figures/germany_historical.png)
-
-### second example {.smaller}
-
-... or like this
-
-![](figures/germany_early.png)
-
-[RKI COVID-19 Situation Report, 8 March 2020](https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Situationsberichte/2020-03-08-en.pdf?__blob=publicationFile)
-
-###
-
-These patterns arise because:
-
-- Epidemiological time series are aggregated by the *epidemiologically meaningful date*
- (e.g. symptom onset, hospital admission, death)
-- There is a *delay* between this date, and the events showing up in the data
-- This leads to an "articifial" *dip* in the most recent data
-
-### The aim of nowcasting
-
-Predict what an epidemiological time series will look like *after all delayed reports* are in.
-
-![](figures/nowcasting.png)
-
-(slide courtesy of Johannes Bracher)
-
-### Nowcasting as right truncation
-
-Remember from [biases in delay estimation](../biases-in-delay-distributions):
-
-**Right truncation**
-
-- reporting of events can be triggered by the **secondary** event
-- in that case, longer delays might be missing because whilst the *primary events* have occurred the *secondary events* **have not occurred yet**
-
-Nowcasting is exactly this!.
-
-### A simple approach to nowcasting
-
-1. Estimate the delay distribution from other data
-2. Specify a model for the epidemic dynamics
-3. Use the estimated delay distribution to model the expected right truncation in the data
-4. Fit the model to the truncated data
-5. Use the untruncated estimates from the model as the nowcast
-
-## `r fontawesome::fa("laptop-code", "white")` Your Turn {background-color="#447099" transition="fade-in"}
-
-1. Perform nowcast with a known reporting delay distribution
-2. Perform a nowcast using a more realistic data generating process
-3. Explore the impact of getting the delay distribution wrong
-
-#
-
-[Return to the session](../nowcasting)
-
diff --git a/sessions/slides/introduction-to-reproduction-number.qmd b/sessions/slides/introduction-to-reproduction-number.qmd
deleted file mode 100644
index 6fbf2c5..0000000
--- a/sessions/slides/introduction-to-reproduction-number.qmd
+++ /dev/null
@@ -1,78 +0,0 @@
----
-title: "Introduction to the time-varying reproduction number"
-author: "Nowcasting and forecasting of infectious disease dynamics"
-engine: knitr
-format:
- revealjs:
- output: slides/introduction-to-reproduction-number.html
- footer: "Introduction to the time-varying reproduction number"
-execute:
- echo: true
----
-
-```{r seed, echo = FALSE}
-set.seed(123)
-```
-
-## Convolution session {.smaller}
-
-```{stan, file = system.file("stan", "estimate-infections.stan", package = "nfidd"), eval = FALSE, output.var = "mod"}
-#| code-line-numbers: "|23"
-```
-
-. . .
-
-Prior for infections at time $t$ is independent from infections at all other time points.
-Is this reasonable?
-
-## Infections depend on previous infections
-
-Remember the definition of the generation time distribution $g(t)$:
-
-*infection* (person A) to *infection* (person B, infected by A)
-
-Through this, infections depend on previous infections:
-
-$$
-I_t = \mathrm{scaling} \times \sum_{t' < t} I_t' g(t - t')
-$$
-
-What is this scaling?
-
-## Scaling of infections with previous infections
-
-Let's assume we have $I_0$ infections at time 0, and the scaling doesn't change in time.
-
-How many people will they go on to infect?
-
-. . .
-
-$$
-I = \mathrm{scaling} \times \sum_{t=0}^\infty I_0 g(t) = \mathrm{scaling} * I_0
-$$
-
-The scaling can be interpreted as the (time-varying) reproduction number $R_t$.
-
-## The renewal equation
-
-If $R_t$ can change over time, it can still be interpreted as the ("instantaneous") reproduction number:
-
-$$
-I_t = R_t \times \sum_{t' < t} I_t' g(t - t')
-$$
-
-We can estimate $R_t$ from a time series of infections using the renewal equation.
-
-## The renewal equation as convolution
-
-![](figures/r_convolution_sketch.png)
-
-## `r fontawesome::fa("laptop-code", "white")` Your Turn {background-color="#447099" transition="fade-in"}
-
-- Simulate infections using the renewal equation
-- Estimate reproduction numbers using a time series of infections
-- Combine with delay distributions to jointly infer infections and R from a time series of outcomes
-
-#
-
-[Return to the session](../R-estimation-and-the-renewal-equation)
diff --git a/sessions/slides/introduction-to-stan.qmd b/sessions/slides/introduction-to-stan.qmd
deleted file mode 100644
index 2e8de81..0000000
--- a/sessions/slides/introduction-to-stan.qmd
+++ /dev/null
@@ -1,104 +0,0 @@
----
-title: "Introduction to stan"
-author: "Nowcasting and forecasting of infectious disease dynamics"
-format:
- revealjs:
- output: slides/introduction-to-stan.html
- footer: "Introduction to stan"
----
-
-```r
-set.seed(12345)
-```
-
-## What is stan and why do we use it?
-
-- a *Probabilistic Programming Language* for Bayesian inference
- (i.e., a way to write down models)
-
-- models are written in a text file (often ending `.stan`) and then loaded into an R/python/etc interface
-
-- once a model is written down, stan can be used to **generate samples** from the **posterior distribution** (using a variety of methods)
-
-## How to write a model in stan {.smaller}
-
-:::: {.columns}
-
-::: {.column width="40%"}
-In a stan model file we specify:
-
-- Data
(types and names)
-
-- Parameters
(types and names)
-
-- Model
(prior and likelihood)
-:::
-
-::: {.column width="60%"}
-
-
-```{stan empty_model, output.var = "empty", eval = FALSE, echo = TRUE}
-data {
-
-}
-
-parameters {
-
-}
-
-model {
-
-}
-```
-:::
-::::
-
-## Example: fairness of a coin {.smaller}
-
-:::: {.columns}
-
-::: {.column width="40%"}
-
-Data:
-
-- $N$ coin flips
-
-- $x$ times heads
-
-Parameters
-
-- $\theta$, probability of getting heads; uniform prior in $[0, 1]$
-
-:::
-
-::: {.column width="60%"}
-
-
-```{stan coin_model, output.var = "coin", eval = FALSE, echo = TRUE, file = nfidd::nfidd_cmdstan_model("coin")
-```
-:::
-::::
-
-## Using stan from R {.smaller}
-
-There are two packages for using stan from R. We will use the `cmdstanr` package:
-
-```{r load_coin_model, echo = TRUE}
-library("cmdstanr")
-mod <- nfidd::nfidd_cmdstan_model("coin")
-mod
-```
-
-## Sampling from the posterior {.xmaller}
-
-```{r sample_from_coin_model, echo = TRUE}
-data <- list(
- N = 10, ## 10 coin flips
- x = 6 ## 6 times heads
-)
-mod$sample(data = data)
-```
-
-#
-
-[Return to the session](../R-Stan-and-statistical-concepts)
diff --git a/sessions/slides/introduction-to-statistical-concepts.qmd b/sessions/slides/introduction-to-statistical-concepts.qmd
deleted file mode 100644
index 9a42970..0000000
--- a/sessions/slides/introduction-to-statistical-concepts.qmd
+++ /dev/null
@@ -1,268 +0,0 @@
----
-title: "Introduction to statistical concepts used in the course"
-author: "Nowcasting and forecasting of infectious disease dynamics"
-format:
- revealjs:
- output: slides/introduction-to-statistical-concepts.html
- footer: "Introduction to statistical concepts used in the course"
- slide-level: 3
----
-
-```{r echo = FALSE, message = FALSE}
-library("ggplot2")
-set.seed(123)
-```
-
-### Why statistical concepts?
-
-- We'll need to estimate things (delays, reproduction numbers, case numbers now and in the future)
-
-- We'll want to correctly specify uncertainty
-
-- We'll want to incorporate our domain expertise
-
-- We'll do this using *Bayesian inference*
-
-## Bayesian inference in 15 minutes
-
-![](figures/bayesian_model_without_distributions.png)
-
-### Interlude: probabilities
-
-::: {.callout-tip}
-#### [Laplace, 1812](https://archive.org/details/thorieanalytiqu01laplgoog)
-
-*Probability theory is nothing but common sense reduced to calculation.*
-:::
-
-### Interlude: probabilities (1/3) {.smaller}
-
-- If $A$ is a random variable, we write
- $$ p(A = a)$$
- for the *probability* that $A$ takes value $a$.
-- We often write
- $$ p(A = a) = p(a)$$
-- Example: The probability that it rains tomorrow
- $$ p(\mathrm{tomorrow} = \mathrm{rain}) = p(\mathrm{rain})$$
-- Normalisation
- $$ \sum_{a} p(a) = 1 $$
-
-### Interlude: probabilities (2/3) {.smaller}
-
-- If $A$ and $B$ are random variables, we write
- $$ p(A = a, B = b) = p(a, b)$$ for the *joint probability* that $A$ takes value $a$ and $B$ takes value $b$
-- Example: The probability that it rains today and tomorrow
- $$ p(\mathrm{tomorrow} = \mathrm{rain}, \mathrm{today} = \mathrm{rain}) = p(\mathrm{rain}, \mathrm{rain})$$
-- We can obtain a *marginal probability* from joint probabilities by summing
- $$ p(a) = \sum_{b} p(a, b)$$
-
-### Interlude: probabilities (3/3) {.smaller}
-
-- The *conditional probability* of getting outcome $a$ from random variable $A$, given that the outcome of random variable $B$ was $b$, is written as
- $$ p(A = a | B = b) = p(a| b) $$
-- Example: the probability that it rains tomorrow given that it is sunny today
- $$ p(\mathrm{tomorrow} = \mathrm{rain} | \mathrm{today} = \mathrm{rain}) = p(\mathrm{rain} | \mathrm{rain})$$
-- Conditional probabilities are related to joint probabilities as
- $$ p(a | b) = \frac{p(a, b)}{p(b)}$$
-- We can combine conditional probabilities in the *chain rule*
- $$ p(a, b, c) = p(a | b, c) p(b | c) p (c) $$
-
-### Probability distributions (discrete) {.smaller}
-
-```{r create_poisson_plot}
-kicks <- seq(0, 6)
-prob <- dpois(kicks, lambda = 0.61)
-df <- data.frame(kicks = kicks, prob = prob)
-pp <- ggplot(df, aes(x = kicks, y = prob)) +
- theme_bw(20) +
- geom_point(size = 3) +
- ylab("Probability") +
- xlab("Number of kicks")
-```
-
-- E.g., how many people die of horse kicks if there are 0.61 kicks per year
-- Described by the *Poisson* distribution
-
-:::: {.columns}
-
-::: {.column width="50%"}
-```{r plot_poisson1}
-pp
-```
-:::
-
-::: {.column width="50%"}
-:::
-
-#### Two directions
-1. Calculate the probability
-2. Randomly sample
-
-
-::::
-
-### Calculate discrete probability {.smaller}
-
-- E.g., how many people die of horse kicks if there are 0.61 kicks per year
-- Described by the *Poisson* distribution
-
-:::: {.columns}
-
-::: {.column width="50%"}
-```{r plot_poisson2}
-pp
-```
-:::
-
-::: {.column width="50%"}
-What is the probability of 2 deaths in a year?
-```{r dpois, echo = TRUE}
- dpois(x = 2, lambda = 0.61)
-```
-:::
-
-::::
-
-#### Two directions
-
-1. **Calculate the probability**
-2. Randomly sample
-
-### Generate a random (Poisson) sample {.smaller}
-- E.g., how many people die of horse kicks if there are 0.61 kicks per year
-- Described by the *Poisson* distribution
-
-:::: {.columns}
-
-::: {.column width="50%"}
-```{r plot_poisson3}
-pp
-```
-:::
-
-::: {.column width="50%"}
-Generate one random sample from the probability distribution
-```{r rpois, echo = TRUE}
- rpois(n = 1, lambda = 0.61)
-```
-:::
-
-::::
-
-#### Two directions
-
-1. Calculate the probability
-2. **Randomly sample**
-
-### Probability distributions (continuous) {.smaller}
-- Extension of probabilities to *continuous* variables
-- E.g., the temperature in Stockholm tomorrow
-
-Normalisation:
-$$ \int p(a) da = 1 $$
-
-Marginal probabilities:
-$$ p(a) = \int_{} p(a, b) db$$
-
-#### Two directions
-1. Calculate the probability (density)
-2. Randomly sample
-
-### Calculate probability density {.smaller}
-- Extension of probabilities to *continuous* variables
-- E.g., the temperature in Stockholm tomorrow
-
-```{r create_normal_plot}
-temp_mean <- 23
-temp_sd <- 2
-temp <- seq(15, 31, by = 0.01)
-prob <- dnorm(temp,mean = temp_mean, sd = temp_sd)
-df <- data.frame(temp = temp, prob = prob)
-pn <- ggplot(df, aes(x = temp, y = prob)) +
- theme_bw(20) +
- geom_line(lwd = 2) +
- xlab("Temperature") +
- ylab("Probability density")
-```
-
-:::: {.columns}
-
-::: {.column width="50%"}
-```{r plot_normal1}
-pn
-```
-:::
-
-::: {.column width="50%"}
-What is the probability density of $30^\circ C$ tomorrow, if the mean temperature on the day is $23^\circ C$ (standard deviation $2^\circ C$) ? A naïve model could be:
-```{r dnorm, echo = TRUE}
- dnorm(x = 30,
- mean = 23,
- sd = 2)
-```
-:::
-
-::::
-
-#### Two directions
-
-1. **Calculate the probability**
-2. Randomly sample
-
-### Generate a random (normal) sample {.smaller}
-
-:::: {.columns}
-
-::: {.column width="50%"}
-```{r plot_normal2}
-pn
-```
-:::
-
-::: {.column width="50%"}
-Generate one random sample from the normal probability distribution with mean 23 and standard deviation 2:
-```{r rnorm, echo = TRUE}
- rnorm(n = 1,
- mean = 23,
- sd = 2)
-```
-:::
-
-::::
-
-#### Two directions
-
-1. Calculate the probability
-2. **Randomly sample**
-
-## Bayesian inference in 15 minutes {.smaller}
-
-![](figures/bayesian_model_without_distributions.png)
-
-Idea of Bayesian inference: treat $\theta$ as **random variables** (with a probability distribution) and **condition on data**: posterior probability $p(\theta | \mathrm{data})$ as target of inference.
-
-### Bayes' rule {.smaller}
-
-- We treat the parameters of the a $\theta$ as random with *prior probabilities* given by a distribution $p(\theta)$. Confronting the model with data we obtain *posterior probabilities* $p(\theta | \mathrm{data})$, our target of inference. Applying the rule of conditional probabilities, we can write this as
-
-$$ p(\theta | \textrm{data}) = \frac{p(\textrm{data} | \theta) p(\theta)}{p(\textrm{data})}$$
-
-- $p(\textrm{data} | \theta)$ is the /likelihood/
-- $p(\textrm{data})$ is a /normalisation constant/
-
-- In words,
- $$\textrm{(posterior)} \propto \textrm{(normalised likelihood)} \times \textrm{(prior)}$$
-
-### Bayesian inference
-
-![](figures/bayesian_model.png)
-
-### MCMC
-
-- Markov-chain Monte Carlo (MCMC) is a method to generate *samples* from the *posterior distribution*, the **target** of inference
-
-- [stan](https://mc-stan.org/) is a probabilistic programming language that helps you to write down probabilistic models and to fit them using MCMC samplers and other methods.
-
-#
-
-[Return to the session](../R-Stan-and-statistical-concepts)
diff --git a/sessions/slides/tools.qmd b/sessions/slides/tools.qmd
deleted file mode 100644
index 056f3ff..0000000
--- a/sessions/slides/tools.qmd
+++ /dev/null
@@ -1,50 +0,0 @@
----
-title: "Examples of tools"
-author: "Nowcasting and forecasting of infectious disease dynamics"
-engine: knitr
-format:
- revealjs:
- output: slides/tools.html
- footer: "Examples of tools"
- slide-level: 3
----
-
-### Timeline so far
-
-- delay distributions and how to estimate them (day 1)
-- $R_t$ estimation and the generation interval (day 1)
-- nowcasting (day 2)
-- forecasting and evaluation, ensemble methods (day 2)
-- applications (day 3)
-
-### Available tools or custom implementation?
-
-A few open-source tools implement some or all of the methods we have encountered in the course.
-
-### Available tools or custom implementation?
-
-- Loses flexibility of custom approaches
-
- - different epidemiological problems, different data - different methods?
-
-### Available tools or custom implementation?
-
-- Value of using - and contributing
-
- - avoids duplication
-
- - improves the chances of finding errors
-
- - helps discussing and ultimately enforcing best practice
-
-## `r fontawesome::fa("laptop-code", "white")` Your Turn {background-color="#447099" transition="fade-in"}
-
-How does each tool relate to key concepts in the course?
-
- - EpiEstim
- - EpiNow2
- - Epinowcast
-
-#
-
-[Return to the session](../examples-of-available-tools)
diff --git a/sessions/using-delay-distributions-to-model-the-data-generating-process-of-an-epidemic.qmd b/sessions/using-delay-distributions-to-model-the-data-generating-process-of-an-epidemic.qmd
deleted file mode 100644
index 77a59c4..0000000
--- a/sessions/using-delay-distributions-to-model-the-data-generating-process-of-an-epidemic.qmd
+++ /dev/null
@@ -1,318 +0,0 @@
----
-title: "Using delay distributions to model the data generating process"
-order: 4
----
-
-# Introduction
-
-We've now developed a good idea of how epidemiological time delays affect our understanding of an evolving outbreak. So far, we've been working with individual line-list data. However, we usually want to model how an outbreak evolves through a whole population. The aim of this session is to introduce how delay distributions can be used to model population-level data generating process during an epidemic.
-
-This means working with aggregated count data. This creates some new issues in correctly accounting for uncertainty. We handle population-level data using *convolutions* as a way of combining count data with a distribution of individual probabilities. We will have to adjust our continuous probability distributions to work with this using *discretisation*. We'll then need to re-introduce additional uncertainty to account for the observation process at a population level.
-
-## Slides
-
-- [Delay distributions at the population level](slides/convolutions)
-
-## Objectives
-
-In this session, we'll focus on the delay from infection to symptom onset at a population level.
-First, we will introduce the techniques of convolution and discretisation. We'll apply these to an aggregated time series of infections in order to simulate observed symptom onsets.
-Then, we'll use those simulated symptom onsets to try and reconstruct a time series of infections.
-
-::: {.callout-note collapse="true"}
-
-# Setup
-
-## Source file
-
-The source file of this session is located at `sessions/using-delay-distributions-to-model-the-data-generating-process-of-an-epidemic.qmd`.
-
-## Libraries used
-
-In this session we will use the `nfidd` package to load a data set of infection times and access stan models and helper functions, the `dplyr` and `tidyr` packages for data wrangling, the `ggplot2` library for plotting and the `tidybayes` package for extracting results of the inference.
-
-```{r libraries, message = FALSE}
-library("nfidd")
-library("dplyr")
-library("tidyr")
-library("ggplot2")
-library("tidybayes")
-```
-
-::: callout-tip
-The best way to interact with the material is via the [Visual Editor](https://docs.posit.co/ide/user/ide/guide/documents/visual-editor.html) of RStudio.
-:::
-
-## Initialisation
-
-We set a random seed for reproducibility.
-Setting this ensures that you should get exactly the same results on your computer as we do.
-We also set an option that makes `cmdstanr` show line numbers when printing model code.
-This is not strictly necessary but will help us talk about the models.
-
-```{r}
-set.seed(123)
-options(cmdstanr_print_line_numbers = TRUE)
-```
-
-:::
-
-# Simulating observations from a time series of infections
-
-As before we first simulate the process that generates the data we typically observe.
-In this session, we'll focus on the process from infection to symptom onset.
-Then we'll use the same model to conduct inference.
-
-## Delay distributions and convolutions
-
-In the last session we simulated *individual outcomes* from a delay distribution, and then re-estimated the corresponding parameters.
-However, sometimes we do not have data on these individual-level outcomes, either because they are not recorded or because they cannot be shared, for example due to privacy concerns.
-At the population level, individual-level delays translate into *convolutions*.
-
-If we have a time series of infections $I_t$ ($t=1, 2, 3, \ldots, t_\mathrm{max}$), where $t$ denotes the day on which the infections occur, and observable outcomes occur with a delay given by a delay distribution $p_i$ ($i=0, 1, 2, \dots, p_\mathrm{max}$), where $i$ is the number of days after infection that the observation happens, then the number of observable outcomes $C_t$ on day $t$ is given by
-
-$$
-C_t = \sum_{i=0}^{i=p_\mathrm{max}} I_{t-i} p_i
-$$
-
-In other words, the number of observable outcomes on day $t$ is given by the sum of infections on all previous days multiplied by the probability that those infections are observed on day $t$.
-For example, the observable outcomes $C_t$ could be the number of symptom onsets on day $t$ and $p_i$ is the incubation period.
-
-We can use the same data as in the [session on biases in delay distributions](biases-in-delay-distributions#create-data-set), but this time we first aggregate this into a daily time series of infections.
-You can do this using:
-
-```{r aggregate}
-inf_ts <- make_daily_infections(infection_times)
-```
-
-::: callout-note
-As before you can look at the code of the function by typing `make_daily_infections`.
-The second part of this function is used to add days without infections with a zero count.
-This will make our calculations easier later (as otherwise we would have to try and detect these in any models that used this data which could be complicated).
-:::
-
-And look at the first few rows of the daily aggregated data:
-
-```{r display-aggregate}
-head(inf_ts)
-```
-
-Now we can convolve the time series with a delay distribution to get a time series of outcomes as suggested above.
-
-#### Discretising a delay distribution
-
-In our first session, we decided to assume the delay from infection to symptom onset had a gamma distribution.
-However, if we want to use the gamma distribution with shape 5 and rate 1 as before, we face a familiar issue.
-The gamma distribution is a *continuous* distribution, but now our delay data are in days which are *discrete* entities.
-We will assume that both events that make up the delay (here infection and symptom onset) are observed as daily counts (e.g. as number of infections/symptom onsets by calendar date).
-Therefore, both observations are **censored** (as events are rounded to the nearest date).
-This means that our distribution is **double interval censored** which we encountered in the [the biases in delay distribution session](sessions/biases-in-delay-distributions), so we need to use the same ideas introduced in that session.
-
-::: {.callout-note collapse=true}
-#### Mathematical Definition (optional): Discretising a delay distribution subject to double interval censoring
-
-The cumulative distribution function (CDF) ($F(t)$) of a distribution that has a daily censored primary event can be expressed as,
-
-$$
-F^*(t) = \int_0^1 F(t - u) du
-$$
-
-In effect, this is saying that the daily censored CDF is the average of the continuous distributions CDF over all possible event times (here between 0 and 1).
-
-The probability mass function (PMF) of this distribution when observed as a daily process (i.e. the secondary event is also daily censored) is then
-
-$$
-f_t \propto F^*(t + 1) - F^*(t - 1)
-$$
-
-The important point is that the ultimately observed PMF is a combination of a primary event daily censoring process and a secondary event daily censoring process.
-:::
-
-We can think about this via simulation.
-We do so by generating many replicates of the corresponding random delay, taking into account that we have already rounded down our infection times to infection days.
-This means that discretising a delay in this context is **double censoring** as we discussed in the [the biases in delay distribution session](sessions/biases-in-delay-distributions).
-In the absence of any other information or model, we assume for our simulation that infection occurred at some random time during the day, with each time equally likely.
-We can then apply the incubation period using a continuous probability distribution, before once again rounding down to get the day of symptom onset (mimicking daily reporting).
-We repeat this many times to get the probability mass function that allows us to go from infection days to symptom onset days.
-
-You can view the function we have written do this using:
-
-```{r discretise_2_day_window}
-censored_delay_pmf
-```
-
-::: callout-note
-## Take 5 minutes
-
-Try to understand the `censored_delay_pmf()` function above.
-Try it with a few different probability distributions and parameters, e.g. for the parameters given above and a maximum delay of 2 weeks (14 days) it would be:
-
-```{r discretised_gamma}
-gamma_pmf <- censored_delay_pmf(rgamma, max = 14, shape = 5, rate = 1)
-gamma_pmf
-
-plot(gamma_pmf)
-```
-:::
-
-#### Applying a convolution
-
-Next we apply a convolution with the discretised incubation period distribution to the time series of infections, to generate a time series of symptom onsets.
-The function we have written to do this is called `convolve_with_delay`
-
-```{r convolution}
-convolve_with_delay
-```
-
-::: callout-tip
-## Take 5 minutes
-
-Try to understand the `convolve_with_delay()` function above.
-Try it with a few different time series and delay distributions.
-How would you create the time series of symptom onsets from infections, using the discretised gamma distribution created above (saved in `gamma_pmf`)?
-:::
-
-::: {.callout-note collapse="true"}
-## Solution
-
-```{r applied_convolution}
-onsets <- convolve_with_delay(inf_ts$infections, gamma_pmf)
-```
-:::
-
-We can plot these symptom onsets:
-
-```{r convolution_plot}
-combined <- inf_ts |>
- rename(time = infection_day) |>
- mutate(onsets = onsets)
-ggplot(combined, aes(x = time, y = onsets)) +
- geom_bar(stat = "identity")
-```
-
-Do they look similar to the plot of symptom onsets in the [session on delay distributions](delay-distributions#simulating-delayed-epidemiological-data)?
-
-## Observation uncertainty
-
-Usually not all data are perfectly observed.
-Also, the convolution we applied is a *deterministic* operation that brushes over the fact that individual delays are random.
-We should therefore find another way to model the variation these processes introduce.
-
-Given that we are now dealing with count data a natural choice is the Poisson distribution.
-We can use this to generate uncertainty around our convolved data.
-
-```{r uncertain}
-combined <- combined |>
- mutate(observed = rpois(n(), onsets))
-```
-
-::: callout-tip
-## Take 5 minutes
-
-Does a plot of these observations look more like the plots from the [session on delay distributions](delay-distributions#simulating-delayed-epidemiological-data) than the convolution plotted above?
-:::
-
-::: {.callout-note collapse="true"}
-## Solution
-
-```{r plot-with-poisson}
-ggplot(combined, aes(x = time, y = observed)) +
- geom_bar(stat = "identity")
-```
-:::
-
-# Estimating a time series of infections
-
-As in previous sessions, we don't usually have data on infections. We now create a model that uses symptom onsets to estimate the number of infections over time. We'll base the model on an uninformed prior and work forward from what we know about the observation process.
-
-```{r stan_estimate_infections}
-mod <- nfidd_cmdstan_model("estimate-infections")
-mod
-```
-
-::: callout-tip
-## Take 10 minutes
-
-Familiarise yourself with the model above.
-Unlike before there is now a `functions` block at the beginning of the model (lines 1-3), where we load a function called `convolve_with_delay()` (line 2) from a file of the same name which can be found in the subdirectory `functions` of the `stan` directory or [viewed on the github repo](https://github.com/nfidd/nfidd/blob/main/stan/functions/convolve_with_delay.stan).
-The functions correspond exactly to our earlier **R** function of the same name.
-Later, this functions is called in the `model` block, to generate the time series of symptom onsets (line 18).
-
-What is the prior assumption on the daily number of infections?
-Which line defines the likelihood, and how does it relate to the section about observation uncertainty above?
-:::
-
-::: {.callout-note collapse="true"}
-## Solution
-
-The model assumes that infections every day are independent from infections on any other day (line 23) and determined only by the number of symptom onsets that they result in (line 18).
-Line 24 defines the likelihood, and it does so using the Poisson observation uncertainty we used above.
-:::
-
-We can now use this model to conduct inference, i.e. to try to reconstruct the time series of infections from the time series of onsets that we generated earlier.
-
-```{r inf_fit, results = 'hide', message = FALSE}
-data <- list(
- n = nrow(combined),
- obs = combined$observed,
- ip_max = length(gamma_pmf) - 1,
- ip_pmf = gamma_pmf
-)
-inf_fit <- mod$sample(data = data, parallel_chains = 4)
-```
-
-::: callout-caution
-Note that this code might take a few minutes to run.
-We have added the `parallel_chains` option to make use of any paralle hardware you may ave.
-If you don't have 4 computing cores and/or the code runs very slowly, you could try only running 2 chains (`chains = 2`) and/or fewer samples (`iter_warmup = 500, iter_sampling = 500`).
-:::
-
-::: callout-tip
-In this model, we have estimated many more parameters than in the previous models: instead of e.g. 2 parameters of a probability distribution, we now have a total of `r nrow(onsets)` time points for which we estimate the number of infections.
-This is because we don't have a model for the *process* that generates infections.
-How would this be different if we e.g. used an SIR model here?
-:::
-
-We can see the first few estimates of the number of infections using:
-
-```{r inf_summary}
-inf_fit
-```
-
-Again, we can do a posterior predictive check by plotting the modelled estimates of the time series of infections (with uncertainty) against our original data.
-Does it look like a good fit?
-
-```{r inf_ppc}
-# Extract posterior draws
-inf_posterior <- inf_fit |>
- gather_draws(infections[infection_day]) |>
- ungroup() |>
- mutate(infection_day = infection_day - 1) |>
- filter(.draw %in% sample(.draw, 100))
-
-ggplot(mapping = aes(x = infection_day)) +
- geom_line(
- data = inf_posterior, mapping = aes(y = .value, group = .draw), alpha = 0.1
- ) +
- geom_line(data = inf_ts, mapping = aes(y = infections), colour = "red") +
- labs(title = "Infections per day",
- subtitle = "True data (red), and sample of estimated trajectories (black)")
-```
-
-::: callout-tip
-This time we used the `gather_draws()` function included in `tidybayes` to extract the inference results.
-This is particularly useful when dealing with arrays such as `inference` because it allows to extract them with a given index (here: `[day]`).
-:::
-
-# Going further
-
-- Above, we used a Poisson distribution to characterise uncertainty.
- In the Poisson distribution, the variance is the same as the mean.
- Another common choice is the negative binomial distribution, which has a more flexible relationship between variance and mean.
- If you re-did the analysis above with the negative binomial distribution, what would be the difference?
-
-- We could have used the individual-level model of the previous section to try to estimate the number of infections with a known delay distribution by estimating each individual infection time.
- How would this look in stan code?
- Would you expect it to yield a different result?
-
-# Wrap up
diff --git a/nfidd.Rproj b/ueifid.Rproj
similarity index 100%
rename from nfidd.Rproj
rename to ueifid.Rproj