Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add epichains summative #56

Open
avallecam opened this issue May 4, 2024 · 1 comment
Open

add epichains summative #56

avallecam opened this issue May 4, 2024 · 1 comment
Labels
enhancement New feature or request

Comments

@avallecam
Copy link
Member

# pre-activity ------------------------------------------------------------

## load packages -----------------------------------------------------------

library(epichains)
library(epiparameter)
#> Warning: package 'epiparameter' was built under R version 4.3.3
library(tidyverse)


## offspring distribution --------------------------------------------------

offspring_parameters <- c(
  mean = 0.8,
  dispersion = 0.01
)

offspring_parameters
#>       mean dispersion 
#>       0.80       0.01


## generation time ---------------------------------------------------------

generation_time <- epidist(
  disease = "disease x",
  epi_dist = "generation time",
  prob_distribution = "gamma",
  summary_stats = list(mean = 3, sd = 1)
)
#> Citation cannot be created as author, year, journal or title is missing


## simulate single ---------------------------------------------------------

set.seed(33)

simulate_chains(
  # simulation controls
  index_cases = 1, # --------------------- emphasis
  statistic = "size",
  # offspring
  offspring_dist = rnbinom, # --------------------- emphasis
  mu = offspring_parameters["mean"],
  size = offspring_parameters["dispersion"],
  # generation
  generation_time = function(x) generate(x = generation_time, times = x) # --------------------- emphasis
)
#> `<epichains>` object
#> 
#> < tree head (from first known infector_id) >
#> 
#> [1] infectee_id sim_id      infector_id generation  time       
#> <0 rows> (or 0-length row.names)
#> 
#> 
#> Trees simulated: 1
#> Number of infectors (known): 1
#> Number of generations: 1
#> Use `as.data.frame(<object_name>)` to view the full output in the console.

# paste this as assessment!




# activity 5 -------------------------------------------------------------




## purrr -------------------------------------------------------------------

# single operation
sqrt(1)
#> [1] 1

# cross values
1:5
#> [1] 1 2 3 4 5

# works
sqrt(1:5)
#> [1] 1.000000 1.414214 1.732051 2.000000 2.236068

# but not all functions can handle numeric vectors
map(.x = 1:5,.f = sqrt)
#> [[1]]
#> [1] 1
#> 
#> [[2]]
#> [1] 1.414214
#> 
#> [[3]]
#> [1] 1.732051
#> 
#> [[4]]
#> [1] 2
#> 
#> [[5]]
#> [1] 2.236068

# we can use helpers to bind list outputs
map(.x = 1:5,.f = sqrt) %>% list_c()
#> [1] 1.000000 1.414214 1.732051 2.000000 2.236068

map(
  # across 1 to 5
  .x = 1:5,
  # iterate function
  .f = sqrt
) %>% 
  # bind list outputs
  list_c()
#> [1] 1.000000 1.414214 1.732051 2.000000 2.236068

## simulate multiple chains ------------------------------------------------


# Set seed for random number generator
set.seed(33)

# Number of simulation runs
number_chains <- 1000

# Number of initial cases
initial_cases <- 1


multiple_chains <- map(
  # across a sequence of numbers ID # --------------------- emphasis
  .x = seq_len(number_chains),
  # iterate this function # --------------------- emphasis
  # "sim" will have the number ID in each iteration
  .f = function(sim) { # --------------------- emphasis embed within function(){...}
    simulate_chains( # --------------------- copy/paste
      # simulation controls
      index_cases = initial_cases, # --------------------- replace
      statistic = "size",
      stat_max = 500, # --------------------- replace
      # offspring
      offspring_dist = rnbinom,
      mu = offspring_parameters["mean"],
      size = offspring_parameters["dispersion"],
      # generation
      generation_time = function(x) generate(x = generation_time, times = x)
    )%>%
      # creates a column with the chain ID number
      mutate(chain_id = sim) # --------------------- emphasis (NEW addition)
  }
) %>% # --------------------- emphasis - we get list of outputs
  list_rbind() # --------------------- emphasis - bind all list outputs

# paste this as assessment!

multiple_chains
#> `<epichains>` object
#> 
#> < tree head (from first known infector_id) >
#> 
#>    infectee_id sim_id infector_id generation     time chain_id
#> 84           1      2           1          2 3.549575       83
#> 87           1      2           1          2 2.455586       85
#> 88           1      3           1          2 5.144965       85
#> 89           1      4           1          2 4.330285       85
#> 90           1      5           1          2 2.417124       85
#> 91           1      6           1          2 2.691740       85
#> 
#> 
#> Number of infectors (known): 22
#> Number of generations: 7
#> Use `as.data.frame(<object_name>)` to view the full output in the console.

multiple_chains %>% 
  as_tibble() %>% 
  filter(chain_id == 683) %>% # --------------------- emphasis (run this with set.seed)
  print(n=Inf)
#> # A tibble: 28 × 6
#>    infectee_id sim_id infector_id generation  time chain_id
#>          <int>  <dbl>       <dbl>      <int> <dbl>    <int>
#>  1           1      1          NA          1  0         683
#>  2           1      2           1          2  2.89      683
#>  3           1      3           1          2  1.60      683
#>  4           1      4           1          2  2.41      683
#>  5           1      5           1          2  1.74      683
#>  6           1      6           1          2  1.76      683
#>  7           1      7           1          2  4.51      683
#>  8           1      8           1          2  3.35      683
#>  9           1      9           1          2  1.84      683
#> 10           1     10           9          3  4.08      683
#> 11           1     11           9          3  5.13      683
#> 12           1     12           9          3  4.20      683
#> 13           1     13           9          3  5.04      683
#> 14           1     14           9          3  3.46      683
#> 15           1     15           9          3  3.62      683
#> 16           1     16           9          3  5.79      683
#> 17           1     17           9          3  3.11      683
#> 18           1     18           9          3  5.51      683
#> 19           1     19           9          3  4.28      683
#> 20           1     20           9          3  7.17      683
#> 21           1     21           9          3  3.75      683
#> 22           1     22           9          3  6.61      683
#> 23           1     23           9          3  4.65      683
#> 24           1     24           9          3  6.24      683
#> 25           1     25           9          3  4.90      683
#> 26           1     26           9          3  6.20      683
#> 27           1     27           9          3  4.75      683
#> 28           1     28           9          3  3.76      683

# activity 6 --------------------------------------------------------------

# daily aggregate of cases
simulated_chains_day <- multiple_chains %>%
  # use data.frame output from <epichains> object
  as_tibble() %>%
  # get the round number (day) of infection times
  mutate(day = ceiling(time)) %>% # --------------------- emphasis
  # count the daily number of cases in each simulation (chain ID)
  count(chain_id, day, name = "cases") %>%
  # calculate the cumulative number of cases for each simulation (chain ID)
  group_by(chain_id) %>%
  mutate(cases_cumsum = cumsum(cases)) %>% # --------------------- emphasis
  ungroup()

# Visualize transmission chains by cumulative cases
ggplot() +
  # create grouped chain trajectories
  geom_line(
    data = simulated_chains_day,
    mapping = aes(
      x = day,
      y = cases_cumsum,
      group = chain_id # --------------------- emphasis
    ),
    color = "black",
    alpha = 0.25,
    show.legend = FALSE
  ) +
  # define a 100-case threshold
  geom_hline(aes(yintercept = 100), lty = 2) + # --------------------- emphasis
  labs(
    x = "Day",
    y = "Cumulative cases"
  )

Created on 2024-05-04 with reprex v2.1.0

@avallecam avallecam added the enhancement New feature or request label May 4, 2024
@avallecam
Copy link
Member Author

fixed with new epichains version

## load packages -----------------------------------------------------------

library(epichains)
library(epiparameter)
#> Warning: package 'epiparameter' was built under R version 4.3.3
library(tidyverse)


## offspring distribution --------------------------------------------------

offspring_parameters <- c(
  mean = 0.8,
  dispersion = 0.01
)

offspring_parameters
#>       mean dispersion 
#>       0.80       0.01


## generation time ---------------------------------------------------------

generation_time <- epidist(
  disease = "disease x",
  epi_dist = "generation time",
  prob_distribution = "gamma",
  summary_stats = list(mean = 3, sd = 1)
)
#> Citation cannot be created as author, year, journal or title is missing


## single simulation ---------------------------------------------------------

set.seed(33)

simulate_chains(
  # simulation controls
  # index_cases = 1, # --------------------- emphasis
  n_chains = 1, # --------------------- emphasis
  statistic = "size",
  # offspring
  offspring_dist = rnbinom, # --------------------- emphasis
  mu = offspring_parameters["mean"],
  size = offspring_parameters["dispersion"],
  # generation
  generation_time = function(x) generate(x = generation_time, times = x) # --------------------- emphasis
)
#> `<epichains>` object
#> 
#> < epichains head (from first known infector) >
#> 
#> [1] chain      infector   infectee   generation time      
#> <0 rows> (or 0-length row.names)
#> 
#> 
#> Number of chains: 1
#> Number of infectors (known): 1
#> Number of generations: 1
#> Use `as.data.frame(<object_name>)` to view the full output in the console.

## multiple simulations ------------------------------------------------


# Set seed for random number generator
set.seed(33)

# Number of simulation runs
number_chains <- 1000

# Number of initial cases
initial_cases <- 1


multiple_simulations <- map(
  # across a sequence of numbers ID # --------------------- emphasis
  .x = seq_len(number_chains),
  # iterate this function # --------------------- emphasis
  # "sim" will have the number ID in each iteration
  .f = function(sim) { # --------------------- emphasis embed within function(){...}
    simulate_chains( # --------------------- copy/paste
      # simulation controls
      # index_cases = initial_cases, # --------------------- replace
      n_chains = initial_cases,
      statistic = "size",
      stat_threshold = 500, # --------------------- replace
      # offspring
      offspring_dist = rnbinom,
      mu = offspring_parameters["mean"],
      size = offspring_parameters["dispersion"],
      # generation
      generation_time = function(x) generate(x = generation_time, times = x)
    )%>%
      # creates a column with the chain ID number
      mutate(simulation_id = sim) # --------------------- emphasis (NEW addition)
  }
) %>% # --------------------- emphasis - we get list of outputs
  list_rbind() # --------------------- emphasis - bind all list outputs

# paste this as assessment!

multiple_simulations
#> `<epichains>` object
#> 
#> < epichains head (from first known infector) >
#> 
#>    chain infector infectee generation     time simulation_id
#> 84     1        1        2          2 3.549575            83
#> 87     1        1        2          2 2.455586            85
#> 88     1        1        3          2 5.144965            85
#> 89     1        1        4          2 4.330285            85
#> 90     1        1        5          2 2.417124            85
#> 91     1        1        6          2 2.691740            85
#> 
#> 
#> Number of infectors (known): 22
#> Number of generations: 7
#> Use `as.data.frame(<object_name>)` to view the full output in the console.

# one simulation --------------------------------------------------------------

multiple_simulations %>% 
  as_tibble() %>% 
  filter(simulation_id == 683) %>% # --------------------- emphasis (run this with set.seed)
  print(n=Inf)
#> # A tibble: 28 × 6
#>    chain infector infectee generation  time simulation_id
#>    <int>    <dbl>    <dbl>      <int> <dbl>         <int>
#>  1     1       NA        1          1  0              683
#>  2     1        1        2          2  2.89           683
#>  3     1        1        3          2  1.60           683
#>  4     1        1        4          2  2.41           683
#>  5     1        1        5          2  1.74           683
#>  6     1        1        6          2  1.76           683
#>  7     1        1        7          2  4.51           683
#>  8     1        1        8          2  3.35           683
#>  9     1        1        9          2  1.84           683
#> 10     1        9       10          3  4.08           683
#> 11     1        9       11          3  5.13           683
#> 12     1        9       12          3  4.20           683
#> 13     1        9       13          3  5.04           683
#> 14     1        9       14          3  3.46           683
#> 15     1        9       15          3  3.62           683
#> 16     1        9       16          3  5.79           683
#> 17     1        9       17          3  3.11           683
#> 18     1        9       18          3  5.51           683
#> 19     1        9       19          3  4.28           683
#> 20     1        9       20          3  7.17           683
#> 21     1        9       21          3  3.75           683
#> 22     1        9       22          3  6.61           683
#> 23     1        9       23          3  4.65           683
#> 24     1        9       24          3  6.24           683
#> 25     1        9       25          3  4.90           683
#> 26     1        9       26          3  6.20           683
#> 27     1        9       27          3  4.75           683
#> 28     1        9       28          3  3.76           683

# visualize ---------------------------------------------------------------

# daily aggregate of cases
simulated_chains_day <- multiple_simulations %>%
  # use data.frame output from <epichains> object
  as_tibble() %>%
  # get the round number (day) of infection times
  mutate(day = ceiling(time)) %>% # --------------------- emphasis
  # count the daily number of cases in each simulation (chain ID)
  count(simulation_id, day, name = "cases") %>%
  # calculate the cumulative number of cases for each simulation (chain ID)
  group_by(simulation_id) %>%
  mutate(cases_cumsum = cumsum(cases)) %>% # --------------------- emphasis
  ungroup()

# Visualize transmission chains by cumulative cases
ggplot() +
  # create grouped chain trajectories
  geom_line(
    data = simulated_chains_day,
    mapping = aes(
      x = day,
      y = cases_cumsum,
      group = simulation_id # --------------------- emphasis
    ),
    color = "black",
    alpha = 0.25,
    show.legend = FALSE
  ) +
  # define a 100-case threshold
  geom_hline(aes(yintercept = 100), lty = 2) + # --------------------- emphasis
  labs(
    x = "Day",
    y = "Cumulative cases"
  )

Created on 2024-05-30 with reprex v2.1.0

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
enhancement New feature or request
Projects
Status: No status
Development

No branches or pull requests

1 participant