Arbitrary outcomes
weshinsley committed May 13, 2024
1 parent 6272b51 commit ab6cfb1
Showing 5 changed files with 93 additions and 89 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: stoner
Title: Support for Building VIMC Montagu Touchstones, using Dettl
Version: 0.1.16
Version: 0.1.17
Authors@R:
c(person("Wes", "Hinsley",role = c("aut", "cre", "cst", "dnc", "elg", "itr", "sng", "ard"),
email = "[email protected]"),
@@ -28,7 +28,7 @@ Imports:
utils,
withr
Language: en-GB
RoxygenNote: 7.2.1
RoxygenNote: 7.3.1
Roxygen: list(markdown = TRUE)
Suggests:
knitr,
92 changes: 43 additions & 49 deletions R/stochastic_process.R
@@ -33,16 +33,12 @@
##' @param out_path Path to writing output files into
##' @param pre_aggregation_path Path to dir to write out pre age-disaggregated
##' data into. If NULL then this is skipped.
##' @param deaths If deaths must be calculated as a sum of other burden
##' outcomes, then provide a vector of the outcome names here. The default
##' is the existing deaths burden_outcome.
##' @param cases If cases must be calculated as a sum of other burden
##' outcomes, then provide a vector of the outcome names here. The default
##' is the existing cases burden_outcome.
##' @param dalys If DALYs must be calculated as a sum of other burden
##' outcomes, then provide a vector of the outcome names here. The default
##' is the existing DALYs burden_outcome. Alternatively, for the one
##' remaining group that does not provide DALYs, you can supply a data
##' @param outcomes A list of named vectors, where each name is a burden
##' outcome, and the elements of that vector are the column names in the
##' stochastic files that should be summed to compute the outcome. The
##' default expects the outcomes `deaths`, `cases`, `dalys` and `yll`, each
##' read from a single column of the same name in the stochastic files.
##' @param dalys_recipe If DALYs must be calculated, you can supply a data
##' frame here, and stoner will calculate DALYs using that recipe. The
##' data frame must have names `outcome`, `proportion`, `average_duration`
##' and `disability_weight`. See [stoner_calculate_dalys].
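For illustration only (not part of this commit), a call using the new interface might look like the sketch below. The modelling group, disease, touchstone and scenario names follow the test fixtures elsewhere in this commit; the paths, file pattern and recipe values are invented, and `con` is assumed to be an open database connection. Note that when a `dalys_recipe` is supplied, `outcomes` carries no `dalys` entry.

# Hypothetical DALYs recipe: the four required columns, with invented values.
dalys_recipe <- data.frame(
  outcome           = c("cases_acute", "deaths_chronic"),
  proportion        = c(0.1, 0.2),
  average_duration  = c(20, 1000),
  disability_weight = c(0.4, 0.6))

# Sum acute and chronic columns into deaths/cases; DALYs come from the recipe.
stone_stochastic_process(
  con, "LAP-elf", "flu", "nevis-1",
  scenarios = "pies",
  in_path = "incoming", files = "stochastic_:index.csv.xz",
  cert = "", index_start = 1, index_end = 200,
  out_path = "processed",
  outcomes = list(
    deaths = c("deaths_acute", "deaths_chronic"),
    cases  = c("cases_acute", "cases_chronic"),
    yll    = "yll"),
  dalys_recipe = dalys_recipe,
  bypass_cert_check = TRUE)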
@@ -80,8 +76,11 @@ stone_stochastic_process <- function(con, modelling_group, disease,
touchstone, scenarios, in_path, files,
cert, index_start, index_end, out_path,
pre_aggregation_path = NULL,
deaths = "deaths", cases = "cases",
dalys = "dalys", yll = "yll",
outcomes = list(deaths = "deaths",
cases = "cases",
dalys = "dalys",
yll = "yll"),
dalys_recipe = NULL,
runid_from_file = FALSE,
allow_missing_disease = FALSE,
upload_to_annex = FALSE,
@@ -130,12 +129,7 @@ stone_stochastic_process <- function(con, modelling_group, disease,
disease = disease,
touchstone = touchstone
)
outcomes <- list(
deaths = deaths,
cases = cases,
dalys = dalys,
yll = yll
)

withCallingHandlers(
files <- stochastic_process_validate(con,
touchpoint = touchpoint,
@@ -147,6 +141,7 @@
out_path = out_path,
pre_aggregation_path = pre_aggregation_path,
outcomes = outcomes,
dalys_recipe = dalys_recipe,
runid_from_file = runid_from_file,
upload_to_annex = upload_to_annex,
annex = annex,
@@ -177,7 +172,8 @@ stone_stochastic_process <- function(con, modelling_group, disease,
touchpoint = touchpoint,
scenarios = scenarios,
read_params = read_params,
outcomes = outcomes),
outcomes = outcomes,
dalys_recipe = dalys_recipe),
"Processed %s scenarios for modelling group: %s, disease: %s",
length(scenarios), touchpoint$modelling_group, touchpoint$disease)

@@ -215,7 +211,8 @@ all_scenarios <- function(con,
touchpoint,
scenarios,
read_params,
outcomes) {
outcomes,
dalys_recipe) {

all_scenarios <- NULL
all_countries <- DBI::dbGetQuery(con, "SELECT id, nid FROM country")
@@ -233,13 +230,12 @@
scenario_name)
scenario_data <- process_scenario(con, scenario_name, files,
touchpoint, read_params, outcomes,
all_countries)
dalys_recipe, all_countries)

##############################################################
# If this is the first scenario, then it's easy...
if (is.null(all_scenarios)) {
all_scenarios <- scenario_data

# Otherwise, we need to add new columns with the new scenario
# HOWEVER: there could be different countries in different
# scenarios, so we may need to add countries with NA data to
@@ -263,7 +259,8 @@ rename_cols <- function(df, scenario_name) {
}

process_scenario <- function(con, scenario, files, touchpoint,
read_params, outcomes, countries) {
read_params, outcomes, dalys_recipe,
countries) {
scenario_data <- list()
lines <- read_params$lines

@@ -274,7 +271,7 @@ process_scenario <- function(con, scenario, files, touchpoint,
the_file <- files[i]
lg$info("Reading %s", the_file)
scenario_data[[i]] <-
read_xz_csv(con, the_file, outcomes,
read_xz_csv(con, the_file, outcomes, dalys_recipe,
read_params$allow_missing_disease,
read_params$runid_from_file, i,
touchpoint$touchstone, countries,
@@ -362,16 +359,13 @@ calc_outcomes <- function(csv, outcomes, single_outcome) {
csv
}

read_xz_csv <- function(con, the_file, outcomes, allow_missing_disease,
read_xz_csv <- function(con, the_file, outcomes, dalys_recipe, allow_missing_disease,
runid_from_file, run_id, touchstone, countries,
lines) {

if (is.data.frame(outcomes$dalys)) {
dalys_cols <- unique(outcomes$dalys$outcome)
} else {
dalys_cols <- outcomes$dalys
if (is.data.frame(dalys_recipe)) {
dalys_cols <- unique(dalys_recipe$outcome)
}
meta_cols <- unique(c(outcomes$deaths, outcomes$cases, dalys_cols, outcomes$yll))
meta_cols <- unique(unlist(outcomes))

col_list <- list(
year = readr::col_integer(),
@@ -414,23 +408,22 @@ read_xz_csv <- function(con, the_file, outcomes, allow_missing_disease,

csv$country <- countries$nid[match(csv$country, countries$id)]

if (is.data.frame(outcomes$dalys)) {
if (is.data.frame(dalys_recipe)) {
res <- stoner_calculate_dalys(con, touchstone, csv,
outcomes$dalys, cache$life_table)
dalys_recipe, cache$life_table)
csv <- res$data
if (is.null(cache$life_table)) {
cache$life_table <- res$life_table
}

} else {
csv <- calc_outcomes(csv, outcomes$dalys, "dalys")
}

csv <- calc_outcomes(csv, outcomes$deaths, "deaths")
csv <- calc_outcomes(csv, outcomes$cases, "cases")
csv <- calc_outcomes(csv, outcomes$yll, "yll")
for (i in seq_along(outcomes)) {
csv <- calc_outcomes(csv, outcomes[[i]], names(outcomes)[i])
}

csv[, c("run_id", "year", "age", "country", "deaths", "cases" ,"dalys", "yll")]
csv <- as.data.frame(csv)
csv[, unique(c("run_id", "year", "age", "country", names(outcomes),
if (!is.null(dalys_recipe)) "dalys"))]
}
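The per-outcome loop above replaces the previous hard-wired calls for deaths, cases and yll: each named entry of `outcomes` is computed by summing its listed source columns. calc_outcomes itself is not shown in this diff; purely as an assumed sketch of the behaviour the loop relies on (summing the named columns, as the `outcomes` documentation describes), it would amount to something like:

calc_outcomes_sketch <- function(csv, cols, single_outcome) {
  # Row-wise sum of the listed source columns into a single outcome column;
  # with one source column this just copies it under the outcome's name.
  csv[[single_outcome]] <- rowSums(csv[, cols, drop = FALSE])
  csv
}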


@@ -515,6 +508,7 @@ stochastic_process_validate <- function(con, touchpoint, scenarios, in_path,
out_path,
pre_aggregation_path,
outcomes,
dalys_recipe,
runid_from_file,
upload_to_annex,
annex,
@@ -532,9 +526,13 @@ stochastic_process_validate <- function(con, touchpoint, scenarios, in_path,
assert_scalar_character(touchpoint$touchstone)
assert_db_value_exists(con, "touchstone", "id", touchpoint$touchstone)

if (is.data.frame(outcomes$dalys)) {
stopifnot(all.equal(sort(names(outcomes$dalys)),
if (!is.null(dalys_recipe)) {
stopifnot(all.equal(sort(names(dalys_recipe)),
c("average_duration", "disability_weight", "outcome", "proportion")))

# Can't specify both a DALYs sum and a recipe.

stopifnot("dalys" %in% names(outcomes))
}

assert_scalar_character(in_path)
@@ -635,12 +633,8 @@ stochastic_process_validate <- function(con, touchpoint, scenarios, in_path,
stochastic_validate_scenario(con, touchpoint, scenario)
}

check_outcomes(con, "cases", outcomes$cases)
check_outcomes(con, "deaths", outcomes$deaths)
if (is.data.frame(outcomes$dalys)) {
check_outcomes(con, "dalys", unique(outcomes$dalys$outcome))
} else {
check_outcomes(con, "dalys", outcomes$dalys)
for (i in seq_along(outcomes)) {
check_outcome(con, names(outcomes)[i], outcomes[[i]])
}

validate_paths(file.path(in_path, files), scenarios,
@@ -687,7 +681,7 @@ validate_paths <- function(files, scenarios, touchpoint,
}


check_outcomes <- function(con, type, options) {
check_outcome <- function(con, type, options) {
assert_character(options)
if (any(duplicated(options))) {
stop(sprintf("Duplicated outcome in %s", type))
27 changes: 13 additions & 14 deletions man/stone_stochastic_process.Rd

Some generated files are not rendered by default.

42 changes: 25 additions & 17 deletions tests/testthat/test_stochastic_process.R
@@ -159,18 +159,21 @@ test_that("Bad arguments", {

expect_error(stone_stochastic_process(test$con,
"LAP-elf", "flu", "nevis-1", "pies", test$path, "non_exist:index.xz",
"", 1, 1, ".", deaths = c("deaths", "deaths"), bypass_cert_check = TRUE),
"", 1, 1, ".", outcomes = list(deaths = c("deaths", "deaths")),
bypass_cert_check = TRUE),
"Duplicated outcome in deaths")

expect_error(stone_stochastic_process(test$con,
"LAP-elf", "flu", "nevis-1", "pies", test$path, "non_exist:index.xz",
"", 1, 1, ".", deaths = "deaths", cases = "cases", dalys = "piles_dalys",
"", 1, 1, ".",
outcomes = list(deaths = "deaths", cases = "cases", dalys = "piles_dalys"),
bypass_cert_check = TRUE),
"Outcomes not found, dalys \\('piles_dalys'\\)")

expect_error(stone_stochastic_process(test$con,
"LAP-elf", "flu", "nevis-1", "pies", test$path, "non_exist:index.xz",
"", 1, 1, ".", deaths = "deaths", cases = "cases", dalys = "dalys",
"", 1, 1, ".",
outcomes = list(deaths = "deaths", cases = "cases", dalys = "dalys"),
runid_from_file = TRUE, bypass_cert_check = TRUE),
"Must have index_start and index_end as 1..200 to imply run_id")

@@ -364,7 +367,7 @@ stochastic_runner <- function(same_countries = TRUE,
upload = FALSE,
allow_new_database = TRUE,
bypass_cert_check = TRUE,
dalys_df = NULL,
dalys_recipe = NULL,
cert = "",
pre_aggregation_path = NULL,
lines = Inf,
@@ -375,9 +378,9 @@

res <- random_stoch_data(test, same_countries, simple_outcomes,
single_file_per_scenario, include_run_id,
include_disease, !is.null(dalys_df))
include_disease, !is.null(dalys_recipe))

if (is.data.frame(dalys_df)) {
if (is.data.frame(dalys_recipe)) {
fake_lifetable_db(test$con)
}

@@ -393,25 +396,30 @@
index_end <- 200
}

deaths <- "deaths"
cases <- "cases"
dalys <- "dalys"
if (!simple_outcomes) {
deaths <- c("deaths_acute", "deaths_chronic")
cases <- c("cases_acute", "cases_chronic")
dalys <- c("dalys_men", "dalys_pneumo")
outcomes <- list(
deaths = c("deaths_acute", "deaths_chronic"),
cases = c("cases_acute", "cases_chronic"),
dalys = c("dalys_men", "dalys_pneumo"))
} else {
outcomes <- list(
deaths = "deaths",
cases = "cases",
dalys = "dalys")

}

if (!is.null(dalys_df)) {
dalys <- dalys_df
if (!is.null(dalys_recipe)) {
outcomes$dalys <- NULL
}

stone_stochastic_process(test$con, "LAP-elf", "flu", "nevis-1",
res$resps$scenario, test$path, res$files,
cert = cert,
index_start, index_end, test$path,
pre_aggregation_path,
deaths, cases, dalys,
outcomes,
dalys_recipe,
runid_from_file = !include_run_id,
allow_missing_disease = !include_disease,
upload_to_annex = upload, annex = test$con,
@@ -648,13 +656,13 @@ fake_lifetable_db <- function(con) {
}

test_that("Stochastic - with DALYs", {
dalys_df <- data_frame(
dalys_recipe <- data_frame(
outcome = c("cases_acute", "deaths_chronic"),
proportion = c(0.1, 0.2),
average_duration = c(20, 1000),
disability_weight = c(0.4, 0.6))

result <- stochastic_runner(upload = FALSE, dalys_df = dalys_df,
result <- stochastic_runner(upload = FALSE, dalys_recipe = dalys_recipe,
simple_outcomes = FALSE)

lt <- stoner_life_table(result$test$con, "nevis-1", 2000, 2100, TRUE)