mrc-ide · cm401 · Apr 25, 2024 · Apr 25, 2024 · Apr 25, 2024 · Apr 25, 2024
diff --git a/.gitignore b/.gitignore
@@ -3,6 +3,13 @@
 .RData
 .Ruserdata
 priority-pathogens.Rproj
+src/*.csv
+src/*/*.csv
+src/*/*/*.csv
+src/*/*.png
+src/*/lassa_functions.R
+pathway
+src/db_extraction/utils.R
 
 # ---VVV--- added by orderly ---VVV----------------
 # Don't manually edit content between these markers

diff --git a/orderly_config.yml b/orderly_config.yml
@@ -5,4 +5,5 @@ plugins:
     lassadoubledb: "Z:/Lassa/Databases/Double Extractions"
     eboladoubledb2: "Z:/Ebola/databases/NEW DOUBLE EXTRACTION DATABASES"
     eboladoubledb: "Z:/Ebola/databases/Double extraction databases"
-    ebolasingledb: "Z:/Ebola/databases/Single extraction databases"
+    ebolasingledb: "Z:/Ebola/databases/Single extraction databases"
+    sarssingledb: "Y:/SARS/Databases/full_extraction"
diff --git a/shared/lassa_functions.R b/shared/lassa_functions.R
@@ -1,62 +1,86 @@
 #function to tidy-up all dataframes
 
-curation <- function(articles, outbreaks, models, parameters, plotting) {
+data_curation <- function(articles, outbreaks, models, parameters, plotting) {
 
   articles   <- articles %>%
-                mutate(refs = paste(first_author_first_name," (",year_publication,")",sep="")) %>% #define references
-                group_by(refs) %>% mutate(counter = row_number()) %>% ungroup() %>% #distinguish same-author-same-year references
-                mutate(new_refs = ifelse(refs %in% refs[duplicated(refs)], paste0(sub("\\)$", "", refs),letters[counter],")"), refs)) %>%
-                select(-counter,-refs) %>% rename(refs = new_refs)
-  outbreaks  <- outbreaks %>% 
-                mutate(refs = articles$refs[match(covidence_id, articles$covidence_id)])
+    mutate(refs = paste(first_author_first_name," (",year_publication,")",sep="")) %>% #define references
+    group_by(refs) %>% mutate(counter = row_number()) %>% ungroup() %>% #distinguish same-author-same-year references
+    mutate(new_refs = ifelse(refs %in% refs[duplicated(refs)], paste0(sub("\\)$", "", refs),letters[counter],")"), refs)) %>%
+    select(-counter,-refs) %>% rename(refs = new_refs)
+
+  # attach the article reference to each outbreak row; skipped when outbreaks has no rows
+  if (nrow(outbreaks) > 0) {
+    outbreaks  <- outbreaks %>% 
+      mutate(refs = articles$refs[match(covidence_id, articles$covidence_id)])
+  }
+
   models     <- models %>% 
-                mutate(refs = articles$refs[match(covidence_id, articles$covidence_id)])
+    mutate(refs = articles$refs[match(covidence_id, articles$covidence_id)])
+
   parameters <- parameters %>% 
-                mutate(refs = articles$refs[match(covidence_id, articles$covidence_id)]) %>%
-                filter(!parameter_from_figure)
+    mutate(refs = articles$refs[match(covidence_id, articles$covidence_id)]) %>%
+    filter(!parameter_from_figure)
 
   param4plot <- parameters %>%
-                mutate_at(vars(parameter_value, parameter_lower_bound, parameter_upper_bound, 
-                               parameter_uncertainty_lower_value, parameter_uncertainty_upper_value),
-                          list(~ ifelse(inverse_param, 1/.x, .x))) %>%
-                mutate_at(vars(parameter_value, parameter_lower_bound, parameter_upper_bound, 
-                               parameter_uncertainty_lower_value, parameter_uncertainty_upper_value),
-                          list(~ .x * 10^exponent)) %>%
-                mutate_at(vars(parameter_value,parameter_lower_bound,parameter_upper_bound,
-                               parameter_uncertainty_lower_value,parameter_uncertainty_upper_value), #account for different units
-                          list(~ ifelse(parameter_unit == "Weeks", . * 7, .))) %>% 
-                mutate(parameter_unit = ifelse(parameter_unit == "Weeks", "Days", parameter_unit)) %>%
-                mutate(no_unc = is.na(parameter_uncertainty_lower_value) & is.na(parameter_uncertainty_upper_value), #store uncertainty in pu_lower and pu_upper
-                       parameter_uncertainty_lower_value = case_when(
-                         parameter_uncertainty_singe_type == "Maximum" & no_unc ~ parameter_value,
-                         parameter_uncertainty_singe_type == "Standard deviation (Sd)" & no_unc ~ parameter_value-parameter_uncertainty_single_value,
-                         parameter_uncertainty_singe_type == "Standard Error (SE)" & no_unc ~ parameter_value-parameter_uncertainty_single_value,
-                         distribution_type == "Gamma" & no_unc ~ qgamma(0.05, shape = (distribution_par1_value/distribution_par2_value)^2, rate = distribution_par1_value/distribution_par2_value^2),      
-                         TRUE ~ parameter_uncertainty_lower_value),                                                 
-                       parameter_uncertainty_upper_value = case_when(
-                         parameter_uncertainty_singe_type == "Maximum" & no_unc ~ parameter_uncertainty_single_value,
-                         parameter_uncertainty_singe_type == "Standard deviation (Sd)" & no_unc ~ parameter_value+parameter_uncertainty_single_value,
-                         parameter_uncertainty_singe_type == "Standard Error (SE)" & no_unc ~ parameter_value+parameter_uncertainty_single_value,
-                         distribution_type == "Gamma" & no_unc ~ qgamma(0.95, shape = (distribution_par1_value/distribution_par2_value)^2, rate = distribution_par1_value/distribution_par2_value^2),      
-                         TRUE ~ parameter_uncertainty_upper_value)) %>%
-                select(-c(no_unc)) %>%
-                mutate(central = coalesce(parameter_value,100*cfr_ifr_numerator/cfr_ifr_denominator,0.5*(parameter_lower_bound+parameter_upper_bound))) #central value for plotting
+    mutate_at(vars(parameter_value, parameter_lower_bound, parameter_upper_bound, 
+                   parameter_uncertainty_lower_value, parameter_uncertainty_upper_value),
+              list(~ ifelse(inverse_param, 1/.x, .x))) %>%
+    mutate_at(vars(parameter_value, parameter_lower_bound, parameter_upper_bound, 
+                   parameter_uncertainty_lower_value, parameter_uncertainty_upper_value),
+              list(~ .x * 10^exponent)) %>%
+    mutate_at(vars(parameter_value,parameter_lower_bound,parameter_upper_bound,
+                   parameter_uncertainty_lower_value,parameter_uncertainty_upper_value), #account for different units
+              list(~ ifelse(parameter_unit %in% "Weeks", . * 7, .))) %>% 
+    mutate(parameter_unit = ifelse(parameter_unit %in% "Weeks", "Days", parameter_unit)) %>%
+    mutate(no_unc = is.na(parameter_uncertainty_lower_value) & is.na(parameter_uncertainty_upper_value), #store uncertainty in pu_lower and pu_upper
+           parameter_uncertainty_lower_value = case_when(
+             parameter_uncertainty_singe_type == "Maximum" & no_unc ~ parameter_value,
+             parameter_uncertainty_singe_type == "Standard deviation (Sd)" & no_unc ~ parameter_value-parameter_uncertainty_single_value,
+             parameter_uncertainty_singe_type == "Standard Error (SE)" & no_unc ~ parameter_value-parameter_uncertainty_single_value,
+             distribution_type == "Gamma" & no_unc ~ qgamma(0.05, shape = (distribution_par1_value/distribution_par2_value)^2, rate = distribution_par1_value/distribution_par2_value^2),      
+             TRUE ~ parameter_uncertainty_lower_value),                                                 
+           parameter_uncertainty_upper_value = case_when(
+             parameter_uncertainty_singe_type == "Maximum" & no_unc ~ parameter_uncertainty_single_value,
+             parameter_uncertainty_singe_type == "Standard deviation (Sd)" & no_unc ~ parameter_value+parameter_uncertainty_single_value,
+             parameter_uncertainty_singe_type == "Standard Error (SE)" & no_unc ~ parameter_value+parameter_uncertainty_single_value,
+             distribution_type == "Gamma" & no_unc ~ qgamma(0.95, shape = (distribution_par1_value/distribution_par2_value)^2, rate = distribution_par1_value/distribution_par2_value^2),      
+             TRUE ~ parameter_uncertainty_upper_value)) %>%
+    select(-c(no_unc)) %>%
+    mutate(central = coalesce(parameter_value,100*cfr_ifr_numerator/cfr_ifr_denominator,0.5*(parameter_lower_bound+parameter_upper_bound))) #central value for plotting
 
   if (plotting) {
     parameters <- param4plot    
   } else {
-    parameters <- parameters %>%
-                  left_join(param4plot %>% select(parameter_data_id, central), by = "parameter_data_id")
+    # param4plot is built from parameters without dropping or reordering rows, so the
+    # ids must line up exactly (NA-safe check) before central is copied across by position
+    if (!identical(parameters$parameter_data_id, param4plot$parameter_data_id)) {
+      # abort loudly: a mismatch would attach central values to the wrong rows
+      stop("parameters not in right order to match")
+    }
+    parameters$central <- param4plot$central
+  }
+
+  # repair mis-encoded characters in outbreak locations; skipped when outbreaks has no rows
+  if (nrow(outbreaks) > 0) {
+    outbreaks  <- outbreaks  %>% mutate(outbreak_location  = str_replace_all(outbreak_location, "\xe9" , "é"))
   }
 
-  outbreaks  <- outbreaks  %>% mutate(outbreak_location  = str_replace_all(outbreak_location, "\xe9" , "é"))
   parameters <- parameters %>% mutate(parameter_type     = str_replace_all(parameter_type, "\x96" , "–"),
                                       population_country = str_replace_all(population_country, c("昼㸴" = "ô", "�" = "ô")))
-
+  # hand back the four curated tables as a named list
   return(list(articles = articles, outbreaks = outbreaks, 
               models = models, parameters = parameters))
 }
 
+
+curation <- function(articles, outbreaks, models, parameters, plotting) {
+  # Backward-compatible entry point: existing callers keep using curation()
+  # while the work is done by data_curation(), which at some stage will move
+  # to epireview. The result is passed through as-is: a named list with the
+  # elements articles, outbreaks, models and parameters.
+  data_curation(articles, outbreaks, models, parameters, plotting)
+}
+
 # function to produce forest plot for given dataframe
 
 forest_plot <- function(df, label, color_column, lims) {
@@ -222,7 +246,7 @@ metamean_wrap <- function(dataframe, estmeansd_method,
 
   gg <- png::readPNG("temp.png", native = TRUE)
   file.remove("temp.png")
-  #gg <- wrap_elements(plot = rasterGrob(pg, interpolate = TRUE))
+  gg <- wrap_elements(plot = rasterGrob(gg, interpolate = TRUE))
   return(list(result = mtan, plot = gg))
 } 
 

diff --git a/shared/utils.R b/shared/utils.R
@@ -81,7 +81,34 @@ database_files <- function(pathogen) {
         "DIDE Priority Pathogens LASSA - Thom.accdb",
         "DIDE Priority Pathogens LASSA - Tristan.accdb"
       )
-    )
+    ),
+    SARS = list(
+      sarssingledb = c(
+        "DIDE Priority Pathogens SARS - Anna Vicco.accdb",
+        "DIDE Priority Pathogens SARS - Anna Vicco double.accdb",
+        "DIDE Priority Pathogens SARS - Anne.accdb",
+        "DIDE Priority Pathogens SARS - Bethan.accdb",
+        "DIDE Priority Pathogens SARS - Christian.accdb",
+        "DIDE Priority Pathogens SARS - Dominic.accdb",
+        "DIDE Priority Pathogens SARS - Ettie.accdb",
+        "DIDE Priority Pathogens SARS - Joseph.accdb",
+        "DIDE Priority Pathogens SARS - Kanchan.accdb",
+        "DIDE Priority Pathogens SARS - Kelly.accdb",
+        "DIDE Priority Pathogens SARS - Kieran.accdb",
+        "DIDE Priority Pathogens SARS - Patrick.accdb",
+        "DIDE Priority Pathogens SARS - Paula_double.accdb",
+        "DIDE Priority Pathogens SARS - Paula_single.accdb",
+        "DIDE Priority Pathogens SARS - Rebecca.accdb",
+        "DIDE Priority Pathogens SARS - Richard.accdb",
+        "DIDE Priority Pathogens SARS - Rob.accdb",
+        "DIDE Priority Pathogens SARS - Ruth.accdb",
+        "DIDE Priority Pathogens SARS - Sangeeta.accdb",
+        "DIDE Priority Pathogens SARS - Sequoia.accdb",
+        "DIDE Priority Pathogens SARS - Thom.accdb",
+        "DIDE Priority Pathogens SARS - Tristan.accdb"
+      )
+    )    
+
     ## Nipah Database files
   )
 

diff --git a/shared/world_cases_table.xlsx b/shared/world_cases_table.xlsx