Merge pull request #137 from timcadman/dev

v1.5
timcadman · Jun 12, 2024 · b097c5e · b097c5e
2 parents 5be3f6f + 9d2f85e
commit b097c5e
Show file tree

Hide file tree

Showing 74 changed files with 1,173 additions and 776 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: dsHelper
 Type: Package
 Title: Helper Functions for Use with 'DataSHIELD'
-Version: 1.3.0
+Version: 1.5
 Description: Often we need to automate things with 'DataSHIELD'. These functions help to do that.
 Authors@R: 
   c(person(given= "Tim", 
@@ -33,19 +33,21 @@ Imports:
   DSI,
   metafor,
   checkmate,
-  withr,
   lme4, 
-  webmockr,
-  tidyselect
+  tidyselect, 
+  arrow (>= 16.1.0)
 Suggests:
   knitr,
   rmarkdown,
   testthat,
-  mockery
+  mockery,
+  httr,
+  webmockr,
+  withr
 URL: https://github.com/lifecycle-project/ds-helper/, 
   https: //lifecycle-project.github.io/ds-helper/
 BugReports: https://github.com/lifecycle-project/ds-helper/issues/
 VignetteBuilder: knitr
 License: GPL-3
 Encoding: UTF-8
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
diff --git a/NAMESPACE b/NAMESPACE
@@ -34,6 +34,7 @@ export(dh.subjHasData)
 export(dh.tidyEnv)
 export(dh.trimPredData)
 export(dh.zByGroup)
+import(arrow)
 import(lme4)
 importFrom(DSI,datashield.aggregate)
 importFrom(DSI,datashield.assign)

diff --git a/R/build-models.R b/R/build-models.R
@@ -171,7 +171,7 @@ make_exp_out_cov_form <- function(comb_with_covariates) {
   outcome <- exposure <- covariates <- NULL
 
   formula <- comb_with_covariates %>%
-    rowwise %>%
+    rowwise() %>%
     mutate(formula = paste(outcome, "~1+", exposure, "+", paste(unlist(covariates), collapse = "+")))
 
   return(formula)

diff --git a/R/create-table-one.R b/R/create-table-one.R
@@ -319,10 +319,11 @@ dh.createTableOne <- function(stats = NULL, vars = NULL, var_labs = NULL,
     mutate(value = ifelse(is.na(category),
       paste0(missing, " (", missing_perc, ")"), value
     )) %>%
-    dplyr::select(cohort, variable, category, value)  %>%
+    dplyr::select(cohort, variable, category, value) %>%
     mutate(category = case_when(
       category == "med_iqr" ~ "Median \u00b1 (IQR)",
-      category == "mean_sd" ~ "Mean \u00b1 SD"))
+      category == "mean_sd" ~ "Mean \u00b1 SD"
+    ))
 
   return(out)
 }
@@ -352,8 +353,6 @@ dh.createTableOne <- function(stats = NULL, vars = NULL, var_labs = NULL,
       but do not have a corresponding labels for all categories provided in
       `cat_labs`\n\n", unique(missing_cats$variable)
     )
-
-
   }
 }
 

diff --git a/R/get-anon-plot-data.R b/R/get-anon-plot-data.R
@@ -84,10 +84,10 @@ dh.getAnonPlotData <- function(df = NULL, var_1 = NULL, var_2 = NULL,
 
     out <- scatter %>%
       map(~
-      tibble(
-        x = .[[1]],
-        y = .[[2]]
-      )) %>%
+        tibble(
+          x = .[[1]],
+          y = .[[2]]
+        )) %>%
       bind_rows(.id = "cohort") %>%
       dplyr::rename(
         !!quo_name(var_1) := x,

diff --git a/R/get-stats.R b/R/get-stats.R
@@ -59,6 +59,7 @@
 #' @importFrom magrittr %<>%
 #' @importFrom DSI datashield.connections_find datashield.aggregate
 #' @importFrom utils capture.output
+#' @import arrow
 #'
 #' @md
 #'
@@ -73,16 +74,16 @@ dh.getStats <- function(df = NULL, vars = NULL, digits = 2, conns = NULL,
     stop("`df` must not be NULL.", call. = FALSE)
   }
 
-  if (is.null(vars)) {
-    stop("`vars` must not be NULL.", call. = FALSE)
-  }
-
   if (is.null(conns)) {
     conns <- datashield.connections_find()
   }
-
   if (checks == TRUE) {
-    .isDefined(df = df, conns = conns)
+    conns_exist <- unlist(ds.exists(df, conns))
+    excluded <- names(conns)[!conns_exist]
+    conns <- conns[conns_exist]
+    if (length(excluded) > 0) {
+      warning(paste0("Cohorts ", excluded, " have been excluded as they do not contain data frame ", df), call. = F)
+    }
   }
   # Not checking whether variable exists because function will show NA if it
   # doesnt
@@ -95,6 +96,10 @@ dh.getStats <- function(df = NULL, vars = NULL, digits = 2, conns = NULL,
     stats_tmp <- stats_wide <- std.dev <- type <- type_w_null <- . <-
     perc_valid <- perc_total <- Ntotal <- disclosure_fail <- NULL
 
+  if (is.null(df)) {
+    vars <- .define_default_vars(df, conns)
+  }
+
   ################################################################################
   # 1. Remove duplicate variables
   ################################################################################
@@ -383,7 +388,6 @@ check with ds.class \n\n",
   ################################################################################
 
   if (nrow(fact_ref) > 0) {
-
     ## ---- Combined value for each level of variables -----------------------------
     levels_comb <- stats_cat %>%
       group_by(variable, category) %>%
@@ -546,6 +550,21 @@ check with ds.class \n\n",
   return(out)
 }
 
+
+#' Define Default Variables
+#'
+#' This function takes a list of connections as input and returns a vector of unique column names across all connections.
+#'
+#' @param conns A list of connections to data sources.
+#' @return A character vector containing unique column names across all connections.
+#' @noRd
+.define_default_vars <- function(df, conns) {
+  all_cols <- ds.colnames(df, conns)
+  unique_cols <- unique(unlist(all_cols))
+  return(unique_cols)
+}
+
+
 #' Extracts stats using table function
 #'
 #' @param ref reference tibble of vars with four columns: variable, cohort,

diff --git a/R/lm-tab.R b/R/lm-tab.R
@@ -94,13 +94,12 @@ dh.lmTab <- function(model = NULL, type = NULL, coh_names = NULL,
     coh_ns <- extract_ns_lmer(model, nstudy)
     coh_coefs <- add_ns_slma(coh_ns, coh_coefs, coh_names)
 
-    if(extract_random){
-    random <- extract_random(model, coh_names, nstudy)
-    random <- rename_intercept(random, col_name = "var1")
+    if (extract_random) {
+      random <- extract_random(model, coh_names, nstudy)
+      random <- rename_intercept(random, col_name = "var1")
 
-    random <- random %>%
-      mutate(across(stddev, ~ round(., digits)))
-
+      random <- random %>%
+        mutate(across(stddev, ~ round(., digits)))
     }
   }
 
@@ -142,26 +141,20 @@ dh.lmTab <- function(model = NULL, type = NULL, coh_names = NULL,
   coefs <- rename_intercept(coefs, col_name = "variable")
 
   if (type == "lmer_slma") {
-
-    if(extract_random){
-
+    if (extract_random) {
       return(
-      list(
-        fixed = coefs,
-        random = random
+        list(
+          fixed = coefs,
+          random = random
+        )
       )
-    )
-
-      } else{
-
-        return(
-          list(
-            fixed = coefs
-          )
+    } else {
+      return(
+        list(
+          fixed = coefs
         )
-
-      }
-
+      )
+    }
   } else {
     return(coefs)
   }

diff --git a/R/make-iqr.R b/R/make-iqr.R
@@ -109,7 +109,6 @@ dh.makeIQR <- function(df = NULL, vars = NULL, type = c("combine", "split"),
       stringsAsFactors = FALSE
     )
   } else if (type == "combine") {
-
     ## ---- Identify cohorts which are all missing -----------------------------
     missing <- expand.grid(vars, names(conns)) %>%
       set_names(c("variable", "cohort")) %>%

diff --git a/R/make-lmer-form.R b/R/make-lmer-form.R
@@ -37,7 +37,6 @@
 #' @export
 dh.makeLmerForm <- function(outcome = NULL, id_var = NULL, age_vars = NULL,
                             random = NULL, fixed = NULL, age_interactions = NULL) {
-
   lmer_form_check_args(outcome, id_var, age_vars, random, fixed, age_interactions)
 
   formula_fixed <- make_fixed_effects(age_vars, fixed, age_interactions)
@@ -92,8 +91,9 @@ make_fixed_effects <- function(age_vars, fixed, age_interactions) {
 
   if (!is.null(age_interactions)) {
     age_interactions <- c(
-      combn(paste0(age_interactions, "*", age_vars), 2, paste, collapse = "+"), 
-      paste0(age_interactions, "*", age_vars))
+      combn(paste0(age_interactions, "*", age_vars), 2, paste, collapse = "+"),
+      paste0(age_interactions, "*", age_vars)
+    )
   }
 
   if (!is.null(fixed)) {

diff --git a/R/make-strata.R b/R/make-strata.R
@@ -563,7 +563,6 @@ dh.makeStrata <- function(df = NULL, id_var = NULL, age_var = NULL, var_to_subse
 #' @noRd
 .sortSubset <- function(mult_action, nearest_value, subset_name, age_var, newobj, conns) {
   if (mult_action == "nearest") {
-
     ## Make a variable specifying distance between age of measurement and prefered
     ## value (provided by "mult_vals")
 
@@ -602,7 +601,6 @@ dh.makeStrata <- function(df = NULL, id_var = NULL, age_var = NULL, var_to_subse
 #'
 #' @noRd
 .reshapeSubset <- function(sorted_subset, id_var, age_var, var_to_subset, var_suffix, conns, newobj, keep_vars) {
-
   # We need a vector the length of our subset with an integer value describing
   # the name of the subset. We use this to create our final variables names
 

diff --git a/R/mean-by-group.R b/R/mean-by-group.R
@@ -58,8 +58,6 @@ dh.meanByGroup <- function(df = NULL, outcome = NULL, group_var = NULL,
   ## the binning variable it is quite quick
 
   if (is.null(intervals)) {
-
-
     ## ---- First we round up the age variable -----------------------------------------------
     DSI::datashield.assign(
       conns, "age_tmp", as.symbol(paste0(df, "$", group_var, "+0.5"))
@@ -104,7 +102,6 @@ re-run the function using the `intervals` argument. \n\n",
       conns = conns
     )
   } else if (!is.null(intervals)) {
-
     ## ---- This is the harder one -------------------------------------------------------------------
 
     ## First we need to create a table defining our age bands.

diff --git a/R/meta-sep-models.R b/R/meta-sep-models.R
@@ -23,7 +23,7 @@ dh.metaSepModels <- function(input = "fit", ref = NULL, exp = NULL, method = NUL
                              output = "both") {
   exposure <- variable <- cohort <- . <- est <- lowci <- uppci <-
     model_id <- n_obs <- se <- NULL
-  
+
   method <- arg_match(
     arg = method,
     values = c("DL", "HE", "HS", "HSk", "SJ", "ML", "REML", "EB", "PM", "GENQ")
@@ -35,30 +35,25 @@ dh.metaSepModels <- function(input = "fit", ref = NULL, exp = NULL, method = NUL
   )
 
   if (output %in% c("meta", "both") == TRUE) {
-
-    if(input == "fit"){
-
-    ## ---- Get coefficients -----------------------------------------------------
-    model_coefs <- ref %>%
-      pmap(function(cohort, fit, ...) {
-        dh.lmTab(
-          model = fit,
-          coh_names = cohort,
-          type = "glm_slma",
-          ci_format = "separate",
-          direction = "wide",
-          family = "binomial",
-          digits = 50
-        ) %>%
-          dplyr::filter(cohort != "combined")
-      }) %>%
-      set_names(ref$model_id) %>%
-      bind_rows(.id = "model_id")
-
-    } else{
-
+    if (input == "fit") {
+      ## ---- Get coefficients -----------------------------------------------------
+      model_coefs <- ref %>%
+        pmap(function(cohort, fit, ...) {
+          dh.lmTab(
+            model = fit,
+            coh_names = cohort,
+            type = "glm_slma",
+            ci_format = "separate",
+            direction = "wide",
+            family = "binomial",
+            digits = 50
+          ) %>%
+            dplyr::filter(cohort != "combined")
+        }) %>%
+        set_names(ref$model_id) %>%
+        bind_rows(.id = "model_id")
+    } else {
       model_coefs <- ref
-
     }
 
     ## ---- Create tibble respecting grouping order ------------------------------