Skip to content

Commit

Permalink
Merge pull request #102 from lifecycle-project/fixes
Browse files Browse the repository at this point in the history
Fixes
  • Loading branch information
sidohaakma authored Jan 10, 2022
2 parents b65dfae + 99e3d89 commit 2f8d9c5
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 45 deletions.
3 changes: 1 addition & 2 deletions R/define-cases.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,12 @@ dh.defineCases <- function(df = NULL, vars = NULL, type = NULL, new_obj = NULL,

vars %>%
map(function(x) {
calltext <- call("BooleDS", x, -999999, 6, 0, TRUE)
calltext <- call("BooleDS", x, -999999, 5, 0, TRUE)
DSI::datashield.assign(conns, paste0(x, "_dc_1"), calltext)
})

## Add up these vectors. Value >= 1 means there is data on at least one.
cally <- as.symbol(paste0(paste0(vars, "_dc_1"), collapse = "+"))

DSI::datashield.assign(conns, "dc_any_data", cally)

calltext <- call("BooleDS", "dc_any_data", 1, 6, 0, TRUE)
Expand Down
103 changes: 76 additions & 27 deletions R/make-strata.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ dh.makeStrata <- function(df = NULL, id_var = NULL, age_var = NULL, var_to_subse
band_action = NULL, conns = NULL, checks = TRUE, df_name = NULL) {
op <- tmp <- dfs <- new_subset_name <- value <- cohort <- varname <- new_df_name <-
available <- bmi_to_subset <- ref_val <- enough_obs <- boole_name <- subset_name <- wide_name <-
end_objs <- . <- nearest_value <- age <- NULL
end_objs <- . <- nearest_value <- age <- subset_short <- suffix <- value_1 <- value_2 <- Var1 <-
Var2 <- var <- value <- NULL

if (is.null(conns)) {
conns <- datashield.connections_find()
Expand Down Expand Up @@ -177,19 +178,23 @@ dh.makeStrata <- function(df = NULL, id_var = NULL, age_var = NULL, var_to_subse
pattern = "-",
replacement = "m"
)
))
)) %>%
mutate(
boole_short = paste0("bl_", seq(1, length(boole_name))),
subset_short = paste0("sb_", seq(1, length(subset_name)))
)


boole_ref %>%
pmap(function(value_1, op_1, value_2, op_2, boole_name, ...) {
pmap(function(value_1, op_1, value_2, op_2, boole_short, ...) {
.BooleTwoConditions(
df = "df_slim",
var = age_var,
value_1 = value_1,
op_1 = op_1,
value_2 = value_2,
op_2 = op_2,
newobj = boole_name,
newobj = boole_short,
conns = conns[valid_coh]
)
})
Expand All @@ -200,7 +205,7 @@ dh.makeStrata <- function(df = NULL, id_var = NULL, age_var = NULL, var_to_subse

# We need to check that the subsets will have enough rows to avoid triggering
# disclosure traps.
discloure_ref <- boole_ref$boole_name %>%
discloure_ref <- boole_ref$boole_short %>%
map(
~ .checkDisclosure(
bin_vec = .x,
Expand All @@ -214,7 +219,7 @@ dh.makeStrata <- function(df = NULL, id_var = NULL, age_var = NULL, var_to_subse
}

failed_disclosure <- discloure_ref %>%
left_join(., boole_ref, by = "boole_name") %>%
left_join(., boole_ref, by = "boole_short") %>%
dplyr::filter(enough_obs == FALSE)

if (nrow(failed_disclosure) > 1) {
Expand All @@ -229,20 +234,19 @@ dh.makeStrata <- function(df = NULL, id_var = NULL, age_var = NULL, var_to_subse

message("** Step 5 of 9: Creating subsets ... ", appendLF = FALSE)

subset_ref <- left_join(boole_ref, discloure_ref, by = "boole_name") %>%
dplyr::filter(enough_obs == TRUE) %>%
select(cohort, boole_name, subset_name)
subset_ref <- left_join(boole_ref, discloure_ref, by = "boole_short") %>%
dplyr::filter(enough_obs == TRUE)

subset_ref %>%
pmap(
function(cohort, boole_name, subset_name) {
function(cohort, boole_short, subset_short, ...) {
ds.dataFrameSubset(
df.name = "df_slim",
V1.name = boole_name,
V1.name = boole_short,
V2.name = "1",
Boolean.operator = "==",
keep.NAs = TRUE,
newobj = subset_name,
newobj = subset_short,
datasources = conns[cohort]
)
}
Expand All @@ -260,25 +264,25 @@ dh.makeStrata <- function(df = NULL, id_var = NULL, age_var = NULL, var_to_subse

if (mult_action == "nearest") {
nearest_ref <- tibble(
subset_name = unique(subset_ref$subset_name),
subset_short = unique(subset_ref$subset_short),
nearest_value = mult_vals
)

sort_ref <- left_join(subset_ref, nearest_ref, by = "subset_name")
sort_ref <- left_join(subset_ref, nearest_ref, by = "subset_short")
} else if (mult_action %in% c("earliest", "latest")) {
sort_ref <- subset_ref %>%
mutate(nearest_value = NA)
}

sort_ref <- sort_ref %>%
mutate(sort_name = paste0(subset_name, "_s"))
mutate(sort_name = paste0(subset_short, "_s"))

sort_ref %>%
pmap(function(cohort, subset_name, sort_name, nearest_value, ...) {
pmap(function(cohort, subset_short, sort_name, nearest_value, ...) {
.sortSubset(
mult_action = mult_action,
nearest_value = nearest_value,
subset_name = subset_name,
subset_name = subset_short,
age_var = age_var,
newobj = sort_name,
conns = conns[cohort]
Expand All @@ -291,8 +295,8 @@ dh.makeStrata <- function(df = NULL, id_var = NULL, age_var = NULL, var_to_subse

reshape_ref <- sort_ref %>%
mutate(
suffix = str_extract(subset_name, "[^_]+$"),
wide_name = paste0(subset_name, "_w")
suffix = str_extract(subset_short, "[^_]+$"),
wide_name = paste0(subset_short, "_w")
)

reshape_ref %>%
Expand Down Expand Up @@ -340,6 +344,42 @@ dh.makeStrata <- function(df = NULL, id_var = NULL, age_var = NULL, var_to_subse
datasources = conns[cohort]
)
})

## The last step is to rename created variables with correct suffix
suffix_ref <- reshape_ref %>%
dplyr::select(cohort, suffix, value_1, value_2) %>%
mutate(suffix = paste0(".", suffix))

var_ref <- c(var_to_subset, age_var, keep_vars)

rename_ref_coh <- suffix_ref %>%
group_by(cohort)

tmp_names <- group_keys(rename_ref_coh) %>%
unlist()

rename_ref <- rename_ref_coh %>%
group_split %>%
map(~expand.grid(.$suffix, var_ref)) %>%
set_names(tmp_names) %>%
bind_rows(.id = "cohort") %>%
dplyr::rename(suffix = Var1, var = Var2) %>%
left_join(., suffix_ref, by = c("cohort", "suffix")) %>%
mutate(
old_name = paste0(var, suffix),
new_name = paste0(var, ".", value_2)) %>%
group_by(cohort)

rename_ref %>%
pmap(function(cohort, old_name, new_name, ...){
dh.renameVars(
df = new_obj,
current_names = old_name,
new_names = new_name,
conns = conns[cohort],
checks = FALSE
)
})

message("DONE", appendLF = TRUE)

Expand All @@ -351,9 +391,13 @@ dh.makeStrata <- function(df = NULL, id_var = NULL, age_var = NULL, var_to_subse
conns = conns
)

created <- subset_ref %>%
mutate(age = subset_name %>% str_remove("subset_")) %>%
dplyr::select(cohort, age)
created <- rename_ref %>%
ungroup %>%
distinct(cohort, value_1, value_2) %>%
dplyr::rename(
lower_band = value_1,
upper_band = value_2) %>%
arrange(cohort)

message("DONE", appendLF = TRUE)

Expand Down Expand Up @@ -605,16 +649,21 @@ dh.makeStrata <- function(df = NULL, id_var = NULL, age_var = NULL, var_to_subse
.checkDisclosure <- function(bin_vec, conns) {
observations <- . <- NULL

coh_ref <- tibble(
coh_num = as.character(seq(1, length(names(conns)))),
cohort = names(conns)
)

n_obs <- ds.table(bin_vec, datasources = conns)$output.list$TABLE_rvar.by.study_counts %>%
as_tibble(rownames = "levels") %>%
dplyr::filter(levels == 1) %>%
pivot_longer(
cols = c(-levels),
names_to = "cohort",
names_to = "coh_num",
values_to = "observations"
) %>%
dplyr::select(-levels) %>%
mutate(cohort = names(conns))
left_join(., coh_ref, by = "coh_num") %>%
dplyr::filter(levels == 1) %>%
dplyr::select(-levels)

min_obs <- ds.listDisclosureSettings(datasources = conns)$ds.disclosure.settings %>%
map_df(~ .$nfilter.subset) %>%
Expand All @@ -626,7 +675,7 @@ dh.makeStrata <- function(df = NULL, id_var = NULL, age_var = NULL, var_to_subse

disclosure_ref <- left_join(n_obs, min_obs, by = "cohort") %>%
mutate(
boole_name = bin_vec,
boole_short = bin_vec,
enough_obs = ifelse(observations > min_obs, TRUE, FALSE)
)

Expand Down
51 changes: 35 additions & 16 deletions R/tidy-env.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#'
#' @export
dh.tidyEnv <- function(obj = NULL, type = NULL, conns = NULL) {
. <- NULL
. <- value <- NULL

if (is.null(obj)) {
stop("`obj` must not be NULL.", call. = FALSE)
Expand All @@ -36,7 +36,26 @@ dh.tidyEnv <- function(obj = NULL, type = NULL, conns = NULL) {
}

if (type == "remove") {
obj %>% map(ds.rm, datasources = conns)

obj <- obj

## Check no objects to removed have character length >20
obj_lengths <- tibble(
obj = obj,
length = obj %>% map_int(nchar))

obj_valid <- obj_lengths %>%
dplyr::filter(length < 20) %>%
pull(obj)

obj_not_valid <- obj_lengths %>%
dplyr::filter(length >= 20)

if (nrow(obj_not_valid > 0)) {
warning(paste0("You are attempting to remove objects with name(s) longer than 20 characters. DS does not permit this
due to risk of malicious code. These objects have not been removed: \n\n", as.character(obj_not_valid$value)), call. = FALSE)
}

} else if (type == "keep") {
objects <- names(conns) %>%
map(
Expand All @@ -50,24 +69,24 @@ dh.tidyEnv <- function(obj = NULL, type = NULL, conns = NULL) {

names(vars) <- names(conns)

## Check no objects to removed have character length >20
obj_lengths <- vars %>%
map(~ nchar(.)) %>%
map(~ any(. > 20)) %>%
unlist() %>%
any(. > 20)

if (obj_lengths == TRUE) {
stop("You are attempting to remove objects with name(s) longer than 20 characters. DS does not permit this
due to risk of malicious code. Amend your script so that your objects have shorter names", call. = FALSE)
}

vars_tibble <- vars %>%
map(~ as_tibble(.)) %>%
imap(~ mutate(., cohort = .y)) %>%
bind_rows()
bind_rows() %>%
mutate(length = value %>% map_int(nchar))

obj_valid <- vars_tibble %>%
dplyr::filter(length < 20)

obj_not_valid <- vars_tibble %>%
dplyr::filter(length >= 20)

if (nrow(obj_not_valid > 0)) {
warning(paste0("You are attempting to remove objects with name(s) longer than 20 characters. DS does not permit this
due to risk of malicious code. These objects have not been removed.", as.character(obj_not_valid$value)), call. = FALSE)
}

vars_tibble %>% pmap(function(cohort, value) {
obj_valid %>% pmap(function(cohort, value, ...) {
ds.rm(x.name = value, datasources = conns[cohort])
})
}
Expand Down

0 comments on commit 2f8d9c5

Please sign in to comment.