Skip to content

Commit

Permalink
Merge pull request #370 from OHDSI/webpage
Browse files Browse the repository at this point in the history
Remove magritte dependency
  • Loading branch information
edward-burn authored Oct 25, 2024
2 parents e00378b + 81ed5b2 commit 51ceaba
Show file tree
Hide file tree
Showing 31 changed files with 397 additions and 421 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Imports:
CDMConnector (>= 1.3.1),
checkmate,
cli,
clock,
dbplyr (>= 2.5.0),
dplyr,
glue,
Expand Down
2 changes: 1 addition & 1 deletion R/conceptCohort.R
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ unerafiedConceptCohort <- function(cdm,
}
}

cohorts <- cohorts %>%
cohorts <- cohorts |>
purrr::discard(is.null)

if (length(cohorts) == 0) {
Expand Down
107 changes: 48 additions & 59 deletions R/intersectCohorts.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,9 @@ intersectCohorts <- function(cohort,
# get intersections between cohorts
tblName <- omopgenerics::uniqueTableName(prefix = uniquePrefix)
lowerWindow <- ifelse(gap != 0, -gap, gap)
cohortOut <- cohort %>%
dplyr::filter(.data$cohort_definition_id %in% .env$cohortId) %>%
dplyr::select(-"cohort_definition_id") %>%
cohortOut <- cohort |>
dplyr::filter(.data$cohort_definition_id %in% .env$cohortId) |>
dplyr::select(-"cohort_definition_id") |>
splitOverlap(by = "subject_id", name = tblName, tmp = paste0(tblName)) |>
PatientProfiles::addCohortIntersectFlag(
targetCohortTable = omopgenerics::tableName(cohort),
Expand All @@ -77,13 +77,13 @@ intersectCohorts <- function(cohort,
)

# create intersect cohort set
cohortNames <- omopgenerics::settings(cohort) %>%
dplyr::filter(.data$cohort_definition_id %in% .env$cohortId) %>%
cohortNames <- omopgenerics::settings(cohort) |>
dplyr::filter(.data$cohort_definition_id %in% .env$cohortId) |>
dplyr::pull("cohort_name")
x <- rep(list(c(0, 1)), length(cohortNames))
names(x) <- cohortNames
cohSet <- expand.grid(x) %>%
dplyr::as_tibble() %>%
cohSet <- expand.grid(x) |>
dplyr::as_tibble() |>
dplyr::filter(dplyr::if_any(dplyr::everything(), ~ . != 0)) |>
addNames() |>
dplyr::mutate(cohort_definition_id = as.integer(dplyr::row_number())) |>
Expand Down Expand Up @@ -124,13 +124,13 @@ intersectCohorts <- function(cohort,
name = setName,
table = cohSet
)
cohortOut <- cohortOut %>%
dplyr::inner_join(cdm[[setName]], by = cohortNames) %>%
cohortOut <- cohortOut |>
dplyr::inner_join(cdm[[setName]], by = cohortNames) |>
dplyr::select("cohort_definition_id", "subject_id",
"cohort_start_date", "cohort_end_date") %>%
"cohort_start_date", "cohort_end_date") |>
dplyr::compute(name = tblName, temporary = FALSE)
if (cohortOut |> dplyr::tally() |> dplyr::pull("n") > 0) {
cohortOut <- cohortOut %>%
cohortOut <- cohortOut |>
PatientProfiles::addObservationPeriodId(name = tblName) |>
joinOverlap(
name = tblName, gap = gap,
Expand Down Expand Up @@ -202,7 +202,7 @@ intersectCohorts <- function(cohort,
)
cdm <- bind(originalCohorts, cohortOut, name = name)
} else {
cohortOut <- cohortOut %>%
cohortOut <- cohortOut |>
dplyr::compute(name = name, temporary = FALSE)
cdm[[name]] <- omopgenerics::newCohortTable(
table = cohortOut,
Expand Down Expand Up @@ -260,49 +260,49 @@ splitOverlap <- function(x,
ie <- ids[3]

tmpTable_1 <- paste0(tmp, "_1")
x_a <- x %>%
dplyr::select(dplyr::all_of(by), !!is := dplyr::all_of(start)) %>%
x_a <- x |>
dplyr::select(dplyr::all_of(by), !!is := dplyr::all_of(start)) |>
dplyr::union_all(
x %>%
dplyr::select(dplyr::all_of(by), !!is := dplyr::all_of(end)) %>%
dplyr::mutate(!!is := as.Date(!!CDMConnector::dateadd(is, 1)))
) %>%
dplyr::distinct() %>%
x |>
dplyr::select(dplyr::all_of(by), !!is := dplyr::all_of(end)) |>
dplyr::mutate(!!is := as.Date(clock::add_days(.data[[is]], 1L)))
) |>
dplyr::distinct() |>
dplyr::compute(temporary = FALSE, name = tmpTable_1)

tmpTable_2 <- paste0(tmp, "_2")
x_a <- x_a %>%
dplyr::group_by(dplyr::across(dplyr::all_of(by))) %>%
dbplyr::window_order(.data[[is]]) %>%
dplyr::mutate(!!id := dplyr::row_number()) %>%
dbplyr::window_order() %>%
dplyr::ungroup() %>%
x_a <- x_a |>
dplyr::group_by(dplyr::across(dplyr::all_of(by))) |>
dbplyr::window_order(.data[[is]]) |>
dplyr::mutate(!!id := dplyr::row_number()) |>
dbplyr::window_order() |>
dplyr::ungroup() |>
dplyr::compute(temporary = FALSE, name = tmpTable_2)

tmpTable_3 <- paste0(tmp, "_3")
x_b <- x %>%
dplyr::select(dplyr::all_of(by), !!ie := dplyr::all_of(end)) %>%
x_b <- x |>
dplyr::select(dplyr::all_of(by), !!ie := dplyr::all_of(end)) |>
dplyr::union_all(
x %>%
dplyr::select(dplyr::all_of(by), !!ie := dplyr::all_of(start)) %>%
dplyr::mutate(!!ie := as.Date(!!CDMConnector::dateadd(ie, -1)))
) %>%
dplyr::distinct() %>%
dplyr::group_by(dplyr::across(dplyr::all_of(by))) %>%
dbplyr::window_order(.data[[ie]]) %>%
dplyr::mutate(!!id := dplyr::row_number() - 1) %>%
dbplyr::window_order() %>%
dplyr::ungroup() %>%
x |>
dplyr::select(dplyr::all_of(by), !!ie := dplyr::all_of(start)) |>
dplyr::mutate(!!ie := as.Date(clock::add_days(.data[[ie]], -1L)))
) |>
dplyr::distinct() |>
dplyr::group_by(dplyr::across(dplyr::all_of(by))) |>
dbplyr::window_order(.data[[ie]]) |>
dplyr::mutate(!!id := dplyr::row_number() - 1) |>
dbplyr::window_order() |>
dplyr::ungroup() |>
dplyr::compute(temporary = FALSE, name = tmpTable_3)

x <- x_a %>%
x <- x_a |>
dplyr::inner_join(
x_b,
by = c(by, id)
) %>%
) |>
dplyr::select(dplyr::all_of(by),
!!start := dplyr::all_of(is),
!!end := dplyr::all_of(ie)) %>%
!!end := dplyr::all_of(ie)) |>
dplyr::compute(temporary = FALSE, name = name)
}

Expand Down Expand Up @@ -330,6 +330,7 @@ joinOverlap <- function(cohort,
return(cohort)
}

gap <- as.integer(gap)
cdm <- omopgenerics::cdmReference(cohort)

start <- cohort |>
Expand All @@ -339,14 +340,8 @@ joinOverlap <- function(cohort,
dplyr::select(by, "date" := !!endDate) |>
dplyr::mutate("date_id" = 1)
if (gap > 0) {
end <- end %>%
dplyr::mutate("date" = as.Date(
!!CDMConnector::dateadd(
date = "date",
number = gap,
interval = "day"
)
))
end <- end |>
dplyr::mutate("date" = as.Date(clock::add_days(x = .data$date, n = gap)))
}
workingTbl <- omopgenerics::uniqueTableName()
x <- start |>
Expand All @@ -372,14 +367,8 @@ joinOverlap <- function(cohort,
dplyr::select(-"era_id") |>
dplyr::compute(temporary = FALSE, name = name)
if (gap > 0) {
x <- x %>%
dplyr::mutate(!!endDate := as.Date(
!!CDMConnector::dateadd(
date = endDate,
number = -gap,
interval = "day"
)
))
x <- x |>
dplyr::mutate(!!endDate := as.Date(clock::add_days(x = .data[[endDate]], n = -gap)))
}

x <- x |>
Expand Down Expand Up @@ -469,13 +458,13 @@ getIdentifier <- function(x,
getRandom <- function(n) {
sample(x = letters,
size = n,
replace = TRUE) %>% paste0(collapse = "")
replace = TRUE) |> paste0(collapse = "")
}
addNames <- function(cs) {
cols <- colnames(cs)[colnames(cs) != "cohort_definition_id"]
cs <- cs %>% dplyr::mutate("cohort_name" = NA_character_)
cs <- cs |> dplyr::mutate("cohort_name" = NA_character_)
for (col in cols) {
cs <- cs %>%
cs <- cs |>
dplyr::mutate(
"cohort_name" = dplyr::case_when(
.data[[col]] == 1 & is.na(.data$cohort_name) ~ .env$col,
Expand Down
60 changes: 30 additions & 30 deletions R/matchCohorts.R
Original file line number Diff line number Diff line change
Expand Up @@ -194,9 +194,9 @@ getNewCohort <- function(cohort, cohortId, control) {
name = temp_name,
table = dplyr::tibble("cohort_definition_id" = cohortId)
)
controls <- cdm[[temp_name]] %>%
controls <- cdm[[temp_name]] |>
dplyr::cross_join(
cdm[["person"]] %>%
cdm[["person"]] |>
dplyr::select("subject_id" = "person_id") |>
dplyr::inner_join(
cdm[["observation_period"]] |>
Expand All @@ -212,7 +212,7 @@ getNewCohort <- function(cohort, cohortId, control) {
dplyr::ungroup(),
by = "subject_id"
)
) %>%
) |>
dplyr::compute(name = control, temporary = FALSE)
cdm <- omopgenerics::dropTable(cdm, temp_name)

Expand Down Expand Up @@ -268,8 +268,8 @@ addMatchCols <- function(x, matchCols) {
dplyr::compute(name = omopgenerics::tableName(x), temporary = FALSE)
}
excludeIndividualsWithNoMatch <- function(cohort, groups, matchCols) {
cohort %>%
dplyr::inner_join(groups, by = c("cohort_definition_id", matchCols)) %>%
cohort |>
dplyr::inner_join(groups, by = c("cohort_definition_id", matchCols)) |>
dplyr::select(!dplyr::all_of(matchCols)) |>
dplyr::compute(name = tableName(cohort), temporary = FALSE) |>
omopgenerics::recordCohortAttrition("Exclude individuals that do not have any match")
Expand Down Expand Up @@ -302,20 +302,20 @@ excludeNoMatchedIndividuals <- function(cdm,
temporary = FALSE)

# Exclude individuals that do not have any match
cdm[[target]] <- cdm[[target]] %>%
cdm[[target]] <- cdm[[target]] |>
excludeIndividualsWithNoMatch(groups, matchCols)
cdm[[control]] <- cdm[[control]] %>%
cdm[[control]] <- cdm[[control]] |>
excludeIndividualsWithNoMatch(groups, matchCols)

return(cdm)
}

addRandPairId <- function(x) {
x %>%
dplyr::mutate("id" = stats::runif(n = dplyr::n())) %>%
dplyr::arrange(.data$id) %>%
dplyr::mutate("pair_id" = dplyr::row_number(), .by = "group_id") %>%
dplyr::select(-"id") %>%
x |>
dplyr::mutate("id" = stats::runif(n = dplyr::n())) |>
dplyr::arrange(.data$id) |>
dplyr::mutate("pair_id" = dplyr::row_number(), .by = "group_id") |>
dplyr::select(-"id") |>
dplyr::compute(name = omopgenerics::tableName(x), temporary = FALSE)
}
addClusterId <- function(x, u) {
Expand All @@ -333,37 +333,37 @@ clusterId <- function(x) {
}
infiniteMatching <- function(cdm, target, control) {
# Create pair id to perform a random match
cdm[[target]] <- cdm[[target]] %>% addRandPairId()
cdm[[control]] <- cdm[[control]] %>% addRandPairId()
cdm[[target]] <- cdm[[target]] |> addRandPairId()
cdm[[control]] <- cdm[[control]] |> addRandPairId()

cdm[[control]] <- cdm[[control]] %>%
cdm[[control]] <- cdm[[control]] |>
dplyr::inner_join(
# Calculate the maximum number of cases per group
cdm[[target]] %>%
dplyr::group_by(.data$group_id) %>%
cdm[[target]] |>
dplyr::group_by(.data$group_id) |>
dplyr::summarise(
"max_id" = max(.data$pair_id, na.rm = TRUE),
.groups = "drop"
),
by = c("group_id")
) %>%
) |>
# Calculate the maximum ratio per group
dplyr::mutate("pair_id" = ((.data$pair_id - 1) %% .data$max_id) + 1) %>%
dplyr::select(-"max_id") %>%
dplyr::mutate("pair_id" = ((.data$pair_id - 1) %% .data$max_id) + 1) |>
dplyr::select(-"max_id") |>
dplyr::compute(name = control, temporary = FALSE)

clusterId <- clusterId(cdm[[target]])
cdm[[control]] <- cdm[[control]] |> addClusterId(clusterId)
cdm[[target]] <- cdm[[target]] |> addClusterId(clusterId)

# assign cohort_start_date and cohort end date to controls
cdm[[control]] <- cdm[[control]] %>%
cdm[[control]] <- cdm[[control]] |>
dplyr::inner_join(
# Cohort start date and end date of cases
cdm[[target]] %>%
cdm[[target]] |>
dplyr::select("cluster_id", "index_date" = "cohort_start_date"),
by = c("cluster_id")
) %>%
) |>
dplyr::compute(name = control, temporary = FALSE)

return(cdm)
Expand Down Expand Up @@ -401,15 +401,15 @@ observationTarget <- function(cdm, target, control) {

checkRatio <- function(x, ratio) {
if (!is.infinite(ratio)) {
x <- x %>%
dplyr::mutate("id" = stats::runif(n = dplyr::n())) %>%
dplyr::group_by(.data$cluster_id) %>%
dplyr::arrange(.data$id) %>%
dplyr::filter(dplyr::row_number() <= .env$ratio) %>%
dplyr::ungroup() %>%
x <- x |>
dplyr::mutate("id" = stats::runif(n = dplyr::n())) |>
dplyr::group_by(.data$cluster_id) |>
dplyr::arrange(.data$id) |>
dplyr::filter(dplyr::row_number() <= .env$ratio) |>
dplyr::ungroup() |>
dplyr::arrange() |>
dplyr::select(-"id") |>
dplyr::compute(name = tableName(x), temporary = FALSE) %>%
dplyr::compute(name = tableName(x), temporary = FALSE) |>
omopgenerics::recordCohortAttrition("Exclude individuals to fulfil the ratio")
}
return(x)
Expand Down
Loading

0 comments on commit 51ceaba

Please sign in to comment.