diff --git a/R/conceptCohort.R b/R/conceptCohort.R index 461b488..5a1f772 100644 --- a/R/conceptCohort.R +++ b/R/conceptCohort.R @@ -193,16 +193,13 @@ conceptCohort <- function(cdm, cli::cli_inform(c("i" = "Applying cohort requirements.")) cdm[[name]] <- fulfillCohortReqs(cdm = cdm, name = name) - cdm[[name]] <- omopgenerics::newCohortTable(table = cdm[[name]], - cohortAttritionRef = NULL, - .softValidation = TRUE) cli::cli_inform(c("i" = "Collapsing records.")) cdm[[name]] <- cdm[[name]] |> - joinOverlap(name = name, gap = 0) - cdm[[name]] <- omopgenerics::newCohortTable(table = cdm[[name]], - cohortAttritionRef = NULL, - .softValidation = TRUE) + joinOverlap(name = name, gap = 0) |> + omopgenerics::recordCohortAttrition(reason = "Collapse overlapping records") + + cdm[[name]] <- omopgenerics::newCohortTable(table = cdm[[name]]) cli::cli_inform(c("v" = "Cohort {.strong {name}} created.")) @@ -309,10 +306,6 @@ unerafiedConceptCohort <- function(cdm, "cohort_end_date" ) |> dplyr::mutate(cohort_end_date = dplyr::coalesce(.data$cohort_end_date, .data$cohort_start_date)) |> - dplyr::filter( - !is.na(.data$cohort_start_date), - .data$cohort_start_date <= .data$cohort_end_date - ) |> dplyr::compute(name = name, temporary = FALSE) omopgenerics::dropTable(cdm, name = dplyr::starts_with(workingTblNames)) @@ -324,6 +317,12 @@ fulfillCohortReqs <- function(cdm, name) { # 1) if start is out of observation, drop cohort entry # 2) if end is after observation end, set cohort end as observation end cdm[[name]] |> + dplyr::filter( + !is.na(.data$cohort_start_date), + .data$cohort_start_date <= .data$cohort_end_date + ) |> + dplyr::compute(temporary = FALSE, name = name) |> + omopgenerics::recordCohortAttrition(reason = "Record start <= record end") |> dplyr::left_join( cdm$observation_period |> dplyr::select( @@ -350,7 +349,8 @@ fulfillCohortReqs <- function(cdm, name) { "cohort_start_date", "cohort_end_date" ) |> - dplyr::compute(temporary = FALSE, name = name) + dplyr::compute(temporary = FALSE, name = name) |> + omopgenerics::recordCohortAttrition(reason = "Record in observation") } diff --git a/R/measurementCohort.R b/R/measurementCohort.R index 3678af2..7592b36 100644 --- a/R/measurementCohort.R +++ b/R/measurementCohort.R @@ -126,6 +126,7 @@ measurementCohort <- function(cdm, dplyr::filter(tolower(.data$domain_id) %in% "measurement") |> dplyr::compute(name = tmpCodelist, temporary = FALSE) + cli::cli_inform(c("i" = "Subsetting measurement table.")) cohort <- cdm$measurement |> dplyr::select( "subject_id" = "person_id", @@ -140,9 +141,29 @@ measurementCohort <- function(cdm, cohortCodelist |> dplyr::select("concept_id", "cohort_definition_id"), by = "concept_id" ) |> - dplyr::filter(!is.na(.data$cohort_start_date)) |> dplyr::compute(name = name, temporary = FALSE) + if (!is.null(valueAsConcept) || !is.null(valueAsNumber)) { + cli::cli_inform(c("i" = "Applying measurement requirements.")) + filterExpr <- getFilterExpression(valueAsConcept, valueAsNumber) + cohort <- cohort |> + dplyr::filter(!!!filterExpr) |> + dplyr::compute(name = name, temporary = FALSE) + + if (cohort |> dplyr::tally() |> dplyr::pull("n") == 0) { + cli::cli_warn( + "There are no subjects with the specified value_as_concept_id or value_as_number." + ) + } + } + + cohort <- cohort |> + omopgenerics::newCohortTable( + cohortSetRef = cohortSet, + cohortCodelistRef = cohortCodelist |> dplyr::collect(), + .softValidation = TRUE + ) + if (cohort |> dplyr::tally() |> dplyr::pull("n") == 0) { cli::cli_inform(c("i" = "No table could be subsetted, returning empty cohort.")) cdm <- omopgenerics::emptyCohortTable(cdm = cdm, name = name) @@ -153,7 +174,7 @@ measurementCohort <- function(cdm, "cohort_end_date") |> omopgenerics::newCohortTable( cohortSetRef = cohortSet, - cohortAttritionRef = NULL, + cohortAttritionRef = attrition(cohort), cohortCodelistRef = cohortCodelist |> dplyr::collect() ) return(cdm[[name]]) @@ -161,35 +182,25 @@ measurementCohort <- function(cdm, cli::cli_inform(c("i" = "Getting records in observation.")) cohort <- cohort |> - PatientProfiles::addDemographics( - age = FALSE, - sex = FALSE, - priorObservationType = "date", - futureObservationType = "date", - name = name + dplyr::filter(!is.na(.data$cohort_start_date)) |> + dplyr::compute(name = name, temporary = FALSE) |> + omopgenerics::recordCohortAttrition(reason = "Not missing record date") |> + dplyr::left_join( + cdm$observation_period |> + dplyr::select( + "person_id", + "observation_period_start_date", + "observation_period_end_date" + ), + by = c("subject_id" = "person_id") ) |> dplyr::filter( - .data$prior_observation <= .data$cohort_start_date, - .data$future_observation >= .data$cohort_end_date + .data$observation_period_start_date <= .data$cohort_start_date, + .data$observation_period_end_date >= .data$cohort_end_date ) |> - dplyr::select(-"prior_observation", -"future_observation") |> - dplyr::compute(name = name, temporary = FALSE) - - - if (!is.null(valueAsConcept) || !is.null(valueAsNumber)) { - cli::cli_inform(c("i" = "Applying measurement requirements.")) - filterExpr <- getFilterExpression(valueAsConcept, valueAsNumber) - cohort <- cohort |> - dplyr::filter(!!!filterExpr) |> - dplyr::compute(name = name, temporary = FALSE) - - if (cohort |> dplyr::tally() |> dplyr::pull("n") == 0) { - cli::cli_warn( - "There are no subjects with the specified value_as_concept_id or value_as_number." - ) - } - - } + dplyr::select(-"observation_period_start_date", -"observation_period_end_date") |> + dplyr::compute(name = name, temporary = FALSE) |> + omopgenerics::recordCohortAttrition(reason = "Record in observation") cohort <- cohort |> dplyr::select("cohort_definition_id", @@ -197,17 +208,13 @@ measurementCohort <- function(cdm, "cohort_start_date", "cohort_end_date") |> dplyr::distinct() |> - dplyr::compute(name = name, temporary = FALSE) + dplyr::compute(name = name, temporary = FALSE) |> + omopgenerics::recordCohortAttrition(reason = "Distinct measurement records") cli::cli_inform(c("i" = "Creating cohort attributes.")) cohort <- cohort |> - omopgenerics::newCohortTable( - cohortSetRef = cohortSet, - cohortAttritionRef = NULL, - cohortCodelistRef = cohortCodelist |> dplyr::collect(), - .softValidation = TRUE - ) + omopgenerics::newCohortTable(.softValidation = TRUE) cli::cli_inform(c("v" = "Cohort {.strong {name}} created.")) diff --git a/R/validateFunctions.R b/R/validateFunctions.R index 0b5f846..94a48eb 100644 --- a/R/validateFunctions.R +++ b/R/validateFunctions.R @@ -184,7 +184,7 @@ validateStrata <- function(strata, cohort) { validateValueAsNumber <- function(valueAsNumber) { omopgenerics::assertList(valueAsNumber, named = TRUE, - class = "numeric", + class = c("integer", "numeric"), null = TRUE ) for (i in seq_along(valueAsNumber)) { diff --git a/tests/testthat/test-conceptCohort.R b/tests/testthat/test-conceptCohort.R index 54c175f..33b527e 100644 --- a/tests/testthat/test-conceptCohort.R +++ b/tests/testthat/test-conceptCohort.R @@ -4,7 +4,7 @@ test_that("expected errors and messages", { omock::mockObservationPeriod() cdm <- omopgenerics::insertTable( cdm = cdm, name = "concept", table = dplyr::tibble( - "concept_id" = 1, + "concept_id" = 1L, "concept_name" = "my concept", "domain_id" = "adsf", "vocabulary_id" = NA, @@ -37,10 +37,10 @@ test_that("expected errors and messages", { expect_true(attrition(x) |> nrow() == 0) # not codelist - expect_error(x <- conceptCohort(cdm = cdm, conceptSet = 1, name = "cohort")) - expect_error(x <- conceptCohort(cdm = cdm, conceptSet = list(1), name = "cohort")) + expect_error(x <- conceptCohort(cdm = cdm, conceptSet = 1L, name = "cohort")) + expect_error(x <- conceptCohort(cdm = cdm, conceptSet = list(1L), name = "cohort")) expect_message(expect_message( - x <- conceptCohort(cdm = cdm, conceptSet = list(a = 1), name = "cohort") + x <- conceptCohort(cdm = cdm, conceptSet = list(a = 1L), name = "cohort") )) expect_true(inherits(x, "cohort_table")) expect_true(x |> dplyr::collect() |> nrow() == 0) @@ -57,7 +57,7 @@ test_that("expected errors and messages", { expect_true(nrow(attrition(x)) == 1) expect_true(nrow(attr(x, "cohort_codelist")) == 1) expect_message(expect_message( - x <- conceptCohort(cdm = cdm, conceptSet = list(a = 2), name = "cohort") + x <- conceptCohort(cdm = cdm, conceptSet = list(a = 2L), name = "cohort") )) }) @@ -116,7 +116,7 @@ test_that("simple example", { "dbcon")) } expect_no_error(cohort <- conceptCohort(cdm = cdm, - conceptSet = list(a = 1), + conceptSet = list(a = 1L), name = "my_cohort")) if(isDuckdb){ endTempTables <- countDuckdbTempTables( @@ -137,7 +137,6 @@ test_that("simple example", { expect_true(cohort |> dplyr::tally() |> dplyr::pull() == 4) expect_true(cohortCount(cohort)$number_records == 4) expect_true(cohortCount(cohort)$number_subjects == 2) - # expect_true(attrition(cohort) |> nrow() == 1) expect_identical( settings(cohort), dplyr::tibble( @@ -145,6 +144,21 @@ test_that("simple example", { "cdm_version" = attr(cdm, "cdm_version"), "vocabulary_version" = "mock" ) ) + expect_identical( + attrition(cohort) |> dplyr::as_tibble(), + dplyr::tibble( + "cohort_definition_id" = 1L, + "number_records" = c(9L, 9L, 9L, 4L), + "number_subjects" = 2L, + "reason_id" = 1:4L, + "reason" = c( + "Initial qualifying events", "Record start <= record end", + "Record in observation", "Collapse overlapping records" + ), + "excluded_records" = c(0L, 0L, 0L, 5L), + "excluded_subjects" = 0L + ) + ) expect_identical(cohortCodelist(cohort, 1), omopgenerics::newCodelist(list(a = 1L))) cohort <- cohort |> dplyr::collect() |> @@ -176,18 +190,18 @@ test_that("simple example", { ) ) cohort2 <- conceptCohort(cdm = cdm, - conceptSet = list(a = 1), + conceptSet = list(a = 1L), name = "my_cohort2", subsetCohort = cdm$cohort) expect_equal(collectCohort(cohort2, 1), collectCohort(cohort, 1)) cohort3 <- conceptCohort(cdm = cdm, - conceptSet = list(a = 1), + conceptSet = list(a = 1L), name = "my_cohort3", subsetCohort = cdm$cohort, subsetCohortId = 2) expect_true(nrow(collectCohort(cohort3, 1)) == 0) expect_error(conceptCohort(cdm = cdm, - conceptSet = list(a = 1), + conceptSet = list(a = 1L), name = "my_cohort3", subsetCohort = cdm$cohort, subsetCohortId = 3)) @@ -199,14 +213,14 @@ test_that("simple example duckdb", { testthat::skip_on_cran() cdm <- omock::mockCdmReference() |> omock::mockCdmFromTables(tables = list("cohort" = dplyr::tibble( - "cohort_definition_id" = 1, - "subject_id" = c(1, 2, 3), + "cohort_definition_id" = 1L, + "subject_id" = 1:3L, "cohort_start_date" = as.Date("2020-01-01"), "cohort_end_date" = as.Date("2029-12-31") ))) cdm <- omopgenerics::insertTable( cdm = cdm, name = "concept", table = dplyr::tibble( - "concept_id" = 1, + "concept_id" = 1L, "concept_name" = "my concept", "domain_id" = "drUg", "vocabulary_id" = NA, @@ -218,12 +232,12 @@ test_that("simple example duckdb", { ) cdm <- omopgenerics::insertTable( cdm = cdm, name = "drug_exposure", table = dplyr::tibble( - "drug_exposure_id" = 1:11, - "person_id" = c(1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1), - "drug_concept_id" = c(1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1), + "drug_exposure_id" = 1:11L, + "person_id" = c(1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1) |> as.integer(), + "drug_concept_id" = c(1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1) |> as.integer(), "drug_exposure_start_date" = c(0, 300, 1500, 750, 10, 800, 150, 1800, 1801, 1802, 1803), "drug_exposure_end_date" = c(400, 800, 1600, 1550, 2000, 1000, 600, 1801, 1802, 1803, 1804), - "drug_type_concept_id" = 1 + "drug_type_concept_id" = 1L ) |> dplyr::mutate( "drug_exposure_start_date" = as.Date(.data$drug_exposure_start_date, origin = "2020-01-01"), @@ -233,11 +247,11 @@ test_that("simple example duckdb", { cdm <- cdm |> copyCdm() - expect_no_error(cohort <- conceptCohort(cdm = cdm, conceptSet = list(a = 1), name = "cohort")) + expect_no_error(cohort <- conceptCohort(cdm = cdm, conceptSet = list(a = 1L), name = "cohort")) expect_true(cohort |> dplyr::tally() |> dplyr::pull() == 4) - expect_true(cohortCount(cohort)$number_records == 4) - expect_true(cohortCount(cohort)$number_subjects == 2) + expect_true(cohortCount(cohort)$number_records == 4L) + expect_true(cohortCount(cohort)$number_subjects == 2L) # expect_true(attrition(cohort) |> nrow() == 1) expect_identical( settings(cohort), @@ -271,8 +285,8 @@ test_that("concepts from multiple cdm tables duckdb", { testthat::skip_on_cran() cdm <- omock::mockCdmReference() |> omock::mockCdmFromTables(tables = list("cohort" = dplyr::tibble( - "cohort_definition_id" = 1, - "subject_id" = c(1, 2, 3), + "cohort_definition_id" = 1L, + "subject_id" = c(1L, 2L, 3L), "cohort_start_date" = as.Date("2020-01-01"), "cohort_end_date" = as.Date("2029-12-31") ))) |> @@ -283,11 +297,13 @@ test_that("concepts from multiple cdm tables duckdb", { cs <- list("a" = cdm$condition_occurrence |> dplyr::select("condition_concept_id") |> head(1)|> - dplyr::pull(), + dplyr::pull() |> + as.integer(), "b" = cdm$drug_exposure |> dplyr::select("drug_concept_id") |> head(1) |> - dplyr::pull()) + dplyr::pull() |> + as.integer()) expect_no_error(cohort <- conceptCohort(cdm = cdm, conceptSet = cs, @@ -299,14 +315,14 @@ test_that("excluded concepts in codelist", { testthat::skip_on_cran() cdm <- omock::mockCdmReference() |> omock::mockCdmFromTables(tables = list("cohort" = dplyr::tibble( - "cohort_definition_id" = 1, - "subject_id" = c(1, 2, 3), + "cohort_definition_id" = 1L, + "subject_id" = c(1L, 2L, 3L), "cohort_start_date" = as.Date("2020-01-01"), "cohort_end_date" = as.Date("2029-12-31") ))) cdm <- omopgenerics::insertTable( cdm = cdm, name = "concept", table = dplyr::tibble( - "concept_id" = 1, + "concept_id" = 1L, "concept_name" = "my concept", "domain_id" = "drUg", "vocabulary_id" = NA, @@ -318,9 +334,9 @@ test_that("excluded concepts in codelist", { ) cdm <- omopgenerics::insertTable( cdm = cdm, name = "drug_exposure", table = dplyr::tibble( - "drug_exposure_id" = 1:11, - "person_id" = c(1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1), - "drug_concept_id" = c(1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1), + "drug_exposure_id" = 1:11L, + "person_id" = c(1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1) |> as.integer(), + "drug_concept_id" = c(1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1) |> as.integer(), "drug_exposure_start_date" = c(0, 300, 1500, 750, 10, 800, 150, 1800, 1801, 1802, 1803), "drug_exposure_end_date" = c(400, 800, 1600, 1550, 2000, 1000, 600, 1801, 1802, 1803, 1804), "drug_type_concept_id" = 1 @@ -333,10 +349,10 @@ test_that("excluded concepts in codelist", { cdm <- cdm |> copyCdm() - expect_no_error(cohort <- conceptCohort(cdm = cdm, conceptSet = list(a = 1:2), name = "cohort1")) + expect_no_error(cohort <- conceptCohort(cdm = cdm, conceptSet = list(a = 1:2L), name = "cohort1")) expect_true(all(attr(cohort, "cohort_codelist") |> dplyr::pull("concept_id") |> sort() == 1:2)) - expect_no_error(cohort <- conceptCohort(cdm = cdm, conceptSet = list(a = 2:3), name = "cohort2")) + expect_no_error(cohort <- conceptCohort(cdm = cdm, conceptSet = list(a = 2:3L), name = "cohort2")) expect_true(all(attr(cohort, "cohort_codelist") |> dplyr::pull("concept_id") |> sort() == 2:3)) PatientProfiles::mockDisconnect(cdm) @@ -348,7 +364,7 @@ test_that("out of observation", { omock::mockPerson(n = 4, seed = 1) |> omock::mockObservationPeriod(seed = 1) cdm_local$concept <- dplyr::tibble( - "concept_id" = c(1, 2), + "concept_id" = c(1L, 2L), "concept_name" = c("my concept 1", "my concept 2"), "domain_id" = "Drug", "vocabulary_id" = NA, @@ -358,12 +374,12 @@ test_that("out of observation", { "valid_end_date" = NA ) cdm_local$drug_exposure <- dplyr::tibble( - "drug_exposure_id" = 1:13, - "person_id" = c(1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1, 4, 4), - "drug_concept_id" = c(1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 2), + "drug_exposure_id" = 1:13L, + "person_id" = c(1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1, 4, 4) |> as.integer(), + "drug_concept_id" = c(1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 2) |> as.integer(), "drug_exposure_start_date" = c(0, 300, 1500, 750, 10, 800, 150, 1800, 1801, 1802, 1803, 430, -10), "drug_exposure_end_date" = c(400, 800, 1600, 1550, 2000, 1000, 600, 1801, 1802, 1803, 1804, 400, -100), - "drug_type_concept_id" = 1 + "drug_type_concept_id" = 1L ) |> dplyr::mutate( "drug_exposure_start_date" = as.Date(.data$drug_exposure_start_date, origin = "2010-01-01"), @@ -376,7 +392,7 @@ test_that("out of observation", { # start end before (subject 3) # end event < start event (subject 4) cdm$cohort1 <- conceptCohort(cdm = cdm, - conceptSet = list(a = 1, b = 2), name = "cohort1") + conceptSet = list(a = 1L, b = 2L), name = "cohort1") expect_true(all(c("cohort_table", "cdm_table") %in% class(cdm$cohort1))) # person 1 has two cohort entries # the first - same dates as drug exposure @@ -408,7 +424,7 @@ test_that("out of observation", { omock::mockPerson(n = 4, seed = 1) |> omock::mockObservationPeriod(seed = 1) cdm_local$concept <- dplyr::tibble( - "concept_id" = c(1, 2), + "concept_id" = c(1L, 2L), "concept_name" = c("my concept 1", "my concept 2"), "domain_id" = "Drug", "vocabulary_id" = NA, @@ -418,16 +434,16 @@ test_that("out of observation", { "valid_end_date" = NA ) cdm_local$drug_exposure <- dplyr::tibble( - "drug_exposure_id" = 1:4, - "person_id" = c(1, 3, 4, 2), - "drug_concept_id" = 1, + "drug_exposure_id" = 1:4L, + "person_id" = c(1L, 3L, 4L, 2L), + "drug_concept_id" = 1L, "drug_exposure_start_date" = as.Date(c("2004-01-01", "2014-01-01", "2001-01-01", "2000-01-01")), "drug_exposure_end_date" = as.Date(c("2015-01-01", "2015-05-01", "2002-01-01", "2000-02-02")), - "drug_type_concept_id" = 1 + "drug_type_concept_id" = 1L ) cdm <- cdm_local |> copyCdm() - cdm$cohort2 <- conceptCohort(cdm = cdm, conceptSet = list(a = 1, b = 2), name = "cohort2") + cdm$cohort2 <- conceptCohort(cdm = cdm, conceptSet = list(a = 1L, b = 2L), name = "cohort2") expect_true(all(c("cohort_table", "cdm_table") %in% class(cdm$cohort2))) expect_true(all(cdm$cohort2 |> dplyr::pull("subject_id") |> sort() == c(1, 2, 4))) expect_true(all(cdm$cohort2 |> dplyr::pull("cohort_start_date") |> sort() == @@ -449,7 +465,7 @@ test_that("out of observation", { omock::mockPerson(n = 4, seed = 1) |> omock::mockObservationPeriod(seed = 1) cdm_local$concept <- dplyr::tibble( - "concept_id" = c(1, 2), + "concept_id" = c(1L, 2L), "concept_name" = c("my concept 1", "my concept 2"), "domain_id" = "Drug", "vocabulary_id" = NA, @@ -459,17 +475,17 @@ test_that("out of observation", { "valid_end_date" = NA ) cdm_local$drug_exposure <- dplyr::tibble( - "drug_exposure_id" = 1:6, - "person_id" = c(1, 2, 2, 3, 4, 4), - "drug_concept_id" = c(1, 1, 1, 1, 2, 2), + "drug_exposure_id" = 1:6L, + "person_id" = c(1, 2, 2, 3, 4, 4) |> as.integer(), + "drug_concept_id" = c(1, 1, 1, 1, 2, 2) |> as.integer(), "drug_exposure_start_date" = as.Date(c("2004-01-01", "2014-01-01", "2015-04-01", "2000-01-01", "2000-01-01", "1999-01-01")), "drug_exposure_end_date" = as.Date(c("2003-01-01", "2015-05-01", "2015-07-01", "2000-02-02", "2000-01-01", "2001-01-01")), - "drug_type_concept_id" = 1 + "drug_type_concept_id" = 1L ) cdm <- cdm_local |> copyCdm() # empty cohort - cdm$cohort3 <- conceptCohort(cdm = cdm, conceptSet = list(a = 1), name = "cohort3") + cdm$cohort3 <- conceptCohort(cdm = cdm, conceptSet = list(a = 1L), name = "cohort3") expect_true(all(colnames(cdm$cohort3) == c("cohort_definition_id", "subject_id", "cohort_start_date", "cohort_end_date"))) expect_true(all(c("cohort_table", "cdm_table") %in% class(cdm$cohort3))) @@ -477,7 +493,7 @@ test_that("out of observation", { expect_true(cohortCodelist(cdm$cohort3, 1)$a == 1) # empty cohort - cdm$cohort4 <- conceptCohort(cdm = cdm, conceptSet = list(a = 1, b = 2), name = "cohort4") + cdm$cohort4 <- conceptCohort(cdm = cdm, conceptSet = list(a = 1L, b = 2L), name = "cohort4") expect_true(all(c("cohort_table", "cdm_table") %in% class(cdm$cohort4))) expect_true(cdm$cohort4 |> dplyr::tally() |> dplyr::pull("n") == 1) expect_true(cohortCodelist(cdm$cohort4, 1)$a == 1) @@ -492,14 +508,14 @@ test_that("table not present in the cdm", { testthat::skip_on_cran() cdm <- omock::mockCdmReference() |> omock::mockCdmFromTables(tables = list("cohort" = dplyr::tibble( - "cohort_definition_id" = 1, - "subject_id" = c(1, 2, 3), + "cohort_definition_id" = 1L, + "subject_id" = c(1L, 2L, 3L), "cohort_start_date" = as.Date("2020-01-01"), "cohort_end_date" = as.Date("2029-12-31") ))) cdm <- omopgenerics::insertTable( cdm = cdm, name = "concept", table = dplyr::tibble( - "concept_id" = 1:2, + "concept_id" = 1:2L, "concept_name" = c("my concept", "my other concept"), "domain_id" = c("drug", "condition"), "vocabulary_id" = NA, @@ -511,12 +527,12 @@ test_that("table not present in the cdm", { ) cdm <- omopgenerics::insertTable( cdm = cdm, name = "drug_exposure", table = dplyr::tibble( - "drug_exposure_id" = 1:11, - "person_id" = c(1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1), - "drug_concept_id" = c(1, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1), + "drug_exposure_id" = 1:11L, + "person_id" = c(1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1) |> as.integer(), + "drug_concept_id" = c(1, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1) |> as.integer(), "drug_exposure_start_date" = c(0, 300, 1500, 750, 10, 800, 150, 1800, 1801, 1802, 1803), "drug_exposure_end_date" = c(400, 800, 1600, 1550, 2000, 1000, 600, 1801, 1802, 1803, 1804), - "drug_type_concept_id" = 1 + "drug_type_concept_id" = 1L ) |> dplyr::mutate( "drug_exposure_start_date" = as.Date(.data$drug_exposure_start_date, origin = "2020-01-01"), @@ -526,7 +542,7 @@ test_that("table not present in the cdm", { cdm <- cdm |> copyCdm() - expect_warning(cdm$conceptcohort <- conceptCohort(cdm, list(a = 1, b = 1, c = 1:2, d = 2), name = "conceptcohort")) + expect_warning(cdm$conceptcohort <- conceptCohort(cdm, list(a = 1L, b = 1L, c = 1:2L, d = 2L), name = "conceptcohort")) expect_true(all(cdm$conceptcohort |> dplyr::pull(cohort_definition_id) |> unique() |> sort() == 1:3)) expect_true(all(cdm$conceptcohort |> dplyr::pull(cohort_start_date) |> sort() == c("2020-01-01", "2020-01-01", "2020-01-01", "2020-01-11", "2020-01-11", @@ -584,7 +600,7 @@ test_that("cohort exit as event start date", { # exit as event start expect_no_error(cdm$cohort_1 <- conceptCohort(cdm = cdm, - conceptSet = list(a = 1), + conceptSet = list(a = 1L), name = "cohort_1", exit = "event_start_date")) expect_true(nrow(cdm$cohort_1 |> @@ -604,7 +620,7 @@ test_that("cohort exit as event start date", { name = "cohort_1", exit = "not_an_option")) - + PatientProfiles::mockDisconnect(cdm) }) test_that("use source field concepts", { @@ -667,14 +683,14 @@ test_that("missing event end dates", { testthat::skip_on_cran() cdm <- omock::mockCdmReference() |> omock::mockCdmFromTables(tables = list("cohort" = dplyr::tibble( - "cohort_definition_id" = 1, - "subject_id" = c(1, 2, 3), + "cohort_definition_id" = 1L, + "subject_id" = c(1L, 2L, 3L), "cohort_start_date" = as.Date("2020-01-01"), "cohort_end_date" = as.Date("2029-12-31") ))) cdm <- omopgenerics::insertTable( cdm = cdm, name = "concept", table = dplyr::tibble( - "concept_id" = 1, + "concept_id" = 1L, "concept_name" = "my concept", "domain_id" = "drUg", "vocabulary_id" = NA, @@ -686,12 +702,12 @@ test_that("missing event end dates", { ) cdm <- omopgenerics::insertTable( cdm = cdm, name = "drug_exposure", table = dplyr::tibble( - "drug_exposure_id" = 1:11, - "person_id" = c(1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1), - "drug_concept_id" = c(1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1), + "drug_exposure_id" = 1:11L, + "person_id" = c(1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1) |> as.integer(), + "drug_concept_id" = c(1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1) |> as.integer(), "drug_exposure_start_date" = c(0, 300, 1500, 750, 10, 800, 150, 1800, 1801, 1802, 1803), "drug_exposure_end_date" = c(400, 800, 1600, 1550, 2000, 1000, 600, 1801, 1802, 1803, 1804), - "drug_type_concept_id" = 1 + "drug_type_concept_id" = 1L ) |> dplyr::mutate( "drug_exposure_start_date" = as.Date(.data$drug_exposure_start_date, origin = "2020-01-01"), @@ -699,7 +715,7 @@ test_that("missing event end dates", { ) ) cdm <- cdm |> copyCdm() - expect_no_error(cohort <- conceptCohort(cdm = cdm, conceptSet = list(a = 1), name = "cohort")) + expect_no_error(cohort <- conceptCohort(cdm = cdm, conceptSet = list(a = 1L), name = "cohort")) startCount <- cohortCount(cohort) |> dplyr::pull("number_subjects") @@ -707,23 +723,22 @@ test_that("missing event end dates", { # as their missing end date will have been replaced by the start date cdm <- omopgenerics::insertTable( cdm = cdm, name = "drug_exposure", table = dplyr::tibble( - "drug_exposure_id" = 1:11, - "person_id" = c(1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1), - "drug_concept_id" = c(1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1), + "drug_exposure_id" = 1:11L, + "person_id" = c(1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1) |> as.integer(), + "drug_concept_id" = c(1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1) |> as.integer(), "drug_exposure_start_date" = c(0, 300, 1500, 750, 10, 800, 150, 1800, 1801, 1802, 1803), "drug_exposure_end_date" = as.Date(NA), - "drug_type_concept_id" = 1 + "drug_type_concept_id" = 1L ) |> dplyr::mutate( "drug_exposure_start_date" = as.Date(.data$drug_exposure_start_date, origin = "2020-01-01") ) ) - expect_no_error(cohort <- conceptCohort(cdm = cdm, conceptSet = list(a = 1), name = "cohort")) + expect_no_error(cohort <- conceptCohort(cdm = cdm, conceptSet = list(a = 1L), name = "cohort")) endCount <- cohortCount(cohort) |> dplyr::pull("number_subjects") expect_identical(startCount, endCount) - PatientProfiles::mockDisconnect(cdm) }) diff --git a/tests/testthat/test-measurementCohort.R b/tests/testthat/test-measurementCohort.R index 4d68053..437912e 100644 --- a/tests/testthat/test-measurementCohort.R +++ b/tests/testthat/test-measurementCohort.R @@ -3,7 +3,7 @@ test_that("mearurementCohorts works", { cdm$concept <- cdm$concept |> dplyr::union_all( dplyr::tibble( - concept_id = c(4326744, 4298393, 45770407, 8876, 4124457, 999999, 123456), + concept_id = c(4326744, 4298393, 45770407, 8876, 4124457, 999999, 123456) |> as.integer(), concept_name = c("Blood pressure", "Systemic blood pressure", "Baseline blood pressure", "millimeter mercury column", "Normal range", "Normal", "outObs"), @@ -20,14 +20,14 @@ test_that("mearurementCohorts works", { ) ) cdm$measurement <- dplyr::tibble( - measurement_id = 1:7, + measurement_id = 1:7L, person_id = as.integer(c(1, 1, 2, 3, 3, 1, 1)), - measurement_concept_id = c(4326744, 4298393, 4298393, 45770407, 45770407, 123456, 123456), + measurement_concept_id = c(4326744, 4298393, 4298393, 45770407, 45770407, 123456, 123456) |> as.integer(), measurement_date = as.Date(c("2000-07-01", "2000-12-11", "2002-09-08", "2015-02-19", "2015-02-20", "1900-01-01", "2050-01-01")), - measurement_type_concept_id = NA, - value_as_number = c(100, 125, NA, NA, NA, NA, NA), - value_as_concept_id = c(0, 0, 0, 4124457, 999999, 0, 0), - unit_concept_id = c(8876, 8876, 0, 0, 0, 0, 0) + measurement_type_concept_id = NA_integer_, + value_as_number = c(100, 125, NA, NA, NA, NA, NA) |> as.integer(), + value_as_concept_id = c(0, 0, 0, 4124457, 999999, 0, 0) |> as.integer(), + unit_concept_id = c(8876, 8876, 0, 0, 0, 0, 0) |> as.integer() ) cdm <- cdm |> copyCdm() @@ -45,9 +45,9 @@ test_that("mearurementCohorts works", { cdm$cohort <- measurementCohort( cdm = cdm, name = "cohort", - conceptSet = list("normal_blood_pressure" = c(4326744, 4298393, 45770407)), + conceptSet = list("normal_blood_pressure" = c(4326744L, 4298393L, 45770407L)), valueAsConcept = c(4124457), - valueAsNumber = list("8876" = c(70, 120)) + valueAsNumber = list("8876" = c(70L, 120L)) ) if(isDuckdb){ @@ -73,7 +73,22 @@ test_that("mearurementCohorts works", { cohort_start_date = as.Date(c("2000-07-01", "2015-02-19")), cohort_end_date = as.Date(c("2000-07-01", "2015-02-19")) )) - expect_true(cdm$cohort |> attrition() |> dplyr::pull("reason") == "Initial qualifying events") + expect_identical( + cdm$cohort |> attrition() |> dplyr::as_tibble(), + dplyr::tibble( + "cohort_definition_id" = 1L, + "number_records" = c(rep(2L, 4)), + "number_subjects" = c(rep(2L, 4)), + "reason_id" = 1:4L, + "reason" = c( + "Initial qualifying events", + "Not missing record date", "Record in observation", + "Distinct measurement records" + ), + "excluded_records" = c(0L, 0L, 0L, 0L), + "excluded_subjects" = c(0L, 0L, 0L, 0L), + ) + ) expect_true(settings(cdm$cohort)$cohort_name == "normal_blood_pressure") codes <- attr(cdm$cohort, "cohort_codelist") |> dplyr::collect() expect_true(all(codes$concept_id |> sort() == c(4298393, 4326744, 45770407))) @@ -89,7 +104,7 @@ test_that("mearurementCohorts works", { cdm$cohort3 <- measurementCohort( cdm = cdm, name = "cohort3", - conceptSet = list("normal_blood_pressure" = c(4326744, 4298393, 45770407, 12345)), + conceptSet = list("normal_blood_pressure" = c(4326744L, 4298393L, 45770407L, 12345L)), valueAsConcept = c(4124457), valueAsNumber = list("8876" = c(70, 120)) ) @@ -100,7 +115,6 @@ test_that("mearurementCohorts works", { cohort_start_date = as.Date(c("2000-07-01", "2015-02-19")), cohort_end_date = as.Date(c("2000-07-01", "2015-02-19")) )) - expect_true(cdm$cohort3 |> attrition() |> dplyr::pull("reason") == "Initial qualifying events") expect_true(settings(cdm$cohort3)$cohort_name == "normal_blood_pressure") codes <- attr(cdm$cohort3, "cohort_codelist") |> dplyr::collect() expect_true(all(c(4298393, 4326744, 45770407) %in% codes$concept_id)) @@ -109,7 +123,7 @@ test_that("mearurementCohorts works", { cdm$cohort4 <- measurementCohort( cdm = cdm, name = "cohort4", - conceptSet = list("normal_blood_pressure" = c(4326744, 4298393, 45770407)) + conceptSet = list("normal_blood_pressure" = c(4326744L, 4298393L, 45770407L)) ) expect_equal( collectCohort(cdm$cohort4, 1), @@ -118,7 +132,6 @@ test_that("mearurementCohorts works", { cohort_start_date = as.Date(c("2000-07-01", "2000-12-11", "2002-09-08", "2015-02-19", "2015-02-20")), cohort_end_date = as.Date(c("2000-07-01", "2000-12-11", "2002-09-08", "2015-02-19", "2015-02-20")) )) - expect_true(cdm$cohort4 |> attrition() |> dplyr::pull("reason") == "Initial qualifying events") expect_true(settings(cdm$cohort4)$cohort_name == "normal_blood_pressure") codes <- attr(cdm$cohort4, "cohort_codelist") |> dplyr::collect() expect_true(all(codes$concept_id |> sort() == c(4298393, 4326744, 45770407))) @@ -127,7 +140,7 @@ test_that("mearurementCohorts works", { cdm$cohort5 <- measurementCohort( cdm = cdm, name = "cohort5", - conceptSet = list("normal_blood_pressure" = c(4326744, 4298393, 45770407)), + conceptSet = list("normal_blood_pressure" = c(4326744L, 4298393L, 45770407L)), valueAsNumber = list("8876" = c(70, 120), "908" = c(800, 900)) ) expect_equal( @@ -137,7 +150,6 @@ test_that("mearurementCohorts works", { cohort_start_date = as.Date(c("2000-07-01")), cohort_end_date = as.Date(c("2000-07-01")) )) - expect_true(cdm$cohort5 |> attrition() |> dplyr::pull("reason") == "Initial qualifying events") expect_true(settings(cdm$cohort5)$cohort_name == "normal_blood_pressure") codes <- attr(cdm$cohort5, "cohort_codelist") |> dplyr::collect() expect_true(all(codes$concept_id |> sort() == c(4298393, 4326744, 45770407))) @@ -146,7 +158,7 @@ test_that("mearurementCohorts works", { cdm$cohort6 <- measurementCohort( cdm = cdm, name = "cohort6", - conceptSet = list("normal_blood_pressure" = c(4326744, 4298393, 45770407)), + conceptSet = list("normal_blood_pressure" = c(4326744L, 4298393L, 45770407L)), valueAsConcept = c(4124457, 999999, 12345) ) expect_equal( @@ -156,7 +168,6 @@ test_that("mearurementCohorts works", { cohort_start_date = as.Date(c("2015-02-19", "2015-02-20")), cohort_end_date = as.Date(c("2015-02-19", "2015-02-20")) )) - expect_true(cdm$cohort6 |> attrition() |> dplyr::pull("reason") == "Initial qualifying events") expect_true(settings(cdm$cohort6)$cohort_name == "normal_blood_pressure") codes <- attr(cdm$cohort6, "cohort_codelist") |> dplyr::collect() expect_true(all(codes$concept_id |> sort() == c(4298393, 4326744, 45770407))) @@ -165,7 +176,7 @@ test_that("mearurementCohorts works", { cdm$cohort7 <- measurementCohort( cdm = cdm, name = "cohort7", - conceptSet = list("c1" = c(4326744), "c2" = c(4298393, 45770407)) + conceptSet = list("c1" = c(4326744L), "c2" = c(4298393L, 45770407L)) ) expect_equal( collectCohort(cdm$cohort7, 1), @@ -181,7 +192,27 @@ test_that("mearurementCohorts works", { cohort_start_date = as.Date(c("2000-12-11", "2002-09-08", "2015-02-19","2015-02-20")), cohort_end_date = as.Date(c("2000-12-11", "2002-09-08", "2015-02-19", "2015-02-20")) )) - expect_true(all(cdm$cohort7 |> attrition() |> dplyr::pull("reason") == c("Initial qualifying events", "Initial qualifying events"))) + expect_identical( + attrition(cdm$cohort7) |> dplyr::as_tibble(), + dplyr::tibble( + "cohort_definition_id" = c(rep(1L, 4), rep(2L, 4)), + "number_records" = c(rep(1L, 4), rep(4L, 4)), + "number_subjects" = c(rep(1L, 4), rep(3L, 4)), + "reason_id" = rep(1:4L, 2), + "reason" = c( + "Initial qualifying events", + "Not missing record date", + "Record in observation", + "Distinct measurement records", + "Initial qualifying events", + "Not missing record date", + "Record in observation", + "Distinct measurement records" + ), + "excluded_records" = 0L, + "excluded_subjects" = 0L + ) + ) expect_true(all(all(settings(cdm$cohort7)$cohort_name == c("c1", "c2")))) codes <- attr(cdm$cohort7, "cohort_codelist") |> dplyr::collect() expect_true(all(codes$concept_id[codes$codelist_name == "c1"] == c(4326744))) @@ -191,12 +222,11 @@ test_that("mearurementCohorts works", { cdm$cohort8 <- measurementCohort( cdm = cdm, name = "cohort8", - conceptSet = list("c1" = c(123456)) + conceptSet = list("c1" = c(123456L)) ) expect_true(all(colnames(cdm$cohort8) == c("cohort_definition_id", "subject_id", "cohort_start_date", "cohort_end_date"))) expect_true(cdm$cohort8 |> dplyr::tally() |> dplyr::pull("n") == 0) - expect_true(cdm$cohort8 |> attrition() |> dplyr::pull("reason") == "Initial qualifying events") expect_true(settings(cdm$cohort8)$cohort_name == "c1") codes <- attr(cdm$cohort8, "cohort_codelist") |> dplyr::collect() expect_true(all(codes$concept_id |> sort() == c(123456))) @@ -205,10 +235,9 @@ test_that("mearurementCohorts works", { cdm$cohort9 <- measurementCohort( cdm = cdm, name = "cohort9", - conceptSet = list("c1" = c(1234567)) + conceptSet = list("c1" = c(1234567L)) ) expect_true(cdm$cohort9 |> dplyr::tally() |> dplyr::pull("n") == 0) - expect_true(cdm$cohort9 |> attrition() |> dplyr::pull("reason") == "Initial qualifying events") expect_true(settings(cdm$cohort9)$cohort_name == "c1") expect_equal( colnames(settings(cdm$cohort9)) |> sort(), @@ -238,7 +267,7 @@ test_that("expected errors", { cdm$concept <- cdm$concept |> dplyr::union_all( dplyr::tibble( - concept_id = c(4326744, 4298393, 45770407, 8876, 4124457), + concept_id = c(4326744L, 4298393L, 45770407L, 8876L, 4124457L), concept_name = c("Blood pressure", "Systemic blood pressure", "Baseline blood pressure", "millimeter mercury column", "Normal range"), @@ -254,14 +283,14 @@ test_that("expected errors", { ) ) cdm$measurement <- dplyr::tibble( - measurement_id = 1:4, - person_id = c(1, 1, 2, 3), - measurement_concept_id = c(4326744, 4298393, 4298393, 45770407), + measurement_id = 1:4L, + person_id = c(1L, 1L, 2L, 3L), + measurement_concept_id = c(4326744L, 4298393L, 4298393L, 45770407L), measurement_date = as.Date(c("2000-07-01", "2000-12-11", "2002-09-08", "2015-02-19")), - measurement_type_concept_id = NA, - value_as_number = c(100, 125, NA, NA), - value_as_concept_id = c(0, 0, 0, 4124457), - unit_concept_id = c(8876, 8876, 0, 0) + measurement_type_concept_id = NA_integer_, + value_as_number = c(100L, 125L, NA_integer_, NA_integer_), + value_as_concept_id = c(0L, 0L, 0L, 4124457L), + unit_concept_id = c(8876L, 8876L, 0L, 0L) ) cdm <- cdm |> copyCdm() @@ -270,16 +299,16 @@ test_that("expected errors", { measurementCohort( cdm = cdm, name = "cohort", - conceptSet = list(c(4326744, 4298393, 45770407)), - valueAsConcept = c(4124457), - valueAsNumber = list("8876" = c(70, 120)) + conceptSet = list(c(4326744L, 4298393L, 45770407L)), + valueAsConcept = c(4124457L), + valueAsNumber = list("8876" = c(70L, 120L)) ) ) expect_error( measurementCohort( cdm = cdm, name = "cohort", - conceptSet = list("name " = c(4326744, 4298393, 45770407)), + conceptSet = list("name " = c(4326744L, 4298393L, 45770407L)), valueAsConcept = c(4124457), valueAsNumber = list("8876" = c(700, 120)) )