diff --git a/.lintr b/.lintr index b423034..b78e223 100644 --- a/.lintr +++ b/.lintr @@ -1,4 +1,10 @@ -linters: linters_with_defaults(line_length_linter(100), object_usage_linter=NULL, cyclocomp_linter(complexity_limit = 20)) +linters: linters_with_defaults( + line_length_linter(100), + object_usage_linter=NULL, + infix_spaces_linter=NULL, + indentation_linter=NULL, + cyclocomp_linter(complexity_limit = 22) + ) exclusions: list( - "R/data.R", - "tests/testthat") + "R/data.R", + "tests/testthat") diff --git a/NEWS.md b/NEWS.md index ba11b08..436618c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,7 +4,9 @@ - Added a "Report a bug" link to `{datacutr}` website (#182) ## Updates of Existing Functions -- N/A +- Update to `date_cut()` and `special_dm_cut()` functions to allow for +datacut date to be null. In this case, all records for this patient +will be kept/left unchanged. ## Breaking Changes - N/A diff --git a/R/date_cut.R b/R/date_cut.R index 02989d4..a2bdb40 100644 --- a/R/date_cut.R +++ b/R/date_cut.R @@ -64,20 +64,18 @@ date_cut <- function(dataset_sdtm, (length(get_duplicates(dataset_cut$USUBJID)) == 0), msg = "Duplicate patients in the DCUT (dataset_cut) dataset, please update." ) - assert_that( - (any(is.na(mutate(dataset_cut, !!cut_var))) == FALSE), - msg = "At least one patient with missing datacut date (cut_var) in the DCUT - (dataset_cut) dataset, please update." + ifelse(any(is.na(mutate(dataset_cut, !!cut_var))) == TRUE, + print("At least 1 patient with missing datacut date, all records will be kept."), NA ) - dcut <- dataset_cut %>% mutate(DCUT_TEMP_DCUTDTM = !!cut_var) %>% - subset(select = c(USUBJID, DCUT_TEMP_DCUTDTM)) + subset(select = c(USUBJID, DCUT_TEMP_DCUTDTM)) %>% + mutate(TEMP_DCUT_KEEP = "Y") - assert_that(is.POSIXt(dcut$DCUT_TEMP_DCUTDTM), + ifelse(!is.na(dcut$DCUT_TEMP_DCUTDTM), assert_that(is.POSIXt(dcut$DCUT_TEMP_DCUTDTM), msg = "cut_var is expected to be of date type POSIXt" - ) + ), NA) attributes(dcut$USUBJID)$label <- attributes(dataset_sdtm$USUBJID)$label @@ -92,7 +90,10 @@ date_cut <- function(dataset_sdtm, # Flag records to be removed - those occurring after cut date and patients not in dcut dataset dataset <- dataset_sdtm_pt %>% mutate(DCUT_TEMP_REMOVE = ifelse((DCUT_TEMP_SDTM_DATE > DCUT_TEMP_DCUTDTM) | - is.na(DCUT_TEMP_DCUTDTM), "Y", NA_character_)) + is.na(TEMP_DCUT_KEEP), "Y", NA_character_)) + + # Ensure variable is character + dataset$DCUT_TEMP_REMOVE <- as.character(dataset$DCUT_TEMP_REMOVE) dataset <- drop_temp_vars(dsin = dataset, drop_dcut_temp = FALSE) diff --git a/R/global.R b/R/global.R index 8f12d61..304559f 100644 --- a/R/global.R +++ b/R/global.R @@ -20,5 +20,6 @@ globalVariables(c( "DCUT_TEMP_DATE", "source_data", "DCUT_TEMP_REMOVE", - "DCUT_TEMP_DTHCHANGE" + "DCUT_TEMP_DTHCHANGE", + "TEMP_DCUT_KEEP" )) diff --git a/R/special_dm_cut.R b/R/special_dm_cut.R index 8cea8a6..b854373 100644 --- a/R/special_dm_cut.R +++ b/R/special_dm_cut.R @@ -48,10 +48,8 @@ special_dm_cut <- function(dataset_dm, (length(get_duplicates(dataset_cut$USUBJID)) == 0), msg = "Duplicate patients in the DCUT (dataset_cut) dataset, please update." ) - assert_that( - (any(is.na(mutate(dataset_cut, !!cut_var))) == FALSE), - msg = "At least one patient with missing datacut date (cut_var) in the DCUT - (dataset_cut) dataset, please update." + ifelse(any(is.na(mutate(dataset_cut, !!cut_var))) == TRUE, + print("At least 1 patient with missing datacut date, all records will be kept."), NA ) assert_data_frame(dataset_dm, required_vars = exprs(USUBJID, DTHDTC) @@ -69,16 +67,19 @@ special_dm_cut <- function(dataset_dm, by = "USUBJID" ) - assert_that(is.POSIXt(dm_temp$DCUT_TEMP_DCUTDTM), + ifelse(!is.na(dm_temp$DCUT_TEMP_DCUTDTM), assert_that(is.POSIXt(dm_temp$DCUT_TEMP_DCUTDTM), msg = "cut_var is expected to be of date type POSIXt" - ) + ), NA) # Flag records with Death Date after Cut date dataset_updatedth <- dm_temp %>% mutate(DCUT_TEMP_DTHCHANGE = case_when( - DCUT_TEMP_DTHDT > DCUT_TEMP_DCUTDTM ~ "Y", + !is.na(DCUT_TEMP_DCUTDTM) & (DCUT_TEMP_DTHDT > DCUT_TEMP_DCUTDTM) ~ "Y", TRUE ~ as.character(NA) )) + # Ensure variable is character + dataset_updatedth$DCUT_TEMP_REMOVE <- as.character(dataset_updatedth$DCUT_TEMP_REMOVE) + dataset_updatedth } diff --git a/_pkgdown.yml b/_pkgdown.yml index cf0dc04..e02c7c5 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -7,7 +7,7 @@ search: repo: url: home: https://github.com/pharmaverse/datacutr - source: https://github.com/pharmaverse/datacutr + source: https://github.com/pharmaverse/datacutr/blob/main/ issue: https://github.com/pharmaverse/datacutr/issues/ user: https://github.com/ news: diff --git a/tests/testthat/test-date_cut.R b/tests/testthat/test-date_cut.R index c6bc055..b77a59a 100644 --- a/tests/testthat/test-date_cut.R +++ b/tests/testthat/test-date_cut.R @@ -178,3 +178,49 @@ test_that("All SDTMv dates are after datacut date", { expected_ae4 ) }) + +# Test 5 - Datacut date is NA + +input_ae5 <- tibble::tribble( + ~STUDYID, ~USUBJID, ~AESEQ, ~AESTDTC, + "my_study", "subject1", 1, "2021-01-02", + "my_study", "subject1", 2, "2021-08-31", + "my_study", "subject1", 3, "2021-10-10", + "my_study", "subject2", 2, "2021-02-20", + "my_study", "subject3", 1, "2021-03-02" +) + +input_dcut5 <- tibble::tribble( + ~STUDYID, ~USUBJID, ~DCUTDTM, + "my_study", "subject1", NA, + "my_study", "subject2", NA, + "my_study", "subject3", NA +) + + +expected_ae5 <- tibble::tribble( + ~STUDYID, ~USUBJID, ~AESEQ, ~AESTDTC, ~DCUT_TEMP_SDTM_DATE, ~DCUT_TEMP_DCUTDTM, + ~DCUT_TEMP_REMOVE, + "my_study", "subject1", 1, "2021-01-02", ymd_hms("2021-01-02T00:00:00"), + NA, NA_character_, + "my_study", "subject1", 2, "2021-08-31", ymd_hms("2021-08-31T00:00:00"), + NA, NA_character_, + "my_study", "subject1", 3, "2021-10-10", ymd_hms("2021-10-10T00:00:00"), + NA, NA_character_, + "my_study", "subject2", 2, "2021-02-20", ymd_hms("2021-02-20T00:00:00"), + NA, NA_character_, + "my_study", "subject3", 1, "2021-03-02", ymd_hms("2021-03-02T00:00:00"), + NA, NA_character_ +) + +test_that("DCUTDTM is NA", { + expect_equal( + date_cut( + dataset_sdtm = input_ae5, + sdtm_date_var = AESTDTC, + dataset_cut = input_dcut5, + cut_var = DCUTDTM + ), + expected_ae5 + ) +}) diff --git a/tests/testthat/test-special_dm_cut.R b/tests/testthat/test-special_dm_cut.R index 145f781..e47cc90 100644 --- a/tests/testthat/test-special_dm_cut.R +++ b/tests/testthat/test-special_dm_cut.R @@ -60,3 +60,53 @@ test_that("Error thrown if cut_var is not a POSIXt input", { regexp = "cut_var is expected to be of date type POSIXt" ) }) + +dcut_na <- tibble::tribble( + ~USUBJID, ~DCUTDTM, + "01-701-1015", NA, + "01-701-1023", NA, + "01-701-1028", NA, + "01-701-1033", NA, + "01-701-1047", NA, + "01-701-1057", NA, + "01-701-1097", NA, + "01-701-1111", NA, + "01-701-1115", NA, + "01-701-1118", NA +) + +dm_expect_na <- tibble::tribble( + ~USUBJID, ~DTHDTC, ~DTHFL, ~DCUT_TEMP_REMOVE, + ~DCUT_TEMP_DTHDT, ~DCUT_TEMP_DCUTDTM, ~DCUT_TEMP_DTHCHANGE, + "01-701-1015", "", "", NA_character_, + NA, NA, NA_character_, + "01-701-1023", "2014-10-20", "Y", NA_character_, + ymd_hms("2014-10-20T00:00:00"), NA, NA_character_, + "01-701-1028", "2014-10-21", "Y", NA_character_, + ymd_hms("2014-10-21T00:00:00"), NA, NA_character_, + "01-701-1033", "2014-10-19", "Y", NA_character_, + ymd_hms("2014-10-19T00:00:00"), NA, NA_character_, + "01-701-1047", "2014-10-31", "Y", NA_character_, + ymd_hms("2014-10-31T00:00:00"), NA, NA_character_, + "01-701-1057", "2025-10-20", "Y", NA_character_, + ymd_hms("2025-10-20T00:00:00"), NA, NA_character_, + "01-701-1097", "2002-10-20", "Y", NA_character_, + ymd_hms("2002-10-20T00:00:00"), NA, NA_character_, + "01-701-1111", "", "Y", NA_character_, + NA, NA, NA_character_, + "01-701-1115", "", "Y", NA_character_, + NA, NA, NA_character_, + "01-701-1118", "2014-11-20", "", NA_character_, + ymd_hms("2014-11-20T00:00:00"), NA, NA_character_ +) + +test_that("Tests all expected outcomes when datacut date is NA", { + testthat::expect_equal( + special_dm_cut( + dataset_dm = dm, + dataset_cut = dcut_na, + cut_var = DCUTDTM + ), + dm_expect_na + ) +})