From 29c233fc6b29d771cd164778dfdf887f0708cf7f Mon Sep 17 00:00:00 2001 From: "Harris, Alana {MDBB~Welwyn}" Date: Tue, 26 Sep 2023 12:46:32 +0200 Subject: [PATCH 1/6] Fixing source link --- _pkgdown.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_pkgdown.yml b/_pkgdown.yml index cf0dc04..e02c7c5 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -7,7 +7,7 @@ search: repo: url: home: https://github.com/pharmaverse/datacutr - source: https://github.com/pharmaverse/datacutr + source: https://github.com/pharmaverse/datacutr/blob/main/ issue: https://github.com/pharmaverse/datacutr/issues/ user: https://github.com/ news: From 68a9661aa293eef87dde71c0e2576e50532c3b26 Mon Sep 17 00:00:00 2001 From: "Harris, Alana {MDBB~Welwyn}" Date: Mon, 2 Oct 2023 18:20:34 +0200 Subject: [PATCH 2/6] Updating special_dm_cut and date_cut to account for the case that DCUTDTM (datacut date) is null. --- .lintr | 2 +- R/date_cut.R | 20 ++++++----- R/global.R | 3 +- R/special_dm_cut.R | 16 +++++---- tests/testthat/test-date_cut.R | 46 +++++++++++++++++++++++++ tests/testthat/test-special_dm_cut.R | 50 ++++++++++++++++++++++++++++ 6 files changed, 119 insertions(+), 18 deletions(-) diff --git a/.lintr b/.lintr index abb2318..5ac008d 100644 --- a/.lintr +++ b/.lintr @@ -1,4 +1,4 @@ -linters: with_defaults(line_length_linter(100), object_usage_linter=NULL, cyclocomp_linter(complexity_limit = 20)) +linters: with_defaults(line_length_linter(100), object_name_linter = NULL, object_usage_linter=NULL, cyclocomp_linter(complexity_limit = 20)) exclusions: list( "R/data.R", "tests/testthat") diff --git a/R/date_cut.R b/R/date_cut.R index 02989d4..6f3e58c 100644 --- a/R/date_cut.R +++ b/R/date_cut.R @@ -64,20 +64,19 @@ date_cut <- function(dataset_sdtm, (length(get_duplicates(dataset_cut$USUBJID)) == 0), msg = "Duplicate patients in the DCUT (dataset_cut) dataset, please update." ) - assert_that( - (any(is.na(mutate(dataset_cut, !!cut_var))) == FALSE), - msg = "At least one patient with missing datacut date (cut_var) in the DCUT - (dataset_cut) dataset, please update." + ifelse(any(is.na(mutate(dataset_cut, !!cut_var))) == TRUE, + print("At least one patient with missing datacut date (cut_var) in the DCUT + (dataset_cut) dataset, all records for this/these patient(s) will be kept."), NA ) - dcut <- dataset_cut %>% mutate(DCUT_TEMP_DCUTDTM = !!cut_var) %>% - subset(select = c(USUBJID, DCUT_TEMP_DCUTDTM)) + subset(select = c(USUBJID, DCUT_TEMP_DCUTDTM)) %>% + mutate(TEMP_DCUT_KEEP = "Y") - assert_that(is.POSIXt(dcut$DCUT_TEMP_DCUTDTM), + ifelse(!is.na(dcut$DCUT_TEMP_DCUTDTM), assert_that(is.POSIXt(dcut$DCUT_TEMP_DCUTDTM), msg = "cut_var is expected to be of date type POSIXt" - ) + ), NA) attributes(dcut$USUBJID)$label <- attributes(dataset_sdtm$USUBJID)$label @@ -92,7 +91,10 @@ date_cut <- function(dataset_sdtm, # Flag records to be removed - those occurring after cut date and patients not in dcut dataset dataset <- dataset_sdtm_pt %>% mutate(DCUT_TEMP_REMOVE = ifelse((DCUT_TEMP_SDTM_DATE > DCUT_TEMP_DCUTDTM) | - is.na(DCUT_TEMP_DCUTDTM), "Y", NA_character_)) + is.na(TEMP_DCUT_KEEP), "Y", NA_character_)) + + # Ensure variable is character + dataset$DCUT_TEMP_REMOVE <- as.character(dataset$DCUT_TEMP_REMOVE) dataset <- drop_temp_vars(dsin = dataset, drop_dcut_temp = FALSE) diff --git a/R/global.R b/R/global.R index 8f12d61..304559f 100644 --- a/R/global.R +++ b/R/global.R @@ -20,5 +20,6 @@ globalVariables(c( "DCUT_TEMP_DATE", "source_data", "DCUT_TEMP_REMOVE", - "DCUT_TEMP_DTHCHANGE" + "DCUT_TEMP_DTHCHANGE", + "TEMP_DCUT_KEEP" )) diff --git a/R/special_dm_cut.R b/R/special_dm_cut.R index 8cea8a6..3c4b3bf 100644 --- a/R/special_dm_cut.R +++ b/R/special_dm_cut.R @@ -48,10 +48,9 @@ special_dm_cut <- function(dataset_dm, (length(get_duplicates(dataset_cut$USUBJID)) == 0), msg = "Duplicate patients in the DCUT (dataset_cut) dataset, please update." ) - assert_that( - (any(is.na(mutate(dataset_cut, !!cut_var))) == FALSE), - msg = "At least one patient with missing datacut date (cut_var) in the DCUT - (dataset_cut) dataset, please update." + ifelse(any(is.na(mutate(dataset_cut, !!cut_var))) == TRUE, + print("At least one patient with missing datacut date (cut_var) in the DCUT + (dataset_cut) dataset, all records for this/these patient(s) will be kept."), NA ) assert_data_frame(dataset_dm, required_vars = exprs(USUBJID, DTHDTC) @@ -69,16 +68,19 @@ special_dm_cut <- function(dataset_dm, by = "USUBJID" ) - assert_that(is.POSIXt(dm_temp$DCUT_TEMP_DCUTDTM), + ifelse(!is.na(dm_temp$DCUT_TEMP_DCUTDTM), assert_that(is.POSIXt(dm_temp$DCUT_TEMP_DCUTDTM), msg = "cut_var is expected to be of date type POSIXt" - ) + ), NA) # Flag records with Death Date after Cut date dataset_updatedth <- dm_temp %>% mutate(DCUT_TEMP_DTHCHANGE = case_when( - DCUT_TEMP_DTHDT > DCUT_TEMP_DCUTDTM ~ "Y", + !is.na(DCUT_TEMP_DCUTDTM) & (DCUT_TEMP_DTHDT > DCUT_TEMP_DCUTDTM) ~ "Y", TRUE ~ as.character(NA) )) + # Ensure variable is character + dataset_updatedth$DCUT_TEMP_REMOVE <- as.character(dataset_updatedth$DCUT_TEMP_REMOVE) + dataset_updatedth } diff --git a/tests/testthat/test-date_cut.R b/tests/testthat/test-date_cut.R index c6bc055..b77a59a 100644 --- a/tests/testthat/test-date_cut.R +++ b/tests/testthat/test-date_cut.R @@ -178,3 +178,49 @@ test_that("All SDTMv dates are after datacut date", { expected_ae4 ) }) + +# Test 5 - Datacut date is NA + +input_ae5 <- tibble::tribble( + ~STUDYID, ~USUBJID, ~AESEQ, ~AESTDTC, + "my_study", "subject1", 1, "2021-01-02", + "my_study", "subject1", 2, "2021-08-31", + "my_study", "subject1", 3, "2021-10-10", + "my_study", "subject2", 2, "2021-02-20", + "my_study", "subject3", 1, "2021-03-02" +) + +input_dcut5 <- tibble::tribble( + ~STUDYID, ~USUBJID, ~DCUTDTM, + "my_study", "subject1", NA, + "my_study", "subject2", NA, + "my_study", "subject3", NA +) + + +expected_ae5 <- tibble::tribble( + ~STUDYID, ~USUBJID, ~AESEQ, ~AESTDTC, ~DCUT_TEMP_SDTM_DATE, ~DCUT_TEMP_DCUTDTM, + ~DCUT_TEMP_REMOVE, + "my_study", "subject1", 1, "2021-01-02", ymd_hms("2021-01-02T00:00:00"), + NA, NA_character_, + "my_study", "subject1", 2, "2021-08-31", ymd_hms("2021-08-31T00:00:00"), + NA, NA_character_, + "my_study", "subject1", 3, "2021-10-10", ymd_hms("2021-10-10T00:00:00"), + NA, NA_character_, + "my_study", "subject2", 2, "2021-02-20", ymd_hms("2021-02-20T00:00:00"), + NA, NA_character_, + "my_study", "subject3", 1, "2021-03-02", ymd_hms("2021-03-02T00:00:00"), + NA, NA_character_ +) + +test_that("DCUTDTM is NA", { + expect_equal( + date_cut( + dataset_sdtm = input_ae5, + sdtm_date_var = AESTDTC, + dataset_cut = input_dcut5, + cut_var = DCUTDTM + ), + expected_ae5 + ) +}) diff --git a/tests/testthat/test-special_dm_cut.R b/tests/testthat/test-special_dm_cut.R index 145f781..e47cc90 100644 --- a/tests/testthat/test-special_dm_cut.R +++ b/tests/testthat/test-special_dm_cut.R @@ -60,3 +60,53 @@ test_that("Error thrown if cut_var is not a POSIXt input", { regexp = "cut_var is expected to be of date type POSIXt" ) }) + +dcut_na <- tibble::tribble( + ~USUBJID, ~DCUTDTM, + "01-701-1015", NA, + "01-701-1023", NA, + "01-701-1028", NA, + "01-701-1033", NA, + "01-701-1047", NA, + "01-701-1057", NA, + "01-701-1097", NA, + "01-701-1111", NA, + "01-701-1115", NA, + "01-701-1118", NA +) + +dm_expect_na <- tibble::tribble( + ~USUBJID, ~DTHDTC, ~DTHFL, ~DCUT_TEMP_REMOVE, + ~DCUT_TEMP_DTHDT, ~DCUT_TEMP_DCUTDTM, ~DCUT_TEMP_DTHCHANGE, + "01-701-1015", "", "", NA_character_, + NA, NA, NA_character_, + "01-701-1023", "2014-10-20", "Y", NA_character_, + ymd_hms("2014-10-20T00:00:00"), NA, NA_character_, + "01-701-1028", "2014-10-21", "Y", NA_character_, + ymd_hms("2014-10-21T00:00:00"), NA, NA_character_, + "01-701-1033", "2014-10-19", "Y", NA_character_, + ymd_hms("2014-10-19T00:00:00"), NA, NA_character_, + "01-701-1047", "2014-10-31", "Y", NA_character_, + ymd_hms("2014-10-31T00:00:00"), NA, NA_character_, + "01-701-1057", "2025-10-20", "Y", NA_character_, + ymd_hms("2025-10-20T00:00:00"), NA, NA_character_, + "01-701-1097", "2002-10-20", "Y", NA_character_, + ymd_hms("2002-10-20T00:00:00"), NA, NA_character_, + "01-701-1111", "", "Y", NA_character_, + NA, NA, NA_character_, + "01-701-1115", "", "Y", NA_character_, + NA, NA, NA_character_, + "01-701-1118", "2014-11-20", "", NA_character_, + ymd_hms("2014-11-20T00:00:00"), NA, NA_character_ +) + +test_that("Tests all expected outcomes when datacut date is NA", { + testthat::expect_equal( + special_dm_cut( + dataset_dm = dm, + dataset_cut = dcut_na, + cut_var = DCUTDTM + ), + dm_expect_na + ) +}) From 9f61f8d9992f700a53a755bb55a9186494a18d7a Mon Sep 17 00:00:00 2001 From: "Harris, Alana {MDBB~Welwyn}" Date: Wed, 11 Oct 2023 12:55:50 +0200 Subject: [PATCH 3/6] Updating warning message. --- R/date_cut.R | 3 +-- R/special_dm_cut.R | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/R/date_cut.R b/R/date_cut.R index 6f3e58c..a2bdb40 100644 --- a/R/date_cut.R +++ b/R/date_cut.R @@ -65,8 +65,7 @@ date_cut <- function(dataset_sdtm, msg = "Duplicate patients in the DCUT (dataset_cut) dataset, please update." ) ifelse(any(is.na(mutate(dataset_cut, !!cut_var))) == TRUE, - print("At least one patient with missing datacut date (cut_var) in the DCUT - (dataset_cut) dataset, all records for this/these patient(s) will be kept."), NA + print("At least 1 patient with missing datacut date, all records will be kept."), NA ) dcut <- dataset_cut %>% diff --git a/R/special_dm_cut.R b/R/special_dm_cut.R index 3c4b3bf..b854373 100644 --- a/R/special_dm_cut.R +++ b/R/special_dm_cut.R @@ -49,8 +49,7 @@ special_dm_cut <- function(dataset_dm, msg = "Duplicate patients in the DCUT (dataset_cut) dataset, please update." ) ifelse(any(is.na(mutate(dataset_cut, !!cut_var))) == TRUE, - print("At least one patient with missing datacut date (cut_var) in the DCUT - (dataset_cut) dataset, all records for this/these patient(s) will be kept."), NA + print("At least 1 patient with missing datacut date, all records will be kept."), NA ) assert_data_frame(dataset_dm, required_vars = exprs(USUBJID, DTHDTC) From ae8afeeefbad71775804abcbe18d672177d71746 Mon Sep 17 00:00:00 2001 From: "Harris, Alana {MDBB~Welwyn}" Date: Thu, 12 Oct 2023 13:03:17 +0200 Subject: [PATCH 4/6] Update news file. --- NEWS.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 41199cd..700459f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,7 +4,9 @@ - N/A ## Updates of Existing Functions -- N/A +- Update to `date_cut()` and special_dm_cut()` functions to allow for +datacut date to be null. In this case, all records for this patient +will be kept/left unchanged. ## Breaking Changes - N/A From 7faba8a0208a07856b5b8c667352e444d61edf3f Mon Sep 17 00:00:00 2001 From: "Harris, Alana {MDBB~Welwyn}" Date: Thu, 12 Oct 2023 13:03:17 +0200 Subject: [PATCH 5/6] Update news file. --- NEWS.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 41199cd..b14d78a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,7 +4,9 @@ - N/A ## Updates of Existing Functions -- N/A +- Update to `date_cut()` and `special_dm_cut()` functions to allow for +datacut date to be null. In this case, all records for this patient +will be kept/left unchanged. ## Breaking Changes - N/A From 7e87ab161e5dc969919cbfd43a5bd67f9c2f76f8 Mon Sep 17 00:00:00 2001 From: "Harris, Alana {MDBB~Welwyn}" Date: Tue, 17 Oct 2023 17:56:57 +0200 Subject: [PATCH 6/6] Fixing lintr error --- .lintr | 1 + 1 file changed, 1 insertion(+) diff --git a/.lintr b/.lintr index b5d74ab..b78e223 100644 --- a/.lintr +++ b/.lintr @@ -2,6 +2,7 @@ linters: linters_with_defaults( line_length_linter(100), object_usage_linter=NULL, infix_spaces_linter=NULL, + indentation_linter=NULL, cyclocomp_linter(complexity_limit = 22) ) exclusions: list(