Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Apply review comments for derive_blfl #99

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 54 additions & 10 deletions R/derive_blfl.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,41 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) {
return(tm)
}

#' This function is used to check if a --DTC variable is in ISO8601 format
#'
#' @param dtc_var A vector of the date and time values
#'
#' @return A logical value indicating if input is in ISO8601 format
#' @keywords internal
is_iso8601 <- function(dtc_var) {
# Remove missing DTC values
dtc_var <- dtc_var[!(is.na(dtc_var) | dtc_var %in% c(""))]

# Define ISO8601 precision map
ISO8601_precision_map <- list(
y = "%Y", # Year: YYYY
ym = "%Y-%m", # Year-Month: YYYY-MM
ymd = "%Y-%m-%d", # Full Date: YYYY-MM-DD
ymdh = "%Y-%m-%dT%H", # DateTime: YYYY-MM-DDThh
ymdhm = "%Y-%m-%dT%H:%M", # DateTime: YYYY-MM-DDThh:mm
ymdhms = "%Y-%m-%dT%H:%M:%S" # Full DateTime: YYYY-MM-DDThh:mm:ss
)

# Function to check if a single DTC value matches any of the formats
valid_iso8601 <- sapply(dtc_var, function(dtc_value) {
any(sapply(ISO8601_precision_map, function(fmt) {
parsed_date <- try(as.POSIXct(dtc_value, format = fmt, tz = "UTC"), silent = TRUE)
!inherits(parsed_date, "try-error") && !is.na(parsed_date)
}))
})

if (all(valid_iso8601)) {
return(TRUE)
}

return(FALSE)
}

#' Derive Baseline Flag or Last Observation Before Exposure Flag
#'
#' Derive the baseline flag variable (`--BLFL`) or the last observation before
Expand Down Expand Up @@ -348,15 +383,23 @@ derive_blfl <- function(sdtm_in,
)]))
)

# Assert that the input "DTC" column follows iso_8601 format
assertthat::assert_that(is_iso8601(sdtm_in[[domain_prefixed_names["dtc"]]]),
msg = paste(
domain_prefixed_names["dtc"],
"column does not follow ISO8601 format, please check."
)
)

# End of assertions, work begins ------------------------------------------
# Create copy of input dataset for modification and processing
ds_mod <- sdtm_in

# Filter out rows where --ORRES is missing. Filter out --ORRES in
# ("ND", "NOT DONE") as well.
bad_orres_rows <- is.na(ds_mod[[domain_prefixed_names["orres"]]]) |
trimws(ds_mod[[domain_prefixed_names["orres"]]]) %in% c("ND", "NOT DONE", "")
ds_mod <- ds_mod[!bad_orres_rows, ]
ds_mod <- ds_mod |>
dplyr::filter(!(is.na(.data[[domain_prefixed_names["orres"]]]))) |>
dplyr::filter(!(trimws(.data[[domain_prefixed_names["orres"]]]) %in% c("ND", "NOT DONE", "")))

# Filter out rows where --STAT is not equal to "NOT DONE"
ds_mod <-
Expand All @@ -374,10 +417,10 @@ derive_blfl <- function(sdtm_in,
}

# Checking for columns of interest
con_col <- c(domain_prefixed_names[c("testcd", "dtc", "var_tpt")], "VISIT")
con_col <- c(domain_prefixed_names[c("testcd", "dtc", "tpt")], "VISIT")

# Drop those columns from the list which are not present in ds_in
con_col <- con_col[con_col %in% names(sdtm_in)]
# Drop those columns from the list which are not present in ds_mod
con_col <- con_col[con_col %in% names(ds_mod)]

# Check for any column which is all NA and removing it from con_col list
h <- which(sapply(ds_mod, function(x) all(is.na(x))))
Expand Down Expand Up @@ -438,16 +481,17 @@ derive_blfl <- function(sdtm_in,
dplyr::filter(
dom_dt == ref_dt,
is.na(dom_tm) | is.na(ref_tm) | dom_tm == ref_tm,
(VISIT %in% baseline_visits & get(domain_prefixed_names["tpt"]) %in% baseline_timepoints) |
(VISIT %in% baseline_visits & !!rlang::sym(domain_prefixed_names["tpt"]) %in% baseline_timepoints) |
(VISIT %in% baseline_visits & length(baseline_timepoints) == 0L) |
(get(domain_prefixed_names["tpt"]) %in% baseline_timepoints & length(baseline_visits) == 0L)
(!!rlang::sym(domain_prefixed_names["tpt"]) %in% baseline_timepoints & length(baseline_visits) == 0L)
)

# Combine (*A) and (*B) and (*C)
ds_base <- rbind(ds_subset_lt, ds_subset_eq_1, ds_subset_eq_2)
ds_base <- dplyr::bind_rows(ds_subset_lt, ds_subset_eq_1, ds_subset_eq_2)

# Sort the rows in ascending order with respect to columns from con_col
ds_base <- dplyr::arrange_at(ds_base, c("USUBJID", con_col))
ds_base <- ds_base |>
dplyr::arrange(USUBJID, !!!rlang::syms(con_col))

if (nrow(ds_base) == 0L) {
cli::cli_inform("There are no baseline records.")
Expand Down
18 changes: 18 additions & 0 deletions man/is_iso8601.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions tests/testthat/_snaps/derive_blfl.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@

Required variables `VSORRES`, `VSSTAT`, `VSTESTCD`, and `VSDTC` are missing in `sdtm_in`

---

VSDTC column does not follow ISO8601 format, please check.

# derive_blfl dm_domain validations work

Required variables `USUBJID` and `RFXSTDTC` are missing in `dm_domain`
Expand Down
31 changes: 31 additions & 0 deletions tests/testthat/test-derive_blfl.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,16 @@ test_that("derive_blfl sdmt_in validations work", {
tgt_var = "VSLOBXFL",
ref_var = "RFXSTDTC"
))

sdmt_in_no_iso <- d$sdtm_in |>
dplyr::mutate(VSDTC = "DUMMY")

expect_snapshot_error(derive_blfl(
sdtm_in = sdmt_in_no_iso,
dm_domain = d$dm,
tgt_var = "VSLOBXFL",
ref_var = "RFXSTDTC"
))
})

test_that("derive_blfl dm_domain validations work", {
Expand Down Expand Up @@ -209,3 +219,24 @@ test_that("`dtc_timepart`: basic usage", {
c(NA, "", "", "12", "12:30", "12:30:59")
)
})

test_that("`is_iso8601`: basic usage", {
dtc_var <- c(
NA,
"",
"2021-12-25",
"2021-12-25T12",
"2021-12-25T12:30",
"2021-12-25T12:30:59"
)

expect_true(is_iso8601(dtc_var))

dtc_var <- c(
NA,
"DUMMY",
"2021-12-25"
)

expect_false(is_iso8601(dtc_var))
})
Loading