From 5489f42140aab8aab78982458c493b83ee82072c Mon Sep 17 00:00:00 2001 From: Cara Andrews Date: Fri, 31 May 2024 17:03:50 +0000 Subject: [PATCH] Changes made to the documentation for read-out --- DESCRIPTION | 5 +- NAMESPACE | 2 + R/process_cut.R | 10 +- R/read_out.R | 140 ++++++++++---- inst/read-out/read_out.Rmd | 323 ++++++++++++++++++--------------- man/process_cut.Rd | 16 +- man/read_out.Rd | 90 +++++++++ tests/testthat/test-read_out.R | 2 +- 8 files changed, 390 insertions(+), 198 deletions(-) create mode 100644 man/read_out.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 8838d96c..a806b256 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,7 +17,7 @@ Encoding: UTF-8 Language: en-US LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Depends: R (>= 3.5) Imports: admiraldev (>= 0.3.0), @@ -28,7 +28,8 @@ Imports: purrr (>= 0.3.3), stringr, rlang (>= 0.4.4), - tibble + tibble, + reactable Suggests: devtools, lintr, diff --git a/NAMESPACE b/NAMESPACE index f1b8e757..c2979cdc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,7 @@ export(impute_dcutdtc) export(impute_sdtm) export(process_cut) export(pt_cut) +export(read_out) export(special_dm_cut) importFrom(admiraldev,assert_character_scalar) importFrom(admiraldev,assert_data_frame) @@ -36,6 +37,7 @@ importFrom(magrittr,"%>%") importFrom(purrr,map) importFrom(purrr,map_lgl) importFrom(purrr,pmap) +importFrom(reactable,reactable) importFrom(rlang,"!!") importFrom(rlang,":=") importFrom(rlang,as_quosures) diff --git a/R/process_cut.R b/R/process_cut.R index 0b35c3b7..55d25713 100644 --- a/R/process_cut.R +++ b/R/process_cut.R @@ -2,8 +2,10 @@ #' #' @description Applies the selected type of datacut on each SDTMv dataset based on the chosen #' SDTMv date variable, and outputs the resulting cut datasets, as well as the datacut dataset, -#' as a list. It also provides an option to perform a "special" cut on the demography (dm) domain -#' in which any deaths occurring after the datacut date are removed. +#' as a list. It provides an option to perform a "special" cut on the demography (dm) domain +#' in which any deaths occurring after the datacut date are removed. It also provides an option +#' to produce a .html file that summarises the changes applied to the data during the cut, where +#' you can inspect the records that have been removed and/or modified. #' #' @param source_sdtm_data A list of uncut SDTMv dataframes #' @param patient_cut_v A vector of quoted SDTMv domain names in which a patient cut should be @@ -19,6 +21,10 @@ #' @param special_dm A logical input indicating whether the `special dm cut` should be performed. #' Note that, if TRUE, dm should not be included in `patient_cut_v`, `date_cut_m` or `no_cut_v` #' inputs. +#' @param read_out A logical input indicating whether a summary file for the datacut should be produced. +#' If `TRUE`, a .html file will be returned containing a summary of the cut and records removed. Default set to `FALSE`. +#' @param out_path A character vector of file save path for the summary file if `read_out = TRUE`; +#' the default corresponds to the working directory, getwd(). #' #' @return Returns a list of all input SDTMv datasets, plus the datacut dataset, after #' performing the selected datacut on each SDTMv domain. diff --git a/R/read_out.R b/R/read_out.R index 10d34b0c..7c4953a9 100644 --- a/R/read_out.R +++ b/R/read_out.R @@ -1,17 +1,25 @@ -#' @title Produce a Datacut Summary Rmarkdown +#' @title Datacut Summary File #' -#' @description +#' @description Produces a .html file summarising the changes applied to data during a data cut. +#' The file will contain an overview for the change in number of records for each dataset, the types +#' of cut applied and the opportunity to inspect the removed records. #' -#' @param dcut -#' @param patient_cut_data A list of quoted SDTMv domain names in which a patient cut has been -#' applied. To be left blank if a patient cut has not been performed on any domains. +#' @param dcut The output datacut dataset (DCUT), created via the `create_dcut()` function, containing +#' the variable DCUTDTC. +#' @param patient_cut_data A list of quoted SDTMv domain names in which a patient cut has been. +#' applied (via the `pt_cut()` function). To be left blank if a patient cut has not been performed on any domains. #' @param date_cut_data A list of quoted SDTMv domain names in which a date cut has been applied. -#' To be left blank if a date cut has not been performed on any domains. -#' @param dm_cut +#' (via the `date_cut()` function). To be left blank if a date cut has not been performed on any domains. +#' @param dm_cut The output dataset, created via the `special_dm_cut()` function, containing +#' the variables DCUT_TEMP_REMOVE and DCUT_TEMP_DTHCHANGE. #' @param no_cut_list List of of quoted SDTMv domain names in which no cut should be applied. To be #' left blank if no domains are to remain exactly as source. +#' @param out_path A character vector of file save path for the summary file; +#' the default corresponds to the working directory, getwd(). #' -#' @return Returns a .html R markdown summarising the changes made to data during a data cut. +#' @return Returns a .html file summarising the changes made to data during a datacut. +#' +#' @importFrom reactable reactable #' #' @export #' @@ -19,66 +27,118 @@ #' #' @examples #' - -# alt name summarize_cut() ?? +#' dcut <- tibble::tribble( +#' ~USUBJID, ~DCUTDTM, ~DCUTDTC, +#' "subject1", ymd_hms("2020-10-11T23:59:59"), "2020-10-11T23:59:59", +#' "subject2", ymd_hms("2020-10-11T23:59:59"), "2020-10-11T23:59:59", +#' "subject4", ymd_hms("2020-10-11T23:59:59"), "2020-10-11T23:59:59" +#' ) +#' +#' ae <- tibble::tribble( +#' ~USUBJID, ~AESEQ, ~AESTDTC, +#' "subject1", 1, "2020-01-02T00:00:00", +#' "subject1", 2, "2020-08-31T00:00:00", +#' "subject1", 3, "2020-10-10T00:00:00", +#' "subject2", 2, "2020-02-20T00:00:00", +#' "subject3", 1, "2020-03-02T00:00:00", +#' "subject4", 1, "2020-11-02T00:00:00", +#' "subject4", 2, "" +#' ) +#' +#' dm <- tibble::tribble( +#' ~USUBJID, ~DTHDTC, ~DTHFL, +#' "subject1", "2020-10-11", "Y", +#' "subject2", "2020-10-12", "Y", +#' ) +#' +#' dt_ae <- date_cut( +#' dataset_sdtm = ae, +#' sdtm_date_var = AESTDTC, +#' dataset_cut = dcut, +#' cut_var = DCUTDTM +#' ) +#' +#' pt_ae <- pt_cut( +#' dataset_sdtm = ae, +#' dataset_cut = dcut +#' ) +#' +#' dm_cut <- special_dm_cut( +#' dataset_dm = dm, +#' dataset_cut = dcut, +#' cut_var = DCUTDTM +#' ) +#' +#' read_out(dcut, patient_cut_data = list(ae = pt_ae), date_cut_data = list(ae = dt_ae), dm_cut) +#' read_out <- function(dcut = NULL, patient_cut_data = NULL, date_cut_data = NULL, dm_cut = NULL, no_cut_list = NULL, out_path = ".") { -#browser() if (!is.null(dcut)) { assert_data_frame(dcut, - required_vars = exprs(USUBJID, DCUTDTC) + required_vars = exprs(USUBJID, DCUTDTC) ) - } - if (!is.null(patient_cut_data)){ + } + if (!is.null(patient_cut_data)) { assert_that(is.list(patient_cut_data) & !is.data.frame(patient_cut_data), - msg = "patient_cut_data must be a list. \n + msg = "patient_cut_data must be a list. \n Note: If you have not used or do not with to view patient cut on any SDTMv domains, then please leave -patient_cut_data empty, in which case a default value of NULL will be used.") +patient_cut_data empty, in which case a default value of NULL will be used." + ) - for(i in seq(length(patient_cut_data))){ + for (i in seq(length(patient_cut_data))) { assert_data_frame(patient_cut_data[[i]], - required_vars = exprs(USUBJID, DCUT_TEMP_REMOVE)) + required_vars = exprs(USUBJID, DCUT_TEMP_REMOVE) + ) - assert_that(is_named(patient_cut_data[i]), - msg = "All elements patient_cut_data must be named with corresponding domain") + assert_that(admiraldev::is_named(patient_cut_data[i]), + msg = "All elements patient_cut_data must be named with corresponding domain" + ) } } - if (!is.null(date_cut_data)){ + if (!is.null(date_cut_data)) { assert_that(is.list(date_cut_data) & !is.data.frame(date_cut_data), - msg = "date_cut_data must be a list. \n + msg = "date_cut_data must be a list. \n Note: If you have not used or do not with to view date cut on any SDTMv domains, then please leave -date_cut_data empty, in which case a default value of NULL will be used.") - for(i in seq(length(date_cut_data))){ +date_cut_data empty, in which case a default value of NULL will be used." + ) + for (i in seq(length(date_cut_data))) { assert_data_frame(date_cut_data[[i]], - required_vars = exprs(USUBJID, DCUT_TEMP_REMOVE)) + required_vars = exprs(USUBJID, DCUT_TEMP_REMOVE) + ) - assert_that(is_named(date_cut_data[i]), - msg = "All elements in date_cut_data must be named with corresponding domain") + assert_that(admiraldev::is_named(date_cut_data[i]), + msg = "All elements in date_cut_data must be named with corresponding domain" + ) } } - if (!is.null(dm_cut)){ + if (!is.null(dm_cut)) { assert_data_frame(dm_cut, - required_vars = exprs(USUBJID, DCUT_TEMP_REMOVE, DCUT_TEMP_DTHCHANGE) - ) + required_vars = exprs(USUBJID, DCUT_TEMP_REMOVE, DCUT_TEMP_DTHCHANGE) + ) } - if (!is.null(no_cut_list)){ + if (!is.null(no_cut_list)) { assert_that(is.list(no_cut_list) & !is.data.frame(no_cut_list), - msg = "no_cut_list must be a list. \n + msg = "no_cut_list must be a list. \n Note: If you have not used or do not with to view the SDTMv domains where no cut has been applied, then please leave -no_cut_list empty, in which case a default value of NULL will be used.") - for(i in seq(length(no_cut_list))){ +no_cut_list empty, in which case a default value of NULL will be used." + ) + for (i in seq(length(no_cut_list))) { assert_data_frame(no_cut_list[[i]]) assert_that(is_named(no_cut_list[i]), - msg = "All elements in no_cut_list must be named with corresponding domain") - } + msg = "All elements in no_cut_list must be named with corresponding domain" + ) + } } - rmarkdown::render(paste0(system.file(package = "datacutr"), - path = "/read-out/read_out.Rmd"), - output_file = paste("datacut_", format(Sys.time(), "%d-%b-%Y_%H:%M:%S", ".html")), - output_dir = out_path) + rmarkdown::render( + paste0(system.file(package = "datacutr"), + path = "/read-out/read_out.Rmd" + ), + output_file = paste("datacut_", format(Sys.time(), "%d-%b-%Y_%H:%M:%S", ".html")), + output_dir = out_path + ) } diff --git a/inst/read-out/read_out.Rmd b/inst/read-out/read_out.Rmd index 3d0bbc0e..f5bdaa4e 100644 --- a/inst/read-out/read_out.Rmd +++ b/inst/read-out/read_out.Rmd @@ -16,200 +16,222 @@ knitr::opts_chunk$set(echo = TRUE) library(reactable) # Overall Summary of Datacut Tab#### -data_review <- function(pt_data, dt_data, dm_data, no_cut){ +data_review <- function(pt_data, dt_data, dm_data, no_cut) { cyc_num <- 1 - sum_t <- tibble(Dataset = character(), - `Cut Applied` = character(), - `Before Cut` = numeric(), - `After Cut` = numeric(), - `Removed` = numeric(), - `Modified` = numeric()) - if (is.null(pt_data) & is.null(dt_data) & is.null(dm_data) & is.null(no_cut)) { - cat("No data has been inputted into the read-out function.", ' \n', - "If you would like to view a summary of the datacut, please double check you have filled in all the parameters in the read_out() function (or process_cut() when using the wrapped approach.)", ' \n', fill = TRUE) - } + sum_t <- tibble( + Dataset = character(), + `Cut Applied` = character(), + `Before Cut` = numeric(), + `After Cut` = numeric(), + `Removed` = numeric(), + `Modified` = numeric() + ) + if (is.null(pt_data) & is.null(dt_data) & is.null(dm_data) & is.null(no_cut)) { + cat("No data has been inputted into the read-out function.", " \n", + "If you would like to view a summary of the datacut, please double check you have filled in all the parameters in the read_out() function (or process_cut() when using the wrapped approach.)", " \n", + fill = TRUE + ) + } # Patient Cut Data - for (df in pt_data){ + for (df in pt_data) { name <- names(pt_data)[cyc_num] x <- length(which(df$DCUT_TEMP_REMOVE == "Y")) uncut <- nrow(df) cut <- length(which(is.na(df$DCUT_TEMP_REMOVE))) sum_t <- add_row(sum_t, - Dataset = toupper(name), - `Cut Applied` = "PATIENT", - `Before Cut`= uncut, - `After Cut` = cut, - `Removed` = x, - `Modified` = NA) + Dataset = toupper(name), + `Cut Applied` = "PATIENT", + `Before Cut` = uncut, + `After Cut` = cut, + `Removed` = x, + `Modified` = NA + ) cyc_num <- cyc_num + 1 } # Date Cut Data cyc_num <- 1 - for (df in dt_data){ - name <- names(dt_data)[cyc_num] - x <- length(which(df$DCUT_TEMP_REMOVE == "Y")) - uncut <- nrow(df) - cut <- length(which(is.na(df$DCUT_TEMP_REMOVE))) - sum_t <- add_row(sum_t, - Dataset = toupper(name), - `Cut Applied` = "DATE", - `Before Cut`= uncut, - `After Cut` = cut, - `Removed` = x, - `Modified` = NA) - cyc_num <- cyc_num + 1 + for (df in dt_data) { + name <- names(dt_data)[cyc_num] + x <- length(which(df$DCUT_TEMP_REMOVE == "Y")) + uncut <- nrow(df) + cut <- length(which(is.na(df$DCUT_TEMP_REMOVE))) + sum_t <- add_row(sum_t, + Dataset = toupper(name), + `Cut Applied` = "DATE", + `Before Cut` = uncut, + `After Cut` = cut, + `Removed` = x, + `Modified` = NA + ) + cyc_num <- cyc_num + 1 } # Uncut Data cyc_num <- 1 - for (df in no_cut){ - name <- names(no_cut)[cyc_num] - uncut <- nrow(df) - sum_t <- add_row(sum_t, - Dataset = toupper(name), - `Cut Applied` = "NO CUT", - `Before Cut`= uncut, - `After Cut` = uncut, - `Removed` = NA, - `Modified` = NA) - cyc_num <- cyc_num + 1 + for (df in no_cut) { + name <- names(no_cut)[cyc_num] + uncut <- nrow(df) + sum_t <- add_row(sum_t, + Dataset = toupper(name), + `Cut Applied` = "NO CUT", + `Before Cut` = uncut, + `After Cut` = uncut, + `Removed` = NA, + `Modified` = NA + ) + cyc_num <- cyc_num + 1 } # DM Cut Data - if (!is.null(dm_data)){ + if (!is.null(dm_data)) { x <- length(which(dm_data$DCUT_TEMP_REMOVE == "Y")) y <- length(which(dm_data$DCUT_TEMP_DTHCHANGE == "Y")) uncut <- nrow(dm_data) cut <- length(which(is.na(dm_data$DCUT_TEMP_REMOVE))) sum_t <- add_row(sum_t, - Dataset = "DM", - `Cut Applied` = "DM", - `Before Cut`= uncut, - `After Cut` = cut, - `Removed` = x, - `Modified` = y) - } - # Custom table container - sketch = htmltools::withTags(table( - class = 'display', - thead( - tr( - th(rowspan = 2, 'Dataset'), - th(rowspan = 2, 'Cut Applied'), - th(colspan = 4, 'Total Number of Records') - ), - tr( - lapply(names(sum_t)[-c(1,2)], th) - ) - ) + Dataset = "DM", + `Cut Applied` = "DM", + `Before Cut` = uncut, + `After Cut` = cut, + `Removed` = x, + `Modified` = y + ) + } + # Custom table container + sketch <- htmltools::withTags(table( + class = "display", + thead( + tr( + th(rowspan = 2, "Dataset"), + th(rowspan = 2, "Cut Applied"), + th(colspan = 4, "Total Number of Records") + ), + tr( + lapply(names(sum_t)[-c(1, 2)], th) + ) + ) )) - reactable(sum_t, columnGroups = list( - colGroup("Total Number of Records", columns = c("Before Cut", "After Cut", "Removed", "Modified")) - ), - bordered = TRUE, filterable = TRUE, searchable = TRUE, pagination = TRUE, - showPageSizeOptions = TRUE, striped = TRUE, showSortable = TRUE) + reactable(sum_t, + columnGroups = list( + colGroup("Total Number of Records", columns = c("Before Cut", "After Cut", "Removed", "Modified")) + ), + bordered = TRUE, filterable = TRUE, searchable = TRUE, pagination = TRUE, + showPageSizeOptions = TRUE, striped = TRUE, showSortable = TRUE + ) } # Create tabs in the html #### -df_tabs <- function(data){ +df_tabs <- function(data) { cyc_num <- 1 # case when data = NULL if (is.null(data)) { return(cat("NO DATA TO VIEW")) } else { - # include modified records in special dm cut - if (exists("DCUT_TEMP_DTHCHANGE", where = data)) { - cat("####", "DM", " \n") + # include modified records in special dm cut + if (exists("DCUT_TEMP_DTHCHANGE", where = data)) { + cat("####", "DM", " \n") x <- length(which(data$DCUT_TEMP_REMOVE == "Y")) - cat(paste("Number of records removed in DM: ", x, " \n")) + cat(paste("Number of records removed in DM: ", x, " \n")) y <- length(which(data$DCUT_TEMP_DTHCHANGE == "Y")) - cat(paste("Number of records modified in DM: ", y, " \n", " \n")) + cat(paste("Number of records modified in DM: ", y, " \n", " \n")) removed <- data %>% filter(data$DCUT_TEMP_REMOVE == "Y" | data$DCUT_TEMP_DTHCHANGE == "Y") print(htmltools::tagList(reactable(removed, - rowStyle = function(index){ - row <- removed[index, ] - if (!is.na(row$DCUT_TEMP_REMOVE)){ - list(background = "#FFDFDF") - } else if (!is.na(row$DCUT_TEMP_DTHCHANGE)){ - list(background = "#DFF3FF") - } else { - NULL - } - }, - bordered = TRUE, filterable = TRUE, searchable = TRUE, pagination = TRUE, - showPageSizeOptions = TRUE, striped = TRUE, wrap = FALSE, resizable = TRUE, - showSortable = TRUE))) - - cat(' \n\n') - } else { - # table for each df in the datacut - for (df in data){ - name <- names(data)[cyc_num] - cat("####", toupper(name), " \n") + rowStyle = function(index) { + row <- removed[index, ] + if (!is.na(row$DCUT_TEMP_REMOVE)) { + list(background = "#FFDFDF") + } else if (!is.na(row$DCUT_TEMP_DTHCHANGE)) { + list(background = "#DFF3FF") + } else { + NULL + } + }, + bordered = TRUE, filterable = TRUE, searchable = TRUE, pagination = TRUE, + showPageSizeOptions = TRUE, striped = TRUE, wrap = FALSE, resizable = TRUE, + showSortable = TRUE + ))) + + cat(" \n\n") + } else { + # table for each df in the datacut + for (df in data) { + name <- names(data)[cyc_num] + cat("####", toupper(name), " \n") x <- length(which(df$DCUT_TEMP_REMOVE == "Y")) cat(paste("Number of records removed in ", toupper(name), ": ", x, " \n", " \n")) removed <- df %>% filter(df$DCUT_TEMP_REMOVE == "Y") print(htmltools::tagList(reactable(removed, - bordered = TRUE, filterable = TRUE, searchable = TRUE, pagination = TRUE, - showPageSizeOptions = TRUE, showSortable = TRUE, - striped = TRUE, wrap = FALSE, resizable = TRUE))) - cat(' \n\n') - cyc_num <- cyc_num + 1 - }} + bordered = TRUE, filterable = TRUE, searchable = TRUE, pagination = TRUE, + showPageSizeOptions = TRUE, showSortable = TRUE, + striped = TRUE, wrap = FALSE, resizable = TRUE + ))) + cat(" \n\n") + cyc_num <- cyc_num + 1 + } + } } } # Summary Tab for each cut -sum_test <- function(data){ +sum_test <- function(data) { cyc_num <- 1 cat("####", "Summary", " \n") - sum_t <- tibble(Dataset = character(), - `Before Cut` = numeric(), - `After Cut` = numeric(), - `Removed` = numeric()) - # case when data = NULL + sum_t <- tibble( + Dataset = character(), + `Before Cut` = numeric(), + `After Cut` = numeric(), + `Removed` = numeric() + ) + # case when data = NULL if (is.null(data)) { - cat("No data has been cut with this cut type.", ' \n') + cat("No data has been cut with this cut type.", " \n") cat("If you would like to apply this cut, please double check you have ran this cut / filled in all the parameters in process_cut() - for the wrapped approach - or read_out() - modular approach.") - cat(' \n\n') + cat(" \n\n") } else { - # DM-specific Summary Tab - if (exists("DCUT_TEMP_DTHCHANGE", where = data)) { - x <- length(which(data$DCUT_TEMP_REMOVE == "Y")) - y <- length(which(data$DCUT_TEMP_DTHCHANGE == "Y")) - uncut <- nrow(data) - cut <- length(which(is.na(data$DCUT_TEMP_REMOVE))) - sum_t <- tibble(Dataset = character(), - `Before Cut` = numeric(), - `After Cut` = numeric(), - Removed = numeric(), - Modified = numeric()) %>% - add_row(Dataset = "DM", `Before Cut` = uncut, `After Cut` = cut, Removed = x, Modified = y) - print(htmltools::tagList(reactable(sum_t, columnGroups = list( + # DM-specific Summary Tab + if (exists("DCUT_TEMP_DTHCHANGE", where = data)) { + x <- length(which(data$DCUT_TEMP_REMOVE == "Y")) + y <- length(which(data$DCUT_TEMP_DTHCHANGE == "Y")) + uncut <- nrow(data) + cut <- length(which(is.na(data$DCUT_TEMP_REMOVE))) + sum_t <- tibble( + Dataset = character(), + `Before Cut` = numeric(), + `After Cut` = numeric(), + Removed = numeric(), + Modified = numeric() + ) %>% + add_row(Dataset = "DM", `Before Cut` = uncut, `After Cut` = cut, Removed = x, Modified = y) + print(htmltools::tagList(reactable(sum_t, + columnGroups = list( colGroup("Total Number of Records", columns = c("Before Cut", "After Cut", "Removed", "Modified")) - ), - bordered = TRUE, filterable = TRUE, searchable = TRUE, pagination = TRUE, - showPageSizeOptions = TRUE, striped = TRUE, showSortable = TRUE))) - cat(' \n\n') - } else { + ), + bordered = TRUE, filterable = TRUE, searchable = TRUE, pagination = TRUE, + showPageSizeOptions = TRUE, striped = TRUE, showSortable = TRUE + ))) + cat(" \n\n") + } else { # Summary tabs for other cuts - for (df in data){ - name <- names(data)[cyc_num] - x <- length(which(df$DCUT_TEMP_REMOVE == "Y")) - uncut <- nrow(df) - cut <- length(which(is.na(df$DCUT_TEMP_REMOVE))) - sum_t <- add_row(sum_t, - Dataset = toupper(name), - `Before Cut` = uncut, - `After Cut` = cut, - `Removed` = x) - cat(' \n\n') - cyc_num <- cyc_num + 1 - } - reactable(sum_t, columnGroups = list( + for (df in data) { + name <- names(data)[cyc_num] + x <- length(which(df$DCUT_TEMP_REMOVE == "Y")) + uncut <- nrow(df) + cut <- length(which(is.na(df$DCUT_TEMP_REMOVE))) + sum_t <- add_row(sum_t, + Dataset = toupper(name), + `Before Cut` = uncut, + `After Cut` = cut, + `Removed` = x + ) + cat(" \n\n") + cyc_num <- cyc_num + 1 + } + reactable(sum_t, + columnGroups = list( colGroup("Total Number of Records", columns = c("Before Cut", "After Cut", "Removed")) - ), - bordered = TRUE, filterable = TRUE, searchable = TRUE, pagination = TRUE, - showPageSizeOptions = TRUE, striped = TRUE, showSortable = TRUE) - } + ), + bordered = TRUE, filterable = TRUE, searchable = TRUE, pagination = TRUE, + showPageSizeOptions = TRUE, striped = TRUE, showSortable = TRUE + ) + } } } ``` @@ -247,12 +269,13 @@ After filtering the input DS dataset (based on the given filter condition), any ### ```{r dcut, echo = FALSE} if (is.null(dcut)) { - return(cat("NO DATA TO VIEW")) - } else { - reactable(dcut, - bordered = TRUE, filterable = TRUE, searchable = TRUE, pagination = TRUE, - showPageSizeOptions = TRUE, striped = TRUE, showSortable = TRUE) - } + return(cat("NO DATA TO VIEW")) +} else { + reactable(dcut, + bordered = TRUE, filterable = TRUE, searchable = TRUE, pagination = TRUE, + showPageSizeOptions = TRUE, striped = TRUE, showSortable = TRUE + ) +} ``` ## Patient Cut diff --git a/man/process_cut.Rd b/man/process_cut.Rd index bcb9a233..5f981a76 100644 --- a/man/process_cut.Rd +++ b/man/process_cut.Rd @@ -11,7 +11,9 @@ process_cut( no_cut_v = vector(), dataset_cut, cut_var, - special_dm = TRUE + special_dm = TRUE, + read_out = FALSE, + out_path = "." ) } \arguments{ @@ -35,6 +37,12 @@ left blank if no domains are to remain exactly as source.} \item{special_dm}{A logical input indicating whether the \verb{special dm cut} should be performed. Note that, if TRUE, dm should not be included in \code{patient_cut_v}, \code{date_cut_m} or \code{no_cut_v} inputs.} + +\item{read_out}{A logical input indicating whether a summary file for the datacut should be produced. +If \code{TRUE}, a .html file will be returned containing a summary of the cut and records removed. Default set to \code{FALSE}.} + +\item{out_path}{A character vector of file save path for the summary file if \code{read_out = TRUE}; +the default corresponds to the working directory, getwd().} } \value{ Returns a list of all input SDTMv datasets, plus the datacut dataset, after @@ -43,8 +51,10 @@ performing the selected datacut on each SDTMv domain. \description{ Applies the selected type of datacut on each SDTMv dataset based on the chosen SDTMv date variable, and outputs the resulting cut datasets, as well as the datacut dataset, -as a list. It also provides an option to perform a "special" cut on the demography (dm) domain -in which any deaths occurring after the datacut date are removed. +as a list. It provides an option to perform a "special" cut on the demography (dm) domain +in which any deaths occurring after the datacut date are removed. It also provides an option +to produce a .html file that summarises the changes applied to the data during the cut, where +you can inspect the records that have been removed and/or modified. } \examples{ dcut <- data.frame( diff --git a/man/read_out.Rd b/man/read_out.Rd new file mode 100644 index 00000000..834d22e5 --- /dev/null +++ b/man/read_out.Rd @@ -0,0 +1,90 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_out.R +\name{read_out} +\alias{read_out} +\title{Datacut Summary File} +\usage{ +read_out( + dcut = NULL, + patient_cut_data = NULL, + date_cut_data = NULL, + dm_cut = NULL, + no_cut_list = NULL, + out_path = "." +) +} +\arguments{ +\item{dcut}{The output datacut dataset (DCUT), created via the \code{create_dcut()} function, containing +the variable DCUTDTC.} + +\item{patient_cut_data}{A list of quoted SDTMv domain names in which a patient cut has been. +applied (via the \code{pt_cut()} function). To be left blank if a patient cut has not been performed on any domains.} + +\item{date_cut_data}{A list of quoted SDTMv domain names in which a date cut has been applied. +(via the \code{date_cut()} function). To be left blank if a date cut has not been performed on any domains.} + +\item{dm_cut}{The output dataset, created via the \code{special_dm_cut()} function, containing +the variables DCUT_TEMP_REMOVE and DCUT_TEMP_DTHCHANGE.} + +\item{no_cut_list}{List of of quoted SDTMv domain names in which no cut should be applied. To be +left blank if no domains are to remain exactly as source.} + +\item{out_path}{A character vector of file save path for the summary file; +the default corresponds to the working directory, getwd().} +} +\value{ +Returns a .html file summarising the changes made to data during a datacut. +} +\description{ +Produces a .html file summarising the changes applied to data during a data cut. +The file will contain an overview for the change in number of records for each dataset, the types +of cut applied and the opportunity to inspect the removed records. +} +\examples{ + +dcut <- tibble::tribble( + ~USUBJID, ~DCUTDTM, ~DCUTDTC, + "subject1", ymd_hms("2020-10-11T23:59:59"), "2020-10-11T23:59:59", + "subject2", ymd_hms("2020-10-11T23:59:59"), "2020-10-11T23:59:59", + "subject4", ymd_hms("2020-10-11T23:59:59"), "2020-10-11T23:59:59" +) + +ae <- tibble::tribble( + ~USUBJID, ~AESEQ, ~AESTDTC, + "subject1", 1, "2020-01-02T00:00:00", + "subject1", 2, "2020-08-31T00:00:00", + "subject1", 3, "2020-10-10T00:00:00", + "subject2", 2, "2020-02-20T00:00:00", + "subject3", 1, "2020-03-02T00:00:00", + "subject4", 1, "2020-11-02T00:00:00", + "subject4", 2, "" +) + +dm <- tibble::tribble( + ~USUBJID, ~DTHDTC, ~DTHFL, + "subject1", "2020-10-11", "Y", + "subject2", "2020-10-12", "Y", +) + +dt_ae <- date_cut( + dataset_sdtm = ae, + sdtm_date_var = AESTDTC, + dataset_cut = dcut, + cut_var = DCUTDTM +) + +pt_ae <- pt_cut( + dataset_sdtm = ae, + dataset_cut = dcut +) + +dm_cut <- special_dm_cut( + dataset_dm = dm, + dataset_cut = dcut, + cut_var = DCUTDTM +) + +read_out(dcut, patient_cut_data = list(ae = pt_ae), date_cut_data = list(ae = dt_ae), dm_cut) + +} +\keyword{derive} diff --git a/tests/testthat/test-read_out.R b/tests/testthat/test-read_out.R index 3957d746..281e0f21 100644 --- a/tests/testthat/test-read_out.R +++ b/tests/testthat/test-read_out.R @@ -191,7 +191,7 @@ test_that("Test that read_out() errors when data frames in date_cut_data are unn ## dm_cut ---- # Test that read_out() errors when dm_cut data frame does not contain the vars DCUT_TEMP_REMOVE & DCUT_TEMP_DTHCHANG -test_that("Test that read_out() errors when dm_cut data frame does not contain the vars DCUT_TEMP_REMOVE & DCUT_TEMP_DTHCHANG", { +test_that("Test that read_out() errors when dm_cut data frame does not contain the vars DCUT_TEMP_REMOVE & DCUT_TEMP_DTHCHANGE", { expect_error(read_out(dcut = dcut, patient_cut_data = pt_cut_data, date_cut_data = dt_cut_data,