diff --git a/R/bootstrap-model.R b/R/bootstrap-model.R index 51664cac..60e4cca2 100644 --- a/R/bootstrap-model.R +++ b/R/bootstrap-model.R @@ -6,14 +6,13 @@ #' `remove_tables` arguments). The object returned from this must then be passed #' to [setup_bootstrap_run()] before submission (see examples). #' -#' @param .mod a `bbr` model object -#' @param .suffix a suffix for the bootstrap run directory. Will be prefixed by +#' @param .mod A `bbr` model object. +#' @param .suffix A suffix for the bootstrap run directory. Will be prefixed by #' the model id of `.mod`. #' @inheritParams copy_model_from #' @param .inherit_tags If `TRUE`, the default, inherit any tags from `.mod`. -#' @param remove_cov,remove_tables Logical (T/F). Optionally remove `$COVARIANCE` +#' @param remove_cov,remove_tables If `TRUE`, the default, remove `$COVARIANCE` #' and `$TABLE` records respectively, allowing for notably faster run times. -#' Both default to `TRUE`. #' #' @seealso [setup_bootstrap_run()] [summarize_bootstrap_run()] #' @examples @@ -81,21 +80,21 @@ new_bootstrap_run <- function( #' objects, and the new datasets are sampled from the dataset defined in its #' `$DATA` record (i.e. `get_data_path(.boot_run)`). #' -#' @param .boot_run a `bbi_nmboot_model` object. -#' @param n number of model runs. -#' @param strat_cols columns to maintain proportion for stratification -#' @param seed a seed for sampling the data. Set to `NULL` to avoid setting. +#' @param .boot_run A `bbi_nmboot_model` object. +#' @param n Number of data sets and model runs to generate. +#' @param strat_cols Columns to maintain proportion for stratification +#' @param seed A numeric seed to set prior to resampling the data; use `NULL` to +#' avoid setting a seed. #' @param data A dataset to resample from. Defaults to `NULL`, which will use -#' the output from `nm_join(.mod)`. If provided, must include the same column -#' names as what's returned from `nm_data(.mod)`. If the default is used, note -#' that a suitable `.join_col` must be provided. -#' @param .join_col Character column name to use to join table files. Not used -#' if `data` is specified. Passed to [nm_join()], and used to create the -#' initial dataset that gets re-sampled `n` times. The purpose of joining the -#' input data to table files is to filter the population to only the subjects -#' that actually made it into the model. See the `Details` section in -#' [nm_join()] for more information. -#' @param .overwrite logical (T/F) indicating whether or not to overwrite +#' the _filtered_ output from `nm_data(.mod, filter = TRUE)`. If provided, +#' must include the same column names as what's returned from `nm_data(.mod)`. +#' @param .bbi_args Named list passed to `model_summary(orig_mod, .bbi_args)`, +#' where `orig_mod` is the model `.boot_run` is based on. See +#' [print_bbi_args()] for valid options. Defaults to `list(no_grd_file = TRUE, +#' no_shk_file = TRUE)` because [model_summary()] is only called internally to +#' extract the number of records, so those files are irrelevant. Only used if +#' the based on model (the model being bootstrapped) has been executed. +#' @param .overwrite Logical (T/F) indicating whether or not to overwrite #' existing setup for a bootstrap run. #' #' @details @@ -138,7 +137,10 @@ setup_bootstrap_run <- function( strat_cols = NULL, seed = 1234, data = NULL, - .join_col = "NUM", + .bbi_args = list( + no_grd_file = TRUE, + no_shk_file = TRUE + ), .overwrite = FALSE ){ check_model_object(.boot_run, NMBOOT_MOD_CLASS) @@ -172,29 +174,6 @@ setup_bootstrap_run <- function( writeLines(ignore_lines, file.path(boot_dir, ".gitignore")) if(is.null(data)){ - # Only include subjects that entered the original problem by default - can_be_joined <- can_be_nm_joined(orig_mod, .join_col = .join_col) - if(isTRUE(can_be_joined)){ - starting_data <- nm_join(orig_mod, .join_col = .join_col) %>% - suppressMessages() - - # select only columns from original data set - starting_data <- starting_data %>% - dplyr::select(attr(starting_data, "nm_join_origin")$data) - - if ("DV.DATA" %in% names(starting_data)) { - starting_data <- dplyr::rename(starting_data, "DV" = "DV.DATA") - } - }else{ - rlang::inform( - paste( - "Defaulting to input data, which may include data that doesn't enter", - "the final problem (i.e. ignored subjects)" - ) - ) - starting_data <- nm_data(orig_mod) %>% suppressMessages() - } - # Overwrite data path in control stream # - This is not necessary in most cases, but is if overwriting a previous # run where a starting dataset was provided. The data path must then @@ -204,6 +183,55 @@ setup_bootstrap_run <- function( data_path_rel <- get_data_path_from_ctl(orig_mod, normalize = FALSE) modify_data_path_ctl(.boot_run, data_path_rel) } + + # Only include subjects that entered the original problem by default + starting_data <- tryCatch({ + nm_data(.boot_run, filter = TRUE) %>% suppressMessages() + }, error = function(cond){ + fs::dir_delete(boot_dir) + # If IGNORE/ACCEPT expressions cant be turned into dplyr expressions + cli::cli_div(theme = list(span.code = list(color = "blue"))) + cli::cli_abort( + c( + cond$message, + "i" = "Please check your control stream or provide a starting dataset ({.var data} arg)", + "i" = "You may try {.code setup_bootstrap_run(.boot_run, data = nm_join(mod))}" + ) + ) + }) + + # If model has finished, check the number of records to ensure the filtering + # was done correctly + if(check_nonmem_finished(orig_mod)){ + .s <- model_summary(orig_mod, .bbi_args = .bbi_args) + nrec <- .s$run_details$number_of_data_records + nrec_f <- nrow(starting_data) + if(nrec != nrec_f){ + fs::dir_delete(boot_dir) + cli::cli_div(theme = list(.code = list(color = "blue"), .val = list(color = "red3"))) + cli::cli_abort( + c( + "!" = "The filtered dataset does not have the same number of records as the original model:", + "*" = "{.code nm_data(orig_mod, filter = TRUE)} returned {.val {nrec_f}} records", + "*" = "{.code model_summary(orig_mod)} returned {.val {nrec}} records", + "i" = "where {.code orig_mod <- read_model(get_based_on(.boot_run))}", + "i" = "Try providing a starting dataset (e.g., {.code setup_bootstrap_run(.boot_run, data = nm_join(orig_mod))})" + ) + ) + } + }else{ + orig_mod_name <- fs::path_rel( + get_model_path(orig_mod), + get_model_working_directory(orig_mod) + ) + cli::cli_div(theme = list(.code = list(color = "blue"), .val = list(color = "red3"))) + cli::cli_warn( + c( + "The parent model ({.code {orig_mod_name}}) has not been submitted", + "i" = "Consider executing {.code {orig_mod_name}} to perform additional checks" + ) + ) + } }else{ checkmate::assert_data_frame(data) # Get input columns from dataset referenced in based_on model @@ -288,8 +316,8 @@ setup_bootstrap_run <- function( #' Set up a single bootstrap model run #' -#' @param mod_path absolute model path (no file extension) of a bootstrap model run. -#' @param boot_args list of parameters needed to create a bootstrap model run. +#' @param mod_path Absolute model path (no file extension) of a bootstrap model run. +#' @param boot_args List of parameters needed to create a bootstrap model run. #' #' @keywords internal make_boot_run <- function(mod_path, boot_args){ @@ -358,7 +386,7 @@ make_boot_run <- function(mod_path, boot_args){ #' Store bootstrap run details before submission #' -#' @param boot_models list of boostrap model objects created by `make_boot_run()`. +#' @param boot_models List of boostrap model objects created by `make_boot_run()`. #' @inheritParams make_boot_run #' #' @details @@ -425,7 +453,7 @@ make_boot_spec <- function(boot_models, boot_args){ #' bootstrap run directory. #' #' @inheritParams setup_bootstrap_run -#' @param force_resummarize logical (T/F). If `TRUE`, force re-summarization. +#' @param force_resummarize Logical (T/F). If `TRUE`, force re-summarization. #' Will _only_ update the saved out `RDS` file when specified via #' `summarize_bootstrap_run()`. See details for more information. #' @@ -621,7 +649,7 @@ summarize_bootstrap_run <- function( #' @describeIn summarize_bootstrap Tabulate parameter estimates for each model #' submission in a bootstrap run -#' @param format_long logical (T/F). If `TRUE`, format data as a long table, +#' @param format_long Logical (T/F). If `TRUE`, format data as a long table, #' making the data more portable for plotting. #' @export bootstrap_estimates <- function( diff --git a/R/filter-nm-data.R b/R/filter-nm-data.R new file mode 100644 index 00000000..04c9081f --- /dev/null +++ b/R/filter-nm-data.R @@ -0,0 +1,258 @@ +#' Helper for reading in and parsing the `$DATA` record. +#' +#' @note This cannot be used for _modifying_ a `$DATA` record. +#' @noRd +#' @keywords internal +read_data_record <- function(.mod){ + data_recs <- get_records(.mod, "data") + n_data <- length(data_recs) + if(n_data !=1){ + recs_fmt <- purrr::map_chr(data_recs, function(rec) rec$format()) + rlang::abort( + c( + glue::glue("Expected a single data record, but found {n_data}:\n\n"), + recs_fmt + ) + ) + } + data_recs[[1]]$parse() + return(data_recs[[1]]) +} + + +#' Extract IGNORE or ACCEPT options from a data record +#' @inheritParams filter_nm_data +#' @noRd +get_data_filter_exprs <- function(.mod){ + + data_rec <- read_data_record(.mod) + + # Extract & format IGNORE/ACCEPT options + ignore_opts <- purrr::keep(data_rec$values, function(val){ + inherits(val, "nmrec_option_value") && identical(val[["name"]], "ignore") + }) + + accept_opts <- purrr::keep(data_rec$values, function(val){ + inherits(val, "nmrec_option_value") && identical(val[["name"]], "accept") + }) + + has_ignore <- !rlang::is_empty(ignore_opts) + has_accept <- !rlang::is_empty(accept_opts) + + if(has_ignore && has_accept){ + # Identical to NMTRAN error message if both are used + rlang::abort("ACCEPT list and IGNORE list may not both be used") + }else{ + type <- dplyr::case_when( + has_ignore ~ "ignore", + has_accept ~ "accept", + # NA type will escape filtering + TRUE ~ NA_character_ + ) + } + + # Pull out filters (remove parentheses and any quoting) + # - can concatenate options since one or both are assumed to be empty + exprs <- purrr::map_chr(c(ignore_opts, accept_opts), function(val){ + gsub("\\(|\\)", "", unquote_filename(val$value)) + }) + + # Separate any (list) type expressions + exprs <- unlist(stringr::str_split(exprs, ",")) %>% stringr::str_trim() + + return( + list( + type = type, + exprs = exprs + ) + ) +} + +#' Function to translate `NONMEM` operators to `R` operators +#' @param expr String. A `NONMEM` ignore/accept expression +#' @note `.EQN.` and `.NEN.` are available after `NONMEM 7.3` +#' @return A [dplyr::filter()] expression +#' +#' @examples +#' \dontrun{ +#' +#' translate_nm_operator(c("AGE.NE.30", "ID.EQ.2", "WT/=70")) +#' #> [1] "AGE!=30" "ID==2" "WT!=70" +#' } +#' @keywords internal +#' @seealso [invert_operator()], [translate_nm_expr()] +translate_nm_operator <- function(expr) { + # Check for unsupported operators + bad_ops <- c(".OR.",".AND", ".NOT.") + bad_ops_pat <- paste(bad_ops, collapse = "|") + if(any(grepl(bad_ops_pat, expr))){ + cli::cli_abort( + c( + "The following logical operators are not supported {.var {bad_ops}}", + "i" = "See NONMEM documentation for more details" + ) + ) + } + + # Equal + expr <- gsub(".EQ.", "==", expr, fixed = TRUE) + expr <- gsub(".EQN.", "==", expr, fixed = TRUE) + # Not equal + expr <- gsub(".NE.", "!=", expr, fixed = TRUE) + expr <- gsub(".NEN.", "!=", expr, fixed = TRUE) + expr <- gsub("/=", "!=", expr, fixed = TRUE) + # Less than | Less than or equal to + expr <- gsub(".LT.", "<", expr, fixed = TRUE) + expr <- gsub(".LE.", "<=", expr, fixed = TRUE) + # Greater than | Greater than or equal to + expr <- gsub(".GT.", ">", expr, fixed = TRUE) + expr <- gsub(".GE.", ">=", expr, fixed = TRUE) + + # Handle single `=` only when it's not part of `==`, `!=`, `<=`, `>=` + expr <- gsub("(?<=[^=!<>])=(?=[^=!<>])", "==", expr, perl = TRUE) + return(expr) +} + +#' Function to invert `R` operators in filter expressions +#' @param expr A [dplyr::filter()] expression +#' @return the inverted expression +#' +#' @examples +#' \dontrun{ +#' +#' invert_operator(c('A==2', 'B >= 4')) +#' #> [1] "A!=2" "B <= 4" +#' } +#' @keywords internal +#' @seealso [translate_nm_operator()], [translate_nm_expr()] +invert_operator <- function(expr) { + expr <- dplyr::case_when( + grepl("==", expr, fixed = TRUE) ~ gsub("==", "!=", expr, fixed = TRUE), + grepl("!=", expr, fixed = TRUE) ~ gsub("!=", "==", expr, fixed = TRUE), + grepl("<=", expr, fixed = TRUE) ~ gsub("<=", ">=", expr, fixed = TRUE), + grepl(">=", expr, fixed = TRUE) ~ gsub(">=", "<=", expr, fixed = TRUE), + grepl("<", expr, fixed = TRUE) ~ gsub("<", ">", expr, fixed = TRUE), + grepl(">", expr, fixed = TRUE) ~ gsub(">", "<", expr, fixed = TRUE), + TRUE ~ expr + ) + return(expr) +} + +#' Translate `NONMEM` `IGNORE` and `ACCEPT` expressions into [dplyr::filter()] +#' expressions. +#' +#' @param nm_expr A `NONMEM` filter expression. e.g., `'ID.EQ.2, BLQ=1'`. +#' @param type Either `'ignore'` or `'accept'`. Denotes which type of `NONMEM` +#' filtering the expression corresponds to. +#' @param data_cols Column names associated with the input data. Used for +#' `'ignore'` expressions. +#' +#' @examples +#' \dontrun{ +#' +#' test_exprs <- c("SEX==1", "ID.EQ.2", "WT/=70", "AGE.NE.30", "A=1", "WT.GT.40") +#' +#' translate_nm_expr(test_exprs, type = 'ignore') +#' +#' translate_nm_expr(test_exprs, type = 'accept') +#' +#' +#' # Use of `@`, `#`, or form `IGNORE=C2` require `data_cols` to be specified, +#' # though only the first column is used +#' data_cols <- c("C", "ID", "TIME", "EVID", "DV", "BLQ") +#' +#' translate_nm_expr("#", data_cols = data_cols) +#' +#' translate_nm_expr("c2", data_cols = data_cols) +#' +#' translate_nm_expr("@", data_cols = data_cols) +#' +#' } +#' @keywords internal +#' @seealso [translate_nm_operator()], [invert_operator()] +translate_nm_expr <- function( + nm_expr, + type = c("ignore", "accept"), + data_cols = NULL +){ + type <- match.arg(type) + checkmate::assert_character(data_cols, min.len = 1, null.ok = TRUE) + + # Translate NM operators + exprs <- translate_nm_operator(nm_expr) + + r_exprs <- purrr::map_chr(exprs, function(expr){ + if(type == "ignore"){ + # `IGNORE=#`, `IGNORE=@`, `IGNORE=c1`, `IGNORE=(list)` + if(expr == "#"){ + # IGNORE=# is the default. That is, in the absence of IGNORE option, any + # record whose first character is '#' is treated as a comment record. + paste0("!grepl('^#', ", data_cols[1], ")") + }else if(expr == "@"){ + # IGNORE=@ signifies that any data record having an alphabetic character + # or `@` as its first non-blank character in column one should be ignored. + # - This permits a table file having header lines to be used as an + # NM-TRAN data set. + # - add extra `\\` for later parse() + paste0("!grepl('^\\\\s*[A-Za-z@]', ", data_cols[1], ")") + }else if(grepl('^[a-zA-Z]$', expr)){ + # This is for `IGNORE=C` columns. Meaning ignore rows if the _first_ column + # contains 'C' (this form always points to the _first_ column) + # - the above regex looks for characters of length>=1, and no symbols + paste0(data_cols[1], "!=", "'", expr, "'") + }else{ + # Invert list form expressions + invert_operator(expr) + } + }else{ + # ACCEPT option only supports `ACCEPT=(list)` form --> no formatting needed + expr + } + }) + + return(r_exprs) +} + +#' Filter `NONMEM` input data based on `IGNORE` and `ACCEPT` record options +#' +#' @param .mod A `bbi_nonmem_model` object +#' @param data A starting dataset +#' @keywords internal +filter_nm_data <- function(.mod, data = nm_data(.mod)){ + + # Extract & format IGNORE/ACCEPT options into expressions + nm_exprs <- get_data_filter_exprs(.mod) + + # Return starting data if no IGNORE/ACCEPT options are found + if(is.na(nm_exprs$type)){ + attr(data, "n_records_dropped") <- 0 + return(data) + } + + # Translate NONMEM syntax into `dplyr::filter` logic + r_filters <- translate_nm_expr( + nm_expr = nm_exprs$exprs, type = nm_exprs$type, data_cols = names(data) + ) + + # Create the final dplyr::filter expression + filter_expression <- paste(r_filters, collapse = " & ") + + # Apply filters + filtered_data <- tryCatch({ + data %>% dplyr::filter(eval(parse(text = filter_expression))) + }, error = function(cond){ + cli::cli_abort( + c( + "ignore/accept list could not be converted to filters", + "i" = "The following errors occurred:", + "x" = cond$parent$message + ) + ) + }) + + perc_retained <- round(100*(nrow(filtered_data)/nrow(data)), 2) + attr(filtered_data, "perc_retained") <- perc_retained + attr(filtered_data, "n_records_dropped") <- nrow(data) - nrow(filtered_data) + + return(filtered_data) +} diff --git a/R/nm-file.R b/R/nm-file.R index 45c0b586..bf9d3913 100644 --- a/R/nm-file.R +++ b/R/nm-file.R @@ -49,8 +49,9 @@ nm_file.character <- function(.mod, .suffix = NULL, ...) { #' @describeIn nm_file Reads `.grd` file from a `bbi_nonmem_model` or #' `bbi_nonmem_summary` object -#' @param .rename If `TRUE`, the default, will rename `.grd` columns to the -#' relevant parameter names. Otherwise will leave column names as is. +#' @param .rename Logical (`T`/`F`). If `TRUE`, the default, will rename `.grd` +#' columns to the relevant parameter names. Otherwise will leave column names +#' as is. #' @export nm_grd <- function(.mod, .rename = TRUE) { check_model_object(.mod, c(NM_MOD_CLASS, NM_SUM_CLASS)) @@ -87,20 +88,32 @@ nm_par_tab <- function(.mod) { #' @describeIn nm_file Reads the input data file from a `bbi_nonmem_model` or #' `bbi_nonmem_summary` object +#' @param filter Logical (`T`/`F`). If `TRUE`, filter data based on `IGNORE LIST` +#' or `ACCEPT LIST` options defined in the `$DATA` record. #' @importFrom data.table fread #' @importFrom tibble as_tibble #' @export -nm_data <- function(.mod) { - check_model_object(.mod, c(NM_MOD_CLASS, NM_SUM_CLASS, NMSIM_MOD_CLASS)) +nm_data <- function(.mod, filter = FALSE) { + check_model_object(.mod, c(NM_MOD_CLASS, NM_SUM_CLASS, NMSIM_MOD_CLASS, NMBOOT_MOD_CLASS)) .path <- get_data_path(.mod) verbose_msg(glue("Reading data file: {basename(.path)}")) .d <- fread(.path, na.strings = ".", verbose = FALSE) .d <- remove_dup_cols(.d) .d <- as_tibble(.d) + names(.d) <- toupper(names(.d)) + + if(isTRUE(filter)){ + .d <- filter_nm_data(.mod, data = .d) + perc_retained <- attributes(.d)$perc_retained + msg <- paste(" Filtered:", cli::col_blue(perc_retained, "%"), "of records retained") + verbose_msg(msg) + } + verbose_msg(glue(" rows: {nrow(.d)}")) verbose_msg(glue(" cols: {ncol(.d)}")) verbose_msg("") # for newline + return(.d) } diff --git a/man/cleanup_bootstrap_run.Rd b/man/cleanup_bootstrap_run.Rd index 6f82dcf7..fbddc794 100644 --- a/man/cleanup_bootstrap_run.Rd +++ b/man/cleanup_bootstrap_run.Rd @@ -7,7 +7,7 @@ cleanup_bootstrap_run(.boot_run, .force = FALSE) } \arguments{ -\item{.boot_run}{a \code{bbi_nmboot_model} object.} +\item{.boot_run}{A \code{bbi_nmboot_model} object.} \item{.force}{logical (T/F). If \code{TRUE}, do not prompt the user if they want to delete the models.} } diff --git a/man/filter_nm_data.Rd b/man/filter_nm_data.Rd new file mode 100644 index 00000000..e432acd0 --- /dev/null +++ b/man/filter_nm_data.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/filter-nm-data.R +\name{filter_nm_data} +\alias{filter_nm_data} +\title{Filter \code{NONMEM} input data based on \code{IGNORE} and \code{ACCEPT} record options} +\usage{ +filter_nm_data(.mod, data = nm_data(.mod)) +} +\arguments{ +\item{.mod}{A \code{bbi_nonmem_model} object} + +\item{data}{A starting dataset} +} +\description{ +Filter \code{NONMEM} input data based on \code{IGNORE} and \code{ACCEPT} record options +} +\keyword{internal} diff --git a/man/invert_operator.Rd b/man/invert_operator.Rd new file mode 100644 index 00000000..0c8cb4fa --- /dev/null +++ b/man/invert_operator.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/filter-nm-data.R +\name{invert_operator} +\alias{invert_operator} +\title{Function to invert \code{R} operators in filter expressions} +\usage{ +invert_operator(expr) +} +\arguments{ +\item{expr}{A \code{\link[dplyr:filter]{dplyr::filter()}} expression} +} +\value{ +the inverted expression +} +\description{ +Function to invert \code{R} operators in filter expressions +} +\examples{ +\dontrun{ + +invert_operator(c('A==2', 'B >= 4')) +#> [1] "A!=2" "B <= 4" +} +} +\seealso{ +\code{\link[=translate_nm_operator]{translate_nm_operator()}}, \code{\link[=translate_nm_expr]{translate_nm_expr()}} +} +\keyword{internal} diff --git a/man/make_boot_run.Rd b/man/make_boot_run.Rd index 7334fbe6..61221860 100644 --- a/man/make_boot_run.Rd +++ b/man/make_boot_run.Rd @@ -7,9 +7,9 @@ make_boot_run(mod_path, boot_args) } \arguments{ -\item{mod_path}{absolute model path (no file extension) of a bootstrap model run.} +\item{mod_path}{Absolute model path (no file extension) of a bootstrap model run.} -\item{boot_args}{list of parameters needed to create a bootstrap model run.} +\item{boot_args}{List of parameters needed to create a bootstrap model run.} } \description{ Set up a single bootstrap model run diff --git a/man/make_boot_spec.Rd b/man/make_boot_spec.Rd index d38986a0..ee3a5628 100644 --- a/man/make_boot_spec.Rd +++ b/man/make_boot_spec.Rd @@ -7,9 +7,9 @@ make_boot_spec(boot_models, boot_args) } \arguments{ -\item{boot_models}{list of boostrap model objects created by \code{make_boot_run()}.} +\item{boot_models}{List of boostrap model objects created by \code{make_boot_run()}.} -\item{boot_args}{list of parameters needed to create a bootstrap model run.} +\item{boot_args}{List of parameters needed to create a bootstrap model run.} } \description{ Store bootstrap run details before submission diff --git a/man/new_bootstrap_run.Rd b/man/new_bootstrap_run.Rd index 59b70fa1..909b11b9 100644 --- a/man/new_bootstrap_run.Rd +++ b/man/new_bootstrap_run.Rd @@ -14,9 +14,9 @@ new_bootstrap_run( ) } \arguments{ -\item{.mod}{a \code{bbr} model object} +\item{.mod}{A \code{bbr} model object.} -\item{.suffix}{a suffix for the bootstrap run directory. Will be prefixed by +\item{.suffix}{A suffix for the bootstrap run directory. Will be prefixed by the model id of \code{.mod}.} \item{.inherit_tags}{If \code{TRUE}, the default, inherit any tags from \code{.mod}.} @@ -25,9 +25,8 @@ the model id of \code{.mod}.} file already exists at specified \code{.new_model} path. If \code{TRUE} any existing file at \code{.new_model} will be overwritten silently.} -\item{remove_cov, remove_tables}{Logical (T/F). Optionally remove \verb{$COVARIANCE} -and \verb{$TABLE} records respectively, allowing for notably faster run times. -Both default to \code{TRUE}.} +\item{remove_cov, remove_tables}{If \code{TRUE}, the default, remove \verb{$COVARIANCE} +and \verb{$TABLE} records respectively, allowing for notably faster run times.} } \value{ S3 object of class \code{bbi_nmboot_model}. diff --git a/man/nm_file.Rd b/man/nm_file.Rd index 85eddcda..f72f508c 100644 --- a/man/nm_file.Rd +++ b/man/nm_file.Rd @@ -16,7 +16,7 @@ nm_tab(.mod) nm_par_tab(.mod) -nm_data(.mod) +nm_data(.mod, filter = FALSE) } \arguments{ \item{.mod}{Either a \code{bbi_nonmem_model}, \code{bbi_nonmem_summary}, or a path to a @@ -29,8 +29,12 @@ See examples.} \item{...}{arguments passed through to methods. (Currently none.)} -\item{.rename}{If \code{TRUE}, the default, will rename \code{.grd} columns to the -relevant parameter names. Otherwise will leave column names as is.} +\item{.rename}{Logical (\code{T}/\code{F}). If \code{TRUE}, the default, will rename \code{.grd} +columns to the relevant parameter names. Otherwise will leave column names +as is.} + +\item{filter}{Logical (\code{T}/\code{F}). If \code{TRUE}, filter data based on \verb{IGNORE LIST} +or \verb{ACCEPT LIST} options defined in the \verb{$DATA} record.} } \value{ A tibble with the data from the specified file and estimation method. diff --git a/man/setup_bootstrap_run.Rd b/man/setup_bootstrap_run.Rd index 45eb3f3a..0537c591 100644 --- a/man/setup_bootstrap_run.Rd +++ b/man/setup_bootstrap_run.Rd @@ -10,32 +10,31 @@ setup_bootstrap_run( strat_cols = NULL, seed = 1234, data = NULL, - .join_col = "NUM", + .bbi_args = list(no_grd_file = TRUE, no_shk_file = TRUE), .overwrite = FALSE ) } \arguments{ -\item{.boot_run}{a \code{bbi_nmboot_model} object.} +\item{.boot_run}{A \code{bbi_nmboot_model} object.} -\item{n}{number of model runs.} +\item{n}{Number of data sets and model runs to generate.} -\item{strat_cols}{columns to maintain proportion for stratification} +\item{strat_cols}{Columns to maintain proportion for stratification} -\item{seed}{a seed for sampling the data. Set to \code{NULL} to avoid setting.} +\item{seed}{A numeric seed to set prior to resampling the data; use \code{NULL} to +avoid setting a seed.} \item{data}{A dataset to resample from. Defaults to \code{NULL}, which will use -the output from \code{nm_join(.mod)}. If provided, must include the same column -names as what's returned from \code{nm_data(.mod)}. If the default is used, note -that a suitable \code{.join_col} must be provided.} +the \emph{filtered} output from \code{nm_data(.mod, filter = TRUE)}. If provided, +must include the same column names as what's returned from \code{nm_data(.mod)}.} -\item{.join_col}{Character column name to use to join table files. Not used -if \code{data} is specified. Passed to \code{\link[=nm_join]{nm_join()}}, and used to create the -initial dataset that gets re-sampled \code{n} times. The purpose of joining the -input data to table files is to filter the population to only the subjects -that actually made it into the model. See the \code{Details} section in -\code{\link[=nm_join]{nm_join()}} for more information.} +\item{.bbi_args}{Named list passed to \code{model_summary(orig_mod, .bbi_args)}, +where \code{orig_mod} is the model \code{.boot_run} is based on. See +\code{\link[=print_bbi_args]{print_bbi_args()}} for valid options. Defaults to \code{list(no_grd_file = TRUE, no_shk_file = TRUE)} because \code{\link[=model_summary]{model_summary()}} is only called internally to +extract the number of records, so those files are irrelevant. Only used if +the based on model (the model being bootstrapped) has been executed.} -\item{.overwrite}{logical (T/F) indicating whether or not to overwrite +\item{.overwrite}{Logical (T/F) indicating whether or not to overwrite existing setup for a bootstrap run.} } \description{ diff --git a/man/summarize_bootstrap.Rd b/man/summarize_bootstrap.Rd index dd6b8717..cecdb40a 100644 --- a/man/summarize_bootstrap.Rd +++ b/man/summarize_bootstrap.Rd @@ -14,13 +14,13 @@ bootstrap_estimates(.boot_run, format_long = FALSE, force_resummarize = FALSE) get_boot_models(.boot_run) } \arguments{ -\item{.boot_run}{a \code{bbi_nmboot_model} object.} +\item{.boot_run}{A \code{bbi_nmboot_model} object.} -\item{force_resummarize}{logical (T/F). If \code{TRUE}, force re-summarization. +\item{force_resummarize}{Logical (T/F). If \code{TRUE}, force re-summarization. Will \emph{only} update the saved out \code{RDS} file when specified via \code{summarize_bootstrap_run()}. See details for more information.} -\item{format_long}{logical (T/F). If \code{TRUE}, format data as a long table, +\item{format_long}{Logical (T/F). If \code{TRUE}, format data as a long table, making the data more portable for plotting.} } \description{ diff --git a/man/translate_nm_expr.Rd b/man/translate_nm_expr.Rd new file mode 100644 index 00000000..250f070a --- /dev/null +++ b/man/translate_nm_expr.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/filter-nm-data.R +\name{translate_nm_expr} +\alias{translate_nm_expr} +\title{Translate \code{NONMEM} \code{IGNORE} and \code{ACCEPT} expressions into \code{\link[dplyr:filter]{dplyr::filter()}} +expressions.} +\usage{ +translate_nm_expr(nm_expr, type = c("ignore", "accept"), data_cols = NULL) +} +\arguments{ +\item{nm_expr}{A \code{NONMEM} filter expression. e.g., \code{'ID.EQ.2, BLQ=1'}.} + +\item{type}{Either \code{'ignore'} or \code{'accept'}. Denotes which type of \code{NONMEM} +filtering the expression corresponds to.} + +\item{data_cols}{Column names associated with the input data. Used for +\code{'ignore'} expressions.} +} +\description{ +Translate \code{NONMEM} \code{IGNORE} and \code{ACCEPT} expressions into \code{\link[dplyr:filter]{dplyr::filter()}} +expressions. +} +\examples{ +\dontrun{ + +test_exprs <- c("SEX==1", "ID.EQ.2", "WT/=70", "AGE.NE.30", "A=1", "WT.GT.40") + +translate_nm_expr(test_exprs, type = 'ignore') + +translate_nm_expr(test_exprs, type = 'accept') + + +# Use of `@`, `#`, or form `IGNORE=C2` require `data_cols` to be specified, +# though only the first column is used +data_cols <- c("C", "ID", "TIME", "EVID", "DV", "BLQ") + +translate_nm_expr("#", data_cols = data_cols) + +translate_nm_expr("c2", data_cols = data_cols) + +translate_nm_expr("@", data_cols = data_cols) + +} +} +\seealso{ +\code{\link[=translate_nm_operator]{translate_nm_operator()}}, \code{\link[=invert_operator]{invert_operator()}} +} +\keyword{internal} diff --git a/man/translate_nm_operator.Rd b/man/translate_nm_operator.Rd new file mode 100644 index 00000000..0f6f6e24 --- /dev/null +++ b/man/translate_nm_operator.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/filter-nm-data.R +\name{translate_nm_operator} +\alias{translate_nm_operator} +\title{Function to translate \code{NONMEM} operators to \code{R} operators} +\usage{ +translate_nm_operator(expr) +} +\arguments{ +\item{expr}{String. A \code{NONMEM} ignore/accept expression} +} +\value{ +A \code{\link[dplyr:filter]{dplyr::filter()}} expression +} +\description{ +Function to translate \code{NONMEM} operators to \code{R} operators +} +\note{ +\code{.EQN.} and \code{.NEN.} are available after \verb{NONMEM 7.3} +} +\examples{ +\dontrun{ + +translate_nm_operator(c("AGE.NE.30", "ID.EQ.2", "WT/=70")) +#> [1] "AGE!=30" "ID==2" "WT!=70" +} +} +\seealso{ +\code{\link[=invert_operator]{invert_operator()}}, \code{\link[=translate_nm_expr]{translate_nm_expr()}} +} +\keyword{internal} diff --git a/tests/testthat/test-filter-nm-data.R b/tests/testthat/test-filter-nm-data.R new file mode 100644 index 00000000..8eae99b9 --- /dev/null +++ b/tests/testthat/test-filter-nm-data.R @@ -0,0 +1,161 @@ +test_that("translate_nm_operator translates NONMEM operators", { + # label.EQN.value and label.NEN.value are supported after NONMEM 7.3 + nm_r_translations <- list( + equal = c("A.EQ.1", "A.EQN.1", "A==1", "A=1"), + not_equal = c("B.NE.1", "B.NEN.1", "B/=1"), + greater_than = c("C.GE.1", "C.GT.1"), + less_than = c("D.LE.1", "D.LT.1") + ) + + expect_equal(unique(translate_nm_operator(nm_r_translations$equal)), "A==1") + expect_equal(unique(translate_nm_operator(nm_r_translations$not_equal)), "B!=1") + expect_equal(translate_nm_operator(nm_r_translations$greater_than), c("C>=1", "C>1")) + expect_equal(translate_nm_operator(nm_r_translations$less_than), c("D<=1", "D<1")) +}) + +test_that("translate_nm_expr() translates NONMEM filter expressions", { + test_exprs <- c("SEX==1", "ID.EQ.2", "WT/=70", "AGE.NE.30", "A=1", "WT.GT.40", "B.LE.20") + + expect_equal( + translate_nm_expr(test_exprs, type = 'accept'), + c("SEX==1", "ID==2", "WT!=70", "AGE!=30", "A==1", "WT>40", "B<=20") + ) + + expect_equal( + translate_nm_expr(test_exprs, type = 'ignore'), + c("SEX!=1", "ID!=2", "WT==70", "AGE==30", "A!=1", "WT<40", "B>=20") + ) + + + # Use of `@`, `#`, or form `IGNORE=C2` require `data_cols` to be specified + # - only `data_cols[1]` is technically needed + data_cols <- c("C", "ID", "TIME", "EVID", "DV", "BLQ") + expect_equal( + translate_nm_expr("#", data_cols = data_cols), + paste0("!grepl('^#', ", data_cols[1], ")") + ) + + expect_equal( + translate_nm_expr("C", data_cols = data_cols), + paste0(data_cols[1], "!='C'") + ) + + # Extra `\\` is added for escape purposes when the expression is later parsed + expect_equal( + translate_nm_expr("@", data_cols = data_cols), + paste0("!grepl('^\\\\s*[A-Za-z@]', ", data_cols[1], ")") + ) + + # Error out for unsupported logical operators + test_exprs_bad <- c(test_exprs, "GEN=1 .AND. AGE > 60") + expect_error( + translate_nm_expr(test_exprs_bad, type = 'accept'), + "The following logical operators are not supported" + ) +}) + +test_that("filter_nm_data() filters input data using IGNORE/ACCEPT options", { + + filtered_data <- filter_nm_data(MOD1) + + # Check expected filters + expect_equal(attributes(filtered_data)$n_records_dropped, 20) + expect_equal(nrow(filtered_data), DATA_TEST_ROWS_IGNORE) + + # Check that nm_data(.mod, filter=TRUE) works the same way + expect_equal(nrow(nm_data(MOD1, filter = TRUE)), DATA_TEST_ROWS_IGNORE) + + mod2 <- copy_model_from(MOD1, "2") + on.exit(delete_models(mod2, .force = TRUE, .tags = NULL)) + + # Add additional IGNORE expressions and compare to dplyr filters + ctl <- get_model_ctl(mod2) + data_rec <- nmrec::select_records(ctl, "data")[[1]] + data_rec$parse() + data_rec$values[[7]]$value <- "(ID.EQ.2, SEX=1, WT.LE.50)" + nmrec::write_ctl(ctl, get_model_path(mod2)) + + # Check expected expressions + input_data <- nm_data(mod2) %>% suppressMessages() + nm_exprs <- get_data_filter_exprs(mod2) + r_filters <- translate_nm_expr( + nm_expr = nm_exprs$exprs, type = nm_exprs$type, data_cols = names(input_data) + ) + filter_expression <- paste(r_filters, collapse = " & ") + expect_equal( + filter_expression, "!grepl('^\\\\s*[A-Za-z@]', ID) & ID!=2 & SEX!=1 & WT>=50" + ) + + # Check that filter expression works correctly + filtered_data <- filter_nm_data(mod2) + expect_equal( + nrow(filtered_data), + input_data %>% dplyr::filter(eval(parse(text = filter_expression))) %>% nrow() + ) + + # Check that relevant rows have been filtered out + removed_records <- invert_operator(r_filters[2]) + expect_equal( + filtered_data %>% dplyr::filter(eval(parse(text = removed_records))) %>% nrow(), + 0 + ) +}) + +test_that("filter_nm_data() errors out when using both IGNORE and ACCEPT options", { + mod2 <- copy_model_from(MOD1, "2") + on.exit(delete_models(mod2, .force = TRUE, .tags = NULL)) + + # Add additional IGNORE expressions and compare to dplyr filters + ctl <- get_model_ctl(mod2) + data_rec <- nmrec::select_records(ctl, "data")[[1]] + data_rec$parse() + + # Create new ACCEPT option + nmrec::set_record_option(data_rec, "accept", "(SEX.EQ.1)") + nmrec::write_ctl(ctl, get_model_path(mod2)) + + expect_error( + filter_nm_data(mod2), + "ACCEPT list and IGNORE list may not both be used" + ) +}) + +test_that("filter_nm_data() works with no filters", { + mod2 <- copy_model_from(MOD1, "2") + on.exit(delete_models(mod2, .force = TRUE, .tags = NULL)) + + # Add additional IGNORE expressions and compare to dplyr filters + ctl <- get_model_ctl(mod2) + data_rec <- nmrec::select_records(ctl, "data")[[1]] + data_rec$parse() + + # Remove filters + data_rec$values[[5]]$value <- NULL + data_rec$values[[7]]$value <- NULL + nmrec::write_ctl(ctl, get_model_path(mod2)) + + # Expect identical to `nm_data()` (minus the attribute) + filtered_data <- filter_nm_data(mod2) + expect_equal(attributes(filtered_data)$n_records_dropped, 0) + expect_true(all.equal(nm_data(mod2), filtered_data, check.attributes = FALSE)) +}) + + +test_that("filter_nm_data() errors if expressions cant be parsed", { + mod2 <- copy_model_from(MOD1, "2") + on.exit(delete_models(mod2, .force = TRUE, .tags = NULL)) + + # Add additional IGNORE expressions and compare to dplyr filters + ctl <- get_model_ctl(mod2) + data_rec <- nmrec::select_records(ctl, "data")[[1]] + data_rec$parse() + + # Set filter that should error + data_rec$values[[7]]$value <- "(ID.EQ.2X)" + nmrec::write_ctl(ctl, get_model_path(mod2)) + + expect_error( + filter_nm_data(mod2), + "ignore/accept list could not be converted to filters" + ) +}) diff --git a/tests/testthat/test-workflow-bootstrap.R b/tests/testthat/test-workflow-bootstrap.R index f4a98bb6..6ce2687e 100644 --- a/tests/testthat/test-workflow-bootstrap.R +++ b/tests/testthat/test-workflow-bootstrap.R @@ -102,25 +102,60 @@ withr::with_options( }) }) - test_that("setup_bootstrap_run messages if nm_join cant be used: unfinished model", { - boot_spec_path <- get_spec_path(.boot_run, .check_exists = FALSE) - expect_false(fs::file_exists(boot_spec_path)) - # Set up bootstrap run object using *non-submitted* model - expect_message( - setup_bootstrap_run(.boot_run, n = 3), - "Model has not finished executing" + test_that("setup_bootstrap_run fails if filtering expressions cant be parsed", { + mod2 <- copy_model_from(mod1, "2") + boot_run2 <- new_bootstrap_run(mod2) + on.exit(delete_models(list(mod2, boot_run2), .force = TRUE, .tags = NULL)) + + # Add additional IGNORE expressions and compare to dplyr filters + ctl <- get_model_ctl(boot_run2) + data_rec <- nmrec::select_records(ctl, "data")[[1]] + data_rec$parse() + + # Set filter that should error + current_filter <- data_rec$values[[7]]$value + data_rec$values[[7]]$value <- "(ID.EQ.2X)" + nmrec::write_ctl(ctl, get_model_path(boot_run2)) + + expect_error( + setup_bootstrap_run(boot_run2, n = 3), + "ignore/accept list could not be converted to filters" + ) + + # Revert filter and check for warning that the model hasnt been run + data_rec$values[[7]]$value <- current_filter + nmrec::write_ctl(ctl, get_model_path(boot_run2)) + + expect_warning( + setup_bootstrap_run(boot_run2, n = 3), + "has not been submitted" ) - expect_true(fs::file_exists(boot_spec_path)) }) # Submit based_on model to use for remainder of tests proc1 <- submit_model(mod1, .mode = "local", .wait = TRUE) - test_that("setup_bootstrap_run messages if nm_join cant be used: bad .join_col setup", { - # Set up bootstrap run object when .join_col checks fail - expect_message( - setup_bootstrap_run(.boot_run, n = 3, .join_col = "ID", .overwrite = TRUE), - "records must include the provided .join_col" + test_that("setup_bootstrap_run fails if n records are different", { + # If the based on model has been executed, we check that the number of + # records in the model_summary matches the number of rows in the filtered + # dataset (`nm_data(mod, filter = TRUE)`). + mod2 <- copy_model_from(mod1, "2") + copy_output_dir(mod1, file.path(MODEL_DIR_BBI, get_model_id(mod2))) + boot_run2 <- new_bootstrap_run(mod2) + on.exit(delete_models(list(mod2, boot_run2), .force = TRUE, .tags = NULL)) + + # Add additional IGNORE expressions and compare to dplyr filters + ctl <- get_model_ctl(boot_run2) + data_rec <- nmrec::select_records(ctl, "data")[[1]] + data_rec$parse() + + # Set filter that should error + data_rec$values[[7]]$value <- "(ID.EQ.2, SEX.EQ.1)" + nmrec::write_ctl(ctl, get_model_path(boot_run2)) + + expect_error( + setup_bootstrap_run(boot_run2, n = 3), + "The filtered dataset does not have the same number of records" ) }) @@ -384,9 +419,9 @@ withr::with_options( # Copy model files and output directory of simulation new_dir <- tempdir() new_dir_path <- file.path(new_dir, basename(.boot_run[[ABS_MOD_PATH]])) - fs::file_copy(ctl_ext(.boot_run[[ABS_MOD_PATH]]), ctl_ext(new_dir_path)) - fs::file_copy(yaml_ext(.boot_run[[ABS_MOD_PATH]]), yaml_ext(new_dir_path)) - fs::dir_copy(.boot_run[[ABS_MOD_PATH]], new_dir_path) + fs::file_copy(ctl_ext(.boot_run[[ABS_MOD_PATH]]), ctl_ext(new_dir_path), overwrite = TRUE) + fs::file_copy(yaml_ext(.boot_run[[ABS_MOD_PATH]]), yaml_ext(new_dir_path), overwrite = TRUE) + fs::dir_copy(.boot_run[[ABS_MOD_PATH]], new_dir_path, overwrite = TRUE) fake_boot <- read_model(new_dir_path) on.exit(delete_models(fake_boot, .tags = NULL, .force = TRUE))