Skip to content

Commit

Permalink
Merge pull request #711 from metrumresearchgroup/parse-data-record
Browse files Browse the repository at this point in the history
Parse $DATA record for the purpose of filtering the input data
  • Loading branch information
barrettk authored Sep 19, 2024
2 parents ed2c258 + df3566e commit e2d2e15
Show file tree
Hide file tree
Showing 16 changed files with 719 additions and 98 deletions.
122 changes: 75 additions & 47 deletions R/bootstrap-model.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@
#' `remove_tables` arguments). The object returned from this must then be passed
#' to [setup_bootstrap_run()] before submission (see examples).
#'
#' @param .mod a `bbr` model object
#' @param .suffix a suffix for the bootstrap run directory. Will be prefixed by
#' @param .mod A `bbr` model object.
#' @param .suffix A suffix for the bootstrap run directory. Will be prefixed by
#' the model id of `.mod`.
#' @inheritParams copy_model_from
#' @param .inherit_tags If `TRUE`, the default, inherit any tags from `.mod`.
#' @param remove_cov,remove_tables Logical (T/F). Optionally remove `$COVARIANCE`
#' @param remove_cov,remove_tables If `TRUE`, the default, remove `$COVARIANCE`
#' and `$TABLE` records respectively, allowing for notably faster run times.
#' Both default to `TRUE`.
#'
#' @seealso [setup_bootstrap_run()] [summarize_bootstrap_run()]
#' @examples
Expand Down Expand Up @@ -81,21 +80,21 @@ new_bootstrap_run <- function(
#' objects, and the new datasets are sampled from the dataset defined in its
#' `$DATA` record (i.e. `get_data_path(.boot_run)`).
#'
#' @param .boot_run a `bbi_nmboot_model` object.
#' @param n number of model runs.
#' @param strat_cols columns to maintain proportion for stratification
#' @param seed a seed for sampling the data. Set to `NULL` to avoid setting.
#' @param .boot_run A `bbi_nmboot_model` object.
#' @param n Number of data sets and model runs to generate.
#' @param strat_cols Columns to maintain proportion for stratification
#' @param seed A numeric seed to set prior to resampling the data; use `NULL` to
#' avoid setting a seed.
#' @param data A dataset to resample from. Defaults to `NULL`, which will use
#' the output from `nm_join(.mod)`. If provided, must include the same column
#' names as what's returned from `nm_data(.mod)`. If the default is used, note
#' that a suitable `.join_col` must be provided.
#' @param .join_col Character column name to use to join table files. Not used
#' if `data` is specified. Passed to [nm_join()], and used to create the
#' initial dataset that gets re-sampled `n` times. The purpose of joining the
#' input data to table files is to filter the population to only the subjects
#' that actually made it into the model. See the `Details` section in
#' [nm_join()] for more information.
#' @param .overwrite logical (T/F) indicating whether or not to overwrite
#' the _filtered_ output from `nm_data(.mod, filter = TRUE)`. If provided,
#' must include the same column names as what's returned from `nm_data(.mod)`.
#' @param .bbi_args Named list passed to `model_summary(orig_mod, .bbi_args)`,
#' where `orig_mod` is the model `.boot_run` is based on. See
#' [print_bbi_args()] for valid options. Defaults to `list(no_grd_file = TRUE,
#' no_shk_file = TRUE)` because [model_summary()] is only called internally to
#' extract the number of records, so those files are irrelevant. Only used if
#' the based on model (the model being bootstrapped) has been executed.
#' @param .overwrite Logical (T/F) indicating whether or not to overwrite
#' existing setup for a bootstrap run.
#'
#' @details
Expand Down Expand Up @@ -138,7 +137,10 @@ setup_bootstrap_run <- function(
strat_cols = NULL,
seed = 1234,
data = NULL,
.join_col = "NUM",
.bbi_args = list(
no_grd_file = TRUE,
no_shk_file = TRUE
),
.overwrite = FALSE
){
check_model_object(.boot_run, NMBOOT_MOD_CLASS)
Expand Down Expand Up @@ -172,29 +174,6 @@ setup_bootstrap_run <- function(
writeLines(ignore_lines, file.path(boot_dir, ".gitignore"))

if(is.null(data)){
# Only include subjects that entered the original problem by default
can_be_joined <- can_be_nm_joined(orig_mod, .join_col = .join_col)
if(isTRUE(can_be_joined)){
starting_data <- nm_join(orig_mod, .join_col = .join_col) %>%
suppressMessages()

# select only columns from original data set
starting_data <- starting_data %>%
dplyr::select(attr(starting_data, "nm_join_origin")$data)

if ("DV.DATA" %in% names(starting_data)) {
starting_data <- dplyr::rename(starting_data, "DV" = "DV.DATA")
}
}else{
rlang::inform(
paste(
"Defaulting to input data, which may include data that doesn't enter",
"the final problem (i.e. ignored subjects)"
)
)
starting_data <- nm_data(orig_mod) %>% suppressMessages()
}

# Overwrite data path in control stream
# - This is not necessary in most cases, but is if overwriting a previous
# run where a starting dataset was provided. The data path must then
Expand All @@ -204,6 +183,55 @@ setup_bootstrap_run <- function(
data_path_rel <- get_data_path_from_ctl(orig_mod, normalize = FALSE)
modify_data_path_ctl(.boot_run, data_path_rel)
}

# Only include subjects that entered the original problem by default
starting_data <- tryCatch({
nm_data(.boot_run, filter = TRUE) %>% suppressMessages()
}, error = function(cond){
fs::dir_delete(boot_dir)
# If IGNORE/ACCEPT expressions cant be turned into dplyr expressions
cli::cli_div(theme = list(span.code = list(color = "blue")))
cli::cli_abort(
c(
cond$message,
"i" = "Please check your control stream or provide a starting dataset ({.var data} arg)",
"i" = "You may try {.code setup_bootstrap_run(.boot_run, data = nm_join(mod))}"
)
)
})

# If model has finished, check the number of records to ensure the filtering
# was done correctly
if(check_nonmem_finished(orig_mod)){
.s <- model_summary(orig_mod, .bbi_args = .bbi_args)
nrec <- .s$run_details$number_of_data_records
nrec_f <- nrow(starting_data)
if(nrec != nrec_f){
fs::dir_delete(boot_dir)
cli::cli_div(theme = list(.code = list(color = "blue"), .val = list(color = "red3")))
cli::cli_abort(
c(
"!" = "The filtered dataset does not have the same number of records as the original model:",
"*" = "{.code nm_data(orig_mod, filter = TRUE)} returned {.val {nrec_f}} records",
"*" = "{.code model_summary(orig_mod)} returned {.val {nrec}} records",
"i" = "where {.code orig_mod <- read_model(get_based_on(.boot_run))}",
"i" = "Try providing a starting dataset (e.g., {.code setup_bootstrap_run(.boot_run, data = nm_join(orig_mod))})"
)
)
}
}else{
orig_mod_name <- fs::path_rel(
get_model_path(orig_mod),
get_model_working_directory(orig_mod)
)
cli::cli_div(theme = list(.code = list(color = "blue"), .val = list(color = "red3")))
cli::cli_warn(
c(
"The parent model ({.code {orig_mod_name}}) has not been submitted",
"i" = "Consider executing {.code {orig_mod_name}} to perform additional checks"
)
)
}
}else{
checkmate::assert_data_frame(data)
# Get input columns from dataset referenced in based_on model
Expand Down Expand Up @@ -288,8 +316,8 @@ setup_bootstrap_run <- function(

#' Set up a single bootstrap model run
#'
#' @param mod_path absolute model path (no file extension) of a bootstrap model run.
#' @param boot_args list of parameters needed to create a bootstrap model run.
#' @param mod_path Absolute model path (no file extension) of a bootstrap model run.
#' @param boot_args List of parameters needed to create a bootstrap model run.
#'
#' @keywords internal
make_boot_run <- function(mod_path, boot_args){
Expand Down Expand Up @@ -358,7 +386,7 @@ make_boot_run <- function(mod_path, boot_args){

#' Store bootstrap run details before submission
#'
#' @param boot_models list of boostrap model objects created by `make_boot_run()`.
#' @param boot_models List of boostrap model objects created by `make_boot_run()`.
#' @inheritParams make_boot_run
#'
#' @details
Expand Down Expand Up @@ -425,7 +453,7 @@ make_boot_spec <- function(boot_models, boot_args){
#' bootstrap run directory.
#'
#' @inheritParams setup_bootstrap_run
#' @param force_resummarize logical (T/F). If `TRUE`, force re-summarization.
#' @param force_resummarize Logical (T/F). If `TRUE`, force re-summarization.
#' Will _only_ update the saved out `RDS` file when specified via
#' `summarize_bootstrap_run()`. See details for more information.
#'
Expand Down Expand Up @@ -621,7 +649,7 @@ summarize_bootstrap_run <- function(

#' @describeIn summarize_bootstrap Tabulate parameter estimates for each model
#' submission in a bootstrap run
#' @param format_long logical (T/F). If `TRUE`, format data as a long table,
#' @param format_long Logical (T/F). If `TRUE`, format data as a long table,
#' making the data more portable for plotting.
#' @export
bootstrap_estimates <- function(
Expand Down
Loading

0 comments on commit e2d2e15

Please sign in to comment.