Skip to content

Commit

Permalink
optionally truncate values
Browse files Browse the repository at this point in the history
  • Loading branch information
hillalex committed Nov 11, 2024
1 parent 89daa83 commit be766bb
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 82 deletions.
163 changes: 86 additions & 77 deletions R/biokinetics.R
Original file line number Diff line number Diff line change
Expand Up @@ -237,31 +237,35 @@ biokinetics <- R6::R6Class(
}
),
public = list(
#' @description Initialise the kinetics model.
#' @return An epikinetics::biokinetics object.
#' @param data Optional data table of model inputs. One of data or file must be provided. See the data vignette
#' for required columns: \code{vignette("data", package = "epikinetics")}.
#' @param file_path Optional file path to model inputs in CSV format. One of data or file must be provided.
#' @param priors Object of type \link[epikinetics]{biokinetics_priors}. Default biokinetics_priors().
#' @param covariate_formula Formula specifying linear regression model. Note all variables in the formula
#' will be treated as categorical variables. Default ~0.
#' @param preds_sd Standard deviation of predictor coefficients. Default 0.25.
#' @param scale One of "log" or "natural". Default "natural". Is provided data on a log or a natural scale? If on a natural scale it
#' will be converted to a log scale for model fitting.
#' @param upper_detection_limit Optional upper detection limit of the titre used. This is needed to construct a likelihood for upper censored
#' values, so only needs to be provided if you have such values in the dataset. If not provided, the model will default
#' to using the largest value in the dataset as the upper detection limit.
#' @param lower_detection_limit Optional lower detection limit of the titre used. This is needed to construct a likelihood for lower censored
#' values, so only needs to be provided if you have such values in the dataset. If not provided, the model will default
#' to using the smallest value in the dataset as the lower detection limit.
#' @description Initialise the kinetics model.
#' @return An epikinetics::biokinetics object.
#' @param data Optional data table of model inputs. One of data or file must be provided. See the data vignette
#' for required columns: \code{vignette("data", package = "epikinetics")}.
#' @param file_path Optional file path to model inputs in CSV format. One of data or file must be provided.
#' @param priors Object of type \link[epikinetics]{biokinetics_priors}. Default biokinetics_priors().
#' @param covariate_formula Formula specifying linear regression model. Note all variables in the formula
#' will be treated as categorical variables. Default ~0.
#' @param preds_sd Standard deviation of predictor coefficients. Default 0.25.
#' @param scale One of "log" or "natural". Default "natural". Is provided data on a log or a natural scale? If on a natural scale it
#' will be converted to a log scale for model fitting.
#' @param upper_detection_limit Optional upper detection limit of the titre used. This is needed to construct a likelihood for upper censored
#' values, so only needs to be provided if you have such values in the dataset. If not provided, the model will default
#' to using the largest value in the dataset as the upper detection limit.
#' @param lower_detection_limit Optional lower detection limit of the titre used. This is needed to construct a likelihood for lower censored
#' values, so only needs to be provided if you have such values in the dataset. If not provided, the model will default
#' to using the smallest value in the dataset as the lower detection limit.
#' @param truncate_upper Logical. Whether values greater than the upper detection limit should be truncated, i.e. set to the same value as the limit. Default TRUE.
#' @param truncate_lower Logical. Whether values smaller than the lower detection limit should be truncated, i.e. set to the same value as the limit. Default TRUE.
initialize = function(priors = biokinetics_priors(),
data = NULL,
file_path = NULL,
covariate_formula = ~0,
preds_sd = 0.25,
scale = "natural",
upper_detection_limit = NULL,
lower_detection_limit = NULL) {
lower_detection_limit = NULL,
truncate_upper = TRUE,
truncate_lower = TRUE) {
validate_priors(priors)
private$priors <- priors
validate_numeric(preds_sd)
Expand All @@ -287,19 +291,24 @@ biokinetics <- R6::R6Class(
validate_required_cols(private$data)
validate_formula_vars(private$all_formula_vars, private$data)
logger::log_info("Preparing data for stan")
# this is used to scale the data if natural scale data is provided
private$smallest_value <- min(private$data$value)
private$get_upper_detection_limit(upper_detection_limit)
private$get_lower_detection_limit(lower_detection_limit)
if (truncate_upper) {
private$data[, value := ifelse(value > private$upper_detection_limit, private$upper_detection_limit, value)]
}
if (truncate_lower) {
private$data[, value := ifelse(value < private$lower_detection_limit, private$lower_detection_limit, value)]
}
private$data[, `:=`(obs_id = seq_len(.N),
time_since_last_exp = as.integer(day - last_exp_day, units = "days"),
censored_lo = value <= private$lower_detection_limit,
censored_hi = value >= private$upper_detection_limit)]
censored_lo = value == private$lower_detection_limit,
censored_hi = value == private$upper_detection_limit)]
private$construct_design_matrix()
private$build_covariate_lookup_table()
private$build_pid_lookup()
private$build_titre_type_lookup()
if (scale == "natural") {
private$smallest_value <- min(private$data$value)
private$data <- convert_log2_scale(private$data,
smallest_value = private$smallest_value,
vars_to_transform = "value")
Expand All @@ -311,12 +320,12 @@ biokinetics <- R6::R6Class(
package = "epikinetics"
)
},
#' @description Plot the kinetics trajectory predicted by the model priors.
#' Note that this is on a log scale, regardless of whether the data was provided
#' on a log or a natural scale.
#' @return A ggplot2 object.
#' @param tmax Integer. The number of time points in each simulated trajectory. Default 150.
#' @param n_draws Integer. The number of trajectories to simulate. Default 2000.
#' @description Plot the kinetics trajectory predicted by the model priors.
#' Note that this is on a log scale, regardless of whether the data was provided
#' on a log or a natural scale.
#' @return A ggplot2 object.
#' @param tmax Integer. The number of time points in each simulated trajectory. Default 150.
#' @param n_draws Integer. The number of trajectories to simulate. Default 2000.
plot_prior_predictive = function(tmax = 150,
n_draws = 2000) {
plot(private$priors,
Expand All @@ -326,41 +335,41 @@ biokinetics <- R6::R6Class(
upper_detection_limit = private$stan_input_data$upper_detection_limit,
lower_detection_limit = private$stan_input_data$lower_detection_limit)
},
#' @description Plot model input data with a smoothing function. Note that
#' this plot is of the data as provided to the Stan model so is on a log scale,
#' regardless of whether data was provided on a log or a natural scale.
#' @param tmax Integer. Maximum time since last exposure to include. Default 150.
#' @return A ggplot2 object.
#' @description Plot model input data with a smoothing function. Note that
#' this plot is of the data as provided to the Stan model so is on a log scale,
#' regardless of whether data was provided on a log or a natural scale.
#' @param tmax Integer. Maximum time since last exposure to include. Default 150.
#' @return A ggplot2 object.
plot_model_inputs = function(tmax = 150) {
plot_sero_data(private$data,
tmax = tmax,
covariates = private$all_formula_vars,
upper_detection_limit = private$stan_input_data$upper_detection_limit,
lower_detection_limit = private$stan_input_data$lower_detection_limit)
},
#' @description View the data that is passed to the stan model, for debugging purposes.
#' @return A list of arguments that will be passed to the stan model.
#' @description View the data that is passed to the stan model, for debugging purposes.
#' @return A list of arguments that will be passed to the stan model.
get_stan_data = function() {
private$stan_input_data
},
#' @description View the mapping of human readable covariate names to the model variable p.
#' @return A data.table mapping the model variable p to human readable covariates.
#' @description View the mapping of human readable covariate names to the model variable p.
#' @return A data.table mapping the model variable p to human readable covariates.
get_covariate_lookup_table = function() {
private$covariate_lookup_table
},
#' @description Fit the model and return CmdStanMCMC fitted model object.
#' @return A CmdStanMCMC fitted model object: <https://mc-stan.org/cmdstanr/reference/CmdStanMCMC.html>
#' @param ... Named arguments to the `sample()` method of CmdStan model.
#' objects: <https://mc-stan.org/cmdstanr/reference/model-method-sample.html>
#' @description Fit the model and return CmdStanMCMC fitted model object.
#' @return A CmdStanMCMC fitted model object: <https://mc-stan.org/cmdstanr/reference/CmdStanMCMC.html>
#' @param ... Named arguments to the `sample()` method of CmdStan model.
#' objects: <https://mc-stan.org/cmdstanr/reference/model-method-sample.html>
fit = function(...) {
logger::log_info("Fitting model")
private$fitted <- private$model$sample(private$stan_input_data, ...)
private$fitted
},
#' @description Extract fitted population parameters
#' @return A data.table
#' @param n_draws Integer. Default 2000.
#' @param human_readable_covariates Logical. Default TRUE.
#' @description Extract fitted population parameters
#' @return A data.table
#' @param n_draws Integer. Default 2000.
#' @param human_readable_covariates Logical. Default TRUE.
extract_population_parameters = function(n_draws = 2000,
human_readable_covariates = TRUE) {
private$check_fitted()
Expand Down Expand Up @@ -393,11 +402,11 @@ biokinetics <- R6::R6Class(
}
dt_out
},
#' @description Extract fitted individual parameters
#' @return A data.table
#' @param n_draws Integer. Default 2000.
#' @param include_variation_params Logical
#' @param human_readable_covariates Logical. Default TRUE.
#' @description Extract fitted individual parameters
#' @return A data.table
#' @param n_draws Integer. Default 2000.
#' @param include_variation_params Logical
#' @param human_readable_covariates Logical. Default TRUE.
extract_individual_parameters = function(n_draws = 2000,
include_variation_params = TRUE,
human_readable_covariates = TRUE) {
Expand Down Expand Up @@ -430,17 +439,17 @@ biokinetics <- R6::R6Class(
}
dt_out
},
#' @description Process the model results into a data table of titre values over time.
#' @return A data.table containing titre values at time points. If summarise = TRUE, columns are time_since_last_exp,
#' me, lo, hi, titre_type, and a column for each covariate in the hierarchical model. If summarise = FALSE, columns are
#' time_since_last_exp, .draw, t0_pop, tp_pop, ts_pop, m1_pop, m2_pop, m3_pop, beta_t0, beta_tp, beta_ts, beta_m1, beta_m2, beta_m3, mu
#' titre_type and a column for each covariate in the hierarchical model. See the data vignette for details:
#' \code{vignette("data", package = "epikinetics")}
#' @param t_max Integer. Maximum number of time points to include.
#' @param summarise Boolean. Default TRUE. If TRUE, summarises over draws from posterior parameter distributions to
#' return 0.025, 0.5 and 0.975 quantiles, labelled lo, me and hi, respectively. If FALSE returns values for individual
#' draws from posterior parameter distributions.
#' @param n_draws Integer. Maximum number of samples to include. Default 2000.
#' @description Process the model results into a data table of titre values over time.
#' @return A data.table containing titre values at time points. If summarise = TRUE, columns are time_since_last_exp,
#' me, lo, hi, titre_type, and a column for each covariate in the hierarchical model. If summarise = FALSE, columns are
#' time_since_last_exp, .draw, t0_pop, tp_pop, ts_pop, m1_pop, m2_pop, m3_pop, beta_t0, beta_tp, beta_ts, beta_m1, beta_m2, beta_m3, mu
#' titre_type and a column for each covariate in the hierarchical model. See the data vignette for details:
#' \code{vignette("data", package = "epikinetics")}
#' @param t_max Integer. Maximum number of time points to include.
#' @param summarise Boolean. Default TRUE. If TRUE, summarises over draws from posterior parameter distributions to
#' return 0.025, 0.5 and 0.975 quantiles, labelled lo, me and hi, respectively. If FALSE returns values for individual
#' draws from posterior parameter distributions.
#' @param n_draws Integer. Maximum number of samples to include. Default 2000.
simulate_population_trajectories = function(
t_max = 150,
summarise = TRUE,
Expand Down Expand Up @@ -485,11 +494,11 @@ biokinetics <- R6::R6Class(

dt_out
},
#' @description Process the stan model results into a data.table.
#' @return A data.table of peak and set titre values. Columns are tire_type, mu_p, mu_s, rel_drop_me, mu_p_me,
#' mu_s_me, and a column for each covariate. See the data vignette for details:
#' \code{vignette("data", package = "epikinetics")}
#' @param n_draws Integer. Maximum number of samples to include. Default 2000.
#' @description Process the stan model results into a data.table.
#' @return A data.table of peak and set titre values. Columns are tire_type, mu_p, mu_s, rel_drop_me, mu_p_me,
#' mu_s_me, and a column for each covariate. See the data vignette for details:
#' \code{vignette("data", package = "epikinetics")}
#' @param n_draws Integer. Maximum number of samples to include. Default 2000.
population_stationary_points = function(n_draws = 2000) {
private$check_fitted()
validate_numeric(n_draws)
Expand Down Expand Up @@ -535,17 +544,17 @@ biokinetics <- R6::R6Class(
mu_s_me = quantile(mu_s, 0.5)),
by = c(private$all_formula_vars, "titre_type")]
},
#' @description Simulate individual trajectories from the model. This is
#' computationally expensive and may take a while to run if n_draws is large.
#' @return A data.table. If summarise = TRUE columns are calendar_date, titre_type, me, lo, hi, time_shift.
#' If summarise = FALSE, columns are pid, draw, time_since_last_exp, mu, titre_type, exposure_day, calendar_day, time_shift
#' and a column for each covariate in the regression model. See the data vignette for details:
#' \code{vignette("data", package = "epikinetics")}.
#' @param summarise Boolean. If TRUE, average the individual trajectories to get lo, me and
#' hi values for the population, disaggregated by titre type. If FALSE return the indidivudal trajectories.
#' Default TRUE.
#' @param n_draws Integer. Maximum number of samples to draw. Default 2000.
#' @param time_shift Integer. Number of days to adjust the exposure day by. Default 0.
#' @description Simulate individual trajectories from the model. This is
#' computationally expensive and may take a while to run if n_draws is large.
#' @return A data.table. If summarise = TRUE columns are calendar_date, titre_type, me, lo, hi, time_shift.
#' If summarise = FALSE, columns are pid, draw, time_since_last_exp, mu, titre_type, exposure_day, calendar_day, time_shift
#' and a column for each covariate in the regression model. See the data vignette for details:
#' \code{vignette("data", package = "epikinetics")}.
#' @param summarise Boolean. If TRUE, average the individual trajectories to get lo, me and
#' hi values for the population, disaggregated by titre type. If FALSE return the indidivudal trajectories.
#' Default TRUE.
#' @param n_draws Integer. Maximum number of samples to draw. Default 2000.
#' @param time_shift Integer. Number of days to adjust the exposure day by. Default 0.
simulate_individual_trajectories = function(
summarise = TRUE,
n_draws = 2000,
Expand Down
4 changes: 2 additions & 2 deletions R/plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ plot.biokinetics_priors <- function(x,

# Declare variables to suppress notes when compiling package
# https://github.com/Rdatatable/data.table/issues/850#issuecomment-259466153
t0 <- tp <- ts <- m1 <- m2 <- m3 <- censored <- NULL
t0 <- tp <- ts <- m1 <- m2 <- m3 <- NULL
time_since_last_exp <- me <- lo <- hi <- value <- mu <- NULL

if (!is.null(data)) {
Expand Down Expand Up @@ -81,7 +81,7 @@ plot_sero_data <- function(data,
data <- data[time_since_last_exp <= tmax,]
# Declare variables to suppress notes when compiling package
# https://github.com/Rdatatable/data.table/issues/850#issuecomment-259466153
time_since_last_exp <- value <- titre_type <- censored <- NULL
time_since_last_exp <- value <- titre_type <- NULL

plot <- ggplot(data) +
geom_point(aes(x = time_since_last_exp, y = value, colour = titre_type),
Expand Down
8 changes: 7 additions & 1 deletion man/biokinetics.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions tests/testthat/test-plots.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,16 @@ test_that("Prior predictions from model are the same", {

test_that("Can plot input data", {
data <- data.table::fread(system.file("delta_full.rds", package = "epikinetics"))
mod <- biokinetics$new(data = data, lower_detection_limit = 40)
mod <- biokinetics$new(data = data,
lower_detection_limit = 40,
truncate_lower = FALSE)
suppressWarnings({plot <- mod$plot_model_inputs()})
vdiffr::expect_doppelganger("inputdata", plot)

mod <- biokinetics$new(data = data,
lower_detection_limit = 40,
covariate_formula = ~0 + infection_history)
covariate_formula = ~0 + infection_history,
truncate_lower = FALSE)
suppressWarnings({plot <- mod$plot_model_inputs()})
vdiffr::expect_doppelganger("inputdata_covariates", plot)
})
Expand Down

0 comments on commit be766bb

Please sign in to comment.