From db2b4a2c62eb03896d1d0bc37a18816d8edbd1ff Mon Sep 17 00:00:00 2001 From: "alex.hill@gmail.com" Date: Fri, 1 Nov 2024 11:04:44 +0000 Subject: [PATCH] take user provided upper and lower detction limits --- R/biokinetics.R | 40 +++++++++++++++++++++++++--- R/utils.R | 12 +++------ src/stan/antibody_kinetics_main.stan | 12 +++++---- 3 files changed, 48 insertions(+), 16 deletions(-) diff --git a/R/biokinetics.R b/R/biokinetics.R index e93e1e1..322e677 100644 --- a/R/biokinetics.R +++ b/R/biokinetics.R @@ -26,6 +26,28 @@ biokinetics <- R6::R6Class( stop("Model has not been fitted yet. Call 'fit' before calling this function.") } }, + get_upper_detection_limit = function(upper_limit) { + if (is.null(upper_limit)) { + return(max(private$data$value)) + } + max_value <- max(private$data$value) + if (max_value > upper_limit) { + warning(sprintf("Data contains a value of %s which is greater than the upper detection limit %s", + max_value, upper_limit)) + } + return(upper_limit) + }, + get_lower_detection_limit = function(lower_limit) { + if (is.null(lower_limit)) { + return(min(private$data$value)) + } + min_value <- min(private$data$value) + if (max_value > upper_limit) { + warning(sprintf("Data contains a value of %s which is smaller than the lower detection limit %s", + min_value, lower_limit)) + } + return(lower_limit) + }, model_matrix_with_dummy = function(data) { # Identify columns that are factors with one level @@ -155,7 +177,7 @@ biokinetics <- R6::R6Class( } dt_out }, - prepare_stan_data = function() { + prepare_stan_data = function(upper, lower) { pid <- value <- censored <- titre_type <- obs_id <- time_since_last_exp <- NULL stan_data <- list( N = private$data[, .N], @@ -176,6 +198,8 @@ biokinetics <- R6::R6Class( stan_data$t <- private$data[, time_since_last_exp] stan_data$X <- private$design_matrix stan_data$P <- ncol(private$design_matrix) + stan_data$upper_limit <- log2(upper) + stan_data$lower_limit <- log2(lower) private$stan_input_data <- c(stan_data, private$priors) }, @@ -215,12 +239,20 @@ biokinetics <- R6::R6Class( #' @param preds_sd Standard deviation of predictor coefficients. Default 0.25. #' @param scale One of "log" or "natural". Default "natural". Is provided data on a log or a natural scale? If on a natural scale it #' will be converted to a log scale for model fitting. + #' @param upper_detection_limit Optional upper detection limit of the titre used. This is needed to construct a likelihood for upper censored + #' values, so only needs to be provided if you have such values in the dataset. If not provided and you have censored values, the model will default + #' to using the largest value in the dataset as the upper detection limit. + #' @param lower_detection_limit Optional lower detection limit of the titre used. This is needed to construct a likelihood for lower censored + #' values, so only needs to be provided if you have such values in the dataset. If not provided and you have censored values, the model will default + #' to using the smallest value in the dataset as the lower detection limit. initialize = function(priors = biokinetics_priors(), data = NULL, file_path = NULL, covariate_formula = ~0, preds_sd = 0.25, - scale = "natural") { + scale = "natural", + upper_detection_limit = NULL, + lower_detection_limit = NULL) { validate_priors(priors) private$priors <- priors validate_numeric(preds_sd) @@ -258,7 +290,9 @@ biokinetics <- R6::R6Class( private$build_covariate_lookup_table() private$build_pid_lookup() private$build_titre_type_lookup() - private$prepare_stan_data() + upper <- private$get_upper_detection_limit(upper_detection_limit) + lower <- private$get_lower_detection_limit(lower_detection_limit) + private$prepare_stan_data(upper, lower) logger::log_info("Retrieving compiled model") private$model <- instantiate::stan_package_model( name = "antibody_kinetics_main", diff --git a/R/utils.R b/R/utils.R index 532f9c8..3fc4131 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,12 +1,8 @@ convert_log2_scale <- function( - dt_in, vars_to_transform = "value", - simplify_limits = TRUE) { + dt_in, vars_to_transform = "value") { dt_out <- data.table::copy(dt_in) for (var in vars_to_transform) { - if (simplify_limits == TRUE) { - dt_out[get(var) > 2560, (var) := 2560] - } - dt_out[, (var) := log2(get(var) / 5)] + dt_out[, (var) := log2(get(var))] } return(dt_out) } @@ -23,8 +19,8 @@ convert_log2_scale <- function( convert_log2_scale_inverse <- function(dt_in, vars_to_transform) { dt_out <- data.table::copy(dt_in) for (var in vars_to_transform) { - # Reverse the log2 transformation and multiplication by 5. - dt_out[, (var) := 5 * 2^(get(var))] + # Reverse the log2 transformation + dt_out[, (var) := 2^(get(var))] } dt_out } diff --git a/src/stan/antibody_kinetics_main.stan b/src/stan/antibody_kinetics_main.stan index 61708a2..63fe543 100644 --- a/src/stan/antibody_kinetics_main.stan +++ b/src/stan/antibody_kinetics_main.stan @@ -29,6 +29,8 @@ data { int N; // Number of observations int N_events; // Number of exposure events int K; // Number of titre types + real upper_limit; // Upper detection limit + real lower_limit; // Lower detection limit array[N] int titre_type; // Titre type for each observation vector[N] t; // Time for each observation vector[N] value; // Observed titre values @@ -143,14 +145,14 @@ model { mu = boost_wane_ind( t, t0_ind, tp_ind, ts_ind, m1_ind, m2_ind, m3_ind, titre_type, id); - // Likelihood for observations in the range 1 < log2(value/5) <= 7 + // Likelihood for uncensored observations value[uncens_idx] ~ normal(mu[uncens_idx], sigma); - // Likelihood for observations at lower limit: log2(value/5) = 0 - target += normal_lcdf(0 | mu[cens_lo_idx], sigma); + // Likelihood for observations at lower limit + target += normal_lcdf(lower_limit | mu[cens_lo_idx], sigma); - // Censoring at log2(value/5) = 9, originally value = 2560 - target += normal_lccdf(9 | mu[cens_hi_idx], sigma); + // Censoring at upper limit + target += normal_lccdf(upper_limit | mu[cens_hi_idx], sigma); // Covariate-level mean priors, parameterised from previous studies t0_pop ~ normal(mu_t0, sigma_t0);