diff --git a/DESCRIPTION b/DESCRIPTION index ccff2ea..774f6fb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: drjacoby Type: Package Title: Flexible Markov Chain Monte Carlo via Reparameterization -Version: 1.5.3 +Version: 1.5.4 Authors@R: c( person("Bob", "Verity", email = "r.verity@imperial.ac.uk", role = c("aut", "cre")), person("Pete", "Winskill", email = "p.winskill@imperial.ac.uk", role = c("aut")) @@ -26,7 +26,9 @@ Imports: rlang, cowplot, dplyr, - magrittr + magrittr, + GGally, + tidyr Suggests: testthat, covr, diff --git a/NAMESPACE b/NAMESPACE index dbb7d15..43db110 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,16 +6,20 @@ export(check_drjacoby_loaded) export(cpp_template) export(define_params) export(plot_autocorrelation) -export(plot_cor) export(plot_cor_mat) export(plot_credible) +export(plot_density) export(plot_mc_acceptance) -export(plot_par) +export(plot_pairs) export(plot_rung_loglike) +export(plot_scatter) +export(plot_trace) export(run_mcmc) export(sample_chains) import(dplyr) import(ggplot2) +importFrom(GGally,ggpairs) +importFrom(GGally,wrap) importFrom(Rcpp,sourceCpp) importFrom(coda,geweke.diag) importFrom(coda,mcmc) @@ -27,6 +31,7 @@ importFrom(stats,quantile) importFrom(stats,runif) importFrom(stats,setNames) importFrom(stats,var) +importFrom(tidyr,pivot_longer) importFrom(utils,setTxtProgressBar) importFrom(utils,txtProgressBar) useDynLib(drjacoby, .registration = TRUE) diff --git a/R/main.R b/R/main.R index 5261b86..d548e98 100644 --- a/R/main.R +++ b/R/main.R @@ -1,629 +1,629 @@ - -#------------------------------------------------ -#' @title Check that drjacoby package has loaded successfully -#' -#' @description Simple function to check that drjacoby package has loaded -#' successfully. Prints "drjacoby loaded successfully!" if so. -#' -#' @export - -check_drjacoby_loaded <- function() { - message("drjacoby loaded successfully!") -} - -#------------------------------------------------ -#' @title Define parameters dataframe -#' -#' @description Provides a convenient way of defining parameters in the format -#' required by \code{run_mcmc()}. Each parameter must have the following three -#' elements, defined in order: -#' \itemize{ -#' \item \code{name} - the parameter name. -#' \item \code{min} - the minimum value of the parameter. \code{-Inf} is -#' allowed. -#' \item \code{max} - the maximum value of the parameter. \code{Inf} is -#' allowed. -#' } -#' There following arguments are also optional: -#' \itemize{ -#' \item \code{init} - the initial value of the parameter. If running -#' multiple chains a vector of initial values can be used to specify distinct -#' values for each chain. -#' \item \code{block} - which likelihood block(s) this parameter belongs to. -#' See vignettes for instructions on using likelihood blocks. -#' } -#' -#' @param ... a series of named input arguments. -#' -#' -#' @export -#' @examples -#' define_params(name = "mu", min = -10, max = 10, init = 0, -#' name = "sigma", min = 0, max = 5, init = c(1, 2)) -#' -#' define_params(name = "mu1", min = -10, max = 10, init = 0, block = 1, -#' name = "mu2", min = -10, max = 10, init = 0, block = 2, -#' name = "sigma", min = 0, max = 5, init = 1, block = c(1, 2)) - -define_params <- function(...) { - x <- list(...) - - # check input format of arguments - assert_gr(length(x), 0, message = "input cannot be empty") - assert_in(names(x), c("name", "min", "max", "init", "block")) - use_init <- ("init" %in% names(x)) - use_block <- ("block" %in% names(x)) - n_cols <- 3 + use_init + use_block - if ((length(x) %% n_cols) != 0) { - stop("must have the same number of inputs per parameter") - } - n_param <- length(x) / n_cols - arg_names <-c("name", "min", "max") - if (use_init) { - arg_names <- c(arg_names, "init") - } - if (use_block) { - arg_names <- c(arg_names, "block") - } - assert_eq(names(x), rep(arg_names, n_param)) - - # create params dataframe - v <- n_cols*(0:(n_param - 1)) - ret <- data.frame(name = unlist(x[1 + v]), - min = unlist(x[2 + v]), - max = unlist(x[3 + v])) - if (use_init) { - ret$init <- x[which(arg_names == "init") + v] - } - if (use_block) { - ret$block <- x[which(arg_names == "block") + v] - } - - # run checks and standardise format - ret <- check_params(ret) - - return(ret) -} - -#------------------------------------------------ -# Check that params dataframe is formatted correctly, and return in standardised -# format (init and block coerced to list) -#' @noRd -check_params <- function(x) { - - # check dataframe has correct elements - assert_dataframe(x) - assert_in(c("name", "min", "max"), names(x), - message = "df_params must contain the columns 'name', 'min', 'max'") - if (any(duplicated(x$name))) { - stop("parameter names must be unique") - } - use_init <- ("init" %in% names(x)) - use_block <- ("block" %in% names(x)) - - # coerce init and block to list - if (use_init) { - if (!is.list(x$init)) { - x$init <- as.list(x$init) - } - } - if (use_block) { - if (!is.list(x$block)) { - x$block <- as.list(x$block) - } - } - - # check each row in turn - for (i in seq_len(nrow(x))) { - - # check format - assert_single_string(x$name[i], message = "parameter names must be character strings") - assert_single_numeric(x$min[i], message = "min values must be single values") - assert_single_numeric(x$max[i], message = "min values must be single values") - if (use_init) { - assert_vector_numeric(x$init[[i]], message = "init values must be numeric") - } - if (use_block) { - assert_vector_numeric(x$block[[i]], message = "block values must be numeric") - } - - # check order - assert_leq(x$min[i], x$max[i], message = "min values must be less than or equal to max values") - if (use_init) { - assert_greq(x$init[[i]], x$min[i], message = "init values must be greater than or equal to min values") - assert_leq(x$init[[i]], x$max[i], message = "init values must be less than or equal to max values") - } - } - - return(x) -} - -#------------------------------------------------ -#' @title Run drjacoby MCMC -#' -#' @description Run MCMC either with or without parallel tempering turned on. -#' Minimum inputs include a data object, a data.frame of parameters, a -#' log-likelihood function and a log-prior function. Produces an object of -#' class \code{drjacoby_output}, which contains all MCMC output along with -#' some diagnostics and a record of inputs. -#' -#' @details Note that both \code{data} and \code{misc} are passed into -#' log-likelihood/log-prior functions *by reference*. This means if you modify -#' these objects inside the functions then any changes will persist. -#' -#' @param data a named list or data frame or data values. -#' @param df_params a data.frame of parameters (see \code{?define_params}). -#' @param misc optional list object passed to likelihood and prior. This can be -#' useful for passing values that are not strictly data, for example passing a -#' lookup table to the prior function. -#' @param loglike,logprior the log-likelihood and log-prior functions used in -#' the MCMC. Can either be passed in as R functions (not in quotes), or as -#' character strings naming compiled C++ functions (in quotes). -#' @param burnin the number of burn-in iterations. Automatic tuning of proposal -#' standard deviations is only active during the burn-in period. -#' @param samples the number of sampling iterations. -#' @param rungs the number of temperature rungs used in the parallel tempering -#' method. By default, \eqn{\beta} values are equally spaced between 0 and 1, -#' i.e. \eqn{\beta[i]=}\code{(i-1)/(rungs-1)} for \code{i} in \code{1:rungs}. -#' The likelihood for the \out{ith} heated chain is raised to the -#' power \eqn{\beta[i]^\alpha}, meaning we can use the \eqn{\alpha} parameter -#' to concentrate rungs towards the start or the end of the interval (see the -#' \code{alpha} argument). -#' @param chains the number of independent replicates of the MCMC to run. If a -#' \code{cluster} object is defined then these chains are run in parallel, -#' otherwise they are run in serial. -#' @param beta_manual vector of manually defined \eqn{\beta} values used in the -#' parallel tempering approach. If defined, this overrides the spacing defined -#' by \code{rungs}. Note that even manually defined \eqn{\beta} values are -#' raised to the power \eqn{\alpha} internally, hence you should set -#' \code{alpha = 1} if you want to fix \eqn{\beta} values exactly. -#' @param alpha the likelihood for the \out{ith} heated chain is -#' raised to the power \eqn{\beta[i]^\alpha}, meaning we can use the -#' \eqn{\alpha} parameter to concentrate rungs towards the start or the end of -#' the temperature scale. -#' @param target_acceptance Target acceptance rate. Should be between 0 and 1. -#' Default of 0.44, set as optimum for unvariate proposal distributions. -#' @param cluster option to pass in a cluster environment, allowing chains to be -#' run in parallel (see package "parallel"). -#' @param coupling_on whether to implement Metropolis-coupling over temperature -#' rungs. The option of deactivating coupling has been retained for general -#' interest and debugging purposes only. If this parameter is \code{FALSE} -#' then parallel tempering will have no impact on MCMC mixing. -#' @param pb_markdown whether to run progress bars in markdown mode, meaning -#' they are only updated when they reach 100\% to avoid large amounts of output -#' being printed to markdown files. -#' @param save_data if \code{TRUE} (the default) the raw input data is stored -#' for reference in the project output. This allows complete reproducibility -#' from a project, but may be undesirable when datasets are very large. -#' @param save_hot_draws if \code{TRUE} the parameter draws relating to the hot -#' chains are also stored inside the \code{pt} element of the project output. -#' If \code{FALSE} (the default) only log-likelihoods and log-priors are -#' stored from heated chains. -#' @param silent whether to suppress all console output. -#' -#' @importFrom utils txtProgressBar -#' @importFrom stats setNames var runif -#' @export - -run_mcmc <- function(data, - df_params, - misc = list(), - loglike, - logprior, - burnin = 1e3, - samples = 1e4, - rungs = 1, - chains = 5, - beta_manual = NULL, - alpha = 1.0, - target_acceptance = 0.44, - cluster = NULL, - coupling_on = TRUE, - pb_markdown = FALSE, - save_data = TRUE, - save_hot_draws = FALSE, - silent = FALSE) { - - # declare variables to avoid "no visible binding" issues - phase <- rung <- value <- chain <- link <- NULL - - # Cleanup pointers on exit - on.exit(gc()) - - # ---------- check inputs ---------- - - # check data - assert_list_named(data) - - # check misc - assert_list(misc) - - # check loglikelihood and logprior functions - assert_class(loglike, c("function", "character")) - assert_class(logprior, c("function", "character")) - - # check MCMC parameters - assert_single_pos_int(burnin, zero_allowed = FALSE) - assert_single_pos_int(samples, zero_allowed = FALSE) - assert_single_pos_int(rungs, zero_allowed = FALSE) - assert_single_pos_int(chains, zero_allowed = FALSE) - assert_single_logical(coupling_on) - assert_single_pos(alpha) - assert_bounded(target_acceptance, 0, 1) - - # check df_params - df_params <- check_params(df_params) - use_init <- ("init" %in% names(df_params)) - use_block <- ("block" %in% names(df_params)) - if (use_init) { - for (i in 1:nrow(df_params)) { - if (length(df_params$init[[i]]) != 1) { - assert_length(df_params$init[[i]], chains, message = paste0("must define one df_params$init value per parameter, ", - "or alternatively a list of values one for each chain")) - } - } - } - - # calculate/check final temperature vector - if (is.null(beta_manual)) { - beta_manual <- rev(seq(1, 0, l = rungs)) - } - rungs <- length(beta_manual) - assert_vector_bounded(beta_manual) - assert_increasing(beta_manual) - assert_eq(beta_manual[rungs], 1.0) - - # check misc parameters - if (!is.null(cluster)) { - assert_class(cluster, "cluster") - } - assert_single_logical(pb_markdown) - assert_single_logical(save_data) - assert_single_logical(save_hot_draws) - assert_single_logical(silent) - - - # ---------- pre-processing ---------- - - # calculate transformation type for each parameter - # 0 = [-Inf,Inf] -> phi = theta - # 1 = [-Inf,b] -> phi = log(b - theta) - # 2 = [a,Inf] -> phi = log(theta - a) - # 3 = [a,b] -> phi = log((theta - a)/(b - theta)) - df_params$trans_type <- 2*is.finite(df_params$min) + is.finite(df_params$max) - - # flag to skip over fixed parameters - skip_param <- (df_params$min == df_params$max) - - # define default init values - if (!use_init) { - init_list <- list() - for (i in 1:nrow(df_params)) { - p <- runif(chains) - if (df_params$trans_type[i] == 0) { - init_list[[i]] <- log(p) - log(1 - p) - } else if (df_params$trans_type[i] == 1) { - init_list[[i]] <- log(p) + df_params$max[i] - } else if (df_params$trans_type[i] == 2) { - init_list[[i]] <- df_params$min[i] - log(p) - } else if (df_params$trans_type[i] == 3) { - init_list[[i]] <- df_params$min[i] + (df_params$max[i] - df_params$min[i])*p - } - } - df_params$init <- init_list - } - - # define default blocks - if (!use_block) { - df_params$block <- as.list(rep(1, nrow(df_params))) - } - - # get initial values into matrix. Rows for parameters, columns for chains - init_mat <- do.call(rbind, mapply(function(x) { - if (length(x) == 1) { - rep(x, chains) - } else { - x - } - }, df_params$init, SIMPLIFY = FALSE)) - - # flag whether likelihood and/or prior are C++ functions - loglike_use_cpp <- inherits(loglike, "character") - logprior_use_cpp <- inherits(logprior, "character") - - # raise temperature vector to power - beta_raised <- beta_manual^alpha - - # make sure "block" is not an element of misc already being used, and if not - # create dummy element for storing current block - if (length(misc) > 0) { - assert_not_in("block", names(misc), message = "'block' is a reserved name within misc object") - } - misc$block <- -1 - - - # ---------- define argument lists ---------- - - # parameters to pass to C++ - args_params <- list(x = data, - misc = misc, - loglike_use_cpp = loglike_use_cpp, - logprior_use_cpp = logprior_use_cpp, - theta_min = df_params$min, - theta_max = df_params$max, - block = df_params$block, - n_block = max(unlist(df_params$block)), - trans_type = df_params$trans_type, - skip_param = skip_param, - burnin = burnin, - samples = samples, - rungs = rungs, - coupling_on = coupling_on, - beta_raised = beta_raised, - save_hot_draws = save_hot_draws, - pb_markdown = pb_markdown, - silent = silent, - target_acceptance = target_acceptance) - - # functions to pass to C++ - args_functions <- list(loglike = loglike, - logprior = logprior, - test_convergence = test_convergence, - update_progress = update_progress) - - # complete list of arguments - args <- list(args_params = args_params, - args_functions = args_functions) - - # create distinct argument sets over chains - parallel_args <- replicate(chains, args, simplify = FALSE) - for (i in 1:chains) { - parallel_args[[i]]$args_params$chain <- i - - # create named vector object for passing internally within C++ functions. - # Initial values defined separately for each chain - parallel_args[[i]]$args_params$theta_vector <- setNames(init_mat[,i], df_params$name) - } - - - # ---------- run MCMC ---------- - - # split into parallel and serial implementations - if (!is.null(cluster)) { - - # run in parallel - parallel::clusterEvalQ(cluster, library(drjacoby)) - output_raw <- parallel::clusterApplyLB(cl = cluster, parallel_args, deploy_chain) - - } else { - - # run in serial - output_raw <- lapply(parallel_args, deploy_chain) - } - - # print total runtime - chain_runtimes <- mapply(function(x) x$t_diff, output_raw) - if (!silent) { - message(sprintf("total MCMC run-time: %s seconds", signif(sum(chain_runtimes), 3))) - } - - - # ---------- process output ---------- - - # define names - chain_names <- 1:chains - rung_names <- 1:rungs - param_names <- df_params$name - - # get parameter draws into dataframe. This will be over all rungs if - # save_hot_draws is TRUE, otherwise it will only be over the cold chain - df_theta <- do.call(rbind, mapply(function(j) { - do.call(rbind, mapply(function(i) { - - theta_burnin <- do.call(rbind, output_raw[[j]]$theta_burnin[[i]]) %>% - as.data.frame() %>% - magrittr::set_colnames(param_names) %>% - dplyr::mutate(chain = chain_names[j], - rung = rung_names[i], - phase = "burnin", .before = 1) - - theta_sampling <- do.call(rbind, output_raw[[j]]$theta_sampling[[i]]) %>% - as.data.frame() %>% - magrittr::set_colnames(param_names) %>% - dplyr::mutate(chain = chain_names[j], - rung = rung_names[i], - phase = "sampling", .before = 1) - - ret <- theta_burnin %>% - dplyr::bind_rows(theta_sampling) %>% - dplyr::mutate(iteration = seq_along(phase), .after = "phase") - - return(ret) - }, seq_along(output_raw[[j]]$theta_burnin), SIMPLIFY = FALSE)) - }, seq_along(output_raw), SIMPLIFY = FALSE)) - - # fix rungs field if save_hot_draws is FALSE - if (!save_hot_draws) { - df_theta$rung <- rungs - } - - # get likelihoods and priors over all rungs - df_pt <- do.call(rbind, mapply(function(j) { - do.call(rbind, mapply(function(i) { - - pt_burnin <- data.frame(chain = chain_names[j], - rung = rung_names[i], - phase = "burnin", - logprior = output_raw[[j]]$logprior_burnin[[i]], - loglikelihood = output_raw[[j]]$loglike_burnin[[i]]) - - pt_sampling <- data.frame(chain = chain_names[j], - rung = rung_names[i], - phase = "sampling", - logprior = output_raw[[j]]$logprior_sampling[[i]], - loglikelihood = output_raw[[j]]$loglike_sampling[[i]]) - - ret <- pt_burnin %>% - dplyr::bind_rows(pt_sampling) %>% - dplyr::mutate(iteration = seq_along(phase), .after = "phase") - - return(ret) - }, seq_along(output_raw[[j]]$logprior_burnin), SIMPLIFY = FALSE)) - }, seq_along(output_raw), SIMPLIFY = FALSE)) - - # merge loglike and logprior for cold chain into main output - df_theta <- df_theta %>% - dplyr::left_join(df_pt, by = c("chain", "rung", "phase", "iteration")) - - # if save_hot_draws = TRUE then merge theta values back into pt output - if (save_hot_draws) { - df_pt <- df_pt %>% - dplyr::left_join(dplyr::select(df_theta, -.data$loglikelihood, -.data$logprior), by = c("chain", "rung", "phase", "iteration")) - } - - # drop rungs field from main output - df_output <- df_theta %>% - dplyr::filter(.data$rung == max(rungs)) %>% - dplyr::select(-.data$rung) - - # check for bad values in output - if (!all(is.finite(unlist(df_output[, param_names])))) { - stop("output contains non-finite values") - } - - # append to output list - output_processed <- list(output = df_output, - pt = df_pt) - - ## Diagnostics - output_processed$diagnostics <- list() - - # run-times - run_time <- data.frame(chain = chain_names, - seconds = chain_runtimes) - output_processed$diagnostics$run_time <- run_time - - # Rhat (Gelman-Rubin diagnostic) - if (chains > 1) { - rhat_est <- c() - for (p in seq_along(param_names)) { - rhat_est[p] <- df_output %>% - dplyr::filter(phase == "sampling") %>% - dplyr::select(chain, param_names[p]) %>% - gelman_rubin(chains = chains, samples = samples) - } - rhat_est[skip_param] <- NA - names(rhat_est) <- param_names - output_processed$diagnostics$rhat <- rhat_est - } - - # ESS - ess_est <- df_output %>% - dplyr::filter(phase == "sampling") %>% - dplyr::select(param_names) %>% - apply(2, coda::effectiveSize) - ess_est[skip_param] <- NA - output_processed$diagnostics$ess <- ess_est - - # Thermodynamic power - output_processed$diagnostics$rung_details <- data.frame(rung = 1:rungs, - thermodynamic_power = beta_raised) - - # Metropolis-coupling - # store acceptance rates between pairs of rungs (links) - mc_accept <- NA - if (rungs > 1) { - - # MC accept - mc_accept <- expand.grid(link = seq_len(rungs - 1), chain = chain_names) - mc_accept_burnin <- unlist(lapply(output_raw, function(x){x$mc_accept_burnin})) / burnin - mc_accept_sampling <- unlist(lapply(output_raw, function(x){x$mc_accept_sampling})) / samples - mc_accept <- rbind(cbind(mc_accept, phase = "burnin", value = mc_accept_burnin), - cbind(mc_accept, phase = "sampling", value = mc_accept_sampling)) - - } - output_processed$diagnostics$mc_accept <- mc_accept - - # DIC - DIC <- df_pt %>% - dplyr::filter(.data$phase == "sampling" & .data$rung == rungs) %>% - dplyr::select(.data$loglikelihood) %>% - dplyr::mutate(deviance = -2*.data$loglikelihood) %>% - dplyr::summarise(DIC = mean(.data$deviance) + 0.5*var(.data$deviance)) %>% - dplyr::pull(.data$DIC) - output_processed$diagnostics$DIC_Gelman <- DIC - - ## Parameters - data_store <- NULL - if (save_data) { - data_store <- data - } - output_processed$parameters <- list(data = data_store, - df_params = df_params, - loglike = loglike, - logprior = logprior, - burnin = burnin, - samples = samples, - rungs = rungs, - chains = chains, - coupling_on = coupling_on, - alpha = alpha, - beta_manual = beta_manual) - - # save output as custom class - class(output_processed) <- "drjacoby_output" - - # return - return(output_processed) -} - -#------------------------------------------------ -# deploy main_mcmc for this chain -#' @noRd -deploy_chain <- function(args) { - - # Specify pointers to cpp functions - if (args$args_params$loglike_use_cpp) { - args$args_functions$loglike <- create_xptr(args$args_functions$loglike) - } - if (args$args_params$logprior_use_cpp) { - args$args_functions$logprior <- create_xptr(args$args_functions$logprior) - } - - # get parameters - burnin <- args$args_params$burnin - samples <- args$args_params$samples - - # make progress bars - pb_burnin <- txtProgressBar(min = 0, max = burnin, initial = NA, style = 3) - pb_samples <- txtProgressBar(min = 0, max = samples, initial = NA, style = 3) - args$args_progress <- list(pb_burnin = pb_burnin, - pb_samples = pb_samples) - - # run C++ function - ret <- main_cpp(args) - - # remove arguments - rm(args) - - return(ret) -} - -#------------------------------------------------ -# update progress bar -# pb_list = list of progress bar objects -# name = name of this progress bar -# i = new value of bar -# max_i = max value of bar (close when reach this value) -# close = whether to close when reach end -#' @importFrom utils setTxtProgressBar -#' @noRd -update_progress <- function(pb_list, name, i, max_i, close = TRUE) { - setTxtProgressBar(pb_list[[name]], i) - if (i == max_i & close) { - close(pb_list[[name]]) - } -} - -# Deal with user input cpp not being defined -globalVariables(c("create_xptr")) + +#------------------------------------------------ +#' @title Check that drjacoby package has loaded successfully +#' +#' @description Simple function to check that drjacoby package has loaded +#' successfully. Prints "drjacoby loaded successfully!" if so. +#' +#' @export + +check_drjacoby_loaded <- function() { + message("drjacoby loaded successfully!") +} + +#------------------------------------------------ +#' @title Define parameters dataframe +#' +#' @description Provides a convenient way of defining parameters in the format +#' required by \code{run_mcmc()}. Each parameter must have the following three +#' elements, defined in order: +#' \itemize{ +#' \item \code{name} - the parameter name. +#' \item \code{min} - the minimum value of the parameter. \code{-Inf} is +#' allowed. +#' \item \code{max} - the maximum value of the parameter. \code{Inf} is +#' allowed. +#' } +#' There following arguments are also optional: +#' \itemize{ +#' \item \code{init} - the initial value of the parameter. If running +#' multiple chains a vector of initial values can be used to specify distinct +#' values for each chain. +#' \item \code{block} - which likelihood block(s) this parameter belongs to. +#' See vignettes for instructions on using likelihood blocks. +#' } +#' +#' @param ... a series of named input arguments. +#' +#' +#' @export +#' @examples +#' define_params(name = "mu", min = -10, max = 10, init = 0, +#' name = "sigma", min = 0, max = 5, init = c(1, 2)) +#' +#' define_params(name = "mu1", min = -10, max = 10, init = 0, block = 1, +#' name = "mu2", min = -10, max = 10, init = 0, block = 2, +#' name = "sigma", min = 0, max = 5, init = 1, block = c(1, 2)) + +define_params <- function(...) { + x <- list(...) + + # check input format of arguments + assert_gr(length(x), 0, message = "input cannot be empty") + assert_in(names(x), c("name", "min", "max", "init", "block")) + use_init <- ("init" %in% names(x)) + use_block <- ("block" %in% names(x)) + n_cols <- 3 + use_init + use_block + if ((length(x) %% n_cols) != 0) { + stop("must have the same number of inputs per parameter") + } + n_param <- length(x) / n_cols + arg_names <-c("name", "min", "max") + if (use_init) { + arg_names <- c(arg_names, "init") + } + if (use_block) { + arg_names <- c(arg_names, "block") + } + assert_eq(names(x), rep(arg_names, n_param)) + + # create params dataframe + v <- n_cols*(0:(n_param - 1)) + ret <- data.frame(name = unlist(x[1 + v]), + min = unlist(x[2 + v]), + max = unlist(x[3 + v])) + if (use_init) { + ret$init <- x[which(arg_names == "init") + v] + } + if (use_block) { + ret$block <- x[which(arg_names == "block") + v] + } + + # run checks and standardise format + ret <- check_params(ret) + + return(ret) +} + +#------------------------------------------------ +# Check that params dataframe is formatted correctly, and return in standardised +# format (init and block coerced to list) +#' @noRd +check_params <- function(x) { + + # check dataframe has correct elements + assert_dataframe(x) + assert_in(c("name", "min", "max"), names(x), + message = "df_params must contain the columns 'name', 'min', 'max'") + if (any(duplicated(x$name))) { + stop("parameter names must be unique") + } + use_init <- ("init" %in% names(x)) + use_block <- ("block" %in% names(x)) + + # coerce init and block to list + if (use_init) { + if (!is.list(x$init)) { + x$init <- as.list(x$init) + } + } + if (use_block) { + if (!is.list(x$block)) { + x$block <- as.list(x$block) + } + } + + # check each row in turn + for (i in seq_len(nrow(x))) { + + # check format + assert_single_string(x$name[i], message = "parameter names must be character strings") + assert_single_numeric(x$min[i], message = "min values must be single values") + assert_single_numeric(x$max[i], message = "min values must be single values") + if (use_init) { + assert_vector_numeric(x$init[[i]], message = "init values must be numeric") + } + if (use_block) { + assert_vector_numeric(x$block[[i]], message = "block values must be numeric") + } + + # check order + assert_leq(x$min[i], x$max[i], message = "min values must be less than or equal to max values") + if (use_init) { + assert_greq(x$init[[i]], x$min[i], message = "init values must be greater than or equal to min values") + assert_leq(x$init[[i]], x$max[i], message = "init values must be less than or equal to max values") + } + } + + return(x) +} + +#------------------------------------------------ +#' @title Run drjacoby MCMC +#' +#' @description Run MCMC either with or without parallel tempering turned on. +#' Minimum inputs include a data object, a data.frame of parameters, a +#' log-likelihood function and a log-prior function. Produces an object of +#' class \code{drjacoby_output}, which contains all MCMC output along with +#' some diagnostics and a record of inputs. +#' +#' @details Note that both \code{data} and \code{misc} are passed into +#' log-likelihood/log-prior functions *by reference*. This means if you modify +#' these objects inside the functions then any changes will persist. +#' +#' @param data a named list or data frame or data values. +#' @param df_params a data.frame of parameters (see \code{?define_params}). +#' @param misc optional list object passed to likelihood and prior. This can be +#' useful for passing values that are not strictly data, for example passing a +#' lookup table to the prior function. +#' @param loglike,logprior the log-likelihood and log-prior functions used in +#' the MCMC. Can either be passed in as R functions (not in quotes), or as +#' character strings naming compiled C++ functions (in quotes). +#' @param burnin the number of burn-in iterations. Automatic tuning of proposal +#' standard deviations is only active during the burn-in period. +#' @param samples the number of sampling iterations. +#' @param rungs the number of temperature rungs used in the parallel tempering +#' method. By default, \eqn{\beta} values are equally spaced between 0 and 1, +#' i.e. \eqn{\beta[i]=}\code{(i-1)/(rungs-1)} for \code{i} in \code{1:rungs}. +#' The likelihood for the \out{ith} heated chain is raised to the +#' power \eqn{\beta[i]^\alpha}, meaning we can use the \eqn{\alpha} parameter +#' to concentrate rungs towards the start or the end of the interval (see the +#' \code{alpha} argument). +#' @param chains the number of independent replicates of the MCMC to run. If a +#' \code{cluster} object is defined then these chains are run in parallel, +#' otherwise they are run in serial. +#' @param beta_manual vector of manually defined \eqn{\beta} values used in the +#' parallel tempering approach. If defined, this overrides the spacing defined +#' by \code{rungs}. Note that even manually defined \eqn{\beta} values are +#' raised to the power \eqn{\alpha} internally, hence you should set +#' \code{alpha = 1} if you want to fix \eqn{\beta} values exactly. +#' @param alpha the likelihood for the \out{ith} heated chain is +#' raised to the power \eqn{\beta[i]^\alpha}, meaning we can use the +#' \eqn{\alpha} parameter to concentrate rungs towards the start or the end of +#' the temperature scale. +#' @param target_acceptance Target acceptance rate. Should be between 0 and 1. +#' Default of 0.44, set as optimum for unvariate proposal distributions. +#' @param cluster option to pass in a cluster environment, allowing chains to be +#' run in parallel (see package "parallel"). +#' @param coupling_on whether to implement Metropolis-coupling over temperature +#' rungs. The option of deactivating coupling has been retained for general +#' interest and debugging purposes only. If this parameter is \code{FALSE} +#' then parallel tempering will have no impact on MCMC mixing. +#' @param pb_markdown whether to run progress bars in markdown mode, meaning +#' they are only updated when they reach 100\% to avoid large amounts of output +#' being printed to markdown files. +#' @param save_data if \code{TRUE} (the default) the raw input data is stored +#' for reference in the project output. This allows complete reproducibility +#' from a project, but may be undesirable when datasets are very large. +#' @param save_hot_draws if \code{TRUE} the parameter draws relating to the hot +#' chains are also stored inside the \code{pt} element of the project output. +#' If \code{FALSE} (the default) only log-likelihoods and log-priors are +#' stored from heated chains. +#' @param silent whether to suppress all console output. +#' +#' @importFrom utils txtProgressBar +#' @importFrom stats setNames var runif +#' @export + +run_mcmc <- function(data, + df_params, + misc = list(), + loglike, + logprior, + burnin = 1e3, + samples = 1e4, + rungs = 1, + chains = 5, + beta_manual = NULL, + alpha = 1.0, + target_acceptance = 0.44, + cluster = NULL, + coupling_on = TRUE, + pb_markdown = FALSE, + save_data = TRUE, + save_hot_draws = FALSE, + silent = FALSE) { + + # declare variables to avoid "no visible binding" issues + phase <- rung <- value <- chain <- link <- NULL + + # Cleanup pointers on exit + on.exit(gc()) + + # ---------- check inputs ---------- + + # check data + assert_list_named(data) + + # check misc + assert_list(misc) + + # check loglikelihood and logprior functions + assert_class(loglike, c("function", "character")) + assert_class(logprior, c("function", "character")) + + # check MCMC parameters + assert_single_pos_int(burnin, zero_allowed = FALSE) + assert_single_pos_int(samples, zero_allowed = FALSE) + assert_single_pos_int(rungs, zero_allowed = FALSE) + assert_single_pos_int(chains, zero_allowed = FALSE) + assert_single_logical(coupling_on) + assert_single_pos(alpha) + assert_bounded(target_acceptance, 0, 1) + + # check df_params + df_params <- check_params(df_params) + use_init <- ("init" %in% names(df_params)) + use_block <- ("block" %in% names(df_params)) + if (use_init) { + for (i in 1:nrow(df_params)) { + if (length(df_params$init[[i]]) != 1) { + assert_length(df_params$init[[i]], chains, message = paste0("must define one df_params$init value per parameter, ", + "or alternatively a list of values one for each chain")) + } + } + } + + # calculate/check final temperature vector + if (is.null(beta_manual)) { + beta_manual <- rev(seq(1, 0, l = rungs)) + } + rungs <- length(beta_manual) + assert_vector_bounded(beta_manual) + assert_increasing(beta_manual) + assert_eq(beta_manual[rungs], 1.0) + + # check misc parameters + if (!is.null(cluster)) { + assert_class(cluster, "cluster") + } + assert_single_logical(pb_markdown) + assert_single_logical(save_data) + assert_single_logical(save_hot_draws) + assert_single_logical(silent) + + + # ---------- pre-processing ---------- + + # calculate transformation type for each parameter + # 0 = [-Inf,Inf] -> phi = theta + # 1 = [-Inf,b] -> phi = log(b - theta) + # 2 = [a,Inf] -> phi = log(theta - a) + # 3 = [a,b] -> phi = log((theta - a)/(b - theta)) + df_params$trans_type <- 2*is.finite(df_params$min) + is.finite(df_params$max) + + # flag to skip over fixed parameters + skip_param <- (df_params$min == df_params$max) + + # define default init values + if (!use_init) { + init_list <- list() + for (i in 1:nrow(df_params)) { + p <- runif(chains) + if (df_params$trans_type[i] == 0) { + init_list[[i]] <- log(p) - log(1 - p) + } else if (df_params$trans_type[i] == 1) { + init_list[[i]] <- log(p) + df_params$max[i] + } else if (df_params$trans_type[i] == 2) { + init_list[[i]] <- df_params$min[i] - log(p) + } else if (df_params$trans_type[i] == 3) { + init_list[[i]] <- df_params$min[i] + (df_params$max[i] - df_params$min[i])*p + } + } + df_params$init <- init_list + } + + # define default blocks + if (!use_block) { + df_params$block <- as.list(rep(1, nrow(df_params))) + } + + # get initial values into matrix. Rows for parameters, columns for chains + init_mat <- do.call(rbind, mapply(function(x) { + if (length(x) == 1) { + rep(x, chains) + } else { + x + } + }, df_params$init, SIMPLIFY = FALSE)) + + # flag whether likelihood and/or prior are C++ functions + loglike_use_cpp <- inherits(loglike, "character") + logprior_use_cpp <- inherits(logprior, "character") + + # raise temperature vector to power + beta_raised <- beta_manual^alpha + + # make sure "block" is not an element of misc already being used, and if not + # create dummy element for storing current block + if (length(misc) > 0) { + assert_not_in("block", names(misc), message = "'block' is a reserved name within misc object") + } + misc$block <- -1 + + + # ---------- define argument lists ---------- + + # parameters to pass to C++ + args_params <- list(x = data, + misc = misc, + loglike_use_cpp = loglike_use_cpp, + logprior_use_cpp = logprior_use_cpp, + theta_min = df_params$min, + theta_max = df_params$max, + block = df_params$block, + n_block = max(unlist(df_params$block)), + trans_type = df_params$trans_type, + skip_param = skip_param, + burnin = burnin, + samples = samples, + rungs = rungs, + coupling_on = coupling_on, + beta_raised = beta_raised, + save_hot_draws = save_hot_draws, + pb_markdown = pb_markdown, + silent = silent, + target_acceptance = target_acceptance) + + # functions to pass to C++ + args_functions <- list(loglike = loglike, + logprior = logprior, + test_convergence = test_convergence, + update_progress = update_progress) + + # complete list of arguments + args <- list(args_params = args_params, + args_functions = args_functions) + + # create distinct argument sets over chains + parallel_args <- replicate(chains, args, simplify = FALSE) + for (i in 1:chains) { + parallel_args[[i]]$args_params$chain <- i + + # create named vector object for passing internally within C++ functions. + # Initial values defined separately for each chain + parallel_args[[i]]$args_params$theta_vector <- setNames(init_mat[,i], df_params$name) + } + + + # ---------- run MCMC ---------- + + # split into parallel and serial implementations + if (!is.null(cluster)) { + + # run in parallel + parallel::clusterEvalQ(cluster, library(drjacoby)) + output_raw <- parallel::clusterApplyLB(cl = cluster, parallel_args, deploy_chain) + + } else { + + # run in serial + output_raw <- lapply(parallel_args, deploy_chain) + } + + # print total runtime + chain_runtimes <- mapply(function(x) x$t_diff, output_raw) + if (!silent) { + message(sprintf("total MCMC run-time: %s seconds", signif(sum(chain_runtimes), 3))) + } + + + # ---------- process output ---------- + + # define names + chain_names <- 1:chains + rung_names <- 1:rungs + param_names <- df_params$name + + # get parameter draws into data.frame. This will be over all rungs if + # save_hot_draws is TRUE, otherwise it will only be over the cold chain + df_theta <- do.call(rbind, mapply(function(j) { + do.call(rbind, mapply(function(i) { + + theta_burnin <- do.call(rbind, output_raw[[j]]$theta_burnin[[i]]) |> + as.data.frame() |> + setNames(param_names) |> + mutate(chain = chain_names[j], + rung = rung_names[i], + phase = "burnin", .before = 1) + + theta_sampling <- do.call(rbind, output_raw[[j]]$theta_sampling[[i]]) |> + as.data.frame() |> + setNames(param_names) |> + mutate(chain = chain_names[j], + rung = rung_names[i], + phase = "sampling", .before = 1) + + ret <- theta_burnin |> + bind_rows(theta_sampling) |> + mutate(iteration = row_number(), .after = "phase") + + return(ret) + }, seq_len(ifelse(save_hot_draws, rungs, 1)), SIMPLIFY = FALSE)) + }, seq_len(chains), SIMPLIFY = FALSE)) + + # fix rungs field if save_hot_draws is FALSE + if (!save_hot_draws) { + df_theta$rung <- rungs + } + + # get likelihoods and priors over all rungs + df_pt <- do.call(rbind, mapply(function(j) { + do.call(rbind, mapply(function(i) { + + pt_burnin <- data.frame(chain = chain_names[j], + rung = rung_names[i], + phase = "burnin", + logprior = output_raw[[j]]$logprior_burnin[[i]], + loglikelihood = output_raw[[j]]$loglike_burnin[[i]]) + + pt_sampling <- data.frame(chain = chain_names[j], + rung = rung_names[i], + phase = "sampling", + logprior = output_raw[[j]]$logprior_sampling[[i]], + loglikelihood = output_raw[[j]]$loglike_sampling[[i]]) + + ret <- pt_burnin |> + bind_rows(pt_sampling) |> + mutate(iteration = row_number(), .after = "phase") + + return(ret) + }, seq_len(rungs), SIMPLIFY = FALSE)) + }, seq_len(chains), SIMPLIFY = FALSE)) + + # merge loglike and logprior for cold chain into main output + df_theta <- df_theta %>% + left_join(df_pt, by = c("chain", "rung", "phase", "iteration")) + + # if save_hot_draws = TRUE then merge theta values back into pt output + if (save_hot_draws) { + df_pt <- df_pt %>% + left_join(dplyr::select(df_theta, -.data$loglikelihood, -.data$logprior), by = c("chain", "rung", "phase", "iteration")) + } + + # drop rungs field from main output + df_output <- df_theta %>% + dplyr::filter(.data$rung == max(rungs)) %>% + dplyr::select(-.data$rung) + + # check for bad values in output + if (!all(is.finite(unlist(df_output[, param_names])))) { + stop("output contains non-finite values") + } + + # append to output list + output_processed <- list(output = df_output, + pt = df_pt) + + ## Diagnostics + output_processed$diagnostics <- list() + + # run-times + run_time <- data.frame(chain = chain_names, + seconds = chain_runtimes) + output_processed$diagnostics$run_time <- run_time + + # Rhat (Gelman-Rubin diagnostic) + if (chains > 1) { + rhat_est <- c() + for (p in seq_along(param_names)) { + rhat_est[p] <- df_output %>% + dplyr::filter(phase == "sampling") %>% + dplyr::select(chain, param_names[p]) %>% + gelman_rubin(chains = chains, samples = samples) + } + rhat_est[skip_param] <- NA + names(rhat_est) <- param_names + output_processed$diagnostics$rhat <- rhat_est + } + + # ESS + ess_est <- df_output %>% + dplyr::filter(phase == "sampling") %>% + dplyr::select(param_names) %>% + apply(2, coda::effectiveSize) + ess_est[skip_param] <- NA + output_processed$diagnostics$ess <- ess_est + + # Thermodynamic power + output_processed$diagnostics$rung_details <- data.frame(rung = 1:rungs, + thermodynamic_power = beta_raised) + + # Metropolis-coupling + # store acceptance rates between pairs of rungs (links) + mc_accept <- NA + if (rungs > 1) { + + # MC accept + mc_accept <- expand.grid(link = seq_len(rungs - 1), chain = chain_names) + mc_accept_burnin <- unlist(lapply(output_raw, function(x){x$mc_accept_burnin})) / burnin + mc_accept_sampling <- unlist(lapply(output_raw, function(x){x$mc_accept_sampling})) / samples + mc_accept <- rbind(cbind(mc_accept, phase = "burnin", value = mc_accept_burnin), + cbind(mc_accept, phase = "sampling", value = mc_accept_sampling)) + + } + output_processed$diagnostics$mc_accept <- mc_accept + + # DIC + DIC <- df_pt %>% + dplyr::filter(.data$phase == "sampling" & .data$rung == rungs) %>% + dplyr::select(.data$loglikelihood) %>% + dplyr::mutate(deviance = -2*.data$loglikelihood) %>% + dplyr::summarise(DIC = mean(.data$deviance) + 0.5*var(.data$deviance)) %>% + dplyr::pull(.data$DIC) + output_processed$diagnostics$DIC_Gelman <- DIC + + ## Parameters + data_store <- NULL + if (save_data) { + data_store <- data + } + output_processed$parameters <- list(data = data_store, + df_params = df_params, + loglike = loglike, + logprior = logprior, + burnin = burnin, + samples = samples, + rungs = rungs, + chains = chains, + coupling_on = coupling_on, + alpha = alpha, + beta_manual = beta_manual) + + # save output as custom class + class(output_processed) <- "drjacoby_output" + + # return + return(output_processed) +} + +#------------------------------------------------ +# deploy main_mcmc for this chain +#' @noRd +deploy_chain <- function(args) { + + # Specify pointers to cpp functions + if (args$args_params$loglike_use_cpp) { + args$args_functions$loglike <- create_xptr(args$args_functions$loglike) + } + if (args$args_params$logprior_use_cpp) { + args$args_functions$logprior <- create_xptr(args$args_functions$logprior) + } + + # get parameters + burnin <- args$args_params$burnin + samples <- args$args_params$samples + + # make progress bars + pb_burnin <- txtProgressBar(min = 0, max = burnin, initial = NA, style = 3) + pb_samples <- txtProgressBar(min = 0, max = samples, initial = NA, style = 3) + args$args_progress <- list(pb_burnin = pb_burnin, + pb_samples = pb_samples) + + # run C++ function + ret <- main_cpp(args) + + # remove arguments + rm(args) + + return(ret) +} + +#------------------------------------------------ +# update progress bar +# pb_list = list of progress bar objects +# name = name of this progress bar +# i = new value of bar +# max_i = max value of bar (close when reach this value) +# close = whether to close when reach end +#' @importFrom utils setTxtProgressBar +#' @noRd +update_progress <- function(pb_list, name, i, max_i, close = TRUE) { + setTxtProgressBar(pb_list[[name]], i) + if (i == max_i & close) { + close(pb_list[[name]]) + } +} + +# Deal with user input cpp not being defined +globalVariables(c("create_xptr")) diff --git a/R/plot.R b/R/plot.R index 595d96c..0f6772f 100644 --- a/R/plot.R +++ b/R/plot.R @@ -235,7 +235,7 @@ plot_autocorrelation <- function(x, lag = 20, par = NULL, chain = 1, phase = "sa } #------------------------------------------------ -#' @title Plot parameter estimates +#' @title Plot parameter trace #' #' @description Produce a series of plots corresponding to each parameter, #' including the raw trace, the posterior histogram and an autocorrelation @@ -256,9 +256,9 @@ plot_autocorrelation <- function(x, lag = 20, par = NULL, chain = 1, phase = "sa #' #' @export -plot_par <- function(x, show = NULL, hide = NULL, lag = 20, - downsample = TRUE, phase = "sampling", - chain = NULL, display = TRUE) { +plot_trace <- function(x, show = NULL, hide = NULL, lag = 20, + downsample = TRUE, phase = "sampling", + chain = NULL, display = TRUE) { # check inputs and define defaults assert_class(x, "drjacoby_output") @@ -373,20 +373,21 @@ plot_par <- function(x, show = NULL, hide = NULL, lag = 20, } #------------------------------------------------ -#' @title Plot parameter correlation +#' @title Produce bivariate scatterplot #' -#' @description Plots correlation between two parameters +#' @description Produces scatterplot between two named parameters. #' #' @inheritParams plot_rung_loglike #' @param parameter1 name of parameter first parameter. #' @param parameter2 name of parameter second parameter. -#' @param downsample whether to downsample output to speed up plotting. +#' @param downsample whether to downsample output to 200 values max to speed up +#' plotting. #' #' @export -plot_cor <- function(x, parameter1, parameter2, - downsample = TRUE, phase = "sampling", - chain = NULL) { +plot_scatter <- function(x, parameter1, parameter2, + downsample = TRUE, phase = "sampling", + chain = NULL) { # check inputs assert_class(x, "drjacoby_output") @@ -552,3 +553,90 @@ plot_cor_mat <- function(x, show = NULL, phase = "sampling", param_names = NULL) ggplot2::xlab("") + ggplot2::ylab("") } + +#------------------------------------------------ +#' @title Produce scatterplots between multiple parameters +#' +#' @description Uses \code{ggpairs} function from the \code{GGally} package to +#' produce scatterplots between all named parameters. +#' +#' @inheritParams plot_trace +#' +#' @importFrom GGally ggpairs wrap +#' @export +plot_pairs <- function(x, show = NULL, hide = NULL) { + + # check inputs + assert_class(x, "drjacoby_output") + + # avoid "no visible binding" note + chain <- NULL + + # subsample posterior draws + param_draws <- sample_chains(x, sample_n = 1e3, keep_chain_index = TRUE) + + # get parameter names and apply show and hide conditions + param_names <- setdiff(names(param_draws), c("chain", "sample")) + if (!is.null(show)) { + assert_string(show) + param_names <- intersect(param_names, show) + } + if (!is.null(hide)) { + assert_string(hide) + param_names <- setdiff(param_names, hide) + } + if (length(param_names) == 0) { + stop("no parameters remaining after applying show and hide") + } + + # produce plot + param_draws |> + ggpairs(aes(col = as.factor(chain)), + columns = param_names, + lower = list(continuous = wrap("points", size = 0.5))) + theme_bw() +} + +#------------------------------------------------ +#' @title Produce density plots +#' +#' @description Density plots of all parameters. Use \code{show} and \code{hide} +#' to be more specific about which parameters to plot. +#' +#' @inheritParams plot_trace +#' +#' @importFrom tidyr pivot_longer +#' @export +plot_density <- function(x, show = NULL, hide = NULL) { + + # avoid "no visible binding" notes + value <- NULL + + # check inputs + assert_class(x, "drjacoby_output") + + # subsample posterior draws + param_draws <- sample_chains(x, sample_n = 1e3, keep_chain_index = FALSE) + + # get parameter names and apply show and hide conditions + param_names <- setdiff(names(param_draws), c("chain", "sample")) + if (!is.null(show)) { + assert_string(show) + param_names <- intersect(param_names, show) + } + if (!is.null(hide)) { + assert_string(hide) + param_names <- setdiff(param_names, hide) + } + if (length(param_names) == 0) { + stop("no parameters remaining after applying show and hide") + } + + # produce plot + param_draws |> + select(all_of(param_names)) |> + pivot_longer(cols = everything()) |> + ggplot(aes(x = value)) + theme_bw() + + geom_density(fill = "blue", alpha = 0.5) + + facet_wrap(~name, scales = "free") + + xlab("Parameter value") + ylab("Probability density") +} diff --git a/R/samples.R b/R/samples.R index 98f14fe..583e6e5 100644 --- a/R/samples.R +++ b/R/samples.R @@ -1,41 +1,47 @@ -#------------------------------------------------ -# return 95% quantile -#' @importFrom stats quantile -#' @noRd -quantile_95 <- function(x) { - ret <- quantile(x, probs = c(0.025, 0.5, 0.975)) - names(ret) <- c("Q2.5", "Q50", "Q97.5") - return(ret) -} - -#------------------------------------------------ -#' Sample N draws from all available chains -#' -#' @param x an object of class \code{drjacoby_output} -#' @param sample_n An integer number of samples -#' -#' @return A data.frame of posterior samples -#' @export -sample_chains <- function(x, sample_n) { - - # check inputs - assert_class(x, "drjacoby_output") - assert_pos_int(sample_n, zero_allowed = FALSE) - - # Join chains - all_chains <- dplyr::filter(x$output, .data$phase == "sampling") %>% - dplyr::select(-.data$chain, -.data$iteration, -.data$phase, -.data$logprior, -.data$loglikelihood) - assert_leq(sample_n, nrow(all_chains)) - - # Sample chains - sampled_chains <- all_chains[seq.int(1, nrow(all_chains), length.out = sample_n),, drop = FALSE] - sampled_chains$sample <- 1:nrow(sampled_chains) - - # ESS - ess_est_sampled <- round(apply(sampled_chains[,1:(ncol(sampled_chains) - 1), drop = FALSE], 2, coda::effectiveSize)) - message("Effective sample size of sample has range: ", min(ess_est_sampled), - " to ", max(ess_est_sampled), ". See function ess to estimate.") - - return(sampled_chains) -} - +#------------------------------------------------ +# return 95% quantile +#' @importFrom stats quantile +#' @noRd +quantile_95 <- function(x) { + ret <- quantile(x, probs = c(0.025, 0.5, 0.975)) + names(ret) <- c("Q2.5", "Q50", "Q97.5") + return(ret) +} + +#------------------------------------------------ +#' Sample posterior draws from all available chains +#' +#' @param x an object of class \code{drjacoby_output}. +#' @param sample_n An integer number of samples. +#' @param keep_chain_index if \code{TRUE} then the column giving the chain is +#' retained. +#' +#' @return A data.frame of posterior samples +#' @export +sample_chains <- function(x, sample_n, keep_chain_index = FALSE) { + + # avoid "no visible binding" note + phase <- chain <- iteration <- logprior <- loglikelihood <- NULL + + # check inputs + assert_class(x, "drjacoby_output") + assert_pos_int(sample_n, zero_allowed = FALSE) + assert_single_logical(keep_chain_index) + + # join chains + all_chains <- x$output |> + filter(phase == "sampling") |> + select(-c(iteration, phase, logprior, loglikelihood)) + if (!keep_chain_index) { + all_chains <- all_chains |> + select(-chain) + } + assert_leq(sample_n, nrow(all_chains), message = sprintf("sample_n cannot exceed the total number of samples over all chains (%s)", nrow(all_chains))) + + # sample chains + sampled_chains <- all_chains[seq.int(1, nrow(all_chains), length.out = sample_n),, drop = FALSE] + sampled_chains$sample <- 1:nrow(sampled_chains) + + return(sampled_chains) +} + diff --git a/R_ignore/deploy.R b/R_ignore/deploy.R index 93e27b3..b8d0a05 100644 --- a/R_ignore/deploy.R +++ b/R_ignore/deploy.R @@ -44,4 +44,6 @@ mcmc <- run_mcmc(data = list(x = x), burnin = 1e3, samples = 1e3) -plot_par(mcmc, show = "mu", phase = "burnin") +plot_trace(mcmc, show = "mu", phase = "burnin") + +plot_density(mcmc) diff --git a/docs/404.html b/docs/404.html index 8516f01..c5204d0 100644 --- a/docs/404.html +++ b/docs/404.html @@ -1,66 +1,27 @@ - - -
- + + + + -MIT License - -Copyright (c) 2019 MRC Centre for Outbreak Analysis and Modelling - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +YEAR: 2024 +COPYRIGHT HOLDER: MRC Centre for Outbreak Analysis and Modelling
MIT License
+Copyright (c) 2024 MRC Centre for Outbreak Analysis and Modelling
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ + +Developed by Bob Verity, Pete Winskill.
+Site built with pkgdown 2.0.9.
+