From e450186d6d53b5f810ba34f59399b70c0f3ac678 Mon Sep 17 00:00:00 2001 From: FBartos <38475991+FBartos@users.noreply.github.com> Date: Tue, 29 Nov 2022 22:14:00 +0100 Subject: [PATCH] Clusters (#11) * clustering * fixed nested subsampling --- DESCRIPTION | 4 +- NAMESPACE | 1 + NEWS.md | 4 + R/RcppExports.R | 12 +- R/data-preparation.R | 66 ++-- R/main.R | 10 +- R/zcurve_EM.R | 10 + R/zcurve_clustered.R | 353 ++++++++++++++++++ man/zcurve-package.Rd | 2 +- man/zcurve_clustered.Rd | 40 ++ man/zcurve_data.Rd | 4 +- src/RcppExports.cpp | 59 ++- src/zcurveEM.cpp | 178 ++++++++- .../_snaps/zcurve/z-curve-cens-em.svg | 6 +- .../_snaps/zcurve/z-curve-clustered-mixed.svg | 186 +++++++++ .../zcurve/z-curve-clustered-precise.svg | 186 +++++++++ .../zcurve/z-curve-clustered-rounded.svg | 186 +++++++++ .../_snaps/zcurve/z-curve-mixed-em.svg | 4 +- tests/testthat/test-zcurve.R | 160 +++++++- 19 files changed, 1420 insertions(+), 51 deletions(-) create mode 100644 R/zcurve_clustered.R create mode 100644 man/zcurve_clustered.Rd create mode 100644 tests/testthat/_snaps/zcurve/z-curve-clustered-mixed.svg create mode 100644 tests/testthat/_snaps/zcurve/z-curve-clustered-precise.svg create mode 100644 tests/testthat/_snaps/zcurve/z-curve-clustered-rounded.svg diff --git a/DESCRIPTION b/DESCRIPTION index 0ae5473..c1cfd1f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: zcurve Title: An Implementation of Z-Curves -Version: 2.1.2 +Version: 2.2.0 Authors@R: c( person("František", "Bartoš", email = "f.bartos96@gmail.com", role = c("aut", "cre")), person("Ulrich", "Schimmack", email = "ulrich.schimmack@utoronto.ca", role = c("aut"))) @@ -17,7 +17,7 @@ License: GPL-3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.1 +RoxygenNote: 7.2.1 Imports: Rcpp (>= 1.0.2), nleqslv, diff --git a/NAMESPACE b/NAMESPACE index a80a25a..b89e2f2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,6 +27,7 @@ export(significant_n) export(summary.zcurve) export(z_to_power) export(zcurve) +export(zcurve_clustered) export(zcurve_data) importFrom(Rcpp,sourceCpp) importFrom(Rdpack,reprompt) diff --git a/NEWS.md b/NEWS.md index 5778143..f7e1f5f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +## version 2.2.0 +- Implementation of z-curve version that deals with clustered estimates. +- Fixing bootstrap of censored data -- now both the censored and uncensored data are bootstrapped. + ## version 2.1.2 - Fixing number of statistically significant studies displayed in the annotation of z-curve plot. diff --git a/R/RcppExports.R b/R/RcppExports.R index 5be6984..987372c 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -37,6 +37,10 @@ .Call(`_zcurve_zcurve_EMc_fit_fast_RCpp`, x, lb, ub, mu, sigma, theta, a, b, sig_level, max_iter, criterion) } +.zcurve_EMc_fit_fast_w_RCpp <- function(x, x_w, lb, ub, b_w, mu, sigma, theta, a, b, sig_level, max_iter, criterion) { + .Call(`_zcurve_zcurve_EMc_fit_fast_w_RCpp`, x, x_w, lb, ub, b_w, mu, sigma, theta, a, b, sig_level, max_iter, criterion) +} + .zcurve_EM_start_RCpp <- function(x, type, K, mu, sigma, mu_alpha, mu_max, theta_alpha, a, b, sig_level, fit_reps, max_iter, criterion) { .Call(`_zcurve_zcurve_EM_start_RCpp`, x, type, K, mu, sigma, mu_alpha, mu_max, theta_alpha, a, b, sig_level, fit_reps, max_iter, criterion) } @@ -57,7 +61,11 @@ .Call(`_zcurve_zcurve_EMc_start_fast_RCpp`, x, lb, ub, K, mu, sigma, mu_alpha, mu_max, theta_alpha, a, b, sig_level, fit_reps, max_iter, criterion) } -.zcurve_EMc_boot_fast_RCpp <- function(x, lb, ub, mu, sigma, theta, a, b, sig_level, bootstrap, max_iter, criterion) { - .Call(`_zcurve_zcurve_EMc_boot_fast_RCpp`, x, lb, ub, mu, sigma, theta, a, b, sig_level, bootstrap, max_iter, criterion) +.zcurve_EMc_boot_fast_RCpp <- function(x, lb, ub, indx, mu, sigma, theta, a, b, sig_level, bootstrap, max_iter, criterion) { + .Call(`_zcurve_zcurve_EMc_boot_fast_RCpp`, x, lb, ub, indx, mu, sigma, theta, a, b, sig_level, bootstrap, max_iter, criterion) +} + +.zcurve_EMc_boot_fast_w_RCpp <- function(x, x_w, lb, ub, b_w, indx, mu, sigma, theta, a, b, sig_level, bootstrap, max_iter, criterion) { + .Call(`_zcurve_zcurve_EMc_boot_fast_w_RCpp`, x, x_w, lb, ub, b_w, indx, mu, sigma, theta, a, b, sig_level, bootstrap, max_iter, criterion) } diff --git a/R/data-preparation.R b/R/data-preparation.R index 246a766..66336ff 100644 --- a/R/data-preparation.R +++ b/R/data-preparation.R @@ -8,6 +8,7 @@ #' (i.e., rounded to too few decimals). See details for more information. #' #' @param data a vector strings containing the test statistics. +#' @param id a vector identifying observations from the same cluster. #' @param rounded an optional argument specifying whether de-rounding should be applied. #' Defaults to \code{FALSE} to treat all input as exact values or a numeric #' vector with values specifying precision of the input. The other option, @@ -50,7 +51,19 @@ #' # inspect the resulting object #' data #' @seealso [zcurve()], [print.zcurve_data()], [head.zcurve_data()] -zcurve_data <- function(data, rounded = TRUE, stat_precise = 2, p_precise = 3){ +zcurve_data <- function(data, id = NULL, rounded = TRUE, stat_precise = 2, p_precise = 3){ + + if(!is.character(data)){ + stop("'data' must be a character vector") + } + + if(is.null(id)){ + id <- 1:length(data) + }else if(is.vector(id) && length(data) == length(id)){ + id <- as.numeric(as.factor(as.character(id))) + }else{ + stop("'id' must be a vector of the same length as the data") + } data <- tolower(data) data <- gsub(" ", "", data) @@ -89,10 +102,10 @@ zcurve_data <- function(data, rounded = TRUE, stat_precise = 2, p_precise = 3){ # set rounding (0 = un-rounded due to automatic conversion) if(length(rounded) == 1 && !rounded){ # deal with the values as precise values - rounded <- rep(0, length(data)) + rounded <- rep(-1, length(data)) }else if(length(rounded) == 1 && rounded){ # specify automatic rounding - rounded <- rep(FALSE, length(data)) + rounded <- rep(-1, length(data)) rounded[stat_type == "p" & digits < p_precise] <- digits[stat_type == "p" & digits < p_precise] rounded[stat_type != "p" & digits < stat_precise] <- digits[stat_type != "p" & digits < stat_precise] }else{ @@ -112,7 +125,7 @@ zcurve_data <- function(data, rounded = TRUE, stat_precise = 2, p_precise = 3){ # compute and allocate the p-values accordingly for(i in seq_along(data)){ - if(rounded[i] == 0 && !censored[i]){ + if(rounded[i] == -1 && !censored[i]){ # precise non-censored values p_vals[i] <- tryCatch( switch( @@ -125,7 +138,7 @@ zcurve_data <- function(data, rounded = TRUE, stat_precise = 2, p_precise = 3){ ), warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'.")) ) - }else if(rounded[i] == 0 && censored[i]){ + }else if(rounded[i] == -1 && censored[i]){ # precise censored values p_vals.ub[i] <- tryCatch( switch( @@ -139,15 +152,21 @@ zcurve_data <- function(data, rounded = TRUE, stat_precise = 2, p_precise = 3){ warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'.")) ) p_vals.lb[i] <- 0 - }else if(rounded[i] != 0 && !censored[i]){ + }else if(rounded[i] != -1 && !censored[i]){ # rounded non-censored values + + temp_stat_val.lb <- abs(stat_val[i]) - 0.5 * 10^-digits[i] + temp_stat_val.ub <- abs(stat_val[i]) + 0.5 * 10^-digits[i] + + temp_stat_val.lb <- ifelse(temp_stat_val.lb < 0, 0, temp_stat_val.lb) + p_vals.ub[i] <- tryCatch( switch( stat_type[i], - "f" = stats::pf(stat_val[i] - 0.5 * 10^-digits[i] , df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE), - "c" = stats::pchisq(stat_val[i] - 0.5 * 10^-digits[i], df = stat_df1[i], lower.tail = FALSE), - "t" = stats::pt(abs(stat_val[i]) - 0.5 * 10^-digits[i], df = stat_df1[i], lower.tail = FALSE) * 2, - "z" = stats::pnorm(abs(stat_val[i]) - 0.5 * 10^-digits[i], lower.tail = FALSE) * 2, + "f" = stats::pf(temp_stat_val.lb , df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE), + "c" = stats::pchisq(temp_stat_val.lb, df = stat_df1[i], lower.tail = FALSE), + "t" = stats::pt(temp_stat_val.lb, df = stat_df1[i], lower.tail = FALSE) * 2, + "z" = stats::pnorm(temp_stat_val.lb, lower.tail = FALSE) * 2, "p" = stat_val[i] + 0.5 * 10^-digits[i] ), warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'.")) @@ -155,23 +174,26 @@ zcurve_data <- function(data, rounded = TRUE, stat_precise = 2, p_precise = 3){ p_vals.lb[i] <- tryCatch( switch( stat_type[i], - "f" = stats::pf(stat_val[i] + 0.5 * 10^-digits[i] , df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE), - "c" = stats::pchisq(stat_val[i] + 0.5 * 10^-digits[i], df = stat_df1[i], lower.tail = FALSE), - "t" = stats::pt(abs(stat_val[i]) + 0.5 * 10^-digits[i], df = stat_df1[i], lower.tail = FALSE) * 2, - "z" = stats::pnorm(abs(stat_val[i]) + 0.5 * 10^-digits[i], lower.tail = FALSE) * 2, + "f" = stats::pf(temp_stat_val.ub, df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE), + "c" = stats::pchisq(temp_stat_val.ub, df = stat_df1[i], lower.tail = FALSE), + "t" = stats::pt(temp_stat_val.ub, df = stat_df1[i], lower.tail = FALSE) * 2, + "z" = stats::pnorm(temp_stat_val.ub, lower.tail = FALSE) * 2, "p" = stat_val[i] - 0.5 * 10^-digits[i] ), warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'.")) ) - }else if(rounded[i] != 0 && !censored[i]){ + }else if(rounded[i] != -1 && censored[i]){ # rounded censored values + + temp_stat_val.ub <- abs(stat_val[i]) + 0.5 * 10^-digits[i] + p_vals.ub[i] <- tryCatch( switch( stat_type[i], - "f" = stats::pf(stat_val[i] - 0.5 * 10^-digits[i] , df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE), - "c" = stats::pchisq(stat_val[i] - 0.5 * 10^-digits[i], df = stat_df1[i], lower.tail = FALSE), - "t" = stats::pt(abs(stat_val[i]) - 0.5 * 10^-digits[i], df = stat_df1[i], lower.tail = FALSE) * 2, - "z" = stats::pnorm(abs(stat_val[i]) - 0.5 * 10^-digits[i], lower.tail = FALSE) * 2, + "f" = stats::pf(temp_stat_val.lb , df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE), + "c" = stats::pchisq(temp_stat_val.lb, df = stat_df1[i], lower.tail = FALSE), + "t" = stats::pt(temp_stat_val.lb, df = stat_df1[i], lower.tail = FALSE) * 2, + "z" = stats::pnorm(temp_stat_val.lb, lower.tail = FALSE) * 2, "p" = stat_val[i] + 0.5 * 10^-digits[i] ), warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'.")) @@ -183,12 +205,14 @@ zcurve_data <- function(data, rounded = TRUE, stat_precise = 2, p_precise = 3){ output <- list( precise = data.frame( "input" = data[!is.na(p_vals)], - "p" = p_vals[!is.na(p_vals)] + "p" = p_vals[!is.na(p_vals)], + "id" = id[!is.na(p_vals)] ), censored = data.frame( "input" = data[!is.na(p_vals.lb)], "p.lb" = p_vals.lb[!is.na(p_vals.lb)], - "p.ub" = p_vals.ub[!is.na(p_vals.ub)] + "p.ub" = p_vals.ub[!is.na(p_vals.ub)], + "id" = id[!is.na(p_vals.lb)] ) ) class(output) <- "zcurve_data" diff --git a/R/main.R b/R/main.R index 324a27b..9c37a66 100644 --- a/R/main.R +++ b/R/main.R @@ -104,7 +104,7 @@ zcurve <- function(z, z.lb, z.ub, p, p.lb, p.ub, data, method = "EM", boot stop("Wrong p-values input: Data are not a vector") input_type <- c(input_type, "p") } - }else if(class(data) == "zcurve_data"){ + }else if(inherits(data, "zcurve_data")){ input_type <- "zcurve-data" }else{ stop("The 'data' input must be created by the `zcurve_data()` function. See `?zcurve_data()` for more information.") @@ -175,13 +175,13 @@ zcurve <- function(z, z.lb, z.ub, p, p.lb, p.ub, data, method = "EM", boot }else{ - if(length(data$precise) != 0){ + if(nrow(data$precise) != 0){ object$data <- .p_to_z(data$precise$p) }else{ object$data <- numeric() } - if(length(data$censored) != 0){ + if(nrow(data$censored) != 0){ lb <- .p_to_z(data$censored$p.ub) ub <- .p_to_z(data$censored$p.lb) @@ -316,7 +316,7 @@ print.zcurve <- function(x, ...){ #' @seealso [zcurve()] summary.zcurve <- function(object, type = "results", all = FALSE, ERR.adj = .03, EDR.adj = .05, round.coef = 3, ...){ - if(object$method == "EM"){ + if(substr(object$method, 1, 2) == "EM"){ if(!is.null(object$boot)){ fit_index <- c(object$fit$Q, unname(stats::quantile(object$boot$Q, c(.025, .975)))) }else{ @@ -609,7 +609,7 @@ plot.zcurve <- function(x, annotation = FALSE, CI = FALSE, extrapolate h1 <- graphics::hist(x$data[x$data > x$control$a & x$data < x$control$b], breaks = br1, plot = F) # add censored observations to the histogram - if(length(x$data_censoring) != 0){ + if(nrow(x$data_censoring) != 0){ # spread the censored observation across the z-values cen_counts <- do.call(rbind, lapply(1:nrow(x$data_censoring), function(i){ temp_counts <- (x$data_censoring$lb[i] < h1$breaks[-length(h1$breaks)] & x$data_censoring$ub[i] > h1$breaks[-length(h1$breaks)]) diff --git a/R/zcurve_EM.R b/R/zcurve_EM.R index dfcf399..f0bd382 100644 --- a/R/zcurve_EM.R +++ b/R/zcurve_EM.R @@ -71,6 +71,8 @@ max_iter = control$max_iter, criterion = control$criterion) }else if(control$type == 3){ + + fit <- .zcurve_EMc_fit_fast_RCpp(x = z, lb = lb, ub = ub, @@ -123,9 +125,17 @@ criterion = control$criterion_boot, max_iter = control$max_iter_boot) }else if(control$type == 3){ + + indx <- c( + if(length(z) > 0) 1:length(z), + if(length(lb) > 0) (-length(lb)):-1 + ) + + fit_boot <- .zcurve_EMc_boot_fast_RCpp(x = z, lb = lb, ub = ub, + indx = indx, mu = fit$mu, sigma = control$sigma, theta = fit$weights, diff --git a/R/zcurve_clustered.R b/R/zcurve_clustered.R new file mode 100644 index 0000000..d0caff6 --- /dev/null +++ b/R/zcurve_clustered.R @@ -0,0 +1,353 @@ +#' @title Fit a z-curve to clustered data +#' +#' @description \code{zcurve_clustered} is used to fit z-curve models to +#' clustered data. The function requires a data object created with the +#' [zcurve_data()] function as the input (where id denotes clusters). +#' Two different methods that account for clustering ar implemented via +#' the EM model: \code{"w"} for down weighting the likelihood of the test +#' statistics proportionately to the number of repetitions in the clusters, +#' and \code{"b"} for a nested bootstrap where only a single study from each +#' bootstrap is selected for model fitting. +#' @param data an object created with [zcurve_data()] function. +#' @param method the method to be used for fitting. Possible options are +#' down weighting \code{"w"} and nested bootstrap \code{"b"}. +#' Defaults to \code{"w"}. +#' @param bootstrap the number of bootstraps for estimating CI. To skip +#' bootstrap specify \code{FALSE}. +#' @param control additional options for the fitting algorithm more details in +#' \link[=control_EM]{control EM}. +#' +#' +#' @references +#' \insertAllCited{} +#' +#' @return The fitted z-curve object +#' +#' @seealso [zcurve()], [summary.zcurve()], [plot.zcurve()], [control_EM], [control_density] +#' @export +zcurve_clustered <- function(data, method = "b", bootstrap = 1000, control = NULL){ + + warning("Please note that the clustering adjustment is an experimental feature.", immediate. = TRUE) + + if(!method %in% c("w", "b")) + stop("Wrong method, select a supported option.") + if(method == "b" && is.logical(bootstrap) && !bootstrap) + stop("The boostrap method requires bootstrap.") + + # set bootstrap + if(!is.numeric(bootstrap)){ + bootstrap <- FALSE + }else if(bootstrap <= 0){ + bootstrap <- FALSE + } + + if(!inherits(data, "zcurve_data")){ + stop("The 'data' input must be created by the `zcurve_data()` function. See `?zcurve_data()` for more information.") + } + + # create results object + object <- NULL + object$call <- match.call() + object$method <- switch(method, "w" = "EM (weighted)", "b" = "EM (bootstrapped)") + object$input_type <- "zcurve-data" + + # create control + control <- .zcurve_EM.control(control) + + ### prepare data + if(nrow(data$precise) != 0){ + z <- .p_to_z(data$precise$p) + z_id <- data$precise$id + }else{ + z <- numeric() + z_id <- numeric() + } + + if(nrow(data$censored) != 0){ + + lb <- .p_to_z(data$censored$p.ub) + ub <- .p_to_z(data$censored$p.lb) + b_id <- data$censored$id + + # remove non-significant censored p-values + if(any(lb < control$a)){ + warning(paste0(sum(lb < control$a), " censored p-values removed due to the upper bound being larger that the fitting range."), immediate. = TRUE, call. = FALSE) + b_id <- b_id[lb >= control$a] + ub <- ub[lb >= control$a] + lb <- lb[lb >= control$a] + } + + # move too significant censored p-values among precise p-values + if(length(lb) > 0 && any(lb >= control$b)){ + object$data <- c(object$data, lb[lb >= control$b]) + b_id <- b_id[lb < control$b] + ub <- ub[lb < control$b] + lb <- lb[lb < control$b] + } + + if(length(lb) > 0){ + # restrict the upper censoring to the fitting range + ub <- ifelse(ub > control$b, control$b, ub) + + # update control + control$type <- 3 + }else{ + lb <- numeric() + ub <- numeric() + b_id <- numeric() + } + }else{ + lb <- numeric() + ub <- numeric() + b_id <- numeric() + } + + object$data <- z + object$data_id <- z_id + object$data_censoring <- data.frame(lb = lb, ub = ub, id = b_id) + + object$control <- control + + # only run the algorithm with some significant results + if(sum(z > control$a & z < control$b) + length(lb) < 10) + stop("There must be at least 10 z-scores in the fitting range but a much larger number is recommended.") + + + # use appropriate algorithm + if(method == "b"){ + fit_b <- .zcurve_EM_b(z = z, z_id = z_id, lb = lb, ub = ub, b_id = b_id, control = control, bootstrap = bootstrap) + fit <- fit_b$fit + }else if(method == "w"){ + fit <- .zcurve_EM_w(z = z, z_id = z_id, lb = lb, ub = ub, b_id = b_id, control = control) + } + object$fit <- fit + + # check convergence + object$converged <- ifelse(fit$iter < control$max_iter, TRUE, FALSE) + + + # do bootstrap + if(bootstrap != FALSE){ + if(method == "b"){ + fit_boot <- fit_b$fit_boot + }else if(method == "w"){ + fit_boot <- .zcurve_EM_w_boot(z = z, z_id = z_id, lb = lb, ub = ub, b_id = b_id, control = control, fit = fit, bootstrap = bootstrap) + } + object$boot <- fit_boot + } + + + # estimates + object$coefficients <- .get_estimates(mu = fit$mu, weights = fit$weights, prop_high = fit$prop_high, sig_level = control$sig_level, a = control$a) + + + # boot estimates + if(bootstrap != FALSE){ + object$coefficients_boot <- data.frame(t(sapply(1:bootstrap, function(i){ + .get_estimates(mu = fit_boot$mu[i,], weights = fit_boot$weights[i,], prop_high = fit_boot$prop_high[i], sig_level = control$sig_level, a = control$a) + }))) + } + + + class(object) <- c("zcurve", "zcurve.clustered") + return(object) +} + +.zcurve_EM_b <- function(z, z_id, lb, ub, b_id, control, bootstrap){ + + # get starting value z-curves + fit_start <- .zcurve_EMc_start_fast_RCpp(x = z, + lb = lb, + ub = ub, + K = control$K, + mu = control$mu, + sigma = control$sigma, + mu_alpha = control$mu_alpha, + mu_max = control$mu_max, + theta_alpha = control$theta_alpha, + a = control$a, + b = control$b, + sig_level = control$sig_level, + fit_reps = control$fit_reps, + max_iter = control$max_iter_start, + criterion = control$criterion_start) + + # fit final z-curve + data_index <- rbind( + data.frame( + z = z, + lb = rep(NA, length(z)), + ub = rep(NA, length(z)), + id = z_id, + type = rep(1, length(z)) + ), + data.frame( + z = rep(NA, length(lb)), + lb = lb, + ub = ub, + id = b_id, + type = rep(2, length(lb)) + ) + ) + + + fit <- list() + for(i in 1:bootstrap){ + + boot_data <- .boot_id(data_index) + + fit[[i]] <- .zcurve_EMc_fit_fast_RCpp(x = boot_data$z[boot_data$type == 1], + lb = boot_data$lb[boot_data$type == 2], + ub = boot_data$ub[boot_data$type == 2], + mu = fit_start$mu[which.max(fit_start$Q),], + sigma = control$sigma, + theta = fit_start$weights[which.max(fit_start$Q),], + a = control$a, + b = control$b, + sig_level = control$sig_level, + max_iter = control$max_iter, + criterion = control$criterion) + + } + + fit_boot = list( + "mu" = do.call(rbind, lapply(fit, function(f) f[["mu"]])), + "weights" = do.call(rbind, lapply(fit, function(f) f[["weights"]])), + "prop_high" = do.call(c, lapply(fit, function(f) f[["prop_high"]])), + "Q" = do.call(c, lapply(fit, function(f) f[["Q"]])), + "iter" = do.call(c, lapply(fit, function(f) f[["iter"]])) + ) + + return( + list( + fit = list( + "mu" = apply(fit_boot[["mu"]], 2, mean), + "weights" = apply(fit_boot[["weights"]], 2, mean), + "prop_high" = mean(fit_boot[["prop_high"]]), + "Q" = mean(fit_boot[["Q"]]), + "iter" = mean(fit_boot[["iter"]]), + "iter_start" = fit_start$iter[which.max(fit_start$Q)] + ), + fit_boot = fit_boot + ) + ) +} +.zcurve_EM_w <- function(z, z_id, lb, ub, b_id, control){ + + # get starting value z-curves + fit_start <- .zcurve_EMc_start_fast_RCpp(x = z, + lb = lb, + ub = ub, + K = control$K, + mu = control$mu, + sigma = control$sigma, + mu_alpha = control$mu_alpha, + mu_max = control$mu_max, + theta_alpha = control$theta_alpha, + a = control$a, + b = control$b, + sig_level = control$sig_level, + fit_reps = control$fit_reps, + max_iter = control$max_iter_start, + criterion = control$criterion_start) + + # compute weights + id_freq <- table(c(z_id, b_id)) + id_weights <- data.frame( + id = names(id_freq), + w = 1/as.vector(id_freq) + ) + + # fit final z-curve + fit <- .zcurve_EMc_fit_fast_w_RCpp(x = z, + x_w = id_weights$w[match(z_id, id_weights$id)], + lb = lb, + ub = ub, + b_w = id_weights$w[match(b_id, id_weights$id)], + mu = fit_start$mu[which.max(fit_start$Q),], + sigma = control$sigma, + theta = fit_start$weights[which.max(fit_start$Q),], + a = control$a, + b = control$b, + sig_level = control$sig_level, + max_iter = control$max_iter, + criterion = control$criterion) + + return( + list( + "mu" = fit$mu, + "weights" = fit$weights, + "prop_high" = fit$prop_high, + "Q" = fit$Q, + "iter" = fit$iter, + "iter_start" = fit_start$iter[which.max(fit_start$Q)] + ) + ) +} +.zcurve_EM_w_boot <- function(z, z_id, lb, ub, b_id, control, fit, bootstrap){ + + # compute weights + id_freq <- table(c(z_id, b_id)) + id_weights <- data.frame( + id = names(id_freq), + w = 1/as.vector(id_freq) + ) + + x_w <- id_weights$w[match(z_id, id_weights$id)] + b_w <- id_weights$w[match(b_id, id_weights$id)] + + indx <- c( + if(length(z) > 0) 1:length(z), + if(length(lb) > 0) (-length(lb)):-1 + ) + + fit_boot <- .zcurve_EMc_boot_fast_w_RCpp(x = z, + x_w = x_w, + lb = lb, + ub = ub, + b_w = b_w, + indx = indx, + mu = fit$mu, + sigma = control$sigma, + theta = fit$weights, + a = control$a, + b = control$b, + sig_level = control$sig_level, + bootstrap = bootstrap, + criterion = control$criterion_boot, + max_iter = control$max_iter_boot) + return( + list( + "mu" = fit_boot$mu, + "weights" = fit_boot$weights, + "Q" = fit_boot$Q, + "prop_high" = fit_boot$prop_high, + "iter" = fit_boot$iter + ) + ) + +} + +.boot_id <- function(data){ + + unique_id <- unique(data$id) + + if(length(unique_id) == nrow(data)){ + return(data) + } + + boot_out <- list() + for(id in unique_id){ + + temp_data <- data[data$id == id,] + + if(nrow(temp_data) == 1){ + boot_out[[id]] <- temp_data + }else{ + boot_out[[id]] <- temp_data[sample(nrow(temp_data), 1),] + } + } + boot_out <- do.call(rbind, boot_out) + + return(boot_out) +} diff --git a/man/zcurve-package.Rd b/man/zcurve-package.Rd index 3330c91..83b6d58 100644 --- a/man/zcurve-package.Rd +++ b/man/zcurve-package.Rd @@ -6,7 +6,7 @@ \alias{_PACKAGE} \title{zcurve: An Implementation of Z-Curves} \description{ -An implementation of z-curves - a method for estimating expected discovery and replicability rates on the bases of test-statistics of published studies. The package provides functions for fitting the new density and EM version (Bartoš & Schimmack, 2020, ), censored observations, as well as the original density z-curve (Brunner & Schimmack, 2020, ). Furthermore, the package provides summarizing and plotting functions for the fitted z-curve objects. See the aforementioned articles for more information about the z-curves, expected discovery and replicability rates, validation studies, and limitations. +An implementation of z-curves - a method for estimating expected discovery and replicability rates on the bases of test-statistics of published studies. The package provides functions for fitting the new density and EM version (Bartoš & Schimmack, 2020, \doi{10.31234/osf.io/urgtn}), censored observations, as well as the original density z-curve (Brunner & Schimmack, 2020, \doi{10.15626/MP.2018.874}). Furthermore, the package provides summarizing and plotting functions for the fitted z-curve objects. See the aforementioned articles for more information about the z-curves, expected discovery and replicability rates, validation studies, and limitations. } \seealso{ Useful links: diff --git a/man/zcurve_clustered.Rd b/man/zcurve_clustered.Rd new file mode 100644 index 0000000..941a271 --- /dev/null +++ b/man/zcurve_clustered.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/zcurve_clustered.R +\name{zcurve_clustered} +\alias{zcurve_clustered} +\title{Fit a z-curve to clustered data} +\usage{ +zcurve_clustered(data, method = "b", bootstrap = 1000, control = NULL) +} +\arguments{ +\item{data}{an object created with \code{\link[=zcurve_data]{zcurve_data()}} function.} + +\item{method}{the method to be used for fitting. Possible options are +down weighting \code{"w"} and nested bootstrap \code{"b"}. +Defaults to \code{"w"}.} + +\item{bootstrap}{the number of bootstraps for estimating CI. To skip +bootstrap specify \code{FALSE}.} + +\item{control}{additional options for the fitting algorithm more details in +\link[=control_EM]{control EM}.} +} +\value{ +The fitted z-curve object +} +\description{ +\code{zcurve_clustered} is used to fit z-curve models to +clustered data. The function requires a data object created with the +\code{\link[=zcurve_data]{zcurve_data()}} function as the input (where id denotes clusters). +Two different methods that account for clustering ar implemented via +the EM model: \code{"w"} for down weighting the likelihood of the test +statistics proportionately to the number of repetitions in the clusters, +and \code{"b"} for a nested bootstrap where only a single study from each +bootstrap is selected for model fitting. +} +\references{ +\insertAllCited{} +} +\seealso{ +\code{\link[=zcurve]{zcurve()}}, \code{\link[=summary.zcurve]{summary.zcurve()}}, \code{\link[=plot.zcurve]{plot.zcurve()}}, \link{control_EM}, \link{control_density} +} diff --git a/man/zcurve_data.Rd b/man/zcurve_data.Rd index 2fb2ffb..5f6ca91 100644 --- a/man/zcurve_data.Rd +++ b/man/zcurve_data.Rd @@ -4,11 +4,13 @@ \alias{zcurve_data} \title{Prepare data for z-curve} \usage{ -zcurve_data(data, rounded = TRUE, stat_precise = 2, p_precise = 3) +zcurve_data(data, id = NULL, rounded = TRUE, stat_precise = 2, p_precise = 3) } \arguments{ \item{data}{a vector strings containing the test statistics.} +\item{id}{a vector identifying observations from the same cluster.} + \item{rounded}{an optional argument specifying whether de-rounding should be applied. Defaults to \code{FALSE} to treat all input as exact values or a numeric vector with values specifying precision of the input. The other option, diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index a938be0..f4f8e62 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -157,6 +157,29 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// zcurve_EMc_fit_fast_w_RCpp +List zcurve_EMc_fit_fast_w_RCpp(NumericVector x, NumericVector x_w, NumericVector lb, NumericVector ub, NumericVector b_w, NumericVector mu, NumericVector sigma, NumericVector theta, double a, double b, double sig_level, int max_iter, double criterion); +RcppExport SEXP _zcurve_zcurve_EMc_fit_fast_w_RCpp(SEXP xSEXP, SEXP x_wSEXP, SEXP lbSEXP, SEXP ubSEXP, SEXP b_wSEXP, SEXP muSEXP, SEXP sigmaSEXP, SEXP thetaSEXP, SEXP aSEXP, SEXP bSEXP, SEXP sig_levelSEXP, SEXP max_iterSEXP, SEXP criterionSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< NumericVector >::type x(xSEXP); + Rcpp::traits::input_parameter< NumericVector >::type x_w(x_wSEXP); + Rcpp::traits::input_parameter< NumericVector >::type lb(lbSEXP); + Rcpp::traits::input_parameter< NumericVector >::type ub(ubSEXP); + Rcpp::traits::input_parameter< NumericVector >::type b_w(b_wSEXP); + Rcpp::traits::input_parameter< NumericVector >::type mu(muSEXP); + Rcpp::traits::input_parameter< NumericVector >::type sigma(sigmaSEXP); + Rcpp::traits::input_parameter< NumericVector >::type theta(thetaSEXP); + Rcpp::traits::input_parameter< double >::type a(aSEXP); + Rcpp::traits::input_parameter< double >::type b(bSEXP); + Rcpp::traits::input_parameter< double >::type sig_level(sig_levelSEXP); + Rcpp::traits::input_parameter< int >::type max_iter(max_iterSEXP); + Rcpp::traits::input_parameter< double >::type criterion(criterionSEXP); + rcpp_result_gen = Rcpp::wrap(zcurve_EMc_fit_fast_w_RCpp(x, x_w, lb, ub, b_w, mu, sigma, theta, a, b, sig_level, max_iter, criterion)); + return rcpp_result_gen; +END_RCPP +} // zcurve_EM_start_RCpp List zcurve_EM_start_RCpp(NumericVector x, int type, int K, NumericVector mu, NumericVector sigma, NumericVector mu_alpha, double mu_max, NumericVector theta_alpha, double a, double b, double sig_level, int fit_reps, int max_iter, double criterion); RcppExport SEXP _zcurve_zcurve_EM_start_RCpp(SEXP xSEXP, SEXP typeSEXP, SEXP KSEXP, SEXP muSEXP, SEXP sigmaSEXP, SEXP mu_alphaSEXP, SEXP mu_maxSEXP, SEXP theta_alphaSEXP, SEXP aSEXP, SEXP bSEXP, SEXP sig_levelSEXP, SEXP fit_repsSEXP, SEXP max_iterSEXP, SEXP criterionSEXP) { @@ -271,14 +294,40 @@ BEGIN_RCPP END_RCPP } // zcurve_EMc_boot_fast_RCpp -List zcurve_EMc_boot_fast_RCpp(NumericVector x, NumericVector lb, NumericVector ub, NumericVector mu, NumericVector sigma, NumericVector theta, double a, double b, double sig_level, int bootstrap, int max_iter, double criterion); -RcppExport SEXP _zcurve_zcurve_EMc_boot_fast_RCpp(SEXP xSEXP, SEXP lbSEXP, SEXP ubSEXP, SEXP muSEXP, SEXP sigmaSEXP, SEXP thetaSEXP, SEXP aSEXP, SEXP bSEXP, SEXP sig_levelSEXP, SEXP bootstrapSEXP, SEXP max_iterSEXP, SEXP criterionSEXP) { +List zcurve_EMc_boot_fast_RCpp(NumericVector x, NumericVector lb, NumericVector ub, IntegerVector indx, NumericVector mu, NumericVector sigma, NumericVector theta, double a, double b, double sig_level, int bootstrap, int max_iter, double criterion); +RcppExport SEXP _zcurve_zcurve_EMc_boot_fast_RCpp(SEXP xSEXP, SEXP lbSEXP, SEXP ubSEXP, SEXP indxSEXP, SEXP muSEXP, SEXP sigmaSEXP, SEXP thetaSEXP, SEXP aSEXP, SEXP bSEXP, SEXP sig_levelSEXP, SEXP bootstrapSEXP, SEXP max_iterSEXP, SEXP criterionSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< NumericVector >::type x(xSEXP); + Rcpp::traits::input_parameter< NumericVector >::type lb(lbSEXP); + Rcpp::traits::input_parameter< NumericVector >::type ub(ubSEXP); + Rcpp::traits::input_parameter< IntegerVector >::type indx(indxSEXP); + Rcpp::traits::input_parameter< NumericVector >::type mu(muSEXP); + Rcpp::traits::input_parameter< NumericVector >::type sigma(sigmaSEXP); + Rcpp::traits::input_parameter< NumericVector >::type theta(thetaSEXP); + Rcpp::traits::input_parameter< double >::type a(aSEXP); + Rcpp::traits::input_parameter< double >::type b(bSEXP); + Rcpp::traits::input_parameter< double >::type sig_level(sig_levelSEXP); + Rcpp::traits::input_parameter< int >::type bootstrap(bootstrapSEXP); + Rcpp::traits::input_parameter< int >::type max_iter(max_iterSEXP); + Rcpp::traits::input_parameter< double >::type criterion(criterionSEXP); + rcpp_result_gen = Rcpp::wrap(zcurve_EMc_boot_fast_RCpp(x, lb, ub, indx, mu, sigma, theta, a, b, sig_level, bootstrap, max_iter, criterion)); + return rcpp_result_gen; +END_RCPP +} +// zcurve_EMc_boot_fast_w_RCpp +List zcurve_EMc_boot_fast_w_RCpp(NumericVector x, NumericVector x_w, NumericVector lb, NumericVector ub, NumericVector b_w, IntegerVector indx, NumericVector mu, NumericVector sigma, NumericVector theta, double a, double b, double sig_level, int bootstrap, int max_iter, double criterion); +RcppExport SEXP _zcurve_zcurve_EMc_boot_fast_w_RCpp(SEXP xSEXP, SEXP x_wSEXP, SEXP lbSEXP, SEXP ubSEXP, SEXP b_wSEXP, SEXP indxSEXP, SEXP muSEXP, SEXP sigmaSEXP, SEXP thetaSEXP, SEXP aSEXP, SEXP bSEXP, SEXP sig_levelSEXP, SEXP bootstrapSEXP, SEXP max_iterSEXP, SEXP criterionSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericVector >::type x(xSEXP); + Rcpp::traits::input_parameter< NumericVector >::type x_w(x_wSEXP); Rcpp::traits::input_parameter< NumericVector >::type lb(lbSEXP); Rcpp::traits::input_parameter< NumericVector >::type ub(ubSEXP); + Rcpp::traits::input_parameter< NumericVector >::type b_w(b_wSEXP); + Rcpp::traits::input_parameter< IntegerVector >::type indx(indxSEXP); Rcpp::traits::input_parameter< NumericVector >::type mu(muSEXP); Rcpp::traits::input_parameter< NumericVector >::type sigma(sigmaSEXP); Rcpp::traits::input_parameter< NumericVector >::type theta(thetaSEXP); @@ -288,7 +337,7 @@ BEGIN_RCPP Rcpp::traits::input_parameter< int >::type bootstrap(bootstrapSEXP); Rcpp::traits::input_parameter< int >::type max_iter(max_iterSEXP); Rcpp::traits::input_parameter< double >::type criterion(criterionSEXP); - rcpp_result_gen = Rcpp::wrap(zcurve_EMc_boot_fast_RCpp(x, lb, ub, mu, sigma, theta, a, b, sig_level, bootstrap, max_iter, criterion)); + rcpp_result_gen = Rcpp::wrap(zcurve_EMc_boot_fast_w_RCpp(x, x_w, lb, ub, b_w, indx, mu, sigma, theta, a, b, sig_level, bootstrap, max_iter, criterion)); return rcpp_result_gen; END_RCPP } @@ -303,12 +352,14 @@ static const R_CallMethodDef CallEntries[] = { {"_zcurve_zcurve_EM_fit_RCpp", (DL_FUNC) &_zcurve_zcurve_EM_fit_RCpp, 10}, {"_zcurve_zcurve_EM_fit_fast_RCpp", (DL_FUNC) &_zcurve_zcurve_EM_fit_fast_RCpp, 9}, {"_zcurve_zcurve_EMc_fit_fast_RCpp", (DL_FUNC) &_zcurve_zcurve_EMc_fit_fast_RCpp, 11}, + {"_zcurve_zcurve_EMc_fit_fast_w_RCpp", (DL_FUNC) &_zcurve_zcurve_EMc_fit_fast_w_RCpp, 13}, {"_zcurve_zcurve_EM_start_RCpp", (DL_FUNC) &_zcurve_zcurve_EM_start_RCpp, 14}, {"_zcurve_zcurve_EM_boot_RCpp", (DL_FUNC) &_zcurve_zcurve_EM_boot_RCpp, 11}, {"_zcurve_zcurve_EM_start_fast_RCpp", (DL_FUNC) &_zcurve_zcurve_EM_start_fast_RCpp, 13}, {"_zcurve_zcurve_EM_boot_fast_RCpp", (DL_FUNC) &_zcurve_zcurve_EM_boot_fast_RCpp, 10}, {"_zcurve_zcurve_EMc_start_fast_RCpp", (DL_FUNC) &_zcurve_zcurve_EMc_start_fast_RCpp, 15}, - {"_zcurve_zcurve_EMc_boot_fast_RCpp", (DL_FUNC) &_zcurve_zcurve_EMc_boot_fast_RCpp, 12}, + {"_zcurve_zcurve_EMc_boot_fast_RCpp", (DL_FUNC) &_zcurve_zcurve_EMc_boot_fast_RCpp, 13}, + {"_zcurve_zcurve_EMc_boot_fast_w_RCpp", (DL_FUNC) &_zcurve_zcurve_EMc_boot_fast_w_RCpp, 15}, {NULL, NULL, 0} }; diff --git a/src/zcurveEM.cpp b/src/zcurveEM.cpp index e514c2a..d68837e 100644 --- a/src/zcurveEM.cpp +++ b/src/zcurveEM.cpp @@ -162,6 +162,25 @@ NumericMatrix compute_u_log_lik_c(NumericVector x, NumericVector lb, NumericVect return ll; } +NumericMatrix compute_u_log_lik_w_c(NumericVector x, NumericVector x_w, NumericVector lb, NumericVector ub, NumericVector b_w, NumericVector mu, NumericVector sigma, double a, double b){ + + NumericMatrix ll_o(mu.size(), x.size()); + NumericMatrix ll_c(mu.size(), lb.size()); + + for(int k = 0; k < mu.size(); k++){ + ll_o(k,_) = zdist_lpdf(x,mu[k],sigma[k],a,b) * x_w; + } + + for(int k = 0; k < mu.size(); k++){ + for(int i = 0; i < lb.size(); i++){ + ll_c(k,i) = zdist_cens_lpdf(lb[i],ub[i],mu[k],sigma[k],a,b) * b_w[i]; + } + } + + NumericMatrix ll = transpose(cbind(ll_o, ll_c)); + + return ll; +} NumericMatrix weight_u_log_lik(NumericMatrix ull, NumericVector theta){ NumericMatrix ll(ull.nrow(), ull.ncol()); @@ -239,6 +258,12 @@ NumericVector select_x(NumericVector x, double a, double b){ NumericVector x_new = x[x_true1 & x_true2]; return x_new; } +NumericVector select_x_w(NumericVector x, NumericVector x_w, double a, double b){ + LogicalVector x_true1 = x > a; + LogicalVector x_true2 = x < b; + NumericVector x_w_new = x_w[x_true1 & x_true2]; + return x_w_new; +} double get_prop_high(NumericVector x, double select_sig, double b){ double a = R::pnorm(select_sig/2, 0, 1, false, false); @@ -265,6 +290,19 @@ double get_prop_high_cens(NumericVector x, double select_sig, double b, int n_ce double prop_high = (1.0 * x_high.length()) / (1.0 * (x_sig.length() + n_censored)); return prop_high; } +double get_prop_high_cens_w(NumericVector x, NumericVector x_w, double select_sig, double b, int n_censored){ + + double a = R::pnorm(select_sig/2, 0, 1, false, false); + + LogicalVector x_sig_true = x > a; + NumericVector x_w_sig = x_w[x_sig_true]; + + LogicalVector x_high_true = x > b; + NumericVector x_w_high = x_w[x_high_true]; + + double prop_high = (1.0 * sum(x_w_high)) / (1.0 * (sum(x_w_sig) + n_censored)); + return prop_high; +} // [[Rcpp::export(.zcurve_EM_fit_RCpp)]] @@ -399,6 +437,51 @@ List zcurve_EMc_fit_fast_RCpp(NumericVector x, NumericVector lb, NumericVector u return ret; } +// [[Rcpp::export(.zcurve_EMc_fit_fast_w_RCpp)]] +List zcurve_EMc_fit_fast_w_RCpp(NumericVector x, NumericVector x_w, NumericVector lb, NumericVector ub, NumericVector b_w, + NumericVector mu, NumericVector sigma, NumericVector theta, double a, double b, double sig_level, + int max_iter, double criterion) { + + double prop_high = get_prop_high_cens_w(x, x_w, sig_level, b, sum(b_w)); + x_w = select_x_w(x,x_w,a,b); + x = select_x(x,a,b); + + NumericMatrix log_lik (x.size(), mu.size()); + NumericMatrix lik (x.size(), mu.size()); + NumericVector l_row_sum (mu.size()); + NumericMatrix p (x.size(), mu.size()); + NumericVector Q (max_iter+1); + + int i= 0; + Q[i] = 0; + + + NumericMatrix u_log_lik = compute_u_log_lik_w_c(x, x_w, lb, ub, b_w, mu, sigma, a, b); + do{ + // E-step + log_lik = weight_u_log_lik(u_log_lik, theta); + lik = exp_matrix(log_lik); + l_row_sum = compute_l_row_sum(lik); + p = compute_p(lik,l_row_sum); + + // M-step + theta = update_theta(p); + + Q[i+1] = sum(log(l_row_sum)); + ++i; + + } while ((fabs(Q[i]-Q[i-1]) >= criterion) && (i < max_iter)); + + List ret; + ret["iter"] = i; + ret["Q"] = Q[i]; + ret["mu"] = mu; + ret["weights"] = theta; + ret["sigma"] = sigma; + ret["prop_high"] = prop_high; + + return ret; +} // [[Rcpp::export(.zcurve_EM_start_RCpp)]] List zcurve_EM_start_RCpp(NumericVector x, int type, int K, NumericVector mu, NumericVector sigma,NumericVector mu_alpha, double mu_max, @@ -657,7 +740,7 @@ List zcurve_EMc_start_fast_RCpp(NumericVector x, NumericVector lb, NumericVecto } // [[Rcpp::export(.zcurve_EMc_boot_fast_RCpp)]] -List zcurve_EMc_boot_fast_RCpp(NumericVector x, NumericVector lb, NumericVector ub, +List zcurve_EMc_boot_fast_RCpp(NumericVector x, NumericVector lb, NumericVector ub, IntegerVector indx, NumericVector mu, NumericVector sigma, NumericVector theta, double a, double b, double sig_level, int bootstrap, int max_iter, double criterion){ @@ -667,7 +750,14 @@ List zcurve_EMc_boot_fast_RCpp(NumericVector x, NumericVector lb, NumericVector NumericVector Q_reps (bootstrap); NumericVector prop_high_reps (bootstrap); + IntegerVector temp_indx; + LogicalVector is_temp_indx_x; + LogicalVector is_temp_indx_b; + IntegerVector temp_indx_x; + IntegerVector temp_indx_b; NumericVector temp_x; + NumericVector temp_lb; + NumericVector temp_ub; NumericVector new_mu (mu.size()); NumericVector new_weights (mu.size()); @@ -676,9 +766,21 @@ List zcurve_EMc_boot_fast_RCpp(NumericVector x, NumericVector lb, NumericVector double new_prop_high; for(int i = 0; i < bootstrap; i++){ - temp_x = sample(x, x.size(), true); - List temp_fit = zcurve_EMc_fit_fast_RCpp(temp_x, lb, ub, mu, sigma, theta, a, b, sig_level, + temp_indx = sample(indx, x.size() + lb.size(), true); + + is_temp_indx_x = temp_indx > 0; + is_temp_indx_b = temp_indx < 0; + + temp_indx_x = temp_indx[is_temp_indx_x]; + temp_indx_b = temp_indx[is_temp_indx_b]; + + temp_x = x[temp_indx_x-1]; + + temp_lb = lb[temp_indx_b*(-1)-1]; + temp_ub = ub[temp_indx_b*(-1)-1]; + + List temp_fit = zcurve_EMc_fit_fast_RCpp(temp_x, temp_lb, temp_ub, mu, sigma, theta, a, b, sig_level, max_iter, criterion); new_mu = temp_fit["mu"]; @@ -703,6 +805,76 @@ List zcurve_EMc_boot_fast_RCpp(NumericVector x, NumericVector lb, NumericVector return ret; } +// [[Rcpp::export(.zcurve_EMc_boot_fast_w_RCpp)]] +List zcurve_EMc_boot_fast_w_RCpp(NumericVector x, NumericVector x_w, NumericVector lb, NumericVector ub, NumericVector b_w, IntegerVector indx, + NumericVector mu, NumericVector sigma, NumericVector theta, + double a, double b, double sig_level, + int bootstrap, int max_iter, double criterion){ + NumericMatrix mu_reps (bootstrap, mu.size()); + NumericMatrix weights_reps (bootstrap, mu.size()); + IntegerVector iter_reps (bootstrap); + NumericVector Q_reps (bootstrap); + NumericVector prop_high_reps (bootstrap); + + IntegerVector temp_indx; + LogicalVector is_temp_indx_x; + LogicalVector is_temp_indx_b; + IntegerVector temp_indx_x; + IntegerVector temp_indx_b; + NumericVector temp_x; + NumericVector temp_x_w; + NumericVector temp_lb; + NumericVector temp_ub; + NumericVector temp_b_w; + + NumericVector new_mu (mu.size()); + NumericVector new_weights (mu.size()); + int new_iter; + double new_Q; + double new_prop_high; + + for(int i = 0; i < bootstrap; i++){ + + temp_indx = sample(indx, x.size() + lb.size(), true); + + is_temp_indx_x = temp_indx > 0; + is_temp_indx_b = temp_indx < 0; + + temp_indx_x = temp_indx[is_temp_indx_x]; + temp_indx_b = temp_indx[is_temp_indx_b]; + + temp_x = x[temp_indx_x-1]; + temp_x_w = x_w[temp_indx_x-1]; + + temp_lb = lb[temp_indx_b*(-1)-1]; + temp_ub = ub[temp_indx_b*(-1)-1]; + temp_b_w = b_w[temp_indx_b*(-1)-1]; + + List temp_fit = zcurve_EMc_fit_fast_w_RCpp(temp_x, temp_x_w, temp_lb, temp_ub, temp_b_w, mu, sigma, theta, a, b, sig_level, + max_iter, criterion); + + new_mu = temp_fit["mu"]; + new_weights = temp_fit["weights"]; + new_iter = temp_fit["iter"]; + new_Q = temp_fit["Q"]; + new_prop_high = temp_fit["prop_high"]; + + mu_reps(i,_) = new_mu; + weights_reps(i,_) = new_weights; + iter_reps[i] = new_iter; + Q_reps[i] = new_Q; + prop_high_reps[i] = new_prop_high; + } + + List ret; + ret["iter"] = iter_reps; + ret["Q"] = Q_reps; + ret["mu"] = mu_reps; + ret["weights"] = weights_reps; + ret["prop_high"] = prop_high_reps; + + return ret; +} /* double get_loglik(NumericVector x, NumericVector mu, NumericVector sigma, NumericVector theta, double a, double b) { diff --git a/tests/testthat/_snaps/zcurve/z-curve-cens-em.svg b/tests/testthat/_snaps/zcurve/z-curve-cens-em.svg index 4c6c789..002a587 100644 --- a/tests/testthat/_snaps/zcurve/z-curve-cens-em.svg +++ b/tests/testthat/_snaps/zcurve/z-curve-cens-em.svg @@ -78,8 +78,8 @@ - - - + + + diff --git a/tests/testthat/_snaps/zcurve/z-curve-clustered-mixed.svg b/tests/testthat/_snaps/zcurve/z-curve-clustered-mixed.svg new file mode 100644 index 0000000..650efe1 --- /dev/null +++ b/tests/testthat/_snaps/zcurve/z-curve-clustered-mixed.svg @@ -0,0 +1,186 @@ + + + + + + + + + + + + + + + + + + + +z-curve (EM via EM (bootstrapped)) +z-scores +Density + + + + + + + + + + +0 +1 +2 +3 +4 +5 +6 + + + + + + + + +0.0 +0.1 +0.2 +0.3 +0.4 +0.5 +0.6 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +z-curve (EM via EM (weighted)) +z-scores +Density + + + + + + + + + + +0 +1 +2 +3 +4 +5 +6 + + + + + + + + +0.0 +0.1 +0.2 +0.3 +0.4 +0.5 +0.6 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/testthat/_snaps/zcurve/z-curve-clustered-precise.svg b/tests/testthat/_snaps/zcurve/z-curve-clustered-precise.svg new file mode 100644 index 0000000..0de642a --- /dev/null +++ b/tests/testthat/_snaps/zcurve/z-curve-clustered-precise.svg @@ -0,0 +1,186 @@ + + + + + + + + + + + + + + + + + + + +z-curve (EM via EM (bootstrapped)) +z-scores +Density + + + + + + + + + + +0 +1 +2 +3 +4 +5 +6 + + + + + + + + +0.0 +0.1 +0.2 +0.3 +0.4 +0.5 +0.6 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +z-curve (EM via EM (weighted)) +z-scores +Density + + + + + + + + + + +0 +1 +2 +3 +4 +5 +6 + + + + + + + + +0.0 +0.1 +0.2 +0.3 +0.4 +0.5 +0.6 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/testthat/_snaps/zcurve/z-curve-clustered-rounded.svg b/tests/testthat/_snaps/zcurve/z-curve-clustered-rounded.svg new file mode 100644 index 0000000..414642e --- /dev/null +++ b/tests/testthat/_snaps/zcurve/z-curve-clustered-rounded.svg @@ -0,0 +1,186 @@ + + + + + + + + + + + + + + + + + + + +z-curve (EM via EM (bootstrapped)) +z-scores +Density + + + + + + + + + + +0 +1 +2 +3 +4 +5 +6 + + + + + + + + +0.0 +0.1 +0.2 +0.3 +0.4 +0.5 +0.6 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +z-curve (EM via EM (weighted)) +z-scores +Density + + + + + + + + + + +0 +1 +2 +3 +4 +5 +6 + + + + + + + + +0.0 +0.1 +0.2 +0.3 +0.4 +0.5 +0.6 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/testthat/_snaps/zcurve/z-curve-mixed-em.svg b/tests/testthat/_snaps/zcurve/z-curve-mixed-em.svg index 2ae0571..b0cc804 100644 --- a/tests/testthat/_snaps/zcurve/z-curve-mixed-em.svg +++ b/tests/testthat/_snaps/zcurve/z-curve-mixed-em.svg @@ -79,7 +79,7 @@ - - + + diff --git a/tests/testthat/test-zcurve.R b/tests/testthat/test-zcurve.R index d49632c..d65c644 100644 --- a/tests/testthat/test-zcurve.R +++ b/tests/testthat/test-zcurve.R @@ -121,12 +121,12 @@ test_that("z-curve EM censoring works", { "model: EM via EM" , "" , " Estimate l.CI u.CI" , - "ERR 0.961 0.915 1.000" , - "EDR 0.957 0.889 1.000" , + "ERR 0.961 0.910 1.000" , + "EDR 0.957 0.884 1.000" , "" , "Model converged in 70 + 223 iterations" , "Fitted using 165 z-values. 200 supplied, 200 significant (ODR = 1.00, 95% CI [0.98, 1.00]).", - "Q = -319.22, 95% CI[-332.77, -303.93]" + "Q = -319.22, 95% CI[-346.77, -288.55]" )) # plot @@ -145,14 +145,160 @@ test_that("z-curve EM censoring works", { "model: EM via EM" , "" , " Estimate l.CI u.CI" , - "ERR 0.968 0.938 0.998" , - "EDR 0.965 0.915 1.000" , + "ERR 0.968 0.908 1.000" , + "EDR 0.965 0.879 1.000" , "" , - "Model converged in 88 + 63 iterations" , + "Model converged in 85 + 89 iterations" , "Fitted using 82 -values. 100 supplied, 100 significant (ODR = 1.00, 95% CI [0.95, 1.00]).", - "Q = -205.22, 95% CI[-205.22, -205.22]" + "Q = -205.22, 95% CI[-228.49, -179.08]" )) # plot expect_doppelganger("z-curve cens EM", function()plot(fit.cens, CI = TRUE)) +}) +test_that("z-curve clustered works", { + + set.seed(1) + z <- abs(c(rnorm(300, 3, 1), rnorm(200, 1, 1))) + id <- sample(500, 500, TRUE) + + # rounded + data <- paste0("z = ",round(z, 2)) + data <- zcurve_data(data, id) + fitb <- suppressWarnings(zcurve_clustered(data = data, method = "b", bootstrap = 20)) + fitw <- suppressWarnings(zcurve_clustered(data = data, method = "w", bootstrap = 20)) + + expect_equal( + capture_output_lines(summary(fitb), print = TRUE, width = 150), + c("Call:" , + "zcurve_clustered(data = data, method = \"b\", bootstrap = 20)" , + "" , + "model: EM via EM (bootstrapped)" , + "" , + " Estimate l.CI u.CI" , + "ERR 0.760 0.695 0.829" , + "EDR 0.722 0.632 0.823" , + "" , + "Model converged in 57 + 450.85 iterations" , + "Fitted using 299 zcurve-data-values. 480 supplied, 299 significant (ODR = 0.62, 95% CI [0.58, 0.67]).", + "Q = -236.27, 95% CI[-251.36, -224.63]" + )) + + expect_equal( + capture_output_lines(summary(fitw), print = TRUE, width = 150), + c("Call:" , + "zcurve_clustered(data = data, method = \"w\", bootstrap = 20)" , + "" , + "model: EM via EM (weighted)" , + "" , + " Estimate l.CI u.CI" , + "ERR 0.806 0.714 0.877" , + "EDR 0.783 0.655 0.895" , + "" , + "Model converged in 73 + 178 iterations" , + "Fitted using 299 zcurve-data-values. 480 supplied, 299 significant (ODR = 0.62, 95% CI [0.58, 0.67]).", + "Q = -238.11, 95% CI[-269.95, -219.12]" + )) + + expect_doppelganger("z-curve clustered rounded", function(){ + oldpar <- graphics::par(no.readonly = TRUE) + on.exit(graphics::par(mfrow = oldpar[["mfrow"]])) + par(mfrow = c(1, 2)) + plot(fitb) + plot(fitw) + }) + + + # precise + data <- paste0("z = ",z) + data <- zcurve_data(data, id) + fitb <- suppressWarnings(zcurve_clustered(data = data, method = "b", bootstrap = 20)) + fitw <- suppressWarnings(zcurve_clustered(data = data, method = "w", bootstrap = 20)) + + expect_equal( + capture_output_lines(summary(fitb), print = TRUE, width = 150), + c("Call:" , + "zcurve_clustered(data = data, method = \"b\", bootstrap = 20)" , + "" , + "model: EM via EM (bootstrapped)" , + "" , + " Estimate l.CI u.CI" , + "ERR 0.767 0.691 0.831" , + "EDR 0.730 0.627 0.825" , + "" , + "Model converged in 66 + 629.95 iterations" , + "Fitted using 299 zcurve-data-values. 500 supplied, 299 significant (ODR = 0.60, 95% CI [0.55, 0.64]).", + "Q = -199.02, 95% CI[-209.72, -191.18]" + )) + + expect_equal( + capture_output_lines(summary(fitw), print = TRUE, width = 150), + c("Call:" , + "zcurve_clustered(data = data, method = \"w\", bootstrap = 20)" , + "" , + "model: EM via EM (weighted)" , + "" , + " Estimate l.CI u.CI" , + "ERR 0.809 0.678 0.876" , + "EDR 0.787 0.617 0.893" , + "" , + "Model converged in 73 + 177 iterations" , + "Fitted using 299 zcurve-data-values. 500 supplied, 299 significant (ODR = 0.60, 95% CI [0.55, 0.64]).", + "Q = -201.69, 95% CI[-219.18, -185.82]" + )) + + expect_doppelganger("z-curve clustered precise", function(){ + oldpar <- graphics::par(no.readonly = TRUE) + on.exit(graphics::par(mfrow = oldpar[["mfrow"]])) + par(mfrow = c(1, 2)) + plot(fitb) + plot(fitw) + }) + + + # mixed + data <- paste0("z = ",c(round(z[1:100], 1), round(z[101:200], 2), z[201:500])) + data <- zcurve_data(data, id) + fitb <- suppressWarnings(zcurve_clustered(data = data, method = "b", bootstrap = 20)) + fitw <- suppressWarnings(zcurve_clustered(data = data, method = "w", bootstrap = 20)) + + expect_equal( + capture_output_lines(summary(fitb), print = TRUE, width = 150), + c("Call:" , + "zcurve_clustered(data = data, method = \"b\", bootstrap = 20)" , + "" , + "model: EM via EM (bootstrapped)" , + "" , + " Estimate l.CI u.CI" , + "ERR 0.769 0.703 0.835" , + "EDR 0.733 0.639 0.831" , + "" , + "Model converged in 72 + 430.1 iterations" , + "Fitted using 299 zcurve-data-values. 489 supplied, 299 significant (ODR = 0.61, 95% CI [0.57, 0.65]).", + "Q = -319.14, 95% CI[-340.91, -306.78]" + )) + + expect_equal( + capture_output_lines(summary(fitw), print = TRUE, width = 150), + c("Call:" , + "zcurve_clustered(data = data, method = \"w\", bootstrap = 20)" , + "" , + "model: EM via EM (weighted)" , + "" , + " Estimate l.CI u.CI" , + "ERR 0.810 0.709 0.881" , + "EDR 0.788 0.651 0.901" , + "" , + "Model converged in 59 + 188 iterations" , + "Fitted using 299 zcurve-data-values. 489 supplied, 299 significant (ODR = 0.61, 95% CI [0.57, 0.65]).", + "Q = -321.39, 95% CI[-361.78, -298.20]" + )) + + expect_doppelganger("z-curve clustered mixed", function(){ + oldpar <- graphics::par(no.readonly = TRUE) + on.exit(graphics::par(mfrow = oldpar[["mfrow"]])) + par(mfrow = c(1, 2)) + plot(fitb) + plot(fitw) + }) }) \ No newline at end of file