From cce68c5eb1e6648d5b08caa9a1c563f01ddd3c40 Mon Sep 17 00:00:00 2001 From: FBartos Date: Thu, 23 May 2024 14:36:02 +0200 Subject: [PATCH] fix ODR --- DESCRIPTION | 2 +- NEWS.md | 3 +++ R/data-preparation.R | 41 +++++++++++++++++++++++++++++------- R/main.R | 31 +++++++++++++++++++++------ R/zcurve_clustered.R | 12 ++++++++++- man/zcurve-package.Rd | 1 + man/zcurve_data.Rd | 2 +- tests/testthat/test-zcurve.R | 8 +++---- 8 files changed, 78 insertions(+), 22 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0060e01..21800d2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: zcurve Title: An Implementation of Z-Curves -Version: 2.4.0 +Version: 2.4.1 Authors@R: c( person("František", "Bartoš", email = "f.bartos96@gmail.com", role = c("aut", "cre")), person("Ulrich", "Schimmack", email = "ulrich.schimmack@utoronto.ca", role = c("aut"))) diff --git a/NEWS.md b/NEWS.md index becec42..d86cc07 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +## version 2.4.1 +- Fixes ODR computation with censored data + ## version 2.4.0 - Implementation of ggploting function. diff --git a/R/data-preparation.R b/R/data-preparation.R index 66336ff..c38a50e 100644 --- a/R/data-preparation.R +++ b/R/data-preparation.R @@ -21,7 +21,7 @@ #' p-values treated as exact values. #' #' @details By default, the function extract the type of test statistic: -#' \itemize{ +#' \describe{ #' \item{\code{"F(df1, df2)=x"}}{F-statistic with df1 and df2 degrees of freedom,} #' \item{\code{"chi(df)=x"}}{Chi-square statistic with df degrees of freedom,} #' \item{\code{"t(df)=x"}}{for t-statistic with df degrees of freedom,} @@ -119,9 +119,10 @@ zcurve_data <- function(data, id = NULL, rounded = TRUE, stat_precise = 2, p_pre } # prepare empty containers - p_vals <- rep(NA, length(data)) - p_vals.lb <- rep(NA, length(data)) - p_vals.ub <- rep(NA, length(data)) + p_vals <- rep(NA, length(data)) + p_vals.rep <- rep(NA, length(data)) + p_vals.lb <- rep(NA, length(data)) + p_vals.ub <- rep(NA, length(data)) # compute and allocate the p-values accordingly for(i in seq_along(data)){ @@ -151,10 +152,11 @@ zcurve_data <- function(data, id = NULL, rounded = TRUE, stat_precise = 2, p_pre ), warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'.")) ) - p_vals.lb[i] <- 0 + p_vals.lb[i] <- 0 + p_vals.rep[i] <- p_vals.ub[i] }else if(rounded[i] != -1 && !censored[i]){ # rounded non-censored values - + temp_stat_val <- abs(stat_val[i]) temp_stat_val.lb <- abs(stat_val[i]) - 0.5 * 10^-digits[i] temp_stat_val.ub <- abs(stat_val[i]) + 0.5 * 10^-digits[i] @@ -182,9 +184,20 @@ zcurve_data <- function(data, id = NULL, rounded = TRUE, stat_precise = 2, p_pre ), warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'.")) ) + p_vals.rep[i] <- tryCatch( + switch( + stat_type[i], + "f" = stats::pf(temp_stat_val, df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE), + "c" = stats::pchisq(temp_stat_val, df = stat_df1[i], lower.tail = FALSE), + "t" = stats::pt(abs(temp_stat_val), df = stat_df1[i], lower.tail = FALSE) * 2, + "z" = stats::pnorm(abs(temp_stat_val), lower.tail = FALSE) * 2, + "p" = temp_stat_val + ), + warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'.")) + ) }else if(rounded[i] != -1 && censored[i]){ # rounded censored values - + temp_stat_val <- abs(stat_val[i]) temp_stat_val.ub <- abs(stat_val[i]) + 0.5 * 10^-digits[i] p_vals.ub[i] <- tryCatch( @@ -198,7 +211,18 @@ zcurve_data <- function(data, id = NULL, rounded = TRUE, stat_precise = 2, p_pre ), warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'.")) ) - p_vals.lb[i] <- 0 + p_vals.lb[i] <- 0 + p_vals.rep[i] <- tryCatch( + switch( + stat_type[i], + "f" = stats::pf(temp_stat_val.lb , df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE), + "c" = stats::pchisq(temp_stat_val.lb, df = stat_df1[i], lower.tail = FALSE), + "t" = stats::pt(temp_stat_val.lb, df = stat_df1[i], lower.tail = FALSE) * 2, + "z" = stats::pnorm(temp_stat_val.lb, lower.tail = FALSE) * 2, + "p" = temp_stat_val + ), + warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'.")) + ) } } @@ -212,6 +236,7 @@ zcurve_data <- function(data, id = NULL, rounded = TRUE, stat_precise = 2, p_pre "input" = data[!is.na(p_vals.lb)], "p.lb" = p_vals.lb[!is.na(p_vals.lb)], "p.ub" = p_vals.ub[!is.na(p_vals.ub)], + "p.rep" = p_vals.rep[!is.na(p_vals.rep)], "id" = id[!is.na(p_vals.lb)] ) ) diff --git a/R/main.R b/R/main.R index 2d499ef..8ec6603 100644 --- a/R/main.R +++ b/R/main.R @@ -128,6 +128,7 @@ zcurve <- function(z, z.lb, z.ub, p, p.lb, p.ub, data, method = "EM", boot ### prepare data if(missing(data)){ + # get point estimates on the same scale if(!missing(z)){ z <- abs(z) @@ -150,11 +151,18 @@ zcurve <- function(z, z.lb, z.ub, p, p.lb, p.ub, data, method = "EM", boot ub <- c(ub, .p_to_z(p.lb)) } + # get the total number of observations before removal for fitting purposes + N_obs <- length(z) + length(lb) + N_sig <- length(z[z >= control$a]) + length(lb[lb >= control$a]) + # restrict censoring to the fitting range & treat extremely censored values as extremely significant values if(!is.null(lb)){ - if(any(lb < control$a)) - stop("All censored observations must be higher than the fitting range.") + if(any(lb < control$a)){ + warning(paste0(sum(lb < control$a), " censored p-values removed due to the upper bound being larger that the fitting range."), immediate. = TRUE, call. = FALSE) + ub <- ub[lb >= control$a] + lb <- lb[lb >= control$a] + } z <- c(z, lb[lb >= control$b]) @@ -184,18 +192,26 @@ zcurve <- function(z, z.lb, z.ub, p, p.lb, p.ub, data, method = "EM", boot object$data <- numeric() } + # get the total number of observations before removal for fitting purposes + N_obs <- length(data$precise$p) + N_sig <- length(data$precise$p[.p_to_z(data$precise$p) >= control$a]) + if(nrow(data$censored) != 0){ lb <- .p_to_z(data$censored$p.ub) ub <- .p_to_z(data$censored$p.lb) + # get the total number of observations before removal for fitting purposes (using the collected bounds before accounting for rounding) + N_obs <- N_obs + length(.p_to_z(data$censored$p.rep)) + N_sig <- N_sig + length(.p_to_z(data$censored$p.rep)[.p_to_z(data$censored$p.rep) >= control$a]) + # remove non-significant censored p-values if(any(lb < control$a)){ warning(paste0(sum(lb < control$a), " censored p-values removed due to the upper bound being larger that the fitting range."), immediate. = TRUE, call. = FALSE) ub <- ub[lb >= control$a] lb <- lb[lb >= control$a] } - + # move too significant censored p-values among precise p-values if(length(lb) > 0 && any(lb >= control$b)){ object$data <- c(object$data, lb[lb >= control$b]) @@ -222,7 +238,9 @@ zcurve <- function(z, z.lb, z.ub, p, p.lb, p.ub, data, method = "EM", boot } } - object$control <- control + object$control <- control + object$N_obs <- N_obs + object$N_sig <- N_sig # only run the algorithm with some significant results if(sum(object$data > control$a & object$data < control$b) + nrow(object$data_censoring) < 10) @@ -353,9 +371,8 @@ summary.zcurve <- function(object, type = "results", all = FALSE, ERR.adj iter_text <- object$fit$iter } - temp_N_sig <- sum(object$data > stats::qnorm(object$control$sig_level/2, lower.tail = FALSE)) + - nrow(object$data_censoring[object$data_censoring$lb > object$control$a,]) - temp_N_obs <- length(object$data) + nrow(object$data_censoring) + temp_N_sig <- object$N_sig + temp_N_obs <- object$N_obs temp_N_used <- sum(object$data > object$control$a & object$data < object$control$b) + nrow(object$data_censoring[object$data_censoring$lb > object$control$a & object$data_censoring$lb < object$control$b ,]) diff --git a/R/zcurve_clustered.R b/R/zcurve_clustered.R index a4b0aeb..a3015c0 100644 --- a/R/zcurve_clustered.R +++ b/R/zcurve_clustered.R @@ -66,12 +66,20 @@ zcurve_clustered <- function(data, method = "b", bootstrap = 1000, parallel = FA z_id <- numeric() } + # get the total number of observations before removal for fitting purposes + N_obs <- length(data$precise$p) + N_sig <- length(data$precise$p[.p_to_z(data$precise$p) >= control$a]) + if(nrow(data$censored) != 0){ lb <- .p_to_z(data$censored$p.ub) ub <- .p_to_z(data$censored$p.lb) b_id <- data$censored$id + # get the total number of observations before removal for fitting purposes (using the collected bounds before accounting for rounding) + N_obs <- N_obs + length(.p_to_z(data$censored$p.rep)) + N_sig <- N_sig + length(.p_to_z(data$censored$p.rep)[.p_to_z(data$censored$p.rep) >= control$a]) + # remove non-significant censored p-values if(any(lb < control$a)){ warning(paste0(sum(lb < control$a), " censored p-values removed due to the upper bound being larger that the fitting range."), immediate. = TRUE, call. = FALSE) @@ -109,7 +117,9 @@ zcurve_clustered <- function(data, method = "b", bootstrap = 1000, parallel = FA object$data_id <- z_id object$data_censoring <- data.frame(lb = lb, ub = ub, id = b_id) - object$control <- control + object$control <- control + object$N_obs <- N_obs + object$N_sig <- N_sig # only run the algorithm with some significant results if(sum(z > control$a & z < control$b) + length(lb) < 10) diff --git a/man/zcurve-package.Rd b/man/zcurve-package.Rd index 83b6d58..d44445f 100644 --- a/man/zcurve-package.Rd +++ b/man/zcurve-package.Rd @@ -12,6 +12,7 @@ An implementation of z-curves - a method for estimating expected discovery and r Useful links: \itemize{ \item \url{https://fbartos.github.io/zcurve/} + \item Report bugs at \url{https://github.com/FBartos/zcurve/issues} } } diff --git a/man/zcurve_data.Rd b/man/zcurve_data.Rd index 5f6ca91..1cc7e73 100644 --- a/man/zcurve_data.Rd +++ b/man/zcurve_data.Rd @@ -37,7 +37,7 @@ to be censored as well as test statistics reported with low accuracy } \details{ By default, the function extract the type of test statistic: -\itemize{ +\describe{ \item{\code{"F(df1, df2)=x"}}{F-statistic with df1 and df2 degrees of freedom,} \item{\code{"chi(df)=x"}}{Chi-square statistic with df degrees of freedom,} \item{\code{"t(df)=x"}}{for t-statistic with df degrees of freedom,} diff --git a/tests/testthat/test-zcurve.R b/tests/testthat/test-zcurve.R index f92118d..2d94d89 100644 --- a/tests/testthat/test-zcurve.R +++ b/tests/testthat/test-zcurve.R @@ -184,7 +184,7 @@ test_that("z-curve clustered works", { "EDR 0.722 0.632 0.823" , "" , "Model converged in 57 + 450.85 iterations" , - "Fitted using 299 zcurve-data-values. 480 supplied, 299 significant (ODR = 0.62, 95% CI [0.58, 0.67]).", + "Fitted using 299 zcurve-data-values. 500 supplied, 300 significant (ODR = 0.60, 95% CI [0.56, 0.64]).", "Q = -236.27, 95% CI[-251.36, -224.63]" )) @@ -200,7 +200,7 @@ test_that("z-curve clustered works", { "EDR 0.783 0.655 0.895" , "" , "Model converged in 73 + 178 iterations" , - "Fitted using 299 zcurve-data-values. 480 supplied, 299 significant (ODR = 0.62, 95% CI [0.58, 0.67]).", + "Fitted using 299 zcurve-data-values. 500 supplied, 300 significant (ODR = 0.60, 95% CI [0.56, 0.64]).", "Q = -238.11, 95% CI[-269.95, -219.12]" )) @@ -278,7 +278,7 @@ test_that("z-curve clustered works", { "EDR 0.733 0.639 0.831" , "" , "Model converged in 72 + 430.1 iterations" , - "Fitted using 299 zcurve-data-values. 489 supplied, 299 significant (ODR = 0.61, 95% CI [0.57, 0.65]).", + "Fitted using 299 zcurve-data-values. 500 supplied, 300 significant (ODR = 0.60, 95% CI [0.56, 0.64]).", "Q = -319.14, 95% CI[-340.91, -306.78]" )) @@ -294,7 +294,7 @@ test_that("z-curve clustered works", { "EDR 0.788 0.651 0.901" , "" , "Model converged in 59 + 188 iterations" , - "Fitted using 299 zcurve-data-values. 489 supplied, 299 significant (ODR = 0.61, 95% CI [0.57, 0.65]).", + "Fitted using 299 zcurve-data-values. 500 supplied, 300 significant (ODR = 0.60, 95% CI [0.56, 0.64]).", "Q = -321.39, 95% CI[-361.78, -298.20]" ))