Skip to content

Commit

Permalink
Clusters (#11)
Browse files Browse the repository at this point in the history
* clustering
* fixed nested subsampling
  • Loading branch information
FBartos authored Nov 29, 2022
1 parent bb62ec4 commit e450186
Show file tree
Hide file tree
Showing 19 changed files with 1,420 additions and 51 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: zcurve
Title: An Implementation of Z-Curves
Version: 2.1.2
Version: 2.2.0
Authors@R: c(
person("František", "Bartoš", email = "[email protected]", role = c("aut", "cre")),
person("Ulrich", "Schimmack", email = "[email protected]", role = c("aut")))
Expand All @@ -17,7 +17,7 @@ License: GPL-3
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.1
RoxygenNote: 7.2.1
Imports:
Rcpp (>= 1.0.2),
nleqslv,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ export(significant_n)
export(summary.zcurve)
export(z_to_power)
export(zcurve)
export(zcurve_clustered)
export(zcurve_data)
importFrom(Rcpp,sourceCpp)
importFrom(Rdpack,reprompt)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## version 2.2.0
- Implementation of z-curve version that deals with clustered estimates.
- Fixing bootstrap of censored data -- now both the censored and uncensored data are bootstrapped.

## version 2.1.2
- Fixing number of statistically significant studies displayed in the annotation of z-curve plot.

Expand Down
12 changes: 10 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@
.Call(`_zcurve_zcurve_EMc_fit_fast_RCpp`, x, lb, ub, mu, sigma, theta, a, b, sig_level, max_iter, criterion)
}

.zcurve_EMc_fit_fast_w_RCpp <- function(x, x_w, lb, ub, b_w, mu, sigma, theta, a, b, sig_level, max_iter, criterion) {
.Call(`_zcurve_zcurve_EMc_fit_fast_w_RCpp`, x, x_w, lb, ub, b_w, mu, sigma, theta, a, b, sig_level, max_iter, criterion)
}

.zcurve_EM_start_RCpp <- function(x, type, K, mu, sigma, mu_alpha, mu_max, theta_alpha, a, b, sig_level, fit_reps, max_iter, criterion) {
.Call(`_zcurve_zcurve_EM_start_RCpp`, x, type, K, mu, sigma, mu_alpha, mu_max, theta_alpha, a, b, sig_level, fit_reps, max_iter, criterion)
}
Expand All @@ -57,7 +61,11 @@
.Call(`_zcurve_zcurve_EMc_start_fast_RCpp`, x, lb, ub, K, mu, sigma, mu_alpha, mu_max, theta_alpha, a, b, sig_level, fit_reps, max_iter, criterion)
}

.zcurve_EMc_boot_fast_RCpp <- function(x, lb, ub, mu, sigma, theta, a, b, sig_level, bootstrap, max_iter, criterion) {
.Call(`_zcurve_zcurve_EMc_boot_fast_RCpp`, x, lb, ub, mu, sigma, theta, a, b, sig_level, bootstrap, max_iter, criterion)
.zcurve_EMc_boot_fast_RCpp <- function(x, lb, ub, indx, mu, sigma, theta, a, b, sig_level, bootstrap, max_iter, criterion) {
.Call(`_zcurve_zcurve_EMc_boot_fast_RCpp`, x, lb, ub, indx, mu, sigma, theta, a, b, sig_level, bootstrap, max_iter, criterion)
}

.zcurve_EMc_boot_fast_w_RCpp <- function(x, x_w, lb, ub, b_w, indx, mu, sigma, theta, a, b, sig_level, bootstrap, max_iter, criterion) {
.Call(`_zcurve_zcurve_EMc_boot_fast_w_RCpp`, x, x_w, lb, ub, b_w, indx, mu, sigma, theta, a, b, sig_level, bootstrap, max_iter, criterion)
}

66 changes: 45 additions & 21 deletions R/data-preparation.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#' (i.e., rounded to too few decimals). See details for more information.
#'
#' @param data a vector strings containing the test statistics.
#' @param id a vector identifying observations from the same cluster.
#' @param rounded an optional argument specifying whether de-rounding should be applied.
#' Defaults to \code{FALSE} to treat all input as exact values or a numeric
#' vector with values specifying precision of the input. The other option,
Expand Down Expand Up @@ -50,7 +51,19 @@
#' # inspect the resulting object
#' data
#' @seealso [zcurve()], [print.zcurve_data()], [head.zcurve_data()]
zcurve_data <- function(data, rounded = TRUE, stat_precise = 2, p_precise = 3){
zcurve_data <- function(data, id = NULL, rounded = TRUE, stat_precise = 2, p_precise = 3){

if(!is.character(data)){
stop("'data' must be a character vector")
}

if(is.null(id)){
id <- 1:length(data)
}else if(is.vector(id) && length(data) == length(id)){
id <- as.numeric(as.factor(as.character(id)))
}else{
stop("'id' must be a vector of the same length as the data")
}

data <- tolower(data)
data <- gsub(" ", "", data)
Expand Down Expand Up @@ -89,10 +102,10 @@ zcurve_data <- function(data, rounded = TRUE, stat_precise = 2, p_precise = 3){
# set rounding (0 = un-rounded due to automatic conversion)
if(length(rounded) == 1 && !rounded){
# deal with the values as precise values
rounded <- rep(0, length(data))
rounded <- rep(-1, length(data))
}else if(length(rounded) == 1 && rounded){
# specify automatic rounding
rounded <- rep(FALSE, length(data))
rounded <- rep(-1, length(data))
rounded[stat_type == "p" & digits < p_precise] <- digits[stat_type == "p" & digits < p_precise]
rounded[stat_type != "p" & digits < stat_precise] <- digits[stat_type != "p" & digits < stat_precise]
}else{
Expand All @@ -112,7 +125,7 @@ zcurve_data <- function(data, rounded = TRUE, stat_precise = 2, p_precise = 3){

# compute and allocate the p-values accordingly
for(i in seq_along(data)){
if(rounded[i] == 0 && !censored[i]){
if(rounded[i] == -1 && !censored[i]){
# precise non-censored values
p_vals[i] <- tryCatch(
switch(
Expand All @@ -125,7 +138,7 @@ zcurve_data <- function(data, rounded = TRUE, stat_precise = 2, p_precise = 3){
),
warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'."))
)
}else if(rounded[i] == 0 && censored[i]){
}else if(rounded[i] == -1 && censored[i]){
# precise censored values
p_vals.ub[i] <- tryCatch(
switch(
Expand All @@ -139,39 +152,48 @@ zcurve_data <- function(data, rounded = TRUE, stat_precise = 2, p_precise = 3){
warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'."))
)
p_vals.lb[i] <- 0
}else if(rounded[i] != 0 && !censored[i]){
}else if(rounded[i] != -1 && !censored[i]){
# rounded non-censored values

temp_stat_val.lb <- abs(stat_val[i]) - 0.5 * 10^-digits[i]
temp_stat_val.ub <- abs(stat_val[i]) + 0.5 * 10^-digits[i]

temp_stat_val.lb <- ifelse(temp_stat_val.lb < 0, 0, temp_stat_val.lb)

p_vals.ub[i] <- tryCatch(
switch(
stat_type[i],
"f" = stats::pf(stat_val[i] - 0.5 * 10^-digits[i] , df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE),
"c" = stats::pchisq(stat_val[i] - 0.5 * 10^-digits[i], df = stat_df1[i], lower.tail = FALSE),
"t" = stats::pt(abs(stat_val[i]) - 0.5 * 10^-digits[i], df = stat_df1[i], lower.tail = FALSE) * 2,
"z" = stats::pnorm(abs(stat_val[i]) - 0.5 * 10^-digits[i], lower.tail = FALSE) * 2,
"f" = stats::pf(temp_stat_val.lb , df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE),
"c" = stats::pchisq(temp_stat_val.lb, df = stat_df1[i], lower.tail = FALSE),
"t" = stats::pt(temp_stat_val.lb, df = stat_df1[i], lower.tail = FALSE) * 2,
"z" = stats::pnorm(temp_stat_val.lb, lower.tail = FALSE) * 2,
"p" = stat_val[i] + 0.5 * 10^-digits[i]
),
warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'."))
)
p_vals.lb[i] <- tryCatch(
switch(
stat_type[i],
"f" = stats::pf(stat_val[i] + 0.5 * 10^-digits[i] , df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE),
"c" = stats::pchisq(stat_val[i] + 0.5 * 10^-digits[i], df = stat_df1[i], lower.tail = FALSE),
"t" = stats::pt(abs(stat_val[i]) + 0.5 * 10^-digits[i], df = stat_df1[i], lower.tail = FALSE) * 2,
"z" = stats::pnorm(abs(stat_val[i]) + 0.5 * 10^-digits[i], lower.tail = FALSE) * 2,
"f" = stats::pf(temp_stat_val.ub, df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE),
"c" = stats::pchisq(temp_stat_val.ub, df = stat_df1[i], lower.tail = FALSE),
"t" = stats::pt(temp_stat_val.ub, df = stat_df1[i], lower.tail = FALSE) * 2,
"z" = stats::pnorm(temp_stat_val.ub, lower.tail = FALSE) * 2,
"p" = stat_val[i] - 0.5 * 10^-digits[i]
),
warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'."))
)
}else if(rounded[i] != 0 && !censored[i]){
}else if(rounded[i] != -1 && censored[i]){
# rounded censored values

temp_stat_val.ub <- abs(stat_val[i]) + 0.5 * 10^-digits[i]

p_vals.ub[i] <- tryCatch(
switch(
stat_type[i],
"f" = stats::pf(stat_val[i] - 0.5 * 10^-digits[i] , df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE),
"c" = stats::pchisq(stat_val[i] - 0.5 * 10^-digits[i], df = stat_df1[i], lower.tail = FALSE),
"t" = stats::pt(abs(stat_val[i]) - 0.5 * 10^-digits[i], df = stat_df1[i], lower.tail = FALSE) * 2,
"z" = stats::pnorm(abs(stat_val[i]) - 0.5 * 10^-digits[i], lower.tail = FALSE) * 2,
"f" = stats::pf(temp_stat_val.lb , df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE),
"c" = stats::pchisq(temp_stat_val.lb, df = stat_df1[i], lower.tail = FALSE),
"t" = stats::pt(temp_stat_val.lb, df = stat_df1[i], lower.tail = FALSE) * 2,
"z" = stats::pnorm(temp_stat_val.lb, lower.tail = FALSE) * 2,
"p" = stat_val[i] + 0.5 * 10^-digits[i]
),
warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'."))
Expand All @@ -183,12 +205,14 @@ zcurve_data <- function(data, rounded = TRUE, stat_precise = 2, p_precise = 3){
output <- list(
precise = data.frame(
"input" = data[!is.na(p_vals)],
"p" = p_vals[!is.na(p_vals)]
"p" = p_vals[!is.na(p_vals)],
"id" = id[!is.na(p_vals)]
),
censored = data.frame(
"input" = data[!is.na(p_vals.lb)],
"p.lb" = p_vals.lb[!is.na(p_vals.lb)],
"p.ub" = p_vals.ub[!is.na(p_vals.ub)]
"p.ub" = p_vals.ub[!is.na(p_vals.ub)],
"id" = id[!is.na(p_vals.lb)]
)
)
class(output) <- "zcurve_data"
Expand Down
10 changes: 5 additions & 5 deletions R/main.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ zcurve <- function(z, z.lb, z.ub, p, p.lb, p.ub, data, method = "EM", boot
stop("Wrong p-values input: Data are not a vector")
input_type <- c(input_type, "p")
}
}else if(class(data) == "zcurve_data"){
}else if(inherits(data, "zcurve_data")){
input_type <- "zcurve-data"
}else{
stop("The 'data' input must be created by the `zcurve_data()` function. See `?zcurve_data()` for more information.")
Expand Down Expand Up @@ -175,13 +175,13 @@ zcurve <- function(z, z.lb, z.ub, p, p.lb, p.ub, data, method = "EM", boot

}else{

if(length(data$precise) != 0){
if(nrow(data$precise) != 0){
object$data <- .p_to_z(data$precise$p)
}else{
object$data <- numeric()
}

if(length(data$censored) != 0){
if(nrow(data$censored) != 0){

lb <- .p_to_z(data$censored$p.ub)
ub <- .p_to_z(data$censored$p.lb)
Expand Down Expand Up @@ -316,7 +316,7 @@ print.zcurve <- function(x, ...){
#' @seealso [zcurve()]
summary.zcurve <- function(object, type = "results", all = FALSE, ERR.adj = .03, EDR.adj = .05, round.coef = 3, ...){

if(object$method == "EM"){
if(substr(object$method, 1, 2) == "EM"){
if(!is.null(object$boot)){
fit_index <- c(object$fit$Q, unname(stats::quantile(object$boot$Q, c(.025, .975))))
}else{
Expand Down Expand Up @@ -609,7 +609,7 @@ plot.zcurve <- function(x, annotation = FALSE, CI = FALSE, extrapolate
h1 <- graphics::hist(x$data[x$data > x$control$a & x$data < x$control$b], breaks = br1, plot = F)

# add censored observations to the histogram
if(length(x$data_censoring) != 0){
if(nrow(x$data_censoring) != 0){
# spread the censored observation across the z-values
cen_counts <- do.call(rbind, lapply(1:nrow(x$data_censoring), function(i){
temp_counts <- (x$data_censoring$lb[i] < h1$breaks[-length(h1$breaks)] & x$data_censoring$ub[i] > h1$breaks[-length(h1$breaks)])
Expand Down
10 changes: 10 additions & 0 deletions R/zcurve_EM.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@
max_iter = control$max_iter,
criterion = control$criterion)
}else if(control$type == 3){


fit <- .zcurve_EMc_fit_fast_RCpp(x = z,
lb = lb,
ub = ub,
Expand Down Expand Up @@ -123,9 +125,17 @@
criterion = control$criterion_boot,
max_iter = control$max_iter_boot)
}else if(control$type == 3){

indx <- c(
if(length(z) > 0) 1:length(z),
if(length(lb) > 0) (-length(lb)):-1
)


fit_boot <- .zcurve_EMc_boot_fast_RCpp(x = z,
lb = lb,
ub = ub,
indx = indx,
mu = fit$mu,
sigma = control$sigma,
theta = fit$weights,
Expand Down
Loading

0 comments on commit e450186

Please sign in to comment.