Merge pull request Merck#227 from Merck/226-standardized-fh_weight-output

standardize `wlr` and `maxcombo` output
LittleBeannie authored Apr 11, 2024
2 parents fca780f + 9e084fa commit 75d7b67
Showing 43 changed files with 981 additions and 1,423 deletions.
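For orientation, the standardized interface this PR moves toward might be exercised as below. This is a sketch assuming the package is simtrial and that the exported `wlr()` and `maxcombo()` accept a cut dataset plus weighting parameters; the exact argument names are assumptions, not confirmed by this diff.

library(simtrial)

# Simulate a trial and cut the data at 100 events
dat <- sim_pw_surv(n = 200) |> cut_data_by_event(100)

# Weighted logrank test with Fleming-Harrington FH(0, 1) weights
# (argument names below are assumptions based on the PR title)
dat |> wlr(weight = fh(rho = 0, gamma = 1))

# MaxCombo test combining FH(0, 0) and FH(0, 1)
dat |> maxcombo(rho = c(0, 0), gamma = c(0, 1))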
4 changes: 0 additions & 4 deletions NAMESPACE
@@ -6,18 +6,14 @@ export(create_test)
export(cut_data_by_date)
export(cut_data_by_event)
export(early_zero)
export(early_zero_weight)
export(fh)
export(fh_weight)
export(fit_pwexp)
export(get_analysis_date)
export(get_cut_date_by_event)
export(maxcombo)
export(mb)
export(mb_weight)
export(milestone)
export(multitest)
export(pvalue_maxcombo)
export(randomize_by_fixed_block)
export(rmst)
export(rpwexp)
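With these exports dropped (the helpers gain `@noRd` in the diffs below), any downstream code still calling them would need to reach into the package namespace. A hedged illustration, assuming the package is simtrial and `x` is a counting-process dataset:

# Unexported helpers remain reachable via `:::`, though this is
# discouraged outside package development
x |> simtrial:::fh_weight(rho = 0, gamma = 1)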
5 changes: 5 additions & 0 deletions R/counting_process.R
@@ -61,6 +61,11 @@
#'
#' @export
#'
#' @details
#' The output of [counting_process()] is a counting process dataset,
#' grouped by stratum and sorted within stratum by increasing times
#' at which events occur.
#'
#' @examples
#' # Example 1
#' x <- data.frame(
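A quick check of the `@details` claim above, that the output is grouped by stratum and sorted within stratum by event time; a sketch assuming the output columns `stratum` and `tte`:

library(simtrial)

x <- sim_pw_surv(n = 200) |>
  cut_data_by_event(100) |>
  counting_process(arm = "experimental")

# Event times should be non-decreasing within each stratum
all(tapply(x$tte, x$stratum, function(t) !is.unsorted(t)))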
68 changes: 4 additions & 64 deletions R/early_zero_weight.R
@@ -27,71 +27,10 @@
#' early zero weighted logrank test for the data in `x`.
#'
#' @importFrom data.table ":=" as.data.table fifelse merge.data.table setDF
#'
#' @export
#'
#' @references
#' Xu, Z., Zhen, B., Park, Y., & Zhu, B. (2017).
#' "Designing therapeutic cancer vaccine trials with delayed treatment effect."
#' _Statistics in Medicine_, 36(4), 592--605.
#'
#' @examples
#' library(dplyr)
#' library(gsDesign2)
#'
#' # Example 1: Unstratified
#' sim_pw_surv(n = 200) |>
#' cut_data_by_event(125) |>
#' counting_process(arm = "experimental") |>
#' early_zero_weight(early_period = 2) |>
#' filter(row_number() %in% seq(5, 200, 40))
#'
#' # Example 2: Stratified
#' n <- 500
#' # Two strata
#' stratum <- c("Biomarker-positive", "Biomarker-negative")
#' prevalence_ratio <- c(0.6, 0.4)
#'
#' # Enrollment rate
#' enroll_rate <- define_enroll_rate(
#' stratum = rep(stratum, each = 2),
#' duration = c(2, 10, 2, 10),
#' rate = c(c(1, 4) * prevalence_ratio[1], c(1, 4) * prevalence_ratio[2])
#' )
#' enroll_rate$rate <- enroll_rate$rate * n / sum(enroll_rate$duration * enroll_rate$rate)
#'
#' # Failure rate
#' med_pos <- 10 # Median of the biomarker positive population
#' med_neg <- 8 # Median of the biomarker negative population
#' hr_pos <- c(1, 0.7) # Hazard ratio of the biomarker positive population
#' hr_neg <- c(1, 0.8) # Hazard ratio of the biomarker negative population
#' fail_rate <- define_fail_rate(
#' stratum = rep(stratum, each = 2),
#' duration = c(3, 1000, 4, 1000),
#' fail_rate = c(log(2) / c(med_pos, med_pos, med_neg, med_neg)),
#' hr = c(hr_pos, hr_neg),
#' dropout_rate = 0.01
#' )
#'
#' # Simulate data
#' temp <- to_sim_pw_surv(fail_rate) # Convert the failure rate
#' set.seed(2023)
#'
#' sim_pw_surv(
#' n = n, # Sample size
#' # Stratified design with prevalence ratio of 6:4
#' stratum = tibble(stratum = stratum, p = prevalence_ratio),
#' # Randomization ratio
#' block = c("control", "control", "experimental", "experimental"),
#' enroll_rate = enroll_rate, # Enrollment rate
#' fail_rate = temp$fail_rate, # Failure rate
#' dropout_rate = temp$dropout_rate # Dropout rate
#' ) |>
#' cut_data_by_event(125) |>
#' counting_process(arm = "experimental") |>
#' early_zero_weight(early_period = 2, fail_rate = fail_rate) |>
#' filter(row_number() %in% seq(5, 200, 40))
#' @noRd
early_zero_weight <- function(x, early_period = 4, fail_rate = NULL) {


ans <- as.data.table(x)
n_stratum <- length(unique(ans$stratum))

@@ -118,5 +57,6 @@ early_zero_weight <- function(x, early_period = 4, fail_rate = NULL) {
}

setDF(ans)

return(ans)
}
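For intuition, the unstratified weighting implemented here (per the Xu et al. 2017 reference above) is zero weight during the early period and unit weight afterward. A minimal sketch, assuming the `tte` column from [counting_process()] and ignoring the stratified `fail_rate` branch; this is an illustration, not the package's internal code:

early_zero_sketch <- function(x, early_period = 4) {
  # Zero out observed-minus-expected contributions before early_period
  x$weight <- ifelse(x$tte < early_period, 0, 1)
  x
}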
230 changes: 17 additions & 213 deletions R/fh_weight.R
@@ -22,138 +22,29 @@
#'
#' @param x A [counting_process()]-class data frame with a counting process
#' dataset.
#' @param rho_gamma A data frame with variables `rho` and `gamma`, both greater
#' than or equal to zero, specifying one Fleming-Harrington weighted logrank
#' test per row; Default: `data.frame(rho = c(0, 0, 1, 1), gamma = c(0, 1, 0, 1))`.
#' @param return_variance A logical flag that, if `TRUE`, adds a column with
#' the estimated variance of the weighted sum of observed minus expected;
#' see details; Default: `FALSE`.
#' @param return_corr A logical flag that, if `TRUE`, adds columns with the
#' estimated correlations of the weighted sums of observed minus expected;
#' see details; Default: `FALSE`.
#' @param rho A numerical value greater than or equal to zero, specifying the
#' rho parameter of a single Fleming-Harrington weighted logrank test.
#' @param gamma A numerical value greater than or equal to zero, specifying the
#' gamma parameter of a single Fleming-Harrington weighted logrank test.
#'
#' @return A data frame with `rho_gamma` as input and the FH test statistic
#' for the data in `x` (`z`, a directional square root of the usual
#' weighted logrank test); if variance calculations are specified
#' (for example, to be used for covariances in a combination test),
#' they are returned in the column `Var`.
#'
#' @details
#' The input `x`, produced by [counting_process()], is a counting process
#' dataset grouped by stratum and sorted within stratum by increasing times
#' at which events occur.
#' - \eqn{z} - Standardized normal Fleming-Harrington weighted logrank test.
#' - \eqn{i} - Stratum index.
#' - \eqn{d_i} - Number of distinct times at which events occurred in
#' stratum \eqn{i}.
#' - \eqn{t_{ij}} - Ordered times at which events in stratum
#' \eqn{i}, \eqn{j = 1, 2, \ldots, d_i} were observed;
#' for each observation, \eqn{t_{ij}} represents the time post study entry.
#' - \eqn{O_{ij.}} - Total number of events in stratum \eqn{i} that occurred
#' at time \eqn{t_{ij}}.
#' - \eqn{O_{ije}} - Total number of events in stratum \eqn{i} in the
#' experimental treatment group that occurred at time \eqn{t_{ij}}.
#' - \eqn{N_{ij.}} - Total number of study subjects in stratum \eqn{i}
#' who were followed for at least duration \eqn{t_{ij}}.
#' - \eqn{E_{ije}} - Expected observations in experimental treatment group
#' given random selection of \eqn{O_{ij.}} from those in
#' stratum \eqn{i} at risk at time \eqn{t_{ij}}.
#' - \eqn{V_{ije}} - Hypergeometric variance for \eqn{E_{ije}} as
#' produced in `var_o_minus_e` from [counting_process()].
#' - \eqn{N_{ije}} - Total number of study subjects in
#' stratum \eqn{i} in the experimental treatment group
#' who were followed for at least duration \eqn{t_{ij}}.
#' - \eqn{E_{ije}} - Expected observations in experimental group in
#' stratum \eqn{i} at time \eqn{t_{ij}} conditioning on the overall number
#' of events and at risk populations at that time and sampling at risk
#' observations without replacement:
#' \deqn{E_{ije} = O_{ij.} N_{ije}/N_{ij.}}
#' - \eqn{S_{ij}} - Kaplan-Meier estimate of survival in combined
#' treatment groups immediately prior to time \eqn{t_{ij}}.
#' - \eqn{\rho, \gamma} - Real parameters for Fleming-Harrington test.
#' - \eqn{X_i} - Numerator for signed logrank test in stratum \eqn{i}
#' \deqn{X_i = \sum_{j=1}^{d_{i}} S_{ij}^\rho(1-S_{ij})^\gamma(O_{ije}-E_{ije})}
#' - \eqn{V_i} - Variance used in denominator for Fleming-Harrington
#' weighted logrank tests
#' \deqn{V_i = \sum_{j=1}^{d_{i}} (S_{ij}^\rho(1-S_{ij})^\gamma)^2 V_{ij}}
#' The stratified Fleming-Harrington weighted logrank test is then computed as:
#' \deqn{z = \sum_i X_i/\sqrt{\sum_i V_i}.}
#' @return The input data frame `x` with the Fleming-Harrington weights for
#' the input `rho` and `gamma` added as a `weight` column.
#'
#' @importFrom data.table data.table merge.data.table setDF
#'
#' @export
#'
#' @examples
#' library(dplyr)
#'
#' # Example 1
#' # Use default enrollment and event rates, cutting at 100 events
#' x <- sim_pw_surv(n = 200) |>
#' cut_data_by_event(100) |>
#' counting_process(arm = "experimental")
#'
#' # Compute logrank FH(0, 1)
#' fh_weight(x, rho_gamma = data.frame(rho = 0, gamma = 1))
#' fh_weight(x, rho_gamma = data.frame(rho = 0, gamma = 1), return_variance = TRUE)
#'
#' # Compute the covariance between FH(0, 0), FH(0, 1) and FH(1, 0)
#' fh_weight(x, rho_gamma = data.frame(rho = c(0, 0, 1), gamma = c(0, 1, 0)))
#' fh_weight(x, rho_gamma = data.frame(rho = c(0, 0, 1), gamma = c(0, 1, 0)), return_variance = TRUE)
#' fh_weight(x, rho_gamma = data.frame(rho = c(0, 0, 1), gamma = c(0, 1, 0)), return_corr = TRUE)
#'
#' # Example 2
#' # Use default enrollment and event rates, cutting at 100 events
#' set.seed(123)
#' x <- sim_pw_surv(n = 200) |>
#' cut_data_by_event(100) |>
#' counting_process(arm = "experimental") |>
#' fh_weight(rho_gamma = data.frame(rho = c(0, 0), gamma = c(0, 1)), return_corr = TRUE)
#'
#' # Compute p-value for MaxCombo
#' library(mvtnorm)
#' 1 - pmvnorm(
#' lower = rep(min(x$z), nrow(x)),
#' corr = data.matrix(select(x, -c(rho, gamma, z))),
#' algorithm = GenzBretz(maxpts = 50000, abseps = 0.00001)
#' )[1]
#'
#' # Check that covariance is as expected
#' x <- sim_pw_surv(n = 200) |>
#' cut_data_by_event(100) |>
#' counting_process(arm = "experimental")
#'
#' x |> fh_weight(
#' rho_gamma = data.frame(
#' rho = c(0, 0),
#' gamma = c(0, 1)
#' ),
#' return_variance = TRUE
#' )
#'
#' # The off-diagonal element should equal the variance in the following
#' x |> fh_weight(
#' rho_gamma = data.frame(
#' rho = 0,
#' gamma = .5
#' ),
#' return_variance = TRUE
#' )
#'
#' # Compare the off-diagonal result with fh_weight()
#' x |> fh_weight(rho_gamma = data.frame(rho = 0, gamma = .5))
#' @noRd
fh_weight <- function(
x = sim_pw_surv(n = 200) |>
cut_data_by_event(150) |>
counting_process(arm = "experimental"),
rho_gamma = data.frame(
rho = c(0, 0, 1, 1),
gamma = c(0, 1, 0, 1)
),
return_variance = FALSE,
return_corr = FALSE) {
n_weight <- nrow(rho_gamma)
rho = 0,
gamma = 1) {


# Input checking ----
if (length(rho) != 1 || length(gamma) != 1) {
stop("fh_weight: please input single numerical values for rho and gamma.")
}

# Check input failure rate assumptions
if (!is.data.frame(x)) {
stop("fh_weight: x in `fh_weight()` must be a data frame.")
}
Expand All @@ -170,94 +61,7 @@ fh_weight <- function(
stop("fh_weight: x column names in `fh_weight()` must contain var_o_minus_e.")
}

if (return_variance && return_corr) {
stop("fh_weight: can't report both covariance and correlation for MaxCombo test.")
}

if (return_corr && n_weight == 1) {
stop("fh_weight: can't report the correlation for a single WLR test.")
}

if (n_weight == 1) {
ans <- wlr_z_stat(x, rho_gamma = rho_gamma, return_variance = return_variance)
} else {
# Get average rho and gamma for FH covariance matrix
# We want ave_rho[i,j] = (rho[i] + rho[j])/2
# and ave_gamma[i,j] = (gamma[i] + gamma[j])/2
ave_rho <- (matrix(rho_gamma$rho, nrow = n_weight, ncol = n_weight, byrow = FALSE) +
matrix(rho_gamma$rho, nrow = n_weight, ncol = n_weight, byrow = TRUE)
) / 2
ave_gamma <- (matrix(rho_gamma$gamma, nrow = n_weight, ncol = n_weight) +
matrix(rho_gamma$gamma, nrow = n_weight, ncol = n_weight, byrow = TRUE)
) / 2

# Convert to data.table
rg_new <- data.table(rho = as.numeric(ave_rho), gamma = as.numeric(ave_gamma))
# Get unique values of rho, gamma
rg_unique <- unique(rg_new)

# Compute FH statistic for unique values
# and merge back to full set of pairs
rg_fh <- merge.data.table(
x = rg_new,
y = wlr_z_stat(x, rho_gamma = rg_unique, return_variance = TRUE),
by = c("rho", "gamma"),
all.x = TRUE,
sort = FALSE
)

# Get z statistics for input rho, gamma combinations
z <- rg_fh$z[(0:(n_weight - 1)) * n_weight + 1:n_weight]

# Get covariance matrix
cov_mat <- matrix(rg_fh$var, nrow = n_weight, byrow = TRUE)

if (return_corr) {
corr_mat <- stats::cov2cor(cov_mat)
colnames(corr_mat) <- paste("v", seq_len(ncol(corr_mat)), sep = "")
ans <- cbind(rho_gamma, z, as.data.frame(corr_mat))
} else if (return_variance) {
corr_mat <- cov_mat
colnames(corr_mat) <- paste("v", seq_len(ncol(corr_mat)), sep = "")
ans <- cbind(rho_gamma, z, as.data.frame(corr_mat))
} else if (return_corr + return_variance == 0) {
corr_mat <- NULL
ans <- cbind(rho_gamma, z)
}
}

setDF(ans)
return(ans)
}
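For a single pair (rho, gamma), the removed `@details` formulas above collapse to a few lines; a sketch assuming the [counting_process()] columns `s`, `o_minus_e`, and `var_o_minus_e`:

# z = sum(w * (O - E)) / sqrt(sum(w^2 * V)), with w = S^rho * (1 - S)^gamma
fh_z <- function(x, rho = 0, gamma = 0) {
  w <- x$s^rho * (1 - x$s)^gamma
  sum(w * x$o_minus_e) / sqrt(sum(w^2 * x$var_o_minus_e))
}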

# Build an internal function to compute the Z statistics
# of the WLR under a sequence of rho and gamma values.
wlr_z_stat <- function(x, rho_gamma, return_variance) {
ans <- rho_gamma

xx <- x[, c("s", "o_minus_e", "var_o_minus_e")]

ans$z <- rep(0, nrow(rho_gamma))

if (return_variance) {
ans$var <- rep(0, nrow(rho_gamma))
}

for (i in seq_len(nrow(rho_gamma))) {
weight <- xx$s^rho_gamma$rho[i] * (1 - xx$s)^rho_gamma$gamma[i]
weighted_o_minus_e <- weight * xx$o_minus_e
weighted_var <- weight^2 * xx$var_o_minus_e

weighted_var_total <- sum(weighted_var)
weighted_o_minus_e_total <- sum(weighted_o_minus_e)


ans$z[i] <- weighted_o_minus_e_total / sqrt(weighted_var_total)

if (return_variance) {
ans$var[i] <- weighted_var_total
}
}
x$weight <- x$s^rho * (1 - x$s)^gamma

return(ans)
return(x)
}
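The parameter-averaging trick in the removed covariance code rests on a simple identity: because the FH weights multiply, the covariance of two FH statistics equals the variance of the FH statistic at the averaged (rho, gamma). A sketch, reusing a counting-process dataset `x` as in the examples above:

# cov(FH(0, 0), FH(0, 1)) equals var(FH(0, 0.5)) since
# w1 * w2 = (1 - s) = (s^0 * (1 - s)^0.5)^2
w1 <- x$s^0 * (1 - x$s)^0
w2 <- x$s^0 * (1 - x$s)^1
cov_01 <- sum(w1 * w2 * x$var_o_minus_e)
w_avg <- x$s^0 * (1 - x$s)^0.5
var_avg <- sum(w_avg^2 * x$var_o_minus_e)
all.equal(cov_01, var_avg)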