Merge pull request Merck#227 from Merck/226-standardized-fh_weight-output

standardize `wlr` and `maxcombo` output
LittleBeannie authored Apr 11, 2024
2 parents fca780f + 9e084fa commit 75d7b67
Showing 43 changed files with 981 additions and 1,423 deletions.
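For orientation, the standardized interface this PR moves toward might be exercised as below. This is a sketch assuming the package is simtrial and that the exported `wlr()` and `maxcombo()` accept a cut dataset plus weighting parameters; the exact argument names are assumptions, not confirmed by this diff.

library(simtrial)

# Simulate a trial and cut the data at 100 events
dat <- sim_pw_surv(n = 200) |> cut_data_by_event(100)

# Weighted logrank test with Fleming-Harrington FH(0, 1) weights
# (argument names below are assumptions based on the PR title)
dat |> wlr(weight = fh(rho = 0, gamma = 1))

# MaxCombo test combining FH(0, 0) and FH(0, 1)
dat |> maxcombo(rho = c(0, 0), gamma = c(0, 1))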
4 changes: 0 additions & 4 deletions NAMESPACE
@@ -6,18 +6,14 @@ export(create_test)
export(cut_data_by_date)
export(cut_data_by_event)
export(early_zero)
export(early_zero_weight)
export(fh)
export(fh_weight)
export(fit_pwexp)
export(get_analysis_date)
export(get_cut_date_by_event)
export(maxcombo)
export(mb)
export(mb_weight)
export(milestone)
export(multitest)
export(pvalue_maxcombo)
export(randomize_by_fixed_block)
export(rmst)
export(rpwexp)
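With these exports dropped (the helpers gain `@noRd` in the diffs below), any downstream code still calling them would need to reach into the package namespace. A hedged illustration, assuming the package is simtrial and `x` is a counting-process dataset:

# Unexported helpers remain reachable via `:::`, though this is
# discouraged outside package development
x |> simtrial:::fh_weight(rho = 0, gamma = 1)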
5 changes: 5 additions & 0 deletions R/counting_process.R
@@ -61,6 +61,11 @@
#'
#' @export
#'
#' @details
#' The output of [counting_process()] is a counting process dataset,
#' grouped by stratum and sorted within stratum by increasing times
#' at which events occur.
#'
#' @examples
#' # Example 1
#' x <- data.frame(
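A quick check of the `@details` claim above, that the output is grouped by stratum and sorted within stratum by event time; a sketch assuming the output columns `stratum` and `tte`:

library(simtrial)

x <- sim_pw_surv(n = 200) |>
  cut_data_by_event(100) |>
  counting_process(arm = "experimental")

# Event times should be non-decreasing within each stratum
all(tapply(x$tte, x$stratum, function(t) !is.unsorted(t)))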
68 changes: 4 additions & 64 deletions R/early_zero_weight.R
@@ -27,71 +27,10 @@
#' early zero weighted logrank test for the data in `x`.
#'
#' @importFrom data.table ":=" as.data.table fifelse merge.data.table setDF
#'
#' @export
#'
#' @references
#' Xu, Z., Zhen, B., Park, Y., & Zhu, B. (2017).
#' "Designing therapeutic cancer vaccine trials with delayed treatment effect."
#' _Statistics in Medicine_, 36(4), 592--605.
#'
#' @examples
#' library(dplyr)
#' library(gsDesign2)
#'
#' # Example 1: Unstratified
#' sim_pw_surv(n = 200) |>
#' cut_data_by_event(125) |>
#' counting_process(arm = "experimental") |>
#' early_zero_weight(early_period = 2) |>
#' filter(row_number() %in% seq(5, 200, 40))
#'
#' # Example 2: Stratified
#' n <- 500
#' # Two strata
#' stratum <- c("Biomarker-positive", "Biomarker-negative")
#' prevalence_ratio <- c(0.6, 0.4)
#'
#' # Enrollment rate
#' enroll_rate <- define_enroll_rate(
#' stratum = rep(stratum, each = 2),
#' duration = c(2, 10, 2, 10),
#' rate = c(c(1, 4) * prevalence_ratio[1], c(1, 4) * prevalence_ratio[2])
#' )
#' enroll_rate$rate <- enroll_rate$rate * n / sum(enroll_rate$duration * enroll_rate$rate)
#'
#' # Failure rate
#' med_pos <- 10 # Median of the biomarker positive population
#' med_neg <- 8 # Median of the biomarker negative population
#' hr_pos <- c(1, 0.7) # Hazard ratio of the biomarker positive population
#' hr_neg <- c(1, 0.8) # Hazard ratio of the biomarker negative population
#' fail_rate <- define_fail_rate(
#' stratum = rep(stratum, each = 2),
#' duration = c(3, 1000, 4, 1000),
#' fail_rate = c(log(2) / c(med_pos, med_pos, med_neg, med_neg)),
#' hr = c(hr_pos, hr_neg),
#' dropout_rate = 0.01
#' )
#'
#' # Simulate data
#' temp <- to_sim_pw_surv(fail_rate) # Convert the failure rate
#' set.seed(2023)
#'
#' sim_pw_surv(
#' n = n, # Sample size
#' # Stratified design with prevalence ratio of 6:4
#' stratum = tibble(stratum = stratum, p = prevalence_ratio),
#' # Randomization ratio
#' block = c("control", "control", "experimental", "experimental"),
#' enroll_rate = enroll_rate, # Enrollment rate
#' fail_rate = temp$fail_rate, # Failure rate
#' dropout_rate = temp$dropout_rate # Dropout rate
#' ) |>
#' cut_data_by_event(125) |>
#' counting_process(arm = "experimental") |>
#' early_zero_weight(early_period = 2, fail_rate = fail_rate) |>
#' filter(row_number() %in% seq(5, 200, 40))
#' @noRd
early_zero_weight <- function(x, early_period = 4, fail_rate = NULL) {


ans <- as.data.table(x)
n_stratum <- length(unique(ans$stratum))

@@ -118,5 +57,6 @@ early_zero_weight <- function(x, early_period = 4, fail_rate = NULL) {
}

setDF(ans)

return(ans)
}
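For intuition, the unstratified weighting implemented here (per the Xu et al. 2017 reference above) is zero weight during the early period and unit weight afterward. A minimal sketch, assuming the `tte` column from [counting_process()] and ignoring the stratified `fail_rate` branch; this is an illustration, not the package's internal code:

early_zero_sketch <- function(x, early_period = 4) {
  # Zero out observed-minus-expected contributions before early_period
  x$weight <- ifelse(x$tte < early_period, 0, 1)
  x
}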
230 changes: 17 additions & 213 deletions R/fh_weight.R
@@ -22,138 +22,29 @@
#'
#' @param x A [counting_process()]-class data frame with a counting process
#' dataset.
#' @param rho_gamma A data frame with variables `rho` and `gamma`, both greater
#' than or equal to zero, specifying one Fleming-Harrington weighted logrank
#' test per row; Default: `data.frame(rho = c(0, 0, 1, 1), gamma = c(0, 1, 0, 1))`.
#' @param return_variance A logical flag that, if `TRUE`, adds a column with
#' the estimated variance of the weighted sum of observed minus expected;
#' see details; Default: `FALSE`.
#' @param return_corr A logical flag that, if `TRUE`, adds columns with the
#' estimated correlations of the weighted sums of observed minus expected;
#' see details; Default: `FALSE`.
#' @param rho A numerical value greater than or equal to zero, specifying the
#' rho parameter of a single Fleming-Harrington weighted logrank test.
#' @param gamma A numerical value greater than or equal to zero, specifying the
#' gamma parameter of a single Fleming-Harrington weighted logrank test.
#'
#' @return A data frame with `rho_gamma` as input and the FH test statistic
#' for the data in `x` (`z`, a directional square root of the usual
#' weighted logrank test); if variance calculations are specified
#' (for example, to be used for covariances in a combination test),
#' they are returned in the column `Var`.
#'
#' @details
#' The input `x`, produced by [counting_process()], is a counting process
#' dataset grouped by stratum and sorted within stratum by increasing times
#' at which events occur.
#' - \eqn{z} - Standardized normal Fleming-Harrington weighted logrank test.
#' - \eqn{i} - Stratum index.
#' - \eqn{d_i} - Number of distinct times at which events occurred in
#' stratum \eqn{i}.
#' - \eqn{t_{ij}} - Ordered times at which events in stratum
#' \eqn{i}, \eqn{j = 1, 2, \ldots, d_i} were observed;
#' for each observation, \eqn{t_{ij}} represents the time post study entry.
#' - \eqn{O_{ij.}} - Total number of events in stratum \eqn{i} that occurred
#' at time \eqn{t_{ij}}.
#' - \eqn{O_{ije}} - Total number of events in stratum \eqn{i} in the
#' experimental treatment group that occurred at time \eqn{t_{ij}}.
#' - \eqn{N_{ij.}} - Total number of study subjects in stratum \eqn{i}
#' who were followed for at least duration \eqn{t_{ij}}.
#' - \eqn{E_{ije}} - Expected observations in experimental treatment group
#' given random selection of \eqn{O_{ij.}} from those in
#' stratum \eqn{i} at risk at time \eqn{t_{ij}}.
#' - \eqn{V_{ije}} - Hypergeometric variance for \eqn{E_{ije}} as
#' produced in `var_o_minus_e` from [counting_process()].
#' - \eqn{N_{ije}} - Total number of study subjects in
#' stratum \eqn{i} in the experimental treatment group
#' who were followed for at least duration \eqn{t_{ij}}.
#' - \eqn{E_{ije}} - Expected observations in experimental group in
#' stratum \eqn{i} at time \eqn{t_{ij}} conditioning on the overall number
#' of events and at risk populations at that time and sampling at risk
#' observations without replacement:
#' \deqn{E_{ije} = O_{ij.} N_{ije}/N_{ij.}}
#' - \eqn{S_{ij}} - Kaplan-Meier estimate of survival in combined
#' treatment groups immediately prior to time \eqn{t_{ij}}.
#' - \eqn{\rho, \gamma} - Real parameters for Fleming-Harrington test.
#' - \eqn{X_i} - Numerator for signed logrank test in stratum \eqn{i}
#' \deqn{X_i = \sum_{j=1}^{d_{i}} S_{ij}^\rho(1-S_{ij})^\gamma(O_{ije}-E_{ije})}
#' - \eqn{V_i} - Variance used in denominator for Fleming-Harrington
#' weighted logrank tests
#' \deqn{V_i = \sum_{j=1}^{d_{i}} (S_{ij}^\rho(1-S_{ij})^\gamma)^2 V_{ij}}
#' The stratified Fleming-Harrington weighted logrank test is then computed as:
#' \deqn{z = \sum_i X_i/\sqrt{\sum_i V_i}.}
#' @return The input data frame `x` with the Fleming-Harrington weights for
#' the input `rho` and `gamma` added as a `weight` column.
#'
#' @importFrom data.table data.table merge.data.table setDF
#'
#' @export
#'
#' @examples
#' library(dplyr)
#'
#' # Example 1
#' # Use default enrollment and event rates, cutting at 100 events
#' x <- sim_pw_surv(n = 200) |>
#' cut_data_by_event(100) |>
#' counting_process(arm = "experimental")
#'
#' # Compute logrank FH(0, 1)
#' fh_weight(x, rho_gamma = data.frame(rho = 0, gamma = 1))
#' fh_weight(x, rho_gamma = data.frame(rho = 0, gamma = 1), return_variance = TRUE)
#'
#' # Compute the covariance between FH(0, 0), FH(0, 1) and FH(1, 0)
#' fh_weight(x, rho_gamma = data.frame(rho = c(0, 0, 1), gamma = c(0, 1, 0)))
#' fh_weight(x, rho_gamma = data.frame(rho = c(0, 0, 1), gamma = c(0, 1, 0)), return_variance = TRUE)
#' fh_weight(x, rho_gamma = data.frame(rho = c(0, 0, 1), gamma = c(0, 1, 0)), return_corr = TRUE)
#'
#' # Example 2
#' # Use default enrollment and event rates, cutting at 100 events
#' set.seed(123)
#' x <- sim_pw_surv(n = 200) |>
#' cut_data_by_event(100) |>
#' counting_process(arm = "experimental") |>
#' fh_weight(rho_gamma = data.frame(rho = c(0, 0), gamma = c(0, 1)), return_corr = TRUE)
#'
#' # Compute p-value for MaxCombo
#' library(mvtnorm)
#' 1 - pmvnorm(
#' lower = rep(min(x$z), nrow(x)),
#' corr = data.matrix(select(x, -c(rho, gamma, z))),
#' algorithm = GenzBretz(maxpts = 50000, abseps = 0.00001)
#' )[1]
#'
#' # Check that covariance is as expected
#' x <- sim_pw_surv(n = 200) |>
#' cut_data_by_event(100) |>
#' counting_process(arm = "experimental")
#'
#' x |> fh_weight(
#' rho_gamma = data.frame(
#' rho = c(0, 0),
#' gamma = c(0, 1)
#' ),
#' return_variance = TRUE
#' )
#'
#' # The off-diagonal element should equal the variance in the following
#' x |> fh_weight(
#' rho_gamma = data.frame(
#' rho = 0,
#' gamma = .5
#' ),
#' return_variance = TRUE
#' )
#'
#' # Compare the off-diagonal result with fh_weight()
#' x |> fh_weight(rho_gamma = data.frame(rho = 0, gamma = .5))
#' @noRd
fh_weight <- function(
x = sim_pw_surv(n = 200) |>
cut_data_by_event(150) |>
counting_process(arm = "experimental"),
rho_gamma = data.frame(
rho = c(0, 0, 1, 1),
gamma = c(0, 1, 0, 1)
),
return_variance = FALSE,
return_corr = FALSE) {
n_weight <- nrow(rho_gamma)
rho = 0,
gamma = 1) {


# Input checking ----
if (length(rho) != 1 || length(gamma) != 1) {
stop("fh_weight: please input single numerical values for rho and gamma.")
}

# Check input failure rate assumptions
if (!is.data.frame(x)) {
stop("fh_weight: x in `fh_weight()` must be a data frame.")
}
Expand All @@ -170,94 +61,7 @@ fh_weight <- function(
stop("fh_weight: x column names in `fh_weight()` must contain var_o_minus_e.")
}

if (return_variance && return_corr) {
stop("fh_weight: can't report both covariance and correlation for MaxCombo test.")
}

if (return_corr && n_weight == 1) {
stop("fh_weight: can't report the correlation for a single WLR test.")
}

if (n_weight == 1) {
ans <- wlr_z_stat(x, rho_gamma = rho_gamma, return_variance = return_variance)
} else {
# Get average rho and gamma for FH covariance matrix
# We want ave_rho[i,j] = (rho[i] + rho[j])/2
# and ave_gamma[i,j] = (gamma[i] + gamma[j])/2
ave_rho <- (matrix(rho_gamma$rho, nrow = n_weight, ncol = n_weight, byrow = FALSE) +
matrix(rho_gamma$rho, nrow = n_weight, ncol = n_weight, byrow = TRUE)
) / 2
ave_gamma <- (matrix(rho_gamma$gamma, nrow = n_weight, ncol = n_weight) +
matrix(rho_gamma$gamma, nrow = n_weight, ncol = n_weight, byrow = TRUE)
) / 2

# Convert to data.table
rg_new <- data.table(rho = as.numeric(ave_rho), gamma = as.numeric(ave_gamma))
# Get unique values of rho, gamma
rg_unique <- unique(rg_new)

# Compute FH statistic for unique values
# and merge back to full set of pairs
rg_fh <- merge.data.table(
x = rg_new,
y = wlr_z_stat(x, rho_gamma = rg_unique, return_variance = TRUE),
by = c("rho", "gamma"),
all.x = TRUE,
sort = FALSE
)

# Get z statistics for input rho, gamma combinations
z <- rg_fh$z[(0:(n_weight - 1)) * n_weight + 1:n_weight]

# Get covariance matrix
cov_mat <- matrix(rg_fh$var, nrow = n_weight, byrow = TRUE)

if (return_corr) {
corr_mat <- stats::cov2cor(cov_mat)
colnames(corr_mat) <- paste("v", seq_len(ncol(corr_mat)), sep = "")
ans <- cbind(rho_gamma, z, as.data.frame(corr_mat))
} else if (return_variance) {
corr_mat <- cov_mat
colnames(corr_mat) <- paste("v", seq_len(ncol(corr_mat)), sep = "")
ans <- cbind(rho_gamma, z, as.data.frame(corr_mat))
} else if (return_corr + return_variance == 0) {
corr_mat <- NULL
ans <- cbind(rho_gamma, z)
}
}

setDF(ans)
return(ans)
}
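For a single pair (rho, gamma), the removed `@details` formulas above collapse to a few lines; a sketch assuming the [counting_process()] columns `s`, `o_minus_e`, and `var_o_minus_e`:

# z = sum(w * (O - E)) / sqrt(sum(w^2 * V)), with w = S^rho * (1 - S)^gamma
fh_z <- function(x, rho = 0, gamma = 0) {
  w <- x$s^rho * (1 - x$s)^gamma
  sum(w * x$o_minus_e) / sqrt(sum(w^2 * x$var_o_minus_e))
}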

# Build an internal function to compute the Z statistics
# of the WLR under a sequence of rho and gamma values.
wlr_z_stat <- function(x, rho_gamma, return_variance) {
ans <- rho_gamma

xx <- x[, c("s", "o_minus_e", "var_o_minus_e")]

ans$z <- rep(0, nrow(rho_gamma))

if (return_variance) {
ans$var <- rep(0, nrow(rho_gamma))
}

for (i in seq_len(nrow(rho_gamma))) {
weight <- xx$s^rho_gamma$rho[i] * (1 - xx$s)^rho_gamma$gamma[i]
weighted_o_minus_e <- weight * xx$o_minus_e
weighted_var <- weight^2 * xx$var_o_minus_e

weighted_var_total <- sum(weighted_var)
weighted_o_minus_e_total <- sum(weighted_o_minus_e)


ans$z[i] <- weighted_o_minus_e_total / sqrt(weighted_var_total)

if (return_variance) {
ans$var[i] <- weighted_var_total
}
}
x$weight <- x$s^rho * (1 - x$s)^gamma

return(ans)
return(x)
}
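The parameter-averaging trick in the removed covariance code rests on a simple identity: because the FH weights multiply, the covariance of two FH statistics equals the variance of the FH statistic at the averaged (rho, gamma). A sketch, reusing a counting-process dataset `x` as in the examples above:

# cov(FH(0, 0), FH(0, 1)) equals var(FH(0, 0.5)) since
# w1 * w2 = (1 - s) = (s^0 * (1 - s)^0.5)^2
w1 <- x$s^0 * (1 - x$s)^0
w2 <- x$s^0 * (1 - x$s)^1
cov_01 <- sum(w1 * w2 * x$var_o_minus_e)
w_avg <- x$s^0 * (1 - x$s)^0.5
var_avg <- sum(w_avg^2 * x$var_o_minus_e)
all.equal(cov_01, var_avg)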