New version

Russel88 · Mar 26, 2018 · 321412d · 321412d
1 parent dd08c14
commit 321412d
Show file tree

Hide file tree

Showing 11 changed files with 236 additions and 153 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: DAtest
 Title: Comparing Differential Abundance methods
-Version: 2.7.7
+Version: 2.7.8
 Authors@R: person("Jakob", "Russel", email = "[email protected]", role = c("aut", "cre"))
 Description: What the title says.
 Depends: R (>= 3.2.5)

diff --git a/R/DA.kru.R b/R/DA.kru.R
@@ -19,6 +19,8 @@ DA.kru <- function(data, predictor, relative = TRUE, p.adj = "fdr", allResults =
   } else {
     count_table <- data
   }
+
+  predictor <- as.factor(predictor)
 
   # Define function
   kru <- function(x){
@@ -34,16 +36,6 @@ DA.kru <- function(data, predictor, relative = TRUE, p.adj = "fdr", allResults =
 
   # Run tests
   if(allResults){
-    ## Define function for allResults 
-    if(is.null(paired)){
-      kru <- function(x){
-        tryCatch(kruskal.test(x ~ predictor, ...), error = function(e){NA}) 
-      }
-    } else {
-      kru <- function(x){
-        tryCatch(kruskal.test(x ~ predictor, paired = TRUE, ...), error = function(e){NA}) 
-      }
-    }
     return(apply(count.rel,1,kru))
   } else {
     res <- data.frame(pval = apply(count.rel,1,kru))

diff --git a/R/plot.DA.R b/R/plot.DA.R
@@ -1,7 +1,7 @@
 #' Plotting results from \code{testDA}
 #'
 #' @param x The output from the \code{testDA} function
-#' @param sort Sort methods by \code{c("AUC","FPR","Spike.detect.rate","Rank")}
+#' @param sort Sort methods by \code{c("AUC","FDR","Spike.detect.rate","Rank")}
 #' @param p Logical. Should the p-value distribution be plotted (only p-values from non-spiked features)
 #' @param bins Integer. Number of bins in p-value histograms
 #' @param ... Additional arguments for \code{ggdraw}
@@ -10,7 +10,7 @@
 #' @importFrom cowplot draw_plot
 #' @export
 
-plot.DA <- function(x, sort = "Rank", p = FALSE, bins = 20, ...){
+plot.DA <- function(x, sort = "Score", p = FALSE, bins = 20, ...){
 
   if(p){
     # For plotting p-value histograms
@@ -30,64 +30,42 @@ plot.DA <- function(x, sort = "Rank", p = FALSE, bins = 20, ...){
 
   } else {
 
-    # Rank
+    # Score
     ## Find medians
-    output.summary.auc <- aggregate(AUC ~ Method, data = x$table, FUN = function(x) round(median(x),3))
-    output.summary.fpr <- aggregate(FPR ~ Method, data = x$table, FUN = function(x) round(median(x),3))
-    output.summary.sdr <- aggregate(Spike.detect.rate ~ Method, data = x$table, FUN = function(x) round(median(x),3))
+    auc.median <- aggregate(AUC ~ Method, data = x$table, FUN = function(x) round(median(x),3))
+    fdr.median <- aggregate(FDR ~ Method, data = x$table, FUN = function(x) round(median(x),3))
+    sdr.median <- aggregate(Spike.detect.rate ~ Method, data = x$table, FUN = function(x) round(median(x),3))
 
     ## Merge
-    df <- merge(merge(output.summary.auc,output.summary.fpr, by = "Method"),output.summary.sdr, by = "Method")
+    df <- merge(merge(auc.median,fdr.median, by = "Method"),sdr.median, by = "Method")
 
-    ## Rank
-    nobad <- df[df$FPR <= 0.05,]
-    auc.r <- rank(nobad$AUC)
-    sdr.r <- rank(nobad$Spike.detect.rate)
-    mean.r <- (auc.r + sdr.r)/2
-    nobad$Rank <- nrow(nobad) - mean.r
-    bad <- df[df$FPR > 0.05,]
-    bad <- bad[order(bad$AUC, decreasing = TRUE),]
-    bad$Rank <- NA
-    df <- rbind(nobad,bad)
-    df <- df[order(df$Rank, decreasing = FALSE),]
+    # Score
+    df$Score <- round(df$AUC * df$Spike.detect.rate - df$FDR,3)
 
     # Sort the reults
     if(sort == "AUC") {
-      auc.median <- aggregate(AUC ~ Method, data = x$table, FUN = median)
-      x$table$Method <- factor(x$table$Method, levels = auc.median[order(auc.median$AUC, decreasing = TRUE),"Method"])
+      x$table$Method <- factor(x$table$Method, levels = df[order(df$AUC, decreasing = TRUE),"Method"])
     }
-    if(sort == "FPR") {
-      fpr.median <- aggregate(FPR ~ Method, data = x$table, FUN = median)
-      x$table$Method <- factor(x$table$Method, levels = fpr.median[order(fpr.median$FPR, decreasing = FALSE),"Method"])
+    if(sort == "FDR") {
+      x$table$Method <- factor(x$table$Method, levels = df[order(df$FDR, decreasing = FALSE),"Method"])
     }
     if(sort == "Spike.detect.rate") {
-      sdr.median <- aggregate(Spike.detect.rate ~ Method, data = x$table, FUN = median)
-      x$table$Method <- factor(x$table$Method, levels = sdr.median[order(sdr.median$Spike.detect.rate, decreasing = TRUE),"Method"])
+      x$table$Method <- factor(x$table$Method, levels = df[order(df$Spike.detect.rate, decreasing = TRUE),"Method"])
     }
-    if(sort == "Rank") {
-      x$table$Method <- factor(x$table$Method, levels = df$Method)
+    if(sort == "Score") {
+      x$table$Method <- factor(x$table$Method, levels = df[order(df$Score, decreasing = TRUE),"Method"])
     }
 
-    # Colour bad ones
-    cob <- data.frame(x = rep(c((nrow(nobad)+0.5),(nrow(df)+1)),2),
-                      ymin = rep(-Inf,4),
-                      ymax = rep(Inf,4),
-                      FPR = rep(0,4),
-                      AUC = rep(0,4),
-                      Spike.detect.rate = rep(0,4))
-
-    # Define FPR and AUC plots
-    p1 <- ggplot(x$table, aes(Method, FPR)) +
+    # Define FDR and AUC plots
+    p1 <- ggplot(x$table, aes(Method, FDR)) +
       theme_bw() +
       coord_cartesian(ylim = c(0,1)) +
-      geom_hline(yintercept = 0.05, colour = "red") +
       geom_point() +
       stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median,geom = "crossbar",colour="red",width=0.75) +
-      ylab("False Positive Rate") +
+      ylab("False Discovery Rate") +
       theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
       xlab(NULL) +
-      theme(panel.grid.minor = element_blank()) +
-      geom_ribbon(aes(x=x,ymin=ymin,ymax=ymax),data = cob, fill = "red", alpha = 0.2)
+      theme(panel.grid.minor = element_blank())
 
     p2 <- ggplot(x$table, aes(Method, AUC)) +
       theme_bw() +
@@ -99,8 +77,7 @@ plot.DA <- function(x, sort = "Rank", p = FALSE, bins = 20, ...){
       theme(axis.text.x = element_blank(),
             panel.grid.minor = element_blank()) +
       xlab(NULL) +
-      scale_y_continuous(labels=function(x) sprintf("%.2f", x)) +
-      geom_ribbon(aes(x=x,ymin=ymin,ymax=ymax),data = cob, fill = "red", alpha = 0.2)
+      scale_y_continuous(labels=function(x) sprintf("%.2f", x))
 
     p3 <- ggplot(x$table, aes(Method, Spike.detect.rate)) +
       theme_bw() +
@@ -110,8 +87,7 @@ plot.DA <- function(x, sort = "Rank", p = FALSE, bins = 20, ...){
       theme(axis.text.x = element_blank(),
             panel.grid.minor = element_blank()) +
       xlab(NULL) +
-      scale_y_continuous(labels=function(x) sprintf("%.2f", x)) +
-      geom_ribbon(aes(x=x,ymin=ymin,ymax=ymax),data = cob, fill = "red", alpha = 0.2)
+      scale_y_continuous(labels=function(x) sprintf("%.2f", x))
 
     # Plot it
     pp <- cowplot::ggdraw(...) +

diff --git a/R/plot.DAPower.R b/R/plot.DAPower.R
@@ -17,7 +17,7 @@ plot.DAPower <- function(x, ...){
       coord_cartesian(ylim = c(0,1)) +
       geom_point() +
       geom_smooth(method = "loess", colour = "red", alpha = 0.4) +
-      ylab("Empirical Power") +
+      ylab("Spike Detect Rate") +
       xlab("Log2 Effect Size") +
       theme(panel.grid.minor = element_blank())
 

diff --git a/R/print.DA.R b/R/print.DA.R
@@ -1,39 +1,52 @@
 #' Summary of results from \code{testDA}
 #'
 #' @param object The output from the \code{testDA} function
-#' @param sort Sort methods by \code{c("AUC","FPR","Spike.detect.rate","Rank")}
+#' @param sort Sort methods by \code{c("AUC","FPR","Spike.detect.rate","Score")}
+#' @param boot If TRUE will use bootstrap for confidence limits of the Score, else will compute the limits from the original table. Recommended to be TRUE unless \code{R >= 100} in \code{testDA}
+#' @param prob Confidence limits for Score. Default \code{90\%} = \code{c(0.05,0.095)}
+#' @param N Number of bootstraps. Default 1000
+#' @param boot.seed Random seed for reproducibility of bootstraps
 #' @param ... Additional arguments for \code{print}
 #' @import stats
 #' @import methods
 #' @import utils
 #' @export
 
-summary.DA <- function(object, sort = "Rank", ...){
+summary.DA <- function(object, sort = "Score", boot = TRUE, prob = c(0.05,0.95), N = 1000, boot.seed = 1, ...){
 
   # Find medians
   output.summary.auc <- aggregate(AUC ~ Method, data = object$table, FUN = function(x) round(median(x),3))
   output.summary.fpr <- aggregate(FPR ~ Method, data = object$table, FUN = function(x) round(median(x),3))
   output.summary.sdr <- aggregate(Spike.detect.rate ~ Method, data = object$table, FUN = function(x) round(median(x),3))
+  output.summary.fdr <- aggregate(FDR ~ Method, data = object$table, FUN = function(x) round(median(x),3))
 
   # Merge
-  df <- merge(merge(output.summary.auc,output.summary.fpr, by = "Method"),output.summary.sdr, by = "Method")
-
-  # Rank
-  nobad <- df[df$FPR <= 0.05,]
-  auc.r <- rank(nobad$AUC)
-  sdr.r <- rank(nobad$Spike.detect.rate)
-  mean.r <- (auc.r + sdr.r)/2
-  nobad$Rank <- nrow(nobad) - mean.r
-  bad <- df[df$FPR > 0.05,]
-  bad <- bad[order(bad$AUC, decreasing = TRUE),]
-  bad$Rank <- NA
-  df <- rbind(nobad,bad)
+  df <- merge(merge(merge(output.summary.auc,output.summary.fpr, by = "Method", all = TRUE),output.summary.fdr, by = "Method"),output.summary.sdr, by = "Method")
+
+  # Score
+  df$Score <- round(df$AUC * df$Spike.detect.rate - df$FDR,3)
+
+  # Interval
+  object$table$Score <- object$table$AUC * object$table$Spike.detect.rate - object$table$FDR
+
+  if(boot){
+    set.seed(boot.seed)
+    boots <- lapply(unique(object$table$Method), function(x) object$table[object$table$Method == x,][
+      sample(rownames(object$table[object$table$Method == x,]),N,replace = TRUE),
+      ])
+    boot.score <- lapply(boots,function(y) aggregate(Score ~ Method, data = y, FUN = function(x) round(quantile(x,probs = prob),3)))
+    score.cl <- do.call(rbind,boot.score)
+  } else {
+    score.cl <- aggregate(Score ~ Method, data = object$table, FUN = function(x) round(quantile(x,probs = prob),3))
+  }
+
+  df <- merge(df, as.matrix(score.cl), by = "Method")
 
   # Sort
   if(sort == "AUC") df <- df[order(df$AUC, decreasing = TRUE),]
   if(sort == "FPR") df <- df[order(df$FPR, decreasing = FALSE),]
   if(sort == "Spike.detect.rate") df <- df[order(df$Spike.detect.rate, decreasing = TRUE),]
-  if(sort == "Rank") df <- df[order(df$Rank, decreasing = FALSE),]
+  if(sort == "Score") df <- df[order(df$Score, decreasing = TRUE),]
 
   print(df, row.names = FALSE, ...)
 

diff --git a/R/summary.DAPower.R b/R/summary.DAPower.R
@@ -15,7 +15,7 @@ summary.DAPower <- function(x, ...){
 
       # Merge
       df <- merge(merge(output.summary.sdr,output.summary.auc, by = "EffectSize"),output.summary.fdr, by = "EffectSize")
-      colnames(df) <- c("EffectSize","EmpiricalPower","AUC","FDR")
+      colnames(df) <- c("EffectSize","Spike.detect.rate","AUC","FDR")
     } else {
       if(x$Method[1] %in% c("ALDEx2 t-test (adx)","ALDEx2 wilcox (adx)")){
         # Find medians
@@ -27,7 +27,7 @@ summary.DAPower <- function(x, ...){
         # Merge
         df <- cbind(output.summary.sdr,output.summary.auc[,3],output.summary.fpr[,3],output.summary.fdr[,3])
         df <- as.data.frame(df[order(df$Method),])
-        colnames(df) <- c("Method","EffectSize","EmpiricalPower","AUC","FPR","FDR")
+        colnames(df) <- c("Method","EffectSize","Spike.detect.rate","AUC","FPR","FDR")
       } else {
         # Find medians
         output.summary.sdr <- aggregate(SDR ~ EffectSize, data = x, FUN = function(y) round(median(y),3))
@@ -37,7 +37,7 @@ summary.DAPower <- function(x, ...){
 
         # Merge
         df <- merge(merge(merge(output.summary.sdr,output.summary.auc, by = "EffectSize"),output.summary.fpr, by = "EffectSize"),output.summary.fdr, by = "EffectSize")
-        colnames(df) <- c("EffectSize","EmpiricalPower","AUC","FPR","FDR")
+        colnames(df) <- c("EffectSize","Spike.detect.rate","AUC","FPR","FDR")
       }
     } 
 

diff --git a/R/testDA.R b/R/testDA.R
@@ -486,21 +486,31 @@ testDA <- function(data, predictor, paired = NULL, covars = NULL, R = 10, tests
       }
     })
 
+    # Confusion matrix adjusted
+    totalPos.adj <- sapply(res.sub, function(x) nrow(x[x$pval.adj <= alpha,]))
+    truePos.adj <- sapply(res.sub, function(x) sum(x[x$pval.adj <= alpha,"Feature"] %in% spikeds[[r]][[2]]))
+    falsePos.adj <- totalPos.adj - truePos.adj
+
+    fdrs <- sapply(1:length(res.sub), function(x) {
+      if(totalPos.adj[x] != 0){
+        falsePos.adj[x] / totalPos.adj[x]
+      } else {0}})
+
     # Combine and return
     df.combined <- data.frame(Method = sapply(res.sub, function(x) x$Method[1]),
                               AUC = aucs,
                               FPR = fprs,
+                              FDR = fdrs,
                               Spike.detect.rate = sdrs,
                               Run = r)
     rownames(df.combined) <- NULL
 
-    # SAMseq FDR
+    # SAMseq and ANCOM FPRs not possible
     if("sam" %in% newnames){
-      if(nrow(res.sub[["sam"]][res.sub[["sam"]]$pval.adj <= alpha,]) == 0){
-        df.combined[df.combined$Method == "SAMseq (sam)","FPR"] <- 0
-      } else {
-        df.combined[df.combined$Method == "SAMseq (sam)","FPR"] <- nrow(res.sub[["sam"]][res.sub[["sam"]]$pval.adj <= alpha & res.sub[["sam"]]$Spiked == "No",])/nrow(res.sub[["sam"]][res.sub[["sam"]]$pval.adj <= alpha,])
-      }
+      df.combined[df.combined$Method == "SAMseq (sam)","FPR"] <- NA
+    }
+    if("anc" %in% newnames){
+      df.combined[df.combined$Method == "ANCOM (anc)","FPR"] <- NA
     }
 
     return(list(df.combined,res.sub))
@@ -560,16 +570,5 @@ testDA <- function(data, predictor, paired = NULL, covars = NULL, R = 10, tests
   out <- list(table = output.results, results = output.all.results, details = output.details, run.times = run.times.all)
   class(out) <- "DA"
 
-  # Create warning message
-  output.summary.auc <- aggregate(AUC ~ Method, data = output.results, FUN = median)
-  output.summary.fpr <- aggregate(FPR ~ Method, data = output.results, FUN = median)
-  output.summary.sdr <- aggregate(Spike.detect.rate ~ Method, data = output.results, FUN = median)
-  df <- merge(merge(output.summary.auc,output.summary.fpr, by = "Method"),output.summary.sdr, by = "Method")
-  df <- df[df$FPR <= 0.05,]
-  if(nrow(df) > 0){
-    df <- df[order(df$AUC, decreasing = TRUE),]
-    if(df[1,"Spike.detect.rate"] == 0 & verbose) message("Spike detect rate is zero for the top method! You might want to re-run the analysis with a pruned dataset (see preDA) or a higher effectSize")
-  }
-
   return(out)
 }
diff --git a/R/zzz.R b/R/zzz.R
@@ -1,5 +1,5 @@
 
 .onLoad <- function(libname, pkgname){
-  message("DAtest version 2.7.7")
+  message("DAtest version 2.7.8")
 }