diff --git a/DESCRIPTION b/DESCRIPTION
index 8e17517..ee943ab 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: DAtest
 Title: Comparing Differential Abundance methods
-Version: 2.7.7
+Version: 2.7.8
 Authors@R: person("Jakob", "Russel", email = "russel2620@gmail.com", role = c("aut", "cre"))
 Description: What the title says.
 Depends: R (>= 3.2.5)
diff --git a/R/DA.kru.R b/R/DA.kru.R
index 17d522d..974aa5c 100644
--- a/R/DA.kru.R
+++ b/R/DA.kru.R
@@ -19,6 +19,8 @@ DA.kru <- function(data, predictor, relative = TRUE, p.adj = "fdr", allResults =
   } else {
     count_table <- data
   }
+
+  predictor <- as.factor(predictor)
 
   # Define function
   kru <- function(x){
@@ -34,16 +36,6 @@ DA.kru <- function(data, predictor, relative = TRUE, p.adj = "fdr", allResults =
 
   # Run tests
   if(allResults){
-    ## Define function for allResults
-    if(is.null(paired)){
-      kru <- function(x){
-        tryCatch(kruskal.test(x ~ predictor, ...), error = function(e){NA})
-      }
-    } else {
-      kru <- function(x){
-        tryCatch(kruskal.test(x ~ predictor, paired = TRUE, ...), error = function(e){NA})
-      }
-    }
     return(apply(count.rel,1,kru))
   } else {
     res <- data.frame(pval = apply(count.rel,1,kru))
diff --git a/R/plot.DA.R b/R/plot.DA.R
index 568bccb..e5edad1 100644
--- a/R/plot.DA.R
+++ b/R/plot.DA.R
@@ -1,7 +1,7 @@
 #' Plotting results from \code{testDA}
 #'
 #' @param x The output from the \code{testDA} function
-#' @param sort Sort methods by \code{c("AUC","FPR","Spike.detect.rate","Rank")}
+#' @param sort Sort methods by \code{c("AUC","FDR","Spike.detect.rate","Score")}
 #' @param p Logical. Should the p-value distribution be plotted (only p-values from non-spiked features)
 #' @param bins Integer. Number of bins in p-value histograms
 #' @param ... Additional arguments for \code{ggdraw}
@@ -10,7 +10,7 @@
 #' @importFrom cowplot draw_plot
 #' @export
 
-plot.DA <- function(x, sort = "Rank", p = FALSE, bins = 20, ...){
+plot.DA <- function(x, sort = "Score", p = FALSE, bins = 20, ...){
 
   if(p){
     # For plotting p-value histograms
@@ -30,64 +30,42 @@ plot.DA <- function(x, sort = "Rank", p = FALSE, bins = 20, ...){
 
   } else {
 
-    # Rank
+    # Score
     ## Find medians
-    output.summary.auc <- aggregate(AUC ~ Method, data = x$table, FUN = function(x) round(median(x),3))
-    output.summary.fpr <- aggregate(FPR ~ Method, data = x$table, FUN = function(x) round(median(x),3))
-    output.summary.sdr <- aggregate(Spike.detect.rate ~ Method, data = x$table, FUN = function(x) round(median(x),3))
+    auc.median <- aggregate(AUC ~ Method, data = x$table, FUN = function(x) round(median(x),3))
+    fdr.median <- aggregate(FDR ~ Method, data = x$table, FUN = function(x) round(median(x),3))
+    sdr.median <- aggregate(Spike.detect.rate ~ Method, data = x$table, FUN = function(x) round(median(x),3))
 
     ## Merge
-    df <- merge(merge(output.summary.auc,output.summary.fpr, by = "Method"),output.summary.sdr, by = "Method")
+    df <- merge(merge(auc.median,fdr.median, by = "Method"),sdr.median, by = "Method")
 
-    ## Rank
-    nobad <- df[df$FPR <= 0.05,]
-    auc.r <- rank(nobad$AUC)
-    sdr.r <- rank(nobad$Spike.detect.rate)
-    mean.r <- (auc.r + sdr.r)/2
-    nobad$Rank <- nrow(nobad) - mean.r
-    bad <- df[df$FPR > 0.05,]
-    bad <- bad[order(bad$AUC, decreasing = TRUE),]
-    bad$Rank <- NA
-    df <- rbind(nobad,bad)
-    df <- df[order(df$Rank, decreasing = FALSE),]
+    # Score
+    df$Score <- round(df$AUC * df$Spike.detect.rate - df$FDR,3)
 
     # Sort the results
     if(sort == "AUC") {
-      auc.median <- aggregate(AUC ~ Method, data = x$table, FUN = median)
-      x$table$Method <- factor(x$table$Method, levels = auc.median[order(auc.median$AUC, decreasing = TRUE),"Method"])
+      x$table$Method <- factor(x$table$Method, levels = df[order(df$AUC, decreasing = TRUE),"Method"])
     }
-    if(sort == "FPR") {
-      fpr.median <- aggregate(FPR ~ Method, data = x$table, FUN = median)
-      x$table$Method <- factor(x$table$Method, levels = fpr.median[order(fpr.median$FPR, decreasing = FALSE),"Method"])
+    if(sort == "FDR") {
+      x$table$Method <- factor(x$table$Method, levels = df[order(df$FDR, decreasing = FALSE),"Method"])
     }
     if(sort == "Spike.detect.rate") {
-      sdr.median <- aggregate(Spike.detect.rate ~ Method, data = x$table, FUN = median)
-      x$table$Method <- factor(x$table$Method, levels = sdr.median[order(sdr.median$Spike.detect.rate, decreasing = TRUE),"Method"])
+      x$table$Method <- factor(x$table$Method, levels = df[order(df$Spike.detect.rate, decreasing = TRUE),"Method"])
     }
-    if(sort == "Rank") {
-      x$table$Method <- factor(x$table$Method, levels = df$Method)
+    if(sort == "Score") {
+      x$table$Method <- factor(x$table$Method, levels = df[order(df$Score, decreasing = TRUE),"Method"])
     }
 
-    # Colour bad ones
-    cob <- data.frame(x = rep(c((nrow(nobad)+0.5),(nrow(df)+1)),2),
-                      ymin = rep(-Inf,4),
-                      ymax = rep(Inf,4),
-                      FPR = rep(0,4),
-                      AUC = rep(0,4),
-                      Spike.detect.rate = rep(0,4))
-
-    # Define FPR and AUC plots
-    p1 <- ggplot(x$table, aes(Method, FPR)) +
+    # Define FDR and AUC plots
+    p1 <- ggplot(x$table, aes(Method, FDR)) +
       theme_bw() +
       coord_cartesian(ylim = c(0,1)) +
-      geom_hline(yintercept = 0.05, colour = "red") +
       geom_point() +
       stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median,geom = "crossbar",colour="red",width=0.75) +
-      ylab("False Positive Rate") +
+      ylab("False Discovery Rate") +
       theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
       xlab(NULL) +
-      theme(panel.grid.minor = element_blank()) +
-      geom_ribbon(aes(x=x,ymin=ymin,ymax=ymax),data = cob, fill = "red", alpha = 0.2)
+      theme(panel.grid.minor = element_blank())
 
     p2 <- ggplot(x$table, aes(Method, AUC)) +
       theme_bw() +
@@ -99,8 +77,7 @@ plot.DA <- function(x, sort = "Rank", p = FALSE, bins = 20, ...){
       theme(axis.text.x = element_blank(),
             panel.grid.minor = element_blank()) +
       xlab(NULL) +
-      scale_y_continuous(labels=function(x) sprintf("%.2f", x)) +
-      geom_ribbon(aes(x=x,ymin=ymin,ymax=ymax),data = cob, fill = "red", alpha = 0.2)
+      scale_y_continuous(labels=function(x) sprintf("%.2f", x))
 
     p3 <- ggplot(x$table, aes(Method, Spike.detect.rate)) +
       theme_bw() +
@@ -110,8 +87,7 @@ plot.DA <- function(x, sort = "Rank", p = FALSE, bins = 20, ...){
       theme(axis.text.x = element_blank(),
             panel.grid.minor = element_blank()) +
       xlab(NULL) +
-      scale_y_continuous(labels=function(x) sprintf("%.2f", x)) +
-      geom_ribbon(aes(x=x,ymin=ymin,ymax=ymax),data = cob, fill = "red", alpha = 0.2)
+      scale_y_continuous(labels=function(x) sprintf("%.2f", x))
 
     # Plot it
     pp <- cowplot::ggdraw(...)
+
diff --git a/R/plot.DAPower.R b/R/plot.DAPower.R
index 038c8ee..4950c67 100644
--- a/R/plot.DAPower.R
+++ b/R/plot.DAPower.R
@@ -17,7 +17,7 @@ plot.DAPower <- function(x, ...){
     coord_cartesian(ylim = c(0,1)) +
     geom_point() +
     geom_smooth(method = "loess", colour = "red", alpha = 0.4) +
-    ylab("Empirical Power") +
+    ylab("Spike Detect Rate") +
     xlab("Log2 Effect Size") +
     theme(panel.grid.minor = element_blank())
diff --git a/R/print.DA.R b/R/print.DA.R
index 8fe996b..0118124 100644
--- a/R/print.DA.R
+++ b/R/print.DA.R
@@ -1,39 +1,52 @@
 #' Summary of results from \code{testDA}
 #'
 #' @param object The output from the \code{testDA} function
-#' @param sort Sort methods by \code{c("AUC","FPR","Spike.detect.rate","Rank")}
+#' @param sort Sort methods by \code{c("AUC","FPR","Spike.detect.rate","Score")}
+#' @param boot If TRUE will use bootstrap for confidence limits of the Score, else will compute the limits from the original table. Recommended to be TRUE unless \code{R >= 100} in \code{testDA}
+#' @param prob Confidence limits for Score. Default \code{90\%} = \code{c(0.05,0.95)}
+#' @param N Number of bootstraps. Default 1000
+#' @param boot.seed Random seed for reproducibility of bootstraps
 #' @param ... Additional arguments for \code{print}
 #' @import stats
 #' @import methods
 #' @import utils
 #' @export
 
-summary.DA <- function(object, sort = "Rank", ...){
+summary.DA <- function(object, sort = "Score", boot = TRUE, prob = c(0.05,0.95), N = 1000, boot.seed = 1, ...){
 
   # Find medians
   output.summary.auc <- aggregate(AUC ~ Method, data = object$table, FUN = function(x) round(median(x),3))
   output.summary.fpr <- aggregate(FPR ~ Method, data = object$table, FUN = function(x) round(median(x),3))
   output.summary.sdr <- aggregate(Spike.detect.rate ~ Method, data = object$table, FUN = function(x) round(median(x),3))
+  output.summary.fdr <- aggregate(FDR ~ Method, data = object$table, FUN = function(x) round(median(x),3))
 
   # Merge
-  df <- merge(merge(output.summary.auc,output.summary.fpr, by = "Method"),output.summary.sdr, by = "Method")
-
-  # Rank
-  nobad <- df[df$FPR <= 0.05,]
-  auc.r <- rank(nobad$AUC)
-  sdr.r <- rank(nobad$Spike.detect.rate)
-  mean.r <- (auc.r + sdr.r)/2
-  nobad$Rank <- nrow(nobad) - mean.r
-  bad <- df[df$FPR > 0.05,]
-  bad <- bad[order(bad$AUC, decreasing = TRUE),]
-  bad$Rank <- NA
-  df <- rbind(nobad,bad)
+  df <- merge(merge(merge(output.summary.auc,output.summary.fpr, by = "Method", all = TRUE),output.summary.fdr, by = "Method"),output.summary.sdr, by = "Method")
+
+  # Score
+  df$Score <- round(df$AUC * df$Spike.detect.rate - df$FDR,3)
+
+  # Interval
+  object$table$Score <- object$table$AUC * object$table$Spike.detect.rate - object$table$FDR
+
+  if(boot){
+    set.seed(boot.seed)
+    boots <- lapply(unique(object$table$Method), function(x) object$table[object$table$Method == x,][
+      sample(rownames(object$table[object$table$Method == x,]),N,replace = TRUE),
+      ])
+    boot.score <- lapply(boots,function(y) aggregate(Score ~ Method, data = y, FUN = function(x) round(quantile(x,probs = prob),3)))
+    score.cl <- do.call(rbind,boot.score)
+  } else {
+    score.cl <- aggregate(Score ~ Method, data = object$table, FUN = function(x) round(quantile(x,probs = prob),3))
+  }
+
+  df <- merge(df, as.matrix(score.cl), by = "Method")
 
   # Sort
   if(sort == "AUC") df <- df[order(df$AUC, decreasing = TRUE),]
   if(sort == "FPR") df <- df[order(df$FPR, decreasing = FALSE),]
   if(sort == "Spike.detect.rate") df <- df[order(df$Spike.detect.rate, decreasing = TRUE),]
-  if(sort == "Rank") df <- df[order(df$Rank, decreasing = FALSE),]
+  if(sort == "Score") df <- df[order(df$Score, decreasing = TRUE),]
 
   print(df, row.names = FALSE, ...)
diff --git a/R/summary.DAPower.R b/R/summary.DAPower.R
index 975dff5..07e02ee 100644
--- a/R/summary.DAPower.R
+++ b/R/summary.DAPower.R
@@ -15,7 +15,7 @@ summary.DAPower <- function(x, ...){
 
     # Merge
     df <- merge(merge(output.summary.sdr,output.summary.auc, by = "EffectSize"),output.summary.fdr, by = "EffectSize")
-    colnames(df) <- c("EffectSize","EmpiricalPower","AUC","FDR")
+    colnames(df) <- c("EffectSize","Spike.detect.rate","AUC","FDR")
 
   } else {
     if(x$Method[1] %in% c("ALDEx2 t-test (adx)","ALDEx2 wilcox (adx)")){
       # Find medians
@@ -27,7 +27,7 @@ summary.DAPower <- function(x, ...){
       # Merge
       df <- cbind(output.summary.sdr,output.summary.auc[,3],output.summary.fpr[,3],output.summary.fdr[,3])
       df <- as.data.frame(df[order(df$Method),])
-      colnames(df) <- c("Method","EffectSize","EmpiricalPower","AUC","FPR","FDR")
+      colnames(df) <- c("Method","EffectSize","Spike.detect.rate","AUC","FPR","FDR")
     } else {
       # Find medians
       output.summary.sdr <- aggregate(SDR ~ EffectSize, data = x, FUN = function(y) round(median(y),3))
@@ -37,7 +37,7 @@ summary.DAPower <- function(x, ...){
 
       # Merge
       df <- merge(merge(merge(output.summary.sdr,output.summary.auc, by = "EffectSize"),output.summary.fpr, by = "EffectSize"),output.summary.fdr, by = "EffectSize")
-      colnames(df) <- c("EffectSize","EmpiricalPower","AUC","FPR","FDR")
+      colnames(df) <- c("EffectSize","Spike.detect.rate","AUC","FPR","FDR")
     }
   }
diff --git a/R/testDA.R b/R/testDA.R
index 1265436..9077cf9 100644
--- a/R/testDA.R
+++ b/R/testDA.R
@@ -486,21 +486,31 @@ testDA <- function(data, predictor, paired = NULL, covars = NULL, R = 10, tests
       }
     })
 
+    # Confusion matrix adjusted
+    totalPos.adj <- sapply(res.sub, function(x) nrow(x[x$pval.adj <= alpha,]))
+    truePos.adj <- sapply(res.sub, function(x) sum(x[x$pval.adj <= alpha,"Feature"] %in% spikeds[[r]][[2]]))
+    falsePos.adj <- totalPos.adj - truePos.adj
+
+    fdrs <- sapply(1:length(res.sub), function(x) {
+      if(totalPos.adj[x] != 0){
+        falsePos.adj[x] / totalPos.adj[x]
+      } else {0}})
+
     # Combine and return
     df.combined <- data.frame(Method = sapply(res.sub, function(x) x$Method[1]),
                               AUC = aucs,
                               FPR = fprs,
+                              FDR = fdrs,
                               Spike.detect.rate = sdrs,
                               Run = r)
     rownames(df.combined) <- NULL
 
-    # SAMseq FDR
+    # SAMseq and ANCOM FPRs not possible
     if("sam" %in% newnames){
-      if(nrow(res.sub[["sam"]][res.sub[["sam"]]$pval.adj <= alpha,]) == 0){
-        df.combined[df.combined$Method == "SAMseq (sam)","FPR"] <- 0
-      } else {
-        df.combined[df.combined$Method == "SAMseq (sam)","FPR"] <- nrow(res.sub[["sam"]][res.sub[["sam"]]$pval.adj <= alpha & res.sub[["sam"]]$Spiked == "No",])/nrow(res.sub[["sam"]][res.sub[["sam"]]$pval.adj <= alpha,])
-      }
+      df.combined[df.combined$Method == "SAMseq (sam)","FPR"] <- NA
+    }
+    if("anc" %in% newnames){
+      df.combined[df.combined$Method == "ANCOM (anc)","FPR"] <- NA
     }
 
     return(list(df.combined,res.sub))
@@ -560,16 +570,5 @@ testDA <- function(data, predictor, paired = NULL, covars = NULL, R = 10, tests
   out <- list(table = output.results, results = output.all.results, details = output.details, run.times = run.times.all)
   class(out) <- "DA"
 
-  # Create warning message
-  output.summary.auc <- aggregate(AUC ~ Method, data = output.results, FUN = median)
-  output.summary.fpr <- aggregate(FPR ~ Method, data = output.results, FUN = median)
-  output.summary.sdr <- aggregate(Spike.detect.rate ~ Method, data = output.results, FUN = median)
-  df <- merge(merge(output.summary.auc,output.summary.fpr, by = "Method"),output.summary.sdr, by = "Method")
-  df <- df[df$FPR <= 0.05,]
-  if(nrow(df) > 0){
-    df <- df[order(df$AUC, decreasing = TRUE),]
-    if(df[1,"Spike.detect.rate"] == 0 & verbose) message("Spike detect rate is zero for the top method! You might want to re-run the analysis with a pruned dataset (see preDA) or a higher effectSize")
-  }
-
   return(out)
 }
diff --git a/R/zzz.R b/R/zzz.R
index 31e6cae..f3bcc66 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -1,5 +1,5 @@
 .onLoad <- function(libname, pkgname){
-  message("DAtest version 2.7.7")
+  message("DAtest version 2.7.8")
 }
diff --git a/README.md b/README.md
index b23226a..2d77df3 100644
--- a/README.md
+++ b/README.md
@@ -14,24 +14,153 @@ in choosing a method for a specific dataset based on empirical testing.
 
 #### The method goes as follows:
 
 - Shuffle predictor variable (E.g. case vs. control)
-- Spike in data for some randomly chosen features (OTU/gene/protein),
-  such that they are associated with the shuffled predictor
+- Spike in data for some randomly chosen features
+  (OTU/gene/protein/metabolite), such that they are associated with
+  the shuffled predictor
 - Apply methods, and check:
     - whether they can find the spike-ins
-    - whether the false positive rate is controlled
+    - whether the false discovery rate is controlled
+
+#### How to choose a method
+
+A Score is calculated for each method as follows: Area Under the ROC
+Curve \* Spike Detect Rate - False Discovery Rate (a worked sketch of
+this calculation follows the workflow list below).
+
+The higher the Score, the better the method is estimated to be.
+
+With `summary` the 90% confidence limits of the Scores are computed. If
+these overlap, it means that the methods cannot be differentiated. The
+choice then relies on the trade-off between specificity (low FDR) and
+sensitivity (high Spike.detect.rate).
+
+If the best Score is zero, you should run the test again with either a
+higher effectSize or with a pruned dataset (see `preDA`).
 
 #### The intended workflow (details can be found below):
 
-- Compare methods with `testDA` function
-    - Check the results with `plot` or `summary`
-    - Choose method that has high AUC, FPR not higher than ~0.05, and
-      a Spike.detect.rate above 0
+- Compare methods with `testDA` function (input is a data.frame or a
+  phyloseq object)
+    - Check the results with `summary` (and `plot`)
+    - Choose method that has high Score, as long as the
+      Spike.detect.rate is above 0
 - Explore the sensitivity and false discovery rate of the chosen
   method with `powerDA`
 - Run data with the chosen test with `DA."test"` function, where
   "test" is the name of the test
 - Check out your final results.
+#### An ultra-short tutorial on a simulated dataset:
+
+    library(DAtest)
+
+    ## DAtest version 2.7.8
+
+    # First we create a random dataset with 200 features and 20 samples
+    set.seed(1)
+    df <- matrix(rpois(4000, lambda = 100),200,20)
+
+    # We create a vector saying that the 10 first samples are "Control" and the 10 next are "Treatment"
+    vec <- c(rep("Control",10),rep("Treatment",10))
+
+    # Spike-in: Multiply all "Treatment" samples by 2 for the first 10 features
+    df[1:10,11:20] <- df[1:10,11:20] * 2
+
+    ################# From this step, you would input your own data for a real analysis ###################
+    # Let's compare the methods
+    test <- testDA(df, predictor = vec)
+
+    ## Seed is set to 123
+
+    ## predictor is assumed to be a categorical variable with 2 levels: Control, Treatment
+
+    ## bay was excluded due to failure
+
+    summary(test)
+
+    ##                       Method   AUC   FPR   FDR Spike.detect.rate Score
+    ##          MgSeq Feature (msf) 1.000 0.000 0.000               1.0 1.000
+    ##                  RAIDA (rai) 1.000 0.000 0.000               1.0 1.000
+    ##             LIMMA voom (vli) 1.000 0.035 0.031               1.0 0.969
+    ##                DESeq2 (ds2x) 1.000 0.035 0.062               1.0 0.938
+    ##   DESeq2 man. geoMeans (ds2) 1.000 0.035 0.062               1.0 0.938
+    ##      EdgeR exact - TMM (ere) 1.000 0.043 0.062               1.0 0.938
+    ##     EdgeR exact - RLE (ere2) 1.000 0.049 0.118               1.0 0.882
+    ##        EdgeR qll - TMM (erq) 1.000 0.049 0.118               1.0 0.882
+    ##                 SAMseq (sam) 1.000    NA 0.118               1.0 0.882
+    ##       EdgeR qll - RLE (erq2) 1.000 0.054 0.167               1.0 0.833
+    ##              MgSeq ZIG (zig) 0.980 0.127 0.434               0.9 0.457
+    ##          ALDEx2 wilcox (adx) 1.000 0.097 0.545               1.0 0.455
+    ##          ALDEx2 t-test (adx) 1.000 0.124 0.583               1.0 0.417
+    ##             Log t-test (ltt) 1.000 0.670 0.901               1.0 0.099
+    ##              Log LIMMA (lli) 1.000 0.689 0.906               1.0 0.094
+    ##           Log t-test2 (ltt2) 1.000 0.968 0.924               1.0 0.076
+    ##      Quasi-Poisson GLM (qpo) 1.000 0.970 0.924               1.0 0.076
+    ##           Log LIMMA 2 (lli2) 1.000 0.989 0.925               1.0 0.075
+    ##           Negbinom GLM (neb) 1.000 0.989 0.925               1.0 0.075
+    ##            Poisson GLM (poi) 1.000 1.000 0.925               1.0 0.075
+    ##                 t-test (ttt) 0.986 0.968 0.924               1.0 0.075
+    ##                 Wilcox (wil) 0.884 0.957 0.924               1.0 0.067
+    ##            Permutation (per) 0.595 0.962 0.924               1.0 0.045
+    ##          ZI-NegBin GLM (znb) 0.500 0.000 0.000               0.0 0.000
+    ##         ZI-Poisson GLM (zpo) 0.500 0.000 0.000               0.0 0.000
+    ##  Score.5% Score.95%
+    ##     1.000     1.000
+    ##     0.577     1.000
+    ##     0.536     1.000
+    ##     0.556     1.000
+    ##     0.556     1.000
+    ##     0.536     1.000
+    ##     0.536     1.000
+    ##     0.536     1.000
+    ##     0.500     0.938
+    ##     0.500     1.000
+    ##     0.000     0.652
+    ##     0.214     0.556
+    ##     0.183     0.455
+    ##     0.081     0.109
+    ##     0.081     0.101
+    ##     0.075     0.079
+    ##     0.073     0.079
+    ##     0.075     0.079
+    ##     0.075     0.079
+    ##     0.075     0.076
+    ##     0.069     0.078
+    ##     0.065     0.071
+    ##     0.042     0.047
+    ##     0.000     0.000
+    ##     0.000     0.000
+
+    # MetagenomeSeq Feature model appears to be the best
+    res1 <- DA.msf(df, predictor = vec)
+
+    ## Default value being used.
+
+    res1[res1$pval.adj < 0.05,"Feature"]
+
+    ## [1] "10" "9"  "3"  "4"  "7"  "8"  "1"  "6"  "2"  "5"
+
+    # And indeed, it finds the 10 spiked features ("1" to "10") and nothing else
+
+    # The Wilcoxon test was predicted to find all spike-ins (Spike.detect.rate = 1.0), but to have too high an FDR:
+    res2 <- DA.wil(df, predictor = vec)
+    res2[res2$pval.adj < 0.05,"Feature"]
+
+    ##  [1] "1"   "2"   "3"   "4"   "5"   "6"   "7"   "8"   "9"   "10"  "16"
+    ## [12] "95"  "121" "122" "125" "141" "148" "159"
+
+    # It finds features "1" to "10", but also 8 other non-spiked features!
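+
+Since the 10 spiked features are known in this simulation, the realized
+false discovery rate of the Wilcoxon run can be computed directly (a small
+sketch reusing `res2` from above):
+
+    # Features called significant after p-value adjustment
+    sig <- res2[res2$pval.adj < 0.05, "Feature"]
+    # Proportion of significant features that were not spiked: 8 of 18
+    mean(!sig %in% as.character(1:10))
+
+    ## [1] 0.4444444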
+
+**Things to consider:**
+
+- [Do you have a paired or blocked experimental design?](#if-you-have-a-paired-or-blocked-experimental-design)
+- [Do you have covariates?](#if-you-have-covariates)
+- [Does your predictor have more than two classes?](#if-your-predictor-is-categorical-with-more-than-two-levels)
+- [Is your data normalized externally or is it absolute abundances?](#if-data-is-normalized-externally-or-represent-absolute-abundances)
+- [Do you have a Phyloseq object?](#if-you-have-a-phyloseq-object)
 
 #### Main functions:
 
 - `testDA`: It shuffles predictor, spike-in data, runs all methods and
@@ -164,20 +293,24 @@ The following are suggested, but not needed:
 How to compare methods
 ======================
 
-Methods are compared with "False Positve Rate" (FPR), "Area Under the
-(Receiver Operator) Curve" (AUC), and "Spike Detection Rate"
-(Spike.detect.rate).
+Methods are compared with "False Discovery Rate" (FDR), "Area Under the
+(Receiver Operator) Curve" (AUC), "Spike Detection Rate"
+(Spike.detect.rate), and "False Positive Rate" (FPR).
 
 By shuffling the predictor variable and spiking a subset of the
 features, we know a priori which features should be significant (the
 spiked features) and which features shouldn't (the non-spiked features).
 
 FPR indicates the proportion of non-spiked features that were found to
-be significant (raw p-value below 0.05). With randomly generated data
-the p-value distribution should be uniform (flat) and 5% of the raw
+be significant (nominal p-value below 0.05). With randomly generated
+data the p-value distribution should be uniform (flat) and 5% of the raw
 p-values are expected to be below 0.05. We therefore want an FPR at
 0.05 or lower.
 
+FDR indicates the proportion of significant features (after multiple
+correction) that were not spiked and therefore shouldn't be
+significant. This should be as low as possible.
+
 AUC is estimated by ranking all features by their respective p-value,
 and finding the area under the ROC curve. For a good method p-values
 from the spiked features should be among the lowest. For a perfect
@@ -193,20 +326,6 @@ significant after multiple correction of the p-values. It is therefore
 the proportion of features you would expect to detect in a regular
 analysis. The higher Spike.detect.rate, the better.
 
-- The intended workflow for choosing a method is:
-    - Omit methods with FPR higher than 0.05
-    - Of remaining methods, choose the one with highest AUC if it has
-      a Spike.detect.rate above 0
-    - If the method with highest AUC (and FPR < 0.05) has a
-      Spike.detect.rate of zero, rerun analysis with pruned dataset
-      (see `preDA` below) or increase `effectSize`
-
-**Assumption of the test:** the test assumes that only few features are
-truly differentially abundant. Therefore, if many features are different
-between the samples, and this difference can truly be explained by the
-`predictor`, AUCs of the methods will be less than they actually are,
-but FPRs can still be trusted.
-
 ### Pre-process data:
 
 An optional step is to pre-process the data to reduce the number of
@@ -245,9 +364,9 @@ wilcoxon-test, and others dedicated for two-class predictors. Write
 variable. The `testDA` function will produce a message telling you how
 the `predictor` is read - Make sure this fits your expectations!
 
-`R` denotes how many times the spike-in and FPR/AUC calculation should
-be replicated. It is advised to use at least 10, but it can be set to 1
-for a fast test of the function.
+`R` denotes how many times the spike-in and FPR/FDR/AUC calculation
+should be replicated. It is advised to use at least 10, but it can be
+set to 1 for a fast test of the function.
 
 A subset of methods can be run by setting the `tests` argument.
 
@@ -414,30 +533,12 @@ the same order as columns in `data`):
     # Average run times of each method:
     mytest$run.times
 
-**Note:**
-
-As SAMseq does not output p-values, FPR for this method is the final
-false discovery rate (false positives / total positives). FPR might
-therefore vary quite a lot. If SAMSeq is one of the methods with highest
-AUC, you can run `testDA` again but with a higher `fdr.output` argument
-for "sam" (e.g.
-`testDA(...,args = list(sam = list(fdr.output = 0.25))))` to get a
-better estimate. If `fdr.output=0.25` we should expect FPR for "sam"
-from `testDA` to be 0.25 or lower.
-
-P-values for baySeq are defined as 1 - posterior likelihoods.
-
 ### Power analysis
 
 After a method has been chosen, you can run a power analysis. This can
 also be run to distinguish between methods appearing to be equally good
-in `testDA`. `powerDA` spikes the data with different effects sizes and
-along with AUC and FPR it estimates with Empircal Power (aka
-Spike.detect.rate aka sensitivity) and the False Discovery Rate (FDR).
-The Empirical Power gives you an estimate of the proportion of true
-effects at a given effect size you are likely to discover (after p-value
-adjustment). The FDR tells you what proportion of the detected features
-are likely false positives (after p-value adjustment).
+in `testDA`. It is similar to `testDA`, except that it spikes with
+different effect sizes.
 
 Only one test can be run at a time, here MetagenomeSeq Feature model is
 run (see details in `testDA` for test abbreviations):
 
@@ -446,13 +547,6 @@ run (see details in `testDA` for test abbreviations):
     plot(po.msf)
     summary(po.msf)
 
-**Assumption of the test:** as with `testDA`, `powerDA` also assumes
-that only few features are truly differentially abundant. Therefore, if
-many features are different between the samples, and this difference can
-truly be explained by the `predictor`, AUC and Power of the methods will
-appear worse than they actually are. In contrast, FPR and FDR can still
-be trusted.
-
 How to run real data
 ====================
 
diff --git a/man/plot.DA.Rd b/man/plot.DA.Rd
index 801c50b..803e54d 100644
--- a/man/plot.DA.Rd
+++ b/man/plot.DA.Rd
@@ -4,12 +4,12 @@
 \alias{plot.DA}
 \title{Plotting results from \code{testDA}}
 \usage{
-\method{plot}{DA}(x, sort = "Rank", p = FALSE, bins = 20, ...)
+\method{plot}{DA}(x, sort = "Score", p = FALSE, bins = 20, ...)
 }
 \arguments{
 \item{x}{The output from the \code{testDA} function}
 
-\item{sort}{Sort methods by \code{c("AUC","FPR","Spike.detect.rate","Rank")}}
+\item{sort}{Sort methods by \code{c("AUC","FDR","Spike.detect.rate","Score")}}
 
 \item{p}{Logical. Should the p-value distribution be plotted (only p-values from non-spiked features)}
 
diff --git a/man/summary.DA.Rd b/man/summary.DA.Rd
index 45b132c..29a6aa0 100644
--- a/man/summary.DA.Rd
+++ b/man/summary.DA.Rd
@@ -4,12 +4,21 @@
 \alias{summary.DA}
 \title{Summary of results from \code{testDA}}
 \usage{
-\method{summary}{DA}(object, sort = "Rank", ...)
+\method{summary}{DA}(object, sort = "Score", boot = TRUE, prob = c(0.05,
+  0.95), N = 1000, boot.seed = 1, ...)
 }
 \arguments{
 \item{object}{The output from the \code{testDA} function}
 
-\item{sort}{Sort methods by \code{c("AUC","FPR","Spike.detect.rate","Rank")}}
+\item{sort}{Sort methods by \code{c("AUC","FPR","Spike.detect.rate","Score")}}
+
+\item{boot}{If TRUE will use bootstrap for confidence limits of the Score, else will compute the limits from the original table. Recommended to be TRUE unless \code{R >= 100} in \code{testDA}}
+
+\item{prob}{Confidence limits for Score. Default \code{90\%} = \code{c(0.05,0.95)}}
+
+\item{N}{Number of bootstraps. Default 1000}
+
+\item{boot.seed}{Random seed for reproducibility of bootstraps}
 
 \item{...}{Additional arguments for \code{print}}
 }
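
For example, the new bootstrap arguments could be combined like this (a usage
sketch, assuming `test` is a `testDA` result as in the README tutorial):

    # 95% confidence limits instead of the default 90%, with more bootstraps
    summary(test, prob = c(0.025, 0.975), N = 10000, boot.seed = 42)

    # With many runs (R >= 100 in testDA), quantiles of the original table
    # may be preferred over bootstrapping
    summary(test, boot = FALSE)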