From d6179c21a1a20593311ed9558a887e1ba2ba89ce Mon Sep 17 00:00:00 2001 From: Jakob Russel Date: Sat, 31 Mar 2018 19:23:13 -0400 Subject: [PATCH] new version --- DESCRIPTION | 2 +- R/plot.DA.R | 4 +- R/print.DA.R | 54 --------------------- R/summary.DA.R | 56 ++++++++++++++++++++++ R/zzz.R | 2 +- README.md | 120 ++++++++++++++++++++++------------------------ man/plot.DA.Rd | 2 +- man/summary.DA.Rd | 2 +- 8 files changed, 120 insertions(+), 122 deletions(-) create mode 100644 R/summary.DA.R diff --git a/DESCRIPTION b/DESCRIPTION index ee943ab..dc18019 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: DAtest Title: Comparing Differential Abundance methods -Version: 2.7.8 +Version: 2.7.9 Authors@R: person("Jakob", "Russel", email = "russel2620@gmail.com", role = c("aut", "cre")) Description: What the title says. Depends: R (>= 3.2.5) diff --git a/R/plot.DA.R b/R/plot.DA.R index e5edad1..bf070cd 100644 --- a/R/plot.DA.R +++ b/R/plot.DA.R @@ -1,7 +1,7 @@ #' Plotting results from \code{testDA} #' #' @param x The output from the \code{testDA} function -#' @param sort Sort methods by \code{c("AUC","FDR","Spike.detect.rate","Rank")} +#' @param sort Sort methods by median \code{c("AUC","FDR","Spike.detect.rate","Score")} #' @param p Logical. Should the p-value distribution be plotted (only p-values from non-spiked features) #' @param bins Integer. Number of bins in p-value histograms #' @param ... Additional arguments for \code{ggdraw} @@ -40,7 +40,7 @@ plot.DA <- function(x, sort = "Score", p = FALSE, bins = 20, ...){ df <- merge(merge(auc.median,fdr.median, by = "Method"),sdr.median, by = "Method") # Score - df$Score <- round(df$AUC * df$Spike.detect.rate - df$FDR,3) + df$Score <- round((df$AUC-0.5) * df$Spike.detect.rate - df$FDR,3) # Sort the reults if(sort == "AUC") { diff --git a/R/print.DA.R b/R/print.DA.R index 0118124..628eb85 100644 --- a/R/print.DA.R +++ b/R/print.DA.R @@ -1,57 +1,3 @@ -#' Summary of results from \code{testDA} -#' -#' @param object The output from the \code{testDA} function -#' @param sort Sort methods by \code{c("AUC","FPR","Spike.detect.rate","Score")} -#' @param boot If TRUE will use bootstrap for confidence limits of the Score, else will compute the limits from the original table. Recommended to be TRUE unless \code{R >= 100} in \code{testDA} -#' @param prob Confidence limits for Score. Default \code{90\%} = \code{c(0.05,0.095)} -#' @param N Number of bootstraps. Default 1000 -#' @param boot.seed Random seed for reproducibility of bootstraps -#' @param ... Additional arguments for \code{print} -#' @import stats -#' @import methods -#' @import utils -#' @export - -summary.DA <- function(object, sort = "Score", boot = TRUE, prob = c(0.05,0.95), N = 1000, boot.seed = 1, ...){ - - # Find medians - output.summary.auc <- aggregate(AUC ~ Method, data = object$table, FUN = function(x) round(median(x),3)) - output.summary.fpr <- aggregate(FPR ~ Method, data = object$table, FUN = function(x) round(median(x),3)) - output.summary.sdr <- aggregate(Spike.detect.rate ~ Method, data = object$table, FUN = function(x) round(median(x),3)) - output.summary.fdr <- aggregate(FDR ~ Method, data = object$table, FUN = function(x) round(median(x),3)) - - # Merge - df <- merge(merge(merge(output.summary.auc,output.summary.fpr, by = "Method", all = TRUE),output.summary.fdr, by = "Method"),output.summary.sdr, by = "Method") - - # Score - df$Score <- round(df$AUC * df$Spike.detect.rate - df$FDR,3) - - # Interval - object$table$Score <- object$table$AUC * object$table$Spike.detect.rate - object$table$FDR - - if(boot){ - set.seed(boot.seed) - boots <- lapply(unique(object$table$Method), function(x) object$table[object$table$Method == x,][ - sample(rownames(object$table[object$table$Method == x,]),N,replace = TRUE), - ]) - boot.score <- lapply(boots,function(y) aggregate(Score ~ Method, data = y, FUN = function(x) round(quantile(x,probs = prob),3))) - score.cl <- do.call(rbind,boot.score) - } else { - score.cl <- aggregate(Score ~ Method, data = object$table, FUN = function(x) round(quantile(x,probs = prob),3)) - } - - df <- merge(df, as.matrix(score.cl), by = "Method") - - # Sort - if(sort == "AUC") df <- df[order(df$AUC, decreasing = TRUE),] - if(sort == "FPR") df <- df[order(df$FPR, decreasing = FALSE),] - if(sort == "Spike.detect.rate") df <- df[order(df$Spike.detect.rate, decreasing = TRUE),] - if(sort == "Score") df <- df[order(df$Score, decreasing = TRUE),] - - print(df, row.names = FALSE, ...) - -} - #' Print results from \code{testDA} #' #' @param x The output from the \code{testDA} function diff --git a/R/summary.DA.R b/R/summary.DA.R new file mode 100644 index 0000000..fb3b104 --- /dev/null +++ b/R/summary.DA.R @@ -0,0 +1,56 @@ +#' Summary of results from \code{testDA} +#' +#' @param object The output from the \code{testDA} function +#' @param sort Sort methods by \code{c("AUC","FPR","Spike.detect.rate","Score")} +#' @param boot If TRUE will use bootstrap for confidence limits of the Score, else will compute the limits from the original table. Recommended to be TRUE unless \code{R >= 100} in \code{testDA} +#' @param prob Confidence limits for Score. Default \code{90\%} = \code{c(0.05,0.095)} +#' @param N Number of bootstraps. Default 1000 +#' @param boot.seed Random seed for reproducibility of bootstraps +#' @param ... Additional arguments for \code{print} +#' @import stats +#' @import methods +#' @import utils +#' @export + +summary.DA <- function(object, sort = "Score", boot = TRUE, prob = c(0.05,0.95), N = 1000, boot.seed = 1, ...){ + + # Find medians + output.summary.auc <- aggregate(AUC ~ Method, data = object$table, FUN = function(x) round(median(x),3)) + output.summary.fpr <- aggregate(FPR ~ Method, data = object$table, FUN = function(x) round(median(x),3)) + output.summary.sdr <- aggregate(Spike.detect.rate ~ Method, data = object$table, FUN = function(x) round(median(x),3)) + output.summary.fdr <- aggregate(FDR ~ Method, data = object$table, FUN = function(x) round(median(x),3)) + + # Merge + df <- merge(merge(merge(output.summary.auc,output.summary.fpr, by = "Method", all = TRUE),output.summary.fdr, by = "Method"),output.summary.sdr, by = "Method") + + # Score + df$Score <- round((df$AUC-0.5) * df$Spike.detect.rate - df$FDR,3) + + # Interval + object$table$Score <- (object$table$AUC-0.5) * object$table$Spike.detect.rate - object$table$FDR + + if(boot){ + set.seed(boot.seed) + boots <- lapply(unique(object$table$Method), function(x) object$table[object$table$Method == x,][ + sample(rownames(object$table[object$table$Method == x,]),N,replace = TRUE), + ]) + boot.score <- lapply(boots,function(y) aggregate(Score ~ Method, data = y, FUN = function(x) round(quantile(x,probs = prob),3))) + score.cl <- do.call(rbind,boot.score) + } else { + score.cl <- aggregate(Score ~ Method, data = object$table, FUN = function(x) round(quantile(x,probs = prob),3)) + } + + df <- merge(df, as.matrix(score.cl), by = "Method") + + # Sort + if(sort == "AUC") df <- df[order(df$AUC, decreasing = TRUE),] + if(sort == "FPR") df <- df[order(df$FPR, decreasing = FALSE),] + if(sort == "Spike.detect.rate") df <- df[order(df$Spike.detect.rate, decreasing = TRUE),] + if(sort == "Score") { + df <- df[order(df$Score,df[,7],df[,8], decreasing = TRUE),] + if(df[1,"Score"] <= 0) warning("Best Score is less or equal to zero!\nYou might want to rerun with a higher effectSize or pruned dataset (see preDA)") + } + + print(df, row.names = FALSE, ...) + +} diff --git a/R/zzz.R b/R/zzz.R index f3bcc66..8430293 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,5 +1,5 @@ .onLoad <- function(libname, pkgname){ - message("DAtest version 2.7.8") + message("DAtest version 2.7.9") } diff --git a/README.md b/README.md index e69394f..ae7444b 100644 --- a/README.md +++ b/README.md @@ -26,19 +26,19 @@ in choosing a method for a specific dataset based on empirical testing. - Compare methods with `testDA` function (input is a data.frame or a phyloseq object) - Check the results with `summary` (and `plot`) - - Choose method that has high Score, as long as the - Spike.detect.rate is above 0 + - Choose method that has high Score, as long as it is above 0 - Explore the sensitivity and false discovery rate of the chosen method with `powerDA` - Run data with the chosen test with `DA."test"` function, where "test" is the name of the test - Check out your final results. - Overview of this tutorial ------------------------- + - [Installation of packages](#installation-of-packages) -- [A short tutorial on a simulated dataset](#a-short-tutorial-on-a-simulated-dataset) +- [A short tutorial on a simulated + dataset](#a-short-tutorial-on-a-simulated-dataset) - [How to compare methods](#how-to-compare-methods) - [The test](#run-the-test) - [Multi-class @@ -56,7 +56,6 @@ Overview of this tutorial - [Implemented methods](#implemented-methods) - [Extra features](#extra-features) - #### Main functions: - `testDA`: It shuffles predictor, spike-in data, runs all methods and @@ -165,14 +164,13 @@ The following are suggested, but not needed: # For post-hoc testing (generalized) linear models install.packages("lsmeans") - -A short tutorial on a simulated dataset -======================================= +A short tutorial on a simulated dataset: +======================================== #### How to choose a method -A Score is calculated for each method as follows: Area Under the ROC -Curve \* Spike Detect Rate - False Discovery Rate +A Score is calculated for each method as follows: (Area Under the ROC +Curve - 0.5) \* Spike Detect Rate - False Discovery Rate The higher the Score, the better the method is estimated to be. @@ -181,71 +179,68 @@ these overlap, it means that the methods cannot be differentiated. The choice then relies on the trade-off between specificity (low FDR) and sensitivity (high Spike.detect.rate). -If the best Score is zero, you should run the test again with either a -higher effectSize or with a pruned dataset (see `preDA`) - +If the best Score is zero or below, you should run the test again with +either a higher effectSize or with a pruned dataset (see `preDA`) library(DAtest) - ## DAtest version 2.7.8 - -First we create a random dataset with 200 features and 20 samples + ## DAtest version 2.7.9 + # First we create a random dataset with 200 features and 20 samples set.seed(1) df <- matrix(rpois(4000, lambda = 100),200,20) -We create a vector saying that the 10 first samples are "Control" the 10 next are "Treatment" - + # We create a vector saying that the 10 first samples are "Control" the 10 next are "Treatment" vec <- c(rep("Control",10),rep("Treatment",10)) -Spike-in: Multiply all "Treatment" samples by 2 for first 10 features - - df[1:10,11:20] <- df[1:10,11:20] * 2 - -**From this step, you would input your own data for a real analysis** - -Let's compare the methods + # Spike-in: Multiply all "Treatment" samples by 2 for first 10 features + df[1:10,11:20] <- round(df[1:10,11:20] * 2) + ################# From this step, you would input your own data for a real analysis ################### + # Let's compare the methods test <- testDA(df, predictor = vec) ## Seed is set to 123 ## predictor is assumed to be a categorical variable with 2 levels: Control, Treatment + ## + |=================================================================| 100% + ## bay was excluded due to failure summary(test) - ## Method AUC FPR FDR Spike.detect.rate Score Score.5% Score.95% - ## MgSeq Feature (msf) 1.000 0.000 0.000 1.0 1.000 1.000 1.000 - ## RAIDA (rai) 1.000 0.000 0.000 1.0 1.000 0.577 1.000 - ## LIMMA voom (vli) 1.000 0.035 0.031 1.0 0.969 0.536 1.000 - ## DESeq2 (ds2x) 1.000 0.035 0.062 1.0 0.938 0.556 1.000 - ## DESeq2 man. geoMeans (ds2) 1.000 0.035 0.062 1.0 0.938 0.556 1.000 - ## EdgeR exact - TMM (ere) 1.000 0.043 0.062 1.0 0.938 0.536 1.000 - ## EdgeR exact - RLE (ere2) 1.000 0.049 0.118 1.0 0.882 0.536 1.000 - ## EdgeR qll - TMM (erq) 1.000 0.049 0.118 1.0 0.882 0.536 1.000 - ## SAMseq (sam) 1.000 NA 0.118 1.0 0.882 0.500 0.938 - ## EdgeR qll - RLE (erq2) 1.000 0.054 0.167 1.0 0.833 0.500 1.000 - ## MgSeq ZIG (zig) 0.980 0.127 0.434 0.9 0.457 0.000 0.652 - ## ALDEx2 wilcox (adx) 1.000 0.097 0.545 1.0 0.455 0.214 0.556 - ## ALDEx2 t-test (adx) 1.000 0.124 0.583 1.0 0.417 0.183 0.455 - ## Log t-test (ltt) 1.000 0.670 0.901 1.0 0.099 0.081 0.109 - ## Log LIMMA (lli) 1.000 0.689 0.906 1.0 0.094 0.081 0.101 - ## Log t-test2 (ltt2) 1.000 0.968 0.924 1.0 0.076 0.075 0.079 - ## Quasi-Poisson GLM (qpo) 1.000 0.970 0.924 1.0 0.076 0.073 0.079 - ## Log LIMMA 2 (lli2) 1.000 0.989 0.925 1.0 0.075 0.075 0.079 - ## Negbinom GLM (neb) 1.000 0.989 0.925 1.0 0.075 0.075 0.079 - ## Poisson GLM (poi) 1.000 1.000 0.925 1.0 0.075 0.075 0.076 - ## t-test (ttt) 0.986 0.968 0.924 1.0 0.075 0.069 0.078 - ## Wilcox (wil) 0.884 0.957 0.924 1.0 0.067 0.065 0.071 - ## Permutation (per) 0.595 0.962 0.924 1.0 0.045 0.042 0.047 - ## ZI-NegBin GLM (znb) 0.500 0.000 0.000 0.0 0.000 0.000 0.000 - ## ZI-Poisson GLM (zpo) 0.500 0.000 0.000 0.0 0.000 0.000 0.000 + ## Method AUC FPR FDR Spike.detect.rate Score Score.5% Score.95% + ## MgSeq Feature (msf) 1.000 0.000 0.000 1.0 0.500 0.500 0.500 + ## RAIDA (rai) 1.000 0.000 0.000 1.0 0.500 0.077 0.500 + ## LIMMA voom (vli) 1.000 0.035 0.031 1.0 0.469 0.035 0.500 + ## DESeq2 (ds2x) 1.000 0.035 0.062 1.0 0.438 0.056 0.500 + ## DESeq2 man. geoMeans (ds2) 1.000 0.035 0.062 1.0 0.438 0.056 0.500 + ## EdgeR exact - TMM (ere) 1.000 0.043 0.062 1.0 0.438 0.036 0.500 + ## EdgeR exact - RLE (ere2) 1.000 0.049 0.118 1.0 0.382 0.036 0.500 + ## EdgeR qll - TMM (erq) 1.000 0.049 0.118 1.0 0.382 0.036 0.500 + ## SAMseq (sam) 1.000 NA 0.118 1.0 0.382 0.000 0.438 + ## EdgeR qll - RLE (erq2) 1.000 0.054 0.167 1.0 0.333 0.000 0.500 + ## ZI-NegBin GLM (znb) 0.500 0.000 0.000 0.0 0.000 0.000 0.000 + ## ZI-Poisson GLM (zpo) 0.500 0.000 0.000 0.0 0.000 0.000 0.000 + ## MgSeq ZIG (zig) 0.980 0.127 0.434 0.9 -0.002 -0.364 0.219 + ## ALDEx2 wilcox (adx) 1.000 0.097 0.545 1.0 -0.045 -0.286 0.056 + ## ALDEx2 t-test (adx) 1.000 0.124 0.583 1.0 -0.083 -0.317 -0.045 + ## Log t-test (ltt) 1.000 0.670 0.901 1.0 -0.401 -0.419 -0.391 + ## Log LIMMA (lli) 1.000 0.689 0.906 1.0 -0.406 -0.419 -0.399 + ## Quasi-Poisson GLM (qpo) 1.000 0.970 0.924 1.0 -0.424 -0.457 -0.421 + ## Log t-test2 (ltt2) 1.000 0.968 0.924 1.0 -0.424 -0.431 -0.421 + ## Poisson GLM (poi) 1.000 1.000 0.925 1.0 -0.425 -0.425 -0.424 + ## Log LIMMA 2 (lli2) 1.000 0.989 0.925 1.0 -0.425 -0.425 -0.421 + ## Negbinom GLM (neb) 1.000 0.989 0.925 1.0 -0.425 -0.425 -0.421 + ## t-test (ttt) 0.986 0.968 0.924 1.0 -0.438 -0.509 -0.423 + ## Wilcox (wil) 0.884 0.957 0.924 1.0 -0.540 -0.584 -0.484 + ## Permutation (per) 0.595 0.962 0.924 1.0 -0.829 -0.863 -0.808 ## -MetagenomeSeq Featue model appears to be the best + # MetagenomeSeq Featue model appears to be the best res1 <- DA.msf(df, predictor = vec) ## Default value being used. @@ -254,27 +249,28 @@ MetagenomeSeq Featue model appears to be the best ## [1] "10" "9" "3" "4" "7" "8" "1" "6" "2" "5" -And indeed, MgSeq Featuremodel finds the 10 spiked features ("1" to "10") and nothing else! - -Wilcoxon test was predicted to find all spike-ins (Spike.detect.rate = 1.0), but have a too high FDR: + # And indeed, it finds the 10 spiked features ("1" to "10") and nothing else + # Wilcoxon test was predicted to find many spike-ins (Spike.detect.rate = 1.0), but have a too high FDR: res2 <- DA.wil(df, predictor = vec) res2[res2$pval.adj < 0.05,"Feature"] ## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "16" ## [12] "95" "121" "122" "125" "141" "148" "159" -Wilcoxon finds feature "1" to "10", but also 8 other non-spiked features! + # It finds feature "1" to "10", but also 8 other non-spiked features! **Things to consider:** -- [Do you have a paired or blocked experimental design](#if-you-have-a-paired-or-blocked-experimental-design) -- [Do you have covariates?](#if-you-have-covariates) -- [Does your predictor have more than two classes?](#if-your-predictor-is-categorical-with-more-than-two-levels) -- [Is your data normalized externally or is it absolute abundances?](#if-data-is-normalized-externally-or-represent-absolute-abundances) +- [Do you have a paired or blocked experimental + design](#if-you-have-a-paired-or-blocked-experimental-design) +- [Do you have covariates?](#if-you-have-covariates) +- [Does your predictor have more than two + classes?](#if-your-predictor-is-categorical-with-more-than-two-levels) +- [Is your data normalized externally or is it absolute + abundances?](#if-data-is-normalized-externally-or-represent-absolute-abundances) - [Do you have a Phyloseq object?](#if-you-have-a-phyloseq-object) - How to compare methods ====================== @@ -293,7 +289,7 @@ p-values are expected to be below 0.05. We therefore want an FPR at 0.05 or lower. FDR indicates the proportion of significant features (after multiple -correction) that were not spiked and therefore shouldn't be +correction) that we're not spiked and therefore shouldn't be significant. This should be as low as possible. AUC is estimated by ranking all features by their respective p-value, diff --git a/man/plot.DA.Rd b/man/plot.DA.Rd index 803e54d..35be0b4 100644 --- a/man/plot.DA.Rd +++ b/man/plot.DA.Rd @@ -9,7 +9,7 @@ \arguments{ \item{x}{The output from the \code{testDA} function} -\item{sort}{Sort methods by \code{c("AUC","FDR","Spike.detect.rate","Rank")}} +\item{sort}{Sort methods by median \code{c("AUC","FDR","Spike.detect.rate","Score")}} \item{p}{Logical. Should the p-value distribution be plotted (only p-values from non-spiked features)} diff --git a/man/summary.DA.Rd b/man/summary.DA.Rd index 29a6aa0..8cdeabf 100644 --- a/man/summary.DA.Rd +++ b/man/summary.DA.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/print.DA.R +% Please edit documentation in R/summary.DA.R \name{summary.DA} \alias{summary.DA} \title{Summary of results from \code{testDA}}