diff --git a/DESCRIPTION b/DESCRIPTION index a1a905a..1a1b7e8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,11 @@ Package: DAtest Title: Comparing Differential Abundance methods -Version: 2.6.1 +Version: 2.6.2 Authors@R: person("Jakob", "Russel", email = "russel2620@gmail.com", role = c("aut", "cre")) Description: What the title says. Depends: R (>= 3.2.5) Imports: foreach, doParallel, doSNOW, snow, MASS, pROC, ggplot2, cowplot, lme4, nlme, stats, methods, utils, statmod, pscl, samr, minqa, nloptr +Suggests: lsmeans, venneuler License: GPL (>= 3) Encoding: UTF-8 LazyData: true diff --git a/NAMESPACE b/NAMESPACE index 31b506a..ebfa26c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,10 +3,13 @@ S3method(plot,DA) S3method(print,DA) S3method(summary,DA) +export(DA.TukeyHSD) export(DA.adx) export(DA.anc) +export(DA.anova) export(DA.aov) export(DA.bay) +export(DA.drop1) export(DA.ds2) export(DA.ere) export(DA.ere2) @@ -22,6 +25,7 @@ export(DA.lli2) export(DA.llm) export(DA.llm2) export(DA.lrm) +export(DA.lsmeans) export(DA.ltt) export(DA.ltt2) export(DA.msf) @@ -65,4 +69,3 @@ importFrom(lme4,glmer) importFrom(lme4,glmer.nb) importFrom(pROC,roc) importFrom(parallel,detectCores) -importFrom(venneuler,venneuler) diff --git a/R/DA.poi.R b/R/DA.poi.R index 54a8319..9b37f2a 100644 --- a/R/DA.poi.R +++ b/R/DA.poi.R @@ -89,7 +89,7 @@ DA.poi <- function(data, predictor, paired = NULL, covars = NULL, relative = TRU pois <- function(x){ fit <- NULL tryCatch( - fit <- lme4::glmer(x ~ predictor + offset(log(log(libSize))) + (1|paired),family="poisson", ...), + fit <- lme4::glmer(x ~ predictor + offset(log(libSize)) + (1|paired),family="poisson", ...), error = function(x) fit <- NULL) if(!is.null(fit)) { if(nrow(coef(summary(fit))) > 1) { diff --git a/R/posthocs.R b/R/posthocs.R new file mode 100644 index 0000000..e5b6e2f --- /dev/null +++ b/R/posthocs.R @@ -0,0 +1,254 @@ +#' Run drop1 on all features from DAtest results with allResults = TRUE +#' +#' Works on "zpo", "znb", "qpo", "neb", "poi". Non-paired "lrm", "llm", "llm2" +#' @param results Output from a DA."test" function with allResults = TRUE +#' @param test Which test to use to calculate p-values. See ?drop1 for details +#' @param p.adj P-value adjustment method. See ?p.adjust for details +#' @param ... Additional arguments for drop1 function +#' @export +DA.drop1 <- function(results, test = "Chisq", p.adj = "fdr", ...){ + + # Class + k <- 1 + while(class(results[[k]])[1] == "NULL"){ + k<- k+1 + } + xclass <- class(results[[k]]) + + if(!any(c("lm","glm","zeroinfl","negbin","glmerMod") %in% xclass)) stop(paste("Class should be one of lm, glm, zeroinfl, negbin or glmerMod and not:",xclass)) + + xres <- lapply(results, function(x) tryCatch(drop1(x, test = test, ...),error = function(e) NA)) + + if(all(xclass == "lm")){ + AIC <- lapply(xres, function(x) x[,4]) + pv <- lapply(xres, function(x) x[,5]) + + AIC <- do.call(rbind,AIC[lapply(AIC, length) > 1]) + pv <- do.call(rbind,pv[lapply(pv, length) > 1]) + pva <- apply(pv, 2, function(x) p.adjust(x, method=p.adj)) + + colnames(AIC) <- paste0("AIC_",rownames(drop1(results[[1]], ...))) + colnames(pv) <- paste0("pval_",rownames(drop1(results[[1]], ...))) + colnames(pva) <- paste0("pval.adj_",rownames(drop1(results[[1]], ...))) + + res <- cbind(AIC,pv,pva) + + } + + if(xclass[1] == "glm"){ + + if(is.na(results[[k]]$aic)){ + + Dev <- lapply(xres, function(x) x[,2]) + pv <- lapply(xres, function(x) x[,4]) + + Dev <- do.call(rbind,Dev[lapply(Dev, length) > 1]) + pv <- do.call(rbind,pv[lapply(pv, length) > 1]) + pva <- apply(pv, 2, function(x) p.adjust(x, method=p.adj)) + + colnames(Dev) <- paste0("Deviance_",rownames(drop1(results[[1]], ...))) + colnames(pv) <- paste0("pval_",rownames(drop1(results[[1]], ...))) + colnames(pva) <- paste0("pval.adj_",rownames(drop1(results[[1]], ...))) + + res <- cbind(Dev,pv,pva) + + } else { + + AIC <- lapply(xres, function(x) x[,3]) + LRT <- lapply(xres, function(x) x[,4]) + pv <- lapply(xres, function(x) x[,5]) + + AIC <- do.call(rbind,AIC[lapply(AIC, length) > 1]) + LRT <- do.call(rbind,LRT[lapply(LRT, length) > 1]) + pv <- do.call(rbind,pv[lapply(pv, length) > 1]) + pva <- apply(pv, 2, function(x) p.adjust(x, method=p.adj)) + + colnames(AIC) <- paste0("AIC_",rownames(drop1(results[[1]], ...))) + colnames(LRT) <- paste0("LRT",rownames(drop1(results[[1]], ...))) + colnames(pv) <- paste0("pval_",rownames(drop1(results[[1]], ...))) + colnames(pva) <- paste0("pval.adj_",rownames(drop1(results[[1]], ...))) + + res <- cbind(AIC,LRT,pv,pva) + + } + + } + + if(all(xclass == "zeroinfl" | xclass == "glmerMod")){ + AIC <- lapply(xres, function(x) x[,2]) + LRT <- lapply(xres, function(x) x[,3]) + pv <- lapply(xres, function(x) x[,4]) + + AIC <- do.call(rbind,AIC[lapply(AIC, length) > 1]) + LRT <- do.call(rbind,LRT[lapply(LRT, length) > 1]) + pv <- do.call(rbind,pv[lapply(pv, length) > 1]) + pva <- apply(pv, 2, function(x) p.adjust(x, method=p.adj)) + + colnames(AIC) <- paste0("AIC_",rownames(drop1(results[[1]], ...))) + colnames(LRT) <- paste0("LRT",rownames(drop1(results[[1]], ...))) + colnames(pv) <- paste0("pval_",rownames(drop1(results[[1]], ...))) + colnames(pva) <- paste0("pval.adj_",rownames(drop1(results[[1]], ...))) + + res <- cbind(AIC,LRT,pv,pva) + + } + + if(xclass[1] == "negbin"){ + AIC <- lapply(xres, function(x) x[,3]) + LRT <- lapply(xres, function(x) x[,4]) + pv <- lapply(xres, function(x) x[,5]) + + AIC <- do.call(rbind,AIC[lapply(AIC, length) > 1]) + LRT <- do.call(rbind,LRT[lapply(LRT, length) > 1]) + pv <- do.call(rbind,pv[lapply(pv, length) > 1]) + pva <- apply(pv, 2, function(x) p.adjust(x, method=p.adj)) + + colnames(AIC) <- paste0("AIC_",rownames(drop1(results[[k]], ...))) + colnames(LRT) <- paste0("LRT",rownames(drop1(results[[k]], ...))) + colnames(pv) <- paste0("pval_",rownames(drop1(results[[k]], ...))) + colnames(pva) <- paste0("pval.adj_",rownames(drop1(results[[k]], ...))) + + res <- cbind(AIC,LRT,pv,pva) + + } + + return(res) +} + + +#' Run anova on all features from DAtest results with allResults = TRUE +#' +#' Works on "lrm", "llm", "llm2". Non-paired "neb" +#' @param results Output from a DA."test" function with allResults = TRUE +#' @param p.adj P-value adjustment method. See ?p.adjust for details +#' @param ... Additional arguments for anova function +#' @export +DA.anova <- function(results, p.adj = "fdr", ...){ + + # Class + k <- 1 + while(class(results[[k]])[1] == "NULL"){ + k<- k+1 + } + xclass <- class(results[[k]]) + + if(!any(c("lm","nebgin","lme") %in% xclass)) stop(paste("Class should be one of lm, lme or negbin and not:",xclass)) + + if(all(xclass == "lme")){ + + pv <- lapply(results, function(x) tryCatch(anova(x, ...)[,4],error = function(e) NA)) + + pv <- do.call(rbind,pv[lapply(pv, length) > 1]) + pva <- apply(pv, 2, function(x) p.adjust(x, method=p.adj)) + + colnames(pv) <- paste0("pval_",rownames(anova(results[[1]], ...))) + colnames(pva) <- paste0("pval.adj_",rownames(anova(results[[1]], ...))) + + res <- cbind(pv,pva) + + } + + if(xclass[1] == "negbin" | xclass[1] == "lm"){ + + pv <- lapply(results, function(x) tryCatch(anova(x)[,5],error = function(e) NA)) + + pv <- do.call(rbind,pv[lapply(pv, length) > 1]) + pva <- apply(pv, 2, function(x) p.adjust(x, method=p.adj)) + + colnames(pv) <- paste0("pval_",rownames(anova(results[[1]]))) + colnames(pva) <- paste0("pval.adj_",rownames(anova(results[[1]]))) + + res <- cbind(pv,pva) + + } + + + return(res) +} + + +#' Run TukeyHSD on all features from DAtest results with allResults = TRUE +#' +#' Works on "aov", "lao", "lao2" +#' @param results Output from a DA."test" function with allResults = TRUE +#' @param variable Which variable to test. Default predictor. Alternatively, the name of a covar +#' @param p.adj P-value adjustment method. See ?p.adjust for details +#' @param ... Additional arguments for TukeyHSD function +#' @export +DA.TukeyHSD <- function(results, variable = "predictor", p.adj = "fdr", ...){ + + # Class + k <- 1 + while(class(results[[k]])[1] == "NULL"){ + k<- k+1 + } + xclass <- class(results[[k]]) + + if(xclass[1] != "aov") stop(paste("Class should be aov and not:",xclass)) + if(!variable %in% attr(results[[k]]$terms,"term.labels")) stop(paste(variable,"not found in the models.")) + + pv <- lapply(results, function(x) tryCatch(as.data.frame(TukeyHSD(x, ...)[variable])[,4],error = function(e) NA)) + + pvs <- do.call(rbind,pv[lapply(pv, length) > 1]) + colnames(pvs) <- rownames(as.data.frame(TukeyHSD(results[[k]], ...)[variable])) + pva <- apply(pvs, 2, function(x) p.adjust(x, method=p.adj)) + + colnames(pvs) <- paste0("pval_",colnames(pvs)) + colnames(pva) <- paste0("pval.adj_",colnames(pva)) + + res <- cbind(pvs,pva) + + return(res) +} + + +#' Run lsmeans on all features from DAtest results with allResults = TRUE +#' +#' Pairwise testing on predictor and covars. Works on "poi", "neb", "lrm", "llm", "llm2", "qpo", "znb", "zpo". +#' +#' Require the lsmeans package +#' @param results Output from a DA."test" function with allResults = TRUE +#' @param variable Which variable to test. Default predictor. Alternatively, the name of a covar +#' @param predictor If results come from a paired "lrm", "llm" or "llm2" supply the original predictor variable in the form of as a vector +#' @param covars If results come from a paired "lrm", "llm" or "llm2" supply the original covars in the form of a named list +#' @param p.adj P-value adjustment method. See ?p.adjust for details +#' @param ... Additional arguments for lsmeans function +#' @export +DA.lsmeans <- function(results, variable = "predictor", predictor = NULL, covars = NULL, p.adj = "fdr", ...){ + + library(lsmeans) + + # Class + k <- 1 + while(class(results[[k]])[1] == "NULL"){ + k<- k+1 + } + + if(class(results[[k]])[1] == "lme"){ + form <<- as.formula(paste("x ~",paste(attr(results[[1]]$terms,"term.labels"), collapse = "+"))) + if(is.null(predictor)) stop("predictor has to be supplied for a paired lrm, llm and llm2") + if(!is.null(covars)){ + for(i in 1:length(covars)){ + assign(names(covars)[i],covars[[i]]) + } + } + } + + mc <- lapply(results, function(x) tryCatch(summary(pairs(lsmeans(x, variable))),error = function(e) NA)) + pv <- lapply(mc, function(x) as.data.frame(x)$p.value) + est <- lapply(mc, function(x) as.data.frame(x)$estimate) + + pvs <- do.call(rbind,pv[lapply(pv, length) > 1]) + est <- do.call(rbind,est[lapply(est, length) > 1]) + colnames(pvs) <- summary(pairs(lsmeans(results[[k]], variable)))$contrast + pva <- apply(pvs, 2, function(x) p.adjust(x, method=p.adj)) + + colnames(est) <- paste0("estimate_",colnames(pvs)) + colnames(pvs) <- paste0("pval_",colnames(pvs)) + colnames(pva) <- paste0("pval.adj_",colnames(pva)) + + res <- cbind(est,pvs,pva) + if(class(results[[k]])[1] == "lme") rm(form, envir = .GlobalEnv) + return(res) +} + diff --git a/R/vennDA.R b/R/vennDA.R index f200781..50c7155 100644 --- a/R/vennDA.R +++ b/R/vennDA.R @@ -1,15 +1,18 @@ #' Plot Venn diagram from allDA object #' -#' Plot a Venn (Euler) diagram of features found by different methods +#' Plot a Venn (Euler) diagram of features found by different methods. +#' +#' Require the venneuler package. #' @param x Output from the allDA function #' @param tests Character vector with tests to plot (E.g. c("ttt","adx.t","wil"), see names(x$results)). Default none #' @param alpha Numeric. q-value threshold for significant features. Default 0.05 #' @param ... Additional arguments for plotting #' @return Nothing -#' @importFrom venneuler venneuler #' @export vennDA <- function(x, tests = NULL, alpha = 0.05, ...){ + library(venneuler) + if(!all(names(x) == c("table","results"))) stop("x is not an allDA object") plottests <- tests[tests %in% names(x[[2]])] diff --git a/R/zzz.R b/R/zzz.R index c4620df..87f39a5 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,6 +1,6 @@ .onLoad <- function(libname, pkgname){ - message("DAtest version 2.6.1") + message("DAtest version 2.6.2") # Fix samr problem if(.Platform$OS.type == "windows"){ diff --git a/README.md b/README.md index ce20d87..eb4462e 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,7 @@ Overview of this tutorial Installation of packages ======================== +It is advised not to have any packages loaded when installing DAtest. Installation of DAtest and all dependencies has been tested to work on a clean R version 3.4.1 by installing in the following order: @@ -131,9 +132,10 @@ only the second level is spiked. #### **The function automatically uses multiple CPUs for fast execution** The methods run in parallel, and by default the number of cores used is -one less than available. This can be changed with the `cores` argument. -`cores = 1` will turn off parallel computing. If the function is -terminated before ending you might get the following warning: +one less than available. It has been tested to work on Windows, OS X and +Linux Debian. It can be changed with the `cores` argument. `cores = 1` +will turn off parallel computing. If the function is terminated before +ending you might get the following warning: closing unused connection X (...) @@ -260,6 +262,8 @@ the abbreviation given in the details of the `testDA` function. It is advised to set `allResults = TRUE` for checking final results. For all methods where relevant, this will output the raw results, often in a list with each element corresponding to a feature (OTU/gene/protein). +For published methods, it is advised to check their tutorials on how to +read to output. - ***IMPORTANT:*** - Set `out.anova` similar in `testDA` as in your final analysis @@ -270,7 +274,7 @@ list with each element corresponding to a feature (OTU/gene/protein). an `anova`/`drop1` function. This can be changed with the `out.anova` argument - All limma models (lim,lli,lli2,vli) test all levels of the - `predictor` against the intercept (with `topTable`). This can be + `predictor` against an intercept (with `topTable`). This can be changed with the `out.anova` argument - For ANCOM: If the FPR = 0, you would not expect false positives with the default settings. If "anc" has an FPR > 0, set @@ -280,26 +284,36 @@ For linear models the `drop1`/`anova` functions can be used to test significance of the `predictor` and `covars` variables: ### Apply drop1 for each feature and output the adjusted p-values: - # For lm/glm functions (lrm, llm, llm2, poi, neb, qpo): + # Works on "zpo", "znb", "qpo", "neb", "poi". Non-paired "lrm", "llm", "llm2" results <- DA.lrm(data, predictor, allResults = TRUE) - test <- apply(sapply(results, function(x) drop1(x, test = "Chisq")[,5]),1,function(y) p.adjust(y,method="fdr")) - colnames(test) <- rownames(drop1(results[[1]])) + res.drop1 <- DA.drop1(results) - # For glmer/zeroinfl functions (poi, neb with paired variable + znb and zpo): - results <- DA.poi(data, predictor, paired, allResults = TRUE) - test <- sapply(results, function(x) tryCatch(drop1(x, test = "Chisq")[,4],error = function(e) NA)) - test <- do.call(rbind,test[lapply(test, length) > 1]) - test <- apply(test, 2, function(x) p.adjust(x, method="fdr")) - colnames(test) <- rownames(drop1(results[[1]])) + ### Apply anova for each feature and output the adjusted p-values: + # Works on "lrm", "llm", "llm2". Non-paired "neb" + results <- DA.lrm(data, predictor, allResults = TRUE) + res.anova <- DA.anova(results) + +For anova and linear models we can also run post-hoc tests for all +pairwise comparisons of multi-class `predictor`/`covars`. + + ### Apply TukeyHSD for each feature for a selected variable and output the adjusted p-values: + # Works on "aov", "lao", "lao2" + results <- DA.aov(data, predictor, allResults = TRUE) + res.tukey <- DA.TukeyHSD(results, variable = "predictor") # variable can also be the name of a covar + + ### Apply lsmeans for each feature for a selected variable and output the adjusted p-values: + # This requires the lsmeans package. + # Works on "poi", "neb", "lrm", "llm", "llm2", "qpo", "znb", "zpo" + results <- DA.lrm(data, predictor, allResults = TRUE) + res.lsm <- DA.lsmeans(results, variable = "predictor") # variable can also be the name of a covar - ### Compare likelihoods for each feature and output the adjusted p-values (also possible for lm/glm): - # For lme functions (lrm, llm, llm2 with paired variable): - results <- DA.lrm(data, predictor, paired, method = "ML", allResults = TRUE) - test <- apply(sapply(results, function(x) anova(x)[,4]),1,function(y) p.adjust(y,method="fdr")) - colnames(test) <- rownames(anova(results[[1]])) + # For paired "lrm", "llm", "llm2" the original predictor variable has to be supplied. For example: + results <- DA.lrm(data, predictor = mypred, paired = SubjectID, allResults = TRUE) + res.lsm <- DA.lsmeans(results, variable = "predictor", predictor = mypred) -The `anova` function can also be used to compare different models, e.g. -test significance of a random component (paired variable). + # and if covars are used, they also need to be supplied for paired "lrm", "llm", "llm2". For example: + results <- DA.lrm(data, predictor = mypred, paired = SubjectID, covars = list(covar1 = mycovar), allResults = TRUE) + res.lsm <- DA.lsmeans(results, variable = "predictor", predictor = mypred, covars = list(covar1 = mycovar)) **Alternatively, run all (or several) methods and check which features are found by several methods.** @@ -311,6 +325,7 @@ are found by several methods.** View(res.all$table) # Venn diagram of detected features from selected methods: + # This requires the venneuler package. vennDA(res.all, tests = c("wil","ttt","ltt")) # See results from a method (e.g. t.test "ttt"): diff --git a/man/DA.TukeyHSD.Rd b/man/DA.TukeyHSD.Rd new file mode 100644 index 0000000..24106fa --- /dev/null +++ b/man/DA.TukeyHSD.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/posthocs.R +\name{DA.TukeyHSD} +\alias{DA.TukeyHSD} +\title{Run TukeyHSD on all features from DAtest results with allResults = TRUE} +\usage{ +DA.TukeyHSD(results, variable = "predictor", p.adj = "fdr", ...) +} +\arguments{ +\item{results}{Output from a DA."test" function with allResults = TRUE} + +\item{variable}{Which variable to test. Default predictor. Alternatively, the name of a covar} + +\item{p.adj}{P-value adjustment method. See ?p.adjust for details} + +\item{...}{Additional arguments for TukeyHSD function} +} +\description{ +Works on "aov", "lao", "lao2" +} diff --git a/man/DA.anova.Rd b/man/DA.anova.Rd new file mode 100644 index 0000000..a16a2bc --- /dev/null +++ b/man/DA.anova.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/posthocs.R +\name{DA.anova} +\alias{DA.anova} +\title{Run anova on all features from DAtest results with allResults = TRUE} +\usage{ +DA.anova(results, p.adj = "fdr", ...) +} +\arguments{ +\item{results}{Output from a DA."test" function with allResults = TRUE} + +\item{p.adj}{P-value adjustment method. See ?p.adjust for details} + +\item{...}{Additional arguments for anova function} +} +\description{ +Works on "lrm", "llm", "llm2". Non-paired "neb" +} diff --git a/man/DA.drop1.Rd b/man/DA.drop1.Rd new file mode 100644 index 0000000..03c4d9b --- /dev/null +++ b/man/DA.drop1.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/posthocs.R +\name{DA.drop1} +\alias{DA.drop1} +\title{Run drop1 on all features from DAtest results with allResults = TRUE} +\usage{ +DA.drop1(results, test = "Chisq", p.adj = "fdr", ...) +} +\arguments{ +\item{results}{Output from a DA."test" function with allResults = TRUE} + +\item{test}{Which test to use to calculate p-values. See ?drop1 for details} + +\item{p.adj}{P-value adjustment method. See ?p.adjust for details} + +\item{...}{Additional arguments for drop1 function} +} +\description{ +Works on "zpo", "znb", "qpo", "neb", "poi". Non-paired "lrm", "llm", "llm2" +} diff --git a/man/DA.lsmeans.Rd b/man/DA.lsmeans.Rd new file mode 100644 index 0000000..f4b4fb5 --- /dev/null +++ b/man/DA.lsmeans.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/posthocs.R +\name{DA.lsmeans} +\alias{DA.lsmeans} +\title{Run lsmeans on all features from DAtest results with allResults = TRUE} +\usage{ +DA.lsmeans(results, variable = "predictor", predictor = NULL, + covars = NULL, p.adj = "fdr", ...) +} +\arguments{ +\item{results}{Output from a DA."test" function with allResults = TRUE} + +\item{variable}{Which variable to test. Default predictor. Alternatively, the name of a covar} + +\item{predictor}{If results come from a paired "lrm", "llm" or "llm2" supply the original predictor variable in the form of as a vector} + +\item{covars}{If results come from a paired "lrm", "llm" or "llm2" supply the original covars in the form of a named list} + +\item{p.adj}{P-value adjustment method. See ?p.adjust for details} + +\item{...}{Additional arguments for lsmeans function} +} +\description{ +Pairwise testing on predictor and covars. Works on "poi", "neb", "lrm", "llm", "llm2", "qpo", "znb", "zpo". +} +\details{ +Require the lsmeans package +} diff --git a/man/vennDA.Rd b/man/vennDA.Rd index d0a2b14..c8d0c39 100644 --- a/man/vennDA.Rd +++ b/man/vennDA.Rd @@ -19,5 +19,8 @@ vennDA(x, tests = NULL, alpha = 0.05, ...) Nothing } \description{ -Plot a Venn (Euler) diagram of features found by different methods +Plot a Venn (Euler) diagram of features found by different methods. +} +\details{ +Require the venneuler package. }