diff --git a/DESCRIPTION b/DESCRIPTION index f015e79..54c2a92 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: DAtest Title: Comparing Differential Abundance/Expression Methods -Version: 2.7.17 +Version: 2.7.18 Authors@R: person("Jakob", "Russel", email = "russel2620@gmail.com", role = c("aut", "cre")) diff --git a/NAMESPACE b/NAMESPACE index eb9d2b0..4eb8b24 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -48,6 +48,7 @@ export(DA.sam) export(DA.spe) export(DA.tta) export(DA.ttc) +export(DA.ttr) export(DA.ttt) export(DA.vli) export(DA.wil) diff --git a/R/DA.ttr.R b/R/DA.ttr.R new file mode 100644 index 0000000..28efd7e --- /dev/null +++ b/R/DA.ttr.R @@ -0,0 +1,86 @@ +#' Welch t-test with rank-normalization +#' +#' Apply welch t-test to multiple features with one \code{predictor} +#' @param data Either a matrix with counts/abundances, OR a \code{phyloseq} object. If a matrix/data.frame is provided rows should be taxa/genes/proteins and columns samples +#' @param predictor The predictor of interest. Factor, OR if \code{data} is a \code{phyloseq} object the name of the variable in \code{sample_data(data)} in quotation +#' @param paired For paired/blocked experimental designs. Either a Factor with Subject/Block ID for running paired/blocked analysis, OR if data is a \code{phyloseq} object the name of the variable in \code{sample_data(data)} in quotation +#' @param relative Logical. Should \code{data} be normalized to relative abundances. Default TRUE +#' @param p.adj Character. P-value adjustment. Default "fdr". See \code{p.adjust} for details +#' @param testStat Function. Function for calculating fold change. Should take two vectors as arguments. Default is a log fold change: \code{log2((mean(case abundances)+0.001)/(mean(control abundances)+0.001))} +#' @param testStat.pair Function. Function for calculating fold change. Should take two vectors as arguments. Default is a log fold change: \code{log2(mean((case abundances+0.001)/(control abundances+0.001)))} +#' @param allResults If TRUE will return raw results from the \code{t.test} function +#' @param ... Additional arguments for the \code{t.test} function +#' @return A data.frame with with results. +#' @examples +#' # Creating random count_table and predictor +#' set.seed(4) +#' mat <- matrix(rnbinom(1000, size = 0.1, mu = 500), nrow = 100, ncol = 10) +#' rownames(mat) <- 1:100 +#' pred <- c(rep("Control", 5), rep("Treatment", 5)) +#' +#' # Running t-test +#' res <- DA.ttr(data = mat, predictor = pred) +#' @export + +DA.ttr <- function(data, predictor,paired = NULL, relative = TRUE, p.adj = "fdr", testStat = function(case,control){log2((mean(case)+1)/(mean(control)+1))}, testStat.pair = function(case,control){log2(mean((case+1)/(control+1)))},allResults = FALSE, ...){ + + # Extract from phyloseq + if(is(data, "phyloseq")){ + DAdata <- DA.phyloseq(data, predictor, paired) + count_table <- DAdata$count_table + predictor <- DAdata$predictor + paired <- DAdata$paired + } else { + count_table <- data + } + + # Define function + tt <- function(x){ + tryCatch(t.test(x ~ predictor, ...)$p.value, error = function(e){NA}) + } + + # Order data and define function for paired analysis + if(!is.null(paired)){ + count_table <- count_table[,order(paired)] + predictor <- predictor[order(paired)] + testStat <- testStat.pair + tt <- function(x){ + tryCatch(t.test(x ~ predictor, paired = TRUE, ...)$p.value, error = function(e){NA}) + } + } + + # Normalization + count.norm <- apply(count_table,2,rank) + + # Run tests + if(allResults){ + if(is.null(paired)){ + tt <- function(x){ + tryCatch(t.test(x ~ predictor, ...), error = function(e){NA}) + } + } else { + tt <- function(x){ + tryCatch(t.test(x ~ predictor, paired = TRUE, ...), error = function(e){NA}) + } + } + return(apply(count.norm,1,tt)) + } else { + res <- data.frame(pval = apply(count.norm,1,tt)) + res$pval.adj <- p.adjust(res$pval, method = p.adj) + # Teststat + predictor.num <- as.numeric(as.factor(predictor))-1 + testfun <- function(x){ + case <- x[predictor.num==1] + control <- x[predictor.num==0] + testStat(case,control) + } + res$log2FC <- apply(count.norm,1,testfun) + res$ordering <- NA + res[!is.na(res$log2FC) & res$log2FC > 0,"ordering"] <- paste0(levels(as.factor(predictor))[2],">",levels(as.factor(predictor))[1]) + res[!is.na(res$log2FC) & res$log2FC < 0,"ordering"] <- paste0(levels(as.factor(predictor))[1],">",levels(as.factor(predictor))[2]) + res$Feature <- rownames(res) + res$Method <- "t-test - Rank (ttr)" + if(is(data, "phyloseq")) res <- addTax(data, res) + return(res) + } +} \ No newline at end of file diff --git a/R/allDA.R b/R/allDA.R index 5dc54d6..4160e0f 100644 --- a/R/allDA.R +++ b/R/allDA.R @@ -63,7 +63,7 @@ allDA <- function(data, predictor, paired = NULL, covars = NULL, "lrm","llm","llm2","lma","lmc", "ere","ere2","pea","spe", "wil","kru","qua","fri", - "ttt","ltt","ltt2","tta","ttc", + "ttt","ltt","ltt2","tta","ttc","ttr", "aov","lao","lao2","aoa","aoc", "vli","lim","lli","lli2","lia","lic"), relative = TRUE, cores = (detectCores()-1), @@ -186,6 +186,7 @@ allDA <- function(data, predictor, paired = NULL, covars = NULL, mva = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor,paired,covars,relative, p.adj), argsL[[i]])), wil = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor,paired, relative, p.adj), argsL[[i]])), ttt = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor,paired, relative, p.adj), argsL[[i]])), + ttr = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor,paired, relative, p.adj), argsL[[i]])), ltt = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor,paired,relative, p.adj), argsL[[i]])), tta = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor,paired, p.adj), argsL[[i]])), ttc = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor,paired, p.adj), argsL[[i]])), @@ -333,6 +334,7 @@ allDA <- function(data, predictor, paired = NULL, covars = NULL, adx.w = "effect", wil = "log2FC", ttt = "log2FC", + ttr = "log2FC", ltt = "log2FC", ltt2 = "log2FC", tta = "log2FC", diff --git a/R/powerDA.R b/R/powerDA.R index a98167c..7560899 100644 --- a/R/powerDA.R +++ b/R/powerDA.R @@ -202,6 +202,7 @@ powerDA <- function(data, predictor, paired = NULL, covars = NULL, test = NULL, mva = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],paired,covars,relative, p.adj), args)), wil = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],paired, relative,p.adj), args)), ttt = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],paired, relative,p.adj), args)), + ttr = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],paired, relative,p.adj), args)), ltt = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],paired,relative,p.adj), args)), ttc = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],paired,p.adj), args)), tta = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],paired,p.adj), args)), diff --git a/R/pruneTests.R b/R/pruneTests.R index 2ec784d..c9f68d5 100644 --- a/R/pruneTests.R +++ b/R/pruneTests.R @@ -17,7 +17,7 @@ pruneTests <- function(tests, predictor, paired, covars, relative, decimal, zero tests <- tests[!tests %in% c("qpo","zpo","znb","bay","adx","ere","ere2","msf","aov","lao","lao2","aoa","aoc","kru","rai","spe","pea")] # Exclude tests that only work with one value for each combination of predictor and paired arguments if(!all(table(paired,predictor) == 1)){ - tests <- tests[!tests %in% c("ttt","ltt","ltt2","wil","per","fri","qua","sam","tta","ttc")] + tests <- tests[!tests %in% c("ttt","ttr","ltt","ltt2","wil","per","fri","qua","sam","tta","ttc")] } # Exclude if too few levels if(length(unique(paired)) < 5){ diff --git a/R/runtimeDA.R b/R/runtimeDA.R index bd2a274..564c422 100644 --- a/R/runtimeDA.R +++ b/R/runtimeDA.R @@ -31,7 +31,7 @@ #' runtimeDA(mat, pred, cores = 1, tests = c("ttt","wil"), tests.slow = c("neb")) #' @export runtimeDA <- function(data, predictor, paired = NULL, covars = NULL, subsamples = c(500,1000,1500,2000), subsamples.slow = c(100,150,200,250), - tests = c("sam", "qua", "fri", "vli", "qpo", "pea", "wil", "ttt", "ltt", "ltt2","ere", "ere2", "msf", "zig", "lim", "lli", "lli2", "aov", "lao", "lao2", "kru", "lrm", "llm", "llm2", "spe", "aoa", "aoc", "tta", "ttc", "lma", "lmc", "lia", "lic"), + tests = c("sam", "qua", "fri", "vli", "qpo", "pea", "wil", "ttt", "ttr", "ltt", "ltt2","ere", "ere2", "msf", "zig", "lim", "lli", "lli2", "aov", "lao", "lao2", "kru", "lrm", "llm", "llm2", "spe", "aoa", "aoc", "tta", "ttc", "lma", "lmc", "lia", "lic"), tests.slow = c("mva", "neb", "bay", "per", "ds2", "ds2x", "zpo", "znb", "adx", "poi", "erq", "erq2"), cores = (detectCores()-1), ...){ stopifnot(exists("data"),exists("predictor")) diff --git a/R/testDA.R b/R/testDA.R index b9fb5fe..99997a4 100644 --- a/R/testDA.R +++ b/R/testDA.R @@ -67,7 +67,7 @@ testDA <- function(data, predictor, paired = NULL, covars = NULL, R = 20, "lrm","llm","llm2","lma","lmc", "ere","ere2","pea","spe", "wil","kru","qua","fri", - "ttt","ltt","ltt2","tta","ttc", + "ttt","ltt","ltt2","tta","ttc","ttr", "aov","lao","lao2","aoa","aoc", "vli","lim","lli","lli2","lia","lic"), relative = TRUE, effectSize = 5, k = NULL, cores = (detectCores()-1), @@ -242,6 +242,7 @@ testDA <- function(data, predictor, paired = NULL, covars = NULL, R = 20, mva = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]],paired,covars,relative, p.adj), argsL[[i]])), wil = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]],paired, relative, p.adj), argsL[[i]])), ttt = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]],paired, relative, p.adj), argsL[[i]])), + ttr = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]],paired, relative, p.adj), argsL[[i]])), ltt = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]],paired,relative, p.adj), argsL[[i]])), tta = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]],paired, p.adj), argsL[[i]])), ttc = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]],paired, p.adj), argsL[[i]])), diff --git a/R/vennDA.R b/R/vennDA.R index 34e492e..4b8db60 100644 --- a/R/vennDA.R +++ b/R/vennDA.R @@ -75,7 +75,7 @@ vennDA <- function(x, tests = NULL, alpha = 0.1, split = FALSE, output = FALSE, featurelist.pos[[i]] <- featurelist[[i]][featurelist[[i]] %in% sub.p] featurelist.neg[[i]] <- featurelist[[i]][featurelist[[i]] %in% sub.n] } - if(plottests[i] %in% c("mva","sam","znb","zpo","poi","qpo","neb","lim","lli","lli2","vli","lia","lic","pea","spe","per","adx.t","adx.w","wil","ttt","ltt","ltt2","tta","ttc","ere","ere2","erq","erq2","ds2","ds2x","msf","zig","rai")){ + if(plottests[i] %in% c("mva","sam","znb","zpo","poi","qpo","neb","lim","lli","lli2","vli","lia","lic","pea","spe","per","adx.t","adx.w","wil","ttt","ttr","ltt","ltt2","tta","ttc","ere","ere2","erq","erq2","ds2","ds2x","msf","zig","rai")){ if(is.null(ncol(subs))){ featurelist.pos[[i]] <- featurelist[[i]] featurelist.neg[[i]] <- featurelist[[i]] @@ -87,7 +87,7 @@ vennDA <- function(x, tests = NULL, alpha = 0.1, split = FALSE, output = FALSE, } } # If no estimate/logFC provided throw all significant in both positive and negative list - if(!plottests[i] %in% c("mva","sam","bay","znb","zpo","poi","qpo","neb","lim","lli","lli2","vli","lia","lic","pea","spe","per","adx.t","adx.w","wil","ttt","ltt","ltt2","tta","ttc","ere","ere2","erq","erq2","ds2","ds2x","msf","zig","rai")){ + if(!plottests[i] %in% c("mva","sam","bay","znb","zpo","poi","qpo","neb","lim","lli","lli2","vli","lia","lic","pea","spe","per","adx.t","adx.w","wil","ttt","ttr","ltt","ltt2","tta","ttc","ere","ere2","erq","erq2","ds2","ds2x","msf","zig","rai")){ featurelist.pos[[i]] <- featurelist[[i]] featurelist.neg[[i]] <- featurelist[[i]] } diff --git a/R/zzz.R b/R/zzz.R index 5aca69e..0b678e5 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,5 +1,5 @@ .onAttach <- function(libname, pkgname){ - packageStartupMessage("DAtest version 2.7.17") + packageStartupMessage("DAtest version 2.7.18") if (Sys.getenv("RSTUDIO") == "1" && !nzchar(Sys.getenv("RSTUDIO_TERM")) && Sys.info()["sysname"] == "Darwin" && getRversion() == "4.0.0") { snow::setDefaultClusterOptions(setup_strategy = "sequential") diff --git a/man/DA.ttr.Rd b/man/DA.ttr.Rd new file mode 100644 index 0000000..469d91a --- /dev/null +++ b/man/DA.ttr.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DA.ttr.R +\name{DA.ttr} +\alias{DA.ttr} +\title{Welch t-test with rank-normalization} +\usage{ +DA.ttr( + data, + predictor, + paired = NULL, + relative = TRUE, + p.adj = "fdr", + testStat = function(case, control) { log2((mean(case) + 1)/(mean(control) + 1)) }, + testStat.pair = function(case, control) { log2(mean((case + 1)/(control + 1))) }, + allResults = FALSE, + ... +) +} +\arguments{ +\item{data}{Either a matrix with counts/abundances, OR a \code{phyloseq} object. If a matrix/data.frame is provided rows should be taxa/genes/proteins and columns samples} + +\item{predictor}{The predictor of interest. Factor, OR if \code{data} is a \code{phyloseq} object the name of the variable in \code{sample_data(data)} in quotation} + +\item{paired}{For paired/blocked experimental designs. Either a Factor with Subject/Block ID for running paired/blocked analysis, OR if data is a \code{phyloseq} object the name of the variable in \code{sample_data(data)} in quotation} + +\item{relative}{Logical. Should \code{data} be normalized to relative abundances. Default TRUE} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See \code{p.adjust} for details} + +\item{testStat}{Function. Function for calculating fold change. Should take two vectors as arguments. Default is a log fold change: \code{log2((mean(case abundances)+0.001)/(mean(control abundances)+0.001))}} + +\item{testStat.pair}{Function. Function for calculating fold change. Should take two vectors as arguments. Default is a log fold change: \code{log2(mean((case abundances+0.001)/(control abundances+0.001)))}} + +\item{allResults}{If TRUE will return raw results from the \code{t.test} function} + +\item{...}{Additional arguments for the \code{t.test} function} +} +\value{ +A data.frame with with results. +} +\description{ +Apply welch t-test to multiple features with one \code{predictor} +} +\examples{ +# Creating random count_table and predictor +set.seed(4) +mat <- matrix(rnbinom(1000, size = 0.1, mu = 500), nrow = 100, ncol = 10) +rownames(mat) <- 1:100 +pred <- c(rep("Control", 5), rep("Treatment", 5)) + +# Running t-test +res <- DA.ttr(data = mat, predictor = pred) +} diff --git a/man/allDA.Rd b/man/allDA.Rd index 68da20d..7a00131 100644 --- a/man/allDA.Rd +++ b/man/allDA.Rd @@ -12,7 +12,7 @@ allDA( tests = c("bay", "ds2", "ds2x", "per", "adx", "znb", "zpo", "msf", "zig", "erq", "erq2", "neb", "qpo", "poi", "sam", "lrm", "llm", "llm2", "lma", "lmc", "ere", "ere2", "pea", "spe", "wil", "kru", "qua", "fri", "ttt", "ltt", "ltt2", "tta", "ttc", - "aov", "lao", "lao2", "aoa", "aoc", "vli", "lim", "lli", "lli2", "lia", "lic"), + "ttr", "aov", "lao", "lao2", "aoa", "aoc", "vli", "lim", "lli", "lli2", "lia", "lic"), relative = TRUE, cores = (detectCores() - 1), p.adj = "fdr", diff --git a/man/runtimeDA.Rd b/man/runtimeDA.Rd index c53e81b..2c6ef0b 100644 --- a/man/runtimeDA.Rd +++ b/man/runtimeDA.Rd @@ -11,9 +11,10 @@ runtimeDA( covars = NULL, subsamples = c(500, 1000, 1500, 2000), subsamples.slow = c(100, 150, 200, 250), - tests = c("sam", "qua", "fri", "vli", "qpo", "pea", "wil", "ttt", "ltt", "ltt2", - "ere", "ere2", "msf", "zig", "lim", "lli", "lli2", "aov", "lao", "lao2", "kru", - "lrm", "llm", "llm2", "spe", "aoa", "aoc", "tta", "ttc", "lma", "lmc", "lia", "lic"), + tests = c("sam", "qua", "fri", "vli", "qpo", "pea", "wil", "ttt", "ttr", "ltt", + "ltt2", "ere", "ere2", "msf", "zig", "lim", "lli", "lli2", "aov", "lao", "lao2", + "kru", "lrm", "llm", "llm2", "spe", "aoa", "aoc", "tta", "ttc", "lma", "lmc", "lia", + "lic"), tests.slow = c("mva", "neb", "bay", "per", "ds2", "ds2x", "zpo", "znb", "adx", "poi", "erq", "erq2"), cores = (detectCores() - 1), diff --git a/man/testDA.Rd b/man/testDA.Rd index fb7473b..265146b 100644 --- a/man/testDA.Rd +++ b/man/testDA.Rd @@ -13,7 +13,7 @@ testDA( tests = c("bay", "ds2", "ds2x", "per", "adx", "znb", "zpo", "msf", "zig", "erq", "erq2", "neb", "qpo", "poi", "sam", "lrm", "llm", "llm2", "lma", "lmc", "ere", "ere2", "pea", "spe", "wil", "kru", "qua", "fri", "ttt", "ltt", "ltt2", "tta", "ttc", - "aov", "lao", "lao2", "aoa", "aoc", "vli", "lim", "lli", "lli2", "lia", "lic"), + "ttr", "aov", "lao", "lao2", "aoa", "aoc", "vli", "lim", "lli", "lli2", "lia", "lic"), relative = TRUE, effectSize = 5, k = NULL,