Skip to content

Commit

Permalink
New version
Browse files Browse the repository at this point in the history
  • Loading branch information
Jakob Russel committed Mar 26, 2018
1 parent dd08c14 commit 321412d
Show file tree
Hide file tree
Showing 11 changed files with 236 additions and 153 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: DAtest
Title: Comparing Differential Abundance methods
Version: 2.7.7
Version: 2.7.8
Authors@R: person("Jakob", "Russel", email = "[email protected]", role = c("aut", "cre"))
Description: What the title says.
Depends: R (>= 3.2.5)
Expand Down
12 changes: 2 additions & 10 deletions R/DA.kru.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ DA.kru <- function(data, predictor, relative = TRUE, p.adj = "fdr", allResults =
} else {
count_table <- data
}

predictor <- as.factor(predictor)

# Define function
kru <- function(x){
Expand All @@ -34,16 +36,6 @@ DA.kru <- function(data, predictor, relative = TRUE, p.adj = "fdr", allResults =

# Run tests
if(allResults){
## Define function for allResults
if(is.null(paired)){
kru <- function(x){
tryCatch(kruskal.test(x ~ predictor, ...), error = function(e){NA})
}
} else {
kru <- function(x){
tryCatch(kruskal.test(x ~ predictor, paired = TRUE, ...), error = function(e){NA})
}
}
return(apply(count.rel,1,kru))
} else {
res <- data.frame(pval = apply(count.rel,1,kru))
Expand Down
66 changes: 21 additions & 45 deletions R/plot.DA.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' Plotting results from \code{testDA}
#'
#' @param x The output from the \code{testDA} function
#' @param sort Sort methods by \code{c("AUC","FPR","Spike.detect.rate","Rank")}
#' @param sort Sort methods by \code{c("AUC","FDR","Spike.detect.rate","Rank")}
#' @param p Logical. Should the p-value distribution be plotted (only p-values from non-spiked features)
#' @param bins Integer. Number of bins in p-value histograms
#' @param ... Additional arguments for \code{ggdraw}
Expand All @@ -10,7 +10,7 @@
#' @importFrom cowplot draw_plot
#' @export

plot.DA <- function(x, sort = "Rank", p = FALSE, bins = 20, ...){
plot.DA <- function(x, sort = "Score", p = FALSE, bins = 20, ...){

if(p){
# For plotting p-value histograms
Expand All @@ -30,64 +30,42 @@ plot.DA <- function(x, sort = "Rank", p = FALSE, bins = 20, ...){

} else {

# Rank
# Score
## Find medians
output.summary.auc <- aggregate(AUC ~ Method, data = x$table, FUN = function(x) round(median(x),3))
output.summary.fpr <- aggregate(FPR ~ Method, data = x$table, FUN = function(x) round(median(x),3))
output.summary.sdr <- aggregate(Spike.detect.rate ~ Method, data = x$table, FUN = function(x) round(median(x),3))
auc.median <- aggregate(AUC ~ Method, data = x$table, FUN = function(x) round(median(x),3))
fdr.median <- aggregate(FDR ~ Method, data = x$table, FUN = function(x) round(median(x),3))
sdr.median <- aggregate(Spike.detect.rate ~ Method, data = x$table, FUN = function(x) round(median(x),3))

## Merge
df <- merge(merge(output.summary.auc,output.summary.fpr, by = "Method"),output.summary.sdr, by = "Method")
df <- merge(merge(auc.median,fdr.median, by = "Method"),sdr.median, by = "Method")

## Rank
nobad <- df[df$FPR <= 0.05,]
auc.r <- rank(nobad$AUC)
sdr.r <- rank(nobad$Spike.detect.rate)
mean.r <- (auc.r + sdr.r)/2
nobad$Rank <- nrow(nobad) - mean.r
bad <- df[df$FPR > 0.05,]
bad <- bad[order(bad$AUC, decreasing = TRUE),]
bad$Rank <- NA
df <- rbind(nobad,bad)
df <- df[order(df$Rank, decreasing = FALSE),]
# Score
df$Score <- round(df$AUC * df$Spike.detect.rate - df$FDR,3)

# Sort the reults
if(sort == "AUC") {
auc.median <- aggregate(AUC ~ Method, data = x$table, FUN = median)
x$table$Method <- factor(x$table$Method, levels = auc.median[order(auc.median$AUC, decreasing = TRUE),"Method"])
x$table$Method <- factor(x$table$Method, levels = df[order(df$AUC, decreasing = TRUE),"Method"])
}
if(sort == "FPR") {
fpr.median <- aggregate(FPR ~ Method, data = x$table, FUN = median)
x$table$Method <- factor(x$table$Method, levels = fpr.median[order(fpr.median$FPR, decreasing = FALSE),"Method"])
if(sort == "FDR") {
x$table$Method <- factor(x$table$Method, levels = df[order(df$FDR, decreasing = FALSE),"Method"])
}
if(sort == "Spike.detect.rate") {
sdr.median <- aggregate(Spike.detect.rate ~ Method, data = x$table, FUN = median)
x$table$Method <- factor(x$table$Method, levels = sdr.median[order(sdr.median$Spike.detect.rate, decreasing = TRUE),"Method"])
x$table$Method <- factor(x$table$Method, levels = df[order(df$Spike.detect.rate, decreasing = TRUE),"Method"])
}
if(sort == "Rank") {
x$table$Method <- factor(x$table$Method, levels = df$Method)
if(sort == "Score") {
x$table$Method <- factor(x$table$Method, levels = df[order(df$Score, decreasing = TRUE),"Method"])
}

# Colour bad ones
cob <- data.frame(x = rep(c((nrow(nobad)+0.5),(nrow(df)+1)),2),
ymin = rep(-Inf,4),
ymax = rep(Inf,4),
FPR = rep(0,4),
AUC = rep(0,4),
Spike.detect.rate = rep(0,4))

# Define FPR and AUC plots
p1 <- ggplot(x$table, aes(Method, FPR)) +
# Define FDR and AUC plots
p1 <- ggplot(x$table, aes(Method, FDR)) +
theme_bw() +
coord_cartesian(ylim = c(0,1)) +
geom_hline(yintercept = 0.05, colour = "red") +
geom_point() +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median,geom = "crossbar",colour="red",width=0.75) +
ylab("False Positive Rate") +
ylab("False Discovery Rate") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
xlab(NULL) +
theme(panel.grid.minor = element_blank()) +
geom_ribbon(aes(x=x,ymin=ymin,ymax=ymax),data = cob, fill = "red", alpha = 0.2)
theme(panel.grid.minor = element_blank())

p2 <- ggplot(x$table, aes(Method, AUC)) +
theme_bw() +
Expand All @@ -99,8 +77,7 @@ plot.DA <- function(x, sort = "Rank", p = FALSE, bins = 20, ...){
theme(axis.text.x = element_blank(),
panel.grid.minor = element_blank()) +
xlab(NULL) +
scale_y_continuous(labels=function(x) sprintf("%.2f", x)) +
geom_ribbon(aes(x=x,ymin=ymin,ymax=ymax),data = cob, fill = "red", alpha = 0.2)
scale_y_continuous(labels=function(x) sprintf("%.2f", x))

p3 <- ggplot(x$table, aes(Method, Spike.detect.rate)) +
theme_bw() +
Expand All @@ -110,8 +87,7 @@ plot.DA <- function(x, sort = "Rank", p = FALSE, bins = 20, ...){
theme(axis.text.x = element_blank(),
panel.grid.minor = element_blank()) +
xlab(NULL) +
scale_y_continuous(labels=function(x) sprintf("%.2f", x)) +
geom_ribbon(aes(x=x,ymin=ymin,ymax=ymax),data = cob, fill = "red", alpha = 0.2)
scale_y_continuous(labels=function(x) sprintf("%.2f", x))

# Plot it
pp <- cowplot::ggdraw(...) +
Expand Down
2 changes: 1 addition & 1 deletion R/plot.DAPower.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ plot.DAPower <- function(x, ...){
coord_cartesian(ylim = c(0,1)) +
geom_point() +
geom_smooth(method = "loess", colour = "red", alpha = 0.4) +
ylab("Empirical Power") +
ylab("Spike Detect Rate") +
xlab("Log2 Effect Size") +
theme(panel.grid.minor = element_blank())

Expand Down
43 changes: 28 additions & 15 deletions R/print.DA.R
Original file line number Diff line number Diff line change
@@ -1,39 +1,52 @@
#' Summary of results from \code{testDA}
#'
#' @param object The output from the \code{testDA} function
#' @param sort Sort methods by \code{c("AUC","FPR","Spike.detect.rate","Rank")}
#' @param sort Sort methods by \code{c("AUC","FPR","Spike.detect.rate","Score")}
#' @param boot If TRUE will use bootstrap for confidence limits of the Score, else will compute the limits from the original table. Recommended to be TRUE unless \code{R >= 100} in \code{testDA}
#' @param prob Confidence limits for Score. Default \code{90\%} = \code{c(0.05,0.095)}
#' @param N Number of bootstraps. Default 1000
#' @param boot.seed Random seed for reproducibility of bootstraps
#' @param ... Additional arguments for \code{print}
#' @import stats
#' @import methods
#' @import utils
#' @export

summary.DA <- function(object, sort = "Rank", ...){
summary.DA <- function(object, sort = "Score", boot = TRUE, prob = c(0.05,0.95), N = 1000, boot.seed = 1, ...){

# Find medians
output.summary.auc <- aggregate(AUC ~ Method, data = object$table, FUN = function(x) round(median(x),3))
output.summary.fpr <- aggregate(FPR ~ Method, data = object$table, FUN = function(x) round(median(x),3))
output.summary.sdr <- aggregate(Spike.detect.rate ~ Method, data = object$table, FUN = function(x) round(median(x),3))
output.summary.fdr <- aggregate(FDR ~ Method, data = object$table, FUN = function(x) round(median(x),3))

# Merge
df <- merge(merge(output.summary.auc,output.summary.fpr, by = "Method"),output.summary.sdr, by = "Method")

# Rank
nobad <- df[df$FPR <= 0.05,]
auc.r <- rank(nobad$AUC)
sdr.r <- rank(nobad$Spike.detect.rate)
mean.r <- (auc.r + sdr.r)/2
nobad$Rank <- nrow(nobad) - mean.r
bad <- df[df$FPR > 0.05,]
bad <- bad[order(bad$AUC, decreasing = TRUE),]
bad$Rank <- NA
df <- rbind(nobad,bad)
df <- merge(merge(merge(output.summary.auc,output.summary.fpr, by = "Method", all = TRUE),output.summary.fdr, by = "Method"),output.summary.sdr, by = "Method")

# Score
df$Score <- round(df$AUC * df$Spike.detect.rate - df$FDR,3)

# Interval
object$table$Score <- object$table$AUC * object$table$Spike.detect.rate - object$table$FDR

if(boot){
set.seed(boot.seed)
boots <- lapply(unique(object$table$Method), function(x) object$table[object$table$Method == x,][
sample(rownames(object$table[object$table$Method == x,]),N,replace = TRUE),
])
boot.score <- lapply(boots,function(y) aggregate(Score ~ Method, data = y, FUN = function(x) round(quantile(x,probs = prob),3)))
score.cl <- do.call(rbind,boot.score)
} else {
score.cl <- aggregate(Score ~ Method, data = object$table, FUN = function(x) round(quantile(x,probs = prob),3))
}

df <- merge(df, as.matrix(score.cl), by = "Method")

# Sort
if(sort == "AUC") df <- df[order(df$AUC, decreasing = TRUE),]
if(sort == "FPR") df <- df[order(df$FPR, decreasing = FALSE),]
if(sort == "Spike.detect.rate") df <- df[order(df$Spike.detect.rate, decreasing = TRUE),]
if(sort == "Rank") df <- df[order(df$Rank, decreasing = FALSE),]
if(sort == "Score") df <- df[order(df$Score, decreasing = TRUE),]

print(df, row.names = FALSE, ...)

Expand Down
6 changes: 3 additions & 3 deletions R/summary.DAPower.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ summary.DAPower <- function(x, ...){

# Merge
df <- merge(merge(output.summary.sdr,output.summary.auc, by = "EffectSize"),output.summary.fdr, by = "EffectSize")
colnames(df) <- c("EffectSize","EmpiricalPower","AUC","FDR")
colnames(df) <- c("EffectSize","Spike.detect.rate","AUC","FDR")
} else {
if(x$Method[1] %in% c("ALDEx2 t-test (adx)","ALDEx2 wilcox (adx)")){
# Find medians
Expand All @@ -27,7 +27,7 @@ summary.DAPower <- function(x, ...){
# Merge
df <- cbind(output.summary.sdr,output.summary.auc[,3],output.summary.fpr[,3],output.summary.fdr[,3])
df <- as.data.frame(df[order(df$Method),])
colnames(df) <- c("Method","EffectSize","EmpiricalPower","AUC","FPR","FDR")
colnames(df) <- c("Method","EffectSize","Spike.detect.rate","AUC","FPR","FDR")
} else {
# Find medians
output.summary.sdr <- aggregate(SDR ~ EffectSize, data = x, FUN = function(y) round(median(y),3))
Expand All @@ -37,7 +37,7 @@ summary.DAPower <- function(x, ...){

# Merge
df <- merge(merge(merge(output.summary.sdr,output.summary.auc, by = "EffectSize"),output.summary.fpr, by = "EffectSize"),output.summary.fdr, by = "EffectSize")
colnames(df) <- c("EffectSize","EmpiricalPower","AUC","FPR","FDR")
colnames(df) <- c("EffectSize","Spike.detect.rate","AUC","FPR","FDR")
}
}

Expand Down
33 changes: 16 additions & 17 deletions R/testDA.R
Original file line number Diff line number Diff line change
Expand Up @@ -486,21 +486,31 @@ testDA <- function(data, predictor, paired = NULL, covars = NULL, R = 10, tests
}
})

# Confusion matrix adjusted
totalPos.adj <- sapply(res.sub, function(x) nrow(x[x$pval.adj <= alpha,]))
truePos.adj <- sapply(res.sub, function(x) sum(x[x$pval.adj <= alpha,"Feature"] %in% spikeds[[r]][[2]]))
falsePos.adj <- totalPos.adj - truePos.adj

fdrs <- sapply(1:length(res.sub), function(x) {
if(totalPos.adj[x] != 0){
falsePos.adj[x] / totalPos.adj[x]
} else {0}})

# Combine and return
df.combined <- data.frame(Method = sapply(res.sub, function(x) x$Method[1]),
AUC = aucs,
FPR = fprs,
FDR = fdrs,
Spike.detect.rate = sdrs,
Run = r)
rownames(df.combined) <- NULL

# SAMseq FDR
# SAMseq and ANCOM FPRs not possible
if("sam" %in% newnames){
if(nrow(res.sub[["sam"]][res.sub[["sam"]]$pval.adj <= alpha,]) == 0){
df.combined[df.combined$Method == "SAMseq (sam)","FPR"] <- 0
} else {
df.combined[df.combined$Method == "SAMseq (sam)","FPR"] <- nrow(res.sub[["sam"]][res.sub[["sam"]]$pval.adj <= alpha & res.sub[["sam"]]$Spiked == "No",])/nrow(res.sub[["sam"]][res.sub[["sam"]]$pval.adj <= alpha,])
}
df.combined[df.combined$Method == "SAMseq (sam)","FPR"] <- NA
}
if("anc" %in% newnames){
df.combined[df.combined$Method == "ANCOM (anc)","FPR"] <- NA
}

return(list(df.combined,res.sub))
Expand Down Expand Up @@ -560,16 +570,5 @@ testDA <- function(data, predictor, paired = NULL, covars = NULL, R = 10, tests
out <- list(table = output.results, results = output.all.results, details = output.details, run.times = run.times.all)
class(out) <- "DA"

# Create warning message
output.summary.auc <- aggregate(AUC ~ Method, data = output.results, FUN = median)
output.summary.fpr <- aggregate(FPR ~ Method, data = output.results, FUN = median)
output.summary.sdr <- aggregate(Spike.detect.rate ~ Method, data = output.results, FUN = median)
df <- merge(merge(output.summary.auc,output.summary.fpr, by = "Method"),output.summary.sdr, by = "Method")
df <- df[df$FPR <= 0.05,]
if(nrow(df) > 0){
df <- df[order(df$AUC, decreasing = TRUE),]
if(df[1,"Spike.detect.rate"] == 0 & verbose) message("Spike detect rate is zero for the top method! You might want to re-run the analysis with a pruned dataset (see preDA) or a higher effectSize")
}

return(out)
}
2 changes: 1 addition & 1 deletion R/zzz.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

.onLoad <- function(libname, pkgname){
message("DAtest version 2.7.7")
message("DAtest version 2.7.8")
}

Loading

0 comments on commit 321412d

Please sign in to comment.