diff --git a/DESCRIPTION b/DESCRIPTION
index 6d453e8..5af8eae 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
Package: OUTRIDER
Title: OUTRIDER - OUTlier in RNA-Seq fInDER
Type: Package
-Version: 1.21.1
-Date: 2020-10-20
+Version: 1.21.2
+Date: 2024-04-24
URL: https://github.com/gagneurlab/OUTRIDER
BugReports: https://github.com/gagneurlab/OUTRIDER/issues
Authors@R: c(
@@ -42,6 +42,7 @@ Imports:
generics,
GenomicRanges,
ggplot2,
+ ggrepel,
grDevices,
heatmaply,
pheatmap,
@@ -70,7 +71,10 @@ Suggests:
RMariaDB,
AnnotationDbi,
beeswarm,
- covr
+ covr,
+ GenomeInfoDb,
+ ggbio,
+ biovizBase
LinkingTo:
Rcpp,
RcppArmadillo
diff --git a/NAMESPACE b/NAMESPACE
index 911bc19..6f118f9 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -35,6 +35,7 @@ export(plotExpectedVsObservedCounts)
export(plotExpressedGenes)
export(plotExpressionRank)
export(plotFPKM)
+export(plotManhattan)
export(plotPowerAnalysis)
export(plotQQ)
export(plotSizeFactors)
@@ -53,6 +54,7 @@ exportMethods(plotAberrantPerSample)
exportMethods(plotCountCorHeatmap)
exportMethods(plotDispEsts)
exportMethods(plotEncDimSearch)
+exportMethods(plotManhattan)
exportMethods(plotQQ)
exportMethods(plotVolcano)
exportMethods(results)
@@ -73,6 +75,7 @@ importFrom(BiocGenerics,plotDispEsts)
importFrom(BiocParallel,SerialParam)
importFrom(BiocParallel,bpisup)
importFrom(BiocParallel,bplapply)
+importFrom(BiocParallel,bpmapply)
importFrom(BiocParallel,bpparam)
importFrom(BiocParallel,bpstart)
importFrom(BiocParallel,bpstop)
@@ -94,14 +97,22 @@ importFrom(DESeq2,show)
importFrom(DESeq2,sizeFactors)
importFrom(GenomicFeatures,exonsBy)
importFrom(GenomicRanges,GRanges)
+importFrom(GenomicRanges,end)
+importFrom(GenomicRanges,findOverlaps)
importFrom(GenomicRanges,reduce)
+importFrom(GenomicRanges,start)
importFrom(GenomicRanges,width)
importFrom(IRanges,IRanges)
importFrom(PRROC,pr.curve)
importFrom(RColorBrewer,brewer.pal)
importFrom(S4Vectors,"metadata<-")
+importFrom(S4Vectors,"values<-")
importFrom(S4Vectors,DataFrame)
+importFrom(S4Vectors,endoapply)
importFrom(S4Vectors,metadata)
+importFrom(S4Vectors,queryHits)
+importFrom(S4Vectors,subjectHits)
+importFrom(S4Vectors,values)
importFrom(SummarizedExperiment,"assay<-")
importFrom(SummarizedExperiment,"assays<-")
importFrom(SummarizedExperiment,"colData<-")
@@ -113,10 +124,13 @@ importFrom(SummarizedExperiment,assays)
importFrom(SummarizedExperiment,colData)
importFrom(SummarizedExperiment,mcols)
importFrom(SummarizedExperiment,rowData)
+importFrom(SummarizedExperiment,rowRanges)
importFrom(generics,fit)
importFrom(ggplot2,aes)
importFrom(ggplot2,annotate)
importFrom(ggplot2,element_blank)
+importFrom(ggplot2,facet_grid)
+importFrom(ggplot2,facet_wrap)
importFrom(ggplot2,geom_abline)
importFrom(ggplot2,geom_bar)
importFrom(ggplot2,geom_histogram)
@@ -129,6 +143,7 @@ importFrom(ggplot2,geom_vline)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,ggtitle)
importFrom(ggplot2,labs)
+importFrom(ggplot2,quo_name)
importFrom(ggplot2,scale_color_brewer)
importFrom(ggplot2,scale_color_identity)
importFrom(ggplot2,scale_color_manual)
@@ -142,6 +157,7 @@ importFrom(ggplot2,theme_bw)
importFrom(ggplot2,xlab)
importFrom(ggplot2,ylab)
importFrom(ggplot2,ylim)
+importFrom(ggrepel,geom_text_repel)
importFrom(grDevices,colorRampPalette)
importFrom(graphics,abline)
importFrom(graphics,axis)
diff --git a/NEWS b/NEWS
index 4ff42b8..3967b32 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,9 @@
+CHANGES IN VERSION 1.20.1
+-----------------------------
+ o Add Manhattenplot functionallity
+ o Bugfix in restricting for FDR correction and others (#54 and aa6e56a)
+ o Ensure correct injections of outliers (#37)
+
CHANGES IN VERSION 1.16.1
-----------------------------
o Add option to restrict FDR correction to sets of genes of interest
diff --git a/R/AllGenerics.R b/R/AllGenerics.R
index 6eaad0b..98ad193 100644
--- a/R/AllGenerics.R
+++ b/R/AllGenerics.R
@@ -27,6 +27,11 @@ setGeneric("plotAberrantPerSample", function(object, ...)
setGeneric("plotCountCorHeatmap", function(object, ...)
standardGeneric("plotCountCorHeatmap"))
+#' @rdname plotFunctions
+#' @export
+setGeneric("plotManhattan", function(object, ...)
+ standardGeneric("plotManhattan"))
+
#' @importFrom DESeq2 plotDispEsts
#' @noRd
#' @export
diff --git a/R/ZscoreMatrix.R b/R/ZscoreMatrix.R
index 38513d1..0e51c39 100644
--- a/R/ZscoreMatrix.R
+++ b/R/ZscoreMatrix.R
@@ -1,14 +1,13 @@
#'
#' Z score computation
#'
-#' Computes the z scores for every count in the matrix.
-#' The z score is defined in the log2 space as follows:
-#' \ifelse{html}{
-#' \out{zij = (lij - mujl)/
-#' sigmajl}}{
-#' \deqn{z_{ij} = \frac{l_{ij} - \mu_j^l}{\sigma_j^l}}},
-#' where l is the log2 transformed normalized count and mu and sigma the
-#' mean and standard deviation for gene j, respectively.
+#' Computes the z scores for every count in the matrix.
+#' The z score is defined in the log\eqn{_2}{[2]} space as follows:
+#' \eqn{z_{ij} = \frac{l_{ij} - \mu_j^l}{
+#' \sigma_j^l}}{z_ij = (l[ij] - mu[j]^l)/sigma[ij]}
+#' where \code{l} is the log\eqn{_2}{[2]} transformed normalized count and
+#' \eqn{\mu}{mu} and \eqn{\sigma}{sigma} the mean and standard deviation
+#' for gene \code{j} and sample \code{i}, respectively.
#'
#' @param ods OutriderDataSet
#' @param ... Further arguments passed on to \code{ZscoreMatrix}.
diff --git a/R/inputCheckerFunctions.R b/R/inputCheckerFunctions.R
index 2e41689..27e9db5 100644
--- a/R/inputCheckerFunctions.R
+++ b/R/inputCheckerFunctions.R
@@ -15,6 +15,8 @@ checkCountRequirements <- function(ods, test=FALSE){
stop("There are genes without any read. Please filter first ",
"the data with: ods <- filterExpression(ods)")
}
+ # Filter out genes that have on average less than 1 count per 100 samples.
+ # Ony affects datasets with more than 100 samples.
filterGenes <- filterGenes | rowSums(counts(ods)) < ncol(ods)/100
if(any(filterGenes) & isFALSE(test)){
stop("The model requires for each gene at least 1 read ",
diff --git a/R/method-evaluation.R b/R/method-evaluation.R
index 0cdd431..51b6785 100644
--- a/R/method-evaluation.R
+++ b/R/method-evaluation.R
@@ -28,7 +28,7 @@ aberrant.OUTRIDER <- function(object, padjCutoff=0.05, zScoreCutoff=0,
#' @param by if the results should be summarized by 'sample',
#' 'gene' or not at all (default).
#' @param subsetName The name of a subset of genes of interest for which FDR
-#' corected pvalues have been previously computed. Those FDR values
+#' corected pvalues were previously computed. Those FDR values
#' on the subset will then be used to determine aberrant status.
#' Default is NULL (using transcriptome-wide FDR corrected pvalues).
#' @param ... Currently not in use.
diff --git a/R/method-plot.R b/R/method-plot.R
index ccf8567..0ccb96d 100644
--- a/R/method-plot.R
+++ b/R/method-plot.R
@@ -32,6 +32,10 @@
#' Can also be a vector. Integers are treated as indices.
#' @param padjCutoff,zScoreCutoff Significance or Z-score cutoff
#' to mark outliers
+#' @param label Indicates which genes or samples should be labeled. By default
+#' all aberrant genes/samples are labelled. Can be set to NULL for
+#' no labels. Provide a vector of geneIDs/sampleIDs to label
+#' specific genes/samples.
#' @param global Flag to plot a global Q-Q plot, default FALSE
#' @param outlierRatio The fraction to be used for the outlier sample filtering
#' @param normalized If TRUE, the normalized counts are used, the default,
@@ -39,6 +43,20 @@
#' @param compareDisp If TRUE, the default, and if the autoCorrect normalization
#' was used it computes the dispersion without autoCorrect and
#' plots it for comparison.
+#' @param xaxis Indicates which assay should be shown on the x-axis of the
+#' volcano plot. Defaults to 'zscore'. Other options are 'fc' and
+#' 'log2fc' for the fold-change or log2 fold-change.
+#' @param value Indicates which assay is shown in the manhattan plot. Defaults
+#' to 'pvalue'. Other options are 'zScore' and 'log2fc'.
+#' @param featureRanges A GRanges object of the same length as the
+#' OutriderDataSet object that contains the genomic positions of
+#' features that are shown in the manhattan plot.
+#' @param subsetName The name of a subset of genes of interest for which FDR
+#' corrected pvalues were previously computed. Those FDR values
+#' on the subset will then be used to determine aberrant status.
+#' Default is NULL (using transcriptome-wide FDR corrected pvalues).
+#' @param chr The chromosomes to be displayed in the \code{plotManhattan}
+#' function. Default is \code{NULL}, i.e. all chromosomes are shown.
#### Graphical parameters
#' @param main Title for the plot, if missing a default title will be used.
#' @param groups A character vector containing either group assignments of
@@ -79,6 +97,9 @@
#' @param bins Number of bins used in the histogram. Defaults to 100.
#' @param yadjust Option to adjust position of Median and 90 percentile labels.
#' @param ylab The y axis label
+#' @param chrColor A vector of length 2 giving the two colors used for
+#' coloring alternating chromosomes in the manhattan plot. Default
+#' colors are 'black' and 'darkgrey'.
#'
#### Additional ... parameter
#' @param ... Additional parameters passed to plot() or plot_ly() if not stated
@@ -143,6 +164,11 @@
#' precision-recall curve). From this plot the optimum should be choosen for
#' the \code{q} in fitting process.
#'
+#' \code{plotManhattan}: Visualizes different metrics for each gene (pvalue,
+#' log2 fold-change, z-score) along with the genomic coordinates of the
+#' respective gene as a manhattan plot. Detected outlier genes are highlighted
+#' in red.
+#'
#' @return If base R graphics are used nothing is returned else the plotly or
#' the gplot object is returned.
#'
@@ -157,22 +183,35 @@
#'
#' mcols(ods)$basepairs <- 300 # assign pseudo gene length for filtering
#' ods <- filterExpression(ods)
-#' ods <- OUTRIDER(ods, implementation=implementation)
-#'
+#' # restrict FDR correction to set of genes of interest per sample
+#' genesOfInterest <- list(MUC1372 = c("ATPIF1", "UROD", "YBX1",
+#' sample(rownames(ods), 25)),
+#' MUC1360 = sample(rownames(ods), 50),
+#' MUC1350 = sample(rownames(ods), 75),
+#' X76619 = sample(rownames(ods), 20),
+#' X74172 = sample(rownames(ods), 150))
+#' ods <- OUTRIDER(ods, implementation=implementation, subsets=list("exampleGenes"=genesOfInterest))
+#'
#' plotAberrantPerSample(ods)
+#' plotAberrantPerSample(ods, subsetName="exampleGenes")
#'
#' plotVolcano(ods, 49)
#' plotVolcano(ods, 'MUC1365', basePlot=TRUE)
+#' plotVolcano(ods, 'MUC1351', basePlot=TRUE, xaxis="log2fc", label=c("NBPF16"))
+#' plotVolcano(ods, 'MUC1372', basePlot=TRUE, subsetName="exampleGenes")
#'
#' plotExpressionRank(ods, 35)
-#' plotExpressionRank(ods, "NDUFS5", normalized=FALSE,
+#' plotExpressionRank(ods, 35, subsetName="exampleGenes")
+#' plotExpressionRank(ods, "NDUFS5", normalized=FALSE, label="MUC1372",
#' log=FALSE, main="Over expression outlier", basePlot=TRUE)
#'
#' plotQQ(ods, 149)
+#' plotQQ(ods, 149, subsetName="exampleGenes")
#' plotQQ(ods, global=TRUE, outlierRatio=0.001)
#'
#' plotExpectedVsObservedCounts(ods, 149)
#' plotExpectedVsObservedCounts(ods, "ATAD3C", basePlot=TRUE)
+#' plotExpectedVsObservedCounts(ods, "UROD", subsetName="exampleGenes")
#'
#' plotExpressedGenes(ods)
#'
@@ -202,19 +241,35 @@
#' implementation=implementation)
#' plotEncDimSearch(ods)
#' }
+#'
+#' # To show the pvalues of a sample in a manhattan plot, rowRanges(ods) must
+#' # contain the genomic position of each feature or a GRanges object must
+#' # be provided
+#' \dontrun{
+#' # in case rowRanges(ods) is a GRangesList, run this first once to speed up:
+#' rowRanges(ods) <- unlist(endoapply(rowRanges(ods), range))
+#' }
+#' gr <- GRanges(
+#' seqnames=sample(paste0("chr", 1:22), nrow(ods), replace=TRUE),
+#' ranges=IRanges(start=runif(nrow(ods), min=0, max=1e5), width=100))
+#' plotManhattan(ods, "MUC1350", value="pvalue", featureRanges=gr)
+#' plotManhattan(ods, "MUC1350", value="l2fc", featureRanges=gr)
+#' plotManhattan(ods, "MUC1372", featureRanges=gr, subsetName="exampleGenes")
#'
#' @rdname plotFunctions
#' @aliases plotFunctions plotVolcano plotQQ plotExpectedVsObservedCounts
#' plotExpressionRank plotCountCorHeatmap plotCountGeneSampleHeatmap
#' plotAberrantPerSample plotFPKM plotDispEsts plotPowerAnalysis
-#' plotEncDimSearch plotExpressedGenes plotSizeFactors
+#' plotEncDimSearch plotExpressedGenes plotSizeFactors plotManhattan
#'
NULL
plotVolcano.OUTRIDER <- function(object, sampleID, main, padjCutoff=0.05,
- zScoreCutoff=0,
- pch=16, basePlot=FALSE, col=c("gray", "firebrick")){
+ zScoreCutoff=0, label="aberrant",
+ xaxis=c("zscore", "log2fc", "fc"),
+ pch=16, basePlot=FALSE,
+ col=c("gray", "firebrick"), subsetName=NULL){
if(missing(sampleID)){
stop("specify which sample should be plotted, sampleID = 'sample5'")
}
@@ -237,7 +292,8 @@ plotVolcano.OUTRIDER <- function(object, sampleID, main, padjCutoff=0.05,
if(length(sampleID) > 1){
ans <- lapply(sampleID, plotVolcano, object=object,
padjCutoff=padjCutoff,
- zScoreCutoff=zScoreCutoff, basePlot=basePlot)
+ zScoreCutoff=zScoreCutoff, basePlot=basePlot,
+ subsetName=subsetName)
return(ans)
}
if(missing(main)){
@@ -247,25 +303,61 @@ plotVolcano.OUTRIDER <- function(object, sampleID, main, padjCutoff=0.05,
if(is.null(rownames(object))){
rownames(object) <- paste("feature", seq_len(nrow(object)), sep="_")
}
+
+ xaxis <- match.arg(xaxis)
+ if(xaxis == "zscore"){
+ if("zScore" %in% assayNames(object)){
+ xaxis <- zScore(object)
+ xaxis_name <- "Z-Score"
+ base_x_lab <- xaxis_name
+ } else{
+ stop("Calculate zScores first or choose other xaxis type.")
+ }
+
+ } else if(xaxis == "fc"){
+ if("l2fc" %in% assayNames(object)){
+ xaxis <- 2^assay(object, "l2fc")
+ xaxis_name <- "Fold change"
+ base_x_lab <- xaxis_name
+ } else{
+ stop("Calculate log2fc first or choose other xaxis type.")
+ }
+ } else if(xaxis == "log2fc"){
+ if("l2fc" %in% assayNames(object)){
+ xaxis <- assay(object, "l2fc")
+ xaxis_name <- expression(paste(log[2], "(fold-change)"))
+ base_x_lab <- paste("log2(fold-change)")
+ } else{
+ stop("Calculate log2fc first or choose other xaxis type.")
+ }
+ } else {
+ stop("Unknown xaxis type, choose one of zscore, fc, log2fc.")
+ }
dt <- data.table(
GENE_ID = rownames(object),
pValue = pValue(object)[,sampleID],
- padjust = padj(object)[,sampleID],
- zScore = zScore(object)[,sampleID],
+ padjust = padj(object, subsetName=subsetName)[,sampleID],
+ # zScore = zScore(object)[,sampleID],
+ xaxis = xaxis[,sampleID],
normCts = counts(object, normalized=TRUE)[,sampleID],
medianCts = rowMedians(counts(object, normalized=TRUE)),
expRank = apply(
counts(object, normalized=TRUE), 2, rank)[,sampleID],
aberrant = aberrant(object, padjCutoff=padjCutoff,
- zScoreCutoff=zScoreCutoff)[,sampleID],
+ zScoreCutoff=zScoreCutoff, subsetName=subsetName)[,sampleID],
color = col[1])
dt[aberrant == TRUE, color:=col[2]]
+ dt[is.na(padjust), color:=col[3]]
+ dt[aberrant == TRUE, aberrantLabel:="aberrant"]
+ dt[aberrant == FALSE, aberrantLabel:="not aberrant"]
+ dt[is.na(aberrant), aberrantLabel:="not in tested group"]
# remove the NAs from the zScores for plotting
- dt[is.na(zScore),zScore:=0]
+ dt[is.na(xaxis),xaxis:=0]
- p <- ggplot(dt, aes(zScore, -log10(pValue), color=color, text=paste0(
+ p <- ggplot(dt, aes(xaxis, -log10(pValue), color=aberrantLabel,
+ label=GENE_ID, text=paste0(
"Gene ID: ", GENE_ID,
"
Sample ID: ", sampleID,
"
Median normcount: ", round(medianCts, 2),
@@ -273,14 +365,52 @@ plotVolcano.OUTRIDER <- function(object, sampleID, main, padjCutoff=0.05,
"
expression rank: ", as.integer(expRank),
"
nominal P-value: ", signif(pValue,3),
"
adj. P-value: ", signif(padjust,3),
- "
Z-score: ", signif(zScore,2)))) +
+ "
", xaxis_name, ": ", signif(xaxis,2)))) +
geom_point() +
theme_bw() +
- xlab("Z-score") +
+ xlab(xaxis_name) +
ylab(expression(paste(-log[10], "(", italic(P), "-value)"))) +
ggtitle(main) +
- scale_color_identity() +
- theme(legend.position = 'none')
+ scale_color_manual(values=c("not aberrant"=col[1], "aberrant"=col[2],
+ "not in tested group"="gray90")) +
+ theme(legend.position = 'bottom',
+ legend.title=element_blank())
+
+ if(!is.null(subsetName)){
+ p <- p + labs(subtitle=paste0("FDR across genes in the ", subsetName,
+ " group (N=", dt[!is.na(padjust), .N], ")"))
+ }
+
+ # Log scale if fold change is plotted
+ if(isTRUE(basePlot)){
+ if(!is(xaxis_name, "expression") && xaxis_name == 'Fold change'){
+ p <- p + scale_x_log10(labels = scales::trans_format(
+ "log10", scales::math_format(10^.x)))
+ }
+ if(!is.null(label)){
+ if(isScalarCharacter(label) && label == "aberrant"){
+ if(nrow(dt[aberrant == TRUE,]) > 0){
+ p <- p +
+ geom_text_repel(data=dt[aberrant == TRUE,],
+ aes(col=aberrantLabel), fontface='bold',
+ hjust=-.2, vjust=.2)
+ }
+ }
+ else{
+ if(nrow(dt[GENE_ID %in% label]) > 0){
+ p <- p +
+ geom_text_repel(data=subset(dt, GENE_ID %in% label),
+ aes(col=aberrantLabel), fontface='bold',
+ hjust=-.2, vjust=.2)
+ }
+ if(any(!(label %in% dt[,GENE_ID]))){
+ warning("Did not find gene(s) ",
+ paste(label[!(label %in% dt[,GENE_ID])],
+ collapse=", "), " to label.")
+ }
+ }
+ }
+ }
if(isFALSE(basePlot)){
p <- p + ylab(paste("-log10(P-value)"))
@@ -297,7 +427,7 @@ setMethod("plotVolcano", signature(object="OutriderDataSet"),
plotQQ.OUTRIDER <- function(object, geneID, main, global=FALSE, padjCutoff=0.05,
zScoreCutoff=0, samplePoints=TRUE, legendPos="topleft",
- outlierRatio=0.001, conf.alpha=0.05,
+ outlierRatio=0.001, conf.alpha=0.05, subsetName=NULL,
pch=16, xlim=NULL, ylim=NULL, col=NULL){
checkOutriderDataSet(object)
stopifnot(isScalarLogical(global))
@@ -318,6 +448,10 @@ plotQQ.OUTRIDER <- function(object, geneID, main, global=FALSE, padjCutoff=0.05,
if(missing(main)){
main <- paste0('Q-Q plot for gene: ', geneID)
}
+ if(!is.null(subsetName)){
+ main <- paste0(main, "\nFDR across genes in the ", subsetName,
+ " group")
+ }
if(is.null(col)){
col <- c('black', 'firebrick')
}
@@ -326,7 +460,7 @@ plotQQ.OUTRIDER <- function(object, geneID, main, global=FALSE, padjCutoff=0.05,
pointCex <- 1
#data table with expected and observerd log10(pValues)
aberrantEvent <- aberrant(object[geneID,], padjCutoff=padjCutoff,
- zScoreCutoff=zScoreCutoff, by='sample')
+ zScoreCutoff=zScoreCutoff, subsetName=subsetName, by='sample')
df <- data.table(obs= -log10(pVal), pch=pch, subset=FALSE,
col=ifelse(aberrantEvent, col[2], col[1]))
@@ -373,7 +507,7 @@ plotQQ.OUTRIDER <- function(object, geneID, main, global=FALSE, padjCutoff=0.05,
if(is.null(ylim)){
ylim=range(df[,obs], na.rm=TRUE)
}
- plot(NA, xlim=xlim, ylim=ylim, main=main,
+ plot(NA, xlim=xlim, ylim=ylim, main=main,
xlab=expression(
paste(-log[10], " (expected ", italic(P), "-value)")),
ylab=expression(
@@ -449,7 +583,8 @@ setMethod("plotQQ", signature(object="OutriderDataSet"), plotQQ.OUTRIDER)
#' @rdname plotFunctions
#' @export
plotExpectedVsObservedCounts <- function(ods, geneID, main, basePlot=FALSE,
- log=TRUE, groups=c(), groupColSet='Set1', ...){
+ log=TRUE, groups=c(), groupColSet='Set1', label="aberrant",
+ subsetName=NULL, ...){
# check user input
checkOutriderDataSet(ods)
@@ -463,6 +598,11 @@ plotExpectedVsObservedCounts <- function(ods, geneID, main, basePlot=FALSE,
if (missing(main)) {
main <- paste("Predicted expression plot:", geneID)
}
+ if(is.null(subsetName)){
+ subtitle <- NULL
+ } else{
+ subtitle <- paste0("FDR across genes in the ", subsetName, " group")
+ }
ods <- ods[geneID]
cnts <- data.table(
@@ -470,7 +610,8 @@ plotExpectedVsObservedCounts <- function(ods, geneID, main, basePlot=FALSE,
sampleID = colnames(ods),
observed = as.vector(counts(ods)) + isTRUE(log),
expected = as.vector(normalizationFactors(ods)) + isTRUE(log),
- aberrant = as.vector(aberrant(ods)))
+ aberrant = as.vector(aberrant(ods, subsetName=subsetName)))
+ cnts[is.na(aberrant), aberrant:=FALSE]
# group assignment
if(length(groups) != ncol(ods)) {
@@ -483,14 +624,15 @@ plotExpectedVsObservedCounts <- function(ods, geneID, main, basePlot=FALSE,
groups[is.na(groups)] <- 'NA'
cnts[, group := groups]
- g <- ggplot(cnts, aes(expected, observed, text=paste0(
+ g <- ggplot(cnts, aes(expected, observed, label=sampleID,
+ text=paste0(
"Gene ID: ", feature_id, "
",
"Sample ID: ", sampleID, "
",
"Raw count: ", observed, "
",
"Expected count: ", round(expected, 2), "
"))) +
theme_bw() +
geom_abline(slope = 1, intercept = 0) +
- labs(title = main,
+ labs(title = main, subtitle=subtitle,
x=paste('Expected counts', ifelse(isTRUE(log), '+ 1', '')),
y=paste('Raw counts', ifelse(isTRUE(log), '+ 1', '')))
@@ -512,11 +654,35 @@ plotExpectedVsObservedCounts <- function(ods, geneID, main, basePlot=FALSE,
} else { # no grouping given
point_mapping$colour <- substitute(aberrant)
g <- g + geom_point(point_mapping) +
- scale_color_manual(values = c("gray", "firebrick")) +
+ scale_color_manual(values = c("FALSE"="gray",
+ "TRUE"="firebrick")) +
theme(legend.position = 'none')
}
if (isTRUE(basePlot)) {
+ if(!is.null(label)){
+ if(isScalarCharacter(label) && label == "aberrant"){
+ if(nrow(cnts[aberrant == TRUE,]) > 0){
+ g <- g +
+ geom_text_repel(data=cnts[aberrant == TRUE,],
+ aes(col=aberrant), fontface='bold',
+ hjust=-.2, vjust=.5)
+ }
+ }
+ else{
+ if(nrow(cnts[sampleID %in% label]) > 0){
+ g <- g +
+ geom_text_repel(data=subset(cnts, sampleID %in% label),
+ aes(col=aberrant), fontface='bold',
+ hjust=-.2, vjust=.5)
+ }
+ if(any(!(label %in% cnts[,sampleID]))){
+ warning("Did not find sample(s) ",
+ paste(label[!(label %in% cnts[,sampleID])],
+ collapse=", "), " to label.")
+ }
+ }
+ }
return(g)
}
ggplotly(g, tooltip="text")
@@ -528,7 +694,8 @@ plotExpectedVsObservedCounts <- function(ods, geneID, main, basePlot=FALSE,
plotExpressionRank <- function(ods, geneID, main, padjCutoff=0.05,
zScoreCutoff=0, normalized=TRUE, basePlot=FALSE, log=TRUE,
col=c("gray", "firebrick"), groups=c(),
- groupColSet='Accent'){
+ groupColSet='Accent', label="aberrant",
+ subsetName=NULL){
# check user input
checkOutriderDataSet(ods)
if(isTRUE(normalized) & is.null(sizeFactors(ods))){
@@ -557,7 +724,7 @@ plotExpressionRank <- function(ods, geneID, main, padjCutoff=0.05,
if(length(geneID) > 1){
ans <- lapply(geneID, plotExpressionRank, ods=ods,
padjCutoff=padjCutoff, zScoreCutoff=zScoreCutoff,
- basePlot=basePlot, normalized=normalized)
+ basePlot=basePlot, normalized=normalized, subsetName=subsetName)
return(ans)
}
stopifnot(isScalarValue(geneID))
@@ -575,10 +742,11 @@ plotExpressionRank <- function(ods, geneID, main, padjCutoff=0.05,
dt[, norm_rank:= rank(counts, ties.method = 'first')]
if(all(c('pValue', 'padjust', 'zScore') %in% assayNames(ods))){
dt[, pValue := assay(ods, 'pValue')[1,]]
- dt[, padjust := assay(ods, 'padjust')[1,]]
+ dt[, padjust := padj(ods, subsetName=subsetName)[1,]]
dt[, zScore := assay(ods, 'zScore')[1,]]
dt[, aberrant := aberrant(ods, padjCutoff=padjCutoff,
- zScoreCutoff=zScoreCutoff)[1,]]
+ zScoreCutoff=zScoreCutoff, subsetName=subsetName)[1,]]
+ dt[is.na(aberrant), aberrant:=FALSE]
} else {
dt[,aberrant:=FALSE]
}
@@ -587,9 +755,15 @@ plotExpressionRank <- function(ods, geneID, main, padjCutoff=0.05,
if(missing(main)){
main <- paste("Expression rank plot:", geneID)
}
+ if(is.null(subsetName)){
+ subtitle <- NULL
+ } else{
+ subtitle <- paste0("FDR across genes in the ", subsetName, " group")
+ }
# create ggplot object
- g <- ggplot(data=dt, aes(x = norm_rank, y = counts, text = paste0(
+ g <- ggplot(data=dt, aes(x = norm_rank, y = counts, label = sampleID,
+ text = paste0(
"Gene ID: ", geneID, "
",
"Sample ID: ", sampleID, "
",
ifelse(uniqueN(groups) == 1, "", paste0("Group: ", group, "
")),
@@ -601,7 +775,7 @@ plotExpressionRank <- function(ods, geneID, main, padjCutoff=0.05,
"nominal P-value: ", sprintf("%1.1E", pValue), "
",
"Z-score: ", round(zScore, digits = 1), "
")
))) +
- labs(title = main, x = 'Sample rank', y = ylab) +
+ labs(title = main, subtitle=subtitle, x = 'Sample rank', y = ylab) +
theme_bw()
if(isTRUE(log)){
@@ -628,6 +802,29 @@ plotExpressionRank <- function(ods, geneID, main, padjCutoff=0.05,
}
if(isTRUE(basePlot)){
+ if(!is.null(label)){
+ if(isScalarCharacter(label) && label == "aberrant"){
+ if(nrow(dt[aberrant == TRUE,]) > 0){
+ g <- g +
+ geom_text_repel(data=dt[aberrant == TRUE,],
+ aes(col=aberrant), fontface='bold',
+ hjust=-.2, vjust=.5)
+ }
+ }
+ else{
+ if(nrow(dt[sampleID %in% label]) > 0){
+ g <- g +
+ geom_text_repel(data=subset(dt, sampleID %in% label),
+ aes(col=aberrant), fontface='bold',
+ hjust=-.2, vjust=.5)
+ }
+ if(any(!(label %in% dt[,sampleID]))){
+ warning("Did not find sample(s) ",
+ paste(label[!(label %in% dt[,sampleID])],
+ collapse=", "), " to label.")
+ }
+ }
+ }
return(g)
}
return(ggplotly(g))
@@ -865,9 +1062,15 @@ plotHeatmap <- function(ods, mtx, annotation_row=NULL, annotation_col=NULL,
plotAberrantPerSample.OUTRIDER <- function(object,
main='Aberrant Genes per Sample',
outlierRatio=0.001, col='Dark2', yadjust=1.2,
- ylab="#Aberrantly expressed genes", ...){
+ ylab="Aberrantly expressed genes",
+ subsetName=NULL, ...){
oneOffset <- 0.8
- count_vector <- sort(aberrant(object, by="sample", ...))
+ count_vector <- aberrant(object, by="sample",
+ subsetName=subsetName, ...)
+ testedSamples <- colnames(object)[which(
+ colSums(is.na(padj(object, subsetName=subsetName))) != nrow(object))]
+ count_vector <- sort(count_vector[testedSamples])
+
hlines = quantile(count_vector, c(0.5, 0.9))
hlines[hlines == 0] <- oneOffset
@@ -876,13 +1079,19 @@ plotAberrantPerSample.OUTRIDER <- function(object,
y=c(count_vector, c(1, oneOffset)[(count_vector != 0) + 1]),
fill=!count_vector <= max(1, length(object)*outlierRatio))
+ if(is.null(subsetName)){
+ subtitle <- NULL
+ } else{
+ subtitle <- paste0("FDR across genes in the ", subsetName, " group")
+ }
+
g <- ggplot(dt2p, aes(x=x, y=y, fill=fill)) +
geom_bar(stat = "Identity") +
theme_bw() +
scale_y_log10(limits=c(oneOffset, NA)) +
ylab(ylab) +
xlab("Sample rank") +
- ggtitle(main) +
+ ggtitle(main, subtitle=subtitle) +
theme(legend.position="none") +
geom_hline(yintercept=hlines) +
annotate("text", label=c("Median", "90^th ~ percentile"),
@@ -1128,3 +1337,247 @@ checkDeprication <- function(names2check, ...){
"\t'", names2check[i], "' --> '", names(names2check[i]), "'")
}
}
+
+plotManhattan.OUTRIDER <- function(object, sampleID, value="pvalue",
+ chr=NULL, main=paste0("Sample: ", sampleID),
+ featureRanges=rowRanges(object),
+ subsetName=NULL,
+ chrColor = c("black", "darkgrey")){
+ requireNamespace("ggbio")
+ requireNamespace("GenomeInfoDb")
+
+ # check user input
+ checkOutriderDataSet(object)
+ stopifnot("Sample not in ods" = sampleID %in% colnames(object))
+ stopifnot("Value should be either pvalue, zscore or l2fc" =
+ value %in% c('pvalue', 'pValue', 'pv', 'zscore', 'zScore',
+ 'l2fc', 'L2FC', 'log2fc'))
+
+ # get granges from rowRanges(ods) or provided ranges, check dimension
+ if(is.null(featureRanges)){
+ if(is.null(rowRanges(object))){
+ stop("No rowRanges(ods) found. Assign them first to use ",
+ "this function or provide a GRanges object.")
+ }
+ } else if(length(featureRanges) != nrow(object)){
+ stop("The provided feature ranges must be of the same length as the ",
+ "ods object.")
+ } else if(is(featureRanges, "GRanges")){
+ gr <- featureRanges
+ } else if(is(featureRanges, "GRangesList")){
+ gr <- unlist(endoapply(featureRanges, range))
+ # faster than range but gives error for empty GRangesList:
+ # gr <- unlist(endoapply(featureRanges, function(rr) rr[1,]))
+ if(length(gr) != nrow(object)){
+ stop("The provided gene ranges do not contain ranges for all rows ",
+ "of the ods object")
+ }
+ } else{
+ stop("The provided feature_ranges must a a GRanges or GRangesList ",
+ "object.")
+ }
+
+ GenomeInfoDb::seqlevelsStyle(gr) <- 'NCBI'
+
+ # Add values to granges
+ if(value %in% c('pvalue', 'pValue', 'pv')){
+ gr$value <- -log10(pValue(object)[, sampleID])
+ # value <- '-log10(pvalue)'
+ value <- expression(paste(-log[10], "(P-value)"))
+ }
+ if(value %in% c('zscore', 'zScore')){
+ gr$value <- zScore(object)[, sampleID]
+ }
+ if(value %in% c('l2fc', 'L2FC', 'log2fc')){
+ if(!"l2fc" %in% assayNames(object)){
+ stop("Please compute the log2 fold changes first before ",
+ "retrieving them.")
+ }
+ gr$value <- assay(object, "l2fc")[, sampleID]
+ value <- expression(paste(log[2], "(fold-change)"))
+ }
+ gr$aberrant <- aberrant(object, subsetName=subsetName)[,sampleID]
+
+ # Sort granges for plot
+ gr <- GenomeInfoDb::sortSeqlevels(gr)
+ gr <- sort(gr)
+
+ # subset to chromosomes in chrSubset if requested
+ if(!is.null(chr)){
+ # check input
+ if(any(grepl("chr", chr))){
+ chr <- gsub("chr", "", chr)
+ }
+ if(!all(chr %in% unique(GenomeInfoDb::seqnames(gr)))){
+ stop("Not all chromosomes selected for subsetting are present ",
+ "in the GRanges object.")
+ }
+
+ # subset
+ gr <- gr[as.character(GenomeInfoDb::seqnames(gr)) %in% chr]
+
+ # add chr to plot title if only one chr given
+ if(length(chr) == 1){
+ main <- paste0(main, "; ",
+ paste("chr", chr, collapse=", ", sep=""))
+ }
+ }
+ if(is.null(subsetName)){
+ subtitle <- NULL
+ } else{
+ subtitle <- paste0("FDR across genes in the ", subsetName, " group")
+ }
+
+ p <- plotGrandLinear.adapted(gr, aes(y = value),
+ color = chrColor,
+ highlight.gr = gr[which(gr$aberrant == TRUE)],
+ highlight.col = 'firebrick',
+ highlight.overlap = 'equal',
+ use.genome.coords=is.null(chr)) +
+ labs(x="Chromosome", y = value, title=main, subtitle=subtitle)
+
+ return(p)
+}
+
+#' @rdname plotFunctions
+#' @export
+setMethod("plotManhattan", signature="OutriderDataSet",
+ plotManhattan.OUTRIDER)
+
+#'
+#' Adapted function from ggbio package to create manhattan plot.
+#' Adapted to allow manhattan plots creation for a subset of chromomes only,
+#' as well as highlighting only ranges that exactly match. Uses functions
+#' from package biovizBase.
+#'
+#' @noRd
+plotGrandLinear.adapted <- function (obj, ..., facets, space.skip = 0.01,
+ geom = NULL, cutoff = NULL, cutoff.color = "red", cutoff.size = 1,
+ legend = FALSE, xlim, ylim, xlab, ylab, main, highlight.gr = NULL,
+ highlight.name = NULL, highlight.col = "red", highlight.label = TRUE,
+ highlight.label.size = 5, highlight.label.offset = 0.05,
+ highlight.label.col = "black",
+ highlight.overlap = c("any", "start", "end", "within", "equal"),
+ spaceline = FALSE,
+ use.genome.coords=TRUE){
+ requireNamespace("biovizBase")
+ if (is.null(geom))
+ geom <- "point"
+ args <- list(...)
+ args.aes <- biovizBase::parseArgsForAes(args)
+ args.non <- biovizBase::parseArgsForNonAes(args)
+ two.color <- c("#0080FF", "#4CC4FF")
+ .is.seq <- FALSE
+ if (!"colour" %in% names(args.aes)) {
+ if (!any(c("color", "colour") %in% names(args.non))) {
+ .color <- two.color
+ args.aes$color <- as.name("seqnames")
+ .is.seq <- TRUE
+ }
+ else {
+ if (length(args.non$color) > 1) {
+ .color <- args.non$color
+ args.aes$color <- as.name("seqnames")
+ .is.seq <- TRUE
+ args.non <- args.non[!names(args.non) %in% c("colour",
+ "color")]
+ }
+ }
+ }
+ else {
+ if (quo_name(args.aes$colour) == "seqnames")
+ args.aes$colour <- as.name("seqnames")
+ }
+ if (!"y" %in% names(args.aes))
+ stop("need to provide y")
+ if(isTRUE(use.genome.coords)){
+ args.non$coord <- "genome"
+ }
+ args.non$space.skip <- space.skip
+ args.non$geom <- geom
+ args.non$object <- obj
+ aes.res <- do.call(aes, args.aes)
+ p <- do.call(ggbio::autoplot, c(list(aes.res), args.non))
+ if (!legend)
+ p <- p + theme(legend.position = "none")
+ if (!missing(ylab))
+ p <- p + ylab(ylab)
+ if (!is.null(cutoff))
+ p <- p + geom_hline(yintercept = cutoff, color = cutoff.color,
+ size = cutoff.size)
+ chrs <- names(GenomeInfoDb::seqlengths(obj))
+ if (.is.seq) {
+ N <- length(chrs)
+ cols <- rep(.color, round(N/length(.color)) + 1)[1:N]
+ names(cols) <- chrs
+ p <- p + scale_color_manual(values = cols)
+ }
+ if (!missing(facets)) {
+ args$facets <- facets
+ args.facets <- biovizBase::subsetArgsByFormals(args, facet_grid,
+ facet_wrap)
+ facet <- ggbio:::.buildFacetsFromArgs(obj, args.facets)
+ p <- p + facet
+ }
+ p <- p + theme(panel.grid.minor = element_blank())
+ if (!is.null(highlight.gr)) {
+ highlight.overlap <- match.arg(highlight.overlap)
+ idx <- findOverlaps(obj, highlight.gr, type=highlight.overlap)
+ .h.pos <- lapply(split(queryHits(idx), subjectHits(idx)), function(id) {
+ gr <- GRanges(as.character(GenomeInfoDb::seqnames(p@data))[id][1],
+ IRanges(start = min(start(p@data[id])),
+ end = max(end(p@data[id]))))
+ val <- max(as.numeric(values(p@data[id])[, quo_name(args.aes$y)]))
+ val <- val * (1 + highlight.label.offset)
+ values(gr)$val <- val
+ gr
+ })
+ .h.pos <- suppressWarnings(do.call("c", unname(.h.pos)))
+ if (length(.h.pos)) {
+ if (is.null(highlight.name)) {
+ highlight.name <- names(highlight.gr)
+ }
+ else {
+ highlight.name <- values(highlight.gr)[, highlight.name]
+ }
+ p <- p + geom_point(data = biovizBase::mold(p@data[queryHits(idx)]),
+ do.call(aes, list(x = substitute(midpoint), y = args.aes$y)),
+ color = highlight.col)
+ if (!is.null(highlight.name)) {
+ GenomeInfoDb::seqlevels(.h.pos, pruning.mode = "coarse") <-
+ GenomeInfoDb::seqlevels(obj)
+ suppressWarnings(GenomeInfoDb::seqinfo(.h.pos) <-
+ GenomeInfoDb::seqinfo(obj))
+ .trans <- biovizBase::transformToGenome(.h.pos, space.skip = space.skip)
+ values(.trans)$mean <- (start(.trans) + end(.trans))/2
+ values(.trans)$names <- highlight.name
+ p <- p + geom_text(data = biovizBase::mold(.trans),
+ size = highlight.label.size,
+ vjust = 0, color = highlight.label.col,
+ do.call(aes,
+ list(x = substitute(mean), y = as.name("val"),
+ label = as.name("names"))))
+ }
+ }
+ }
+ if (spaceline) {
+ vline.df <- p@ggplot$data
+ vline.df <- do.call(rbind, by(vline.df, vline.df$seqnames,
+ function(dd) {
+ data.frame(start = min(dd$start), end = max(dd$end))
+ }))
+ gap <- (vline.df$start[-1] + vline.df$end[-nrow(vline.df)])/2
+ p <- p + geom_vline(xintercept = gap, alpha = 0.5, color = "gray70") +
+ theme(panel.grid = element_blank())
+ }
+ if (!missing(main))
+ p <- p + labs(title = main)
+ if (!missing(xlim))
+ p <- p + xlim(xlim)
+ if (!missing(ylim))
+ p <- p + ylim(ylim)
+ if (missing(xlab))
+ xlab <- ""
+ p <- p + ggplot2::xlab(xlab)
+ p
+}
diff --git a/R/method-results.R b/R/method-results.R
index d1e68c3..144682a 100644
--- a/R/method-results.R
+++ b/R/method-results.R
@@ -20,13 +20,17 @@ compileResults.OUTRIDER <- function(object, padjCutoff=0.05, zScoreCutoff=0,
}
meanCorrectedCounts <- rowMeans(counts(object, normalized=TRUE))
+ meanRawCounts <- rowMeans(counts(object, normalized=FALSE))
if(isFALSE(all)){
abByGene <- aberrant(object,
- padjCutoff=padjCutoff, zScoreCutoff=zScoreCutoff, by="gene")
+ padjCutoff=padjCutoff, zScoreCutoff=zScoreCutoff, by="gene",
+ subsetName=subsetName)
abBySample <- aberrant(object,
- padjCutoff=padjCutoff, zScoreCutoff=zScoreCutoff, by="sample")
+ padjCutoff=padjCutoff, zScoreCutoff=zScoreCutoff, by="sample",
+ subsetName=subsetName)
object <- object[abByGene > 0, abBySample > 0]
meanCorrectedCounts <- meanCorrectedCounts[abByGene > 0]
+ meanRawCounts <- meanRawCounts[abByGene > 0]
}
# warning if no rows to return
@@ -44,6 +48,7 @@ compileResults.OUTRIDER <- function(object, padjCutoff=0.05, zScoreCutoff=0,
zScore=NA_real_,
l2fc=NA_real_,
rawcounts=NA_integer_,
+ meanRawcounts=NA_real_,
normcounts=NA_real_,
meanCorrected=NA_real_,
theta=NA_real_,
@@ -65,6 +70,7 @@ compileResults.OUTRIDER <- function(object, padjCutoff=0.05, zScoreCutoff=0,
zScore = c(zScore(object)),
l2fc = c(assay(object, "l2fc")),
rawcounts = c(counts(object)),
+ meanRawcounts = meanRawCounts,
normcounts = c(counts(object, normalized=TRUE)),
meanCorrected = meanCorrectedCounts,
theta = theta(object),
@@ -77,15 +83,15 @@ compileResults.OUTRIDER <- function(object, padjCutoff=0.05, zScoreCutoff=0,
AberrantByGene = aberrant(object,
padjCutoff=padjCutoff, zScoreCutoff=zScoreCutoff, by="gene",
subsetName=subsetName),
- padj_rank = c(apply(padj(object), 2, rank)),
+ padj_rank = c(apply(padj(object, subsetName=subsetName), 2, rank)),
FDR_set = ifelse(is.null(subsetName), "transcriptome-wide",
subsetName)
)
# round columns if requested
if(is.numeric(round)){
- devNull <- lapply(
- c("normcounts", "zScore", "l2fc", "theta", "meanCorrected"),
+ devNull <- lapply(c("normcounts", "zScore", "l2fc", "theta",
+ "meanRawcounts", "meanCorrected"),
function(x) ans[,c(x):=round(get(x), as.integer(round))] )
}
@@ -140,10 +146,49 @@ compileResultsAll.OUTRIDER <- function(object, padjCutoff=0.05, zScoreCutoff=0,
return(resSub)
})
res <- rbindlist(resSubsets)
-
+
# sort it if existing
- if(length(res) > 0){
- res <- res[order(padjust)]
+ if(nrow(res) > 0){
+ # get aberrant columns from transcriptome-wide results
+ res_tw <- res[FDR_set == "transcriptome-wide",
+ .(geneID, sampleID, padj_rank, aberrant,
+ AberrantBySample, AberrantByGene)]
+
+ # dcast to have only one row per gene-sample combination
+ res <- dcast(res[, !c("padj_rank", "aberrant", "AberrantBySample",
+ "AberrantByGene"), with=FALSE],
+ ... ~ FDR_set, value.var="padjust")
+
+ # merge again with aberrant columns for transcriptome-wide results
+ if(isTRUE(returnTranscriptomewideResults)){
+ # rename column back to padjust for tw results after dcast
+ if("transcriptome-wide" %in% colnames(res)){
+ setnames(res, "transcriptome-wide", "padjust")
+ } else{
+ res[, padjust := NA]
+ }
+
+ # merge with aberrant columns
+ res <- merge(res, res_tw, by=c("geneID", "sampleID"), all.x=TRUE)
+
+ # set aberrant to FALSE if only significant in subset results
+ res[is.na(aberrant), aberrant := FALSE]
+ res[is.na(padjust), padjust := padj(object)[cbind(geneID,sampleID)]]
+
+ # restore previous column order
+ setcolorder(res, colnames(resSubsets[[1]][,!"FDR_set", with=FALSE]))
+
+ # order rows based on transcriptome-wide FDR
+ res <- res[order(padjust)]
+ }
+
+ # rename padjust columns for other gene sets to padjust_setname
+ for(subsetAssayName in padjAssays[padjAssays != "padjust"]){
+ setnames(res, gsub("padjust_", "", subsetAssayName),
+ subsetAssayName, skip_absent=TRUE)
+ }
+ } else{
+ res <- res[, !"FDR_set", with=FALSE]
}
return(res)
@@ -164,9 +209,7 @@ compileResultsAll.OUTRIDER <- function(object, padjCutoff=0.05, zScoreCutoff=0,
#' more user friendly
#' @param all By default FALSE, only significant read counts are listed in the
#' results. If TRUE all results are assembled resulting in a
-#' data.table of length samples x genes. If FDR corrected pvalues
-#' for subsets of genes of interest have been calculated, this
-#' indicates whether the whole subset will be listed in the results.
+#' data.table of length samples x genes.
#' @param returnTranscriptomewideResults If FDR corrected pvalues for subsets
#' of genes of interest have been calculated, this parameter
#' indicates whether additionally the transcriptome-wide results
@@ -174,7 +217,34 @@ compileResultsAll.OUTRIDER <- function(object, padjCutoff=0.05, zScoreCutoff=0,
#' for those subsets should be retrieved.
#' @param ... Additional arguments, currently not used
#' @return A data.table where each row is an outlier event and the columns
-#' contain additional information about this event. Eg padj, l2fc
+#' contain additional information about this event. In details the
+#' table contains:
+#' \itemize{
+#' \item{sampleID / geneID: }{The gene or sample ID as provided by the
+#' user, e.g. \code{rowData(ods)} and \code{colData(ods)},
+#' respectively.}
+#' \item{pValue / padjust: }{The nominal P-value and the FDR corrected
+#' P-value (transcriptome-wide) indicating the outlier status.}
+#' \item{zScore / l2fc: }{The z score and log\eqn{_2}{[2]} fold change
+#' as computed by \code{\link[OUTRIDER]{computeZscores}}.}
+#' \item{rawcounts: }{The observed read counts.}
+#' \item{normcounts: }{The expected count given the fitted
+#' autoencoder model for the given gene-sample combination.}
+#' \item{meanRawcounts / meanCorrected: }{For this gene, the mean of the
+#' observed or expected counts, respectively, given the fitted
+#' autoencoder model.}
+#' \item{theta: }{The dispersion parameter of the NB distribution
+#' for the given gene.}
+#' \item{aberrant: }{The transcriptome-wide outlier status of this event:
+#' \code{TRUE} or \code{FALSE}.}
+#' \item{AberrantBySample / AberrantByGene: }{Number of outliers for the
+#' given sample or gene (transcriptome-wide), respectively.}
+#' \item{padj_rank: }{Rank of this outlier event within the given sample.}
+#' \item{padjust_FDRset: }{The FDR corrected P-value with respect to the
+#' gene subset called 'FDRset', if gene subsets were specified
+#' during the P-value computation. Find more details at
+#' \code{\link[OUTRIDER]{computePvalues}}.}
+#' }
#'
#' @examples
#'
diff --git a/R/pValMatrix.R b/R/pValMatrix.R
index f011e80..b786964 100644
--- a/R/pValMatrix.R
+++ b/R/pValMatrix.R
@@ -2,9 +2,10 @@
#' Calculate P-values
#'
#' This function computes the P-values based on the fitted negative binomial
-#' model. It computes two matrices with the same dimension as the count matrix
-#' (samples x genes), which contain the corresponding P-values and
-#' adjusted P-values of every count.
+#' model being an outlier for the given sample gene combination. It computes
+#' two matrices with the same dimension as the count matrix (samples x genes),
+#' which contain the corresponding P-values and adjusted P-values of every
+#' count. The adjusted P-values are computed across all genes per sample.
#'
#' @param object An OutriderDataSet
#' @param alternative Can be one of "two.sided", "greater" or "less" to specify
diff --git a/R/package-OUTRIDER.R b/R/package-OUTRIDER.R
index d05cc15..14912b9 100644
--- a/R/package-OUTRIDER.R
+++ b/R/package-OUTRIDER.R
@@ -18,25 +18,29 @@
#' estimateSizeFactorsForMatrix replaceOutliers dispersions
#'
#' @importFrom SummarizedExperiment colData colData<- rowData rowData<-
-#' assays assays<- assayNames mcols mcols<- assay assay<-
+#' assays assays<- assayNames mcols mcols<- assay assay<- rowRanges
#'
#' @importFrom BBmisc isScalarLogical isScalarNumeric isScalarCharacter isFALSE
#' isScalarValue isScalarNA chunk seq_col seq_row
#'
#' @importFrom BiocParallel bplapply bpparam SerialParam bpisup bpstart bpstop
+#' bpmapply
#'
#' @importFrom GenomicFeatures exonsBy
#'
#' @importFrom txdbmaker makeTxDbFromGFF
#'
-#' @importFrom GenomicRanges GRanges reduce width
+#' @importFrom GenomicRanges GRanges reduce width start end findOverlaps
#'
#' @importFrom ggplot2 ggplot aes annotate geom_bar geom_histogram
#' geom_hline geom_smooth geom_point labs scale_x_log10
#' scale_y_log10 scale_fill_manual scale_color_manual
#' scale_fill_brewer scale_color_brewer theme ylim
#' ggtitle geom_vline geom_text scale_linetype_manual geom_line
-#' geom_abline theme_bw element_blank xlab ylab scale_color_identity
+#' geom_abline theme_bw element_blank xlab ylab scale_color_identity
+#' facet_grid facet_wrap quo_name
+#'
+#' @importFrom ggrepel geom_text_repel
#'
#' @importFrom grDevices colorRampPalette
#'
@@ -61,7 +65,8 @@
#'
#' @importFrom reshape2 melt
#'
-#' @importFrom S4Vectors DataFrame metadata metadata<-
+#' @importFrom S4Vectors DataFrame metadata metadata<- endoapply queryHits
+#' subjectHits values values<-
#'
#' @importFrom scales math_format trans_format
#'
@@ -127,5 +132,10 @@ globalVariables(package="OUTRIDER", c(
"Var2",
".x",
"y",
- "z"))
+ "z",
+ "AberrantByGene",
+ "AberrantBySample",
+ "FDR_set",
+ "aberrantLabel",
+ "geneID"))
diff --git a/README.md b/README.md
index a282123..e515ee1 100644
--- a/README.md
+++ b/README.md
@@ -47,7 +47,7 @@ install.packages('devtools')
devtools::install_github('gagneurlab/OUTRIDER', dependencies=TRUE)
# installing a specific version/tag of OUTRIDER
-devtools::install_github('gagneurlab/OUTRIDER@1.16.1', dependencies=TRUE)
+devtools::install_github('gagneurlab/OUTRIDER@1.20.1', dependencies=TRUE)
```
To check which versions/tags are available you can check the GitHub repo
diff --git a/man/aberrant.Rd b/man/aberrant.Rd
index 5ce6152..4174fb8 100644
--- a/man/aberrant.Rd
+++ b/man/aberrant.Rd
@@ -30,7 +30,7 @@ if NA or NULL no Z-score cutoff is used}
'gene' or not at all (default).}
\item{subsetName}{The name of a subset of genes of interest for which FDR
-corected pvalues have been previously computed. Those FDR values
+corected pvalues were previously computed. Those FDR values
on the subset will then be used to determine aberrant status.
Default is NULL (using transcriptome-wide FDR corrected pvalues).}
}
diff --git a/man/computePvalues.Rd b/man/computePvalues.Rd
index 6d48e60..74583e2 100644
--- a/man/computePvalues.Rd
+++ b/man/computePvalues.Rd
@@ -42,9 +42,10 @@ An OutriderDataSet object with computed nominal and adjusted P-values
}
\description{
This function computes the P-values based on the fitted negative binomial
-model. It computes two matrices with the same dimension as the count matrix
-(samples x genes), which contain the corresponding P-values and
-adjusted P-values of every count.
+model being an outlier for the given sample gene combination. It computes
+two matrices with the same dimension as the count matrix (samples x genes),
+which contain the corresponding P-values and adjusted P-values of every
+count. The adjusted P-values are computed across all genes per sample.
}
\examples{
ods <- makeExampleOutriderDataSet()
diff --git a/man/computeZscores.Rd b/man/computeZscores.Rd
index 8677b36..cb87d01 100644
--- a/man/computeZscores.Rd
+++ b/man/computeZscores.Rd
@@ -22,14 +22,13 @@ An OutriderDataSet containing the Z score matrix "zScore" and
the log2 fold changes "l2fc" as asasys.
}
\description{
-Computes the z scores for every count in the matrix.
-The z score is defined in the log2 space as follows:
-\ifelse{html}{
- \out{zij = (lij - mujl)/
- sigmajl}}{
- \deqn{z_{ij} = \frac{l_{ij} - \mu_j^l}{\sigma_j^l}}},
-where l is the log2 transformed normalized count and mu and sigma the
-mean and standard deviation for gene j, respectively.
+Computes the z scores for every count in the matrix.
+The z score is defined in the log\eqn{_2}{[2]} space as follows:
+\eqn{z_{ij} = \frac{l_{ij} - \mu_j^l}{
+ \sigma_j^l}}{z_ij = (l[ij] - mu[j]^l)/sigma[ij]}
+where \code{l} is the log\eqn{_2}{[2]} transformed normalized count and
+\eqn{\mu}{mu} and \eqn{\sigma}{sigma} the mean and standard deviation
+for gene \code{j} and sample \code{i}, respectively.
}
\examples{
ods <- makeExampleOutriderDataSet()
diff --git a/man/plotFunctions.Rd b/man/plotFunctions.Rd
index b5bd579..0fa518b 100644
--- a/man/plotFunctions.Rd
+++ b/man/plotFunctions.Rd
@@ -3,6 +3,7 @@
\name{plotAberrantPerSample}
\alias{plotAberrantPerSample}
\alias{plotCountCorHeatmap}
+\alias{plotManhattan}
\alias{plotEncDimSearch}
\alias{plotQQ}
\alias{plotVolcano}
@@ -22,12 +23,15 @@
\alias{plotAberrantPerSample,OutriderDataSet-method}
\alias{plotDispEsts,OutriderDataSet-method}
\alias{plotEncDimSearch,OutriderDataSet-method}
+\alias{plotManhattan,OutriderDataSet-method}
\title{Visualization functions for OUTRIDER}
\usage{
plotAberrantPerSample(object, ...)
plotCountCorHeatmap(object, ...)
+plotManhattan(object, ...)
+
plotEncDimSearch(object, ...)
plotQQ(object, ...)
@@ -40,9 +44,12 @@ plotVolcano(object, ...)
main,
padjCutoff = 0.05,
zScoreCutoff = 0,
+ label = "aberrant",
+ xaxis = c("zscore", "log2fc", "fc"),
pch = 16,
basePlot = FALSE,
- col = c("gray", "firebrick")
+ col = c("gray", "firebrick"),
+ subsetName = NULL
)
\S4method{plotQQ}{OutriderDataSet}(
@@ -56,6 +63,7 @@ plotVolcano(object, ...)
legendPos = "topleft",
outlierRatio = 0.001,
conf.alpha = 0.05,
+ subsetName = NULL,
pch = 16,
xlim = NULL,
ylim = NULL,
@@ -70,6 +78,8 @@ plotExpectedVsObservedCounts(
log = TRUE,
groups = c(),
groupColSet = "Set1",
+ label = "aberrant",
+ subsetName = NULL,
...
)
@@ -84,7 +94,9 @@ plotExpressionRank(
log = TRUE,
col = c("gray", "firebrick"),
groups = c(),
- groupColSet = "Accent"
+ groupColSet = "Accent",
+ label = "aberrant",
+ subsetName = NULL
)
\S4method{plotCountCorHeatmap}{OutriderDataSet}(
@@ -128,7 +140,8 @@ plotCountGeneSampleHeatmap(
outlierRatio = 0.001,
col = "Dark2",
yadjust = 1.2,
- ylab = "#Aberrantly expressed genes",
+ ylab = "Aberrantly expressed genes",
+ subsetName = NULL,
...
)
@@ -150,6 +163,17 @@ plotPowerAnalysis(ods)
plotExpressedGenes(ods, main = "Statistics of expressed genes")
plotSizeFactors(ods, basePlot = TRUE)
+
+\S4method{plotManhattan}{OutriderDataSet}(
+ object,
+ sampleID,
+ value = "pvalue",
+ chr = NULL,
+ main = paste0("Sample: ", sampleID),
+ featureRanges = rowRanges(object),
+ subsetName = NULL,
+ chrColor = c("black", "darkgrey")
+)
}
\arguments{
\item{...}{Additional parameters passed to plot() or plot_ly() if not stated
@@ -163,6 +187,15 @@ Can also be a vector. Integers are treated as indices.}
\item{padjCutoff, zScoreCutoff}{Significance or Z-score cutoff
to mark outliers}
+\item{label}{Indicates which genes or samples should be labeled. By default
+all aberrant genes/samples are labelled. Can be set to NULL for
+no labels. Provide a vector of geneIDs/sampleIDs to label
+specific genes/samples.}
+
+\item{xaxis}{Indicates which assay should be shown on the x-axis of the
+volcano plot. Defaults to 'zscore'. Other options are 'fc' and
+'log2fc' for the fold-change or log2 fold-change.}
+
\item{pch}{Integer or character to be used for plotting the points}
\item{basePlot}{if \code{TRUE}, use the R base plot version, else use the
@@ -172,6 +205,11 @@ plotly framework, which is the default}
of length 2. (1. normal point; 2. outlier point) or a single
character referring to a color palette from \code{RColorBrewer}.}
+\item{subsetName}{The name of a subset of genes of interest for which FDR
+corrected pvalues were previously computed. Those FDR values
+on the subset will then be used to determine aberrant status.
+Default is NULL (using transcriptome-wide FDR corrected pvalues).}
+
\item{global}{Flag to plot a global Q-Q plot, default FALSE}
\item{samplePoints}{Sample points for Q-Q plot, defaults to max 30k points}
@@ -234,6 +272,20 @@ top n genes based on the BCV.}
\item{compareDisp}{If TRUE, the default, and if the autoCorrect normalization
was used it computes the dispersion without autoCorrect and
plots it for comparison.}
+
+\item{value}{Indicates which assay is shown in the manhattan plot. Defaults
+to 'pvalue'. Other options are 'zScore' and 'log2fc'.}
+
+\item{chr}{The chromosomes to be displayed in the \code{plotManhattan}
+function. Default is \code{NULL}, i.e. all chromosomes are shown.}
+
+\item{featureRanges}{A GRanges object of the same length as the
+OutriderDataSet object that contains the genomic positions of
+features that are shown in the manhattan plot.}
+
+\item{chrColor}{A vector of length 2 giving the two colors used for
+coloring alternating chromosomes in the manhattan plot. Default
+colors are 'black' and 'darkgrey'.}
}
\value{
If base R graphics are used nothing is returned else the plotly or
@@ -319,7 +371,12 @@ levels. The curves are smooths to make the reading of the plot easier.
\code{plotEncDimSearch}: Visualization of the hyperparameter optimization.
It plots the encoding dimension against the achieved loss (area under the
precision-recall curve). From this plot the optimum should be choosen for
-the \code{q} in fitting process.
+the \code{q} in fitting process.
+
+\code{plotManhattan}: Visualizes different metrics for each gene (pvalue,
+log2 fold-change, z-score) along with the genomic coordinates of the
+respective gene as a manhattan plot. Detected outlier genes are highlighted
+in red.
}
\examples{
ods <- makeExampleOutriderDataSet(dataset="Kremer")
@@ -332,22 +389,35 @@ implementation <- 'autoencoder'
mcols(ods)$basepairs <- 300 # assign pseudo gene length for filtering
ods <- filterExpression(ods)
-ods <- OUTRIDER(ods, implementation=implementation)
+# restrict FDR correction to set of genes of interest per sample
+genesOfInterest <- list(MUC1372 = c("ATPIF1", "UROD", "YBX1",
+ sample(rownames(ods), 25)),
+ MUC1360 = sample(rownames(ods), 50),
+ MUC1350 = sample(rownames(ods), 75),
+ X76619 = sample(rownames(ods), 20),
+ X74172 = sample(rownames(ods), 150))
+ods <- OUTRIDER(ods, implementation=implementation, subsets=list("exampleGenes"=genesOfInterest))
plotAberrantPerSample(ods)
+plotAberrantPerSample(ods, subsetName="exampleGenes")
plotVolcano(ods, 49)
plotVolcano(ods, 'MUC1365', basePlot=TRUE)
+plotVolcano(ods, 'MUC1351', basePlot=TRUE, xaxis="log2fc", label=c("NBPF16"))
+plotVolcano(ods, 'MUC1372', basePlot=TRUE, subsetName="exampleGenes")
plotExpressionRank(ods, 35)
-plotExpressionRank(ods, "NDUFS5", normalized=FALSE,
+plotExpressionRank(ods, 35, subsetName="exampleGenes")
+plotExpressionRank(ods, "NDUFS5", normalized=FALSE, label="MUC1372",
log=FALSE, main="Over expression outlier", basePlot=TRUE)
plotQQ(ods, 149)
+plotQQ(ods, 149, subsetName="exampleGenes")
plotQQ(ods, global=TRUE, outlierRatio=0.001)
plotExpectedVsObservedCounts(ods, 149)
plotExpectedVsObservedCounts(ods, "ATAD3C", basePlot=TRUE)
+plotExpectedVsObservedCounts(ods, "UROD", subsetName="exampleGenes")
plotExpressedGenes(ods)
@@ -378,4 +448,18 @@ ods <- findEncodingDim(ods, params=c(3, 10, 20, 35, 50),
plotEncDimSearch(ods)
}
+# To show the pvalues of a sample in a manhattan plot, rowRanges(ods) must
+# contain the genomic position of each feature or a GRanges object must
+# be provided
+\dontrun{
+# in case rowRanges(ods) is a GRangesList, run this first once to speed up:
+rowRanges(ods) <- unlist(endoapply(rowRanges(ods), range))
+}
+gr <- GRanges(
+ seqnames=sample(paste0("chr", 1:22), nrow(ods), replace=TRUE),
+ ranges=IRanges(start=runif(nrow(ods), min=0, max=1e5), width=100))
+plotManhattan(ods, "MUC1350", value="pvalue", featureRanges=gr)
+plotManhattan(ods, "MUC1350", value="l2fc", featureRanges=gr)
+plotManhattan(ods, "MUC1372", featureRanges=gr, subsetName="exampleGenes")
+
}
diff --git a/man/results.Rd b/man/results.Rd
index 9702ab2..78e0bd6 100644
--- a/man/results.Rd
+++ b/man/results.Rd
@@ -32,9 +32,7 @@ more user friendly}
\item{all}{By default FALSE, only significant read counts are listed in the
results. If TRUE all results are assembled resulting in a
-data.table of length samples x genes. If FDR corrected pvalues
-for subsets of genes of interest have been calculated, this
-indicates whether the whole subset will be listed in the results.}
+data.table of length samples x genes.}
\item{returnTranscriptomewideResults}{If FDR corrected pvalues for subsets
of genes of interest have been calculated, this parameter
@@ -44,7 +42,34 @@ for those subsets should be retrieved.}
}
\value{
A data.table where each row is an outlier event and the columns
- contain additional information about this event. Eg padj, l2fc
+ contain additional information about this event. In details the
+ table contains:
+ \itemize{
+ \item{sampleID / geneID: }{The gene or sample ID as provided by the
+ user, e.g. \code{rowData(ods)} and \code{colData(ods)},
+ respectively.}
+ \item{pValue / padjust: }{The nominal P-value and the FDR corrected
+ P-value (transcriptome-wide) indicating the outlier status.}
+ \item{zScore / l2fc: }{The z score and log\eqn{_2}{[2]} fold change
+ as computed by \code{\link[OUTRIDER]{computeZscores}}.}
+ \item{rawcounts: }{The observed read counts.}
+ \item{normcounts: }{The expected count given the fitted
+ autoencoder model for the given gene-sample combination.}
+ \item{meanRawcounts / meanCorrected: }{For this gene, the mean of the
+ observed or expected counts, respectively, given the fitted
+ autoencoder model.}
+ \item{theta: }{The dispersion parameter of the NB distribution
+ for the given gene.}
+ \item{aberrant: }{The transcriptome-wide outlier status of this event:
+ \code{TRUE} or \code{FALSE}.}
+ \item{AberrantBySample / AberrantByGene: }{Number of outliers for the
+ given sample or gene (transcriptome-wide), respectively.}
+ \item{padj_rank: }{Rank of this outlier event within the given sample.}
+ \item{padjust_FDRset: }{The FDR corrected P-value with respect to the
+ gene subset called 'FDRset', if gene subsets were specified
+ during the P-value computation. Find more details at
+ \code{\link[OUTRIDER]{computePvalues}}.}
+ }
}
\description{
This function assembles a results table of significant outlier events based
diff --git a/tests/testthat/test_methods.R b/tests/testthat/test_methods.R
index 6c06910..31add16 100644
--- a/tests/testthat/test_methods.R
+++ b/tests/testthat/test_methods.R
@@ -26,7 +26,7 @@ test_that("result method", {
set.seed(42)
ods <- makeExampleOutriderDataSet(100, 50)
expect_error(results(ods), "Please calculate..*")
- ods <- OUTRIDER(ods, iteration=2)
+ ods <- OUTRIDER(ods, q=2, iteration=2)
expect_warning(res <- results(ods, padj=1e-10), "No significant events:")
expect_equal(colnames(res), colnames(results(ods)))
diff --git a/tests/testthat/test_plotting.R b/tests/testthat/test_plotting.R
index c9d27df..9656804 100644
--- a/tests/testthat/test_plotting.R
+++ b/tests/testthat/test_plotting.R
@@ -6,33 +6,51 @@ test_that("plotting", {
mcols(ods)[['basepairs']] <- rnbinom(nrow(ods), mu=1e4, size=2)
ods <- filterExpression(ods, filterGenes=FALSE, fpkmCutoff=1e3)
- ods <- OUTRIDER(ods, implementation='pca', q=2)
+ genesOfInterest <- list("sample_1"=sample(rownames(ods), 10),
+ "sample_3"=sample(rownames(ods), 8),
+ "sample_10"=sample(rownames(ods), 20))
+ ods <- OUTRIDER(ods, implementation='pca', q=2,
+ subsets=list("testSet"=genesOfInterest))
sex <- sample(c("female", "male"), dim(ods)[2], replace=TRUE)
colData(ods)$Sex <- sex
expect_is(plotAberrantPerSample(ods), "ggplot")
-
+ expect_is(plotAberrantPerSample(ods, subsetName="testSet"), "ggplot")
expect_equal(class(plotVolcano(ods, 30)), c("plotly", "htmlwidget"))
expect_is(plotVolcano(ods, "sample_10", basePlot=TRUE), "ggplot")
+ expect_is(plotVolcano(ods, "sample_10", subsetName="testSet",
+ basePlot=TRUE), "ggplot")
+ expect_is(plotVolcano(ods, "sample_10", label="feature_3", basePlot=TRUE),
+ "ggplot")
expect_equal(class(plotExpressionRank(ods, 23)), c("plotly", "htmlwidget"))
expect_is(plotExpressionRank(ods, "feature_13", normalized=FALSE,
log=FALSE, basePlot=TRUE), 'ggplot')
+ expect_is(plotExpressionRank(ods, "feature_13", normalized=TRUE,
+ subsetName="testSet", basePlot=TRUE), 'ggplot')
+ expect_is(plotExpressionRank(ods, "feature_13", normalized=FALSE,
+ label=c("sample_1", "sample_5"),
+ log=FALSE, basePlot=TRUE), 'ggplot')
expect_equal(class(plotExpectedVsObservedCounts(ods, 39)),
c("plotly", "htmlwidget"))
expect_is(plotExpectedVsObservedCounts(ods, "feature_32", basePlot=TRUE),
"ggplot")
+ expect_is(plotExpectedVsObservedCounts(ods, "feature_32",
+ subsetName="testSet", basePlot=TRUE), "ggplot")
+ expect_is(plotExpectedVsObservedCounts(ods, "feature_32", label="aberrant",
+ basePlot=TRUE), "ggplot")
expect_is(plotExpressedGenes(ods), "ggplot")
expect_null(plotQQ(ods, 1))
expect_null(plotQQ(ods, "feature_20"))
+ expect_null(plotQQ(ods, "feature_20", subsetName="testSet"))
expect_null(plotQQ(ods, global=TRUE))
expect_null(plotQQ(ods, global=TRUE, outlierRatio=0.001))
expect_null(plotQQ(ods, global=TRUE, outlierRatio=NULL))
@@ -62,6 +80,15 @@ test_that("plotting", {
plotPowerAnalysis(ods)
+ expect_error(plotManhattan(ods, "sample_1"))
+ gr <- GRanges(
+ seqnames=sample(paste0("chr", 1:22), nrow(ods), replace=TRUE),
+ ranges=IRanges(start=runif(nrow(ods), min=0, max=1e5), width=100))
+ expect_is(plotManhattan(ods, "sample_1", featureRanges=gr), "GGbio")
+ expect_is(plotManhattan(ods, "sample_1", featureRanges=gr, chr=10), "GGbio")
+ expect_is(plotManhattan(ods, "sample_26", featureRanges=gr,
+ subsetName="testSet"), "GGbio")
+
})
diff --git a/vignettes/OUTRIDER.Rnw b/vignettes/OUTRIDER.Rnw
index 36bcc11..b1367d9 100644
--- a/vignettes/OUTRIDER.Rnw
+++ b/vignettes/OUTRIDER.Rnw
@@ -513,6 +513,31 @@ head(res)
dim(res)
@
+In details the table contains:
+\itemize{
+ \item{sampleID / geneID: }{The gene or sample ID as provided by the
+ user, e.g. rowData(ods) and colData(ods) respectively.}
+ \item{pValue / padjust: }{The nominal P-value and the FDR corrected
+ P-value indicating the outlier status. Find more details at
+ computePvalues.}
+ \item{zScore / l2fc: }{The z score and $\log_{2}$ fold change
+ as computed by computeZscores.}
+ \item{rawcounts: }{The observed read counts.}
+ \item{normcounts: }{The expected count given the fitted
+ autoencoder model for the given gene-sample combination.}
+ \item{meanRawcounts / meanCorrected: }{For this gene, the mean of the
+ observed or expected counts, respectively, given the fitted
+ autoencoder model.}
+ \item{theta: }{The dispersion parameter of the NB distribution
+ for the given gene.}
+ \item{aberrant: }{The outlier status of this event: TRUE or FALSE.}
+ \item{AberrantBySample / AberrantByGene: }{Number of outliers for the
+ given sample or gene, respectively.}
+ \item{padj rank: }{Rank of this outlier event within the given sample.}
+ \item{FDR set: }{The subset-name used for the P-value computation.}
+}
+
+
\subsection{Number of aberrant genes per sample}