diff --git a/DESCRIPTION b/DESCRIPTION index 012c647..71722b8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,6 +20,8 @@ Imports: Biobase, BiocParallel, Boruta, + ComplexHeatmap, + circlize, data.table, doParallel, DOSE, @@ -30,6 +32,7 @@ Imports: ggrepel, glmnet, gplots, + grid, Heatplus, invgamma, iterators, @@ -38,7 +41,6 @@ Imports: Matrix, outliers, parallel, - pheatmap, purrr, qvalue, randomForest, diff --git a/NAMESPACE b/NAMESPACE index 6b4eec0..fc1480a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export(ComBat.NA) export(blue.colors) +export(complex_heatmap) export(concatenate_by_features) export(correct_batch_effect) export(correct_batch_effect_NA) @@ -39,9 +40,8 @@ export(plotAUC) export(plot_annHeatmap2) export(plot_feature) export(plot_pca) -export(plot_sample_correlation_heatmap) -export(plot_sample_correlation_pheatmap) export(plot_umap) +export(plot_upset) export(plot_volcano) export(protein_inference) export(readMaxQuantPeptides) @@ -55,8 +55,10 @@ export(rollup_using_representative) export(rrollup) export(subset_by_size) export(xtable2) +import(ComplexHeatmap) import(ggplot2) import(glmnet) +import(grid) import(limma) import(statmod) importClassesFrom(Matrix,dgCMatrix) @@ -85,6 +87,7 @@ importFrom(MSnbase,sampleNames) importFrom(ROCR,performance) importFrom(ROCR,prediction) importFrom(WGCNA,empiricalBayesLM) +importFrom(circlize,colorRamp2) importFrom(data.table,rbindlist) importFrom(data.table,setDT) importFrom(doParallel,registerDoParallel) @@ -97,7 +100,6 @@ importFrom(dplyr,case_when) importFrom(dplyr,desc) importFrom(dplyr,distinct) importFrom(dplyr,do) -importFrom(dplyr,everything) importFrom(dplyr,filter) importFrom(dplyr,full_join) importFrom(dplyr,group_by) @@ -153,8 +155,14 @@ importFrom(ggplot2,ylim) importFrom(ggrepel,geom_label_repel) importFrom(gplots,bluered) importFrom(gplots,heatmap.2) +importFrom(grDevices,bmp) importFrom(grDevices,colorRampPalette) +importFrom(grDevices,dev.off) +importFrom(grDevices,jpeg) +importFrom(grDevices,pdf) +importFrom(grDevices,png) importFrom(grDevices,rgb) +importFrom(grDevices,tiff) importFrom(graphics,abline) importFrom(graphics,axis) importFrom(graphics,barplot) @@ -186,7 +194,6 @@ importFrom(parallel,detectCores) importFrom(parallel,makeCluster) importFrom(parallel,mclapply) importFrom(parallel,stopCluster) -importFrom(pheatmap,pheatmap) importFrom(purrr,map) importFrom(purrr,map2) importFrom(purrr,map_chr) @@ -203,6 +210,7 @@ importFrom(scales,log_breaks) importFrom(scales,pretty_breaks) importFrom(scales,rescale) importFrom(scales,trans_new) +importFrom(scales,viridis_pal) importFrom(stats,anova) importFrom(stats,as.dendrogram) importFrom(stats,as.dist) @@ -219,7 +227,6 @@ importFrom(stats,dnorm) importFrom(stats,fisher.test) importFrom(stats,glm) importFrom(stats,hclust) -importFrom(stats,heatmap) importFrom(stats,lm) importFrom(stats,loess) importFrom(stats,loess.control) @@ -243,6 +250,7 @@ importFrom(stats,quantile) importFrom(stats,reorder) importFrom(stats,rnorm) importFrom(stats,sd) +importFrom(stats,setNames) importFrom(stats,smooth.spline) importFrom(stats,terms) importFrom(stats,var) @@ -259,6 +267,7 @@ importFrom(umap,umap) importFrom(umap,umap.defaults) importFrom(utils,flush.console) importFrom(utils,head) +importFrom(utils,modifyList) importFrom(utils,read.csv) importFrom(utils,read.delim) importFrom(utils,setTxtProgressBar) diff --git a/R/complex_heatmap.R b/R/complex_heatmap.R new file mode 100644 index 0000000..98b7ba0 --- /dev/null +++ b/R/complex_heatmap.R @@ -0,0 +1,485 @@ +#' @title 
Annotated Expression or Correlation Heatmaps +#' +#' +#' @description A wrapper around functions from +#' \code{\link[ComplexHeatmap]{ComplexHeatmap-package}}. Creates expression or +#' correlation heatmaps from \code{eSet} or \code{MSnSet} objects. +#' +#' +#' @param eset an object of class \code{\link[Biobase]{eSet}} or +#' \code{\link[MSnbase]{MSnSet-class}}. +#' @param clustering_distance character; the distance measure used to cluster +#' rows and columns. Passed to \code{\link[stats]{dist}}. One of +#' (\code{"euclidean"}, \code{"maximum"}, \code{"manhattan"}, +#' \code{"canberra"}, \code{"binary"}, \code{"minkowski"}, \code{"pearson"}, +#' \code{"spearman"}, or \code{"kendall"}). Default is \code{"euclidean"}. +#' @param clustering_method character; the agglomeration method used to cluster +#' rows and columns. Passed to \code{\link[stats]{hclust}}. One of +#' (\code{"ward.D"}, \code{"ward.D2"}, \code{"single"}, \code{"average"}, +#' \code{"complete"}, \code{"mcquitty"}, \code{"median"}, or +#' \code{"centroid"}). Default is \code{"ward.D"}. +#' @param heatmap_type character; the type of heatmap to generate. Must be (an +#' abbreviation of) either \code{"expression"}, \code{"sample_correlation"}, +#' or \code{"feature_correlation"}. The default (\code{"expression"}) +#' generates a heatmap with features as rows and samples as columns. The other +#' options calculate the sample or feature correlation matrix and generate a +#' heatmap with samples or features as both rows and columns. +#' @param cor_method character; the method used to generate the correlation +#' matrix if \code{heatmap_type} is a correlation heatmap. One of +#' (\code{"pearson"}, \code{"kendall"}, or \code{"spearman"}). Defaults to +#' \code{"pearson"}. Passed to \code{\link[stats]{cor}}. +#' @param cluster_columns logical; whether to cluster the columns. +#' @param cluster_rows logical; whether to cluster the rows. +#' @param show_column_dendrogram logical; whether to show the column dendrogram. +#' Does not affect clustering. +#' @param show_row_dendrogram similar to \code{show_column_dendrogram}, but for +#' rows. +#' @param show_column_names logical; whether to show the column names. +#' @param show_row_names logical; whether to show the row names. +#' @param heatmap_title character; overall title for the heatmap. +#' @param heatmap_legend_title character; title of the heatmap color legend. +#' @param color_range numeric; vector of length 2 used to restrict the colors of +#' the heatmap body. Useful when color differences are not easily discernible +#' due to outliers. +#' @param heatmap_args list of arguments passed to +#' \code{\link[ComplexHeatmap]{Heatmap}}. +#' @param anno_column character; one or more column names of \code{pData(eset)} +#' used to annotate the columns of the heatmap. By default, columns are not +#' annotated. +#' @param anno_row character; one or more column names of \code{fData(eset)} +#' used to annotate the rows of the heatmap. By default, rows are not +#' annotated. +#' @param anno_column_titles character; names for the column annotations, if +#' different from \code{anno_column}. Must be the same length as +#' \code{anno_column}. +#' @param anno_row_titles similar to \code{anno_column_titles}, but for row +#' annotations. +#' @param anno_column_colors list of custom colors for column annotations +#' specified by \code{anno_column}. List names must match one or more names in +#' \code{anno_column}. 
If modifying the colors of a continuous column +#' annotation, **always use \code{\link[circlize]{colorRamp2}}** with desired +#' breaks and colors. Otherwise, pass a vector of unique colors (order +#' matters). +#' @param anno_row_colors same as \code{anno_column_colors}, but for +#' \code{anno_row}. +#' @param anno_args list of arguments passed to +#' \code{\link[ComplexHeatmap]{HeatmapAnnotation}}. +#' @param filename character; file name used to save the heatmap. Must end in +#' (".png", ".bmp", ".jpg", ".tif", or ".pdf"). +#' @param height numeric; height of heatmap in inches. Default is \code{5}. +#' @param width numeric; width of heatmap in inches. Default is \code{5}. +#' @param draw_args list of arguments passed to +#' \code{\link[ComplexHeatmap]{draw-HeatmapList-method}}. Unlikely to be used +#' often. +#' +#' @return An object of class \code{\link[ComplexHeatmap]{HeatmapList-class}} or +#' nothing (save to file instead). +#' +#' @seealso +#' \href{https://jokergoo.github.io/ComplexHeatmap-reference/book/}{ComplexHeatmap +#' Complete Reference} +#' +#' @note If the `hclust` error `NA/NaN/Inf in foreign function call` occurs, it +#' means there are cases where two or more features are not present in the +#' same samples, so their distances will be `NA`, and it is impossible to +#' perform hierarchical clustering. If this happens, filter to features +#' present in more than 50% of samples with `m[rowMeans(!is.na(m)) > 0.5, ]`, +#' where `m` is the `MSnSet`. +#' +#' @references Gu, Z., Eils, R., & Schlesner, M. (2016). Complex heatmaps reveal +#' patterns and correlations in multidimensional genomic data. +#' *Bioinformatics*, *32*(18), 2847-2849. +#' \url{https://doi.org/10.1093/bioinformatics/btw313} +#' +#' Gu, Z., Gu, L., Eils, R., Schlesner, M., & Brors, B. (2014). Circlize +#' implements and enhances circular visualization in R. *Bioinformatics*, +#' *30*(19), 2811-2812. 
\url{https://doi.org/10.1093/bioinformatics/btu393} +#' +#' @md +#' +#' @import ComplexHeatmap +#' @importFrom dplyr %>% +#' @importFrom Biobase exprs pData fData +#' @importFrom stats cor setNames +#' @importFrom grDevices bmp jpeg png tiff pdf dev.off +#' @importFrom purrr map2 +#' @importFrom circlize colorRamp2 +#' @importFrom utils modifyList +#' +#' @export complex_heatmap +#' +#' @examples +#' # library(MSnSet.utils) +#' data(longitudinal_biomarker_study) # MSnSet +#' ee <- longitudinal_biomarker_study +#' # Missingness filter - clustering may fail otherwise +#' ee <- ee[rowMeans(!is.na(exprs(ee))) >= 0.5, ] +#' +#' # Expression heatmap +#' complex_heatmap(ee) +#' +#' # Limit color range to see differences more easily +#' complex_heatmap(ee, color_range = c(-2, 2)) +#' +#' # Sample correlation heatmap +#' complex_heatmap(ee, heatmap_type = "s") +#' +#' # Annotate columns by "Type" (categorical) and "Age" (continuous) +#' # Annotate rows by "isSpike" (logical) +#' complex_heatmap(ee, anno_column = c("Type", "Age"), +#' anno_row = "isSpike") +#' + + +# Generic heatmap function ---- +complex_heatmap <- function( + eset, # MSnSet object + clustering_distance = c("euclidean", "maximum", "manhattan", "canberra", + "binary", "minkowski", "pearson", "spearman", + "kendall"), + clustering_method = c("ward.D", "ward.D2", "single", "average", "complete", + "mcquitty", "median", "centroid"), + heatmap_type = "expression", + cor_method = c("pearson", "kendall", "spearman"), + cluster_columns = TRUE, + cluster_rows = TRUE, + show_column_dendrogram = TRUE, + show_row_dendrogram = TRUE, + show_column_names = FALSE, + show_row_names = FALSE, + heatmap_title = character(0), + heatmap_legend_title = NULL, + color_range = NULL, + heatmap_args = list(), + anno_column = NULL, + anno_row = NULL, + anno_column_titles = anno_column, + anno_row_titles = anno_row, + anno_column_colors = list(), + anno_row_colors = list(), + anno_args = list(), + filename = NULL, + height = 5, + width = 5, + draw_args = list() +) { + + ## Check input -------------------------------------------------- + # Match arguments + clustering_distance <- match.arg(clustering_distance) + clustering_method <- match.arg(clustering_method) + cor_method <- match.arg(cor_method) + + # Check for pData + if (!is.null(anno_column)) { + if (ncol(pData(eset)) == 0) { + stop("anno_column is provided, but pData(eset) is empty") + } + } + # check for fData + if (!is.null(anno_row)) { + if (ncol(fData(eset)) == 0) { + stop("anno_row is provided, but fData(eset) is empty") + } + } + + # Check that args are lists + if (any(!unlist(lapply(list(heatmap_args, anno_row_colors, + anno_column_colors, + anno_args, draw_args), + is.list)))) { + stop(paste("heatmap_args, anno_row_colors, anno_col_colors, anno_args,", + "and draw_args must be lists")) + } + + # Check color_range + if (!is.null(color_range)) { + if(!is.numeric(color_range) | length(color_range) != 2) { + stop(paste("color_range must be a numeric vector of length 2", + "specifying lower and upper bounds for heatmap colors")) + } + } + + # Check width and height + if (!is.numeric(height) | !is.numeric(width)) { + stop(paste("width and height must be numbers specifying the", + "width and height of the plot in inches")) + } + + # Check that all arguments passed as lists are valid + if (!all(names(heatmap_args) %in% names(formals(Heatmap)))) { + stop(paste("One or more elements of heatmap_args is not", + "an argument of ComplexHeatmap::Heatmap")) + } + if (!all(names(anno_args) %in% + 
names(formals(HeatmapAnnotation)))) { + stop(paste("One or more elements of anno_args is not", + "an argument of ComplexHeatmap::HeatmapAnnotation")) + } + # Not sure how to check draw_args because draw is a method + + # Check that the number of annotation titles is the same + # as the number of annotations + if(length(anno_column) != length(anno_column_titles)) { + stop(paste("length of anno_column is not the same as", + "length of anno_column_titles")) + } + if(length(anno_row) != length(anno_row_titles)) { + stop("length of anno_row is not the same as length of anno_row_titles") + } + + + # Data for heatmap ---------------------------------------------- + pttrn <- paste0("^", heatmap_type) + + if (grepl(pttrn, "expression")) { + x <- exprs(eset) + y <- pData(eset) + z <- fData(eset) + } else if (grepl(pttrn, "sample_correlation")) { + x <- cor(exprs(eset), exprs(eset), + use = "pairwise.complete.obs", + method = cor_method) + y <- z <- pData(eset) + anno_row <- anno_column # row annotation same as column annotation + } else if (grepl(pttrn, "feature_correlation")) { + x <- cor(t(exprs(eset)), t(exprs(eset)), + use = "pairwise.complete.obs", + method = cor_method) + y <- z <- fData(eset) + anno_column <- anno_row # column annotation same as row annotation + } else { + stop(paste("heatmap_type must be (an abbreviation of) either 'expression',", + "'sample_correlation', or 'feature_correlation'")) + } + + + # Row and column annotations ------------------------------------ + if (!is.null(anno_column)) { + # Use pData for columns + column_anno <- annotate_heatmap(y, anno = anno_column, + anno_names = anno_column_titles, + anno_colors = anno_column_colors, + choice = "column", anno_args = anno_args) + } else { + column_anno <- NULL # No annotation + } + if (!is.null(anno_row)) { + # Use fData for rows + row_anno <- annotate_heatmap(z, anno = anno_row, + anno_names = anno_row_titles, + anno_colors = anno_row_colors, + choice = "row", anno_args = anno_args) + } else { + row_anno <- NULL # No annotation + } + + + # Colors and color breaks for the heatmap body ------------------ + breaks_and_colors <- get_color_breaks(range(x, na.rm = TRUE), + user_breaks = color_range) + + breaks <- breaks_and_colors[["breaks"]] + colors <- breaks_and_colors[["colors"]] + + # Color function for heatmap body + col_fun <- circlize::colorRamp2(breaks = breaks, colors = colors) + + # Create heatmap ------------------------------------------------ + # Arguments that will be passed to ComplexHeatmap::Heatmap + heatmap_args <- list(matrix = x, + col = col_fun, + column_title = heatmap_title, + border = TRUE, # border around heatmap body + # do not trim row names + row_names_max_width = max_text_width(rownames(x)), + # do not trim column names + column_names_max_height = max_text_width(colnames(x)), + clustering_distance_rows = clustering_distance, + clustering_distance_columns = clustering_distance, + clustering_method_rows = clustering_method, + clustering_method_columns = clustering_method, + cluster_columns = cluster_columns, + cluster_rows = cluster_rows, + show_row_dend = show_row_dendrogram, + show_column_dend = show_column_dendrogram, + show_column_names = show_column_names, + show_row_names = show_row_names, + top_annotation = column_anno, + left_annotation = row_anno, + heatmap_legend_param = list( + title = heatmap_legend_title + ) + ) %>% + modifyList(val = heatmap_args, keep.null = TRUE) # update arguments + + # Create heatmap + ht <- do.call(what = Heatmap, args = heatmap_args) + + + # Save or display 
heatmap ----------------------------------------
+  if (!is.null(filename)) {
+    # Begin saving
+    save_plot(filename = filename, height = height, width = width)
+    # After plot is created, run dev.off
+    on.exit(expr = dev.off())
+  }
+  # Draw heatmap (and legends) on the open device
+  c(modifyList(list(object = ht,
+                    merge_legends = TRUE),
+               val = draw_args, keep.null = TRUE)) %>%
+    {do.call(what = draw, args = .)}
+}
+
+utils::globalVariables(".")
+
+
+## Helper functions ------------------------------------------------
+
+
+
+# Get appropriate color breaks for heatmap body ----
+# Set color_range for unique colors within full range of values
+get_color_breaks <- function(x, user_breaks = NULL) {
+
+  # If user_breaks is not provided, set to x
+  if (is.null(user_breaks)) {
+    user_breaks <- x
+  }
+  # Extend or reduce range for unique colors
+  lower <- sort(unique(c(x[1], user_breaks[1])))
+  n_lower <- length(lower) # 1 or 2
+  upper <- sort(unique(c(x[2], user_breaks[2])))
+  n_upper <- length(upper) # 1 or 2
+
+  if (all(sign(x) == +1 | sign(x) == 0)) { # All values non-negative
+    # at most 3 breaks and colors
+    breaks <- c(0, upper)
+    colors <- c("white", rep("red", n_upper))
+  } else if (all(sign(x) == -1 | sign(x) == 0)) { # All values non-positive
+    # at most 3 breaks and colors
+    breaks <- c(lower, 0)
+    colors <- c(rep("blue", n_lower), "white")
+  } else { # Mix of positive and negative values
+    # at most 5 breaks and colors
+    breaks <- c(lower, 0, upper)
+    colors <- c(rep("blue", n_lower), "white",
+                rep("red", n_upper))
+  }
+
+  return(list(breaks = breaks, colors = colors))
+}
+
+
+# Get colors for each column or row annotation ----
+# Use MSnSet.utils::jet2.colors for discrete variables and
+# scales::viridis_pal for continuous variables.
+#'
+#' @importFrom scales viridis_pal
+#'
+get_anno_colors <- function(x) {
+  # If x is numeric, use colorRamp2. Otherwise, use a discrete
+  # color palette with length equal to the number of unique groups.
+  if (is.numeric(x)) {
+    seq(min(x, na.rm = TRUE), max(x, na.rm = TRUE), length.out = 5) %>%
+      {circlize::colorRamp2(breaks = .,
+                            colors = rev(scales::viridis_pal()(length(.))))}
+  } else if (is.factor(x)) {
+    jet2.colors(nlevels(x))
+  } else {
+    jet2.colors(length(unique(x)))
+  }
+
+}
+
+utils::globalVariables(".")
+
+
+# Name annotation colors ----
+# x is a vector of colors or a color function from get_anno_colors
+# or supplied by the user.
y is the values from the data +set_anno_names <- function(x, y) { + if (!is.function(x)) { + if (is.factor(y)) { + names(x) <- levels(y) + } else { # character, logical + y <- unique(y) + y <- y[!is.na(y)] + names(x) <- y + } + } + + return(x) +} + + +# Create row or column annotations ---- +annotate_heatmap <- function(y, anno, anno_names = anno, + choice = c("column", "row"), + anno_colors = list(), + anno_args = list()) { + # Check input + choice <- match.arg(choice) + + # Data frame of annotation values + anno_df <- data.frame(y[, anno]) %>% + setNames(anno) + + # List of annotation colors + anno_col <- lapply(as.list(anno_df), get_anno_colors) %>% + # Replace colors if provided by user + modifyList(val = anno_colors, keep.null = TRUE) %>% + # Set names on colors + map2(.y = as.list(anno_df), .f = set_anno_names) + + + # Arguments passed to HeatmapAnnotation + anno_args <- list(df = anno_df, + col = anno_col, + annotation_label = anno_names, + border = FALSE, + which = choice, + # gp = gpar(col = "black"), + annotation_legend_param = list( + border = TRUE + ) + ) %>% + modifyList(val = anno_args, keep.null = TRUE) # Update arguments + + # Create HeatmapAnnotation object + return(do.call(what = HeatmapAnnotation, args = anno_args)) +} + + +# Save non-ggplot objects ---- +save_plot <- function(filename, height, width) { + # File extension + file_ext <- gsub(".*\\.([a-z])", "\\1", filename) + + # Check file extension + file_ext <- match.arg(file_ext, + choices = c("png", "bmp", "jpg", "tif", "pdf")) + + # Function to save plot + save_func <- switch(file_ext, + "png" = png, + "bmp" = bmp, + "jpg" = jpeg, + "tif" = tiff, + "pdf" = pdf) + + message(sprintf("Saving %g x %g inch heatmap as %s", + height, width, filename)) + + # arguments passed to save_func + save_args <- list(filename = filename, file = filename, + width = width, height = height, + res = 300, dpi = 300, + units = "in", compression = "lzw") %>% + # Subset to relevant arguments + .[names(.) %in% names(formals(save_func))] + + # Begin saving + do.call(what = save_func, args = save_args) +} + diff --git a/R/limma_contrasts.R b/R/limma_contrasts.R index 4b98ed3..cba9b8d 100644 --- a/R/limma_contrasts.R +++ b/R/limma_contrasts.R @@ -1,71 +1,87 @@ #' Test Custom Contrasts with LIMMA #' -#' A convenience wrapper for differential analysis with LIMMA using -#' custom contrasts. +#' A convenience wrapper for differential analysis with LIMMA using custom +#' contrasts. #' #' @param eset \code{eSet} (or most likely \code{eSet} subclass) object -#' @param model.str character; formulation of the model (e.g. -#' \code{"~ 0 + a + b"}). -#' This should be a no-intercept model (it should include 0 or -1 as -#' terms). See \code{\link[stats]{lm}} for more details. +#' @param model.str character; formulation of the model (e.g. \code{"~ 0 + a + +#' b"}). This should be a no-intercept model (it should include 0 or -1 as +#' terms). See \code{\link[stats]{lm}} for more details. #' @param coef.str character; coefficient of interest. One of -#' \code{colnames(pData(eset))}. E.g. \code{"a"}. -#' @param contrasts character; contrasts to test. All factor levels in -#' the contrasts should begin with the name of that factor. It is -#' recommended to use \code{\link{paircomp}} with the -#' \code{name} argument specified to create pairwise contrasts of the form -#' \code{"a-b"}. +#' \code{colnames(pData(eset))}. E.g. \code{"a"}. +#' @param contrasts character; contrasts to test. 
All factor levels in the +#' contrasts should begin with the name of that factor. It is recommended to +#' use \code{\link{paircomp}} with the \code{name} argument specified to +#' create pairwise contrasts of the form \code{"a-b"}. +#' @param stat character; the moderated test statistic to use. Either \code{"t"} +#' or \code{"F"}. #' @param var.group \code{NULL} or character; the column in \code{pData(eset)} -#' indicating groups that will have different relative quality weights. -#' The default (\code{NULL}), means that each sample will be weighted -#' equally. +#' indicating groups that will have different relative quality weights. The +#' default (\code{NULL}), means that each sample will be weighted equally. #' @param plot logical; whether to generate diagnostic plots. The default -#' (\code{TRUE}) generates a barplot of weights applied to -#' each sample and a plot of the mean-variance trend using -#' \code{\link[limma]{plotSA}}. +#' (\code{TRUE}) generates a barplot of weights applied to each sample and a +#' plot of the mean-variance trend using \code{\link[limma]{plotSA}}. +#' @param col.group character; the column in \code{pData(eset)} used to color +#' the text of the MDS plot. #' @param adjust.method method for p-value adjustment. Default is \code{"BH"} -#' (Benjamini-Hochberg). -#' @param remove_name if \code{TRUE} (the default) \code{coef.str} -#' will be removed from the contrasts. This simply improves readability, but -#' it will not work properly if \code{coef.str} is a substring of any group -#' name. +#' (Benjamini-Hochberg). See \code{\link[stats]{p.adjust}} for details. +#' @param remove_name if \code{TRUE} (the default) \code{coef.str} will be +#' removed from the contrasts. This simply improves readability, but it will +#' not work properly if \code{coef.str} is a substring of any group name. #' @param ... additional arguments passed to \code{\link[limma]{eBayes}}. #' -#' @return -#' \code{data.frame}. Basically the output of \code{\link[limma]{topTable}} -#' with additional columns \code{feature} and \code{contrast}. -#' -#' @details -#' A PCA plot is used to determine the appropriate value of \code{var.group}. -#' If samples within phenotype groups tend to cluster well and have similar -#' dispersions, the default \code{var.group = NULL} is recommended. If samples -#' within phenotype groups tend to cluster well, but different groups are more -#' or less dispersed, it may be beneficial to set \code{var.group} to the name -#' of the phenotype column. If one or more samples tend to cluster poorly with -#' samples of the same phenotype, it may be beneficial to weight by sample. That -#' is, set \code{var.group} to the name of the column in \code{pData(eset)} that -#' uniquely identifies each sample. If variation between samples or groups of -#' samples is biological rather than technical, weighting is not recommended. -#' -#' The plot of the mean-variance trend helps determine whether to fit a trend -#' line to the prior variance (default is \code{trend = FALSE}). It also helps -#' determine if the prior variance should be robustified against outlier sample -#' variances (\code{robust = TRUE}; default is \code{FALSE}). See -#' \code{\link[limma]{eBayes}} for more details. -#' -#' p-values are adjusted across all contrasts (globally). It is recommended to -#' adjust p-values from similar contrasts together. See the limma User's Guide -#' linked below for more information. +#' @return \code{data.frame}. 
Basically the output of +#' \code{\link[limma]{topTable}} with additional columns \code{feature} and +#' \code{contrast} (if \code{stat = "t"}). All columns from \code{fData} are +#' also included. +#' +#' @details An MDS plot is used to determine the appropriate value of +#' \code{var.group}. If samples within phenotype groups tend to cluster well +#' and have similar dispersions, the default \code{var.group = NULL} is +#' recommended. If samples within phenotype groups tend to cluster well, but +#' different groups are more or less dispersed, it may be beneficial to set +#' \code{var.group} to the name of the phenotype column. If one or more +#' samples tend to cluster poorly with samples of the same phenotype, it may +#' be beneficial to weight by sample. That is, set \code{var.group} to the +#' name of the column in \code{pData(eset)} that uniquely identifies each +#' sample. If variation between samples or groups of samples is biological +#' rather than technical, weighting is not recommended. +#' +#' The plot of the mean-variance trend helps determine whether to fit a trend +#' line to the prior variance (default is \code{trend = FALSE}). It also helps +#' determine if the prior variance should be robustified against outlier +#' sample variances (\code{robust = TRUE}; default is \code{FALSE}). See +#' \code{\link[limma]{eBayes}} for more details. +#' +#' p-values are adjusted across all contrasts (globally). It is recommended to +#' adjust p-values from similar contrasts together. See the "Multiple Testing +#' Across Contrasts" section of the LIMMA User's Guide (linked below) for more +#' information. #' #' @seealso -#' \href{https://bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf}{LIMMA User's Guide}, -#' \code{\link{limma_a_b}}, -#' \code{\link{limma_gen}}, -#' \code{\link{plot_pca}} +#' \href{https://bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf}{LIMMA +#' User's Guide}, \code{\link{limma_a_b}}, \code{\link{limma_gen}} #' +#' @references Liu, R., Holik, A. Z., Su, S., Jansz, N., Chen, K., Leong, H. S., +#' Blewitt, M. E., Asselin-Labat, M. L., Smyth, G. K., & Ritchie, M. E. +#' (2015). Why weight? Modelling sample and observational level variability +#' improves power in RNA-seq analyses. *Nucleic acids research*, *43*(15), +#' e97. \url{https://doi.org/10.1093/nar/gkv412} +#' +#' Phipson, B., Lee, S., Majewski, I. J., Alexander, W. S., & Smyth, G. K. +#' (2016). ROBUST HYPERPARAMETER ESTIMATION PROTECTS AGAINST HYPERVARIABLE +#' GENES AND IMPROVES POWER TO DETECT DIFFERENTIAL EXPRESSION. *The annals of +#' applied statistics*, *10*(2), 946--963. +#' \url{https://doi.org/10.1214/16-AOAS920} +#' +#' Smyth G. K. (2004). Linear models and empirical bayes methods for assessing +#' differential expression in microarray experiments. *Statistical +#' applications in genetics and molecular biology*, *3*, Article3. +#' \url{https://doi.org/10.2202/1544-6115.1027} +#' +#' @md #' #' @importFrom MSnbase exprs pData -#' @importFrom dplyr mutate select %>% everything #' @importFrom stats terms model.matrix p.adjust #' @importFrom data.table rbindlist #' @importFrom graphics barplot @@ -75,41 +91,50 @@ #' @export limma_contrasts #' #' @examples -#' library(MSnSet.utils) +#' # library(MSnSet.utils) #' data(cptac_oca) #' -#' # A no-intercept model is required +#' # A no-intercept model is required. 
Testing SUBTYPE contrasts +#' # with AGE as covariate #' model.str <- "~ 0 + SUBTYPE + AGE" #' coef.str <- "SUBTYPE" #' #' # Create contrasts #' contrasts <- paircomp(pData(oca.set)[, coef.str], name = coef.str) #' -#' # Basic specification -#' res1 <- limma_contrasts(oca.set, -#' model.str = model.str, +#' # Moderated t-statistics +#' res1 <- limma_contrasts(oca.set, model.str = model.str, #' coef.str = coef.str, #' contrasts = contrasts) #' -#' # Show diagnostic plots and robustify mean-variance trend. -#' res2 <- limma_contrasts(oca.set, model.str = model.str, coef.str = coef.str, -#' contrasts = contrasts, plot = TRUE, +#' # Moderated F-statistics +#' res2 <- limma_contrasts(oca.set, model.str = model.str, +#' coef.str = coef.str, +#' contrasts = contrasts, stat = "F") +#' +#' # Count number of significant (< 0.05) features +#' sum(res2$adj.P.Val < 0.05) +#' +#' # Robustify mean-variance trend (based on MDS plot) +#' res3 <- limma_contrasts(oca.set, model.str = model.str, +#' coef.str = coef.str, +#' contrasts = contrasts, #' trend = TRUE, robust = TRUE) #' #' # Count number of significant (< 0.05) features in each contrast -#' library(dplyr) -#' filter(res2, adj.P.Val < 0.05) %>% -#' group_by(contrast) %>% -#' tally() +#' table(res3$contrast, res3$adj.P.Val < 0.05) #' limma_contrasts <- function(eset, model.str, coef.str, contrasts, - var.group = NULL, plot = TRUE, - # label.by = NULL, color.by = NULL, + stat = c("t", "F"), var.group = character(0), + plot = FALSE, col.group = coef.str, adjust.method = "BH", remove_name = TRUE, ...) { + # test statistic + stat <- match.arg(arg = stat, choices = c("t", "F")) + model.formula <- eval(parse(text = model.str), envir = pData(eset)) if (attr(terms(model.formula), which = "intercept") != 0) { @@ -128,7 +153,7 @@ limma_contrasts <- function(eset, model.str, coef.str, contrasts, # If var.group is specified, determine group or sample-level # quality weights. Otherwise, set all weights to 1. - if (!is.null(var.group)) { + if (!identical(var.group, character(0))) { weights <- arrayWeights(eset, design, var.group = pData(eset)[, var.group]) } else { @@ -142,45 +167,56 @@ limma_contrasts <- function(eset, model.str, coef.str, contrasts, fit.smooth <- eBayes(fit.contr, ...) 
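+  # `...` is forwarded to limma::eBayes above, so the caller can supply
+  # arguments such as trend = TRUE or robust = TRUE (see the mean-variance
+  # trend discussion in @details).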
## Diagnostic plots - if (plot) { + if (plot) limma_diagnostics(eset, col.group, coef.str, weights, fit.smooth) + + # topTable for each contrast or for all contrasts + if (stat == "t") { coef <- contrasts } else { coef <- list(contrasts) } + res <- lapply( + coef, + function(contrast_i) { + x <- topTable(fit.smooth, coef = contrast_i, number = nrow(eset)) + x$feature <- rownames(x) # add feature column + # Add contrast column + if (stat == "t") { + x$contrast <- contrast_i + } + x + }) + res <- rbindlist(res) + + # Adjust p-values across all contrasts (global adjustment) + res$adj.P.Val <- p.adjust(res$P.Value, method = adjust.method) - # # Sample PCA plot - # plot_pca(eset, phenotype = color.by, label = label.by) + # Remove coef.str from the contrasts + if (remove_name & stat == "t") { + res$contrast <- gsub(coef.str, "", res$contrast) + } - # Barplot of sample weights - barplot(weights, space = 0, - xlab = "Sample", ylab = "Weight", - main = "Sample-specific weights") - abline(1, 0, lty = "longdash", col = "red") + return(res) +} - # Plot of the mean-variance trend - plotSA(fit.smooth, cex = 1, - main = "Mean-variance trend") - } +## Helper functions ---- - # topTable for each contrast - res <- lapply(contrasts, - function(contrast_i) { - topTable(fit.smooth, - coef = contrast_i, - number = nrow(eset)) %>% - mutate(feature = rownames(.), - contrast = contrast_i) - }) %>% - rbindlist() %>% - # Adjust p-values across all contrasts (global adjustment) - mutate(adj.P.Val = p.adjust(P.Value, method = adjust.method)) %>% - # Reorder columns - select(feature, contrast, everything()) +# diagnostic plots +limma_diagnostics <- function(eset, col.group, coef.str, weights, fit.smooth) { + # MDS plot + col.group <- as.factor(pData(eset)[, col.group]) + levels(col.group) <- jet2.colors(nlevels(col.group)) + col.group <- as.character(col.group) + plotMDS(exprs(eset), col = col.group, + labels = pData(eset)[, coef.str, drop = TRUE], + main = "Multidimensional Scaling Plot") - # Remove coef.str from the contrasts - if (remove_name) { - res$contrast <- gsub(coef.str, "", res$contrast) - } + # Barplot of sample weights + barplot(weights, space = 0, + xlab = "Sample", ylab = "Weight", + main = "Sample-specific weights") + abline(1, 0, lty = "longdash", col = "red") - return(res) + # Plot of the mean-variance trend + plotSA(fit.smooth, cex = 1, main = "Mean-variance trend") } -utils::globalVariables(c(".", "feature", "contrast", "P.Value")) + diff --git a/R/plot_pca.R b/R/plot_pca.R index af413c1..ea1e281 100644 --- a/R/plot_pca.R +++ b/R/plot_pca.R @@ -1,44 +1,44 @@ #' PCA Plot #' -#' A convenience function for plotting PCA scatter plot -#' for samples in ExpressionSet/MSnSet object. The biplot is essentially the -#' ggplot version of \code{\link[stats]{biplot.prcomp}}. +#' A convenience function for plotting PCA scatter plot for samples in +#' ExpressionSet/MSnSet object. The biplot is essentially the ggplot version of +#' \code{\link[stats]{biplot.prcomp}}. #' #' @param eset eset (or most likely eset subclass) object. #' @param phenotype \code{NULL} or string; one of \code{colnames(pData(eset))}. -#' This is used to color the points or labels, if \code{label} is not -#' \code{NULL}. Default is \code{NULL}, which colors all points or -#' labels black. -#' @param label \code{NULL} or string; one of \code{colnames(pData(eset))}. -#' If a string is provided, labels will be used instead of points. +#' This is used to color the points or labels, if \code{label} is not +#' \code{NULL}. 
Default is \code{NULL}, which colors all points or labels +#' black. +#' @param label \code{NULL} or string; one of \code{colnames(pData(eset))}. If a +#' string is provided, labels will be used instead of points. #' @param z_score logical; whether to convert values to Z-Scores by sample. -#' Default is \code{TRUE}. +#' Default is \code{TRUE}. #' @param standardize logical; if \code{TRUE} (default), the feature loadings -#' and scores are scaled in opposite directions by the standard deviations -#' of the principal components. This will produce a biplot similar to -#' \code{\link[stats]{biplot.prcomp}}. If \code{FALSE}, the result will -#' be similar to \code{\link[stats]{biplot.default}}. +#' and scores are scaled in opposite directions by the standard deviations of +#' the principal components. This will produce a biplot similar to +#' \code{\link[stats]{biplot.prcomp}}. If \code{FALSE}, the result will be +#' similar to \code{\link[stats]{biplot.default}}. #' @param show_ellipse logical; whether to show the confidence ellipses if -#' \code{phenotype} is not \code{NULL}. +#' \code{phenotype} is not \code{NULL}. #' @param components numeric; a vector of length two specifying the principal -#' components to plot. Default is \code{c(1, 2)}, which plots PC1 on the -#' x-axis and PC2 on the y-axis. Order matters. +#' components to plot. Default is \code{c(1, 2)}, which plots PC1 on the +#' x-axis and PC2 on the y-axis. Order matters. #' @param biplot logical; whether to display the biplot. #' @param biplot_labels \code{NULL} or string; the name of a column in -#' \code{fData(eset)} used to label the biplot features. If \code{NULL} -#' (default), \code{featureNames(eset)} is used. +#' \code{fData(eset)} used to label the biplot features. If \code{NULL} +#' (default), \code{featureNames(eset)} is used. #' @param num_features numeric; the number of most influential features from -#' each principal component to label. Default is \code{6}. -#' @param show_NA logical; whether to include samples with missing -#' phenotype information. Default is \code{TRUE}. +#' each principal component to label. Default is \code{6}. +#' @param show_NA logical; whether to include samples with missing phenotype +#' information. Default is \code{TRUE}. #' @param legend_title string; title of the plot legend. Defaults to -#' \code{phenodata}. +#' \code{phenodata}. #' @param arrow_args a list of arguments passed to -#' \code{\link[ggplot2]{geom_segment}} to modify the biplot arrows. +#' \code{\link[ggplot2]{geom_segment}} to modify the biplot arrows. #' @param label_args a list of arguments passed to -#' \code{\link[ggrepel]{geom_label_repel}} to modify the biplot labels. +#' \code{\link[ggrepel]{geom_label_repel}} to modify the biplot labels. #' @param ... additional arguments passed to \code{\link[ggplot2]{geom_point}} -#' or \code{\link[ggplot2]{geom_text}}, such as \code{size} and \code{pch}. +#' or \code{\link[ggplot2]{geom_text}}, such as \code{size} and \code{pch}. 
#' #' @return A ggplot object #' @@ -47,6 +47,7 @@ #' @importFrom Biobase exprs pData fData #' @importFrom ggrepel geom_label_repel #' @importFrom stats complete.cases prcomp +#' @importFrom utils modifyList #' #' @export plot_pca #' @@ -161,7 +162,7 @@ plot_pca <- function(eset, phenotype = NULL, label = NULL, z_score = TRUE, ## Visualization # Base plot - p <- ggplot(df.u, aes(x = df.u[, 1], y = df.u[, 2], color = colorBy)) + + p <- ggplot(data = df.u, mapping = aes(x = df.u[, 1], y = df.u[, 2], color = colorBy)) + geom_hline(yintercept = 0, lty = "longdash", color = "darkgrey") + geom_vline(xintercept = 0, lty = "longdash", color = "darkgrey") + labs(x = axis_labs[1], y = axis_labs[2]) + @@ -172,7 +173,7 @@ plot_pca <- function(eset, phenotype = NULL, label = NULL, z_score = TRUE, # beneath the layer of points or labels. if (show_ellipse & !is.numeric(colorBy)) { p <- p + - stat_ellipse(aes(fill = colorBy, color = NULL), + stat_ellipse(mapping = aes(fill = colorBy, color = NULL), geom = "polygon", type = "norm", level = 0.5, alpha = 0.1, show.legend = TRUE) } @@ -184,7 +185,7 @@ plot_pca <- function(eset, phenotype = NULL, label = NULL, z_score = TRUE, } else { labels <- pData(eset)[, label] p <- p + - geom_text(aes(label = labels), ...) + geom_text(mapping = aes(label = labels), ...) } # Set titles for color and fill legend @@ -224,20 +225,21 @@ plot_pca <- function(eset, phenotype = NULL, label = NULL, z_score = TRUE, sec.axis = sec_axis(~ . * ratio)) # Arguments for geom_segment - arrow_args <- list(aes(x = x, y = y, xend = xend, yend = yend), + arrow_args <- list(mapping = aes(x = x, y = y, xend = xend, yend = yend), arrow = arrow(length = unit(0.5, "line")), data = df.v, color = "red3") %>% # Allow user-supplied args to overwrite defaults - {c(.[!(names(.) %in% names(arrow_args))], arrow_args)} + modifyList(val = arrow_args, keep.null = TRUE) # Arguments for geom_label_repel label_args <- list(mapping = aes(x = xend, y = yend, label = labels), data = df.v, + color = arrow_args[["color"]], max.overlaps = Inf, min.segment.length = 0, fill = alpha("white", 0.5)) %>% # Allow user-supplied args to overwrite defaults - {c(.[!(names(.) %in% names(label_args))], label_args)} + modifyList(val = label_args, keep.null = TRUE) # Add segments with arrows and text labels p <- p + diff --git a/R/plot_sample_correlation_heatmap.R b/R/plot_sample_correlation_heatmap.R index 9d3e8fe..5ccf69b 100644 --- a/R/plot_sample_correlation_heatmap.R +++ b/R/plot_sample_correlation_heatmap.R @@ -1,55 +1,55 @@ -#' Heatmap for sample correlations -#' -#' Wrapper functions to generate correlation matrix plots -#' for MSnSet objects. There is a parameter for the phenotype to -#' order the columns (samples) by. -#' -#' @param m (MSnSet) -#' @param phenotype (character) name of the column to order by (usually plex) -#' -#' @param ... additional params to the heatmap call -#' -#' @return (list) plot object -#' -#' @importFrom MSnbase pData exprs -#' @importFrom dplyr %>% select -#' @importFrom pheatmap pheatmap -#' @importFrom stats cor heatmap -#' -#' @examples -#' -#' library(MSnSet.utils) -#' data(cptac_oca) -#' -#' m <- oca.set -#' -#' # Using base heatmap -#' plot_sample_correlation_heatmap(m, "Batch") -#' -#' # Using `pheatmap` (pretty heatmap) -#' plot_sample_correlation_pheatmap(m, "Batch") - -#' @export -#' @describeIn plot_sample_correlation_heatmap Using base heatmap -plot_sample_correlation_heatmap <- function(m, phenotype, ...) 
{ - m <- m[,order(pData(m)[,phenotype])] - x <- cor(exprs(m), use = "complete.obs") - x <- x[nrow(x):1, ] - heatmap(x, Rowv = NA, Colv = NA, - scale = "none", symm = TRUE, - ...) -} - - -#' @export -#' @describeIn plot_sample_correlation_heatmap Using the \code{pheatmap} package -plot_sample_correlation_pheatmap <- function(m, phenotype, ...) { - m <- m[,order(pData(m)[,phenotype])] - x <- cor(exprs(m), use = "complete.obs") - pheno = pData(m) %>% - select(!!phenotype) - pheatmap(x, - cluster_rows = FALSE, cluster_cols = FALSE, - annotation_col = pheno, annotation_row = pheno, - ...) -} +#' #' Heatmap for sample correlations +#' #' +#' #' Wrapper functions to generate correlation matrix plots +#' #' for MSnSet objects. There is a parameter for the phenotype to +#' #' order the columns (samples) by. +#' #' +#' #' @param m (MSnSet) +#' #' @param phenotype (character) name of the column to order by (usually plex) +#' #' +#' #' @param ... additional params to the heatmap call +#' #' +#' #' @return (list) plot object +#' #' +#' #' @importFrom MSnbase pData exprs +#' #' @importFrom dplyr %>% select +#' #' @importFrom pheatmap pheatmap +#' #' @importFrom stats cor heatmap +#' #' +#' #' @examples +#' #' +#' #' library(MSnSet.utils) +#' #' data(cptac_oca) +#' #' +#' #' m <- oca.set +#' #' +#' #' # Using base heatmap +#' #' plot_sample_correlation_heatmap(m, "Batch") +#' #' +#' #' # Using `pheatmap` (pretty heatmap) +#' #' plot_sample_correlation_pheatmap(m, "Batch") +#' +#' #' @export +#' #' @describeIn plot_sample_correlation_heatmap Using base heatmap +#' plot_sample_correlation_heatmap <- function(m, phenotype, ...) { +#' m <- m[,order(pData(m)[,phenotype])] +#' x <- cor(exprs(m), use = "complete.obs") +#' x <- x[nrow(x):1, ] +#' heatmap(x, Rowv = NA, Colv = NA, +#' scale = "none", symm = TRUE, +#' ...) +#' } +#' +#' +#' #' @export +#' #' @describeIn plot_sample_correlation_heatmap Using the \code{pheatmap} package +#' plot_sample_correlation_pheatmap <- function(m, phenotype, ...) { +#' m <- m[,order(pData(m)[,phenotype])] +#' x <- cor(exprs(m), use = "complete.obs") +#' pheno = pData(m) %>% +#' select(!!phenotype) +#' pheatmap(x, +#' cluster_rows = FALSE, cluster_cols = FALSE, +#' annotation_col = pheno, annotation_row = pheno, +#' ...) +#' } diff --git a/R/plot_upset.R b/R/plot_upset.R new file mode 100644 index 0000000..bbb173a --- /dev/null +++ b/R/plot_upset.R @@ -0,0 +1,248 @@ +#' @title Labeled UpSet Plot +#' +#' @description A wrapper around functions from +#' \code{\link[ComplexHeatmap]{ComplexHeatmap-package}}. Creates an +#' \href{https://doi.org/10.1109/TVCG.2014.2346248}{UpSet plot} with labeled +#' bars to visualize relationships between sets. UpSet plots are preferred to +#' Venn diagrams, especially when the number of intersections is large. +#' +#' @param ... Input sets (a list is recommended). See +#' \code{\link[ComplexHeatmap]{make_comb_mat}} for details. +#' @param mode The mode for forming the combination set, see Mode section of +#' \code{\link[ComplexHeatmap]{make_comb_mat}}. +#' @param top_n_comb Number of largest intersections to display. Defaults to all +#' intersections. +#' @param scale_set_bars logical; whether to scale set size bars so that their +#' lengths are proportional to the lengths of the intersection size bars. +#' @param row_labels vector of labels for the rows. Defaults to the names of the +#' input sets. +#' @param row_names_gp graphical parameters for row labels specified by +#' \code{\link[grid]{gpar}}. 
+#' @param annotation_name_gp graphical parameters for barplot titles specified +#' by \code{\link[grid]{gpar}}. +#' @param heatmap_args list of additional arguments passed to +#' \code{\link[ComplexHeatmap]{Heatmap}}. +#' @param cell_dim \code{\link[grid]{unit}} object used for both the height and +#' width of each heatmap cell. +#' @param extend numeric; multiply y-axis upper limit by (1 + `extend`). +#' Increase to prevent clipping of barplot labels and overall title. +#' @param rot numeric \[-360, +360\]; angle to rotate column barplot +#' labels. +#' @param vjust numeric \[0, 1\]; vertical justification of column barplot +#' labels. +#' @param hjust numeric \[0, 1\]; horizontal justification of column barplot +#' labels. +#' @param bar_label_gp graphical parameters for barplot labels specified by +#' \code{\link[grid]{gpar}}. +#' @param filename character; name of the file to save the plot. Must end in +#' ".png", ".bmp", ".jpg", ".tif", or ".pdf". If not provided, the UpSet plot +#' will be displayed instead of saved. +#' @param height numeric; height of the entire plot in inches. +#' @param width numeric; width of the entire plot in inches. +#' +#' @return An object of class \code{\link[ComplexHeatmap]{HeatmapList-class}} or +#' nothing (save to file instead). +#' +#' @note \code{\link[base]{split}} is useful for creating input lists. +#' +#' @seealso +#' \href{https://jokergoo.github.io/ComplexHeatmap-reference/book/upset-plot.html}{ComplexHeatmap +#' Complete Reference: UpSet Plot} +#' +#' @references Gu, Z., Eils, R., & Schlesner, M. (2016). Complex heatmaps reveal +#' patterns and correlations in multidimensional genomic data. +#' *Bioinformatics*, *32*(18), 2847-2849. +#' \url{https://doi.org/10.1093/bioinformatics/btw313} +#' +#' Lex, A., Gehlenborg, N., Strobelt, H., Vuillemot, R., & Pfister, H. (2014). +#' UpSet: Visualization of Intersecting Sets. *IEEE transactions on +#' visualization and computer graphics*, *20*(12), 1983-1992. 
+#' \url{https://doi.org/10.1109/TVCG.2014.2346248} +#' +#' @md +#' +#' @import ComplexHeatmap +#' @import grid +#' @importFrom grDevices bmp jpeg png tiff pdf dev.off +#' @importFrom utils modifyList +#' +#' @export plot_upset +#' +#' @examples +#' # Input list +#' set.seed(99) +#' x <- list(A = sample(letters, 5), +#' B = sample(letters, 10), +#' C = sample(letters, 15), +#' D = sample(letters, 20)) +#' +#' # Base plot +#' plot_upset(x) +#' +#' # Change label rotation and center horizontally +#' plot_upset(x, rot = 0, hjust = 0.5) +#' +#' # Top 2 largest intersections +#' plot_upset(x, top_n_comb = 2) +#' +#' # Add overall title +#' plot_upset(x, heatmap_args = list(column_title = "Title")) +#' +#' \dontrun{ +#' # Save to 3.5" by 7" PDF +#' plot_upset(x, filename = "upset_plot.pdf") +#' } +#' + + +plot_upset <- function(..., + mode = c("distinct", "intersect", "union"), + top_n_comb = Inf, + scale_set_bars = FALSE, + row_labels = character(0), + row_names_gp = gpar(fontsize = 10), + annotation_name_gp = gpar(fontsize = 10, + fontface = "bold"), + heatmap_args = list(), + cell_dim = unit(10, "mm"), + extend = 0.2, rot = 45, hjust = 0, vjust = 0, + bar_label_gp = gpar(fontsize = 10), + filename = character(0), height = 3.5, width = 7) { + + # Make combination matrix and calculate size of intersections and sets + m <- make_comb_mat(..., mode = mode) + cs <- comb_size(m) # size of combinations + ss <- set_size(m) # size of full sets + + # Subset m to top_n_sets + m <- m[, order(-cs)[1:min(top_n_comb, length(cs))]] + cs <- comb_size(m) # recalculate cs (do not change ss) + + # Ratio of max set size to max combination size or 1 + bar_ratio <- ifelse(scale_set_bars, max(ss) / max(cs), 1) + + # NOTE: Eventually may need to add comb_order and set_order options + row_order <- 1:length(ss) + column_order <- order(-cs) + + # Default row labels + if (identical(row_labels, character(0))) { + row_labels <- rownames(m) + } + + # Arguments passed to ComplexHeatmap::Heatmap -------------------------------- + + # NOTE: + # I had to use Heatmap instead of UpSet because do.call failed when I + # tried to modify the column barplots, despite me being able to modify the + # row barplots without issue. Both row and column annotations are + # S4 objects, but it only complained about the column annotation. + # The layer_fun code is taken from UpSet. 
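+  # A quick sketch of the pieces the code below relies on (comments only,
+  # assuming a small named list like the one in @examples):
+  #   m <- make_comb_mat(list(A = 1:5, B = 3:10), mode = "distinct")
+  #   comb_size(m)    # named vector: size of each combination (intersection)
+  #   set_size(m)     # named vector: size of each full input set
+  #   pindex(m, i, j) # 0/1: whether set i is part of combination j
+  # layer_fun(j, i, x, y, w, h, fill) is called by Heatmap() with the row and
+  # column indices of the drawn cells plus their viewport positions and sizes;
+  # it is used here to draw the stripes, dots, and connecting segments.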
+ + heatmap_args <- modifyList( + x = list( + matrix = m, + row_labels = row_labels, + row_names_gp = row_names_gp, + cluster_rows = FALSE, cluster_columns = FALSE, + row_names_side = "left", + row_names_max_width = max_text_width(row_labels), # do not trim row names + show_heatmap_legend = FALSE, + row_order = row_order, + column_order = column_order, + height = length(ss) * cell_dim, + width = length(cs) * cell_dim, + rect_gp = gpar(type = "none"), + layer_fun = function(j, i, x, y, w, h, fill) { + n_row <- round(1 / as.numeric(h[1])) # num rows + n_col <- round(1 / as.numeric(w[1])) # num columns + subm <- matrix(pindex(m, i, j), nrow = n_row, byrow = FALSE) + for (k in seq_len(n_row)) { + # Even rows are grey, odd are white (no color) + if (k %% 2) { + grid.rect(y = k / n_row, height = 1 / n_row, just = "top", + gp = gpar(fill = "#f0f0f0", col = NA)) + } + } + # Black or light grey points + grid.points(x, y, size = unit(min(3, as.numeric(cell_dim)), "mm"), + pch = 16, + gp = gpar(col = ifelse(pindex(m, i, j), "black", "#cccccc"))) + # For each column k, find indices of entries that are 1, and draw a + # segment from the min to the max + for (k in seq_len(n_col)) { + # if (sum(subm[, k]) >= 2) { # Intersection of 2 or more groups + i_range <- range(which(subm[, k] == 1)) # segment start and end + grid.lines(c(k - 0.5, k - 0.5) / n_col, # center on column + (n_row - i_range + 0.5) / n_row, # center on row + gp = gpar(col = "black", lwd = 2)) + # } + } + }, + top_annotation = HeatmapAnnotation( + "Intersection Size" = anno_barplot( + x = cs, + extend = extend, + border = FALSE, + gp = gpar(fill = "black"), + height = unit(1.2, "in"), + which = "column", + axis = FALSE), + # annotation_label changes the axis title + annotation_name_gp = annotation_name_gp, + annotation_name_side = "left", + annotation_name_rot = 90), + right_annotation = HeatmapAnnotation( + "Set Size" = anno_barplot( + x = ss, + extend = extend, + border = FALSE, + gp = gpar(fill = "black"), + width = unit(1.2, "in") * bar_ratio, + which = "row", + axis = FALSE), + which = "row", + annotation_name_gp = annotation_name_gp, + annotation_name_side = "bottom", + annotation_name_rot = 0 + )), + val = heatmap_args, keep.null = TRUE) # modify args + + ## Save to file if filename is provided -------------------------------------- + if (!identical(filename, character(0))) { + save_plot(filename = filename, height = height, width = width) + # After plot is created, run dev.off + on.exit(expr = dev.off()) + } + + # UpSet plot ----------------------------------------------------------------- + ht <- do.call(what = Heatmap, args = heatmap_args) + draw(ht) + + # Update row and column order if changed by user + column_order <- heatmap_args$column_order + row_order <- rev(heatmap_args$row_order) # reverse for labels + + # Add Intersection Size labels + decorate_annotation(annotation = "Intersection Size", { + grid.text(cs[column_order], + x = seq_along(cs), + y = unit(cs[column_order], "native") + unit(3, "pt"), + vjust = vjust, hjust = hjust, rot = rot, + default.units = "native", #just = c("left", "top"), + gp = bar_label_gp) + }) + + # Add Set Size labels + decorate_annotation(annotation = "Set Size", { + grid.text(label = ss[row_order], + x = unit(ss[row_order], "native") + unit(3, "pt"), + y = seq(1 / (length(ss) * 2), 1 - 1 / (length(ss) * 2), + length.out = length(ss)), + hjust = 0, + gp = bar_label_gp) + }) +} + + + diff --git a/R/plot_volcano.R b/R/plot_volcano.R index 3fda6c4..7edd71c 100644 --- a/R/plot_volcano.R +++ 
b/R/plot_volcano.R @@ -2,41 +2,38 @@ #' #' @description A convenience function for creating a volcano plot. #' -#' #' @param df \code{data.frame} or object that can be coerced to a -#' \code{data.frame} +#' \code{data.frame} #' @param logFC character; the name of the column in \code{df} containing log2 -#' fold-changes. +#' fold-changes. #' @param pvals character; the name of the column in \code{df} containing -#' p-values (adjusted or not). -#' @param sig_threshold \code{NULL} (default) or numeric between 0 and 1; -#' the threshold indicating statistical significance. Creates a dashed -#' horizontal line at that value. Any points above this line in the graph -#' are statistically significant. +#' p-values (adjusted or not). +#' @param sig_threshold \code{NULL} (default) or numeric between 0 and 1; the +#' threshold indicating statistical significance. Creates a dashed horizontal +#' line at that value. Any points above this line in the graph are +#' statistically significant. #' @param label \code{NULL} or character; the name of the column in \code{df} -#' used to label the top most significant features. See details for more. +#' used to label the top most significant features. See details for more. #' @param num_features numeric; the number of most significant features to -#' label. Default is 8. +#' label. Default is 8. #' @param point_args a list of arguments passed to -#' \code{\link[ggplot2]{geom_point}}. +#' \code{\link[ggplot2]{geom_point}}. #' @param label_args a list of arguments passed to -#' \code{\link[ggrepel]{geom_label_repel}}. +#' \code{\link[ggrepel]{geom_label_repel}}. #' #' -#' @details -#' \code{sig_threshold} will create a secondary axis if the threshold is not -#' one of the existing y-axis breaks. +#' @details \code{sig_threshold} will create a secondary axis if the threshold +#' is not one of the existing y-axis breaks. #' -#' To label specific features (not top \code{num_features}), \code{label} -#' needs to be the name of a column where all but the features that will be -#' labeled are \code{NA}. Also, set \code{num_features} to \code{nrow(df)}. +#' To label specific features (not top \code{num_features}), \code{label} +#' needs to be the name of a column where all but the features that will be +#' labeled are \code{NA}. Also, set \code{num_features} to \code{nrow(df)}. #' #' -#' @return -#' A ggplot object. +#' @return A ggplot object. #' #' @examples -#' library(ggplot2) # for labs +#' library(ggplot2) # additional plot modifications #' library(MSnSet.utils) #' data("cptac_oca") #' @@ -69,11 +66,13 @@ #' #' @importFrom dplyr %>% rename sym arrange select mutate slice_min left_join #' @importFrom ggplot2 ggplot aes labs theme_bw expansion sec_axis -#' scale_y_continuous geom_hline +#' scale_y_continuous geom_hline #' @importFrom ggrepel geom_label_repel #' @importFrom scales pretty_breaks alpha trans_new log_breaks +#' @importFrom utils modifyList #' #' @export plot_volcano +#' plot_volcano <- function(df, logFC, pvals, sig_threshold = NULL, @@ -122,8 +121,8 @@ plot_volcano <- function(df, logFC, pvals, sig_threshold = NULL, # transform, and round to 1 significant digit. Use curly # braces to prevent piping to first argument. breaks <- min(df$pvals, na.rm = TRUE) %>% - ifelse(. > 1e-3, 1e-3, .) %>% - {signif(10 ^ (pretty_breaks()(0 : floor(log10(.) * 2.16)) / 2), 1)} + # ifelse(. > 1e-3, 1e-3, .) %>% # set base range + {signif(10^(pretty_breaks()(0:floor(log10(.) 
* 2.16)) / 2), 1)} # y-axis labels labels <- ifelse(breaks > 1e-3, @@ -133,7 +132,7 @@ plot_volcano <- function(df, logFC, pvals, sig_threshold = NULL, # Arguments for geom_point point_args <- list(na.rm = TRUE) %>% # Allow user-supplied args to overwrite defaults - {c(.[!(names(.) %in% names(point_args))], point_args)} + modifyList(val = point_args, keep.null = TRUE) # Base plot p <- ggplot(data = df, mapping = aes(x = logFC, y = pvals)) + @@ -175,8 +174,7 @@ plot_volcano <- function(df, logFC, pvals, sig_threshold = NULL, } # Modify y axis - p <- p + - do.call(what = scale_y_continuous, args = scale_args) + + p <- p + do.call(what = scale_y_continuous, args = scale_args) + # Dashed line indicating significance cutoff. # If NULL, no line is plotted. geom_hline(yintercept = sig_threshold, @@ -187,7 +185,7 @@ plot_volcano <- function(df, logFC, pvals, sig_threshold = NULL, # Select top most significant rows to label label_df <- df %>% - select(pvals, !!sym(label)) %>% + # select(pvals, !!sym(label)) %>% mutate(feature_labels = !!sym(label)) %>% slice_min(order_by = pvals, n = num_features) @@ -204,11 +202,10 @@ plot_volcano <- function(df, logFC, pvals, sig_threshold = NULL, na.rm = TRUE ) %>% # Allow user-supplied args to overwrite defaults - {c(.[!(names(.) %in% names(label_args))], label_args)} + modifyList(val = label_args, keep.null = TRUE) # Add labels - p <- p + - do.call(what = geom_label_repel, args = label_args) + p <- p + do.call(what = geom_label_repel, args = label_args) } return(p) diff --git a/man/complex_heatmap.Rd b/man/complex_heatmap.Rd new file mode 100644 index 0000000..9b2c234 --- /dev/null +++ b/man/complex_heatmap.Rd @@ -0,0 +1,182 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/complex_heatmap.R +\name{complex_heatmap} +\alias{complex_heatmap} +\title{Annotated Expression or Correlation Heatmaps} +\usage{ +complex_heatmap( + eset, + clustering_distance = c("euclidean", "maximum", "manhattan", "canberra", "binary", + "minkowski", "pearson", "spearman", "kendall"), + clustering_method = c("ward.D", "ward.D2", "single", "average", "complete", + "mcquitty", "median", "centroid"), + heatmap_type = "expression", + cor_method = c("pearson", "kendall", "spearman"), + cluster_columns = TRUE, + cluster_rows = TRUE, + show_column_dendrogram = TRUE, + show_row_dendrogram = TRUE, + show_column_names = FALSE, + show_row_names = FALSE, + heatmap_title = character(0), + heatmap_legend_title = NULL, + color_range = NULL, + heatmap_args = list(), + anno_column = NULL, + anno_row = NULL, + anno_column_titles = anno_column, + anno_row_titles = anno_row, + anno_column_colors = list(), + anno_row_colors = list(), + anno_args = list(), + filename = NULL, + height = 5, + width = 5, + draw_args = list() +) +} +\arguments{ +\item{eset}{an object of class \code{\link[Biobase]{eSet}} or +\code{\link[MSnbase]{MSnSet-class}}.} + +\item{clustering_distance}{character; the distance measure used to cluster +rows and columns. Passed to \code{\link[stats]{dist}}. One of +(\code{"euclidean"}, \code{"maximum"}, \code{"manhattan"}, +\code{"canberra"}, \code{"binary"}, \code{"minkowski"}, \code{"pearson"}, +\code{"spearman"}, or \code{"kendall"}). Default is \code{"euclidean"}.} + +\item{clustering_method}{character; the agglomeration method used to cluster +rows and columns. Passed to \code{\link[stats]{hclust}}. One of +(\code{"ward.D"}, \code{"ward.D2"}, \code{"single"}, \code{"average"}, +\code{"complete"}, \code{"mcquitty"}, \code{"median"}, or +\code{"centroid"}). 
Default is \code{"ward.D"}.} + +\item{heatmap_type}{character; the type of heatmap to generate. Must be (an +abbreviation of) either \code{"expression"}, \code{"sample_correlation"}, +or \code{"feature_correlation"}. The default (\code{"expression"}) +generates a heatmap with features as rows and samples as columns. The other +options calculate the sample or feature correlation matrix and generate a +heatmap with samples or features as both rows and columns.} + +\item{cor_method}{character; the method used to generate the correlation +matrix if \code{heatmap_type} is a correlation heatmap. One of +(\code{"pearson"}, \code{"kendall"}, or \code{"spearman"}). Defaults to +\code{"pearson"}. Passed to \code{\link[stats]{cor}}.} + +\item{cluster_columns}{logical; whether to cluster the columns.} + +\item{cluster_rows}{logical; whether to cluster the rows.} + +\item{show_column_dendrogram}{logical; whether to show the column dendrogram. +Does not affect clustering.} + +\item{show_row_dendrogram}{similar to \code{show_column_dendrogram}, but for +rows.} + +\item{show_column_names}{logical; whether to show the column names.} + +\item{show_row_names}{logical; whether to show the row names.} + +\item{heatmap_title}{character; overall title for the heatmap.} + +\item{heatmap_legend_title}{character; title of the heatmap color legend.} + +\item{color_range}{numeric; vector of length 2 used to restrict the colors of +the heatmap body. Useful when color differences are not easily discernible +due to outliers.} + +\item{heatmap_args}{list of arguments passed to +\code{\link[ComplexHeatmap]{Heatmap}}.} + +\item{anno_column}{character; one or more column names of \code{pData(eset)} +used to annotate the columns of the heatmap. By default, columns are not +annotated.} + +\item{anno_row}{character; one or more column names of \code{fData(eset)} +used to annotate the rows of the heatmap. By default, rows are not +annotated.} + +\item{anno_column_titles}{character; names for the column annotations, if +different from \code{anno_column}. Must be the same length as +\code{anno_column}.} + +\item{anno_row_titles}{similar to \code{anno_column_titles}, but for row +annotations.} + +\item{anno_column_colors}{list of custom colors for column annotations +specified by \code{anno_column}. List names must match one or more names in +\code{anno_column}. If modifying the colors of a continuous column +annotation, \strong{always use \code{\link[circlize]{colorRamp2}}} with desired +breaks and colors. Otherwise, pass a vector of unique colors (order +matters).} + +\item{anno_row_colors}{same as \code{anno_column_colors}, but for +\code{anno_row}.} + +\item{anno_args}{list of arguments passed to +\code{\link[ComplexHeatmap]{HeatmapAnnotation}}.} + +\item{filename}{character; file name used to save the heatmap. Must end in +(".png", ".bmp", ".jpg", ".tif", or ".pdf").} + +\item{height}{numeric; height of heatmap in inches. Default is \code{5}.} + +\item{width}{numeric; width of heatmap in inches. Default is \code{5}.} + +\item{draw_args}{list of arguments passed to +\code{\link[ComplexHeatmap]{draw-HeatmapList-method}}. Unlikely to be used +often.} +} +\value{ +An object of class \code{\link[ComplexHeatmap]{HeatmapList-class}} or +nothing (save to file instead). +} +\description{ +A wrapper around functions from +\code{\link[ComplexHeatmap]{ComplexHeatmap-package}}. Creates expression or +correlation heatmaps from \code{eSet} or \code{MSnSet} objects. 
+} +\note{ +If the \code{hclust} error \verb{NA/NaN/Inf in foreign function call} occurs, it +means there are cases where two or more features are not present in the +same samples, so their distances will be \code{NA}, and it is impossible to +perform hierarchical clustering. If this happens, filter to features +present in more than 50\% of samples with \code{m[rowMeans(!is.na(m)) > 0.5, ]}, +where \code{m} is the \code{MSnSet}. +} +\examples{ +# library(MSnSet.utils) +data(longitudinal_biomarker_study) # MSnSet +ee <- longitudinal_biomarker_study +# Missingness filter - clustering may fail otherwise +ee <- ee[rowMeans(!is.na(exprs(ee))) >= 0.5, ] + +# Expression heatmap +complex_heatmap(ee) + +# Limit color range to see differences more easily +complex_heatmap(ee, color_range = c(-2, 2)) + +# Sample correlation heatmap +complex_heatmap(ee, heatmap_type = "s") + +# Annotate columns by "Type" (categorical) and "Age" (continuous) +# Annotate rows by "isSpike" (logical) +complex_heatmap(ee, anno_column = c("Type", "Age"), + anno_row = "isSpike") + +} +\references{ +Gu, Z., Eils, R., & Schlesner, M. (2016). Complex heatmaps reveal +patterns and correlations in multidimensional genomic data. +\emph{Bioinformatics}, \emph{32}(18), 2847-2849. +\url{https://doi.org/10.1093/bioinformatics/btw313} + +Gu, Z., Gu, L., Eils, R., Schlesner, M., & Brors, B. (2014). Circlize +implements and enhances circular visualization in R. \emph{Bioinformatics}, +\emph{30}(19), 2811-2812. \url{https://doi.org/10.1093/bioinformatics/btu393} +} +\seealso{ +\href{https://jokergoo.github.io/ComplexHeatmap-reference/book/}{ComplexHeatmap +Complete Reference} +} diff --git a/man/limma_contrasts.Rd b/man/limma_contrasts.Rd index 1444026..bfdc6a1 100644 --- a/man/limma_contrasts.Rd +++ b/man/limma_contrasts.Rd @@ -9,8 +9,10 @@ limma_contrasts( model.str, coef.str, contrasts, - var.group = NULL, - plot = TRUE, + stat = c("t", "F"), + var.group = character(0), + plot = FALSE, + col.group = coef.str, adjust.method = "BH", remove_name = TRUE, ... @@ -19,102 +21,129 @@ limma_contrasts( \arguments{ \item{eset}{\code{eSet} (or most likely \code{eSet} subclass) object} -\item{model.str}{character; formulation of the model (e.g. -\code{"~ 0 + a + b"}). -This should be a no-intercept model (it should include 0 or -1 as +\item{model.str}{character; formulation of the model (e.g. \code{"~ 0 + a + + b"}). This should be a no-intercept model (it should include 0 or -1 as terms). See \code{\link[stats]{lm}} for more details.} \item{coef.str}{character; coefficient of interest. One of \code{colnames(pData(eset))}. E.g. \code{"a"}.} -\item{contrasts}{character; contrasts to test. All factor levels in -the contrasts should begin with the name of that factor. It is -recommended to use \code{\link{paircomp}} with the -\code{name} argument specified to create pairwise contrasts of the form -\code{"a-b"}.} +\item{contrasts}{character; contrasts to test. All factor levels in the +contrasts should begin with the name of that factor. It is recommended to +use \code{\link{paircomp}} with the \code{name} argument specified to +create pairwise contrasts of the form \code{"a-b"}.} + +\item{stat}{character; the moderated test statistic to use. Either \code{"t"} +or \code{"F"}.} \item{var.group}{\code{NULL} or character; the column in \code{pData(eset)} -indicating groups that will have different relative quality weights. 
-The default (\code{NULL}), means that each sample will be weighted
-equally.}
+indicating groups that will have different relative quality weights. The
+default (\code{NULL}) means that each sample will be weighted equally.}

 \item{plot}{logical; whether to generate diagnostic plots. The default
-(\code{TRUE}) generates a barplot of weights applied to
-each sample and a plot of the mean-variance trend using
-\code{\link[limma]{plotSA}}.}
+(\code{FALSE}) generates no plots. If \code{TRUE}, a barplot of the weights
+applied to each sample and a plot of the mean-variance trend using
+\code{\link[limma]{plotSA}} are generated.}
+
+\item{col.group}{character; the column in \code{pData(eset)} used to color
+the text of the MDS plot.}

 \item{adjust.method}{method for p-value adjustment. Default is \code{"BH"}
-(Benjamini-Hochberg).}
+(Benjamini-Hochberg). See \code{\link[stats]{p.adjust}} for details.}

-\item{remove_name}{if \code{TRUE} (the default) \code{coef.str}
-will be removed from the contrasts. This simply improves readability, but
-it will not work properly if \code{coef.str} is a substring of any group
-name.}
+\item{remove_name}{if \code{TRUE} (the default), \code{coef.str} will be
+removed from the contrasts. This simply improves readability, but it will
+not work properly if \code{coef.str} is a substring of any group name.}

 \item{...}{additional arguments passed to \code{\link[limma]{eBayes}}.}
 }
 \value{
-\code{data.frame}. Basically the output of \code{\link[limma]{topTable}}
-with additional columns \code{feature} and \code{contrast}.
+\code{data.frame}. Basically the output of
+\code{\link[limma]{topTable}} with additional columns \code{feature} and
+\code{contrast} (if \code{stat = "t"}). All columns from \code{fData} are
+also included.
 }
 \description{
-A convenience wrapper for differential analysis with LIMMA using
-custom contrasts.
+A convenience wrapper for differential analysis with LIMMA using custom
+contrasts.
 }
 \details{
-A PCA plot is used to determine the appropriate value of \code{var.group}.
-If samples within phenotype groups tend to cluster well and have similar
-dispersions, the default \code{var.group = NULL} is recommended. If samples
-within phenotype groups tend to cluster well, but different groups are more
-or less dispersed, it may be beneficial to set \code{var.group} to the name
-of the phenotype column. If one or more samples tend to cluster poorly with
-samples of the same phenotype, it may be beneficial to weight by sample. That
-is, set \code{var.group} to the name of the column in \code{pData(eset)} that
-uniquely identifies each sample. If variation between samples or groups of
-samples is biological rather than technical, weighting is not recommended.
+An MDS plot is used to determine the appropriate value of
+\code{var.group}. If samples within phenotype groups tend to cluster well
+and have similar dispersions, the default \code{var.group = NULL} is
+recommended. If samples within phenotype groups tend to cluster well, but
+different groups are more or less dispersed, it may be beneficial to set
+\code{var.group} to the name of the phenotype column. If one or more
+samples tend to cluster poorly with samples of the same phenotype, it may
+be beneficial to weight by sample. That is, set \code{var.group} to the
+name of the column in \code{pData(eset)} that uniquely identifies each
+sample. If variation between samples or groups of samples is biological
+rather than technical, weighting is not recommended.
+
 The plot of the mean-variance trend helps determine whether to fit a trend
 line to the prior variance (default is \code{trend = FALSE}). It also helps
-determine if the prior variance should be robustified against outlier sample
-variances (\code{robust = TRUE}; default is \code{FALSE}). See
+determine if the prior variance should be robustified against outlier
+sample variances (\code{robust = TRUE}; default is \code{FALSE}). See
 \code{\link[limma]{eBayes}} for more details.

 p-values are adjusted across all contrasts (globally). It is recommended to
-adjust p-values from similar contrasts together. See the limma User's Guide
-linked below for more information.
+adjust p-values from similar contrasts together. See the "Multiple Testing
+Across Contrasts" section of the LIMMA User's Guide (linked below) for more
+information.
 }
 \examples{
-library(MSnSet.utils)
+# library(MSnSet.utils)
 data(cptac_oca)

-# A no-intercept model is required
+# A no-intercept model is required. Testing SUBTYPE contrasts
+# with AGE as covariate
 model.str <- "~ 0 + SUBTYPE + AGE"
 coef.str <- "SUBTYPE"

 # Create contrasts
 contrasts <- paircomp(pData(oca.set)[, coef.str], name = coef.str)

-# Basic specification
-res1 <- limma_contrasts(oca.set,
-                        model.str = model.str,
+# Moderated t-statistics
+res1 <- limma_contrasts(oca.set, model.str = model.str,
                         coef.str = coef.str, contrasts = contrasts)

-# Show diagnostic plots and robustify mean-variance trend.
-res2 <- limma_contrasts(oca.set, model.str = model.str, coef.str = coef.str,
-                        contrasts = contrasts, plot = TRUE,
+# Moderated F-statistics
+res2 <- limma_contrasts(oca.set, model.str = model.str,
+                        coef.str = coef.str,
+                        contrasts = contrasts, stat = "F")
+
+# Count number of significant (< 0.05) features
+sum(res2$adj.P.Val < 0.05)
+
+# Robustify mean-variance trend (based on the mean-variance trend plot)
+res3 <- limma_contrasts(oca.set, model.str = model.str,
+                        coef.str = coef.str,
+                        contrasts = contrasts, trend = TRUE,
                         robust = TRUE)

 # Count number of significant (< 0.05) features in each contrast
-library(dplyr)
-filter(res2, adj.P.Val < 0.05) \%>\%
-  group_by(contrast) \%>\%
-  tally()
+table(res3$contrast, res3$adj.P.Val < 0.05)
+}
+\references{
+Liu, R., Holik, A. Z., Su, S., Jansz, N., Chen, K., Leong, H. S.,
+Blewitt, M. E., Asselin-Labat, M. L., Smyth, G. K., & Ritchie, M. E.
+(2015). Why weight? Modelling sample and observational level variability
+improves power in RNA-seq analyses. \emph{Nucleic acids research}, \emph{43}(15),
+e97. \url{https://doi.org/10.1093/nar/gkv412}
+
+Phipson, B., Lee, S., Majewski, I. J., Alexander, W. S., & Smyth, G. K.
+(2016). Robust hyperparameter estimation protects against hypervariable
+genes and improves power to detect differential expression. \emph{The annals of
+applied statistics}, \emph{10}(2), 946--963.
+\url{https://doi.org/10.1214/16-AOAS920}
+
+Smyth, G. K. (2004). Linear models and empirical Bayes methods for assessing
+differential expression in microarray experiments. \emph{Statistical
+applications in genetics and molecular biology}, \emph{3}, Article 3.
+\url{https://doi.org/10.2202/1544-6115.1027} } \seealso{ -\href{https://bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf}{LIMMA User's Guide}, -\code{\link{limma_a_b}}, -\code{\link{limma_gen}}, -\code{\link{plot_pca}} +\href{https://bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf}{LIMMA +User's Guide}, \code{\link{limma_a_b}}, \code{\link{limma_gen}} } diff --git a/man/plot_pca.Rd b/man/plot_pca.Rd index c8854b1..76d0374 100644 --- a/man/plot_pca.Rd +++ b/man/plot_pca.Rd @@ -27,11 +27,11 @@ plot_pca( \item{phenotype}{\code{NULL} or string; one of \code{colnames(pData(eset))}. This is used to color the points or labels, if \code{label} is not -\code{NULL}. Default is \code{NULL}, which colors all points or -labels black.} +\code{NULL}. Default is \code{NULL}, which colors all points or labels +black.} -\item{label}{\code{NULL} or string; one of \code{colnames(pData(eset))}. -If a string is provided, labels will be used instead of points.} +\item{label}{\code{NULL} or string; one of \code{colnames(pData(eset))}. If a +string is provided, labels will be used instead of points.} \item{z_score}{logical; whether to convert values to Z-Scores by sample. Default is \code{TRUE}.} @@ -50,16 +50,16 @@ x-axis and PC2 on the y-axis. Order matters.} (default), \code{featureNames(eset)} is used.} \item{standardize}{logical; if \code{TRUE} (default), the feature loadings -and scores are scaled in opposite directions by the standard deviations -of the principal components. This will produce a biplot similar to -\code{\link[stats]{biplot.prcomp}}. If \code{FALSE}, the result will -be similar to \code{\link[stats]{biplot.default}}.} +and scores are scaled in opposite directions by the standard deviations of +the principal components. This will produce a biplot similar to +\code{\link[stats]{biplot.prcomp}}. If \code{FALSE}, the result will be +similar to \code{\link[stats]{biplot.default}}.} \item{num_features}{numeric; the number of most influential features from each principal component to label. Default is \code{6}.} -\item{show_NA}{logical; whether to include samples with missing -phenotype information. Default is \code{TRUE}.} +\item{show_NA}{logical; whether to include samples with missing phenotype +information. Default is \code{TRUE}.} \item{legend_title}{string; title of the plot legend. Defaults to \code{phenodata}.} @@ -77,9 +77,9 @@ or \code{\link[ggplot2]{geom_text}}, such as \code{size} and \code{pch}.} A ggplot object } \description{ -A convenience function for plotting PCA scatter plot -for samples in ExpressionSet/MSnSet object. The biplot is essentially the -ggplot version of \code{\link[stats]{biplot.prcomp}}. +A convenience function for plotting PCA scatter plot for samples in +ExpressionSet/MSnSet object. The biplot is essentially the ggplot version of +\code{\link[stats]{biplot.prcomp}}. } \examples{ library(MSnSet.utils) diff --git a/man/plot_sample_correlation_heatmap.Rd b/man/plot_sample_correlation_heatmap.Rd deleted file mode 100644 index 9814420..0000000 --- a/man/plot_sample_correlation_heatmap.Rd +++ /dev/null @@ -1,46 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plot_sample_correlation_heatmap.R -\name{plot_sample_correlation_heatmap} -\alias{plot_sample_correlation_heatmap} -\alias{plot_sample_correlation_pheatmap} -\title{Heatmap for sample correlations} -\usage{ -plot_sample_correlation_heatmap(m, phenotype, ...) - -plot_sample_correlation_pheatmap(m, phenotype, ...) 
-} -\arguments{ -\item{m}{(MSnSet)} - -\item{phenotype}{(character) name of the column to order by (usually plex)} - -\item{...}{additional params to the heatmap call} -} -\value{ -(list) plot object -} -\description{ -Wrapper functions to generate correlation matrix plots -for MSnSet objects. There is a parameter for the phenotype to -order the columns (samples) by. -} -\section{Functions}{ -\itemize{ -\item \code{plot_sample_correlation_heatmap}: Using base heatmap - -\item \code{plot_sample_correlation_pheatmap}: Using the \code{pheatmap} package -}} - -\examples{ - -library(MSnSet.utils) -data(cptac_oca) - -m <- oca.set - -# Using base heatmap -plot_sample_correlation_heatmap(m, "Batch") - -# Using `pheatmap` (pretty heatmap) -plot_sample_correlation_pheatmap(m, "Batch") -} diff --git a/man/plot_upset.Rd b/man/plot_upset.Rd new file mode 100644 index 0000000..73e94e8 --- /dev/null +++ b/man/plot_upset.Rd @@ -0,0 +1,132 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_upset.R +\name{plot_upset} +\alias{plot_upset} +\title{Labeled UpSet Plot} +\usage{ +plot_upset( + ..., + mode = c("distinct", "intersect", "union"), + top_n_comb = Inf, + scale_set_bars = FALSE, + row_labels = character(0), + row_names_gp = gpar(fontsize = 10), + annotation_name_gp = gpar(fontsize = 10, fontface = "bold"), + heatmap_args = list(), + cell_dim = unit(10, "mm"), + extend = 0.2, + rot = 45, + hjust = 0, + vjust = 0, + bar_label_gp = gpar(fontsize = 10), + filename = character(0), + height = 3.5, + width = 7 +) +} +\arguments{ +\item{...}{Input sets (a list is recommended). See +\code{\link[ComplexHeatmap]{make_comb_mat}} for details.} + +\item{mode}{The mode for forming the combination set, see Mode section of +\code{\link[ComplexHeatmap]{make_comb_mat}}.} + +\item{top_n_comb}{Number of largest intersections to display. Defaults to all +intersections.} + +\item{scale_set_bars}{logical; whether to scale set size bars so that their +lengths are proportional to the lengths of the intersection size bars.} + +\item{row_labels}{vector of labels for the rows. Defaults to the names of the +input sets.} + +\item{row_names_gp}{graphical parameters for row labels specified by +\code{\link[grid]{gpar}}.} + +\item{annotation_name_gp}{graphical parameters for barplot titles specified +by \code{\link[grid]{gpar}}.} + +\item{heatmap_args}{list of additional arguments passed to +\code{\link[ComplexHeatmap]{Heatmap}}.} + +\item{cell_dim}{\code{\link[grid]{unit}} object used for both the height and +width of each heatmap cell.} + +\item{extend}{numeric; multiply y-axis upper limit by (1 + \code{extend}). +Increase to prevent clipping of barplot labels and overall title.} + +\item{rot}{numeric [-360, +360]; angle to rotate column barplot +labels.} + +\item{hjust}{numeric [0, 1]; horizontal justification of column barplot +labels.} + +\item{vjust}{numeric [0, 1]; vertical justification of column barplot +labels.} + +\item{bar_label_gp}{graphical parameters for barplot labels specified by +\code{\link[grid]{gpar}}.} + +\item{filename}{character; name of the file to save the plot. Must end in +".png", ".bmp", ".jpg", ".tif", or ".pdf". If not provided, the UpSet plot +will be displayed instead of saved.} + +\item{height}{numeric; height of the entire plot in inches.} + +\item{width}{numeric; width of the entire plot in inches.} +} +\value{ +An object of class \code{\link[ComplexHeatmap]{HeatmapList-class}} or +nothing (save to file instead). 
+} +\description{ +A wrapper around functions from +\code{\link[ComplexHeatmap]{ComplexHeatmap-package}}. Creates an +\href{https://doi.org/10.1109/TVCG.2014.2346248}{UpSet plot} with labeled +bars to visualize relationships between sets. UpSet plots are preferred to +Venn diagrams, especially when the number of intersections is large. +} +\note{ +\code{\link[base]{split}} is useful for creating input lists. +} +\examples{ +# Input list +set.seed(99) +x <- list(A = sample(letters, 5), + B = sample(letters, 10), + C = sample(letters, 15), + D = sample(letters, 20)) + +# Base plot +plot_upset(x) + +# Change label rotation and center horizontally +plot_upset(x, rot = 0, hjust = 0.5) + +# Top 2 largest intersections +plot_upset(x, top_n_comb = 2) + +# Add overall title +plot_upset(x, heatmap_args = list(column_title = "Title")) + +\dontrun{ +# Save to 3.5" by 7" PDF +plot_upset(x, filename = "upset_plot.pdf") +} + +} +\references{ +Gu, Z., Eils, R., & Schlesner, M. (2016). Complex heatmaps reveal +patterns and correlations in multidimensional genomic data. +\emph{Bioinformatics}, \emph{32}(18), 2847-2849. +\url{https://doi.org/10.1093/bioinformatics/btw313} + +Lex, A., Gehlenborg, N., Strobelt, H., Vuillemot, R., & Pfister, H. (2014). +UpSet: Visualization of Intersecting Sets. \emph{IEEE transactions on +visualization and computer graphics}, \emph{20}(12), 1983-1992. +\url{https://doi.org/10.1109/TVCG.2014.2346248} +} +\seealso{ +\href{https://jokergoo.github.io/ComplexHeatmap-reference/book/upset-plot.html}{ComplexHeatmap +Complete Reference: UpSet Plot} +} diff --git a/man/plot_volcano.Rd b/man/plot_volcano.Rd index e9f1371..faeefbe 100644 --- a/man/plot_volcano.Rd +++ b/man/plot_volcano.Rd @@ -25,10 +25,10 @@ fold-changes.} \item{pvals}{character; the name of the column in \code{df} containing p-values (adjusted or not).} -\item{sig_threshold}{\code{NULL} (default) or numeric between 0 and 1; -the threshold indicating statistical significance. Creates a dashed -horizontal line at that value. Any points above this line in the graph -are statistically significant.} +\item{sig_threshold}{\code{NULL} (default) or numeric between 0 and 1; the +threshold indicating statistical significance. Creates a dashed horizontal +line at that value. Any points above this line in the graph are +statistically significant.} \item{label}{\code{NULL} or character; the name of the column in \code{df} used to label the top most significant features. See details for more.} @@ -49,15 +49,15 @@ A ggplot object. A convenience function for creating a volcano plot. } \details{ -\code{sig_threshold} will create a secondary axis if the threshold is not -one of the existing y-axis breaks. +\code{sig_threshold} will create a secondary axis if the threshold + is not one of the existing y-axis breaks. -To label specific features (not top \code{num_features}), \code{label} -needs to be the name of a column where all but the features that will be -labeled are \code{NA}. Also, set \code{num_features} to \code{nrow(df)}. + To label specific features (not top \code{num_features}), \code{label} + needs to be the name of a column where all but the features that will be + labeled are \code{NA}. Also, set \code{num_features} to \code{nrow(df)}. 
 }
 \examples{
-library(ggplot2) # for labs
+library(ggplot2) # additional plot modifications
 library(MSnSet.utils)
 data("cptac_oca")
diff --git a/vignettes/complex_heatmap.Rmd b/vignettes/complex_heatmap.Rmd
new file mode 100644
index 0000000..a3b95c1
--- /dev/null
+++ b/vignettes/complex_heatmap.Rmd
@@ -0,0 +1,109 @@
+---
+title: "Create Annotated Expression and Correlation Heatmaps"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Create Annotated Expression and Correlation Heatmaps}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE, fig.width = 5, fig.height = 4.5)
+```
+
+```{r, message=FALSE, warning=FALSE}
+library(MSnSet.utils) # complex_heatmap
+library(circlize) # colorRamp2
+
+# Data
+data("longitudinal_biomarker_study")
+# Shorten name
+ee <- longitudinal_biomarker_study
+```
+
+## Expression Heatmaps
+
+A heatmap of the `exprs` slot of an `eSet` or `MSnSet` object with features as rows and samples as columns. The rows can be annotated with columns in `fData` and the columns can be annotated with columns in `pData`.
+
+Prior to heatmap creation, the MSnSet should be put through a missingness filter, because clustering may fail when there is a high degree of missingness. The alternative is to set `cluster_rows` (and possibly `cluster_columns`) to `FALSE`. This is rarely necessary when creating correlation heatmaps, as the correlation matrix will have few, if any, missing values.
+
+```{r}
+# Only keep features identified in at least 50% of samples
+ee <- ee[rowMeans(!is.na(exprs(ee))) >= 0.5, ]
+```
+
+Now, we can make the heatmap. For matrices with thousands of rows, this may take a while. By default, neither sample nor feature names are displayed, but this can be changed with the `show_column_names` and `show_row_names` arguments, respectively.
+
+```{r}
+complex_heatmap(ee)
+```
+
+
+Due to the wide range of values, it is difficult to see color differences. We can fix this by setting limits on the range of the color scale. This does not affect the actual data, only the colors of the heatmap body. We will limit colors to (-1.5, 1.5). Values below -1.5 will be the same shade of blue, and values above 1.5 will be the same shade of red.
+
+```{r}
+complex_heatmap(ee, color_range = c(-1.5, 1.5))
+```
+
+
+## Sample Correlation Heatmaps
+
+Sample correlation heatmaps are generated by setting `heatmap_type` to (an abbreviation of) `"sample_correlation"`, such as `"s"`. This will calculate the matrix of correlations between samples using the method specified by `cor_method` and use it to generate a heatmap with samples as both rows and columns. For sample correlation heatmaps, only `anno_column` may be specified, and both rows and columns are annotated the same way.
+
+Sample correlation heatmaps are generally more useful when we have sample labels, so it is usually a good idea to set `show_row_names` and `show_column_names` to `TRUE`. If there are too many samples to see labels in the plot window, consider saving the heatmap to a file with appropriate values for `filename`, `height`, and `width` to view the labels more easily (see the example at the end of this vignette).
+
+We will create a sample correlation heatmap and annotate rows and columns with the `"Plate"` column of `pData(ee)`.
+
+```{r}
+complex_heatmap(ee, heatmap_type = "sample", anno_column = "Plate")
+```
+
+
+## Feature Correlation Heatmaps
+
+Similar to the sample correlation heatmaps, but displays the correlations between *features* (proteins, peptides, etc.). Only `anno_row` may be specified, and both rows and columns are annotated the same way.
+
+We will create a feature correlation heatmap and annotate rows and columns with the `"isSpike"` column of `fData(ee)`.
+
+```{r}
+complex_heatmap(ee, heatmap_type = "feature", anno_row = "isSpike")
+```
+
+
+## Row and Column Annotation
+
+To annotate columns and rows of the heatmap, we use the `anno_column` and `anno_row` arguments, respectively. `anno_column` takes a vector of one or more strings that correspond to the names of columns in `pData`, and `anno_row` takes a vector of one or more strings that correspond to the names of columns in `fData`. By default, `MSnSet.utils::jet2.colors` is used for character, factor, and logical values, and `circlize::colorRamp2` with a viridis color palette is used for numeric values.
+
+We will annotate the rows using values in the `"isSpike"` (logical) column of `fData(ee)` and annotate the columns using the values in the `"Type"` (factor) and `"Age"` (numeric) columns of `pData(ee)`.
+
+```{r, fig.height=5.5}
+# Expression heatmap with annotated rows and columns
+complex_heatmap(ee, anno_row = "isSpike", anno_column = c("Type", "Age"))
+```
+
+
+### Modifying Annotation Colors
+
+We can change the colors of row and column annotations by passing lists to `anno_row_colors` and `anno_column_colors`, respectively. For example, we will change the colors of `"Type"` so that "Control" is a different shade of blue ("#414DBE") and "Case" is a dark yellow ("#BEB241"). We only need to supply a list with new colors for `"Type"`.
+
+**Tip:** Use the RColorBrewer package, an interactive color wheel, or a color palette generator (such as those available from Canva) to easily find the right colors.
+
+```{r}
+# Modify colors for categorical variables
+complex_heatmap(ee, anno_column = c("Type", "Age"),
+                anno_column_colors = list(Type = c("#414DBE", "#BEB241")))
+```
+
+Notice that the colors for `"Age"` are not changed. For continuous colors, we need to use `circlize::colorRamp2`; see its documentation page for how to use this function. Any number of breaks can be provided, as long as the number of colors is the same. For this example, we will use a sequential color scale that goes from white to green for `"Age"`.
+
+```{r}
+# Modify colors for continuous variables
+complex_heatmap(ee, anno_column = c("Type", "Age"),
+                anno_column_colors = list(
+                  Age = colorRamp2(breaks = range(ee[["Age"]], na.rm = TRUE),
+                                   colors = c("white", "darkgreen"))
+                )
+)
+```
+
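+
+## Saving Heatmaps to a File
+
+The `filename`, `height`, and `width` arguments save a heatmap directly to a file instead of displaying it. As noted in the function documentation, `filename` must end in ".png", ".bmp", ".jpg", ".tif", or ".pdf" (the extension determines the graphics device), and `height` and `width` are given in inches. The chunk below is a minimal sketch and is not evaluated; the output file name is arbitrary.
+
+```{r, eval=FALSE}
+# Save the annotated sample correlation heatmap from earlier to a
+# 6 in. by 6 in. PDF, this time with sample labels displayed.
+complex_heatmap(ee, heatmap_type = "sample", anno_column = "Plate",
+                show_column_names = TRUE, show_row_names = TRUE,
+                filename = "sample_correlation_heatmap.pdf",
+                height = 6, width = 6)
+```
+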