diff --git a/.Rbuildignore b/.Rbuildignore index e157760..65ede9b 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,3 +4,5 @@ ^NAMESPACE-old$ ^CHANGELOG\.md$ ^data-raw$ +^benchmark/* +^ignore_me.R$ diff --git a/.gitignore b/.gitignore index e3d674c..5b0e3e9 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,4 @@ test_data/* ring_dir private -*.~lock* +ignore_me.R diff --git a/CHANGELOG.md b/CHANGELOG.md index 589cc49..20c5815 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,45 @@ +* 2023-11-21 v0.1.0 + * **BREAKING CHANGES** + * **Completely re-writes `plot_read_depth()`.** + * allows users to plot read depth with a variable on the X axis and + a colour parameter + * users can access the old function with `plt_read_depth()` + temporarily, but this will be removed before the next full release. + * **`rank_abund()` is broken and is no longer exported.** Please file a + bug report if you were using this function. + * exports `order_taxa()`, by request + * makes `prop_tax_down()` slightly more efficient by checking up front if + there is nothing to do. + * deprecates `order_levs()` because it isn't used anywhere. Its intended + function is performed by `order_taxa()`. + * introduces visual and automatic testing of the new `plot_read_depth()` + function. + * in `plot_tax_bar()` + * the `legloc` argument is now passed directly to + `ggplot2::theme(legend.position)` and can take any value that can take. + * added a `r_ticks` argument. FALSE by default (default behaviour + is unchanged). If TRUE, the tick text on the x-axis is rotated 90 + degrees and reads down to up. + * introduced improved functionality when a custom colour vector is used, + with and without names + * introduce a `leglen` option to allow the user to limit how many taxa + are displayed in the legend without removing any taxa from the plot. + * soft-deprecate the `yscale` argument. Will stop supporting non-linear + y-axes soon + * improve error when the `rank` argument is missing from the input dat + frame + * introduce a warning when the per-sample abundaces sum to greater than + 1 but the `mean` argument is not set to `TRUE`. + * prep the function so I can stop exporting the whole `ggplot2` + namespace + * introduce lifecycle management with the `lifecycle()` package + * start using roxygen2md to use Markdown in documentation. + * introduce the `benchmark` folder which contains "good" plotting outputs + against which new versions of the package can be tested. Created a .Rmd + file in that folder which tests `plot_tax_bar()`. + * remove the files that held the old colour vectors + + * 2023-04-14 v0.0.1 (was v1.0.1) * The multiple colour vectors have been replaced with a single object, `tax_colours`, which will cycle if there are more than 30 taxa. Having more diff --git a/DESCRIPTION b/DESCRIPTION index eb15cac..7f6285c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: AfterSl1p Type: Package Title: Generate Summary Graphics and Basic Analysis of 16s Data -Version: 0.0.1 +Version: 0.1.0 Author: J. C. Szamosi and Shahrokh Shekarriz Authors@R: c(person("JC", "Szamosi", email = "szamosjc@mcmaster.ca", role = c('aut','cre')), @@ -19,6 +19,7 @@ Depends: Imports: dplyr (>= 0.7.2), ggplot2 (>= 2.2.1), + lifecycle, phyloseq (>= 1.19.1), pipeR, rlang (>= 0.1.2), @@ -28,3 +29,4 @@ RoxygenNote: 7.2.3 Suggests: testthat (>= 3.0.0) Config/testthat/edition: 3 +Roxygen: list(markdown = TRUE) diff --git a/NAMESPACE b/NAMESPACE index 50eea04..ba0d364 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,12 +4,15 @@ export(long_distance_df) export(make_ord_df) export(make_phy_df) export(order_levs) +export(order_taxa) export(plot_read_depth) export(plot_tax_bar) export(plt_ord) +export(plt_read_depth) export(prop_tax_down) -export(rank_abund) export(rotate_ticks) +export(tax_colours) import(ggplot2) import(pipeR) import(rlang) +importFrom(lifecycle,deprecated) diff --git a/R/AfterSl1p-package.R b/R/AfterSl1p-package.R index 925e6d4..94f1515 100644 --- a/R/AfterSl1p-package.R +++ b/R/AfterSl1p-package.R @@ -2,8 +2,9 @@ "_PACKAGE" ## usethis namespace: start -#' @import rlang -#' @import pipeR #' @import ggplot2 +#' @import pipeR +#' @import rlang +#' @importFrom lifecycle deprecated ## usethis namespace: end NULL diff --git a/R/data.R b/R/data.R index b23da31..33a183d 100644 --- a/R/data.R +++ b/R/data.R @@ -2,13 +2,15 @@ #' #' A vector of colours for use with taxa bar charts. #' -#' @section Details: There are 30, and grey will be added by the -#' \code{plot_tax_bar} function for the "Other" category. If a plot calls for -#' more than 30 colours, this will just recycle. That is usually fine because -#' low-abundance stuff can't be seen anyway, but if you have a situation where -#' you have more than 30 things that actually need to be distinguished, you'll -#' need to provide your own vector. Also, if you are in that situation, try to -#' stop. +#' @section Details: There are 30 colours, and grey will be added by the +#' [plot_tax_bar()] function for the "Other" category. If a plot +#' calls for more than 30 colours, this will just recycle. That is usually +#' fine because low-abundance stuff can't be seen anyway, but if you have a +#' situation where you have more than 30 things that actually need to be +#' distinguished, you'll need to provide your own vector. Also, if you are in +#' that situation, try to find another way to do what you are doing. People +#' cannot generally distinguish anywhere near 30 colours on a single plot. +#' @export tax_colours = c("#87c5ab","#eea27c","#a9a8d2","#ffff99","#9999ff","#fb8072", "#80b1d3","#fdb462","#b3de69","#fccde5","#bc80bd","#ccebc5", "#ffed6f","#a6cee3","#fb9a99","#fdbf6f","#d38d99","#b3b3ff", diff --git a/R/long_distance_df.R b/R/long_distance_df.R index 0c61dca..7170f4e 100644 --- a/R/long_distance_df.R +++ b/R/long_distance_df.R @@ -3,13 +3,13 @@ #' Create a long data frame of among-sample distances #' #' -#' \code{long_distance_df} creates a long data frame of all the pairwise +#' `long_distance_df` creates a long data frame of all the pairwise #' distances from a sample distance matrix (e.g. the output of -#' \code{\link{phyloseq::distance}}) with all the metadata listed for each sample. -#' Allows for easy within- and among-group boxplots, or whatever other -#' comparisons are of interest. +#' [phyloseq::distance()]) with all the metadata listed for +#' each sample. Allows for easy within- and among-group boxplots, or whatever +#' other comparisons are of interest. #' -#' @section Value: A data frame \eqn{N(N-1)} (or \eqn{N^2} if \code{diag = TRUE} +#' @section Value: A data frame \eqn{N(N-1)} (or \eqn{N^2} if `diag = TRUE` #' is set) rows (where N is the number of samples) with sample IDs, metadata, #' and pairwise distances listed for each pair of samples. Sample ID and #' metadata columns have '1' or '2' appended to them so the user can tell @@ -18,22 +18,22 @@ #' names as row and column names. #' @param metadat A data frame or data frame-like object with the data set's #' metadata -#' @param idcol (\code{'X.SampleID'}.) A string. The column in \code{metadat} +#' @param idcol (`'X.SampleID'`.) A string. The column in `metadat` #' that holds the sample names. Sample names should match the row/column namse #' of the distance matrix. If there are samples in the metadata data frame #' that are missing from the distance matrix, they will be excluded with a #' warning. If there are samples in the distance matrix that are missing from #' the metadata, you will get an error. -#' @param diag (\code{FALSE}.) Logical. Whether the diagonal elements (zeros in +#' @param diag (`FALSE`.) Logical. Whether the diagonal elements (zeros in #' a distance matrix) should be included in the long data frame. Defaults to -#' \code{FALSE} because we almost never want them. -#' @param suff (\code{c('1','2')}.) A character vector of length 2. The suffixes +#' `FALSE` because we almost never want them. +#' @param suff (`c('1','2')`.) A character vector of length 2. The suffixes #' to be appended to the metadata column names in the output. The two elements #' must not be identical. -#' @param distcol (\code{'Distance'}.) A string. The desired column name for the +#' @param distcol (`'Distance'`.) A string. The desired column name for the #' distance column in your long data frame. Only here to avoid clashes with #' existing metadata column names. -#' @param baseline (\code{'NULL'}). A dataframe whose column names must also be +#' @param baseline (`'NULL'`). A dataframe whose column names must also be #' column names in the metadat data frame, and whose rows contain a subset of #' the possible values/combinations. If this parameter is used, all the #' samples whose metadata matches a row in this data frame will end up in @@ -79,13 +79,9 @@ long_distance_df = function(dmat, metadat, idcol = 'X.SampleID', diag = FALSE, return(lddf) } - - -## Functions to generate distance bar charts ----------------------------------- - ### lddf_check ----------------------------------------------------------------- -#' Check the inputs of \code{long_distance_df()} +#' Check the inputs of `long_distance_df()` #' #' For internal use only lddf_check = function(dmat, metadat, idcol = 'X.SampleID', diag = FALSE, @@ -131,26 +127,28 @@ lddf_check = function(dmat, metadat, idcol = 'X.SampleID', diag = FALSE, #' Does the actual gathering and spreading without testing assumptions #' -#' \code{lddf_work} Does the actual gathering, spreading, and joining associated -#' with making the lddf, but without checking if the distance matrix is sensible -#' or removing diagonals and repeats. This is for when you know what you're -#' doing and have trimmed your distance matrix down to only what you know you -#' need. Good for permutation tests. +#' `lddf_work` Used internally by `long_distance_df()`. I recommend +#' you use that function unless you really know what you're doing. This function +#' does the actual gathering, spreading, and joining associated with making the +#' lddf, but without checking if the distance matrix is sensible or removing +#' diagonals and repeats. Use this function if you know exactly what you want +#' and have trimmed your distance matrix down to only what you know you need. +#' Good for permutation tests. #' #' @param dmat A distance matrix or other diagonal matrix object with sample #' names as row and column names. #' @param metadat A data frame or data frame-like object with the data set's #' metadata -#' @param idcol (\code{'X.SampleID'}.) A string. The column in \code{metadat} +#' @param idcol (`'X.SampleID'`.) A string. The column in `metadat` #' that holds the sample names. Sample names should match the row/column namse #' of the distance matrix. If there are samples in the metadata data frame #' that are missing from the distance matrix, they will be excluded with a #' warning. If there are samples in the distance matrix that are missing from #' the metadata, you will get an error. -#' @param suff (\code{c('1','2')}.) A character vector of length 2. The suffixes +#' @param suff (`c('1','2')`.) A character vector of length 2. The suffixes #' to be appended to the metadata column names in the output. The two elements #' must not be identical. -#' @param distcol (\code{'Distance'}.) A string. The desired column name for the +#' @param distcol (`'Distance'`.) A string. The desired column name for the #' distance column in your long data frame. Only here to avoid clashes with #' existing metadata column names. lddf_work = function(dmat, metadat, idcol = 'X.SampleID', suff = c('1','2'), diff --git a/R/make_phy_df.R b/R/make_phy_df.R index 4f62b60..c4f7ffe 100644 --- a/R/make_phy_df.R +++ b/R/make_phy_df.R @@ -2,7 +2,7 @@ #' Generate a Data Frame for Taxon Bar Charts #' -#' \code{make_phy_df} generates a data frame that is useful for generating taxon +#' `make_phy_df` generates a data frame that is useful for generating taxon #' bar charts. #' #' @section Details: This function takes a phyloseq object and generates a data @@ -13,8 +13,8 @@ #' abundance, and weird things will happen if it is not. #' #' @section Value: A data frame similar in structure to that generated by -#' \code{psmelt}, but with an 'Other' category added and taxon levels ordered -#' for use in plotting. +#' [phyloseq::psmelt()], but with an 'Other' category added and +#' taxon levels ordered for use in plotting. #' #' @param physeq A phyloseq object. #' @param rank The rank at which to glom taxa. Must be one of 'OTU', 'Genus', @@ -22,14 +22,14 @@ #' @param cutoff The abundance cutoff below which taxa are grouped into 'Other'. #' If you don't want anything grouped into 'Other', set this to 0. Default is #' 0.001. -#' @param indic a flag to indicate if the taxon names have level indicators. -#' If FALSE, they are added. +#' @param indic a flag to indicate if the taxon names have level indicators. If +#' FALSE, they are added. #' @param prop Specifies whether taxa need to be propogated down the taxonomy -#' table (default, TRUE) or if this has already been done. -#' @param count If FALSE (default) the function will expect a relative abundance -#' table and create an 'Other' category for taxa below the cutoff (and will -#' raise an error if the table is not relative abundance). If TRUE, the -#' function will not check for relative abundance and will not create an +#' table (default is `TRUE`) or if this has already been done. +#' @param count If `FALSE` (default) the function will expect a relative +#' abundance table and create an 'Other' category for taxa below the cutoff +#' (and will raise an error if the table is not relative abundance). If TRUE, +#' the function will not check for relative abundance and will not create an #' 'Other' category. #' @export make_phy_df = function(physeq, rank = 'Genus', cutoff = 0.001, indic = FALSE, @@ -41,7 +41,7 @@ make_phy_df = function(physeq, rank = 'Genus', cutoff = 0.001, indic = FALSE, stop('physeq must be a relative abundance table. You have counts > 1.') } ranks = colnames(phyloseq::tax_table(physeq)) - if (rank == 'OTU'){ + if ((rank == 'OTU') & !('OTU' %in% ranks)) { ranks = c(ranks, rank) } @@ -131,7 +131,7 @@ remain = function(x, tot = 1){ #' Order Taxon Name Factors #' -#' \code{order_taxa} reorders the taxon names in a taxon column (e.g. 'Class' or +#' `order_taxa()` reorders the taxon names in a taxon column (e.g. 'Class' or #' 'Phylum') by the taxon's mean abundance (but always makes sure to put Other #' first). #' @@ -139,19 +139,20 @@ remain = function(x, tot = 1){ #' specified column re-ordered by its mean abundance #' #' @param phy_df A data frame of a phyloseq object, as produced by -#' \code{\link{psmelt}} or \code{\link{make_phy_df}}. +#' [phyloseq::psmelt()] or [make_phy_df()]. #' @param rank The name of the column to be re-ordered #' @param abund The name of the abundances column. Defaults to 'Abundance' #' @param decreasing Specifies whether the taxon order should be based on #' decreasing or increasing abundance. Defaults to FALSE. +#' @export order_taxa = function(phy_df, rank, abund = 'Abundance', decreasing = FALSE){ phy_df[,rank] = factor(phy_df[,rank]) - phy_df %>% - dplyr::filter(UQ(sym(rank)) != 'Other') %>% - dplyr::group_by(UQ(sym(rank))) %>% - dplyr::summarize(Tot = sum(UQ(sym(abund)))) %>% - data.frame() -> total_abunds + total_abunds = (phy_df + %>% dplyr::filter(.data[[rank]] != 'Other') + %>% dplyr::group_by(.data[[rank]]) + %>% dplyr::summarize(Tot = sum(.data[[abund]])) + %>% data.frame()) lev_ord = levels(droplevels(total_abunds[,rank])) if (decreasing){ diff --git a/R/plot_read_depth.R b/R/plot_read_depth.R index 699ff45..73938ab 100644 --- a/R/plot_read_depth.R +++ b/R/plot_read_depth.R @@ -1,19 +1,196 @@ -#### plot_read_depth ----------------------------------------------------------- +#### plot_read_depth ------------------------------------------------------------ + +#' View read depth +#' +#' @section Details: Creates a read depth boxplot +#' +#' @section Value: A [ggplot2] object +#' +#' @param physeq A phyloseq object +#' @param xvar `NULL` A variable (column name) from `sample_data(physeq)` by +#' which to group samples on the x-axis. Must be categorical. If `NULL`, no +#' grouping will be performed. +#' @param lin `FALSE` If `TRUE`, the y-axis will be on a linear scale. By +#' default it is log-scaled. +#' @param cvar `NULL` A variable (column name) from `sample_data(physeq)` by +#' which the points should be coloured. If `NULL`, all points will be black. +#' May be continuous or categorical. See the `clrs` argument if you want to +#' specify your colours. +#' @param clrs `NULL` If the `cvar` parameter is set and this is unset, colours +#' will be taken from [tax_colours] if `cvar` is categorical and use +#' [ggplot2]'s default (blue) if `cvar` is continuous. If `cvar` is +#' categorical, you may provide a named colour vector to use instead of +#' [tax_colours] and if it is continuous you may provide a two-colour vector. +#' The vector must have names "low" and "high". If `cvar` is NOT set, you may +#' use this parameter to provide a single colour name or hex code that will be +#' applied to all points. +#' @export +plot_read_depth = function(physeq, xvar = NULL, lin = FALSE, cvar = NULL, + clrs = NULL){ + # Check inputs + sample_sum_df = data.frame(Total = phyloseq::sample_sums(physeq), + phyloseq::sample_data(physeq)) + + prd_check(sample_sum_df, cvar, xvar) + + # Deal with the colour vector + clrs = prd_clrs(sample_sum_df, cvar, clrs) + + + # Construct the plot foundation with or without a grouping variable + if (is.null(xvar)){ + depth_plot = ggplot2::ggplot(sample_sum_df, + ggplot2::aes(x = 'All', y = Total)) + } else { + depth_plot = ggplot2::ggplot(sample_sum_df, + ggplot2::aes(x = .data[[xvar]], y = Total)) + } + + # Add the jittered points, taking colour into account + # No colouring variable + if (is.null(cvar)){ + # And no specified colour + if (is.null(clrs)){ + depth_plot = depth_plot + ggplot2::geom_jitter(width = 0.2) + # And colour is specified + } else { + depth_plot = depth_plot + ggplot2::geom_jitter(width = 0.2, + colour = clrs) + } + # colouring variable is specified and continuous + } else if (is.numeric(sample_sum_df[[cvar]])) { + depth_plot = depth_plot + ggplot2::geom_jitter(width = 0.2, + ggplot2::aes(colour = .data[[cvar]])) + # if clrs not specified, do nothing. defaults will do. + if (is.null(clrs)){ + NULL + # if clrs is specified, use it + } else { + depth_plot = depth_plot + + ggplot2::scale_colour_gradient(low = clrs['low'], + high = clrs['high']) + } + # if colouring variable is specified and categorical + } else { + depth_plot = depth_plot + ggplot2::geom_jitter(width = 0.2, + aes(colour = .data[[cvar]])) + + ggplot2::scale_colour_manual(values = clrs) + } + + if (!lin){ + depth_plot = depth_plot + ggplot2::scale_y_log10() + } + return(depth_plot) +} + +#### prd_check() --------------------------------------------------------------- + +#' Check inputs for plot_read_depth() +#' +#' Checks the inputs of `plot_read_depth()` +#' @param sample_sum_df data frame of sample sums and sample data +#' @param cvar,xvar passed through from `plot_read_depth()` + +prd_check = function(sample_sum_df, cvar, xvar){ + # Are cvar and xvar columns? + if (!is.null(xvar) && !(xvar %in% colnames(sample_sum_df))){ + stop(paste('"xvar" must be one of the columns in sample_data(physeq).')) + } + + if (!is.null(cvar) && !(cvar %in% colnames(sample_sum_df))){ + stop(paste('"cvar" must be one of the columns in sample_data(physeq).')) + } +} + +#### prd_clrs() ---------------------------------------------------------------- + +#' Deal w colour vector for plot_read_depth() +#' +#' @param cvar,clrs passed through from `plot_read_depth()` +prd_clrs = function(sample_sum_df, cvar, clrs){ + # cvar unset, clrs set + if (is.null(cvar) & !is.null(clrs)){ + if (length(clrs) > 1){ + warn(paste('With no cvar specified, only the first colour in "clrs"', + 'will be used.')) + clrs = clrs[1] + return(clrs) + } + } else if (!is.null(cvar)){ + # Built-in colour vector + if (is.null(clrs)){ + # Categorical + if (is.factor(sample_sum_df[[cvar]]) || + is.character(sample_sum_df[[cvar]])){ + n = dplyr::n_distinct(sample_sum_df[[cvar]]) + nx = ceiling(n/length(tax_colours)) + clrs = rep(tax_colours, nx) + return(clrs) + + # continuous requires no action + } else { + return(clrs) + } + # User-specified colour vector + } else { + # Categorical + if (is.factor(sample_sum_df[[cvar]]) | + is.character(sample_sum_df[[cvar]])){ + if (is.null(names(clrs))){ + return(clrs) + # nothing to do here. use it as is + + } else if (!all(unique(sample_sum_df) %in% names(clrs))){ + stop(paste('If "clrs" is named, its names must contain', + 'all the values in the "cvar" column')) + } + # if we get this far, the colour vector is fine. + + return(clrs) + + # Continuous + } else { + if (is.null(names(clrs)) | + !all(c('high','low') %in% names(clrs))){ + stop(paste('With a continuous "cvar", the provided colour', + 'vector "clrs" must have names "high" and "low".')) + } else { + # if we get this far, the colour vector is fine. + return(clrs) + } + return(clrs) + } + return(clrs) + } + return(clrs) + } + return(clrs) +} + +#### plt_read_depth ----------------------------------------------------------- #' Plot read depth #' -#' Creates a read depth histogram. +#' @describeIn +#' `r lifecycle::badge("deprecated")` +#' +#' Creates a read depth histogram. Deprecated in favour of `plot_read_depth()` #' @param physeq A phyloseq object #' @export -plot_read_depth = function(physeq){ - sample_sum_df = data.frame(Total = sample_sums(physeq), - sample_data(physeq)) - depth_plot = ggplot(sample_sum_df, aes(x = Total)) + - geom_histogram(colour = 'black', fill = 'grey57') + - scale_x_log10() + - xlab('Log10 read depth') + - ylab('Number of samples') + - ggtitle('Distribution of read depths') + - theme_bw() +plt_read_depth = function(physeq){ + lifecycle::deprecate_soft('0.1.0.', 'plt_read_depth()', + 'plot_read_depth()', + details = paste("This is the old version of", + "plot_read_depth(), and will be", + "removed soon.")) + sample_sum_df = data.frame(Total = phyloseq::sample_sums(physeq), + phyloseq::sample_data(physeq)) + depth_plot = ggplot2::ggplot(sample_sum_df, aes(x = Total)) + + ggplot2::geom_histogram(colour = 'black', fill = 'grey57') + + ggplot2::scale_x_log10() + + ggplot2::xlab('Log10 read depth') + + ggplot2::ylab('Number of samples') + + ggplot2::ggtitle('Distribution of read depths') + + ggplot2::theme_bw() return(depth_plot) } diff --git a/R/plot_tax_bar.R b/R/plot_tax_bar.R index 305af5d..3a102ca 100644 --- a/R/plot_tax_bar.R +++ b/R/plot_tax_bar.R @@ -2,8 +2,8 @@ #' Create a Taxa Bar Chart #' -#' \code{plot_tax_bar()} creates a taxa bar chart from the data frame generated -#' by \code{\link{make_phy_df}}. +#' `plot_tax_bar()` creates a taxa bar chart from the data frame generated +#' by [make_phy_df()]. #' #' @section Details: This function generates a ggplot object that is a #' first-pass, reasonable attempt at a taxon bar chart. The taxa are ordered @@ -14,31 +14,41 @@ #' @section Value: A ggplot object. #' #' @param tax_df The data frame used for plotting. Unless you really know what -#' you're doing, use the data frame output by \code{\link{make_phy_df}}. +#' you're doing, use the data frame output by [make_phy_df()]. #' @param rank The taxonomic rank at which to view the data. Must be one of #' 'Genus', 'Family', 'Order', 'Class', 'Phylum'. May not be a lower rank than -#' the rank given to \code{\link{make_phy_df}}. +#' the rank given to [make_phy_df()]. #' @param colours A character vector of colour names or hex values. Must have #' enough colours to accommodate all your taxa at the appropriate rank. If you #' don't provide one, there are a few internal vectors that have 21, 31, 60, #' or 70 colours that the function will try to use. -#' @param sample \code{'X.SampleID'} The name of the sample column in the data +#' @param sample `'X.SampleID'` The name of the sample column in the data #' frame. -#' @param abund \code{'Abundance'} The name of the abundance column in the data +#' @param abund `'Abundance'` The name of the abundance column in the data #' frame. -#' @param legloc \code{'right'} Location of the legend. Can be 'right' -#' (default), 'bottom', or 'none' (absent) -#' @param yscale \code{'lin'} Can be either 'lin' or 'sqrt'. The 'sqrt' plot can -#' look weird. -#' @param means \code{FALSE} If \code{TRUE}, sets \code{position = fill} in the -#' \code{geom_bar()} to constrain the abundances to sum to 1. Good to use if -#' your \code{sample = } parameter is not actually sample names, but rather -#' larger categories, to produce a plot of category means. +#' @param legloc `'right'` Location of the legend. Passed directly to +#' `ggplot2::theme(legend.position = legloc)` and can be any of "none", +#' "left", "right", "top", or "bottom". If it is anything else, +#' [ggplot2::theme()] will default to "none". +#' @param yscale `r lifecycle::badge("deprecated")` This function no longer +#' supports y-axis scales other than linear. +#' @param means `FALSE` If `TRUE`, sets `position = fill` in the +#' [ggplot2::geom_bar()] to constrain the abundances to sum to 1. +#' Good to use if your `sample = ` parameter is not actually sample +#' names, but rather larger categories, to produce a plot of category means. +#' @param r_ticks `FALSE` If `TRUE` x-axis tickmark text is rotated 90 +#' degrees and read bottom-to-top. #' @export plot_tax_bar = function(taxa_df,rank,colours = NULL, sample = 'X.SampleID', abund = 'Abundance', - legloc = 'right', yscale = 'lin', means = FALSE){ - # Fed by make_phy_df() + legloc = 'right', yscale = 'lin', + means = FALSE, r_ticks = FALSE, leglen = NULL){ + # Lifecycle Management + if (yscale != 'lin'){ + lifecycle::deprecate_warn('0.1.0', 'plot_tax_bar(yscale)', + details = paste('The ability to set non-linear y-axis', + 'scaling will be removed soon.')) + } # Check the inputs if (!(sample %in% names(taxa_df))){ @@ -52,14 +62,55 @@ plot_tax_bar = function(taxa_df,rank,colours = NULL, if (!(yscale %in% c('lin','log','sqrt'))){ stop('yscale argument must be one of \'lin\', \'log\', or \'sqrt\'') } - + if (!(rank %in% names(taxa_df))){ + stop('rank argument must be one of the columns in your data frame') + } # Pick colours num = length(unique(taxa_df[,rank])) if (is.null(colours)){ n = num/length(tax_colours) colours = c('grey69',rev(rep(tax_colours, ceiling(n))[1:num-1])) + names(colours) = levels(taxa_df[,rank]) } else if (is.null(names(colours))) { + if (length(colours) < (dplyr::n_distinct(taxa_df[,rank])-1)){ + warn(paste('The supplied colour vector is shorter than the number', + 'of distinct taxa.')) + } colours = c('grey69',rev(colours[1:(num-1)])) + names(colours) = levels(taxa_df[,rank]) + } else { + if (length(colours) < dplyr::n_distinct(taxa_df[,rank])){ + warn(paste('The supplied colour vector is shorter than the number', + 'of distinct taxa.')) + } + } + + # Check legend length + if (is.null(leglen)){ + leglen = num + } else if (!is.numeric(leglen)) { + stop(paste("leglen must be an integer")) + } else if (leglen > num){ + warn(paste("leglen is greater than the number of unique taxa.", + "showing all taxa.")) + leglen = num + } else if (leglen == 0){ + legloc = 'none' + leglen = num + } else if (leglen < 0){ + warn(paste('leglen can not be negative. treating it like 0.')) + legloc = 'none' + leglen = num + } + + # Check for means + mn_chk = (taxa_df + %>% dplyr::group_by(.[,sample]) + %>% dplyr::summarize(.chksms = sum(Abundance, na.rm = TRUE))) + tol = 1e-5 + if (any(mn_chk$.chksms - 1 > tol) & !means){ + warn(paste('Your per-sample abundances sum to >1. Did you mean to', + 'specify \'means = TRUE\'?')) } # Make sure the x axis is categorical @@ -68,42 +119,59 @@ plot_tax_bar = function(taxa_df,rank,colours = NULL, # If the colour vector is named, make sure it is ordered like the factor # it's colouring if (!is.null(names(colours))) { - taxa_df[,rank] = droplevels(taxa_df[,rank]) - if (!all(levels(taxa_df[,rank]) %in% names(colours))){ - stop('The colour vector names need to match the rank levels') + if ((length(colours) < dplyr::n_distinct(taxa_df[,rank])) & + all(names(colours) %in% droplevels(taxa_df[,rank]))){ + NULL + } else { + taxa_df[,rank] = droplevels(taxa_df[,rank]) + if (!all(levels(taxa_df[,rank]) %in% names(colours))){ + stop('The colour vector names need to match the rank levels') + } } colours = colours[levels(taxa_df[,rank])] } - indiv = ggplot(taxa_df, aes_string(x = sample, y = abund, fill = rank, - colour = rank)) + - theme(axis.title.x = element_blank(), - axis.text.x = element_text(size = 10, - angle = 90, - hjust = 1, - vjust = 0.5)) + - scale_fill_manual(values = colours, - guide = guide_legend(reverse = TRUE)) + - scale_colour_manual(values = colours, - guide = guide_legend(reverse = TRUE)) + - ylab(paste("Relative Abundance (",rank,")\n",sep='')) + brks = c('Other', rev(rev(levels(taxa_df[,rank]))[1:(leglen-1)])) + # Generate the plot + indiv = ggplot2::ggplot(taxa_df, + ggplot2::aes(x = .data[[sample]], + y = .data[[abund]], + fill = .data[[rank]], + colour = .data[[rank]])) + + ggplot2::theme_bw() + + # adjust the title and axis text + ggplot2::theme(axis.title.x = ggplot2::element_blank(), + # position the legend + legend.position = legloc) + + # use the right colours + ggplot2::scale_fill_manual(values = colours, + breaks = brks, + guide = ggplot2::guide_legend(reverse = TRUE)) + + ggplot2::scale_colour_manual(values = colours, + breaks = brks, + guide = ggplot2::guide_legend(reverse = TRUE)) + + # label the axis + ggplot2::ylab(paste("Relative Abundance (",rank,")\n",sep='')) if (means){ - indiv = indiv + geom_bar(stat = 'identity', position = 'fill') + # use mean abundance across a group + indiv = indiv + ggplot2::geom_bar(stat = 'identity', position = 'fill') } else { - indiv = indiv + geom_bar(stat = "identity") + # individual samples + indiv = indiv + ggplot2::geom_bar(stat = "identity") } - if (yscale == 'sqrt') { - indiv = indiv + scale_y_sqrt() + # rotate the ticks + if (r_ticks){ + indiv = indiv + rotate_ticks() } - if (legloc == 'bottom'){ - indiv = indiv + theme(legend.position = 'bottom') - } else if (legloc == 'none') { - indiv = indiv + guides(fill = FALSE, colour = FALSE) + if (yscale == 'sqrt') { + # square-root transform the y-axis. You probably don't want this. It should + # be deprecated. + indiv = indiv + ggplot2::scale_y_sqrt() } - indiv + theme_bw() + return(indiv) } diff --git a/R/plt_ord.R b/R/plt_ord.R index ba66473..9e5f414 100644 --- a/R/plt_ord.R +++ b/R/plt_ord.R @@ -1,13 +1,20 @@ ## Plot the ordination -------------------------------------------------------- ### plt_ord ------------------------------------------------------------------ + #' Plot the ordination #' #' Make a multi-axis PCoA plot from the data frame -#' @param ord_long The ordination data frame produced by \code{make_ord_df} +#' +#' @section Details: +#' +#' @section Value: +#' +#' @param ord_long The ordination data frame produced by +#' [make_ord_df()] #' @param colour The name of the column to use to colour the points. #' @param shape The name of the coloumn governing the shape of the points. -#' @param size The size of the points. Passed to the \code{geom_point()} size -#' argument and defaults to 1. +#' @param size The size of the points. Passed to the +#' [ggplot2::geom_point()] size argument and defaults to 1. #' @param pt_alph The transparency of the points. Default is 0.7 #' @export plt_ord = function(ord_long, colour = NULL, shape = NULL, size = 1, diff --git a/R/prep_plot_rank_ab.R b/R/prep_plot_rank_ab.R index 7995556..337ace5 100644 --- a/R/prep_plot_rank_ab.R +++ b/R/prep_plot_rank_ab.R @@ -2,34 +2,35 @@ #' Rank Taxa by Abundance #' -#' \code{rank_abund()} generates a data frame that's ready to be used by -#' \code{\link{plot_rank_abund}} +#' `rank_abund()` generates a data frame that's ready to be used by +#' [plot_rank_ab()] #' #' @section Value: A data frame whose taxa have been ranked by their mean #' abundance in the user-specified baseline level of some grouping variable or #' variables. #' #' @param phy_df A dataframe of a phyloseq object, like that generated by -#' \code{\link{psmelt}} or \code{\link{make_phy_df}} -#' @param varbs (\code{NULL}) A character vector of grouping variables from -#' which the baseline values are chosen to define the abundance ordering. If -#' it is \code{NULL}, the ordering will be based on mean abundances in the -#' whole data frame. -#' @param bases (\code{NULL}) A character vector of baseline values for the -#' variables given in \code{vars}. The ordering of the taxa will be given -#' based only on the samples with these baseline values for these variables. -#' Must be in the same order as vars. -#' @param abunds (\code{'Abundance'}) The name of the abundance column. -#' @param rank (\code{'Genus'}) The rank to base the ordering on. -#' @param IDcol (\code{'X.SampleID'}) The column name of the sample IDs -#' @export -rank_abund = function(phy_df, varbs = NULL, bases = NULL, abunds = 'Abundance', - rank = 'Genus', IDcol = 'X.SampleID'){ +#' [phyloseq::psmelt()] or [make_phy_df()] +#' @param gvars (`NULL`) A character vector of grouping variables from which the +#' baseline values are chosen to define the abundance ordering. If it is +#' `NULL`, the ordering will be based on mean abundances in the whole data +#' frame. +#' @param bases (`NULL`) A character vector of baseline values for the variables +#' given in `gvars`. The ordering of the taxa will be given based only on the +#' samples with these baseline values for these variables. Must be in the same +#' order as `gvars`. +#' @param abunds (`'Abundance'`) The name of the abundance column. +#' @param rank (`'Genus'`) The rank to base the ordering on. Must be a column in +#' `phy_df` +#' @param IDcol (`'X.SampleID'`) The column name of the sample IDs +#' +rank_abund = function(phy_df, gvars = NULL, bases = NULL, abunds = 'Abundance', + rank = 'Genus', IDcol){ # Set up the groups for the plotting totals rank_abs = df_glom(phy_df, IDcol = IDcol, rank = rank, abunds = abunds) # Subset and order - ranked = subset_order(rank_abs, varbs, bases, rank = rank) + ranked = subset_order(rank_abs, gvars, bases, rank = rank) # Order the bigger data frame by the above ordering lev_ord = levels(ranked[,rank]) @@ -42,22 +43,25 @@ rank_abund = function(phy_df, varbs = NULL, bases = NULL, abunds = 'Abundance', #' Subset and generate taxon ordering #' -#' \code{subset_order} generates a data frame whose taxon column given by -#' \code{rank} has been ranked according to its mean abundance in the -#' \code{abunds} column. Used internally by \code{rank_abund()} +#' `subset_order` generates a data frame whose taxon column given by +#' `rank` has been ranked according to its mean abundance in the +#' `abunds` column. Used internally by +#' [rank_abund()] #' -#' @param phy_df A phyloseq data frame, as generated by \code{psmelt}, but -#' probably generated by \code{\link{df_glom}} or \code{\link{make_phy_df()}}. -#' @param varbs (\code{NULL}) A character vector of grouping variables from +#' @param phy_df A phyloseq data frame, as generated by +#' [phyloseq::psmelt()], but probably generated by +#' [df_glom()] or +#' [make_phy_df()]. +#' @param varbs (`NULL`) A character vector of grouping variables from #' which the baseline values are chosen to define the abundance ordering. If -#' it is \code{NULL}, the ordering will be based on mean abundances in the +#' it is `NULL`, the ordering will be based on mean abundances in the #' whole data frame. -#' @param bases (\code{NULL}) A character vector of baseline values for the -#' variables given in \code{vars}. The ordering of the taxa will be given +#' @param bases (`NULL`) A character vector of baseline values for the +#' variables given in `vars`. The ordering of the taxa will be given #' based only on the samples with these baseline values for these variables. #' Must be in the same order as varbs. -#' @param abund (\code{'Abundance'}) The name of the abundance column. -#' @param rank (\code{'Genus'}) The taxonomic rank to base the ordering on. +#' @param abund (`'Abundance'`) The name of the abundance column. +#' @param rank (`'Genus'`) The taxonomic rank to base the ordering on. subset_order = function(phy_df, varbs = NULL, bases = NULL, rank = 'Genus', abunds = 'TotalAbunds'){ # Check inputs @@ -89,10 +93,10 @@ subset_order = function(phy_df, varbs = NULL, bases = NULL, rank = 'Genus', #' Like tax_glom, but for data frames #' -#' \code{df_glom} take totals within sample at a given taxonomic rank. +#' [df_glom()] take totals within sample at a given taxonomic rank. #' -#' @param phy_df A phyloseq data frame, as generated by \code{\link{psmelt}} or -#' \code{\link{make_phy_df}} +#' @param phy_df A phyloseq data frame, as generated by +#' [phyloseq::psmelt()] or [make_phy_df()] #' @param ranks A character vector with the taxon rank names #' @param IDcol The column name of the sample IDs #' @param rank The taxonomic rank to glom at diff --git a/R/prep_plt_ord.R b/R/prep_plt_ord.R index 7bb91a8..d0fffc3 100644 --- a/R/prep_plt_ord.R +++ b/R/prep_plt_ord.R @@ -13,19 +13,19 @@ #' that the transformation you have used is appropriate for the distance #' method you choose. #' @param dist_meth The distance method to be use. Must be one of the methods -#' accepted by the \code{phyloseq::distance()} function. Default is 'bray'. If -#' 'jaccard' is used, adds the \code{binary = TRUE} argument. If you have -#' taken a clr transform of your data and want Aitchison distances, choose -#' \code{'euclidean'}. Currently, tree-based methods like \code{'unifrac'} and -#' \code{'wunifrac'} are not supported. +#' accepted by the [phyloseq::distance()] function. Default is +#' 'bray'. If 'jaccard' is used, adds the `binary = TRUE` argument. If +#' you have taken a clr transform of your data and want Aitchison distances, +#' choose `'euclidean'`. Currently, tree-based methods like +#' `'unifrac'` and `'wunifrac'` are not supported. #' @param ord_meth The ordination method. Must be one of the methods accepted by -#' the \code{phyloseq::ordinate()} function. Default is 'PCoA'. Make sure the -#' ordination method you choose is appropriate for your distance method. -#' @param scree_only If \code{TRUE}, this function will print the scree plot of +#' the [phyloseq::ordinate()] function. Default is 'PCoA'. Make sure +#' the ordination method you choose is appropriate for your distance method. +#' @param scree_only If `TRUE`, this function will print the scree plot of #' the requested ordination and then exit. Good for deciding how many axes you -#' care about. Default is \code{FALSE}. +#' care about. Default is `FALSE`. #' @param axes A vector of integers indicating which ordination axes to include -#' in the data frame. Defaults to \code{1:4}. +#' in the data frame. Defaults to `1:4`. #' @export make_ord_df = function(physeq, dist_meth = 'bray', ord_meth = 'PCoA', scree_only = FALSE, axes = 1:4){ @@ -92,12 +92,12 @@ make_ord_df = function(physeq, dist_meth = 'bray', ord_meth = 'PCoA', #' Make a data frame of the ordination if the ordination method is RDA. Not #' exported. +#' @section Description: Take a phyloseq object and generate a data frame of the +#' distance-based ordination of the samples for plotting when the ordination +#' method was RDA. Used internally by [make_ord_df()]. #' -#' Take a phyloseq object and generate a data frame of the distance-based -#' ordination of the samples for plotting when the ordination method was -#' RDA. Used internally by \code{make_ord_df()} -#' -#' @param ord The ordination object generated by \code{phyloseq::ordinate()}. +#' @param ord The ordination object generated by +#' [phyloseq::ordinate()]. #' @param physeq The phyloseq object we're making things from #' @param axes A numeric vector of the axes to plot make_rda_df = function(ord, physeq, axes){ @@ -129,9 +129,9 @@ make_rda_df = function(ord, physeq, axes){ #' #' Take a phyloseq objet and generate a data frame of the distance-based #' ordination of the samples for plotting when the ordination method was PCoA. -#' Used internally by \code{make_ord_df()} +#' Used internally by `make_ord_df()` #' -#' @param ord The ordination object generated by \code{phyloseq::ordinate()}. +#' @param ord The ordination object generated by `phyloseq::ordinate()`. #' @param physeq The phyloseq object we're making things from. #' @param axes A numeric vector of the axes to include. make_pcoa_df = function(ord, physeq, axes){ diff --git a/R/prop_tax_down.R b/R/prop_tax_down.R index a674328..127af94 100644 --- a/R/prop_tax_down.R +++ b/R/prop_tax_down.R @@ -1,9 +1,9 @@ ##### prop_tax_down ------------------------------------------------------------ -#' Propagate taxon information down an entire \code{tax_table} in a phylose +#' Propagate taxon information down an entire `tax_table` in a phylose #' object #' -#' \code{prop_tax_down} Takes the taxon assignment from the lowest assigned +#' `prop_tax_down` Takes the taxon assignment from the lowest assigned #' level in an OTU's assignment and fills it in to all the lower, unresolved #' fields. #' @@ -15,12 +15,12 @@ #' fields with the string 'o_Enterobacteriales', so they are not parsed as NAs #' by ggplot and other functions. #' -#' @section Value: A phyloseq object whose \code{tax_table} has had NAs replaced +#' @section Value: A phyloseq object whose `tax_table` has had NAs replaced #' with the higher level taxon assignments. #' -#' @param physeq a phyloseq object with a filled \code{tax_table} slot. +#' @param physeq a phyloseq object with a filled `tax_table` slot. #' @param indic a flag to indicate if the taxon names have level indicators. If -#' FALSE, they are added. Just gets passed to \code{\link{prop_tax_tab}}. +#' FALSE, they are added. Just gets passed to [prop_tax_tab()]. #' @param dbig a flag to indicate whether genus names that exist in multiple #' families should be disambiguated by appending the family name. #' @export @@ -36,6 +36,11 @@ prop_tax_down = function(physeq, indic, dbig = TRUE){ colnames(tt) = colnames(phyloseq::tax_table(physeq)) rownames(tt) = sp + # If there are no unspecified taxa, stop + if (!any(is.na(tt))){ + return(physeq) + } + tt = prop_tax_tab(tt, indic) phyloseq::tax_table(physeq) = tt @@ -52,7 +57,7 @@ prop_tax_down = function(physeq, indic, dbig = TRUE){ #' Check for any taxa that have the same name but are in different higher-level #' classifications, and append the family name #' -#' @section Details: \code{dbig_genera} takes a phyloseq object with a +#' @section Details: `dbig_genera` takes a phyloseq object with a #' taxonomy table, disamibugates the genera by appending family names to any #' genus names that are found in multiple families in this data set, and #' returns the object with an update tax_table() object. The input phyloseq @@ -108,9 +113,9 @@ dbig_genera = function(physeq){ ##### prop_tax_tab ------------------------------------------------------------- -#' Propagate taxon information down an entire \code{tax_table} object +#' Propagate taxon information down an entire `tax_table` object #' -#' \code{prop_tax_tab} Takes the taxon assignment from the lowest assigned level +#' `prop_tax_tab` Takes the taxon assignment from the lowest assigned level #' in an OTU's assignment and fills it in to all the lower, unresolved fields. #' #' @section Details: Often taxon cannot be assigned all the way down to genus @@ -121,12 +126,12 @@ dbig_genera = function(physeq){ #' fields with the string 'o_Enterobacteriales', so they are not parsed as NAs #' by ggplot and other functions. #' -#' @section Value: A \code{tax_table} object with NAs replaced with higher-level +#' @section Value: A `tax_table` object with NAs replaced with higher-level #' taxon assignment #' -#' @param taxtab a \code{tax_table} object +#' @param taxtab a `tax_table` object #' @param indic a flag to indicate if the taxon names have level indicators. If -#' FALSE, they are added. Just gets passed to \code{\link{prop_tax_row}}. +#' FALSE, they are added. Just gets passed to [prop_tax_row()]. #' #' @keywords internal prop_tax_tab = function(taxtab, indic){ @@ -144,9 +149,9 @@ prop_tax_tab = function(taxtab, indic){ ##### prop_tax_row ------------------------------------------------------------- -#' Propagate taxon information in a single row of a \code{tax_table} object. +#' Propagate taxon information in a single row of a `tax_table` object. #' -#' \code{prop_tax_row} Takes the taxon assignment from the lowest assigned level +#' `prop_tax_row` Takes the taxon assignment from the lowest assigned level #' in an OTU's assignment and fills it in to all the lower, unresolved fields. #' #' @section Details: Often taxon cannot be assigned all the way down to genus @@ -157,7 +162,7 @@ prop_tax_tab = function(taxtab, indic){ #' fields with the string 'o_Enterobacteriales', so they are not parsed as NAs #' by ggplot and other functions. #' -#' @section Value: A single row \code{tax_table} object to with NAs replaced +#' @section Value: A single row `tax_table` object to with NAs replaced #' with higher-level taxon assignments #' #' @param taxrow A single row in a taxon table diff --git a/R/utils.R b/R/utils.R index 961bde7..6bfc419 100644 --- a/R/utils.R +++ b/R/utils.R @@ -2,7 +2,14 @@ #' Order the levels of one factor by the values of another #' -#' \code{order_levs()} takes two factors, the first of which has values that are +#' @description `r lifecycle::badge("deprecated")` +#' +#' This function will be deprecated because it is just recapitulating +#' functionality provided by `ggplot2::facet_grid()`. If you are using this +#' function and don't want it it go away, please get in touch and let me know +#' what you're using it for. +#' +#' `order_levs()` takes two factors, the first of which has values that are #' nested within the values of the second, and orders the levels of the first #' factor such that they are clustered within the second factor. #' @@ -10,10 +17,11 @@ #' re-ordered. #' #' @param f1 The factor to re-order. Levels of this factor must be nested within -#' \code{f2} -#' @param f2 The factor to use when re-ordering \code{f1}. +#' `f2` +#' @param f2 The factor to use when re-ordering `f1`. #' @export order_levs = function(f1,...){ + lifecycle::deprecate_warn('0.1.0', 'order_levs()') # if (is.numeric(f2)){ # ord = order(...) diff --git a/benchmark/data.RData b/benchmark/data.RData new file mode 100644 index 0000000..8a71486 Binary files /dev/null and b/benchmark/data.RData differ diff --git a/benchmark/expectations/bad_plt.pdf b/benchmark/expectations/bad_plt.pdf new file mode 100644 index 0000000..04413cd Binary files /dev/null and b/benchmark/expectations/bad_plt.pdf differ diff --git a/benchmark/expectations/bot_plt.pdf b/benchmark/expectations/bot_plt.pdf new file mode 100644 index 0000000..4aa6eea Binary files /dev/null and b/benchmark/expectations/bot_plt.pdf differ diff --git a/benchmark/expectations/class_plt.pdf b/benchmark/expectations/class_plt.pdf new file mode 100644 index 0000000..7758a6a Binary files /dev/null and b/benchmark/expectations/class_plt.pdf differ diff --git a/benchmark/expectations/fam_plt.pdf b/benchmark/expectations/fam_plt.pdf new file mode 100644 index 0000000..c08deec Binary files /dev/null and b/benchmark/expectations/fam_plt.pdf differ diff --git a/benchmark/expectations/gen_plt.pdf b/benchmark/expectations/gen_plt.pdf new file mode 100644 index 0000000..f04dc4b Binary files /dev/null and b/benchmark/expectations/gen_plt.pdf differ diff --git a/benchmark/expectations/king_plt.pdf b/benchmark/expectations/king_plt.pdf new file mode 100644 index 0000000..9d8afe9 Binary files /dev/null and b/benchmark/expectations/king_plt.pdf differ diff --git a/benchmark/expectations/left_plt.pdf b/benchmark/expectations/left_plt.pdf new file mode 100644 index 0000000..200b6f3 Binary files /dev/null and b/benchmark/expectations/left_plt.pdf differ diff --git a/benchmark/expectations/ltz_plt.pdf b/benchmark/expectations/ltz_plt.pdf new file mode 100644 index 0000000..63a15b1 Binary files /dev/null and b/benchmark/expectations/ltz_plt.pdf differ diff --git a/benchmark/expectations/mn_dn_frgt_plt.pdf b/benchmark/expectations/mn_dn_frgt_plt.pdf new file mode 100644 index 0000000..80f670c Binary files /dev/null and b/benchmark/expectations/mn_dn_frgt_plt.pdf differ diff --git a/benchmark/expectations/mn_dn_plt.pdf b/benchmark/expectations/mn_dn_plt.pdf new file mode 100644 index 0000000..0d74f65 Binary files /dev/null and b/benchmark/expectations/mn_dn_plt.pdf differ diff --git a/benchmark/expectations/mn_frgt_plt.pdf b/benchmark/expectations/mn_frgt_plt.pdf new file mode 100644 index 0000000..b17a57e Binary files /dev/null and b/benchmark/expectations/mn_frgt_plt.pdf differ diff --git a/benchmark/expectations/mn_plt.pdf b/benchmark/expectations/mn_plt.pdf new file mode 100644 index 0000000..2358694 Binary files /dev/null and b/benchmark/expectations/mn_plt.pdf differ diff --git a/benchmark/expectations/nm_gd_plt.pdf b/benchmark/expectations/nm_gd_plt.pdf new file mode 100644 index 0000000..e295445 Binary files /dev/null and b/benchmark/expectations/nm_gd_plt.pdf differ diff --git a/benchmark/expectations/nmbd_col_plt.pdf b/benchmark/expectations/nmbd_col_plt.pdf new file mode 100644 index 0000000..5baf064 Binary files /dev/null and b/benchmark/expectations/nmbd_col_plt.pdf differ diff --git a/benchmark/expectations/none_plt.pdf b/benchmark/expectations/none_plt.pdf new file mode 100644 index 0000000..f40ab51 Binary files /dev/null and b/benchmark/expectations/none_plt.pdf differ diff --git a/benchmark/expectations/null_plt.pdf b/benchmark/expectations/null_plt.pdf new file mode 100644 index 0000000..9b0484d Binary files /dev/null and b/benchmark/expectations/null_plt.pdf differ diff --git a/benchmark/expectations/ord_plt.pdf b/benchmark/expectations/ord_plt.pdf new file mode 100644 index 0000000..afaa3df Binary files /dev/null and b/benchmark/expectations/ord_plt.pdf differ diff --git a/benchmark/expectations/phy_plt.pdf b/benchmark/expectations/phy_plt.pdf new file mode 100644 index 0000000..dbb068b Binary files /dev/null and b/benchmark/expectations/phy_plt.pdf differ diff --git a/benchmark/expectations/right_plt.pdf b/benchmark/expectations/right_plt.pdf new file mode 100644 index 0000000..5694722 Binary files /dev/null and b/benchmark/expectations/right_plt.pdf differ diff --git a/benchmark/expectations/rot_plt.pdf b/benchmark/expectations/rot_plt.pdf new file mode 100644 index 0000000..7cd12f5 Binary files /dev/null and b/benchmark/expectations/rot_plt.pdf differ diff --git a/benchmark/expectations/sml_plt.pdf b/benchmark/expectations/sml_plt.pdf new file mode 100644 index 0000000..203441b Binary files /dev/null and b/benchmark/expectations/sml_plt.pdf differ diff --git a/benchmark/expectations/spec_plt.pdf b/benchmark/expectations/spec_plt.pdf new file mode 100644 index 0000000..a89b033 Binary files /dev/null and b/benchmark/expectations/spec_plt.pdf differ diff --git a/benchmark/expectations/test_plot_read_depth.html b/benchmark/expectations/test_plot_read_depth.html new file mode 100644 index 0000000..1f92a3f --- /dev/null +++ b/benchmark/expectations/test_plot_read_depth.html @@ -0,0 +1,540 @@ + + + + +
+ + + + + + + + +# Import Data
+
+library(tidyverse)
+## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
+## ✔ dplyr 1.1.2 ✔ readr 2.1.4
+## ✔ forcats 1.0.0 ✔ stringr 1.5.0
+## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
+## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
+## ✔ purrr 1.0.1
+## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
+## ✖ dplyr::filter() masks stats::filter()
+## ✖ dplyr::lag() masks stats::lag()
+## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
+theme_set(theme_bw())
+library(phyloseq)
+devtools::load_all()
+## ℹ Loading AfterSl1p
+load('~/Projects/Mine/aftersl1p/benchmark/data.RData')
+
+seqs = rownames(asv_tab)
+all(seqs == rownames(tax_tab))
+## [1] TRUE
+rownames(asv_tab) = rownames(tax_tab) = NULL
+head(map_tab)
+## SampleID Source
+## 1 SHCM32 Stool
+## 2 SHCM33 Stool
+## 3 SHCM34 Stool
+## 4 SHCM35 Stool
+## 5 MikeAnested Swab
+## 6 MikeBnested Swab
+map_tab = (map_tab
+ %>% mutate(ContVar = c(13.6, 8.3, 2.8, 15.5, 10, 6.6, 13.0, 11.5)))
+rownames(map_tab) = map_tab$SampleID
+
+setdiff(colnames(asv_tab), rownames(map_tab))
+## character(0)
+asv_tab = as.matrix(asv_tab)
+tax_tab = as.matrix(tax_tab)
+dat = phyloseq(otu_table(asv_tab, taxa_are_rows = TRUE),
+ tax_table(tax_tab),
+ sample_data(map_tab))
+
+# Clean Data
+dat
+## phyloseq-class experiment-level object
+## otu_table() OTU Table: [ 830 taxa and 8 samples ]
+## sample_data() Sample Data: [ 8 samples by 3 sample variables ]
+## tax_table() Taxonomy Table: [ 830 taxa by 7 taxonomic ranks ]
+dat = prop_tax_down(dat, indic = FALSE)
+dat_rel = transform_sample_counts(dat, function(x) x/sum(x))
+keep = taxa_sums(dat)/nsamples(dat) >= 100
+dat_filt = prune_taxa(keep, dat)
+dat_rel_filt = prune_taxa(keep, dat_rel)
+
+col_vect = c("#E41A1C","#377EB8","#4DAF4A","#984EA3","#FF7F00", 'dodgerblue2',
+ 'goldenrod3', 'pink')
+plt_read_depth(dat_filt)
+
+plt_read_depth(dat_filt, xvar = 'Source')
+
+plt_read_depth(dat_filt, cvar = 'ContVar')
+
+undebug(plt_read_depth)
+## Warning in undebug(plt_read_depth): argument is not being debugged
+plt_read_depth(dat_filt, cvar = 'Source', xvar = 'SampleID')
+
+plt_read_depth(dat_filt, cvar = 'Source', xvar = 'Source')
+
+plt_read_depth(dat_filt, cvar = 'SampleID')
+
+plt_read_depth(dat_filt, clrs = col_vect[1])
+
+devtools::load_all()
+## ℹ Loading AfterSl1p
+plt_read_depth(dat_filt, clrs = col_vect)
+## Warning: With no cvar specified, only the first colour in "clrs" will be used.
+
+Raises an error. See testthat
tests
col_hl = col_vect[1:2]
+names(col_hl) = c('low','high')
+col_hl
+## low high
+## "#E41A1C" "#377EB8"
+plt_read_depth(dat_filt, cvar = 'ContVar', clrs = col_hl)
+
+devtools::load_all()
+## ℹ Loading AfterSl1p
+plt_read_depth(dat_filt, cvar = 'SampleID', clrs = col_vect)
+
+# Import Data
+
+library(tidyverse)
+## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
+## ✔ dplyr 1.1.1 ✔ readr 2.1.4
+## ✔ forcats 1.0.0 ✔ stringr 1.5.0
+## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
+## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
+## ✔ purrr 1.0.1
+## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
+## ✖ dplyr::filter() masks stats::filter()
+## ✖ dplyr::lag() masks stats::lag()
+## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
+theme_set(theme_bw())
+library(phyloseq)
+devtools::load_all('~/Projects/Mine/aftersl1p/')
+## ℹ Loading AfterSl1p
+load('~/Projects/Mine/aftersl1p/benchmark/data.RData')
+
+seqs = rownames(asv_tab)
+all(seqs == rownames(tax_tab))
+## [1] TRUE
+rownames(asv_tab) = rownames(tax_tab) = NULL
+
+rownames(map_tab) = map_tab$SampleID
+
+setdiff(colnames(asv_tab), rownames(map_tab))
+## character(0)
+asv_tab = as.matrix(asv_tab)
+tax_tab = as.matrix(tax_tab)
+dat = phyloseq(otu_table(asv_tab, taxa_are_rows = TRUE),
+ tax_table(tax_tab),
+ sample_data(map_tab))
+
+# Clean Data
+dat
+## phyloseq-class experiment-level object
+## otu_table() OTU Table: [ 830 taxa and 8 samples ]
+## sample_data() Sample Data: [ 8 samples by 2 sample variables ]
+## tax_table() Taxonomy Table: [ 830 taxa by 7 taxonomic ranks ]
+dat = prop_tax_down(dat, indic = FALSE)
+dat_rel = transform_sample_counts(dat, function(x) x/sum(x))
+keep = taxa_sums(dat)/nsamples(dat) >= 100
+dat_filt = prune_taxa(keep, dat)
+dat_rel_filt = prune_taxa(keep, dat_rel)
+king_df = make_phy_df(dat_rel_filt, 'Kingdom', prop = FALSE)
+king_plt = plot_tax_bar(king_df, 'Kingdom', sample = 'SampleID')
+king_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/king_plt.pdf',
+ width = 10, height = 5)
+phy_df = make_phy_df(dat_rel_filt, 'Phylum', prop = FALSE)
+phy_plt = plot_tax_bar(phy_df, 'Phylum', sample = 'SampleID')
+phy_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/phy_plt.pdf',
+ width = 10, height = 5)
+
+plot_tax_bar(phy_df, 'Kingdom', sample = 'SampleID')
+
+class_df = make_phy_df(dat_rel_filt, 'Class', prop = FALSE)
+class_plt = plot_tax_bar(class_df, 'Class', sample = 'SampleID')
+class_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/class_plt.pdf',
+ width = 10, height = 5)
+plot_tax_bar(class_df, 'Phylum', sample = 'SampleID')
+
+plot_tax_bar(class_df, 'Kingdom', sample = 'SampleID')
+
+ord_df = make_phy_df(dat_rel_filt, 'Order', prop = FALSE)
+ord_plt = plot_tax_bar(ord_df, 'Order', sample = 'SampleID')
+ord_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/ord_plt.pdf',
+ width = 10, height = 5)
+plot_tax_bar(ord_df, 'Class', sample = 'SampleID')
+
+plot_tax_bar(ord_df, 'Phylum', sample = 'SampleID')
+
+plot_tax_bar(ord_df, 'Kingdom', sample = 'SampleID')
+
+fam_df = make_phy_df(dat_rel_filt, 'Family', prop = FALSE)
+fam_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID')
+fam_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/fam_plt.pdf',
+ width = 10, height = 5)
+plot_tax_bar(fam_df, 'Order', sample = 'SampleID')
+
+plot_tax_bar(fam_df, 'Class', sample = 'SampleID')
+
+plot_tax_bar(fam_df, 'Phylum', sample = 'SampleID')
+
+plot_tax_bar(fam_df, 'Kingdom', sample = 'SampleID')
+
+gen_df = make_phy_df(dat_rel_filt, 'Genus', prop = FALSE)
+gen_plt = plot_tax_bar(gen_df, 'Genus', sample = 'SampleID')
+gen_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/gen_plt.pdf',
+ width = 10, height = 5)
+plot_tax_bar(gen_df, 'Family', sample = 'SampleID')
+
+plot_tax_bar(gen_df, 'Order', sample = 'SampleID')
+
+plot_tax_bar(gen_df, 'Class', sample = 'SampleID')
+
+plot_tax_bar(gen_df, 'Phylum', sample = 'SampleID')
+
+plot_tax_bar(gen_df, 'Kingdom', sample = 'SampleID')
+
+spec_df = make_phy_df(dat_rel_filt, 'Species', prop = FALSE)
+spec_plt = plot_tax_bar(spec_df, 'Species', sample = 'SampleID')
+spec_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/spec_plt.pdf',
+ width = 10, height = 5)
+plot_tax_bar(spec_df, 'Genus', sample = 'SampleID')
+
+plot_tax_bar(spec_df, 'Family', sample = 'SampleID')
+
+plot_tax_bar(spec_df, 'Order', sample = 'SampleID')
+
+plot_tax_bar(spec_df, 'Class', sample = 'SampleID')
+
+plot_tax_bar(spec_df, 'Phylum', sample = 'SampleID')
+
+plot_tax_bar(spec_df, 'Kingdom', sample = 'SampleID')
+
+null_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', leglen = NULL)
+null_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/null_plt.pdf',
+ width = 10, height = 5)
+too_big_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID',
+ leglen = n_distinct(fam_df$Family) + 2)
+## Warning: leglen is greater than the number of unique taxa. showing all taxa.
+too_big_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/too_big_plt.pdf',
+ width = 10, height = 5)
+sml_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID',
+ leglen = round(n_distinct(fam_df$Family)/2))
+sml_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/sml_plt.pdf',
+ width = 10, height = 5)
+zero_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID',
+ leglen = 0)
+zero_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/zero_plt.pdf',
+ width = 10, height = 5)
+ltz_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID',
+ leglen = -1)
+## Warning: leglen can not be negative. treating it like 0.
+ltz_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/ltz_plt.pdf',
+ width = 10, height = 5)
+This raises an error. Test is via testthat
top_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', leglen = 10,
+ legloc = 'top')
+top_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/top_plt.pdf',
+ width = 10, height = 5)
+bot_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', leglen = 10,
+ legloc = 'bottom')
+bot_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/bot_plt.pdf',
+ width = 10, height = 5)
+right_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', leglen = 10,
+ legloc = 'right')
+right_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/right_plt.pdf',
+ width = 10, height = 5)
+left_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', leglen = 10,
+ legloc = 'left')
+left_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/left_plt.pdf',
+ width = 10, height = 5)
+none_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', leglen = 10,
+ legloc = 'none')
+none_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/none_plt.pdf',
+ width = 10, height = 5)
+bad_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', leglen = 10,
+ legloc = 3)
+bad_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/bad_plt.pdf',
+ width = 10, height = 5)
+cols = c("#E41A1C","#377EB8","#4DAF4A","#984EA3","#FF7F00","#FFFF33","#A65628",
+ "#F781BF")
+nmcols = cols
+names(nmcols) = c('Other',
+ rev(levels(class_df$Class)[-1]))
+nm_gd_plt = plot_tax_bar(class_df, 'Class', colours = nmcols,
+ sample = 'SampleID')
+nm_gd_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/nm_gd_plt.pdf',
+ width = 10, height = 5)
+This raises an error. Test is via testthat
.
nmbd_col_plt = plot_tax_bar(class_df, 'Class', colours = nmcols[1:3],
+ sample = 'SampleID')
+## Warning: The supplied colour vector is shorter than the number of distinct
+## taxa.
+nmbd_col_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/nmbd_col_plt.pdf',
+ width = 10, height = 5)
+un_col_plt = plot_tax_bar(class_df, 'Class', colours = cols,
+ sample = 'SampleID')
+un_col_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/un_col_plt.pdf',
+ width = 10, height = 5)
+unbd_col_plt = plot_tax_bar(class_df, 'Class', colours = cols[1:3],
+ sample = 'SampleID')
+## Warning: The supplied colour vector is shorter than the number of distinct
+## taxa.
+unbd_col_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/unbd_col_plt.pdf',
+ width = 10, height = 5)
+rot_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', r_ticks = TRUE)
+rot_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/rot_plt.pdf',
+ width = 10, height = 5)
+mn_plt = plot_tax_bar(fam_df, 'Family', sample = 'Source', means = TRUE)
+mn_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/mn_plt.pdf',
+ width = 10, height = 5)
+mn_dn_plt = plot_tax_bar(filter(fam_df, SampleID != 'SHCM34'), 'Family',
+ sample = 'Source', means = TRUE)
+mn_dn_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/mn_dn_plt.pdf',
+ width = 10, height = 5)
+mn_frgt_plt = plot_tax_bar(fam_df, 'Family', sample = 'Source')
+## Warning: Your per-sample abundances sum to >1. Did you mean to specify 'means =
+## TRUE'?
+mn_frgt_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/mn_frgt_plt.pdf',
+ width = 10, height = 5)
+mn_dn_frgt_plt = plot_tax_bar(filter(fam_df, SampleID != 'SHCM34'), 'Family',
+ sample = 'Source')
+## Warning: Your per-sample abundances sum to >1. Did you mean to specify 'means =
+## TRUE'?
+mn_dn_frgt_plt
+
+ggsave('~/Projects/Mine/aftersl1p/benchmark/mn_dn_frgt_plt.pdf',
+ width = 10, height = 5)
+