diff --git a/.Rbuildignore b/.Rbuildignore index e157760..65ede9b 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,3 +4,5 @@ ^NAMESPACE-old$ ^CHANGELOG\.md$ ^data-raw$ +^benchmark/* +^ignore_me.R$ diff --git a/.gitignore b/.gitignore index e3d674c..5b0e3e9 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,4 @@ test_data/* ring_dir private -*.~lock* +ignore_me.R diff --git a/CHANGELOG.md b/CHANGELOG.md index 589cc49..20c5815 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,45 @@ +* 2023-11-21 v0.1.0 + * **BREAKING CHANGES** + * **Completely re-writes `plot_read_depth()`.** + * allows users to plot read depth with a variable on the X axis and + a colour parameter + * users can access the old function with `plt_read_depth()` + temporarily, but this will be removed before the next full release. + * **`rank_abund()` is broken and is no longer exported.** Please file a + bug report if you were using this function. + * exports `order_taxa()`, by request + * makes `prop_tax_down()` slightly more efficient by checking up front if + there is nothing to do. + * deprecates `order_levs()` because it isn't used anywhere. Its intended + function is performed by `order_taxa()`. + * introduces visual and automatic testing of the new `plot_read_depth()` + function. + * in `plot_tax_bar()` + * the `legloc` argument is now passed directly to + `ggplot2::theme(legend.position)` and can take any value that can take. + * added a `r_ticks` argument. FALSE by default (default behaviour + is unchanged). If TRUE, the tick text on the x-axis is rotated 90 + degrees and reads down to up. + * introduced improved functionality when a custom colour vector is used, + with and without names + * introduce a `leglen` option to allow the user to limit how many taxa + are displayed in the legend without removing any taxa from the plot. + * soft-deprecate the `yscale` argument. Will stop supporting non-linear + y-axes soon + * improve error when the `rank` argument is missing from the input dat + frame + * introduce a warning when the per-sample abundaces sum to greater than + 1 but the `mean` argument is not set to `TRUE`. + * prep the function so I can stop exporting the whole `ggplot2` + namespace + * introduce lifecycle management with the `lifecycle()` package + * start using roxygen2md to use Markdown in documentation. + * introduce the `benchmark` folder which contains "good" plotting outputs + against which new versions of the package can be tested. Created a .Rmd + file in that folder which tests `plot_tax_bar()`. + * remove the files that held the old colour vectors + + * 2023-04-14 v0.0.1 (was v1.0.1) * The multiple colour vectors have been replaced with a single object, `tax_colours`, which will cycle if there are more than 30 taxa. Having more diff --git a/DESCRIPTION b/DESCRIPTION index eb15cac..7f6285c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: AfterSl1p Type: Package Title: Generate Summary Graphics and Basic Analysis of 16s Data -Version: 0.0.1 +Version: 0.1.0 Author: J. C. Szamosi and Shahrokh Shekarriz Authors@R: c(person("JC", "Szamosi", email = "szamosjc@mcmaster.ca", role = c('aut','cre')), @@ -19,6 +19,7 @@ Depends: Imports: dplyr (>= 0.7.2), ggplot2 (>= 2.2.1), + lifecycle, phyloseq (>= 1.19.1), pipeR, rlang (>= 0.1.2), @@ -28,3 +29,4 @@ RoxygenNote: 7.2.3 Suggests: testthat (>= 3.0.0) Config/testthat/edition: 3 +Roxygen: list(markdown = TRUE) diff --git a/NAMESPACE b/NAMESPACE index 50eea04..ba0d364 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,12 +4,15 @@ export(long_distance_df) export(make_ord_df) export(make_phy_df) export(order_levs) +export(order_taxa) export(plot_read_depth) export(plot_tax_bar) export(plt_ord) +export(plt_read_depth) export(prop_tax_down) -export(rank_abund) export(rotate_ticks) +export(tax_colours) import(ggplot2) import(pipeR) import(rlang) +importFrom(lifecycle,deprecated) diff --git a/R/AfterSl1p-package.R b/R/AfterSl1p-package.R index 925e6d4..94f1515 100644 --- a/R/AfterSl1p-package.R +++ b/R/AfterSl1p-package.R @@ -2,8 +2,9 @@ "_PACKAGE" ## usethis namespace: start -#' @import rlang -#' @import pipeR #' @import ggplot2 +#' @import pipeR +#' @import rlang +#' @importFrom lifecycle deprecated ## usethis namespace: end NULL diff --git a/R/data.R b/R/data.R index b23da31..33a183d 100644 --- a/R/data.R +++ b/R/data.R @@ -2,13 +2,15 @@ #' #' A vector of colours for use with taxa bar charts. #' -#' @section Details: There are 30, and grey will be added by the -#' \code{plot_tax_bar} function for the "Other" category. If a plot calls for -#' more than 30 colours, this will just recycle. That is usually fine because -#' low-abundance stuff can't be seen anyway, but if you have a situation where -#' you have more than 30 things that actually need to be distinguished, you'll -#' need to provide your own vector. Also, if you are in that situation, try to -#' stop. +#' @section Details: There are 30 colours, and grey will be added by the +#' [plot_tax_bar()] function for the "Other" category. If a plot +#' calls for more than 30 colours, this will just recycle. That is usually +#' fine because low-abundance stuff can't be seen anyway, but if you have a +#' situation where you have more than 30 things that actually need to be +#' distinguished, you'll need to provide your own vector. Also, if you are in +#' that situation, try to find another way to do what you are doing. People +#' cannot generally distinguish anywhere near 30 colours on a single plot. +#' @export tax_colours = c("#87c5ab","#eea27c","#a9a8d2","#ffff99","#9999ff","#fb8072", "#80b1d3","#fdb462","#b3de69","#fccde5","#bc80bd","#ccebc5", "#ffed6f","#a6cee3","#fb9a99","#fdbf6f","#d38d99","#b3b3ff", diff --git a/R/long_distance_df.R b/R/long_distance_df.R index 0c61dca..7170f4e 100644 --- a/R/long_distance_df.R +++ b/R/long_distance_df.R @@ -3,13 +3,13 @@ #' Create a long data frame of among-sample distances #' #' -#' \code{long_distance_df} creates a long data frame of all the pairwise +#' `long_distance_df` creates a long data frame of all the pairwise #' distances from a sample distance matrix (e.g. the output of -#' \code{\link{phyloseq::distance}}) with all the metadata listed for each sample. -#' Allows for easy within- and among-group boxplots, or whatever other -#' comparisons are of interest. +#' [phyloseq::distance()]) with all the metadata listed for +#' each sample. Allows for easy within- and among-group boxplots, or whatever +#' other comparisons are of interest. #' -#' @section Value: A data frame \eqn{N(N-1)} (or \eqn{N^2} if \code{diag = TRUE} +#' @section Value: A data frame \eqn{N(N-1)} (or \eqn{N^2} if `diag = TRUE` #' is set) rows (where N is the number of samples) with sample IDs, metadata, #' and pairwise distances listed for each pair of samples. Sample ID and #' metadata columns have '1' or '2' appended to them so the user can tell @@ -18,22 +18,22 @@ #' names as row and column names. #' @param metadat A data frame or data frame-like object with the data set's #' metadata -#' @param idcol (\code{'X.SampleID'}.) A string. The column in \code{metadat} +#' @param idcol (`'X.SampleID'`.) A string. The column in `metadat` #' that holds the sample names. Sample names should match the row/column namse #' of the distance matrix. If there are samples in the metadata data frame #' that are missing from the distance matrix, they will be excluded with a #' warning. If there are samples in the distance matrix that are missing from #' the metadata, you will get an error. -#' @param diag (\code{FALSE}.) Logical. Whether the diagonal elements (zeros in +#' @param diag (`FALSE`.) Logical. Whether the diagonal elements (zeros in #' a distance matrix) should be included in the long data frame. Defaults to -#' \code{FALSE} because we almost never want them. -#' @param suff (\code{c('1','2')}.) A character vector of length 2. The suffixes +#' `FALSE` because we almost never want them. +#' @param suff (`c('1','2')`.) A character vector of length 2. The suffixes #' to be appended to the metadata column names in the output. The two elements #' must not be identical. -#' @param distcol (\code{'Distance'}.) A string. The desired column name for the +#' @param distcol (`'Distance'`.) A string. The desired column name for the #' distance column in your long data frame. Only here to avoid clashes with #' existing metadata column names. -#' @param baseline (\code{'NULL'}). A dataframe whose column names must also be +#' @param baseline (`'NULL'`). A dataframe whose column names must also be #' column names in the metadat data frame, and whose rows contain a subset of #' the possible values/combinations. If this parameter is used, all the #' samples whose metadata matches a row in this data frame will end up in @@ -79,13 +79,9 @@ long_distance_df = function(dmat, metadat, idcol = 'X.SampleID', diag = FALSE, return(lddf) } - - -## Functions to generate distance bar charts ----------------------------------- - ### lddf_check ----------------------------------------------------------------- -#' Check the inputs of \code{long_distance_df()} +#' Check the inputs of `long_distance_df()` #' #' For internal use only lddf_check = function(dmat, metadat, idcol = 'X.SampleID', diag = FALSE, @@ -131,26 +127,28 @@ lddf_check = function(dmat, metadat, idcol = 'X.SampleID', diag = FALSE, #' Does the actual gathering and spreading without testing assumptions #' -#' \code{lddf_work} Does the actual gathering, spreading, and joining associated -#' with making the lddf, but without checking if the distance matrix is sensible -#' or removing diagonals and repeats. This is for when you know what you're -#' doing and have trimmed your distance matrix down to only what you know you -#' need. Good for permutation tests. +#' `lddf_work` Used internally by `long_distance_df()`. I recommend +#' you use that function unless you really know what you're doing. This function +#' does the actual gathering, spreading, and joining associated with making the +#' lddf, but without checking if the distance matrix is sensible or removing +#' diagonals and repeats. Use this function if you know exactly what you want +#' and have trimmed your distance matrix down to only what you know you need. +#' Good for permutation tests. #' #' @param dmat A distance matrix or other diagonal matrix object with sample #' names as row and column names. #' @param metadat A data frame or data frame-like object with the data set's #' metadata -#' @param idcol (\code{'X.SampleID'}.) A string. The column in \code{metadat} +#' @param idcol (`'X.SampleID'`.) A string. The column in `metadat` #' that holds the sample names. Sample names should match the row/column namse #' of the distance matrix. If there are samples in the metadata data frame #' that are missing from the distance matrix, they will be excluded with a #' warning. If there are samples in the distance matrix that are missing from #' the metadata, you will get an error. -#' @param suff (\code{c('1','2')}.) A character vector of length 2. The suffixes +#' @param suff (`c('1','2')`.) A character vector of length 2. The suffixes #' to be appended to the metadata column names in the output. The two elements #' must not be identical. -#' @param distcol (\code{'Distance'}.) A string. The desired column name for the +#' @param distcol (`'Distance'`.) A string. The desired column name for the #' distance column in your long data frame. Only here to avoid clashes with #' existing metadata column names. lddf_work = function(dmat, metadat, idcol = 'X.SampleID', suff = c('1','2'), diff --git a/R/make_phy_df.R b/R/make_phy_df.R index 4f62b60..c4f7ffe 100644 --- a/R/make_phy_df.R +++ b/R/make_phy_df.R @@ -2,7 +2,7 @@ #' Generate a Data Frame for Taxon Bar Charts #' -#' \code{make_phy_df} generates a data frame that is useful for generating taxon +#' `make_phy_df` generates a data frame that is useful for generating taxon #' bar charts. #' #' @section Details: This function takes a phyloseq object and generates a data @@ -13,8 +13,8 @@ #' abundance, and weird things will happen if it is not. #' #' @section Value: A data frame similar in structure to that generated by -#' \code{psmelt}, but with an 'Other' category added and taxon levels ordered -#' for use in plotting. +#' [phyloseq::psmelt()], but with an 'Other' category added and +#' taxon levels ordered for use in plotting. #' #' @param physeq A phyloseq object. #' @param rank The rank at which to glom taxa. Must be one of 'OTU', 'Genus', @@ -22,14 +22,14 @@ #' @param cutoff The abundance cutoff below which taxa are grouped into 'Other'. #' If you don't want anything grouped into 'Other', set this to 0. Default is #' 0.001. -#' @param indic a flag to indicate if the taxon names have level indicators. -#' If FALSE, they are added. +#' @param indic a flag to indicate if the taxon names have level indicators. If +#' FALSE, they are added. #' @param prop Specifies whether taxa need to be propogated down the taxonomy -#' table (default, TRUE) or if this has already been done. -#' @param count If FALSE (default) the function will expect a relative abundance -#' table and create an 'Other' category for taxa below the cutoff (and will -#' raise an error if the table is not relative abundance). If TRUE, the -#' function will not check for relative abundance and will not create an +#' table (default is `TRUE`) or if this has already been done. +#' @param count If `FALSE` (default) the function will expect a relative +#' abundance table and create an 'Other' category for taxa below the cutoff +#' (and will raise an error if the table is not relative abundance). If TRUE, +#' the function will not check for relative abundance and will not create an #' 'Other' category. #' @export make_phy_df = function(physeq, rank = 'Genus', cutoff = 0.001, indic = FALSE, @@ -41,7 +41,7 @@ make_phy_df = function(physeq, rank = 'Genus', cutoff = 0.001, indic = FALSE, stop('physeq must be a relative abundance table. You have counts > 1.') } ranks = colnames(phyloseq::tax_table(physeq)) - if (rank == 'OTU'){ + if ((rank == 'OTU') & !('OTU' %in% ranks)) { ranks = c(ranks, rank) } @@ -131,7 +131,7 @@ remain = function(x, tot = 1){ #' Order Taxon Name Factors #' -#' \code{order_taxa} reorders the taxon names in a taxon column (e.g. 'Class' or +#' `order_taxa()` reorders the taxon names in a taxon column (e.g. 'Class' or #' 'Phylum') by the taxon's mean abundance (but always makes sure to put Other #' first). #' @@ -139,19 +139,20 @@ remain = function(x, tot = 1){ #' specified column re-ordered by its mean abundance #' #' @param phy_df A data frame of a phyloseq object, as produced by -#' \code{\link{psmelt}} or \code{\link{make_phy_df}}. +#' [phyloseq::psmelt()] or [make_phy_df()]. #' @param rank The name of the column to be re-ordered #' @param abund The name of the abundances column. Defaults to 'Abundance' #' @param decreasing Specifies whether the taxon order should be based on #' decreasing or increasing abundance. Defaults to FALSE. +#' @export order_taxa = function(phy_df, rank, abund = 'Abundance', decreasing = FALSE){ phy_df[,rank] = factor(phy_df[,rank]) - phy_df %>% - dplyr::filter(UQ(sym(rank)) != 'Other') %>% - dplyr::group_by(UQ(sym(rank))) %>% - dplyr::summarize(Tot = sum(UQ(sym(abund)))) %>% - data.frame() -> total_abunds + total_abunds = (phy_df + %>% dplyr::filter(.data[[rank]] != 'Other') + %>% dplyr::group_by(.data[[rank]]) + %>% dplyr::summarize(Tot = sum(.data[[abund]])) + %>% data.frame()) lev_ord = levels(droplevels(total_abunds[,rank])) if (decreasing){ diff --git a/R/plot_read_depth.R b/R/plot_read_depth.R index 699ff45..73938ab 100644 --- a/R/plot_read_depth.R +++ b/R/plot_read_depth.R @@ -1,19 +1,196 @@ -#### plot_read_depth ----------------------------------------------------------- +#### plot_read_depth ------------------------------------------------------------ + +#' View read depth +#' +#' @section Details: Creates a read depth boxplot +#' +#' @section Value: A [ggplot2] object +#' +#' @param physeq A phyloseq object +#' @param xvar `NULL` A variable (column name) from `sample_data(physeq)` by +#' which to group samples on the x-axis. Must be categorical. If `NULL`, no +#' grouping will be performed. +#' @param lin `FALSE` If `TRUE`, the y-axis will be on a linear scale. By +#' default it is log-scaled. +#' @param cvar `NULL` A variable (column name) from `sample_data(physeq)` by +#' which the points should be coloured. If `NULL`, all points will be black. +#' May be continuous or categorical. See the `clrs` argument if you want to +#' specify your colours. +#' @param clrs `NULL` If the `cvar` parameter is set and this is unset, colours +#' will be taken from [tax_colours] if `cvar` is categorical and use +#' [ggplot2]'s default (blue) if `cvar` is continuous. If `cvar` is +#' categorical, you may provide a named colour vector to use instead of +#' [tax_colours] and if it is continuous you may provide a two-colour vector. +#' The vector must have names "low" and "high". If `cvar` is NOT set, you may +#' use this parameter to provide a single colour name or hex code that will be +#' applied to all points. +#' @export +plot_read_depth = function(physeq, xvar = NULL, lin = FALSE, cvar = NULL, + clrs = NULL){ + # Check inputs + sample_sum_df = data.frame(Total = phyloseq::sample_sums(physeq), + phyloseq::sample_data(physeq)) + + prd_check(sample_sum_df, cvar, xvar) + + # Deal with the colour vector + clrs = prd_clrs(sample_sum_df, cvar, clrs) + + + # Construct the plot foundation with or without a grouping variable + if (is.null(xvar)){ + depth_plot = ggplot2::ggplot(sample_sum_df, + ggplot2::aes(x = 'All', y = Total)) + } else { + depth_plot = ggplot2::ggplot(sample_sum_df, + ggplot2::aes(x = .data[[xvar]], y = Total)) + } + + # Add the jittered points, taking colour into account + # No colouring variable + if (is.null(cvar)){ + # And no specified colour + if (is.null(clrs)){ + depth_plot = depth_plot + ggplot2::geom_jitter(width = 0.2) + # And colour is specified + } else { + depth_plot = depth_plot + ggplot2::geom_jitter(width = 0.2, + colour = clrs) + } + # colouring variable is specified and continuous + } else if (is.numeric(sample_sum_df[[cvar]])) { + depth_plot = depth_plot + ggplot2::geom_jitter(width = 0.2, + ggplot2::aes(colour = .data[[cvar]])) + # if clrs not specified, do nothing. defaults will do. + if (is.null(clrs)){ + NULL + # if clrs is specified, use it + } else { + depth_plot = depth_plot + + ggplot2::scale_colour_gradient(low = clrs['low'], + high = clrs['high']) + } + # if colouring variable is specified and categorical + } else { + depth_plot = depth_plot + ggplot2::geom_jitter(width = 0.2, + aes(colour = .data[[cvar]])) + + ggplot2::scale_colour_manual(values = clrs) + } + + if (!lin){ + depth_plot = depth_plot + ggplot2::scale_y_log10() + } + return(depth_plot) +} + +#### prd_check() --------------------------------------------------------------- + +#' Check inputs for plot_read_depth() +#' +#' Checks the inputs of `plot_read_depth()` +#' @param sample_sum_df data frame of sample sums and sample data +#' @param cvar,xvar passed through from `plot_read_depth()` + +prd_check = function(sample_sum_df, cvar, xvar){ + # Are cvar and xvar columns? + if (!is.null(xvar) && !(xvar %in% colnames(sample_sum_df))){ + stop(paste('"xvar" must be one of the columns in sample_data(physeq).')) + } + + if (!is.null(cvar) && !(cvar %in% colnames(sample_sum_df))){ + stop(paste('"cvar" must be one of the columns in sample_data(physeq).')) + } +} + +#### prd_clrs() ---------------------------------------------------------------- + +#' Deal w colour vector for plot_read_depth() +#' +#' @param cvar,clrs passed through from `plot_read_depth()` +prd_clrs = function(sample_sum_df, cvar, clrs){ + # cvar unset, clrs set + if (is.null(cvar) & !is.null(clrs)){ + if (length(clrs) > 1){ + warn(paste('With no cvar specified, only the first colour in "clrs"', + 'will be used.')) + clrs = clrs[1] + return(clrs) + } + } else if (!is.null(cvar)){ + # Built-in colour vector + if (is.null(clrs)){ + # Categorical + if (is.factor(sample_sum_df[[cvar]]) || + is.character(sample_sum_df[[cvar]])){ + n = dplyr::n_distinct(sample_sum_df[[cvar]]) + nx = ceiling(n/length(tax_colours)) + clrs = rep(tax_colours, nx) + return(clrs) + + # continuous requires no action + } else { + return(clrs) + } + # User-specified colour vector + } else { + # Categorical + if (is.factor(sample_sum_df[[cvar]]) | + is.character(sample_sum_df[[cvar]])){ + if (is.null(names(clrs))){ + return(clrs) + # nothing to do here. use it as is + + } else if (!all(unique(sample_sum_df) %in% names(clrs))){ + stop(paste('If "clrs" is named, its names must contain', + 'all the values in the "cvar" column')) + } + # if we get this far, the colour vector is fine. + + return(clrs) + + # Continuous + } else { + if (is.null(names(clrs)) | + !all(c('high','low') %in% names(clrs))){ + stop(paste('With a continuous "cvar", the provided colour', + 'vector "clrs" must have names "high" and "low".')) + } else { + # if we get this far, the colour vector is fine. + return(clrs) + } + return(clrs) + } + return(clrs) + } + return(clrs) + } + return(clrs) +} + +#### plt_read_depth ----------------------------------------------------------- #' Plot read depth #' -#' Creates a read depth histogram. +#' @describeIn +#' `r lifecycle::badge("deprecated")` +#' +#' Creates a read depth histogram. Deprecated in favour of `plot_read_depth()` #' @param physeq A phyloseq object #' @export -plot_read_depth = function(physeq){ - sample_sum_df = data.frame(Total = sample_sums(physeq), - sample_data(physeq)) - depth_plot = ggplot(sample_sum_df, aes(x = Total)) + - geom_histogram(colour = 'black', fill = 'grey57') + - scale_x_log10() + - xlab('Log10 read depth') + - ylab('Number of samples') + - ggtitle('Distribution of read depths') + - theme_bw() +plt_read_depth = function(physeq){ + lifecycle::deprecate_soft('0.1.0.', 'plt_read_depth()', + 'plot_read_depth()', + details = paste("This is the old version of", + "plot_read_depth(), and will be", + "removed soon.")) + sample_sum_df = data.frame(Total = phyloseq::sample_sums(physeq), + phyloseq::sample_data(physeq)) + depth_plot = ggplot2::ggplot(sample_sum_df, aes(x = Total)) + + ggplot2::geom_histogram(colour = 'black', fill = 'grey57') + + ggplot2::scale_x_log10() + + ggplot2::xlab('Log10 read depth') + + ggplot2::ylab('Number of samples') + + ggplot2::ggtitle('Distribution of read depths') + + ggplot2::theme_bw() return(depth_plot) } diff --git a/R/plot_tax_bar.R b/R/plot_tax_bar.R index 305af5d..3a102ca 100644 --- a/R/plot_tax_bar.R +++ b/R/plot_tax_bar.R @@ -2,8 +2,8 @@ #' Create a Taxa Bar Chart #' -#' \code{plot_tax_bar()} creates a taxa bar chart from the data frame generated -#' by \code{\link{make_phy_df}}. +#' `plot_tax_bar()` creates a taxa bar chart from the data frame generated +#' by [make_phy_df()]. #' #' @section Details: This function generates a ggplot object that is a #' first-pass, reasonable attempt at a taxon bar chart. The taxa are ordered @@ -14,31 +14,41 @@ #' @section Value: A ggplot object. #' #' @param tax_df The data frame used for plotting. Unless you really know what -#' you're doing, use the data frame output by \code{\link{make_phy_df}}. +#' you're doing, use the data frame output by [make_phy_df()]. #' @param rank The taxonomic rank at which to view the data. Must be one of #' 'Genus', 'Family', 'Order', 'Class', 'Phylum'. May not be a lower rank than -#' the rank given to \code{\link{make_phy_df}}. +#' the rank given to [make_phy_df()]. #' @param colours A character vector of colour names or hex values. Must have #' enough colours to accommodate all your taxa at the appropriate rank. If you #' don't provide one, there are a few internal vectors that have 21, 31, 60, #' or 70 colours that the function will try to use. -#' @param sample \code{'X.SampleID'} The name of the sample column in the data +#' @param sample `'X.SampleID'` The name of the sample column in the data #' frame. -#' @param abund \code{'Abundance'} The name of the abundance column in the data +#' @param abund `'Abundance'` The name of the abundance column in the data #' frame. -#' @param legloc \code{'right'} Location of the legend. Can be 'right' -#' (default), 'bottom', or 'none' (absent) -#' @param yscale \code{'lin'} Can be either 'lin' or 'sqrt'. The 'sqrt' plot can -#' look weird. -#' @param means \code{FALSE} If \code{TRUE}, sets \code{position = fill} in the -#' \code{geom_bar()} to constrain the abundances to sum to 1. Good to use if -#' your \code{sample = } parameter is not actually sample names, but rather -#' larger categories, to produce a plot of category means. +#' @param legloc `'right'` Location of the legend. Passed directly to +#' `ggplot2::theme(legend.position = legloc)` and can be any of "none", +#' "left", "right", "top", or "bottom". If it is anything else, +#' [ggplot2::theme()] will default to "none". +#' @param yscale `r lifecycle::badge("deprecated")` This function no longer +#' supports y-axis scales other than linear. +#' @param means `FALSE` If `TRUE`, sets `position = fill` in the +#' [ggplot2::geom_bar()] to constrain the abundances to sum to 1. +#' Good to use if your `sample = ` parameter is not actually sample +#' names, but rather larger categories, to produce a plot of category means. +#' @param r_ticks `FALSE` If `TRUE` x-axis tickmark text is rotated 90 +#' degrees and read bottom-to-top. #' @export plot_tax_bar = function(taxa_df,rank,colours = NULL, sample = 'X.SampleID', abund = 'Abundance', - legloc = 'right', yscale = 'lin', means = FALSE){ - # Fed by make_phy_df() + legloc = 'right', yscale = 'lin', + means = FALSE, r_ticks = FALSE, leglen = NULL){ + # Lifecycle Management + if (yscale != 'lin'){ + lifecycle::deprecate_warn('0.1.0', 'plot_tax_bar(yscale)', + details = paste('The ability to set non-linear y-axis', + 'scaling will be removed soon.')) + } # Check the inputs if (!(sample %in% names(taxa_df))){ @@ -52,14 +62,55 @@ plot_tax_bar = function(taxa_df,rank,colours = NULL, if (!(yscale %in% c('lin','log','sqrt'))){ stop('yscale argument must be one of \'lin\', \'log\', or \'sqrt\'') } - + if (!(rank %in% names(taxa_df))){ + stop('rank argument must be one of the columns in your data frame') + } # Pick colours num = length(unique(taxa_df[,rank])) if (is.null(colours)){ n = num/length(tax_colours) colours = c('grey69',rev(rep(tax_colours, ceiling(n))[1:num-1])) + names(colours) = levels(taxa_df[,rank]) } else if (is.null(names(colours))) { + if (length(colours) < (dplyr::n_distinct(taxa_df[,rank])-1)){ + warn(paste('The supplied colour vector is shorter than the number', + 'of distinct taxa.')) + } colours = c('grey69',rev(colours[1:(num-1)])) + names(colours) = levels(taxa_df[,rank]) + } else { + if (length(colours) < dplyr::n_distinct(taxa_df[,rank])){ + warn(paste('The supplied colour vector is shorter than the number', + 'of distinct taxa.')) + } + } + + # Check legend length + if (is.null(leglen)){ + leglen = num + } else if (!is.numeric(leglen)) { + stop(paste("leglen must be an integer")) + } else if (leglen > num){ + warn(paste("leglen is greater than the number of unique taxa.", + "showing all taxa.")) + leglen = num + } else if (leglen == 0){ + legloc = 'none' + leglen = num + } else if (leglen < 0){ + warn(paste('leglen can not be negative. treating it like 0.')) + legloc = 'none' + leglen = num + } + + # Check for means + mn_chk = (taxa_df + %>% dplyr::group_by(.[,sample]) + %>% dplyr::summarize(.chksms = sum(Abundance, na.rm = TRUE))) + tol = 1e-5 + if (any(mn_chk$.chksms - 1 > tol) & !means){ + warn(paste('Your per-sample abundances sum to >1. Did you mean to', + 'specify \'means = TRUE\'?')) } # Make sure the x axis is categorical @@ -68,42 +119,59 @@ plot_tax_bar = function(taxa_df,rank,colours = NULL, # If the colour vector is named, make sure it is ordered like the factor # it's colouring if (!is.null(names(colours))) { - taxa_df[,rank] = droplevels(taxa_df[,rank]) - if (!all(levels(taxa_df[,rank]) %in% names(colours))){ - stop('The colour vector names need to match the rank levels') + if ((length(colours) < dplyr::n_distinct(taxa_df[,rank])) & + all(names(colours) %in% droplevels(taxa_df[,rank]))){ + NULL + } else { + taxa_df[,rank] = droplevels(taxa_df[,rank]) + if (!all(levels(taxa_df[,rank]) %in% names(colours))){ + stop('The colour vector names need to match the rank levels') + } } colours = colours[levels(taxa_df[,rank])] } - indiv = ggplot(taxa_df, aes_string(x = sample, y = abund, fill = rank, - colour = rank)) + - theme(axis.title.x = element_blank(), - axis.text.x = element_text(size = 10, - angle = 90, - hjust = 1, - vjust = 0.5)) + - scale_fill_manual(values = colours, - guide = guide_legend(reverse = TRUE)) + - scale_colour_manual(values = colours, - guide = guide_legend(reverse = TRUE)) + - ylab(paste("Relative Abundance (",rank,")\n",sep='')) + brks = c('Other', rev(rev(levels(taxa_df[,rank]))[1:(leglen-1)])) + # Generate the plot + indiv = ggplot2::ggplot(taxa_df, + ggplot2::aes(x = .data[[sample]], + y = .data[[abund]], + fill = .data[[rank]], + colour = .data[[rank]])) + + ggplot2::theme_bw() + + # adjust the title and axis text + ggplot2::theme(axis.title.x = ggplot2::element_blank(), + # position the legend + legend.position = legloc) + + # use the right colours + ggplot2::scale_fill_manual(values = colours, + breaks = brks, + guide = ggplot2::guide_legend(reverse = TRUE)) + + ggplot2::scale_colour_manual(values = colours, + breaks = brks, + guide = ggplot2::guide_legend(reverse = TRUE)) + + # label the axis + ggplot2::ylab(paste("Relative Abundance (",rank,")\n",sep='')) if (means){ - indiv = indiv + geom_bar(stat = 'identity', position = 'fill') + # use mean abundance across a group + indiv = indiv + ggplot2::geom_bar(stat = 'identity', position = 'fill') } else { - indiv = indiv + geom_bar(stat = "identity") + # individual samples + indiv = indiv + ggplot2::geom_bar(stat = "identity") } - if (yscale == 'sqrt') { - indiv = indiv + scale_y_sqrt() + # rotate the ticks + if (r_ticks){ + indiv = indiv + rotate_ticks() } - if (legloc == 'bottom'){ - indiv = indiv + theme(legend.position = 'bottom') - } else if (legloc == 'none') { - indiv = indiv + guides(fill = FALSE, colour = FALSE) + if (yscale == 'sqrt') { + # square-root transform the y-axis. You probably don't want this. It should + # be deprecated. + indiv = indiv + ggplot2::scale_y_sqrt() } - indiv + theme_bw() + return(indiv) } diff --git a/R/plt_ord.R b/R/plt_ord.R index ba66473..9e5f414 100644 --- a/R/plt_ord.R +++ b/R/plt_ord.R @@ -1,13 +1,20 @@ ## Plot the ordination -------------------------------------------------------- ### plt_ord ------------------------------------------------------------------ + #' Plot the ordination #' #' Make a multi-axis PCoA plot from the data frame -#' @param ord_long The ordination data frame produced by \code{make_ord_df} +#' +#' @section Details: +#' +#' @section Value: +#' +#' @param ord_long The ordination data frame produced by +#' [make_ord_df()] #' @param colour The name of the column to use to colour the points. #' @param shape The name of the coloumn governing the shape of the points. -#' @param size The size of the points. Passed to the \code{geom_point()} size -#' argument and defaults to 1. +#' @param size The size of the points. Passed to the +#' [ggplot2::geom_point()] size argument and defaults to 1. #' @param pt_alph The transparency of the points. Default is 0.7 #' @export plt_ord = function(ord_long, colour = NULL, shape = NULL, size = 1, diff --git a/R/prep_plot_rank_ab.R b/R/prep_plot_rank_ab.R index 7995556..337ace5 100644 --- a/R/prep_plot_rank_ab.R +++ b/R/prep_plot_rank_ab.R @@ -2,34 +2,35 @@ #' Rank Taxa by Abundance #' -#' \code{rank_abund()} generates a data frame that's ready to be used by -#' \code{\link{plot_rank_abund}} +#' `rank_abund()` generates a data frame that's ready to be used by +#' [plot_rank_ab()] #' #' @section Value: A data frame whose taxa have been ranked by their mean #' abundance in the user-specified baseline level of some grouping variable or #' variables. #' #' @param phy_df A dataframe of a phyloseq object, like that generated by -#' \code{\link{psmelt}} or \code{\link{make_phy_df}} -#' @param varbs (\code{NULL}) A character vector of grouping variables from -#' which the baseline values are chosen to define the abundance ordering. If -#' it is \code{NULL}, the ordering will be based on mean abundances in the -#' whole data frame. -#' @param bases (\code{NULL}) A character vector of baseline values for the -#' variables given in \code{vars}. The ordering of the taxa will be given -#' based only on the samples with these baseline values for these variables. -#' Must be in the same order as vars. -#' @param abunds (\code{'Abundance'}) The name of the abundance column. -#' @param rank (\code{'Genus'}) The rank to base the ordering on. -#' @param IDcol (\code{'X.SampleID'}) The column name of the sample IDs -#' @export -rank_abund = function(phy_df, varbs = NULL, bases = NULL, abunds = 'Abundance', - rank = 'Genus', IDcol = 'X.SampleID'){ +#' [phyloseq::psmelt()] or [make_phy_df()] +#' @param gvars (`NULL`) A character vector of grouping variables from which the +#' baseline values are chosen to define the abundance ordering. If it is +#' `NULL`, the ordering will be based on mean abundances in the whole data +#' frame. +#' @param bases (`NULL`) A character vector of baseline values for the variables +#' given in `gvars`. The ordering of the taxa will be given based only on the +#' samples with these baseline values for these variables. Must be in the same +#' order as `gvars`. +#' @param abunds (`'Abundance'`) The name of the abundance column. +#' @param rank (`'Genus'`) The rank to base the ordering on. Must be a column in +#' `phy_df` +#' @param IDcol (`'X.SampleID'`) The column name of the sample IDs +#' +rank_abund = function(phy_df, gvars = NULL, bases = NULL, abunds = 'Abundance', + rank = 'Genus', IDcol){ # Set up the groups for the plotting totals rank_abs = df_glom(phy_df, IDcol = IDcol, rank = rank, abunds = abunds) # Subset and order - ranked = subset_order(rank_abs, varbs, bases, rank = rank) + ranked = subset_order(rank_abs, gvars, bases, rank = rank) # Order the bigger data frame by the above ordering lev_ord = levels(ranked[,rank]) @@ -42,22 +43,25 @@ rank_abund = function(phy_df, varbs = NULL, bases = NULL, abunds = 'Abundance', #' Subset and generate taxon ordering #' -#' \code{subset_order} generates a data frame whose taxon column given by -#' \code{rank} has been ranked according to its mean abundance in the -#' \code{abunds} column. Used internally by \code{rank_abund()} +#' `subset_order` generates a data frame whose taxon column given by +#' `rank` has been ranked according to its mean abundance in the +#' `abunds` column. Used internally by +#' [rank_abund()] #' -#' @param phy_df A phyloseq data frame, as generated by \code{psmelt}, but -#' probably generated by \code{\link{df_glom}} or \code{\link{make_phy_df()}}. -#' @param varbs (\code{NULL}) A character vector of grouping variables from +#' @param phy_df A phyloseq data frame, as generated by +#' [phyloseq::psmelt()], but probably generated by +#' [df_glom()] or +#' [make_phy_df()]. +#' @param varbs (`NULL`) A character vector of grouping variables from #' which the baseline values are chosen to define the abundance ordering. If -#' it is \code{NULL}, the ordering will be based on mean abundances in the +#' it is `NULL`, the ordering will be based on mean abundances in the #' whole data frame. -#' @param bases (\code{NULL}) A character vector of baseline values for the -#' variables given in \code{vars}. The ordering of the taxa will be given +#' @param bases (`NULL`) A character vector of baseline values for the +#' variables given in `vars`. The ordering of the taxa will be given #' based only on the samples with these baseline values for these variables. #' Must be in the same order as varbs. -#' @param abund (\code{'Abundance'}) The name of the abundance column. -#' @param rank (\code{'Genus'}) The taxonomic rank to base the ordering on. +#' @param abund (`'Abundance'`) The name of the abundance column. +#' @param rank (`'Genus'`) The taxonomic rank to base the ordering on. subset_order = function(phy_df, varbs = NULL, bases = NULL, rank = 'Genus', abunds = 'TotalAbunds'){ # Check inputs @@ -89,10 +93,10 @@ subset_order = function(phy_df, varbs = NULL, bases = NULL, rank = 'Genus', #' Like tax_glom, but for data frames #' -#' \code{df_glom} take totals within sample at a given taxonomic rank. +#' [df_glom()] take totals within sample at a given taxonomic rank. #' -#' @param phy_df A phyloseq data frame, as generated by \code{\link{psmelt}} or -#' \code{\link{make_phy_df}} +#' @param phy_df A phyloseq data frame, as generated by +#' [phyloseq::psmelt()] or [make_phy_df()] #' @param ranks A character vector with the taxon rank names #' @param IDcol The column name of the sample IDs #' @param rank The taxonomic rank to glom at diff --git a/R/prep_plt_ord.R b/R/prep_plt_ord.R index 7bb91a8..d0fffc3 100644 --- a/R/prep_plt_ord.R +++ b/R/prep_plt_ord.R @@ -13,19 +13,19 @@ #' that the transformation you have used is appropriate for the distance #' method you choose. #' @param dist_meth The distance method to be use. Must be one of the methods -#' accepted by the \code{phyloseq::distance()} function. Default is 'bray'. If -#' 'jaccard' is used, adds the \code{binary = TRUE} argument. If you have -#' taken a clr transform of your data and want Aitchison distances, choose -#' \code{'euclidean'}. Currently, tree-based methods like \code{'unifrac'} and -#' \code{'wunifrac'} are not supported. +#' accepted by the [phyloseq::distance()] function. Default is +#' 'bray'. If 'jaccard' is used, adds the `binary = TRUE` argument. If +#' you have taken a clr transform of your data and want Aitchison distances, +#' choose `'euclidean'`. Currently, tree-based methods like +#' `'unifrac'` and `'wunifrac'` are not supported. #' @param ord_meth The ordination method. Must be one of the methods accepted by -#' the \code{phyloseq::ordinate()} function. Default is 'PCoA'. Make sure the -#' ordination method you choose is appropriate for your distance method. -#' @param scree_only If \code{TRUE}, this function will print the scree plot of +#' the [phyloseq::ordinate()] function. Default is 'PCoA'. Make sure +#' the ordination method you choose is appropriate for your distance method. +#' @param scree_only If `TRUE`, this function will print the scree plot of #' the requested ordination and then exit. Good for deciding how many axes you -#' care about. Default is \code{FALSE}. +#' care about. Default is `FALSE`. #' @param axes A vector of integers indicating which ordination axes to include -#' in the data frame. Defaults to \code{1:4}. +#' in the data frame. Defaults to `1:4`. #' @export make_ord_df = function(physeq, dist_meth = 'bray', ord_meth = 'PCoA', scree_only = FALSE, axes = 1:4){ @@ -92,12 +92,12 @@ make_ord_df = function(physeq, dist_meth = 'bray', ord_meth = 'PCoA', #' Make a data frame of the ordination if the ordination method is RDA. Not #' exported. +#' @section Description: Take a phyloseq object and generate a data frame of the +#' distance-based ordination of the samples for plotting when the ordination +#' method was RDA. Used internally by [make_ord_df()]. #' -#' Take a phyloseq object and generate a data frame of the distance-based -#' ordination of the samples for plotting when the ordination method was -#' RDA. Used internally by \code{make_ord_df()} -#' -#' @param ord The ordination object generated by \code{phyloseq::ordinate()}. +#' @param ord The ordination object generated by +#' [phyloseq::ordinate()]. #' @param physeq The phyloseq object we're making things from #' @param axes A numeric vector of the axes to plot make_rda_df = function(ord, physeq, axes){ @@ -129,9 +129,9 @@ make_rda_df = function(ord, physeq, axes){ #' #' Take a phyloseq objet and generate a data frame of the distance-based #' ordination of the samples for plotting when the ordination method was PCoA. -#' Used internally by \code{make_ord_df()} +#' Used internally by `make_ord_df()` #' -#' @param ord The ordination object generated by \code{phyloseq::ordinate()}. +#' @param ord The ordination object generated by `phyloseq::ordinate()`. #' @param physeq The phyloseq object we're making things from. #' @param axes A numeric vector of the axes to include. make_pcoa_df = function(ord, physeq, axes){ diff --git a/R/prop_tax_down.R b/R/prop_tax_down.R index a674328..127af94 100644 --- a/R/prop_tax_down.R +++ b/R/prop_tax_down.R @@ -1,9 +1,9 @@ ##### prop_tax_down ------------------------------------------------------------ -#' Propagate taxon information down an entire \code{tax_table} in a phylose +#' Propagate taxon information down an entire `tax_table` in a phylose #' object #' -#' \code{prop_tax_down} Takes the taxon assignment from the lowest assigned +#' `prop_tax_down` Takes the taxon assignment from the lowest assigned #' level in an OTU's assignment and fills it in to all the lower, unresolved #' fields. #' @@ -15,12 +15,12 @@ #' fields with the string 'o_Enterobacteriales', so they are not parsed as NAs #' by ggplot and other functions. #' -#' @section Value: A phyloseq object whose \code{tax_table} has had NAs replaced +#' @section Value: A phyloseq object whose `tax_table` has had NAs replaced #' with the higher level taxon assignments. #' -#' @param physeq a phyloseq object with a filled \code{tax_table} slot. +#' @param physeq a phyloseq object with a filled `tax_table` slot. #' @param indic a flag to indicate if the taxon names have level indicators. If -#' FALSE, they are added. Just gets passed to \code{\link{prop_tax_tab}}. +#' FALSE, they are added. Just gets passed to [prop_tax_tab()]. #' @param dbig a flag to indicate whether genus names that exist in multiple #' families should be disambiguated by appending the family name. #' @export @@ -36,6 +36,11 @@ prop_tax_down = function(physeq, indic, dbig = TRUE){ colnames(tt) = colnames(phyloseq::tax_table(physeq)) rownames(tt) = sp + # If there are no unspecified taxa, stop + if (!any(is.na(tt))){ + return(physeq) + } + tt = prop_tax_tab(tt, indic) phyloseq::tax_table(physeq) = tt @@ -52,7 +57,7 @@ prop_tax_down = function(physeq, indic, dbig = TRUE){ #' Check for any taxa that have the same name but are in different higher-level #' classifications, and append the family name #' -#' @section Details: \code{dbig_genera} takes a phyloseq object with a +#' @section Details: `dbig_genera` takes a phyloseq object with a #' taxonomy table, disamibugates the genera by appending family names to any #' genus names that are found in multiple families in this data set, and #' returns the object with an update tax_table() object. The input phyloseq @@ -108,9 +113,9 @@ dbig_genera = function(physeq){ ##### prop_tax_tab ------------------------------------------------------------- -#' Propagate taxon information down an entire \code{tax_table} object +#' Propagate taxon information down an entire `tax_table` object #' -#' \code{prop_tax_tab} Takes the taxon assignment from the lowest assigned level +#' `prop_tax_tab` Takes the taxon assignment from the lowest assigned level #' in an OTU's assignment and fills it in to all the lower, unresolved fields. #' #' @section Details: Often taxon cannot be assigned all the way down to genus @@ -121,12 +126,12 @@ dbig_genera = function(physeq){ #' fields with the string 'o_Enterobacteriales', so they are not parsed as NAs #' by ggplot and other functions. #' -#' @section Value: A \code{tax_table} object with NAs replaced with higher-level +#' @section Value: A `tax_table` object with NAs replaced with higher-level #' taxon assignment #' -#' @param taxtab a \code{tax_table} object +#' @param taxtab a `tax_table` object #' @param indic a flag to indicate if the taxon names have level indicators. If -#' FALSE, they are added. Just gets passed to \code{\link{prop_tax_row}}. +#' FALSE, they are added. Just gets passed to [prop_tax_row()]. #' #' @keywords internal prop_tax_tab = function(taxtab, indic){ @@ -144,9 +149,9 @@ prop_tax_tab = function(taxtab, indic){ ##### prop_tax_row ------------------------------------------------------------- -#' Propagate taxon information in a single row of a \code{tax_table} object. +#' Propagate taxon information in a single row of a `tax_table` object. #' -#' \code{prop_tax_row} Takes the taxon assignment from the lowest assigned level +#' `prop_tax_row` Takes the taxon assignment from the lowest assigned level #' in an OTU's assignment and fills it in to all the lower, unresolved fields. #' #' @section Details: Often taxon cannot be assigned all the way down to genus @@ -157,7 +162,7 @@ prop_tax_tab = function(taxtab, indic){ #' fields with the string 'o_Enterobacteriales', so they are not parsed as NAs #' by ggplot and other functions. #' -#' @section Value: A single row \code{tax_table} object to with NAs replaced +#' @section Value: A single row `tax_table` object to with NAs replaced #' with higher-level taxon assignments #' #' @param taxrow A single row in a taxon table diff --git a/R/utils.R b/R/utils.R index 961bde7..6bfc419 100644 --- a/R/utils.R +++ b/R/utils.R @@ -2,7 +2,14 @@ #' Order the levels of one factor by the values of another #' -#' \code{order_levs()} takes two factors, the first of which has values that are +#' @description `r lifecycle::badge("deprecated")` +#' +#' This function will be deprecated because it is just recapitulating +#' functionality provided by `ggplot2::facet_grid()`. If you are using this +#' function and don't want it it go away, please get in touch and let me know +#' what you're using it for. +#' +#' `order_levs()` takes two factors, the first of which has values that are #' nested within the values of the second, and orders the levels of the first #' factor such that they are clustered within the second factor. #' @@ -10,10 +17,11 @@ #' re-ordered. #' #' @param f1 The factor to re-order. Levels of this factor must be nested within -#' \code{f2} -#' @param f2 The factor to use when re-ordering \code{f1}. +#' `f2` +#' @param f2 The factor to use when re-ordering `f1`. #' @export order_levs = function(f1,...){ + lifecycle::deprecate_warn('0.1.0', 'order_levs()') # if (is.numeric(f2)){ # ord = order(...) diff --git a/benchmark/data.RData b/benchmark/data.RData new file mode 100644 index 0000000..8a71486 Binary files /dev/null and b/benchmark/data.RData differ diff --git a/benchmark/expectations/bad_plt.pdf b/benchmark/expectations/bad_plt.pdf new file mode 100644 index 0000000..04413cd Binary files /dev/null and b/benchmark/expectations/bad_plt.pdf differ diff --git a/benchmark/expectations/bot_plt.pdf b/benchmark/expectations/bot_plt.pdf new file mode 100644 index 0000000..4aa6eea Binary files /dev/null and b/benchmark/expectations/bot_plt.pdf differ diff --git a/benchmark/expectations/class_plt.pdf b/benchmark/expectations/class_plt.pdf new file mode 100644 index 0000000..7758a6a Binary files /dev/null and b/benchmark/expectations/class_plt.pdf differ diff --git a/benchmark/expectations/fam_plt.pdf b/benchmark/expectations/fam_plt.pdf new file mode 100644 index 0000000..c08deec Binary files /dev/null and b/benchmark/expectations/fam_plt.pdf differ diff --git a/benchmark/expectations/gen_plt.pdf b/benchmark/expectations/gen_plt.pdf new file mode 100644 index 0000000..f04dc4b Binary files /dev/null and b/benchmark/expectations/gen_plt.pdf differ diff --git a/benchmark/expectations/king_plt.pdf b/benchmark/expectations/king_plt.pdf new file mode 100644 index 0000000..9d8afe9 Binary files /dev/null and b/benchmark/expectations/king_plt.pdf differ diff --git a/benchmark/expectations/left_plt.pdf b/benchmark/expectations/left_plt.pdf new file mode 100644 index 0000000..200b6f3 Binary files /dev/null and b/benchmark/expectations/left_plt.pdf differ diff --git a/benchmark/expectations/ltz_plt.pdf b/benchmark/expectations/ltz_plt.pdf new file mode 100644 index 0000000..63a15b1 Binary files /dev/null and b/benchmark/expectations/ltz_plt.pdf differ diff --git a/benchmark/expectations/mn_dn_frgt_plt.pdf b/benchmark/expectations/mn_dn_frgt_plt.pdf new file mode 100644 index 0000000..80f670c Binary files /dev/null and b/benchmark/expectations/mn_dn_frgt_plt.pdf differ diff --git a/benchmark/expectations/mn_dn_plt.pdf b/benchmark/expectations/mn_dn_plt.pdf new file mode 100644 index 0000000..0d74f65 Binary files /dev/null and b/benchmark/expectations/mn_dn_plt.pdf differ diff --git a/benchmark/expectations/mn_frgt_plt.pdf b/benchmark/expectations/mn_frgt_plt.pdf new file mode 100644 index 0000000..b17a57e Binary files /dev/null and b/benchmark/expectations/mn_frgt_plt.pdf differ diff --git a/benchmark/expectations/mn_plt.pdf b/benchmark/expectations/mn_plt.pdf new file mode 100644 index 0000000..2358694 Binary files /dev/null and b/benchmark/expectations/mn_plt.pdf differ diff --git a/benchmark/expectations/nm_gd_plt.pdf b/benchmark/expectations/nm_gd_plt.pdf new file mode 100644 index 0000000..e295445 Binary files /dev/null and b/benchmark/expectations/nm_gd_plt.pdf differ diff --git a/benchmark/expectations/nmbd_col_plt.pdf b/benchmark/expectations/nmbd_col_plt.pdf new file mode 100644 index 0000000..5baf064 Binary files /dev/null and b/benchmark/expectations/nmbd_col_plt.pdf differ diff --git a/benchmark/expectations/none_plt.pdf b/benchmark/expectations/none_plt.pdf new file mode 100644 index 0000000..f40ab51 Binary files /dev/null and b/benchmark/expectations/none_plt.pdf differ diff --git a/benchmark/expectations/null_plt.pdf b/benchmark/expectations/null_plt.pdf new file mode 100644 index 0000000..9b0484d Binary files /dev/null and b/benchmark/expectations/null_plt.pdf differ diff --git a/benchmark/expectations/ord_plt.pdf b/benchmark/expectations/ord_plt.pdf new file mode 100644 index 0000000..afaa3df Binary files /dev/null and b/benchmark/expectations/ord_plt.pdf differ diff --git a/benchmark/expectations/phy_plt.pdf b/benchmark/expectations/phy_plt.pdf new file mode 100644 index 0000000..dbb068b Binary files /dev/null and b/benchmark/expectations/phy_plt.pdf differ diff --git a/benchmark/expectations/right_plt.pdf b/benchmark/expectations/right_plt.pdf new file mode 100644 index 0000000..5694722 Binary files /dev/null and b/benchmark/expectations/right_plt.pdf differ diff --git a/benchmark/expectations/rot_plt.pdf b/benchmark/expectations/rot_plt.pdf new file mode 100644 index 0000000..7cd12f5 Binary files /dev/null and b/benchmark/expectations/rot_plt.pdf differ diff --git a/benchmark/expectations/sml_plt.pdf b/benchmark/expectations/sml_plt.pdf new file mode 100644 index 0000000..203441b Binary files /dev/null and b/benchmark/expectations/sml_plt.pdf differ diff --git a/benchmark/expectations/spec_plt.pdf b/benchmark/expectations/spec_plt.pdf new file mode 100644 index 0000000..a89b033 Binary files /dev/null and b/benchmark/expectations/spec_plt.pdf differ diff --git a/benchmark/expectations/test_plot_read_depth.html b/benchmark/expectations/test_plot_read_depth.html new file mode 100644 index 0000000..1f92a3f --- /dev/null +++ b/benchmark/expectations/test_plot_read_depth.html @@ -0,0 +1,540 @@ + + + + + + + + + + + + + +test_plot_read_depth.knit + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +
+

Test plot_read_depth()

+
+

Import and clean data

+
# Import Data
+
+library(tidyverse)
+
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
+## ✔ dplyr     1.1.2     ✔ readr     2.1.4
+## ✔ forcats   1.0.0     ✔ stringr   1.5.0
+## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
+## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
+## ✔ purrr     1.0.1     
+## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
+## ✖ dplyr::filter() masks stats::filter()
+## ✖ dplyr::lag()    masks stats::lag()
+## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
+
theme_set(theme_bw())
+library(phyloseq)
+devtools::load_all()
+
## ℹ Loading AfterSl1p
+
load('~/Projects/Mine/aftersl1p/benchmark/data.RData')
+
+seqs = rownames(asv_tab)
+all(seqs == rownames(tax_tab))
+
## [1] TRUE
+
rownames(asv_tab) = rownames(tax_tab) = NULL
+head(map_tab)
+
##      SampleID Source
+## 1      SHCM32  Stool
+## 2      SHCM33  Stool
+## 3      SHCM34  Stool
+## 4      SHCM35  Stool
+## 5 MikeAnested   Swab
+## 6 MikeBnested   Swab
+
map_tab = (map_tab
+           %>% mutate(ContVar = c(13.6, 8.3, 2.8, 15.5, 10, 6.6, 13.0, 11.5)))
+rownames(map_tab) = map_tab$SampleID
+
+setdiff(colnames(asv_tab), rownames(map_tab))
+
## character(0)
+
asv_tab = as.matrix(asv_tab)
+tax_tab = as.matrix(tax_tab)
+dat = phyloseq(otu_table(asv_tab, taxa_are_rows = TRUE),
+               tax_table(tax_tab),
+               sample_data(map_tab))
+
+# Clean Data
+dat
+
## phyloseq-class experiment-level object
+## otu_table()   OTU Table:         [ 830 taxa and 8 samples ]
+## sample_data() Sample Data:       [ 8 samples by 3 sample variables ]
+## tax_table()   Taxonomy Table:    [ 830 taxa by 7 taxonomic ranks ]
+
dat = prop_tax_down(dat, indic = FALSE)
+dat_rel = transform_sample_counts(dat, function(x) x/sum(x))
+keep = taxa_sums(dat)/nsamples(dat) >= 100
+dat_filt = prune_taxa(keep, dat)
+dat_rel_filt = prune_taxa(keep, dat_rel)
+
+col_vect = c("#E41A1C","#377EB8","#4DAF4A","#984EA3","#FF7F00", 'dodgerblue2',
+             'goldenrod3', 'pink')
+
+
+

Tests

+
+

All Defaults

+
plt_read_depth(dat_filt)
+

+
+
+

xvar

+
plt_read_depth(dat_filt, xvar = 'Source')
+

+
+
+

cvar continuous, colour default

+
plt_read_depth(dat_filt, cvar = 'ContVar')
+

+
+
+

cvar categorical, colour default

+
+

with xvar

+
undebug(plt_read_depth)
+
## Warning in undebug(plt_read_depth): argument is not being debugged
+
plt_read_depth(dat_filt, cvar = 'Source', xvar = 'SampleID')
+

+
plt_read_depth(dat_filt, cvar = 'Source', xvar = 'Source')
+

+
+
+

without xvar

+
plt_read_depth(dat_filt, cvar = 'SampleID')
+

+
+
+
+

1 colour specified, no cvar

+
plt_read_depth(dat_filt, clrs = col_vect[1])
+

+
+
+

>1 colour specified, no cvar

+
devtools::load_all()
+
## ℹ Loading AfterSl1p
+
plt_read_depth(dat_filt, clrs = col_vect)
+
## Warning: With no cvar specified, only the first colour in "clrs" will be used.
+

+
+
+

cvar continuous, bad colours

+

Raises an error. See testthat tests

+
+
+

cvar continuous, high and low

+
col_hl = col_vect[1:2]
+names(col_hl) = c('low','high')
+col_hl 
+
##       low      high 
+## "#E41A1C" "#377EB8"
+
plt_read_depth(dat_filt, cvar = 'ContVar', clrs = col_hl)
+

+
+
+

cvar categorical, unnamed colour vector

+
devtools::load_all()
+
## ℹ Loading AfterSl1p
+
plt_read_depth(dat_filt, cvar = 'SampleID', clrs = col_vect)
+

+
+
+

cvar categorical, bad colour vector names

+
+
+

cvar categorical, names colour vector

+
+
+
+ + + + +
+ + + + + + + + + + + + + + + diff --git a/benchmark/expectations/test_plot_tax_bar.html b/benchmark/expectations/test_plot_tax_bar.html new file mode 100644 index 0000000..beab77a --- /dev/null +++ b/benchmark/expectations/test_plot_tax_bar.html @@ -0,0 +1,807 @@ + + + + + + + + + + + + + +test_plot_tax_bar.knit + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +
+

Test Plotting

+
+

Import and clea data

+
# Import Data
+
+library(tidyverse)
+
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
+## ✔ dplyr     1.1.1     ✔ readr     2.1.4
+## ✔ forcats   1.0.0     ✔ stringr   1.5.0
+## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
+## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
+## ✔ purrr     1.0.1     
+## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
+## ✖ dplyr::filter() masks stats::filter()
+## ✖ dplyr::lag()    masks stats::lag()
+## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
+
theme_set(theme_bw())
+library(phyloseq)
+devtools::load_all('~/Projects/Mine/aftersl1p/')
+
## ℹ Loading AfterSl1p
+
load('~/Projects/Mine/aftersl1p/benchmark/data.RData')
+
+seqs = rownames(asv_tab)
+all(seqs == rownames(tax_tab))
+
## [1] TRUE
+
rownames(asv_tab) = rownames(tax_tab) = NULL
+
+rownames(map_tab) = map_tab$SampleID
+
+setdiff(colnames(asv_tab), rownames(map_tab))
+
## character(0)
+
asv_tab = as.matrix(asv_tab)
+tax_tab = as.matrix(tax_tab)
+dat = phyloseq(otu_table(asv_tab, taxa_are_rows = TRUE),
+               tax_table(tax_tab),
+               sample_data(map_tab))
+
+# Clean Data
+dat
+
## phyloseq-class experiment-level object
+## otu_table()   OTU Table:         [ 830 taxa and 8 samples ]
+## sample_data() Sample Data:       [ 8 samples by 2 sample variables ]
+## tax_table()   Taxonomy Table:    [ 830 taxa by 7 taxonomic ranks ]
+
dat = prop_tax_down(dat, indic = FALSE)
+dat_rel = transform_sample_counts(dat, function(x) x/sum(x))
+keep = taxa_sums(dat)/nsamples(dat) >= 100
+dat_filt = prune_taxa(keep, dat)
+dat_rel_filt = prune_taxa(keep, dat_rel)
+
+
+

Taxa Bar Charts

+
    +
  • All taxonomic ranks
  • +
  • leglen options: +
      +
    • NULL
    • +
    • > n_taxa
    • +
    • 0 < leglen < n_taxa
    • +
    • 0
    • +
    • < 0
    • +
    • not a number
    • +
  • +
  • legloc options: +
      +
    • top
    • +
    • bottom
    • +
    • right
    • +
    • left
    • +
    • none
    • +
    • bad
    • +
  • +
  • provide own colour vector +
      +
    • named (good)
    • +
    • named (bad)
    • +
    • named (too short)
    • +
    • unnamed (long enough)
    • +
    • unnamed (not long enough)
    • +
  • +
  • rotate ticks
  • +
  • means
  • +
+
+

All taxonomic ranks

+
+

Kingdom

+
king_df = make_phy_df(dat_rel_filt, 'Kingdom', prop = FALSE)
+king_plt = plot_tax_bar(king_df, 'Kingdom', sample = 'SampleID')
+king_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/king_plt.pdf',
+       width = 10, height = 5)
+
+
+

Phylum

+
phy_df = make_phy_df(dat_rel_filt, 'Phylum', prop = FALSE)
+phy_plt = plot_tax_bar(phy_df, 'Phylum', sample = 'SampleID')
+phy_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/phy_plt.pdf',
+       width = 10, height = 5)
+
+plot_tax_bar(phy_df, 'Kingdom', sample = 'SampleID')
+

+
+
+

Class

+
class_df = make_phy_df(dat_rel_filt, 'Class', prop = FALSE)
+class_plt = plot_tax_bar(class_df, 'Class', sample = 'SampleID')
+class_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/class_plt.pdf',
+       width = 10, height = 5)
+plot_tax_bar(class_df, 'Phylum', sample = 'SampleID')
+

+
plot_tax_bar(class_df, 'Kingdom', sample = 'SampleID')
+

+
+
+

Order

+
ord_df = make_phy_df(dat_rel_filt, 'Order', prop = FALSE)
+ord_plt = plot_tax_bar(ord_df, 'Order', sample = 'SampleID')
+ord_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/ord_plt.pdf',
+       width = 10, height = 5)
+plot_tax_bar(ord_df, 'Class', sample = 'SampleID')
+

+
plot_tax_bar(ord_df, 'Phylum', sample = 'SampleID')
+

+
plot_tax_bar(ord_df, 'Kingdom', sample = 'SampleID')
+

+
+
+

Family

+
fam_df = make_phy_df(dat_rel_filt, 'Family', prop = FALSE)
+fam_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID')
+fam_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/fam_plt.pdf',
+       width = 10, height = 5)
+plot_tax_bar(fam_df, 'Order', sample = 'SampleID')
+

+
plot_tax_bar(fam_df, 'Class', sample = 'SampleID')
+

+
plot_tax_bar(fam_df, 'Phylum', sample = 'SampleID')
+

+
plot_tax_bar(fam_df, 'Kingdom', sample = 'SampleID')
+

+
+
+

Genus

+
gen_df = make_phy_df(dat_rel_filt, 'Genus', prop = FALSE)
+gen_plt = plot_tax_bar(gen_df, 'Genus', sample = 'SampleID')
+gen_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/gen_plt.pdf',
+       width = 10, height = 5)
+plot_tax_bar(gen_df, 'Family', sample = 'SampleID')
+

+
plot_tax_bar(gen_df, 'Order', sample = 'SampleID')
+

+
plot_tax_bar(gen_df, 'Class', sample = 'SampleID')
+

+
plot_tax_bar(gen_df, 'Phylum', sample = 'SampleID')
+

+
plot_tax_bar(gen_df, 'Kingdom', sample = 'SampleID')
+

+
+
+

Species

+
spec_df = make_phy_df(dat_rel_filt, 'Species', prop = FALSE)
+spec_plt = plot_tax_bar(spec_df, 'Species', sample = 'SampleID')
+spec_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/spec_plt.pdf',
+       width = 10, height = 5)
+plot_tax_bar(spec_df, 'Genus', sample = 'SampleID')
+

+
plot_tax_bar(spec_df, 'Family', sample = 'SampleID')
+

+
plot_tax_bar(spec_df, 'Order', sample = 'SampleID')
+

+
plot_tax_bar(spec_df, 'Class', sample = 'SampleID')
+

+
plot_tax_bar(spec_df, 'Phylum', sample = 'SampleID')
+

+
plot_tax_bar(spec_df, 'Kingdom', sample = 'SampleID')
+

+
+
+
+

Leglen Options

+
+

NULL

+
null_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', leglen = NULL)
+null_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/null_plt.pdf',
+       width = 10, height = 5)
+
+
+

> n_taxa

+
too_big_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', 
+                           leglen = n_distinct(fam_df$Family) + 2)
+
## Warning: leglen is greater than the number of unique taxa. showing all taxa.
+
too_big_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/too_big_plt.pdf',
+       width = 10, height = 5)
+
+
+

0 < leglen < n_taxa

+
sml_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', 
+                           leglen = round(n_distinct(fam_df$Family)/2))
+sml_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/sml_plt.pdf',
+       width = 10, height = 5)
+
+
+

0

+
zero_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', 
+                           leglen = 0)
+zero_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/zero_plt.pdf',
+       width = 10, height = 5)
+
+
+

< 0

+
ltz_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', 
+                           leglen = -1)
+
## Warning: leglen can not be negative. treating it like 0.
+
ltz_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/ltz_plt.pdf',
+       width = 10, height = 5)
+
+
+

not a number

+

This raises an error. Test is via testthat

+
+
+
+

Legloc Options

+
+

Top

+
top_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', leglen = 10,
+                       legloc = 'top')
+top_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/top_plt.pdf',
+       width = 10, height = 5)
+
+
+

Bottom

+
bot_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', leglen = 10,
+                       legloc = 'bottom')
+bot_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/bot_plt.pdf',
+       width = 10, height = 5)
+
+ +
+

Left

+
left_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', leglen = 10,
+                       legloc = 'left')
+left_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/left_plt.pdf',
+       width = 10, height = 5)
+
+
+

None

+
none_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', leglen = 10,
+                       legloc = 'none')
+none_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/none_plt.pdf',
+       width = 10, height = 5)
+
+
+

Bad

+
bad_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', leglen = 10,
+                       legloc = 3)
+bad_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/bad_plt.pdf',
+       width = 10, height = 5)
+
+
+
+

Colour Vectors

+
cols = c("#E41A1C","#377EB8","#4DAF4A","#984EA3","#FF7F00","#FFFF33","#A65628",
+         "#F781BF")
+
+

Named (good)

+
nmcols = cols
+names(nmcols) = c('Other',
+                  rev(levels(class_df$Class)[-1]))
+nm_gd_plt = plot_tax_bar(class_df, 'Class', colours = nmcols, 
+                         sample = 'SampleID')
+nm_gd_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/nm_gd_plt.pdf',
+       width = 10, height = 5)
+
+
+

Named (bad)

+

This raises an error. Test is via testthat.

+
+
+

Named (too short)

+
nmbd_col_plt = plot_tax_bar(class_df, 'Class', colours = nmcols[1:3],
+                            sample = 'SampleID')
+
## Warning: The supplied colour vector is shorter than the number of distinct
+## taxa.
+
nmbd_col_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/nmbd_col_plt.pdf',
+       width = 10, height = 5)
+
+
+

Unnamed (long enough)

+
un_col_plt = plot_tax_bar(class_df, 'Class', colours = cols, 
+                          sample = 'SampleID')
+un_col_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/un_col_plt.pdf',
+       width = 10, height = 5)
+
+
+

Unnamed (not long enough)

+
unbd_col_plt = plot_tax_bar(class_df, 'Class', colours = cols[1:3], 
+                          sample = 'SampleID')
+
## Warning: The supplied colour vector is shorter than the number of distinct
+## taxa.
+
unbd_col_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/unbd_col_plt.pdf',
+       width = 10, height = 5)
+
+
+
+

Rotate ticks

+
rot_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', r_ticks = TRUE)
+rot_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/rot_plt.pdf',
+       width = 10, height = 5)
+
+
+

Means

+
+

Means (good)

+
mn_plt = plot_tax_bar(fam_df, 'Family', sample = 'Source', means = TRUE)
+mn_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/mn_plt.pdf',
+       width = 10, height = 5)
+
+
+

Means (good, different ns)

+
mn_dn_plt = plot_tax_bar(filter(fam_df, SampleID != 'SHCM34'), 'Family',
+                         sample = 'Source', means = TRUE)
+mn_dn_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/mn_dn_plt.pdf',
+       width = 10, height = 5)
+
+
+

Means (forget)

+
mn_frgt_plt = plot_tax_bar(fam_df, 'Family', sample = 'Source')
+
## Warning: Your per-sample abundances sum to >1. Did you mean to specify 'means =
+## TRUE'?
+
mn_frgt_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/mn_frgt_plt.pdf',
+       width = 10, height = 5)
+
+
+

Means (forget, different ns)

+
mn_dn_frgt_plt = plot_tax_bar(filter(fam_df, SampleID != 'SHCM34'), 'Family',
+                              sample = 'Source')
+
## Warning: Your per-sample abundances sum to >1. Did you mean to specify 'means =
+## TRUE'?
+
mn_dn_frgt_plt
+

+
ggsave('~/Projects/Mine/aftersl1p/benchmark/mn_dn_frgt_plt.pdf',
+       width = 10, height = 5)
+
+
+
+
+ + + + +
+ + + + + + + + + + + + + + + diff --git a/benchmark/expectations/too_big_plt.pdf b/benchmark/expectations/too_big_plt.pdf new file mode 100644 index 0000000..2a3d5e2 Binary files /dev/null and b/benchmark/expectations/too_big_plt.pdf differ diff --git a/benchmark/expectations/top_plt.pdf b/benchmark/expectations/top_plt.pdf new file mode 100644 index 0000000..c2e6b32 Binary files /dev/null and b/benchmark/expectations/top_plt.pdf differ diff --git a/benchmark/expectations/un_col_plt.pdf b/benchmark/expectations/un_col_plt.pdf new file mode 100644 index 0000000..ca0d986 Binary files /dev/null and b/benchmark/expectations/un_col_plt.pdf differ diff --git a/benchmark/expectations/unbd_col_plt.pdf b/benchmark/expectations/unbd_col_plt.pdf new file mode 100644 index 0000000..f4c8c41 Binary files /dev/null and b/benchmark/expectations/unbd_col_plt.pdf differ diff --git a/benchmark/expectations/zero_plt.pdf b/benchmark/expectations/zero_plt.pdf new file mode 100644 index 0000000..63a15b1 Binary files /dev/null and b/benchmark/expectations/zero_plt.pdf differ diff --git a/benchmark/test_plot_read_depth.Rmd b/benchmark/test_plot_read_depth.Rmd new file mode 100644 index 0000000..df1ae9d --- /dev/null +++ b/benchmark/test_plot_read_depth.Rmd @@ -0,0 +1,114 @@ +Test plot_read_depth() +====================== + +## Import and clean data + +```{r} +# Import Data + +library(tidyverse) +theme_set(theme_bw()) +library(phyloseq) +devtools::load_all() + +load('~/Projects/Mine/aftersl1p/benchmark/data.RData') + +seqs = rownames(asv_tab) +all(seqs == rownames(tax_tab)) +rownames(asv_tab) = rownames(tax_tab) = NULL +head(map_tab) +map_tab = (map_tab + %>% mutate(ContVar = c(13.6, 8.3, 2.8, 15.5, 10, 6.6, 13.0, 11.5))) +rownames(map_tab) = map_tab$SampleID + +setdiff(colnames(asv_tab), rownames(map_tab)) +asv_tab = as.matrix(asv_tab) +tax_tab = as.matrix(tax_tab) +dat = phyloseq(otu_table(asv_tab, taxa_are_rows = TRUE), + tax_table(tax_tab), + sample_data(map_tab)) + +# Clean Data +dat +dat = prop_tax_down(dat, indic = FALSE) +dat_rel = transform_sample_counts(dat, function(x) x/sum(x)) +keep = taxa_sums(dat)/nsamples(dat) >= 100 +dat_filt = prune_taxa(keep, dat) +dat_rel_filt = prune_taxa(keep, dat_rel) + +col_vect = c("#E41A1C","#377EB8","#4DAF4A","#984EA3","#FF7F00", 'dodgerblue2', + 'goldenrod3', 'pink') +``` + + + +## Tests + +### All Defaults + +```{r} +plot_read_depth(dat_filt) +``` + +### xvar + +```{r} +plot_read_depth(dat_filt, xvar = 'Source') +``` + +### cvar continuous, colour default + +```{r} +plot_read_depth(dat_filt, cvar = 'ContVar') +``` + +### cvar categorical, colour default + +#### with xvar + +```{r} +plot_read_depth(dat_filt, cvar = 'Source', xvar = 'SampleID') +plot_read_depth(dat_filt, cvar = 'Source', xvar = 'Source') +``` + +#### without xvar + +```{r} +plot_read_depth(dat_filt, cvar = 'SampleID') +``` + + +### 1 colour specified, no cvar + +```{r} +plot_read_depth(dat_filt, clrs = col_vect[1]) +``` + +### >1 colour specified, no cvar + +```{r} +plot_read_depth(dat_filt, clrs = col_vect) +``` + +### cvar continuous, bad colours + +Raises an error. See `testthat` tests + +### cvar continuous, high and low + +```{r} +col_hl = col_vect[1:2] +names(col_hl) = c('low','high') +col_hl +plot_read_depth(dat_filt, cvar = 'ContVar', clrs = col_hl) +``` + +### cvar categorical, unnamed colour vector + +```{r} +plot_read_depth(dat_filt, cvar = 'SampleID', clrs = col_vect) +``` + +### cvar categorical, bad colour vector names + +### cvar categorical, names colour vector diff --git a/benchmark/test_plot_tax_bar.Rmd b/benchmark/test_plot_tax_bar.Rmd new file mode 100644 index 0000000..394ff1b --- /dev/null +++ b/benchmark/test_plot_tax_bar.Rmd @@ -0,0 +1,381 @@ +Test plot_tax_bar() +=================== + +## Import and clean data + +```{r} +# Import Data + +library(tidyverse) +theme_set(theme_bw()) +library(phyloseq) +devtools::load_all('~/Projects/Mine/aftersl1p/') + +load('~/Projects/Mine/aftersl1p/benchmark/data.RData') + +seqs = rownames(asv_tab) +all(seqs == rownames(tax_tab)) +rownames(asv_tab) = rownames(tax_tab) = NULL + +rownames(map_tab) = map_tab$SampleID + +setdiff(colnames(asv_tab), rownames(map_tab)) +asv_tab = as.matrix(asv_tab) +tax_tab = as.matrix(tax_tab) +dat = phyloseq(otu_table(asv_tab, taxa_are_rows = TRUE), + tax_table(tax_tab), + sample_data(map_tab)) + +# Clean Data +dat +dat = prop_tax_down(dat, indic = FALSE) +dat_rel = transform_sample_counts(dat, function(x) x/sum(x)) +keep = taxa_sums(dat)/nsamples(dat) >= 100 +dat_filt = prune_taxa(keep, dat) +dat_rel_filt = prune_taxa(keep, dat_rel) +``` + +## Taxa Bar Charts + +* ~~All taxonomic ranks~~ +* ~~leglen options:~~ + * ~~NULL~~ + * ~~> n_taxa~~ + * ~~0 < leglen < n_taxa~~ + * ~~0~~ + * ~~< 0~~ + * ~~not a number~~ +* legloc options: + * ~~top~~ + * ~~bottom~~ + * ~~right~~ + * ~~left~~ + * ~~none~~ + * ~~bad~~ +* provide own colour vector + * ~~named (good)~~ + * ~~named (bad)~~ + * ~~named (too short)~~ + * ~~unnamed (long enough)~~ + * ~~unnamed (not long enough)~~ +* ~~rotate ticks~~ +* ~~means~~ + +### All taxonomic ranks + +#### Kingdom + +```{r} +king_df = make_phy_df(dat_rel_filt, 'Kingdom', prop = FALSE) +king_plt = plot_tax_bar(king_df, 'Kingdom', sample = 'SampleID') +king_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/king_plt.pdf', + width = 10, height = 5) +``` + +#### Phylum + +```{r} +phy_df = make_phy_df(dat_rel_filt, 'Phylum', prop = FALSE) +phy_plt = plot_tax_bar(phy_df, 'Phylum', sample = 'SampleID') +phy_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/phy_plt.pdf', + width = 10, height = 5) + +plot_tax_bar(phy_df, 'Kingdom', sample = 'SampleID') +``` + +#### Class + +```{r} +class_df = make_phy_df(dat_rel_filt, 'Class', prop = FALSE) +class_plt = plot_tax_bar(class_df, 'Class', sample = 'SampleID') +class_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/class_plt.pdf', + width = 10, height = 5) +plot_tax_bar(class_df, 'Phylum', sample = 'SampleID') +plot_tax_bar(class_df, 'Kingdom', sample = 'SampleID') +``` + +#### Order + +```{r} +ord_df = make_phy_df(dat_rel_filt, 'Order', prop = FALSE) +ord_plt = plot_tax_bar(ord_df, 'Order', sample = 'SampleID') +ord_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/ord_plt.pdf', + width = 10, height = 5) +plot_tax_bar(ord_df, 'Class', sample = 'SampleID') +plot_tax_bar(ord_df, 'Phylum', sample = 'SampleID') +plot_tax_bar(ord_df, 'Kingdom', sample = 'SampleID') +``` + +#### Family + +```{r} +fam_df = make_phy_df(dat_rel_filt, 'Family', prop = FALSE) +fam_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID') +fam_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/fam_plt.pdf', + width = 10, height = 5) +plot_tax_bar(fam_df, 'Order', sample = 'SampleID') +plot_tax_bar(fam_df, 'Class', sample = 'SampleID') +plot_tax_bar(fam_df, 'Phylum', sample = 'SampleID') +plot_tax_bar(fam_df, 'Kingdom', sample = 'SampleID') +``` + +#### Genus + +```{r} +gen_df = make_phy_df(dat_rel_filt, 'Genus', prop = FALSE) +gen_plt = plot_tax_bar(gen_df, 'Genus', sample = 'SampleID') +gen_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/gen_plt.pdf', + width = 10, height = 5) +plot_tax_bar(gen_df, 'Family', sample = 'SampleID') +plot_tax_bar(gen_df, 'Order', sample = 'SampleID') +plot_tax_bar(gen_df, 'Class', sample = 'SampleID') +plot_tax_bar(gen_df, 'Phylum', sample = 'SampleID') +plot_tax_bar(gen_df, 'Kingdom', sample = 'SampleID') +``` + +#### Species + +```{r} +spec_df = make_phy_df(dat_rel_filt, 'Species', prop = FALSE) +spec_plt = plot_tax_bar(spec_df, 'Species', sample = 'SampleID') +spec_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/spec_plt.pdf', + width = 10, height = 5) +plot_tax_bar(spec_df, 'Genus', sample = 'SampleID') +plot_tax_bar(spec_df, 'Family', sample = 'SampleID') +plot_tax_bar(spec_df, 'Order', sample = 'SampleID') +plot_tax_bar(spec_df, 'Class', sample = 'SampleID') +plot_tax_bar(spec_df, 'Phylum', sample = 'SampleID') +plot_tax_bar(spec_df, 'Kingdom', sample = 'SampleID') +``` + + +### Leglen Options + +#### NULL + +```{r} +null_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', leglen = NULL) +null_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/null_plt.pdf', + width = 10, height = 5) +``` + +#### > n_taxa + +```{r} +too_big_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', + leglen = n_distinct(fam_df$Family) + 2) +too_big_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/too_big_plt.pdf', + width = 10, height = 5) +``` + +#### 0 < leglen < n_taxa + +```{r} +sml_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', + leglen = round(n_distinct(fam_df$Family)/2)) +sml_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/sml_plt.pdf', + width = 10, height = 5) +``` + +#### 0 + +```{r} +zero_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', + leglen = 0) +zero_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/zero_plt.pdf', + width = 10, height = 5) +``` + +#### < 0 + +```{r} +ltz_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', + leglen = -1) +ltz_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/ltz_plt.pdf', + width = 10, height = 5) +``` + +#### not a number + +This raises an error. Test is via `testthat` + + +### Legloc Options + +#### Top + +```{r} +top_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', leglen = 10, + legloc = 'top') +top_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/top_plt.pdf', + width = 10, height = 5) +``` + +#### Bottom + +```{r} +bot_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', leglen = 10, + legloc = 'bottom') +bot_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/bot_plt.pdf', + width = 10, height = 5) +``` + +#### Right + +```{r} +right_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', leglen = 10, + legloc = 'right') +right_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/right_plt.pdf', + width = 10, height = 5) +``` + +#### Left + +```{r} +left_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', leglen = 10, + legloc = 'left') +left_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/left_plt.pdf', + width = 10, height = 5) +``` + +#### None + +```{r} +none_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', leglen = 10, + legloc = 'none') +none_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/none_plt.pdf', + width = 10, height = 5) +``` + +#### Bad + +```{r} +bad_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', leglen = 10, + legloc = 3) +bad_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/bad_plt.pdf', + width = 10, height = 5) +``` + +### Colour Vectors + +```{r} +cols = c("#E41A1C","#377EB8","#4DAF4A","#984EA3","#FF7F00","#FFFF33","#A65628", + "#F781BF") +``` + +#### Named (good) + +```{r} +nmcols = cols +names(nmcols) = c('Other', + rev(levels(class_df$Class)[-1])) +nm_gd_plt = plot_tax_bar(class_df, 'Class', colours = nmcols, + sample = 'SampleID') +nm_gd_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/nm_gd_plt.pdf', + width = 10, height = 5) +``` + +#### Named (bad) + +This raises an error. Test is via `testthat`. + +#### Named (too short) + +```{r} +nmbd_col_plt = plot_tax_bar(class_df, 'Class', colours = nmcols[1:3], + sample = 'SampleID') +nmbd_col_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/nmbd_col_plt.pdf', + width = 10, height = 5) +``` + + +#### Unnamed (long enough) + +```{r} +un_col_plt = plot_tax_bar(class_df, 'Class', colours = cols, + sample = 'SampleID') +un_col_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/un_col_plt.pdf', + width = 10, height = 5) +``` + +#### Unnamed (not long enough) + +```{r} +unbd_col_plt = plot_tax_bar(class_df, 'Class', colours = cols[1:3], + sample = 'SampleID') +unbd_col_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/unbd_col_plt.pdf', + width = 10, height = 5) +``` + +### Rotate ticks + +```{r} +rot_plt = plot_tax_bar(fam_df, 'Family', sample = 'SampleID', r_ticks = TRUE) +rot_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/rot_plt.pdf', + width = 10, height = 5) +``` + +### Means + +#### Means (good) + +```{r} +mn_plt = plot_tax_bar(fam_df, 'Family', sample = 'Source', means = TRUE) +mn_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/mn_plt.pdf', + width = 10, height = 5) +``` + +#### Means (good, different ns) + +```{r} +mn_dn_plt = plot_tax_bar(filter(fam_df, SampleID != 'SHCM34'), 'Family', + sample = 'Source', means = TRUE) +mn_dn_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/mn_dn_plt.pdf', + width = 10, height = 5) +``` + + +#### Means (forget) + +```{r} +mn_frgt_plt = plot_tax_bar(fam_df, 'Family', sample = 'Source') +mn_frgt_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/mn_frgt_plt.pdf', + width = 10, height = 5) +``` + +#### Means (forget, different ns) + +```{r} +mn_dn_frgt_plt = plot_tax_bar(filter(fam_df, SampleID != 'SHCM34'), 'Family', + sample = 'Source') +mn_dn_frgt_plt +ggsave('~/Projects/Mine/aftersl1p/benchmark/mn_dn_frgt_plt.pdf', + width = 10, height = 5) +``` + diff --git a/data-raw/DATASET.R b/data-raw/DATASET.R deleted file mode 100644 index 6514a94..0000000 --- a/data-raw/DATASET.R +++ /dev/null @@ -1,3 +0,0 @@ -## code to prepare `DATASET` dataset goes here - -usethis::use_data(DATASET, overwrite = TRUE) diff --git a/data-raw/acols_60.R b/data-raw/acols_60.R deleted file mode 100644 index 4cb1f06..0000000 --- a/data-raw/acols_60.R +++ /dev/null @@ -1,13 +0,0 @@ -## code to prepare `acols_60` dataset goes here -acols_60 = c("#f44400","#6151fe","#66f035","#3015be","#c2ff29","#7d00be", - "#fbf700","#01189d","#86ff6c","#8d00a6","#3db500","#ff3fd6", - "#01c145","#b277ff","#809900","#0058ca","#f5ff80","#23005d", - "#00b765","#f70078","#00be90","#de0081","#00d6d5","#ff343d", - "#00bbfd","#ba0004","#01badf","#ff496c","#00562a","#ff7fde", - "#456800","#a391ff","#a67c00","#002368","#ffa74c","#016cb1", - "#aa4f00","#0091c7","#ffe18e","#970077","#d1ffcb","#5e0044", - "#bae4e5","#350001","#e6b3ff","#2c4900","#ff96ae","#001d18", - "#ffccd7","#280020","#ffb3a4","#003559","#786200","#008292", - "#790036","#00775f","#540d00","#373738","#5b3300","#352f00") - -usethis::use_data(acols_60, overwrite = TRUE) diff --git a/data-raw/cols_12.R b/data-raw/cols_12.R deleted file mode 100644 index 2d28480..0000000 --- a/data-raw/cols_12.R +++ /dev/null @@ -1,3 +0,0 @@ -## code to prepare `cols_12` dataset goes here - -usethis::use_data(cols_12, overwrite = TRUE) diff --git a/data-raw/cols_21.R b/data-raw/cols_21.R deleted file mode 100644 index b5eaba4..0000000 --- a/data-raw/cols_21.R +++ /dev/null @@ -1,7 +0,0 @@ -## code to prepare `cols_21` dataset goes here -cols_21 = rev(c("#4f8579","#783fcc","#69d24d","#cb4bbd","#c6dc46","#542871", - "#78d792","#cc4472","#83d7d0","#d44d33","#676fcd","#ceb854", - "#403d57","#b97839","#84a4cb","#588038","#c68ac4","#48472a", - "#c9c39c","#6e2b34","#c78889")) - -usethis::use_data(cols_21, overwrite = TRUE) diff --git a/data-raw/cols_31.R b/data-raw/cols_31.R deleted file mode 100644 index edf4d5f..0000000 --- a/data-raw/cols_31.R +++ /dev/null @@ -1,9 +0,0 @@ -## code to prepare `cols_31` dataset goes here -cols_31 = rev(c("#69006f","#62e622","#cf00d0","#00a60c","#8568ff","#01e479", - "#ff4562","#00dda5","#bc0024","#7cffff","#630039","#daff8b", - "#00285d","#dbb600","#ff7fc4","#8dae00","#a7d0ff","#9e5000", - "#bcf2ff","#662000","#f1ffd0","#241500","#ffdfe7","#376b00", - "#ff8d6d","#00322c","#ff9d44","#005e75","#ffce96","#3c4400", - "#695000")) - -usethis::use_data(cols_31, overwrite = TRUE) diff --git a/data-raw/cols_60.R b/data-raw/cols_60.R deleted file mode 100644 index 83545be..0000000 --- a/data-raw/cols_60.R +++ /dev/null @@ -1,13 +0,0 @@ -## code to prepare `cols_60` dataset goes here -cols_60 = rev(c("#005981","#71e000","#5b4cf8","#d0e400","#6600b2","#01c54e", - "#da1ada","#dbff76","#003daa","#a6a700","#948dff","#e29700", - "#003787","#96ff9c","#b4007c","#01cb7d","#f2006a","#01eeb9", - "#ee3e00","#1dfff2","#b70028","#01b284","#ff52b5","#007b20", - "#ff86e2","#002e02","#e1a4ff","#525900","#920072","#b0ffc9", - "#39004c","#ffe689","#001a52","#ffb869","#006abc","#a96800", - "#64b6ff","#9d2f00","#81efff","#ff545c","#01a8aa","#8b003e", - "#e7ffd0","#440032","#fefffe","#030017","#ff925b","#0092bd", - "#ff8463","#028899","#600017","#c8e3ff","#702f00","#f0d4ff", - "#1f1100","#ffc18a","#003138","#ffbcbd","#01704c","#432b00")) - -usethis::use_data(cols_60, overwrite = TRUE) diff --git a/data-raw/cols_70.R b/data-raw/cols_70.R deleted file mode 100644 index da65f46..0000000 --- a/data-raw/cols_70.R +++ /dev/null @@ -1,15 +0,0 @@ -## code to prepare `cols_70` dataset goes here -cols_70 = rev(c("#00cf9b","#fc00bf","#52e62c","#a000b2","#9fff37","#5066ff", - "#c2de00","#003ebb","#69c500","#cf69ff","#8dff6e","#1f0063", - "#ffed43","#9572ff","#01a30f","#ff64df","#01ce53","#76007c", - "#7cff8e","#0057cc","#ffc93a","#4191ff","#de0200","#1af4ff", - "#cc003a","#01d57d","#ca0057","#00e5d4","#a3001c","#8dffc7", - "#ac0053","#a5ffa9","#003a8a","#d0ff8d","#001f50","#94a600", - "#cbaeff","#4e9100","#ffb1f8","#3f8100","#ff9acd","#006b32", - "#ff6c62","#00aec9","#dc5f00","#004a72","#c08c00","#260021", - "#ffe08c","#001524","#deffd6","#2c0100","#c8f8ff","#660014", - "#9fd4ff","#701a00","#dcd0ff","#003c11","#ffd9f4","#181500", - "#ffd8a1","#00332c","#ff8763","#018b69","#9d5d00","#006a7f", - "#ffae8e","#472700","#ffb4b6","#807100")) - -usethis::use_data(cols_70, overwrite = TRUE) diff --git a/man/dbig_genera.Rd b/man/dbig_genera.Rd index d4768df..d26b640 100644 --- a/man/dbig_genera.Rd +++ b/man/dbig_genera.Rd @@ -16,17 +16,17 @@ classifications, and append the family name } \section{Details}{ \code{dbig_genera} takes a phyloseq object with a - taxonomy table, disamibugates the genera by appending family names to any - genus names that are found in multiple families in this data set, and - returns the object with an update tax_table() object. The input phyloseq - object must have 'Genus' and 'Family' columns in its tax_table object. The - output object with have the disambiguated names in the 'Genus' column and - will have a new column called 'AmbigGenus' where the old genus names can be - found. +taxonomy table, disamibugates the genera by appending family names to any +genus names that are found in multiple families in this data set, and +returns the object with an update tax_table() object. The input phyloseq +object must have 'Genus' and 'Family' columns in its tax_table object. The +output object with have the disambiguated names in the 'Genus' column and +will have a new column called 'AmbigGenus' where the old genus names can be +found. } \section{Value}{ A phyloseq object with an updated Genus column and a column - called AmbigGenus containing the old Genus names. +called AmbigGenus containing the old Genus names. } diff --git a/man/df_glom.Rd b/man/df_glom.Rd index 235d346..95e795e 100644 --- a/man/df_glom.Rd +++ b/man/df_glom.Rd @@ -13,8 +13,8 @@ df_glom( ) } \arguments{ -\item{phy_df}{A phyloseq data frame, as generated by \code{\link{psmelt}} or -\code{\link{make_phy_df}}} +\item{phy_df}{A phyloseq data frame, as generated by +\code{\link[phyloseq:psmelt]{phyloseq::psmelt()}} or \code{\link[=make_phy_df]{make_phy_df()}}} \item{ranks}{A character vector with the taxon rank names} @@ -27,5 +27,5 @@ df_glom( \item{tots}{The desired column name of the summed (glommed) abundances} } \description{ -\code{df_glom} take totals within sample at a given taxonomic rank. +\code{\link[=df_glom]{df_glom()}} take totals within sample at a given taxonomic rank. } diff --git a/man/hello.Rd b/man/hello.Rd deleted file mode 100644 index 0fa7c4b..0000000 --- a/man/hello.Rd +++ /dev/null @@ -1,12 +0,0 @@ -\name{hello} -\alias{hello} -\title{Hello, World!} -\usage{ -hello() -} -\description{ -Prints 'Hello, world!'. -} -\examples{ -hello() -} diff --git a/man/lddf_work.Rd b/man/lddf_work.Rd index c80687c..ce188ec 100644 --- a/man/lddf_work.Rd +++ b/man/lddf_work.Rd @@ -35,9 +35,11 @@ distance column in your long data frame. Only here to avoid clashes with existing metadata column names.} } \description{ -\code{lddf_work} Does the actual gathering, spreading, and joining associated -with making the lddf, but without checking if the distance matrix is sensible -or removing diagonals and repeats. This is for when you know what you're -doing and have trimmed your distance matrix down to only what you know you -need. Good for permutation tests. +\code{lddf_work} Used internally by \code{long_distance_df()}. I recommend +you use that function unless you really know what you're doing. This function +does the actual gathering, spreading, and joining associated with making the +lddf, but without checking if the distance matrix is sensible or removing +diagonals and repeats. Use this function if you know exactly what you want +and have trimmed your distance matrix down to only what you know you need. +Good for permutation tests. } diff --git a/man/long_distance_df.Rd b/man/long_distance_df.Rd index db650f6..8a52ab2 100644 --- a/man/long_distance_df.Rd +++ b/man/long_distance_df.Rd @@ -44,7 +44,7 @@ existing metadata column names.} column names in the metadat data frame, and whose rows contain a subset of the possible values/combinations. If this parameter is used, all the samples whose metadata matches a row in this data frame will end up in -Sample1 and the rest will end up in Sample2. This means you will _not_ get +Sample1 and the rest will end up in Sample2. This means you will \emph{not} get all the pairs, because the samples in Sample1 will not get compared to each other, and neither will the samples in Sample2. If this parameter is not used, the upper triangle of the distance matrix is used, without regard for @@ -53,15 +53,15 @@ metadata values.} \description{ \code{long_distance_df} creates a long data frame of all the pairwise distances from a sample distance matrix (e.g. the output of -\code{\link{phyloseq::distance}}) with all the metadata listed for each sample. -Allows for easy within- and among-group boxplots, or whatever other -comparisons are of interest. +\code{\link[phyloseq:distance]{phyloseq::distance()}}) with all the metadata listed for +each sample. Allows for easy within- and among-group boxplots, or whatever +other comparisons are of interest. } \section{Value}{ A data frame \eqn{N(N-1)} (or \eqn{N^2} if \code{diag = TRUE} - is set) rows (where N is the number of samples) with sample IDs, metadata, - and pairwise distances listed for each pair of samples. Sample ID and - metadata columns have '1' or '2' appended to them so the user can tell - which column belongs to which sample. +is set) rows (where N is the number of samples) with sample IDs, metadata, +and pairwise distances listed for each pair of samples. Sample ID and +metadata columns have '1' or '2' appended to them so the user can tell +which column belongs to which sample. } diff --git a/man/make_ord_df.Rd b/man/make_ord_df.Rd index 7415772..5598449 100644 --- a/man/make_ord_df.Rd +++ b/man/make_ord_df.Rd @@ -20,15 +20,15 @@ that the transformation you have used is appropriate for the distance method you choose.} \item{dist_meth}{The distance method to be use. Must be one of the methods -accepted by the \code{phyloseq::distance()} function. Default is 'bray'. If -'jaccard' is used, adds the \code{binary = TRUE} argument. If you have -taken a clr transform of your data and want Aitchison distances, choose -\code{'euclidean'}. Currently, tree-based methods like \code{'unifrac'} and -\code{'wunifrac'} are not supported.} +accepted by the \code{\link[phyloseq:distance]{phyloseq::distance()}} function. Default is +'bray'. If 'jaccard' is used, adds the \code{binary = TRUE} argument. If +you have taken a clr transform of your data and want Aitchison distances, +choose \code{'euclidean'}. Currently, tree-based methods like +\code{'unifrac'} and \code{'wunifrac'} are not supported.} \item{ord_meth}{The ordination method. Must be one of the methods accepted by -the \code{phyloseq::ordinate()} function. Default is 'PCoA'. Make sure the -ordination method you choose is appropriate for your distance method.} +the \code{\link[phyloseq:ordinate]{phyloseq::ordinate()}} function. Default is 'PCoA'. Make sure +the ordination method you choose is appropriate for your distance method.} \item{scree_only}{If \code{TRUE}, this function will print the scree plot of the requested ordination and then exit. Good for deciding how many axes you diff --git a/man/make_phy_df.Rd b/man/make_phy_df.Rd index e4bed4e..fb8f723 100644 --- a/man/make_phy_df.Rd +++ b/man/make_phy_df.Rd @@ -23,16 +23,16 @@ make_phy_df( If you don't want anything grouped into 'Other', set this to 0. Default is 0.001.} -\item{indic}{a flag to indicate if the taxon names have level indicators. -If FALSE, they are added.} +\item{indic}{a flag to indicate if the taxon names have level indicators. If +FALSE, they are added.} \item{prop}{Specifies whether taxa need to be propogated down the taxonomy -table (default, TRUE) or if this has already been done.} +table (default is \code{TRUE}) or if this has already been done.} -\item{count}{If FALSE (default) the function will expect a relative abundance -table and create an 'Other' category for taxa below the cutoff (and will -raise an error if the table is not relative abundance). If TRUE, the -function will not check for relative abundance and will not create an +\item{count}{If \code{FALSE} (default) the function will expect a relative +abundance table and create an 'Other' category for taxa below the cutoff +(and will raise an error if the table is not relative abundance). If TRUE, +the function will not check for relative abundance and will not create an 'Other' category.} } \description{ @@ -41,16 +41,16 @@ bar charts. } \section{Details}{ This function takes a phyloseq object and generates a data - frame that is useful for plotting taxon abundance information. By default - it propagates taxon assignment information down the tree into unassigned - leves, and aggregates all taxa below 0.1 percent into a single 'Other' - category. This function expects the phyloseq object to be relative - abundance, and weird things will happen if it is not. +frame that is useful for plotting taxon abundance information. By default +it propagates taxon assignment information down the tree into unassigned +leves, and aggregates all taxa below 0.1 percent into a single 'Other' +category. This function expects the phyloseq object to be relative +abundance, and weird things will happen if it is not. } \section{Value}{ A data frame similar in structure to that generated by - \code{psmelt}, but with an 'Other' category added and taxon levels ordered - for use in plotting. +\code{\link[phyloseq:psmelt]{phyloseq::psmelt()}}, but with an 'Other' category added and +taxon levels ordered for use in plotting. } diff --git a/man/make_rda_df.Rd b/man/make_rda_df.Rd index bdcaeff..97c2a58 100644 --- a/man/make_rda_df.Rd +++ b/man/make_rda_df.Rd @@ -8,14 +8,20 @@ exported.} make_rda_df(ord, physeq, axes) } \arguments{ -\item{ord}{The ordination object generated by \code{phyloseq::ordinate()}.} +\item{ord}{The ordination object generated by +\code{\link[phyloseq:ordinate]{phyloseq::ordinate()}}.} \item{physeq}{The phyloseq object we're making things from} \item{axes}{A numeric vector of the axes to plot} } \description{ -Take a phyloseq object and generate a data frame of the distance-based -ordination of the samples for plotting when the ordination method was -RDA. Used internally by \code{make_ord_df()} +Make a data frame of the ordination if the ordination method is RDA. Not +exported. } +\section{Description}{ + Take a phyloseq object and generate a data frame of the +distance-based ordination of the samples for plotting when the ordination +method was RDA. Used internally by \code{\link[=make_ord_df]{make_ord_df()}}. +} + diff --git a/man/order_levs.Rd b/man/order_levs.Rd index 9a72fc2..1420de8 100644 --- a/man/order_levs.Rd +++ b/man/order_levs.Rd @@ -13,12 +13,19 @@ order_levs(f1, ...) \item{f2}{The factor to use when re-ordering \code{f1}.} } \description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} + +This function will be deprecated because it is just recapitulating +functionality provided by \code{ggplot2::facet_grid()}. If you are using this +function and don't want it it go away, please get in touch and let me know +what you're using it for. + \code{order_levs()} takes two factors, the first of which has values that are nested within the values of the second, and orders the levels of the first factor such that they are clustered within the second factor. } \section{Value}{ A factor that has the same levels as f1, but that has been - re-ordered. +re-ordered. } diff --git a/man/order_taxa.Rd b/man/order_taxa.Rd index 6df0c29..8389d20 100644 --- a/man/order_taxa.Rd +++ b/man/order_taxa.Rd @@ -8,7 +8,7 @@ order_taxa(phy_df, rank, abund = "Abundance", decreasing = FALSE) } \arguments{ \item{phy_df}{A data frame of a phyloseq object, as produced by -\code{\link{psmelt}} or \code{\link{make_phy_df}}.} +\code{\link[phyloseq:psmelt]{phyloseq::psmelt()}} or \code{\link[=make_phy_df]{make_phy_df()}}.} \item{rank}{The name of the column to be re-ordered} @@ -18,12 +18,12 @@ order_taxa(phy_df, rank, abund = "Abundance", decreasing = FALSE) decreasing or increasing abundance. Defaults to FALSE.} } \description{ -\code{order_taxa} reorders the taxon names in a taxon column (e.g. 'Class' or +\code{order_taxa()} reorders the taxon names in a taxon column (e.g. 'Class' or 'Phylum') by the taxon's mean abundance (but always makes sure to put Other first). } \section{Value}{ A data frame that is identical to the one given, but with the - specified column re-ordered by its mean abundance +specified column re-ordered by its mean abundance } diff --git a/man/plot_read_depth.Rd b/man/plot_read_depth.Rd index 287241d..3107c40 100644 --- a/man/plot_read_depth.Rd +++ b/man/plot_read_depth.Rd @@ -2,13 +2,42 @@ % Please edit documentation in R/plot_read_depth.R \name{plot_read_depth} \alias{plot_read_depth} -\title{Plot read depth} +\title{View read depth} \usage{ -plot_read_depth(physeq) +plot_read_depth(physeq, xvar = NULL, lin = FALSE, cvar = NULL, clrs = NULL) } \arguments{ \item{physeq}{A phyloseq object} + +\item{xvar}{\code{NULL} A variable (column name) from \code{sample_data(physeq)} by +which to group samples on the x-axis. Must be categorical. If \code{NULL}, no +grouping will be performed.} + +\item{lin}{\code{FALSE} If \code{TRUE}, the y-axis will be on a linear scale. By +default it is log-scaled.} + +\item{cvar}{\code{NULL} A variable (column name) from \code{sample_data(physeq)} by +which the points should be coloured. If \code{NULL}, all points will be black. +May be continuous or categorical. See the \code{clrs} argument if you want to +specify your colours.} + +\item{clrs}{\code{NULL} If the \code{cvar} parameter is set and this is unset, colours +will be taken from \link{tax_colours} if \code{cvar} is categorical and use +\link{ggplot2}'s default (blue) if \code{cvar} is continuous. If \code{cvar} is +categorical, you may provide a named colour vector to use instead of +\link{tax_colours} and if it is continuous you may provide a two-colour vector. +The vector must have names "low" and "high". If \code{cvar} is NOT set, you may +use this parameter to provide a single colour name or hex code that will be +applied to all points.} } \description{ -Creates a read depth histogram. +View read depth } +\section{Details}{ + Creates a read depth boxplot +} + +\section{Value}{ + A \link{ggplot2} object +} + diff --git a/man/plot_tax_bar.Rd b/man/plot_tax_bar.Rd index 1e14794..ed47297 100644 --- a/man/plot_tax_bar.Rd +++ b/man/plot_tax_bar.Rd @@ -12,13 +12,15 @@ plot_tax_bar( abund = "Abundance", legloc = "right", yscale = "lin", - means = FALSE + means = FALSE, + r_ticks = FALSE, + leglen = NULL ) } \arguments{ \item{rank}{The taxonomic rank at which to view the data. Must be one of 'Genus', 'Family', 'Order', 'Class', 'Phylum'. May not be a lower rank than -the rank given to \code{\link{make_phy_df}}.} +the rank given to \code{\link[=make_phy_df]{make_phy_df()}}.} \item{colours}{A character vector of colour names or hex values. Must have enough colours to accommodate all your taxa at the appropriate rank. If you @@ -31,30 +33,35 @@ frame.} \item{abund}{\code{'Abundance'} The name of the abundance column in the data frame.} -\item{legloc}{\code{'right'} Location of the legend. Can be 'right' -(default), 'bottom', or 'none' (absent)} +\item{legloc}{\code{'right'} Location of the legend. Passed directly to +\code{ggplot2::theme(legend.position = legloc)} and can be any of "none", +"left", "right", "top", or "bottom". If it is anything else, +\code{\link[ggplot2:theme]{ggplot2::theme()}} will default to "none".} -\item{yscale}{\code{'lin'} Can be either 'lin' or 'sqrt'. The 'sqrt' plot can -look weird.} +\item{yscale}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} This function no longer +supports y-axis scales other than linear.} \item{means}{\code{FALSE} If \code{TRUE}, sets \code{position = fill} in the -\code{geom_bar()} to constrain the abundances to sum to 1. Good to use if -your \code{sample = } parameter is not actually sample names, but rather -larger categories, to produce a plot of category means.} +\code{\link[ggplot2:geom_bar]{ggplot2::geom_bar()}} to constrain the abundances to sum to 1. +Good to use if your \verb{sample = } parameter is not actually sample +names, but rather larger categories, to produce a plot of category means.} + +\item{r_ticks}{\code{FALSE} If \code{TRUE} x-axis tickmark text is rotated 90 +degrees and read bottom-to-top.} \item{tax_df}{The data frame used for plotting. Unless you really know what -you're doing, use the data frame output by \code{\link{make_phy_df}}.} +you're doing, use the data frame output by \code{\link[=make_phy_df]{make_phy_df()}}.} } \description{ \code{plot_tax_bar()} creates a taxa bar chart from the data frame generated -by \code{\link{make_phy_df}}. +by \code{\link[=make_phy_df]{make_phy_df()}}. } \section{Details}{ This function generates a ggplot object that is a - first-pass, reasonable attempt at a taxon bar chart. The taxa are ordered - by mean abundance and the 'Other' category is at the top. Because it's a - regular ggplot object, you can add more geoms or facet it or manipulate it - however you like using ggplot functions. +first-pass, reasonable attempt at a taxon bar chart. The taxa are ordered +by mean abundance and the 'Other' category is at the top. Because it's a +regular ggplot object, you can add more geoms or facet it or manipulate it +however you like using ggplot functions. } \section{Value}{ diff --git a/man/plt_ord.Rd b/man/plt_ord.Rd index 67ddc1d..6881091 100644 --- a/man/plt_ord.Rd +++ b/man/plt_ord.Rd @@ -7,17 +7,26 @@ plt_ord(ord_long, colour = NULL, shape = NULL, size = 1, pt_alph = 0.7) } \arguments{ -\item{ord_long}{The ordination data frame produced by \code{make_ord_df}} +\item{ord_long}{The ordination data frame produced by +\code{\link[=make_ord_df]{make_ord_df()}}} \item{colour}{The name of the column to use to colour the points.} \item{shape}{The name of the coloumn governing the shape of the points.} -\item{size}{The size of the points. Passed to the \code{geom_point()} size -argument and defaults to 1.} +\item{size}{The size of the points. Passed to the +\code{\link[ggplot2:geom_point]{ggplot2::geom_point()}} size argument and defaults to 1.} \item{pt_alph}{The transparency of the points. Default is 0.7} } \description{ Make a multi-axis PCoA plot from the data frame } +\section{Details}{ + +} + +\section{Value}{ + +} + diff --git a/man/prd_check.Rd b/man/prd_check.Rd new file mode 100644 index 0000000..514b189 --- /dev/null +++ b/man/prd_check.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_read_depth.R +\name{prd_check} +\alias{prd_check} +\title{Check inputs for plot_read_depth()} +\usage{ +prd_check(sample_sum_df, cvar, xvar) +} +\arguments{ +\item{sample_sum_df}{data frame of sample sums and sample data} + +\item{cvar, xvar}{passed through from \code{plot_read_depth()}} +} +\description{ +Checks the inputs of \code{plot_read_depth()} +} diff --git a/man/prd_clrs.Rd b/man/prd_clrs.Rd new file mode 100644 index 0000000..af7741e --- /dev/null +++ b/man/prd_clrs.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_read_depth.R +\name{prd_clrs} +\alias{prd_clrs} +\title{Deal w colour vector for plot_read_depth()} +\usage{ +prd_clrs(sample_sum_df, cvar, clrs) +} +\arguments{ +\item{cvar, clrs}{passed through from \code{plot_read_depth()}} +} +\description{ +Deal w colour vector for plot_read_depth() +} diff --git a/man/prop_tax_down.Rd b/man/prop_tax_down.Rd index 57c61c2..62cb336 100644 --- a/man/prop_tax_down.Rd +++ b/man/prop_tax_down.Rd @@ -11,7 +11,7 @@ prop_tax_down(physeq, indic, dbig = TRUE) \item{physeq}{a phyloseq object with a filled \code{tax_table} slot.} \item{indic}{a flag to indicate if the taxon names have level indicators. If -FALSE, they are added. Just gets passed to \code{\link{prop_tax_tab}}.} +FALSE, they are added. Just gets passed to \code{\link[=prop_tax_tab]{prop_tax_tab()}}.} \item{dbig}{a flag to indicate whether genus names that exist in multiple families should be disambiguated by appending the family name.} @@ -23,16 +23,16 @@ fields. } \section{Details}{ Often taxon cannot be assigned all the way down to genus - from the limited information available from a given variable region of 16S - (or other marker gene). If an OTU has been assigned to Order - Enterobacteriales, but the Family and genus cannot be resolved, then the - Family and Genus fields are left blank by QIIME. This function fills those - fields with the string 'o_Enterobacteriales', so they are not parsed as NAs - by ggplot and other functions. +from the limited information available from a given variable region of 16S +(or other marker gene). If an OTU has been assigned to Order +Enterobacteriales, but the Family and genus cannot be resolved, then the +Family and Genus fields are left blank by QIIME. This function fills those +fields with the string 'o_Enterobacteriales', so they are not parsed as NAs +by ggplot and other functions. } \section{Value}{ A phyloseq object whose \code{tax_table} has had NAs replaced - with the higher level taxon assignments. +with the higher level taxon assignments. } diff --git a/man/prop_tax_row.Rd b/man/prop_tax_row.Rd index 991d8c4..fb532aa 100644 --- a/man/prop_tax_row.Rd +++ b/man/prop_tax_row.Rd @@ -18,17 +18,17 @@ in an OTU's assignment and fills it in to all the lower, unresolved fields. } \section{Details}{ Often taxon cannot be assigned all the way down to genus - from the limited information available from a given variable region of 16S - (or other marker gene). If an OTU has been assigned to Order - Enterobacteriales, but the Family and genus cannot be resolved, then the - Family and Genus fields are left blank by QIIME. This function fills those - fields with the string 'o_Enterobacteriales', so they are not parsed as NAs - by ggplot and other functions. +from the limited information available from a given variable region of 16S +(or other marker gene). If an OTU has been assigned to Order +Enterobacteriales, but the Family and genus cannot be resolved, then the +Family and Genus fields are left blank by QIIME. This function fills those +fields with the string 'o_Enterobacteriales', so they are not parsed as NAs +by ggplot and other functions. } \section{Value}{ A single row \code{tax_table} object to with NAs replaced - with higher-level taxon assignments +with higher-level taxon assignments } \keyword{internal} diff --git a/man/prop_tax_tab.Rd b/man/prop_tax_tab.Rd index 08b7dc6..24d7aad 100644 --- a/man/prop_tax_tab.Rd +++ b/man/prop_tax_tab.Rd @@ -10,7 +10,7 @@ prop_tax_tab(taxtab, indic) \item{taxtab}{a \code{tax_table} object} \item{indic}{a flag to indicate if the taxon names have level indicators. If -FALSE, they are added. Just gets passed to \code{\link{prop_tax_row}}.} +FALSE, they are added. Just gets passed to \code{\link[=prop_tax_row]{prop_tax_row()}}.} } \description{ \code{prop_tax_tab} Takes the taxon assignment from the lowest assigned level @@ -18,17 +18,17 @@ in an OTU's assignment and fills it in to all the lower, unresolved fields. } \section{Details}{ Often taxon cannot be assigned all the way down to genus - from the limited information available from a given variable region of 16S - (or other marker gene). If an OTU has been assigned to Order - Enterobacteriales, but the Family and genus cannot be resolved, then the - Family and Genus fields are left blank by QIIME. This function fills those - fields with the string 'o_Enterobacteriales', so they are not parsed as NAs - by ggplot and other functions. +from the limited information available from a given variable region of 16S +(or other marker gene). If an OTU has been assigned to Order +Enterobacteriales, but the Family and genus cannot be resolved, then the +Family and Genus fields are left blank by QIIME. This function fills those +fields with the string 'o_Enterobacteriales', so they are not parsed as NAs +by ggplot and other functions. } \section{Value}{ A \code{tax_table} object with NAs replaced with higher-level - taxon assignment +taxon assignment } \keyword{internal} diff --git a/man/rank_abund.Rd b/man/rank_abund.Rd index 2f99199..202328f 100644 --- a/man/rank_abund.Rd +++ b/man/rank_abund.Rd @@ -6,40 +6,41 @@ \usage{ rank_abund( phy_df, - varbs = NULL, + gvars = NULL, bases = NULL, abunds = "Abundance", rank = "Genus", - IDcol = "X.SampleID" + IDcol ) } \arguments{ \item{phy_df}{A dataframe of a phyloseq object, like that generated by -\code{\link{psmelt}} or \code{\link{make_phy_df}}} +\code{\link[phyloseq:psmelt]{phyloseq::psmelt()}} or \code{\link[=make_phy_df]{make_phy_df()}}} -\item{varbs}{(\code{NULL}) A character vector of grouping variables from -which the baseline values are chosen to define the abundance ordering. If -it is \code{NULL}, the ordering will be based on mean abundances in the -whole data frame.} +\item{gvars}{(\code{NULL}) A character vector of grouping variables from which the +baseline values are chosen to define the abundance ordering. If it is +\code{NULL}, the ordering will be based on mean abundances in the whole data +frame.} -\item{bases}{(\code{NULL}) A character vector of baseline values for the -variables given in \code{vars}. The ordering of the taxa will be given -based only on the samples with these baseline values for these variables. -Must be in the same order as vars.} +\item{bases}{(\code{NULL}) A character vector of baseline values for the variables +given in \code{gvars}. The ordering of the taxa will be given based only on the +samples with these baseline values for these variables. Must be in the same +order as \code{gvars}.} \item{abunds}{(\code{'Abundance'}) The name of the abundance column.} -\item{rank}{(\code{'Genus'}) The rank to base the ordering on.} +\item{rank}{(\code{'Genus'}) The rank to base the ordering on. Must be a column in +\code{phy_df}} \item{IDcol}{(\code{'X.SampleID'}) The column name of the sample IDs} } \description{ \code{rank_abund()} generates a data frame that's ready to be used by -\code{\link{plot_rank_abund}} +\code{\link[=plot_rank_ab]{plot_rank_ab()}} } \section{Value}{ A data frame whose taxa have been ranked by their mean - abundance in the user-specified baseline level of some grouping variable or - variables. +abundance in the user-specified baseline level of some grouping variable or +variables. } diff --git a/man/remain.Rd b/man/remain.Rd index 6d432e4..a30dac5 100644 --- a/man/remain.Rd +++ b/man/remain.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/make_phy_df.R \name{remain} \alias{remain} -\title{Calculate the difference between included taxa and 100% for the 'Other' +\title{Calculate the difference between included taxa and 100\% for the 'Other' section of a phyloseq data frame.} \usage{ remain(x, tot = 1) @@ -13,7 +13,7 @@ remain(x, tot = 1) \item{tot}{The desired total. Default is 1.} } \description{ -Calculate the difference between included taxa and 100% for the 'Other' +Calculate the difference between included taxa and 100\% for the 'Other' section of a phyloseq data frame. } \section{Value}{ diff --git a/man/subset_order.Rd b/man/subset_order.Rd index 8fd9bb7..55597b5 100644 --- a/man/subset_order.Rd +++ b/man/subset_order.Rd @@ -13,8 +13,10 @@ subset_order( ) } \arguments{ -\item{phy_df}{A phyloseq data frame, as generated by \code{psmelt}, but -probably generated by \code{\link{df_glom}} or \code{\link{make_phy_df()}}.} +\item{phy_df}{A phyloseq data frame, as generated by +\code{\link[phyloseq:psmelt]{phyloseq::psmelt()}}, but probably generated by +\code{\link[=df_glom]{df_glom()}} or +\code{\link[=make_phy_df]{make_phy_df()}}.} \item{varbs}{(\code{NULL}) A character vector of grouping variables from which the baseline values are chosen to define the abundance ordering. If @@ -33,5 +35,6 @@ Must be in the same order as varbs.} \description{ \code{subset_order} generates a data frame whose taxon column given by \code{rank} has been ranked according to its mean abundance in the -\code{abunds} column. Used internally by \code{rank_abund()} +\code{abunds} column. Used internally by +\code{\link[=rank_abund]{rank_abund()}} } diff --git a/man/tax_colours.Rd b/man/tax_colours.Rd index 45a490f..3ef7dbd 100644 --- a/man/tax_colours.Rd +++ b/man/tax_colours.Rd @@ -14,13 +14,14 @@ tax_colours A vector of colours for use with taxa bar charts. } \section{Details}{ - There are 30, and grey will be added by the - \code{plot_tax_bar} function for the "Other" category. If a plot calls for - more than 30 colours, this will just recycle. That is usually fine because - low-abundance stuff can't be seen anyway, but if you have a situation where - you have more than 30 things that actually need to be distinguished, you'll - need to provide your own vector. Also, if you are in that situation, try to - stop. + There are 30 colours, and grey will be added by the +\code{\link[=plot_tax_bar]{plot_tax_bar()}} function for the "Other" category. If a plot +calls for more than 30 colours, this will just recycle. That is usually +fine because low-abundance stuff can't be seen anyway, but if you have a +situation where you have more than 30 things that actually need to be +distinguished, you'll need to provide your own vector. Also, if you are in +that situation, try to find another way to do what you are doing. People +cannot generally distinguish anywhere near 30 colours on a single plot. } \keyword{datasets} diff --git a/tests/testthat/plot_read_depth/dat.RData b/tests/testthat/plot_read_depth/dat.RData new file mode 100644 index 0000000..7422c2e Binary files /dev/null and b/tests/testthat/plot_read_depth/dat.RData differ diff --git a/tests/testthat/plot_tax_bar/dats.RData b/tests/testthat/plot_tax_bar/dats.RData new file mode 100644 index 0000000..f309e5b Binary files /dev/null and b/tests/testthat/plot_tax_bar/dats.RData differ diff --git a/tests/testthat/test-plot_read_depth.R b/tests/testthat/test-plot_read_depth.R index 8849056..b2d9d7e 100644 --- a/tests/testthat/test-plot_read_depth.R +++ b/tests/testthat/test-plot_read_depth.R @@ -1,3 +1,22 @@ -test_that("multiplication works", { - expect_equal(2 * 2, 4) +test_that('plot_read_depth() is raising the right warnings',{ + load('plot_read_depth/dat.RData') + wrn = paste('With no cvar specified,', + 'only the first colour in "clrs" will be used.') + expect_warning(plot_read_depth(dat_filt, clrs = col_vect), + wrn) +}) + +test_that('plot_read_depth() is raising the right errors',{ + load('plot_read_depth/dat.RData') + err = paste('With a continuous "cvar", the provided colour', + 'vector "clrs" must have names "high" and "low".') + expect_error(plot_read_depth(dat_filt, cvar = 'ContVar', clrs = col_vect), + err) + + err = paste('If "clrs" is named, its names must contain', + 'all the values in the "cvar" column') + col_nms = col_vect + names(col_nms) = c('a','b','c','d','e','f','g','h') + expect_error(plot_read_depth(dat_filt, cvar = 'Source', clrs = col_nms), + err) }) diff --git a/tests/testthat/test-plot_tax_bar.R b/tests/testthat/test-plot_tax_bar.R index 8849056..877bccf 100644 --- a/tests/testthat/test-plot_tax_bar.R +++ b/tests/testthat/test-plot_tax_bar.R @@ -1,3 +1,46 @@ -test_that("multiplication works", { - expect_equal(2 * 2, 4) +test_that('plot_tax_bar() is throwing the right errors',{ + load('plot_tax_bar/dats.RData') + fam_df = make_phy_df(dat_rel_filt, 'Family', prop = FALSE) + + err = 'rank argument must be one of the columns in your data frame' + expect_error(plot_tax_bar(fam_df, 'Genus', sample = 'SampleID'),err) + + err = "leglen must be an integer" + expect_error(plot_tax_bar(fam_df, 'Family', sample = 'SampleID', + leglen = 'foo'), err) + + cols = rep(c("#E41A1C","#377EB8","#4DAF4A","#984EA3","#FF7F00","#FFFF33", + "#A65628","#F781BF"), 3) + names(cols) = unique(fam_df$Class) + err = 'The colour vector names need to match the rank levels' + expect_error(plot_tax_bar(fam_df, 'Family', sample = 'SampleID', + colours = cols), err) + +}) + +test_that('plot_tax_bar() is throwing the right warnings',{ + load('plot_tax_bar/dats.RData') + fam_df = make_phy_df(dat_rel_filt, 'Family', prop= FALSE) + + wrn = "leglen is greater than the number of unique taxa. showing all taxa." + expect_warning(plot_tax_bar(fam_df, 'Family', sample = 'SampleID', + leglen = 30), wrn) + wrn = 'leglen can not be negative. treating it like 0.' + expect_warning(plot_tax_bar(fam_df, 'Family', sample = 'SampleID', + leglen = -3), wrn) + + wrn = paste('The supplied colour vector is shorter than the number', + 'of distinct taxa.') + cols = c("#E41A1C","#377EB8","#4DAF4A","#984EA3","#FF7F00","#FFFF33", + "#A65628","#F781BF") + expect_warning(plot_tax_bar(fam_df, 'Family', sample = 'SampleID', + colours = cols), wrn) + + names(cols) = unique(fam_df$Family)[1:length(cols)] + expect_warning(plot_tax_bar(fam_df, 'Family', sample = 'SampleID', + colours = cols), wrn) + + wrn = paste('Your per-sample abundances sum to >1. Did you mean to', + 'specify \'means = TRUE\'?') + expect_warning(plot_tax_bar(fam_df, 'Family', sample = 'Source'),wrn) })