diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index af5b818..e58f2e8 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -18,7 +18,7 @@ jobs: fail-fast: false matrix: config: - - { os: ubuntu-latest, r: '4.2', bioc: '3.16', cont: "bioconductor/bioconductor_docker:RELEASE_3_16" } + - { os: ubuntu-latest, r: '4.3', bioc: '3.18', cont: "bioconductor/bioconductor_docker:RELEASE_3_18" } env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 12bb147..4fcd545 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -18,7 +18,7 @@ jobs: fail-fast: false matrix: config: - - { os: ubuntu-latest, r: '4.2', bioc: '3.16', cont: "bioconductor/bioconductor_docker:RELEASE_3_16" } + - { os: ubuntu-latest, r: '4.3', bioc: '3.18', cont: "bioconductor/bioconductor_docker:RELEASE_3_18" } env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} diff --git a/.lintr b/.lintr index 6c7e8fa..7ffcdad 100644 --- a/.lintr +++ b/.lintr @@ -2,7 +2,6 @@ linters: linters_with_defaults( object_name_linter = NULL, line_length_linter = line_length_linter(120), cyclocomp_linter = NULL, - no_tab_linter = NULL, commented_code_linter = NULL, object_usage_linter = NULL ) diff --git a/DESCRIPTION b/DESCRIPTION index 4e71aa1..c435a47 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,13 +1,14 @@ Package: tfpscanner -Title: Transmission fitness polymorphism scanner -Version: 0.3.0 +Title: Transmission fitness polymorphism scanner +Version: 0.3.1 Date: 2023-01-18 Author: Erik Volz, Olivia Boyd Maintainer: Erik Volz -Description: A pipeline for scanning a SARS-CoV-2 phylogeny for clades with outlying growth - patterns and high evolutionary rates. +Description: A pipeline for scanning a SARS-CoV-2 phylogeny for clades + with outlying growth patterns and high evolutionary rates. +License: MIT + file LICENSE Depends: - R (>= 4.1.0) + R (>= 4.1.0) Imports: ape, dplyr, @@ -38,9 +39,8 @@ Suggests: testthat (>= 3.0.0), withr Remotes: - emvolz-phylodynamics/mlesky -SystemRequirements: libopenmpi-dev -Encoding: UTF-8 -License: MIT + file LICENSE -RoxygenNote: 7.2.3 + emvolz-phylodynamics/mlesky Config/testthat/edition: 3 +Encoding: UTF-8 +RoxygenNote: 7.3.2 +SystemRequirements: libopenmpi-dev diff --git a/NEWS.md b/NEWS.md index b454e46..4ed37e6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,14 @@ +# tfpscanner 0.3.1 + +- Feature: genotype TRUE/FALSE legends were squashed into 1 column on the heatmap/dendrogram figure +- Feature: genotype TRUE/FALSE colours in the heatmaps are now light/mid grey instead of colourful + to reduce colour clashes with the dendrogram colour scheme +- Feature: user can set `heatmap_offset` in `treeview()` for dictating the distance between the + heatmap and dendrogram +- Feature: Colours for dendrogram branches can be defined in the `treeview()` args; by default these + are now a Blue-Red diverging palette with light-grey midpoint +- Feature: Dendrogram statistic ranges are now centred on zero + # tfpscanner 0.3.0 _2023-07-06_ - Feature: function `create_browser_data()` was added to generate all treeview illustrations and diff --git a/R/create_browser_data.R b/R/create_browser_data.R index d3b1cce..883cea3 100644 --- a/R/create_browser_data.R +++ b/R/create_browser_data.R @@ -12,6 +12,10 @@ create_browser_data <- function(e0, output_dir, ...) { + if (!dir.exists(output_dir)) { + dir.create(output_dir) + } + dirs <- list( treeview = file.path(output_dir, "treeview"), mutations = file.path(output_dir, "mutations"), diff --git a/R/plot_tree.R b/R/plot_tree.R index 3dbfa03..d717fc9 100644 --- a/R/plot_tree.R +++ b/R/plot_tree.R @@ -10,10 +10,10 @@ #' \code{lins$node_names} a string vector giving the name of the lineage. For \code{nodes} and #' \code{node_names} the order of entries matches that for \code{lins}. #' @param sc0,cmuts Data-frames. -#' @param heatmap_width,heatmap_lab_offset Parameters for positioning of the heatmap. #' @param mut_regex Regular expression. Defines the mutations under study here. #' @param colours Vector of colours. #' @param colour_limits Min and max values for the colours. +#' @inheritParams treeview #' #' @return A list with several entries. Each entry is a \code{ggtree} object. The list names are #' "noninteractive", "with_interactivity_data", "with_heatmap", "interactive". @@ -25,7 +25,9 @@ create_trees <- function(ggtree_data, sc0, cmuts, heatmap_width, + heatmap_offset, heatmap_lab_offset, + heatmap_fill = c("FALSE" = "grey90", "TRUE" = "grey70"), mut_regex = NULL, colours = NULL, colour_limits = NULL) { @@ -63,7 +65,9 @@ create_trees <- function(ggtree_data, ggobj = tree_list[["with_interactivity_data"]], genotype = genotype, heatmap_width = heatmap_width, - heatmap_lab_offset = heatmap_lab_offset + heatmap_offset = heatmap_offset, + heatmap_lab_offset = heatmap_lab_offset, + heatmap_fill = heatmap_fill ) tree_list$interactive <- create_interactive_ggtree( @@ -90,16 +94,18 @@ create_trees <- function(ggtree_data, #' object will be placed in an \code{rds} file. For \code{html}, a \code{htmlwidget} will be #' placed in a \code{html} file. #' @param include_date Boolean. Should the file-paths include the current date? -#' @inheritParams create_trees +#' @param height_svg,width_svg Scalar numeric. Height/width of the generated plots. Passed on to +#' `ggplot2::ggsave(..., height, width)` and `ggiraph::girafe(..., height_svg, width_svg)`. #' #' @return A named vector containing the file paths that were generated. save_trees <- function(tree_list, branch_col, - n_leaves, output_dir, output_format = c("rds", "html"), - include_date = FALSE) { + include_date = FALSE, + height_svg = NULL, + width_svg = NULL) { output_format <- match.arg(output_format, several.ok = TRUE) required_filetypes <- c( "noninteractive", @@ -118,18 +124,16 @@ save_trees <- function(tree_list, interactive_html = glue::glue("{basename_prefix}.html") )[required_filetypes] - files <- setNames( + files <- stats::setNames( file.path(output_dir, basenames), required_filetypes ) - plot_height <- max(14, floor(n_leaves / 10)) - ggplot2::ggsave( tree_list[["noninteractive"]], filename = files[["noninteractive"]], - height = plot_height, - width = 16, + height = height_svg, + width = width_svg, limitsize = FALSE ) @@ -143,8 +147,8 @@ save_trees <- function(tree_list, if ("html" %in% output_format) { widget <- create_widget( tree_list[["interactive"]], - width_svg = 15, - height_svg = plot_height + width_svg = width_svg, + height_svg = height_svg ) htmlwidgets::saveWidget( @@ -202,6 +206,7 @@ create_noninteractive_ggtree <- function(ggtree_data, name = "Cluster size", range = c(2, 16) ) + + ggplot2::guides(shape = "none") + ggplot2::ggtitle(glue::glue("{Sys.Date()}, colour: {branch_col}")) + ggplot2::theme(legend.position = "top") @@ -381,17 +386,24 @@ extract_genotype_data <- function(ggobj, append_heatmap <- function(ggobj, genotype, heatmap_width = 1, - heatmap_lab_offset = 0) { + heatmap_offset, + heatmap_lab_offset = 0, + heatmap_fill = c("FALSE" = "grey90", "TRUE" = "grey70")) { ggtree::gheatmap( p = ggobj, data = genotype, width = heatmap_width, - offset = 0.0005, + offset = heatmap_offset, colnames_angle = -90, colnames_position = "top", - colnames_offset_y = heatmap_lab_offset, - legend_title = "Genotype" - ) + colnames_offset_y = heatmap_lab_offset + ) + + ggplot2::scale_fill_manual( + values = heatmap_fill + ) + + ggplot2::guides( + fill = ggplot2::guide_legend(title = "Genotype", nrow = 2) + ) } #' Converts a \code{ggtree} object into a \code{ggiraph} object with interactive potential @@ -461,9 +473,7 @@ sort_mutations <- function(muts) { upres <- sort(unique(pre)) sorted_mutations <- do.call(c, lapply(upres, function(.pre) { .muts <- muts[pre == .pre] - .muts1 <- sapply(strsplit(.muts, - split = ":" - ), "[", 2) + .muts1 <- sapply(strsplit(.muts, split = ":"), "[", 2) sites <- regmatches( .muts1, regexpr(.muts1, diff --git a/R/tfpscanner.R b/R/tfpscanner.R index fbb77f9..b29209c 100644 --- a/R/tfpscanner.R +++ b/R/tfpscanner.R @@ -591,28 +591,30 @@ tfpscan <- function(tre, allsegregating <- Reduce(union, segregatingmut) # remove stops from allseg - allsegregating <- allsegregating[!grepl(allsegregating, - pattern = "[*]$" - )] - allsegregating <- allsegregating[!grepl(allsegregating, - pattern = ":[*]" - )] + allsegregating <- allsegregating[ + !grepl(allsegregating, pattern = "[*]$") + ] + allsegregating <- allsegregating[ + !grepl(allsegregating, pattern = ":[*]") + ] annots <- rep("", ape::Ntip(tr) + ape::Nnode(tr)) if (length(allsegregating) > 0) { - allseg1 <- substr(regmatches(allsegregating, regexpr(allsegregating, - pattern = ":[A-Z]" - )), 2, 2) - allseg2 <- regmatches(allsegregating, regexpr(allsegregating, - pattern = "[A-Z*]$" - )) - sites_post <- regmatches(allsegregating, regexpr(allsegregating, - pattern = ":.*$" - )) + allseg1 <- substr( + regmatches(allsegregating, regexpr(allsegregating, pattern = ":[A-Z]")), + 2, + 2 + ) + allseg2 <- regmatches( + allsegregating, regexpr(allsegregating, pattern = "[A-Z*]$") + ) + sites_post <- regmatches( + allsegregating, regexpr(allsegregating, pattern = ":.*$") + ) sites_post <- substr(sites_post, 3, nchar(sites_post) - 1) - sites_pre <- regmatches(allsegregating, regexpr(allsegregating, - pattern = "^.*:" - )) + sites_pre <- regmatches( + allsegregating, regexpr(allsegregating, pattern = "^.*:") + ) sites <- paste0(sites_pre, sites_post) aas <- c() @@ -661,7 +663,7 @@ tfpscan <- function(tre, gtr1 <- gtr1 + ggtree::geom_tiplab(align = TRUE) gtr2 <- gtr1 - if ((length(allsegregating) < 100) & (length(allsegregating) > 0)) { + if ((length(allsegregating) < 100) && (length(allsegregating) > 0)) { gtr2 <- ggtree::gheatmap(gtr1, as.data.frame(aas), width = .66, diff --git a/R/treeview.R b/R/treeview.R index 7b9a405..ce54ad3 100644 --- a/R/treeview.R +++ b/R/treeview.R @@ -15,8 +15,14 @@ #' format(s) should the interactive plots be saved? For \code{rds}, a \code{ggtree} or #' \code{ggplot2} object will be placed in \code{rds} files. For \code{html}, \code{htmlwidget}s #' will be placed in a \code{html} file. -#' @param heatmap_width,heatmap_lab_offset Width and label-offset parameters for the constructed +#' @param dendrogram_colours Colours for the dendrogram statistics. Default is blue (low) to red +#' (high) with a light-grey for the mid-point of the statistic range. +#' @param heatmap_width,heatmap_offset Width relative to the tree and offset from the tree for the #' heatmap. +#' @param heatmap_lab_offset Label-offset parameter for the constructed heatmap. +#' @param heatmap_fill Colours for filling the interior of the heatmap (which indicates the +#' presence / absence of a particular genotype). By default this is light grey for `FALSE` and +#' mid-grey for `TRUE`. #' #' @importFrom rlang .data #' @@ -30,8 +36,13 @@ treeview <- function(e0, lineages = c("AY\\.9", "AY\\.43", "AY\\.4\\.2"), output_dir = "treeview", output_format = c("rds", "html"), + dendrogram_colours = c( + "#2166ac", "#738fc0", "#afbad4", "#e8e8e8", "#e0a9a4", "#ce6964", "#b2182b" + ), heatmap_width = .075, - heatmap_lab_offset = -6) { + heatmap_offset = 0.0005, + heatmap_lab_offset = -6, + heatmap_fill = c("FALSE" = "grey90", "TRUE" = "grey70")) { output_format <- match.arg(output_format, several.ok = TRUE) # require logistic growth rate, prevent non-empty @@ -173,9 +184,6 @@ treeview <- function(e0, } } - # cols for continuous stats - cols <- rev(c("red", "orange", "green", "cyan", "blue")) - # lineages for clade labels td$lineages1 <- sapply(strsplit(td$lineages, split = "\\|"), "[", 1) sc0$lineage1 <- sapply(strsplit(sc0$lineage, split = "\\|"), "[", 1) @@ -234,13 +242,17 @@ treeview <- function(e0, sc0 = sc0, cmuts = cmuts, mut_regex = mutations, - colours = cols, + colours = dendrogram_colours, heatmap_width = heatmap_width, + heatmap_offset = heatmap_offset, heatmap_lab_offset = heatmap_lab_offset ) } suppressWarnings({ + height_svg <- max(14, floor(n_leaves / 10)) + width_svg <- 16 + lgr_trees <- create_trees_curried( branch_col = "logistic_growth_rate", colour_limits = c(-.5, .5) @@ -248,22 +260,25 @@ treeview <- function(e0, save_trees( lgr_trees, branch_col = "logistic_growth_rate", - n_leaves = n_leaves, output_dir = output_dir, - output_format = output_format + output_format = output_format, + height_svg = height_svg, + width_svg = width_svg ) for (branch_col in setdiff(branch_cols, c("logistic_growth_rate"))) { + zero_centred_colour_limits <- c(-1, 1) * max(abs(td[[branch_col]])) tree_list <- create_trees_curried( branch_col = branch_col, - colour_limits = range(td[[branch_col]]) + colour_limits = zero_centred_colour_limits ) save_trees( tree_list, branch_col = branch_col, - n_leaves = n_leaves, output_dir = output_dir, - output_format = output_format + output_format = output_format, + height_svg = height_svg, + width_svg = width_svg ) } }) diff --git a/README.md b/README.md index ddd9307..39a5898 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ Then, to set up all the remaining files required by tfpbrowser, you would make t function call: ``` -tfpscanner::create_browser_files( +tfpscanner::create_browser_data( e0 = "tfpbrowser_files/scanner_output/scanner-env-2023-07-05.rds", output_dir = "tfpbrowser_files", [any additional arguments to be passed on to `tfpscanner::treeview()`] diff --git a/inst/WORDLIST b/inst/WORDLIST index 3853463..5005116 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,4 +1,6 @@ allmuts +args +centred clade Clade clades @@ -6,6 +8,7 @@ clusterID clusterIDs CMD colour +colourful Config CoV cpu @@ -22,7 +25,9 @@ foreach generalised ggiraph ggplot +ggsave ggtree +girafe grey htmlwidget htmlwidgets @@ -61,6 +66,7 @@ stringr styler sublicense summarised +svg svglite SystemRequirements testthat diff --git a/man/append_heatmap.Rd b/man/append_heatmap.Rd index de676dd..f5b90d9 100644 --- a/man/append_heatmap.Rd +++ b/man/append_heatmap.Rd @@ -4,14 +4,28 @@ \alias{append_heatmap} \title{Adds a heatmap to the right of a ggtree object} \usage{ -append_heatmap(ggobj, genotype, heatmap_width = 1, heatmap_lab_offset = 0) +append_heatmap( + ggobj, + genotype, + heatmap_width = 1, + heatmap_offset, + heatmap_lab_offset = 0, + heatmap_fill = c(`FALSE` = "grey90", `TRUE` = "grey70") +) } \arguments{ \item{ggobj}{A ggtree object.} \item{genotype}{The heatmap data.} -\item{heatmap_width, heatmap_lab_offset}{Parameters for positioning of the heatmap.} +\item{heatmap_width, heatmap_offset}{Width relative to the tree and offset from the tree for the +heatmap.} + +\item{heatmap_lab_offset}{Label-offset parameter for the constructed heatmap.} + +\item{heatmap_fill}{Colours for filling the interior of the heatmap (which indicates the +presence / absence of a particular genotype). By default this is light grey for `FALSE` and +mid-grey for `TRUE`.} } \value{ A \code{ggtree} / \code{gg} / \code{ggplot} object with an appended heatmap. diff --git a/man/create_trees.Rd b/man/create_trees.Rd index b551af4..2ed1e82 100644 --- a/man/create_trees.Rd +++ b/man/create_trees.Rd @@ -12,7 +12,9 @@ create_trees( sc0, cmuts, heatmap_width, + heatmap_offset, heatmap_lab_offset, + heatmap_fill = c(`FALSE` = "grey90", `TRUE` = "grey70"), mut_regex = NULL, colours = NULL, colour_limits = NULL @@ -34,7 +36,14 @@ vector of node numbers, these correspond to nodes in \code{ggtree_data$node}. \item{sc0, cmuts}{Data-frames.} -\item{heatmap_width, heatmap_lab_offset}{Parameters for positioning of the heatmap.} +\item{heatmap_width, heatmap_offset}{Width relative to the tree and offset from the tree for the +heatmap.} + +\item{heatmap_lab_offset}{Label-offset parameter for the constructed heatmap.} + +\item{heatmap_fill}{Colours for filling the interior of the heatmap (which indicates the +presence / absence of a particular genotype). By default this is light grey for `FALSE` and +mid-grey for `TRUE`.} \item{mut_regex}{Regular expression. Defines the mutations under study here.} diff --git a/man/save_trees.Rd b/man/save_trees.Rd index a140cf7..2790177 100644 --- a/man/save_trees.Rd +++ b/man/save_trees.Rd @@ -7,10 +7,11 @@ save_trees( tree_list, branch_col, - n_leaves, output_dir, output_format = c("rds", "html"), - include_date = FALSE + include_date = FALSE, + height_svg = NULL, + width_svg = NULL ) } \arguments{ @@ -20,8 +21,6 @@ entries will be saved to an external file. As generated by \code{create_trees}.} \item{branch_col}{Scalar string. The name of the statistic that is under study here (e.g., "logistic_growth_rate" or "clock_outlier"). This name will be used in the output file paths.} -\item{n_leaves}{Scalar integer. The number of leaves in the tree.} - \item{output_dir}{Scalar string. The directory where the plots will be saved.} \item{output_format}{String (either \code{rds} or \code{html} or both). Default: both. Which @@ -30,6 +29,9 @@ object will be placed in an \code{rds} file. For \code{html}, a \code{htmlwidget placed in a \code{html} file.} \item{include_date}{Boolean. Should the file-paths include the current date?} + +\item{height_svg, width_svg}{Scalar numeric. Height/width of the generated plots. Passed on to +`ggplot2::ggsave(..., height, width)` and `ggiraph::girafe(..., height_svg, width_svg)`.} } \value{ A named vector containing the file paths that were generated. diff --git a/man/treeview.Rd b/man/treeview.Rd index 7226756..773118e 100644 --- a/man/treeview.Rd +++ b/man/treeview.Rd @@ -11,8 +11,12 @@ treeview( lineages = c("AY\\\\.9", "AY\\\\.43", "AY\\\\.4\\\\.2"), output_dir = "treeview", output_format = c("rds", "html"), + dendrogram_colours = c("#2166ac", "#738fc0", "#afbad4", "#e8e8e8", "#e0a9a4", + "#ce6964", "#b2182b"), heatmap_width = 0.075, - heatmap_lab_offset = -6 + heatmap_offset = 5e-04, + heatmap_lab_offset = -6, + heatmap_fill = c(`FALSE` = "grey90", `TRUE` = "grey70") ) } \arguments{ @@ -34,8 +38,17 @@ format(s) should the interactive plots be saved? For \code{rds}, a \code{ggtree} \code{ggplot2} object will be placed in \code{rds} files. For \code{html}, \code{htmlwidget}s will be placed in a \code{html} file.} -\item{heatmap_width, heatmap_lab_offset}{Width and label-offset parameters for the constructed +\item{dendrogram_colours}{Colours for the dendrogram statistics. Default is blue (low) to red +(high) with a light-grey for the mid-point of the statistic range.} + +\item{heatmap_width, heatmap_offset}{Width relative to the tree and offset from the tree for the heatmap.} + +\item{heatmap_lab_offset}{Label-offset parameter for the constructed heatmap.} + +\item{heatmap_fill}{Colours for filling the interior of the heatmap (which indicates the +presence / absence of a particular genotype). By default this is light grey for `FALSE` and +mid-grey for `TRUE`.} } \value{ A \code{ggtree} plot. diff --git a/tests/testthat/test-plot_tree.R b/tests/testthat/test-plot_tree.R index 9ed7596..1c7eca7 100644 --- a/tests/testthat/test-plot_tree.R +++ b/tests/testthat/test-plot_tree.R @@ -33,7 +33,7 @@ describe("save_trees", { created_files <- save_trees( tree_list, - branch_col = branch_col, output_dir = td, n_leaves = 100 + branch_col = branch_col, output_dir = td, width_svg = 16, height_svg = 14 ) expect_mapequal(created_files, expected_files) @@ -54,7 +54,7 @@ describe("save_trees", { created_files <- save_trees( tree_list, - branch_col = branch_col, output_dir = td, n_leaves = 100, + branch_col = branch_col, output_dir = td, width_svg = 16, height_svg = 14, output_format = "html" ) @@ -77,7 +77,7 @@ describe("save_trees", { created_files <- save_trees( tree_list, - branch_col = branch_col, output_dir = td, n_leaves = 100, + branch_col = branch_col, output_dir = td, width_svg = 16, height_svg = 14, output_format = "rds" ) @@ -100,7 +100,7 @@ describe("save_trees", { created_files <- save_trees( tree_list, - branch_col = branch_col, output_dir = td, n_leaves = 100, include_date = TRUE + branch_col = branch_col, output_dir = td, width_svg = 16, height_svg = 14, include_date = TRUE ) expect_mapequal(created_files, expected_files)