From 0ff9e89ccae560526971f42e796d6d4bd663231d Mon Sep 17 00:00:00 2001 From: Russ Hyde Date: Wed, 5 Jul 2023 15:18:49 +0100 Subject: [PATCH 1/4] ci: remove document() pre-commit hook --- .pre-commit-config.yaml | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fee9ad7..cbf98b6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,29 +6,6 @@ repos: hooks: - id: style-files args: [--style_pkg=styler, --style_fun=tidyverse_style] - - id: roxygenize - # roxygen requires loading pkg -> add dependencies from DESCRIPTION - additional_dependencies: - - ape - - dplyr - - foreach - - ggiraph - - ggplot2 - - bioc::ggtree - - glue - - htmlwidgets - - knitr - - lubridate - - mgcv - - git::git@github.com:emvolz-phylodynamics/mlesky - - phangorn - - rlang - - Rmpi - - scales - - stats - - stringr - - treedater - - utils # codemeta must be above use-tidy-description when both are used # - id: codemeta-description-updated - id: use-tidy-description From 43bce393efb9c1afc709dc4c4b23197c24aa166e Mon Sep 17 00:00:00 2001 From: Russ Hyde Date: Wed, 5 Jul 2023 15:23:39 +0100 Subject: [PATCH 2/4] feat: create files required by tfpbrowser using create_browser_data() --- DESCRIPTION | 2 +- NAMESPACE | 5 + R/create_browser_data.R | 251 +++++++++++++++++++++++++++++++++ R/treeview.R | 4 - inst/WORDLIST | 10 ++ man/available_treeview.Rd | 14 ++ man/create_all_node_lookups.Rd | 15 ++ man/create_browser_data.Rd | 21 +++ man/create_dir_if_missing.Rd | 14 ++ man/create_mutation_files.Rd | 18 +++ man/create_node_lookup.Rd | 16 +++ man/create_sequences_lookup.Rd | 17 +++ man/empty_treeview.Rd | 18 +++ man/get_cluster_ID.Rd | 16 +++ man/process_seq_table.Rd | 17 +++ 15 files changed, 433 insertions(+), 5 deletions(-) create mode 100644 R/create_browser_data.R create mode 100644 man/available_treeview.Rd create mode 100644 man/create_all_node_lookups.Rd create mode 100644 man/create_browser_data.Rd 
create mode 100644 man/create_dir_if_missing.Rd create mode 100644 man/create_mutation_files.Rd create mode 100644 man/create_node_lookup.Rd create mode 100644 man/create_sequences_lookup.Rd create mode 100644 man/empty_treeview.Rd create mode 100644 man/get_cluster_ID.Rd create mode 100644 man/process_seq_table.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 815bd5b..9126c9f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -42,5 +42,5 @@ Remotes: SystemRequirements: libopenmpi-dev Encoding: UTF-8 License: MIT + file LICENSE -RoxygenNote: 7.2.2 +RoxygenNote: 7.2.3 Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 4517d7c..cca6eb6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,10 @@ # Generated by roxygen2: do not edit by hand +export(create_all_node_lookups) +export(create_browser_data) +export(create_sequences_lookup) +export(empty_treeview) +export(get_cluster_ID) export(get_clusternode_mlesky) export(tfpscan) export(treeview) diff --git a/R/create_browser_data.R b/R/create_browser_data.R new file mode 100644 index 0000000..1b6fcf8 --- /dev/null +++ b/R/create_browser_data.R @@ -0,0 +1,251 @@ +#' Generate treeview illustrations and all metadata for presentation by tfpbrowser +#' +#' @param e0 Path to the scanner environment produced by \code{tfpscan}. Alternatively can pass +#' the environment directly. +#' @param output_dir Where should the output files be saved to? This function will create +#' subdirectories `/treeview`, `/mutations`, `/sequences` and +#' assumes that `/scanner_output` already exists. +#' @param ... Further arguments for `treeview()`. +#' +#' @export + +create_browser_data <- function(e0, + output_dir, + ...) { + dirs <- list( + treeview = file.path(output_dir, "treeview"), + mutations = file.path(output_dir, "mutations"), + scanner = file.path(output_dir, "scanner_output"), + sequences = file.path(output_dir, "sequences") + ) + + treeview_results <- treeview(e0, output_dir = dirs[["treeview"]], ...) 
+ + # Create blank treeview images + empty_treeview( + treeview = "tree-logistic_growth_rate.rds", + treeview_dir = dirs[["treeview"]] + ) + + # Save .csv containing node-lookups + create_all_node_lookups(output_dir) + + # Save .csv containing cluster IDs and associated sequences + create_sequences_lookup(output_dir) + + # Save .csvs containing defining- and all-mutations associated with cluster IDs + create_mutation_files(e0, mutations_dir = dirs[["mutations"]]) +} + +#' Function to create a treeview with all grey nodes to display +#' +#' @param treeview RDS file containing an existing treeview plot +#' @param treeview_dir The directory where data should be read from / written to. +#' @param types Character vector of new variables to colour by +#' +#' @export + +empty_treeview <- function(treeview, + treeview_dir, + types = c("mutations", "sequences")) { + filename <- file.path(treeview_dir, treeview) + stopifnot(file.exists(filename)) + g <- readRDS(filename) + + make_treeview_type <- function(type) { + new_g <- g + + ggplot2::scale_colour_gradient(low = "grey", high = "grey") + + ggplot2::guides( + colour = "none", + fill = "none", + shape = "none" + ) + + ggplot2::labs(title = glue::glue("Colour: {type}")) + new_filename <- file.path(treeview_dir, glue::glue("tree-{type}.rds")) + saveRDS(new_g, file = new_filename) + } + + purrr::walk(.x = types, .f = ~ make_treeview_type(.x)) +} + +#' Function to create lookups for nodes for all .rds files in the treeview directory +#' +#' @param data_dir The directory where the data should be read from / written to. This must +#' contain a `treeview` subdirectory. 
+#' +#' @export + +create_all_node_lookups <- function(data_dir) { + # get list of all widgets + all_widgets <- available_treeview(data_dir) + purrr::walk(.x = all_widgets, .f = ~ create_node_lookup(.x, data_dir = data_dir)) +} + +#' Function to create lookup for a single treeview +#' +#' @param widgetChoice The .rds filename for selected treeview output from radio button. +#' @param data_dir The directory where the data should be read from / written to. + +create_node_lookup <- function(widgetChoice, data_dir) { + dirs <- list( + data = data_dir, + treeview = file.path(data_dir, "treeview"), + node_lookup = file.path(data_dir, "treeview", "node_lookup") + ) + + stopifnot(dir.exists(dirs[["treeview"]])) + create_dir_if_missing(dirs[["node_lookup"]]) + # escape + anchor the pattern so only the trailing ".rds" extension is swapped for ".csv" + output_basename <- stringr::str_replace(widgetChoice, "\\.rds$", ".csv") + files <- list( + input = file.path(dirs[["treeview"]], widgetChoice), + output = file.path(dirs[["node_lookup"]], output_basename) + ) + + g <- readRDS(files[["input"]]) + + built <- suppressWarnings(ggplot2::ggplot_build(g)) + if (widgetChoice %in% c( + "sina-logistic_growth_rate.rds", + "sina-simple_logistic_growth_rate.rds", + "sina-clock_outlier.rds" + )) { + ids <- built$data[1][[1]]["data_id"] + tooltips <- built$data[1][[1]]$tooltip + tooltip_ids <- get_cluster_ID(tooltips) + } else { + n_layers <- length(built$data) + ids <- built$data[n_layers][[1]]["data_id"] + tooltips <- built$data[n_layers][[1]]$tooltip + tooltip_ids <- suppressWarnings(readr::parse_number(tooltips)) + } + ids$cluster_ids <- tooltip_ids + + readr::write_csv(ids, file = files[["output"]]) +} + +#' Function to save a CSV file of all sequences for all clusterIDs +#' +#' The files `/scanner_output/*/sequences.csv` will be combined together to create the +#' output file `/sequences/all_sequences.csv`. +#' +#' @param data_dir The data directory for the application. Must have a `scanner_output` +#' subdirectory. 
Within `/scanner_output/` every subdirectory must contain a +#' `sequences.csv` file. +#' +#' @export + +create_sequences_lookup <- function(data_dir) { + dirs <- list( + input = file.path(data_dir, "scanner_output"), + output = file.path(data_dir, "sequences") + ) + cluster_ids <- list.dirs( + dirs[["input"]], + recursive = FALSE, + full.names = FALSE + ) + output_filepath <- file.path(dirs[["output"]], "all_sequences.csv") + + create_dir_if_missing(dirs[["output"]]) + + lookup_table <- purrr::map_dfr(.x = cluster_ids, .f = ~ process_seq_table(.x, data_dir)) + readr::write_csv(lookup_table, file = output_filepath) +} + +#' Function to get lookup table of clusterID and sequence +#' +#' @param selected_folder Folder name relating to a single clusterID +#' @param data_dir The data directory for the application. Must have a `scanner_output` +#' subdirectory. + +process_seq_table <- function(selected_folder, data_dir) { + sequences <- file.path(data_dir, "scanner_output", selected_folder, "sequences.csv") + sequences <- suppressMessages(readr::read_csv(sequences)) + if (nrow(sequences) > 0) { + seq_names <- unique(sequences$sequence_name) + output <- tibble::tibble( + cluster_id = rep(selected_folder, length(seq_names)), + sequence = seq_names + ) + return(output) + } +} + +#' Create .csvs containing defining- and all-mutations +#' +#' @param e0 Path to the scanner environment produced by \code{tfpscan}. Alternatively can pass +#' the environment directly. +#' @param mutations_dir The directory where the mutations .csv files will be placed. The +#' filenames are `/all_mutations.csv` and `/defining_mutations.csv`. 
+ +create_mutation_files <- function(e0, + mutations_dir) { + create_dir_if_missing(mutations_dir) + + files <- list( + defining = file.path(mutations_dir, "defining_mutations.csv"), + all = file.path(mutations_dir, "all_mutations.csv") + ) + + # load env: a character `e0` is treated as the path to an .rds file and read in + if (is.character(e0)) { + e0 <- readRDS(e0) + } + sc0 <- e0$Y + + cmut_tables <- get_mutation_tables(sc0) + readr::write_csv(cmut_tables[["defining"]], files[["defining"]]) + readr::write_csv(cmut_tables[["all"]], files[["all"]]) +} + +#' function to return treeview options +#' +#' @param data_dir The directory containing the data for the application. + +available_treeview <- function(data_dir) { + all_trees <- list.files( + file.path(data_dir, "treeview"), + pattern = "\\.rds$" + ) + all_trees <- factor( + all_trees, + c( + stringr::str_subset(all_trees, "tree"), + stringr::str_subset(all_trees, "sina") + ) + ) + all_trees <- as.character(sort(all_trees)) + names(all_trees) <- all_trees %>% + stringr::str_replace_all("_|-|\\.rds", " ") %>% + stringr::str_trim() %>% + stringr::str_to_title() + return(all_trees) +} + +#' Create a directory if it doesn't yet exist +#' +#' @param path The path for the required directory + +create_dir_if_missing <- function(path) { + stopifnot(length(path) == 1) + + if (!dir.exists(path)) { + dir.create(path) + } +} + +#' Function to get node id from data_id column of ggplot +#' TO BE REMOVED AFTER TOOLTIPS TFPSCANNER PR IS MERGED +#' +#' @param tooltip_input Character vector of tooltip content +#' +#' @export + +get_cluster_ID <- function(tooltip_input) { + # capture the run of digits that follows "Cluster.ID", whitespace and a "#" in each tooltip; + # column 2 of the match matrix is that capture group (NA where a tooltip has no match) + match_matrix <- stringr::str_match(tooltip_input, pattern = r"(Cluster.ID\s+#(\d+))") + cluster_ids <- as.numeric(match_matrix[, 2]) + return(cluster_ids) +} diff --git a/R/treeview.R b/R/treeview.R index 985dac0..7b9a405 100644 --- a/R/treeview.R +++ b/R/treeview.R @@ -54,10 +54,6 @@ treeview <- function(e0, # node (row) in sc0 cmuts <- 
get_mutation_list(sc0) - cmut_tables <- get_mutation_tables(sc0) - readr::write_csv(cmut_tables[["defining"]], file.path(output_dir, "defining_mutations.csv")) - readr::write_csv(cmut_tables[["all"]], file.path(output_dir, "all_mutations.csv")) - tr1 <- e0$tre stopifnot(all(branch_cols %in% colnames(e0$Y))) diff --git a/inst/WORDLIST b/inst/WORDLIST index 1da8635..3853463 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -2,14 +2,19 @@ allmuts clade Clade clades +clusterID +clusterIDs CMD colour Config CoV cpu +csv +csvs del dev dichotomised +dir doMPI dplyr emvolz @@ -18,6 +23,7 @@ generalised ggiraph ggplot ggtree +grey htmlwidget htmlwidgets ide @@ -41,6 +47,7 @@ phylo phylodynamics polymorphism purrr +rds readr Reformats repo @@ -57,14 +64,17 @@ summarised svglite SystemRequirements testthat +tfpbrowser tfpscanner Thr tibble tis +tooltip tooltips treedata treedater treeio +treeview uk visualisations volz diff --git a/man/available_treeview.Rd b/man/available_treeview.Rd new file mode 100644 index 0000000..4ac8c35 --- /dev/null +++ b/man/available_treeview.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_browser_data.R +\name{available_treeview} +\alias{available_treeview} +\title{function to return treeview options} +\usage{ +available_treeview(data_dir) +} +\arguments{ +\item{data_dir}{The directory containing the data for the application.} +} +\description{ +function to return treeview options +} diff --git a/man/create_all_node_lookups.Rd b/man/create_all_node_lookups.Rd new file mode 100644 index 0000000..ec9bb31 --- /dev/null +++ b/man/create_all_node_lookups.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_browser_data.R +\name{create_all_node_lookups} +\alias{create_all_node_lookups} +\title{Function to create lookups for nodes for all .rds files in the treeview directory} +\usage{ +create_all_node_lookups(data_dir) +} +\arguments{ 
+\item{data_dir}{The directory where the data should be read from / written to. This must +contain a `treeview` subdirectory.} +} +\description{ +Function to create lookups for nodes for all .rds files in the treeview directory +} diff --git a/man/create_browser_data.Rd b/man/create_browser_data.Rd new file mode 100644 index 0000000..545af51 --- /dev/null +++ b/man/create_browser_data.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_browser_data.R +\name{create_browser_data} +\alias{create_browser_data} +\title{Generate treeview illustrations and all metadata for presentation by tfpbrowser} +\usage{ +create_browser_data(e0, output_dir, ...) +} +\arguments{ +\item{e0}{Path to the scanner environment produced by \code{tfpscan}. Alternatively can pass +the environment directly.} + +\item{output_dir}{Where should the output files be saved to? This function will create +subdirectories `/treeview`, `/mutations`, `/sequences` and +assumes that `/scanner_output` already exists.} + +\item{...}{Further arguments for `treeview()`.} +} +\description{ +Generate treeview illustrations and all metadata for presentation by tfpbrowser +} diff --git a/man/create_dir_if_missing.Rd b/man/create_dir_if_missing.Rd new file mode 100644 index 0000000..fab1115 --- /dev/null +++ b/man/create_dir_if_missing.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_browser_data.R +\name{create_dir_if_missing} +\alias{create_dir_if_missing} +\title{Create a directory if it doesn't yet exist} +\usage{ +create_dir_if_missing(path) +} +\arguments{ +\item{path}{The path for the required directory} +} +\description{ +Create a directory if it doesn't yet exist +} diff --git a/man/create_mutation_files.Rd b/man/create_mutation_files.Rd new file mode 100644 index 0000000..179a55d --- /dev/null +++ b/man/create_mutation_files.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% 
Please edit documentation in R/create_browser_data.R +\name{create_mutation_files} +\alias{create_mutation_files} +\title{Create .csvs containing defining- and all-mutations} +\usage{ +create_mutation_files(e0, mutations_dir) +} +\arguments{ +\item{e0}{Path to the scanner environment produced by \code{tfpscan}. Alternatively can pass +the environment directly.} + +\item{mutations_dir}{The directory where the mutations .csv files will be placed. The +filenames are `/all_mutations.csv` and `/defining_mutations.csv`.} +} +\description{ +Create .csvs containing defining- and all-mutations +} diff --git a/man/create_node_lookup.Rd b/man/create_node_lookup.Rd new file mode 100644 index 0000000..367f0a1 --- /dev/null +++ b/man/create_node_lookup.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_browser_data.R +\name{create_node_lookup} +\alias{create_node_lookup} +\title{Function to create lookup for a single treeview} +\usage{ +create_node_lookup(widgetChoice, data_dir) +} +\arguments{ +\item{widgetChoice}{The .rds filename for selected treeview output from radio button.} + +\item{data_dir}{The directory where the data should be read from / written to.} +} +\description{ +Function to create lookup for a single treeview +} diff --git a/man/create_sequences_lookup.Rd b/man/create_sequences_lookup.Rd new file mode 100644 index 0000000..3e95c93 --- /dev/null +++ b/man/create_sequences_lookup.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_browser_data.R +\name{create_sequences_lookup} +\alias{create_sequences_lookup} +\title{Function to save a CSV file of all sequences for all clusterIDs} +\usage{ +create_sequences_lookup(data_dir) +} +\arguments{ +\item{data_dir}{The data directory for the application. Must have a `scanner_output` +subdirectory. 
Within `/scanner_output/` every subdirectory must contain a +`sequences.csv` file.} +} +\description{ +The files `/scanner_output/*/sequences.csv` will be combined together to create the +output file `/sequences/all_sequences.csv`. +} diff --git a/man/empty_treeview.Rd b/man/empty_treeview.Rd new file mode 100644 index 0000000..f648dd6 --- /dev/null +++ b/man/empty_treeview.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_browser_data.R +\name{empty_treeview} +\alias{empty_treeview} +\title{Function to create a treeview with all grey nodes to display} +\usage{ +empty_treeview(treeview, treeview_dir, types = c("mutations", "sequences")) +} +\arguments{ +\item{treeview}{RDS file containing an existing treeview plot} + +\item{treeview_dir}{The directory where data should be read from / written to.} + +\item{types}{Character vector of new variables to colour by} +} +\description{ +Function to create a treeview with all grey nodes to display +} diff --git a/man/get_cluster_ID.Rd b/man/get_cluster_ID.Rd new file mode 100644 index 0000000..6ea35f0 --- /dev/null +++ b/man/get_cluster_ID.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_browser_data.R +\name{get_cluster_ID} +\alias{get_cluster_ID} +\title{Function to get node id from data_id column of ggplot +TO BE REMOVED AFTER TOOLTIPS TFPSCANNER PR IS MERGED} +\usage{ +get_cluster_ID(tooltip_input) +} +\arguments{ +\item{tooltip_input}{Character vector of tooltip content} +} +\description{ +Function to get node id from data_id column of ggplot +TO BE REMOVED AFTER TOOLTIPS TFPSCANNER PR IS MERGED +} diff --git a/man/process_seq_table.Rd b/man/process_seq_table.Rd new file mode 100644 index 0000000..c9d6ef5 --- /dev/null +++ b/man/process_seq_table.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_browser_data.R +\name{process_seq_table} 
+\alias{process_seq_table} +\title{Function to get lookup table of clusterID and sequence} +\usage{ +process_seq_table(selected_folder, data_dir) +} +\arguments{ +\item{selected_folder}{Folder name relating to a single clusterID} + +\item{data_dir}{The data directory for the application. Must have a `scanner_output` +subdirectory.} +} +\description{ +Function to get lookup table of clusterID and sequence +} From f81d006ca8539dcf287af2027abd2eaf241be86b Mon Sep 17 00:00:00 2001 From: Russ Hyde Date: Wed, 5 Jul 2023 15:50:14 +0100 Subject: [PATCH 3/4] fix: ensure correct cluster IDs are added to node-lookup tables --- R/create_browser_data.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_browser_data.R b/R/create_browser_data.R index 1b6fcf8..1287e01 100644 --- a/R/create_browser_data.R +++ b/R/create_browser_data.R @@ -117,7 +117,7 @@ create_node_lookup <- function(widgetChoice, data_dir) { n_layers <- length(built$data) ids <- built$data[n_layers][[1]]["data_id"] tooltips <- built$data[n_layers][[1]]$tooltip - tooltip_ids <- suppressWarnings(readr::parse_number(tooltips)) + tooltip_ids <- get_cluster_ID(tooltips) } ids$cluster_ids <- tooltip_ids From 0f1bafa829f0f22e264edff0d0da11f41b1a5092 Mon Sep 17 00:00:00 2001 From: Russ Hyde Date: Wed, 5 Jul 2023 17:06:29 +0100 Subject: [PATCH 4/4] add notes on how to set up tfpbrowser files using tfpscanner --- README.md | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/README.md b/README.md index aeba83a..ddd9307 100644 --- a/README.md +++ b/README.md @@ -5,3 +5,66 @@ ``` remotes::install_github("mrc-ide/tfpscanner") ``` + +## Use with {tfpbrowser} + +{tfpscanner} is able to produce data that can be used in the {tfpbrowser} application. That app +must be pointed to a data-directory that contains the tfpscanner-generated files (see the notes in +{tfpbrowser} for details). 
+ +The structure of the data-directory used by tfpbrowser is as follows: + +```bash + + |- mutations + | |- all_mutations.csv + | |- defining_mutations.csv + |- scanner_output + | |- + | | |- cocirculating_lineages.csv + | | |- lineage_composition.csv + | | |- regional_composition.csv + | | |- sequences.csv + | | |- summary.csv + | |- + | | |- + | |- ... + |- sequences + | |- all_sequences.csv + |- treeview + |- node_lookup + | |- sina-logistic_growth_rate.csv + | |- tree-logistic_growth_rate.csv + | |- (a .csv for each .rds file in ./treeview) + |- sina-logistic_growth_rate.rds + |- tree-logistic_growth_rate.rds + |- tree-mutations.rds + |- tree-sequences.rds + |- (various other .rds files containing trees that are presented by tfpbrowser) +``` + +The files in that data directory can be populated using `tfpscanner::create_browser_data()`. +Running that function requires that a few files are in place: + +- A directory into which the tfpbrowser files are to be added has been added to the file-system +- That data directory contains a `scanner_output` directory (this is typically generated by + `tfpscanner::tfpscan()`) +- A scanner environment file (typically having the name `scanner_output/scanner-env-.rds`), as + generated by `tfpscanner::tfpscan()`, is present in the data-directory + +Suppose: + +- your data-directory was `./tfpbrowser_files/`; +- the files generated by `tfpscan()` had been added to `./tfpbrowser_files/scanner_output/`; and +- the scanner-environment file was `./tfpbrowser_files/scanner_output/scanner-env-2023-07-05.rds`. + +Then, to set up all the remaining files required by tfpbrowser, you would make the following +function call: + +``` +tfpscanner::create_browser_data( + e0 = "tfpbrowser_files/scanner_output/scanner-env-2023-07-05.rds", + output_dir = "tfpbrowser_files", + [any additional arguments to be passed on to `tfpscanner::treeview()`] +) +```