Skip to content

Commit

Permalink
Merge pull request #24 from jumpingrivers/prep-browser-data
Browse files Browse the repository at this point in the history
Prep browser data
  • Loading branch information
russHyde authored Jul 6, 2023
2 parents 0c0daf3 + 0f1bafa commit a4f0fbc
Show file tree
Hide file tree
Showing 17 changed files with 496 additions and 28 deletions.
23 changes: 0 additions & 23 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,6 @@ repos:
hooks:
- id: style-files
args: [--style_pkg=styler, --style_fun=tidyverse_style]
- id: roxygenize
# roxygen requires loading pkg -> add dependencies from DESCRIPTION
additional_dependencies:
- ape
- dplyr
- foreach
- ggiraph
- ggplot2
- bioc::ggtree
- glue
- htmlwidgets
- knitr
- lubridate
- mgcv
- git::[email protected]:emvolz-phylodynamics/mlesky
- phangorn
- rlang
- Rmpi
- scales
- stats
- stringr
- treedater
- utils
# codemeta must be above use-tidy-description when both are used
# - id: codemeta-description-updated
- id: use-tidy-description
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,5 @@ Remotes:
SystemRequirements: libopenmpi-dev
Encoding: UTF-8
License: MIT + file LICENSE
RoxygenNote: 7.2.2
RoxygenNote: 7.2.3
Config/testthat/edition: 3
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Generated by roxygen2: do not edit by hand

export(create_all_node_lookups)
export(create_browser_data)
export(create_sequences_lookup)
export(empty_treeview)
export(get_cluster_ID)
export(get_clusternode_mlesky)
export(tfpscan)
export(treeview)
Expand Down
251 changes: 251 additions & 0 deletions R/create_browser_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
#' Generate treeview illustrations and all metadata for presentation by tfpbrowser
#'
#' @param e0 Path to the scanner environment produced by \code{tfpscan}. Alternatively can pass
#' the environment directly.
#' @param output_dir Where should the output files be saved to? This function will create
#' subdirectories `<output_dir>/treeview`, `<output_dir>/mutations`, `<output_dir>/sequences` and
#' assumes that `<output_dir>/scanner_output` already exists.
#' @param ... Further arguments for `treeview()`.
#'
#' @export

create_browser_data <- function(e0,
output_dir,
...) {
dirs <- list(
treeview = file.path(output_dir, "treeview"),
mutations = file.path(output_dir, "mutations"),
scanner = file.path(output_dir, "scanner_output"),
sequences = file.path(output_dir, "sequences")
)

treeview_results <- treeview(e0, output_dir = dirs[["treeview"]], ...)

# Create blank treeview images
empty_treeview(
treeview = "tree-logistic_growth_rate.rds",
treeview_dir = dirs[["treeview"]]
)

# Save .csv containing node-lookups
create_all_node_lookups(output_dir)

# Save .csv containing cluster IDs and associated sequences
create_sequences_lookup(output_dir)

# Save .csvs containing defining- and all-mutations associated with cluster IDs
create_mutation_files(e0, mutations_dir = dirs[["mutations"]])
}

#' Function to create a treeview with all grey nodes to display
#'
#' @param treeview RDS file containing an existing treeview plot
#' @param treeview_dir The directory where data should be read from / written to.
#' @param types Character vector of new variables to colour by
#'
#' @export

empty_treeview <- function(treeview,
treeview_dir,
types = c("mutations", "sequences")) {
filename <- file.path(treeview_dir, treeview)
stopifnot(file.exists(filename))
g <- readRDS(filename)

make_treeview_type <- function(type) {
new_g <- g +
ggplot2::scale_colour_gradient(low = "grey", high = "grey") +
ggplot2::guides(
colour = "none",
fill = "none",
shape = "none"
) +
ggplot2::labs(title = glue::glue("Colour: {type}"))
new_filename <- file.path(treeview_dir, glue::glue("tree-{type}.rds"))
saveRDS(new_g, file = new_filename)
}

purrr::walk(.x = types, .f = ~ make_treeview_type(.x))
}

#' Function to create lookups for nodes for all .rds files in the treeview directory
#'
#' @param data_dir The directory where the data should be read from / written to. This must
#' contain a `treeview` subdirectory.
#'
#' @export

create_all_node_lookups <- function(data_dir) {
# get list of all widgets
all_widgets <- available_treeview(data_dir)
purrr::walk(.x = all_widgets, .f = ~ create_node_lookup(.x, data_dir = data_dir))
}

#' Function to create lookup for a single treeview
#'
#' @param widgetChoice The .rds filename for selected treeview output from radio button.
#' @param data_dir The directory where the data should be read from / written to.

create_node_lookup <- function(widgetChoice, data_dir) {
dirs <- list(
data = data_dir,
treeview = file.path(data_dir, "treeview"),
node_lookup = file.path(data_dir, "treeview", "node_lookup")
)

stopifnot(dir.exists(dirs[["treeview"]]))
create_dir_if_missing(dirs[["node_lookup"]])

output_basename <- stringr::str_replace(widgetChoice, ".rds", ".csv")
files <- list(
input = file.path(dirs[["treeview"]], widgetChoice),
output = file.path(dirs[["node_lookup"]], output_basename)
)

g <- readRDS(files[["input"]])

built <- suppressWarnings(ggplot2::ggplot_build(g))
if (widgetChoice %in% c(
"sina-logistic_growth_rate.rds",
"sina-simple_logistic_growth_rate.rds",
"sina-clock_outlier.rds"
)) {
ids <- built$data[1][[1]]["data_id"]
tooltips <- built$data[1][[1]]$tooltip
tooltip_ids <- get_cluster_ID(tooltips)
} else {
n_layers <- length(built$data)
ids <- built$data[n_layers][[1]]["data_id"]
tooltips <- built$data[n_layers][[1]]$tooltip
tooltip_ids <- get_cluster_ID(tooltips)
}
ids$cluster_ids <- tooltip_ids

readr::write_csv(ids, file = files[["output"]])
}

#' Function to save a CSV file of all sequences for all clusterIDs
#'
#' The files `<data_dir>/scanner_output/*/sequences.csv` will be combined together to create the
#' output file `<data_dir>/sequences/all_sequences.csv`.
#'
#' @param data_dir The data directory for the application. Must have a `scanner_output`
#' subdirectory. Within `<data_dir>/scanner_output/` every subdirectory must contain a
#' `sequences.csv` file.
#'
#' @export

create_sequences_lookup <- function(data_dir) {
dirs <- list(
input = file.path(data_dir, "scanner_output"),
output = file.path(data_dir, "sequences")
)
cluster_ids <- list.dirs(
dirs[["input"]],
recursive = FALSE,
full.names = FALSE
)
output_filepath <- file.path(dirs[["output"]], "all_sequences.csv")

create_dir_if_missing(dirs[["output"]])

lookup_table <- purrr::map_dfr(.x = cluster_ids, .f = ~ process_seq_table(.x, data_dir))
readr::write_csv(lookup_table, file = output_filepath)
}

#' Function to get lookup table of clusterID and sequence
#'
#' @param selected_folder Folder name relating to a single clusterID
#' @param data_dir The data directory for the application. Must have a `scanner_output`
#' subdirectory.

process_seq_table <- function(selected_folder, data_dir) {
sequences <- file.path(data_dir, "scanner_output", selected_folder, "sequences.csv")
sequences <- suppressMessages(readr::read_csv(sequences))
if (nrow(sequences) > 0) {
seq_names <- unique(sequences$sequence_name)
output <- tibble::tibble(
cluster_id = rep(selected_folder, length(seq_names)),
sequence = seq_names
)
return(output)
}
}

#' Create .csvs containing defining- and all-mutations
#'
#' @param e0 Path to the scanner environment produced by \code{tfpscan}. Alternatively can pass
#' the environment directly.
#' @param mutations_dir The directory where the mutations .csv files will be placed. The
#' filenames are `<mutations_dir>/all_mutations.csv` and `<mutations_dir>/defining_mutations.csv`.

create_mutation_files <- function(e0,
mutations_dir) {
create_dir_if_missing(mutations_dir)

files <- list(
defining = file.path(mutations_dir, "defining_mutations.csv"),
all = file.path(mutations_dir, "all_mutations.csv")
)

# load env
if (is.character(e0)) {
e0 <- readRDS(e0)
}
sc0 <- e0$Y

cmut_tables <- get_mutation_tables(sc0)
readr::write_csv(cmut_tables[["defining"]], files[["defining"]])
readr::write_csv(cmut_tables[["all"]], files[["all"]])
}

#' function to return treeview options
#'
#' @param data_dir The directory containing the data for the application.

available_treeview <- function(data_dir) {
all_trees <- list.files(
file.path(data_dir, "treeview"),
pattern = "\\.rds$"
)
all_trees <- factor(
all_trees,
c(
stringr::str_subset(all_trees, "tree"),
stringr::str_subset(all_trees, "sina")
)
)
all_trees <- as.character(sort(all_trees))
names(all_trees) <- all_trees %>%
stringr::str_replace_all("_|-|\\.rds", " ") %>%
stringr::str_trim() %>%
stringr::str_to_title()
return(all_trees)
}

#' Create a directory if it doesn't yet exist
#'
#' @param path The path for the required directory

create_dir_if_missing <- function(path) {
stopifnot(length(path) == 1)

if (!dir.exists(path)) {
dir.create(path)
}
}

#' Function to get node id from data_id column of ggplot
#' TO BE REMOVED AFTER TOOLTIPS TFPSCANNER PR IS MERGED
#'
#' @param tooltip_input Character vector of tooltip content
#'
#' @export

get_cluster_ID <- function(tooltip_input) {
# start searching the string after the "Cluster.ID" text
# until the next new line
match_matrix <- stringr::str_match(tooltip_input, pattern = r"(Cluster.ID\s+#(\d+))")
cluster_ids <- as.numeric(match_matrix[, 2])
return(cluster_ids)
}
4 changes: 0 additions & 4 deletions R/treeview.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,6 @@ treeview <- function(e0,
# node (row) in sc0
cmuts <- get_mutation_list(sc0)

cmut_tables <- get_mutation_tables(sc0)
readr::write_csv(cmut_tables[["defining"]], file.path(output_dir, "defining_mutations.csv"))
readr::write_csv(cmut_tables[["all"]], file.path(output_dir, "all_mutations.csv"))

tr1 <- e0$tre

stopifnot(all(branch_cols %in% colnames(e0$Y)))
Expand Down
63 changes: 63 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,66 @@
```
remotes::install_github("mrc-ide/tfpscanner")
```

## Use with {tfpbrowser}

{tfpscanner} is able to produce data that can be used in the {tfpbrowser} application. That app
must be pointed to a data-directory that contains the tfpscanner-generated files (see the notes in
{tfpbrowser} for details).

The structure of the data-directory used by tfpbrowser is as follows:

```bash
<data-root>
|- mutations
| |- all_mutations.csv
| |- defining_mutations.csv
|- scanner_output
| |- <clusterID1>
| | |- cocirculating_lineages.csv
| | |- lineage_composition.csv
| | |- regional_composition.csv
| | |- sequences.csv
| | |- summary.csv
| |- <clusterID2>
| | |- <as for clusterID1>
| |- ...
|- sequences
| |- all_sequences.csv
|- treeview
|- node_lookup
| |- sina-logistic_growth_rate.csv
| |- tree-logistic_growth_rate.csv
| |- (a .csv for each .rds file in ./treeview)
|- sina-logistic_growth_rate.rds
|- tree-logistic_growth_rate.rds
|- tree-mutations.rds
|- tree-sequences.rds
|- (various other .rds files containing trees that are presented by tfpbrowser)
```
The files in that data directory can be populated using `tfpscanner::create_browser_data()`.
To run that function requires that a few files are in place:
- A directory into which the tfpbrowser files are to be added has been added to the file-system
- That data directory contains a `scanner_output` directory (this is typically generated by
`tfpscanner::tfpscan()`)
- A scanner environment file (typically having the name `scanner_output/scanner-env-<date>.rds`), as
generated by `tfpscanner::tfpscan()` is present in the data-directory
Suppose:
- your data-directory was `./tfpbrowser_files/`;
- the files generated by `tfpscan()` had been added to `./tfpbrowser_files/scanner_output/`; and
- the scanner-environment file was `./tfpbrowser_files/scanner_output/scanner-env-2023-07-05.rds`.
Then, to set up all the remaining files required by tfpbrowser, you would make the following
function call:
```
tfpscanner::create_browser_files(
e0 = "tfpbrowser_files/scanner_output/scanner-env-2023-07-05.rds",
output_dir = "tfpbrowser_files",
[any additional arguments to be passed on to `tfpscanner::treeview()`]
)
```
Loading

0 comments on commit a4f0fbc

Please sign in to comment.