Skip to content

Commit

Permalink
Merge pull request #21 from jumpingrivers/mut-lookups
Browse files Browse the repository at this point in the history
Mutation lookup tables
  • Loading branch information
russHyde authored Jan 17, 2023
2 parents ba25896 + 0295c11 commit da1ae81
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 30 deletions.
43 changes: 23 additions & 20 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,29 @@ Description: A pipeline for scanning a SARS-CoV-2 phylogeny for clades with outl
Depends:
R (>= 4.1.0)
Imports:
ape,
dplyr,
foreach,
ggiraph,
ggplot2,
ggtree,
glue,
htmlwidgets,
knitr,
lubridate,
mgcv,
mlesky,
phangorn (>= 2.9.0),
rlang,
Rmpi,
scales,
stats,
stringr,
treedater,
utils
ape,
dplyr,
foreach,
ggiraph,
ggplot2,
ggtree,
glue,
htmlwidgets,
knitr,
lubridate,
mgcv,
mlesky,
phangorn (>= 2.9.0),
purrr,
readr,
rlang,
Rmpi,
scales,
stats,
stringr,
tibble,
treedater,
utils
Suggests:
doMPI,
svglite,
Expand Down
49 changes: 49 additions & 0 deletions R/parse_sc0.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#' Extract lists of 'defining' and 'all' mutations for each cluster in the data.frame `sc0`
#'
#' @param sc0 data.frame. Must contain columns \code{cluster_id}, \code{defining_mutations} and
#'   \code{all_mutations}. The "mutation" columns contain a "|"-separated string of the mutations
#'   present in any given cluster; they are coerced with \code{as.character()} before splitting,
#'   so factor columns are accepted. Each row represents a cluster in the phylogeny.
#'
#' @return List of lists, one entry per row of \code{sc0}, named by \code{cluster_id} (coerced to
#'   character). The inner lists all have entries "defining" and "all", character vectors defining
#'   the mutations that are present in the cluster. An empty mutation string gives a zero-length
#'   character vector; a zero-row \code{sc0} gives an empty list.

get_mutation_list <- function(sc0) {
  required_columns <- c("defining_mutations", "all_mutations", "cluster_id")
  missing_columns <- setdiff(required_columns, colnames(sc0))
  if (length(missing_columns) > 0) {
    # Name the offending columns; a bare stopifnot() only reports that the check failed.
    stop(
      "`sc0` is missing required column(s): ",
      paste(missing_columns, collapse = ", "),
      call. = FALSE
    )
  }

  # strsplit() is vectorised over its input, so each column is split in a single call.
  # `fixed = TRUE` splits on the literal "|" without needing a regex escape, and
  # as.character() guards against factor columns, which strsplit() rejects.
  split_mutations <- function(x) {
    strsplit(as.character(x), split = "|", fixed = TRUE)
  }
  defining <- split_mutations(sc0[["defining_mutations"]])
  all_mutations <- split_mutations(sc0[["all_mutations"]])

  cmuts <- lapply(seq_along(defining), function(i) {
    list(
      defining = defining[[i]],
      all = all_mutations[[i]]
    )
  })
  names(cmuts) <- as.character(sc0[["cluster_id"]])

  cmuts
}

#' Extract data.frames containing the 'defining' and 'all' mutations for each cluster
#'
#' @inheritParams get_mutation_list
#'
#' @return List of two data.frames with names "defining" and "all". These contain just the
#'   defining mutations, and all mutations, for each cluster in the phylogeny. The data.frames
#'   have identical structure with character columns \code{cluster_id} and \code{mutation}, one
#'   row per cluster/mutation pair. Both columns are present even when there are no rows (for
#'   example, when \code{sc0} has zero rows). Clusters without any mutations contribute no rows.

get_mutation_tables <- function(sc0) {
  mutation_list <- get_mutation_list(sc0)

  # Build the long-format table for one entry ("defining" or "all") of every cluster.
  # Constructing the columns directly, rather than row-binding per-cluster tibbles, keeps the
  # `cluster_id` / `mutation` schema intact when `mutation_list` is empty; row-binding an empty
  # list yields a tibble with no columns at all.
  build_table <- function(entry) {
    per_cluster <- lapply(mutation_list, function(x) x[[entry]])
    tibble::tibble(
      # Repeat each cluster name once per mutation it carries.
      cluster_id = as.character(rep(names(per_cluster), times = lengths(per_cluster))),
      mutation = as.character(unlist(per_cluster, use.names = FALSE))
    )
  }

  list(
    defining = build_table("defining"),
    all = build_table("all")
  )
}
16 changes: 9 additions & 7 deletions R/treeview.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,15 @@ treeview <- function(e0,
e0 <- readRDS(e0)
}
sc0 <- e0$Y
cmuts <- lapply(seq_len(nrow(sc0)), function(i) {
list(
defining = strsplit(sc0$defining_mutations[i], split = "\\|")[[1]],
all = strsplit(sc0$all_mutations[i], split = "\\|")[[1]]
)
})
names(cmuts) <- sc0$cluster_id

# 'cmuts' is a list. Each element has entries "defining" and "all". There is one entry for each
# node (row) in sc0
cmuts <- get_mutation_list(sc0)

cmut_tables <- get_mutation_tables(sc0)
readr::write_csv(cmut_tables[["defining"]], file.path(output_dir, "defining_mutations.csv"))
readr::write_csv(cmut_tables[["all"]], file.path(output_dir, "all_mutations.csv"))

tr1 <- e0$tre

stopifnot(all(branch_cols %in% colnames(e0$Y)))
Expand Down
4 changes: 4 additions & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,14 @@ phangorn
phylo
phylodynamics
polymorphism
purrr
readr
Reformats
repo
rlang
Rmpi
RoxygenNote
sc
scannint
sina
stringr
Expand All @@ -56,6 +59,7 @@ SystemRequirements
testthat
tfpscanner
Thr
tibble
tis
tooltips
treedata
Expand Down
21 changes: 21 additions & 0 deletions man/get_mutation_list.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions man/get_mutation_tables.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions man/save_sina_plot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/treeview.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit da1ae81

Please sign in to comment.