Skip to content

Commit

Permalink
Merge pull request #5 from wenjie1991/devel
Browse files Browse the repository at this point in the history
feat: add more options for controlling beta-binomial model fitting process
  • Loading branch information
wenjie1991 authored Jan 17, 2024
2 parents f9efcf4 + a3894b7 commit cd70b5a
Show file tree
Hide file tree
Showing 11 changed files with 137 additions and 23 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ name: R

on:
push:
branches: [ "master" ]
branches: [ "main" ]
pull_request:
branches: [ "master" ]
branches: [ "main" ]

permissions:
contents: read
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: scMitoMut
Title: Single-cell Mitochondrial Mutation Analysis Tool
Version: 0.99.2
Version: 0.99.5
Authors@R: c(
person("Wenjie", "Sun", email = "[email protected]", role = c("cre", "aut"), comment=c(ORCID="0000-0002-3100-2346")),
person("Leila", "Perie", email = "[email protected]", role = "ctb")
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@

# scMitoMut 0.99.5
- Update README add biocondutor installing info
- Make sure the subsetting cells and loc must be unique in scMitoMutObj
- Add alternative reads count threshold for loc filtering (alt/_count)

# scMitoMut 0.99.2

Initiate package.
52 changes: 50 additions & 2 deletions R/io.R
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,7 @@ open_h5_file <- function(h5_file) {
min_cell = 1,
model = "bb",
p_threshold = 0.05,
alt_count_threshold = 0,
p_adj_method = "fdr"
)
)
Expand Down Expand Up @@ -365,6 +366,7 @@ format.mtmutObj <- function(x, ...) {
cat("\t", "min_cell: ", x$loc_filter$min_cell, "\n", sep = "")
cat("\t", "model: ", x$loc_filter$model, "\n", sep = "")
cat("\t", "p_threshold: ", x$loc_filter$p_threshold, "\n", sep = "")
cat("\t", "alt_count_threshold: ", x$loc_filter$alt_count_threshold, "\n", sep = "")
cat("\t", "p_adj_method: ", x$loc_filter$p_adj_method, "\n", sep = "")
}

Expand All @@ -376,6 +378,19 @@ format.mtmutObj <- function(x, ...) {
#' @param ... other parameters passed to \code{\link[base]{format}} or \code{\link[base]{print}}.
#' @return a string
#' @export
#' @examples
#' ## Use the example data
#' f <- system.file("extdata", "mini_dataset.tsv.gz", package = "scMitoMut")
#' ## Create a temporary h5 file
#' ## In real case, we keep the h5 in project folder for future use
#' f_h5_tmp <- tempfile(fileext = ".h5")
#' ## Load the data with parse_table function
#' f_h5 <- parse_table(f, sep = "\t", h5_file = f_h5_tmp)
#' f_h5
#' ## open the h5 file and create a mtmutObj object
#' x <- open_h5_file(f_h5)
#' x
#' print(x)
print.mtmutObj <- function(x, ...) {
format(x, ...)
}
Expand Down Expand Up @@ -420,6 +435,9 @@ subset_cell <- function(mtmutObj, cell_list) {
if ("cell_selected" %in% h5ls(mtmutObj$h5f, recursive = FALSE)$name) {
h5delete(mtmutObj$h5f, "cell_selected")
}
if (length(unique(cell_list)) != length(cell_list)) {
stop("cell_list should be unique")
}
h5write(cell_list, mtmutObj$h5f, "cell_selected")
mtmutObj$cell_selected <- cell_list
mtmutObj
Expand All @@ -435,6 +453,11 @@ subset_loc <- function(mtmutObj, loc_list) {
if ("loc_selected" %in% h5ls(mtmutObj$h5f, recursive = FALSE)$name) {
h5delete(mtmutObj$h5f, "loc_selected")
}

if (length(unique(loc_list)) != length(loc_list)) {
stop("loc_list should be unique")
}

h5write(loc_list, mtmutObj$h5f, "loc_selected")
mtmutObj$loc_selected <- loc_list
mtmutObj
Expand Down Expand Up @@ -492,6 +515,7 @@ get_pval <- function(mtmutObj, loc, model = "bb", method = "fdr") {
#' @param p_threshold a numeric of the p-value threshold, the default is 0.05.
#' @param p_adj_method a string of the method for p-value adjustment, .
#' refer to \code{\link[stats]{p.adjust}}. The default is "fdr".
#' @param alt_count_threshold a integer of the minimum number of alternative base count, the default is 0.
#' @return a mtmutObj object with loc_pass and loc_filter updated.
#' @examples
#' ## Use the example data
Expand All @@ -510,7 +534,7 @@ get_pval <- function(mtmutObj, loc, model = "bb", method = "fdr") {
#' x <- filter_loc(x, min_cell = 5, model = "bb", p_threshold = 0.05, p_adj_method = "fdr")
#' x
#' @export
filter_loc <- function(mtmutObj, min_cell = 5, model = "bb", p_threshold = 0.05, p_adj_method = "fdr") {
filter_loc <- function(mtmutObj, min_cell = 5, model = "bb", p_threshold = 0.05, alt_count_threshold = 0, p_adj_method = "fdr") {

if (!is(mtmutObj, "mtmutObj")) {
stop("mtmutObj should be a mtmutObj object")
Expand All @@ -528,10 +552,15 @@ filter_loc <- function(mtmutObj, min_cell = 5, model = "bb", p_threshold = 0.05,
stop("p_threshold should be in [0, 1]")
}

if (alt_count_threshold < 0) {
stop("alt_count_threshold should be >= 0")
}

## Get the parameters from mtmutObj if they are NULL
min_cell <- ifelse(is.null(min_cell), mtmutObj$loc_filter$min_cell, min_cell)
model <- ifelse(is.null(model), mtmutObj$loc_filter$model, model)
p_threshold <- ifelse(is.null(p_threshold), mtmutObj$loc_filter$p_threshold, p_threshold)
alt_count_threshold <- ifelse(is.null(alt_count_threshold), mtmutObj$loc_filter$alt_count_threshold, alt_count_threshold)
p_adj_method <- ifelse(is.null(p_adj_method), mtmutObj$loc_filter$p_adj_method, p_adj_method)

loc_list <- mtmutObj$loc_selected
Expand All @@ -540,11 +569,23 @@ filter_loc <- function(mtmutObj, min_cell = 5, model = "bb", p_threshold = 0.05,
data.frame(loc = xi, mut_cell_n = sum(pval <= p_threshold, na.rm = TRUE))
}) %>% rbindlist()
res <- res[mut_cell_n >= min_cell]

if (alt_count_threshold > 0) {
res_2 <- parallel::mclapply(res$loc, function(xi) {
d <- read_locus(mtmutObj, xi)
pval <- get_pval(mtmutObj, xi, model = model, method = p_adj_method)
n <- sum((d$coverage - d$alt_depth) >= alt_count_threshold & pval <= p_threshold, na.rm = TRUE)
data.frame(loc = xi, mut_cell_n = n)
}) %>% rbindlist()
res <- res_2[mut_cell_n >= min_cell]
}

mtmutObj$loc_pass <- res$loc
mtmutObj$loc_filter <- list(
min_cell = min_cell,
model = model,
p_threshold = p_threshold,
alt_count_threshold = alt_count_threshold,
p_adj_method = p_adj_method
)
mtmutObj
Expand Down Expand Up @@ -606,6 +647,7 @@ export_dt <- function(mtmutObj, percent_interp = 1, n_interp = 3, all_cell = FAL
min_cell <- mtmutObj$loc_filter$min_cell
model <- mtmutObj$loc_filter$model
p_threshold <- mtmutObj$loc_filter$p_threshold
alt_count_threshold <- mtmutObj$loc_filter$alt_count_threshold
p_adj_method <- mtmutObj$loc_filter$p_adj_method

res <- parallel::mclapply(loc_list, function(loc_i) {
Expand All @@ -616,6 +658,7 @@ export_dt <- function(mtmutObj, percent_interp = 1, n_interp = 3, all_cell = FAL
}) %>% rbindlist()

res[, af := ((fwd_depth + rev_depth) / coverage)]
res[, alt_count := (coverage - alt_depth)]

# if (!all_cell) {
# cell_list = res[, .(n = sum(pval < p_threshold, na.rm=TRUE)), by = cell_barcode][n > 0, cell_barcode]
Expand All @@ -634,8 +677,13 @@ export_dt <- function(mtmutObj, percent_interp = 1, n_interp = 3, all_cell = FAL
m_a <- data.matrix(d[, -1])
rownames(m_a) <- d[[1]]

## matrix alt_count
d <- data.table::dcast(res, loc ~ cell_barcode, value.var = "alt_count")
m_c <- data.matrix(d[, -1])
rownames(m_c) <- d[[1]]

## matrix binary
m_b <- m_p < p_threshold
m_b <- m_p < p_threshold & m_c >= alt_count_threshold

## interpolate the mutation has higher frequency with the mutation has lower frequency
if (percent_interp < 1) {
Expand Down
18 changes: 14 additions & 4 deletions R/stat_ensemble.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ get_bm_pval <- function(x, method = "none") {
#' @param loc string given the locus name (e.g. "chrM1000").
#' @param dom_allele string given the dominant allele (e.g. "A"), if NULL auto detect the dominant allele.
#' @param return_data logical whether to return the allele count data, if FALSE, the \code{data} in the return value will be NULL. The default is FALSE.
#' @param bb_over_bm logical weather to use binomial mixture model result to define the wildtype cells for training beta binomial model.
#' @param bb_over_bm_p numeric the binomial mixutre model p value threshold for selecting the wildtype cells for training beta binomial model.
#' @param bb_over_bm_adj string the method for adjusting the binomial mixture p value, default is "fdr".
#' @param ... other parameters control the model fitting.
#' @return A list of three elements:
#' \item{data}{data.frame of the allele count data.}
Expand All @@ -38,7 +41,7 @@ get_bm_pval <- function(x, method = "none") {
#' res
#'
#' @export
process_locus_bmbb <- function(mtmutObj, loc, dom_allele = NULL, return_data = FALSE, ...) {
process_locus_bmbb <- function(mtmutObj, loc, dom_allele = NULL, return_data = FALSE, bb_over_bm = TRUE, bb_over_bm_p = 0.05, bb_over_bm_adj = "fdr", ...) {

if (!is(mtmutObj, "mtmutObj")) {
stop("mtmutObj must be a mtmutObj object.")
Expand All @@ -57,7 +60,11 @@ process_locus_bmbb <- function(mtmutObj, loc, dom_allele = NULL, return_data = F
res_bm <- process_locus_bm(d_dom_allele, ...)

## fit beta binomial model
selected_maj_cell <- d_dom_allele[get_bm_pval(res_bm, "fdr") >= 0.05]$cell_barcode
if (bb_over_bm) {
selected_maj_cell <- d_dom_allele[get_bm_pval(res_bm, bb_over_bm_adj) >= bb_over_bm_p]$cell_barcode
} else {
selected_maj_cell <- d_dom_allele$cell_barcode
}
res_bb <- process_locus_bb(d_dom_allele, selected_maj_cell, ...)

## VMR and consistency of fwd rev strand
Expand All @@ -83,6 +90,9 @@ process_locus_bmbb <- function(mtmutObj, loc, dom_allele = NULL, return_data = F
#'
#' @param mtmutObj a mtmutObj object.
#' @param mc.cores integer number of cores to use.
#' @param bb_over_bm logical weather to use binomial mixture model result to define the wildtype cells for training beta binomial model.
#' @param bb_over_bm_p numeric the binomial mixutre model p value threshold for selecting the wildtype cells for training beta binomial model.
#' @param bb_over_bm_adj string the method for adjusting the binomial mixture p value, default is "fdr".
#' @return NULL, the results are saved in the h5f file.
#' @details
#' This function will fit three models for every candidate locus:
Expand All @@ -107,7 +117,7 @@ process_locus_bmbb <- function(mtmutObj, loc, dom_allele = NULL, return_data = F
#' x <- filter_loc(x, min_cell = 5, model = "bb", p_threshold = 0.05, p_adj_method = "fdr")
#' x
#' @export
run_model_fit <- function(mtmutObj, mc.cores = getOption("mc.cores", 1L)) {
run_model_fit <- function(mtmutObj, mc.cores = getOption("mc.cores", 1L), bb_over_bm = TRUE, bb_over_bm_p = 0.05, bb_over_bm_adj = "fdr") {

if (!is(mtmutObj, "mtmutObj")) {
stop("mtmutObj must be a mtmutObj object.")
Expand All @@ -127,7 +137,7 @@ run_model_fit <- function(mtmutObj, mc.cores = getOption("mc.cores", 1L)) {
res_l <- parallel::mclapply(loc_list, function(xi) {
message(xi)
# pb$tick()
res <- process_locus_bmbb(mtmutObj, xi, return_data = FALSE)
res <- process_locus_bmbb(mtmutObj, xi, return_data = FALSE, bb_over_bm = bb_over_bm, bb_over_bm_p = bb_over_bm_p, bb_over_bm_adj = bb_over_bm_adj)

list(
bi_pval = res$model$binom_mix$bi_pval,
Expand Down
21 changes: 12 additions & 9 deletions R/visual_static.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@

# Draw depth ~ AF scatter plot
plot_locus <- function(
d_select_maj_base, p, p_threshold = 0.05, loc = NA, maj_base = NA) {
d_select_maj_base, p, p_threshold = 0.05, alt_count_threshold = 0, loc = NA, maj_base = NA) {
## alt / depth
N <- d_select_maj_base$coverage
y <- d_select_maj_base[, alt_depth]
d_plot <- data.table(
depth = N,
af = y / (N + 0.001),
highlight = p < p_threshold
highlight = p < p_threshold & (N - y) > alt_count_threshold
)
ggplot(d_plot, aes(x = depth, y = af, color = highlight)) +
geom_point() +
Expand Down Expand Up @@ -88,6 +88,7 @@ plot_locus <- function(
#' @param model a string of model name, one of "bb", "bm", "bi", the default value is "bb".
#' @param p_threshold a numeric value of p-value threshold, the default is 0.05.
#' @param p_adj_method a string of p-value adjustment method, the default is FDR.
#' @param alt_count_threshold a numeric value of alternative allele count threshold, the default is NULL, which means use the value in mtmutObj.
#' @return a ggplot object.
#' @examples
#' ## Use the example data
Expand All @@ -113,7 +114,14 @@ plot_locus <- function(
#' # plot the locus profile for chrM.200
#' plot_af_coverage(x, "chrM.204")
#' @export
plot_af_coverage <- function(mtmutObj, loc, model = 'bb', p_threshold = 0.05, p_adj_method = 'fdr') {
plot_af_coverage <- function(mtmutObj, loc, model = NULL, p_threshold = NULL, alt_count_threshold = NULL, p_adj_method = NULL) {

## get parameters
model <- ifelse(is.null(model), mtmutObj$loc_filter$model, model)
p_threshold <- ifelse(is.null(p_threshold), mtmutObj$loc_filter$p_threshold, p_threshold)
p_adj_method <- ifelse(is.null(p_adj_method), mtmutObj$loc_filter$p_adj_method, p_adj_method)
alt_count_threshold <- ifelse(is.null(alt_count_threshold), mtmutObj$loc_filter$alt_count_threshold, alt_count_threshold)
model <- ifelse(is.null(model), mtmutObj$loc_filter$model, model)

if (!is(mtmutObj, "mtmutObj")) {
stop("mtmutObj must be a mtmutObj object.")
Expand All @@ -127,13 +135,8 @@ plot_af_coverage <- function(mtmutObj, loc, model = 'bb', p_threshold = 0.05, p_
stop("p_threshold should be between 0 and 1")
}

## get parameters
model <- ifelse(is.null(model), mtmutObj$loc_filter$model, model)
p_threshold <- ifelse(is.null(p_threshold), mtmutObj$loc_filter$p_threshold, p_threshold)
p_adj_method <- ifelse(is.null(p_adj_method), mtmutObj$loc_filter$p_adj_method, p_adj_method)

d <- read_locus(mtmutObj, loc)
plot_locus(d, p = get_pval(mtmutObj, loc, model, p_adj_method), p_threshold = p_threshold, loc = loc)
plot_locus(d, p = get_pval(mtmutObj, loc, model, p_adj_method), p_threshold = p_threshold, alt_count_threshold = alt_count_threshold, loc = loc)
}

#' Heatmap plot
Expand Down
3 changes: 3 additions & 0 deletions man/filter_loc.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 6 additions & 3 deletions man/plot_af_coverage.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions man/print.mtmutObj.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 16 additions & 1 deletion man/process_locus_bmbb.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit cd70b5a

Please sign in to comment.