From 455c6c6b0fe054083dc5bd41a3e4f4f7805fade5 Mon Sep 17 00:00:00 2001 From: Williams Date: Tue, 1 Oct 2024 08:12:47 -0400 Subject: [PATCH 1/9] Default to no filtering due to low bin numbers, filter cells for phasing but do not remove --- R/callHSCN.R | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/R/callHSCN.R b/R/callHSCN.R index baffbdd..f661bcb 100644 --- a/R/callHSCN.R +++ b/R/callHSCN.R @@ -677,8 +677,8 @@ callHaplotypeSpecificCN <- function(CNbins, phasebyarm = FALSE, minfrachaplotypes = 0.7, likelihood = "auto", - minbins = 100, - minbinschr = 10, + minbins = 0, + minbinschr = 0, phased_haplotypes = NULL, clustering_method = "copy", maxloherror = 0.035, @@ -794,18 +794,21 @@ callHaplotypeSpecificCN <- function(CNbins, cells_to_remove <- ave_dist %>% dplyr::filter(average_distance >= 0.05) message(paste0("Removing ", dim(cells_to_remove)[1], " cells for phasing")) - ascn <- ascn %>% dplyr::filter(cell_id %in% cells_to_keep$cell_id) - haplotypes <- haplotypes %>% dplyr::filter(cell_id %in% cells_to_keep$cell_id) + ascn_filt <- ascn %>% dplyr::filter(cell_id %in% cells_to_keep$cell_id) + haplotypes_filt <- haplotypes %>% dplyr::filter(cell_id %in% cells_to_keep$cell_id) + } else{ + ascn_filt <- ascn + haplotypes_filt <- haplotypes } - infloherror <- ascn %>% + infloherror <- ascn_filt %>% dplyr::filter(state_phase == "A-Hom") %>% dplyr::summarise(err = weighted.mean(x = BAF, w = totalcounts, na.rm = TRUE)) %>% dplyr::pull(err) infloherror <- min(infloherror, maxloherror) # ensure loh error rate is < maxloherror if (likelihood == "betabinomial" | likelihood == "auto") { - bbfit <- fitBB(ascn) + bbfit <- fitBB(ascn_filt) if (bbfit$taronesZ > 5) { likelihood <- "betabinomial" message(paste0("Tarones Z-score: ", round(bbfit$taronesZ, 3), ", using ", likelihood, " model for inference.")) @@ -824,11 +827,11 @@ callHaplotypeSpecificCN <- function(CNbins, ) } - ascn$balance <- ifelse(ascn$phase == "Balanced", 0, 1) + ascn_filt$balance <- ifelse(ascn_filt$phase == "Balanced", 0, 1) if (is.null(phased_haplotypes)) { - chrlist <- proportion_imbalance(ascn, - haplotypes, + chrlist <- proportion_imbalance(ascn_filt, + haplotypes_filt, phasebyarm = phasebyarm, minfrachaplotypes = minfrachaplotypes, mincells = mincells, @@ -839,8 +842,8 @@ callHaplotypeSpecificCN <- function(CNbins, propdf <- chrlist$propdf chrlist <- chrlist$chrlist phased_haplotypes <- phase_haplotypes_bychr( - ascn = ascn, - haplotypes = haplotypes, + ascn = ascn_filt, + haplotypes = haplotypes_filt, chrlist = chrlist, phasebyarm = phasebyarm, global_phasing_for_balanced = global_phasing_for_balanced, @@ -920,6 +923,11 @@ callHaplotypeSpecificCN <- function(CNbins, tidyr::fill( c("state_min", "A", "B", "state_phase", "state_AS", "state_AS_phased", "LOH", "state_BAF", "phase"), .direction = "downup") %>% dplyr::ungroup() + #add 0|0 states for hom deletions + out[["data"]] <- out[["data"]] %>% + dplyr::mutate(A = ifelse(state == 0, 0, A)) %>% + dplyr::mutate(B = ifelse(state == 0, 0, B)) + out[["data"]] <- add_states(out[["data"]]) out[["data"]] <- as.data.frame(out[["data"]]) } From dd0e5efe01bc87ee0e5a75008898796dbc4aa8b6 Mon Sep 17 00:00:00 2001 From: Williams Date: Tue, 1 Oct 2024 08:43:32 -0400 Subject: [PATCH 2/9] data.table update --- R/import_packages.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/import_packages.R b/R/import_packages.R index 4db9cee..c581746 100644 --- a/R/import_packages.R +++ b/R/import_packages.R @@ -1,2 +1,5 @@ #' @import data.table NULL + +#' @importFrom data.table ":=" +NULL From 0cd6d2dec257118a092996efbdb2fcb0895ce3ad Mon Sep 17 00:00:00 2001 From: Williams Date: Tue, 1 Oct 2024 09:12:41 -0400 Subject: [PATCH 3/9] Convert to dt in add_states --- R/util.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/util.R b/R/util.R index 5b3ed2a..df9491c 100644 --- a/R/util.R +++ b/R/util.R @@ -962,6 +962,7 @@ per_chr_cn <- function(hscn, arms = NULL) { #' @export add_states <- function(df) { df <- df %>% + data.table::as.data.table() %>% .[, state_AS_phased := paste0(A, "|", B)] %>% .[, state_AS := paste0(pmax(state - B, B), "|", pmin(state - B, B))] %>% .[, state_min := pmin(A, B)] %>% From 3fe9609a803bf669b4ad65531bf1c515e4fc9c85 Mon Sep 17 00:00:00 2001 From: Williams Date: Tue, 1 Oct 2024 11:11:56 -0400 Subject: [PATCH 4/9] Option for male/femal --- NAMESPACE | 1 + R/callHSCN.R | 21 ++++++++++++++++++++- R/clustering.R | 3 ++- man/callHaplotypeSpecificCN.Rd | 9 ++++++--- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d32eee9..0fd66ee 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -98,5 +98,6 @@ export(widen_bins) export(widen_haplotypebins) import(data.table) importFrom(Rcpp,sourceCpp) +importFrom(data.table,":=") importFrom(magrittr,"%>%") useDynLib(signals) diff --git a/R/callHSCN.R b/R/callHSCN.R index f661bcb..10e0f12 100644 --- a/R/callHSCN.R +++ b/R/callHSCN.R @@ -634,6 +634,7 @@ filter_haplotypes <- function(haplotypes, fraction){ #' @param fillmissing For bins with missing counts fill in values based on neighbouring bins #' @param global_phasing_for_diploid When using cluster_per_chr, use all cells for phasing diploid regions within the cluster #' @param chrs_for_global_phasing Which chromosomes to phase using all cells for diploid regions, default is NULL which uses all chromosomes +#' @param female Default is `TRUE`, if set to `FALSE` and patient is "XY", X chromosome states are set to A|0 where A=Hmmcopy state #' #' @return Haplotype specific copy number object #' @@ -691,7 +692,8 @@ callHaplotypeSpecificCN <- function(CNbins, smoothsingletons = TRUE, fillmissing = TRUE, global_phasing_for_balanced = FALSE, - chrs_for_global_phasing = NULL) { + chrs_for_global_phasing = NULL, + female = TRUE) { if (!clustering_method %in% c("copy", "breakpoints")) { stop("Clustering method must be one of copy or breakpoints") } @@ -720,6 +722,17 @@ callHaplotypeSpecificCN <- function(CNbins, haplotypes$chr <- sub("chr", "", haplotypes$chr) } + if (female == TRUE){ + #do not infer states for chr "Y" + haplotypes <- filter(haplotypes, chr != "Y") + CNbins <- filter(CNbins, chr != "Y") + } else{ + #do not infer states for chr "X" or "Y" + haplotypes <- filter(haplotypes, chr != "Y") + haplotypes <- filter(haplotypes, chr != "X") + CNbins <- filter(CNbins, chr != "Y") + } + nhaplotypes <- haplotypes %>% dplyr::group_by(cell_id) %>% dplyr::summarize(n = sum(totalcounts)) %>% @@ -927,6 +940,12 @@ callHaplotypeSpecificCN <- function(CNbins, out[["data"]] <- out[["data"]] %>% dplyr::mutate(A = ifelse(state == 0, 0, A)) %>% dplyr::mutate(B = ifelse(state == 0, 0, B)) + if (female == FALSE){ + #for male patients set A == state and B == 0 + out[["data"]] <- out[["data"]] %>% + dplyr::mutate(A = ifelse(chr == "X", state, A)) %>% + dplyr::mutate(B = ifelse(chr == "X", 0, B)) + } out[["data"]] <- add_states(out[["data"]]) out[["data"]] <- as.data.frame(out[["data"]]) } diff --git a/R/clustering.R b/R/clustering.R index bc9b72e..8d3bd34 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -92,7 +92,8 @@ umap_clustering <- function(CNbins, ret_model = TRUE, ret_nn = TRUE, pca = pca, - fast_sgd = fast_sgd) + fast_sgd = fast_sgd, + pca_method = "svdr") } ) diff --git a/man/callHaplotypeSpecificCN.Rd b/man/callHaplotypeSpecificCN.Rd index 1e8ac40..e98843a 100644 --- a/man/callHaplotypeSpecificCN.Rd +++ b/man/callHaplotypeSpecificCN.Rd @@ -17,8 +17,8 @@ callHaplotypeSpecificCN( phasebyarm = FALSE, minfrachaplotypes = 0.7, likelihood = "auto", - minbins = 100, - minbinschr = 10, + minbins = 0, + minbinschr = 0, phased_haplotypes = NULL, clustering_method = "copy", maxloherror = 0.035, @@ -31,7 +31,8 @@ callHaplotypeSpecificCN( smoothsingletons = TRUE, fillmissing = TRUE, global_phasing_for_balanced = FALSE, - chrs_for_global_phasing = NULL + chrs_for_global_phasing = NULL, + female = TRUE ) } \arguments{ @@ -85,6 +86,8 @@ callHaplotypeSpecificCN( \item{chrs_for_global_phasing}{Which chromosomes to phase using all cells for diploid regions, default is NULL which uses all chromosomes} +\item{female}{Default is \code{TRUE}, if set to \code{FALSE} and patient is "XY", X chromosome states are set to A|0 where A=Hmmcopy state} + \item{viterbver}{Version of viterbi algorithm to use (cpp or R)} \item{global_phasing_for_diploid}{When using cluster_per_chr, use all cells for phasing diploid regions within the cluster} From c105bcc63de6311a44d2923c65fa7b2fcff278b0 Mon Sep 17 00:00:00 2001 From: Williams Date: Tue, 1 Oct 2024 16:31:14 -0400 Subject: [PATCH 5/9] Do not include chrY in sims --- R/sim-data.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/sim-data.R b/R/sim-data.R index c956067..af24c4d 100644 --- a/R/sim-data.R +++ b/R/sim-data.R @@ -30,7 +30,7 @@ simulate_cell <- function(nchr = 2, } data("dlpbins", envir = environment()) - bins <- dlpbins + bins <- dlpbins %>% dplyr::filter(chr != "Y") bins$state <- base_ploidy bins$state_min <- round(base_ploidy / 2) chrvec <- setdiff(unique(bins$chr), names(clonal_events)) From 92aadaf623689314152d44d3f2b3e1478d386bd9 Mon Sep 17 00:00:00 2001 From: Marc Williams Date: Tue, 1 Oct 2024 17:12:29 -0400 Subject: [PATCH 6/9] add dplyr tag to filter call --- R/callHSCN.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/callHSCN.R b/R/callHSCN.R index 10e0f12..c6b0a81 100644 --- a/R/callHSCN.R +++ b/R/callHSCN.R @@ -724,13 +724,13 @@ callHaplotypeSpecificCN <- function(CNbins, if (female == TRUE){ #do not infer states for chr "Y" - haplotypes <- filter(haplotypes, chr != "Y") - CNbins <- filter(CNbins, chr != "Y") + haplotypes <- dplyr::filter(haplotypes, chr != "Y") + CNbins <- dplyr::filter(CNbins, chr != "Y") } else{ #do not infer states for chr "X" or "Y" - haplotypes <- filter(haplotypes, chr != "Y") - haplotypes <- filter(haplotypes, chr != "X") - CNbins <- filter(CNbins, chr != "Y") + haplotypes <- dplyr::filter(haplotypes, chr != "Y") + haplotypes <- dplyr::filter(haplotypes, chr != "X") + CNbins <- dplyr::filter(CNbins, chr != "Y") } nhaplotypes <- haplotypes %>% From cde8ca452d0b2db1ead0ee1c4f746c639aeb0be2 Mon Sep 17 00:00:00 2001 From: Marc Williams Date: Tue, 1 Oct 2024 17:35:54 -0400 Subject: [PATCH 7/9] update vignette --- R/callHSCN.R | 8 ++++---- vignettes/ASCN.Rmd | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/callHSCN.R b/R/callHSCN.R index c6b0a81..ac79f5c 100644 --- a/R/callHSCN.R +++ b/R/callHSCN.R @@ -724,13 +724,13 @@ callHaplotypeSpecificCN <- function(CNbins, if (female == TRUE){ #do not infer states for chr "Y" - haplotypes <- dplyr::filter(haplotypes, chr != "Y") - CNbins <- dplyr::filter(CNbins, chr != "Y") + #haplotypes <- dplyr::filter(haplotypes, chr != "Y") + #CNbins <- dplyr::filter(CNbins, chr != "Y") } else{ #do not infer states for chr "X" or "Y" - haplotypes <- dplyr::filter(haplotypes, chr != "Y") + #haplotypes <- dplyr::filter(haplotypes, chr != "Y") haplotypes <- dplyr::filter(haplotypes, chr != "X") - CNbins <- dplyr::filter(CNbins, chr != "Y") + #CNbins <- dplyr::filter(CNbins, chr != "Y") } nhaplotypes <- haplotypes %>% diff --git a/vignettes/ASCN.Rmd b/vignettes/ASCN.Rmd index dc0428c..1196909 100644 --- a/vignettes/ASCN.Rmd +++ b/vignettes/ASCN.Rmd @@ -97,9 +97,9 @@ There are also some function to plot per cell BAF and state plots. You can speci plotCNprofileBAF(hscn, cellid = "SA921-A90554A-R03-C44") ``` -Here we'll plot an equivalent plot merging data across cell clusters using the utility function `consensyscopynumber`. Here, the `cell_id` column becomes the -```{r, fig.show='hold', fig.height=5 , fig.width=10} +Here we'll plot an equivalent plot merging data across cell clusters using the utility function `consensyscopynumber`. Here, the `cell_id` column becomes the `clone_id`. +```{r, fig.show='hold', fig.height=5 , fig.width=10} consensus_clusters <- consensuscopynumber(hscn$data, cl = cl$clustering) plotCNprofileBAF(consensus_clusters, cellid = "A") From 5cecce7817add230d6539ace0b451688fc70347a Mon Sep 17 00:00:00 2001 From: Marc Williams Date: Wed, 2 Oct 2024 04:28:56 -0400 Subject: [PATCH 8/9] fix test due to chrY --- tests/testthat/test-util.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-util.R b/tests/testthat/test-util.R index 9e16c12..3d0188e 100644 --- a/tests/testthat/test-util.R +++ b/tests/testthat/test-util.R @@ -39,7 +39,7 @@ test_that("Test widen bins functions", { segs <- create_segments(sim_data_bb$CNbins) test_that("Test create segments", { - expect_equal(dim(segs)[1], 24 * sum(clones_dist)) + expect_equal(dim(segs)[1], 23 * sum(clones_dist)) }) segs_cn <- create_segments(consensuscopynumber(CNbins)) From acb95d0ed3daac96a01c72100707c3f2ce41174e Mon Sep 17 00:00:00 2001 From: Marc Williams Date: Wed, 2 Oct 2024 04:44:34 -0400 Subject: [PATCH 9/9] fix some warnings from deprecations --- R/plotting.R | 20 ++++++++++---------- R/util.R | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/R/plotting.R b/R/plotting.R index 099b506..40d341c 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -432,14 +432,14 @@ plot_umap <- function(clustering, bycol = NULL, alphavalue = 0.5, raster = FALSE ) } g <- ggplot2::ggplot(clustering, ggplot2::aes(x = umap1, y = umap2)) + - ggrastr::geom_point_rast(ggplot2::aes_string(col = bycol), alpha = alphavalue) + + ggrastr::geom_point_rast(ggplot2::aes(col = .data[[bycol]]), alpha = alphavalue) + ggplot2::xlab("UMAP 1") + ggplot2::ylab("UMAP 2") + ggplot2::theme_bw() + ggplot2::guides(colour = ggplot2::guide_legend(ncol = 3)) } else { g <- ggplot2::ggplot(clustering, ggplot2::aes(x = umap1, y = umap2)) + - ggplot2::geom_point(ggplot2::aes_string(col = bycol), alpha = alphavalue) + + ggplot2::geom_point(ggplot2::aes(col = .data[[bycol]]), alpha = alphavalue) + ggplot2::xlab("UMAP 1") + ggplot2::ylab("UMAP 2") + ggplot2::theme_bw() + @@ -713,7 +713,7 @@ plotCNprofile <- function(CNbins, dplyr::mutate(state = factor(paste0(state), levels = c(paste0(seq(0, 10, 1)), "11+"))) %>% ggplot2::ggplot(ggplot2::aes(x = idx, y = copy)) + ggplot2::geom_vline(xintercept = pl$chrbreaks, col = "grey90", alpha = 0.75) + - ggrastr::geom_point_rast(ggplot2::aes_string(col = statecol), size = pointsize, alpha = alphaval, shape = shape) + + ggrastr::geom_point_rast(ggplot2::aes(col = .data[[statecol]]), size = pointsize, alpha = alphaval, shape = shape) + ggplot2::scale_color_manual( name = "Copy number", breaks = names(statecolpal), @@ -743,7 +743,7 @@ plotCNprofile <- function(CNbins, dplyr::mutate(state = factor(paste0(state), levels = c(paste0(seq(0, 10, 1)), "11+"))) %>% ggplot2::ggplot(ggplot2::aes(x = idx, y = copy)) + ggplot2::geom_vline(xintercept = pl$chrbreaks, col = "grey90", alpha = 0.75) + - ggplot2::geom_point(ggplot2::aes_string(col = statecol), size = pointsize, alpha = alphaval, shape = 16) + + ggplot2::geom_point(ggplot2::aes(col = .data[[statecol]]), size = pointsize, alpha = alphaval, shape = 16) + ggplot2::scale_color_manual( name = "Allele Specific CN", breaks = names(statecolpal), @@ -1405,7 +1405,7 @@ plotCNprofileBAF <- function(cn, dplyr::mutate(state_min = paste0(state_min)) %>% ggplot2::ggplot(ggplot2::aes(x = idx, y = BAF)) + ggplot2::geom_vline(xintercept = pl$chrbreaks, col = "grey90", alpha = 0.75) + - ggrastr::geom_point_rast(ggplot2::aes_string(col = BAFcol), size = pointsize, alpha = alphaval, shape = shape) + + ggrastr::geom_point_rast(ggplot2::aes(col = .data[[BAFcol]]), size = pointsize, alpha = alphaval, shape = shape) + ggplot2::scale_color_manual( name = "CN", breaks = names(BAFcolpal), @@ -1440,7 +1440,7 @@ plotCNprofileBAF <- function(cn, dplyr::mutate(state_min = paste0(state_min)) %>% ggplot2::ggplot(ggplot2::aes(x = idx, y = copy)) + ggplot2::geom_vline(xintercept = pl$chrbreaks, col = "grey90", alpha = 0.75) + - ggrastr::geom_point_rast(ggplot2::aes_string(col = statecol), size = pointsize, alpha = alphaval, shape = shape) + + ggrastr::geom_point_rast(ggplot2::aes(col = .data[[statecol]]), size = pointsize, alpha = alphaval, shape = shape) + ggplot2::scale_color_manual( name = "Allele Specific CN", breaks = names(statecolpal), @@ -1469,7 +1469,7 @@ plotCNprofileBAF <- function(cn, dplyr::mutate(state_min = paste0(state_min)) %>% ggplot2::ggplot(ggplot2::aes(x = idx, y = BAF)) + ggplot2::geom_vline(xintercept = pl$chrbreaks, col = "grey90", alpha = 0.75) + - ggplot2::geom_point(ggplot2::aes_string(col = BAFcol), size = pointsize, alpha = alphaval, shape = shape) + + ggplot2::geom_point(ggplot2::aes(col = .data[[BAFcol]]), size = pointsize, alpha = alphaval, shape = shape) + ggplot2::scale_color_manual( name = "CN", breaks = names(BAFcolpal), @@ -1504,7 +1504,7 @@ plotCNprofileBAF <- function(cn, dplyr::mutate(state_min = paste0(state_min)) %>% ggplot2::ggplot(ggplot2::aes(x = idx, y = copy)) + ggplot2::geom_vline(xintercept = pl$chrbreaks, col = "grey90", alpha = 0.75) + - ggplot2::geom_point(ggplot2::aes_string(col = statecol), size = pointsize, alpha = alphaval, shape = shape) + + ggplot2::geom_point(ggplot2::aes(col = .data[[statecol]]), size = pointsize, alpha = alphaval, shape = shape) + ggplot2::scale_color_manual( name = "Allele Specific CN", breaks = names(statecolpal), @@ -1730,7 +1730,7 @@ plotBAFperstate <- function(cn, minpts = 250, minfrac = 0.01, maxstate = 10, den cowplot::theme_cowplot() + ggplot2::geom_crossbar( data = allASstates, ggplot2::aes(y = cBAF, ymin = cBAF, ymax = cBAF), - alpha = 0.2, size = 0.2 + alpha = 0.2, linewidth = 0.2 ) + ggplot2::geom_text(data = text_fraction, ggplot2::aes(x = state_AS_phased, y = y, label = pct)) + ggplot2::xlab("") + @@ -1776,7 +1776,7 @@ plot_density_histogram <- function(dat, mystate, rho, nbins = 30, frac = "NA", l ) g <- ggplot2::ggplot(dat, ggplot2::aes(x = BAF)) + - ggplot2::geom_histogram(ggplot2::aes(y = (..density..)), bins = nbins, fill = "azure4", alpha = 0.8) + + ggplot2::geom_histogram(ggplot2::aes(y = ggplot2::after_stat(density)), bins = nbins, fill = "azure4", alpha = 0.8) + ggplot2::geom_line(data = dffit_bb, stat = "density", ggplot2::aes(BAF, col = type), size = 0.5, adjust = 5) + ggplot2::geom_line(data = dffit_b, stat = "density", ggplot2::aes(BAF, col = type), size = 0.5, adjust = 5, linetype = 2) + ggplot2::scale_colour_manual(values = c(ggplot2::alpha("deepskyblue4", 0.6), ggplot2::alpha("firebrick4", 0.6))) + diff --git a/R/util.R b/R/util.R index df9491c..c25b55d 100644 --- a/R/util.R +++ b/R/util.R @@ -75,7 +75,7 @@ createCNmatrix <- function(CNbins, cnmatrix <- cnmatrix %>% dplyr::as_tibble() %>% - tidyr::fill(., colnames, .direction = "updown") + tidyr::fill(., tidyr::all_of(colnames), .direction = "updown") } cnmatrix <- as.data.frame(cnmatrix)