Skip to content

Commit

Permalink
refactor: put allelic_cn before events calling
Browse files Browse the repository at this point in the history
  • Loading branch information
shihabdider committed May 8, 2024
1 parent 2b0deb6 commit c661907
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 256 deletions.
20 changes: 11 additions & 9 deletions bin/ascat_seg.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
library(optparse)

## DO NOT FAIL SILENTLY
options(error = function() {traceback(2); quit("no", 1)})

Expand Down Expand Up @@ -35,7 +35,7 @@
opt = parse_args(parseobj)

print(opt)

print(.libPaths())
options(error=function()traceback(2))

Expand All @@ -59,7 +59,7 @@
#'
#' @param gg (gGraph) diploid gGraph (expect cn.low and cn.high as node metadata)
#' @param verbose (logical) default FALSE
#'
#'
#' @return gGraph with field allele and cn
diploid2haploid = function(gg, verbose = FALSE) {
og.nodes.gr = gg$nodes$gr[, c("cn.low", "cn.high", "var.low", "var.high", "cn", "node.id", "nhets")]
Expand Down Expand Up @@ -286,7 +286,7 @@
## log.p = T) +
## pnbinom(y, mu = centers[tot.cn - j + 1],
## size = centers[tot.cn - j + 1],
## log.p = T)))
## log.p = T)))
ll = sapply(0:(floor(tot.cn/2)), function(j) sum(ppois(x,centers[j+1], log.p = T) +
ppois(y,centers[tot.cn-j+1],log.p = T)))
ll = ll - min(ll)
Expand All @@ -300,7 +300,7 @@
## log.p = T) +
## pnbinom(y, mu = centers[tot.cn - j],
## size = centers[tot.cn - j] / 2,
## log.p = T)))
## log.p = T)))
ll = sapply(0:(floor((tot.cn - 1)/2)), function(j) sum(ppois(x,centers[j+1], log.p = T) +
ppois(y,centers[tot.cn-j],log.p = T)))
ll = ll - min(ll)
Expand All @@ -315,7 +315,7 @@
## log.p = T) +
## pnbinom(y, mu = centers[tot.cn - j + 2],
## size = centers[tot.cn - j + 2],
## log.p = T)))
## log.p = T)))
ll = sapply(0:(floor((tot.cn + 1)/2)), function(j) sum(ppois(x,centers[j+1], log.p = T) +
ppois(y,centers[tot.cn-j+2],log.p = T)))
ll = ll - min(ll)
Expand Down Expand Up @@ -363,7 +363,7 @@

## ## ###########
## ## phasing
## ## ###########
## ## ###########

## ## iterate through all reference junctions and apply (wishful thinking) heuristic
## ##
Expand Down Expand Up @@ -561,7 +561,7 @@
#' @param agt.fname (character) path to sites.txt
#' @param min.frac (numeric) between 0 and 1, min frequency in normal to count as het site
#' @param max.frac (numeric) between 0 and 1, max frequency in normal to count as het site
#'
#'
#' @return allele gTrack
grab.hets = function(agt.fname = NULL,
min.frac = 0.2,
Expand Down Expand Up @@ -605,7 +605,7 @@
if (!file.exists(agt.fname)) {
stop("invalid file")
}

gr = readRDS(agt.fname)

if (!inherits(gr, "GRanges")) {
Expand Down Expand Up @@ -748,6 +748,8 @@
median(ratio, na.rm = T)] > 0.8,
"XX",
"XY")
# handle single chr case as XY
gender = ifelse(!is.na(gender), gender, "XY")
message("The gender of this sample: ", gender)

message("Starting ASCAT!!")
Expand Down
85 changes: 46 additions & 39 deletions bin/lp_phased_balance.R
Original file line number Diff line number Diff line change
Expand Up @@ -705,45 +705,51 @@
dt2gr(hets.dt[, .(seqnames, start, end, strand = "*")]),
return.type = "data.table")

nodes.ov.hets[, count := .N, by = query.id]

## we should only segment relatively wide nodes with low copy number
nodes.ov.hets = nodes.ov.hets[count >= 100,]

nodes.to.segment = nodes.ov.hets[, unique(query.id)]

new.sl = seqlengths(gg)

segs = lapply(nodes.to.segment,
function(qid) {
message("Starting segmentation for : ", qid)
hets.subset.dt = hets.dt[nodes.ov.hets[query.id == qid, subject.id],]
cna = CNA(hets.subset.dt[, BAF],
hets.subset.dt[, as.character(seqnames)],
hets.subset.dt[, start],
data.type = "logratio")
seg = segment(smooth.CNA(cna),
alpha = 1e-5,
verbose = TRUE)
utils::capture.output({seg_dt = print(seg); setDT(seg_dt)},
type = "output",
file = "/dev/null")
out = seg2gr(seg_dt[!(is.na(seg.mean) | is.na(loc.start) | is.na(loc.end))], new.sl)
out = gr.fix(out, new.sl, drop = T)
## get the number of hets per segment
values(out)[, "nhets"] = out %N% hets.gr
## make sure the segment is on the order of high kbp
out = out %Q% (nhets > 50)
message("Number of segments: ", length(out))
names(out) = NULL
if (length(out) > 1) {
return(gr.start(out[2:length(out)]))
}
return(GRanges())
message("Finished!")
})

segs = do.call(grbind, segs)
# handle case where nodes.ov.hets is empty
if (nrow(nodes.ov.hets) == 0) {
message("No nodes overlap with hets")
segs = NULL
} else {
nodes.ov.hets[, count := .N, by = query.id]

## we should only segment relatively wide nodes with low copy number
nodes.ov.hets = nodes.ov.hets[count >= 100,]

nodes.to.segment = nodes.ov.hets[, unique(query.id)]

new.sl = seqlengths(gg)

segs = lapply(nodes.to.segment,
function(qid) {
message("Starting segmentation for : ", qid)
hets.subset.dt = hets.dt[nodes.ov.hets[query.id == qid, subject.id],]
cna = CNA(hets.subset.dt[, BAF],
hets.subset.dt[, as.character(seqnames)],
hets.subset.dt[, start],
data.type = "logratio")
seg = segment(smooth.CNA(cna),
alpha = 1e-5,
verbose = TRUE)
utils::capture.output({seg_dt = print(seg); setDT(seg_dt)},
type = "output",
file = "/dev/null")
out = seg2gr(seg_dt[!(is.na(seg.mean) | is.na(loc.start) | is.na(loc.end))], new.sl)
out = gr.fix(out, new.sl, drop = T)
## get the number of hets per segment
values(out)[, "nhets"] = out %N% hets.gr
## make sure the segment is on the order of high kbp
out = out %Q% (nhets > 50)
message("Number of segments: ", length(out))
names(out) = NULL
if (length(out) > 1) {
return(gr.start(out[2:length(out)]))
}
return(GRanges())
message("Finished!")
})

segs = do.call(grbind, segs)
}

if (is.null(segs)) {
message("No extra breakends")
Expand Down Expand Up @@ -789,6 +795,7 @@
##binstats.gg$edges$mark(reward = new.gg$edges$dt[match(binstats.gg$edges$dt$og.edge.id, edge.id), reward])
binstats.gg$edges$mark(cnloh = new.gg$edges$dt[match(binstats.gg$edges$dt$og.edge.id, edge.id), cnloh])
## binstats.gg$edges$mark(cnloh = gg$edges$dt[match(binstats.gg$edges$dt$og.edge.id, edge.id), cnloh])
print('tracer2')
} else {
tmp = as.data.table(readRDS(opt$hets))
tmp = rbind(tmp[, .(seqnames, start, end, strand = "*",
Expand Down
6 changes: 5 additions & 1 deletion bin/non_integer_balance.R
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,11 @@
if ((opt$overwrite) | (!file.exists(binstats.gg.fn))) {

message("Starting binstats")
binstats.gg = gGnome::binstats(jab, bins = cov, field = opt$field, lp = TRUE)
if (ncn.x == 0) { # handle no sex chr case by disabling loess
binstats.gg = gGnome::binstats(jab, bins = cov, field = opt$field, lp = TRUE, loess=FALSE)
} else {
binstats.gg = gGnome::binstats(jab, bins = cov, field = opt$field, lp = TRUE)
}

## save binstats
saveRDS(binstats.gg, binstats.gg.fn)
Expand Down
2 changes: 1 addition & 1 deletion tests/nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ params {
blacklist_coverage_jabba = "${params.mski_base}/JaBbA/blacklist_coverage/hg19/maskA_re.rds"
build_non_integer_balance = "hg19"
mask_non_integer_balance = "${params.mski_base}/allelic_cn/non_integer_balance/hg19/mask_with_segdups.rds"
mask_lp_phased_balance = "${params.mski_base}/JaBbA/blacklist_coverage/hg19/maskA_re.rds"
mask_lp_phased_balance = "${params.mski_base}/allelic_cn/lp_phased_balance/lp_phased_balance_maskA_re.rds"
}
'GATK.GRCh38' {
fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta"
Expand Down
Loading

0 comments on commit c661907

Please sign in to comment.