From bf874c447329363525ed7f4f12755af3a93106b9 Mon Sep 17 00:00:00 2001 From: Max Chao Date: Mon, 22 May 2023 12:04:43 -0400 Subject: [PATCH 01/13] changes to drop chr for all inputs --- R/JaBbA.R | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/R/JaBbA.R b/R/JaBbA.R index 89ecca2..8cde17b 100755 --- a/R/JaBbA.R +++ b/R/JaBbA.R @@ -114,6 +114,7 @@ low.count=high.count=seg=chromosome=alpha_high=alpha_low=beta_high=beta_low=pred #' @param fix.thres (numeric) freeze the CN of large nodes with cost penalty exceeding this multiple of lambda. default -1 (no nodes are fixed) #' @param min.bins (numeric) minimum number of bins needed for a valid segment CN estimate (default 5) #' @param filter_loose (logical) run loose end annotation? (default FALSE) +#' @param drop.chr (logical) Drops chr from chromosome names. (default TRUE) #' @export JaBbA = function(## Two required inputs junctions, @@ -167,7 +168,8 @@ JaBbA = function(## Two required inputs fix.thres = -1, min.bins = 1, filter_loose = FALSE, - QCout=FALSE) + QCout=FALSE, + drop.chr = TRUE) { system(paste('mkdir -p', outdir)) jmessage('Starting analysis in ', outdir <- normalizePath(outdir)) @@ -193,8 +195,7 @@ JaBbA = function(## Two required inputs !file.exists(paste0(cplex.dir, "/cplex/lib"))){ jerror("${CPLEX_DIR}/cplex/[(include)|(lib)] do not both exist") } - } - + } ## if (is.character(ra)) ## { ## if (!file.exists(ra)) @@ -453,6 +454,7 @@ JaBbA = function(## Two required inputs ism = ism, fix.thres = fix.thres, min.bins = min.bins, + drop.chr = TRUE, filter_loose = filter_loose) gc() @@ -690,6 +692,7 @@ JaBbA = function(## Two required inputs #' @param fix.thres (numeric) multiple of lambda above which to fix nodes #' @param min.bins (numeric) min number of coverage bins for a valid CN estimate #' @param filter_loose (logical) run loose end analysis? +#' @param drop.chr (logical) Drops chr from chromosome names. (default TRUE) jabba_stub = function(junctions, # path to junction VCF file, dRanger txt file or rds of GRangesList of junctions (with strands oriented pointing AWAY from breakpoint) coverage, # path to cov file, rds of GRanges blacklist.coverage = NULL, @@ -733,7 +736,8 @@ jabba_stub = function(junctions, # path to junction VCF file, dRanger txt file o geno = FALSE, filter_loose = FALSE, outlier.thresh = 0.9999, - cn.signif = 1e-5) + cn.signif = 1e-5, + drop.chr = TRUE) { kag.file = paste(outdir, 'karyograph.rds', sep = '/') hets.gr.rds.file = paste(outdir, 'hets.gr.rds', sep = '/') @@ -1038,7 +1042,12 @@ jabba_stub = function(junctions, # path to junction VCF file, dRanger txt file o } else { jwarning("doing nothing special to the small INDEL-like isolated junctions") } - + ## Dropping chr across the sample + if (drop.chr){ + seg = gr.nochr(seg) + coverage = gr.nochr(coverage) + ra = gr.nochr(ra) + } ## clean up the seqlevels before moving on seg.sl = seqlengths(seg) cov.sl = seqlengths(coverage) From 211f998d9355e0f1511197ecb61c06da7c8828c1 Mon Sep 17 00:00:00 2001 From: Max Chao Date: Mon, 22 May 2023 17:20:23 -0400 Subject: [PATCH 02/13] fix up grangelist drop chr --- R/JaBbA.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/JaBbA.R b/R/JaBbA.R index 8cde17b..63e2d9b 100755 --- a/R/JaBbA.R +++ b/R/JaBbA.R @@ -1046,7 +1046,8 @@ jabba_stub = function(junctions, # path to junction VCF file, dRanger txt file o if (drop.chr){ seg = gr.nochr(seg) coverage = gr.nochr(coverage) - ra = gr.nochr(ra) + ra = GrangesList(lapply(ra, function(x){gr.nochr(x)}), + compress = T) } ## clean up the seqlevels before moving on seg.sl = seqlengths(seg) From 453a2ec6b7204aa820dbd42999ace7177c47668d Mon Sep 17 00:00:00 2001 From: Max Chao Date: Mon, 22 May 2023 17:35:39 -0400 Subject: [PATCH 03/13] i'm dumb, seqlevels works for grangeslist --- R/JaBbA.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/JaBbA.R b/R/JaBbA.R index 63e2d9b..8cde17b 100755 --- a/R/JaBbA.R +++ b/R/JaBbA.R @@ -1046,8 +1046,7 @@ jabba_stub = function(junctions, # path to junction VCF file, dRanger txt file o if (drop.chr){ seg = gr.nochr(seg) coverage = gr.nochr(coverage) - ra = GrangesList(lapply(ra, function(x){gr.nochr(x)}), - compress = T) + ra = gr.nochr(ra) } ## clean up the seqlevels before moving on seg.sl = seqlengths(seg) From e40e90728bf0efd3b78954db9f8cc57ee6b0b413 Mon Sep 17 00:00:00 2001 From: Max Chao Date: Mon, 22 May 2023 18:26:22 -0400 Subject: [PATCH 04/13] edit jba with extra arg --- jba | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/jba b/jba index 140f11d..87990d7 100755 --- a/jba +++ b/jba @@ -178,6 +178,11 @@ if (!exists("opt")) type = "logical", default = FALSE, help = "Add ISM constraints? (only valid if LP = TRUE)"), + make_option( + c("--drop.chr"), + type = "logical", + default = TRUE, + help = "drops chr in seqlevels for inputs"), ## make_option(c("--gurobi"), ## type = 'character', ## default = 'FALSE', @@ -311,7 +316,8 @@ suppressPackageStartupMessages( mc.cores = opt$cores, lp = opt$lp, ism = opt$ism, - verbose = 2 + verbose = 2, + drop.chr = op$drop.chr ## TODO init, dyn.tuning ) }) From e05dc29273495d298375ca9e49f96dba2f0dc418 Mon Sep 17 00:00:00 2001 From: Max Chao Date: Tue, 23 May 2023 13:30:18 -0400 Subject: [PATCH 05/13] cleanup --- jba | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/jba b/jba index 87990d7..e8e2456 100755 --- a/jba +++ b/jba @@ -1,5 +1,11 @@ #!/usr/bin/env Rscript +require(devtools) library(optparse) +devtools::load_all("~/git/forks/JaBbA/") +# Setting up the dev version +message("Loading Max's development version of JaBbA...") + + jbastr = " _____ ___ _ _____ (___ ) ( _`\\ ( ) ( _ ) @@ -168,6 +174,16 @@ if (!exists("opt")) type = "integer", default = 1, help = "Number of cores for JaBBa MIP"), + make_option( + c("--mem"), + type = "numeric", + default = 16, + help = "(uncompressed) tree memory limit for CPLEX"), + make_option( + c("--fix.thres"), + type = "numeric", + default = -1, + help = "fix nodes above a threshold multiple of lambda"), make_option( c("--lp"), type = "logical", @@ -199,11 +215,11 @@ if (!exists("opt")) "\tuse --field=FIELD argument so specify which column to use if specific meta field of a multi-column table"), collapse="\n"), option_list=option_list) - + parse_args(parseobj, positional_arguments= 2, print_help_and_exit = FALSE) args = tryCatch(parse_args(parseobj, positional_arguments= 2, print_help_and_exit = FALSE), error = function(e) {message(jbastr); print_help(parseobj); NULL}) - + opt = NULL; if (!is.null(args)) { opt = args$options @@ -317,7 +333,7 @@ suppressPackageStartupMessages( lp = opt$lp, ism = opt$ism, verbose = 2, - drop.chr = op$drop.chr + drop.chr = opt$drop.chr ## TODO init, dyn.tuning ) }) From 96aff68ac5791ea568c925c970a83209c3e823bc Mon Sep 17 00:00:00 2001 From: MaxLChao <98108161+MaxLChao@users.noreply.github.com> Date: Tue, 23 May 2023 13:57:25 -0400 Subject: [PATCH 06/13] Update JaBbA.R pass arg into stub --- R/JaBbA.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/JaBbA.R b/R/JaBbA.R index 8cde17b..677c9ae 100755 --- a/R/JaBbA.R +++ b/R/JaBbA.R @@ -454,7 +454,7 @@ JaBbA = function(## Two required inputs ism = ism, fix.thres = fix.thres, min.bins = min.bins, - drop.chr = TRUE, + drop.chr = drop.chr, filter_loose = filter_loose) gc() From 4cc9211fc3769dd92aa25ed77fb86f18d43e4089 Mon Sep 17 00:00:00 2001 From: Max Chao Date: Tue, 23 May 2023 17:54:58 -0400 Subject: [PATCH 07/13] edits to read.junctions to expose param --- R/JaBbA.R | 544 +++++++++++++++++++++++++++--------------------------- 1 file changed, 274 insertions(+), 270 deletions(-) diff --git a/R/JaBbA.R b/R/JaBbA.R index 677c9ae..6829346 100755 --- a/R/JaBbA.R +++ b/R/JaBbA.R @@ -202,7 +202,7 @@ JaBbA = function(## Two required inputs ## { ## jerror(paste('Junction path', ra, 'does not exist')) ## } - ra.all = read.junctions(ra, geno = geno) ## GRL + ra.all = read.junctions(ra, geno = geno, chr.convert = drop.chr) ## GRL if (is.null(ra.all)){ jwarning("no junction file is given, will be running JaBbA without junctions!") @@ -308,285 +308,284 @@ JaBbA = function(## Two required inputs black.ix = which(gUtils::grl.in(ra.all, blacklist.junctions)) if (length(black.ix)>0){ jmessage("Removing ", length(black.ix), " junctions matched the blacklist") - ra.all = ra.all[setdiff(seq_along(ra.all), black.ix)] - } - } + ra.all = ra.all[setdiff(seq_along(ra.all), black.ix)] + } + } } if (!is.null(whitelist.junctions) && file.exists(whitelist.junctions) && file.info(whitelist.junctions)$size > 0){ - whitelist.junctions = read.junctions(whitelist.junctions) - if (length(whitelist.junctions)>0){ - white.ix = which(gUtils::grl.in(ra.all, whitelist.junctions)) - if (length(white.ix)>0){ - jmessage("Forcing incorporation of ", length(white.ix), " junctions matched the whitelist") - values(ra.all)[white.ix, tfield] = 1 - } - } + whitelist.junctions = read.junctions(whitelist.junctions) + if (length(whitelist.junctions)>0){ + white.ix = which(gUtils::grl.in(ra.all, whitelist.junctions)) + if (length(white.ix)>0){ + jmessage("Forcing incorporation of ", length(white.ix), " junctions matched the whitelist") + values(ra.all)[white.ix, tfield] = 1 + } + } } if (length(ra.all)>0){ - ## final sanity check before running - if (!all(unique(values(ra.all)[, tfield]) %in% 1:3)){ - jerror('Tiers in tfield can only have values 1,2,or 3') - } - jmessage("In the input, There are ", sum(values(ra.all)[, tfield]==1), " tier 1 junctions; ", - sum(values(ra.all)[, tfield]==2), " tier 2 junctions; ", - sum(values(ra.all)[, tfield]==3), " tier 3 junctions.") +## final sanity check before running + if (!all(unique(values(ra.all)[, tfield]) %in% 1:3)){ + jerror('Tiers in tfield can only have values 1,2,or 3') + } + jmessage("In the input, There are ", sum(values(ra.all)[, tfield]==1), " tier 1 junctions; ", + sum(values(ra.all)[, tfield]==2), " tier 2 junctions; ", + sum(values(ra.all)[, tfield]==3), " tier 3 junctions.") } - ## big change tonight, I'm gonna start with all of the tiers in the first round - ## and then in each of following iterations keep the ones incorporated - ## plus the ones that didn't but fall inside the range of a lo +## big change tonight, I'm gonna start with all of the tiers in the first round +## and then in each of following iterations keep the ones incorporated +## plus the ones that didn't but fall inside the range of a lo if (all.in & length(ra.all)>0){ - t3 = values(ra.all)[, tfield]==3 - - if (any(t3)){ - ## save every t3 except small indel - t3.indel = which.indel(ra.all[which(t3)]) - t3.non.indel = which(t3)[setdiff(seq_along(which(t3)), t3.indel)] - values(ra.all)[t3.non.indel, tfield] = 2 - jmessage('All-in mode: ', length(t3.non.indel), - ' tier 3 junctions being included yielding ', - sum(values(ra.all)[, tfield]==2), ' total junctions\n') - } - - ## and then bump t2 to t1 - t2 = values(ra.all)[, tfield]==2 - if (any(t2)){ - values(ra.all)[t2, tfield] = 1 - jmessage("All-in mode: ", length(t2), - " tier 2 junctions forced into the model") - } - } - - ## if we are iterating more than once + t3 = values(ra.all)[, tfield]==3 + + if (any(t3)){ +## save every t3 except small indel + t3.indel = which.indel(ra.all[which(t3)]) + t3.non.indel = which(t3)[setdiff(seq_along(which(t3)), t3.indel)] + values(ra.all)[t3.non.indel, tfield] = 2 + jmessage('All-in mode: ', length(t3.non.indel), + ' tier 3 junctions being included yielding ', + sum(values(ra.all)[, tfield]==2), ' total junctions\n') + } + +## and then bump t2 to t1 + t2 = values(ra.all)[, tfield]==2 + if (any(t2)){ + values(ra.all)[t2, tfield] = 1 + jmessage("All-in mode: ", length(t2), + " tier 2 junctions forced into the model") + } + } +## if we are iterating more than once if (reiterate>1){ - ## important: rescue.all should always be TRUE if not running filter.loose - if ((!rescue.all) & (!filter_loose)) { - jmessage("Resetting rescue.all to TRUE as filter.loose is FALSE") - rescue.all = TRUE - } - - continue = TRUE - this.iter = 1; - - values(ra.all)$id = seq_along(ra.all) - saveRDS(ra.all, paste(outdir, '/junctions.all.rds', sep = '')) - - last.ra = ra.all[values(ra.all)[, tfield]<3] - - jmessage('Starting JaBbA iterative with ', length(last.ra), ' junctions') - jmessage('Will progressively add junctions within ', rescue.window, 'bp of a loose end in prev iter') - jmessage('Iterating for max ', reiterate, ' iterations or until convergence (i.e. no new junctions added)') - - while (continue) { - gc() - - this.iter.dir = paste(outdir, '/iteration', this.iter, sep = '') - system(paste('mkdir -p', this.iter.dir)) - - jmessage('Starting iteration ', this.iter, ' in ', this.iter.dir, ' using ', length(last.ra), ' junctions') - - if (this.iter>1){ - kag1 = readRDS(paste0(outdir, '/iteration1/karyograph.rds')) - ploidy = kag1$ploidy - purity = kag1$purity - jmessage("Using ploidy ", ploidy, - " and purity ", purity, - " consistent with the initial iteration") - - if (lp) { - jmessage("Using segments from JaBbA output of previous iteration") - loose.ends.fn = file.path(outdir, - paste0("iteration", this.iter - 1), - "loose.end.stats.rds") - seg.fn = file.path(outdir, paste0("iteration", this.iter - 1), "jabba.simple.rds") - - seg = readRDS(seg.fn)$segstats[, c()] - seg = seg %Q% (strand(seg) == "+") - seg = gr.stripstrand(seg) - } - - } - - this.ra.file = paste(this.iter.dir, '/junctions.rds', sep = '') - saveRDS(last.ra, this.ra.file) - - jab = jabba_stub( - junctions = this.ra.file, - seg = seg, - coverage = coverage, - blacklist.coverage = blacklist.coverage, - hets = hets, - nseg = nseg, - cfield = cfield, - tfield = tfield, - nudge.balanced = as.logical(nudge.balanced), - outdir = this.iter.dir, - mc.cores = as.numeric(mc.cores), - max.threads = as.numeric(max.threads), - max.mem = as.numeric(max.mem), - max.na = max.na, - edgenudge = as.numeric(edgenudge), - tilim = as.numeric(tilim), - strict = strict, - name = name, - use.gurobi = as.logical(use.gurobi), - field = field, - epgap = epgap, - ## subsample = subsample, - slack.penalty = as.numeric(slack.penalty), - loose.penalty.mode = loose.penalty.mode, - mipstart = init, - ploidy = as.numeric(ploidy), - purity = as.numeric(purity), - pp.method = pp.method, - indel = indel, - min.nbins = min.nbins, - overwrite = as.logical(overwrite), - verbose = as.numeric(verbose), - dyn.tuning = dyn.tuning, - geno = geno, - cn.signif = cn.signif, - lp = lp, - ism = ism, - fix.thres = fix.thres, - min.bins = min.bins, - drop.chr = drop.chr, - filter_loose = filter_loose) - - gc() - - jab = readRDS(paste(this.iter.dir, '/jabba.simple.rds', sep = '')) - jabr = readRDS(paste(this.iter.dir, '/jabba.raw.rds', sep = '')) - le = gr.stripstrand(jab$segstats %Q% (loose==TRUE & strand=="+")) - if (length(le)==0){ - jmessage("No more loose ends to resolve, terminating.") - break - } else if (!rescue.all){ - le = le %Q% which(passed==TRUE) - if (length(le)==0){ - jmessage("No more plausible loose ends, terminating") - break - } - } else { - jmessage("Rescuing all ", length(le), " loose ends, regardless of confidence.") - } - - ## determine orientation of loose ends - le.right = le %&% gr.start(jab$segstats %Q% (loose==FALSE)) - strand(le.right) = "+" - le.left = le %&% gr.end(jab$segstats %Q% (loose==FALSE)) - strand(le.left) = "-" - le = grbind(le.right, le.left) - - ## Annotate ra.all - all.input = readRDS(paste0(outdir, "/junctions.all.rds")) - all.ov = ra.overlaps(all.input, jab$junctions, pad=0, arr.ind=TRUE) - if (ncol(all.ov)==2){ - all.ov = data.table(data.frame(all.ov)) - all.ov[, this.cn := values(jab$junctions)$cn[ra2.ix]] - values(all.input)[, paste0("iteration", this.iter, ".cn")] = - all.ov[, setNames(this.cn, ra1.ix)][as.character(seq_along(all.input))] - } else { - values(all.input)[, paste0("iteration", this.iter, ".cn")] = NA - } - saveRDS(all.input, paste0(outdir, "/junctions.all.rds")) - - ## junction rescue - ## rescues junctions that are within rescue.window bp of a loose end - ## got used, stay there - ## but not loose ends overlapping an exorbitant number of junctions - le.keep = which((le %N% (stack(ra.all) + rescue.window)) < 6) - tokeep = which(values(jab$junctions)$cn>0) - new.ra.id = unique(c( - values(jab$junctions)$id[tokeep], - ## near a loose ends, got another chance - values(ra.all)$id[which(grl.in(ra.all, - le[le.keep] + rescue.window, - some = T, - ignore.strand = FALSE))], - ## tier 2 or higher must stay for all iterations - values(ra.all)$id[which(values(ra.all)$tier==2)] - )) - if (tfield %in% colnames(ra.all)){ - high.tier.id = values(ra.all)$id[which(as.numeric(values(ra.all)[, tfield])<3)] - new.ra.id = union(new.ra.id, high.tier.id) - } - new.ra = ra.all[which(values(ra.all)$id %in% new.ra.id)] - ## new.ra = ra.all[union(values(last.ra)$id, - ## values(ra.all)$id[grl.in(ra.all, le + rescue.window, some = T)])] - ## new.junc.id = setdiff(new.ra.id, values(jab$junctions)$id[which(values(jab$junctions)$cn>0)]) - new.junc.id = setdiff(new.ra.id, values(jab$junctions)$id) - ## num.new.junc = length(setdiff(values(new.ra)$id, values(last.ra)$id)==0) - num.new.junc = length(new.junc.id) - jcn = rep(0, nrow(jab$ab.edges)) - abix = rowSums(is.na(rbind(jab$ab.edges[, 1:2, 1])))==0 - if (any(abix)){ - jcn[abix] = jab$adj[rbind(jab$ab.edges[abix, 1:2, 1])] - } - num.used.junc = length(which(jcn>0)) - - t3 = values(new.ra)[, tfield]==3 - jmessage('Adding ', num.new.junc, - ' new junctions, including ', sum(t3), - ' tier 3 junctions, yielding ', num.used.junc, - ' used junctions and ', length(new.ra), ' total junctions\n') - - if (any(t3)){ - values(new.ra)[t3, tfield] = 2 - } - - if (num.new.junc==0 | this.iter >= reiterate) - continue = FALSE - else { - last.ra = new.ra - this.iter = this.iter + 1 - } - ## keep using the initial purity ploidy values - pp1 = readRDS(paste0( - outdir, - '/iteration1/karyograph.rds.ppgrid.solutions.rds')) - purity = pp1$purity[1] - ploidy = pp1$ploidy[1] - - seg = readRDS(paste0(outdir,'/iteration1/seg.rds')) ## read from the first iteration - - - if (verbose) - { - jmessage("Setting mipstart to previous iteration's jabba graph") - } - - init = jab - - if (verbose) - { - jmessage('Using purity ', round(purity,2), ' and ploidy ', round(ploidy,2), ' across ', length(seg), ' segments used in iteration 1') - } - } - - this.iter.dir = paste(outdir, '/iteration', this.iter, sep = '') - system(sprintf('cp %s/* %s', this.iter.dir, outdir)) - jab = readRDS(paste0(outdir, "/jabba.simple.gg.rds")) - jmessage('Done Iterating') - } else { - ## if all.in, convert all tier 3 to tier 2 - ## if (tfield %in% colnames(values(ra.all))){ - ## t3 = (values(ra.all)[, tfield] == 3) - ## if (all.in & length(ra.all)>0){ - ## if (any(t3)){ - ## ## save every t3 except small indel - ## t3.indel = which.indel(ra.all[which(t3)]) - ## t3.non.indel = which(t3)[setdiff(seq_along(which(t3)), t3.indel)] - ## values(ra.all)[t3.non.indel, tfield] = 2 - ## t3 = values(ra.all)[, tfield] == 3 - ## } - ## } - ## ## if not all.in, only use t2 or t1 - ## ra.all = ra.all[setdiff(seq_along(ra.all), which(t3))] +## important: rescue.all should always be TRUE if not running filter.loose + if ((!rescue.all) & (!filter_loose)) { + jmessage("Resetting rescue.all to TRUE as filter.loose is FALSE") + rescue.all = TRUE + } + + continue = TRUE + this.iter = 1; + + values(ra.all)$id = seq_along(ra.all) + saveRDS(ra.all, paste(outdir, '/junctions.all.rds', sep = '')) + + last.ra = ra.all[values(ra.all)[, tfield]<3] + + jmessage('Starting JaBbA iterative with ', length(last.ra), ' junctions') + jmessage('Will progressively add junctions within ', rescue.window, 'bp of a loose end in prev iter') + jmessage('Iterating for max ', reiterate, ' iterations or until convergence (i.e. no new junctions added)') + + while (continue) { + gc() + + this.iter.dir = paste(outdir, '/iteration', this.iter, sep = '') + system(paste('mkdir -p', this.iter.dir)) + + jmessage('Starting iteration ', this.iter, ' in ', this.iter.dir, ' using ', length(last.ra), ' junctions') + + if (this.iter>1){ + kag1 = readRDS(paste0(outdir, '/iteration1/karyograph.rds')) + ploidy = kag1$ploidy + purity = kag1$purity + jmessage("Using ploidy ", ploidy, + " and purity ", purity, + " consistent with the initial iteration") + + if (lp) { + jmessage("Using segments from JaBbA output of previous iteration") + loose.ends.fn = file.path(outdir, + paste0("iteration", this.iter - 1), + "loose.end.stats.rds") + seg.fn = file.path(outdir, paste0("iteration", this.iter - 1), "jabba.simple.rds") + + seg = readRDS(seg.fn)$segstats[, c()] + seg = seg %Q% (strand(seg) == "+") + seg = gr.stripstrand(seg) + } + + } + + this.ra.file = paste(this.iter.dir, '/junctions.rds', sep = '') + saveRDS(last.ra, this.ra.file) + + jab = jabba_stub( + junctions = this.ra.file, + seg = seg, + coverage = coverage, + blacklist.coverage = blacklist.coverage, + hets = hets, + nseg = nseg, + cfield = cfield, + tfield = tfield, + nudge.balanced = as.logical(nudge.balanced), + outdir = this.iter.dir, + mc.cores = as.numeric(mc.cores), + max.threads = as.numeric(max.threads), + max.mem = as.numeric(max.mem), + max.na = max.na, + edgenudge = as.numeric(edgenudge), + tilim = as.numeric(tilim), + strict = strict, + name = name, + use.gurobi = as.logical(use.gurobi), + field = field, + epgap = epgap, +## subsample = subsample, + slack.penalty = as.numeric(slack.penalty), + loose.penalty.mode = loose.penalty.mode, + mipstart = init, + ploidy = as.numeric(ploidy), + purity = as.numeric(purity), + pp.method = pp.method, + indel = indel, + min.nbins = min.nbins, + overwrite = as.logical(overwrite), + verbose = as.numeric(verbose), + dyn.tuning = dyn.tuning, + geno = geno, + cn.signif = cn.signif, + lp = lp, + ism = ism, + fix.thres = fix.thres, + min.bins = min.bins, + drop.chr = drop.chr, + filter_loose = filter_loose) + + gc() + + jab = readRDS(paste(this.iter.dir, '/jabba.simple.rds', sep = '')) + jabr = readRDS(paste(this.iter.dir, '/jabba.raw.rds', sep = '')) + le = gr.stripstrand(jab$segstats %Q% (loose==TRUE & strand=="+")) + if (length(le)==0){ + jmessage("No more loose ends to resolve, terminating.") + break + } else if (!rescue.all){ + le = le %Q% which(passed==TRUE) + if (length(le)==0){ + jmessage("No more plausible loose ends, terminating") + break + } + } else { + jmessage("Rescuing all ", length(le), " loose ends, regardless of confidence.") + } + +## determine orientation of loose ends + le.right = le %&% gr.start(jab$segstats %Q% (loose==FALSE)) + strand(le.right) = "+" + le.left = le %&% gr.end(jab$segstats %Q% (loose==FALSE)) + strand(le.left) = "-" + le = grbind(le.right, le.left) + +## Annotate ra.all + all.input = readRDS(paste0(outdir, "/junctions.all.rds")) + all.ov = ra.overlaps(all.input, jab$junctions, pad=0, arr.ind=TRUE) + if (ncol(all.ov)==2){ + all.ov = data.table(data.frame(all.ov)) + all.ov[, this.cn := values(jab$junctions)$cn[ra2.ix]] + values(all.input)[, paste0("iteration", this.iter, ".cn")] = + all.ov[, setNames(this.cn, ra1.ix)][as.character(seq_along(all.input))] + } else { + values(all.input)[, paste0("iteration", this.iter, ".cn")] = NA + } + saveRDS(all.input, paste0(outdir, "/junctions.all.rds")) + +## junction rescue +## rescues junctions that are within rescue.window bp of a loose end +## got used, stay there +## but not loose ends overlapping an exorbitant number of junctions + le.keep = which((le %N% (stack(ra.all) + rescue.window)) < 6) + tokeep = which(values(jab$junctions)$cn>0) + new.ra.id = unique(c( + values(jab$junctions)$id[tokeep], +## near a loose ends, got another chance + values(ra.all)$id[which(grl.in(ra.all, + le[le.keep] + rescue.window, + some = T, + ignore.strand = FALSE))], +## tier 2 or higher must stay for all iterations + values(ra.all)$id[which(values(ra.all)$tier==2)] + )) + if (tfield %in% colnames(ra.all)){ + high.tier.id = values(ra.all)$id[which(as.numeric(values(ra.all)[, tfield])<3)] + new.ra.id = union(new.ra.id, high.tier.id) + } + new.ra = ra.all[which(values(ra.all)$id %in% new.ra.id)] +## new.ra = ra.all[union(values(last.ra)$id, +## values(ra.all)$id[grl.in(ra.all, le + rescue.window, some = T)])] +## new.junc.id = setdiff(new.ra.id, values(jab$junctions)$id[which(values(jab$junctions)$cn>0)]) + new.junc.id = setdiff(new.ra.id, values(jab$junctions)$id) +## num.new.junc = length(setdiff(values(new.ra)$id, values(last.ra)$id)==0) + num.new.junc = length(new.junc.id) + jcn = rep(0, nrow(jab$ab.edges)) + abix = rowSums(is.na(rbind(jab$ab.edges[, 1:2, 1])))==0 + if (any(abix)){ + jcn[abix] = jab$adj[rbind(jab$ab.edges[abix, 1:2, 1])] + } + num.used.junc = length(which(jcn>0)) + + t3 = values(new.ra)[, tfield]==3 + jmessage('Adding ', num.new.junc, + ' new junctions, including ', sum(t3), + ' tier 3 junctions, yielding ', num.used.junc, + ' used junctions and ', length(new.ra), ' total junctions\n') + + if (any(t3)){ + values(new.ra)[t3, tfield] = 2 + } + + if (num.new.junc==0 | this.iter >= reiterate) + continue = FALSE + else { + last.ra = new.ra + this.iter = this.iter + 1 + } +## keep using the initial purity ploidy values + pp1 = readRDS(paste0( + outdir, + '/iteration1/karyograph.rds.ppgrid.solutions.rds')) + purity = pp1$purity[1] + ploidy = pp1$ploidy[1] + + seg = readRDS(paste0(outdir,'/iteration1/seg.rds')) ## read from the first iteration + + + if (verbose) + { + jmessage("Setting mipstart to previous iteration's jabba graph") + } + + init = jab + + if (verbose) + { + jmessage('Using purity ', round(purity,2), ' and ploidy ', round(ploidy,2), ' across ', length(seg), ' segments used in iteration 1') + } + } + + this.iter.dir = paste(outdir, '/iteration', this.iter, sep = '') + system(sprintf('cp %s/* %s', this.iter.dir, outdir)) + jab = readRDS(paste0(outdir, "/jabba.simple.gg.rds")) + jmessage('Done Iterating') + } else { +## if all.in, convert all tier 3 to tier 2 +## if (tfield %in% colnames(values(ra.all))){ +## t3 = (values(ra.all)[, tfield] == 3) +## if (all.in & length(ra.all)>0){ +## if (any(t3)){ +## ## save every t3 except small indel +## t3.indel = which.indel(ra.all[which(t3)]) +## t3.non.indel = which(t3)[setdiff(seq_along(which(t3)), t3.indel)] +## values(ra.all)[t3.non.indel, tfield] = 2 +## t3 = values(ra.all)[, tfield] == 3 +## } +## } +## ## if not all.in, only use t2 or t1 +## ra.all = ra.all[setdiff(seq_along(ra.all), which(t3))] ## } jab = jabba_stub( junctions = ra.all, @@ -628,7 +627,8 @@ JaBbA = function(## Two required inputs ism = ism, fix.thres = fix.thres, min.bins = min.bins, - filter_loose = filter_loose) + filter_loose = filter_loose, + drop.chr = drop.chr) } if(QCout){ QCStats(inputDT=data.table(pair=name,inputdir=outdir),outdir=outdir) @@ -964,7 +964,7 @@ jabba_stub = function(junctions, # path to junction VCF file, dRanger txt file o ra = junctions if (inherits(ra, "character")) { - ra = read.junctions(junctions, geno = geno) + ra = read.junctions(junctions, geno = geno, chr.convert = drop.chr) } else if (!inherits(ra, "GRangesList")){ jerror("`ra` must be GRangesList here") } @@ -1044,14 +1044,18 @@ jabba_stub = function(junctions, # path to junction VCF file, dRanger txt file o } ## Dropping chr across the sample if (drop.chr){ + jmessage(drop.chr , " for drop.chr, dropping chr in chromosome names.") seg = gr.nochr(seg) coverage = gr.nochr(coverage) ra = gr.nochr(ra) + }else{ + jmessage(drop.chr , " for drop.chr, continuing on with chr names as is.") } ## clean up the seqlevels before moving on seg.sl = seqlengths(seg) cov.sl = seqlengths(coverage) ra.sl = seqlengths(ra) + union.sn = union(union(names(seg.sl), names(cov.sl)), names(ra.sl)) union.sl = data.table( seqnames = union.sn, From c2e8ef775f35aa92f9f7728e4d106cdb46461f34 Mon Sep 17 00:00:00 2001 From: MaxLChao <98108161+MaxLChao@users.noreply.github.com> Date: Tue, 23 May 2023 17:58:05 -0400 Subject: [PATCH 08/13] Update jba remove local changes --- jba | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/jba b/jba index e8e2456..18cc1a7 100755 --- a/jba +++ b/jba @@ -1,9 +1,6 @@ #!/usr/bin/env Rscript -require(devtools) + library(optparse) -devtools::load_all("~/git/forks/JaBbA/") -# Setting up the dev version -message("Loading Max's development version of JaBbA...") jbastr = " From 33ab1c90633932c721e0b9bc59d92dc8664f228b Mon Sep 17 00:00:00 2001 From: Tanubrata Dey Date: Fri, 6 Oct 2023 17:44:39 -0400 Subject: [PATCH 09/13] Added a fix for CPLEX and gurobi overlap check --- R/JaBbA.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/R/JaBbA.R b/R/JaBbA.R index b6d18a2..733027f 100755 --- a/R/JaBbA.R +++ b/R/JaBbA.R @@ -55,8 +55,13 @@ low.count=high.count=seg=chromosome=alpha_high=alpha_low=beta_high=beta_low=pred jmessage("${CPLEX_DIR}/cplex/[(include)|(lib)] do not both exist") } - library(gGnome) - gGnome:::testOptimizationFunction() + if (!requireNamespace("gurobi", quietly = TRUE)) { + jmessage("Gurobi is not installed! REMEMBER: You need to have either CPLEX or Gurobi!") + + } else { + library(gGnome) + gGnome:::testOptimizationFunction() + } invisible() } From 813e72326c6eb081ab228107256582a88c78ebe8 Mon Sep 17 00:00:00 2001 From: Tanubrata Dey Date: Wed, 11 Oct 2023 16:48:59 -0400 Subject: [PATCH 10/13] Fixing a bug where the CPLEX check was forcefully implemented even when using gurobi --- R/JaBbA.R | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/R/JaBbA.R b/R/JaBbA.R index 733027f..ad7f2a4 100755 --- a/R/JaBbA.R +++ b/R/JaBbA.R @@ -55,13 +55,28 @@ low.count=high.count=seg=chromosome=alpha_high=alpha_low=beta_high=beta_low=pred jmessage("${CPLEX_DIR}/cplex/[(include)|(lib)] do not both exist") } - if (!requireNamespace("gurobi", quietly = TRUE)) { - jmessage("Gurobi is not installed! REMEMBER: You need to have either CPLEX or Gurobi!") + if (((!file.exists(paste0(cplex.dir, "/cplex"))) & (!file.exists(paste0(cplex.dir, "/cplex/include")) || + !file.exists(paste0(cplex.dir, "/cplex/lib")))) & (requireNamespace("gurobi", quietly = TRUE))) { + jmessage("Gurobi is installed! Will use Gurobi instead of CPLEX if mentioned use.gurobi=TRUE") - } else { + } else if (((file.exists(paste0(cplex.dir, "/cplex"))) & (file.exists(paste0(cplex.dir, "/cplex/include")) || + file.exists(paste0(cplex.dir, "/cplex/lib")))) & (!requireNamespace("gurobi", quietly = TRUE))) { + + jmessage("CPLEX found, will check if gGnome is wired up with CPLEX...") library(gGnome) gGnome:::testOptimizationFunction() - } + + } else if (((file.exists(paste0(cplex.dir, "/cplex"))) & (file.exists(paste0(cplex.dir, "/cplex/include")) || + file.exists(paste0(cplex.dir, "/cplex/lib")))) & (requireNamespace("gurobi", quietly = TRUE))) { + + jmessage("Both CPLEX and Gurobi is found, will check if gGnome has CPLEX wired since by default JaBbA use CPLEX...") + library(gGnome) + gGnome:::testOptimizationFunction() + + } else { + + jmessage("Both CPLEX and Gurobi not found! REMEMBER: You need any one of these optimizers to run JaBbA") + } invisible() } From 5b5ded867f60af8af702d41ce1e99b8ef703c13d Mon Sep 17 00:00:00 2001 From: Tanubrata Dey Date: Thu, 12 Oct 2023 11:50:42 -0400 Subject: [PATCH 11/13] Made the CPLEX and gurobi check if-else statment more human readable --- R/JaBbA.R | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/R/JaBbA.R b/R/JaBbA.R index ad7f2a4..b058d09 100755 --- a/R/JaBbA.R +++ b/R/JaBbA.R @@ -54,20 +54,24 @@ low.count=high.count=seg=chromosome=alpha_high=alpha_low=beta_high=beta_low=pred !file.exists(paste0(cplex.dir, "/cplex/lib"))){ jmessage("${CPLEX_DIR}/cplex/[(include)|(lib)] do not both exist") } - - if (((!file.exists(paste0(cplex.dir, "/cplex"))) & (!file.exists(paste0(cplex.dir, "/cplex/include")) || - !file.exists(paste0(cplex.dir, "/cplex/lib")))) & (requireNamespace("gurobi", quietly = TRUE))) { + + # variable that stores checks for CPLEX directory and its lib & include folder + cplex_installation = ((file.exists(paste0(cplex.dir, "/cplex"))) & (file.exists(paste0(cplex.dir, "/cplex/include")) || + file.exists(paste0(cplex.dir, "/cplex/lib")))) + # variable that stores boolean information for Gurobi installation + gurobi_installation = (requireNamespace("gurobi", quietly = TRUE)) + + # check to verify whether gurobi or cplex or both or none. if cplex or both cplex and gurobi found, it will run cplex connection for gGnome package + if ((!cplex_installation) & (gurobi_installation)) { jmessage("Gurobi is installed! Will use Gurobi instead of CPLEX if mentioned use.gurobi=TRUE") - } else if (((file.exists(paste0(cplex.dir, "/cplex"))) & (file.exists(paste0(cplex.dir, "/cplex/include")) || - file.exists(paste0(cplex.dir, "/cplex/lib")))) & (!requireNamespace("gurobi", quietly = TRUE))) { + } else if ((cplex_installation) & (!gurobi_installation)) { jmessage("CPLEX found, will check if gGnome is wired up with CPLEX...") library(gGnome) gGnome:::testOptimizationFunction() - } else if (((file.exists(paste0(cplex.dir, "/cplex"))) & (file.exists(paste0(cplex.dir, "/cplex/include")) || - file.exists(paste0(cplex.dir, "/cplex/lib")))) & (requireNamespace("gurobi", quietly = TRUE))) { + } else if ((cplex_installation) & (gurobi_installation)) { jmessage("Both CPLEX and Gurobi is found, will check if gGnome has CPLEX wired since by default JaBbA use CPLEX...") library(gGnome) From 3ae32cc310bb2b62a3dbd9be8a1d1f35cecb7840 Mon Sep 17 00:00:00 2001 From: Tanubrata Dey Date: Thu, 12 Oct 2023 11:50:42 -0400 Subject: [PATCH 12/13] Made the CPLEX and gurobi check if-else statment more human readable --- R/JaBbA.R | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/R/JaBbA.R b/R/JaBbA.R index ad7f2a4..b058d09 100755 --- a/R/JaBbA.R +++ b/R/JaBbA.R @@ -54,20 +54,24 @@ low.count=high.count=seg=chromosome=alpha_high=alpha_low=beta_high=beta_low=pred !file.exists(paste0(cplex.dir, "/cplex/lib"))){ jmessage("${CPLEX_DIR}/cplex/[(include)|(lib)] do not both exist") } - - if (((!file.exists(paste0(cplex.dir, "/cplex"))) & (!file.exists(paste0(cplex.dir, "/cplex/include")) || - !file.exists(paste0(cplex.dir, "/cplex/lib")))) & (requireNamespace("gurobi", quietly = TRUE))) { + + # variable that stores checks for CPLEX directory and its lib & include folder + cplex_installation = ((file.exists(paste0(cplex.dir, "/cplex"))) & (file.exists(paste0(cplex.dir, "/cplex/include")) || + file.exists(paste0(cplex.dir, "/cplex/lib")))) + # variable that stores boolean information for Gurobi installation + gurobi_installation = (requireNamespace("gurobi", quietly = TRUE)) + + # check to verify whether gurobi or cplex or both or none. if cplex or both cplex and gurobi found, it will run cplex connection for gGnome package + if ((!cplex_installation) & (gurobi_installation)) { jmessage("Gurobi is installed! Will use Gurobi instead of CPLEX if mentioned use.gurobi=TRUE") - } else if (((file.exists(paste0(cplex.dir, "/cplex"))) & (file.exists(paste0(cplex.dir, "/cplex/include")) || - file.exists(paste0(cplex.dir, "/cplex/lib")))) & (!requireNamespace("gurobi", quietly = TRUE))) { + } else if ((cplex_installation) & (!gurobi_installation)) { jmessage("CPLEX found, will check if gGnome is wired up with CPLEX...") library(gGnome) gGnome:::testOptimizationFunction() - } else if (((file.exists(paste0(cplex.dir, "/cplex"))) & (file.exists(paste0(cplex.dir, "/cplex/include")) || - file.exists(paste0(cplex.dir, "/cplex/lib")))) & (requireNamespace("gurobi", quietly = TRUE))) { + } else if ((cplex_installation) & (gurobi_installation)) { jmessage("Both CPLEX and Gurobi is found, will check if gGnome has CPLEX wired since by default JaBbA use CPLEX...") library(gGnome) From f0cf34eccff4f495e5f04d21d992c806f946c455 Mon Sep 17 00:00:00 2001 From: shihabdider Date: Tue, 17 Oct 2023 15:41:30 -0400 Subject: [PATCH 13/13] refactor: Updates JaBbA executable - There were multiple JaBbA executables (one in root directory, another one in inst/) which were out-of-date. - The code for deciding which MIP optimizer to use depends on the use.gurobi flag, so it has been moved inside of the JaBbA function out of .onLoad. --- R/JaBbA.R | 73 ++++++------- inst/jba | 308 ------------------------------------------------------ jba | 43 ++++---- 3 files changed, 51 insertions(+), 373 deletions(-) delete mode 100644 inst/jba diff --git a/R/JaBbA.R b/R/JaBbA.R index 7a54956..3c5b9da 100755 --- a/R/JaBbA.R +++ b/R/JaBbA.R @@ -40,43 +40,6 @@ low.count=high.count=seg=chromosome=alpha_high=alpha_low=beta_high=beta_low=pred toset <- !(names(op.JaBbA) %in% names(op)) if(any(toset)) options(op.JaBbA[toset]) - ## test for CPLEX environment - ## either CPLEX or gurobi must be installed - cplex.dir = Sys.getenv("CPLEX_DIR") - if (!requireNamespace("gurobi", quietly = TRUE)) { - jmessage("Gurobi not installed") - } - if (is.null(cplex.dir)){ - jmessage("CPLEX_DIR environment variable not found!") - } else if (!file.exists(paste0(cplex.dir, "/cplex"))) { - jmessage("${CPLEX_DIR}/cplex not found") - } else if (!file.exists(paste0(cplex.dir, "/cplex/include")) || - !file.exists(paste0(cplex.dir, "/cplex/lib"))){ - jmessage("${CPLEX_DIR}/cplex/[(include)|(lib)] do not both exist") - } - - cplex_installed = ((file.exists(paste0(cplex.dir, "/cplex"))) & (file.exists(paste0(cplex.dir, "/cplex/include")) || - file.exists(paste0(cplex.dir, "/cplex/lib")))) - - gurobi_installed = (requireNamespace("gurobi", quietly = TRUE)) - - if ((!cplex_installed) & (gurobi_installed)) { - jmessage("Gurobi is found.") - - } else if ((cplex_installed) & (!gurobi_installed)) { - - library(gGnome) - gGnome:::testOptimizationFunction() - - } else if ((cplex_installed) & (gurobi_installed)) { - - library(gGnome) - gGnome:::testOptimizationFunction() - - } else { - - jmessage("WARNING: Both CPLEX and Gurobi not found!") - } invisible() } @@ -205,20 +168,46 @@ JaBbA = function(## Two required inputs ## check optimizer choice is appropriate if (use.gurobi) { - if (!requireNamespace("gurobi", quietly = TRUE)) { - jerror("use.gurobi is TRUE but gurobi is not installed") + gurobi.dir = Sys.getenv("GUROBI_HOME") + if (is.null(gurobi.dir)){ + jerror("GUROBI_HOME environment variable is not defined. Please + install Gurobi or set this variable to the correct installation + directory.") + } else { + if (!requireNamespace("gurobi", quietly = TRUE)) { + jerror("Gurobi R library not found, attempting to install + from package binary in $GUROBI_HOME/R...") + gurobi_pattern <- "^gurobi_" + gurobi_r_package_path <- list.files( + file.path(Sys.getenv("GUROBI_HOME"), "R"), + pattern = gurobi_pattern, full.names = TRUE + )[1] + if (!is.na(gurobi_r_package_path) && !is.null(gurobi_r_package_path)) { + install.packages(gurobi_r_package_path, repos = NULL) + } + } else { + library(gurobi) + if (!requireNamespace("gurobi", quietly = TRUE)) { + jerror("Gurobi installation was found but the gurobi R + package could not be imported.") + } + } } } else { cplex.dir = Sys.getenv("CPLEX_DIR") if (is.null(cplex.dir)){ - jerror("CPLEX_DIR environment variable not found!") + jerror("CPLEX_DIR environment variable not found.") } else if (!file.exists(paste0(cplex.dir, "/cplex"))) { - jerror("${CPLEX_DIR}/cplex not found") + jerror("${CPLEX_DIR}/cplex not found.") } else if (!file.exists(paste0(cplex.dir, "/cplex/include")) || !file.exists(paste0(cplex.dir, "/cplex/lib"))){ - jerror("${CPLEX_DIR}/cplex/[(include)|(lib)] do not both exist") + jerror("Neither of ${CPLEX_DIR}/cplex/[(include)|(lib)] exist.") + } else { + library(gGnome) + gGnome:::testOptimizationFunction() } } + ## if (is.character(ra)) ## { ## if (!file.exists(ra)) diff --git a/inst/jba b/inst/jba deleted file mode 100644 index 3bb19e7..0000000 --- a/inst/jba +++ /dev/null @@ -1,308 +0,0 @@ -#!/usr/bin/env Rscript -library(optparse) -jbastr = " - _____ ___ _ _____ -(___ ) ( _`\\ ( ) ( _ ) - | | _ _ | (_) )| |_ | (_) | - _ | | /'_` )| _ <'| '_`\\ | _ | -( )_| |( (_| || (_) )| |_) )| | | | -`\\___/'`\\__,_)(____/'(_,__/'(_) (_) - -(Junction Balance Analysis)\n" - -if (!exists("opt")) -{ - option_list = list( - make_option( - c("--j.supp"), - type = "character", - default = NULL, - help = "supplement junctions in the same format as junctions"), - make_option( - c("--blacklist.junctions"), - type = "character", - default = NULL, - help = "rearrangement junctions to be excluded from consideration"), - make_option( - c("--whitelist.junctions"), - type = "character", - default = NULL, - help = "rearrangement junctions to be forced to be incorporated"), - make_option( - c("--geno"), - action = "store_true", - default = FALSE, - help = "whether the junction has genotype information"), - make_option( - c("--indel"), - type = "character", - default = "exclude", - help = "character of the decision to 'exclude' or 'include' small(< min.nbins * coverage bin width) isolated INDEL-like events into the model. Default NULL, do nothing."), - make_option( - c("--cfield"), - type = "character", - help = "junction confidence meta data field in ra"), - make_option( - c("--tfield"), - type = "character", - default = "tier", - help = "tier confidence meta data field in ra. tiers are 1 = must use, 2 = may use, 3 = use only in iteration>1 if near loose end. Default 'tier'."), - make_option( - c("--iterate"), - type = "integer", - default = 0, - help = "the number of extra re-iterations allowed, to rescue lower confidence junctions that are near loose end. Default 0. This requires junctions to be tiered via a metadata field tfield."), - make_option( - c("--rescue.window"), - type = "numeric", - default = 1e3, - help = "window size in bp within which to look for lower confidence junctions. Default 1000."), - make_option( - c("--rescue.all"), - type = "logical", - default = FALSE, - action = "store_true", - help = "Attempt to rescue all loose ends regardless of internal quality filter" - ), - make_option( - c("--nudgebalanced"), - type = "logical", - default = TRUE, - help = "whether to attempt to add a small incentive for chains of quasi-reciprocal junctions."), - make_option( - c("--edgenudge"), - type = "numeric", - default = 0.1, - help = "numeric hyper-parameter of how much to nudge or reward aberrant junction incorporation. Default 0.1 (should be several orders of magnitude lower than average 1/sd on individual segments), a nonzero value encourages incorporation of perfectly balanced rearrangements which would be equivalently optimal with 0 copies or more copies."), - make_option( - c("--strict"), - default = FALSE, - action = "store_true", - help = "if used will only include junctions that exactly overlap segs"), - make_option( - c("--allin"), - action = "store_true", - default = FALSE, - help = "if TRUE will put all tiers in the first round of iteration"), - make_option( - c("--field"), - type = "character", - default = "ratio", - help = "name of the metadata column of coverage that contains the data. Default 'ratio' (coverage ratio between tumor and normal). If using dryclean, it is 'foreground'."), - make_option( - c("-s", "--seg"), - type = "character", - help = "Path to .rds file of GRanges object of intervals corresponding to initial segmentation (required)"), - make_option( - c("--maxna"), - type = "numeric", - default = 0.1, - help = "Any node with more NA than this fraction will be ignored"), - make_option( - c("--blacklist.coverage"), - type = "character", - default = NULL, - help = "Path to .rds, BED, TXT, containing the blacklisted regions of the reference genome"), - make_option( - c("--nseg"), - type = "character", - default = "", - help = "Path to .rds file of GRanges object of intervals corresponding to normal tissue copy number, needs to have $cn field"), - make_option( - c("--hets"), - type = "character", - default = "", - help = "Path to tab delimited hets file output of pileup with fields seqnames, start, end, alt.count.t, ref.count.t, alt.count.n, ref.count.n"), - make_option( - c("--ploidy"), - type = "character", - default = NA, - help = "Ploidy guess, can be a length 2 range"), - make_option( - c("--purity"), - type = "character", - default = NA, - help = "Purity guess, can be a length 2 range"), - make_option( - c("--ppmethod"), - type = "character", - default = "sequenza", - help = "select from 'ppgrid', 'ppurple', and 'sequenza' to infer purity and ploidy if not both given. Default, 'sequenza'."), - make_option( - c("--cnsignif"), - type = "numeric", - default = 1e-5, - help = "alpha value for CBS"), - make_option( - c("--slack"), - type = "numeric", - default = 100, - help = "Slack penalty to apply per loose end"), - make_option( - c("--linear"), - action = "store_true", - default = FALSE, - help = "if TRUE will use L1 loose end penalty"), - make_option( - c("-t", "--tilim"), - type = "integer", - default = 6000, - help = "Time limit for JaBbA MIP"), - make_option( - c("--epgap"), - type="numeric", - default = 0.01, - help = "threshold for calling convergence"), - make_option( - c("-o", "--outdir"), - type = "character", - default = "./", - help = "Directory to dump output into"), - make_option( - c("-n", "--name"), - type = "character", - default = "tumor", - help = "Sample / Individual name"), - make_option( - c("--cores"), - type = "integer", - default = 1, - help = "Number of cores for JaBBa MIP"), - ## make_option(c("--gurobi"), - ## type = 'character', - ## default = 'FALSE', - ## help = "if TRUE will use gurobi (gurobi R package must be installed) and if FALSE will use CPLEX (cplex must be installed prior to library installation)"), - make_option(c("-v", "--verbose"), - action = "store_true", - default = FALSE, - help = "verbose output") - ) - - parseobj = OptionParser(usage = paste(c("jba JUNCTIONS COVERAGE [options]", - "\tJUNCTIONS can be BND style vcf, bedpe, rds of GrangesList", - " \tCOVERAGE is a .wig, .bw, .bedgraph, .bed., .rds of a granges, or .tsv .csv /.txt file that is coercible to a GRanges", - "\tuse --field=FIELD argument so specify which column to use if specific meta field of a multi-column table"), - collapse="\n"), - option_list=option_list) - - args = tryCatch(parse_args(parseobj, positional_arguments= 2, print_help_and_exit = FALSE), - error = function(e) {message(jbastr); print_help(parseobj); NULL}) - - - opt = NULL; - if (!is.null(args)) { - opt = args$options - opt$junctions = args$args[1] - opt$coverage = args$args[2] - if (!file.exists(opt$junctions)) - { - message('Did not find junction file ', opt$junctions) - message('Warning: will be running JaBbA without junctions') - ## print_help(parseobj); stop() - opt$junctions = "" - } - if (!file.exists(opt$coverage)) - { - message('Did not find coverage file ', opt$coverage) - print_help(parseobj); stop() - } - } else { - stop("See usage.") - } -} - -suppressPackageStartupMessages( -{ - - if (!is.null(opt$seg) && is.character(opt$seg) && (is.na(opt$seg) || opt$seg == '/dev/null' || !file.exists(opt$seg))) - { - message('Did not find seg file ', opt$seg, ' setting to NULL') - opt$seg = NULL - } - - - if (!is.null(opt$j.supp) && (is.na(opt$j.supp) || opt$j.supp == '/dev/null' || !file.exists(opt$j.supp))) - { - message('Did not find j.supp file ', opt$j.supp, ' setting to NULL') - opt$j.supp = NULL - } - - if (!is.null(opt$hets) && (is.na(opt$hets) || opt$hets == '/dev/null' || !file.exists(opt$hets))) - { - message('Did not find hets file ', opt$hets, ' setting to NULL') - opt$hets = NULL - } - - if (!is.null(opt$nseg) && (is.na(opt$nseg) || opt$nseg == '/dev/null' || !file.exists(opt$nseg))) - { - message('Did not find nseg file ', opt$nseg, ' setting to NULL') - opt$nseg = NULL - } - - if (!is.null(opt$j.supp) && (is.na(opt$j.supp) || opt$j.supp == '/dev/null' || !file.exists(opt$j.supp))) - { - message('Did not find supplementary junction file ', opt$j.supp, ' setting to NULL') - opt$j.supp = NULL - } - - - library(JaBbA) - jmessage = function(..., pre = 'JaBbA'){ - message(pre, ' ', paste0(as.character(Sys.time()), ': '), ...) - } - message(jbastr) - jmessage('Located junction file ', opt$junctions) - jmessage('Located coverage file ', opt$coverage) - jmessage('Loading packages ...') - system(paste('mkdir -p', opt$outdir)) - writeLines(paste(paste("--", names(opt), " ", sapply(opt, function(x) paste(x, collapse = ",")), sep = "", collapse = " "), sep = ""), paste(opt$outdir, "cmd.args", sep = "/")) - saveRDS(opt, paste(opt$outdir, "cmd.args.rds", sep = "/")) - jab = JaBbA( - ## below are required positional arguments - junctions = opt$junctions, - coverage = opt$coverage, - ## below are junction related options - juncs.uf = opt$j.supp, - blacklist.junctions = opt$blacklist.junctions, - whitelist.junctions = opt$whitelist.junctions, - geno = opt$geno, - indel = opt$indel, - cfield = opt$cfield, - tfield = opt$tfield, - reiterate = opt$iterate, - rescue.window = opt$rescue.window, - nudge.balanced = opt$nudgebalanced, - ## TODO: thresh.balanced, 500 default hardcoded - edgenudge = opt$edgenudge, - strict = opt$strict, - all.in = opt$allin, - ## below are coverage related options - field = opt$field, - seg = opt$seg, - max.na = opt$maxna, - blacklist.coverage = opt$blacklist.coverage, - nseg = opt$nseg, - hets = opt$hets, - purity = scan(text = opt$purity, what = numeric(), sep = ",", quiet = T), - ploidy = scan(text = opt$ploidy, what = numeric(), sep = ",", quiet = T), - pp.method = opt$ppmethod, - ## TODO: min.nbins, 5 by default - cn.signif = opt$cnsignif, - ## below are optimization related options - slack = opt$slack, - loose.penalty.mode = ifelse(opt$linear, "linear", "boolean"), - tilim = opt$tilim, - epgap = opt$epgap, - ## TODO use.gurobi = opt$gurobi, - ## TODO max.threads = Inf, max.mem = 16 - ## below are general options - outdir = opt$outdir, - name = opt$name, - mc.cores = opt$cores, - verbose = 2 - ## TODO init, dyn.tuning - ) -}) - - - diff --git a/jba b/jba index 18cc1a7..37d768c 100755 --- a/jba +++ b/jba @@ -1,8 +1,5 @@ #!/usr/bin/env Rscript - library(optparse) - - jbastr = " _____ ___ _ _____ (___ ) ( _`\\ ( ) ( _ ) @@ -63,8 +60,7 @@ if (!exists("opt")) make_option( c("--rescue.all"), type = "logical", - default = FALSE, - action = "store_true", + default = TRUE, help = "Attempt to rescue all loose ends regardless of internal quality filter" ), make_option( @@ -99,7 +95,7 @@ if (!exists("opt")) make_option( c("--maxna"), type = "numeric", - default = 0.1, + default = 0.5, help = "Any node with more NA than this fraction will be ignored"), make_option( c("--blacklist.coverage"), @@ -154,7 +150,7 @@ if (!exists("opt")) make_option( c("--epgap"), type="numeric", - default = 0.01, + default = 1e-5, help = "threshold for calling convergence"), make_option( c("-o", "--outdir"), @@ -184,7 +180,7 @@ if (!exists("opt")) make_option( c("--lp"), type = "logical", - default = FALSE, + default = TRUE, help = "Whether to run optimization as LP"), make_option( c("--ism"), @@ -192,14 +188,15 @@ if (!exists("opt")) default = FALSE, help = "Add ISM constraints? (only valid if LP = TRUE)"), make_option( - c("--drop.chr"), - type = "logical", - default = TRUE, - help = "drops chr in seqlevels for inputs"), - ## make_option(c("--gurobi"), - ## type = 'character', - ## default = 'FALSE', - ## help = "if TRUE will use gurobi (gurobi R package must be installed) and if FALSE will use CPLEX (cplex must be installed prior to library installation)"), + c("--filter_loose"), + type = "logical", + default = FALSE, + help = "run filter.loose to get a breakdown of loose ends?"), + make_option( + c("--gurobi"), + type = "logical", + default = FALSE, + help = "use gurobi optimizer instead of CPLEX?"), make_option(c("-v", "--verbose"), action = "store_true", default = FALSE, @@ -212,11 +209,11 @@ if (!exists("opt")) "\tuse --field=FIELD argument so specify which column to use if specific meta field of a multi-column table"), collapse="\n"), option_list=option_list) - parse_args(parseobj, positional_arguments= 2, print_help_and_exit = FALSE) + args = tryCatch(parse_args(parseobj, positional_arguments= 2, print_help_and_exit = FALSE), error = function(e) {message(jbastr); print_help(parseobj); NULL}) - + opt = NULL; if (!is.null(args)) { opt = args$options @@ -226,7 +223,6 @@ if (!exists("opt")) { message('Did not find junction file ', opt$junctions) message('Warning: will be running JaBbA without junctions') - ## print_help(parseobj); stop() opt$junctions = "" } if (!file.exists(opt$coverage)) @@ -274,7 +270,7 @@ suppressPackageStartupMessages( } - library(JaBbA) + require(JaBbA) jmessage = function(..., pre = 'JaBbA'){ message(pre, ' ', paste0(as.character(Sys.time()), ': '), ...) } @@ -321,17 +317,18 @@ suppressPackageStartupMessages( loose.penalty.mode = ifelse(opt$linear, "linear", "boolean"), tilim = opt$tilim, epgap = opt$epgap, - ## TODO use.gurobi = opt$gurobi, + use.gurobi = opt$gurobi, ## TODO max.threads = Inf, max.mem = 16 ## below are general options + max.mem = opt$mem, outdir = opt$outdir, name = opt$name, mc.cores = opt$cores, lp = opt$lp, ism = opt$ism, verbose = 2, - drop.chr = opt$drop.chr - ## TODO init, dyn.tuning + fix.thres = opt$fix.thres, + filter_loose = opt$filter_loose ) })