From 2b0deb69c33cbb5bfbb52df9a6220226310ec382 Mon Sep 17 00:00:00 2001
From: shihabdider <shihabdider@gmail.com>
Date: Tue, 7 May 2024 14:45:42 -0400
Subject: [PATCH 1/3] refactor: fix inputs of post jabba processes

---
 bin/Events.R                               |    2 +-
 bin/Fusions.R                              |   50 +
 bin/cbsFH.R                                |    2 +-
 bin/lp_phased_balance.R                    | 2371 +++++++++++++++++++-
 bin/non_integer_balance.R                  |   40 +-
 conf/igenomes.config                       |    6 +-
 conf/modules/alleic_cn.config              |   36 -
 conf/modules/allelic_cn.config             |   70 +
 main.nf                                    |    4 +
 modules/local/allelic_cn/main.nf           |   24 +-
 modules/local/ascat/main.nf                |    2 +-
 modules/local/cbs/main.nf                  |    1 -
 modules/local/dryclean/main.nf             |   14 +-
 modules/local/fusions/main.nf              |    3 +-
 modules/local/gridss/gridss/main.nf        |    6 +-
 nextflow.config                            |   10 +-
 nextflow_schema.json                       |    8 -
 subworkflows/local/allelic_cn/main.nf      |    4 +
 subworkflows/local/bam_fragCounter/main.nf |    7 +-
 subworkflows/local/cov_dryclean/main.nf    |   21 +-
 tests/modules/local/dryclean/main.nf.test  |    5 +-
 tests/nextflow.config                      |   15 +-
 workflows/nfjabba.nf                       |  253 +--
 23 files changed, 2678 insertions(+), 276 deletions(-)
 create mode 100644 conf/modules/allelic_cn.config

diff --git a/bin/Events.R b/bin/Events.R
index 6b56dee..6bb242f 100755
--- a/bin/Events.R
+++ b/bin/Events.R
@@ -39,7 +39,7 @@ withAutoprint(
               min.span = 1e6,
               max.small = 1e4)
     newtic = tic
-    overwritefun("newtic", "tic", "gGnome")
+    # overwritefun("newtic", "tic", "gGnome")
 
 
     ## call complex events
diff --git a/bin/Fusions.R b/bin/Fusions.R
index e69de29..372ee4c 100644
--- a/bin/Fusions.R
+++ b/bin/Fusions.R
@@ -0,0 +1,50 @@
+withAutoprint(
+    {
+        ## Call gene fusions on a JaBbA / gGnome graph and save them as <outdir>/fusions.rds
+        library(optparse)
+        options(bitmapType = "cairo")
+        ## on any error: print the call stack, then exit with status 1
+        options(error = function() {
+            traceback(2)
+            quit("no", 1)
+        })
+        ## the command line is only parsed when no `opt` object exists yet
+        if (!exists("opt")) {
+            option_list <- list(
+                make_option(c("-i", "--id"), type = "character", help = "sample id"),
+                make_option(c("-g", "--gGraph"), type = "character", help = "an RDS file contains a gGraph or JaBbA graph with cn annotation on nodes and edges"),
+                make_option(c("-r", "--gencode"), type = "character", help = "an RDS or GTF file of GENCODE"),
+                make_option(c("-o", "--outdir"), type = "character", default = "./", help = "Directory to dump output into"),
+                make_option(c("--cores"), type = "integer", default = 1L, help = "Number of cores")
+            )
+            parseobj <- OptionParser(option_list = option_list)
+            opt <- parse_args(parseobj)
+            saveRDS(opt, paste(opt$outdir, "cmd.args.rds", sep = "/"))
+        }
+
+        library(gGnome)
+        library(gUtils)
+        library(parallel) ## needed for mc.cores
+
+        ## GENCODE: a path ending in ".rds" is read with readRDS, anything else is imported by rtracklayer
+        if (grepl("\\.rds$", opt$gencode)) {
+            gencode <- readRDS(as.character(opt$gencode))
+        } else {
+            gencode <- rtracklayer::import(opt$gencode)
+        }
+
+        ## call gene fusions on the graph stored in opt$gGraph
+        fus <- fusions(gG(jab = opt$gGraph), gencode, verbose = TRUE, mc.cores = opt$cores)
+
+        ## annotate the fusions with the sample id and the min cn over their edges
+        if (length(fus)) {
+            fus$set(id = opt$id)
+            fus$set(mincn = fus$eval(edge = min(cn, na.rm = TRUE)))
+        }
+
+        saveRDS(fus, paste0(opt$outdir, "/", "fusions.rds"))
+
+        quit("no", 0)
+    },
+    echo = FALSE
+)
diff --git a/bin/cbsFH.R b/bin/cbsFH.R
index e7b3ced..8a2ab2f 100644
--- a/bin/cbsFH.R
+++ b/bin/cbsFH.R
@@ -8506,7 +8506,7 @@ ppgrid = function(
   else ## only row, only go left right
       M = (NLLc < NLLcl & NLLc < NLLcr)[-c(1, nrow(NLLc)), -c(1, ncol(NLLc)), drop = FALSE]
 
-  if (length(M)>1)
+  if (length(M)>1 & any(M, na.rm=TRUE))
       {
           ix = which(M, arr.ind= T);
           if (nrow(ix)>1)
diff --git a/bin/lp_phased_balance.R b/bin/lp_phased_balance.R
index b920905..b3323e4 100644
--- a/bin/lp_phased_balance.R
+++ b/bin/lp_phased_balance.R
@@ -35,18 +35,588 @@
 
     ## updated balance? yikes haha
     library(JaBbA)
-    devtools::load_all("~/git/gGnome_ZC")
-    devtools::load_all("~/git/zitools")
+    library(zitools)
+    library(gGnome)
     library(gUtils)
     library(skitools)
     library(DNAcopy)
 
 
-    ## the following are neede for grab.hets and grab.hets.from.maf but not doing that for now..
-    ## source("~/projects/phasing/phasing_utils.R")
-    ## source("~/projects/phasing/tmp.R")
+    # utils.R has been pasted below
+    # source("~/utils.R")
 
-    source("~/utils.R")
+    ## start of utils.R
+
+    #' @name diploid2haploid
+    #' @title diploid2haploid
+    #'
+    #' @description Melts a diploid gGraph into major / minor allele copies of every node, with edges for all allele pairings
+    #' @param gg (gGraph) diploid gGraph (expects cn, cn.low, cn.high, var.low, var.high and nhets as node metadata)
+    #' @param verbose (logical) default FALSE, currently unused
+    #'
+    #' @return gGraph with field allele and cn (variance, var.adj and weight are also set on the melted nodes)
+    diploid2haploid = function(gg, verbose = FALSE) {
+        og.nodes.gr = gg$nodes$gr[, c("cn.low", "cn.high", "var.low", "var.high", "cn", "node.id", "nhets")]
+        values(og.nodes.gr)[, "og.node.id"] = values(gg$nodes$gr)[, "node.id"]
+        names(values(og.nodes.gr))[names(values(og.nodes.gr)) == "cn"] = "cn.total"
+
+        ## prepare nodes for melted graph (major allele copies first, then the minor allele copies)
+        phased.gg.nodes = c(og.nodes.gr, og.nodes.gr)
+        values(phased.gg.nodes)[, "cn"] = c(values(og.nodes.gr)[, "cn.high"], values(og.nodes.gr)[, "cn.low"])
+        values(phased.gg.nodes)[, "allele"] = c(rep("major", length(og.nodes.gr)), rep("minor", length(og.nodes.gr)))
+        values(phased.gg.nodes)[, "variance"] = c(values(og.nodes.gr)[, "var.high"], values(og.nodes.gr)[, "var.low"])
+        ## bound the variance from below by the allelic cn and by 1
+        values(phased.gg.nodes)[, "var.adj"] = pmax(pmax(values(phased.gg.nodes)[, "variance"], values(phased.gg.nodes)[, "cn"]), 1)
+
+        ## compute the weight (number of hets divided by the bounded variance)
+        values(phased.gg.nodes)[, "weight"] = values(phased.gg.nodes)[, "nhets"] / values(phased.gg.nodes)[, "var.adj"]
+
+        ## prepare edges for melted graph: one copy per allele pairing (minor node index = major index + number of nodes)
+        phased.gg.edges = rbind(
+            gg$edges$dt[, .(n1, n2, n1.side, n2.side, type,
+                og.edge.id = edge.id,
+                n1.allele = "major",
+                n2.allele = "major")],
+            gg$edges$dt[, .(n1 = n1 + length(og.nodes.gr), n2 = n2 + length(og.nodes.gr), type,
+                n1.side, n2.side,
+                og.edge.id = edge.id,
+                n1.allele = "minor",
+                n2.allele = "minor")],
+            gg$edges$dt[, .(n1, n2 = n2 + length(og.nodes.gr), type,
+                n1.side, n2.side,
+                og.edge.id = edge.id,
+                n1.allele = "major",
+                n2.allele = "minor")],
+            gg$edges$dt[, .(n1 = n1 + length(og.nodes.gr), n2, type,
+                n1.side, n2.side,
+                og.edge.id = edge.id,
+                n1.allele = "minor",
+                n2.allele = "major")]
+        )
+
+        ## add n1/n2 chromosome information
+        phased.gg.edges[, ":="(n1.chr = seqnames(phased.gg.nodes)[n1] %>% as.character,
+            n2.chr = seqnames(phased.gg.nodes)[n2] %>% as.character)]
+
+        ## add edge connection type (straight/cross); edges joining different chromosomes keep connection = NA
+        phased.gg.edges[n1.chr == n2.chr & n1.allele == n2.allele, connection := "straight"]
+        phased.gg.edges[n1.chr == n2.chr & n1.allele != n2.allele, connection := "cross"]
+
+        phased.gg = gG(nodes = phased.gg.nodes, edges = phased.gg.edges)
+
+        ref.edge.col = alpha("blue", 0.3)
+        alt.edge.col = alpha("red", 0.3)
+        ref.edge.lwd = 0.5
+        alt.edge.lwd = 1.0
+        phased.gg$edges$mark(col = ifelse(phased.gg$edges$dt$type == "REF", ref.edge.col, alt.edge.col),
+            lwd = ifelse(phased.gg$edges$dt$type == "REF", ref.edge.lwd, alt.edge.lwd))
+
+        major.node.col = alpha("red", 0.5)
+        minor.node.col = alpha("blue", 0.5)
+        phased.gg$nodes$mark(col = ifelse(phased.gg$nodes$dt$allele == "major", major.node.col, minor.node.col),
+            ywid = 0.8)
+
+        return(phased.gg)
+    }
+
+    ####################
+    #' @name jabba.alleles2
+    #' @title jabba.alleles2
+    #' @rdname internal
+    #'
+    #' @description
+    #' Populates allelic values for JaBbA object.  This does not explicitly impose junction balance constraints on alleles, but rather just computes
+    #' the maximum likelihood estimate given allelic counts and the inferred total copy number on a given segment according to JaBbA.
+    #' Only the count based path is implemented: if the count fields are missing the function stops with an error, even when a BAF field is present.
+    #'
+    #' @param jab JaBbA object ($segstats, $adj and $purity are read)
+    #' @param het.sites GRanges with meta data fields (see below) for alt and ref count
+    #' @param alt.count.field character specifying alt.count meta data field in input het.sites (default $alt)
+    #' @param ref.count.field character specifying ref.count meta data field in input het.sites (default $ref)
+    #' @param baf.field character specifying the BAF meta data field in het.sites, only checked when the count fields are missing (default $baf.t)
+    #' @param split.ab logical flag (default FALSE); only feeds has.ab.rand, which is consumed solely by the disabled coverage() re-segmentation, so it currently does not change the output
+    #' @param uncoupled logical flag (default FALSE), currently unused: it is only referenced by the disabled phasing code
+    #' @param conservative logical flag (default FALSE), currently unused: it is only referenced by the disabled phasing code
+    #' @param marginal fix marginal? default TRUE, i.e. low + high equals the total cn of the segment; if FALSE, totals of cn - 1 and cn + 1 are tried as well
+    #' @param verbose logical flag
+    #'
+    #' @return
+    #' the input jab with the following meta data fields added to jab$segstats:
+    #' $cn.low and $cn.high = allelic copy number estimates of the low and the high allele
+    #' $nhets = number of (stretched) het site bins counted per segment with %N%
+    #' $var.low and $var.high = variance of the low and of the high counts per segment
+    #' segments without an allelic estimate get NA for cn.low / cn.high / var.low / var.high and nhets = 0
+    ############################################
+    jabba.alleles2 = function(jab,
+                              het.sites, ## granges with meta data fields for alt.count and ref.count
+                              alt.count.field = 'alt',
+                              ref.count.field = 'ref',
+                              baf.field = 'baf.t',
+                              split.ab = FALSE, ## if split.ab == T, then will split across any "aberrant" segment (i.e. segment with ab edge entering or leaving prior to computing allelic states (note: this might create gaps)
+                              uncoupled = FALSE, ## if uncoupled, we just assign each high low allele the MLE conditioning on the total copy number
+                              conservative = FALSE, ## if TRUE then will leave certain allelic segments "unphased" if one cannot sync the high / low interval state with the incoming and / or outgoing junction state
+                              marginal = TRUE,
+                              verbose = FALSE
+                              )
+    {
+        if (!all(c(alt.count.field, ref.count.field) %in% names(values(het.sites)))){
+            jwarning('count fields not found in meta data of het.sites input, trying BAF...')
+            if (!(baf.field %in% names(values(het.sites))))
+            jerror('BAF field not found in meta data of het.sites input either!')
+            else{
+                ## the BAF based fit (re.seg$low and re.seg$high) was never implemented, so stop
+                ## here instead of failing further down with "object 're.seg' not found"
+                if (verbose)
+                message('Processing ', length(het.sites),
+                    ' het sites using field ', baf.field)
+                jerror('BAF based allelic copy number inference is not implemented, please provide alt / ref count fields')
+            }
+        } else {
+            ## jerror('count fields not found in meta data of het.sites input')
+
+            if (verbose)
+            {
+                message('Processing ', length(het.sites), ' het sites using fields ', alt.count.field, ' and ', ref.count.field)
+            }
+
+            het.sites$low.count = pmin(values(het.sites)[, alt.count.field], values(het.sites)[, ref.count.field])
+            het.sites$high.count = pmax(values(het.sites)[, alt.count.field], values(het.sites)[, ref.count.field])
+
+            het.sites = het.sites[!is.na(het.sites$low.count) & !is.na(het.sites$high.count)]
+
+            ## stretch out het sites
+            bin.gaps = gaps(het.sites)
+            bin.gaps = bin.gaps %Q% (strand(bin.gaps) == "*")
+            bin.gaps = resize(bin.gaps, width = width(bin.gaps) + 1, fix = "start")
+            values(bin.gaps)[, "low.count"] = gr.val(query = bin.gaps[, c()],
+                target = het.sites,
+                val = "low.count",
+                mean = TRUE,
+                na.rm = TRUE)$low.count
+            values(bin.gaps)[, "high.count"] = gr.val(query = bin.gaps[, c()],
+                target = het.sites,
+                val = "high.count",
+                mean = TRUE,
+                na.rm = TRUE)$high.count
+
+            het.sites = bin.gaps ## use these stretched out sites
+
+            het.sites = het.sites[!is.na(het.sites$low.count) & !is.na(het.sites$high.count)]
+
+            ss.p = jab$segstats[ as.logical( strand(jab$segstats)=='+' ) ]
+
+            ## find the reference junctions
+            ord.ix = order(jab$segstats)
+            rev.ix = as.logical(strand(jab$segstats[ord.ix]) == '-')
+            ord.ix = c(ord.ix[!rev.ix], rev(ord.ix[rev.ix]))
+
+            ref.jun = cbind(ord.ix[-length(ord.ix)], ord.ix[-1])
+            ref.jun = ref.jun[which(jab$adj[ref.jun]>0), , drop = FALSE] ## keep the two column matrix even if a single junction is left
+
+            has.ab.rand = 0
+            if (split.ab)
+            {
+                ab.adj = jab$adj
+                ab.adj[ref.jun] = 0
+                has.ab = as.numeric(Matrix::rowSums(ab.adj!=0)!=0 | Matrix::colSums(ab.adj!=0)!=0)[which( as.logical( strand(jab$segstats)=='+')) ]
+                has.ab.rand = runif(length(ss.p)) * 1e-6 * has.ab
+            }
+
+            ss.p = ss.p[!is.na(ss.p$cn)]
+            ## NOTE: has.ab.rand is only consumed by the disabled coverage() call below
+            #' zchoo Wednesday, Apr 27, 2022 08:34:21 AM
+            ## don't do this since this merges ranges with the same score!
+            ## re.seg = as(coverage(ss.p, weight = ss.p$cn + has.ab.rand), 'GRanges')
+            re.seg = ss.p[, "cn"]
+            ## re.seg$cn = round(re.seg$score)
+
+            het.sites$ix = gr.match(het.sites, re.seg)
+
+            if (verbose)
+            {
+                message('Computed high / low counts and matched to segs')
+            }
+
+
+            highs = split(het.sites$high.count, het.sites$ix)[as.character(seq_along(re.seg))]
+            lows = split(het.sites$low.count, het.sites$ix)[as.character(seq_along(re.seg))]
+
+            het.sites$cn = re.seg$cn[het.sites$ix]
+            purity = jab$purity
+            ploidy = mean(het.sites$cn, na.rm = TRUE) ## ploidy may be slightly different from "global ploidy" depending on the distribution of sites
+
+            sw = length(het.sites)
+            total = sum(as.numeric(c(het.sites$high.count, het.sites$low.count)))
+
+            cn = re.seg$cn
+            ## gamma = 2*(1-purity)/purity  ## gammas and betas need to be recomputed for
+            ## beta = (2*(1-purity)*sw + purity*ploidy*sw) / (purity * total)
+            gamma = 1*(1-purity)/purity  ## gammas and betas need to be recomputed for  (1 since we are looking at het alleles)
+            beta = (1*(1-purity)*sw + purity*ploidy*sw) / (purity * total)
+            centers = (0:(max(cn)+1) + gamma)/beta
+
+            if (verbose)
+            {
+                message('Computed SNP ploidy and allelic copy centers')
+            }
+
+            ## now test deviation from each absolute copy combo using poisson model
+            ## i.e. counts ~ poisson(expected mean)
+            ##
+            re.seg.tmp = lapply(seq_along(re.seg), function(i)
+                {
+                    ##        if (verbose)
+                    ##          cat('.')
+                    x = lows[[i]]
+                    if (length(x)==0)
+                    return(list(low = NA, high = NA))
+                    y = highs[[i]]
+                    tot.cn = cn[i]
+                    ## allow for +/- errors
+                    ## use negative binomial??
+                    ## ll = sapply(0:(floor(tot.cn/2)), function(j) sum(pnbinom(x, mu = centers[j+1],
+                    ##                                                          size = 0,##centers[j+1] / 2,
+                    ##                                                          log.p = T) +
+                    ##                                                  pnbinom(y, mu = centers[tot.cn - j + 1],
+                    ##                                                          size = centers[tot.cn - j + 1],
+                    ##                                                          log.p = T)))
+                    ll = sapply(0:(floor(tot.cn/2)), function(j) sum(ppois(x,centers[j+1], log.p = TRUE) +
+                        ppois(y,centers[tot.cn-j+1],log.p = TRUE)))
+                    ll = ll - min(ll)
+                    curr.best = max(ll)
+                    curr.cn = which.max(ll) - 1
+                    curr.hcn = tot.cn - curr.cn
+                    if (!marginal) {
+                        if (tot.cn > 1) {
+                            ## ll = sapply(0:(floor(tot.cn/2)), function(j) sum(pnbinom(x, mu = centers[j+1],
+                            ##                                                          size = centers[j+1] / 2,
+                            ##                                                          log.p = T) +
+                            ##                                                  pnbinom(y, mu = centers[tot.cn - j],
+                            ##                                                          size = centers[tot.cn - j] / 2,
+                            ##                                                          log.p = T)))
+                            ll = sapply(0:(floor((tot.cn - 1)/2)), function(j) sum(ppois(x,centers[j+1], log.p = TRUE) +
+                                ppois(y,centers[tot.cn-j],log.p = TRUE)))
+                            ll = ll - min(ll)
+                            if (max(ll) > curr.best) {
+                                curr.best = max(ll)
+                                curr.cn = which.max(ll) - 1
+                                curr.hcn = tot.cn - curr.cn - 1
+                            }
+                        }
+                        ## ll = sapply(0:(floor(tot.cn/2)), function(j) sum(pnbinom(x, mu = centers[j+1],
+                        ##                                                          size = centers[j+1],
+                        ##                                                          log.p = T) +
+                        ##                                                  pnbinom(y, mu = centers[tot.cn - j + 2],
+                        ##                                                          size = centers[tot.cn - j + 2],
+                        ##                                                          log.p = T)))
+                        ll = sapply(0:(floor((tot.cn + 1)/2)), function(j) sum(ppois(x,centers[j+1], log.p = TRUE) +
+                            ppois(y,centers[tot.cn-j+2],log.p = TRUE)))
+                        ll = ll - min(ll)
+                        if (max(ll) > curr.best) {
+                            curr.best = max(ll)
+                            curr.cn = which.max(ll) - 1
+                            curr.hcn = tot.cn - curr.cn + 1
+                        }
+                    }
+                    ##return(list(low = curr.cn, high = tot.cn - curr.cn))
+                    return(list(low = curr.cn, high = curr.hcn))
+                })
+
+            re.seg$low = lapply(re.seg.tmp, function(x) {x$low}) %>% unlist
+            re.seg$high = lapply(re.seg.tmp, function(x) {x$high}) %>% unlist
+        }
+        ## #########################################################################
+        ## borderline, below are common to both methods
+        ## no need to round and NA segments are fine
+        jab$segstats$cn.low = gr.val(jab$segstats, re.seg, 'low', na.rm = TRUE)$low
+        jab$segstats$cn.high = gr.val(jab$segstats, re.seg, 'high', na.rm = TRUE)$high
+
+        ## also get number of hets per segment
+        jab$segstats$nhets = jab$segstats %N% het.sites
+
+        ## and the variance
+        jab$segstats$var.low = gr.val(query = jab$segstats,
+            target = het.sites,
+            val = 'low.count',
+            na.rm = TRUE,
+            FUN = function(x, w, na.rm) {var(x, na.rm = na.rm)})$low.count
+
+        jab$segstats$var.high = gr.val(query = jab$segstats,
+            target = het.sites,
+            val = 'high.count',
+            na.rm = TRUE,
+            FUN = function(x, w, na.rm) {var(x, na.rm = na.rm)})$high.count
+
+        ## NA-out some nodes
+        na.ix = (!gr.val(jab$segstats, re.seg, 'low', FUN = function(x,w,na.rm) any(!is.na(x)))$low) |
+        (!gr.val(jab$segstats, re.seg, 'high', FUN = function(x,w,na.rm) any(!is.na(x)))$high)
+        jab$segstats$cn.low[na.ix] = jab$segstats$cn.high[na.ix] = NA
+        jab$segstats$var.low[na.ix] = jab$segstats$var.high[na.ix] = NA
+        jab$segstats$nhets[na.ix] = 0
+
+        ## ## ###########
+        ## ## phasing
+        ## ## ###########
+
+        ## ## iterate through all reference junctions and apply (wishful thinking) heuristic
+        ## ##
+        ## ## populate n x n x 2 adjacency matrix, which we will later expand to a bigger matrix
+        ## adj.ab = jab$adj
+        ## adj.ab[ref.jun] = 0
+        ## adj.ref = jab$adj*0
+        ## adj.ref[ref.jun] = jab$adj[ref.jun]
+        ## high = low = jab$segstats[, c()]
+        ## high$cn = jab$segstats$cn.high
+        ## low$cn = jab$segstats$cn.low
+        ## high$parent = low$parent = seq_along(jab$segstats)
+        ## high$type = 'high'
+        ## low$type = 'low'
+        ## high$id = seq_along(jab$segstats)
+        ## low$id = length(jab$segstats) + seq_along(jab$segstats)
+        ## asegstats = c(high, low)
+        ## amap = cbind(high$id, low$id) ## maps segstats id x allele combos to asegstats id
+
+        ## aadj = sparseMatrix(1, 1, x = 0, dims = c(length(asegstats), length(asegstats)))
+
+        ## .flip = function(x) x %% 2+1
+
+        ## asegstats = c(high, low)
+        ## acn = cbind(high$cn, low$cn)
+
+        ## phased.out = phased.in = rep(TRUE, length(asegstats))
+
+        ## str = strand(asegstats)
+
+        ## if (verbose)
+        ##     message('Starting phasing ')
+
+        ## for (k in 1:nrow(ref.jun))
+        ## {
+        ##     i = ref.jun[k, 1]
+        ##     j = ref.jun[k, 2]
+        ##     a = acn[ref.jun[k,1],]
+        ##     b = acn[ref.jun[k,2],]
+
+        ##     phased.out[amap[i, ]] = FALSE
+        ##     phased.in[amap[j, ]] = FALSE
+
+        ##     pairs.ij = cbind(rep(c(1:2), 2), rep(c(1:2), each = 2)) ## 4 possible matches
+        ##     m = setdiff(which(a[pairs.ij[,1]] == b[pairs.ij[,2]]), NA)
+
+        ##     if (!(length(m) %in% c(0, 4))) ## 1,2, and 3 matches are fine (3 matches occur if one interval is in allelic balance, and the other not
+        ##     {
+        ##         if (length(m)==2) ## pick the phase that the alleles can handle
+        ##             m = rev(m[order(as.numeric(sum(adj.ab[i, ])<=a[pairs.ij[m,1]]) + as.numeric(sum(adj.ab[, j])<=b[pairs.ij[m,2]]))])
+
+        ##         m.ij = pairs.ij[m[1], ]
+        ##         fm.ij = .flip(m.ij)
+        ##         aadj[amap[i, m.ij[1]], amap[j, m.ij[2]]] = min(a[m.ij[1]], jab$adj[i, j])
+        ##         aadj[amap[i, fm.ij[1]], amap[j, fm.ij[2]]] = jab$adj[i, j] - aadj[amap[i, m.ij[1]], amap[j, m.ij[2]]]
+
+        ##         phased.out[amap[i, ]] = TRUE
+        ##         phased.in[amap[j, ]] = TRUE
+
+        ##         if (length(a.ab <- Matrix::which(adj.ab[i,]!=0))>0)
+        ##         {
+        ##             ## if a.ab (partner) is already phased then unpopulate the non-ab allelic junction, otherwise populate both alleles of partner
+        ##             ## BUG: a.ab is length 2????
+        ##             ## hack: replace a.ab with a.ab[1]
+        ##             if (any(ph <- aadj[amap[i, fm.ij[1]], amap[a.ab[1], ]] !=0))
+        ##             {
+        ##                 aadj[amap[i, fm.ij[1]], amap[a.ab[1], ph]] = adj.ab[i, a.ab[1]]
+        ##                 aadj[amap[i, m.ij[1]], amap[a.ab[1], ph]] = 0
+        ##             }
+        ##             else
+        ##                 ## otherwise diffuse copy into both alleles of the partner (will be resolved when we resolve phase for the partner interval)
+        ##                 ## or collapse unphased nodes back
+        ##                 aadj[amap[i, fm.ij[1]], amap[a.ab[1], ]] = adj.ab[i, a.ab[1]]/2
+
+        ##             if (!conservative)
+        ##                 if (a[fm.ij[1]] < adj.ab[i, a.ab]) # if the allelic node can't handle the outgoing allelic edge flux, so unphase
+        ##                     phased.out[amap[i, ]] = FALSE
+        ##         }
+
+        ##         if (length(b.ab <- Matrix::which(adj.ab[,j]!=0))>0)
+        ##         {
+        ##             ## if b.ab (partner) is already phased then concentrate all of the junction copy into the aberrant allele of this interval
+        ##             ## BUG: why b.ab is length 2???? I thought we resolved this long ago
+        ##             ## hack: replace a.ab with a.ab[1]
+        ##             if (any(ph <- aadj[amap[b.ab[1], ], amap[j, fm.ij[2]]] !=0))
+        ##             {
+        ##                 aadj[amap[b.ab[1], ph], amap[j, fm.ij[2]]] = adj.ab[b.ab[1], j]
+        ##                 aadj[amap[b.ab[1], ph], amap[j, m.ij[2]]] = 0
+        ##             }
+        ##             else
+        ##                 ## otherwise diffuse copy into both alleles of the partner (will be resolved when we resolve phase for the partner interval)
+        ##                 ## or collapse unphased nodes back
+        ##                 aadj[amap[b.ab[1],], amap[j, fm.ij[2]]] = adj.ab[b.ab[1], j]/2
+
+        ##             if (!conservative)
+        ##                 if (b[fm.ij[2]] < adj.ab[b.ab, j]) # the allelic node cn can't handle the incoming allelic edge flux, so unphase
+        ##                     phased.in[amap[j, ]] = FALSE
+        ##         }
+        ##     }
+        ## }
+
+        ## if (verbose)
+        ##     message('Finished phasing, finalizing.')
+
+        ## asegstats$phased.in = phased.in
+        ## asegstats$phased.out = phased.out
+
+        ## if (uncoupled)
+        ##     unphased = rep(FALSE, length(asegstats))
+        ## else
+        ##     unphased = !asegstats$phased.out | !asegstats$phased.in
+
+        ## unphased.parents = unique(asegstats$parent[unphased])
+        ## aadj.unphunph = jab$adj[unphased.parents, unphased.parents]
+        ## aadj.phph = aadj[!unphased, !unphased]
+
+        ## asegstats$new.ind = NA
+        ## asegstats$new.ind[!unphased] = 1:sum(!unphased)
+        ## asegstats$new.ind[unphased] = as.integer(factor(asegstats$parent[unphased], unphased.parents))
+        ## mat.collapse = sparseMatrix(which(unphased), asegstats$new.ind[unphased], x = 1, dims = c(nrow(aadj), length(unphased.parents)))
+
+        ## aadj.phunph = aadj[!unphased, ] %*% mat.collapse
+        ## aadj.unphph = t(mat.collapse) %*% aadj[, which(!unphased)]
+
+        ## aadj.final = rbind(
+        ##     cbind(aadj.phph, aadj.phunph),
+        ##     cbind(aadj.unphph, aadj.unphunph)
+        ## )
+
+        ## asegstats.unphased = asegstats[match(unphased.parents, asegstats$parent)]
+        ## asegstats.unphased$cn = jab$segstats$cn[asegstats.unphased$parent]
+        ## asegstats.final = c(asegstats[!unphased], asegstats.unphased)
+        ## asegstats.final$phased = c(rep(T, sum(!unphased)), rep(F, length(asegstats.unphased)))
+        ## asegstats.final$type[!asegstats.final$phased] = 'total'
+
+        ## tmp.str = gr.string(gr.stripstrand(asegstats), mb = F, other.cols = 'type');
+        ## asegstats$tile.id = as.integer(factor(tmp.str, unique(tmp.str)))
+
+        ## ix = order(asegstats.final)
+        ## asegstats.final = asegstats.final[ix]
+        ## aadj.final = aadj.final[ix, ix]
+
+        ## if (verbose)
+        ##     message('Annotating allelic vertices')
+
+        ## tmp.string = gr.string(asegstats, mb = F, other.cols = 'type'); tmp.string2 = gr.string(gr.flipstrand(asegstats), mb = F, other.cols = 'type')
+        ## asegstats$flip.ix = match(tmp.string, tmp.string2)
+        ## asegstats$phased = !unphased
+
+        ## asegstats.final$edges.in = sapply(seq_along(asegstats.final),
+        ##                                   function(x) {ix = Matrix::which(aadj.final[,x]!=0); paste(ix, '(', aadj.final[ix,x], ')', '->', sep = '', collapse = ',')})
+        ## asegstats.final$edges.out = sapply(seq_along(asegstats.final),
+        ##                                    function(x) {ix = Matrix::which(aadj.final[x, ]!=0); paste('->', ix, '(', aadj.final[x,ix], ')', sep = '', collapse = ',')})
+
+        ## asegstats.final$slack.in = asegstats.final$cn - Matrix::colSums(aadj.final)
+        ## asegstats.final$slack.out = asegstats.final$cn - Matrix::rowSums(aadj.final)
+
+        ## asegstats.final$new.ind = asegstats.final$phased.out = asegstats.final$phased.in = asegstats.final$id = NULL
+        ## asegstats.final$tile.id = as.integer(factor(gr.string(gr.stripstrand(asegstats.final), mb = F, other.cols = 'type')))
+
+        ## m = sparseMatrix(seq_along(asegstats.final), asegstats.final$parent, x = 1);
+
+        ## hh = rep(het.sites[, c()], 2)
+        ## hh$count = c(het.sites$high.count, het.sites$low.count)
+        ## hh$type = rep(c('high', 'low'), each = length(het.sites))
+
+        ## hh$ywid = 0.5
+        ## atd = c(
+        ##     gTrack(hh, angle = 0, y.field = 'count', y0 = 0,
+        ##            colormaps = list(type = c('high' = alpha('red', 0.3), 'low' = alpha('blue', 0.3))), name = 'hets', y.quantile = 0.001, lwd.border = 2),
+        ##     gTrack(asegstats.final, angle = 0, y.field = 'cn', y0 = 0,
+        ##            colormaps = list(type = c('high' = alpha('red', 0.3), 'low' = alpha('blue', 0.3), 'total' = alpha('purple', 0.3))), name = 'alleles')
+        ## )
+
+        ## out = list(
+        ##     segstats = jab$segstats,
+        ##     asegstats = asegstats.final,
+        ##     atd = atd,
+        ##     agtrack = atd,
+        ##     aadj = aadj.final,
+        ##     ab.ix = Matrix::which((m %*% adj.ab %*% t(m))!=0, arr.ind = T),
+        ##     ref.ix = Matrix::which((m %*% adj.ref %*% t(m))!=0, arr.ind = T)
+        ## )
+
+        return(jab)
+    }
+
+    #' @name grab.hets
+    #' @title grab.hets
+    #'
+    #' @description
+    #'
+    #' Reads a het pileup table (sites.txt) and returns its alt.count.t / ref.count.t counts in
+    #' long format: every retained site appears once as "major" and once as "minor" allele.
+    #'
+    #' @param agt.fname (character) path to sites.txt (columns seqnames, start, end, alt.frac.n, alt.count.t, ref.count.t are used)
+    #' @param min.frac (numeric) sites are kept only if alt.frac.n is strictly above this value
+    #' @param max.frac (numeric) sites are kept only if alt.frac.n is strictly below this value
+    #'
+    #' @return result of dt2gr on the long table, with fields count and allele
+    grab.hets = function(agt.fname = NULL, min.frac = 0.2, max.frac = 0.8)
+    {
+        fname.ok = !is.null(agt.fname) && file.exists(agt.fname)
+        if (!fname.ok) {
+            stop("agt.fname does not exist")
+        }
+
+        ## read the pileup, then keep the sites inside the (min.frac, max.frac) window
+        hets = fread(agt.fname)
+        hets = hets[alt.frac.n > min.frac & alt.frac.n < max.frac,]
+
+        ## the larger of the two counts is the major allele, the other one the minor allele
+        hets[, alt.is.major := alt.count.t > ref.count.t]
+        hets[, major.count := ifelse(alt.is.major, alt.count.t, ref.count.t)]
+        hets[, minor.count := ifelse(alt.is.major, ref.count.t, alt.count.t)]
+
+        ## long format: all major rows first, followed by all minor rows
+        major.dt = hets[, .(seqnames, start, end, count = major.count, allele = "major")]
+        minor.dt = hets[, .(seqnames, start, end, count = minor.count, allele = "minor")]
+        long.dt = rbind(major.dt, minor.dt)
+
+        ## convert the long table to genomic ranges
+        dt2gr(long.dt)
+    }
+
+    #' @name grab.hets.from.maf
+    #' @title grab.hets.from.maf
+    #'
+    #' @description
+    #'
+    #' get hets into format needed by phased.binstats from HMF maf_approx:
+    #' loads a GRanges from an RDS file and adds per site major.count and minor.count
+    #'
+    #' @param agt.fname (character) path to an RDS file holding a GRanges with fields alt.count.t and ref.count.t
+    #' @param min.frac (numeric) default 0.2, currently unused
+    #'
+    #' @return data.table made from the GRanges, with columns major.count and minor.count added
+    grab.hets.from.maf = function(agt.fname, min.frac = 0.2) {
+
+        ## the input has to be an existing RDS file that contains a GRanges
+        if (!file.exists(agt.fname)) {
+            stop("invalid file")
+        }
+        maf.gr = readRDS(agt.fname)
+        if (!inherits(maf.gr, "GRanges")) {
+            stop("rds file must contain GRanges object.")
+        }
+
+        ## the larger of the two counts is the major allele, the other one the minor allele
+        maf.dt = as.data.table(maf.gr)
+        maf.dt[, ":="(
+            major.count = ifelse(alt.count.t > ref.count.t, alt.count.t, ref.count.t),
+            minor.count = ifelse(alt.count.t > ref.count.t, ref.count.t, alt.count.t)
+        )]
+
+        ## the wide table is returned; melting it into major / minor rows (dt2gr) is disabled
+        maf.dt
+    }
+
+    ## end of utils.R
 
     message("Reading input!")
 
@@ -266,6 +836,1795 @@
 
     message("Starting balance")
 
+    # override gGnome balance with the zc_dev gGnome balance function
+
+    #' @name balance
+    #' @title balance gGnome graphs
+    #' @description
+    #'
+    #' Here we analyze gGraphs with "cn" (copy number) field to enforce integer
+    #' cn and junction balance, ie sum of incoming (or outgoing) edge
+    #' cn should be equal to node copy cn.
+    #'
+    #' The goal is to find a balanced assignment of "cn" to the nodes and edges of the gGraph
+    #' that maximally resembles the input weights while minimizing the loose end penalty.
+    #' The similarity / distance function can be weighted by optional node / edge
+    #' metadata field $weight (when weighted = TRUE).
+    #'
+    #' To output this gGraph, we design a MIP with
+    #' (1) objective function that minimizes (weighted sum of square) distance of fit node and junction copy number to provided values in
+    #'     $cn field
+    #' (2) and lambda* the sum of copy number at non terminal loose ends subject to
+    #' (3) junction balance constraint
+    #' (4) fixing copy number of a subset of nodes and junctions
+    #'
+    #' Objective weight can be modulated at nodes and edges with $weight metadata
+    #' field (default node weight is node width, and edge weight is 1).
+    #' These fields will then set the penalty incurred to a fit of x to that node / edge
+    #' with copy number c and weight w as (x-c)^2/w.
+    #'
+    #' Lambda can be modulated at nodes with $lambda node metadata field (default 1)
+    #'
+    #' For "haplographs" ie graphs that have more than one node overlapping a given location, it may
+    #' be important to constrain total copy number using a haploid read depth signal.
+    #' The marginal parameter enables this through a GRanges annotated with $cn and optionally $weight
+    #' field that provides a target total copy number that the optimization will attempt to satisfy.
+    #' This provided copy number c and weight w (default 1) will be evaluated against the
+    #' sum s of the fit copy numbers of all nodes overlapping that location by adding a penalty
+    #' of (c-s)^2/w to the corresponding solution. marginal can also have an optional logical field
+    #' $fix that will actually constrain the marginal copy number to be equal to the provided value
+    #' (note: that the optimization may be infeasible, and function will error out)
+    #'
+    #' Additional controls can be inputted by changing the graph metadata - e.g. adding fields
+    #' $lb and $ub to nodes and edges will constrain their fit copy number to those bounds.
+    #' Adding $reward field to edges will add a reward for each copy of that edge in the solution.
+    #'
+    #'
+    #' @param gg gGraph with field $cn, can be NA for some nodes and edges, optional field $weight which will adjust the quadratic penalty on the fit to x as (x-$cn)^2/weight
+    #' @param lambda positive number specifying loose end penalty, note if gg$node metadata contain $lambda field then this lambda will be multiplied by the node level lambda (default 10)
+    #' @param marginal GRanges with field $cn and optional $weight field will be used to fit the summed values at each base of the genome to optimally fit the marginal value, optional field $fix will actually constrain the marginal to be the provided value
+    #' @param emarginal Junctions object with marginal CN in the $cn field (and optionally $weight in the weight field). optional field $fix will actually constrain the marginal to be the provided value.
+    #' @param tight indices or expression on node metadata specifying at which nodes to disallow loose ends
+    #' @param nfix indices or expression on node metadata specifying which node cn to fix
+    #' @param efix indices or expression on edge metadata specifying which edge cn to fix
+    #' @param nrelax indices or expression on node metadata specifying which nodes cn to relax
+    #' @param erelax  indices or expression on edge metadata specifying which edges cn to relax
+    #' @param L0  flag whether to apply the loose end penalty as an L0 norm via binary indicators (default TRUE)
+    #' @param loose.collapse (parameter only relevant if L0 = TRUE) will count all unique (by coordinate) instances of loose ends in the graph as the loose end penalty, rather than each instance alone ... useful for fitting a metagenome graph   (FALSE)
+    #' @param phased (logical) indicates whether to run phased/unphased. default = FALSE
+    #' @param ism  (logical) additional ISM constraints; requires L0 = TRUE (default TRUE)
+    #' @param force.major (logical) force major allele CN to be >= minor allele CN (default TRUE)
+    #' @param force.alt (logical) force incorporation of ALT edges, only applicable for phasing (default FALSE)
+    #' @param cnloh (logical) allow CN LOH? only relevant if phased = TRUE. default FALSE.
+    #' @param lp (logical) solve as linear program using abs value (default TRUE)
+    #' @param M  (numeric) big M constraint for L0 norm loose end penalty (default 1e3)
+    #' @param verbose (integer) scalar specifying whether to do verbose output, value 2 will spit out MIP (default 1)
+    #' @param tilim (numeric) time limit on MIP in seconds (10)
+    #' @param epgap (numeric) relative optimality gap threshold between 0 and 1 (default 1e-3)
+    #'
+    #' @param trelim (numeric) max size of uncompressed tree in MB (default 32e3)
+    #' @param nodefileind (numeric) one of 0 (no node file) 1 (in memory compressed) 2 (on disk uncompressed) 3 (on disk compressed) default 1
+    #' @param debug (logical) returns list with names gg and sol. sol contains full RCPLEX solution. (default FALSE)
+    #' @param force.haplotypes (logical) default TRUE
+    #' @param max.span (numeric) the maximum span of an edge below which both endpoints must be on the same parental haplotype default 1e6
+    #' @param use.gurobi (logical) use gurobi optimizer? if TRUE uses gurobi instead of cplex. default FALSE.
+    #' @param nonintegral (logical) run without integer constraints on REF edges and nodes? default FALSE.
+    #'
+    #' @return balanced gGraph maximally resembling input gg in CN while minimizing loose end penalty lambda.
+    #' @author Marcin Imielinski
+    #'
+    #' @export
+
+    balance = function(gg,
+                       lambda = 10,
+                       marginal = NULL,
+                       emarginal = NULL,
+                       tight = NULL,
+                       nfix = NULL, efix = NULL, nrelax = NULL, erelax = NULL,
+                       L0 = TRUE,
+                       loose.collapse = FALSE,
+                       M = 1e3,
+                       phased = FALSE,
+                       ism = TRUE,
+                       force.major = TRUE,
+                       force.alt = FALSE,
+                       cnloh = FALSE,
+                       lp = TRUE,
+                       verbose = 1,
+                       tilim = 10,
+                       trelim = 32e3,
+                       nodefileind = 1,
+                       epgap = 1e-3,
+                       max.span = 1e6, ## max span in bp
+                       debug = FALSE,
+                       use.gurobi = FALSE,
+                       nonintegral = FALSE)
+    {
+        if (verbose) {
+            message("creating copy of input gGraph")
+        }
+
+        if (use.gurobi) {
+            if (!requireNamespace("gurobi", quietly = TRUE)) {
+                stop("use.gurobi is TRUE but gurobi is not installed")
+            }
+        }
+
+        gg = gg$copy
+
+        if (verbose) {
+            message("Checking inputs")
+        }
+
+        if (nodefileind) {
+            if (!(nodefileind %in% c(0,1,2,3))) {
+                warning("Invalid choice for nodefileind, resetting to default 1")
+                nodefileind = 1
+            }
+        }
+        nodefileind = as.integer(nodefileind)
+
+        if (ism) {
+            if (!L0) {
+                stop("ISM can only be set to true if using L0 penalty")
+            }
+        }
+
+        if (!('cn' %in% names(gg$nodes$dt)))
+        {
+            warning('cn field not defined on nodes, setting to NA')
+            gg$nodes$mark(cn = NA_real_)
+        }
+
+        if (!('cn' %in% names(gg$edges$dt)))
+        {
+            warning('cn not defined on edges, providing NA')
+            gg$edges$mark(cn = NA_real_)
+        }
+
+        if (phased) {
+            if (!("allele" %in% names(gg$nodes$dt))) {
+                stop("cannot run phased balance without $allele field in nodes")
+            }
+        }
+
+        if (!is.null(marginal)) {
+            if (!inherits(marginal, 'GRanges') || is.null(marginal$cn)) {
+                stop('marginal must be a GRanges with field $cn')
+            }
+            if (is.null(marginal$fix)) {
+                if (verbose) {
+                    message("$fix not supplied. marginals not fixed by default.")
+                }
+                marginal$fix = 0
+            }
+            if (is.null(marginal$weight)) {
+                if (verbose) {
+                    message("$weight not supplied. set to range width in Mbp by default.")
+                }
+                marginal$weight = width(marginal)
+            }
+        }
+
+        if (!is.null(emarginal)) {
+            if (!inherits(emarginal, 'Junction') || is.null(emarginal$dt$cn)) {
+                stop('emarginal must be Junction with field $cn')
+            }
+            ## don't mutate?
+            ## emarginal = emarginal$copy
+            if (is.null(emarginal$dt$fix)) {
+                if (verbose) {
+                    message('$fix not supplied in emarginal. not fixed by default')
+                }
+                emarginal$set(fix = 0)
+            }
+            if (is.null(emarginal$dt$weight)) {
+                if (verbose) {
+                    message("$weight not supplied in emarginal. set to 1 by default")
+                }
+                emarginal$set(weight = 1)
+            }
+        }
+
+        ## default local lambda: default local lambda is 1 for consistency with JaBbA
+        if (!('lambda' %in% names(gg$nodes$dt)))
+            gg$nodes$mark(lambda = 1)
+
+
+        ## default node weight is its width
+        if (!('weight' %in% names(gg$nodes$dt)))
+        {
+            gg$nodes$mark(weight = width(gg$nodes$gr))
+        }
+
+        ## default edge weight is its width
+        if (!('weight' %in% names(gg$edges$dt)))
+        {
+            gg$edges$mark(weight = 1)
+        }
+
+        ## default reward is 0
+        if (!('reward' %in% names(gg$edges$dt)))
+        {
+            gg$edges$mark(reward = 0)
+        }
+
+        ## handle parsing of efix, nfix, nrelax, erelax
+        if (!any(deparse(substitute(nfix)) == "NULL")) ## R voodo to allow "with" style evaluation
+            nfix = tryCatch(eval(eval(parse(text = substitute(deparse(substitute(nfix)))), parent.frame()), gg$nodes$dt, parent.frame(2)), error = function(e) NULL)
+
+        if (!any(deparse(substitute(nrelax)) == "NULL")) ## R voodo to allow "with" style evaluation
+            nrelax = tryCatch(eval(eval(parse(text = substitute(deparse(substitute(nrelax)))), parent.frame()), gg$nodes$dt, parent.frame(2)), error = function(e) NULL)
+
+        if (!any(deparse(substitute(efix)) == "NULL")) ## R voodo to allow "with" style evaluation
+            efix = tryCatch(eval(eval(parse(text = substitute(deparse(substitute(efix)))), parent.frame()), gg$edges$dt, parent.frame(2)), error = function(e) NULL)
+
+        if (!any(deparse(substitute(erelax)) == "NULL")) ## R voodo to allow "with" style evaluation
+            erelax = tryCatch(eval(eval(parse(text = substitute(deparse(substitute(erelax)))), parent.frame()), gg$edges$dt, parent.frame(2)), error = function(e) NULL)
+
+        if (!any(deparse(substitute(tight)) == "NULL")) ## R voodo to allow "with" style evaluation
+            tight = tryCatch(eval(eval(parse(text = substitute(deparse(substitute(tight)))), parent.frame()), gg$nodes$dt, parent.frame(2)), error = function(e) NULL)
+
+
+        if (is.logical(nfix))
+            nfix = which(nfix)
+
+        if (is.logical(efix))
+            efix = which(efix)
+
+        if (is.logical(nrelax))
+            nrelax = which(nrelax)
+
+        if (is.logical(erelax))
+            erelax = which(erelax)
+
+        if (length(nfix) & verbose)
+            message('Fixing ', length(nfix), ' nodes')
+
+        if (length(efix) & verbose)
+            message('Fixing ', length(efix), ' edges')
+
+        if (length(nrelax) & verbose)
+            message('Relaxing ', length(nrelax), ' nodes')
+
+        gg$nodes[nrelax]$mark(weight = 0)
+
+        if (length(erelax) & verbose)
+            message('Relaxing ', length(erelax), ' edges')
+
+        gg$nodes[erelax]$mark(weight = 0)
+
+        if (!is.logical(tight))
+            tight = 1:length(gg$nodes) %in% tight
+
+        if (any(tight) & verbose)
+            message('Leaving ', sum(tight), ' nodes tight')
+
+        gg$nodes$mark(tight = tight)
+
+        if (is.null(gg$nodes$dt$lb))
+            gg$nodes$mark(lb = 0)
+
+        if (is.null(gg$nodes$dt$ub))
+            gg$nodes$mark(ub = Inf)
+
+        if (is.null(gg$edges$dt$lb))
+            gg$edges$mark(lb = 0)
+
+        if (is.null(gg$edges$dt$ub))
+            gg$edges$mark(ub = Inf)
+
+        if (loose.collapse)
+        {
+            if (verbose)
+                message('Collapsing loose ends')
+
+            uleft = unique(gr.start(gg$nodes$gr))
+            uright = unique(gr.end(gg$nodes$gr))
+
+            gg$nodes$mark(loose.left.id = paste0(gr.match(gr.start(gg$nodes$gr), uleft), 'l'))
+            gg$nodes$mark(loose.right.id = paste0(gr.match(gr.end(gg$nodes$gr), uright), 'r'))
+        }
+        else
+        {
+            gg$nodes$mark(loose.left.id = paste0(1:length(gg$nodes), 'l'))
+            gg$nodes$mark(loose.right.id = paste0(1:length(gg$nodes), 'r'))
+        }
+
+    ########
+        ## VARIABLES
+    ########
+
+        ## create state space, keeping track of graph ids
+        vars = rbind(
+            gg$dt[, .(cn, snode.id, lb, ub, weight, gid = index, type = 'node', vtype = 'I')],
+            gg$sedgesdt[, .(from, to, lb, ub, sedge.id,  cn, reward,
+                            gid = sedge.id, type = 'edge', vtype = 'I',
+                            span = gg$junctions$span[match(edge.id, gg$junctions$dt[, edge.id])])],
+            ## for loose ends lid marks all "unique" loose ends (which if loose.collapse = TRUE
+            ## will be defined on the basis of coordinate overlap)
+            gg$dt[tight == FALSE, .(cn = NA, snode.id, lambda, gid = index,
+                                    ulid = paste0(index, 'i'),
+                                    lid = ifelse(strand == '+', loose.left.id, paste0('-', loose.right.id)),
+                                    type = 'loose.in', vtype = 'I')], ## incoming loose ends
+            gg$dt[tight == FALSE, .(cn = NA, snode.id, lambda, gid = index,
+                                    ulid = paste0(index, 'o'),
+                                    lid = ifelse(strand == '+', loose.right.id, paste0('-', loose.left.id)),
+                                    type = 'loose.out', vtype = 'I')], ## outgoing loose ends
+            gg$dt[tight == FALSE, .(gid = index, cn,
+                                    weight, type = 'nresidual',
+                                    vtype = 'C')], ## node residual
+            gg$sedgesdt[, .(gid = sedge.id, cn,
+                            weight, type = 'eresidual',
+                            vtype = 'C')], ## edge residual
+            fill = TRUE)
+
+
+        ## add "slush" variables - there will be one per chromosome
+        if (nonintegral) {
+
+            if (verbose) { message("Adding slush variables") }
+            ## grab standard chromosomes from gGraph
+            chr.names = grep("^(chr)*[0-9XY]+$", as.character(seqlevels(gg)), value = TRUE)
+            slush.dt = data.table(chr = chr.names,
+                                  lb = -0.49999,
+                                  ub = 0.49999,
+                                  type = "slush",
+                                  vtype = "C",
+                                  gid = 1:length(chr.names))
+
+            vars = rbind(vars, slush.dt, fill = TRUE)
+
+            vars[type == "node", chr := as.character(gg$dt$seqnames)[match(snode.id, gg$dt$snode.id)]]
+            vars[!(chr %in% slush.dt[, chr]), chr := NA_character_]
+
+        }
+
+        if (L0)
+        {
+            ## loose ends are labeled with lid and ulid, lid is only relevant if loose.collapse is true
+            ## (i.e. we need indicator.sum and indicator.sum.indicator
+            if (verbose) {
+                message("adding l0 penalty indicator")
+            }
+
+            vars = rbind(vars,
+                         rbind(
+                             vars[type == 'loose.in', ][ , type := 'loose.in.indicator'][, vtype := 'B'][, gid := lid],
+                             vars[type == 'loose.out', ][ , type := 'loose.out.indicator'][, vtype := 'B'][, gid := lid]
+                         ))
+
+            if (loose.collapse)
+            {
+                ## sum will sum all the loose ends assocaited with the same lid
+                vars = rbind(vars,
+                             unique(rbind(
+                                 vars[type == 'loose.in', ][ , type := 'loose.in.indicator.sum'][, vtype := 'I'][, gid := lid],
+                                 vars[type == 'loose.out', ][ , type := 'loose.out.indicator.sum'][, vtype := 'I'][, gid := lid]
+                             ), by = 'gid'))
+
+                ## sum.indicator is an binary indicator on the sum
+                vars = rbind(vars,
+                             rbind(
+                                 vars[type == 'loose.in.indicator.sum', ][ , type := 'loose.in.indicator.sum.indicator'][, vtype := 'B'][, gid := lid],
+                                 vars[type == 'loose.out.indicator.sum', ][ , type := 'loose.out.indicator.sum.indicator'][, vtype := 'B'][, gid := lid]
+                             ))
+            }
+        }
+
+        if (!is.null(marginal)) {
+            ## first disjoin marginal against the nodes
+            ## ie wee ned to create a separate residual variable for every unique
+            ## disjoint overlap of marginal with the nodes
+            dmarginal = gg$nodes$gr %>% gr.stripstrand %*% grbind(marginal %>% gr.stripstrand) %>%
+                disjoin %$% marginal[, c('cn', 'weight', 'fix')] %Q%
+                (!is.na(cn)) %Q% (!is.na(weight)) %Q% (!is.infinite(weight))
+
+            vars = rbind(vars,
+                         gr2dt(dmarginal)[, .(cn, weight, mfix = fix>0,
+                                              rid = 1:.N, type = 'mresidual', vtype = 'C')][, gid := rid],
+                         fill = TRUE
+                         )
+        }
+
+
+        if (!is.null(emarginal)) {
+            ## we need to identify which junction in the marginal each junction in the phased graph corresponds to
+            junction.map = merge.Junction(
+                phased = gg$junctions[, c()],
+                emarginal = emarginal[, c("cn", "weight", "fix")],
+                cartesian = TRUE,
+                all.x = TRUE)$dt
+            ## match this back with edge id and add this to vars
+            ## vars[type == "edge", emarginal.id := junction.map[abs(sedge.id), seen.by.emarginal]]
+            vars[type == "edge", emarginal.id := junction.map[abs(sedge.id), subject.id]]
+            ## add weight and target total CN
+            emarginal = merge.data.table(unique(
+                vars[type == "edge" & !is.na(emarginal.id),][, type := "emresidual"][, .(emarginal.id, sedge.id, lb = -M, ub = M, gid, type, vtype = "C", from, to)],
+                by = "emarginal.id"),
+                junction.map[, .(subject.id, weight, cn, fix)],
+                by.x = "emarginal.id",
+                by.y = "subject.id")
+            vars = rbind(vars, emarginal, fill = TRUE)
+        }
+
+        if (lp) {
+            ## need delta plus and delta minus for nodes and edges
+            delta.node = gg$dt[tight == FALSE, .(gid = index, cn, weight, vtype = 'C')]
+            delta.edge = gg$sedgesdt[, .(gid = sedge.id, cn, weight, reward, vtype = 'C')]
+
+            deltas = rbind(
+                delta.node[, .(gid, weight, vtype, type = "ndelta.plus")],
+                delta.node[, .(gid, weight, vtype, type = "ndelta.minus")],
+                delta.edge[, .(gid, weight, vtype, type = "edelta.plus")],
+                delta.edge[, .(gid, weight, vtype, type = "edelta.minus")]
+            )
+
+            vars = rbind(
+                vars,
+                deltas,
+                fill = TRUE
+            )
+
+            ## add deltas for marginals if marginals are supplied
+            if (!is.null(marginal)) {
+                mdeltas = rbind(
+                    vars[type == "mresidual", .(rid, weight, vtype, type = "mdelta.plus")][, gid := rid],
+                    vars[type == "mresidual", .(rid, weight, vtype, type = "mdelta.minus")][, gid := rid]
+                )
+                vars = rbind(vars, mdeltas, fill = TRUE)
+            }
+
+            ## add deltas for emresiduals if emarginals are supplied
+            if (!is.null(emarginal)) {
+                emdeltas = rbind(
+                    vars[type == "emresidual", .(emarginal.id, weight, vtype, type = "emdelta.plus")][, gid := emarginal.id],
+                    vars[type == "emresidual", .(emarginal.id, weight, vtype, type = "emdelta.minus")][, gid := emarginal.id]
+                )
+                vars = rbind(vars, emdeltas, fill = TRUE)
+            }
+        }
+
+        ## moved from being ISM-specific annotation as this is more generally useful information
+        ## add telomeric annotation
+        qtips = gr.end(si2gr(seqlengths(gg$nodes))) ## location of q arm tips
+        term.in = c(which(start(gg$nodes$gr) == 1), ## beginning of chromosome
+                    -which(gg$nodes$gr %^% qtips)) ## flip side of chromosome end
+        term.out = -term.in ## out is reciprocal of in
+
+        ## annotate loose indicators with this
+        vars[!is.na(snode.id), telomeric := ifelse(snode.id %in% term.in | snode.id %in% term.out,
+                                                   TRUE,
+                                                   FALSE)]
+
+
+
+        if (phased) {
+            ## add allele information and og.node.id
+            node.match = match(vars[, snode.id], gg$dt$snode.id)
+            vars[, ":="(allele = gg$dt$allele[node.match],
+                        og.node.id = gg$dt$og.node.id[node.match])]
+
+
+
+
+            ## add ref/alt information and og.edge.id
+            edge.match = match(vars[, sedge.id], gg$sedgesdt$sedge.id)
+            gg$edges$mark(span = gg$junctions$span)
+
+            vars[, ":="(ref.or.alt = gg$sedgesdt$type[edge.match],
+                        connection = gg$sedgesdt$connection[edge.match],
+                        og.edge.id = gg$sedgesdt$og.edge.id[edge.match],
+                        span = gg$sedgesdt$span[edge.match], ## NEED SPAN
+                        n1 = gg$dt$snode.id[gg$sedgesdt$from[edge.match]],
+                        n2 = gg$dt$snode.id[gg$sedgesdt$to[edge.match]])]
+
+            vars[, n1.side := ifelse(n1 > 0, "right", "left")]
+            vars[, n2.side := ifelse(n2 > 0, "left", "right")]
+            vars[, n1 := abs(n1)]
+            vars[, n2 := abs(n2)]
+
+            edge.indicator.vars = vars[type == "edge"][, type := "edge.indicator"][, vtype := "B"][, gid := sedge.id]
+            vars = rbind(vars, edge.indicator.vars, fill = TRUE)
+
+            #' zchoo Thursday, Sep 02, 2021 05:30:53 PM
+            #' moved to earlier
+            ## REF edge configuration constraint (added by default basically)
+            ## only add this if there are no unphased nodes
+            if (cnloh) {
+
+                ## if allow CNLOH, the sum of edge indicators corresponding with og edge id is LEQ 2
+                ## this is only allowed in constant CN regions and if breakpoint is not shared with any ALT edges
+
+                ## penalize CNLOH edges
+
+                if (!is.null(gg$edges$dt$cnloh)) {
+                    cnloh.edges = gg$edges$dt[cnloh == TRUE & type == "ALT", edge.id] %>% unique
+                    if (verbose) {
+                        message("Number of marked CNLOH edges: ", length(cnloh.edges))
+                    }
+
+                    ## add CNLOH annotation to variables
+                    ## browser()
+                    vars[, cnloh := FALSE]
+                    vars[(type == "edge.indicator" | type == "edge" | type == "eresidual") &
+                         ref.or.alt == "ALT" & (abs(sedge.id) %in% cnloh.edges) & (sedge.id > 0),
+                         ":="(cnloh = TRUE)]
+
+                } else {
+                    warning("CNLOH not specified on edges. Disallowing!")
+                    cnloh.og.edges = c()
+                    vars[, cnloh := FALSE]
+                }
+            } else {
+
+                cnloh.og.edges = c()
+                vars[, cnloh := FALSE]
+
+            }
+
+            ## add node haplotype indicators
+            ## these are binary indicators that determine whether a node belongs to H1
+            ## only constrain positive-stranded nodes due to skew symmetry
+            haplotype.indicators = gg$dt[(allele == "major" | allele == "minor") & snode.id > 0,
+                                         .(cn, snode.id, lb, ub, weight, og.node.id,
+                                           allele, gid = index, type = 'haplotype', vtype = 'B')]
+            vars = rbind(vars, haplotype.indicators, fill = TRUE)
+
+            ## add H1 and H2 'AND' indicators which should have n1/n1.side/n2/n2.side metadata
+            ## only add these for low-span edges
+            ## and where CNLOH is FALSE
+            h1.and.indicators = vars[sedge.id > 0 & type == "edge" & (connection == "straight" | connection == "cross") & span < max.span & cnloh == FALSE,][, vtype := "B"][, type := "h1.and.indicator"][, gid := sedge.id]
+            h2.and.indicators = vars[sedge.id > 0 & type == "edge" & (connection == "straight" | connection == "cross") & span < max.span & cnloh == FALSE,][, vtype := "B"][, type := "h2.and.indicator"][, gid := sedge.id]
+
+            vars = rbind(vars, h1.and.indicators, h2.and.indicators, fill = TRUE)
+
+        }
+
+
+        ## if we want to implement edge reward or ISM, we need to add edge indicators
+        ## these are binary variables that allow us to reward/penalize L0 norms
+        ##if (TRUE) { ##(ism | any(gg$edges$dt$reward != 0, na.rm = TRUE)) {
+        ## if not phased, must add edge indicators (for just the ALT edges)
+        if (!phased) {
+            edge.match = match(vars[, sedge.id], gg$sedgesdt$sedge.id)
+            vars[, ":="(ref.or.alt = gg$sedgesdt$type[edge.match])] ## need ref.or.alt information
+            edge.indicator.vars = vars[type == "edge" & ref.or.alt == "ALT"][, type := "edge.indicator"][, vtype := "B"][, gid := sedge.id]
+            vars = rbind(vars, edge.indicator.vars, fill = TRUE)
+        }
+
+        ## loose indicators are only required if running with ISM
+        if (ism) {
+            vars[type == "loose.in.indicator" & sign(snode.id) == 1, ee.id := paste(snode.id, "left")]
+            vars[type == "loose.out.indicator" & sign(snode.id) == 1, ee.id := paste(snode.id, "right")]
+        }
+
+        ## but even without ISM, if we want to add reward, we must add edge indicators
+        vars[type == "edge.indicator" & sign(sedge.id) == 1 & ref.or.alt == "ALT",
+             ":="(ee.id.n1 = paste(gg$edges$dt$n1[match(sedge.id, gg$edges$dt$sedge.id)],
+                                   gg$edges$dt$n1.side[match(sedge.id, gg$edges$dt$sedge.id)]),
+                  ee.id.n2 = paste(gg$edges$dt$n2[match(sedge.id, gg$edges$dt$sedge.id)],
+                                   gg$edges$dt$n2.side[match(sedge.id, gg$edges$dt$sedge.id)]))]
+
+        if (phased) {
+            ## homologous extremity exclusivity (only for phased graphs)
+            ## get stranded breakpoint ID's associated with the start and end of each node
+
+            ## number of unique starts should be equal to number of snodes in the original unphased graph
+            ## aka 2 * number of og edge ids
+
+            vars[type == "loose.in.indicator", hee.id := paste(og.node.id, "in")]
+            vars[type == "loose.out.indicator", hee.id := paste(og.node.id, "out")]
+
+            vars[type == "edge.indicator" & ref.or.alt == "ALT" & sign(sedge.id) == 1,
+                 ":="(og.n1 = gg$dt$og.node.id[from],
+                      og.n1.side = gg$edges$dt$n1.side[match(abs(sedge.id), gg$edges$dt$edge.id)],
+                      og.n2 = gg$dt$og.node.id[to],
+                      og.n2.side = gg$edges$dt$n2.side[match(abs(sedge.id), gg$edges$dt$edge.id)])]
+
+            vars[type == "edge.indicator" & ref.or.alt == "ALT" & sign(sedge.id) == 1,
+                 ":="(hee.id.n1 = ifelse(og.n1.side == "left",
+                                         paste(og.n1, "in"),
+                                         paste(og.n1, "out")),
+                      hee.id.n2 = ifelse(og.n2.side == "left",
+                                         paste(og.n2, "in"),
+                                         paste(og.n2, "out")))]
+
+            ## reciprocal homologous extremity exclusivity
+            ## implement config indicators. there is one per og.edge.id per configuration
+            straight.config = unique(vars[type == "edge.indicator" & ref.or.alt == "REF" & sedge.id > 0, ][, type := "straight.config"][, config.id := paste("straight", og.edge.id)], by = "og.edge.id")
+            cross.config = unique(vars[type == "edge.indicator" & ref.or.alt == "REF" & sedge.id > 0, ][, type := "cross.config"][, config.id := paste("cross", og.edge.id)], by = "og.edge.id")
+
+            ## add these to vars
+            vars = rbind(vars, straight.config, cross.config, fill = TRUE)
+
+            ## add straight/cross to REF edges
+            vars[type == "edge.indicator" & ref.or.alt == "REF",
+                 connection := gg$sedgesdt$connection[match(sedge.id, gg$sedgesdt$sedge.id)]]
+
+            ## add config ID's to corresponding edge indicators
+            vars[type == "edge.indicator" & ref.or.alt == "REF" & sedge.id > 0,
+                 config.id := paste(connection, og.edge.id)]
+
+
+            ## add config ID's to corresponding edge indicators
+            vars[type == "edge.indicator" & ref.or.alt == "REF" & sedge.id > 0,
+                 config.id := paste(connection, og.edge.id)]
+
+            ## add straight edge id e.g. for each n1 and n2, add the sedge.id of the corresponding straight edge
+            straight.sedges = gg$edges$dt[type == "REF" & connection == "straight" & sedge.id > 0,
+                                          .(n1.full = paste(n1, n1.side), n2.full = paste(n2, n2.side), sedge.id)]
+            cross.sedges = gg$edges$dt[type == "REF" & connection == "cross" & sedge.id > 0,
+                                       .(n1.full = paste(n1, n1.side), n2.full = paste(n2, n2.side), sedge.id)]
+
+
+            ## pull alt edges from sedgesdt
+            alt.sedges = gg$edges$dt[type == "ALT" & sedge.id > 0,
+                                     .(n1.full = paste(n1, n1.side), n2.full = paste(n2, n2.side), sedge.id)]
+
+            alt.sedges[, ":="(s1 = straight.sedges$sedge.id[match(n1.full, straight.sedges$n1.full)],
+                              s2 = straight.sedges$sedge.id[match(n2.full, straight.sedges$n1.full)],
+                              s3 = straight.sedges$sedge.id[match(n1.full, straight.sedges$n2.full)],
+                              s4 = straight.sedges$sedge.id[match(n2.full, straight.sedges$n2.full)])]
+
+            alt.sedges[, ":="(c1 = cross.sedges$sedge.id[match(n1.full, cross.sedges$n1.full)],
+                              c2 = cross.sedges$sedge.id[match(n2.full, cross.sedges$n1.full)],
+                              c3 = cross.sedges$sedge.id[match(n1.full, cross.sedges$n2.full)],
+                              c4 = cross.sedges$sedge.id[match(n2.full, cross.sedges$n2.full)])]
+
+            ## pull loose ends
+            vars[type == "loose.in.indicator" & snode.id > 0, n2.full := paste(snode.id, "left")]
+            vars[type == "loose.out.indicator" & snode.id > 0, n1.full := paste(snode.id, "right")]
+
+            ## merge sedge.id
+            vars[type == "loose.in.indicator" & snode.id > 0, ":="(s = straight.sedges$sedge.id[match(n2.full, straight.sedges$n2.full)])]
+            vars[type == "loose.out.indicator" & snode.id > 0, ":="(s = straight.sedges$sedge.id[match(n1.full, straight.sedges$n1.full)])]
+
+            vars[type == "loose.in.indicator" & snode.id > 0, ":="(c = cross.sedges$sedge.id[match(n2.full, cross.sedges$n2.full)])]
+            vars[type == "loose.out.indicator" & snode.id > 0, ":="(c = cross.sedges$sedge.id[match(n1.full, cross.sedges$n1.full)])]
+
+
+            ## merge this info into vars
+            vars = merge(vars,
+                         alt.sedges[, .(sedge.id, s1, s2, s3, s4, c1, c2, c3, c4)],
+                         by = "sedge.id",
+                         all.x = TRUE,
+                         all.y = FALSE)
+
+        }
+
+        vars[, id := seq_len(.N)] ## row index used as the variable id in the optimization (seq_len stays valid when vars has 0 rows)
+        vars[is.na(lb), lb := -Inf] ## missing lower bound -> unbounded below
+        vars[is.na(ub), ub := Inf] ## missing upper bound -> unbounded above
+        vars[, relax := FALSE][, fix := FALSE] ## both flags start out FALSE for every variable
+        if ("mresidual" %in% vars$type) {
+            vars[type == 'mresidual' & mfix == TRUE, ":="(lb = 0, ub = 0)] ## pin flagged marginal residuals to zero
+            message("Number of fixed marginals: ", nrow(vars[type == 'mresidual' & mfix == TRUE,]))
+        }
+        if ("emresidual" %in% vars$type) {
+            vars[type == "emresidual" & fix == TRUE, ":="(lb = 0, ub = 0)] ## NOTE(review): fix was reset to FALSE for all rows just above, so this selects nothing -- was mfix intended? confirm
+        }
+
+        ## redo setting lb and ub
+        vars[type %in% c('node', 'edge') & is.na(lb), lb := 0]
+        vars[type %in% c('node', 'edge') & is.na(ub), ub := M]
+        vars[type %in% c('node', 'edge') & lb < 0, lb := 0]
+        vars[type %in% c('node', 'edge') & ub > M, ub := M]
+        ## vars[type %in% c('node', 'edge'), lb := ifelse(is.na(lb), 0, pmax(lb, 0, na.rm = TRUE)]
+        ## vars[type %in% c('node', 'edge'), ub := ifelse(is.na(ub), M, pmin(ub, M, na.rm = TRUE))]
+        vars[type %in% c('loose.in', 'loose.out'), ":="(lb = 0, ub = Inf)]
+
+        ## reward shouldn't have to be positive
+        ## vars[type %in% c('edge'), reward := pmax(reward, 0, na.rm = TRUE)]
+
+
+        ## figure out junctions and nodes to fix
+
+        vars[!is.na(cn) & type == 'node' & abs(snode.id) %in% nfix, ":="(lb = cn, ub = cn, fix = TRUE)]
+        vars[!is.na(cn) & type == 'edge' & abs(sedge.id) %in% efix, ":="(lb = cn, ub = cn, fix = TRUE)]
+
+        ## figure out terminal node sides for in and out loose ends
+        ## these will not have loose ends penalized
+        qtips = gr.end(si2gr(seqlengths(gg$nodes))) ## location of q arm tips
+        term.in = c(which(start(gg$nodes$gr) == 1), ## beginning of chromosome
+                    -which(gg$nodes$gr %^% qtips)) ## flip side of chromosome end
+        term.out = -term.in
+        vars$terminal = FALSE
+        vars[(type %in% c('loose.in', 'loose.in.indicator')) & (snode.id %in% term.in), terminal := TRUE]
+        vars[(type %in% c('loose.out', 'loose.out.indicator')) & (snode.id %in% term.out), terminal := TRUE]
+
+        ## if not using integral constraints,
+        ## change the vtype of terminal loose ends, nodes, and REF edges
+        ## additionally relax their lower bound to -0.5
+        if (nonintegral) {
+            vars[type == "loose.in" & (snode.id %in% term.in), vtype := "C"]
+            vars[type == "loose.out" & (snode.id %in% term.out), vtype := "C"]
+            vars[type == "loose.in" & (snode.id %in% term.in), lb := -0.4999]
+            vars[type == "loose.out" & (snode.id %in% term.out), lb := -0.4999]
+            vars[type == "edge" & ref.or.alt == "REF", vtype := "C"]
+            vars[type == "edge" & ref.or.alt == "REF", lb := -0.4999]
+            ## vars[type == "node", lb := -0.4999]
+            ## vars[type == "node", vtype := "C"]
+        }
+
+        ## fix the heaviest node
+        ## browser()
+        ## maxn = vars[type == "node"][which.max(weight), snode.id]
+        ## vars[snode.id == maxn & type == "node", ":="(ub = (cn), lb = (cn))]
+
+        ## browser()
+        ## table(vars[, .(type, vtype)])
+
+    ########
+        ## CONSTRAINTS
+        ## the key principle behind this "melted" form of constraint building is the cid
+        ## (constraint id) which is the key that will group coefficients into constraints
+        ## when we finally build the matrices.  So all we need to do is make sure that
+        ## that value / cid pairs make sense and that every cid has an entry in b
+    ########
+
+        ## we need one junction balance constraint per loose end
+
+        ## constraints indexed by cid
+        if (nonintegral) {
+
+            ## if running without integer constraints we have to include the slush variables
+            ## for each node
+            slush.sub = vars[type == "slush"]
+            node.slush.in = vars[type == "node", .(value = -1,
+                                                   id = slush.sub$id[match(chr, slush.sub$chr)],
+                                                   cid = paste("in", gid))]
+            node.slush.out = vars[type == "node", .(value = -1,
+                                                   id = slush.sub$id[match(chr, slush.sub$chr)],
+                                                   cid = paste("out", gid))]
+
+            node.slush = rbind(node.slush.in, node.slush.out)[!is.na(id)]
+
+            constraints = rbind(
+                node.slush,
+                vars[type == 'loose.in', .(value = 1, id, cid = paste('in', gid))],
+                vars[type == 'edge', .(value = 1, id, cid = paste('in', to))],
+                vars[type == 'node', .(value = -1, id, cid = paste('in', gid))],
+                vars[type == 'loose.out', .(value = 1, id, cid = paste('out', gid))],
+                vars[type == 'edge', .(value = 1, id, cid = paste('out', from))],
+                vars[type == 'node', .(value = -1, id, cid = paste('out', gid))],
+                fill = TRUE)
+
+
+        } else {
+            constraints = rbind(
+                vars[type == 'loose.in', .(value = 1, id, cid = paste('in', gid))],
+                vars[type == 'edge', .(value = 1, id, cid = paste('in', to))],
+                vars[type == 'node', .(value = -1, id, cid = paste('in', gid))],
+                vars[type == 'loose.out', .(value = 1, id, cid = paste('out', gid))],
+                vars[type == 'edge', .(value = 1, id, cid = paste('out', from))],
+                vars[type == 'node', .(value = -1, id, cid = paste('out', gid))],
+                fill = TRUE)
+
+        }
+
+        b = rbind(
+            vars[type == 'node', .(value = 0, sense = 'E', cid = paste('in', gid))],
+            vars[type == 'node', .(value = 0, sense = 'E', cid = paste('out', gid))],
+            fill = TRUE)
+
+        ## add to the constraints the definitions of the node and edge
+        if (nonintegral) {
+
+            ## if running without integer constraints we have to include the slush variables
+            ## for each node
+            slush.sub = vars[type == "slush"]
+            node.slush = vars[type == "node", .(value = 1,
+                                                id = slush.sub$id[match(chr, slush.sub$chr)],
+                                                cid = paste("nresidual", gid))]
+            constraints = rbind(
+                constraints,
+                rbind(
+                    node.slush,
+                    vars[type == 'node', .(value = 1, id, cid = paste('nresidual', gid))],
+                    vars[type == 'nresidual', .(value = -1, id, cid = paste('nresidual', gid))],
+                    vars[type == 'edge', .(value = 1, id, cid = paste('eresidual', gid))],
+                    vars[type == 'eresidual', .(value = -1, id, cid = paste('eresidual', gid))],
+                    fill = TRUE)
+            )
+        } else {
+            constraints = rbind(
+                constraints,
+                rbind(
+                    vars[type == 'node', .(value = 1, id, cid = paste('nresidual', gid))],
+                    vars[type == 'nresidual', .(value = -1, id, cid = paste('nresidual', gid))],
+                    vars[type == 'edge', .(value = 1, id, cid = paste('eresidual', gid))],
+                    vars[type == 'eresidual', .(value = -1, id, cid = paste('eresidual', gid))],
+                    fill = TRUE)
+            )
+        }
+
+        b = rbind(b,
+                  vars[type == 'node', .(value = cn, sense = 'E', cid = paste('nresidual', gid))],
+                  vars[type == 'edge', .(value = cn, sense = 'E', cid = paste('eresidual', gid))],
+                  fill = TRUE)
+
+        ## add the reverse complement equality constraints on nodes and edges
+        constraints = rbind(
+            constraints,
+            rbind( ## +1 coefficient for positive nodes, -1 for negative nodes, matched by abs (snode.id)
+                vars[type == 'node', .(value = sign(snode.id), id, cid = paste('nrc', abs(snode.id)))],
+                vars[type == 'edge', .(value = sign(sedge.id), id, cid = paste('erc', abs(sedge.id)))],
+                fill = TRUE)
+        )
+
+        b = rbind(b,
+                  vars[type == 'node' & snode.id>0, .(value = 0, sense = 'E', cid = paste('nrc', abs(snode.id)))],
+                  vars[type == 'edge' & sedge.id>0, .(value = 0, sense = 'E', cid = paste('erc', abs(sedge.id)))],
+                  fill = TRUE)
+
+
+        ## if solving as LP, add deltas constraints (absolute value trick)
+
+        if (lp) {
+            if (verbose) {
+                message("adding delta constraints for LP")
+            }
+
+        vars[type %like% "delta.plus" | type %like% "delta.minus", ":="(ub = M, lb = 0)]
+
+            ## add the residual constraints
+            ndelta.slack = rbind(
+                vars[type == "nresidual", .(value = -1, id, cid = paste("ndelta.minus.slack", gid))],
+                vars[type == "ndelta.minus", .(value = -1, id, cid = paste("ndelta.minus.slack", gid))],
+                vars[type == "nresidual", .(value = 1, id, cid = paste("ndelta.plus.slack", gid))],
+                vars[type == "ndelta.plus", .(value = -1, id, cid = paste("ndelta.plus.slack", gid))]
+            )
+
+            ndelta.slack.rhs = rbind(
+                vars[type == "ndelta.minus", .(value = 0, sense = "L", cid = paste("ndelta.minus.slack", gid))],
+                vars[type == "ndelta.plus", .(value = 0, sense = "L", cid = paste("ndelta.plus.slack", gid))]
+            )
+
+            edelta.slack = rbind(
+                vars[type == "eresidual", .(value = -1, id, cid = paste("edelta.minus.slack", gid))],
+                vars[type == "edelta.minus", .(value = -1, id, cid = paste("edelta.minus.slack", gid))],
+                vars[type == "eresidual", .(value = 1, id, cid = paste("edelta.plus.slack", gid))],
+                vars[type == "edelta.plus", .(value = -1, id, cid = paste("edelta.plus.slack", gid))]
+            )
+
+            edelta.slack.rhs = rbind(
+                vars[type == "edelta.minus", .(value = 0, sense = "L", cid = paste("edelta.minus.slack", gid))],
+                vars[type == "edelta.plus", .(value = 0, sense = "L", cid = paste("edelta.plus.slack", gid))]
+            )
+
+            mdelta.slack = rbind(
+                vars[type == "mresidual", .(value = -1, id, cid = paste("mdelta.minus.slack", gid))],
+                vars[type == "mdelta.minus", .(value = -1, id, cid = paste("mdelta.minus.slack", gid))],
+                vars[type == "mresidual", .(value = 1, id, cid = paste("mdelta.plus.slack", gid))],
+                vars[type == "mdelta.plus", .(value = -1, id, cid = paste("mdelta.plus.slack", gid))]
+            )
+
+            mdelta.slack.rhs = rbind(
+                vars[type == "mdelta.minus", .(value = 0, sense = "L", cid = paste("mdelta.minus.slack", gid))],
+                vars[type == "mdelta.plus", .(value = 0, sense = "L", cid = paste("mdelta.plus.slack", gid))]
+            )
+
+            constraints = rbind(constraints, ndelta.slack, edelta.slack, mdelta.slack, fill = TRUE)
+            b = rbind(b, ndelta.slack.rhs, edelta.slack.rhs, mdelta.slack.rhs, fill = TRUE)
+
+        }
+
+        if (phased) {
+
+            ## add haplotype indicator constraints
+            ## e.g. the haplotype indicators corresponding to the same og node must add up to 1
+            iconstraints = vars[type == "haplotype" & snode.id > 0,
+                                .(value = 1, id, cid = paste("haplotype.node", og.node.id))]
+            rhs = unique(vars[type == "haplotype",
+                              .(value = 1, sense = "E", cid = paste("haplotype.node", og.node.id))],
+                         by = "cid")
+
+            constraints = rbind(constraints, iconstraints, fill = TRUE)
+            b = rbind(b, rhs, fill = TRUE)
+
+            ## check that there are two haplotype indicators per og node id
+            ## browser()
+            ## tmp = iconstraints[, .(count = .N), by = cid]
+            ## all(tmp[, count] == 2, na.rm = TRUE)
+            ## length(unique(rhs[, cid])) == length(unique(iconstraints[, cid]))
+            ## length(unique(rhs[, cid])) == length(unique(gg$nodes$dt[allele == "major", og.node.id]))
+
+            ## add H1 AND constraint
+            h1.and.ids = merge.data.table(vars[type == "h1.and.indicator", .(n1, n2, edge.id = id, sedge.id)],
+                                          vars[type == "haplotype", .(n1.snode.id = snode.id, n1.id = id)],
+                                          by.x = "n1",
+                                          by.y = "n1.snode.id") %>%
+                merge.data.table(vars[type == "haplotype", .(n2.snode.id = snode.id, n2.id = id)],
+                                 by.x = "n2",
+                                 by.y = "n2.snode.id")
+
+            h2.and.ids = merge.data.table(vars[type == "h2.and.indicator", .(n1, n2, edge.id = id, sedge.id)],
+                                          vars[type == "haplotype", .(n1.snode.id = snode.id, n1.id = id)],
+                                          by.x = "n1",
+                                          by.y = "n1.snode.id") %>%
+                merge.data.table(vars[type == "haplotype", .(n2.snode.id = snode.id, n2.id = id)],
+                                 by.x = "n2",
+                                 by.y = "n2.snode.id")
+
+            ## verify only + sedge id
+            ## browser()
+
+            ## three linear constraints (c1-c3) are needed to implement the h1 AND indicator
+            iconstraints = rbind(h1.and.ids[, .(value = 1, id = edge.id, cid = paste("h1.and.c1", sedge.id))],
+                                 h1.and.ids[, .(value = -1, id = n1.id, cid = paste("h1.and.c1", sedge.id))],
+                                 h1.and.ids[, .(value = 1, id = edge.id, cid = paste("h1.and.c2", sedge.id))],
+                                 h1.and.ids[, .(value = -1, id = n2.id, cid = paste("h1.and.c2", sedge.id))],
+                                 h1.and.ids[, .(value = 1, id = edge.id, cid = paste("h1.and.c3", sedge.id))],
+                                 h1.and.ids[, .(value = -1, id = n1.id, cid = paste("h1.and.c3", sedge.id))],
+                                 h1.and.ids[, .(value = -1, id = n2.id, cid = paste("h1.and.c3", sedge.id))])
+
+            rhs = rbind(h1.and.ids[, .(value = 0, sense = "L", cid = paste("h1.and.c1", sedge.id))],
+                        h1.and.ids[, .(value = 0, sense = "L", cid = paste("h1.and.c2", sedge.id))],
+                        h1.and.ids[, .(value = -1, sense = "G", cid = paste("h1.and.c3", sedge.id))])
+
+            constraints = rbind(constraints, iconstraints, fill = TRUE)
+            b = rbind(b, rhs, fill = TRUE)
+
+            ## tmp = iconstraints[, .(count = .N), by = cid]
+            ## all(tmp[cid %like% "c1", count] == 2)
+            ## all(tmp[cid %like% "c2", count] == 2)
+            ## all(tmp[cid %like% "c3", count] == 3)
+
+
+            iconstraints = rbind(h2.and.ids[, .(value = 1, id = edge.id, cid = paste("h2.and.c1", sedge.id))],
+                                 h2.and.ids[, .(value = 1, id = n1.id, cid = paste("h2.and.c1", sedge.id))],
+                                 h2.and.ids[, .(value = 1, id = edge.id, cid = paste("h2.and.c2", sedge.id))],
+                                 h2.and.ids[, .(value = 1, id = n2.id, cid = paste("h2.and.c2", sedge.id))],
+                                 h2.and.ids[, .(value = 1, id = edge.id, cid = paste("h2.and.c3", sedge.id))],
+                                 h2.and.ids[, .(value = 1, id = n1.id, cid = paste("h2.and.c3", sedge.id))],
+                                 h2.and.ids[, .(value = 1, id = n2.id, cid = paste("h2.and.c3", sedge.id))])
+
+            rhs = unique(rbind(h2.and.ids[, .(value = 1, sense = "L", cid = paste("h2.and.c1", sedge.id))],
+                               h2.and.ids[, .(value = 1, sense = "L", cid = paste("h2.and.c2", sedge.id))],
+                               h2.and.ids[, .(value = 1, sense = "G", cid = paste("h2.and.c3", sedge.id))]),
+                         by = "cid")
+
+            constraints = rbind(constraints, iconstraints, fill = TRUE)
+            b = rbind(b, rhs, fill = TRUE)
+
+            ## verify that there are no weird NA's and that there is only one set of constraints per sedge.id
+            ## tmp = iconstraints[, .(count = .N), by = cid]
+            ## all(tmp[cid %like% "c1", count] == 2)
+            ## all(tmp[cid %like% "c2", count] == 2)
+            ## all(tmp[cid %like% "c3", count] == 3)
+
+            ## connect edge indicators to the haplotype configuration of connected edges
+            iconstraints = rbind(vars[type == "h1.and.indicator",
+                                      .(value = -1, id, cid = paste("haplotype.indicator", sedge.id))],
+                                 vars[type == "h2.and.indicator",
+                                      .(value = -1, id, cid = paste("haplotype.indicator", sedge.id))],
+                                 vars[type == "edge.indicator" &
+                                      (sedge.id %in% vars[type == "h1.and.indicator",]$sedge.id),
+                                      .(value = 1, id, cid = paste("haplotype.indicator", sedge.id))])
+            rhs = unique(iconstraints[, .(value = 0, sense = "L", cid)], by = "cid")
+
+            constraints = rbind(constraints, iconstraints, fill = TRUE)
+            b = rbind(b, rhs, fill = TRUE)
+
+            ## verify that there are three of these per sedge.id!
+            ## tmp = iconstraints[, .(count = .N), by = cid]
+            ## all(tmp[, count] == 3)
+
+            ## add constraints that force indicators to be 1 if edge CN > 0
+
+            ## big-M upper bound, one constraint per edge: edge - M * indicator <= 0 (same setup as L0 penalty)
+            iconstraints = vars[type == "edge", .(value = 1, id,
+                                                  sedge.id,
+                                                  cid = paste("edge.indicator.ub", sedge.id))]
+
+            ## add matching indicator variables; cid is rebuilt from each indicator's own sedge.id so pairing never depends on row order
+            iconstraints = rbind(
+                iconstraints,
+                vars[type == "edge.indicator", ][
+                    sedge.id %in% iconstraints$sedge.id, .(value = -M, id, cid = paste("edge.indicator.ub", sedge.id), sedge.id)],
+                fill = TRUE)
+
+            ## upper bound is M if indicator is positive, and zero otherwise
+            constraints = rbind(
+                constraints,
+                iconstraints,
+                fill = TRUE)
+
+            ## add the RHS of this constraint (upper bound)
+            b = rbind(
+                b,
+                vars[type == "edge", .(value = 0, sense = "L", cid = paste("edge.indicator.ub", sedge.id))],
+                fill = TRUE
+            )
+
+            ## add constraints for the lower bound: edge - 0.1 * indicator >= 0
+            iconstraints = vars[type == "edge",
+                                .(value = 1, id, sedge.id, cid = paste("edge.indicator.lb", sedge.id))]
+
+            ## add matching indicator variables for LB (cid again derived from the indicator's own sedge.id)
+            iconstraints = rbind(
+                iconstraints,
+                vars[type == "edge.indicator", ][sedge.id %in% iconstraints$sedge.id,
+                                                 .(value = -0.1, id, cid = paste("edge.indicator.lb", sedge.id), sedge.id)],
+                fill = TRUE)
+
+            constraints = rbind(
+                constraints,
+                iconstraints,
+                fill = TRUE)
+
+            ## add the RHS of this constraint (lower bound)
+            b = rbind(
+                b,
+                vars[type == "edge", .(value = 0, sense = "G", cid = paste("edge.indicator.lb", sedge.id))],
+                fill = TRUE
+            )
+        }
+
+        ## implement edge indicators for ISM and edge reward
+        if (ism | any(gg$edges$dt$reward != 0, na.rm = TRUE)) {
+
+            ## implement edge indicators if not already done (i.e. when not phasing)
+            if (!phased) {
+
+                ## importantly, we only want to add these for ALT edges (positive strand only)
+                iconstraints = vars[type == "edge" & ref.or.alt == "ALT" & sign(sedge.id) == 1,
+                                    .(value = 1, id,
+                                      sedge.id,
+                                      cid = paste("edge.indicator.ub", sedge.id))]
+
+                ## add matching indicator variables; cid is rebuilt from each indicator's own sedge.id so pairing never depends on row order
+                iconstraints = rbind(
+                    iconstraints,
+                    vars[type == "edge.indicator" & ref.or.alt == "ALT" & sign(sedge.id) == 1, ][
+                        sedge.id %in% iconstraints$sedge.id,
+                        .(value = -M, id, cid = paste("edge.indicator.ub", sedge.id), sedge.id)],
+                    fill = TRUE)
+
+                ## upper bound is M if indicator is positive, and zero otherwise
+                constraints = rbind(
+                    constraints,
+                    iconstraints,
+                    fill = TRUE)
+
+                ## add the RHS of this constraint (upper bound)
+                b = rbind(
+                    b,
+                    vars[type == "edge" & ref.or.alt == "ALT" & sign(sedge.id) == 1,
+                         .(value = 0, sense = "L", cid = paste("edge.indicator.ub", sedge.id))],
+                    fill = TRUE
+                )
+
+                ## add constraints for the lower bound: edge - 0.1 * indicator >= 0
+                iconstraints = vars[type == "edge" & ref.or.alt == "ALT" & sign(sedge.id) == 1,
+                                    .(value = 1, id, sedge.id, cid = paste("edge.indicator.lb", sedge.id))]
+
+                ## add matching indicator variables for LB (cid again derived from the indicator's own sedge.id)
+                iconstraints = rbind(
+                    iconstraints,
+                    vars[type == "edge.indicator" & ref.or.alt == "ALT" & sign(sedge.id) == 1, ][
+                        sedge.id %in% iconstraints$sedge.id,
+                        .(value = -0.1, id, cid = paste("edge.indicator.lb", sedge.id), sedge.id)],
+                    fill = TRUE)
+
+                constraints = rbind(
+                    constraints,
+                    iconstraints,
+                    fill = TRUE)
+
+                ## add the RHS of this constraint (lower bound)
+                b = rbind(
+                    b,
+                    vars[type == "edge" & ref.or.alt == "ALT" & sign(sedge.id) == 1,
+                         .(value = 0, sense = "G", cid = paste("edge.indicator.lb", sedge.id))],
+                    fill = TRUE
+                )
+            }
+
+            ## fix loose ends at zero if there's a junction there (only valid if not phasing)
+            #' zchoo Tuesday, Jun 15, 2021 11:53:15 AM
+            #' this constraint appears to be valid even if running phasing.
+            ## if (!phased) {
+            ## extremity exclusivity (relevant for ALL graphs)
+
+            if (ism) {
+                loose.constraints = rbind(
+                    vars[type == "loose.in.indicator" & sign(snode.id) == 1 & telomeric == FALSE,
+                         .(value = 1, id, cid = paste("extremity.exclusivity", ee.id))],
+                    vars[type == "loose.out.indicator" & sign(snode.id) == 1 & telomeric == FALSE,
+                         .(value = 1, id, cid = paste("extremity.exclusivity", ee.id))]
+                )
+
+                edge.constraints = rbind(
+                    vars[type == "edge.indicator" & ref.or.alt == "ALT" & sign(sedge.id) == 1,
+                         .(value = 1, id, cid = paste("extremity.exclusivity", ee.id.n1))],
+                    vars[type == "edge.indicator" & ref.or.alt == "ALT" & sign(sedge.id) == 1,
+                         .(value = 1, id, cid = paste("extremity.exclusivity", ee.id.n2))]
+                )
+
+                constraints = rbind(constraints, loose.constraints, edge.constraints, fill = TRUE)
+
+                loose.b = unique(loose.constraints[, .(cid, value = 1, sense = "L")], by = "cid")
+                edge.b = unique(edge.constraints[, .(cid, value = 1, sense = "L")], by = "cid")
+
+                b = rbind(b, edge.b, loose.b, fill = TRUE)
+
+                ## fix loose ends at zero if they coincide with a called junction
+                edge.ee.ids = unique(c(vars[type == "edge.indicator", ee.id.n1], vars[type == "edge.indicator", ee.id.n2]))
+                edge.ee.ids = edge.ee.ids[!is.na(edge.ee.ids)]
+
+                loose.zeros = rbind(
+                    vars[type == "loose.in.indicator" & sign(snode.id) == 1 & ee.id %in% edge.ee.ids,
+                         .(value = 1, id, cid = paste("extremity.exclusivity", ee.id))],
+                    vars[type == "loose.out.indicator" & sign(snode.id) == 1 & ee.id %in% edge.ee.ids,
+                         .(value = 1, id, cid = paste("extremity.exclusivity", ee.id))]
+                )
+
+                loose.zeros.rhs = unique(loose.zeros[, .(cid, value = 0, sense = "E")], by = "cid")
+
+                constraints = rbind(constraints, loose.zeros, fill = TRUE)
+                b = rbind(b, loose.zeros.rhs, fill = TRUE)
+            }
+        }
+
+        if (phased) {
+            ## homologous extremity exclusivity
+            ## this is actually redundant with previous constraints
+
+            if (ism) {
+                loose.constraints = rbind(
+                    vars[type == "loose.in.indicator" & sign(snode.id)==1 & telomeric == FALSE,
+                         .(value = 1, id, cid = paste("homol.extremity.exclusivity", hee.id))],
+                    vars[type == "loose.out.indicator" & sign(snode.id)==1 & telomeric == FALSE,
+                         .(value = 1, id, cid = paste("homol.extremity.exclusivity", hee.id))]
+                )
+
+                edge.constraints = rbind(
+                    vars[type == "edge.indicator" & ref.or.alt == "ALT" & sign(sedge.id)==1,
+                         .(value = 1, id, cid = paste("homol.extremity.exclusivity", hee.id.n1))],
+                    vars[type == "edge.indicator" & ref.or.alt == "ALT" & sign(sedge.id)==1,
+                         .(value = 1, id, cid = paste("homol.extremity.exclusivity", hee.id.n2))]
+                )
+
+                ## we should allow loose ends to violate this as some loose ends are germline;
+                ## therefore, drop loose ends whose id's are not shared with any edge constraint
+                loose.constraints = loose.constraints[(cid %in% edge.constraints[, cid])]
+
+                ## add these constraints to the existing table
+                constraints = rbind(constraints, loose.constraints, edge.constraints, fill = TRUE)
+
+                rhs = unique(rbind(
+                    vars[type == "loose.in.indicator" & sign(snode.id)==1 & telomeric == FALSE,
+                         .(value = 1, sense = "L", cid = paste("homol.extremity.exclusivity", hee.id))],
+                    vars[type == "loose.out.indicator" & sign(snode.id)==1 & telomeric == FALSE,
+                         .(value = 1, sense = "L", cid = paste("homol.extremity.exclusivity", hee.id))]
+                ), by = "cid")
+
+                rhs = rhs[cid %in% edge.constraints[, cid]]
+
+                b = rbind(b, rhs, fill = TRUE)
+
+                if (verbose) {
+                    message("Number of homologous extremity exclusivity constraints: ",
+                            nrow(rhs))
+                }
+
+                config.dt = vars[type == "straight.config" | type == "cross.config",]
+
+                config.constraints.lt = rbind(
+                    vars[type == "edge.indicator" & sedge.id > 0 & ref.or.alt == "REF",
+                         .(value = -1, id, cid = paste("config lt", sedge.id))],
+                    vars[type == "edge.indicator" & sedge.id > 0 & ref.or.alt == "REF",
+                         .(value = 1, id = config.dt$id[match(config.id, config.dt$config.id)],
+                           cid = paste("config lt", sedge.id))])
+
+                config.constraints.gt = rbind(
+                    vars[type == "edge.indicator" & sedge.id > 0 & ref.or.alt == "REF",
+                         .(value = -1, id, cid = config.id)],
+                    vars[type == "straight.config" & sedge.id > 0 & ref.or.alt == "REF",
+                         .(value = 1, id, cid = config.id)],
+                    vars[type == "cross.config" & sedge.id > 0 & ref.or.alt == "REF",
+                         .(value = 1, id, cid = config.id)])
+
+                rhs = unique(rbind(
+                    config.constraints.lt[, .(cid, value = 0, sense = "G")],
+                    config.constraints.gt[, .(cid, value = 0, sense = "L")]),
+                    by = "cid")
+
+                constraints = rbind(constraints, config.constraints.lt, config.constraints.gt, fill = TRUE)
+                b = rbind(b, rhs, fill = TRUE)
+
+                ## implement reciprocal homologous extremity exclusivity
+                straight.config.dt = vars[type == "straight.config",]
+                cross.config.dt = vars[type == "cross.config",]
+
+                rhomol.constraints = rbind(
+                    ## straight REF edge indicator rows: add the cross config variable sharing the same og.edge.id
+                    vars[type == "edge.indicator" & sedge.id > 0 & ref.or.alt == "REF" & connection == "straight",
+                         .(value = 1, id = cross.config.dt$id[match(og.edge.id, cross.config.dt$og.edge.id)],
+                           cid = paste("rhee", sedge.id))],
+
+                    ## cross REF edge indicator rows: add the straight config variable sharing the same og.edge.id
+                    vars[type == "edge.indicator" & sedge.id > 0 & ref.or.alt == "REF" & connection == "cross",
+                         .(value = 1, id = straight.config.dt$id[match(og.edge.id, straight.config.dt$og.edge.id)],
+                           cid = paste("rhee", sedge.id))],
+
+                    ## ALT edge indicators, one term per non-NA s1-s4 / c1-c4 id; `!cnloh == TRUE` parses as !(cnloh == TRUE), so NA cnloh rows are excluded -- NOTE(review): confirm cnloh is never NA here
+                    vars[(!cnloh == TRUE) & type == "edge.indicator" & sedge.id > 0 & ref.or.alt == "ALT" & !is.na(s1),
+                         .(value = 1, id, cid = paste("rhee", s1))],
+                    vars[(!cnloh == TRUE) & type == "edge.indicator" & sedge.id > 0 & ref.or.alt == "ALT" & !is.na(s2),
+                         .(value = 1, id, cid = paste("rhee", s2))],
+                    vars[(!cnloh == TRUE) & type == "edge.indicator" & sedge.id > 0 & ref.or.alt == "ALT" & !is.na(s3),
+                         .(value = 1, id, cid = paste("rhee", s3))],
+                    vars[(!cnloh == TRUE) & type == "edge.indicator" & sedge.id > 0 & ref.or.alt == "ALT" & !is.na(s4),
+                         .(value = 1, id, cid = paste("rhee", s4))],
+                    vars[(!cnloh == TRUE) & type == "edge.indicator" & sedge.id > 0 & ref.or.alt == "ALT" & !is.na(c1),
+                         .(value = 1, id, cid = paste("rhee", c1))],
+                    vars[(!cnloh == TRUE) & type == "edge.indicator" & sedge.id > 0 & ref.or.alt == "ALT" & !is.na(c2),
+                         .(value = 1, id, cid = paste("rhee", c2))],
+                    vars[(!cnloh == TRUE) & type == "edge.indicator" & sedge.id > 0 & ref.or.alt == "ALT" & !is.na(c3),
+                         .(value = 1, id, cid = paste("rhee", c3))],
+                    vars[(!cnloh == TRUE) & type == "edge.indicator" & sedge.id > 0 & ref.or.alt == "ALT" & !is.na(c4),
+                         .(value = 1, id, cid = paste("rhee", c4))],
+
+                    ## non-telomeric loose end indicators on positive nodes, one term per non-NA s / c id
+                    vars[type == "loose.in.indicator" & snode.id > 0 & !is.na(s) & telomeric == FALSE,
+                         .(value = 1, id, cid = paste("rhee", s))],
+                    vars[type == "loose.in.indicator" & snode.id > 0 & !is.na(c) & telomeric == FALSE,
+                         .(value = 1, id, cid = paste("rhee", c))],
+                    vars[type == "loose.out.indicator" & snode.id > 0 & !is.na(s) & telomeric == FALSE,
+                         .(value = 1, id, cid = paste("rhee", s))],
+                    vars[type == "loose.out.indicator" & snode.id > 0 & !is.na(c) & telomeric == FALSE,
+                         .(value = 1, id, cid = paste("rhee", c))]
+                )
+
+                rhs = unique(rhomol.constraints[, .(value = 2, sense = "L", cid)], by = "cid")
+
+                if (verbose) {
+                    message("Number of reciprocal homologous constraints: ", nrow(rhs))
+                }
+
+                constraints = rbind(constraints, rhomol.constraints, fill = TRUE)
+                b = rbind(b, rhs, fill = TRUE)
+
+            }
+
+            ## add the edge indicator sum constraints (ISM consistency)
+            iconstraints = unique(
+                vars[type == "edge.indicator" & ref.or.alt == "ALT",
+                     .(value = 1, id, og.edge.id,
+                       edge.id = abs(sedge.id),
+                       cid = paste("edge.indicator.sum.ub", og.edge.id))],
+                by = "edge.id"
+            )
+
+            constraints = rbind(
+                constraints,
+                iconstraints[, .(value, id, cid)],
+                fill = TRUE)
+
+            edge.indicator.b = unique(
+                vars[type == "edge.indicator" & ref.or.alt == "ALT",
+                     .(value = 1, sense = "L", cid = paste("edge.indicator.sum.ub", og.edge.id))],
+                by = "cid"
+            )
+
+            b = rbind(b, edge.indicator.b, fill = TRUE)
+
+            ## force major allele to have higher CN than minor allele
+            ## may not work for phased blocks
+            if (force.major) {
+
+                iconstraints = rbind(
+                    vars[type == "node" & allele == "major" & snode.id > 0,
+                         .(value = 1, id, cid = paste("force.major", og.node.id))],
+                    vars[type == "node" & allele == "minor" & snode.id > 0,
+                         .(value = -1, id, cid = paste("force.major", og.node.id))])
+
+                rhs = unique(vars[type == "node" & snode.id > 0 & allele == "major",
+                                  .(value = 0, sense = "G", cid = paste("force.major", og.node.id))],
+                             by = "cid")
+
+                constraints = rbind(constraints, iconstraints, fill = TRUE)
+                b = rbind(b, rhs, fill = TRUE)
+
+            }
+
+
+            ## force nonzero CN for ALT edges (because these have nonzero CN in original JaBbA output)
+            ## can become infeasible if original graph is not compatible with ISM
+            if (force.alt) {
+
+                if (ism) {
+                    warning("Forcing ALT edges while running ISM can make some problems infeasible!")
+                }
+
+                iconstraints = unique(
+                    vars[type == "edge.indicator" & ref.or.alt == "ALT" & cnloh != TRUE & sedge.id > 0,
+                         .(value = 1, id, og.edge.id,
+                           edge.id = abs(sedge.id),
+                           cid = paste("edge.indicator.sum.lb", og.edge.id))],
+                    by = "edge.id"
+                )
+
+                constraints = rbind(
+                    constraints,
+                    iconstraints[, .(value, id, cid)],
+                    fill = TRUE)
+
+                edge.indicator.b = unique(
+                    vars[type == "edge.indicator" & ref.or.alt == "ALT" & cnloh != TRUE & sedge.id > 0,
+                         .(value = 1, sense = "G", cid = paste("edge.indicator.sum.lb", og.edge.id))],
+                    by = "cid"
+                )
+
+                b = rbind(b, edge.indicator.b, fill = TRUE)
+            }
+        }
+
+
+        if (L0) ## add "big M" constraints
+        {
+            ## indicator constraints ie on ulids
+            iconstraints = rbind(
+                vars[type == 'loose.out', .(value = 1, id, ulid, cid = paste('loose.out.indicator.ub', ulid))],
+                vars[type == 'loose.in', .(value = 1, id, ulid, cid = paste('loose.in.indicator.ub', ulid))],
+                fill = TRUE)
+
+            ## add the matching indicator variables, matching to the cid from above
+            iconstraints = rbind(
+                iconstraints,
+                vars[type %in% c('loose.out.indicator', 'loose.in.indicator'), ][
+                    match(iconstraints$ulid, ulid), .(value = -M, id, cid = iconstraints$cid)],
+                fill = TRUE)
+
+            ## upper bounds "infinity" ie M if indicator positive, 0 otherwise
+            constraints = rbind(
+                constraints,
+                iconstraints,
+                fill = TRUE)
+
+        }
+
+        if (L0) ## add "big M" constraints
+        {
+            ## indicator constraints ie on ulids
+            iconstraints = rbind(
+                vars[type == 'loose.out', .(value = 1, id, ulid, cid = paste('loose.out.indicator.ub', ulid))],
+                vars[type == 'loose.in', .(value = 1, id, ulid, cid = paste('loose.in.indicator.ub', ulid))],
+                fill = TRUE)
+
+            ## add the matching indicator variables, matching to the cid from above
+            iconstraints = rbind(
+                iconstraints,
+                vars[type %in% c('loose.out.indicator', 'loose.in.indicator'), ][
+                    match(iconstraints$ulid, ulid), .(value = -M, id, cid = iconstraints$cid)],
+                fill = TRUE)
+
+            ## upper bounds "infinity" ie M if indicator positive, 0 otherwise
+            constraints = rbind(
+                constraints,
+                iconstraints,
+                fill = TRUE)
+
+            ## upper bound sense is 'L' i.e. less than because -M on left hand side
+            b = rbind(b,
+                      vars[type == 'loose.in', .(value = 0, sense = 'L', cid = paste('loose.in.indicator.ub', ulid))],
+                      vars[type == 'loose.out', .(value = 0, sense = 'L', cid = paste('loose.out.indicator.ub', ulid))],
+                      fill = TRUE)
+
+            ## lower bound 0.1 if indicator positive, 0 otherwise
+            iconstraints = rbind(
+                vars[type == 'loose.out', .(value = 1, id, ulid, cid = paste('loose.out.indicator.lb', ulid))],
+                vars[type == 'loose.in', .(value = 1, id, ulid, cid = paste('loose.in.indicator.lb', ulid))],
+                fill = TRUE)
+
+            ## add the matching indicator variables, matching to the cid from above
+            iconstraints = rbind(
+                iconstraints,
+                vars[type %in% c('loose.out.indicator', 'loose.in.indicator'), ][
+                    match(iconstraints$ulid, ulid), .(value = -.1, id, cid = iconstraints$cid)],
+                fill = TRUE)
+
+            ## append the lower-bound rows (loose cn - 0.1 * indicator) built above to the constraints
+            constraints = rbind(
+                constraints,
+                iconstraints,
+                fill = TRUE)
+
+            ## lower bound sense is 'G' i.e. greater than because -0.1 * indicator is on the left hand side
+            b = rbind(b,
+                      vars[type == 'loose.in', .(value = 0, sense = 'G', cid = paste('loose.in.indicator.lb', ulid))],
+                      vars[type == 'loose.out', .(value = 0, sense = 'G', cid = paste('loose.out.indicator.lb', ulid))],
+                      fill = TRUE)
+
+            if (loose.collapse)
+            {
+    ##################
+                ## loose indicator sum  = sum of indicators
+    ##################
+                iconstraints = rbind(
+                    vars[type == 'loose.out.indicator', .(value = 1, id, lid, cid = paste('loose.out.indicator.sum', lid))],
+                    vars[type == 'loose.in.indicator', .(value = 1, id, lid, cid = paste('loose.in.indicator.sum', lid))],
+                    fill = TRUE)
+
+                ## indicator sum is the sum of all indicators mapping to that loose end
+                iconstraints = rbind(
+                    iconstraints,
+                    unique(vars[type %in% c('loose.out.indicator.sum', 'loose.in.indicator.sum'), ][
+                        match(iconstraints$lid, lid), .(value = -1, id, lid, cid = iconstraints$cid)], by = 'lid'),
+                    fill = TRUE)
+
+                constraints = rbind(
+                    constraints,
+                    iconstraints,
+                    fill = TRUE)
+
+                b = rbind(b,
+                          vars[type == 'loose.in.indicator.sum', .(value = 0, sense = 'E', cid = paste('loose.in.indicator.sum', lid))],
+                          vars[type == 'loose.out.indicator.sum', .(value = 0, sense = 'E', cid = paste('loose.out.indicator.sum', lid))],
+                          fill = TRUE)
+
+    ##################
+                ## now we make new indicator variables on the sum of the individual loose end indicators
+                ## upper bound M if indicator positive, 0 otherwise
+    ##################
+
+                iconstraints = rbind(
+                    vars[type == 'loose.out.indicator.sum', .(value = 1, id, lid, cid = paste('loose.out.indicator.sum.indicator.ub', lid))],
+                    vars[type == 'loose.in.indicator.sum', .(value = 1, id, lid, cid = paste('loose.in.indicator.sum.indicator.ub', lid))],
+                    fill = TRUE)
+
+                ## add the matching indicator variables, matching to the cid from above
+                iconstraints = rbind(
+                    iconstraints,
+                    vars[type %in% c('loose.out.indicator.sum.indicator', 'loose.in.indicator.sum.indicator'), ][
+                        match(iconstraints$lid, lid), .(value = -M, id, lid, cid = iconstraints$cid)],
+                    fill = TRUE)
+
+                ## upper bounds "infinity" ie M if indicator positive, 0 otherwise
+                constraints = rbind(
+                    constraints,
+                    iconstraints,
+                    fill = TRUE)
+
+                ## upper bound sense is 'L' i.e. less than because -M on left hand side
+                b = rbind(b,
+                          vars[type == 'loose.in.indicator.sum', .(value = 0, sense = 'L', cid = paste('loose.in.indicator.sum.indicator.ub', lid))],
+                          vars[type == 'loose.out.indicator.sum', .(value = 0, sense = 'L', cid = paste('loose.out.indicator.sum.indicator.ub', lid))],
+                          fill = TRUE)
+
+                ## lower bound 0.1 if indicator positive, 0 otherwise
+                iconstraints = rbind(
+                    vars[type == 'loose.out.indicator.sum', .(value = 1, id, lid, cid = paste('loose.out.indicator.sum.indicator.lb', lid))],
+                    vars[type == 'loose.in.indicator.sum', .(value = 1, id, lid, cid = paste('loose.in.indicator.sum.indicator.lb', lid))],
+                    fill = TRUE)
+
+                ## add the matching indicator variables, matching to the cid from above
+                iconstraints = rbind(
+                    iconstraints,
+                    vars[type %in% c('loose.out.indicator.sum', 'loose.in.indicator.sum'), ][
+                        match(iconstraints$lid, lid), .(value = -.1, id, lid, cid = iconstraints$cid)],
+                    fill = TRUE)
+
+                ## upper bounds "infinity" ie M if indicator positive, 0 otherwise
+                constraints = rbind(
+                    constraints,
+                    iconstraints,
+                    fill = TRUE)
+
+                ## lower bound sense is 'G' i.e. greater than because -M on left hand side
+                b = rbind(b,
+                          vars[type == 'loose.in.indicator.sum', .(value = 0, sense = 'G', cid = paste('loose.in.indicator.sum.indicator.lb', lid))],
+                          vars[type == 'loose.out.indicator.sum', .(value = 0, sense = 'G', cid = paste('loose.out.indicator.sum.indicator.lb', lid))],
+                          fill = TRUE)
+
+            }
+        }
+
+
+        if (!is.null(marginal) && length(dmarginal))
+        {
+            ## match against nodes and store query.id as rid
+            ## this will be the constraint id that will allow us
+            ## to sum the appropriate nodes to constrain to the residual
+            ov = dmarginal[, c('cn', 'weight')] %*% gg$nodes$gr %>% gr2dt
+
+            ov[, rid := query.id]
+
+            constraints = rbind(
+                constraints,
+                rbind(
+                    ## match up vars and marginal by snode.id and populate coefficients
+                    merge.data.table(vars[type == 'node', !"rid"], ov, by = 'snode.id')[, .(value = 1, id , cid = paste('mresidual', rid))],
+                    ## the residual is the difference between the sum and marginal cn
+                    vars[type == 'mresidual' & rid %in% ov$rid, .(value = -1, id, cid = paste('mresidual', rid))],
+                    fill = TRUE),
+                fill = TRUE
+            )
+
+            b = rbind(b,
+                      vars[type == 'mresidual' & rid %in% ov$rid, .(value = cn, sense = 'E', cid = paste('mresidual', rid))],
+                      fill = TRUE)
+        }
+
+        if (!is.null(emarginal)) {
+
+            emconstraints = rbind(
+                vars[type == "edge", .(value = 1, id, cid = paste("emresidual", emarginal.id))],
+                vars[type == "emresidual", .(value = -1, id, cid = paste("emresidual", emarginal.id))]
+            )
+
+            constraints = rbind(constraints, emconstraints, fill = TRUE)
+
+            emb = vars[type == "emresidual", .(value = cn, sense = "E", cid = paste("emresidual", emarginal.id))]
+
+            b = rbind(emb, b, fill = TRUE)
+        }
+
+    ########
+        ## MAKE MATRICES
+    ########
+
+        ## now Rcplex time
+        ## remove any rows with b = NA
+        ## get rid of any constraints with NA values
+        keep.constraints = intersect(b[!is.na(value), cid], constraints[!is.na(value), cid])
+        b = b[cid %in% keep.constraints,]
+        constraints = constraints[cid %in% keep.constraints,]
+
+        ## convert constraints to integers
+        ucid = unique(b$cid)
+        b[, cid.char := cid]
+        b[, cid := cid %>% factor(ucid) %>% as.integer]
+        constraints[, cid.char := cid]
+        constraints[, cid := cid %>% factor(ucid) %>% as.integer]
+
+        pmt = match(ucid, b$cid.char) ## get right permutation
+        bvec = b[pmt, value]
+        sense = b[pmt, sense]
+        if (verbose) {
+            message("Unique cids (A): ", length(unique(constraints$cid)))
+            message("Unique cids (b): ", length(unique(b$cid)))
+            message("Number of variables: ", length(unique(constraints$id)))
+        }
+
+        ## create constraint matrix, Qmat, and cobj, lb, ub from vars and constraints  lambda = 10
+        Amat = sparseMatrix(constraints$cid, constraints$id, x = constraints$value, dims = c(length(ucid), nrow(vars)))
+        vars[is.na(weight), weight := 0]
+
+        if (verbose) {
+
+            message("bvec length: ", length(bvec))
+            message("Amat nrow: ", nrow(Amat))
+
+        }
+        if (any(ix <- is.infinite(vars$weight)))
+        {
+            warning('nodes with infinite weight, setting to 0, please check inputs')
+            vars[ix, weight := 0]
+        }
+        Qmat = vars[, weight * (type %in% c('nresidual', 'eresidual', 'mresidual'))] %>% as.numeric %>% Diagonal(x = .) %>% as('CsparseMatrix')
+
+        ## set lambda to 0 at terminal or other non NA nodes
+        vars[is.na(lambda), lambda := 0]
+
+
+        ## set cvec by multiplying global lambda by local lambda for non-terminal loose end
+        ## vars (or their indicators if L0 is TRUE)
+        if (L0)
+        {
+            if (loose.collapse)
+            {
+                cvec = lambda*(vars[, lambda*(type %in% c('loose.in.indicator.sum.indicator', 'loose.out.indicator.sum.indicator') & !terminal)] %>% as.numeric)
+                ## cvec = lambda*(vars[, lambda*(type %in% c('loose.in.indicator.sum.indicator', 'loose.out.indicator.sum.indicator', 'loose.in.indicator', 'loose.out.indicator') & !terminal)] %>% as.numeric)
+            }
+            else
+            {
+                cvec = lambda*(vars[, lambda * (type %in% c('loose.in.indicator', 'loose.out.indicator') & !terminal)] %>% as.numeric)
+            }
+        } else {
+            cvec = lambda*(vars[, lambda*(type %in% c('loose.in', 'loose.out') & !terminal)] %>% as.numeric)
+        }
+
+        ## message("CVEC: ", length(cvec))
+
+        if (length(indices <- which(vars[, type == "edge.indicator" & reward != 0])))
+        {
+            if (verbose) {
+            }
+            message('Applying reward')
+            ## grab edge indicator variables with reward
+            cvec[indices] = -vars$reward[indices]
+        }
+
+        if (lp) {
+            ## add weights of stuff
+            indices = which(vars$type %in% c("mdelta.plus", "mdelta.minus",
+                                             "ndelta.plus", "ndelta.minus",
+                                             "edelta.plus", "edelta.minus"))
+            wts = vars$weight[indices]
+            cvec[indices] = wts
+            Qmat = NULL ## no Q if solving LP
+        }
+
+        ## browser()
+        if (cnloh) {
+
+            if ("cnloh" %in% colnames(vars)) {
+                indices = which(vars$type == "edge.indicator" & !is.na(vars$cnloh) & vars$cnloh == TRUE)
+                cvec[indices] = lambda
+
+                message("Number of penalized CNLOH edges: ", length(indices))
+            }
+        }
+
+        ## check constraints of CNLOH
+        ## browser()
+        ## vars[type == "edge.indicator" & cnloh == TRUE]
+        ## vars[type == "edge.indicator" & cnloh == TRUE, .N, by = og.edge.id]
+
+
+        lb = vars$lb
+        ub = vars$ub
+
+        control = list(trace = ifelse(verbose>=2, 1, 0), tilim = tilim, epgap = epgap, round = 1, trelim = trelim, nodefileind = nodefileind, method = 4)
+
+        ## call our wrapper for CPLEX
+        if (use.gurobi) {
+
+            if (verbose) { message("Starting optimization with gurobi!") }
+
+            sol = run_gurobi(cvec = cvec,
+                             Amat = Amat,
+                             bvec = bvec,
+                             Qmat = Qmat,
+                             lb = lb,
+                             ub = ub,
+                             sense = sense,
+                             vtype = vars$vtype,
+                             objsense = "min",
+                             control = control)
+        } else {
+
+            if (verbose) { message("Starting optimization with CPLEX!") }
+
+            sol =  gGnome:::Rcplex2(cvec,
+                                    Amat,
+                                    bvec,
+                                    Qmat = Qmat,
+                                    lb = lb,
+                                    ub = ub,
+                                    sense = sense,
+                                    vtype = vars$vtype,
+                                    objsense = "min",
+                                    control = control,
+                                    tuning = FALSE)
+        }
+
+        vars$cvec = cvec
+        vars$x = sol$x
+
+        ## for debugging
+        ppc = function(x) (x %>% merge(vars, by = 'id') %>% merge(b, by = 'cid.char'))[, paste(paste(round(value.x, 1), '*', paste(type, gid, sep=  '_'), '(', signif(x, 2), ')', collapse = ' + '), ifelse(sense[1] == 'E', '=', ifelse(sense[1] == 'G', '>=', '<=')), round(value.y[1],2)), by = cid.char]
+
+        ppv = function(x) {tmp = x %>% merge(constraints, by = 'id'); constraints[cid %in% tmp$cid, ] %>% ppc}
+
+        .check = function(x) data.table(obs = sign(as.numeric(round(Amat %*% x - bvec))),
+                                        sense)
+        chk = .check(sol$x)
+
+        if (any(is.na(sol$x)))
+            stop('Rcplex did not converge or failed to find a solution, please run with verbose = 2 to get more detailed output')
+
+        if (chk[sense == 'E', any(obs != 0, na.rm = TRUE)] |
+            chk[sense == 'G', any(obs < 0, na.rm = TRUE)] |
+            chk[sense == 'L', any(obs > 0, na.rm = TRUE)])
+            stop('Constraint violation likely due to M parameter being too large for problem causing CPLEX numerical instability, consider lowering M parameter')
+
+        ##.obj = function(x) 0.5 * rbind(x) %*% Qmat %*% cbind(x) + cvec %*% x
+
+        ## mark haplotypes if phasing
+        if (phased) {
+            haplotypes.dt = vars[type == "haplotype" & snode.id > 0,
+                                 .(node.id = snode.id,
+                                   haplotype = ifelse(x == 1, "h1", "h2"),
+                                   col = ifelse(x == 1, alpha("red", 0.5), alpha("blue", 0.5)))]
+            gg$nodes[haplotypes.dt$node.id]$mark(haplotype = haplotypes.dt$haplotype)
+            gg$nodes[haplotypes.dt$node.id]$mark(col = haplotypes.dt$col)
+        }
+
+        ## update graph
+        nmark = vars[type == 'node', .(nid = abs(snode.id), cn = round(x))]
+        emark = vars[type == 'edge', .(eid = abs(sedge.id), cn = round(x))]
+
+        loosei = vars[type == 'loose.in' & snode.id>0, .(cn = round(x)), keyby = snode.id]
+        looseo = vars[type == 'loose.out' & snode.id>0, .(cn = round(x)), keyby = snode.id]
+
+        nodes = gg$nodes[loosei$snode.id] ## need to do this to use nodes active binding settings
+        nodes$loose.left = loosei$cn>0
+
+        nodes = gg$nodes[looseo$snode.id] ## need to do this to use nodes active binding settings
+        nodes$loose.right = looseo$cn>0
+
+        gg$nodes$mark(loose.cn.left = 0, loose.cn.right = 0)
+        gg$nodes[loosei$snode.id]$mark(loose.cn.left = loosei$cn)
+        gg$nodes[looseo$snode.id]$mark(loose.cn.right = looseo$cn)
+
+        ## cache old cn values
+        gg$nodes$mark(cn.old = gg$nodes$dt$cn)
+        gg$edges$mark(cn.old = gg$edges$dt$cn)
+        gg$nodes$mark(cn = NULL) ## reset to avoid weird type casting issue
+        gg$edges$mark(cn = NULL) ## reset to avoid weird type casting issue
+        gg$nodes[nmark$nid]$mark(cn = nmark$cn)
+        gg$edges[emark$eid]$mark(cn = emark$cn)
+        gg$set(y.field = 'cn')
+
+        gg$set(obj = sol$obj)
+        gg$set(status = sol$status)
+        gg$set(epgap = sol$epgap)
+        if (!use.gurobi) {
+            gg$set(code = readRDS(system.file('extdata', 'cplex_codes.rds', package="gGnome"))[.(sol$status), code])
+        }
+
+        if (verbose) {
+          message("CPLEX epgap ", sol$epgap, " with solution status ", gg$meta$code)
+        }
+
+        ##  fix loose ends
+        nodes = gg$nodes
+        nodes$loose.left = nodes$dt$loose.cn.left>0
+        nodes$loose.right = nodes$dt$loose.cn.right>0
+
+        ## if phased, mark edges with different colors to make it easier to visualize
+        if (phased) {
+            if (verbose) {
+                message("formatting phased graph...")
+            }
+            ## edge formatting
+            ref.edge.col = alpha("blue", 0.5)
+            alt.edge.col = alpha("red", 0.5)
+            ref.edge.lwd = 1.0
+            alt.edge.lwd = 1.0
+            edge.col = ifelse(gg$edges$dt$type == "REF", ref.edge.col, alt.edge.col)
+            edge.lwd = ifelse(gg$edges$dt$type == "REF", ref.edge.lwd, alt.edge.lwd)
+            gg$edges$mark(col = edge.col, lwd = edge.lwd)
+
+            ## mark zero cn edges
+            zero.cn.col = alpha("gray", 0.1)
+            zero.cn.lwd = 0.5
+            zero.cn.edges = which(gg$edges$dt$cn == 0)
+            gg$edges[zero.cn.edges]$mark(col = zero.cn.col, lwd = zero.cn.lwd)
+        } else {
+
+            ## edge formatting
+            ref.edge.col = alpha("blue", 0.2)
+            alt.edge.col = alpha("red", 0.4)
+            ref.edge.lwd = 0.5
+            alt.edge.lwd = 1.0
+            edge.col = ifelse(gg$edges$dt$type == "REF", ref.edge.col, alt.edge.col)
+            edge.lwd = ifelse(gg$edges$dt$type == "REF", ref.edge.lwd, alt.edge.lwd)
+            gg$edges$mark(col = edge.col, lwd = edge.lwd)
+
+            ## mark zero cn edges
+            zero.cn.col = alpha("gray", 0)
+            zero.cn.lwd = 0.5
+            zero.cn.edges = which(gg$edges$dt$cn == 0)
+            gg$edges[zero.cn.edges]$mark(col = zero.cn.col, lwd = zero.cn.lwd)
+        }
+
+        ## if nonintegral also return the offsets as graph metadata
+        ## maybe it will be useful
+        if (nonintegral) {
+            gg$set(meta = vars[type == "slush", .(chr, offset = x)])
+        }
+
+        if (debug) {
+            return(list(gg = gg, sol = sol))
+        }
+        return(gg)
+    }
 
     res = balance(binstats.gg,
                   lambda = opt$lambda,
diff --git a/bin/non_integer_balance.R b/bin/non_integer_balance.R
index 64d365f..041e2cd 100644
--- a/bin/non_integer_balance.R
+++ b/bin/non_integer_balance.R
@@ -41,8 +41,46 @@
     library(skitools)
     library(JaBbA)
     library(gGnome)
+    library(zitools)
     ## devtools::load_all("~/git/gGnome") ## noninteger capacity now added to master branch
-    devtools::load_all("~/git/zitools")
+
+    #' @name grab.hets
+    #' @title grab.hets
+    #'
+    #' @description
+    #'
+    #' returns per-allele (major / minor) counts as ranges given sites.txt from het pileup
+    #'
+    #' @param agt.fname (character) path to sites.txt; stops with an error if NULL or if the file does not exist. Columns used: seqnames, start, end, alt.frac.n, alt.count.t, ref.count.t
+    #' @param min.frac (numeric) between 0 and 1, min frequency in normal to count as het site (exclusive bound on column alt.frac.n)
+    #' @param max.frac (numeric) between 0 and 1, max frequency in normal to count as het site (exclusive bound on column alt.frac.n)
+    #'
+    #' @return output of dt2gr (presumably GRanges, not a gTrack -- confirm): two ranges per retained site, with fields count and allele ("major" / "minor")
+    grab.hets = function(agt.fname = NULL,
+                        min.frac = 0.2,
+                        max.frac = 0.8)
+    {
+        if (is.null(agt.fname) || !file.exists(agt.fname)) {
+            stop("agt.fname does not exist")
+        }
+
+        ## read the pileup table and keep sites with min.frac < alt.frac.n < max.frac (both bounds exclusive)
+        agt.dt = fread(agt.fname)[alt.frac.n > min.frac & alt.frac.n < max.frac,]
+        ## label whichever of alt / ref has more reads in alt.count.t / ref.count.t as major; ties go to "ref" (strict >)
+        agt.dt[, which.major := ifelse(alt.count.t > ref.count.t, "alt", "ref")]
+        agt.dt[, major.count := ifelse(which.major == "alt", alt.count.t, ref.count.t)]
+        agt.dt[, minor.count := ifelse(which.major == "alt", ref.count.t, alt.count.t)]
+
+        ## stack into long format: one row per site per allele (all major rows first, then all minor rows)
+        agt.melted = rbind(agt.dt[, .(seqnames, start, end, count = major.count, allele = "major")],
+                        agt.dt[, .(seqnames, start, end, count = minor.count, allele = "minor")]
+                        )
+
+        ## convert to ranges with dt2gr, carrying the count and allele columns along
+        agt.gr = dt2gr(agt.melted[, .(seqnames, start, end, count, allele)])
+
+        return (agt.gr)
+    }
 
     #' @name grab.hets.from.maf
     #' @title grab.hets.from.maf
diff --git a/conf/igenomes.config b/conf/igenomes.config
index be21320..d1dac35 100644
--- a/conf/igenomes.config
+++ b/conf/igenomes.config
@@ -48,10 +48,10 @@ params {
         hapmap_sites                     = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/hapmap_3.3.b37.vcf.gz"
         pon_dryclean                     = "${params.mski_base}/dryclean/pon/hg19/fixed.detergent.rds"
         blacklist_coverage_jabba         = "${params.mski_base}/JaBbA/blacklist_coverage/hg19/maskA_re.rds"
-        gencode_fusions                  = "${params.mski_base}/fusions/gencode/hg19/gencode.bed"
+        gencode_fusions                  = "${params.mski_base}/fusions/hg19/gencode.v29lift37.annotation.nochr.rds"
         build_non_integer_balance        = "hg19"
         mask_non_integer_balance         = "${params.mski_base}/allelic_cn/non_integer_balance/hg19/mask_with_segdups.rds"
-        mask_lp_phased_balance           = "${params.mski_base}/allelic_cn/lp_phased_balance/maskA_re.rds"
+        mask_lp_phased_balance           = "${params.mski_base}/allelic_cn/lp_phased_balance/lp_phased_balance_maskA_re.rds"
 	}
 	'GATK.GRCh38' {
             fasta                        = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta"
@@ -95,7 +95,7 @@ params {
             blacklist_coverage_jabba     = "${params.mski_base}/JaBbA/blacklist_coverage/hg38/hg38.coverage.mask.rds"
             build_non_integer_balance    = "hg38"
             mask_non_integer_balance     = "${params.mski_base}/allelic_cn/non_integer_balance/hg38/mask_with_segdups.rds"
-            mask_lp_phased_balance       = "${params.mski_base}/allelic_cn/lp_phased_balance/maskA_re.rds"
+            mask_lp_phased_balance       = "${params.mski_base}/allelic_cn/lp_phased_balance/lp_phased_balance_maskA_re.rds"
 	}
 
         'GRCh37' {
diff --git a/conf/modules/alleic_cn.config b/conf/modules/alleic_cn.config
index a5e459b..0fe7d71 100644
--- a/conf/modules/alleic_cn.config
+++ b/conf/modules/alleic_cn.config
@@ -23,24 +23,6 @@ process {
         ]
     }
 
-    withName: 'MSKILABORG_NFJABBA:NFJABBA:ALLEIC_CN:NON_INTEGER_BALANCE_WITH_GRIDSS' {
-        ext.when         = { params.tools && params.tools.split(',').contains('alleic_cn') }
-        publishDir       = [
-                mode: params.publish_dir_mode,
-                path: { "${params.outdir}/alleic_cn/non_integer_balance_with_gridss/${meta.id}/" },
-                pattern: "*{.rds*,.command.*}"
-        ]
-    }
-
-    withName: 'MSKILABORG_NFJABBA:NFJABBA:ALLEIC_CN:NON_INTEGER_BALANCE_WITH_SVABA' {
-        ext.when         = { params.tools && params.tools.split(',').contains('alleic_cn') }
-        publishDir       = [
-                mode: params.publish_dir_mode,
-                path: { "${params.outdir}/alleic_cn/non_integer_balance_with_svaba/${meta.id}/" },
-                pattern: "*{.rds*,.command.*}"
-        ]
-    }
-
     withName: 'MSKILABORG_NFJABBA:NFJABBA:ALLEIC_CN:LP_PHASED_BALANCE' {
         ext.when         = { params.tools && params.tools.split(',').contains('alleic_cn') }
         publishDir       = [
@@ -49,22 +31,4 @@ process {
                 pattern: "*{.rds*,.command.*}"
         ]
     }
-
-    withName: 'MSKILABORG_NFJABBA:NFJABBA:ALLEIC_CN:LP_PHASED_BALANCE_WITH_GRIDSS' {
-        ext.when         = { params.tools && params.tools.split(',').contains('alleic_cn') }
-        publishDir       = [
-                mode: params.publish_dir_mode,
-                path: { "${params.outdir}/alleic_cn/lp_phased_balance_with_gridss/${meta.id}/" },
-                pattern: "*{.rds*,.command.*}"
-        ]
-    }
-
-    withName: 'MSKILABORG_NFJABBA:NFJABBA:ALLEIC_CN:LP_PHASED_BALANCE_WITH_SVABA' {
-        ext.when         = { params.tools && params.tools.split(',').contains('alleic_cn') }
-        publishDir       = [
-                mode: params.publish_dir_mode,
-                path: { "${params.outdir}/alleic_cn/lp_phased_balance_with_svaba/${meta.id}/" },
-                pattern: "*{.rds*,.command.*}"
-        ]
-    }
 }
diff --git a/conf/modules/allelic_cn.config b/conf/modules/allelic_cn.config
new file mode 100644
index 0000000..a5e459b
--- /dev/null
+++ b/conf/modules/allelic_cn.config
@@ -0,0 +1,70 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Available keys to override module options:
+        ext.args   = Additional arguments appended to command in module.
+        ext.args2  = Second set of arguments appended to command in module (multi-tool modules).
+        ext.args3  = Third set of arguments appended to command in module (multi-tool modules).
+        ext.prefix = File name prefix for output files.
+        ext.when   = When to run the module.
+----------------------------------------------------------------------------------------
+*/
+// ALLELIC_CN configs
+
+process {
+
+    withName: 'MSKILABORG_NFJABBA:NFJABBA:ALLEIC_CN:NON_INTEGER_BALANCE' {
+        ext.when         = { params.tools && params.tools.split(',').contains('alleic_cn') }
+        publishDir       = [
+                mode: params.publish_dir_mode,
+                path: { "${params.outdir}/alleic_cn/non_integer_balance/${meta.id}/" },
+                pattern: "*{.rds*,.command.*}"
+        ]
+    }
+
+    withName: 'MSKILABORG_NFJABBA:NFJABBA:ALLEIC_CN:NON_INTEGER_BALANCE_WITH_GRIDSS' {
+        ext.when         = { params.tools && params.tools.split(',').contains('alleic_cn') }
+        publishDir       = [
+                mode: params.publish_dir_mode,
+                path: { "${params.outdir}/alleic_cn/non_integer_balance_with_gridss/${meta.id}/" },
+                pattern: "*{.rds*,.command.*}"
+        ]
+    }
+
+    withName: 'MSKILABORG_NFJABBA:NFJABBA:ALLEIC_CN:NON_INTEGER_BALANCE_WITH_SVABA' {
+        ext.when         = { params.tools && params.tools.split(',').contains('alleic_cn') }
+        publishDir       = [
+                mode: params.publish_dir_mode,
+                path: { "${params.outdir}/alleic_cn/non_integer_balance_with_svaba/${meta.id}/" },
+                pattern: "*{.rds*,.command.*}"
+        ]
+    }
+
+    withName: 'MSKILABORG_NFJABBA:NFJABBA:ALLEIC_CN:LP_PHASED_BALANCE' {
+        ext.when         = { params.tools && params.tools.split(',').contains('alleic_cn') }
+        publishDir       = [
+                mode: params.publish_dir_mode,
+                path: { "${params.outdir}/alleic_cn/lp_phased_balance/${meta.id}/" },
+                pattern: "*{.rds*,.command.*}"
+        ]
+    }
+
+    withName: 'MSKILABORG_NFJABBA:NFJABBA:ALLEIC_CN:LP_PHASED_BALANCE_WITH_GRIDSS' {
+        ext.when         = { params.tools && params.tools.split(',').contains('alleic_cn') }
+        publishDir       = [
+                mode: params.publish_dir_mode,
+                path: { "${params.outdir}/alleic_cn/lp_phased_balance_with_gridss/${meta.id}/" },
+                pattern: "*{.rds*,.command.*}"
+        ]
+    }
+
+    withName: 'MSKILABORG_NFJABBA:NFJABBA:ALLEIC_CN:LP_PHASED_BALANCE_WITH_SVABA' {
+        ext.when         = { params.tools && params.tools.split(',').contains('alleic_cn') }
+        publishDir       = [
+                mode: params.publish_dir_mode,
+                path: { "${params.outdir}/alleic_cn/lp_phased_balance_with_svaba/${meta.id}/" },
+                pattern: "*{.rds*,.command.*}"
+        ]
+    }
+}
diff --git a/main.nf b/main.nf
index 2f13b12..5a41047 100644
--- a/main.nf
+++ b/main.nf
@@ -60,6 +60,10 @@ params.build_dryclean        = WorkflowMain.getGenomeAttribute(params, 'build_dr
 params.hapmap_sites          = WorkflowMain.getGenomeAttribute(params, 'hapmap_sites')
 params.pon_dryclean          = WorkflowMain.getGenomeAttribute(params, 'pon_dryclean')
 params.blacklist_coverage_jabba     = WorkflowMain.getGenomeAttribute(params, 'blacklist_coverage_jabba')
+params.gencode_fusions     = WorkflowMain.getGenomeAttribute(params, 'gencode_fusions')
+params.build_non_integer_balance     = WorkflowMain.getGenomeAttribute(params, 'build_non_integer_balance')
+params.mask_non_integer_balance     = WorkflowMain.getGenomeAttribute(params, 'mask_non_integer_balance')
+params.mask_lp_phased_balance     = WorkflowMain.getGenomeAttribute(params, 'mask_lp_phased_balance')
 //params.blacklist_junctions_jabba     = WorkflowMain.getGenomeAttribute(params, 'blacklist_junctions_jabba')
 
 /*
diff --git a/modules/local/allelic_cn/main.nf b/modules/local/allelic_cn/main.nf
index d96241d..46e8d21 100644
--- a/modules/local/allelic_cn/main.nf
+++ b/modules/local/allelic_cn/main.nf
@@ -4,8 +4,8 @@ process NON_INTEGER_BALANCE {
     label 'process_medium'
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'docker://mskilab/allelic_cn:latest':
-        'mskilab/allelic_cn:latest' }"
+        '/gpfs/commons/groups/imielinski_lab/home/sdider/Projects/nf-jabba/tests/test_runs/work/singularity/jabba_cplex_latest.sif':
+        'mskilab/jabba:latest' }"
 
     input:
     tuple val(meta), path(jabba_rds), path(decomposed_cov), path(het_pileups_wgs)
@@ -23,6 +23,8 @@ process NON_INTEGER_BALANCE {
     val(tilim)
     val(gurobi)
     path(fasta)     // path to decoy fasta
+    path(fasta_fai)     // path to decoy fasta index (.fai)
+    path(bwa_index)
     val(pad)
 
     output:
@@ -37,9 +39,11 @@ process NON_INTEGER_BALANCE {
     def args        = task.ext.args ?: ''
     def prefix      = task.ext.prefix ?: "${meta.id}"
     def id          = "${meta.sample}"
+    def bwa = bwa_index ? "ln -nfs \$(readlink -f ${bwa_index})/* \$(dirname \$(readlink -f $fasta))/" : ""
     def VERSION    = '0.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
 
     """
+    ${bwa}
 
     export RSCRIPT_PATH=\$(echo "${baseDir}/bin/non_integer_balance.R")
 
@@ -49,12 +53,12 @@ process NON_INTEGER_BALANCE {
         --cov $decomposed_cov \\
         --field $field \\
         --hets $het_pileups_wgs \\
-        --hets-thresh $hets_thresh \\
+        --hets_thresh $hets_thresh \\
         --mask $mask \\
         --overwrite $overwrite \\
         --lambda $lambda \\
         --allin $allin \\
-        --fix_thresh $fix_thres \\
+        --fix_thresh $fix_thresh \\
         --nodebounds $nodebounds \\
         --ism $ism \\
         --build $build \\
@@ -85,12 +89,12 @@ process NON_INTEGER_BALANCE {
 
 process LP_PHASED_BALANCE {
 
-    tag "$id"
+    tag "$meta.id"
     label 'process_medium'
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'docker://mskilab/allelic_cn:latest':
-        'mskilab/allelic_cn:latest' }"
+        '/gpfs/commons/groups/imielinski_lab/home/sdider/Projects/nf-jabba/tests/test_runs/work/singularity/jabba_cplex_latest.sif':
+        'mskilab/jabba:latest' }"
 
     input:
     tuple val(meta), path(hets_gg), path(hets) // output from non_integer_balance, sites.txt from hetpileups
@@ -123,7 +127,9 @@ process LP_PHASED_BALANCE {
     def VERSION = '0.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
 
     """
-    Rscript \${baseDir}/bin/lp_phased_balance.R \\
+    export RSCRIPT_PATH=\$(echo "${baseDir}/bin/lp_phased_balance.R")
+
+    Rscript \$RSCRIPT_PATH \\
         --id $id \\
         --jab $hets_gg \\
         --hets $hets \\
@@ -146,7 +152,7 @@ process LP_PHASED_BALANCE {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        non_integer_balance: ${VERSION}
+        lp_phased_balance: ${VERSION}
     END_VERSIONS
     """
 }
diff --git a/modules/local/ascat/main.nf b/modules/local/ascat/main.nf
index d37999f..40db238 100644
--- a/modules/local/ascat/main.nf
+++ b/modules/local/ascat/main.nf
@@ -8,7 +8,7 @@ process ASCAT_SEG {
         'mskilab/ascat_seg:latest' }"
 
     input:
-    tuple val(meta), path(hets), path(cbs_cov)                       // channel: [mandatory] [ meta, hets ]
+    tuple val(meta), path(hets), path(cbs_cov)                       // channel: [mandatory] [ meta, hets, cbs_cov ]
     val(field)                                                       // channel: [mandatory] "foreground" for dryclean/ "ratio"
     val(hets_thresh)                                                 // channel: cutoff for hetpileups; default=0.2
     val(penalty)                                                     // channel: penalty for ASCAT; default=70
diff --git a/modules/local/cbs/main.nf b/modules/local/cbs/main.nf
index d1d36a0..7dd788d 100644
--- a/modules/local/cbs/main.nf
+++ b/modules/local/cbs/main.nf
@@ -29,7 +29,6 @@ process CBS {
     def VERSION     = '0.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
 
     """
-    #RSCRIPT_PATH=\$(if [[ ${workflow.containerEngine} == "singularity" && !task.ext.singularity_pull_docker_container ]]; then echo "/cbsFH.R"; else echo "\${baseDir}/bin/cbsFH.R"; fi)
     export RSCRIPT_PATH=\$(echo "${baseDir}/bin/cbsFH.R")
     Rscript \$RSCRIPT_PATH \\
         -t ${tumor_dryclean_cov} \
diff --git a/modules/local/dryclean/main.nf b/modules/local/dryclean/main.nf
index 3576265..f643540 100644
--- a/modules/local/dryclean/main.nf
+++ b/modules/local/dryclean/main.nf
@@ -3,14 +3,14 @@ process DRYCLEAN {
     label 'process_medium'
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'docker://mskilab/dryclean:0.0.2':
-        'mskilab/dryclean:0.0.2' }"
+        'docker://mskilab/dryclean:0.0.4':
+        'mskilab/dryclean:0.0.4' }"
 
 
     input:
     tuple val(meta), path(input)
     path(pon)
-    val(centered)
+    val(center)
     val(cbs)
     val(cnsignif)
     val(wholeGenome)
@@ -18,7 +18,6 @@ process DRYCLEAN {
     val(blacklist_path)
     val(germline_filter)
     val(germline_file)
-    val(human)
     val(field)
     val(build)
 
@@ -33,7 +32,7 @@ process DRYCLEAN {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    def VERSION = '0.0.2'
+    def VERSION = '0.0.4'
     """
     #!/bin/bash
     set -o allexport
@@ -70,7 +69,7 @@ process DRYCLEAN {
     CMD="Rscript \$drycln \\
         --input             ${input} \\
         --pon               ${pon} \\
-        --center            ${centered} \\
+        --center            ${center} \\
         --cbs               ${cbs} \\
         --cnsignif          ${cnsignif} \\
         --cores             ${task.cpus} \\
@@ -79,7 +78,6 @@ process DRYCLEAN {
         --blacklist_path    ${blacklist_path} \\
         --germline.filter   ${germline_filter} \\
         --germline.file     ${germline_file} \\
-        --human             ${human} \\
         --field             ${field} \\
         --build             ${build} \\
     "
@@ -100,8 +98,6 @@ process DRYCLEAN {
     """
 
     stub:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
     """
     touch drycleaned.cov.rds
     """
diff --git a/modules/local/fusions/main.nf b/modules/local/fusions/main.nf
index c4d3700..9879e86 100644
--- a/modules/local/fusions/main.nf
+++ b/modules/local/fusions/main.nf
@@ -27,12 +27,13 @@ process FUSIONS {
 
     """
 
-    export RSCRIPT_PATH=\$(echo "${baseDir}/bin/fusions.R")
+    export RSCRIPT_PATH=\$(echo "${baseDir}/bin/Fusions.R")
 
     Rscript \$RSCRIPT_PATH \\
 	--id $id \\
 	--gGraph $gGraph \\
 	--gencode $gencode \\
+    --cores ${task.cpus}
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/local/gridss/gridss/main.nf b/modules/local/gridss/gridss/main.nf
index 6856c7a..feb437e 100644
--- a/modules/local/gridss/gridss/main.nf
+++ b/modules/local/gridss/gridss/main.nf
@@ -10,10 +10,10 @@ process GRIDSS_GRIDSS {
 
 
     input:
-    tuple val(meta), path(normalbam, stageAs: "normal.bam"), path(normalbai, stageAs: "normal.bam.bai"), path(tumorbam, stageAs: "tumor.bam"), path(tumorbai, stageAs: "tumor.bam.bai")         // required: [meta, normal_cram, normal_crai, tumor_cram, tumor_crai]
-    path(fasta)                                                                               // required: reference fasta
+    tuple val(meta), path(normalbam, stageAs: "normal.bam"), path(normalbai, stageAs: "normal.bam.bai"), path(tumorbam, stageAs: "tumor.bam"), path(tumorbai, stageAs: "tumor.bam.bai")
+    path(fasta)
     path(fasta_fai)
-    path(bwa_index)                                                                           // required: bwa index folder
+    path(bwa_index)
     path(blacklist_gridss)                                                                    // optional: gridss blacklist bed file based on genome
 
 
diff --git a/nextflow.config b/nextflow.config
index 1253643..b5ccb28 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -60,7 +60,7 @@ params {
 
 	// fragCounter options
 	midpoint_frag              = "TRUE"                           // If TRUE only count midpoint if FALSE then count bin footprint of every fragment interval: Default=TRUE
-	windowsize_frag            = 200                            // Window / bin size : Default=200 (but dryclean uses 1000 binsize)
+	windowsize_frag            = 1000                            // Window / bin size : Default=200 (but dryclean uses 1000 binsize)
 	minmapq_frag               = 60                              // Minimal map quality : Default = 1
 	paired_frag                = "TRUE"                           // Is the dataset paired : Default = TRUE
 	exome_frag                 = "FALSE"	                        // Use exons as bins instead of fixed window : Default = FALSE
@@ -74,7 +74,6 @@ params {
     blacklist_path_dryclean              = "NA"
     germline_filter_dryclean             = "FALSE"
     germline_file_dryclean               = "NA"
-    human_dryclean                       = "TRUE"
     field_dryclean                       = "reads"
     //build_dryclean                       = "hg19"               // This should go inside igenomes.config
 
@@ -451,7 +450,14 @@ includeConfig 'conf/modules/hetpileups.config'
 // JaBbA configurations
 includeConfig 'conf/modules/jabba.config'
 
+// Events configurations
+includeConfig 'conf/modules/events.config'
 
+// Fusions configurations
+includeConfig 'conf/modules/fusions.config'
+
+// Allelic CN configurations
+includeConfig 'conf/modules/allelic_cn.config'
 
 // Function to ensure that resource requirements don't go beyond
 // a maximum limit
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 2eb9772..e4ac667 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -422,14 +422,6 @@
                     "help_text": "Path to file annotated with germline calls, if germline.filter == TRUE",
                     "default": "NA"
                 },
-                "human_dryclean": {
-                    "type": "string",
-                    "fa_icon": "fas fa-forward",
-                    "description": "Specify if the samples under consideration are human",
-                    "hidden": true,
-                    "help_text": "Specify if the samples under consideration are human",
-                    "default": "TRUE"
-                },
                 "field_dryclean": {
                     "type": "string",
                     "fa_icon": "fas fa-forward",
diff --git a/subworkflows/local/allelic_cn/main.nf b/subworkflows/local/allelic_cn/main.nf
index fbdd051..aed8299 100644
--- a/subworkflows/local/allelic_cn/main.nf
+++ b/subworkflows/local/allelic_cn/main.nf
@@ -18,6 +18,8 @@ workflow COV_GGRAPH_NON_INTEGER_BALANCE {
 	tilim_non_integer_balance
 	gurobi_non_integer_balance
 	fasta_non_integer_balance           // path to decoy fasta
+	fasta_fai_non_integer_balance       // path to decoy fasta.fai
+    bwa_non_integer_balance
 	pad_non_integer_balance
 
     main:
@@ -41,6 +43,8 @@ workflow COV_GGRAPH_NON_INTEGER_BALANCE {
         tilim_non_integer_balance,
         gurobi_non_integer_balance,
         fasta_non_integer_balance,
+        fasta_fai_non_integer_balance,
+        bwa_non_integer_balance,
         pad_non_integer_balance
     )
 
diff --git a/subworkflows/local/bam_fragCounter/main.nf b/subworkflows/local/bam_fragCounter/main.nf
index c40dc5a..7ddc5ed 100644
--- a/subworkflows/local/bam_fragCounter/main.nf
+++ b/subworkflows/local/bam_fragCounter/main.nf
@@ -33,15 +33,16 @@ workflow BAM_FRAGCOUNTER {
     versions          = FRAGCOUNTER.out.versions
     corrected_bw      = FRAGCOUNTER.out.corrected_bw
 
-    REBIN_RAW_FRAGCOUNTER(fragcounter_raw_cov, "reads", 1000)
+    // REBIN_RAW_FRAGCOUNTER(fragcounter_cov, "reads", 1000)
 
-    rebinned_raw_cov  = REBIN_RAW_FRAGCOUNTER.out.raw_fragcounter_cov_1kb
+    // rebinned_raw_cov  = REBIN_RAW_FRAGCOUNTER.out.raw_fragcounter_cov_1kb
+    // rebinned_raw_cov  = fragcounter_cov
 
     //
     emit:
     fragcounter_raw_cov
     fragcounter_cov
-    rebinned_raw_cov
+    // rebinned_raw_cov
     corrected_bw
 
     versions
diff --git a/subworkflows/local/cov_dryclean/main.nf b/subworkflows/local/cov_dryclean/main.nf
index 3b0c95b..af91097 100644
--- a/subworkflows/local/cov_dryclean/main.nf
+++ b/subworkflows/local/cov_dryclean/main.nf
@@ -9,7 +9,7 @@ workflow COV_DRYCLEAN {
     take:
     input_dryclean   // channel: [mandatory] [ meta, cov(.rds file) ]
     pon_dryclean
-    centered_dryclean
+    center_dryclean
     cbs_dryclean
     cnsignif_dryclean
     wholeGenome_dryclean
@@ -17,7 +17,6 @@ workflow COV_DRYCLEAN {
     blacklist_path_dryclean
     germline_filter_dryclean
     germline_file_dryclean
-    human_dryclean
     field_dryclean
     build_dryclean
 
@@ -26,10 +25,20 @@ workflow COV_DRYCLEAN {
     dryclean_cov      = Channel.empty()
     //dryclean_obj      = Channel.empty()
 
-    DRYCLEAN(input_dryclean, pon_dryclean, centered_dryclean, cbs_dryclean,
-    cnsignif_dryclean, wholeGenome_dryclean, blacklist_dryclean,
-    blacklist_path_dryclean, germline_filter_dryclean, germline_file_dryclean,
-    human_dryclean, field_dryclean, build_dryclean)
+    DRYCLEAN(
+        input_dryclean,
+        pon_dryclean,
+        center_dryclean,
+        cbs_dryclean,
+        cnsignif_dryclean,
+        wholeGenome_dryclean,
+        blacklist_dryclean,
+        blacklist_path_dryclean,
+        germline_filter_dryclean,
+        germline_file_dryclean,
+        field_dryclean,
+        build_dryclean
+    )
 
     dryclean_cov      = DRYCLEAN.out.decomposed_cov
     //dryclean_obj      = DRYCLEAN.out.dryclean_object
diff --git a/tests/modules/local/dryclean/main.nf.test b/tests/modules/local/dryclean/main.nf.test
index 0bc76b7..92f4905 100644
--- a/tests/modules/local/dryclean/main.nf.test
+++ b/tests/modules/local/dryclean/main.nf.test
@@ -27,9 +27,8 @@ nextflow_process {
                 input[7] = params.blacklist_path_dryclean
                 input[8] = params.germline_filter_dryclean
                 input[9] = params.germline_file_dryclean
-                input[10] = params.human_dryclean
-                input[11] = params.field_dryclean
-                input[12] = params.genomes['GATK.GRCh37'].build_dryclean
+                input[10] = params.field_dryclean
+                input[11] = params.genomes['GATK.GRCh37'].build_dryclean
                 """
             }
         }
diff --git a/tests/nextflow.config b/tests/nextflow.config
index 0cd3aa9..6c7e174 100644
--- a/tests/nextflow.config
+++ b/tests/nextflow.config
@@ -65,13 +65,13 @@ params {
 
 	// fragCounter options
 	midpoint_frag              = "TRUE"                           // If TRUE only count midpoint if FALSE then count bin footprint of every fragment interval: Default=TRUE
-	windowsize_frag            = 200                              // Window / bin size : Default=200 (but dryclean uses 1000 binsize)
+	windowsize_frag            = 1000                              // Window / bin size : Default=200 (but dryclean uses 1000 binsize)
 	minmapq_frag               = 60                               // Minimal map quality : Default = 1
 	paired_frag                = "TRUE"                           // Is the dataset paired : Default = TRUE
 	exome_frag                 = "FALSE"	                      // Use exons as bins instead of fixed window : Default = FALSE
 
     // Dryclean options
-    centered_dryclean                    = "TRUE"
+    center_dryclean                    = "TRUE"
     cbs_dryclean                         = "FALSE"
     cnsignif_dryclean                    = 0.00001
     wholeGenome_dryclean                 = "FALSE"
@@ -79,8 +79,7 @@ params {
     blacklist_path_dryclean              = "NA"
     germline_filter_dryclean             = "FALSE"
     germline_file_dryclean               = "NA"
-    human_dryclean                       = "TRUE"
-    field_dryclean                       = "count"
+    field_dryclean                       = "reads"
 
 	// ASCAT options
 	field_ascat                         = "foreground"
@@ -241,8 +240,8 @@ params {
 
     genomes {
 	'GATK.GRCh37' {
-	    fasta                            = "${params.mski_base}/test_data/human_g1k_v37_decoy.small.fasta"
-	    fasta_fai                        = "${params.mski_base}/test_data/human_g1k_v37_decoy.small.fasta.fai"
+	    fasta                            = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta"
+	    fasta_fai                        = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.fai"
 	    chr_dir                          = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/Chromosomes"
 	    dict                             = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.dict"
 	    bwa                              = "${params.mski_base}/test_data/BWAIndex/"
@@ -270,9 +269,9 @@ params {
         indel_mask                       = "${params.mski_base}/SVABA/hg19/snowman_blacklist.bed"
         germ_sv_db                       = "${params.mski_base}/SVABA/hg19/snowman_germline_mini_160413.bed"
         simple_seq_db                    = "${params.mski_base}/SVABA/hg19/repeat_masker_hg19_Simple.bed"
-        blacklist_gridss                 = "${params.mski_base}/test_data/human_g1k_v37_decoy.small.fasta.bed"
+        blacklist_gridss                 = "${params.mski_base}/GRIDSS/blacklist/hg19/human_g1k_v37_decoy.fasta.bed"
         pon_gridss                       = "${params.mski_base}/GRIDSS/pon/hg19/"
-        gcmapdir_frag                    = "${params.mski_base}/test_data/gcMAP21/"
+        gcmapdir_frag                    = "${params.mski_base}/fragcounter/gcmapdir/hg19/"
         build_dryclean                   = 'hg19'
         hapmap_sites                     = "${params.mski_base}/test_data/hapmap_3.3.b37.vcf.gz"
         pon_dryclean                     = "${params.mski_base}/test_data/chr21_pon.rds"
diff --git a/workflows/nfjabba.nf b/workflows/nfjabba.nf
index ac0b0bc..f908ac0 100644
--- a/workflows/nfjabba.nf
+++ b/workflows/nfjabba.nf
@@ -50,7 +50,7 @@ def checkPathParamList = [
     params.pon_tbi,
     params.gcmapdir_frag,
     params.pon_dryclean,
-    params.blacklist_coverage_jabba
+    params.blacklist_coverage_jabba,
 ]
 
 def toolParamMap = [
@@ -318,13 +318,12 @@ paired_frag        = params.paired_frag        ?: Channel.empty()
 exome_frag         = params.exome_frag         ?: Channel.empty()                                                         // For fragCounter
 
 // Dryclean
-centered_dryclean           = params.centered_dryclean          ?: Channel.empty()
+center_dryclean           = params.centered_dryclean          ?: Channel.empty()
 cbs_dryclean                = params.cbs_dryclean               ?: Channel.empty()
 cnsignif_dryclean           = params.cnsignif_dryclean          ?: Channel.empty()
 wholeGenome_dryclean        = params.wholeGenome_dryclean       ?: Channel.empty()
 blacklist_dryclean          = params.blacklist_dryclean         ?: Channel.empty()
 germline_filter_dryclean    = params.germline_filter_dryclean   ?: Channel.empty()
-human_dryclean              = params.human_dryclean             ?: Channel.empty()
 field_dryclean              = params.field_dryclean             ?: Channel.empty()
 build_dryclean              = params.build_dryclean             ?: Channel.empty()
 
@@ -398,7 +397,7 @@ ism_lp_phased_balance = params.ism_lp_phased_balance ?: Channel.empty()
 epgap_lp_phased_balance = params.epgap_lp_phased_balance ?: Channel.empty()
 hets_thresh_lp_phased_balance = params.hets_thresh_lp_phased_balance ?: Channel.empty()
 min_bins_lp_phased_balance = params.min_bins_lp_phased_balance ?: Channel.empty()
-min_width_lp_phased_balance = params.min_width_lp_phased_balance ?: Channel.empty()
+min_width_lp_phased_balance = params.min_width_lp_phased_balance || params.min_width_lp_phased_balance == 0 ? params.min_width_lp_phased_balance : Channel.empty()
 trelim_lp_phased_balance = params.trelim_lp_phased_balance ?: Channel.empty()
 reward_lp_phased_balance = params.reward_lp_phased_balance ?: Channel.empty()
 nodefileind_lp_phased_balance = params.nodefileind_lp_phased_balance ?: Channel.empty()
@@ -674,7 +673,7 @@ workflow NFJABBA {
     boolean runJabba = false
     boolean runEvents = false
     boolean runFusions = false
-    boolean runAlleicCN = false
+    boolean runAllelicCN = false
 
     // Set flags based on params.step using a cascading approach
     // Fall through to the next case if the previous case is true
@@ -704,7 +703,7 @@ workflow NFJABBA {
         case 'fusions':
             runFusions = true
         case 'allelic_cn':
-            runAlleicCN = true
+            runAllelicCN = true
             break
         default:
             error "Invalid step: ${params.step}"
@@ -1163,10 +1162,10 @@ workflow NFJABBA {
 
         if (tools_used.contains('fragcounter')) {
             NORMAL_FRAGCOUNTER(bam_fragcounter_status.normal, midpoint_frag, windowsize_frag, gcmapdir_frag, minmapq_frag, paired_frag, exome_frag)
-            normal_frag_cov = Channel.empty().mix(NORMAL_FRAGCOUNTER.out.rebinned_raw_cov)
+            normal_frag_cov = Channel.empty().mix(NORMAL_FRAGCOUNTER.out.fragcounter_cov)
 
             TUMOR_FRAGCOUNTER(bam_fragcounter_status.tumor, midpoint_frag, windowsize_frag, gcmapdir_frag, minmapq_frag, paired_frag, exome_frag)
-            tumor_frag_cov = Channel.empty().mix(TUMOR_FRAGCOUNTER.out.rebinned_raw_cov)
+            tumor_frag_cov = Channel.empty().mix(TUMOR_FRAGCOUNTER.out.fragcounter_cov)
 
             // Only need one versions because its just one program (fragcounter)
             versions = versions.mix(NORMAL_FRAGCOUNTER.out.versions)
@@ -1243,7 +1242,7 @@ workflow NFJABBA {
             TUMOR_DRYCLEAN(
                     tumor_frag_cov,
                     pon_dryclean,
-                    centered_dryclean,
+                    center_dryclean,
                     cbs_dryclean,
                     cnsignif_dryclean,
                     wholeGenome_dryclean,
@@ -1251,7 +1250,6 @@ workflow NFJABBA {
                     blacklist_path_dryclean,
                     germline_filter_dryclean,
                     germline_file_dryclean,
-                    human_dryclean,
                     field_dryclean,
                     build_dryclean
                     )
@@ -1261,7 +1259,7 @@ workflow NFJABBA {
             NORMAL_DRYCLEAN(
                     normal_frag_cov,
                     pon_dryclean,
-                    centered_dryclean,
+                    center_dryclean,
                     cbs_dryclean,
                     cnsignif_dryclean,
                     wholeGenome_dryclean,
@@ -1269,7 +1267,6 @@ workflow NFJABBA {
                     blacklist_path_dryclean,
                     germline_filter_dryclean,
                     germline_file_dryclean,
-                    human_dryclean,
                     field_dryclean,
                     build_dryclean
                     )
@@ -1326,7 +1323,7 @@ workflow NFJABBA {
                             meta.patient        = cov[0]
                             meta.sex            = cov[1].sex
 
-                            [ meta, cov[2], hets[2] ]
+                            [ meta, hets[2], cov[2] ]
                     }
             }
 
@@ -1349,7 +1346,6 @@ workflow NFJABBA {
 
     if (runJabba) {
         if (tools_used.contains('jabba')) {
-
             if (params.step == 'jabba') {
                 // put all the inputs into a map for easier retrieval
                 jabba_input_map = input_sample
@@ -1433,7 +1429,9 @@ workflow NFJABBA {
 
             } else {
 
-                tumor_dryclean_cov_for_joining = tumor_dryclean_cov.map { meta, tumor_cov -> [meta.patient, meta, tumor_cov] }
+                meta_for_joining = tumor_dryclean_cov.map{ meta, tumor_cov -> [meta.patient, meta] } // can use any of the inputs to get the meta data
+
+                tumor_dryclean_cov_for_joining = tumor_dryclean_cov.map { meta, tumor_cov -> [meta.patient, tumor_cov] }
 
                 het_pileups_for_joining = sites_from_het_pileups_wgs.map { meta, hets -> [meta.patient, hets] }
 
@@ -1444,7 +1442,8 @@ workflow NFJABBA {
 
                 // join all previous outputs to be used as input for jabba
                 // excluding svs since they can come from either svaba or gridss
-                jabba_inputs = tumor_dryclean_cov_for_joining
+                jabba_inputs = meta_for_joining
+                    .join(tumor_dryclean_cov_for_joining)
                     .join(het_pileups_for_joining)
                     .join(ploidy_for_joining)
                     .join(cbs_seg_rds_for_joining)
@@ -1624,180 +1623,86 @@ workflow NFJABBA {
         }
     }
 
-    if (runAlleicCN) {
+    if (runAllelicCN) {
         if (tools_used.contains('allelic_cn')) {
             if (params.step == 'allelic_cn') {
                 non_integer_balance_inputs = input_sample.map{ meta, cov, hets, ggraph -> [ meta, ggraph, cov, hets ] }
-
-                NON_INTEGER_BALANCE(
-                    non_integer_balance_inputs,
-					field_non_integer_balance,
-					hets_thresh_non_integer_balance,
-                    mask_non_integer_balance,
-					overwrite_non_integer_balance,
-					lambda_non_integer_balance,
-					allin_non_integer_balance,
-					fix_thresh_non_integer_balance,
-					nodebounds_non_integer_balance,
-					ism_non_integer_balance,
-					build_non_integer_balance,
-					epgap_non_integer_balance,
-					tilim_non_integer_balance,
-					gurobi_non_integer_balance,
-                    fasta,
-					pad_non_integer_balance
-                )
-                versions = Channel.empty().mix(NON_INTEGER_BALANCE.out.versions)
-
-                non_integer_balance_balanced_gg = Channel.empty().mix(NON_INTEGER_BALANCE.out.non_integer_balance_balanced_gg)
-                non_integer_balance_hets_gg = Channel.empty().mix(NON_INTEGER_BALANCE.out.non_integer_balance_hets_gg)
-
-                non_integer_balance_balanced_gg_for_joining = non_integer_balance_balanced_gg.map{ meta, balanced -> [ meta.patient, meta, balanced ] }
-                hets_for_joining = input_sample.map{ meta, cov, hets, ggraph -> [ meta.patient, hets ] }
-                lp_phased_balance_inputs = non_integer_balance_hets_gg_for_joining.join(hets_for_joining)
-                    .map{ patient, meta, balanced, hets -> [ meta, balanced, hets ]
-                }
-
-                LP_PHASED_BALANCE(
-                    lp_phased_balance_inputs,
-                    lambda_lp_phased_balance,
-                    cnloh_lp_phased_balance,
-                    major_lp_phased_balance,
-                    allin_lp_phased_balance,
-                    marginal_lp_phased_balance,
-                    from_maf_lp_phased_balance,
-                    mask_lp_phased_balance,
-                    ism_lp_phased_balance,
-                    epgap_lp_phased_balance,
-                    hets_thresh_lp_phased_balance,
-                    min_bins_lp_phased_balance,
-                    min_width_lp_phased_balance,
-                    trelim_lp_phased_balance,
-                    reward_lp_phased_balance,
-                    nodefileind_lp_phased_balance,
-                    tilim_lp_phased_balance
-                )
-
-                lp_phased_balance_balanced_gg = Channel.empty().mix(LP_PHASED_BALANCE.out.lp_phased_balance_balanced_gg)
-                lp_phased_balance_binstats_gg = Channel.empty().mix(LP_PHASED_BALANCE.out.lp_phased_balance_binstats_gg)
-                lp_phased_balance_unphased_allelic_gg = Channel.empty().mix(LP_PHASED_BALANCE.out.lp_phased_balance_unphased_allelic_gg)
+                het_pileups_for_joining = input_sample.map{ meta, cov, hets, ggraph -> [ meta.patient, hets ] }
             } else {
                 if (tools_used.contains('gridss')) {
-                    jabba_rds_with_gridss_for_joining = jabba_rds_with_gridss.map{ meta, rds -> [ meta.patient, meta, rds ] }
-                    non_integer_balance_w_gridss_inputs = jabba_rds_with_gridss_for_joining
+                    jabba_rds_with_gridss_for_joining = jabba_rds_with_gridss.map{ meta, rds -> [ meta.patient, rds ] }
+                    non_integer_balance_inputs = meta_for_joining
+                        .join(jabba_rds_with_gridss_for_joining)
                         .join(tumor_dryclean_cov_for_joining)
                         .join(het_pileups_for_joining)
                         .map{ patient, meta, rds, cov, hets -> [ meta, rds, cov, hets ] }
 
-                    NON_INTEGER_BALANCE_WITH_GRIDSS(
-                        non_integer_balance_w_gridss_inputs,
-                        field_non_integer_balance,
-                        hets_thresh_non_integer_balance,
-                        mask_non_integer_balance,
-                        overwrite_non_integer_balance,
-                        lambda_non_integer_balance,
-                        allin_non_integer_balance,
-                        fix_thresh_non_integer_balance,
-                        nodebounds_non_integer_balance,
-                        ism_non_integer_balance,
-                        build_non_integer_balance,
-                        epgap_non_integer_balance,
-                        tilim_non_integer_balance,
-                        gurobi_non_integer_balance,
-                        fasta,
-                        pad_non_integer_balance
-                    )
-                    versions_w_gridss = Channel.empty().mix(NON_INTEGER_BALANCE_WITH_GRIDSS.out.versions)
-
-                    non_integer_balance_w_gridss_balanced_gg = Channel.empty().mix(NON_INTEGER_BALANCE_WITH_GRIDSS.out.non_integer_balance_balanced_gg)
-                    non_integer_balance_w_gridss_hets_gg = Channel.empty().mix(NON_INTEGER_BALANCE_WITH_GRIDSS.out.non_integer_balance_hets_gg)
-
-                    non_integer_balance_w_gridss_balanced_gg_for_joining = non_integer_balance_w_gridss_balanced_gg.map{ meta, balanced -> [ meta.patient, meta, balanced ] }
-                    lp_phased_balance_w_gridss_inputs = non_integer_balance_w_gridss_balanced_gg_for_joining.join(het_pileups_for_joining)
-                        .map{ patient, meta, balanced, hets -> [ meta, balanced, hets ] }
-
-                    LP_PHASED_BALANCE_WITH_GRIDSS(
-                        lp_phased_balance_w_gridss_inputs,
-                        lambda_lp_phased_balance,
-                        cnloh_lp_phased_balance,
-                        major_lp_phased_balance,
-                        allin_lp_phased_balance,
-                        marginal_lp_phased_balance,
-                        from_maf_lp_phased_balance,
-                        mask_lp_phased_balance,
-                        ism_lp_phased_balance,
-                        epgap_lp_phased_balance,
-                        hets_thresh_lp_phased_balance,
-                        min_bins_lp_phased_balance,
-                        min_width_lp_phased_balance,
-                        trelim_lp_phased_balance,
-                        reward_lp_phased_balance,
-                        nodefileind_lp_phased_balance,
-                        tilim_lp_phased_balance
-                    )
-
-                    lp_phased_balance_w_gridss_balanced_gg = Channel.empty().mix(LP_PHASED_BALANCE_WITH_GRIDSS.out.lp_phased_balance_balanced_gg)
-                    lp_phased_balance_w_gridss_binstats_gg = Channel.empty().mix(LP_PHASED_BALANCE_WITH_GRIDSS.out.lp_phased_balance_binstats_gg)
-                    lp_phased_balance_w_gridss_unphased_allelic_gg = Channel.empty().mix(LP_PHASED_BALANCE_WITH_GRIDSS.out.lp_phased_balance_unphased_allelic_gg)
                 }
                 if (tools_used.contains('svaba')) {
-                    jabba_rds_with_svaba_for_joining = jabba_rds_with_svaba.map{ meta, rds -> [ meta.patient, meta, rds ] }
-                    non_integer_balance_w_svaba_inputs = jabba_rds_with_svaba_for_joining
+                    jabba_rds_with_svaba_for_joining = jabba_rds_with_svaba.map{ meta, rds -> [ meta.patient, rds ] }
+                    non_integer_balance_inputs = meta_for_joining
+                        .join(jabba_rds_with_svaba_for_joining)
                         .join(tumor_dryclean_cov_for_joining)
                         .join(het_pileups_for_joining)
                         .map{ patient, meta, rds, cov, hets -> [ meta, rds, cov, hets ] }
-                    NON_INTEGER_BALANCE_WITH_SVABA(
-                        non_integer_balance_w_svaba_inputs,
-                        field_non_integer_balance,
-                        hets_thresh_non_integer_balance,
-                        mask_non_integer_balance,
-                        overwrite_non_integer_balance,
-                        lambda_non_integer_balance,
-                        allin_non_integer_balance,
-                        fix_thresh_non_integer_balance,
-                        nodebounds_non_integer_balance,
-                        ism_non_integer_balance,
-                        build_non_integer_balance,
-                        epgap_non_integer_balance,
-                        tilim_non_integer_balance,
-                        gurobi_non_integer_balance,
-                        fasta,
-                        pad_non_integer_balance
-                    )
-                    versions_w_svaba = Channel.empty().mix(NON_INTEGER_BALANCE_WITH_SVABA.out.versions)
-
-                    non_integer_balance_w_svaba_balanced_gg = Channel.empty().mix(NON_INTEGER_BALANCE_WITH_SVABA.out.non_integer_balance_balanced_gg)
-                    non_integer_balance_w_svaba_hets_gg = Channel.empty().mix(NON_INTEGER_BALANCE_WITH_SVABA.out.non_integer_balance_hets_gg)
-
-                    non_integer_balance_w_svaba_balanced_gg_for_joining = non_integer_balance_w_svaba_balanced_gg.map{ meta, balanced -> [ meta.patient, meta, balanced ] }
-                    lp_phased_balance_w_svaba_inputs = non_integer_balance_w_svaba_balanced_gg_for_joining.join(het_pileups_for_joining)
-                        .map{ patient, meta, balanced, hets -> [ meta, balanced, hets ] }
-
-                    LP_PHASED_BALANCE_WITH_SVABA(
-                        lp_phased_balance_w_svaba_inputs,
-                        lambda_lp_phased_balance,
-                        cnloh_lp_phased_balance,
-                        major_lp_phased_balance,
-                        allin_lp_phased_balance,
-                        marginal_lp_phased_balance,
-                        from_maf_lp_phased_balance,
-                        mask_lp_phased_balance,
-                        ism_lp_phased_balance,
-                        epgap_lp_phased_balance,
-                        hets_thresh_lp_phased_balance,
-                        min_bins_lp_phased_balance,
-                        min_width_lp_phased_balance,
-                        trelim_lp_phased_balance,
-                        reward_lp_phased_balance,
-                        nodefileind_lp_phased_balance,
-                        tilim_lp_phased_balance
-                    )
-
-                    lp_phased_balance_balanced_gg = Channel.empty().mix(LP_PHASED_BALANCE_WITH_SVABA.out.lp_phased_balance_balanced_gg)
-                    lp_phased_balance_binstats_gg = Channel.empty().mix(LP_PHASED_BALANCE_WITH_SVABA.out.lp_phased_balance_binstats_gg)
-                    lp_phased_balance_unphased_allelic_gg = Channel.empty().mix(LP_PHASED_BALANCE_WITH_SVABA.out.lp_phased_balance_unphased_allelic_gg)
                 }
             }
+
+            NON_INTEGER_BALANCE(
+                non_integer_balance_inputs,
+                field_non_integer_balance,
+                hets_thresh_non_integer_balance,
+                mask_non_integer_balance,
+                overwrite_non_integer_balance,
+                lambda_non_integer_balance,
+                allin_non_integer_balance,
+                fix_thresh_non_integer_balance,
+                nodebounds_non_integer_balance,
+                ism_non_integer_balance,
+                build_non_integer_balance,
+                epgap_non_integer_balance,
+                tilim_non_integer_balance,
+                gurobi_non_integer_balance,
+                fasta,
+                fasta_fai,
+                bwa,
+                pad_non_integer_balance
+            )
+            versions = Channel.empty().mix(NON_INTEGER_BALANCE.out.versions)
+
+            non_integer_balance_balanced_gg = Channel.empty().mix(NON_INTEGER_BALANCE.out.non_integer_balance_balanced_gg)
+            non_integer_balance_hets_gg = Channel.empty().mix(NON_INTEGER_BALANCE.out.non_integer_balance_hets_gg)
+
+            non_integer_balance_balanced_gg_for_joining = non_integer_balance_balanced_gg.map{ meta, balanced -> [ meta.patient, balanced ] }
+
+            lp_phased_balance_inputs = meta_for_joining
+                .join(non_integer_balance_balanced_gg_for_joining)
+                .join(het_pileups_for_joining)
+                .map{ patient, meta, balanced_gg, hets -> [ meta, balanced_gg, hets ] }
+
+            LP_PHASED_BALANCE(
+                lp_phased_balance_inputs,
+                lambda_lp_phased_balance,
+                cnloh_lp_phased_balance,
+                major_lp_phased_balance,
+                allin_lp_phased_balance,
+                marginal_lp_phased_balance,
+                from_maf_lp_phased_balance,
+                mask_lp_phased_balance,
+                ism_lp_phased_balance,
+                epgap_lp_phased_balance,
+                hets_thresh_lp_phased_balance,
+                min_bins_lp_phased_balance,
+                min_width_lp_phased_balance,
+                trelim_lp_phased_balance,
+                reward_lp_phased_balance,
+                nodefileind_lp_phased_balance,
+                tilim_lp_phased_balance
+                )
+
+            lp_phased_balance_balanced_gg = Channel.empty().mix(LP_PHASED_BALANCE.out.lp_phased_balance_balanced_gg)
+            lp_phased_balance_binstats_gg = Channel.empty().mix(LP_PHASED_BALANCE.out.lp_phased_balance_binstats_gg)
+            lp_phased_balance_unphased_allelic_gg = Channel.empty().mix(LP_PHASED_BALANCE.out.lp_phased_balance_unphased_allelic_gg)
         }
     }
 }

From c66190707e47b869cd753bb673057d319fb52a89 Mon Sep 17 00:00:00 2001
From: shihabdider <shihabdider@gmail.com>
Date: Wed, 8 May 2024 16:25:22 -0400
Subject: [PATCH 2/3] refactor: put allelic_cn before events calling

---
 bin/ascat_seg.R           |  20 +--
 bin/lp_phased_balance.R   |  85 ++++++-----
 bin/non_integer_balance.R |   6 +-
 tests/nextflow.config     |   2 +-
 workflows/nfjabba.nf      | 295 ++++++++++++--------------------------
 5 files changed, 152 insertions(+), 256 deletions(-)

diff --git a/bin/ascat_seg.R b/bin/ascat_seg.R
index 835a6df..dfe0024 100644
--- a/bin/ascat_seg.R
+++ b/bin/ascat_seg.R
@@ -1,6 +1,6 @@
 {
     library(optparse)
-    
+
     ## DO NOT FAIL SILENTLY
     options(error = function() {traceback(2); quit("no", 1)})
 
@@ -35,7 +35,7 @@
         opt = parse_args(parseobj)
 
         print(opt)
-        
+
         print(.libPaths())
         options(error=function()traceback(2))
 
@@ -59,7 +59,7 @@
     #'
     #' @param gg (gGraph) diploid gGraph (expect cn.low and cn.high as node metadata)
     #' @param verbose (logical) default FALSE
-    #' 
+    #'
     #' @return gGraph with field allele and cn
     diploid2haploid = function(gg, verbose = FALSE) {
         og.nodes.gr = gg$nodes$gr[, c("cn.low", "cn.high", "var.low", "var.high", "cn", "node.id", "nhets")]
@@ -286,7 +286,7 @@
                 ##                                                          log.p = T) +
                 ##                                                  pnbinom(y, mu = centers[tot.cn - j + 1],
                 ##                                                          size = centers[tot.cn - j + 1],
-                ##                                                          log.p = T))) 
+                ##                                                          log.p = T)))
                 ll = sapply(0:(floor(tot.cn/2)), function(j) sum(ppois(x,centers[j+1], log.p = T) +
                                                                 ppois(y,centers[tot.cn-j+1],log.p = T)))
                 ll = ll - min(ll)
@@ -300,7 +300,7 @@
                         ##                                                          log.p = T) +
                         ##                                                  pnbinom(y, mu = centers[tot.cn - j],
                         ##                                                          size = centers[tot.cn - j] / 2,
-                        ##                                                          log.p = T))) 
+                        ##                                                          log.p = T)))
                         ll = sapply(0:(floor((tot.cn - 1)/2)), function(j) sum(ppois(x,centers[j+1], log.p = T) +
                                                                     ppois(y,centers[tot.cn-j],log.p = T)))
                         ll = ll - min(ll)
@@ -315,7 +315,7 @@
                     ##                                                          log.p = T) +
                     ##                                                  pnbinom(y, mu = centers[tot.cn - j + 2],
                     ##                                                          size = centers[tot.cn - j + 2],
-                    ##                                                          log.p = T))) 
+                    ##                                                          log.p = T)))
                     ll = sapply(0:(floor((tot.cn + 1)/2)), function(j) sum(ppois(x,centers[j+1], log.p = T) +
                                                                             ppois(y,centers[tot.cn-j+2],log.p = T)))
                     ll = ll - min(ll)
@@ -363,7 +363,7 @@
 
         ## ## ###########
         ## ## phasing
-        ## ## ########### 
+        ## ## ###########
 
         ## ## iterate through all reference junctions and apply (wishful thinking) heuristic
         ## ##
@@ -561,7 +561,7 @@
     #' @param agt.fname (character) path to sites.txt
     #' @param min.frac (numeric) between 0 and 1, min frequency in normal to count as het site
     #' @param max.frac (numeric) between 0 and 1, max frequency in normal to count as het site
-    #' 
+    #'
     #' @return allele gTrack
     grab.hets = function(agt.fname = NULL,
                         min.frac = 0.2,
@@ -605,7 +605,7 @@
         if (!file.exists(agt.fname)) {
             stop("invalid file")
         }
-        
+
         gr = readRDS(agt.fname)
 
         if (!inherits(gr, "GRanges")) {
@@ -748,6 +748,8 @@
                             median(ratio, na.rm = T)] > 0.8,
                     "XX",
                     "XY")
+    # handle single chr case as XY
+    gender = ifelse(!is.na(gender), gender, "XY")
     message("The gender of this sample: ", gender)
 
     message("Starting ASCAT!!")
diff --git a/bin/lp_phased_balance.R b/bin/lp_phased_balance.R
index b3323e4..145635a 100644
--- a/bin/lp_phased_balance.R
+++ b/bin/lp_phased_balance.R
@@ -705,45 +705,51 @@
                                     dt2gr(hets.dt[, .(seqnames, start, end, strand = "*")]),
                                     return.type = "data.table")
 
-    nodes.ov.hets[, count := .N, by = query.id]
-
-    ## we should only segment relatively wide nodes with low copy number
-    nodes.ov.hets = nodes.ov.hets[count >= 100,]
-
-    nodes.to.segment = nodes.ov.hets[, unique(query.id)]
-
-    new.sl = seqlengths(gg)
-
-    segs = lapply(nodes.to.segment,
-                  function(qid) {
-                      message("Starting segmentation for : ", qid)
-                      hets.subset.dt = hets.dt[nodes.ov.hets[query.id == qid, subject.id],]
-                      cna = CNA(hets.subset.dt[, BAF],
-                                hets.subset.dt[, as.character(seqnames)],
-                                hets.subset.dt[, start],
-                                data.type = "logratio")
-                      seg = segment(smooth.CNA(cna),
-                                    alpha = 1e-5,
-                                    verbose = TRUE)
-                      utils::capture.output({seg_dt = print(seg); setDT(seg_dt)},
-                                            type = "output",
-                                            file = "/dev/null")
-                      out = seg2gr(seg_dt[!(is.na(seg.mean) | is.na(loc.start) | is.na(loc.end))], new.sl)
-                      out = gr.fix(out, new.sl, drop = T)
-                      ## get the number of hets per segment
-                      values(out)[, "nhets"] = out %N% hets.gr
-                      ## make sure the segment is on the order of high kbp
-                      out = out %Q% (nhets > 50)
-                      message("Number of segments: ", length(out))
-                      names(out) = NULL
-                      if (length(out) > 1) {
-                          return(gr.start(out[2:length(out)]))
-                      }
-                      return(GRanges())
-                      message("Finished!")
-                  })
-
-    segs = do.call(grbind, segs)
+    ## With no node/het overlaps there is nothing to segment: set segs = NULL so
+    ## the is.null(segs) check below reports that there are no extra breakends.
+    if (nrow(nodes.ov.hets) == 0) {
+        message("No nodes overlap with hets")
+        segs = NULL
+    } else {
+        nodes.ov.hets[, count := .N, by = query.id]
+        ## we should only segment relatively wide nodes with low copy number
+        nodes.ov.hets = nodes.ov.hets[count >= 100,]
+        nodes.to.segment = nodes.ov.hets[, unique(query.id)]
+        new.sl = seqlengths(gg)
+
+        ## per node: segment het BAF; segment starts after the first are returned
+        segs = lapply(nodes.to.segment,
+                      function(qid) {
+                          message("Starting segmentation for : ", qid)
+                          hets.subset.dt = hets.dt[nodes.ov.hets[query.id == qid, subject.id],]
+                          cna = CNA(hets.subset.dt[, BAF],
+                                    hets.subset.dt[, as.character(seqnames)],
+                                    hets.subset.dt[, start],
+                                    data.type = "logratio")
+                          seg = segment(smooth.CNA(cna),
+                                        alpha = 1e-5,
+                                        verbose = TRUE)
+                          utils::capture.output({seg_dt = print(seg); setDT(seg_dt)},
+                                                type = "output",
+                                                file = "/dev/null")
+                          out = seg2gr(seg_dt[!(is.na(seg.mean) | is.na(loc.start) | is.na(loc.end))], new.sl)
+                          out = gr.fix(out, new.sl, drop = TRUE)
+                          ## get the number of hets per segment
+                          values(out)[, "nhets"] = out %N% hets.gr
+                          ## make sure the segment is on the order of high kbp
+                          out = out %Q% (nhets > 50)
+                          message("Number of segments: ", length(out))
+                          names(out) = NULL
+                          ## log before returning: placed after the returns this never ran
+                          message("Finished!")
+                          if (length(out) > 1) {
+                              return(gr.start(out[2:length(out)]))
+                          }
+                          return(GRanges())
+                      })
+
+        segs = do.call(grbind, segs)
+    }
 
     if (is.null(segs)) {
         message("No extra breakends")
@@ -789,6 +795,7 @@
         ##binstats.gg$edges$mark(reward = new.gg$edges$dt[match(binstats.gg$edges$dt$og.edge.id, edge.id), reward])
         binstats.gg$edges$mark(cnloh = new.gg$edges$dt[match(binstats.gg$edges$dt$og.edge.id, edge.id), cnloh])
         ## binstats.gg$edges$mark(cnloh = gg$edges$dt[match(binstats.gg$edges$dt$og.edge.id, edge.id), cnloh])
+        print('tracer2')
     } else {
         tmp = as.data.table(readRDS(opt$hets))
         tmp = rbind(tmp[, .(seqnames, start, end, strand = "*",
diff --git a/bin/non_integer_balance.R b/bin/non_integer_balance.R
index 041e2cd..4ea7192 100644
--- a/bin/non_integer_balance.R
+++ b/bin/non_integer_balance.R
@@ -154,7 +154,11 @@
     if ((opt$overwrite) | (!file.exists(binstats.gg.fn))) {
 
         message("Starting binstats")
-        binstats.gg = gGnome::binstats(jab, bins = cov, field = opt$field, lp = TRUE)
+        if (ncn.x == 0) { # handle no sex chr case by disabling loess
+            binstats.gg = gGnome::binstats(jab, bins = cov, field = opt$field, lp = TRUE, loess=FALSE)
+        } else {
+            binstats.gg = gGnome::binstats(jab, bins = cov, field = opt$field, lp = TRUE)
+        }
 
         ## save binstats
         saveRDS(binstats.gg, binstats.gg.fn)
diff --git a/tests/nextflow.config b/tests/nextflow.config
index 6c7e174..b16a731 100644
--- a/tests/nextflow.config
+++ b/tests/nextflow.config
@@ -278,7 +278,7 @@ params {
         blacklist_coverage_jabba         = "${params.mski_base}/JaBbA/blacklist_coverage/hg19/maskA_re.rds"
         build_non_integer_balance        = "hg19"
         mask_non_integer_balance         = "${params.mski_base}/allelic_cn/non_integer_balance/hg19/mask_with_segdups.rds"
-        mask_lp_phased_balance           = "${params.mski_base}/JaBbA/blacklist_coverage/hg19/maskA_re.rds"
+        mask_lp_phased_balance           = "${params.mski_base}/allelic_cn/lp_phased_balance/lp_phased_balance_maskA_re.rds"
 	}
 	'GATK.GRCh38' {
             fasta                        = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta"
diff --git a/workflows/nfjabba.nf b/workflows/nfjabba.nf
index f908ac0..171bcc9 100644
--- a/workflows/nfjabba.nf
+++ b/workflows/nfjabba.nf
@@ -521,27 +521,17 @@ include { COV_CBS as CBS                              } from '../subworkflows/lo
 
 // JaBbA
 include { COV_JUNC_JABBA as JABBA                     } from '../subworkflows/local/jabba/main'
-include { COV_JUNC_JABBA as JABBA_WITH_SVABA          } from '../subworkflows/local/jabba/main'
-include { COV_JUNC_JABBA as JABBA_WITH_GRIDSS         } from '../subworkflows/local/jabba/main'
 
 // Events
 include { GGRAPH_EVENTS as EVENTS                            } from '../subworkflows/local/events/main'
-include { GGRAPH_EVENTS as EVENTS_WITH_GRIDSS                } from '../subworkflows/local/events/main'
-include { GGRAPH_EVENTS as EVENTS_WITH_SVABA                 } from '../subworkflows/local/events/main'
 
 // Fusions
 include { GGRAPH_FUSIONS as FUSIONS                            } from '../subworkflows/local/fusions/main'
-include { GGRAPH_FUSIONS as FUSIONS_WITH_GRIDSS                } from '../subworkflows/local/fusions/main'
-include { GGRAPH_FUSIONS as FUSIONS_WITH_SVABA                 } from '../subworkflows/local/fusions/main'
 
 // Alleic CN
 include { COV_GGRAPH_NON_INTEGER_BALANCE as NON_INTEGER_BALANCE                            } from '../subworkflows/local/allelic_cn/main'
-include { COV_GGRAPH_NON_INTEGER_BALANCE as NON_INTEGER_BALANCE_WITH_GRIDSS                } from '../subworkflows/local/allelic_cn/main'
-include { COV_GGRAPH_NON_INTEGER_BALANCE as NON_INTEGER_BALANCE_WITH_SVABA                 } from '../subworkflows/local/allelic_cn/main'
 
 include { COV_GGRAPH_LP_PHASED_BALANCE as LP_PHASED_BALANCE                            } from '../subworkflows/local/allelic_cn/main'
-include { COV_GGRAPH_LP_PHASED_BALANCE as LP_PHASED_BALANCE_WITH_GRIDSS                } from '../subworkflows/local/allelic_cn/main'
-include { COV_GGRAPH_LP_PHASED_BALANCE as LP_PHASED_BALANCE_WITH_SVABA                 } from '../subworkflows/local/allelic_cn/main'
 
 
 /*
@@ -671,9 +661,9 @@ workflow NFJABBA {
     boolean runDryClean = false
     boolean runAscat = false
     boolean runJabba = false
+    boolean runAllelicCN = false
     boolean runEvents = false
     boolean runFusions = false
-    boolean runAllelicCN = false
 
     // Set flags based on params.step using a cascading approach
     // Fall through to the next case if the previous case is true
@@ -698,12 +688,12 @@ workflow NFJABBA {
             runAscat = true
         case 'jabba':
             runJabba = true
+        case 'allelic_cn':
+            runAllelicCN = true
         case 'events':
             runEvents = true
         case 'fusions':
             runFusions = true
-        case 'allelic_cn':
-            runAllelicCN = true
             break
         default:
             error "Invalid step: ${params.step}"
@@ -1387,46 +1377,6 @@ workflow NFJABBA {
                         ]
                     }
 
-                JABBA(
-                    jabba_inputs
-                    blacklist_junctions_jabba,
-                    geno_jabba,
-                    indel_jabba,
-                    tfield_jabba,
-                    iter_jabba,
-                    rescue_window_jabba,
-                    rescue_all_jabba,
-                    nudgebalanced_jabba,
-                    edgenudge_jabba,
-                    strict_jabba,
-                    allin_jabba,
-                    field_jabba,
-                    maxna_jabba,
-                    blacklist_coverage_jabba,
-                    purity_jabba,
-                    pp_method_jabba,
-                    cnsignif_jabba,
-                    slack_jabba,
-                    linear_jabba,
-                    tilim_jabba,
-                    epgap_jabba,
-                    fix_thres_jabba,
-                    lp_jabba,
-                    ism_jabba,
-                    filter_loose_jabba,
-                    gurobi_jabba,
-                    verbose_jabba
-                )
-
-                jabba_rds           = Channel.empty().mix(JABBA.out.jabba_rds)
-                jabba_gg            = Channel.empty().mix(JABBA.out.jabba_gg)
-                jabba_vcf           = Channel.empty().mix(JABBA.out.jabba_vcf)
-                jabba_raw_rds       = Channel.empty().mix(JABBA.out.jabba_raw_rds)
-                opti                = Channel.empty().mix(JABBA.out.opti)
-                jabba_seg           = Channel.empty().mix(JABBA.out.jabba_seg)
-                karyograph          = Channel.empty().mix(JABBA.out.karyograph)
-                versions            = versions.mix(JABBA.out.versions)
-
             } else {
 
                 meta_for_joining = tumor_dryclean_cov.map{ meta, tumor_cov -> [meta.patient, meta] } // can use any of the inputs to get the meta data
@@ -1449,14 +1399,14 @@ workflow NFJABBA {
                     .join(cbs_seg_rds_for_joining)
                     .join(cbs_nseg_rds_for_joining)
 
-                if (tools_used.contains('svaba')) {
+                if (tools_used.contains('gridss')) {
 
-                    vcf_from_sv_calling_for_joining = vcf_from_sv_calling.map{ meta, juction -> [ meta.patient, junction ] }
-                    unfiltered_som_sv_for_joining = j_supp.map{ meta, j_supp -> [ meta.patient, j_supp ] }
+                    vcf_from_sv_calling_gridss_for_joining = vcf_from_sv_calling_gridss.map{ meta, junction -> [ meta.patient, junction ] }
+                    unfiltered_som_sv_gridss_for_joining = unfiltered_som_sv_gridss.map{ meta, j_supp -> [ meta.patient, j_supp ] }
 
-                    jabba_w_svaba_inputs = jabba_inputs
-                        .join(vcf_from_sv_calling_for_joining)
-                        .join(unfiltered_som_sv_for_joining)
+                    jabba_inputs = jabba_inputs
+                        .join(vcf_from_sv_calling_gridss_for_joining)
+                        .join(unfiltered_som_sv_gridss_for_joining)
                         .map{ patient, meta, cov, hets, ploidy, seg, nseg, junction, j_supp ->
                             [
                                 meta,
@@ -1469,57 +1419,16 @@ workflow NFJABBA {
                                 nseg
                             ]
                         }
-
-                    JABBA_WITH_SVABA(
-                        jabba_w_svaba_inputs,
-                        blacklist_junctions_jabba,
-                        geno_jabba,
-                        indel_jabba,
-                        tfield_jabba,
-                        iter_jabba,
-                        rescue_window_jabba,
-                        rescue_all_jabba,
-                        nudgebalanced_jabba,
-                        edgenudge_jabba,
-                        strict_jabba,
-                        allin_jabba,
-                        field_jabba,
-                        maxna_jabba,
-                        blacklist_coverage_jabba,
-                        purity_jabba,
-                        pp_method_jabba,
-                        cnsignif_jabba,
-                        slack_jabba,
-                        linear_jabba,
-                        tilim_jabba,
-                        epgap_jabba,
-                        fix_thres_jabba,
-                        lp_jabba,
-                        ism_jabba,
-                        filter_loose_jabba,
-                        gurobi_jabba,
-                        verbose_jabba
-                    )
-
-                    jabba_rds_with_svaba           = Channel.empty().mix(JABBA_WITH_SVABA.out.jabba_rds)
-                    jabba_gg_with_svaba            = Channel.empty().mix(JABBA_WITH_SVABA.out.jabba_gg)
-                    jabba_vcf_with_svaba           = Channel.empty().mix(JABBA_WITH_SVABA.out.jabba_vcf)
-                    jabba_raw_rds_with_svaba       = Channel.empty().mix(JABBA_WITH_SVABA.out.jabba_raw_rds)
-                    opti_with_svaba                = Channel.empty().mix(JABBA_WITH_SVABA.out.opti)
-                    jabba_seg_with_svaba           = Channel.empty().mix(JABBA_WITH_SVABA.out.jabba_seg)
-                    karyograph_with_svaba          = Channel.empty().mix(JABBA_WITH_SVABA.out.karyograph)
-                    versions_with_svaba            = versions.mix(JABBA_WITH_SVABA.out.versions)
-
                 }
 
-                if (tools_used.contains('gridss')) {
+                else if (tools_used.contains('svaba')) {
 
-                    vcf_from_sv_calling_gridss_for_joining = vcf_from_sv_calling_gridss.map{ meta, junction -> [ meta.patient, junction ] }
-                    unfiltered_som_sv_gridss_for_joining = unfiltered_som_sv_gridss.map{ meta, j_supp -> [ meta.patient, j_supp ] }
+                    vcf_from_sv_calling_for_joining = vcf_from_sv_calling.map{ meta, junction -> [ meta.patient, junction ] } // re-key by patient for the joins below
+                    unfiltered_som_sv_for_joining = j_supp.map{ meta, j_supp -> [ meta.patient, j_supp ] }
 
-                    jabba_w_gridss_inputs = jabba_inputs
-                        .join(vcf_from_sv_calling_gridss_for_joining)
-                        .join(unfiltered_som_sv_gridss_for_joining)
+                    jabba_inputs = jabba_inputs
+                        .join(vcf_from_sv_calling_for_joining)
+                        .join(unfiltered_som_sv_for_joining)
                         .map{ patient, meta, cov, hets, ploidy, seg, nseg, junction, j_supp ->
                             [
                                 meta,
@@ -1532,93 +1441,48 @@ workflow NFJABBA {
                                 nseg
                             ]
                         }
-
-                    JABBA_WITH_GRIDSS(
-                        jabba_w_gridss_inputs,
-                        blacklist_junctions_jabba,
-                        geno_jabba,
-                        indel_jabba,
-                        tfield_jabba,
-                        iter_jabba,
-                        rescue_window_jabba,
-                        rescue_all_jabba,
-                        nudgebalanced_jabba,
-                        edgenudge_jabba,
-                        strict_jabba,
-                        allin_jabba,
-                        field_jabba,
-                        maxna_jabba,
-                        blacklist_coverage_jabba,
-                        purity_jabba,
-                        pp_method_jabba,
-                        cnsignif_jabba,
-                        slack_jabba,
-                        linear_jabba,
-                        tilim_jabba,
-                        epgap_jabba,
-                        fix_thres_jabba,
-                        lp_jabba,
-                        ism_jabba,
-                        filter_loose_jabba,
-                        gurobi_jabba,
-                        verbose_jabba
-                    )
-
-                    jabba_rds_with_gridss           = Channel.empty().mix(JABBA_WITH_GRIDSS.out.jabba_rds)
-                    jabba_gg_with_gridss            = Channel.empty().mix(JABBA_WITH_GRIDSS.out.jabba_gg)
-                    jabba_seg_with_gridss           = Channel.empty().mix(JABBA_WITH_GRIDSS.out.jabba_seg)
-                    karyograph_with_gridss          = Channel.empty().mix(JABBA_WITH_GRIDSS.out.karyograph)
-                    versions_with_gridss            = versions.mix(JABBA_WITH_GRIDSS.out.versions)
-
-                }
-            }
-        }
-    }
-
-    if (runEvents) {
-        if (tools_used.contains('events')) {
-            if (params.step == 'events') {
-                EVENTS(input_sample, fasta)
-                versions = Channel.empty().mix(EVENTS.out.versions)
-
-                events_output = Channel.empty().mix(EVENTS.out.events_output)
-            } else {
-                if (tools_used.contains('gridss')) {
-                    EVENTS_WITH_GRIDSS(jabba_rds_with_gridss, fasta)
-                    events_w_gridss_versions = Channel.empty().mix(EVENTS_WITH_GRIDSS.out.versions)
-
-                    events_w_gridss_output = Channel.empty().mix(EVENTS_WITH_GRIDSS.out.events_output)
-                }
-                if (tools_used.contains('svaba')) {
-                    EVENTS_WITH_SVABA(jabba_rds_with_svaba, fasta)
-                    events_w_svaba_versions = Channel.empty().mix(EVENTS_WITH_SVABA.out.versions)
-
-                    events_w_svaba_output = Channel.empty().mix(EVENTS_WITH_SVABA.out.events_output)
                 }
-            }
-        }
-    }
-
-    if (runFusions) {
-        if (tools_used.contains('fusions')) {
-            if (params.step == 'fusions') {
-                FUSIONS(input_sample, gencode_fusions)
-                fusions_output = Channel.empty().mix(FUSIONS.out.fusions_output)
-                versions = Channel.empty().mix(FUSIONS.out.versions)
 
-            } else {
-                if (tools_used.contains('gridss')) {
-                    FUSIONS_WITH_GRIDSS(jabba_rds_with_gridss, gencode_fusions)
-                    fusions_w_gridss_output = Channel.empty().mix(FUSIONS_WITH_GRIDSS.out.fusions_output)
-                    fusions_w_gridss_versions = Channel.empty().mix(FUSIONS_WITH_GRIDSS.out.versions)
+                JABBA(
+                    jabba_inputs,
+                    blacklist_junctions_jabba,
+                    geno_jabba,
+                    indel_jabba,
+                    tfield_jabba,
+                    iter_jabba,
+                    rescue_window_jabba,
+                    rescue_all_jabba,
+                    nudgebalanced_jabba,
+                    edgenudge_jabba,
+                    strict_jabba,
+                    allin_jabba,
+                    field_jabba,
+                    maxna_jabba,
+                    blacklist_coverage_jabba,
+                    purity_jabba,
+                    pp_method_jabba,
+                    cnsignif_jabba,
+                    slack_jabba,
+                    linear_jabba,
+                    tilim_jabba,
+                    epgap_jabba,
+                    fix_thres_jabba,
+                    lp_jabba,
+                    ism_jabba,
+                    filter_loose_jabba,
+                    gurobi_jabba,
+                    verbose_jabba
+                )
 
-                }
-                if (tools_used.contains('svaba')) {
-                    FUSIONS_WITH_SVABA(jabba_rds_with_svaba, gencode_fusions)
-                    fusions_w_svaba_output = Channel.empty().mix(FUSIONS_WITH_SVABA.out.fusions_output)
-                    fusions_w_svaba_versions = Channel.empty().mix(FUSIONS_WITH_SVABA.out.versions)
+                jabba_rds           = Channel.empty().mix(JABBA.out.jabba_rds)
+                jabba_gg            = Channel.empty().mix(JABBA.out.jabba_gg)
+                jabba_vcf           = Channel.empty().mix(JABBA.out.jabba_vcf)
+                jabba_raw_rds       = Channel.empty().mix(JABBA.out.jabba_raw_rds)
+                opti                = Channel.empty().mix(JABBA.out.opti)
+                jabba_seg           = Channel.empty().mix(JABBA.out.jabba_seg)
+                karyograph          = Channel.empty().mix(JABBA.out.karyograph)
+                versions            = versions.mix(JABBA.out.versions)
 
-                }
             }
         }
     }
@@ -1629,23 +1493,12 @@ workflow NFJABBA {
                 non_integer_balance_inputs = input_sample.map{ meta, cov, hets, ggraph -> [ meta, ggraph, cov, hets ] }
                 het_pileups_for_joining = input_sample.map{ meta, cov, hets, ggraph -> [ meta.patient, hets ] }
             } else {
-                if (tools_used.contains('gridss')) {
-                    jabba_rds_with_gridss_for_joining = jabba_rds_with_gridss.map{ meta, rds -> [ meta.patient, rds ] }
-                    non_integer_balance_inputs = meta_for_joining
-                        .join(jabba_rds_with_gridss_for_joining)
-                        .join(tumor_dryclean_cov_for_joining)
-                        .join(het_pileups_for_joining)
-                        .map{ patient, meta, rds, cov, hets -> [ meta, rds, cov, hets ] }
-
-                }
-                if (tools_used.contains('svaba')) {
-                    jabba_rds_with_svaba_for_joining = jabba_rds_with_svaba.map{ meta, rds -> [ meta.patient, rds ] }
-                    non_integer_balance_inputs = meta_for_joining
-                        .join(jabba_rds_with_svaba_for_joining)
-                        .join(tumor_dryclean_cov_for_joining)
-                        .join(het_pileups_for_joining)
-                        .map{ patient, meta, rds, cov, hets -> [ meta, rds, cov, hets ] }
-                }
+                jabba_rds_for_joining = jabba_rds.map{ meta, rds -> [ meta.patient, rds ] }
+                non_integer_balance_inputs = meta_for_joining
+                    .join(jabba_rds_for_joining)
+                    .join(tumor_dryclean_cov_for_joining)
+                    .join(het_pileups_for_joining)
+                    .map{ patient, meta, rds, cov, hets -> [ meta, rds, cov, hets ] }
             }
 
             NON_INTEGER_BALANCE(
@@ -1705,6 +1558,36 @@ workflow NFJABBA {
             lp_phased_balance_unphased_allelic_gg = Channel.empty().mix(LP_PHASED_BALANCE.out.lp_phased_balance_unphased_allelic_gg)
         }
     }
+
+    if (runEvents) {
+        if (tools_used.contains('events')) {
+            if (params.step == 'events') {
+                events_input = input_sample
+            } else {
+                events_input = lp_phased_balance_balanced_gg
+            }
+            // EVENTS.out is undefined until EVENTS has been invoked, so every output is read after the call.
+            EVENTS(events_input, fasta)
+            events_versions = Channel.empty().mix(EVENTS.out.versions)
+            events_output = Channel.empty().mix(EVENTS.out.events_output)
+            if (params.step == 'events') { versions = Channel.empty().mix(EVENTS.out.versions) }
+        }
+    }
+
+    if (runFusions) {
+        if (tools_used.contains('fusions')) {
+            if (params.step == 'fusions') {
+                fusions_input = input_sample
+            } else {
+                fusions_input = lp_phased_balance_balanced_gg
+            }
+
+            FUSIONS(fusions_input, gencode_fusions)
+            fusions_output = Channel.empty().mix(FUSIONS.out.fusions_output)
+            versions = Channel.empty().mix(FUSIONS.out.versions)
+        }
+    }
+
 }
 
 /*

From 79186bd4809985923309f47b332ae7196ea49ac7 Mon Sep 17 00:00:00 2001
From: shihabdider <shihabdider@gmail.com>
Date: Sat, 11 May 2024 15:28:43 -0400
Subject: [PATCH 3/3] fix: default missing ploidy to NA, drop per-caller allelic_cn configs, publish events under Events/

---
 bin/lp_phased_balance.R        |  1 +
 conf/modules/alleic_cn.config  | 34 --------------------------------
 conf/modules/allelic_cn.config | 36 ----------------------------------
 conf/modules/events.config     |  2 +-
 4 files changed, 2 insertions(+), 71 deletions(-)
 delete mode 100644 conf/modules/alleic_cn.config

diff --git a/bin/lp_phased_balance.R b/bin/lp_phased_balance.R
index 145635a..0d8afd6 100644
--- a/bin/lp_phased_balance.R
+++ b/bin/lp_phased_balance.R
@@ -783,6 +783,7 @@
     }
 
     if(!opt$from_maf) {
+        gg$meta$ploidy = if (is.null(gg$meta$ploidy)) NA else gg$meta$ploidy  # missing ploidy -> NA; scalar if/else rather than vectorised ifelse()
         jab = zitools:::gg2jab(gg, purity = gg$meta$purity, ploidy = gg$meta$ploidy)
         jab = jabba.alleles2(jab, hets.gr, verbose = TRUE, uncoupled = TRUE, marginal = opt$marginal)
 
diff --git a/conf/modules/alleic_cn.config b/conf/modules/alleic_cn.config
deleted file mode 100644
index 0fe7d71..0000000
--- a/conf/modules/alleic_cn.config
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Config file for defining DSL2 per module options and publishing paths
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Available keys to override module options:
-        ext.args   = Additional arguments appended to command in module.
-        ext.args2  = Second set of arguments appended to command in module (multi-tool modules).
-        ext.args3  = Third set of arguments appended to command in module (multi-tool modules).
-        ext.prefix = File name prefix for output files.
-        ext.when   = When to run the module.
-----------------------------------------------------------------------------------------
-*/
-// ALLEIC_CN configs
-
-process {
-
-    withName: 'MSKILABORG_NFJABBA:NFJABBA:ALLEIC_CN:NON_INTEGER_BALANCE' {
-        ext.when         = { params.tools && params.tools.split(',').contains('alleic_cn') }
-        publishDir       = [
-                mode: params.publish_dir_mode,
-                path: { "${params.outdir}/alleic_cn/non_integer_balance/${meta.id}/" },
-                pattern: "*{.rds*,.command.*}"
-        ]
-    }
-
-    withName: 'MSKILABORG_NFJABBA:NFJABBA:ALLEIC_CN:LP_PHASED_BALANCE' {
-        ext.when         = { params.tools && params.tools.split(',').contains('alleic_cn') }
-        publishDir       = [
-                mode: params.publish_dir_mode,
-                path: { "${params.outdir}/alleic_cn/lp_phased_balance/${meta.id}/" },
-                pattern: "*{.rds*,.command.*}"
-        ]
-    }
-}
diff --git a/conf/modules/allelic_cn.config b/conf/modules/allelic_cn.config
index a5e459b..0fe7d71 100644
--- a/conf/modules/allelic_cn.config
+++ b/conf/modules/allelic_cn.config
@@ -23,24 +23,6 @@ process {
         ]
     }
 
-    withName: 'MSKILABORG_NFJABBA:NFJABBA:ALLEIC_CN:NON_INTEGER_BALANCE_WITH_GRIDSS' {
-        ext.when         = { params.tools && params.tools.split(',').contains('alleic_cn') }
-        publishDir       = [
-                mode: params.publish_dir_mode,
-                path: { "${params.outdir}/alleic_cn/non_integer_balance_with_gridss/${meta.id}/" },
-                pattern: "*{.rds*,.command.*}"
-        ]
-    }
-
-    withName: 'MSKILABORG_NFJABBA:NFJABBA:ALLEIC_CN:NON_INTEGER_BALANCE_WITH_SVABA' {
-        ext.when         = { params.tools && params.tools.split(',').contains('alleic_cn') }
-        publishDir       = [
-                mode: params.publish_dir_mode,
-                path: { "${params.outdir}/alleic_cn/non_integer_balance_with_svaba/${meta.id}/" },
-                pattern: "*{.rds*,.command.*}"
-        ]
-    }
-
     withName: 'MSKILABORG_NFJABBA:NFJABBA:ALLEIC_CN:LP_PHASED_BALANCE' {
         ext.when         = { params.tools && params.tools.split(',').contains('alleic_cn') }
         publishDir       = [
@@ -49,22 +31,4 @@ process {
                 pattern: "*{.rds*,.command.*}"
         ]
     }
-
-    withName: 'MSKILABORG_NFJABBA:NFJABBA:ALLEIC_CN:LP_PHASED_BALANCE_WITH_GRIDSS' {
-        ext.when         = { params.tools && params.tools.split(',').contains('alleic_cn') }
-        publishDir       = [
-                mode: params.publish_dir_mode,
-                path: { "${params.outdir}/alleic_cn/lp_phased_balance_with_gridss/${meta.id}/" },
-                pattern: "*{.rds*,.command.*}"
-        ]
-    }
-
-    withName: 'MSKILABORG_NFJABBA:NFJABBA:ALLEIC_CN:LP_PHASED_BALANCE_WITH_SVABA' {
-        ext.when         = { params.tools && params.tools.split(',').contains('alleic_cn') }
-        publishDir       = [
-                mode: params.publish_dir_mode,
-                path: { "${params.outdir}/alleic_cn/lp_phased_balance_with_svaba/${meta.id}/" },
-                pattern: "*{.rds*,.command.*}"
-        ]
-    }
 }
diff --git a/conf/modules/events.config b/conf/modules/events.config
index 46d6899..15e58dd 100644
--- a/conf/modules/events.config
+++ b/conf/modules/events.config
@@ -18,7 +18,7 @@ process {
         ext.when         = { params.tools && params.tools.split(',').contains('events') }
         publishDir       = [
                 mode: params.publish_dir_mode,
-                path: { "${params.outdir}/events/${meta.id}/" },
+                path: { "${params.outdir}/Events/${meta.id}/" },
                 pattern: "*{.rds*,.command.*}"
         ]
     }