Commit

Merge branch 'dev'

tanubrata committed Mar 4, 2024
2 parents f3eafbe + 349dc77 commit f9703e3
Showing 35 changed files with 1,460 additions and 118 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -7,3 +7,5 @@ testing/
testing*
*.pyc
.nf-test/
+tests/test_data/
+.nf-test.log
14 changes: 12 additions & 2 deletions .nf-test.log
@@ -1,2 +1,12 @@
-Nov-20 17:46:16.940 [main] INFO com.askimed.nf.test.App - nf-test 0.8.1
-Nov-20 17:46:16.960 [main] INFO com.askimed.nf.test.App - Arguments: [init]
+Jan-16 10:10:49.572 [main] INFO com.askimed.nf.test.App - nf-test 0.8.2
+Jan-16 10:10:49.580 [main] INFO com.askimed.nf.test.App - Arguments: [test, tests/modules/local/gridss/gridss/main.nf.test]
+Jan-16 10:10:49.580 [main] INFO com.askimed.nf.test.commands.RunTestsCommand - Load config from file /Users/diders01/projects/jabba_prod/nf-jabba/nf-test.config...
+Jan-16 10:10:49.758 [main] INFO com.askimed.nf.test.commands.RunTestsCommand - Detected 1 test files.
+Jan-16 10:10:49.827 [main] INFO com.askimed.nf.test.config.FileStaging - Create symlink '/Users/diders01/projects/jabba_prod/nf-jabba/.nf-test/tests/1a4fc5dc640a75b58bcf88f15a9b2cb6/meta/bin' --> '/Users/diders01/projects/jabba_prod/nf-jabba/bin'
+Jan-16 10:10:49.828 [main] INFO com.askimed.nf.test.config.FileStaging - Create symlink '/Users/diders01/projects/jabba_prod/nf-jabba/.nf-test/tests/1a4fc5dc640a75b58bcf88f15a9b2cb6/meta/lib' --> '/Users/diders01/projects/jabba_prod/nf-jabba/lib'
+Jan-16 10:10:49.828 [main] INFO com.askimed.nf.test.config.FileStaging - Create symlink '/Users/diders01/projects/jabba_prod/nf-jabba/.nf-test/tests/1a4fc5dc640a75b58bcf88f15a9b2cb6/meta/assets' --> '/Users/diders01/projects/jabba_prod/nf-jabba/assets'
+Jan-16 10:10:49.828 [main] DEBUG com.askimed.nf.test.core.AbstractTest - Stage 1 user provided files...
+Jan-16 10:10:49.828 [main] INFO com.askimed.nf.test.config.FileStaging - Create symlink '/Users/diders01/projects/jabba_prod/nf-jabba/.nf-test/tests/1a4fc5dc640a75b58bcf88f15a9b2cb6/meta/tests/test_data/' --> '/Users/diders01/projects/jabba_prod/nf-jabba/tests/test_data'
+Jan-16 10:10:49.830 [main] INFO com.askimed.nf.test.core.TestExecutionEngine - Started test plan
+Jan-16 10:10:49.830 [main] INFO com.askimed.nf.test.core.TestExecutionEngine - Running testsuite 'Test Process GRIDSS_GRIDSS' from file '/Users/diders01/projects/jabba_prod/nf-jabba/tests/modules/local/gridss/gridss/main.nf.test'.
+Jan-16 10:10:49.830 [main] INFO com.askimed.nf.test.core.TestExecutionEngine - Run test '1a4fc5dc: Should run GRIDSS without failures'. type: com.askimed.nf.test.lang.process.ProcessTest
37 changes: 37 additions & 0 deletions .spec.md
@@ -0,0 +1,37 @@
# Refactor
## Questions:
- Do we expect to have more than one tool per step (e.g. gridss and svaba)? No.
- Is there redundancy/dependency in the package parameters? Can parameters be
  organized into a dependency tree or otherwise hardcoded because they never
  change? Yes; this needs investigation.
- Do we care about alignment? Should the pipeline be able to start at
alignment? No.

## To Do
- Refactor output channel attribute extraction (21)
- Switch from step conditionals to cases (8)
- Switch from tools to nodes (13)
- Remove alignment step (3)
- Refactor repetitive declarations (1)

## Outline
- Switch from the "step"-based conditionals to something more declarative
  (e.g. cases; see the sketch after this outline). Having the program run a
  block of code by checking whether the starting step is included in a list
  of steps is enormously cumbersome; cases should make things more
  parsimonious and readable.
- Tools-in-step paradigm could be replaced with ungrouped nodes.
- Use functions for repetitive declarations/imports and hold their required
variables in arrays/maps.
- Clean up output channel attribute extraction. Currently very repetitive,
could be replaced with functions.
- Cut down on the number of default parameters in the config (possibly at the
  package level). Packages/processes/workflows shouldn't have so many
  parameters (see: JaBbA as the worst offender); it indicates either
  overparameterization or a dependency tree in the parameter space.
- Add *_create_csv methods for the remaining tools to generate the
  samplesheets needed to start from those tools
- Move parameter specification from nextflow.config to module-specific
  configs, then import them into nextflow.config. This keeps each process and
  its parameter configuration tightly coupled, but loses a central interface
  for modifying all the defaults in one place, which is useful when changing
  one default would affect defaults for a different parameter.
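
For illustration, the "cases" item above might look like the following Nextflow/Groovy sketch. This is hypothetical, not code from this commit; `params.step`, the tool names, and the step lists are stand-ins for whatever the refactor settles on.

```groovy
// Before: one guarded block per tool, each re-checking step membership.
if (params.step in ['sv_calling', 'coverage', 'jabba']) {
    // run GRIDSS ...
}
if (params.step in ['coverage', 'jabba']) {
    // run fragCounter ...
}

// After: a single declarative map from each tool to the entry steps that
// need it; one lookup replaces the scattered conditionals.
def steps_by_tool = [
    gridss     : ['sv_calling', 'coverage', 'jabba'],
    fragcounter: ['coverage', 'jabba'],
]
def tools_to_run = steps_by_tool.findAll { tool, steps -> params.step in steps }.keySet()
```

The last item likely maps onto Nextflow's `includeConfig` mechanism (e.g. `includeConfig 'conf/modules/fragcounter.config'` in nextflow.config), which keeps per-module defaults next to the module while still being loaded from the top-level config.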
46 changes: 46 additions & 0 deletions bigpurple.config
@@ -0,0 +1,46 @@

params {
config_profile_name = 'mskilab-org NYU BigPurple Cluster Profile'
config_profile_description = """
mskilab-org NYU School of Medicine BigPurple cluster profile to run nf-JaBbA.
!!Make sure to load both singularity/3.1 and squashfs-tools/4.3 before running nf-JaBbA with this profile!!
It is best to put the work folder on scratch, as the pipeline generates a whole lot of temporary files.
Make sure to submit the run as an SBATCH job since we don't own our own node at NYU yet!!
""".stripIndent()
config_profile_contact = "Tanubrata Dey ([email protected])"
config_profile_url = "https://www.mskilab.org/"

// Resources
max_memory = 700.GB
max_cpus = 256
max_time = 10.d
}
process {

// default SLURM node config
beforeScript = """
module load singularity/3.9.8
module load squashfs-tools/4.3
"""
.stripIndent()

executor='slurm'

// memory errors which should be retried; otherwise error out
errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
maxRetries = 3
maxErrors = '-1'

}
executor {
name = 'slurm'
queueSize = 500
submitRateLimit = '10 sec'
}
singularity {
enabled = true
autoMounts = true
cacheDir = "/gpfs/data/imielinskilab/singularity_files/nextflow_singularity_cache"
}
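
As a usage sketch (not part of this commit), a launch on BigPurple following the profile's own notes might look like this; the repository handle and scratch path are illustrative assumptions, and the module versions are taken from the profile's beforeScript:

```bash
# Hypothetical launch: load the modules the profile expects, keep the work
# directory on scratch, and submit the head job via sbatch rather than
# running it on a login node.
module load singularity/3.9.8 squashfs-tools/4.3
sbatch --wrap "nextflow run mskilab-org/nf-jabba -c bigpurple.config \
    -w /gpfs/scratch/$USER/nfjabba_work --outdir results"
```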

9 changes: 6 additions & 3 deletions bin/ascat_seg.R
@@ -625,6 +625,7 @@

if (grepl(pattern = "txt$", x = opt$variants)) {
variants.dt = fread(opt$variants)
+variants.dt[[1]] <- gsub("chr","",variants.dt[[1]])
variants.dt[, ":="(alt.count.n = as.numeric(as.character(alt.count.n)),
ref.count.n = as.numeric(as.character(ref.count.n)),
alt.count.t = as.numeric(as.character(alt.count.t)),
@@ -677,11 +678,13 @@
## ## transfer ratio
message("Transferring ratio")
## gc correct tumor and normal
+## Edit by Tanubrata: adds a fix to the column names used for GC correction when passing CBS coverage; otherwise
+## ASCAT breaks when passing raw drycleaned coverage without GC correction
if (opt$gc) {
-if ("tumor" %in% names(values(cov.gr)) & "normal" %in% names(values(cov.gr))) {
+if ("tum.counts" %in% names(values(cov.gr)) & "norm.counts" %in% names(values(cov.gr))) {
message("Applying GC correction")
-tum.gr = khtools::.gc(cov.gr, "tumor")
-norm.gr = khtools::.gc(cov.gr, "normal")
+tum.gr = khtools::.gc(cov.gr, "tum.counts")
+norm.gr = khtools::.gc(cov.gr, "norm.counts")
ratio.gr = khtools::.gc(cov.gr, "ratio")
values(cov.gr)[, opt$field] = values(ratio.gr)[, "ratio"]
} else {
4 changes: 2 additions & 2 deletions conf/base.config
@@ -102,12 +102,12 @@ process {
time = { check_max( 24.h * task.attempt, 'time' ) }
}
withName: 'SVABA' {
-cpus = 8
+cpus = 4
memory = { check_max( 72.GB * task.attempt, 'memory' ) }
time = 84.h
}
withName: 'GRIDSS' {
-cpus = { check_max( 8 * task.attempt, 'cpus' ) }
+cpus = { check_max( 4 * task.attempt, 'cpus' ) }
memory = { check_max( 72.GB * task.attempt, 'memory' ) }
time = 84.h
}
2 changes: 1 addition & 1 deletion conf/igenomes.config
@@ -88,7 +88,7 @@ params {
build_dryclean = 'hg38'
hapmap_sites = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/hapmap_3.3.hg38.vcf.gz"
pon_dryclean = "${params.mski_base}/dryclean/pon/hg38/detergent.rds"
-blacklist_coverage_jabba = "${params.mski_base}/JaBbA/blacklist_coverage/hg38/hg38.coverage.mask.rds"
+blacklist_coverage_jabba = "${params.mski_base}/JaBbA/blacklist_coverage/hg38/hg38.coverage.mask.nochr.rds"
}

'GRCh37' {
18 changes: 18 additions & 0 deletions conf/modules/fragcounter.config
@@ -33,4 +33,22 @@ process {
pattern: "*{.rds,.bw,cov*,.command.*}"
]
}

withName: 'MSKILABORG_NFJABBA:NFJABBA:NORMAL_FRAGCOUNTER:REBIN_RAW_FRAGCOUNTER' {
ext.when = { params.tools && params.tools.split(',').contains('fragcounter') }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/Coverages/fragCounter_normal/${meta.id}/" },
pattern: "*{.rds,1kb_*,.command.*}"
]
}

withName: 'MSKILABORG_NFJABBA:NFJABBA:TUMOR_FRAGCOUNTER:REBIN_RAW_FRAGCOUNTER' {
ext.when = { params.tools && params.tools.split(',').contains('fragcounter') }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/Coverages/fragCounter_tumor/${meta.id}/" },
pattern: "*{.rds,1kb_*,.command.*}"
]
}
}
2 changes: 1 addition & 1 deletion modules/local/ascat/main.nf
@@ -18,7 +18,7 @@ process ASCAT_SEG {
val(from_maf) // channel: whether to start from MAF, default=FALSE

output:
tuple val(meta), path("*ascat_pp.rds") , emit: purityploidy, optional:true
tuple val(meta), path("*ascat_pp.rds") , emit: purityploidy
tuple val(meta), path("*ascat_seg.rds") , emit: segments, optional:true
path "versions.yml" , emit: versions

2 changes: 1 addition & 1 deletion modules/local/cbs/main.nf
@@ -15,7 +15,7 @@ process CBS {
val(name)

output:
tuple val(meta), path("*cov.rds") , emit: cbs_cov_rds
tuple val(meta), path("cov.rds") , emit: cbs_cov_rds
tuple val(meta), path("seg.rds") , emit: cbs_seg_rds
tuple val(meta), path("nseg.rds") , emit: cbs_nseg_rds
path "versions.yml" , emit: versions
14 changes: 6 additions & 8 deletions modules/local/dryclean/main.nf
@@ -3,27 +3,26 @@ process DRYCLEAN {
label 'process_medium'

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-'docker://mskilab/dryclean:0.0.2':
-'mskilab/dryclean:0.0.2' }"
+'docker://mskilab/dryclean:0.0.3':
+'mskilab/dryclean:0.0.3' }"


input:
tuple val(meta), path(input)
path(pon)
-val(centered)
+val(center)
val(cbs)
val(cnsignif)
val(wholeGenome)
val(blacklist)
val(blacklist_path)
val(germline_filter)
val(germline_file)
val(human)
val(field)
val(build)

output:
tuple val(meta), path("*cov.rds") , emit: decomposed_cov, optional: true
tuple val(meta), path("*cov.rds") , emit: decomposed_cov
//tuple val(meta), path("*.dryclean.object.rds") , emit: dryclean_object, optional: true
path "versions.yml" , emit: versions

@@ -33,7 +32,7 @@ process DRYCLEAN {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
-def VERSION = '0.0.2'
+def VERSION = '0.0.3'
"""
#!/bin/bash
set -o allexport
@@ -70,7 +69,7 @@ process DRYCLEAN {
CMD="Rscript \$drycln \\
--input ${input} \\
--pon ${pon} \\
---centered ${centered} \\
+--center ${center} \\
--cbs ${cbs} \\
--cnsignif ${cnsignif} \\
--cores ${task.cpus} \\
@@ -79,7 +78,6 @@
--blacklist_path ${blacklist_path} \\
--germline.filter ${germline_filter} \\
--germline.file ${germline_file} \\
---human ${human} \\
--field ${field} \\
--build ${build} \\
"
57 changes: 54 additions & 3 deletions modules/local/fragcounter/main.nf
@@ -5,8 +5,8 @@ process FRAGCOUNTER {

// TODO add fragcounter container
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-'docker://mskilab/fragcounter:latest':
-'mskilab/fragcounter:latest' }"
+'docker://mskilab/fragcounter:0.1':
+'mskilab/fragcounter:0.1' }"

input:
tuple val(meta), path(bam), path(bai) // Mandatory: Format should be [meta, bam, bai] : can also provide cram & crai
@@ -21,6 +21,7 @@


output:
tuple val(meta), path("*cov.raw.rds") , emit: fragcounter_raw_cov, optional: true
tuple val(meta), path("*cov.rds") , emit: fragcounter_cov, optional: true
tuple val(meta), path("*cov.corrected.bw") , emit: corrected_bw, optional: true
path "versions.yml" , emit: versions
@@ -72,4 +73,54 @@
END_VERSIONS
"""

-}
+}

process REBIN_RAW_FRAGCOUNTER {

tag "$meta.id"
label 'process_low'

// TODO add fragcounter container
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'docker://mskilab/fragcounter:0.1':
'mskilab/fragcounter:0.1' }"

input:
tuple val(meta), path(cov_raw)
val(field)
val(windowsize)

output:
tuple val(meta), path("1kb_*"), emit: raw_fragcounter_cov_1kb, optional:true

script:

"""
#!/usr/bin/env Rscript
library(skitools)
filename = "${cov_raw}"
outputfn = "1kb_${cov_raw.name}"
raw_cov = readRDS(filename)
# Collapse a coverage GRanges into coarser fixed-width bins, taking the median
# of the finite signal values that fall into each bin.
collapse.cov <- function(cov.gr, bin.size = 1e3, field = "reads.corrected") {
BINSIZE.ROUGH = bin.size
cov.gr = cov.gr[, field]
cov.gr = gr2dt(cov.gr)
setnames(cov.gr, field, "signal")
# drop infinite values, then take the per-bin median; bin starts come from
# floor(start / bin.size) * bin.size + 1
cov.gr = cov.gr[!is.infinite(signal), .(signal = median(signal, na.rm = TRUE)),
by = .(seqnames, start = floor(start/BINSIZE.ROUGH)*BINSIZE.ROUGH+1)]
cov.gr[, end := (start + BINSIZE.ROUGH) - 1]
setnames(cov.gr, "signal", field)
cov.gr = dt2gr(cov.gr)
return(cov.gr)
}
rebinned_cov = collapse.cov(raw_cov, bin.size = ${windowsize}, field = "${field}")
##rebinned_cov = rebinned_cov %Q% (!seqnames=="Y")
# keep only chromosomes 1-22 and X
rebinned_cov = rebinned_cov %Q% (seqnames %in% c(1:22, "X"))
saveRDS(rebinned_cov, outputfn)
"""

}
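
As a quick standalone check of the binning arithmetic used in collapse.cov above (hypothetical positions, plain R):

```r
# Bin starts come from floor(start / bin.size) * bin.size + 1, so with 1 kb
# bins, 1-based positions 1..999 land in the bin starting at 1 and
# positions 1000..1999 in the bin starting at 1001.
starts <- c(1, 999, 1000, 1999, 2000)
floor(starts / 1000) * 1000 + 1
#> [1]    1    1 1001 1001 2001
```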
4 changes: 2 additions & 2 deletions modules/local/gridss/gridss/main.nf
@@ -46,8 +46,8 @@ process GRIDSS_GRIDSS {
$assembly_bam \\
$blacklist \\
--picardoptions VALIDATION_STRINGENCY=LENIENT \\
---jvmheap 31g \\
---otherjvmheap 31g \\
+--jvmheap 2g \\
+--otherjvmheap 1g \\
${normalbam} \\
${tumorbam}
2 changes: 1 addition & 1 deletion modules/local/hetpileups/main.nf
@@ -14,7 +14,7 @@ process HETPILEUPS {
path(hapmap_sites)

output:
tuple val(meta), path("*sites.txt") , emit: het_pileups_wgs
tuple val(meta), path("*sites.txt") , emit: het_pileups_wgs, optional: true
path "versions.yml" , emit: versions

when:
45 changes: 45 additions & 0 deletions modules/local/jabba/main.nf
@@ -170,3 +170,48 @@ process JABBA {
END_VERSIONS
"""
}

process COERCE_SEQNAMES {

tag "$meta.id"
label 'process_low'

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'docker://mskilab/jabba:latest':
'mskilab/jabba:latest' }"

input:
tuple val(meta), path(file)

output:
tuple val(meta), path("coerced_chr*"), emit: file, optional: true

script:
"""
#!/usr/bin/env Rscript
# Strip the "chr" prefix from seqnames so that UCSC-style inputs can be used
# with NCBI-style references. Handles GRanges (.rds), VCF, and plain-text tables.
fn <- "${file}"
outputfn <- "coerced_chr_${file.name}"
if (grepl('.rds', "${file.name}")) {
library(GenomicRanges)
data <- readRDS(fn)
seqlevels(data, pruning.mode = "coarse") <- gsub("chr", "", seqlevels(data))
saveRDS(data, file = outputfn)
} else if (grepl('.vcf|.vcf.gz|.vcf.bgz', "${file.name}")) {
library(VariantAnnotation)
data <- readVcf(fn)
##seqlevelsStyle(data) <- 'NCBI'
# rename the seqlevels, the contig rownames in the header, and any ALT
# breakend strings that embed chromosome names
seqlevels(data) <- sub("^chr", "", seqlevels(data))
header = header(data)
rownames(header@header\$contig) = sub("^chr", "", rownames(header@header\$contig))
header(data) <- header
data@fixed\$ALT <- lapply(data@fixed\$ALT, function(x) gsub("chr", "", x))
writeVcf(data, file = outputfn)
} else {
# fallback: assume a delimited table whose first column holds seqnames
data <- read.table(fn, header = T)
data[[1]] <- gsub("chr", "", data[[1]])
write.table(data, file = outputfn, sep = "\\t", row.names = F, quote = F)
}
"""
}
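
For reference, a minimal GenomicRanges example (toy data, not pipeline output) of the seqlevels renaming COERCE_SEQNAMES applies to .rds inputs:

```r
library(GenomicRanges)

# toy GRanges with UCSC-style seqnames
gr <- GRanges(c("chr1", "chrX"), IRanges(start = c(100, 200), width = 50))

# the same operation as the process: drop the "chr" prefix; pruning.mode =
# "coarse" would drop ranges on any seqlevels removed by the replacement
seqlevels(gr, pruning.mode = "coarse") <- gsub("chr", "", seqlevels(gr))
seqlevels(gr)
#> [1] "1" "X"
```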