diff --git a/.gitignore b/.gitignore index 6d7db6d..cddedaf 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ testing/ testing* *.pyc .nf-test/ +tests/test_data/ +.nf-test.log diff --git a/.nf-test.log b/.nf-test.log index 4fc0546..23b3bc5 100644 --- a/.nf-test.log +++ b/.nf-test.log @@ -1,2 +1,12 @@ -Nov-20 17:46:16.940 [main] INFO com.askimed.nf.test.App - nf-test 0.8.1 -Nov-20 17:46:16.960 [main] INFO com.askimed.nf.test.App - Arguments: [init] +Jan-16 10:10:49.572 [main] INFO com.askimed.nf.test.App - nf-test 0.8.2 +Jan-16 10:10:49.580 [main] INFO com.askimed.nf.test.App - Arguments: [test, tests/modules/local/gridss/gridss/main.nf.test] +Jan-16 10:10:49.580 [main] INFO com.askimed.nf.test.commands.RunTestsCommand - Load config from file /Users/diders01/projects/jabba_prod/nf-jabba/nf-test.config... +Jan-16 10:10:49.758 [main] INFO com.askimed.nf.test.commands.RunTestsCommand - Detected 1 test files. +Jan-16 10:10:49.827 [main] INFO com.askimed.nf.test.config.FileStaging - Create symlink '/Users/diders01/projects/jabba_prod/nf-jabba/.nf-test/tests/1a4fc5dc640a75b58bcf88f15a9b2cb6/meta/bin' --> '/Users/diders01/projects/jabba_prod/nf-jabba/bin' +Jan-16 10:10:49.828 [main] INFO com.askimed.nf.test.config.FileStaging - Create symlink '/Users/diders01/projects/jabba_prod/nf-jabba/.nf-test/tests/1a4fc5dc640a75b58bcf88f15a9b2cb6/meta/lib' --> '/Users/diders01/projects/jabba_prod/nf-jabba/lib' +Jan-16 10:10:49.828 [main] INFO com.askimed.nf.test.config.FileStaging - Create symlink '/Users/diders01/projects/jabba_prod/nf-jabba/.nf-test/tests/1a4fc5dc640a75b58bcf88f15a9b2cb6/meta/assets' --> '/Users/diders01/projects/jabba_prod/nf-jabba/assets' +Jan-16 10:10:49.828 [main] DEBUG com.askimed.nf.test.core.AbstractTest - Stage 1 user provided files... 
+Jan-16 10:10:49.828 [main] INFO com.askimed.nf.test.config.FileStaging - Create symlink '/Users/diders01/projects/jabba_prod/nf-jabba/.nf-test/tests/1a4fc5dc640a75b58bcf88f15a9b2cb6/meta/tests/test_data/' --> '/Users/diders01/projects/jabba_prod/nf-jabba/tests/test_data' +Jan-16 10:10:49.830 [main] INFO com.askimed.nf.test.core.TestExecutionEngine - Started test plan +Jan-16 10:10:49.830 [main] INFO com.askimed.nf.test.core.TestExecutionEngine - Running testsuite 'Test Process GRIDSS_GRIDSS' from file '/Users/diders01/projects/jabba_prod/nf-jabba/tests/modules/local/gridss/gridss/main.nf.test'. +Jan-16 10:10:49.830 [main] INFO com.askimed.nf.test.core.TestExecutionEngine - Run test '1a4fc5dc: Should run GRIDSS without failures'. type: com.askimed.nf.test.lang.process.ProcessTest diff --git a/.spec.md b/.spec.md new file mode 100644 index 0000000..eecc611 --- /dev/null +++ b/.spec.md @@ -0,0 +1,37 @@ +# Refactor +## Questions: +- Do we expect to have more than one tool per step (e.g. gridss and svaba): No +- Is there redundancy/dependency in the package parameters. Can parameters be + organized into a dependency tree or otherwise hardcoded because they never + change?: Yes, need to investigate. +- Do we care about alignment? Should the pipeline be able to start at + alignment? No. + +## To Do +- Refactor output channel attribute extraction (21) +- Switch from steps conditionals to cases (8) +- Switch from tools to nodes (13) +- Remove alignment step (3) +- Refactor repetitive declarations (1) + +## Outline +- Switch from the "step" based conditionals to something more declarative (e.g. + cases). Having the program run a block of code by checking if the starting + step is included in a list of steps is enormously cumbersome; cases should + make things more parsimonious and readable. +- Tools-in-step paradigm could be replaced with ungrouped nodes. +- Use functions for repetitive declarations/imports and hold their required + variables in arrays/maps.
+- Clean up output channel attribute extraction. Currently very repetitive, + could be replaced with functions. +- Cut down on the number of default parameters in the config (possibly at the + package level). Packages/processes/workflows shouldn't have so many + parameters (see: JaBbA as the worst offender); it indicates either + overparameterization or a dependency tree in the parameter space. +- Add *_create_csv methods for remaining tools to generate samplesheets to + start from those tools +- Move parameter specification from nextflow.config to module specific config, + then import them into nextflow config. This keeps the process and its parameter + configuration tightly coupled--but loses a central interface for modifying + all the defaults in one place, which is useful if changing one default would + affect defaults in a different parameter. diff --git a/bigpurple.config b/bigpurple.config new file mode 100644 index 0000000..822cb90 --- /dev/null +++ b/bigpurple.config @@ -0,0 +1,46 @@ + +params { + config_profile_name = 'mskilab-org NYU BigPurple Cluster Profile' + config_profile_description = """ + mskilab-org NYU School of Medicine BigPurple cluster profile to run nf-JaBbA. + !!Make sure to load both singularity/3.1 and squashfs-tools/4.3 before running nf-JaBbA with this profile!! + Ideal to make work folder on scratch as it generates whole lot of temporary files to run the pipeline + Make sure to submit the run as an SBATCH job since we don't own our own node at NYU yet!! + """.stripIndent() + config_profile_contact = "Tanubrata Dey (tanubrata.dey@nyulangone.org)" + config_profile_url = "https://www.mskilab.org/" + + // Resources + max_memory = 700.GB + max_cpus = 256 + max_time = 10.d +} +process { + + // default SLURM node config + beforeScript = """ + module load singularity/3.9.8 + module load squashfs-tools/4.3 + + """ + .stripIndent() + + executor='slurm' + + // memory errors which should be retried.
otherwise error out + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } + maxRetries = 3 + maxErrors = '-1' + +} +executor { + name = 'slurm' + queueSize = 500 + submitRateLimit = '10 sec' +} +singularity { + enabled = true + autoMounts = true + cacheDir = "/gpfs/data/imielinskilab/singularity_files/nextflow_singularity_cache" +} + diff --git a/bin/ascat_seg.R b/bin/ascat_seg.R index 226ea08..ae2bacd 100644 --- a/bin/ascat_seg.R +++ b/bin/ascat_seg.R @@ -625,6 +625,7 @@ if (grepl(pattern = "txt$", x = opt$variants)) { variants.dt = fread(opt$variants) + variants.dt[[1]] <- gsub("chr","",variants.dt[[1]]) variants.dt[, ":="(alt.count.n = as.numeric(as.character(alt.count.n)), ref.count.n = as.numeric(as.character(ref.count.n)), alt.count.t = as.numeric(as.character(alt.count.t)), @@ -677,11 +678,13 @@ ## ## transfer ratio message("Transferring ratio") ## gc correct tumor and normal + ## Edit by Tanubrata: Adds a fix to the column names to do GC correction when passing CBS coverage, else + ## ASCAT breaks when passing raw drycleaned coverage without GC correction if (opt$gc) { - if ("tumor" %in% names(values(cov.gr)) & "normal" %in% names(values(cov.gr))) { + if ("tum.counts" %in% names(values(cov.gr)) & "norm.counts" %in% names(values(cov.gr))) { message("Applying GC correction") - tum.gr = khtools::.gc(cov.gr, "tumor") - norm.gr = khtools::.gc(cov.gr, "normal") + tum.gr = khtools::.gc(cov.gr, "tum.counts") + norm.gr = khtools::.gc(cov.gr, "norm.counts") ratio.gr = khtools::.gc(cov.gr, "ratio") values(cov.gr)[, opt$field] = values(ratio.gr)[, "ratio"] } else { diff --git a/conf/base.config b/conf/base.config index e145441..3090ba6 100644 --- a/conf/base.config +++ b/conf/base.config @@ -102,12 +102,12 @@ process { time = { check_max( 24.h * task.attempt, 'time' ) } } withName: 'SVABA' { - cpus = 8 + cpus = 4 memory = { check_max( 72.GB * task.attempt, 'memory' ) } time = 84.h } withName: 'GRIDSS' { - cpus = { check_max( 8 * task.attempt,
'cpus' ) } + cpus = { check_max( 4 * task.attempt, 'cpus' ) } memory = { check_max( 72.GB * task.attempt, 'memory' ) } time = 84.h } diff --git a/conf/igenomes.config b/conf/igenomes.config index e7edfd6..2c3996a 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -88,7 +88,7 @@ params { build_dryclean = 'hg38' hapmap_sites = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/hapmap_3.3.hg38.vcf.gz" pon_dryclean = "${params.mski_base}/dryclean/pon/hg38/detergent.rds" - blacklist_coverage_jabba = "${params.mski_base}/JaBbA/blacklist_coverage/hg38/hg38.coverage.mask.rds" + blacklist_coverage_jabba = "${params.mski_base}/JaBbA/blacklist_coverage/hg38/hg38.coverage.mask.nochr.rds" } 'GRCh37' { diff --git a/conf/modules/fragcounter.config b/conf/modules/fragcounter.config index c17f10d..d819b46 100644 --- a/conf/modules/fragcounter.config +++ b/conf/modules/fragcounter.config @@ -33,4 +33,22 @@ process { pattern: "*{.rds,.bw,cov*,.command.*}" ] } + + withName: 'MSKILABORG_NFJABBA:NFJABBA:NORMAL_FRAGCOUNTER:REBIN_RAW_FRAGCOUNTER' { + ext.when = { params.tools && params.tools.split(',').contains('fragcounter') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/Coverages/fragCounter_normal/${meta.id}/" }, + pattern: "*{.rds,1kb_*,.command.*}" + ] + } + + withName: 'MSKILABORG_NFJABBA:NFJABBA:TUMOR_FRAGCOUNTER:REBIN_RAW_FRAGCOUNTER' { + ext.when = { params.tools && params.tools.split(',').contains('fragcounter') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/Coverages/fragCounter_tumor/${meta.id}/" }, + pattern: "*{.rds,1kb_*,.command.*}" + ] + } } diff --git a/modules/local/ascat/main.nf b/modules/local/ascat/main.nf index aa7078a..6ee486c 100644 --- a/modules/local/ascat/main.nf +++ b/modules/local/ascat/main.nf @@ -18,7 +18,7 @@ process ASCAT_SEG { val(from_maf) // channel: whether to start from MAF, default=FALSE output: - tuple val(meta), path("*ascat_pp.rds") , emit: 
purityploidy, optional:true + tuple val(meta), path("*ascat_pp.rds") , emit: purityploidy tuple val(meta), path("*ascat_seg.rds") , emit: segments, optional:true path "versions.yml" , emit: versions diff --git a/modules/local/cbs/main.nf b/modules/local/cbs/main.nf index 9257613..d1d36a0 100644 --- a/modules/local/cbs/main.nf +++ b/modules/local/cbs/main.nf @@ -15,7 +15,7 @@ process CBS { val(name) output: - tuple val(meta), path("*cov.rds") , emit: cbs_cov_rds + tuple val(meta), path("cov.rds") , emit: cbs_cov_rds tuple val(meta), path("seg.rds") , emit: cbs_seg_rds tuple val(meta), path("nseg.rds") , emit: cbs_nseg_rds path "versions.yml" , emit: versions diff --git a/modules/local/dryclean/main.nf b/modules/local/dryclean/main.nf index 1bab4fe..4c47568 100644 --- a/modules/local/dryclean/main.nf +++ b/modules/local/dryclean/main.nf @@ -3,14 +3,14 @@ process DRYCLEAN { label 'process_medium' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'docker://mskilab/dryclean:0.0.2': - 'mskilab/dryclean:0.0.2' }" + 'docker://mskilab/dryclean:0.0.3': + 'mskilab/dryclean:0.0.3' }" input: tuple val(meta), path(input) path(pon) - val(centered) + val(center) val(cbs) val(cnsignif) val(wholeGenome) @@ -18,12 +18,11 @@ process DRYCLEAN { val(blacklist_path) val(germline_filter) val(germline_file) - val(human) val(field) val(build) output: - tuple val(meta), path("*cov.rds") , emit: decomposed_cov, optional: true + tuple val(meta), path("*cov.rds") , emit: decomposed_cov //tuple val(meta), path("*.dryclean.object.rds") , emit: dryclean_object, optional: true path "versions.yml" , emit: versions @@ -33,7 +32,7 @@ process DRYCLEAN { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '0.0.2' + def VERSION = '0.0.3' """ #!/bin/bash set -o allexport @@ -70,7 +69,7 @@ process DRYCLEAN { CMD="Rscript \$drycln \\ --input ${input} \\ --pon ${pon} \\ - --centered ${centered} \\ + --center ${center} \\ --cbs ${cbs} \\ --cnsignif ${cnsignif} \\ --cores ${task.cpus} \\ @@ -79,7 +78,6 @@ process DRYCLEAN { --blacklist_path ${blacklist_path} \\ --germline.filter ${germline_filter} \\ --germline.file ${germline_file} \\ - --human ${human} \\ --field ${field} \\ --build ${build} \\ " diff --git a/modules/local/fragcounter/main.nf b/modules/local/fragcounter/main.nf index 8368097..3508936 100644 --- a/modules/local/fragcounter/main.nf +++ b/modules/local/fragcounter/main.nf @@ -5,8 +5,8 @@ process FRAGCOUNTER { // TODO add fragcounter container container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'docker://mskilab/fragcounter:latest': - 'mskilab/fragcounter:latest' }" + 'docker://mskilab/fragcounter:0.1': + 'mskilab/fragcounter:0.1' }" input: tuple val(meta), path(bam), path(bai) // Mandatory: Format should be [meta, bam, bai] : can also provide cram & crai @@ -21,6 +21,7 @@ process FRAGCOUNTER { output: + tuple val(meta), path("*cov.raw.rds") , emit: fragcounter_raw_cov, optional: true tuple val(meta), path("*cov.rds") , emit: fragcounter_cov, optional: true tuple val(meta), path("*cov.corrected.bw") , emit: corrected_bw, optional: true path "versions.yml" , emit: versions @@ -72,4 +73,54 @@ process FRAGCOUNTER { END_VERSIONS """ -} \ No newline at end of file +} + +process REBIN_RAW_FRAGCOUNTER { + + tag "$meta.id" + label 'process_low' + + // TODO add fragcounter container + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'docker://mskilab/fragcounter:0.1': + 'mskilab/fragcounter:0.1' }" + + input: + tuple val(meta), path(cov_raw) + val(field) + val(windowsize) + + output: + tuple val(meta), path("1kb_*"), emit: raw_fragcounter_cov_1kb, optional:true + + script: + + """ + #!/usr/bin/env Rscript + + library(skitools) + + filename = "${cov_raw}" + outputfn = "1kb_${cov_raw.name}" + + raw_cov = readRDS(filename) + collapse.cov <- function(cov.gr, bin.size = 1e3, field = "reads.corrected") { + BINSIZE.ROUGH = bin.size + cov.gr = cov.gr[, field] + cov.gr = gr2dt(cov.gr) + setnames(cov.gr, field, "signal") + cov.gr = cov.gr[!is.infinite(signal), .(signal = median(signal, na.rm = TRUE)), + by = .(seqnames, start = floor(start/BINSIZE.ROUGH)*BINSIZE.ROUGH+1)] + cov.gr[, end := (start + BINSIZE.ROUGH) - 1] + setnames(cov.gr, "signal", field) + cov.gr = dt2gr(cov.gr) + return(cov.gr) + } + rebinned_cov = collapse.cov(raw_cov, bin.size = ${windowsize}, field = "${field}") + ##rebinned_cov = rebinned_cov %Q% (!seqnames=="Y") + rebinned_cov = rebinned_cov %Q% (seqnames %in% c(seq(1:22),"X")) + 
saveRDS(rebinned_cov, outputfn) + + """ + +} diff --git a/modules/local/gridss/gridss/main.nf b/modules/local/gridss/gridss/main.nf index 1152c27..f653700 100644 --- a/modules/local/gridss/gridss/main.nf +++ b/modules/local/gridss/gridss/main.nf @@ -46,8 +46,8 @@ process GRIDSS_GRIDSS { $assembly_bam \\ $blacklist \\ --picardoptions VALIDATION_STRINGENCY=LENIENT \\ - --jvmheap 31g \\ - --otherjvmheap 31g \\ + --jvmheap 2g \\ + --otherjvmheap 1g \\ ${normalbam} \\ ${tumorbam} diff --git a/modules/local/hetpileups/main.nf b/modules/local/hetpileups/main.nf index f5ae473..f23ce5a 100644 --- a/modules/local/hetpileups/main.nf +++ b/modules/local/hetpileups/main.nf @@ -14,7 +14,7 @@ process HETPILEUPS { path(hapmap_sites) output: - tuple val(meta), path("*sites.txt") , emit: het_pileups_wgs + tuple val(meta), path("*sites.txt") , emit: het_pileups_wgs, optional: true path "versions.yml" , emit: versions when: diff --git a/modules/local/jabba/main.nf b/modules/local/jabba/main.nf index 4757bbd..b274c98 100644 --- a/modules/local/jabba/main.nf +++ b/modules/local/jabba/main.nf @@ -170,3 +170,48 @@ process JABBA { END_VERSIONS """ } + +process COERCE_SEQNAMES { + + tag "$meta.id" + label 'process_low' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'docker://mskilab/jabba:latest': + 'mskilab/jabba:latest' }" + + input: + tuple val(meta), path(file) + + output: + tuple val(meta), path("coerced_chr*"), emit: file, optional: true + + script: + """ + #!/usr/bin/env Rscript + + fn <- "${file}" + outputfn <- "coerced_chr_${file.name}" + + if(grepl('.rds', "${file.name}")){ + library(GenomicRanges) + data <- readRDS(fn) + seqlevels(data, pruning.mode = "coarse") <- gsub("chr","",seqlevels(data)) + saveRDS(data, file = outputfn) + } else if (grepl('.vcf|.vcf.gz|.vcf.bgz', "${file.name}")) { + library(VariantAnnotation) + data <- readVcf(fn) + ##seqlevelsStyle(data) <- 'NCBI' + seqlevels(data) <- sub("^chr", "", seqlevels(data)) + header = header(data) + rownames(header@header\$contig) = sub("^chr", "", rownames(header@header\$contig)) + header(data) <- header + data@fixed\$ALT <- lapply(data@fixed\$ALT, function(x) gsub("chr", "", x)) + writeVcf(data, file = outputfn) + } else { + data <- read.table(fn, header=T) + data[[1]] <- gsub("chr","",data[[1]]) + write.table(data, file = outputfn, sep = "\\t", row.names = F, quote = F) + } + """ +} diff --git a/modules/local/svaba/main.nf b/modules/local/svaba/main.nf index 37d88b3..3ab68a7 100644 --- a/modules/local/svaba/main.nf +++ b/modules/local/svaba/main.nf @@ -86,4 +86,4 @@ process SVABA { END_VERSIONS """ -} \ No newline at end of file +} diff --git a/nextflow.config b/nextflow.config index 6cdd233..60405fa 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,7 +16,7 @@ params { step = 'alignment' // References - genome = 'GATK.GRCh38' + genome = 'GATK.GRCh37' igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false mski_base = 's3://mskilab-pipeline' @@ -61,12 +61,12 @@ params { // fragCounter options midpoint_frag = "TRUE" // If TRUE only count midpoint if FALSE then count bin footprint of every fragment interval: Default=TRUE windowsize_frag = 200 // Window / bin size : Default=200 (but dryclean uses 1000 binsize) - minmapq_frag = 1 // Minimal 
map quality : Default = 1 + minmapq_frag = 60 // Minimal map quality : Default = 1 paired_frag = "TRUE" // Is the dataset paired : Default = TRUE exome_frag = "FALSE" // Use exons as bins instead of fixed window : Default = FALSE // Dryclean options - centered_dryclean = "TRUE" + center_dryclean = "TRUE" cbs_dryclean = "FALSE" cnsignif_dryclean = 0.00001 wholeGenome_dryclean = "TRUE" @@ -74,8 +74,8 @@ params { blacklist_path_dryclean = "NA" germline_filter_dryclean = "FALSE" germline_file_dryclean = "NA" - human_dryclean = "TRUE" - field_dryclean = "reads.corrected" + //human_dryclean = "TRUE" + field_dryclean = "reads" //build_dryclean = "hg19" // This should go inside igenomes.config // ASCAT options diff --git a/nextflow_schema.json b/nextflow_schema.json index b1744b0..24f897c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -362,7 +362,7 @@ "description": "Dryclean PON", "help_text": "Provide the PON .rds file based on genome version" }, - "centered_dryclean": { + "center_dryclean": { "type": "string", "fa_icon": "fas fa-forward", "description": "Option to specify whether the samples are centered", @@ -422,20 +422,12 @@ "help_text": "Path to file annotated with germline calls, if germline.filter == TRUE", "default": "NA" }, - "human_dryclean": { - "type": "string", - "fa_icon": "fas fa-forward", - "description": "Specify if the samples under consideration are human", - "hidden": true, - "help_text": "Specify if the samples under consideration are human", - "default": "TRUE" - }, "field_dryclean": { "type": "string", "fa_icon": "fas fa-forward", "description": "Field name in GRanges metadata to use for drycleaning", "help_text": "Specify Field name in GRanges metadata to use for drycleaning", - "default": "reads.corrected" + "default": "reads" }, "build_dryclean": { "type": "string", @@ -573,7 +565,8 @@ "type": "string", "default": "TRUE", "hidden": true, - "fa_icon": "fas fa-forward" + "fa_icon": "fas fa-forward", + "description": "whether to 
rescue all lower confidence junctions within the window, or just the best one." }, "nudgebalanced_jabba": { "type": "string", @@ -666,7 +659,8 @@ "fix_thres_jabba": { "type": "integer", "default": -1, - "fa_icon": "fas fa-wrench" + "fa_icon": "fas fa-wrench", + "description": "threshold for calling convergence" }, "lp_jabba": { "type": "string", @@ -698,7 +692,8 @@ "nonintegral_jabba": { "type": "string", "default": "FALSE", - "fa_icon": "fas fa-forward" + "fa_icon": "fas fa-forward", + "description": "whether to allow non-integral solutions" }, "verbose_jabba": { "type": "string", diff --git a/nf-test.config b/nf-test.config index 870799d..a223f6d 100644 --- a/nf-test.config +++ b/nf-test.config @@ -1,8 +1,9 @@ config { - testsDir "tests" workDir ".nf-test" configFile "tests/nextflow.config" - profile "" - + profile "docker" + stage { + symlink "tests/test_data/" + } } diff --git a/spec.md b/spec.md new file mode 100644 index 0000000..d42e71f --- /dev/null +++ b/spec.md @@ -0,0 +1,107 @@ +# Refactor +- Cut down on the number of default parameters in the config (possibly at the + package level). Packages/processes/workflows shouldn't have so many + parameters (see: JaBbA as the worst offender); it indicates either + overparameterization or a dependency tree in the parameter space. +- Add *_create_csv methods for remaining tools to generate samplesheets to + start from those tools +- Move parameter specification from nextflow.config to module specific config, + then import them into nextflow config. This keeps the process and its parameter + configuration tightly coupled--but loses a central interface for modifying + all the defaults in one place, which is useful if changing one default would + affect defaults in a different parameter. +- Switch from the "step" based conditionals to something more declarative (e.g. + cases).
Having the program run a block of code by checking if the starting + step is included in a list of steps is enormously cumbersome; cases should + make things more parsimonious and readable. +- Use functions for repetitive declarations/imports and hold their required + variables in arrays/maps. +- Clean up output channel attribute extraction. Currently very repetitive, + could be replaced with functions. + + +## Code Samples +```groovy +// Define the pipeline steps in order +def pipelineSteps = ['alignment', 'sv_calling', 'coverage', 'segmentation', 'ploidy_calling', 'junction_balance'] + +// User input for the starting step +def startingStep = 'coverage' // This would be provided by the user + +// Find the index of the starting step in the pipeline +int startIndex = pipelineSteps.indexOf(startingStep) + +// Validate if the starting step is valid +if (startIndex == -1) { + error "Invalid starting step: $startingStep" +} + +// Execute the pipeline steps starting from the starting step +pipelineSteps.drop(startIndex).each { step -> + switch (step) { + case 'alignment': + // run alignment step + println "Running alignment" + break + case 'sv_calling': + // run sv_calling step + println "Running sv_calling" + break + case 'coverage': + // run coverage step + println "Running coverage" + break + case 'segmentation': + // run segmentation step + println "Running segmentation" + break + case 'ploidy_calling': + // run ploidy_calling step + println "Running ploidy_calling" + break + case 'junction_balance': + // run junction_balance step + println "Running junction_balance" + break + } +} +``` + +```groovy +// Define a closure to include a subworkflow from a local path +def includeLocalSubworkflow(String subworkflowName, String alias = null) { + alias = alias ?: subworkflowName.toUpperCase().replaceAll(/[^A-Z0-9_]/, '_') + return "include { $alias } from '../subworkflows/local/$subworkflowName/main'" +} + +// Define a list of subworkflows to include +def subworkflows = [ +
'channel_align_create_csv', + 'channel_markduplicates_create_csv', + 'channel_baserecalibrator_create_csv', + //... +] + +// Include subworkflows using the closure +subworkflows.each { subworkflow -> + println includeLocalSubworkflow(subworkflow) +} + +// For modules that are included multiple times with different aliases, use a map +def samtoolsConvertAliases = [ + 'BAM_TO_CRAM': 'bam_convert_samtools', + 'BAM_TO_CRAM_MAPPING': 'bam_convert_samtools', + // ... +] + +samtoolsConvertAliases.each { alias, subworkflow -> + println includeLocalSubworkflow(subworkflow, alias) +} + +// For modules that are included with the same alias but different parameters, include them once +// Then use with different parameters in the workflow logic +println "include { SAMTOOLS_CONVERT } from '../modules/nf-core/samtools/convert/main'" + + + + diff --git a/subworkflows/local/bam_fragCounter/main.nf b/subworkflows/local/bam_fragCounter/main.nf index e9d0404..8f5a4d5 100644 --- a/subworkflows/local/bam_fragCounter/main.nf +++ b/subworkflows/local/bam_fragCounter/main.nf @@ -3,6 +3,7 @@ // include { FRAGCOUNTER } from '../../../modules/local/fragcounter/main.nf' +include { REBIN_RAW_FRAGCOUNTER } from '../../../modules/local/fragcounter/main.nf' workflow BAM_FRAGCOUNTER { // defining inputs @@ -12,28 +13,35 @@ workflow BAM_FRAGCOUNTER { windowsize gcmapdir minmapq - fasta // Required: if using cram files instead of bam. In our case we are using cram files. 
- fasta_fai paired exome //Creating empty channels for output main: versions = Channel.empty() + fragcounter_raw_cov = Channel.empty() fragcounter_cov = Channel.empty() corrected_bw = Channel.empty() + rebinned_raw_cov = Channel.empty() - FRAGCOUNTER(input, midpoint, windowsize, gcmapdir, minmapq, fasta, fasta_fai, paired, exome) // We are keeping cov empty because we don't use any input coverage for fragcounter + FRAGCOUNTER(input, midpoint, windowsize, gcmapdir, minmapq, [], [], paired, exome) // We are keeping cov empty because we don't use any input coverage for fragcounter //FRAGCOUNTER(input, midpoint, windowsize, gcmapdir, minmapq, paired, exome) // initializing outputs from fragcounter + fragcounter_raw_cov = FRAGCOUNTER.out.fragcounter_raw_cov fragcounter_cov = FRAGCOUNTER.out.fragcounter_cov versions = FRAGCOUNTER.out.versions corrected_bw = FRAGCOUNTER.out.corrected_bw + REBIN_RAW_FRAGCOUNTER(fragcounter_cov, "reads", 1000) + + rebinned_raw_cov = REBIN_RAW_FRAGCOUNTER.out.raw_fragcounter_cov_1kb + // emit: + fragcounter_raw_cov fragcounter_cov + rebinned_raw_cov corrected_bw versions diff --git a/subworkflows/local/cov_dryclean/main.nf b/subworkflows/local/cov_dryclean/main.nf index 3b0c95b..68ba21f 100644 --- a/subworkflows/local/cov_dryclean/main.nf +++ b/subworkflows/local/cov_dryclean/main.nf @@ -9,7 +9,7 @@ workflow COV_DRYCLEAN { take: input_dryclean // channel: [mandatory] [ meta, cov(.rds file) ] pon_dryclean - centered_dryclean + center_dryclean cbs_dryclean cnsignif_dryclean wholeGenome_dryclean @@ -17,7 +17,6 @@ workflow COV_DRYCLEAN { blacklist_path_dryclean germline_filter_dryclean germline_file_dryclean - human_dryclean field_dryclean build_dryclean @@ -26,10 +25,10 @@ workflow COV_DRYCLEAN { dryclean_cov = Channel.empty() //dryclean_obj = Channel.empty() - DRYCLEAN(input_dryclean, pon_dryclean, centered_dryclean, cbs_dryclean, + DRYCLEAN(input_dryclean, pon_dryclean, center_dryclean, cbs_dryclean, cnsignif_dryclean, wholeGenome_dryclean, 
blacklist_dryclean, blacklist_path_dryclean, germline_filter_dryclean, germline_file_dryclean, - human_dryclean, field_dryclean, build_dryclean) + field_dryclean, build_dryclean) dryclean_cov = DRYCLEAN.out.decomposed_cov //dryclean_obj = DRYCLEAN.out.dryclean_object diff --git a/subworkflows/local/jabba/main.nf b/subworkflows/local/jabba/main.nf index 42b49ba..11b5f85 100644 --- a/subworkflows/local/jabba/main.nf +++ b/subworkflows/local/jabba/main.nf @@ -3,6 +3,10 @@ // include { JABBA } from '../../../modules/local/jabba/main.nf' +include { COERCE_SEQNAMES as COERCE_SEQNAMES_COV } from '../../../modules/local/jabba/main.nf' +include { COERCE_SEQNAMES as COERCE_SEQNAMES_SOM_SV } from '../../../modules/local/jabba/main.nf' +include { COERCE_SEQNAMES as COERCE_SEQNAMES_UNFIL_SOM_SV } from '../../../modules/local/jabba/main.nf' +include { COERCE_SEQNAMES as COERCE_SEQNAMES_HETS } from '../../../modules/local/jabba/main.nf' workflow COV_JUNC_JABBA { @@ -52,13 +56,55 @@ workflow COV_JUNC_JABBA { jabba_seg = Channel.empty() karyograph = Channel.empty() - JABBA(cov_rds_jabba, junction_jabba, ploidy_jabba, het_pileups_wgs_jabba, - cbs_seg_rds_jabba, cbs_nseg_rds_jabba, j_supp_jabba, blacklist_junctions_jabba, + // Add channels for the outputs of COERCE_SEQNAMES + chr_coerced_cov_rds_jabba = Channel.empty() + chr_coerced_junction_jabba = Channel.empty() + chr_coerced_j_supp_jabba = Channel.empty() + chr_coerced_het_pileups_wgs_jabba = Channel.empty() + + + // Run COERCE_SEQNAMES to force inputs to be in common + COERCE_SEQNAMES_COV(cov_rds_jabba) + chr_coerced_cov_rds_jabba = COERCE_SEQNAMES_COV.out.file + chr_coerced_cov_rds_jabba_to_cross = chr_coerced_cov_rds_jabba.map { tuple -> + def (meta, cov) = tuple + [meta.patient, meta, cov] } + COERCE_SEQNAMES_SOM_SV(junction_jabba) + chr_coerced_junction_jabba = COERCE_SEQNAMES_SOM_SV.out.file + chr_coerced_junction_jabba_to_cross = chr_coerced_junction_jabba.map { tuple -> + def (meta, vcf) = tuple + [meta.patient, meta, 
vcf] } + + COERCE_SEQNAMES_UNFIL_SOM_SV(j_supp_jabba) + chr_coerced_j_supp_jabba = COERCE_SEQNAMES_UNFIL_SOM_SV.out.file + chr_coerced_j_supp_jabba_to_cross = chr_coerced_j_supp_jabba.map { tuple -> + def (meta, vcf2) = tuple + [meta.patient, meta, vcf2] } + + COERCE_SEQNAMES_HETS(het_pileups_wgs_jabba) + chr_coerced_het_pileups_wgs_jabba = COERCE_SEQNAMES_HETS.out.file + chr_coerced_het_pileups_wgs_jabba_to_cross = chr_coerced_het_pileups_wgs_jabba.map { tuple -> + def (meta, hets) = tuple + [meta.patient, meta, hets] } + + input_jab = chr_coerced_cov_rds_jabba_to_cross.join(chr_coerced_het_pileups_wgs_jabba_to_cross) + .join(chr_coerced_junction_jabba_to_cross) + .join(chr_coerced_j_supp_jabba_to_cross) + .map{ tuples -> + [tuples[1]] + [tuples[2]] + [tuples[4]] + [tuples[6]] + [tuples[8]] + } + input_coerced_cov = input_jab.map{ meta, cov, hets, vcf, vcf2 -> [ meta, cov ] } //chr stripped cov + input_coerced_hets = input_jab.map{ meta, cov, hets, vcf, vcf2 -> [ meta, hets ] } //chr stripped hetpileups + input_coerced_vcf = input_jab.map{ meta, cov, hets, vcf, vcf2 -> [ meta, vcf ] } //chr stripped somatic sv + input_coerced_vcf2 = input_jab.map{ meta, cov, hets, vcf, vcf2 -> [ meta, vcf2 ] } //chr stripped unfiltered somatic sv + + JABBA(input_coerced_cov, input_coerced_vcf, ploidy_jabba, input_coerced_hets, + cbs_seg_rds_jabba, cbs_nseg_rds_jabba, input_coerced_vcf2, blacklist_junctions_jabba, geno_jabba, indel_jabba, tfield_jabba, iter_jabba, rescue_window_jabba, rescue_all_jabba, nudgebalanced_jabba, edgenudge_jabba, strict_jabba, allin_jabba, field_jabba, maxna_jabba, blacklist_coverage_jabba, purity_jabba, pp_method_jabba, cnsignif_jabba, - slack_jabba, linear_jabba, tilim_jabba, epgap_jabba, fix_thres_jabba, lp_jabba, + slack_jabba, linear_jabba, tilim_jabba, epgap_jabba, fix_thres_jabba, lp_jabba, ism_jabba, filter_loose_jabba, gurobi_jabba, verbose_jabba) jabba_rds = JABBA.out.jabba_rds @@ -82,4 +128,3 @@ workflow COV_JUNC_JABBA { versions } - diff --git 
a/tests/main.nf.test b/tests/main.nf.test new file mode 100644 index 0000000..bfc57c2 --- /dev/null +++ b/tests/main.nf.test @@ -0,0 +1,22 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + test("Should run entire nf-jabba pipeline without failure") { + + when { + params { + outdir = "results" + input = "$projectDir/tests/test_data/samplesheet.csv" + step = "sv_calling" + } + } + + then { + assert workflow.success + } + + } + +} diff --git a/tests/modules/local/ascat/main.ascat_seg.nf.test b/tests/modules/local/ascat/main.ascat_seg.nf.test index 2b2035e..8f4704a 100644 --- a/tests/modules/local/ascat/main.ascat_seg.nf.test +++ b/tests/modules/local/ascat/main.ascat_seg.nf.test @@ -9,19 +9,30 @@ nextflow_process { when { params { // define parameters here. Example: - // outdir = "tests/results" + outdir = "results" } process { """ // define inputs of the process here. Example: // input[0] = file("test-file.txt") + + meta = [ id: "test" ] + input[0] = tuple(meta, "$projectDir/tests/test_data/hets.txt") + input[1] = tuple(meta, "$projectDir/tests/test_data/tumor_dryclean_cov.rds") + // Defaults from nextflow.config + input[2] = params.field_ascat + input[3] = params.hets_thresh_ascat + input[4] = params.penalty_ascat + input[5] = params.gc_correct_ascat + input[6] = params.rebin_width_ascat + input[7] = params.from_maf_ascat """ } } then { assert process.success - assert snapshot(process.out).match() + //assert snapshot(process.out).match() } } diff --git a/tests/modules/local/cbs/main.nf.test b/tests/modules/local/cbs/main.nf.test index 03face7..84c5fae 100644 --- a/tests/modules/local/cbs/main.nf.test +++ b/tests/modules/local/cbs/main.nf.test @@ -9,19 +9,26 @@ nextflow_process { when { params { // define parameters here. Example: - // outdir = "tests/results" + outdir = "results" } process { """ // define inputs of the process here. 
Example: // input[0] = file("test-file.txt") + + meta = [ id: "test" ] + input[0] = tuple(meta, "$projectDir/tests/test_data/drycleaned_jts_1501_chr21_tumor_cov.rds", "$projectDir/tests/test_data/drycleaned_jts_1501_chr21_normal_cov.rds") + // Defaults from nextflow.config + input[1] = params.cnsignif_cbs + input[2] = params.field_cbs + input[3] = params.name_cbs """ } } then { assert process.success - assert snapshot(process.out).match() + //assert snapshot(process.out).match() } } diff --git a/tests/modules/local/dryclean/main.nf.test b/tests/modules/local/dryclean/main.nf.test index 78dbd04..0bc76b7 100644 --- a/tests/modules/local/dryclean/main.nf.test +++ b/tests/modules/local/dryclean/main.nf.test @@ -4,26 +4,40 @@ nextflow_process { script "modules/local/dryclean/main.nf" process "DRYCLEAN" - test("Should run without failures") { + test("Should run Dryclean without failures") { when { params { // define parameters here. Example: - // outdir = "tests/results" + outdir = "results" } process { """ // define inputs of the process here. 
Example: // input[0] = file("test-file.txt") + meta = [ id: "test" ] + input[0] = tuple(meta, "$projectDir/tests/test_data/samp1.rds") + // Defaults from nextflow.config + input[1] = params.genomes['GATK.GRCh37'].pon_dryclean + input[2] = params.centered_dryclean + input[3] = params.cbs_dryclean + input[4] = params.cnsignif_dryclean + input[5] = params.wholeGenome_dryclean + input[6] = params.blacklist_dryclean + input[7] = params.blacklist_path_dryclean + input[8] = params.germline_filter_dryclean + input[9] = params.germline_file_dryclean + input[10] = params.human_dryclean + input[11] = params.field_dryclean + input[12] = params.genomes['GATK.GRCh37'].build_dryclean """ } } then { assert process.success - assert snapshot(process.out).match() + //assert snapshot(process.out).match() } } - } diff --git a/tests/modules/local/fragcounter/main.nf.test b/tests/modules/local/fragcounter/main.nf.test index 24e3745..d94d8b2 100644 --- a/tests/modules/local/fragcounter/main.nf.test +++ b/tests/modules/local/fragcounter/main.nf.test @@ -4,24 +4,68 @@ nextflow_process { script "modules/local/fragcounter/main.nf" process "FRAGCOUNTER" - test("Should run without failures") { + test("Should run Fragcounter on Tumor without failures") { when { params { // define parameters here. Example: - // outdir = "tests/results" + outdir = "results" } process { """ // define inputs of the process here. 
Example: // input[0] = file("test-file.txt") + meta = [ id: "test" ] + input[0] = tuple(meta, "$projectDir/tests/test_data/chr21.bam", "$projectDir/tests/test_data/chr21.bai") + // Defaults from nextflow.config + input[1] = params.midpoint_frag + input[2] = params.windowsize_frag + input[3] = params.genomes['GATK.GRCh37'].gcmapdir_frag + input[4] = params.minmapq_frag + input[5] = params.genomes['GATK.GRCh37'].fasta + input[6] = params.genomes['GATK.GRCh37'].fasta_fai + input[7] = params.paired_frag + input[8] = params.exome_frag """ } } then { assert process.success - assert snapshot(process.out).match() + //assert snapshot(process.out).match() + } + + } + + test("Should run Fragcounter on Normal without failures") { + + when { + params { + // define parameters here. Example: + outdir = "results" + } + process { + """ + // define inputs of the process here. Example: + // input[0] = file("test-file.txt") + meta = [ id: "test" ] + input[0] = tuple(meta, "$projectDir/tests/test_data/chr21.bam", "$projectDir/tests/test_data/chr21.bai") + // Defaults from nextflow.config + input[1] = params.midpoint_frag + input[2] = params.windowsize_frag + input[3] = params.genomes['GATK.GRCh37'].gcmapdir_frag + input[4] = params.minmapq_frag + input[5] = params.genomes['GATK.GRCh37'].fasta + input[6] = params.genomes['GATK.GRCh37'].fasta_fai + input[7] = params.paired_frag + input[8] = params.exome_frag + """ + } + } + + then { + assert process.success + //assert snapshot(process.out).match() } } diff --git a/tests/modules/local/gridss/gridss/main.nf.test b/tests/modules/local/gridss/gridss/main.nf.test index f299be1..68fbe20 100644 --- a/tests/modules/local/gridss/gridss/main.nf.test +++ b/tests/modules/local/gridss/gridss/main.nf.test @@ -4,24 +4,32 @@ nextflow_process { script "modules/local/gridss/gridss/main.nf" process "GRIDSS_GRIDSS" - test("Should run without failures") { + test("Should run GRIDSS without failures") { when { params { // define parameters here. 
Example: - // outdir = "tests/results" + outdir = "results" } process { """ // define inputs of the process here. Example: // input[0] = file("test-file.txt") + meta = [ id: "test" ] + input[0] = tuple(meta, "$projectDir/tests/test_data/1234N.bam", "$projectDir/tests/test_data/1234N.bai", "$projectDir/tests/test_data/9876T.bam", "$projectDir/tests/test_data/9876T.bai") + // Defaults from nextflow.config + + input[1] = params.genomes['GATK.GRCh37'].fasta + input[2] = params.genomes['GATK.GRCh37'].fasta_fai + input[3] = params.genomes['GATK.GRCh37'].bwa + input[4] = params.genomes['GATK.GRCh37'].blacklist_gridss """ } } then { assert process.success - assert snapshot(process.out).match() + //assert snapshot(process.out).match() } } diff --git a/tests/modules/local/gridss/somaticFilter/main.nf.test b/tests/modules/local/gridss/somaticFilter/main.nf.test index 5b8f31d..1f07faa 100644 --- a/tests/modules/local/gridss/somaticFilter/main.nf.test +++ b/tests/modules/local/gridss/somaticFilter/main.nf.test @@ -9,19 +9,25 @@ nextflow_process { when { params { // define parameters here. Example: - // outdir = "tests/results" + outdir = "results" } process { """ // define inputs of the process here. Example: // input[0] = file("test-file.txt") + + meta = [ id: "test" ] + input[0] = tuple(meta, "$projectDir/tests/test_data/gridss_output.vcf") + // Defaults from nextflow.config + + input[1] = params.genomes['GATK.GRCh37'].pon_gridss """ } } then { assert process.success - assert snapshot(process.out).match() + //assert snapshot(process.out).match() } } diff --git a/tests/modules/local/hetpileups/main.nf.test b/tests/modules/local/hetpileups/main.nf.test index db7125e..a453051 100644 --- a/tests/modules/local/hetpileups/main.nf.test +++ b/tests/modules/local/hetpileups/main.nf.test @@ -9,19 +9,26 @@ nextflow_process { when { params { // define parameters here. Example: - // outdir = "tests/results" + outdir = "results" } process { """ // define inputs of the process here. 
Example: // input[0] = file("test-file.txt") + + meta = [ id: "test" ] + input[0] = tuple(meta, "$projectDir/tests/test_data/9876T.bam", "$projectDir/tests/test_data/9876T.bai", "$projectDir/tests/test_data/1234N.bam", "$projectDir/tests/test_data/1234N.bai") + // Defaults from nextflow.config + input[1] = params.filter_hets + input[2] = params.max_depth + input[3] = params.genomes['GATK.GRCh37'].hapmap_sites """ } } then { assert process.success - assert snapshot(process.out).match() + //assert snapshot(process.out).match() } } diff --git a/tests/modules/local/jabba/main.nf.test b/tests/modules/local/jabba/main.nf.test new file mode 100644 index 0000000..fa9e597 --- /dev/null +++ b/tests/modules/local/jabba/main.nf.test @@ -0,0 +1,68 @@ +nextflow_process { + + name "Test Process JABBA" + script "modules/local/jabba/main.nf" + process "JABBA" + + test("Should run JaBbA without failures") { + + when { + params { + // define parameters here. Example: + outdir = "results" + } + process { + """ + // define inputs of the process here. 
Example: + // input[0] = file("test-file.txt") + + meta = [ id: "test" ] + ploidy = 4.5 + input[0] = tuple(meta, "$projectDir/tests/test_data/coverage_jabba.txt") + input[1] = tuple(meta, "$projectDir/tests/test_data/junctions_jabba.rds") + input[2] = tuple(meta, ploidy) + input[3] = tuple(meta, "$projectDir/tests/test_data/hets_jabba.txt") + input[4] = tuple(meta, "$projectDir/tests/test_data/segs_jabba.rds") + input[5] = tuple(meta, "$projectDir/tests/test_data/nsegs_jabba.rds") + input[6] = tuple(meta, "$projectDir/tests/test_data/unfiltered_som.vcf") + // Defaults from nextflow.config + input[7] = params.blacklist_junctions_jabba // this is declared as val to allow for "NULL" default value, but is treated like a path + input[8] = params.geno_jabba + input[9] = params.indel_jabba + input[10] = params.tfield_jabba + input[11] = params.iter_jabba + input[12] = params.rescue_window_jabba + input[13] = params.rescue_all_jabba + input[14] = params.nudgebalanced_jabba + input[15] = params.edgenudge_jabba + input[16] = params.strict_jabba + input[17] = params.allin_jabba + input[18] = params.field_jabba + input[19] = params.maxna_jabba + input[20] = params.genomes['GATK.GRCh37'].blacklist_coverage_jabba + input[21] = params.purity_jabba + input[22] = params.pp_method_jabba + input[23] = params.cnsignif_jabba + input[24] = params.slack_jabba + input[25] = params.linear_jabba + input[26] = params.tilim_jabba + input[27] = params.epgap_jabba + input[28] = params.fix_thres_jabba + input[29] = params.lp_jabba + input[30] = params.ism_jabba + input[31] = params.filter_loose_jabba + input[32] = params.gurobi_jabba + input[33] = params.verbose_jabba + """ + } + } + + then { + assert process.success + //assert snapshot(process.out).match() + } + + } + +} + diff --git a/tests/modules/local/svaba/main.nf.test b/tests/modules/local/svaba/main.nf.test index 45e58b2..519780b 100644 --- a/tests/modules/local/svaba/main.nf.test +++ b/tests/modules/local/svaba/main.nf.test @@ -4,24 
+4,39 @@ nextflow_process { script "modules/local/svaba/main.nf" process "SVABA" - test("Should run without failures") { + + test("Should run Svaba without failures") { when { params { // define parameters here. Example: - // outdir = "tests/results" + outdir = "results" + } process { """ // define inputs of the process here. Example: // input[0] = file("test-file.txt") + meta = [ id: "test" ] + input[0] = tuple(meta, "$projectDir/tests/test_data/9876T.bam", "$projectDir/tests/test_data/9876T.bai", "$projectDir/tests/test_data/1234N.bam", "$projectDir/tests/test_data/1234N.bai") + // Defaults from nextflow.config + + input[1] = params.genomes['GATK.GRCh37'].fasta + input[2] = params.genomes['GATK.GRCh37'].fasta_fai + input[3] = params.genomes['GATK.GRCh37'].bwa + input[4] = params.genomes['GATK.GRCh37'].dbsnp + input[5] = params.genomes['GATK.GRCh37'].dbsnp_tbi + input[6] = params.genomes['GATK.GRCh37'].indel_mask + input[7] = params.genomes['GATK.GRCh37'].germ_sv_db + input[8] = params.genomes['GATK.GRCh37'].simple_seq_db + input[9] = params.error_rate """ } } then { assert process.success - assert snapshot(process.out).match() + //assert snapshot(process.out).match() } } diff --git a/tests/nextflow.config b/tests/nextflow.config index c19b1ad..1146d5b 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -3,3 +3,767 @@ Nextflow config file for running tests ======================================================================================== */ + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mskilab-org/nf-jabba Nextflow config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Default config options for all compute environments +---------------------------------------------------------------------------------------- +*/ + +// Global default params, used in configs +params { + + + // TODO nf-core: Specify your pipeline's command line flags + // Input 
options (Mandatory!) + input = null + step = 'alignment' + + // References + genome = 'GATK.GRCh38' + igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_ignore = false + mski_base = 's3://mskilab-pipeline' + save_reference = false + build_only_index = false // Only build the reference indexes + download_cache = false // Do not download annotation cache + + // Options to consider + // Main options + no_intervals = false // Intervals will be built from the fasta file + nucleotides_per_second = 200000 // Default interval size + tools = null // No default Variant_Calling or Annotation tools + skip_tools = null // All tools (markduplicates + baserecalibrator + QC) are used by default + split_fastq = 50000000 // FASTQ files will not be split by default by FASTP, sarek = 50000000 + + // Modify FASTQ files (trim/split) with FASTP + trim_fastq = false // No trimming by default + clip_r1 = 0 + clip_r2 = 0 + three_prime_clip_r1 = 0 + three_prime_clip_r2 = 0 + trim_nextseq = 0 + save_trimmed = false + save_split_fastqs = false + + // Alignment + aligner = 'bwa-mem' // Default is bwa-mem, bwa-mem2 and dragmap can be used too + save_mapped = true // Mapped BAMs are saved + save_output_as_bam = true // Output files from alignment are saved as bam by default and not as cram files + seq_center = null // No sequencing center to be written in read group CN field by aligner + seq_platform = null // Default platform written in read group PL field by aligner, null by default. 
+ + // Structural Variant Calling + error_rate = 0.01 // Default error_rate for Svaba + + //indel_mask = null // Must provide blacklist bed file for indels based on genome to run Svaba + + // HetPileups options + filter_hets = "TRUE" + max_depth = 1000 + + // fragCounter options + midpoint_frag = "TRUE" // If TRUE only count midpoint if FALSE then count bin footprint of every fragment interval: Default=TRUE + windowsize_frag = 200 // Window / bin size : Default=200 (but dryclean uses 1000 binsize) + minmapq_frag = 1 // Minimal map quality : Default = 1 + paired_frag = "TRUE" // Is the dataset paired : Default = TRUE + exome_frag = "FALSE" // Use exons as bins instead of fixed window : Default = FALSE + + // Dryclean options + centered_dryclean = "TRUE" + cbs_dryclean = "FALSE" + cnsignif_dryclean = 0.00001 + wholeGenome_dryclean = "TRUE" + blacklist_dryclean = "FALSE" + blacklist_path_dryclean = "NA" + germline_filter_dryclean = "FALSE" + germline_file_dryclean = "NA" + human_dryclean = "TRUE" + field_dryclean = "reads.corrected" + + // ASCAT options + field_ascat = "foreground" + hets_thresh_ascat = 0.2 + penalty_ascat = 70 + gc_correct_ascat = "TRUE" + rebin_width_ascat = 50000 + from_maf_ascat = "FALSE" + + // CBS options + cnsignif_cbs = 0.01 + field_cbs = "foreground" + name_cbs = "tumor" + + // JaBbA options + blacklist_junctions_jabba = "NULL" + geno_jabba = "FALSE" + indel_jabba = "exclude" + tfield_jabba = "tier" + iter_jabba = 2 + rescue_window_jabba = 10000 + rescue_all_jabba = "TRUE" + nudgebalanced_jabba = "TRUE" + edgenudge_jabba = 0.1 + strict_jabba = "FALSE" + allin_jabba = "FALSE" + field_jabba = "foreground" + maxna_jabba = 0.9 + ploidy_jabba = "NA" + purity_jabba = "NA" + pp_method_jabba = "ppgrid" + cnsignif_jabba = 0.00001 + slack_jabba = 100 + linear_jabba = "TRUE" + tilim_jabba = 7200 + epgap_jabba = 0.000001 + fix_thres_jabba = -1 + lp_jabba = "TRUE" + ism_jabba = "TRUE" + filter_loose_jabba = "FALSE" + gurobi_jabba = "FALSE" + verbose_jabba 
= "TRUE" + + // Variant Calling + only_paired_variant_calling = false // if true, skips germline variant calling for normal-paired samples + ascat_ploidy = null // default value for ASCAT + ascat_min_base_qual = 20 // default value for ASCAT + ascat_min_counts = 10 // default value for ASCAT + ascat_min_map_qual = 35 // default value for ASCAT + ascat_purity = null // default value for ASCAT + cf_ploidy = "2" // default value for Control-FREEC + cf_coeff = 0.05 // default value for Control-FREEC + cf_contamination = 0 // default value for Control-FREEC + cf_contamination_adjustment = false // by default we are not using this in Control-FREEC + cf_mincov = 0 // ControlFreec default values + cf_minqual = 0 // ControlFreec default values + cf_window = null // by default we are not using this in Control-FREEC + cnvkit_reference = null // by default the reference is build from the fasta file + concatenate_vcfs = false // by default we don't concatenate the germline-vcf-files + ignore_soft_clipped_bases = false // no --dont-use-soft-clipped-bases for GATK Mutect2 + wes = false // Set to true, if data is exome/targeted sequencing data. 
Used to use correct models in various variant callers + joint_germline = false // g.vcf & joint germline calling are not run by default if HaplotypeCaller is selected + joint_mutect2 = false // if true, enables patient-wise multi-sample somatic variant calling + sentieon_haplotyper_emit_mode = "variant" // default value for Sentieon haplotyper + + // Annotation + dbnsfp = null // No dbnsfp processed file + dbnsfp_consequence = null // No default consequence for dbnsfp plugin + dbnsfp_fields = "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF" // Default fields for dbnsfp plugin + dbnsfp_tbi = null // No dbnsfp processed file index + outdir_cache = null // No default outdir cache + snpeff_cache = 's3://annotation-cache/snpeff_cache/' + spliceai_indel = null // No spliceai_indel file + spliceai_indel_tbi = null // No spliceai_indel file index + spliceai_snv = null // No spliceai_snv file + spliceai_snv_tbi = null // No spliceai_snv file index + use_annotation_cache_keys = false + vep_cache = 's3://annotation-cache/vep_cache/' + vep_custom_args = "--everything --filter_common --per_gene --total_length --offline --format vcf" // Default arguments for VEP + vep_dbnsfp = null // dbnsfp plugin disabled within VEP + vep_include_fasta = false // Don't use fasta file for annotation with VEP + vep_loftee = null // loftee plugin disabled within VEP + vep_out_format = "vcf" + vep_spliceai = null // spliceai plugin disabled within VEP + vep_spliceregion = null // spliceregion plugin disabled within VEP + + + // MultiQC options + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null + + + // Boilerplate options + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false + + // Config options + config_profile_name = null + 
config_profile_description = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null + + // Max resource options + // Defaults only, expecting to be overwritten + max_memory = '12.GB' + max_cpus = 2 + max_time = '360.h' + + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes, cf_ploidy' + validationShowHiddenParams = false + validate_params = false + + genomes { + 'GATK.GRCh37' { + fasta = "${params.mski_base}/test_data/human_g1k_v37_decoy.small.fasta" + fasta_fai = "${params.mski_base}/test_data/human_g1k_v37_decoy.small.fasta.fai" + chr_dir = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/Chromosomes" + dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.dict" + bwa = "${params.mski_base}/test_data/BWAIndex/" + dbsnp = "${params.mski_base}/test_data/dbsnp_138.b37.small.vcf.gz" + dbsnp_tbi = "${params.mski_base}/test_data/dbsnp_138.b37.small.vcf.gz.tbi" + dbsnp_vqsr = '--resource:dbsnp,known=false,training=true,truth=false,prior=2.0 dbsnp_138.b37.vcf.gz' + known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz" + known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz.tbi" + known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz" + known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz.tbi" + germline_resource = 
"${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/af-only-gnomad.raw.sites.vcf.gz" + germline_resource_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/af-only-gnomad.raw.sites.vcf.gz.tbi" + intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/intervals/wgs_calling_regions_Sarek.list" + mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/Control-FREEC/out100m2_hg19.gem" + ascat_alleles = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/G1000_alleles_hg19.zip" + ascat_genome = 'hg19' + ascat_loci = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/G1000_loci_hg19.zip" + ascat_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/GC_G1000_hg19.zip" + ascat_loci_rt = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/RT_G1000_hg19.zip" + snpeff_db = 87 + snpeff_genome = 'GRCh37' + vep_cache_version = 110 + vep_genome = 'GRCh37' + vep_species = 'homo_sapiens' + indel_mask = "${params.mski_base}/SVABA/hg19/snowman_blacklist.bed" + germ_sv_db = "${params.mski_base}/SVABA/hg19/snowman_germline_mini_160413.bed" + simple_seq_db = "${params.mski_base}/SVABA/hg19/repeat_masker_hg19_Simple.bed" + blacklist_gridss = "${params.mski_base}/test_data/human_g1k_v37_decoy.small.fasta.bed" + pon_gridss = "${params.mski_base}/GRIDSS/pon/hg19/" + gcmapdir_frag = "${params.mski_base}/test_data/gcMAP21/" + build_dryclean = 'hg19' + hapmap_sites = "${params.mski_base}/test_data/hapmap_3.3.b37.vcf.gz" + pon_dryclean = "${params.mski_base}/test_data/detergent.rds" + blacklist_coverage_jabba = "${params.mski_base}/JaBbA/blacklist_coverage/hg19/maskA_re.rds" + } + 'GATK.GRCh38' { + fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta" + fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai" + chr_dir = 
"${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Chromosomes" + dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict" + bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAmem2Index/" + cf_chrom_len = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Length/Homo_sapiens_assembly38.len" + dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz" + dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi" + known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz" + known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz.tbi" + known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz" + known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" + germline_resource = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/af-only-gnomad.hg38.vcf.gz" + germline_resource_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/af-only-gnomad.hg38.vcf.gz.tbi" + intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/intervals/wgs_calling_regions_noseconds.hg38.bed" + mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/Control-FREEC/out100m2_hg38.gem" + pon = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz" + pon_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz.tbi" + 
ascat_alleles = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/G1000_alleles_hg38.zip" + ascat_genome = 'hg38' + ascat_loci = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/G1000_loci_hg38.zip" + ascat_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/GC_G1000_hg38.zip" + ascat_loci_rt = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/RT_G1000_hg38.zip" + snpeff_db = 105 + snpeff_genome = 'GRCh38' + vep_cache_version = 110 + vep_genome = 'GRCh38' + vep_species = 'homo_sapiens' + indel_mask = "${params.mski_base}/SVABA/hg38/snowman_blacklist.hg38.bed" + germ_sv_db = "${params.mski_base}/SVABA/hg38/snowman_germline_mini_hg38.bed" + simple_seq_db = "${params.mski_base}/SVABA/hg38/repeat_masker_hg38_simple.bed" + blacklist_gridss = "${params.mski_base}/GRIDSS/blacklist/hg38/ENCFF356LFX_hg38.bed" + pon_gridss = "${params.mski_base}/GRIDSS/pon/hg38/" + gcmapdir_frag = "${params.mski_base}/fragcounter/gcmapdir/hg38/" + build_dryclean = 'hg38' + hapmap_sites = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/hapmap_3.3.hg38.vcf.gz" + pon_dryclean = "${params.mski_base}/dryclean/pon/hg38/detergent.rds" + blacklist_coverage_jabba = "${params.mski_base}/JaBbA/blacklist_coverage/hg38/hg38.coverage.mask.rds" + } + + 'GRCh37' { + fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" + readme = 
"${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed" + } + 'GRCh38' { + fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + } + 'CHM13' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" + mito_name = "chrM" + } + 'GRCm38' { + fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" + gtf = 
"${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "1.87e9" + blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" + } + 'TAIR10' { + fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" + mito_name = "Mt" + } + 'EB2' { + fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" + } + 'UMD3.1' { + 
fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" + mito_name = "MT" + } + 'WBcel235' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" + mito_name = "MtDNA" + macs_gsize = "9e7" + } + 'CanFam3.1' { + fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" + bismark = 
"${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" + mito_name = "MT" + } + 'GRCz10' { + fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'BDGP6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" + mito_name = "M" + macs_gsize = "1.2e8" + } + 'EquCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" + mito_name = "MT" + } + 'EB1' { + fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" + } + 'Galgal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" + gtf = 
"${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'Gm01' { + fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" + } + 'Mmul_1' { + fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" + mito_name = "MT" + } + 'IRGSP-1.0' { + fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = 
"${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'CHIMP2.1.4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" + mito_name = "MT" + } + 'Rnor_5.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'Rnor_6.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'R64-1-1' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = "1.2e7" + } + 'EF2' { + fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" + star = 
"${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "1.21e7" + } + 'Sbi1' { + fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" + } + 'Sscrofa10.2' { + fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" + readme = 
"${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" + mito_name = "MT" + } + 'AGPv3' { + fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'hg38' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + } + 'hg19' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed" + } + 'mm10' { + fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "1.87e9" + blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" + } + 'bosTau8' { + fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'ce10' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = 
"${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "9e7" + } + 'canFam3' { + fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" + mito_name = "chrM" + } + 'danRer10' { + fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.37e9" + } + 'dm6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.2e8" + } + 'equCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" + mito_name = "chrM" + } + 'galGal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" + 
bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" + mito_name = "chrM" + } + 'panTro4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" + mito_name = "chrM" + } + 'rn6' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'sacCer3' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" + readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "1.2e7" + } + 'susScr3' { + fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" + mito_name = "chrM" + } + } +} + +process { + withName: 'SVABA' { + cpus = 2 + memory = 8.GB + time = 84.h + } + withName: 'GRIDSS' { + cpus = 2 + memory = 8.GB + time = 84.h + } + withName: 'FRAGCOUNTER' { + cpus = 2 + memory = 8.GB + time = 48.h + } + withName: 'DRYCLEAN' { + cpus = 2 + memory = 8.GB + time = 36.h + } + withName: 'ASCAT_SEG' { + cpus = 2 + memory = 8.GB + time = 14.h + } + withName: 'CBS' { + cpus = 2 + memory = 8.GB + time = 24.h + } + withName: 'JABBA' { + cpus = 2 + memory = 8.GB + time = 24.h + } +} + +profiles { + + docker { + docker.runOptions = '--platform linux/amd64' + docker.enabled = true + docker.userEmulation = true + docker.registry = 'docker.io' + singularity.enabled = false + } +} + diff --git a/workflows/nfjabba.nf 
b/workflows/nfjabba.nf index b8498a3..78be443 100644 --- a/workflows/nfjabba.nf +++ b/workflows/nfjabba.nf @@ -289,13 +289,13 @@ paired_frag = params.paired_frag ?: Channel.empty() exome_frag = params.exome_frag ?: Channel.empty() // For fragCounter // Dryclean -centered_dryclean = params.centered_dryclean ?: Channel.empty() +center_dryclean = params.center_dryclean ?: Channel.empty() cbs_dryclean = params.cbs_dryclean ?: Channel.empty() cnsignif_dryclean = params.cnsignif_dryclean ?: Channel.empty() wholeGenome_dryclean = params.wholeGenome_dryclean ?: Channel.empty() blacklist_dryclean = params.blacklist_dryclean ?: Channel.empty() germline_filter_dryclean = params.germline_filter_dryclean ?: Channel.empty() -human_dryclean = params.human_dryclean ?: Channel.empty() +//human_dryclean = params.human_dryclean ?: Channel.empty() field_dryclean = params.field_dryclean ?: Channel.empty() build_dryclean = params.build_dryclean ?: Channel.empty() @@ -419,8 +419,8 @@ include { SAMTOOLS_CONVERT as BAM_TO_CRAM_MAPPING } from '../modules/nf-core // Convert CRAM files (optional) include { SAMTOOLS_CONVERT as CRAM_TO_BAM } from '../modules/nf-core/samtools/convert/main' include { SAMTOOLS_CONVERT as CRAM_TO_BAM_RECAL } from '../modules/nf-core/samtools/convert/main' -include { SAMTOOLS_CONVERT as CRAM_TO_BAM_NORMAL } from '../modules/nf-core/samtools/convert/main' -include { SAMTOOLS_CONVERT as CRAM_TO_BAM_TUMOR } from '../modules/nf-core/samtools/convert/main' +//include { SAMTOOLS_CONVERT as CRAM_TO_BAM_NORMAL } from '../modules/nf-core/samtools/convert/main' +include { SAMTOOLS_CONVERT as CRAM_TO_BAM_FINAL } from '../modules/nf-core/samtools/convert/main' // Mark Duplicates (+QC) include { BAM_MARKDUPLICATES } from '../subworkflows/local/bam_markduplicates/main' @@ -915,11 +915,11 @@ workflow NFJABBA { cram_sv_calling = cram_variant_calling // Converting to BAM files to work downstream (SvABA has very low success rate with CRAMs) - CRAM_TO_BAM(cram_sv_calling, fasta, 
fasta_fai) - versions = versions.mix(CRAM_TO_BAM.out.versions) + CRAM_TO_BAM_FINAL(cram_sv_calling, fasta, fasta_fai) + versions = versions.mix(CRAM_TO_BAM_FINAL.out.versions) // Gets the BAM files in a channel (format: [meta, bam, bai]); confirms data type is correct - bam_sv_calling = Channel.empty().mix(CRAM_TO_BAM.out.alignment_index) + bam_sv_calling = Channel.empty().mix(CRAM_TO_BAM_FINAL.out.alignment_index) .map{ meta, bam, bai -> [ meta + [data_type: "bam"], bam, bai ] } //cram_fragcounter_calling = cram_variant_calling @@ -1033,11 +1033,13 @@ workflow NFJABBA { } if (params.tools && params.tools.split(',').contains('fragcounter')) { - NORMAL_FRAGCOUNTER(bam_fragcounter_status.normal, midpoint_frag, windowsize_frag, gcmapdir_frag, minmapq_frag, fasta, fasta_fai, paired_frag, exome_frag) - normal_frag_cov = Channel.empty().mix(NORMAL_FRAGCOUNTER.out.fragcounter_cov) + NORMAL_FRAGCOUNTER(bam_fragcounter_status.normal, midpoint_frag, windowsize_frag, gcmapdir_frag, minmapq_frag, paired_frag, exome_frag) + //normal_frag_cov = Channel.empty().mix(NORMAL_FRAGCOUNTER.out.fragcounter_cov) + normal_frag_cov = Channel.empty().mix(NORMAL_FRAGCOUNTER.out.rebinned_raw_cov) - TUMOR_FRAGCOUNTER(bam_fragcounter_status.tumor, midpoint_frag, windowsize_frag, gcmapdir_frag, minmapq_frag, fasta, fasta_fai, paired_frag, exome_frag) - tumor_frag_cov = Channel.empty().mix(TUMOR_FRAGCOUNTER.out.fragcounter_cov) + TUMOR_FRAGCOUNTER(bam_fragcounter_status.tumor, midpoint_frag, windowsize_frag, gcmapdir_frag, minmapq_frag, paired_frag, exome_frag) + //tumor_frag_cov = Channel.empty().mix(TUMOR_FRAGCOUNTER.out.fragcounter_cov) + tumor_frag_cov = Channel.empty().mix(TUMOR_FRAGCOUNTER.out.rebinned_raw_cov) // Only need one versions because its just one program (fragcounter) versions = versions.mix(NORMAL_FRAGCOUNTER.out.versions) @@ -1098,7 +1100,8 @@ workflow NFJABBA { //Getting the meta.patient id out to aid in crossing JaBbA and ASCAT inputs het_pileups_to_cross = 
sites_from_het_pileups_wgs.map { tuple -> def (meta, hets) = tuple - [meta.patient, meta + [id: meta.sample], hets] } + [meta.patient, meta, hets] } + //het_pileups_to_cross.view() // Commenting out because not necessary for running from this step // CSV should be written for the file actually out out, either bam or BAM //csv_hetpileups = Channel.empty().mix(BAM_HETPILEUPS.out.het_pileups_wgs) @@ -1126,18 +1129,18 @@ workflow NFJABBA { } if (params.tools && params.tools.split(',').contains('dryclean')) { // Dryclean for both tumor & normal - TUMOR_DRYCLEAN(tumor_frag_cov, pon_dryclean, centered_dryclean, + TUMOR_DRYCLEAN(tumor_frag_cov, pon_dryclean, center_dryclean, cbs_dryclean, cnsignif_dryclean, wholeGenome_dryclean, blacklist_dryclean, blacklist_path_dryclean, - germline_filter_dryclean, germline_file_dryclean, human_dryclean, + germline_filter_dryclean, germline_file_dryclean, field_dryclean, build_dryclean) tumor_dryclean_cov = Channel.empty().mix(TUMOR_DRYCLEAN.out.dryclean_cov) - NORMAL_DRYCLEAN(normal_frag_cov, pon_dryclean, centered_dryclean, + NORMAL_DRYCLEAN(normal_frag_cov, pon_dryclean, center_dryclean, cbs_dryclean, cnsignif_dryclean, wholeGenome_dryclean, blacklist_dryclean, blacklist_path_dryclean, - germline_filter_dryclean, germline_file_dryclean, human_dryclean, + germline_filter_dryclean, germline_file_dryclean, field_dryclean, build_dryclean) normal_dryclean_cov = Channel.empty().mix(NORMAL_DRYCLEAN.out.dryclean_cov) @@ -1153,18 +1156,7 @@ workflow NFJABBA { def (meta, cov) = tuple [meta.patient, meta + [id: meta.sample], cov] } - if (params.tools && (params.tools.split(',').contains('ascat') && params.tools.split(',').contains('hetpileups'))) { - - input_ascat = tumor_dryclean_cov_to_cross.cross(het_pileups_to_cross) - .map { cov, hets -> - def meta = [:] - meta.id = cov[1].sample - meta.patient = cov[0] - meta.sex = cov[1].sex - - [ meta, cov[2], hets[2] ] - } - } + } // TODO: Add a subworkflow to write the output file paths into a csv @@ 
-1184,6 +1176,7 @@ workflow NFJABBA { .map { tumor, normal -> def meta = [:] meta.id = "${tumor[1].sample}_vs_${normal[1].sample}".toString() + meta.sample = "${tumor[1].sample}".toString() meta.normal_id = normal[1].sample meta.patient = normal[0] meta.sex = normal[1].sex @@ -1199,8 +1192,24 @@ workflow NFJABBA { cbs_seg_rds = Channel.empty().mix(CBS.out.cbs_seg_rds) cbs_nseg_rds = Channel.empty().mix(CBS.out.cbs_nseg_rds) - cbs_seg_rds_to_cross = cbs_seg_rds.map{ meta, seg -> [ meta.patient, meta, seg ] } - cbs_nseg_rds_to_cross = cbs_nseg_rds.map{ meta, nseg -> [ meta.patient, meta, nseg ] } + cbs_cov_rds_to_cross = cbs_cov_rds.map{ meta, cov -> [ meta.patient, meta + [id: meta.sample], cov ] } + //cbs_cov_rds_to_cross.view() + cbs_seg_rds_to_cross = cbs_seg_rds.map{ meta, seg -> [ meta.patient, meta + [id: meta.sample], seg ] } + cbs_nseg_rds_to_cross = cbs_nseg_rds.map{ meta, nseg -> [ meta.patient, meta + [id: meta.sample], nseg ] } + + if (params.tools && (params.tools.split(',').contains('ascat') && params.tools.split(',').contains('hetpileups'))) { + //het_pileups_to_cross.view() + input_ascat = tumor_dryclean_cov_to_cross.cross(het_pileups_to_cross) + .map { cov, hets -> + def meta = [:] + meta.id = "${cov[1].sample}".toString() + meta.patient = cov[0] + meta.sex = cov[1].sex + + [ meta, cov[2], hets[2] ] + } + } + //input_ascat.view() //Join all the inputs for jabba here into a single channel based on patient id that would accept it in downstream because nextflow doesn't know shit on whether the outputs are from same patient if (params.tools && params.tools.split(',').contains('svaba')) { @@ -1292,11 +1301,15 @@ workflow NFJABBA { } input_ploidy_jabba = input_jabba1_final.map{ meta, cov, hets, vcf, vcf2, seg, nseg, ploidy -> [ meta, ploidy ] } } else { - ploidy_jabba = ploidy_jabba.map{ meta, ploidy -> [ meta.patient, meta, ploidy ] } + //ploidy_jabba = ploidy_jabba.map{ meta, ploidy -> [ meta.patient, meta, ploidy ] } - input_jabba1_final = 
input_jabba1.join(ploidy_jabba).map{ tuples -> - [tuples[1]] + [tuples[2]] + [tuples[3]] + [tuples[4]] + [tuples[5]] + [tuples[6]] + [tuples[7]] + [tuples[9]] - } + //input_jabba1_final = input_jabba1.cross(ploidy_jabba).map{ tuples -> + // [tuples[1]] + [tuples[2]] + [tuples[3]] + [tuples[4]] + [tuples[5]] + [tuples[6]] + [tuples[7]] + [tuples[9]] + // } + + input_jabba1_final = input_jabba1.cross(ploidy_jabba).map{ tuples -> + [tuples[0][0]] + [tuples[0][1]] + [tuples[0][2]] + [tuples[0][3]] + [tuples[0][4]] + [tuples[0][5]] + [tuples[0][6]] + [tuples[1][1]] + } input_ploidy_jabba = input_jabba1_final.map{ meta, cov, hets, vcf, vcf2, seg, nseg, ploidy -> [ meta, ploidy ] } } @@ -1349,11 +1362,9 @@ workflow NFJABBA { } input_ploidy_jabba = input_jabba2_final.map{ meta, cov, hets, vcf, vcf2, seg, nseg, ploidy -> [ meta, ploidy ] } } else { - ploidy_jabba = ploidy_jabba.map{ meta, ploidy -> [ meta.patient, meta, ploidy ] } - - input_jabba2_final = input_jabba2.join(ploidy_jabba).map{ tuples -> - [tuples[1]] + [tuples[2]] + [tuples[3]] + [tuples[4]] + [tuples[5]] + [tuples[6]] + [tuples[7]] + [tuples[9]] - } + input_jabba2_final = input_jabba2.cross(ploidy_jabba).map{ tuples -> + [tuples[0][0]] + [tuples[0][1]] + [tuples[0][2]] + [tuples[0][3]] + [tuples[0][4]] + [tuples[0][5]] + [tuples[0][6]] + [tuples[1][1]] + } input_ploidy_jabba = input_jabba2_final.map{ meta, cov, hets, vcf, vcf2, seg, nseg, ploidy -> [ meta, ploidy ] } } @@ -1444,7 +1455,7 @@ workflow NFJABBA { } - + } }