feat: Wires up dryclean

Did not add schema definitions, since those can be generated automatically with the nf-core tool. Also did not fully incorporate FragCounter into the new "coverage" step, in anticipation of merge conflicts.
mskilab-org · Oct 19, 2023 · ec26a90 · ec26a90
1 parent 5c20423
commit ec26a90
Show file tree

Hide file tree

Showing 8 changed files with 135 additions and 68 deletions.
diff --git a/modules/local/dryclean/main.nf b/modules/local/dryclean/main.nf
@@ -8,11 +8,11 @@ process DRYCLEAN {
         'mskilab/dryclean:latest' }"
 
     input:
-    tuple val(meta), path(pon) path(input)
+    tuple val(meta), path(input)
+    path pon
     val centered
     val cbs
     val cnsignif
-    val cores
     val wholeGenome
     val blacklist
     path blacklist_path
@@ -25,14 +25,15 @@ process DRYCLEAN {
     output:
     tuple val(meta), path("*.drycleaned.cov.rds")     , emit: decomposed_cov, optional: true
     tuple val(meta), path("*.dryclean.object.rds")    , emit: dryclean_object, optional: true
-    //path "versions.yml"           , emit: versions
+    path "versions.yml"           , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '0.0.2'
     """
     #!/bin/bash
     set -o allexport
@@ -65,7 +66,23 @@ process DRYCLEAN {
     export drycln=$drycleanPath/extdata/drcln
     echo $drycln
     set +x
-    CMD="Rscript $drycln \$@"
+
+    CMD="Rscript $drycln \\
+        --input             ${input} \\
+        --pon               ${pon} \\
+        --centered          ${centered} \\
+        --cbs               ${cbs} \\
+        --cnsignif          ${cnsignif} \\
+        --cores             ${task.cpus} \\
+        --wholeGenome       ${wholeGenome} \\
+        --blacklist         ${blacklist} \\
+        --blacklist_path    ${blacklist_path} \\
+        --germline.filter   ${germline_filter} \\
+        --germline.file     ${germline_file} \\
+        --human             ${human} \\
+        --field             ${field} \\
+        --build             ${build} \\
+    "
 
     if [ ! -s ./drycleaned.cov.rds ]; then
 	if ! { echo "Running:" && echo "${CMD}" && eval ${CMD}; }; then
@@ -75,6 +92,11 @@ process DRYCLEAN {
 	echo "If you wish to rerun Dryclean - please purge directory first"
     fi
+    # Write versions.yml before exiting; it is a required output and anything placed after the exit never runs.
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        dryclean: ${VERSION}
+    END_VERSIONS
     exit 0
     """
 
     stub:

diff --git a/modules/local/gridss/gridss/main.nf b/modules/local/gridss/gridss/main.nf
@@ -15,7 +15,7 @@ process GRIDSS_GRIDSS {
     path(fasta_fai)
     path(bwa_index)                                                                           // required: bwa index folder
     path(blacklist_gridss)                                                                    // optional: gridss blacklist bed file based on genome
-    
+
 
     output:
     tuple val(meta), path("*.vcf.gz")                                  , emit: vcf,       optional:true

diff --git a/nextflow.config b/nextflow.config
@@ -55,26 +55,12 @@ params {
 	//indel_mask                 = null                            // Must provide blacklist bed file for indels based on genome to run Svaba
 
 	// fragCounter options
-	midpoint_frag              = "TRUE"                           // If TRUE only count midpoint if FALSE then count bin footprint of every fragment interval: Default=TRUE  
+	midpoint_frag              = "TRUE"                           // If TRUE only count midpoint if FALSE then count bin footprint of every fragment interval: Default=TRUE
 	windowsize_frag            = 200                            // Window / bin size : Default=200
 	minmapq_frag               = 1                              // Minimal map quality : Default = 1
 	paired_frag                = "TRUE"                           // Is the dataset paired : Default = TRUE
 	exome_frag                 = "FALSE"	                        // Use exons as bins instead of fixed window : Default = FALSE
 
-    // Dryclean
-    centered                    = true
-    cbs                         = false
-    cnsignif                    = 0.00001
-    cores                       = 1
-    wholeGenome                 = true
-    blacklist                   = false
-    blacklist_path              = null
-    germline_filter             = false
-    germline_file               = null
-    human                       = true
-    field                       = "reads.corrected"
-    build                       = "hg19"
-
     // Variant Calling
     only_paired_variant_calling   = false // if true, skips germline variant calling for normal-paired samples
     ascat_ploidy                  = null  // default value for ASCAT
@@ -126,6 +112,19 @@ params {
 	max_multiqc_email_size     = '25.MB'
 	multiqc_methods_description = null
 
+    // Dryclean
+    centered                    = true
+    cbs                         = false
+    cnsignif                    = 0.00001
+    wholeGenome                 = true
+    blacklist                   = false
+    blacklist_path              = null
+    germline_filter             = false
+    germline_file               = null
+    human                       = true
+    field                       = "reads.corrected"    // NOTE(review): `field` is assigned again under "JaBbA options" just below (= "ratio"); presumably the later assignment wins - confirm which value Dryclean actually receives
+    build                       = "hg19"
+
     // JaBbA options
     field = "ratio"
     junctionUnfiltered = file("/dev/null")
@@ -134,8 +133,8 @@ params {
     cbs_seg_rds = file("/dev/null")
     slack = 100
     het_pileups_wgs = file("/dev/null")
-    purity = "NA"
-    ploidy = "NA"
+    purity = null
+    ploidy = null
     tilim = 6000
     epgap = 1e-8
     pp_method = "ppgrid"

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -47,7 +47,8 @@
                         "recalibrate",
                         "sv_calling",
                         "variant_calling",
-                        "annotate"
+                        "annotate",
+                        "coverage"
                     ]
                 },
                 "outdir": {
@@ -290,7 +291,7 @@
                     "fa_icon": "fas fa-forward",
                     "description": "Option to mention whether the dataset is paired or single ended for fragCounter",
                     "hidden": true,
-                    "help_text": "If TRUE, will consider the dataset is paired ened, else single ended"
+                    "help_text": "If TRUE, will consider the dataset is paired ended, else single ended"
                 },
                 "exome_frag": {
                     "type": "string",
@@ -303,7 +304,7 @@
                     "type": "number",
                     "default": 200,
                     "fa_icon": "fas fa-wrench",
-                    "description": "Default bin size for gragCounter",
+                    "description": "Default bin size for fragCounter",
                     "hidden": true,
                     "help_text": "By default the bin size is 200, adjust as necessary."
                 },
@@ -322,7 +323,7 @@
                     "hidden": true,
                     "help_text": "Provide the directory containing .rds file for gc and mappability bias based on genome version"
                 }
-            } 
+            }
         },
         "variant_calling": {
             "title": "Variant Calling",

diff --git a/subworkflows/local/bam_Dryclean/main.nf b/subworkflows/local/bam_Dryclean/main.nf
diff --git a/subworkflows/local/bam_svcalling_gridss/main.nf b/subworkflows/local/bam_svcalling_gridss/main.nf
@@ -9,7 +9,7 @@ include { GRIDSS_SOMATIC  } from '../../../modules/local/gridss/somaticFilter/ma
 
 workflow BAM_SVCALLING_GRIDSS {
     take:
-    cram                                              // channel: [mandatory] [ meta, normalcram, normalcrai, tumorcram, tumorcrai ] 
+    cram                                              // channel: [mandatory] [ meta, normalcram, normalcrai, tumorcram, tumorcrai ]
     fasta                                             // channel: [mandatory] reference fasta
     fasta_fai                                         // channel: [mandatory] reference fasta index
     bwa_index                                         // channel: [mandatory] bwa index path
@@ -65,7 +65,7 @@ workflow BAM_SVCALLING_GRIDSS_SOMATIC {
     somatic_all
     somatic_high_confidence
     all_vcf
-    
+
     versions
 
 }

diff --git a/subworkflows/local/dryclean/main.nf b/subworkflows/local/dryclean/main.nf
@@ -0,0 +1,44 @@
+//
+// DRYCLEAN
+//
+
+include { DRYCLEAN } from '../../../modules/local/dryclean/main.nf'
+
+workflow COV_DRYCLEAN {
+    // Thin wrapper: invokes the DRYCLEAN process and re-exports its coverage and versions channels.
+    take:
+    input_dryclean              // channel: [mandatory] [ meta, input ]
+    pon_dryclean                // second DRYCLEAN argument (`pon`)
+    centered_dryclean           // this and all following inputs are forwarded positionally to DRYCLEAN
+    cbs_dryclean
+    cnsignif_dryclean
+    wholeGenome_dryclean
+    blacklist_dryclean
+    blacklist_path_dryclean
+    germline_filter_dryclean
+    germline_file_dryclean
+    human_dryclean
+    field_dryclean
+    build_dryclean
+
+    main:
+    versions = Channel.empty()
+
+    // Arguments are positional: their order must match the input declarations of the DRYCLEAN process.
+    DRYCLEAN(
+        input_dryclean, pon_dryclean, centered_dryclean, cbs_dryclean,
+        cnsignif_dryclean, wholeGenome_dryclean, blacklist_dryclean,
+        blacklist_path_dryclean, germline_filter_dryclean, germline_file_dryclean,
+        human_dryclean, field_dryclean, build_dryclean
+    )
+
+    // DRYCLEAN.out.dryclean_object is also produced by the process but is intentionally not forwarded.
+    dryclean_cov = DRYCLEAN.out.decomposed_cov
+    versions     = versions.mix(DRYCLEAN.out.versions)
+
+    emit:
+    dryclean_cov    // only need to emit the coverage for JaBbA
+
+    versions
+}
+