fix: various minor fixes to output names

mskilab-org · Oct 15, 2024 · 4e35b7d · 4e35b7d
1 parent c09d46a
commit 4e35b7d
Show file tree

Hide file tree

Showing 8 changed files with 42 additions and 142 deletions.
diff --git a/assets/schema_input.json b/assets/schema_input.json
@@ -117,21 +117,6 @@
                 "format": "file-path",
                 "exists": true
             },
-            "crai": {
-                "errorMessage": "CRAM index file cannot contain spaces and must have extension '.crai'",
-                "anyOf": [
-                    {
-                        "type": "string",
-                        "pattern": "^\\S+\\.crai$"
-                    },
-                    {
-                        "type": "string",
-                        "maxLength": 0
-                    }
-                ],
-                "format": "file-path",
-                "exists": true
-            },
             "bam": {
                 "errorMessage": "BAM file cannot contain spaces and must have extension '.bam'",
                 "anyOf": [
@@ -147,21 +132,6 @@
                 "format": "file-path",
                 "exists": true
             },
-            "bai": {
-                "errorMessage": "BAM index file cannot contain spaces and must have extension '.bai'",
-                "anyOf": [
-                    {
-                        "type": "string",
-                        "pattern": "^\\S+\\.bai$"
-                    },
-                    {
-                        "type": "string",
-                        "maxLength": 0
-                    }
-                ],
-                "format": "file-path",
-                "exists": true
-            },
             "hets": {
                 "errorMessage": "Pileups file and must have extension '.txt'",
                 "anyOf": [
@@ -311,12 +281,12 @@
                 "format": "file-path",
                 "exists": true
             },
-            "vcf2": {
-                "errorMessage": "VCF file for reads 2 cannot contain spaces and must have extension '.vcf' or '.vcf.gz'",
+            "jabba_gg": {
+                "errorMessage": "jabba.simple.gg.rds file and must have extension '.rds'",
                 "anyOf": [
                     {
                         "type": "string",
-                        "pattern": "^\\S+\\.vcf(\\.gz)?$"
+                        "pattern": "^\\S+\\.rds$"
                     },
                     {
                         "type": "string",
@@ -357,29 +327,6 @@
                 "format": "file-path",
                 "exists": true
             },
-            "jabba_gg": {
-                "errorMessage": "jabba.simple.gg.rds file and must have extension '.rds'",
-                "anyOf": [
-                    {
-                        "type": "string",
-                        "pattern": "^\\S+\\.rds$"
-                    },
-                    {
-                        "type": "string",
-                        "pattern": "^NA$"
-                    },
-                    {
-                        "type": "string",
-                        "pattern": "^/dev/null$"
-                    },
-                    {
-                        "type": "string",
-                        "maxLength": 0
-                    }
-                ],
-                "format": "file-path",
-                "exists": true
-            },
             "ni_balanced_rds": {
                 "errorMessage": "balanced_gg.rds file and must have extension '.rds'",
                 "anyOf": [
@@ -495,29 +442,6 @@
                 "format": "file-path",
                 "exists": true
             },
-            "snv_somatic_tbi": {
-                "errorMessage": "Sage somatic VCF tbi file cannot contain spaces and must have extension '.tbi' or '.gz.tbi'",
-                "anyOf": [
-                    {
-                        "type": "string",
-                        "pattern": "^\\S+\\.tbi(\\.gz.tbi)?$"
-                    },
-                    {
-                        "type": "string",
-                        "pattern": "^NA$"
-                    },
-                    {
-                        "type": "string",
-                        "pattern": "^/dev/null$"
-                    },
-                    {
-                        "type": "string",
-                        "maxLength": 0
-                    }
-                ],
-                "format": "file-path",
-                "exists": true
-            },
             "snv_germline_vcf": {
                 "errorMessage": "Sage germline VCF output file cannot contain spaces and must have extension '.vcf' or '.vcf.gz'",
                 "anyOf": [
@@ -541,29 +465,6 @@
                 "format": "file-path",
                 "exists": true
             },
-            "snv_germline_tbi": {
-                "errorMessage": "Sage germline VCF tbi file cannot contain spaces and must have extension '.tbi' or '.gz.tbi'",
-                "anyOf": [
-                    {
-                        "type": "string",
-                        "pattern": "^\\S+\\.tbi(\\.gz.tbi)?$"
-                    },
-                    {
-                        "type": "string",
-                        "pattern": "^NA$"
-                    },
-                    {
-                        "type": "string",
-                        "pattern": "^/dev/null$"
-                    },
-                    {
-                        "type": "string",
-                        "maxLength": 0
-                    }
-                ],
-                "format": "file-path",
-                "exists": true
-            },
             "variant_somatic_ann": {
                 "errorMessage": "Annotated somatic VCF file cannot contain spaces and must have extension '.vcf' ",
                 "anyOf": [

diff --git a/modules/local/allelic_cn/main.nf b/modules/local/allelic_cn/main.nf
@@ -28,7 +28,7 @@ process NON_INTEGER_BALANCE {
     val(pad)
 
     output:
-    tuple val(meta), path("balanced.gg.rds")                , emit: non_integer_balance_balanced_gg, optional: true
+    tuple val(meta), path("non_integer.balanced.gg.rds")                , emit: non_integer_balance_balanced_gg, optional: true
     tuple val(meta), path("hets.gg.rds")                    , emit: non_integer_balance_hets_gg, optional: true
     path "versions.yml"                                     , emit: versions
 
@@ -68,6 +68,8 @@ process NON_INTEGER_BALANCE {
         --fasta $fasta \\
         --pad $pad
 
+    mv balanced.gg.rds non_integer.balanced.gg.rds
+
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         non_integer_balance: ${VERSION}
@@ -116,7 +118,7 @@ process LP_PHASED_BALANCE {
     val(tilim)
 
     output:
-    tuple val(meta), path("balanced.gg.rds")                , emit: lp_phased_balance_balanced_gg, optional: true
+    tuple val(meta), path("lp_phased.balanced.gg.rds")                , emit: lp_phased_balance_balanced_gg, optional: true
     tuple val(meta), path("binstats.gg.rds")                , emit: lp_phased_balance_binstats_gg, optional: true
     tuple val(meta), path("unphased.gg.rds")                , emit: lp_phased_balance_unphased_allelic_gg, optional: true
     path "versions.yml"                                     , emit: versions
@@ -150,6 +152,8 @@ process LP_PHASED_BALANCE {
         --nodefileind $nodefileind \\
         --tilim $tilim
 
+    mv balanced.gg.rds lp_phased.balanced.gg.rds
+
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         lp_phased_balance: ${VERSION}

diff --git a/modules/local/amber/main.nf b/modules/local/amber/main.nf
@@ -76,22 +76,26 @@ process MAKE_HET_SITES {
     def baf_tsv = "${amber_dir}/${meta.tumor_id}.amber.baf.tsv.gz"
 
     """
-    echo "seqnames start end alt.count.t ref.count.t alt.count.n ref.count.n" > sites.txt
+    echo "seqnames start end alt.count.t ref.count.t alt.count.n ref.count.n alt.frac.t alt.frac.n" > sites.txt
     zcat ${baf_tsv} | awk 'NR>1 {
-        # Calculate alt.count.t using tumorModifiedBAF
-        alt_count_t = int(\$5 * \$4)  # \$4 is tumorModifiedBAF
-
-        # Calculate ref.count.t using tumorModifiedBAF
-        ref_count_t = int(\$5 * (1 - \$4))  # \$4 is tumorModifiedBAF
-
-        # Calculate alt.count.n using normalBAF
-        alt_count_n = int(\$8 * \$6)
-
-        # Calculate ref.count.n using normalBAF
-        ref_count_n = int(\$8 * (1 - \$6))
-
-        # Print the results
-        print \$1, \$2, \$2, alt_count_t, ref_count_t, alt_count_n, ref_count_n
+        chromosome=\$1
+        start = \$2
+        end = \$2
+        tumorBAF = \$3
+        tumorModifiedBAF = \$4
+        tumorDepth = \$5
+        normalBAF = \$6
+        normalModifiedBAF = \$7
+        normalDepth = \$8
+
+        alt_count_t = int(tumorDepth * tumorModifiedBAF)
+        ref_count_t = int(tumorDepth * (1 - tumorModifiedBAF))
+        alt_count_n = int(normalDepth * normalBAF)
+        ref_count_n = int(normalDepth * (1 - normalBAF))
+        alt_frac_t = alt_count_t / (alt_count_t + ref_count_t)
+        alt_frac_n = alt_count_n / (alt_count_n + ref_count_n)
+
+        print chromosome, start, end, alt_count_t, ref_count_t, alt_count_n, ref_count_n, alt_frac_t, alt_frac_n
     }' >> sites.txt
 
     cat <<-END_VERSIONS > versions.yml

diff --git a/modules/local/fragcounter/main.nf b/modules/local/fragcounter/main.nf
@@ -71,7 +71,7 @@ process FRAGCOUNTER {
     "${task.process}":
         fragcounter: ${VERSION}
     END_VERSIONS
-    """    
+    """
 
 }
 

diff --git a/modules/local/sigprofilerassignment/main.nf b/modules/local/sigprofilerassignment/main.nf
@@ -34,6 +34,10 @@ process SIGPROFILERASSIGNMENT {
     --genome ${genome} \\
     --cosmic-version ${cosmic_version} \\
 
+    # prepend sbs_ and indel_ to the Activities output file names
+    mv sbs_results/Assignment_Solution/Activities/Assignment_Solution_Activities.txt sbs_results/Assignment_Solution/Activities/sbs_Assignment_Solution_Activities.txt
+    mv indel_results/Assignment_Solution/Activities/Assignment_Solution_Activities.txt indel_results/Assignment_Solution/Activities/indel_Assignment_Solution_Activities.txt
+
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         sigprofilerassignment: ${VERSION}

diff --git a/modules/nf-core/snpeff/snpeff/main.nf b/modules/nf-core/snpeff/snpeff/main.nf
diff --git a/tests/test_runs/chr21_test/params.json b/tests/test_runs/chr21_test/params.json
@@ -4,7 +4,7 @@
     "fasta": "/gpfs/commons/home/sdider/DB/GATK/human_g1k_v37_decoy.fasta",
     "fasta_fai": "/gpfs/commons/home/sdider/DB/GATK/human_g1k_v37_decoy.fasta.fai",
     "bwa": "/gpfs/commons/home/sdider/DB/GATK/bwa/",
-    "tools": "bamqc",
+    "tools": "aligner",
     "outdir": "./results",
     "pon_dryclean": "/gpfs/commons/home/sdider/Projects/nf-casereports/tests/test_data/chr21_pon.rds",
     "field_dryclean": "reads",

diff --git a/workflows/nfcasereports.nf b/workflows/nfcasereports.nf
@@ -277,9 +277,7 @@ inputs = ch_from_samplesheet.map {
     fastq_2,
     table,
     cram,
-    crai,
     bam,
-    bai,
     hets,
     amber_dir,
     frag_cov,
@@ -288,17 +286,14 @@ inputs = ch_from_samplesheet.map {
     seg,
     nseg,
     vcf,
-    vcf_tbi,
     jabba_rds,
     jabba_gg,
     ni_balanced_gg,
     lp_balanced_gg,
     events,
     fusions,
     snv_somatic_vcf,
-    snv_somatic_tbi,
     snv_germline_vcf,
-    snv_germline_tbi,
     variant_somatic_ann,
     variant_somatic_bcf,
     variant_germline_ann,
@@ -314,9 +309,9 @@ inputs = ch_from_samplesheet.map {
         fastq_2: fastq_2,
         table: table,
         cram: cram,
-        crai: crai,
+        crai: cram ? cram + '.crai' : [],
         bam: bam,
-        bai: bai,
+        bai: bam ? bam + '.bai': [],
         hets: hets,
         amber_dir: amber_dir,
         frag_cov: frag_cov,
@@ -325,17 +320,17 @@ inputs = ch_from_samplesheet.map {
         seg: seg,
         nseg: nseg,
         vcf: vcf,
-        vcf_tbi: vcf_tbi,
+        vcf_tbi: vcf ? vcf + '.tbi' : [],
         jabba_rds: jabba_rds,
         jabba_gg: jabba_gg,
         ni_balanced_gg: ni_balanced_gg,
         lp_balanced_gg: lp_balanced_gg,
         events: events,
         fusions: fusions,
         snv_somatic_vcf: snv_somatic_vcf,
-        snv_somatic_tbi: snv_somatic_tbi,
+        snv_somatic_tbi: snv_somatic_vcf ? snv_somatic_vcf + '.tbi' : [],
         snv_germline_vcf: snv_germline_vcf,
-        snv_germline_tbi: snv_germline_tbi,
+        snv_germline_tbi: snv_germline_vcf ? snv_germline_vcf + '.tbi' : [],
         variant_somatic_ann: variant_somatic_ann,
         variant_somatic_bcf: variant_somatic_bcf,
         variant_germline_ann: variant_germline_ann,
@@ -1586,19 +1581,15 @@ workflow NFCASEREPORTS {
     // ##############################
     if (tools_used.contains("all") || tools_used.contains("events")) {
         events_inputs = inputs.filter { it.events.isEmpty() }.map { it -> [it.meta.patient, it.meta] }
-        events_input_jabba_gg = jabba_gg_for_merge
-            .join(events_inputs)
-            .map { it -> [ it[0], it[1] ] } // meta.patient, jabba ggraph
         events_input_non_integer_balance = non_integer_balance_balanced_gg_for_merge
             .join(events_inputs)
             .map { it -> [ it[0], it[1] ] } // meta.patient, balanced_gg
 
         events_existing_outputs = inputs.map { it -> [it.meta, it.events] }.filter { !it[1].isEmpty() }
 
         events_input = events_inputs
-            .join(events_input_jabba_gg)
             .join(events_input_non_integer_balance)
-            .map{ patient, meta, rds, balanced_gg -> [ meta, rds, balanced_gg ] }
+            .map{ patient, meta, balanced_gg -> [ meta, balanced_gg ] }
 
         EVENTS(events_input)
 
@@ -1612,19 +1603,15 @@ workflow NFCASEREPORTS {
     // ##############################
     if (tools_used.contains("all") || tools_used.contains("fusions")) {
         fusions_inputs = inputs.filter { it.fusions.isEmpty() }.map { it -> [it.meta.patient, it.meta] }
-        fusions_input_jabba_gg = jabba_gg_for_merge
-            .join(fusions_inputs)
-            .map { it -> [ it[0], it[1] ] } // meta.patient, jabba ggraph
         fusions_input_non_integer_balance = non_integer_balance_balanced_gg_for_merge
             .join(fusions_inputs)
             .map { it -> [ it[0], it[1] ] } // meta.patient, balanced_gg
 
         fusions_existing_outputs = inputs.map { it -> [it.meta, it.fusions] }.filter { !it[1].isEmpty() }
 
         fusions_input = fusions_inputs
-            .join(fusions_input_jabba_gg)
             .join(fusions_input_non_integer_balance)
-            .map{ patient, meta, rds, balanced_gg -> [ meta, rds, balanced_gg ] }
+            .map{ patient, meta, balanced_gg -> [ meta, balanced_gg ] }
 
         FUSIONS(fusions_input)
         fusions = Channel.empty()
-Original file line number
+Diff line change
@@ Expand Up / @@ -71,7 +71,7 @@ process FRAGCOUNTER { @@
         "${task.process}":
             fragcounter: ${VERSION}
         END_VERSIONS
-        """
+        """
     }
@@ Expand Down @@