Patch summary (free text before the first "diff --git" header; patch tools skip it):
  * samplesheet schema: drop the explicit index columns (crai, bai, snv_*_tbi) and move jabba_gg.
  * workflow: derive index paths from the data file path (<file>.crai / .bai / .tbi).
  * allelic_cn: give the two balanced.gg.rds outputs distinct names.
  * amber: emit alt.frac.t / alt.frac.n, guarding against a zero total count.
  * snpeff: make the default output prefix unique per task and safe to use unquoted in the shell.

diff --git a/assets/schema_input.json b/assets/schema_input.json
index 5ebe51f..fd447b7 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -117,21 +117,6 @@
                 "format": "file-path",
                 "exists": true
             },
-            "crai": {
-                "errorMessage": "CRAM index file cannot contain spaces and must have extension '.crai'",
-                "anyOf": [
-                    {
-                        "type": "string",
-                        "pattern": "^\\S+\\.crai$"
-                    },
-                    {
-                        "type": "string",
-                        "maxLength": 0
-                    }
-                ],
-                "format": "file-path",
-                "exists": true
-            },
             "bam": {
                 "errorMessage": "BAM file cannot contain spaces and must have extension '.bam'",
                 "anyOf": [
@@ -147,21 +132,6 @@
                 "format": "file-path",
                 "exists": true
             },
-            "bai": {
-                "errorMessage": "BAM index file cannot contain spaces and must have extension '.bai'",
-                "anyOf": [
-                    {
-                        "type": "string",
-                        "pattern": "^\\S+\\.bai$"
-                    },
-                    {
-                        "type": "string",
-                        "maxLength": 0
-                    }
-                ],
-                "format": "file-path",
-                "exists": true
-            },
             "hets": {
                 "errorMessage": "Pileups file and must have extension '.txt'",
                 "anyOf": [
@@ -311,12 +281,12 @@
                 "format": "file-path",
                 "exists": true
             },
-            "vcf2": {
-                "errorMessage": "VCF file for reads 2 cannot contain spaces and must have extension '.vcf' or '.vcf.gz'",
+            "jabba_gg": {
+                "errorMessage": "jabba.simple.gg.rds file and must have extension '.rds'",
                 "anyOf": [
                     {
                         "type": "string",
-                        "pattern": "^\\S+\\.vcf(\\.gz)?$"
+                        "pattern": "^\\S+\\.rds$"
                     },
                     {
                         "type": "string",
@@ -357,29 +327,6 @@
                 "format": "file-path",
                 "exists": true
             },
-            "jabba_gg": {
-                "errorMessage": "jabba.simple.gg.rds file and must have extension '.rds'",
-                "anyOf": [
-                    {
-                        "type": "string",
-                        "pattern": "^\\S+\\.rds$"
-                    },
-                    {
-                        "type": "string",
-                        "pattern": "^NA$"
-                    },
-                    {
-                        "type": "string",
-                        "pattern": "^/dev/null$"
-                    },
-                    {
-                        "type": "string",
-                        "maxLength": 0
-                    }
-                ],
-                "format": "file-path",
-                "exists": true
-            },
             "ni_balanced_rds": {
                 "errorMessage": "balanced_gg.rds file and must have extension '.rds'",
                 "anyOf": [
@@ -495,29 +442,6 @@
                 "format": "file-path",
                 "exists": true
             },
-            "snv_somatic_tbi": {
-                "errorMessage": "Sage somatic VCF tbi file cannot contain spaces and must have extension '.tbi' or '.gz.tbi'",
-                "anyOf": [
-                    {
-                        "type": "string",
-                        "pattern": "^\\S+\\.tbi(\\.gz.tbi)?$"
-                    },
-                    {
-                        "type": "string",
-                        "pattern": "^NA$"
-                    },
-                    {
-                        "type": "string",
-                        "pattern": "^/dev/null$"
-                    },
-                    {
-                        "type": "string",
-                        "maxLength": 0
-                    }
-                ],
-                "format": "file-path",
-                "exists": true
-            },
             "snv_germline_vcf": {
                 "errorMessage": "Sage germline VCF output file cannot contain spaces and must have extension '.vcf' or '.vcf.gz'",
                 "anyOf": [
@@ -541,29 +465,6 @@
                 "format": "file-path",
                 "exists": true
             },
-            "snv_germline_tbi": {
-                "errorMessage": "Sage germline VCF tbi file cannot contain spaces and must have extension '.tbi' or '.gz.tbi'",
-                "anyOf": [
-                    {
-                        "type": "string",
-                        "pattern": "^\\S+\\.tbi(\\.gz.tbi)?$"
-                    },
-                    {
-                        "type": "string",
-                        "pattern": "^NA$"
-                    },
-                    {
-                        "type": "string",
-                        "pattern": "^/dev/null$"
-                    },
-                    {
-                        "type": "string",
-                        "maxLength": 0
-                    }
-                ],
-                "format": "file-path",
-                "exists": true
-            },
             "variant_somatic_ann": {
                 "errorMessage": "Annotated somatic VCF file cannot contain spaces and must have extension '.vcf' ",
                 "anyOf": [
diff --git a/modules/local/allelic_cn/main.nf b/modules/local/allelic_cn/main.nf
index 1dbcf5f..db05505 100644
--- a/modules/local/allelic_cn/main.nf
+++ b/modules/local/allelic_cn/main.nf
@@ -28,7 +28,7 @@ process NON_INTEGER_BALANCE {
     val(pad)
 
     output:
-    tuple val(meta), path("balanced.gg.rds") , emit: non_integer_balance_balanced_gg, optional: true
+    tuple val(meta), path("non_integer.balanced.gg.rds") , emit: non_integer_balance_balanced_gg, optional: true
     tuple val(meta), path("hets.gg.rds") , emit: non_integer_balance_hets_gg, optional: true
     path "versions.yml" , emit: versions
 
@@ -68,6 +68,8 @@ process NON_INTEGER_BALANCE {
     --fasta $fasta \\
     --pad $pad
 
+    mv balanced.gg.rds non_integer.balanced.gg.rds
+
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         non_integer_balance: ${VERSION}
@@ -116,7 +118,7 @@ process LP_PHASED_BALANCE {
     val(tilim)
 
     output:
-    tuple val(meta), path("balanced.gg.rds") , emit: lp_phased_balance_balanced_gg, optional: true
+    tuple val(meta), path("lp_phased.balanced.gg.rds") , emit: lp_phased_balance_balanced_gg, optional: true
     tuple val(meta), path("binstats.gg.rds") , emit: lp_phased_balance_binstats_gg, optional: true
     tuple val(meta), path("unphased.gg.rds") , emit: lp_phased_balance_unphased_allelic_gg, optional: true
     path "versions.yml" , emit: versions
@@ -150,6 +152,8 @@ process LP_PHASED_BALANCE {
     --nodefileind $nodefileind \\
     --tilim $tilim
 
+    mv balanced.gg.rds lp_phased.balanced.gg.rds
+
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         lp_phased_balance: ${VERSION}
diff --git a/modules/local/amber/main.nf b/modules/local/amber/main.nf
index b2276e8..66b16bb 100644
--- a/modules/local/amber/main.nf
+++ b/modules/local/amber/main.nf
@@ -76,22 +76,27 @@ process MAKE_HET_SITES {
     def baf_tsv = "${amber_dir}/${meta.tumor_id}.amber.baf.tsv.gz"
 
     """
-    echo "seqnames start end alt.count.t ref.count.t alt.count.n ref.count.n" > sites.txt
+    echo "seqnames start end alt.count.t ref.count.t alt.count.n ref.count.n alt.frac.t alt.frac.n" > sites.txt
     zcat ${baf_tsv} | awk 'NR>1 {
-        # Calculate alt.count.t using tumorModifiedBAF
-        alt_count_t = int(\$5 * \$4) # \$4 is tumorModifiedBAF
-
-        # Calculate ref.count.t using tumorModifiedBAF
-        ref_count_t = int(\$5 * (1 - \$4)) # \$4 is tumorModifiedBAF
-
-        # Calculate alt.count.n using normalBAF
-        alt_count_n = int(\$8 * \$6)
-
-        # Calculate ref.count.n using normalBAF
-        ref_count_n = int(\$8 * (1 - \$6))
-
-        # Print the results
-        print \$1, \$2, \$2, alt_count_t, ref_count_t, alt_count_n, ref_count_n
+        chromosome=\$1
+        start = \$2
+        end = \$2
+        tumorBAF = \$3
+        tumorModifiedBAF = \$4
+        tumorDepth = \$5
+        normalBAF = \$6
+        normalModifiedBAF = \$7
+        normalDepth = \$8
+
+        alt_count_t = int(tumorDepth * tumorModifiedBAF)
+        ref_count_t = int(tumorDepth * (1 - tumorModifiedBAF))
+        alt_count_n = int(normalDepth * normalBAF)
+        ref_count_n = int(normalDepth * (1 - normalBAF))
+        # int() truncation can leave alt + ref equal to 0 at low depth; emit NA instead of dividing by zero
+        alt_frac_t = ((alt_count_t + ref_count_t) > 0) ? alt_count_t / (alt_count_t + ref_count_t) : "NA"
+        alt_frac_n = ((alt_count_n + ref_count_n) > 0) ? alt_count_n / (alt_count_n + ref_count_n) : "NA"
+
+        print chromosome, start, end, alt_count_t, ref_count_t, alt_count_n, ref_count_n, alt_frac_t, alt_frac_n
     }' >> sites.txt
 
     cat <<-END_VERSIONS > versions.yml
diff --git a/modules/local/fragcounter/main.nf b/modules/local/fragcounter/main.nf
index 3508936..7618d36 100644
--- a/modules/local/fragcounter/main.nf
+++ b/modules/local/fragcounter/main.nf
@@ -71,7 +71,7 @@ process FRAGCOUNTER {
     "${task.process}":
         fragcounter: ${VERSION}
     END_VERSIONS
-        """
+    """
 }
 
 
diff --git a/modules/local/sigprofilerassignment/main.nf b/modules/local/sigprofilerassignment/main.nf
index 1f1c897..6741a46 100644
--- a/modules/local/sigprofilerassignment/main.nf
+++ b/modules/local/sigprofilerassignment/main.nf
@@ -34,6 +34,10 @@ process SIGPROFILERASSIGNMENT {
     --genome ${genome} \\
     --cosmic-version ${cosmic_version} \\
 
+    # append sbs_ and indel_ to the output file names
+    mv sbs_results/Assignment_Solution/Activities/Assignment_Solution_Activities.txt sbs_results/Assignment_Solution/Activities/sbs_Assignment_Solution_Activities.txt
+    mv indel_results/Assignment_Solution/Activities/Assignment_Solution_Activities.txt indel_results/Assignment_Solution/Activities/indel_Assignment_Solution_Activities.txt
+
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         sigprofilerassignment: ${VERSION}
diff --git a/modules/nf-core/snpeff/snpeff/main.nf b/modules/nf-core/snpeff/snpeff/main.nf
index d62e624..e0dae52 100644
--- a/modules/nf-core/snpeff/snpeff/main.nf
+++ b/modules/nf-core/snpeff/snpeff/main.nf
@@ -79,7 +79,8 @@ process SNPEFF_VCF_TO_BCF {
     task.ext.when == null || task.ext.when
 
     script:
-    def prefix = task.ext.prefix ?: "${meta.id}"
+    // task.name has the form "PROCESS (tag)"; replace shell-unsafe characters because the prefix is used unquoted in file names below
+    def prefix = task.ext.prefix ?: "${meta.id}_${task.name}".toString().replaceAll('[^A-Za-z0-9._-]+', '_')
     """
     bcftools view ${vcf} -O b -o ${prefix}.ann.unsorted.bcf &&
     bcftools sort ${prefix}.ann.unsorted.bcf -O b -o ${prefix}.ann.bcf &&
diff --git a/tests/test_runs/chr21_test/params.json b/tests/test_runs/chr21_test/params.json
index 03dd3d5..b4d80bd 100644
--- a/tests/test_runs/chr21_test/params.json
+++ b/tests/test_runs/chr21_test/params.json
@@ -4,7 +4,7 @@
     "fasta": "/gpfs/commons/home/sdider/DB/GATK/human_g1k_v37_decoy.fasta",
     "fasta_fai": "/gpfs/commons/home/sdider/DB/GATK/human_g1k_v37_decoy.fasta.fai",
     "bwa": "/gpfs/commons/home/sdider/DB/GATK/bwa/",
-    "tools": "bamqc",
+    "tools": "aligner",
     "outdir": "./results",
     "pon_dryclean": "/gpfs/commons/home/sdider/Projects/nf-casereports/tests/test_data/chr21_pon.rds",
     "field_dryclean": "reads",
diff --git a/workflows/nfcasereports.nf b/workflows/nfcasereports.nf
index d7f5d7a..981809e 100644
--- a/workflows/nfcasereports.nf
+++ b/workflows/nfcasereports.nf
@@ -277,9 +277,7 @@ inputs = ch_from_samplesheet.map {
         fastq_2,
         table,
         cram,
-        crai,
         bam,
-        bai,
         hets,
         amber_dir,
         frag_cov,
@@ -288,7 +286,6 @@ inputs = ch_from_samplesheet.map {
         seg,
         nseg,
         vcf,
-        vcf_tbi,
         jabba_rds,
         jabba_gg,
         ni_balanced_gg,
@@ -296,9 +293,7 @@ inputs = ch_from_samplesheet.map {
         events,
         fusions,
         snv_somatic_vcf,
-        snv_somatic_tbi,
         snv_germline_vcf,
-        snv_germline_tbi,
         variant_somatic_ann,
         variant_somatic_bcf,
         variant_germline_ann,
@@ -314,9 +309,9 @@ inputs = ch_from_samplesheet.map {
         fastq_2: fastq_2,
         table: table,
         cram: cram,
-        crai: crai,
+        crai: cram ? cram + '.crai' : [],
         bam: bam,
-        bai: bai,
+        bai: bam ? bam + '.bai' : [],
         hets: hets,
         amber_dir: amber_dir,
         frag_cov: frag_cov,
@@ -325,7 +320,7 @@ inputs = ch_from_samplesheet.map {
         seg: seg,
         nseg: nseg,
         vcf: vcf,
-        vcf_tbi: vcf_tbi,
+        vcf_tbi: vcf ? vcf + '.tbi' : [],
         jabba_rds: jabba_rds,
         jabba_gg: jabba_gg,
         ni_balanced_gg: ni_balanced_gg,
@@ -333,9 +328,9 @@ inputs = ch_from_samplesheet.map {
         events: events,
         fusions: fusions,
         snv_somatic_vcf: snv_somatic_vcf,
-        snv_somatic_tbi: snv_somatic_tbi,
+        snv_somatic_tbi: snv_somatic_vcf ? snv_somatic_vcf + '.tbi' : [],
         snv_germline_vcf: snv_germline_vcf,
-        snv_germline_tbi: snv_germline_tbi,
+        snv_germline_tbi: snv_germline_vcf ? snv_germline_vcf + '.tbi' : [],
         variant_somatic_ann: variant_somatic_ann,
         variant_somatic_bcf: variant_somatic_bcf,
         variant_germline_ann: variant_germline_ann,
@@ -1586,9 +1581,6 @@ workflow NFCASEREPORTS {
     // ##############################
     if (tools_used.contains("all") || tools_used.contains("events")) {
         events_inputs = inputs.filter { it.events.isEmpty() }.map { it -> [it.meta.patient, it.meta] }
-        events_input_jabba_gg = jabba_gg_for_merge
-            .join(events_inputs)
-            .map { it -> [ it[0], it[1] ] } // meta.patient, jabba ggraph
         events_input_non_integer_balance = non_integer_balance_balanced_gg_for_merge
             .join(events_inputs)
             .map { it -> [ it[0], it[1] ] } // meta.patient, balanced_gg
@@ -1596,9 +1588,8 @@ workflow NFCASEREPORTS {
         events_existing_outputs = inputs.map { it -> [it.meta, it.events] }.filter { !it[1].isEmpty() }
 
         events_input = events_inputs
-            .join(events_input_jabba_gg)
             .join(events_input_non_integer_balance)
-            .map{ patient, meta, rds, balanced_gg -> [ meta, rds, balanced_gg ] }
+            .map{ patient, meta, balanced_gg -> [ meta, balanced_gg ] }
 
         EVENTS(events_input)
 
@@ -1612,9 +1603,6 @@ workflow NFCASEREPORTS {
     // ##############################
     if (tools_used.contains("all") || tools_used.contains("fusions")) {
         fusions_inputs = inputs.filter { it.fusions.isEmpty() }.map { it -> [it.meta.patient, it.meta] }
-        fusions_input_jabba_gg = jabba_gg_for_merge
-            .join(fusions_inputs)
-            .map { it -> [ it[0], it[1] ] } // meta.patient, jabba ggraph
         fusions_input_non_integer_balance = non_integer_balance_balanced_gg_for_merge
             .join(fusions_inputs)
             .map { it -> [ it[0], it[1] ] } // meta.patient, balanced_gg
@@ -1622,9 +1610,8 @@ workflow NFCASEREPORTS {
         fusions_existing_outputs = inputs.map { it -> [it.meta, it.fusions] }.filter { !it[1].isEmpty() }
 
         fusions_input = fusions_inputs
-            .join(fusions_input_jabba_gg)
             .join(fusions_input_non_integer_balance)
-            .map{ patient, meta, rds, balanced_gg -> [ meta, rds, balanced_gg ] }
+            .map{ patient, meta, balanced_gg -> [ meta, balanced_gg ] }
 
         FUSIONS(fusions_input)
         fusions = Channel.empty()