# Patch summary: adds Picard-based post-alignment BAM QC ("bamqc") to the pipeline.
#
# Files touched, as shown by the diff headers below:
#   conf/base.config                                  - appends two blank lines at end of file (no setting changes).
#   conf/modules.config                               - adds publishDir blocks for PICARD_COLLECTMULTIPLEMETRICS and
#                                                       PICARD_COLLECTWGSMETRICS; both publish under
#                                                       ${params.outdir}/qc_reports/picard/${meta.id} and do not
#                                                       publish versions.yml (saveAs returns null for it).
#   modules.json                                      - registers picard/collectmultiplemetrics and
#                                                       picard/collectwgsmetrics (same git_sha, installed_by "modules").
#   modules/nf-core/picard/collectmultiplemetrics/*   - new module: environment.yml, main.nf, meta.yml, nf-test + snapshot.
#   modules/nf-core/picard/collectwgsmetrics/*        - new module: environment.yml, main.nf, meta.yml, nf-test + snapshot.
#   subworkflows/local/bam_qc_picard/main.nf          - new BAM_QC_PICARD subworkflow: takes [ meta, bam, bai ], runs both
#                                                       Picard processes, emits `reports` (their metrics outputs) and
#                                                       `versions`.
#   tests/test_runs/chr21_test/params.json            - changes "tools" from "all" to "bamqc".
#   workflows/nfcasereports.nf                        - adds "bamqc" (depends on "aligner") to tool_dependency_map, prints
#                                                       the dependency-map keys when tools_used == ["all"], includes
#                                                       BAM_QC_PICARD and calls it on alignment_bams_final joined with
#                                                       the input sample ids.
#
# NOTE(review): the conf/base.config hunk only adds trailing blank lines - confirm it is meant to be part of this change.
# NOTE(review): in bam_qc_picard/main.nf the `intervals` channel is created from params.intervals but is not referenced
#               in the workflow body shown here; PICARD_COLLECTWGSMETRICS is called with `[]` as its interval list.
#               Confirm whether intervals were meant to be passed.
# NOTE(review): BAM_QC_PICARD mixes only the `.out.metrics` channels into `reports`; the optional `pdf` output of
#               PICARD_COLLECTMULTIPLEMETRICS is not forwarded. Confirm this is intended.
# NOTE(review): the "test-picard-collectmultiplemetrics-cram" test feeds the sarscov2 BAM/BAI and sarscov2 fasta together
#               with a homo_sapiens genome.fasta.fai. These are vendored module files - verify against the upstream
#               module at the git_sha recorded in modules.json rather than editing locally.
# NOTE(review): the chr21 test params now run only "bamqc" instead of "all" - confirm this narrowing should be committed.
diff --git a/conf/base.config b/conf/base.config index 061750b..0e05cfb 100644 --- a/conf/base.config +++ b/conf/base.config @@ -236,3 +236,5 @@ process { cache = false } } + + diff --git a/conf/modules.config b/conf/modules.config index 4e07829..0325c71 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -37,6 +37,22 @@ process { ] } + withName: 'PICARD_COLLECTMULTIPLEMETRICS' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/qc_reports/picard/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: 'PICARD_COLLECTWGSMETRICS' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/qc_reports/picard/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + withName: 'SAMTOOLS_STATS' { ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) } ext.prefix = { "${meta.id}.sorted.cram" } diff --git a/modules.json b/modules.json index 37ed502..0e250d7 100644 --- a/modules.json +++ b/modules.json @@ -125,6 +125,16 @@ "git_sha": "3e52a04aa60e0cb5cea0487b9ae2fdd04f874027", "installed_by": ["modules"] }, + "picard/collectmultiplemetrics": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "picard/collectwgsmetrics": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "samtools/collatefastq": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", diff --git a/modules/nf-core/picard/collectmultiplemetrics/environment.yml b/modules/nf-core/picard/collectmultiplemetrics/environment.yml new file mode 100644 index 0000000..ff4a85e --- /dev/null +++ b/modules/nf-core/picard/collectmultiplemetrics/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::picard=3.2.0 diff --git 
a/modules/nf-core/picard/collectmultiplemetrics/main.nf b/modules/nf-core/picard/collectmultiplemetrics/main.nf new file mode 100644 index 0000000..c555587 --- /dev/null +++ b/modules/nf-core/picard/collectmultiplemetrics/main.nf @@ -0,0 +1,67 @@ +process PICARD_COLLECTMULTIPLEMETRICS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:3.2.0--hdfd78af_0' : + 'biocontainers/picard:3.2.0--hdfd78af_0' }" + + input: + tuple val(meta) , path(bam), path(bai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + output: + tuple val(meta), path("*_metrics"), emit: metrics + tuple val(meta), path("*.pdf") , emit: pdf, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + def avail_mem = 3072 + if (!task.memory) { + log.info '[Picard CollectMultipleMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + picard \\ + -Xmx${avail_mem}M \\ + CollectMultipleMetrics \\ + $args \\ + --INPUT $bam \\ + --OUTPUT ${prefix}.CollectMultipleMetrics \\ + $reference + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard CollectMultipleMetrics --version 2>&1 | grep -o 'Version.*' | cut -f2- -d:) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.CollectMultipleMetrics.alignment_summary_metrics + touch ${prefix}.CollectMultipleMetrics.insert_size_metrics + touch ${prefix}.CollectMultipleMetrics.quality_distribution.pdf + touch ${prefix}.CollectMultipleMetrics.base_distribution_by_cycle_metrics + touch ${prefix}.CollectMultipleMetrics.quality_by_cycle_metrics + touch ${prefix}.CollectMultipleMetrics.read_length_histogram.pdf + touch ${prefix}.CollectMultipleMetrics.base_distribution_by_cycle.pdf + touch ${prefix}.CollectMultipleMetrics.quality_by_cycle.pdf + touch ${prefix}.CollectMultipleMetrics.insert_size_histogram.pdf + touch ${prefix}.CollectMultipleMetrics.quality_distribution_metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard CollectMultipleMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} diff --git a/modules/nf-core/picard/collectmultiplemetrics/meta.yml b/modules/nf-core/picard/collectmultiplemetrics/meta.yml new file mode 100644 index 0000000..2b7981a --- /dev/null +++ b/modules/nf-core/picard/collectmultiplemetrics/meta.yml @@ -0,0 +1,79 @@ +name: picard_collectmultiplemetrics +description: Collect multiple metrics from a BAM file +keywords: + - alignment + - metrics + - statistics + - insert + - quality + - bam +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. 
+ homepage: https://broadinstitute.github.io/picard/ + documentation: https://broadinstitute.github.io/picard/ + licence: ["MIT"] + identifier: biotools:picard_tools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: SAM/BAM/CRAM file + pattern: "*.{sam,bam,cram}" + - bai: + type: file + description: Optional SAM/BAM/CRAM file index + pattern: "*.{sai,bai,crai}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] + - fasta: + type: file + description: Genome fasta file + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] + - fai: + type: file + description: Index of FASTA file. Only needed when fasta is supplied. + pattern: "*.fai" +output: + - metrics: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_metrics": + type: file + description: Alignment metrics files generated by picard + pattern: "*_{metrics}" + - pdf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.pdf": + type: file + description: PDF plots of metrics + pattern: "*.{pdf}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/picard/collectmultiplemetrics/tests/main.nf.test b/modules/nf-core/picard/collectmultiplemetrics/tests/main.nf.test new file mode 100644 index 0000000..5b67774 --- /dev/null +++ b/modules/nf-core/picard/collectmultiplemetrics/tests/main.nf.test @@ -0,0 +1,112 @@ + +nextflow_process { + + name "Test Process PICARD_COLLECTMULTIPLEMETRICS" + script "../main.nf" + process "PICARD_COLLECTMULTIPLEMETRICS" + + tag "modules" + tag "modules_nfcore" + tag "picard" + tag "picard/collectmultiplemetrics" + + test("test-picard-collectmultiplemetrics") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [[id:'genome'],[]] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.metrics[0][1].collect { file(it).name }.toSorted(), + process.out.pdf[0][1].collect { file(it).name }.toSorted(), + process.out.versions + ).match() + } + ) + } + } + + test("test-picard-collectmultiplemetrics-nofasta") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [[id:'genome'],[]] + input[2] = [[id:'genome'],[]] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.metrics[0][1].collect { file(it).name }.toSorted(), + process.out.pdf[0][1].collect { file(it).name }.toSorted(), + process.out.versions + ).match() + } + ) + } + } + + test("test-picard-collectmultiplemetrics-cram") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.metrics[0][1].collect { file(it).name }.toSorted(), + process.out.pdf[0][1].collect { file(it).name }.toSorted(), + process.out.versions + ).match() + } + ) + } + } + +} diff --git a/modules/nf-core/picard/collectmultiplemetrics/tests/main.nf.test.snap b/modules/nf-core/picard/collectmultiplemetrics/tests/main.nf.test.snap new file mode 100644 index 0000000..1859541 --- /dev/null +++ b/modules/nf-core/picard/collectmultiplemetrics/tests/main.nf.test.snap @@ -0,0 +1,80 @@ +{ + "test-picard-collectmultiplemetrics": { + "content": [ + [ + "test.CollectMultipleMetrics.alignment_summary_metrics", + "test.CollectMultipleMetrics.base_distribution_by_cycle_metrics", + "test.CollectMultipleMetrics.insert_size_metrics", + 
"test.CollectMultipleMetrics.quality_by_cycle_metrics", + "test.CollectMultipleMetrics.quality_distribution_metrics" + ], + [ + "test.CollectMultipleMetrics.base_distribution_by_cycle.pdf", + "test.CollectMultipleMetrics.insert_size_histogram.pdf", + "test.CollectMultipleMetrics.quality_by_cycle.pdf", + "test.CollectMultipleMetrics.quality_distribution.pdf", + "test.CollectMultipleMetrics.read_length_histogram.pdf" + ], + [ + "versions.yml:md5,b68b83e8dd0f9360453213acad639338" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-26T10:03:36.631174" + }, + "test-picard-collectmultiplemetrics-cram": { + "content": [ + [ + "test.CollectMultipleMetrics.alignment_summary_metrics", + "test.CollectMultipleMetrics.base_distribution_by_cycle_metrics", + "test.CollectMultipleMetrics.insert_size_metrics", + "test.CollectMultipleMetrics.quality_by_cycle_metrics", + "test.CollectMultipleMetrics.quality_distribution_metrics" + ], + [ + "test.CollectMultipleMetrics.base_distribution_by_cycle.pdf", + "test.CollectMultipleMetrics.insert_size_histogram.pdf", + "test.CollectMultipleMetrics.quality_by_cycle.pdf", + "test.CollectMultipleMetrics.quality_distribution.pdf", + "test.CollectMultipleMetrics.read_length_histogram.pdf" + ], + [ + "versions.yml:md5,b68b83e8dd0f9360453213acad639338" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-26T10:04:13.955902" + }, + "test-picard-collectmultiplemetrics-nofasta": { + "content": [ + [ + "test.CollectMultipleMetrics.alignment_summary_metrics", + "test.CollectMultipleMetrics.base_distribution_by_cycle_metrics", + "test.CollectMultipleMetrics.insert_size_metrics", + "test.CollectMultipleMetrics.quality_by_cycle_metrics", + "test.CollectMultipleMetrics.quality_distribution_metrics" + ], + [ + "test.CollectMultipleMetrics.base_distribution_by_cycle.pdf", + "test.CollectMultipleMetrics.insert_size_histogram.pdf", + 
"test.CollectMultipleMetrics.quality_by_cycle.pdf", + "test.CollectMultipleMetrics.quality_distribution.pdf", + "test.CollectMultipleMetrics.read_length_histogram.pdf" + ], + [ + "versions.yml:md5,b68b83e8dd0f9360453213acad639338" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-26T10:03:54.707587" + } +} \ No newline at end of file diff --git a/modules/nf-core/picard/collectwgsmetrics/environment.yml b/modules/nf-core/picard/collectwgsmetrics/environment.yml new file mode 100644 index 0000000..58d5258 --- /dev/null +++ b/modules/nf-core/picard/collectwgsmetrics/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::picard=3.2.0 + - r::r-base diff --git a/modules/nf-core/picard/collectwgsmetrics/main.nf b/modules/nf-core/picard/collectwgsmetrics/main.nf new file mode 100644 index 0000000..6002a7c --- /dev/null +++ b/modules/nf-core/picard/collectwgsmetrics/main.nf @@ -0,0 +1,60 @@ +process PICARD_COLLECTWGSMETRICS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:3.2.0--hdfd78af_0' : + 'biocontainers/picard:3.2.0--hdfd78af_0' }" + + input: + tuple val(meta), path(bam), path(bai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + path intervallist + + output: + tuple val(meta), path("*_metrics"), emit: metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3072 + def interval = intervallist ? "--INTERVALS ${intervallist}" : '' + if (!task.memory) { + log.info '[Picard CollectWgsMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + picard \\ + -Xmx${avail_mem}M \\ + CollectWgsMetrics \\ + $args \\ + --INPUT $bam \\ + --OUTPUT ${prefix}.CollectWgsMetrics.coverage_metrics \\ + --REFERENCE_SEQUENCE ${fasta} \\ + $interval + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard CollectWgsMetrics --version 2>&1 | grep -o 'Version.*' | cut -f2- -d:) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.CollectWgsMetrics.coverage_metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard CollectWgsMetrics --version 2>&1 | grep -o 'Version.*' | cut -f2- -d:) + END_VERSIONS + """ +} diff --git a/modules/nf-core/picard/collectwgsmetrics/meta.yml b/modules/nf-core/picard/collectwgsmetrics/meta.yml new file mode 100644 index 0000000..bb74808 --- /dev/null +++ b/modules/nf-core/picard/collectwgsmetrics/meta.yml @@ -0,0 +1,80 @@ +name: picard_collectwgsmetrics +description: Collect metrics about coverage and performance of whole genome sequencing + (WGS) experiments. +keywords: + - alignment + - metrics + - statistics + - quality + - bam +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. + homepage: https://broadinstitute.github.io/picard/ + documentation: https://broadinstitute.github.io/picard/ + licence: ["MIT"] + identifier: biotools:picard_tools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Aligned reads file + pattern: "*.{bam, cram}" + - bai: + type: file + description: (Optional) Aligned reads file index + pattern: "*.{bai,crai}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fa,fasta,fna}" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Genome fasta file index + pattern: "*.{fai}" + - - intervallist: + type: file + description: Picard Interval List. Defines which contigs to include. Can be + generated from a BED file with GATK BedToIntervalList. +output: + - metrics: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_metrics": + type: file + description: Alignment metrics files generated by picard + pattern: "*_{metrics}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@flowuenne" + - "@lassefolkersen" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@flowuenne" + - "@lassefolkersen" + - "@ramprasadn" diff --git a/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test b/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test new file mode 100644 index 0000000..a398456 --- /dev/null +++ b/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test @@ -0,0 +1,83 @@ + +nextflow_process { + + name "Test Process PICARD_COLLECTWGSMETRICS" + script "../main.nf" + process "PICARD_COLLECTWGSMETRICS" + + tag "modules" + tag "modules_nfcore" + tag "picard" + tag "picard/collectwgsmetrics" + + test("test-picard-collectwgsmetrics") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + ] + input[1] = [ + [id:'genome'], + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.metrics[0][1]).text.contains('coverage high_quality_coverage_count'), + process.out.versions + ).match() + } + ) + } + } + + test("test-picard-collectwgsmetrics-with-interval") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + [] + ] + input[1] = [ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/picard/baits.interval_list', checkIfExists: true) + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.metrics[0][1]).text.contains('coverage high_quality_coverage_count'), + process.out.versions + ).match() + } + ) + } + } + +} diff --git a/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test.snap b/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test.snap new file mode 100644 index 0000000..f188382 --- /dev/null +++ b/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test.snap @@ -0,0 +1,28 @@ +{ + "test-picard-collectwgsmetrics-with-interval": { + "content": [ + true, + [ + "versions.yml:md5,06b5898fb06823b736c90e1dcebe75fe" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-23T11:02:46.301176" + }, + "test-picard-collectwgsmetrics": { + "content": [ + true, 
+ [ + "versions.yml:md5,06b5898fb06823b736c90e1dcebe75fe" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-23T11:02:25.132069" + } +} \ No newline at end of file diff --git a/subworkflows/local/bam_qc_picard/main.nf b/subworkflows/local/bam_qc_picard/main.nf new file mode 100644 index 0000000..8ae0e7e --- /dev/null +++ b/subworkflows/local/bam_qc_picard/main.nf @@ -0,0 +1,46 @@ +// +// QC on BAM +// + +include { PICARD_COLLECTWGSMETRICS } from '../../../modules/nf-core/picard/collectwgsmetrics/main' +include { PICARD_COLLECTMULTIPLEMETRICS } from '../../../modules/nf-core/picard/collectmultiplemetrics/main' + +fasta = WorkflowNfcasereports.create_file_channel(params.fasta) +fai = WorkflowNfcasereports.create_file_channel(params.fasta_fai) +intervals = WorkflowNfcasereports.create_file_channel(params.intervals) + +workflow BAM_QC_PICARD { + take: + bam // channel: [mandatory] [ meta, bam, bai ] + + main: + versions = Channel.empty() + reports = Channel.empty() + + PICARD_COLLECTWGSMETRICS( + bam, + fasta.map{ it -> [ [ id:'fasta' ], it ] }, + fai.map{ it -> [ [ id:'fai' ], it ] }, + [] + ) + + PICARD_COLLECTMULTIPLEMETRICS( + bam, + fasta.map{ it -> [ [ id:'fasta' ], it ] }, + fai.map{ it -> [ [ id:'fai' ], it ] }, + ) + + // Gather all reports generated + reports = reports.mix(PICARD_COLLECTWGSMETRICS.out.metrics) + reports = reports.mix(PICARD_COLLECTMULTIPLEMETRICS.out.metrics) + + // Gather versions of all tools used + versions = versions.mix(PICARD_COLLECTWGSMETRICS.out.versions) + versions = versions.mix(PICARD_COLLECTMULTIPLEMETRICS.out.versions) + + emit: + reports + + versions // channel: [ versions.yml ] +} + diff --git a/tests/test_runs/chr21_test/params.json b/tests/test_runs/chr21_test/params.json index ccf7554..03dd3d5 100644 --- a/tests/test_runs/chr21_test/params.json +++ b/tests/test_runs/chr21_test/params.json @@ -4,7 +4,7 @@ "fasta": "/gpfs/commons/home/sdider/DB/GATK/human_g1k_v37_decoy.fasta", 
"fasta_fai": "/gpfs/commons/home/sdider/DB/GATK/human_g1k_v37_decoy.fasta.fai", "bwa": "/gpfs/commons/home/sdider/DB/GATK/bwa/", - "tools": "all", + "tools": "bamqc", "outdir": "./results", "pon_dryclean": "/gpfs/commons/home/sdider/Projects/nf-casereports/tests/test_data/chr21_pon.rds", "field_dryclean": "reads", diff --git a/workflows/nfcasereports.nf b/workflows/nfcasereports.nf index e87278c..d7f5d7a 100644 --- a/workflows/nfcasereports.nf +++ b/workflows/nfcasereports.nf @@ -87,6 +87,7 @@ tools_used = params.tools ? params.tools.split(',') : ["all"] tool_dependency_map = [ "aligner": ["indexing"], + "bamqc": ["aligner"], "gridss": ["aligner"], "amber": ["aligner"], "fragcounter": ["aligner"], @@ -131,7 +132,11 @@ if (params.tools) { tools_used = ["all"] } -println "Tools that will be run: ${tools_used}" +if (tools_used == ["all"]) { + println "Tools that will be run: ${tool_dependency_map.keySet()}" +} else { + println "Tools that will be run: ${tools_used}" +} if (!params.dbsnp && !params.known_indels) { if (!params.skip_tools || (params.skip_tools && !params.skip_tools.contains('baserecalibrator'))) { @@ -497,6 +502,9 @@ include { BAM_MARKDUPLICATES } from '../subworkflows/local/bam_markduplicates/ma include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_NO_MD } from '../subworkflows/local/cram_qc_mosdepth_samtools/main' include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_RECAL } from '../subworkflows/local/cram_qc_mosdepth_samtools/main' +// BAM Picard QC +include { BAM_QC_PICARD } from '../subworkflows/local/bam_qc_picard/main' + // Create recalibration tables include { BAM_BASERECALIBRATOR } from '../subworkflows/local/bam_baserecalibrator/main' @@ -867,6 +875,19 @@ workflow NFCASEREPORTS { .map{ meta, bam, bai -> [ meta.id, meta + [data_type: "bam"], bam, bai ] } } + // Post-alignment QC + if (tools_used.contains("all") || tools_used.contains("bamqc")) { + bam_qc_inputs = inputs.map { it -> [it.meta.id] } + bam_qc_calling = alignment_bams_final + .join(bam_qc_inputs) 
+ .map { it -> [ it[1], it[2], it[3] ] } // meta, bam, bai + BAM_QC_PICARD(bam_qc_calling) + + // Gather QC + reports = reports.mix(BAM_QC_PICARD.out.reports.collect{ meta, report -> report }) + versions = versions.mix(BAM_QC_PICARD.out.versions) + } + // SV Calling // ############################## if (tools_used.contains("all") || tools_used.contains("gridss")) {