From 8b6c7faf72997ba865ac2992f4abfee12a9ec37a Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Thu, 12 Oct 2023 10:59:59 +0200 Subject: [PATCH] Add ability to explore various values of AED and LD. (#93) * Update parameters to read from meta map * Introduce channel to construct sweep parameter map * Update nf-core modules * Add meta map to agat separatebyrecord * Add meta map to agat filterbyattribute * update blast path * Add meta map to agat keeplongestisoform * Add meta map to agat filterincompletegenecodingmodels * Add meta map to agat filterbylocusdistance * Add meta map to agat extractsequences * Add meta map to blast blastp * Add meta map to agat filterbymrnablastvalue * Add meta map to augustus gff2gbk * Add meta map to augustus gbk2augustus * Add meta map to augustus training modules * Add meta map to agat gff2zff * Add meta map to snap training * Update channel logic for abinitio pipeline * Update README * Update abinitio test profile * Fix module links for annotation preprocessing workflow * Fix module links for functional annotation workflow * Fix module links * Update publish paths to include parameter sweep values * Add container registries to profiles for nf-core * Remove registries from docker container paths * Add aed and locus distance parameters to config * Update annotation preprocessing workflow for updated modules * Add meta map to interproscan module * Add meta map to agat managefunctionalannotation * Update functional annotation workflow to reflect updated modules * Fix includeInputs placement * Fix container paths * Fix workflow for module updates * Remove references to params.enable_conda * Update minimum Nextflow version to 22.10.0 * Run split maker evidence once * Update publishing path * Update test config to do parameter sweep * Associate proteindb with protein * Patch blast/makeblastdb to use a meta map * Fix functional annotation subworkflow from module change * Fix file staging * Update file prefix to include LD and AED 
values * Add start of table rank code * Add rank model to abinitio workflow * Escape dollars * Syntax fixes * Fix syntax * Add publish path * rename output folders * Append training data gene count to log --- config/abinitio_training_modules.config | 38 ++++++----- config/test.config | 5 +- modules.json | 56 +++++++++------ modules/local/agat/extractsequences.nf | 10 +-- modules/local/agat/filterbyattribute.nf | 10 +-- modules/local/agat/filterbylocusdistance.nf | 10 +-- modules/local/agat/filterbymrnablastvalue.nf | 10 +-- .../agat/filterincompletegenecodingmodels.nf | 10 +-- modules/local/agat/gff2zff.nf | 10 +-- modules/local/agat/keeplongestisoform.nf | 10 +-- .../local/agat/managefunctionalannotation.nf | 12 ++-- modules/local/agat/separatebyrecord.nf | 10 +-- modules/local/augustus/gbk2augustus.nf | 10 +-- modules/local/augustus/gff2gbk.nf | 10 +-- modules/local/augustus/training.nf | 13 ++-- modules/local/blast/blastp.nf | 10 +-- modules/local/custom/rankmodels.nf | 68 +++++++++++++++++++ modules/local/gaas/fastapurify.nf | 4 +- modules/local/gaas/fastastatistics.nf | 4 +- modules/local/hisat2/align.nf | 4 +- modules/local/hisat2/build.nf | 4 +- modules/local/interproscan.nf | 10 +-- modules/local/snap/training.nf | 10 +-- modules/local/stringtie/stringtie.nf | 4 +- .../blast/makeblastdb/blast-makeblastdb.diff | 20 ++++++ .../{modules => }/blast/makeblastdb/main.nf | 12 ++-- .../{modules => }/blast/makeblastdb/meta.yml | 0 modules/nf-core/{modules => }/busco/main.nf | 21 +++--- modules/nf-core/{modules => }/busco/meta.yml | 22 +++++- modules/nf-core/{modules => }/fastp/main.nf | 43 +++++++++--- modules/nf-core/{modules => }/fastp/meta.yml | 11 ++- modules/nf-core/fastqc/main.nf | 55 +++++++++++++++ modules/nf-core/{modules => }/fastqc/meta.yml | 0 modules/nf-core/modules/fastqc/main.nf | 59 ---------------- modules/nf-core/{modules => }/multiqc/main.nf | 16 +++-- .../nf-core/{modules => }/multiqc/meta.yml | 14 +++- nextflow.config | 21 +++--- 
subworkflows/abinitio_training/README.md | 8 +++ subworkflows/abinitio_training/main.nf | 45 +++++++++--- subworkflows/annotation_preprocessing/main.nf | 12 +++- subworkflows/functional_annotation/main.nf | 15 ++-- subworkflows/transcript_assembly/main.nf | 19 ++++-- 42 files changed, 478 insertions(+), 257 deletions(-) create mode 100644 modules/local/custom/rankmodels.nf create mode 100644 modules/nf-core/blast/makeblastdb/blast-makeblastdb.diff rename modules/nf-core/{modules => }/blast/makeblastdb/main.nf (65%) rename modules/nf-core/{modules => }/blast/makeblastdb/meta.yml (100%) rename modules/nf-core/{modules => }/busco/main.nf (70%) rename modules/nf-core/{modules => }/busco/meta.yml (75%) rename modules/nf-core/{modules => }/fastp/main.nf (65%) rename modules/nf-core/{modules => }/fastp/meta.yml (77%) create mode 100644 modules/nf-core/fastqc/main.nf rename modules/nf-core/{modules => }/fastqc/meta.yml (100%) delete mode 100644 modules/nf-core/modules/fastqc/main.nf rename modules/nf-core/{modules => }/multiqc/main.nf (77%) rename modules/nf-core/{modules => }/multiqc/meta.yml (74%) diff --git a/config/abinitio_training_modules.config b/config/abinitio_training_modules.config index 714d2939..0f04fdeb 100644 --- a/config/abinitio_training_modules.config +++ b/config/abinitio_training_modules.config @@ -4,20 +4,20 @@ process { withName: 'SPLIT_MAKER_EVIDENCE' { ext.args = '' publishDir = [ - path: "${params.outdir}/${publish_subdir}", + path: { "${params.outdir}/${publish_subdir}/split_evidence" }, mode: params.publishDir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } withName: 'MODEL_SELECTION_BY_AED' { - ext.args = [ - '--value 0.3', + ext.args = { [ + "--value ${meta.aed_value}", '-a _AED', '-t ">"' - ].join(' ').trim() + ].join(' ').trim() } ext.prefix = 'codingGeneFeatures' publishDir = [ - path: "${params.outdir}/${publish_subdir}/filter", + path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/filter"}, mode: params.publishDir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -25,7 +25,7 @@ process { withName: 'RETAIN_LONGEST_ISOFORM' { ext.args = '' publishDir = [ - path: "${params.outdir}/${publish_subdir}/filter", + path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/filter"}, mode: params.publishDir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -33,15 +33,15 @@ process { withName: 'REMOVE_INCOMPLETE_GENE_MODELS' { ext.args = '' publishDir = [ - path: "${params.outdir}/${publish_subdir}/filter", + path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/filter"}, mode: params.publishDir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } withName: 'FILTER_BY_LOCUS_DISTANCE' { - ext.args = '-d 3000' + ext.args = { "-d ${meta.locus_distance}" } publishDir = [ - path: "${params.outdir}/${publish_subdir}/filter", + path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/filter"}, mode: params.publishDir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -63,7 +63,7 @@ process { withName: 'GFF_FILTER_BY_BLAST' { ext.args = '' publishDir = [ - path: "${params.outdir}/${publish_subdir}/blast_filtered_gff", + path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/blast_filtered_gff"}, mode: params.publishDir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -71,7 +71,7 @@ process { withName: 'GFF2GBK' { ext.args = params.flank_region_size publishDir = [ - path: "${params.outdir}/${publish_subdir}/augustus/gbk_files", + path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/augustus/gbk_files"}, mode: params.publishDir_mode, pattern: "*.gbk" ] @@ -80,12 +80,12 @@ process { ext.args = '100' publishDir = [ [ - path: "${params.outdir}/${publish_subdir}/augustus/training_data", + path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/augustus/training_data"}, mode: params.publishDir_mode, pattern: "*.train" ], [ - path: "${params.outdir}/${publish_subdir}/augustus/test_data", + path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/augustus/test_data"}, mode: params.publishDir_mode, pattern: "*.test" ] @@ -93,20 +93,26 @@ process { } withName: 'AUGUSTUS_TRAINING' { ext.args = '' + ext.prefix = { "${species_label}-LD${meta.locus_distance}-AED${meta.aed_value}" } publishDir = [ [ - path: "${params.outdir}/${publish_subdir}/augustus_training", + path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/augustus_training"}, mode: params.publishDir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ], [ - path: "${params.maker_species_publishdir}", + path: {"${params.maker_species_publishdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}"}, mode: 'copy', enabled: params.maker_species_publishdir != null, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] ] } + withName: 'RANK_AUGUSTUS_MODELS' { + publishDir = [ + path: { "${params.outdir}/${publish_subdir}/augustus_sweep_summary" } + ] + } withName: 'CONVERT_GFF2ZFF' { ext.args = '' } @@ -114,7 +120,7 @@ process { ext.args = "-categorize ${params.flank_region_size}" ext.args2 = "-export ${params.flank_region_size} -plus" publishDir = [ - path: "${params.outdir}/${publish_subdir}/snap_training", + path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/snap_training"}, mode: params.publishDir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] diff --git a/config/test.config b/config/test.config index 7b305b7c..4608a7b8 100644 --- a/config/test.config +++ b/config/test.config @@ -4,13 +4,12 @@ if ( params.subworkflow == 'abinitio_training' ) { genome = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genome.fa' species_label = 'test_species' // e.g. 'asecodes_parviclava' flank_region_size = 500 + aed_value = [ 0.3, 0.2 ] + locus_distance = [ 500 ] } process { // Trick: Fully qualified process name has higher priority than simple name // Otherwise settings are overridden by those in modules.config loaded after this - withName: 'ABINITIO_TRAINING:FILTER_BY_LOCUS_DISTANCE' { - ext.args = '-d 500' - } withName: 'ABINITIO_TRAINING:GBK2AUGUSTUS' { ext.args = '10' } diff --git a/modules.json b/modules.json index b471777d..f0326374 100644 --- a/modules.json +++ b/modules.json @@ -1,23 +1,41 @@ { - "name": "NBIS Genome Annotation Workflow", - "homePage": "", - "repos": { - "nf-core/modules": { - "blast/makeblastdb": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "busco": { - "git_sha": "89a84538bede7c6919f7c042fdb4c79e5e2d9d2a" - }, - "fastp": { - "git_sha": "9b51362a532a14665f513cf987531f9ea5046b74" - }, - "fastqc": { - "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" - }, - "multiqc": { - "git_sha": "5138acca0985ca01c38a1c4fba917d83772b1106" - } + 
"name": "NBIS Genome Annotation Workflow", + "homePage": "", + "repos": { + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": { + "blast/makeblastdb": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"], + "patch": "modules/nf-core/blast/makeblastdb/blast-makeblastdb.diff" + }, + "busco": { + "branch": "master", + "git_sha": "6d6552cb582f56b6101c452e16ee7c23073f91de", + "installed_by": ["modules"] + }, + "fastp": { + "branch": "master", + "git_sha": "d497a4868ace3302016ea8ed4b395072d5e833cd", + "installed_by": ["modules"] + }, + "fastqc": { + "branch": "master", + "git_sha": "9a4517e720bc812e95b56d23d15a1653b6db4f53", + "installed_by": ["modules"] + }, + "multiqc": { + "branch": "master", + "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", + "installed_by": ["modules"] + } } + }, + "subworkflows": { + "nf-core": {} + } } + } } diff --git a/modules/local/agat/extractsequences.nf b/modules/local/agat/extractsequences.nf index 9ea67da3..c8e7f20a 100644 --- a/modules/local/agat/extractsequences.nf +++ b/modules/local/agat/extractsequences.nf @@ -3,18 +3,18 @@ process AGAT_EXTRACTSEQUENCES { label 'process_single' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? "bioconda::agat=0.9.2" : null) + conda "bioconda::agat=0.9.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1': - 'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }" + 'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }" input: - path gff + tuple val(meta), path (gff) path genome output: - path "${gff.baseName}_proteins.fasta", emit: proteins - path "versions.yml" , emit: versions + tuple val(meta), path ("${gff.baseName}_proteins.fasta"), emit: proteins + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/agat/filterbyattribute.nf b/modules/local/agat/filterbyattribute.nf index a83d3813..048f8b4d 100644 --- a/modules/local/agat/filterbyattribute.nf +++ b/modules/local/agat/filterbyattribute.nf @@ -3,17 +3,17 @@ process AGAT_FILTERBYATTRIBUTE { label 'process_single' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? "bioconda::agat=0.9.2" : null) + conda "bioconda::agat=0.9.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1': - 'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }" + 'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }" input: - path mrna_gff + tuple val(meta), path(mrna_gff) output: - path "*.filter.gff", emit: selected_models - path "versions.yml", emit: versions + tuple val(meta), path("*.filter.gff"), emit: selected_models + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/agat/filterbylocusdistance.nf b/modules/local/agat/filterbylocusdistance.nf index 8080e3a5..5ad01a40 100644 --- a/modules/local/agat/filterbylocusdistance.nf +++ b/modules/local/agat/filterbylocusdistance.nf @@ -3,17 +3,17 @@ process AGAT_FILTERBYLOCUSDISTANCE { label 'process_single' // WARN: Version information not provided by tool on CLI. 
Please update version string below when bumping container versions. - conda (params.enable_conda ? "bioconda::agat=0.9.2" : null) + conda "bioconda::agat=0.9.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1': - 'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }" + 'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }" input: - path coding_gene_features_gff + tuple val(meta), path (coding_gene_features_gff) output: - path "*.good_distance.gff", emit: distanced_models - path "versions.yml" , emit: versions + tuple val(meta), path ("*.good_distance.gff"), emit: distanced_models + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/agat/filterbymrnablastvalue.nf b/modules/local/agat/filterbymrnablastvalue.nf index acddc024..29a7e493 100644 --- a/modules/local/agat/filterbymrnablastvalue.nf +++ b/modules/local/agat/filterbymrnablastvalue.nf @@ -3,18 +3,18 @@ process AGAT_FILTERBYMRNABLASTVALUE { label 'process_single' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? "bioconda::agat=0.9.2" : null) + conda "bioconda::agat=0.9.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1': - 'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }" + 'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }" input: - path gff + tuple val(meta), path(gff) path blast_tbl output: - path "*_blast-filtered.gff3", emit: blast_filtered - path "versions.yml" , emit: versions + tuple val(meta), path("*_blast-filtered.gff3"), emit: blast_filtered + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/agat/filterincompletegenecodingmodels.nf b/modules/local/agat/filterincompletegenecodingmodels.nf index 68551340..a6fd1e5b 100644 --- a/modules/local/agat/filterincompletegenecodingmodels.nf +++ b/modules/local/agat/filterincompletegenecodingmodels.nf @@ -3,18 +3,18 @@ process AGAT_FILTERINCOMPLETEGENECODINGMODELS { label 'process_single' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? "bioconda::agat=0.9.2" : null) + conda "bioconda::agat=0.9.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1': - 'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }" + 'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }" input: - path coding_gene_features_gff + tuple val(meta), path (coding_gene_features_gff) path genome output: - path "*.complete.gff", emit: complete_gene_models - path "versions.yml" , emit: versions + tuple val(meta), path ("*.complete.gff"), emit: complete_gene_models + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/agat/gff2zff.nf b/modules/local/agat/gff2zff.nf index 228d35d1..a91682b6 100644 --- a/modules/local/agat/gff2zff.nf +++ b/modules/local/agat/gff2zff.nf @@ -2,18 +2,18 @@ process AGAT_GFF2ZFF { tag "${annotation}" label 'process_single' - conda (params.enable_conda ? "bioconda::agat=0.9.2" : null) + conda "bioconda::agat=0.9.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1': - 'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }" + 'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }" input: - path annotation + tuple val(meta), path (annotation) path genome output: - path "*.{ann,dna}" , emit: zff - path "versions.yml", emit: versions + tuple val(meta), path ("*.{ann,dna}"), emit: zff + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/agat/keeplongestisoform.nf b/modules/local/agat/keeplongestisoform.nf index a21a97b7..eecacb7c 100644 --- a/modules/local/agat/keeplongestisoform.nf +++ b/modules/local/agat/keeplongestisoform.nf @@ -3,17 +3,17 @@ process AGAT_KEEPLONGESTISOFORM { label 'process_single' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? 
"bioconda::agat=0.9.2" : null) + conda "bioconda::agat=0.9.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1': - 'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }" + 'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }" input: - path coding_gene_features_gff + tuple val(meta), path(coding_gene_features_gff) output: - path "*.longest_cds.gff", emit: longest_isoform - path "versions.yml" , emit: versions + tuple val(meta), path("*.longest_cds.gff"), emit: longest_isoform + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/agat/managefunctionalannotation.nf b/modules/local/agat/managefunctionalannotation.nf index 0359758a..e35b7285 100644 --- a/modules/local/agat/managefunctionalannotation.nf +++ b/modules/local/agat/managefunctionalannotation.nf @@ -3,21 +3,21 @@ process AGAT_MANAGEFUNCTIONALANNOTATION { label 'process_single' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? "bioconda::agat=0.9.2" : null) + conda "bioconda::agat=0.9.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1': - 'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }" + 'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }" input: - path gff + tuple val(meta), path(gff) path merged_blast_results path merged_interproscan_results path blast_db output: - path "*_plus-functional-annotation.gff", emit: gff - path "*.tsv" , emit: tsv, includeInputs: true - path "versions.yml" , emit: versions + tuple val(meta), path("*_plus-functional-annotation.gff"), emit: gff + tuple val(meta), path("*.tsv", includeInputs: true) , emit: tsv + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/agat/separatebyrecord.nf b/modules/local/agat/separatebyrecord.nf index f3df4a86..7c143566 100644 --- a/modules/local/agat/separatebyrecord.nf +++ b/modules/local/agat/separatebyrecord.nf @@ -3,17 +3,17 @@ process AGAT_SEPARATEBYRECORD { label 'process_single' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? "bioconda::agat=0.9.2" : null) + conda "bioconda::agat=0.9.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1': - 'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }" + 'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }" input: - path gff + tuple val(meta), path (gff) output: - path "maker_results_noAbinitio_clean/mrna.gff", emit: transcripts - path "maker_results_noAbinitio_clean/*", emit: all // FIXME: check + tuple val(meta), path("maker_results_noAbinitio_clean/mrna.gff"), emit: transcripts + tuple val(meta), path("maker_results_noAbinitio_clean/*") , emit: all path "versions.yml", emit: versions when: diff --git a/modules/local/augustus/gbk2augustus.nf b/modules/local/augustus/gbk2augustus.nf index aec78217..82499193 100644 --- a/modules/local/augustus/gbk2augustus.nf +++ b/modules/local/augustus/gbk2augustus.nf @@ -2,17 +2,17 @@ process AUGUSTUS_GBK2AUGUSTUS { tag "${genbank.baseName}" label 'process_single' - conda (params.enable_conda ? "bioconda::augustus=3.4.0" : null) + conda "bioconda::augustus=3.4.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/augustus:3.4.0--pl5321h5f9f3d9_6': - 'quay.io/biocontainers/augustus:3.4.0--pl5321h5f9f3d9_6' }" + 'biocontainers/augustus:3.4.0--pl5321h5f9f3d9_6' }" input: - path genbank + tuple val(meta), path(genbank) output: - path "${genbank}.train", emit: training_data - path "${genbank}.test", emit: testing_data + tuple val(meta), path("${genbank}.train"), emit: training_data + tuple val(meta), path ("${genbank}.test"), emit: testing_data path "versions.yml" , emit: versions when: diff --git a/modules/local/augustus/gff2gbk.nf b/modules/local/augustus/gff2gbk.nf index 421a8345..ec6a29d0 100644 --- a/modules/local/augustus/gff2gbk.nf +++ b/modules/local/augustus/gff2gbk.nf @@ -2,18 +2,18 @@ process AUGUSTUS_GFF2GBK { tag "${gff.baseName}" label 'process_single' - conda (params.enable_conda ? 
"bioconda::augustus=3.4.0" : null) + conda "bioconda::augustus=3.4.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/augustus:3.4.0--pl5321h5f9f3d9_6': - 'quay.io/biocontainers/augustus:3.4.0--pl5321h5f9f3d9_6' }" + 'biocontainers/augustus:3.4.0--pl5321h5f9f3d9_6' }" input: - path gff + tuple val(meta), path(gff) path genome output: - path "*.gbk" , emit: gbk - path "versions.yml", emit: versions + tuple val(meta), path ("*.gbk"), emit: gbk + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/augustus/training.nf b/modules/local/augustus/training.nf index bae43c10..3ca6beda 100644 --- a/modules/local/augustus/training.nf +++ b/modules/local/augustus/training.nf @@ -2,20 +2,20 @@ process AUGUSTUS_TRAINING { tag "$species_label" label 'process_single' - conda (params.enable_conda ? "bioconda::augustus=3.4.0" : null) + conda "bioconda::augustus=3.4.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/augustus:3.4.0--pl5321h5f9f3d9_6': - 'quay.io/biocontainers/augustus:3.4.0--pl5321h5f9f3d9_6' }" + 'biocontainers/augustus:3.4.0--pl5321h5f9f3d9_6' }" input: - path training_file + tuple val(meta), path (training_file) path test_file val species_label output: - path "${species_label}", emit: training_model - path "*_run.log" , emit: log - path "versions.yml" , emit: versions + tuple val(meta), path ("${species_label}"), emit: training_model + tuple val(meta), path ("*_run.log") , emit: log + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -31,6 +31,7 @@ process AUGUSTUS_TRAINING { etraining --species=$species_label $training_file augustus --species=$species_label $test_file | tee ${prefix}_run.log mv config/species/${species_label} . 
+ printf "Training gene count: %d\\n" \$( grep -c "LOCUS" $training_file ) | tee -a ${prefix}_run.log cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/blast/blastp.nf b/modules/local/blast/blastp.nf index 658da81d..e4bf1127 100644 --- a/modules/local/blast/blastp.nf +++ b/modules/local/blast/blastp.nf @@ -2,18 +2,18 @@ process BLAST_BLASTP { tag "${fasta.baseName}" label 'process_medium' - conda (params.enable_conda ? 'bioconda::blast=2.12.0' : null) + conda 'bioconda::blast=2.12.0' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/blast:2.12.0--pl5262h3289130_0' : - 'quay.io/biocontainers/blast:2.12.0--pl5262h3289130_0' }" + 'biocontainers/blast:2.12.0--pl5262h3289130_0' }" input: - path fasta + tuple val(meta), path(fasta) path db output: - path '*.blastp.txt', emit: txt - path "versions.yml", emit: versions + tuple val(meta), path ('*.blastp.txt'), emit: txt + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/custom/rankmodels.nf b/modules/local/custom/rankmodels.nf new file mode 100644 index 00000000..4f26499c --- /dev/null +++ b/modules/local/custom/rankmodels.nf @@ -0,0 +1,68 @@ +process CUSTOM_RANKMODELS { + tag "$prefix" + label 'process_single' + + conda "anaconda::gawk=5.1.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gawk:5.1.0': + 'biocontainers/gawk:5.1.0' }" + + input: + tuple val(meta), path(augustus_logs) + + output: + path "*sweep_summary.tsv", emit: summary + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Writes a header row, then one TSV row per log: LD and AED are parsed from the log file name, the other columns from the log contents + prefix = task.ext.prefix ?: meta.id + """ + ( + printf "%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\n" \\ + "locus_distance" \\ + "model_selection_value" \\ + "exon_sensitivity" \\ + "exon_specificity" \\ + "nucleotide_sensitivity" \\ + "nucleotide_specificity" \\ + "gene_sensitivity" \\ + "gene_specificity" \\ + "genes" + for LOG in $augustus_logs; do + printf "%d\\t%f\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%d\\n" \\ + \$( grep -Eo "LD[0-9]+" <<< \$LOG | cut -c 3- ) \\ + \$( grep -Eo "AED[0-9.]+" <<< \$LOG | cut -c 4- ) \\ + \$( grep "^exon level" \$LOG | grep -Eo "[0-9.]+" | sed -n "4p" ) \\ + \$( grep "^exon level" \$LOG | grep -Eo "[0-9.]+" | sed -n "5p" ) \\ + \$( grep "^nucleotide level" \$LOG | grep -Eo "[0-9.]+" | sed -n "1p" ) \\ + \$( grep "^nucleotide level" \$LOG | grep -Eo "[0-9.]+" | sed -n "2p" ) \\ + \$( grep "^gene level" \$LOG | grep -Eo "[0-9.]+" | sed -n "6p" ) \\ + \$( grep "^gene level" \$LOG | grep -Eo "[0-9.]+" | sed -n "7p" ) \\ + \$( grep "Training gene count:" \$LOG | grep -Eo "[0-9.]+" ) + done + ) > ${prefix}_sweep_summary.tsv + + # The genes column is read from the "Training gene count:" line of each log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + grep: \$( grep -V |& sed '2!d;s/.*v//;s/ .*//' ) + END_VERSIONS + """ + + stub: + // Stub run: only creates an empty summary file and versions.yml + prefix = task.ext.prefix ?: meta.id + """ + touch ${prefix}_sweep_summary.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + grep: \$( grep -V |& sed '2!d;s/.*v//;s/ .*//' ) + END_VERSIONS + """ +} diff --git a/modules/local/gaas/fastapurify.nf b/modules/local/gaas/fastapurify.nf index 
28fbdd40..d3382d6e 100644 --- a/modules/local/gaas/fastapurify.nf +++ b/modules/local/gaas/fastapurify.nf @@ -3,10 +3,10 @@ process GAAS_FASTAPURIFY { label 'process_single' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? "bioconda::gaas=1.2.0" : null) + conda "bioconda::gaas=1.2.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gaas:1.2.0--pl526r35_0': - 'quay.io/biocontainers/gaas:1.2.0--pl526r35_0' }" + 'biocontainers/gaas:1.2.0--pl526r35_0' }" input: path fasta diff --git a/modules/local/gaas/fastastatistics.nf b/modules/local/gaas/fastastatistics.nf index 6674ba49..87170360 100644 --- a/modules/local/gaas/fastastatistics.nf +++ b/modules/local/gaas/fastastatistics.nf @@ -3,10 +3,10 @@ process GAAS_FASTASTATISTICS { label 'process_single' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? "bioconda::gaas=1.2.0" : null) + conda "bioconda::gaas=1.2.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gaas:1.2.0--pl526r35_0': - 'quay.io/biocontainers/gaas:1.2.0--pl526r35_0' }" + 'biocontainers/gaas:1.2.0--pl526r35_0' }" input: path fasta diff --git a/modules/local/hisat2/align.nf b/modules/local/hisat2/align.nf index 4cf134a2..318c916a 100644 --- a/modules/local/hisat2/align.nf +++ b/modules/local/hisat2/align.nf @@ -4,8 +4,8 @@ process HISAT2_ALIGN { label 'process_high' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? 
"bioconda::hisat2=2.2.0 bioconda::samtools=1.15.1" : null) - container "nbisweden/hisat2:2.1.0" + conda "bioconda::hisat2=2.2.0 bioconda::samtools=1.15.1" + container "ghcr.io/nbisweden/pipelines-nextflow/hisat2:2.1.0" input: tuple val(meta), path(reads) diff --git a/modules/local/hisat2/build.nf b/modules/local/hisat2/build.nf index d827fff0..8340c5a1 100644 --- a/modules/local/hisat2/build.nf +++ b/modules/local/hisat2/build.nf @@ -5,8 +5,8 @@ process HISAT2_BUILD { label 'process_high_memory' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? "bioconda::hisat2=2.2.0 bioconda::samtools=1.15.1" : null) - container "nbisweden/hisat2:2.1.0" + conda "bioconda::hisat2=2.2.0 bioconda::samtools=1.15.1" + container "ghcr.io/nbisweden/pipelines-nextflow/hisat2:2.1.0" input: path( fasta ) diff --git a/modules/local/interproscan.nf b/modules/local/interproscan.nf index 9e3b7c91..099c6084 100644 --- a/modules/local/interproscan.nf +++ b/modules/local/interproscan.nf @@ -2,17 +2,17 @@ process INTERPROSCAN { tag "${protein_fasta.baseName}" label 'process_single' - conda (params.enable_conda ? "bioconda::interproscan=5.55_88.0" : null) + conda "bioconda::interproscan=5.55_88.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/interproscan:5.55_88.0--hec16e2b_1': - 'quay.io/biocontainers/interproscan:5.55_88.0--hec16e2b_1' }" + 'biocontainers/interproscan:5.55_88.0--hec16e2b_1' }" input: - path protein_fasta + tuple val(meta), path(protein_fasta) output: - path '*.tsv' , emit: tsv - path "versions.yml" , emit: versions + tuple val(meta), path('*.tsv'), emit: tsv + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/snap/training.nf b/modules/local/snap/training.nf index 73c513c1..375bf5f6 100644 --- a/modules/local/snap/training.nf +++ b/modules/local/snap/training.nf @@ -2,18 +2,18 @@ process SNAP_TRAINING { tag "$species_label" label 'process_single' - conda (params.enable_conda ? "bioconda::snap=2013_11_29" : null) + conda "bioconda::snap=2013_11_29" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/snap:2013_11_29--hec16e2b_4': - 'quay.io/biocontainers/snap:2013_11_29--hec16e2b_4' }" + 'biocontainers/snap:2013_11_29--hec16e2b_4' }" input: - path training_files + tuple val(meta), path (training_files) val species_label output: - path "*.hmm" , emit: training_model - path "versions.yml", emit: versions + tuple val(meta), path ("*.hmm"), emit: training_model + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/stringtie/stringtie.nf b/modules/local/stringtie/stringtie.nf index dc4c9695..c94ef05c 100644 --- a/modules/local/stringtie/stringtie.nf +++ b/modules/local/stringtie/stringtie.nf @@ -2,10 +2,10 @@ process STRINGTIE_STRINGTIE { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::stringtie=2.2.1" : null) + conda "bioconda::stringtie=2.2.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/stringtie:2.2.1--hecb563c_2' : - 'quay.io/biocontainers/stringtie:2.2.1--hecb563c_2' }" + 'biocontainers/stringtie:2.2.1--hecb563c_2' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/blast/makeblastdb/blast-makeblastdb.diff b/modules/nf-core/blast/makeblastdb/blast-makeblastdb.diff new file mode 100644 index 00000000..daacf209 --- /dev/null +++ b/modules/nf-core/blast/makeblastdb/blast-makeblastdb.diff @@ -0,0 +1,20 @@ +Changes in module 'nf-core/blast/makeblastdb' +--- modules/nf-core/blast/makeblastdb/main.nf ++++ modules/nf-core/blast/makeblastdb/main.nf +@@ -8,11 +8,11 @@ + 'biocontainers/blast:2.13.0--hf3cf87c_0' }" + + input: +- path fasta ++ tuple val(meta), path(fasta) + + output: +- path 'blast_db' , emit: db +- path "versions.yml" , emit: versions ++ tuple val(meta), path('blast_db'), emit: db ++ path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + +************************************************************ diff --git a/modules/nf-core/modules/blast/makeblastdb/main.nf b/modules/nf-core/blast/makeblastdb/main.nf similarity index 65% rename from modules/nf-core/modules/blast/makeblastdb/main.nf rename to modules/nf-core/blast/makeblastdb/main.nf index 12208ea8..93f88512 100644 --- a/modules/nf-core/modules/blast/makeblastdb/main.nf +++ b/modules/nf-core/blast/makeblastdb/main.nf @@ -2,17 +2,17 @@ process BLAST_MAKEBLASTDB { tag "$fasta" label 'process_medium' - conda (params.enable_conda ? 'bioconda::blast=2.12.0' : null) + conda "bioconda::blast=2.13.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/blast:2.12.0--pl5262h3289130_0' : - 'quay.io/biocontainers/blast:2.12.0--pl5262h3289130_0' }" + 'https://depot.galaxyproject.org/singularity/blast:2.13.0--hf3cf87c_0' : + 'biocontainers/blast:2.13.0--hf3cf87c_0' }" input: - path fasta + tuple val(meta), path(fasta) output: - path 'blast_db' , emit: db - path "versions.yml" , emit: versions + tuple val(meta), path('blast_db'), emit: db + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/modules/blast/makeblastdb/meta.yml b/modules/nf-core/blast/makeblastdb/meta.yml similarity index 100% rename from modules/nf-core/modules/blast/makeblastdb/meta.yml rename to modules/nf-core/blast/makeblastdb/meta.yml diff --git a/modules/nf-core/modules/busco/main.nf b/modules/nf-core/busco/main.nf similarity index 70% rename from modules/nf-core/modules/busco/main.nf rename to modules/nf-core/busco/main.nf index 40354168..95586b03 100644 --- a/modules/nf-core/modules/busco/main.nf +++ b/modules/nf-core/busco/main.nf @@ -2,23 +2,28 @@ process BUSCO { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::busco=5.4.3" : null) + conda "bioconda::busco=5.4.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/busco:5.4.3--pyhdfd78af_0': - 'quay.io/biocontainers/busco:5.4.3--pyhdfd78af_0' }" + 'biocontainers/busco:5.4.3--pyhdfd78af_0' }" input: tuple val(meta), path('tmp_input/*') - each lineage // Required: lineage to check against, "auto" enables --auto-lineage instead + val lineage // Required: lineage to check against, "auto" enables --auto-lineage instead path busco_lineages_path // Recommended: path to busco lineages - downloads if not set path config_file // Optional: busco configuration file output: - tuple val(meta), path("*-busco.batch_summary.txt"), emit: batch_summary - tuple val(meta), path("short_summary.*.txt") , emit: short_summaries_txt, optional: true - tuple val(meta), path("short_summary.*.json") , emit: short_summaries_json, optional: true - tuple val(meta), path("*-busco") , emit: busco_dir - path "versions.yml" , emit: versions + tuple val(meta), path("*-busco.batch_summary.txt") , emit: batch_summary + tuple val(meta), path("short_summary.*.txt") , emit: short_summaries_txt, optional: true + tuple val(meta), path("short_summary.*.json") , emit: short_summaries_json, optional: true + tuple val(meta), path("*-busco/*/run_*/full_table.tsv") , emit: full_table, optional: true + tuple val(meta), path("*-busco/*/run_*/missing_busco_list.tsv") , emit: missing_busco_list, optional: true + tuple val(meta), path("*-busco/*/run_*/single_copy_proteins.faa") , emit: single_copy_proteins, optional: true + tuple val(meta), path("*-busco/*/run_*/busco_sequences") , emit: seq_dir + tuple val(meta), path("*-busco/*/translated_proteins") , emit: translated_dir, optional: true + tuple val(meta), path("*-busco") , emit: busco_dir + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/modules/busco/meta.yml b/modules/nf-core/busco/meta.yml similarity index 75% rename from modules/nf-core/modules/busco/meta.yml rename to modules/nf-core/busco/meta.yml index 
ef8c5245..77d15fbd 100644 --- a/modules/nf-core/modules/busco/meta.yml +++ b/modules/nf-core/busco/meta.yml @@ -25,7 +25,7 @@ input: description: Nucleic or amino acid sequence file in FASTA format. pattern: "*.{fasta,fna,fa,fasta.gz,fna.gz,fa.gz}" - lineage: - type: value + type: string description: The BUSCO lineage to use, or "auto" to automatically select lineage - busco_lineages_path: type: directory @@ -56,6 +56,26 @@ output: type: directory description: BUSCO lineage specific output pattern: "*-busco" + - full_table: + type: file + description: Full BUSCO results table + pattern: "full_table.tsv" + - missing_busco_list: + type: file + description: List of missing BUSCOs + pattern: "missing_busco_list.tsv" + - single_copy_proteins: + type: file + description: Fasta file of single copy proteins (transcriptome mode) + pattern: "single_copy_proteins.faa" + - seq_dir: + type: directory + description: BUSCO sequence directory + pattern: "busco_sequences" + - translated_proteins: + type: directory + description: Six frame translations of each transcript made by the transcriptome mode + pattern: "translated_proteins" - versions: type: file description: File containing software versions diff --git a/modules/nf-core/modules/fastp/main.nf b/modules/nf-core/fastp/main.nf similarity index 65% rename from modules/nf-core/modules/fastp/main.nf rename to modules/nf-core/fastp/main.nf index 120392c5..831b7f12 100644 --- a/modules/nf-core/modules/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -2,13 +2,14 @@ process FASTP { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::fastp=0.23.2' : null) + conda "bioconda::fastp=0.23.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/fastp:0.23.2--h79da9fb_0' : - 'quay.io/biocontainers/fastp:0.23.2--h79da9fb_0' }" + 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : + 'biocontainers/fastp:0.23.4--h5f740d0_0' }" input: tuple val(meta), path(reads) + path adapter_fasta val save_trimmed_fail val save_merged @@ -26,28 +27,53 @@ process FASTP { script: def args = task.ext.args ?: '' - // Added soft-links to original fastqs for consistent naming in MultiQC def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - def fail_fastq = save_trimmed_fail ? "--failed_out ${prefix}.fail.fastq.gz" : '' + def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + // Added soft-links to original fastqs for consistent naming in MultiQC + // Use single ended for interleaved. Add --interleaved_in in config. + if ( task.ext.args?.contains('--interleaved_in') ) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> ${prefix}.fastp.log \\ + | gzip -c > ${prefix}.fastp.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else if (meta.single_end) { """ [ ! 
-f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + fastp \\ --in1 ${prefix}.fastq.gz \\ - --out1 ${prefix}.fastp.fastq.gz \\ + --out1 ${prefix}.fastp.fastq.gz \\ --thread $task.cpus \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ + $adapter_list \\ $fail_fastq \\ $args \\ 2> ${prefix}.fastp.log + cat <<-END_VERSIONS > versions.yml "${task.process}": fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") END_VERSIONS """ } else { - def fail_fastq = save_trimmed_fail ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' """ [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz @@ -59,6 +85,7 @@ process FASTP { --out2 ${prefix}_2.fastp.fastq.gz \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ + $adapter_list \\ $fail_fastq \\ $merge_fastq \\ --thread $task.cpus \\ diff --git a/modules/nf-core/modules/fastp/meta.yml b/modules/nf-core/fastp/meta.yml similarity index 77% rename from modules/nf-core/modules/fastp/meta.yml rename to modules/nf-core/fastp/meta.yml index 2bd2b1a9..197ea7ca 100644 --- a/modules/nf-core/modules/fastp/meta.yml +++ b/modules/nf-core/fastp/meta.yml @@ -9,19 +9,24 @@ tools: description: | A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance. documentation: https://github.com/OpenGene/fastp - doi: https://doi.org/10.1093/bioinformatics/bty560 + doi: 10.1093/bioinformatics/bty560 licence: ["MIT"] input: - meta: type: map description: | - Groovy Map containing sample information + Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. e.g. 
[ id:'test', single_end:false ] - reads: type: file description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. + respectively. If you wish to run interleaved paired-end data, supply as single-end data + but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. + - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove. + pattern: "*.{fasta,fna,fas,fa}" - save_trimmed_fail: type: boolean description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf new file mode 100644 index 00000000..249f9064 --- /dev/null +++ b/modules/nf-core/fastqc/main.nf @@ -0,0 +1,55 @@ +process FASTQC { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::fastqc=0.11.9" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : + 'biocontainers/fastqc:0.11.9--0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.zip") , emit: zip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // Make list of old name and new name pairs to use for renaming in the bash while loop + def old_new_pairs = reads instanceof Path || reads.size() == 1 ? 
[[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } + def rename_to = old_new_pairs*.join(' ').join(' ') + def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + """ + printf "%s %s\\n" $rename_to | while read old_name new_name; do + [ -f "\${new_name}" ] || ln -s \$old_name \$new_name + done + + fastqc \\ + $args \\ + --threads $task.cpus \\ + $renamed_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml similarity index 100% rename from modules/nf-core/modules/fastqc/meta.yml rename to modules/nf-core/fastqc/meta.yml diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/modules/fastqc/main.nf deleted file mode 100644 index 05730368..00000000 --- a/modules/nf-core/modules/fastqc/main.nf +++ /dev/null @@ -1,59 +0,0 @@ -process FASTQC { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - // Add soft-links to original FastQs for consistent naming in pipeline - def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz - fastqc $args --threads $task.cpus ${prefix}.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } else { - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - fastqc $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.html - touch ${prefix}.zip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/multiqc/main.nf similarity index 77% rename from modules/nf-core/modules/multiqc/main.nf rename to modules/nf-core/multiqc/main.nf index 1e7d6afe..65d7dd0d 100644 --- a/modules/nf-core/modules/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,14 +1,16 @@ process MULTIQC { - label 'process_medium' + label 'process_single' - conda (params.enable_conda ? 
'bioconda::multiqc=1.13a' : null) + conda "bioconda::multiqc=1.15" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' : - 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : + 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" - tuple path(multiqc_config), path(multiqc_logo) + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) output: path "*multiqc_report.html", emit: report @@ -22,11 +24,13 @@ process MULTIQC { script: def args = task.ext.args ?: '' def config = multiqc_config ? "--config $multiqc_config" : '' + def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' """ multiqc \\ --force \\ - $config \\ $args \\ + $config \\ + $extra_config \\ . cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/modules/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml similarity index 74% rename from modules/nf-core/modules/multiqc/meta.yml rename to modules/nf-core/multiqc/meta.yml index bf3a27fe..f93b5ee5 100644 --- a/modules/nf-core/modules/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json name: MultiQC description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: @@ -12,6 +13,7 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] + input: - multiqc_files: type: file @@ -19,19 +21,24 @@ input: List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - multiqc_config: type: file - description: Config yml for MultiQC + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - 
extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. pattern: "*.{yml,yaml}" - multiqc_logo: type: file - description: Logo file for MultiQC + description: Optional logo file for MultiQC pattern: "*.{png}" + output: - report: type: file description: MultiQC report file pattern: "multiqc_report.html" - data: - type: dir + type: directory description: MultiQC data dir pattern: "multiqc_data" - plots: @@ -46,3 +53,4 @@ authors: - "@abhi18av" - "@bunop" - "@drpatelh" + - "@jfy133" diff --git a/nextflow.config b/nextflow.config index 92c2b76d..6d614a2b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -4,8 +4,8 @@ manifest { homePage = '' description = 'Workflows for assisting in de novo genome annotation.' mainScript = 'main.nf' - nextflowVersion = '!>=21.10.6' - version = '1.0' + nextflowVersion = '!>=22.10.0' + version = '2.0' } params { @@ -20,6 +20,8 @@ params { codon_table = 1 flank_region_size = 1000 maker_species_publishdir = null + aed_value = [ 0.3 ] + locus_distance = [ 3000 ] // Annotation preprocessing parameters // List of lineages to run with busco @@ -41,8 +43,6 @@ params { // The default mode of publishing results (see https://www.nextflow.io/docs/latest/process.html#publishdir) publishDir_mode = 'copy' - // Enables the use of the conda package manager - enable_conda = false } includeConfig "$projectDir/config/nf-core-defaults.config" @@ -58,32 +58,28 @@ profiles { clusterOptions = "-A ${params.project}" scratch = '$SNIC_TMP' } - // includeConfig "$projectDir/config/compute_resources.config" singularity.enabled = true singularity.envWhitelist = 'SNIC_TMP' - // includeConfig "$projectDir/config/software_packages.config" } conda { - // includeConfig "$projectDir/config/software_packages.config" - params.enable_conda = true + conda.enabled = true } mamba { - // includeConfig "$projectDir/config/software_packages.config" - params.enable_conda = true + conda.enabled = 
true conda.useMamba = true } docker { - // includeConfig "$projectDir/config/software_packages.config" docker.enabled = true docker.runOptions='-u "$( id -u ):$( id -g )"' + docker.registry = 'quay.io' } singularity { singularity.enabled = true - // includeConfig "$projectDir/config/software_packages.config" + singularity.registry = 'quay.io' } gitpod { @@ -101,7 +97,6 @@ profiles { process { scratch = '/scratch' } - // includeConfig "$projectDir/config/compute_resources.config" } test { diff --git a/subworkflows/abinitio_training/README.md b/subworkflows/abinitio_training/README.md index 41bc929e..0d9c6887 100644 --- a/subworkflows/abinitio_training/README.md +++ b/subworkflows/abinitio_training/README.md @@ -17,6 +17,12 @@ genome: '/path/to/genome/assembly.fasta' maker_evidence_gff: '/path/to/evidence/annotation.gff' species_label: 'species_name' codon_table: 1 +aed_value: + - 0.2 + - 0.3 +locus_distance: + - 3000 + - 4000 outdir: '/path/to/save/results' ``` @@ -43,6 +49,8 @@ nextflow run NBISweden/pipelines-nextflow \ - `species_label`: A species label for the training data. - `maker_species_publishdir`: A shared directory where a copy of the augustus `species_label` profile is saved. - `codon_table`: The number of the codon table to use for translation (default: 1). + - `aed_value`: A list of model selection values to explore (smaller values mean higher stringency). + - `locus_distance`: A list of locus distances (average distance between genes) to explore. - `flank_region_size`: The size of the flank region to include (default: 1000). 
### Tool specific parameters diff --git a/subworkflows/abinitio_training/main.nf b/subworkflows/abinitio_training/main.nf index ecb6812c..ddc30f6a 100644 --- a/subworkflows/abinitio_training/main.nf +++ b/subworkflows/abinitio_training/main.nf @@ -4,7 +4,7 @@ include { AGAT_KEEPLONGESTISOFORM as RETAIN_LONGEST_ISOFORM include { AGAT_FILTERINCOMPLETEGENECODINGMODELS as REMOVE_INCOMPLETE_GENE_MODELS } from "$projectDir/modules/local/agat/filterincompletegenecodingmodels" include { AGAT_FILTERBYLOCUSDISTANCE as FILTER_BY_LOCUS_DISTANCE } from "$projectDir/modules/local/agat/filterbylocusdistance" include { AGAT_EXTRACTSEQUENCES as EXTRACT_PROTEIN_SEQUENCE } from "$projectDir/modules/local/agat/extractsequences" -include { BLAST_MAKEBLASTDB } from "$projectDir/modules/nf-core/modules/blast/makeblastdb/main" +include { BLAST_MAKEBLASTDB } from "$projectDir/modules/nf-core/blast/makeblastdb/main" include { BLAST_BLASTP as BLAST_RECURSIVE } from "$projectDir/modules/local/blast/blastp" include { AGAT_FILTERBYMRNABLASTVALUE as GFF_FILTER_BY_BLAST } from "$projectDir/modules/local/agat/filterbymrnablastvalue" include { AUGUSTUS_GFF2GBK as GFF2GBK } from "$projectDir/modules/local/augustus/gff2gbk" @@ -12,6 +12,7 @@ include { AUGUSTUS_GBK2AUGUSTUS as GBK2AUGUSTUS include { AUGUSTUS_TRAINING } from "$projectDir/modules/local/augustus/training" include { AGAT_GFF2ZFF as CONVERT_GFF2ZFF } from "$projectDir/modules/local/agat/gff2zff" include { SNAP_TRAINING } from "$projectDir/modules/local/snap/training" +include { CUSTOM_RANKMODELS as RANK_AUGUSTUS_MODELS } from "$projectDir/modules/local/custom/rankmodels" workflow ABINITIO_TRAINING { @@ -22,12 +23,23 @@ workflow ABINITIO_TRAINING { """ Channel.fromPath( params.maker_evidence_gff, checkIfExists: true ) + .map { gff -> [ [ id: gff.baseName ], gff ] } .set { gff_annotation } Channel.fromPath( params.genome, checkIfExists: true ) .set{ genome } + // Make channel for sweep parameters + Channel.fromList( params.aed_value 
instanceof List ? params.aed_value : [ params.aed_value ] ) + .filter( Number ) + .set{ ch_aed } + Channel.fromList( params.locus_distance instanceof List ? params.locus_distance : [ params.locus_distance ] ) + .filter( Number ) + .combine( ch_aed ) + .map { locus_distance, aed -> [ 'aed_value': aed, 'locus_distance': locus_distance ] } + .set { ch_sweep_parameters } + SPLIT_MAKER_EVIDENCE( gff_annotation ) - MODEL_SELECTION_BY_AED( SPLIT_MAKER_EVIDENCE.out.transcripts ) + MODEL_SELECTION_BY_AED( ch_sweep_parameters.combine( SPLIT_MAKER_EVIDENCE.out.transcripts ).map { pars, id, gff -> [ id + pars, gff ] } ) RETAIN_LONGEST_ISOFORM( MODEL_SELECTION_BY_AED.out.selected_models ) REMOVE_INCOMPLETE_GENE_MODELS( RETAIN_LONGEST_ISOFORM.out.longest_isoform, @@ -40,12 +52,20 @@ workflow ABINITIO_TRAINING { ) BLAST_MAKEBLASTDB( EXTRACT_PROTEIN_SEQUENCE.out.proteins ) BLAST_RECURSIVE( - EXTRACT_PROTEIN_SEQUENCE.out.proteins, - BLAST_MAKEBLASTDB.out.db.collect() + EXTRACT_PROTEIN_SEQUENCE.out.proteins + .join( BLAST_MAKEBLASTDB.out.db ) + .multiMap { meta, proteins, proteindb -> + proteins: [ meta, proteins ] + prot_db: proteindb + } ) GFF_FILTER_BY_BLAST( - FILTER_BY_LOCUS_DISTANCE.out.distanced_models, - BLAST_RECURSIVE.out.txt.collect() + FILTER_BY_LOCUS_DISTANCE.out.distanced_models + .combine( BLAST_RECURSIVE.out.txt, by: 0 ) + .multiMap{ meta, dmodels, btbl -> + dmodels : [ meta, dmodels ] + blast_tbl: btbl + } ) GFF2GBK( GFF_FILTER_BY_BLAST.out.blast_filtered, @@ -53,10 +73,19 @@ workflow ABINITIO_TRAINING { ) GBK2AUGUSTUS( GFF2GBK.out.gbk ) AUGUSTUS_TRAINING( - GBK2AUGUSTUS.out.training_data, - GBK2AUGUSTUS.out.testing_data, + GBK2AUGUSTUS.out.training_data + .combine( GBK2AUGUSTUS.out.testing_data , by: 0 ) + .multiMap{ meta, training, testing -> + train: [ meta, training ] + test : testing + }, params.species_label ) + RANK_AUGUSTUS_MODELS( + AUGUSTUS_TRAINING.out.log + .map { meta, log -> [ [ 'id': params.species_label ], log ] } + .groupTuple() + ) CONVERT_GFF2ZFF( 
GFF_FILTER_BY_BLAST.out.blast_filtered, genome.collect() diff --git a/subworkflows/annotation_preprocessing/main.nf b/subworkflows/annotation_preprocessing/main.nf index b8c10a54..25b42426 100644 --- a/subworkflows/annotation_preprocessing/main.nf +++ b/subworkflows/annotation_preprocessing/main.nf @@ -1,6 +1,6 @@ include { GAAS_FASTAPURIFY as ASSEMBLY_PURIFY } from "$projectDir/modules/local/gaas/fastapurify" include { GAAS_FASTASTATISTICS as ASSEMBLY_STATS } from "$projectDir/modules/local/gaas/fastastatistics" -include { BUSCO } from "$projectDir/modules/nf-core/modules/busco/main" +include { BUSCO } from "$projectDir/modules/nf-core/busco/main" workflow ANNOTATION_PREPROCESSING { @@ -12,12 +12,18 @@ workflow ANNOTATION_PREPROCESSING { Channel.fromPath( params.genome, checkIfExists: true) .ifEmpty { error "Cannot find genome matching ${params.genome}!\n" } .set { genome_assembly } + Channel.fromList( params.busco_lineage instanceof List ? params.busco_lineage : [ params.busco_lineage ] ) + .set { ch_busco_lineage } ASSEMBLY_PURIFY( genome_assembly ) ASSEMBLY_STATS( genome_assembly.mix( ASSEMBLY_PURIFY.out.fasta ) ) BUSCO( - ASSEMBLY_PURIFY.out.fasta.map { fasta -> [ [ id: fasta.baseName ], fasta ] }, - params.busco_lineage, + ASSEMBLY_PURIFY.out.fasta + .combine( ch_busco_lineage ) + .multiMap { fasta, lineage -> + ch_fasta: [ [ id: fasta.baseName ], fasta ] + ch_busco: lineage + }, params.busco_lineages_path ? 
file( params.busco_lineages_path, checkIfExists: true ) : [], [] ) diff --git a/subworkflows/functional_annotation/main.nf b/subworkflows/functional_annotation/main.nf index 1d459f0c..9073ae1d 100644 --- a/subworkflows/functional_annotation/main.nf +++ b/subworkflows/functional_annotation/main.nf @@ -1,4 +1,4 @@ -include { BLAST_MAKEBLASTDB } from "$projectDir/modules/nf-core/modules/blast/makeblastdb/main" +include { BLAST_MAKEBLASTDB } from "$projectDir/modules/nf-core/blast/makeblastdb/main" include { AGAT_EXTRACTSEQUENCES as GFF2PROTEIN } from "$projectDir/modules/local/agat/extractsequences" include { BLAST_BLASTP } from "$projectDir/modules/local/blast/blastp" include { INTERPROSCAN } from "$projectDir/modules/local/interproscan" @@ -12,6 +12,7 @@ workflow FUNCTIONAL_ANNOTATION { =================================================== """ Channel.fromPath( params.gff_annotation, checkIfExists: true ) + .map { gff -> [ [ id: gff.baseName ], gff ] } .set { gff_file } Channel.fromPath( params.genome, checkIfExists: true ) .set { genome } @@ -25,9 +26,9 @@ workflow FUNCTIONAL_ANNOTATION { // No database files found matching the glob pattern } make_db : db_files.size() == 1 - return db_files + return [ [ db: fasta.baseName ] , db_files ] with_db : db_files.size() > 1 - return db_files + return [ [ db: fasta.baseName ] , db_files ] }.set { ch_blast_fa } BLAST_MAKEBLASTDB( @@ -40,13 +41,13 @@ workflow FUNCTIONAL_ANNOTATION { ) BLAST_BLASTP( GFF2PROTEIN.out.proteins.splitFasta( by: params.records_per_file, file: true ), - blastdb_ch.collect() + blastdb_ch.map{ meta, db -> db }.collect() ) INTERPROSCAN( GFF2PROTEIN.out.proteins.splitFasta( by: params.records_per_file, file: true ) ) - MERGE_FUNCTIONAL_ANNOTATION( + MERGE_FUNCTIONAL_ANNOTATION( gff_file, - BLAST_BLASTP.out.txt.collectFile( name: 'blast_merged.tsv' ), - INTERPROSCAN.out.tsv.collectFile( name: 'interproscan_merged.tsv' ), + BLAST_BLASTP.out.txt.map{ meta, txt -> txt }.collectFile( name: 'blast_merged.tsv' ), 
+ INTERPROSCAN.out.tsv.map{ meta, tsv -> tsv }.collectFile( name: 'interproscan_merged.tsv' ), blast_fa.collect() ) } diff --git a/subworkflows/transcript_assembly/main.nf b/subworkflows/transcript_assembly/main.nf index abd4e379..a419c510 100644 --- a/subworkflows/transcript_assembly/main.nf +++ b/subworkflows/transcript_assembly/main.nf @@ -1,9 +1,9 @@ -include { FASTQC } from "$projectDir/modules/nf-core/modules/fastqc/main" +include { FASTQC } from "$projectDir/modules/nf-core/fastqc/main" include { HISAT2_ALIGN } from "$projectDir/modules/local/hisat2/align" include { HISAT2_BUILD } from "$projectDir/modules/local/hisat2/build" -include { FASTP } from "$projectDir/modules/nf-core/modules/fastp/main" +include { FASTP } from "$projectDir/modules/nf-core/fastp/main" include { STRINGTIE_STRINGTIE } from "$projectDir/modules/local/stringtie/stringtie" -include { MULTIQC } from "$projectDir/modules/nf-core/modules/multiqc/main" +include { MULTIQC } from "$projectDir/modules/nf-core/multiqc/main" workflow TRANSCRIPT_ASSEMBLY { @@ -13,16 +13,19 @@ workflow TRANSCRIPT_ASSEMBLY { =================================================== """ Channel.fromFilePairs( params.reads, size: params.single_end ? 1 : 2, checkIfExists: true ) - // .ifEmpty { error "Cannot find reads matching ${params.reads}!\n" } .map { filestem, files -> [ [ id: filestem, single_end: params.single_end ], files ] } .set { reads } Channel.fromPath( params.genome, checkIfExists: true ) - // .ifEmpty { error "Cannot find genome matching ${params.genome}!\n" } .set { genome } FASTQC ( reads ) HISAT2_BUILD ( genome ) - FASTP( reads, false, false ) // Disabled when params.skip_trimming + FASTP( // Disabled using `when:` when params.skip_trimming + reads, + [], // Adapter file + false, // save trimmed fail + false // save merged + ) // Disabled when params.skip_trimming HISAT2_ALIGN ( params.skip_trimming ? 
reads : FASTP.out.reads, HISAT2_BUILD.out.index.collect() @@ -33,6 +36,8 @@ workflow TRANSCRIPT_ASSEMBLY { FASTP.out.log.map{ meta, log -> log }, HISAT2_ALIGN.out.summary.map{ meta, log -> log } ).collect(), - [ file( params.multiqc_config, checkIfExists: true ), [] ] + file( params.multiqc_config, checkIfExists: true ), + [], // extra MQC config + [] // MQC logo ) }