diff --git a/bin/bin_summary.R b/bin/bin_summary.R index 9da3386..3fa7608 100755 --- a/bin/bin_summary.R +++ b/bin/bin_summary.R @@ -103,6 +103,9 @@ read_taxonomy <- function(file) { ncbi_classification = `Majority vote NCBI classification`, taxid) } + # gtdb doesn't drop the extension: strip the last one, leaving names without an extension untouched (str_extract would turn them into NA) + df <- df |> + mutate(bin = str_remove(bin, "\\.[^\\.]+$")) return(df) } diff --git a/conf/modules.config b/conf/modules.config index 9daeec9..53e2653 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -55,7 +55,7 @@ process { } withName: 'CHECKM2_PREDICT' { - ext.args = { "--extension fa" } + ext.args = { "--extension gz" } ext.prefix = { "${meta.id}_${meta.assembler}_${meta.binner}" } tag = { "${meta.id}_${meta.assembler}_${meta.binner}" } publishDir = [ @@ -134,7 +134,7 @@ process { } withName: 'GTDBTK_CLASSIFYWF' { - ext.args = "--extension fa" + ext.args = "--extension gz" ext.prefix = { "${meta.id}_${meta.assembler}_${meta.binner}" } tag = { "${meta.id}_${meta.assembler}_${meta.binner}" } publishDir = [ diff --git a/modules.json b/modules.json index 41d97ce..c8f0b1c 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,11 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "barrnap": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "bwamem2/index": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", @@ -57,8 +62,7 @@ "maxbin2": { "branch": "master", "git_sha": "283613159e079152f1336cef0db1c836086206e0", - "installed_by": ["modules"], - "patch": "modules/nf-core/maxbin2/maxbin2.diff" + "installed_by": ["modules"] }, "metabat2/metabat2": { "branch": "master", @@ -68,7 +72,7 @@ }, "metamdbg/asm": { "branch": "master", - "git_sha": "7c08494acb5aba0763c5c6db87f82b249de87ea8", + "git_sha": "88fbcaa0e08c4f5ab925bebe2234b2b68953e8cd", "installed_by": ["modules"] }, "minimap2/align": { @@ -82,6 +86,11 @@ "git_sha": 
"cf17ca47590cc578dfb47db1c2a44ef86f89976d", "installed_by": ["modules"] }, + "prokka": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "pyrodigal": { "branch": "master", "git_sha": "938e803109104e30773f76a7142442722498fef1", diff --git a/modules/local/bin3c/cluster/main.nf b/modules/local/bin3c/cluster/main.nf index cc325db..a7b25e5 100644 --- a/modules/local/bin3c/cluster/main.nf +++ b/modules/local/bin3c/cluster/main.nf @@ -7,7 +7,7 @@ process BIN3C_CLUSTER { tuple val(meta), path(contigs), path(map) output: - tuple val(meta), path("*.fa") , emit: fasta, optional: true + tuple val(meta), path("*.fa.gz") , emit: fasta, optional: true tuple val(meta), path("*.[!fna,log]*"), emit: clustering tuple val(meta), path("*.log") , emit: log path("versions.yml") , emit: versions @@ -27,11 +27,12 @@ process BIN3C_CLUSTER { # bin3c renames contigs, we don't want that for bin in bin3c/fasta/*.fna; do - basename=`basename \$bin` - awk -F" " '{if(\$1~">"){print ">" substr(\$2,8)}else{print \$0}}' \$bin > ${prefix}.\${basename%.fna}.fa + bn=`basename \$bin .fna` + awk -F" " '{if(\$1~">"){print ">" substr(\$2,8)}else{print \$0}}' \$bin > ${prefix}.\${bn}.fa done find bin3c -maxdepth 1 -type f -exec sh -c 'name=`basename {}`; mv {} ${prefix}.\$name' \\; + gzip *.fa cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/contig2bintofasta/main.nf b/modules/local/contig2bintofasta/main.nf index 0f6564c..c370139 100644 --- a/modules/local/contig2bintofasta/main.nf +++ b/modules/local/contig2bintofasta/main.nf @@ -11,8 +11,8 @@ process CONTIG2BINTOFASTA { tuple val(meta), path(contigs), path(contig2bin) output: - tuple val(meta), path("*.fa*"), emit: bins - path("versions.yml") , emit: versions + tuple val(meta), path("*.fa.gz"), emit: bins + path("versions.yml") , emit: versions script: def args = task.ext.args ?: '' @@ -21,7 +21,7 @@ process CONTIG2BINTOFASTA { awk '{print \$2}' ${contig2bin} 
| sort -u | while read bin do grep -w \${bin} ${contig2bin} | awk '{ print \$1 }' > \${bin}.ctglst - seqkit grep -f \${bin}.ctglst ${contigs} > \${bin}.fa + seqkit grep -f \${bin}.ctglst ${contigs} | gzip > \${bin}.fa.gz done cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/fastatocontig2bin/main.nf b/modules/local/fastatocontig2bin/main.nf index 0fa29c1..bad452e 100644 --- a/modules/local/fastatocontig2bin/main.nf +++ b/modules/local/fastatocontig2bin/main.nf @@ -16,19 +16,28 @@ process FASTATOCONTIG2BIN { path("versions.yml") , emit: versions script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def compressed_bins = bins.findAll { it.getExtension() == "gz" } + def decompress_bins = compressed_bins.size() > 0 ? "gunzip -f ${compressed_bins.join(" ")}" : "" + def clean_bins = bins.collect { it.toString() - ~/\.gz$/ } + def remove_compressed = compressed_bins.size() > 0 ? 
"rm ${compressed_bins.collect { it.toString() - ~/\.gz$/ }.join(" ")}" : "" """ + ${decompress_bins} + awk \\ 'BEGIN { OFS = "\t" } BEGINFILE { - cmd=sprintf("basename %s .%s", FILENAME, "${extension}") - cmd | getline bin + bin = FILENAME + sub(".*/", "", bin) + sub(/\\.[^\\.]+\$/, "", bin) } /^>/ { sub(/>/, "", \$1) - print \$1,bin - }' ${bins} > ${prefix}.tsv + print \$1, bin + }' ${clean_bins.join(" ")} > ${prefix}.tsv + + ${remove_compressed} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/metator/pipeline/main.nf b/modules/local/metator/pipeline/main.nf index e73bc82..94dc467 100644 --- a/modules/local/metator/pipeline/main.nf +++ b/modules/local/metator/pipeline/main.nf @@ -10,10 +10,10 @@ process METATOR_PIPELINE { val hic_enzymes output: - tuple val(meta), path("bin_summary.txt") , emit: bin_summary - tuple val(meta), path("binning.txt") , emit: contig2bin - tuple val(meta), path("bins/*.fa"), emit: bins - path "versions.yml" , emit: versions + tuple val(meta), path("bin_summary.txt") , emit: bin_summary + tuple val(meta), path("binning.txt") , emit: contig2bin + tuple val(meta), path("bins/*.fa.gz") , emit: bins + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -46,6 +46,8 @@ process METATOR_PIPELINE { awk -F" " '{if(\$1~">"){ print \$1 } else { print \$0 } }' \$bin > bins/\${binname} done + gzip bins/*.fa + rm -r final_bin_unscaffold cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/barrnap/environment.yml b/modules/nf-core/barrnap/environment.yml new file mode 100644 index 0000000..3a920f2 --- /dev/null +++ b/modules/nf-core/barrnap/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::barrnap=0.9 diff --git a/modules/nf-core/barrnap/main.nf b/modules/nf-core/barrnap/main.nf new file mode 100644 index 0000000..b99573d --- /dev/null +++ b/modules/nf-core/barrnap/main.nf @@ -0,0 +1,56 @@ +process BARRNAP { + tag "$meta.id" + 
label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/barrnap:0.9--hdfd78af_4': + 'biocontainers/barrnap:0.9--hdfd78af_4' }" + + input: + tuple val(meta), path(fasta), val(dbname) + + output: + tuple val(meta), path("*.gff"), emit: gff + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + db = dbname ? "${dbname}" : 'bac' + input = fasta =~ /\.gz$/ ? fasta.name.take(fasta.name.lastIndexOf('.')) : fasta + gunzip = fasta =~ /\.gz$/ ? "gunzip -c ${fasta} > ${input}" : "" + + """ + $gunzip + + barrnap \\ + $args \\ + --threads $task.cpus \\ + --kingdom $db \\ + $input \\ + > ${prefix}_${db}.gff + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + barrnap: \$(echo \$(barrnap --version 2>&1) | sed 's/barrnap//; s/Using.*\$//' ) + END_VERSIONS + + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + db = dbname ? 
"${dbname}" : 'bac' + """ + touch ${prefix}_${db}.gff + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + barrnap: \$(echo \$(barrnap --version 2>&1) | sed 's/barrnap//; s/Using.*\$//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/barrnap/meta.yml b/modules/nf-core/barrnap/meta.yml new file mode 100644 index 0000000..7464c52 --- /dev/null +++ b/modules/nf-core/barrnap/meta.yml @@ -0,0 +1,49 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "barrnap" +description: barrnap uses a hmmer profile to find rrnas in reads or contig fasta files +keywords: + - rrna + - sequences + - removal +tools: + - "barrnap": + description: "Barrnap predicts the location of ribosomal RNA genes in genomes + (bacteria, archaea, metazoan mitochondria and eukaryotes)." + homepage: "https://github.com/tseemann/barrnap" + documentation: "https://github.com/tseemann/barrnap" + tool_dev_url: "https://github.com/tseemann/barrnap" + licence: ["GPL v3"] + identifier: biotools:barrnap +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - fasta: + type: file + description: fasta file + pattern: "*.fasta" + - dbname: + type: string + description: database to use(bacteria, archaea, eukaryota, metazoan mitochondria) +output: + - gff: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - "*.gff": + type: file + description: gff file containing coordinates of genes + pattern: "*.gff" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@danilodileo" +maintainers: + - "@danilodileo" diff --git a/modules/nf-core/barrnap/tests/main.nf.test b/modules/nf-core/barrnap/tests/main.nf.test new file mode 100644 index 0000000..b2d6216 --- /dev/null +++ b/modules/nf-core/barrnap/tests/main.nf.test @@ -0,0 +1,57 @@ +nextflow_process { + + name "Test Process BARRNAP" + script "../main.nf" + process "BARRNAP" + tag "modules" + tag "modules_nfcore" + tag "barrnap" + + test("barrnap") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true), + "bac" + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("barrnap - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true), + "bac" + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} + + diff --git a/modules/nf-core/barrnap/tests/main.nf.test.snap b/modules/nf-core/barrnap/tests/main.nf.test.snap new file mode 100644 index 0000000..0964a0d --- /dev/null +++ b/modules/nf-core/barrnap/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "barrnap - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_bac.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,447eafa60f76f6b84d1c41a2f5c2f76b" + ], + "gff": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test_bac.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,447eafa60f76f6b84d1c41a2f5c2f76b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-10T11:25:35.085998" + }, + "barrnap": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_bac.gff:md5,df19e1b84ba6f691d20c72b397c88abf" + ] + ], + "1": [ + "versions.yml:md5,447eafa60f76f6b84d1c41a2f5c2f76b" + ], + "gff": [ + [ + { + "id": "test", + "single_end": false + }, + "test_bac.gff:md5,df19e1b84ba6f691d20c72b397c88abf" + ] + ], + "versions": [ + "versions.yml:md5,447eafa60f76f6b84d1c41a2f5c2f76b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-10T11:25:28.621027" + } +} \ No newline at end of file diff --git a/modules/nf-core/barrnap/tests/tags.yml b/modules/nf-core/barrnap/tests/tags.yml new file mode 100644 index 0000000..5b95110 --- /dev/null +++ b/modules/nf-core/barrnap/tests/tags.yml @@ -0,0 +1,2 @@ +barrnap: + - "modules/nf-core/barrnap/**" diff --git a/modules/nf-core/maxbin2/main.nf b/modules/nf-core/maxbin2/main.nf index 0f1d092..845c8e4 100644 --- a/modules/nf-core/maxbin2/main.nf +++ b/modules/nf-core/maxbin2/main.nf @@ -11,7 +11,7 @@ process MAXBIN2 { tuple val(meta), path(contigs), path(reads), path(abund) output: - tuple val(meta), path("*.fa") , emit: binned_fastas + tuple val(meta), path("*.fasta.gz") , emit: binned_fastas tuple val(meta), path("*.summary") , emit: summary tuple val(meta), path("*.abundance") , emit: abundance , optional: true tuple val(meta), path("*.log.gz") , emit: log @@ -47,8 +47,7 @@ process MAXBIN2 { $args \\ -out $prefix - gzip *.noclass *.tooshort *log *.marker - find . 
-name '*.fasta' -type f -exec sh -c 'name={}; mv \$name \${name%.fasta}.fa' \\; + gzip *.fasta *.noclass *.tooshort *log *.marker cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/maxbin2/maxbin2.diff b/modules/nf-core/maxbin2/maxbin2.diff deleted file mode 100644 index 2f0b337..0000000 --- a/modules/nf-core/maxbin2/maxbin2.diff +++ /dev/null @@ -1,29 +0,0 @@ -Changes in module 'nf-core/maxbin2' -'modules/nf-core/maxbin2/environment.yml' is unchanged -Changes in 'maxbin2/main.nf': ---- modules/nf-core/maxbin2/main.nf -+++ modules/nf-core/maxbin2/main.nf -@@ -11,7 +11,7 @@ - tuple val(meta), path(contigs), path(reads), path(abund) - - output: -- tuple val(meta), path("*.fasta.gz") , emit: binned_fastas -+ tuple val(meta), path("*.fa") , emit: binned_fastas - tuple val(meta), path("*.summary") , emit: summary - tuple val(meta), path("*.abundance") , emit: abundance , optional: true - tuple val(meta), path("*.log.gz") , emit: log -@@ -47,7 +47,8 @@ - $args \\ - -out $prefix - -- gzip *.fasta *.noclass *.tooshort *log *.marker -+ gzip *.noclass *.tooshort *log *.marker -+ find . 
-name '*.fasta' -type f -exec sh -c 'name={}; mv \$name \${name%.fasta}.fa' \\; - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - -'modules/nf-core/maxbin2/meta.yml' is unchanged -'modules/nf-core/maxbin2/tests/main.nf.test' is unchanged -'modules/nf-core/maxbin2/tests/main.nf.test.snap' is unchanged -************************************************************ diff --git a/modules/nf-core/metabat2/metabat2/main.nf b/modules/nf-core/metabat2/metabat2/main.nf index 3478d30..c161b8d 100644 --- a/modules/nf-core/metabat2/metabat2/main.nf +++ b/modules/nf-core/metabat2/metabat2/main.nf @@ -15,7 +15,7 @@ process METABAT2_METABAT2 { tuple val(meta), path("*.lowDepth.fa.gz") , optional:true, emit: lowdepth tuple val(meta), path("*.unbinned.fa.gz") , optional:true, emit: unbinned tuple val(meta), path("*.tsv.gz") , optional:true, emit: membership - tuple val(meta), path("*[!lowDepth|tooShort|unbinned].fa") , optional:true, emit: fasta + tuple val(meta), path("*[!lowDepth|tooShort|unbinned].fa.gz"), optional:true, emit: fasta path "versions.yml" , emit: versions when: @@ -35,7 +35,7 @@ process METABAT2_METABAT2 { -o ${prefix} gzip -cn ${prefix} > ${prefix}.tsv.gz - ## find . -name "*.fa" -type f | xargs -t -n 1 bgzip -@ ${task.cpus} + find . 
-name "*.fa" -type f | xargs -t -n 1 bgzip -@ ${task.cpus} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/metabat2/metabat2/metabat2-metabat2.diff b/modules/nf-core/metabat2/metabat2/metabat2-metabat2.diff index fa2aaec..6f53b8a 100644 --- a/modules/nf-core/metabat2/metabat2/metabat2-metabat2.diff +++ b/modules/nf-core/metabat2/metabat2/metabat2-metabat2.diff @@ -1,16 +1,9 @@ Changes in module 'nf-core/metabat2/metabat2' +'modules/nf-core/metabat2/metabat2/environment.yml' is unchanged +'modules/nf-core/metabat2/metabat2/meta.yml' is unchanged Changes in 'metabat2/metabat2/main.nf': --- modules/nf-core/metabat2/metabat2/main.nf +++ modules/nf-core/metabat2/metabat2/main.nf -@@ -15,7 +15,7 @@ - tuple val(meta), path("*.lowDepth.fa.gz") , optional:true, emit: lowdepth - tuple val(meta), path("*.unbinned.fa.gz") , optional:true, emit: unbinned - tuple val(meta), path("*.tsv.gz") , optional:true, emit: membership -- tuple val(meta), path("*[!lowDepth|tooShort|unbinned].fa.gz"), optional:true, emit: fasta -+ tuple val(meta), path("*[!lowDepth|tooShort|unbinned].fa") , optional:true, emit: fasta - path "versions.yml" , emit: versions - - when: @@ -24,11 +24,8 @@ script: def args = task.ext.args ?: '' @@ -24,19 +17,8 @@ Changes in 'metabat2/metabat2/main.nf': metabat2 \\ $args \\ -i $fasta \\ -@@ -38,7 +35,7 @@ - -o ${prefix} - - gzip -cn ${prefix} > ${prefix}.tsv.gz -- find . -name "*.fa" -type f | xargs -t -n 1 bgzip -@ ${task.cpus} -+ ## find . 
-name "*.fa" -type f | xargs -t -n 1 bgzip -@ ${task.cpus} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": -'modules/nf-core/metabat2/metabat2/meta.yml' is unchanged -'modules/nf-core/metabat2/metabat2/environment.yml' is unchanged -'modules/nf-core/metabat2/metabat2/tests/main.nf.test.snap' is unchanged 'modules/nf-core/metabat2/metabat2/tests/nextflow.config' is unchanged 'modules/nf-core/metabat2/metabat2/tests/main.nf.test' is unchanged +'modules/nf-core/metabat2/metabat2/tests/main.nf.test.snap' is unchanged ************************************************************ diff --git a/modules/nf-core/metamdbg/asm/environment.yml b/modules/nf-core/metamdbg/asm/environment.yml index 854b9ef..4c1cd35 100644 --- a/modules/nf-core/metamdbg/asm/environment.yml +++ b/modules/nf-core/metamdbg/asm/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - "bioconda::metamdbg=1.0" + - "bioconda::metamdbg=1.1" diff --git a/modules/nf-core/metamdbg/asm/main.nf b/modules/nf-core/metamdbg/asm/main.nf index 426ca3d..f421c90 100644 --- a/modules/nf-core/metamdbg/asm/main.nf +++ b/modules/nf-core/metamdbg/asm/main.nf @@ -4,8 +4,8 @@ process METAMDBG_ASM { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/metamdbg:1.0--hdcf5f25_1': - 'biocontainers/metamdbg:1.0--hdcf5f25_1' }" + 'https://depot.galaxyproject.org/singularity/metamdbg:1.1--h077b44d_1': + 'biocontainers/metamdbg:1.1--h077b44d_1' }" input: tuple val(meta), path(reads) diff --git a/modules/nf-core/metamdbg/asm/tests/main.nf.test.snap b/modules/nf-core/metamdbg/asm/tests/main.nf.test.snap index 1b48053..12a192d 100644 --- a/modules/nf-core/metamdbg/asm/tests/main.nf.test.snap +++ b/modules/nf-core/metamdbg/asm/tests/main.nf.test.snap @@ -4,39 +4,39 @@ "test.contigs.fasta.gz", "test.metaMDBG.log", [ - "versions.yml:md5,d8ff2a5fe2bb5c7eecd24ff95bf70c06" + "versions.yml:md5,7891f9a1057e30846f3f7ec4ab0c7b4b" ] ], "meta": { - "nf-test": "0.9.1", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.2" }, - "timestamp": "2024-11-05T11:42:07.219676949" + "timestamp": "2024-12-17T10:23:06.500307496" }, "metamdbg_asm - ont": { "content": [ "test.contigs.fasta.gz", "test.metaMDBG.log", [ - "versions.yml:md5,d8ff2a5fe2bb5c7eecd24ff95bf70c06" + "versions.yml:md5,7891f9a1057e30846f3f7ec4ab0c7b4b" ] ], "meta": { - "nf-test": "0.9.1", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.2" }, - "timestamp": "2024-11-05T11:41:15.996586123" + "timestamp": "2024-12-17T10:22:42.120580907" }, "stub_versions": { "content": [ [ - "versions.yml:md5,d8ff2a5fe2bb5c7eecd24ff95bf70c06" + "versions.yml:md5,7891f9a1057e30846f3f7ec4ab0c7b4b" ] ], "meta": { - "nf-test": "0.9.1", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.2" }, - "timestamp": "2024-11-05T11:42:35.946169222" + "timestamp": "2024-12-17T10:23:22.954484953" } } \ No newline at end of file diff --git a/modules/nf-core/prokka/environment.yml b/modules/nf-core/prokka/environment.yml new file mode 100644 index 0000000..7c9753f --- /dev/null +++ b/modules/nf-core/prokka/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::prokka=1.14.6 
diff --git a/modules/nf-core/prokka/main.nf b/modules/nf-core/prokka/main.nf new file mode 100644 index 0000000..adfda03 --- /dev/null +++ b/modules/nf-core/prokka/main.nf @@ -0,0 +1,52 @@ +process PROKKA { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/prokka:1.14.6--pl5321hdfd78af_4' : + 'biocontainers/prokka:1.14.6--pl5321hdfd78af_4' }" + + input: + tuple val(meta), path(fasta) + path proteins + path prodigal_tf + + output: + tuple val(meta), path("${prefix}/*.gff"), emit: gff + tuple val(meta), path("${prefix}/*.gbk"), emit: gbk + tuple val(meta), path("${prefix}/*.fna"), emit: fna + tuple val(meta), path("${prefix}/*.faa"), emit: faa + tuple val(meta), path("${prefix}/*.ffn"), emit: ffn + tuple val(meta), path("${prefix}/*.sqn"), emit: sqn + tuple val(meta), path("${prefix}/*.fsa"), emit: fsa + tuple val(meta), path("${prefix}/*.tbl"), emit: tbl + tuple val(meta), path("${prefix}/*.err"), emit: err + tuple val(meta), path("${prefix}/*.log"), emit: log + tuple val(meta), path("${prefix}/*.txt"), emit: txt + tuple val(meta), path("${prefix}/*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def proteins_opt = proteins ? "--proteins ${proteins[0]}" : "" + def prodigal_tf = prodigal_tf ? 
"--prodigaltf ${prodigal_tf[0]}" : "" + """ + prokka \\ + $args \\ + --cpus $task.cpus \\ + --prefix $prefix \\ + $proteins_opt \\ + $prodigal_tf \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + prokka: \$(echo \$(prokka --version 2>&1) | sed 's/^.*prokka //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/prokka/meta.yml b/modules/nf-core/prokka/meta.yml new file mode 100644 index 0000000..9074573 --- /dev/null +++ b/modules/nf-core/prokka/meta.yml @@ -0,0 +1,161 @@ +name: prokka +description: Whole genome annotation of small genomes (bacterial, archeal, viral) +keywords: + - annotation + - fasta + - prokka +tools: + - prokka: + description: Rapid annotation of prokaryotic genomes + homepage: https://github.com/tseemann/prokka + doi: "10.1093/bioinformatics/btu153" + licence: ["GPL v2"] + identifier: biotools:prokka +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: | + FASTA file to be annotated. Has to contain at least a non-empty string dummy value. + - - proteins: + type: file + description: FASTA file of trusted proteins to first annotate from (optional) + - - prodigal_tf: + type: file + description: Training file to use for Prodigal (optional) +output: + - gff: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.gff: + type: file + description: annotation in GFF3 format, containing both sequences and annotations + pattern: "*.{gff}" + - gbk: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.gbk: + type: file + description: annotation in GenBank format, containing both sequences and annotations + pattern: "*.{gbk}" + - fna: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}/*.fna: + type: file + description: nucleotide FASTA file of the input contig sequences + pattern: "*.{fna}" + - faa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.faa: + type: file + description: protein FASTA file of the translated CDS sequences + pattern: "*.{faa}" + - ffn: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.ffn: + type: file + description: nucleotide FASTA file of all the prediction transcripts (CDS, rRNA, + tRNA, tmRNA, misc_RNA) + pattern: "*.{ffn}" + - sqn: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.sqn: + type: file + description: an ASN1 format "Sequin" file for submission to Genbank + pattern: "*.{sqn}" + - fsa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.fsa: + type: file + description: nucleotide FASTA file of the input contig sequences, used by "tbl2asn" + to create the .sqn file + pattern: "*.{fsa}" + - tbl: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.tbl: + type: file + description: feature Table file, used by "tbl2asn" to create the .sqn file + pattern: "*.{tbl}" + - err: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.err: + type: file + description: unacceptable annotations - the NCBI discrepancy report. + pattern: "*.{err}" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}/*.log: + type: file + description: contains all the output that Prokka produced during its run + pattern: "*.{log}" + - txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.txt: + type: file + description: statistics relating to the annotated features found + pattern: "*.{txt}" + - tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.tsv: + type: file + description: tab-separated file of all features (locus_tag,ftype,len_bp,gene,EC_number,COG,product) + pattern: "*.{tsv}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@rpetit3" +maintainers: + - "@rpetit3" diff --git a/modules/nf-core/prokka/tests/main.nf.test b/modules/nf-core/prokka/tests/main.nf.test new file mode 100644 index 0000000..dca19bb --- /dev/null +++ b/modules/nf-core/prokka/tests/main.nf.test @@ -0,0 +1,50 @@ +nextflow_process { + + name "Test Process PROKKA" + script "../main.nf" + process "PROKKA" + + tag "modules" + tag "modules_nfcore" + tag "prokka" + + test("Prokka - sarscov2 - genome.fasta") { + + when { + process { + """ + input[0] = Channel.fromList([ + tuple([ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)) + ]) + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.gbk.get(0).get(1)).exists() }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { assert path(process.out.sqn.get(0).get(1)).exists() }, + { assert snapshot( + process.out.gff, + process.out.fna, + process.out.faa, + process.out.ffn, + process.out.fsa, + process.out.tbl, + process.out.err, + process.out.txt, + process.out.tsv, + 
process.out.versions + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/prokka/tests/main.nf.test.snap b/modules/nf-core/prokka/tests/main.nf.test.snap new file mode 100644 index 0000000..874c989 --- /dev/null +++ b/modules/nf-core/prokka/tests/main.nf.test.snap @@ -0,0 +1,95 @@ +{ + "Prokka - sarscov2 - genome.fasta": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff:md5,5dbfb8fcf2db020564c16045976a0933" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fna:md5,787307f29a263e5657cc276ebbf7e2b3" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa:md5,a4ceda83262b3c222a6b1f508fb9e24b" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.ffn:md5,80f474b5367b7ea5ed23791935f65e34" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fsa:md5,71bbefcb7f12046bcd3263f58cfd5404" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.tbl:md5,d8f816a066ced94b62d9618b13fb8add" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.err:md5,b3daedc646fddd422824e2b3e5e9229d" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,b40e485ffc8eaf1feacf8d79d9751a33" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,da7c720c3018c5081d6a70b517b7d450" + ] + ], + [ + "versions.yml:md5,e83a22fe02167e290d90853b45650db9" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-30T12:34:20.447734" + } +} \ No newline at end of file diff --git a/modules/nf-core/prokka/tests/tags.yml b/modules/nf-core/prokka/tests/tags.yml new file mode 100644 index 0000000..a2dc7bd --- /dev/null +++ b/modules/nf-core/prokka/tests/tags.yml @@ -0,0 +1,2 @@ +prokka: + - "modules/nf-core/prokka/**" diff --git a/subworkflows/local/bin_qc.nf b/subworkflows/local/bin_qc.nf index 28dd461..423517d 100644 --- a/subworkflows/local/bin_qc.nf +++ 
b/subworkflows/local/bin_qc.nf @@ -1,6 +1,7 @@ include { CHECKM2_DATABASEDOWNLOAD } from '../../modules/nf-core/checkm2/databasedownload/main' include { CHECKM2_PREDICT } from '../../modules/nf-core/checkm2/predict/main' include { SEQKIT_STATS } from '../../modules/nf-core/seqkit/stats/main' +include { PROKKA } from '../../modules/nf-core/prokka/main' workflow BIN_QC { take: @@ -9,6 +10,9 @@ workflow BIN_QC { main: ch_versions = Channel.empty() + SEQKIT_STATS(bins) + ch_versions = ch_versions.mix(SEQKIT_STATS.out.versions) + if(params.enable_checkm2) { if(!params.checkm2_local_db) { CHECKM2_DATABASEDOWNLOAD(params.checkm2_db_version) @@ -32,8 +36,9 @@ workflow BIN_QC { ch_checkm2_tsv = Channel.empty() } - SEQKIT_STATS(bins) - ch_versions = ch_versions.mix(SEQKIT_STATS.out.versions) + if(params.enable_prokka) { + + } emit: checkm2_tsv = ch_checkm2_tsv diff --git a/subworkflows/local/bin_taxonomy.nf b/subworkflows/local/bin_taxonomy.nf index 6262965..a83b988 100644 --- a/subworkflows/local/bin_taxonomy.nf +++ b/subworkflows/local/bin_taxonomy.nf @@ -30,7 +30,7 @@ workflow BIN_TAXONOMY { ch_filtered_bins = bins | transpose() - | map { meta, bin -> [bin.getBaseName(), bin, meta]} + | map { meta, bin -> [bin.getSimpleName(), bin, meta]} | join(ch_bin_scores, failOnDuplicate: true) | filter { // it[3] = completeness, it[4] = contamination it[3] >= params.gtdbtk_min_completeness && it[4] <= params.gtdbtk_max_contamination