From 5f6e0c7c05788501d61e85506a1d00efda128d1b Mon Sep 17 00:00:00 2001 From: Joon-Klaps Date: Tue, 5 Nov 2024 08:42:34 +0000 Subject: [PATCH 1/3] update nextclade & pangolin run --- modules/nf-core/nextclade/run/environment.yml | 4 +- modules/nf-core/nextclade/run/main.nf | 24 +- modules/nf-core/nextclade/run/meta.yml | 163 +++++++++--- .../nf-core/nextclade/run/tests/main.nf.test | 82 ++++++ .../nextclade/run/tests/main.nf.test.snap | 245 ++++++++++++++++++ modules/nf-core/pangolin/environment.yml | 6 +- modules/nf-core/pangolin/main.nf | 6 +- modules/nf-core/pangolin/meta.yml | 37 +-- modules/nf-core/pangolin/tests/main.nf.test | 57 ++++ .../nf-core/pangolin/tests/main.nf.test.snap | 68 +++++ 10 files changed, 628 insertions(+), 64 deletions(-) create mode 100644 modules/nf-core/nextclade/run/tests/main.nf.test create mode 100644 modules/nf-core/nextclade/run/tests/main.nf.test.snap create mode 100644 modules/nf-core/pangolin/tests/main.nf.test create mode 100644 modules/nf-core/pangolin/tests/main.nf.test.snap diff --git a/modules/nf-core/nextclade/run/environment.yml b/modules/nf-core/nextclade/run/environment.yml index 1e50e8d4..09c0ba1a 100644 --- a/modules/nf-core/nextclade/run/environment.yml +++ b/modules/nf-core/nextclade/run/environment.yml @@ -1,7 +1,5 @@ -name: nextclade_run channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::nextclade=2.12.0 + - bioconda::nextclade=3.8.2 diff --git a/modules/nf-core/nextclade/run/main.nf b/modules/nf-core/nextclade/run/main.nf index 33fb34c6..cdb44437 100644 --- a/modules/nf-core/nextclade/run/main.nf +++ b/modules/nf-core/nextclade/run/main.nf @@ -4,8 +4,8 @@ process NEXTCLADE_RUN { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/nextclade:2.12.0--h9ee0642_0' : - 'biocontainers/nextclade:2.12.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/nextclade:3.8.2--h9ee0642_0' : + 'biocontainers/nextclade:3.8.2--h9ee0642_0' }" input: tuple val(meta), path(fasta) @@ -20,7 +20,8 @@ process NEXTCLADE_RUN { tuple val(meta), path("${prefix}.auspice.json") , optional:true, emit: json_auspice tuple val(meta), path("${prefix}.ndjson") , optional:true, emit: ndjson tuple val(meta), path("${prefix}.aligned.fasta") , optional:true, emit: fasta_aligned - tuple val(meta), path("*.translation.fasta") , optional:true, emit: fasta_translation + tuple val(meta), path("*_translation.*.fasta") , optional:true, emit: fasta_translation + tuple val(meta), path("${prefix}.nwk") , optional:true, emit: nwk path "versions.yml" , emit: versions when: @@ -44,4 +45,21 @@ process NEXTCLADE_RUN { nextclade: \$(echo \$(nextclade --version 2>&1) | sed 's/^.*nextclade //; s/ .*\$//') END_VERSIONS """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.csv + touch ${prefix}.tsv + touch ${prefix}.json + touch ${prefix}.auspice.json + touch ${prefix}.aligned.fasta + touch ${prefix}.cds_translation.test.fasta + touch ${prefix}.nwk + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + nextclade: \$(echo \$(nextclade --version 2>&1) | sed 's/^.*nextclade //; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/nextclade/run/meta.yml b/modules/nf-core/nextclade/run/meta.yml index ceebfe20..4062b3f4 100644 --- a/modules/nf-core/nextclade/run/meta.yml +++ b/modules/nf-core/nextclade/run/meta.yml @@ -1,59 +1,146 @@ name: nextclade_run -description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (C++ implementation) +description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality + checks (C++ implementation) keywords: - nextclade - variant - consensus tools: - nextclade: - 
description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks + description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence + quality checks homepage: https://github.com/nextstrain/nextclade documentation: https://github.com/nextstrain/nextclade tool_dev_url: https://github.com/nextstrain/nextclade licence: ["MIT"] + identifier: biotools:nextclade input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - dataset: - type: path - description: Path containing the dataset files obtained by running nextclade dataset get - pattern: "*" - - fasta: - type: file - description: FASTA file containing one or more consensus sequences - pattern: "*.{fasta,fa}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA file containing one or more consensus sequences + pattern: "*.{fasta,fa}" + - - dataset: + type: directory + description: Path containing the dataset files obtained by running nextclade + dataset get + pattern: "*" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - csv: - type: file - description: CSV file containing nextclade results - pattern: "*.{csv}" - - json: - type: file - description: JSON file containing nextclade results - pattern: "*.{json}" - - json_tree: - type: file - description: Auspice JSON V2 containing nextclade results - pattern: "*.{tree.json}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.csv: + type: file + description: CSV file containing nextclade results + pattern: "*.{csv}" + - csv_errors: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.errors.csv: + type: file + description: CSV file containing errors from nextclade results + pattern: "*.{errors.csv}" + - csv_insertions: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.insertions.csv: + type: file + description: CSV file containing insertions from nextclade results + pattern: "*.{insertions.csv}" - tsv: - type: file - description: TSV file containing nextclade results - pattern: "*.{tsv}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.tsv: + type: file + description: TSV file containing nextclade results + pattern: "*.{tsv}" + - json: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.json: + type: file + description: JSON file containing nextclade results + pattern: "*.{json}" + - json_auspice: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.auspice.json: + type: file + description: Auspice JSON V2 containing nextclade results + pattern: "*.{tree.json}" + - ndjson: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.ndjson: + type: file + description: newline-delimited JSON file containing nextclade results + pattern: "*.{ndjson}" + - fasta_aligned: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.aligned.fasta: + type: file + description: FASTA file containing aligned sequences from nextclade results + pattern: "*.{aligned.fasta}" + - fasta_translation: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_translation.*.fasta": + type: file + description: FASTA file containing aligned peptides from nextclade results + pattern: "*.{_translation.}*.{fasta}" + - nwk: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.nwk: + type: file + description: NWK file containing nextclade results + pattern: "*.{nwk}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@antunderwood" - "@drpatelh" maintainers: - "@antunderwood" - "@drpatelh" + - "@drpatelh" +updated on 2024.08.23: + - "@nmshahir" diff --git a/modules/nf-core/nextclade/run/tests/main.nf.test b/modules/nf-core/nextclade/run/tests/main.nf.test new file mode 100644 index 00000000..b486a3fe --- /dev/null +++ b/modules/nf-core/nextclade/run/tests/main.nf.test @@ -0,0 +1,82 @@ + +nextflow_process { + + name "Test Process NEXTCLADE_RUN" + script "../main.nf" + process "NEXTCLADE_RUN" + + tag "modules" + tag "modules_nfcore" + tag "nextclade" + tag "nextclade/datasetget" + tag "nextclade/run" + + setup { + run("NEXTCLADE_DATASETGET") { + script "../../datasetget/main.nf" + process { + """ + input[0] = 'nextstrain/sars-cov-2/wuhan-hu-1/orfs' + input[1] = '2024-01-16--20-31-02Z' + + """ + } + } + } + + test("sarscov2 default") { + + when { + process { + """ + input[0] = [[id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = NEXTCLADE_DATASETGET.out.dataset + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + 
process.out.csv, + process.out.csv_errors, + process.out.csv_insertions, + process.out.fasta_aligned, + process.out.fasta_translation, + file(process.out.json[0][1]).readLines()[4..10], + process.out.json_auspice, + process.out.ndjson, + process.out.nwk, + process.out.tsv, + process.out.versions + ).match() } + ) + } + } + + test("sarscov2 default-stub") { + options '-stub' + + when { + process { + """ + input[0] = [[id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = NEXTCLADE_DATASETGET.out.dataset + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/nextclade/run/tests/main.nf.test.snap b/modules/nf-core/nextclade/run/tests/main.nf.test.snap new file mode 100644 index 00000000..9294ed3a --- /dev/null +++ b/modules/nf-core/nextclade/run/tests/main.nf.test.snap @@ -0,0 +1,245 @@ +{ + "sarscov2 default-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "10": [ + "versions.yml:md5,f279e9049492abc589365716afc17d78" + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test" + }, + "test.auspice.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + + ], + "7": [ + [ + { + "id": "test" + }, + "test.aligned.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test" + }, + "test.cds_translation.test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test" + }, + "test.nwk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csv": [ + [ + { + "id": "test" + }, + "test.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csv_errors": [ + + ], + 
"csv_insertions": [ + + ], + "fasta_aligned": [ + [ + { + "id": "test" + }, + "test.aligned.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fasta_translation": [ + [ + { + "id": "test" + }, + "test.cds_translation.test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test" + }, + "test.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json_auspice": [ + [ + { + "id": "test" + }, + "test.auspice.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "ndjson": [ + + ], + "nwk": [ + [ + { + "id": "test" + }, + "test.nwk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tsv": [ + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,f279e9049492abc589365716afc17d78" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-07T07:21:00.853787859" + }, + "sarscov2 default": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.csv:md5,59667a74af31daf6151b809cb4645942" + ] + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test" + }, + "test.aligned.fasta:md5,1bf54662837b0df37f1857c7fa631225" + ] + ], + [ + [ + { + "id": "test" + }, + [ + "test.cds_translation.E.fasta:md5,1a6d93bd7abfeb193476a86950f07202", + "test.cds_translation.M.fasta:md5,6f79ab0742c078fcd9d2a474518c7022", + "test.cds_translation.N.fasta:md5,bef9912f101777bfff36225d7e5c3c1f", + "test.cds_translation.ORF1a.fasta:md5,4dc0e2eb1f2a61939ba49500ee3fa41a", + "test.cds_translation.ORF1b.fasta:md5,9c2e83d26161b5c887ff51cd64bd15bb", + "test.cds_translation.ORF3a.fasta:md5,adcbede4ebc2cac7af755a9d29d28ea3", + "test.cds_translation.ORF6.fasta:md5,3785b34cce978c95256f83db6ee82af0", + "test.cds_translation.ORF7a.fasta:md5,6ede1acb9e75afc84aa30bbc40551d37", + "test.cds_translation.ORF7b.fasta:md5,460e4cbc5f8c632c2bc9a8aedad5cf43", + "test.cds_translation.ORF8.fasta:md5,c733c88e61b29542664368fbf6dd4c76", + 
"test.cds_translation.ORF9b.fasta:md5,0aa13afc6cbf445fc92caa2e6c0a7548", + "test.cds_translation.S.fasta:md5,77740927a3f00b7e5bfac392fa6d264c" + ] + ] + ], + [ + " \"cladeNodeAttrKeys\": [", + " {", + " \"name\": \"Nextclade_pango\",", + " \"displayName\": \"Pango lineage (Nextclade)\",", + " \"description\": \"Pango lineage as inferred by Nextclade from the nearest neighbour in the reference tree. 98% accurate for recent sequences, for higher accuracy use dedicated pangolin software in UShER or pangoLEARN mode. Recombinants may get (wrongly) assigned to a designated recombinant lineage if they have similar breakpoints.\",", + " \"hideInWeb\": false,", + " \"skipAsReference\": false" + ], + [ + [ + { + "id": "test" + }, + "test.auspice.json:md5,84f57eb4611ffdf73eefb855819acb1d" + ] + ], + [ + [ + { + "id": "test" + }, + "test.ndjson:md5,946c0a0038ae937dab725d08a67c3c64" + ] + ], + [ + [ + { + "id": "test" + }, + "test.nwk:md5,70e9847b5fd44172e21323fe95ea57f8" + ] + ], + [ + [ + { + "id": "test" + }, + "test.tsv:md5,165384807e9a323da6bd15fc7f92420d" + ] + ], + [ + "versions.yml:md5,f279e9049492abc589365716afc17d78" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-07T07:20:48.066964467" + } +} \ No newline at end of file diff --git a/modules/nf-core/pangolin/environment.yml b/modules/nf-core/pangolin/environment.yml index 3c4d98c8..d4379368 100644 --- a/modules/nf-core/pangolin/environment.yml +++ b/modules/nf-core/pangolin/environment.yml @@ -1,7 +1,7 @@ -name: pangolin channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::pangolin=4.2 + - bioconda::pangolin-data=1.30 + - bioconda::pangolin=4.3 + - bioconda::snakemake=7.30.1 diff --git a/modules/nf-core/pangolin/main.nf b/modules/nf-core/pangolin/main.nf index 00cf4290..08200de2 100644 --- a/modules/nf-core/pangolin/main.nf +++ b/modules/nf-core/pangolin/main.nf @@ -4,8 +4,8 @@ process PANGOLIN { conda "${moduleDir}/environment.yml" container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pangolin:4.2--pyhdfd78af_1' : - 'biocontainers/pangolin:4.2--pyhdfd78af_1' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/bb/bb7bac48e43a9cd6274e1f99c761a5785b74f6d8a55313ee634aaffbe87c1869/data' : + 'community.wave.seqera.io/library/pangolin-data_pangolin_snakemake:5bbc297f7502ff33' }" input: tuple val(meta), path(fasta) @@ -21,6 +21,8 @@ process PANGOLIN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ + export XDG_CACHE_HOME=/tmp/.cache + pangolin \\ $fasta\\ --outfile ${prefix}.pangolin.csv \\ diff --git a/modules/nf-core/pangolin/meta.yml b/modules/nf-core/pangolin/meta.yml index 6493f2c8..78713c13 100644 --- a/modules/nf-core/pangolin/meta.yml +++ b/modules/nf-core/pangolin/meta.yml @@ -5,30 +5,37 @@ keywords: - pangolin - lineage tools: - - star: + - pangolin: description: | Phylogenetic Assignment of Named Global Outbreak LINeages homepage: https://github.com/cov-lineages/pangolin#pangolearn-description manual: https://github.com/cov-lineages/pangolin#pangolearn-description licence: ["GPL-3.0-or-later"] + identifier: biotools:pangolin_cov-lineages input: - - meta: - type: map - description: | - Groovy Map containing sample information - - fasta: - type: file - description: | - The genome assembly to be evaluated + - - meta: + type: map + description: | + Groovy Map containing sample information + - fasta: + type: file + description: | + The genome assembly to be evaluated output: - report: - type: file - description: Pangolin lineage report - pattern: "*.{csv}" + - meta: + type: map + description: | + Groovy Map containing sample information + - "*.csv": + type: file + description: Pangolin lineage report + pattern: "*.{csv}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + 
description: File containing software versions + pattern: "versions.yml" authors: - "@kevinmenden" - "@drpatelh" diff --git a/modules/nf-core/pangolin/tests/main.nf.test b/modules/nf-core/pangolin/tests/main.nf.test new file mode 100644 index 00000000..1ee097e3 --- /dev/null +++ b/modules/nf-core/pangolin/tests/main.nf.test @@ -0,0 +1,57 @@ +nextflow_process { + + name "Test Process PANGOLIN" + script "../main.nf" + process "PANGOLIN" + + tag "modules" + tag "modules_nfcore" + tag "pangolin" + + test("sarscov2 genome [fasta]") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 genome [fasta] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/pangolin/tests/main.nf.test.snap b/modules/nf-core/pangolin/tests/main.nf.test.snap new file mode 100644 index 00000000..681a4783 --- /dev/null +++ b/modules/nf-core/pangolin/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "sarscov2 genome [fasta] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.pangolin.csv:md5,2cc701567cf37bad5c6574c29aa595d4" + ] + ], + "1": [ + "versions.yml:md5,65ec45a19faa92a922073b9b08d90a8a" + ], + "report": [ + [ + { + "id": "test" + }, + "test.pangolin.csv:md5,2cc701567cf37bad5c6574c29aa595d4" + ] + ], + "versions": [ + "versions.yml:md5,65ec45a19faa92a922073b9b08d90a8a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + 
"timestamp": "2024-10-18T10:33:48.797026222" + }, + "sarscov2 genome [fasta]": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.pangolin.csv:md5,2cc701567cf37bad5c6574c29aa595d4" + ] + ], + "1": [ + "versions.yml:md5,65ec45a19faa92a922073b9b08d90a8a" + ], + "report": [ + [ + { + "id": "test" + }, + "test.pangolin.csv:md5,2cc701567cf37bad5c6574c29aa595d4" + ] + ], + "versions": [ + "versions.yml:md5,65ec45a19faa92a922073b9b08d90a8a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:33:17.535254436" + } +} \ No newline at end of file From b88cdcf27683539a1f3ca25eeb017dab5ca5a81a Mon Sep 17 00:00:00 2001 From: Joon-Klaps Date: Tue, 5 Nov 2024 08:59:11 +0000 Subject: [PATCH 2/3] update changelog & remove nextclade_dataset_reference param --- CHANGELOG.md | 42 ++++++++------- main.nf | 3 -- modules.json | 6 +-- .../nextclade/datasetget/environment.yml | 4 +- modules/nf-core/nextclade/datasetget/main.nf | 26 +++++++-- modules/nf-core/nextclade/datasetget/meta.yml | 46 ++++++++-------- .../nextclade/datasetget/tests/main.nf.test | 53 +++++++++++++++++++ .../datasetget/tests/main.nf.test.snap | 41 ++++++++++++++ nextflow_schema.json | 5 -- subworkflows/local/prepare_genome_illumina.nf | 2 - subworkflows/local/prepare_genome_nanopore.nf | 2 - workflows/illumina.nf | 2 - workflows/nanopore.nf | 2 - 13 files changed, 167 insertions(+), 67 deletions(-) create mode 100644 modules/nf-core/nextclade/datasetget/tests/main.nf.test create mode 100644 modules/nf-core/nextclade/datasetget/tests/main.nf.test.snap diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d2a5f6e..eb3990ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,22 +29,24 @@ Thank you to everyone else that has contributed by reporting bugs, enhancements - [[PR #438](https://github.com/nf-core/viralrecon/pull/438)] - Update fastp container to 0.23.4 - [[PR #439](https://github.com/nf-core/viralrecon/pull/439)] - Fix cardinality issue when using 
`--bowtie2_index` - [[PR #435](https://github.com/nf-core/viralrecon/pull/435)] - Changed to a patched cutadapt from nf-core modules, added `skip_noninternal_primers` param to allow users to process primers inside the pipeline, and added `threeprime_adapters` to determine whether primers are 3' or 5' adapters. +- [[PR #446](https://github.com/nf-core/viralrecon/pull/446)] - Update nextclade & pangolin modules ### Parameters -| Old parameter | New parameter | -| ------------- | ---------------------------- | -| | `--skip_freyja` | -| | `--freyja_repeats` | -| | `--freyja_db_name` | -| | `--freyja_barcodes` | -| | `--freyja_lineages` | -| | `--skip_freyja_boot` | -| | `--additional_annotation` | -| | `--min_contig_length` | -| | `--min_perc_contig_aligned` | -| | `--skip_noninternal_primers` | -| | `--threeprime_adapters` | +| Old parameter | New parameter | +| ------------------------------- | ---------------------------- | +| | `--skip_freyja` | +| | `--freyja_repeats` | +| | `--freyja_db_name` | +| | `--freyja_barcodes` | +| | `--freyja_lineages` | +| | `--skip_freyja_boot` | +| | `--additional_annotation` | +| | `--min_contig_length` | +| | `--min_perc_contig_aligned` | +| | `--skip_noninternal_primers` | +| | `--threeprime_adapters` | +| `--nextclade_dataset_reference` | | > **NB:** Parameter has been **updated** if both old and new parameter information is present. > **NB:** Parameter has been **added** if just the new parameter information is present. @@ -54,12 +56,14 @@ Thank you to everyone else that has contributed by reporting bugs, enhancements Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. 
-| Dependency | Old version | New version | -| ---------- | ----------- | ----------- | -| `cutadapt` | | 4.6 | -| `fastp` | 0.23.2 | 0.23.4 | -| `freyja` | | 1.5.0 | -| `multiqc` | 1.14 | 1.19 | +| Dependency | Old version | New version | +| ----------- | ----------- | ----------- | +| `cutadapt` | | 4.6 | +| `fastp` | 0.23.2 | 0.23.4 | +| `freyja` | | 1.5.0 | +| `multiqc` | 1.14 | 1.19 | +| `nextclade` | 2.12.0 | 3.8.2 | +| `pangolin` | 4.2 | 4.3 | > **NB:** Dependency has been **updated** if both old and new version information is present. > diff --git a/main.nf b/main.nf index 96ae5c1f..74710ca8 100644 --- a/main.nf +++ b/main.nf @@ -33,7 +33,6 @@ params.primer_bed = getGenomeAttribute('primer_bed', primer_set, primer_set_v params.nextclade_dataset = getGenomeAttribute('nextclade_dataset') params.nextclade_dataset_name = getGenomeAttribute('nextclade_dataset_name') -params.nextclade_dataset_reference = getGenomeAttribute('nextclade_dataset_reference') params.nextclade_dataset_tag = getGenomeAttribute('nextclade_dataset_tag') @@ -86,7 +85,6 @@ workflow NFCORE_VIRALRECON { params.bowtie2_index, params.nextclade_dataset, params.nextclade_dataset_name, - params.nextclade_dataset_reference, params.nextclade_dataset_tag ) @@ -102,7 +100,6 @@ workflow NFCORE_VIRALRECON { params.bowtie2_index, params.nextclade_dataset, params.nextclade_dataset_name, - params.nextclade_dataset_reference, params.nextclade_dataset_tag ) diff --git a/modules.json b/modules.json index fb68a8c8..b6aaa67a 100644 --- a/modules.json +++ b/modules.json @@ -182,17 +182,17 @@ }, "nextclade/datasetget": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "nextclade/run": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "pangolin": { "branch": "master", - "git_sha": 
"3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "9530ba667bb6c809d998fb9bd567bb9514cb23e5", "installed_by": ["modules"] }, "picard/collectmultiplemetrics": { diff --git a/modules/nf-core/nextclade/datasetget/environment.yml b/modules/nf-core/nextclade/datasetget/environment.yml index cdd9f646..09c0ba1a 100644 --- a/modules/nf-core/nextclade/datasetget/environment.yml +++ b/modules/nf-core/nextclade/datasetget/environment.yml @@ -1,7 +1,5 @@ -name: nextclade_datasetget channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::nextclade=2.12.0 + - bioconda::nextclade=3.8.2 diff --git a/modules/nf-core/nextclade/datasetget/main.nf b/modules/nf-core/nextclade/datasetget/main.nf index 70c900a5..4f878381 100644 --- a/modules/nf-core/nextclade/datasetget/main.nf +++ b/modules/nf-core/nextclade/datasetget/main.nf @@ -4,12 +4,11 @@ process NEXTCLADE_DATASETGET { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/nextclade:2.12.0--h9ee0642_0' : - 'biocontainers/nextclade:2.12.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/nextclade:3.8.2--h9ee0642_0' : + 'biocontainers/nextclade:3.8.2--h9ee0642_0' }" input: val dataset - val reference val tag output: @@ -22,7 +21,6 @@ process NEXTCLADE_DATASETGET { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${dataset}" - def fasta = reference ? "--reference ${reference}" : '' def version = tag ? 
"--tag ${tag}" : '' """ nextclade \\ @@ -30,7 +28,6 @@ process NEXTCLADE_DATASETGET { get \\ $args \\ --name $dataset \\ - $fasta \\ $version \\ --output-dir $prefix @@ -39,4 +36,23 @@ process NEXTCLADE_DATASETGET { nextclade: \$(echo \$(nextclade --version 2>&1) | sed 's/^.*nextclade //; s/ .*\$//') END_VERSIONS """ + + stub: + prefix = task.ext.prefix ?: "${dataset}" + """ + mkdir -p ${prefix} + touch ${prefix}/CHANGELOG.md + touch ${prefix}/README.md + touch ${prefix}/genome_annotation.gff3 + touch ${prefix}/pathogen.json + touch ${prefix}/reference.fasta + touch ${prefix}/sequences.fasta + touch ${prefix}/tree.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + nextclade: \$(echo \$(nextclade --version 2>&1) | sed 's/^.*nextclade //; s/ .*\$//') + END_VERSIONS + """ + } diff --git a/modules/nf-core/nextclade/datasetget/meta.yml b/modules/nf-core/nextclade/datasetget/meta.yml index f3fb403e..c60721da 100644 --- a/modules/nf-core/nextclade/datasetget/meta.yml +++ b/modules/nf-core/nextclade/datasetget/meta.yml @@ -1,41 +1,45 @@ name: nextclade_datasetget -description: Get dataset for SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (C++ implementation) +description: Get dataset for SARS-CoV-2 genome clade assignment, mutation calling, + and sequence quality checks (C++ implementation) keywords: - nextclade - variant - consensus tools: - nextclade: - description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks + description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence + quality checks homepage: https://github.com/nextstrain/nextclade documentation: https://github.com/nextstrain/nextclade tool_dev_url: https://github.com/nextstrain/nextclade licence: ["MIT"] + identifier: biotools:nextclade input: - - dataset: - type: string - description: Name of dataset to retrieve. A list of available datasets can be obtained using the nextclade dataset list command. 
- pattern: ".+" - - reference: - type: string - description: Accession id to download dataset based on a particular reference sequence. A list of available datasets can be obtained using the nextclade dataset list command. - pattern: ".+" - - tag: - type: string - description: Version tag of the dataset to download. A list of available datasets can be obtained using the nextclade dataset list command. - pattern: ".+" + - - dataset: + type: string + description: Name of dataset to retrieve. A list of available datasets can be + obtained using the nextclade dataset list command. + pattern: ".+" + - - tag: + type: string + description: Version tag of the dataset to download. A list of available datasets + can be obtained using the nextclade dataset list command. + pattern: ".+" output: + - dataset: + - $prefix: + type: directory + description: Directory containing the dataset - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - prefix: - type: path - description: A directory containing the dataset files needed for nextclade run - pattern: "prefix" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@antunderwood" - "@drpatelh" maintainers: - "@antunderwood" - "@drpatelh" +updated on 2024.08.27: + - "@nmshahir" diff --git a/modules/nf-core/nextclade/datasetget/tests/main.nf.test b/modules/nf-core/nextclade/datasetget/tests/main.nf.test new file mode 100644 index 00000000..d7eb12b7 --- /dev/null +++ b/modules/nf-core/nextclade/datasetget/tests/main.nf.test @@ -0,0 +1,53 @@ + +nextflow_process { + + name "Test Process NEXTCLADE_DATASETGET" + script "../main.nf" + process "NEXTCLADE_DATASETGET" + + tag "modules" + tag "modules_nfcore" + tag "nextclade" + tag "nextclade/datasetget" + + test("species-tag") { + + when { + process { + """ + input[0] = 'nextstrain/sars-cov-2/wuhan-hu-1/orfs' + input[1] = '2024-01-16--20-31-02Z' + + """ + } + } + + then { + 
assert process.success + assert snapshot(process.out).match() + } + + } + + test("species-tag-stub") { + options '-stub' + + when { + process { + """ + input[0] = 'nextstrain/sars-cov-2/wuhan-hu-1/orfs' + input[1] = '2024-01-16--20-31-02Z' + + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/nextclade/datasetget/tests/main.nf.test.snap b/modules/nf-core/nextclade/datasetget/tests/main.nf.test.snap new file mode 100644 index 00000000..8de6fd56 --- /dev/null +++ b/modules/nf-core/nextclade/datasetget/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "species-tag": { + "content": [ + { + "0": [ + [ + "CHANGELOG.md:md5,ebbe8be5a3c378ed903c1afb4d8c441d", + "README.md:md5,c69387d632361334f0d7c9b66065f947", + "genome_annotation.gff3:md5,4dff84d2d6ada820e0e3a8bc6798d402", + "pathogen.json:md5,db5bbec52359c1e168ffc5e6dc0ea32a", + "reference.fasta:md5,c7ce05f28e4ec0322c96f24e064ef55c", + "sequences.fasta:md5,c2a4d6cbb837dce22d81f9c36dd0629e", + "tree.json:md5,e180607cd34a6cb6bab101d295f6cedf" + ] + ], + "1": [ + "versions.yml:md5,8c64a653330d1c5cb58ee1b8363b22d3" + ], + "dataset": [ + [ + "CHANGELOG.md:md5,ebbe8be5a3c378ed903c1afb4d8c441d", + "README.md:md5,c69387d632361334f0d7c9b66065f947", + "genome_annotation.gff3:md5,4dff84d2d6ada820e0e3a8bc6798d402", + "pathogen.json:md5,db5bbec52359c1e168ffc5e6dc0ea32a", + "reference.fasta:md5,c7ce05f28e4ec0322c96f24e064ef55c", + "sequences.fasta:md5,c2a4d6cbb837dce22d81f9c36dd0629e", + "tree.json:md5,e180607cd34a6cb6bab101d295f6cedf" + ] + ], + "versions": [ + "versions.yml:md5,8c64a653330d1c5cb58ee1b8363b22d3" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-27T19:55:42.211731791" + } +} \ No newline at end of file diff --git a/nextflow_schema.json b/nextflow_schema.json index b0ce2305..232ac373 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -243,11 +243,6 @@ 
"description": "Name of Nextclade dataset to retrieve. A list of available datasets can be obtained using the 'nextclade dataset list' command.", "fa_icon": "fas fa-project-diagram" }, - "nextclade_dataset_reference": { - "type": "string", - "description": "Accession id to download dataset based on a particular reference sequence. A list of available datasets can be obtained using the 'nextclade dataset list' command.", - "fa_icon": "fas fa-project-diagram" - }, "nextclade_dataset_tag": { "type": "string", "description": "Version tag of the dataset to download. A list of available datasets can be obtained using the 'nextclade dataset list' command.", diff --git a/subworkflows/local/prepare_genome_illumina.nf b/subworkflows/local/prepare_genome_illumina.nf index efc79b39..997de413 100644 --- a/subworkflows/local/prepare_genome_illumina.nf +++ b/subworkflows/local/prepare_genome_illumina.nf @@ -28,7 +28,6 @@ workflow PREPARE_GENOME { bowtie2_index nextclade_dataset nextclade_dataset_name - nextclade_dataset_reference nextclade_dataset_tag @@ -192,7 +191,6 @@ workflow PREPARE_GENOME { } else if (nextclade_dataset_name) { NEXTCLADE_DATASETGET ( nextclade_dataset_name, - nextclade_dataset_reference, nextclade_dataset_tag ) ch_nextclade_db = NEXTCLADE_DATASETGET.out.dataset diff --git a/subworkflows/local/prepare_genome_nanopore.nf b/subworkflows/local/prepare_genome_nanopore.nf index 77a30645..fd6e9456 100644 --- a/subworkflows/local/prepare_genome_nanopore.nf +++ b/subworkflows/local/prepare_genome_nanopore.nf @@ -20,7 +20,6 @@ workflow PREPARE_GENOME { bowtie2_index nextclade_dataset nextclade_dataset_name - nextclade_dataset_reference nextclade_dataset_tag main: @@ -114,7 +113,6 @@ workflow PREPARE_GENOME { } else if (nextclade_dataset_name) { NEXTCLADE_DATASETGET ( nextclade_dataset_name, - nextclade_dataset_reference, nextclade_dataset_tag ) ch_nextclade_db = NEXTCLADE_DATASETGET.out.dataset diff --git a/workflows/illumina.nf b/workflows/illumina.nf index 
668e7c09..e93e02d0 100644 --- a/workflows/illumina.nf +++ b/workflows/illumina.nf @@ -130,7 +130,6 @@ workflow ILLUMINA { ch_bowtie2_index ch_nextclade_dataset ch_nextclade_dataset_name - ch_nextclade_dataset_reference ch_nextclade_dataset_tag main: @@ -148,7 +147,6 @@ workflow ILLUMINA { ch_bowtie2_index, ch_nextclade_dataset, ch_nextclade_dataset_name, - ch_nextclade_dataset_reference, ch_nextclade_dataset_tag ) ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) diff --git a/workflows/nanopore.nf b/workflows/nanopore.nf index 096825e8..2701ddaa 100644 --- a/workflows/nanopore.nf +++ b/workflows/nanopore.nf @@ -116,7 +116,6 @@ workflow NANOPORE { ch_bowtie2_index ch_nextclade_dataset ch_nextclade_dataset_name - ch_nextclade_dataset_reference ch_nextclade_dataset_tag main: @@ -146,7 +145,6 @@ workflow NANOPORE { ch_bowtie2_index, ch_nextclade_dataset, ch_nextclade_dataset_name, - ch_nextclade_dataset_reference, ch_nextclade_dataset_tag ) ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) From 406eb40257a2a80cca9c25551cf0a9f71bf5a28c Mon Sep 17 00:00:00 2001 From: Joon-Klaps Date: Wed, 6 Nov 2024 08:20:17 +0100 Subject: [PATCH 3/3] Updating getGenomeAttribute to use nextclade v3pl versions --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 74710ca8..6d1a3323 100644 --- a/main.nf +++ b/main.nf @@ -31,9 +31,9 @@ params.gff = getGenomeAttribute('gff') params.bowtie2_index = getGenomeAttribute('bowtie2') params.primer_bed = getGenomeAttribute('primer_bed', primer_set, primer_set_version) -params.nextclade_dataset = getGenomeAttribute('nextclade_dataset') +params.nextclade_dataset = getGenomeAttribute('nextclade_dataset_v3pl') params.nextclade_dataset_name = getGenomeAttribute('nextclade_dataset_name') -params.nextclade_dataset_tag = getGenomeAttribute('nextclade_dataset_tag') +params.nextclade_dataset_tag = getGenomeAttribute('nextclade_dataset_tag_v3pl') /*