From 5f6e0c7c05788501d61e85506a1d00efda128d1b Mon Sep 17 00:00:00 2001
From: Joon-Klaps <joon.klaps@kuleuven.be>
Date: Tue, 5 Nov 2024 08:42:34 +0000
Subject: [PATCH 1/3] update nextclade & pangolin run

---
 modules/nf-core/nextclade/run/environment.yml |   4 +-
 modules/nf-core/nextclade/run/main.nf         |  24 +-
 modules/nf-core/nextclade/run/meta.yml        | 163 +++++++++---
 .../nf-core/nextclade/run/tests/main.nf.test  |  82 ++++++
 .../nextclade/run/tests/main.nf.test.snap     | 245 ++++++++++++++++++
 modules/nf-core/pangolin/environment.yml      |   6 +-
 modules/nf-core/pangolin/main.nf              |   6 +-
 modules/nf-core/pangolin/meta.yml             |  37 +--
 modules/nf-core/pangolin/tests/main.nf.test   |  57 ++++
 .../nf-core/pangolin/tests/main.nf.test.snap  |  68 +++++
 10 files changed, 628 insertions(+), 64 deletions(-)
 create mode 100644 modules/nf-core/nextclade/run/tests/main.nf.test
 create mode 100644 modules/nf-core/nextclade/run/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/pangolin/tests/main.nf.test
 create mode 100644 modules/nf-core/pangolin/tests/main.nf.test.snap

diff --git a/modules/nf-core/nextclade/run/environment.yml b/modules/nf-core/nextclade/run/environment.yml
index 1e50e8d4..09c0ba1a 100644
--- a/modules/nf-core/nextclade/run/environment.yml
+++ b/modules/nf-core/nextclade/run/environment.yml
@@ -1,7 +1,5 @@
-name: nextclade_run
 channels:
   - conda-forge
   - bioconda
-  - defaults
 dependencies:
-  - bioconda::nextclade=2.12.0
+  - bioconda::nextclade=3.8.2
diff --git a/modules/nf-core/nextclade/run/main.nf b/modules/nf-core/nextclade/run/main.nf
index 33fb34c6..cdb44437 100644
--- a/modules/nf-core/nextclade/run/main.nf
+++ b/modules/nf-core/nextclade/run/main.nf
@@ -4,8 +4,8 @@ process NEXTCLADE_RUN {
 
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/nextclade:2.12.0--h9ee0642_0' :
-        'biocontainers/nextclade:2.12.0--h9ee0642_0' }"
+        'https://depot.galaxyproject.org/singularity/nextclade:3.8.2--h9ee0642_0' :
+        'biocontainers/nextclade:3.8.2--h9ee0642_0' }"
 
     input:
     tuple val(meta), path(fasta)
@@ -20,7 +20,8 @@ process NEXTCLADE_RUN {
     tuple val(meta), path("${prefix}.auspice.json")  , optional:true, emit: json_auspice
     tuple val(meta), path("${prefix}.ndjson")        , optional:true, emit: ndjson
     tuple val(meta), path("${prefix}.aligned.fasta") , optional:true, emit: fasta_aligned
-    tuple val(meta), path("*.translation.fasta")     , optional:true, emit: fasta_translation
+    tuple val(meta), path("*_translation.*.fasta")   , optional:true, emit: fasta_translation
+    tuple val(meta), path("${prefix}.nwk")           , optional:true, emit: nwk
     path "versions.yml"                              , emit: versions
 
     when:
@@ -44,4 +45,21 @@ process NEXTCLADE_RUN {
         nextclade: \$(echo \$(nextclade --version 2>&1) | sed 's/^.*nextclade //; s/ .*\$//')
     END_VERSIONS
     """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.csv
+    touch ${prefix}.tsv
+    touch ${prefix}.json
+    touch ${prefix}.auspice.json
+    touch ${prefix}.aligned.fasta
+    touch ${prefix}.cds_translation.test.fasta
+    touch ${prefix}.nwk
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        nextclade: \$(echo \$(nextclade --version 2>&1) | sed 's/^.*nextclade //; s/ .*\$//')
+    END_VERSIONS
+    """
 }
diff --git a/modules/nf-core/nextclade/run/meta.yml b/modules/nf-core/nextclade/run/meta.yml
index ceebfe20..4062b3f4 100644
--- a/modules/nf-core/nextclade/run/meta.yml
+++ b/modules/nf-core/nextclade/run/meta.yml
@@ -1,59 +1,146 @@
 name: nextclade_run
-description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (C++ implementation)
+description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality
+  checks (C++ implementation)
 keywords:
   - nextclade
   - variant
   - consensus
 tools:
   - nextclade:
-      description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks
+      description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence
+        quality checks
       homepage: https://github.com/nextstrain/nextclade
       documentation: https://github.com/nextstrain/nextclade
       tool_dev_url: https://github.com/nextstrain/nextclade
       licence: ["MIT"]
+      identifier: biotools:nextclade
 input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - dataset:
-      type: path
-      description: Path containing the dataset files obtained by running nextclade dataset get
-      pattern: "*"
-  - fasta:
-      type: file
-      description: FASTA file containing one or more consensus sequences
-      pattern: "*.{fasta,fa}"
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - fasta:
+        type: file
+        description: FASTA file containing one or more consensus sequences
+        pattern: "*.{fasta,fa}"
+  - - dataset:
+        type: directory
+        description: Path containing the dataset files obtained by running nextclade
+          dataset get
+        pattern: "*"
 output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
   - csv:
-      type: file
-      description: CSV file containing nextclade results
-      pattern: "*.{csv}"
-  - json:
-      type: file
-      description: JSON file containing nextclade results
-      pattern: "*.{json}"
-  - json_tree:
-      type: file
-      description: Auspice JSON V2 containing nextclade results
-      pattern: "*.{tree.json}"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.csv:
+          type: file
+          description: CSV file containing nextclade results
+          pattern: "*.{csv}"
+  - csv_errors:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.errors.csv:
+          type: file
+          description: CSV file containing errors from nextclade results
+          pattern: "*.{errors.csv}"
+  - csv_insertions:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.insertions.csv:
+          type: file
+          description: CSV file containing insertions from nextclade results
+          pattern: "*.{insertions.csv}"
   - tsv:
-      type: file
-      description: TSV file containing nextclade results
-      pattern: "*.{tsv}"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.tsv:
+          type: file
+          description: TSV file containing nextclade results
+          pattern: "*.{tsv}"
+  - json:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.json:
+          type: file
+          description: JSON file containing nextclade results
+          pattern: "*.{json}"
+  - json_auspice:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.auspice.json:
+          type: file
+          description: Auspice JSON V2 containing nextclade results
+          pattern: "*.{auspice.json}"  # keep in sync with the ${prefix}.auspice.json emit
+  - ndjson:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.ndjson:
+          type: file
+          description: newline-delimited JSON file containing nextclade results
+          pattern: "*.{ndjson}"
+  - fasta_aligned:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.aligned.fasta:
+          type: file
+          description: FASTA file containing aligned sequences from nextclade results
+          pattern: "*.{aligned.fasta}"
+  - fasta_translation:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*_translation.*.fasta":
+          type: file
+          description: FASTA file containing aligned peptides from nextclade results
+          pattern: "*_translation.*.fasta"  # same glob as the process output path
+  - nwk:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.nwk:
+          type: file
+          description: NWK file containing nextclade results
+          pattern: "*.{nwk}"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
 authors:
   - "@antunderwood"
   - "@drpatelh"
 maintainers:
   - "@antunderwood"
   - "@drpatelh"
+  - "@drpatelh"
+updated on 2024.08.23:
+  - "@nmshahir"
diff --git a/modules/nf-core/nextclade/run/tests/main.nf.test b/modules/nf-core/nextclade/run/tests/main.nf.test
new file mode 100644
index 00000000..b486a3fe
--- /dev/null
+++ b/modules/nf-core/nextclade/run/tests/main.nf.test
@@ -0,0 +1,82 @@
+
+nextflow_process {
+
+    name "Test Process NEXTCLADE_RUN"
+    script "../main.nf"
+    process "NEXTCLADE_RUN"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "nextclade"
+    tag "nextclade/datasetget"
+    tag "nextclade/run"
+
+    setup {
+        run("NEXTCLADE_DATASETGET") {
+            script "../../datasetget/main.nf"
+            process {
+                """
+                input[0] = 'nextstrain/sars-cov-2/wuhan-hu-1/orfs'
+                input[1] = '2024-01-16--20-31-02Z'
+
+                """
+            }
+        }
+    }
+
+    test("sarscov2 default") {
+
+        when {
+            process {
+                """
+                input[0] = [[id: 'test'],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                    ]
+                input[1] = NEXTCLADE_DATASETGET.out.dataset // dataset produced by the setup block
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(
+                    process.out.csv,
+                    process.out.csv_errors,
+                    process.out.csv_insertions,
+                    process.out.fasta_aligned,
+                    process.out.fasta_translation,
+                    file(process.out.json[0][1]).readLines()[4..10],
+                    process.out.json_auspice,
+                    process.out.ndjson,
+                    process.out.nwk,
+                    process.out.tsv,
+                    process.out.versions
+                    ).match() }
+            )
+        }
+    }
+
+    test("sarscov2 default-stub") {
+        options '-stub'
+
+        when {
+            process {
+                """
+                input[0] = [[id: 'test'],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                    ]
+                input[1] = NEXTCLADE_DATASETGET.out.dataset // dataset produced by the setup block
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+}
diff --git a/modules/nf-core/nextclade/run/tests/main.nf.test.snap b/modules/nf-core/nextclade/run/tests/main.nf.test.snap
new file mode 100644
index 00000000..9294ed3a
--- /dev/null
+++ b/modules/nf-core/nextclade/run/tests/main.nf.test.snap
@@ -0,0 +1,245 @@
+{
+    "sarscov2 default-stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "10": [
+                    "versions.yml:md5,f279e9049492abc589365716afc17d78"
+                ],
+                "2": [
+                    
+                ],
+                "3": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "4": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "5": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.auspice.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "6": [
+                    
+                ],
+                "7": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.aligned.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "8": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.cds_translation.test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "9": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.nwk:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "csv": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "csv_errors": [
+                    
+                ],
+                "csv_insertions": [
+                    
+                ],
+                "fasta_aligned": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.aligned.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "fasta_translation": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.cds_translation.test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "json_auspice": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.auspice.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "ndjson": [
+                    
+                ],
+                "nwk": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.nwk:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "tsv": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,f279e9049492abc589365716afc17d78"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-09-07T07:21:00.853787859"
+    },
+    "sarscov2 default": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.csv:md5,59667a74af31daf6151b809cb4645942"
+                ]
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.aligned.fasta:md5,1bf54662837b0df37f1857c7fa631225"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    [
+                        "test.cds_translation.E.fasta:md5,1a6d93bd7abfeb193476a86950f07202",
+                        "test.cds_translation.M.fasta:md5,6f79ab0742c078fcd9d2a474518c7022",
+                        "test.cds_translation.N.fasta:md5,bef9912f101777bfff36225d7e5c3c1f",
+                        "test.cds_translation.ORF1a.fasta:md5,4dc0e2eb1f2a61939ba49500ee3fa41a",
+                        "test.cds_translation.ORF1b.fasta:md5,9c2e83d26161b5c887ff51cd64bd15bb",
+                        "test.cds_translation.ORF3a.fasta:md5,adcbede4ebc2cac7af755a9d29d28ea3",
+                        "test.cds_translation.ORF6.fasta:md5,3785b34cce978c95256f83db6ee82af0",
+                        "test.cds_translation.ORF7a.fasta:md5,6ede1acb9e75afc84aa30bbc40551d37",
+                        "test.cds_translation.ORF7b.fasta:md5,460e4cbc5f8c632c2bc9a8aedad5cf43",
+                        "test.cds_translation.ORF8.fasta:md5,c733c88e61b29542664368fbf6dd4c76",
+                        "test.cds_translation.ORF9b.fasta:md5,0aa13afc6cbf445fc92caa2e6c0a7548",
+                        "test.cds_translation.S.fasta:md5,77740927a3f00b7e5bfac392fa6d264c"
+                    ]
+                ]
+            ],
+            [
+                "  \"cladeNodeAttrKeys\": [",
+                "    {",
+                "      \"name\": \"Nextclade_pango\",",
+                "      \"displayName\": \"Pango lineage (Nextclade)\",",
+                "      \"description\": \"Pango lineage as inferred by Nextclade from the nearest neighbour in the reference tree. 98% accurate for recent sequences, for higher accuracy use dedicated pangolin software in UShER or pangoLEARN mode. Recombinants may get (wrongly) assigned to a designated recombinant lineage if they have similar breakpoints.\",",
+                "      \"hideInWeb\": false,",
+                "      \"skipAsReference\": false"
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.auspice.json:md5,84f57eb4611ffdf73eefb855819acb1d"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.ndjson:md5,946c0a0038ae937dab725d08a67c3c64"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.nwk:md5,70e9847b5fd44172e21323fe95ea57f8"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.tsv:md5,165384807e9a323da6bd15fc7f92420d"
+                ]
+            ],
+            [
+                "versions.yml:md5,f279e9049492abc589365716afc17d78"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-09-07T07:20:48.066964467"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/pangolin/environment.yml b/modules/nf-core/pangolin/environment.yml
index 3c4d98c8..d4379368 100644
--- a/modules/nf-core/pangolin/environment.yml
+++ b/modules/nf-core/pangolin/environment.yml
@@ -1,7 +1,7 @@
-name: pangolin
 channels:
   - conda-forge
   - bioconda
-  - defaults
 dependencies:
-  - bioconda::pangolin=4.2
+  - bioconda::pangolin-data=1.30
+  - bioconda::pangolin=4.3
+  - bioconda::snakemake=7.30.1
diff --git a/modules/nf-core/pangolin/main.nf b/modules/nf-core/pangolin/main.nf
index 00cf4290..08200de2 100644
--- a/modules/nf-core/pangolin/main.nf
+++ b/modules/nf-core/pangolin/main.nf
@@ -4,8 +4,8 @@ process PANGOLIN {
 
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/pangolin:4.2--pyhdfd78af_1' :
-        'biocontainers/pangolin:4.2--pyhdfd78af_1' }"
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/bb/bb7bac48e43a9cd6274e1f99c761a5785b74f6d8a55313ee634aaffbe87c1869/data' :
+        'community.wave.seqera.io/library/pangolin-data_pangolin_snakemake:5bbc297f7502ff33' }"
 
     input:
     tuple val(meta), path(fasta)
@@ -21,6 +21,8 @@ process PANGOLIN {
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
     """
+    export XDG_CACHE_HOME=/tmp/.cache
+
     pangolin \\
         $fasta\\
         --outfile ${prefix}.pangolin.csv \\
diff --git a/modules/nf-core/pangolin/meta.yml b/modules/nf-core/pangolin/meta.yml
index 6493f2c8..78713c13 100644
--- a/modules/nf-core/pangolin/meta.yml
+++ b/modules/nf-core/pangolin/meta.yml
@@ -5,30 +5,37 @@ keywords:
   - pangolin
   - lineage
 tools:
-  - star:
+  - pangolin:
       description: |
         Phylogenetic Assignment of Named Global Outbreak LINeages
       homepage: https://github.com/cov-lineages/pangolin#pangolearn-description
       manual: https://github.com/cov-lineages/pangolin#pangolearn-description
       licence: ["GPL-3.0-or-later"]
+      identifier: biotools:pangolin_cov-lineages
 input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-  - fasta:
-      type: file
-      description: |
-        The genome assembly to be evaluated
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+    - fasta:
+        type: file
+        description: |
+          The genome assembly to be evaluated
 output:
   - report:
-      type: file
-      description: Pangolin lineage report
-      pattern: "*.{csv}"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+      - "*.csv":
+          type: file
+          description: Pangolin lineage report
+          pattern: "*.{csv}"
   - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
 authors:
   - "@kevinmenden"
   - "@drpatelh"
diff --git a/modules/nf-core/pangolin/tests/main.nf.test b/modules/nf-core/pangolin/tests/main.nf.test
new file mode 100644
index 00000000..1ee097e3
--- /dev/null
+++ b/modules/nf-core/pangolin/tests/main.nf.test
@@ -0,0 +1,57 @@
+nextflow_process {
+
+    name "Test Process PANGOLIN"
+    script "../main.nf"
+    process "PANGOLIN"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "pangolin"
+
+    test("sarscov2 genome [fasta]") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                            [ id:'test' ], // meta map
+                            [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 genome [fasta] - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                            [ id:'test' ], // meta map
+                            [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/pangolin/tests/main.nf.test.snap b/modules/nf-core/pangolin/tests/main.nf.test.snap
new file mode 100644
index 00000000..681a4783
--- /dev/null
+++ b/modules/nf-core/pangolin/tests/main.nf.test.snap
@@ -0,0 +1,68 @@
+{
+    "sarscov2 genome [fasta] - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.pangolin.csv:md5,2cc701567cf37bad5c6574c29aa595d4"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,65ec45a19faa92a922073b9b08d90a8a"
+                ],
+                "report": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.pangolin.csv:md5,2cc701567cf37bad5c6574c29aa595d4"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,65ec45a19faa92a922073b9b08d90a8a"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-18T10:33:48.797026222"
+    },
+    "sarscov2 genome [fasta]": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.pangolin.csv:md5,2cc701567cf37bad5c6574c29aa595d4"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,65ec45a19faa92a922073b9b08d90a8a"
+                ],
+                "report": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.pangolin.csv:md5,2cc701567cf37bad5c6574c29aa595d4"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,65ec45a19faa92a922073b9b08d90a8a"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-18T10:33:17.535254436"
+    }
+}
\ No newline at end of file

From b88cdcf27683539a1f3ca25eeb017dab5ca5a81a Mon Sep 17 00:00:00 2001
From: Joon-Klaps <joon.klaps@kuleuven.be>
Date: Tue, 5 Nov 2024 08:59:11 +0000
Subject: [PATCH 2/3] update changelog & remove nextclade_dataset_reference
 param

---
 CHANGELOG.md                                  | 42 ++++++++-------
 main.nf                                       |  3 --
 modules.json                                  |  6 +--
 .../nextclade/datasetget/environment.yml      |  4 +-
 modules/nf-core/nextclade/datasetget/main.nf  | 26 +++++++--
 modules/nf-core/nextclade/datasetget/meta.yml | 46 ++++++++--------
 .../nextclade/datasetget/tests/main.nf.test   | 53 +++++++++++++++++++
 .../datasetget/tests/main.nf.test.snap        | 41 ++++++++++++++
 nextflow_schema.json                          |  5 --
 subworkflows/local/prepare_genome_illumina.nf |  2 -
 subworkflows/local/prepare_genome_nanopore.nf |  2 -
 workflows/illumina.nf                         |  2 -
 workflows/nanopore.nf                         |  2 -
 13 files changed, 167 insertions(+), 67 deletions(-)
 create mode 100644 modules/nf-core/nextclade/datasetget/tests/main.nf.test
 create mode 100644 modules/nf-core/nextclade/datasetget/tests/main.nf.test.snap

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1d2a5f6e..eb3990ba 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,22 +29,24 @@ Thank you to everyone else that has contributed by reporting bugs, enhancements
 - [[PR #438](https://github.com/nf-core/viralrecon/pull/438)] - Update fastp container to 0.23.4
 - [[PR #439](https://github.com/nf-core/viralrecon/pull/439)] - Fix cardinality issue when using `--bowtie2_index`
 - [[PR #435](https://github.com/nf-core/viralrecon/pull/435)] - Changed to a patched cutadapt from nf-core modules, added `skip_noninternal_primers` param to allow users to process primers inside the pipeline, and added `threeprime_adapters` to determine whether primers are 3' or 5' adapters.
+- [[PR #446](https://github.com/nf-core/viralrecon/pull/446)] - Update nextclade & pangolin modules
 
 ### Parameters
 
-| Old parameter | New parameter                |
-| ------------- | ---------------------------- |
-|               | `--skip_freyja`              |
-|               | `--freyja_repeats`           |
-|               | `--freyja_db_name`           |
-|               | `--freyja_barcodes`          |
-|               | `--freyja_lineages`          |
-|               | `--skip_freyja_boot`         |
-|               | `--additional_annotation`    |
-|               | `--min_contig_length`        |
-|               | `--min_perc_contig_aligned`  |
-|               | `--skip_noninternal_primers` |
-|               | `--threeprime_adapters`      |
+| Old parameter                   | New parameter                |
+| ------------------------------- | ---------------------------- |
+|                                 | `--skip_freyja`              |
+|                                 | `--freyja_repeats`           |
+|                                 | `--freyja_db_name`           |
+|                                 | `--freyja_barcodes`          |
+|                                 | `--freyja_lineages`          |
+|                                 | `--skip_freyja_boot`         |
+|                                 | `--additional_annotation`    |
+|                                 | `--min_contig_length`        |
+|                                 | `--min_perc_contig_aligned`  |
+|                                 | `--skip_noninternal_primers` |
+|                                 | `--threeprime_adapters`      |
+| `--nextclade_dataset_reference` |                              |
 
 > **NB:** Parameter has been **updated** if both old and new parameter information is present.
 > **NB:** Parameter has been **added** if just the new parameter information is present.
@@ -54,12 +56,14 @@ Thank you to everyone else that has contributed by reporting bugs, enhancements
 
 Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference.
 
-| Dependency | Old version | New version |
-| ---------- | ----------- | ----------- |
-| `cutadapt` |             | 4.6         |
-| `fastp`    | 0.23.2      | 0.23.4      |
-| `freyja`   |             | 1.5.0       |
-| `multiqc`  | 1.14        | 1.19        |
+| Dependency  | Old version | New version |
+| ----------- | ----------- | ----------- |
+| `cutadapt`  |             | 4.6         |
+| `fastp`     | 0.23.2      | 0.23.4      |
+| `freyja`    |             | 1.5.0       |
+| `multiqc`   | 1.14        | 1.19        |
+| `nextclade` | 2.12.0      | 3.8.2       |
+| `pangolin`  | 4.2         | 4.3         |
 
 > **NB:** Dependency has been **updated** if both old and new version information is present.
 >
diff --git a/main.nf b/main.nf
index 96ae5c1f..74710ca8 100644
--- a/main.nf
+++ b/main.nf
@@ -33,7 +33,6 @@ params.primer_bed    = getGenomeAttribute('primer_bed', primer_set, primer_set_v
 
 params.nextclade_dataset           = getGenomeAttribute('nextclade_dataset')
 params.nextclade_dataset_name      = getGenomeAttribute('nextclade_dataset_name')
-params.nextclade_dataset_reference = getGenomeAttribute('nextclade_dataset_reference')
 params.nextclade_dataset_tag       = getGenomeAttribute('nextclade_dataset_tag')
 
 
@@ -86,7 +85,6 @@ workflow NFCORE_VIRALRECON {
             params.bowtie2_index,
             params.nextclade_dataset,
             params.nextclade_dataset_name,
-            params.nextclade_dataset_reference,
             params.nextclade_dataset_tag
         )
 
@@ -102,7 +100,6 @@ workflow NFCORE_VIRALRECON {
             params.bowtie2_index,
             params.nextclade_dataset,
             params.nextclade_dataset_name,
-            params.nextclade_dataset_reference,
             params.nextclade_dataset_tag
         )
 
diff --git a/modules.json b/modules.json
index fb68a8c8..b6aaa67a 100644
--- a/modules.json
+++ b/modules.json
@@ -182,17 +182,17 @@
                     },
                     "nextclade/datasetget": {
                         "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
                         "installed_by": ["modules"]
                     },
                     "nextclade/run": {
                         "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
                         "installed_by": ["modules"]
                     },
                     "pangolin": {
                         "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "git_sha": "9530ba667bb6c809d998fb9bd567bb9514cb23e5",
                         "installed_by": ["modules"]
                     },
                     "picard/collectmultiplemetrics": {
diff --git a/modules/nf-core/nextclade/datasetget/environment.yml b/modules/nf-core/nextclade/datasetget/environment.yml
index cdd9f646..09c0ba1a 100644
--- a/modules/nf-core/nextclade/datasetget/environment.yml
+++ b/modules/nf-core/nextclade/datasetget/environment.yml
@@ -1,7 +1,5 @@
-name: nextclade_datasetget
 channels:
   - conda-forge
   - bioconda
-  - defaults
 dependencies:
-  - bioconda::nextclade=2.12.0
+  - bioconda::nextclade=3.8.2
diff --git a/modules/nf-core/nextclade/datasetget/main.nf b/modules/nf-core/nextclade/datasetget/main.nf
index 70c900a5..4f878381 100644
--- a/modules/nf-core/nextclade/datasetget/main.nf
+++ b/modules/nf-core/nextclade/datasetget/main.nf
@@ -4,12 +4,11 @@ process NEXTCLADE_DATASETGET {
 
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/nextclade:2.12.0--h9ee0642_0' :
-        'biocontainers/nextclade:2.12.0--h9ee0642_0' }"
+        'https://depot.galaxyproject.org/singularity/nextclade:3.8.2--h9ee0642_0' :
+        'biocontainers/nextclade:3.8.2--h9ee0642_0' }"
 
     input:
     val dataset
-    val reference
     val tag
 
     output:
@@ -22,7 +21,6 @@ process NEXTCLADE_DATASETGET {
     script:
     def args = task.ext.args ?: ''
     prefix = task.ext.prefix ?: "${dataset}"
-    def fasta = reference ? "--reference ${reference}" : ''
     def version = tag ? "--tag ${tag}" : ''
     """
     nextclade \\
@@ -30,7 +28,6 @@ process NEXTCLADE_DATASETGET {
         get \\
         $args \\
         --name $dataset \\
-        $fasta \\
         $version \\
         --output-dir $prefix
 
@@ -39,4 +36,23 @@ process NEXTCLADE_DATASETGET {
         nextclade: \$(echo \$(nextclade --version 2>&1) | sed 's/^.*nextclade //; s/ .*\$//')
     END_VERSIONS
     """
+
+    stub:
+    prefix = task.ext.prefix ?: "${dataset}"
+    """
+    mkdir -p ${prefix}
+    touch ${prefix}/CHANGELOG.md
+    touch ${prefix}/README.md
+    touch ${prefix}/genome_annotation.gff3
+    touch ${prefix}/pathogen.json
+    touch ${prefix}/reference.fasta
+    touch ${prefix}/sequences.fasta
+    touch ${prefix}/tree.json
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        nextclade: \$(echo \$(nextclade --version 2>&1) | sed 's/^.*nextclade //; s/ .*\$//')
+    END_VERSIONS
+    """
+
 }
diff --git a/modules/nf-core/nextclade/datasetget/meta.yml b/modules/nf-core/nextclade/datasetget/meta.yml
index f3fb403e..c60721da 100644
--- a/modules/nf-core/nextclade/datasetget/meta.yml
+++ b/modules/nf-core/nextclade/datasetget/meta.yml
@@ -1,41 +1,45 @@
 name: nextclade_datasetget
-description: Get dataset for SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (C++ implementation)
+description: Get dataset for SARS-CoV-2 genome clade assignment, mutation calling,
+  and sequence quality checks (C++ implementation)
 keywords:
   - nextclade
   - variant
   - consensus
 tools:
   - nextclade:
-      description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks
+      description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence
+        quality checks
       homepage: https://github.com/nextstrain/nextclade
       documentation: https://github.com/nextstrain/nextclade
       tool_dev_url: https://github.com/nextstrain/nextclade
       licence: ["MIT"]
+      identifier: biotools:nextclade
 input:
-  - dataset:
-      type: string
-      description: Name of dataset to retrieve. A list of available datasets can be obtained using the nextclade dataset list command.
-      pattern: ".+"
-  - reference:
-      type: string
-      description: Accession id to download dataset based on a particular reference sequence. A list of available datasets can be obtained using the nextclade dataset list command.
-      pattern: ".+"
-  - tag:
-      type: string
-      description: Version tag of the dataset to download. A list of available datasets can be obtained using the nextclade dataset list command.
-      pattern: ".+"
+  - - dataset:
+        type: string
+        description: Name of dataset to retrieve. A list of available datasets can be
+          obtained using the nextclade dataset list command.
+        pattern: ".+"
+  - - tag:
+        type: string
+        description: Version tag of the dataset to download. A list of available datasets
+          can be obtained using the nextclade dataset list command.
+        pattern: ".+"
 output:
+  - dataset:
+      - $prefix:
+          type: directory
+          description: Directory containing the dataset
   - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-  - prefix:
-      type: path
-      description: A directory containing the dataset files needed for nextclade run
-      pattern: "prefix"
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
 authors:
   - "@antunderwood"
   - "@drpatelh"
 maintainers:
   - "@antunderwood"
   - "@drpatelh"
+updated on 2024.08.27:
+  - "@nmshahir"
diff --git a/modules/nf-core/nextclade/datasetget/tests/main.nf.test b/modules/nf-core/nextclade/datasetget/tests/main.nf.test
new file mode 100644
index 00000000..d7eb12b7
--- /dev/null
+++ b/modules/nf-core/nextclade/datasetget/tests/main.nf.test
@@ -0,0 +1,53 @@
+
+nextflow_process {
+
+    name "Test Process NEXTCLADE_DATASETGET"
+    script "../main.nf"
+    process "NEXTCLADE_DATASETGET"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "nextclade"
+    tag "nextclade/datasetget"
+
+    test("species-tag") {
+
+        when {
+            process {
+                """
+                input[0] = 'nextstrain/sars-cov-2/wuhan-hu-1/orfs'
+				input[1] = '2024-01-16--20-31-02Z'
+
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assert snapshot(process.out).match()
+        }
+
+    }
+
+    test("species-tag-stub") {
+        options '-stub'
+
+        when {
+            process {
+                """
+                input[0] = 'nextstrain/sars-cov-2/wuhan-hu-1/orfs'
+				input[1] = '2024-01-16--20-31-02Z'
+
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+    
+}
diff --git a/modules/nf-core/nextclade/datasetget/tests/main.nf.test.snap b/modules/nf-core/nextclade/datasetget/tests/main.nf.test.snap
new file mode 100644
index 00000000..8de6fd56
--- /dev/null
+++ b/modules/nf-core/nextclade/datasetget/tests/main.nf.test.snap
@@ -0,0 +1,41 @@
+{
+    "species-tag": {
+        "content": [
+            {
+                "0": [
+                    [
+                        "CHANGELOG.md:md5,ebbe8be5a3c378ed903c1afb4d8c441d",
+                        "README.md:md5,c69387d632361334f0d7c9b66065f947",
+                        "genome_annotation.gff3:md5,4dff84d2d6ada820e0e3a8bc6798d402",
+                        "pathogen.json:md5,db5bbec52359c1e168ffc5e6dc0ea32a",
+                        "reference.fasta:md5,c7ce05f28e4ec0322c96f24e064ef55c",
+                        "sequences.fasta:md5,c2a4d6cbb837dce22d81f9c36dd0629e",
+                        "tree.json:md5,e180607cd34a6cb6bab101d295f6cedf"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,8c64a653330d1c5cb58ee1b8363b22d3"
+                ],
+                "dataset": [
+                    [
+                        "CHANGELOG.md:md5,ebbe8be5a3c378ed903c1afb4d8c441d",
+                        "README.md:md5,c69387d632361334f0d7c9b66065f947",
+                        "genome_annotation.gff3:md5,4dff84d2d6ada820e0e3a8bc6798d402",
+                        "pathogen.json:md5,db5bbec52359c1e168ffc5e6dc0ea32a",
+                        "reference.fasta:md5,c7ce05f28e4ec0322c96f24e064ef55c",
+                        "sequences.fasta:md5,c2a4d6cbb837dce22d81f9c36dd0629e",
+                        "tree.json:md5,e180607cd34a6cb6bab101d295f6cedf"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,8c64a653330d1c5cb58ee1b8363b22d3"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-08-27T19:55:42.211731791"
+    }
+}
\ No newline at end of file
diff --git a/nextflow_schema.json b/nextflow_schema.json
index b0ce2305..232ac373 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -243,11 +243,6 @@
                     "description": "Name of Nextclade dataset to retrieve. A list of available datasets can be obtained using the 'nextclade dataset list' command.",
                     "fa_icon": "fas fa-project-diagram"
                 },
-                "nextclade_dataset_reference": {
-                    "type": "string",
-                    "description": "Accession id to download dataset based on a particular reference sequence. A list of available datasets can be obtained using the 'nextclade dataset list' command.",
-                    "fa_icon": "fas fa-project-diagram"
-                },
                 "nextclade_dataset_tag": {
                     "type": "string",
                     "description": "Version tag of the dataset to download. A list of available datasets can be obtained using the 'nextclade dataset list' command.",
diff --git a/subworkflows/local/prepare_genome_illumina.nf b/subworkflows/local/prepare_genome_illumina.nf
index efc79b39..997de413 100644
--- a/subworkflows/local/prepare_genome_illumina.nf
+++ b/subworkflows/local/prepare_genome_illumina.nf
@@ -28,7 +28,6 @@ workflow PREPARE_GENOME {
     bowtie2_index
     nextclade_dataset
     nextclade_dataset_name
-    nextclade_dataset_reference
     nextclade_dataset_tag
 
 
@@ -192,7 +191,6 @@ workflow PREPARE_GENOME {
         } else if (nextclade_dataset_name) {
             NEXTCLADE_DATASETGET (
                 nextclade_dataset_name,
-                nextclade_dataset_reference,
                 nextclade_dataset_tag
             )
             ch_nextclade_db = NEXTCLADE_DATASETGET.out.dataset
diff --git a/subworkflows/local/prepare_genome_nanopore.nf b/subworkflows/local/prepare_genome_nanopore.nf
index 77a30645..fd6e9456 100644
--- a/subworkflows/local/prepare_genome_nanopore.nf
+++ b/subworkflows/local/prepare_genome_nanopore.nf
@@ -20,7 +20,6 @@ workflow PREPARE_GENOME {
     bowtie2_index
     nextclade_dataset
     nextclade_dataset_name
-    nextclade_dataset_reference
     nextclade_dataset_tag
 
     main:
@@ -114,7 +113,6 @@ workflow PREPARE_GENOME {
         } else if (nextclade_dataset_name) {
             NEXTCLADE_DATASETGET (
                 nextclade_dataset_name,
-                nextclade_dataset_reference,
                 nextclade_dataset_tag
             )
             ch_nextclade_db = NEXTCLADE_DATASETGET.out.dataset
diff --git a/workflows/illumina.nf b/workflows/illumina.nf
index 668e7c09..e93e02d0 100644
--- a/workflows/illumina.nf
+++ b/workflows/illumina.nf
@@ -130,7 +130,6 @@ workflow ILLUMINA {
     ch_bowtie2_index
     ch_nextclade_dataset
     ch_nextclade_dataset_name
-    ch_nextclade_dataset_reference
     ch_nextclade_dataset_tag
 
     main:
@@ -148,7 +147,6 @@ workflow ILLUMINA {
         ch_bowtie2_index,
         ch_nextclade_dataset,
         ch_nextclade_dataset_name,
-        ch_nextclade_dataset_reference,
         ch_nextclade_dataset_tag
     )
     ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions)
diff --git a/workflows/nanopore.nf b/workflows/nanopore.nf
index 096825e8..2701ddaa 100644
--- a/workflows/nanopore.nf
+++ b/workflows/nanopore.nf
@@ -116,7 +116,6 @@ workflow NANOPORE {
     ch_bowtie2_index
     ch_nextclade_dataset
     ch_nextclade_dataset_name
-    ch_nextclade_dataset_reference
     ch_nextclade_dataset_tag
 
     main:
@@ -146,7 +145,6 @@ workflow NANOPORE {
         ch_bowtie2_index,
         ch_nextclade_dataset,
         ch_nextclade_dataset_name,
-        ch_nextclade_dataset_reference,
         ch_nextclade_dataset_tag
     )
     ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions)

From 406eb40257a2a80cca9c25551cf0a9f71bf5a28c Mon Sep 17 00:00:00 2001
From: Joon-Klaps <joonklaps@gmail.com>
Date: Wed, 6 Nov 2024 08:20:17 +0100
Subject: [PATCH 3/3] Updating getGenomeAttribute to use nextclade v3pl
 versions

---
 main.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/main.nf b/main.nf
index 74710ca8..6d1a3323 100644
--- a/main.nf
+++ b/main.nf
@@ -31,9 +31,9 @@ params.gff           = getGenomeAttribute('gff')
 params.bowtie2_index = getGenomeAttribute('bowtie2')
 params.primer_bed    = getGenomeAttribute('primer_bed', primer_set, primer_set_version)
 
-params.nextclade_dataset           = getGenomeAttribute('nextclade_dataset')
+params.nextclade_dataset           = getGenomeAttribute('nextclade_dataset_v3pl')
 params.nextclade_dataset_name      = getGenomeAttribute('nextclade_dataset_name')
-params.nextclade_dataset_tag       = getGenomeAttribute('nextclade_dataset_tag')
+params.nextclade_dataset_tag       = getGenomeAttribute('nextclade_dataset_tag_v3pl')
 
 
 /*