From b7b4bc2dd5aa7f9b0512e801175bbeeb36a088cc Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Wed, 4 Dec 2024 15:57:50 +0000 Subject: [PATCH 1/3] Update nextclade dataset tags and pangolin docker version for TheiaCoV workflows --- .../genomic_characterization/theiacov.md | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/workflows/genomic_characterization/theiacov.md b/docs/workflows/genomic_characterization/theiacov.md index 319a32ad8..ee8274179 100644 --- a/docs/workflows/genomic_characterization/theiacov.md +++ b/docs/workflows/genomic_characterization/theiacov.md @@ -486,8 +486,8 @@ The `organism_parameters` sub-workflow is the first step in all TheiaCoV workflo | gene_locations_bed_file | sars-cov-2 | `"gs://theiagen-public-files-rp/terra/sars-cov-2-files/sc2_gene_locations.bed"` | | genome_length_input | sars-cov-2 | `29903` | | nextclade_dataset_name_input | sars-cov-2 | `"nextstrain/sars-cov-2/wuhan-hu-1/orfs"` | - | nextclade_dataset_tag_input | sars-cov-2 | `"2024-07-17--12-57-03Z"` | - | pangolin_docker_image | sars-cov-2 | `"us-docker.pkg.dev/general-theiagen/staphb/pangolin:4.3.1-pdata-1.29 "`| + | nextclade_dataset_tag_input | sars-cov-2 | `"2024-11-19--14-18-53Z"` | + | pangolin_docker_image | sars-cov-2 | `"us-docker.pkg.dev/general-theiagen/staphb/pangolin:4.3.1-pdata-1.31"` | | reference_genome | sars-cov-2 | `"gs://theiagen-public-files-rp/terra/augur-sars-cov-2-references/MN908947.fasta"` | | vadr_max_length | sars-cov-2 | `30000` | | vadr_mem | sars-cov-2 | `8` | @@ -500,7 +500,7 @@ The `organism_parameters` sub-workflow is the first step in all TheiaCoV workflo | genome_length_input | MPXV | `197200` | | kraken_target_organism_input | MPXV | `"Monkeypox virus"` | | nextclade_dataset_name_input | MPXV | `"nextstrain/mpox/lineage-b.1"` | - | nextclade_dataset_tag_input | MPXV | `"2024-04-19--07-50-39Z"` | + | nextclade_dataset_tag_input | MPXV | `"2024-11-19--14-18-53Z"` | | primer_bed_file | MPXV | 
`"gs://theiagen-public-files/terra/mpxv-files/MPXV.primer.bed"` | | reference_genome | MPXV | `"gs://theiagen-public-files/terra/mpxv-files/MPXV.MT903345.reference.fasta"` | | reference_gff_file | MPXV | `"gs://theiagen-public-files/terra/mpxv-files/Mpox-MT903345.1.reference.gff3"` | @@ -531,13 +531,13 @@ The `organism_parameters` sub-workflow is the first step in all TheiaCoV workflo | vadr_mem | flu | all | all | `8` | | | vadr_options | flu | all | all | `"--atgonly --xnocomp --nomisc --alt_fail extrant5,extrant3 --mkey flu"` | | | nextclade_dataset_name_input | flu | ha | h1n1 | `"nextstrain/flu/h1n1pdm/ha/MW626062"` | | - | nextclade_dataset_tag_input | flu | ha | h1n1 | `"2024-07-03--08-29-55Z"` | | + | nextclade_dataset_tag_input | flu | ha | h1n1 | `"2024-11-27--02-51-00Z"` | | | reference_genome | flu | ha | h1n1 | `"gs://theiagen-public-files-rp/terra/flu-references/reference_h1n1pdm_ha.fasta"` | | | nextclade_dataset_name_input | flu | ha | h3n2 | `"nextstrain/flu/h3n2/ha/EPI1857216"` | | - | nextclade_dataset_tag_input | flu | ha | h3n2 | `"2024-08-08--05-08-21Z"` | | + | nextclade_dataset_tag_input | flu | ha | h3n2 | `"2024-11-27--02-51-00Z"` | | | reference_genome | flu | ha | h3n2 | `"gs://theiagen-public-files-rp/terra/flu-references/reference_h3n2_ha.fasta"` | | | nextclade_dataset_name_input | flu | ha | victoria | `"nextstrain/flu/vic/ha/KX058884"` | | - | nextclade_dataset_tag_input | flu | ha | victoria | `"2024-07-03--08-29-55Z"` | | + | nextclade_dataset_tag_input | flu | ha | victoria | `"2024-11-05--09-19-52Z"` | | | reference_genome | flu | ha | victoria | `"gs://theiagen-public-files-rp/terra/flu-references/reference_vic_ha.fasta"` | | | nextclade_dataset_name_input | flu | ha | yamagata | `"nextstrain/flu/yam/ha/JN993010"` | | | nextclade_dataset_tag_input | flu | ha | yamagata | `"2024-01-30--16-34-55Z"` | | @@ -546,13 +546,13 @@ The `organism_parameters` sub-workflow is the first step in all TheiaCoV workflo | 
nextclade_dataset_tag_input | flu | ha | h5n1 | `"2024-05-08--11-39-52Z"` | | | reference_genome | flu | ha | h5n1 | `"gs://theiagen-public-files-rp/terra/flu-references/reference_h5n1_ha.fasta"` | | | nextclade_dataset_name_input | flu | na | h1n1 | `"nextstrain/flu/h1n1pdm/na/MW626056"` | | - | nextclade_dataset_tag_input | flu | na | h1n1 | `"2024-07-03--08-29-55Z"` | | + | nextclade_dataset_tag_input | flu | na | h1n1 | `"2024-11-05--09-19-52Z"` | | | reference_genome | flu | na | h1n1 | `"gs://theiagen-public-files-rp/terra/flu-references/reference_h1n1pdm_na.fasta"` | | | nextclade_dataset_name_input | flu | na | h3n2 | `"nextstrain/flu/h3n2/na/EPI1857215"` | | - | nextclade_dataset_tag_input | flu | na | h3n2 | `"2024-04-19--07-50-39Z"` | | + | nextclade_dataset_tag_input | flu | na | h3n2 | `"2024-11-05--09-19-52Z"` | | | reference_genome | flu | na | h3n2 | `"gs://theiagen-public-files-rp/terra/flu-references/reference_h3n2_na.fasta"` | | | nextclade_dataset_name_input | flu | na | victoria | `"nextstrain/flu/vic/na/CY073894"` | | - | nextclade_dataset_tag_input | flu | na | victoria | `"2024-04-19--07-50-39Z"` | | + | nextclade_dataset_tag_input | flu | na | victoria | `"2024-11-05--09-19-52Z"` | | | reference_genome | flu | na | victoria | `"gs://theiagen-public-files-rp/terra/flu-references/reference_vic_na.fasta"` | | | nextclade_dataset_name_input | flu | na | yamagata | `"NA"` | | | nextclade_dataset_tag_input | flu | na | yamagata | `"NA"` | | @@ -564,7 +564,7 @@ The `organism_parameters` sub-workflow is the first step in all TheiaCoV workflo | genome_length_input | rsv_a | 16000 | | kraken_target_organism | rsv_a | Respiratory syncytial virus | | nextclade_dataset_name_input | rsv_a | nextstrain/rsv/a/EPI_ISL_412866 | - | nextclade_dataset_tag_input | rsv_a | 2024-08-01--22-31-31Z | + | nextclade_dataset_tag_input | rsv_a | 2024-11-27--02-51-00Z | | reference_genome | rsv_a | 
gs://theiagen-public-files-rp/terra/rsv_references/reference_rsv_a.fasta | | vadr_max_length | rsv_a | 15500 | | vadr_mem | rsv_a | 32 | @@ -576,7 +576,7 @@ The `organism_parameters` sub-workflow is the first step in all TheiaCoV workflo | genome_length_input | rsv_b | 16000 | | kraken_target_organism | rsv_b | "Human orthopneumovirus" | | nextclade_dataset_name_input | rsv_b | nextstrain/rsv/b/EPI_ISL_1653999 | - | nextclade_dataset_tag_input | rsv_b | "2024-08-01--22-31-31Z" | + | nextclade_dataset_tag_input | rsv_b | "2024-11-27--02-51-00Z" | | reference_genome | rsv_b | gs://theiagen-public-files-rp/terra/rsv_references/reference_rsv_b.fasta | | vadr_max_length | rsv_b | 15500 | | vadr_mem | rsv_b | 32 | From b9763bd9bc20f0ef7679413fdb00824e367b4615 Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Wed, 4 Dec 2024 16:14:47 +0000 Subject: [PATCH 2/3] Add searchable table for organism-specific parameters in TheiaCoV documentation --- docs/workflows/genomic_characterization/theiacov.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/workflows/genomic_characterization/theiacov.md b/docs/workflows/genomic_characterization/theiacov.md index ee8274179..e00dcbe9e 100644 --- a/docs/workflows/genomic_characterization/theiacov.md +++ b/docs/workflows/genomic_characterization/theiacov.md @@ -477,6 +477,8 @@ All TheiaCoV Workflows (not TheiaCoV_FASTA_Batch) The `organism_parameters` sub-workflow is the first step in all TheiaCoV workflows. This step automatically sets the different parameters needed for each downstream tool to the appropriate value for the user-designated organism (by default, `"sars-cov-2"` is the default organism). +
+ !!! dna "" The following tables include the relevant organism-specific parameters; **all of these default values can be overwritten by providing a value for the "Overwrite Variable Name" field**. @@ -594,6 +596,7 @@ The `organism_parameters` sub-workflow is the first step in all TheiaCoV workflo | primer_bed_file | HIV-v2 | gs://theiagen-public-files/terra/hivgc-files/HIV-1_v2.0.primer.hyphen400.1.bed | This version of HIV originates from Southern Africa | | reference_genome | HIV-v2 | gs://theiagen-public-files/terra/hivgc-files/AY228557.1.headerchanged.fasta | This version of HIV originates from Southern Africa | | reference_gff_file | HIV-v2 | gs://theiagen-public-files/terra/hivgc-files/AY228557.1.gff3 | This version of HIV originates from Southern Africa | +
### Workflow Tasks From 60536d891b5c19222c96fe5b3f132d77ff21a895 Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Wed, 4 Dec 2024 16:38:03 +0000 Subject: [PATCH 3/3] Enhance TheiaCoV documentation with searchable tables for organism-specific parameters --- .../genomic_characterization/theiacov.md | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/docs/workflows/genomic_characterization/theiacov.md b/docs/workflows/genomic_characterization/theiacov.md index e00dcbe9e..74f5de668 100644 --- a/docs/workflows/genomic_characterization/theiacov.md +++ b/docs/workflows/genomic_characterization/theiacov.md @@ -477,12 +477,12 @@ All TheiaCoV Workflows (not TheiaCoV_FASTA_Batch) The `organism_parameters` sub-workflow is the first step in all TheiaCoV workflows. This step automatically sets the different parameters needed for each downstream tool to the appropriate value for the user-designated organism (by default, `"sars-cov-2"` is the default organism). -
- !!! dna "" The following tables include the relevant organism-specific parameters; **all of these default values can be overwritten by providing a value for the "Overwrite Variable Name" field**. ??? toggle "SARS-CoV-2 Defaults" +
+ | **Overwrite Variable Name** | **Organism** | **Default Value** | |---|---|---| | gene_locations_bed_file | sars-cov-2 | `"gs://theiagen-public-files-rp/terra/sars-cov-2-files/sc2_gene_locations.bed"` | @@ -495,7 +495,11 @@ The `organism_parameters` sub-workflow is the first step in all TheiaCoV workflo | vadr_mem | sars-cov-2 | `8` | | vadr_options | sars-cov-2 | `"--noseqnamemax --glsearch -s -r --nomisc --mkey sarscov2 --lowsim5seq 6 --lowsim3seq 6 --alt_fail lowscore,insertnn,deletinn --out_allfasta"` | +
+ ??? toggle "Mpox Defaults" +
+ | **Overwrite Variable Name** | **Organism** | **Default Value** | |---|---|---| | gene_locations_bed_file | MPXV | `"gs://theiagen-public-files/terra/mpxv-files/mpox_gene_locations.bed"` | @@ -509,8 +513,12 @@ The `organism_parameters` sub-workflow is the first step in all TheiaCoV workflo | vadr_max_length | MPXV | `210000` | | vadr_mem | MPXV | `8` | | vadr_options | MPXV | `"--glsearch -s -r --nomisc --mkey mpxv --r_lowsimok --r_lowsimxd 100 --r_lowsimxl 2000 --alt_pass discontn,dupregin --out_allfasta --minimap2 --s_overhang 150"` | + +
??? toggle "WNV Defaults" +
+ | **Overwrite Variable Name** | **Organism** | **Default Value** | **Notes** | |---|---|---|---| | genome_length_input | WNV | `11000` | | @@ -523,7 +531,11 @@ The `organism_parameters` sub-workflow is the first step in all TheiaCoV workflo | vadr_mem | WNV | `8` | | | vadr_options | WNV | `"--mkey flavi --mdir /opt/vadr/vadr-models-flavi/ --nomisc --noprotid --out_allfasta"` | | +
+ ??? toggle "Flu Defaults" +
+ | **Overwrite Variable Name** | **Organism** | **Flu Segment** | **Flu Subtype** | **Default Value** | **Notes** | |---|---|---|---|---|---| | flu_segment | flu | all | all | N/A | TheiaCoV will attempt to automatically assign a flu segment | @@ -560,7 +572,11 @@ The `organism_parameters` sub-workflow is the first step in all TheiaCoV workflo | nextclade_dataset_tag_input | flu | na | yamagata | `"NA"` | | | reference_genome | flu | na | yamagata | `"gs://theiagen-public-files-rp/terra/flu-references/reference_yam_na.fasta"` | | +
+ ??? toggle "RSV-A Defaults" +
+ | **Overwrite Variable Name** | **Organism** | **Default Value** | |---|---|---| | genome_length_input | rsv_a | 16000 | @@ -572,7 +588,11 @@ The `organism_parameters` sub-workflow is the first step in all TheiaCoV workflo | vadr_mem | rsv_a | 32 | | vadr_options | rsv_a | -r --mkey rsv --xnocomp | +
+ ??? toggle "RSV-B Defaults" +
+ | **Overwrite Variable Name** | **Organism** | **Default Value** | |---|---|---| | genome_length_input | rsv_b | 16000 | @@ -584,7 +604,11 @@ The `organism_parameters` sub-workflow is the first step in all TheiaCoV workflo | vadr_mem | rsv_b | 32 | | vadr_options | rsv_b | -r --mkey rsv --xnocomp | +
+ ??? toggle "HIV Defaults" +
+ | **Overwrite Variable Name** | **Organism** | **Default Value** | **Notes** | |---|---|---|---| | kraken_target_organism_input | HIV | Human immunodeficiency virus 1 | | @@ -596,7 +620,8 @@ The `organism_parameters` sub-workflow is the first step in all TheiaCoV workflo | primer_bed_file | HIV-v2 | gs://theiagen-public-files/terra/hivgc-files/HIV-1_v2.0.primer.hyphen400.1.bed | This version of HIV originates from Southern Africa | | reference_genome | HIV-v2 | gs://theiagen-public-files/terra/hivgc-files/AY228557.1.headerchanged.fasta | This version of HIV originates from Southern Africa | | reference_gff_file | HIV-v2 | gs://theiagen-public-files/terra/hivgc-files/AY228557.1.gff3 | This version of HIV originates from Southern Africa | -
+ +
### Workflow Tasks