diff --git a/.gitignore b/.gitignore index e7ccb8147..0a9f9aef1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ cromwell* _LAST -2024* \ No newline at end of file +2024*site/ +site/ diff --git a/docs/javascripts/table-search.js b/docs/javascripts/table-search.js new file mode 100644 index 000000000..6a87df839 --- /dev/null +++ b/docs/javascripts/table-search.js @@ -0,0 +1,64 @@ +function addTableSearch() { + // Select all containers with the class 'searchable-table' + const containers = document.querySelectorAll('.searchable-table'); + + containers.forEach((container) => { + // Find the table within this container + const table = container.querySelector('table'); + + if (table) { + // Ensure we don't add multiple search boxes + if (!container.querySelector('input[type="search"]')) { + // Create the search input element + const searchInput = document.createElement("input"); + searchInput.setAttribute("type", "search"); + searchInput.setAttribute("placeholder", "Search table..."); + searchInput.classList.add('table-search-input'); + searchInput.style.marginBottom = "10px"; + searchInput.style.display = "block"; + + // Insert the search input before the table + container.insertBefore(searchInput, container.firstChild); + + // Add event listener for table search + searchInput.addEventListener("input", function () { + const filter = searchInput.value.toUpperCase(); + const rows = table.getElementsByTagName("tr"); + + for (let i = 1; i < rows.length; i++) { // Skip header row + const cells = rows[i].getElementsByTagName("td"); + let match = false; + + for (let j = 0; j < cells.length; j++) { + if (cells[j].innerText.toUpperCase().includes(filter)) { + match = true; + break; + } + } + + rows[i].style.display = match ? 
"" : "none"; + } + }); + } + } else { + console.log('Table not found within container.'); + } + }); +} + +// Run on page load +addTableSearch(); + +// Reapply search bar on page change +function observeDOMChanges() { + const targetNode = document.querySelector('body'); + const config = { childList: true, subtree: true }; + + const observer = new MutationObserver(() => { + addTableSearch(); + }); + + observer.observe(targetNode, config); +} + +observeDOMChanges(); diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css index 0ff83e58d..e510ecedc 100644 --- a/docs/stylesheets/extra.css +++ b/docs/stylesheets/extra.css @@ -184,5 +184,36 @@ th { td { word-break: break-all; } +/* Base styles for the search box */ +div.searchable-table input.table-search-input { + width: 25%; + padding: 10px; + margin-bottom: 12px; + font-size: 12px; + box-sizing: border-box; + border-radius: 2px; +} + +/* Light mode styles */ +[data-md-color-scheme="light"] div.searchable-table input.table-search-input { + background-color: #fff; + color: #000; + border: 1px solid #E0E1E1; +} +[data-md-color-scheme="light"] div.searchable-table input.table-search-input::placeholder { + color: #888; + font-style: italic; +} +/* Dark mode styles */ +[data-md-color-scheme="slate"] div.searchable-table input.table-search-input { + background-color: #1d2125; + color: #fff; + border: 1px solid #373B40; +} + +[data-md-color-scheme="slate"] div.searchable-table input.table-search-input::placeholder { + color: #bbb; + font-style: italic; +} diff --git a/docs/workflows/data_export/concatenate_column_content.md b/docs/workflows/data_export/concatenate_column_content.md index 5534128ef..159986e1d 100644 --- a/docs/workflows/data_export/concatenate_column_content.md +++ b/docs/workflows/data_export/concatenate_column_content.md @@ -16,6 +16,8 @@ This set-level workflow will create a file containing all of the items from a gi This workflow runs on the set level. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | concatenate_column_content | **concatenated_file_name** | String | The name of the output file. ***Include the extension***, such as ".fasta" or ".txt". | | Required | @@ -28,6 +30,8 @@ This workflow runs on the set level. | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Outputs !!! info diff --git a/docs/workflows/data_export/transfer_column_content.md b/docs/workflows/data_export/transfer_column_content.md index 475e4bfb6..1d7dae06c 100644 --- a/docs/workflows/data_export/transfer_column_content.md +++ b/docs/workflows/data_export/transfer_column_content.md @@ -25,6 +25,8 @@ This set-level workflow will transfer all of the items from a given column in a This workflow runs on the set level. +
+ | **Terra Task name** | **input_variable** | **Type** | **Description** | **Default attribute** | **Status** | |---|---|---|---|---|---| | transfer_column_content | **files_to_transfer** | Array[File] | The column that has the files you want to concatenate. | | Required | @@ -36,6 +38,8 @@ This workflow runs on the set level. | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Outputs !!! info diff --git a/docs/workflows/data_export/zip_column_content.md b/docs/workflows/data_export/zip_column_content.md index cdad7a73d..d1c38a8ee 100644 --- a/docs/workflows/data_export/zip_column_content.md +++ b/docs/workflows/data_export/zip_column_content.md @@ -16,6 +16,8 @@ This workflow will create a zip file that contains all of the items in a column This workflow runs on the set level. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | zip_column_content | **files_to_zip** | Array[File] | The column that has the files you want to zip. | | Required | @@ -27,6 +29,8 @@ This workflow runs on the set level. | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Outputs !!! info diff --git a/docs/workflows/data_import/assembly_fetch.md b/docs/workflows/data_import/assembly_fetch.md index 9387d0b2e..394a14d49 100644 --- a/docs/workflows/data_import/assembly_fetch.md +++ b/docs/workflows/data_import/assembly_fetch.md @@ -23,6 +23,8 @@ Assembly_Fetch requires the input samplename, and either the accession for a ref This workflow runs on the sample level. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | reference_fetch | **samplename** | String | Your sample's name | | Required | @@ -44,6 +46,8 @@ This workflow runs on the sample level. | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Analysis Tasks ??? task "ReferenceSeeker (optional) Details" @@ -90,6 +94,8 @@ This workflow runs on the sample level. ### Outputs +
+ | **Variable** | **Type** | **Description** | |---|---|---| | assembly_fetch_analysis_date | String | Date of assembly download | @@ -101,11 +107,13 @@ This workflow runs on the sample level. | assembly_fetch_ncbi_datasets_version | String | NCBI datasets version used | | assembly_fetch_referenceseeker_database | String | ReferenceSeeker database used | | assembly_fetch_referenceseeker_docker | String | Docker file used for ReferenceSeeker | -| assembly_fetch_referenceseeker_top_hit_ncbi_accession | String | NCBI Accession for the top it identified by Assembly_Fetch | +| assembly_fetch_referenceseeker_top_hit_ncbi_accession | String | NCBI Accession for the top hit identified by Assembly_Fetch | | assembly_fetch_referenceseeker_tsv | File | TSV file of the top hits between the query genome and the Reference Seeker database | | assembly_fetch_referenceseeker_version | String | ReferenceSeeker version used | | assembly_fetch_version | String | The version of the repository the Assembly Fetch workflow is in | +
+ ## References > **ReferenceSeeker:** Schwengers O, Hain T, Chakraborty T, Goesmann A. ReferenceSeeker: rapid determination of appropriate reference genomes. J Open Source Softw. 2020 Feb 4;5(46):1994. diff --git a/docs/workflows/data_import/basespace_fetch.md b/docs/workflows/data_import/basespace_fetch.md index 4fe4072b9..f0d1fed96 100644 --- a/docs/workflows/data_import/basespace_fetch.md +++ b/docs/workflows/data_import/basespace_fetch.md @@ -153,6 +153,8 @@ This process must be performed on a command-line (ideally on a Linux or MacOS co This workflow runs on the sample level. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | basespace_fetch | **access_token** | String | The access token is used in place of a username and password to allow the workflow to access the user account in BaseSpace from which the data is to be transferred. It is an alphanumeric string that is 32 characters in length. Example: 9e08a96471df44579b72abf277e113b7 | | Required | @@ -168,6 +170,8 @@ This workflow runs on the sample level. | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### **Outputs** The outputs of this workflow will be the fastq files imported from BaseSpace into the data table where the sample ID information had originally been uploaded. diff --git a/docs/workflows/data_import/create_terra_table.md b/docs/workflows/data_import/create_terra_table.md index 31c10e024..53b953dba 100644 --- a/docs/workflows/data_import/create_terra_table.md +++ b/docs/workflows/data_import/create_terra_table.md @@ -19,6 +19,8 @@ The manual creation of Terra tables can be tedious and error-prone. This workflo **_This can be changed_** by providing information in the `file_ending` optional input parameter. See below for more information. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | create_terra_table | **assembly_data** | Boolean | Set to true if your data is in FASTA format; set to false if your data is FASTQ format | | Required | @@ -33,6 +35,8 @@ The manual creation of Terra tables can be tedious and error-prone. This workflo | make_table | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/terra-tools:2023-06-21" | Optional | | make_table | **memory** | Int | Amount of memory/RAM (in GB) to allocate to the task | 4 | Optional | +
+ ### Finding the `data_location_path` #### Using the Terra data uploader diff --git a/docs/workflows/data_import/sra_fetch.md b/docs/workflows/data_import/sra_fetch.md index 0b3b407fb..835a81ed8 100644 --- a/docs/workflows/data_import/sra_fetch.md +++ b/docs/workflows/data_import/sra_fetch.md @@ -16,6 +16,8 @@ Read files associated with the SRA run accession provided as input are copied to This workflow runs on the sample level. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | fetch_sra_to_fastq | **sra_accession** | String | SRA, ENA, or DRA accession number | | Required | @@ -25,6 +27,8 @@ This workflow runs on the sample level. | fetch_sra_to_fastq | **fastq_dl_options** | String | Additional parameters to pass to fastq_dl from [here](https://github.com/rpetit3/fastq-dl?tab=readme-ov-file#usage) | "--provider sra" | Optional | | fetch_sra_to_fastq | **memory** | Int | Amount of memory/RAM (in GB) to allocate to the task | 8 | Optional | +
+ The only required input for the SRA_Fetch workflow is an SRA run accession beginning "SRR", an ENA run accession beginning "ERR", or a DRA run accession which beginning "DRR". Please see the [NCBI Metadata and Submission Overview](https://www.ncbi.nlm.nih.gov/sra/docs/submitmeta/) for assistance with identifying accessions. Briefly, NCBI-accessioned objects have the following naming scheme: @@ -41,6 +45,8 @@ Read data are available either with full base quality scores (**SRA Normalized F Given the lack of usefulness of SRA Lite formatted FASTQ files, we try to avoid these by selecting as provided SRA directly (SRA-Lite is more probably to be the file synced to other repositories), but some times downloading these files is unavoidable. To make the user aware of this, a warning column is present that is populated when an SRA-Lite file is detected. +
+ | **Variable** | **Type** | **Description** | **Production Status** | |---|---|---|---| | read1 | File | File containing the forward reads | Always produced | @@ -51,6 +57,8 @@ Given the lack of usefulness of SRA Lite formatted FASTQ files, we try to avoid | fastq_dl_version | String | Fastq_dl version used | Always produced | | fastq_dl_warning | String | This warning field is populated if SRA-Lite files are detected. These files contain all quality encoding as Phred-30 or Phred-3. | Depends on internal workflow logic | +
+ ## References > This workflow relies on [fastq-dl](https://github.com/rpetit3/fastq-dl), a very handy bioinformatics tool by Robert A. Petit III diff --git a/docs/workflows/genomic_characterization/freyja.md b/docs/workflows/genomic_characterization/freyja.md index 004013e30..b3e2f4b6f 100644 --- a/docs/workflows/genomic_characterization/freyja.md +++ b/docs/workflows/genomic_characterization/freyja.md @@ -56,6 +56,8 @@ This workflow will copy the Freyja reference files (`usher_barcodes.feather` and We recommend running this workflow with **"Run inputs defined by file paths"** selected since no information from a Terra data table is actually being used. We also recommend turning off call caching so new information is retrieved every time. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | freyja_update | **gcp_uri** | String | The path where you want the Freyja reference files to be stored. Include gs:// at the beginning of the string. Full example with a Terra workspace bucket: "gs://fc-87ddd67a-c674-45a8-9651-f91e3d2f6bb7" | | Required | @@ -68,6 +70,8 @@ We recommend running this workflow with **"Run inputs defined by file paths"** s | transfer_files | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/utility:1.1" | Optional | | transfer_files | **memory** | Int | Amount of memory/RAM (in GB) to allocate to the task | 4 | Optional | +
+ ### Outputs This workflow does not produce any outputs that appear in a Terra data table. The reference files will appear at the location specified with the `gcp_uri` input variable. @@ -90,6 +94,8 @@ The Freyja_FASTQ_PHB workflow is compatible with the multiple input data types: This workflow runs on the sample level. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | freyja_fastq | **primer_bed** | File | The bed file containing the primers used when sequencing was performed | | Required | @@ -190,6 +196,8 @@ This workflow runs on the sample level. | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Freyja_FASTQ Analysis Tasks ??? task "`read_QC_trim_pe` Details" @@ -308,6 +316,8 @@ The main output file used in subsequent Freyja workflows is found under the `fre !!! tip "Click "Ignore empty outputs"" When running the Freyja_FASTQ_PHB workflow, it is recommended to select the "Ignore empty outputs" option in the Terra UI. This will hide the output columns that will not be generated for your input data type. +
+ | **Variable** | **Type** | **Description** | **Input Data Type** | |---|---|---|---| | aligned_bai | File | Index companion file to the bam file generated during the consensus assembly process | ONT, PE, SE | @@ -394,6 +404,8 @@ The main output file used in subsequent Freyja workflows is found under the `fre | trimmomatic_docker | String | Docker container for Trimmomatic | PE, SE | | trimmomatic_version | String | The version of Trimmomatic used | PE, SE | +
+ ## Freyja_Plot_PHB {#freyja_plot} This workflow visualizes aggregated freyja_demixed output files produced by Freyja_FASTQ in a single plot (pdf format) which provides fractional abundance estimates for all aggregated samples. @@ -404,6 +416,8 @@ Options exist to provide lineage-specific breakdowns and/or sample collection ti This workflow runs on the set level. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | freyja_plot | **freyja_demixed** | Array[File] | An array containing the output files (freyja_demixed) made by Freyja_FASTQ | | Required | @@ -422,6 +436,8 @@ This workflow runs on the set level. | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Analysis Tasks ??? task "`freyja_plot_task` Details" @@ -459,6 +475,8 @@ This dashboard is not "live" — that is, you must rerun the workflow every time This workflow runs on the set level. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | freyja_dashboard | **collection_date** | Array[String] | An array containing the collection dates for the sample (YYYY-MM-DD format) | | Required | @@ -478,6 +496,8 @@ This workflow runs on the set level. | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Freyja_Dashboard Tasks ??? task "`freyja_dashboard_task` Details" diff --git a/docs/workflows/genomic_characterization/pangolin_update.md b/docs/workflows/genomic_characterization/pangolin_update.md index b258d2411..988db4404 100644 --- a/docs/workflows/genomic_characterization/pangolin_update.md +++ b/docs/workflows/genomic_characterization/pangolin_update.md @@ -14,6 +14,8 @@ The Pangolin_Update workflow re-runs Pangolin updating prior lineage calls from This workflow runs on the sample level. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | pangolin_update | **assembly_fasta** | File | SARS-CoV-2 assembly file in FASTA format | | Required | @@ -42,8 +44,12 @@ This workflow runs on the sample level. | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Outputs +
+ | **Variable** | **Type** | **Description** | |---|---|---| | **pango_lineage** | String | Pango lineage as determined by Pangolin | @@ -58,3 +64,5 @@ This workflow runs on the sample level. | **pangolin_update_version** | String | Version of the Public Health Bioinformatics (PHB) repository used | | **pangolin_updates** | String | Result of Pangolin Update (lineage changed versus unchanged) with lineage assignment and date of analysis | | **pangolin_versions** | String | All Pangolin software and database versions | + +
\ No newline at end of file diff --git a/docs/workflows/genomic_characterization/theiacov.md b/docs/workflows/genomic_characterization/theiacov.md index f877ba540..a21d46f89 100644 --- a/docs/workflows/genomic_characterization/theiacov.md +++ b/docs/workflows/genomic_characterization/theiacov.md @@ -108,6 +108,8 @@ All TheiaCoV Workflows (not TheiaCoV_FASTA_Batch) The TheiaCoV_ClearLabs workflow takes in read data produced by the Clear Dx platform from ClearLabs. However, many users use the TheiaCoV_FASTA workflow instead of this one due to a few known issues when generating assemblies with this pipeline that are not present when using ClearLabs-generated FASTA files. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** |* | **Organism** | |---|---|---|---|---|---|---|---| | theiacov_clearlabs | **primer_bed** | File | The bed file containing the primers used when sequencing was performed | | Required | CL | sars-cov-2 | @@ -126,8 +128,8 @@ All TheiaCoV Workflows (not TheiaCoV_FASTA_Batch) | clean_check_reads | **memory** | Int | Amount of memory/RAM (in GB) to allocate to the task | 2 | Optional | ONT, PE, SE | HIV, MPXV, WNV, flu, rsv_a, rsv_b, sars-cov-2 | | consensus | **cpu** | Int | Number of CPUs to allocate to the task | 8 | Optional | CL, ONT | sars-cov-2 | | consensus | **disk_size** | Int | Amount of storage (in GB) to allocate to the task | 100 | Optional | CL, ONT | sars-cov-2 | -| consensus | **docker** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/staphb/artic-ncov2019-epi2me | Optional | ONT | HIV, MPXV, WNV, flu, rsv_a, rsv_b, sars-cov-2 | -| consensus | **medaka_model** | String | In order to obtain the best results, the appropriate model must be set to match the sequencer's basecaller model; this string takes the format of {pore}_{device}_{caller variant}_{caller_version}. See also https://github.com/nanoporetech/medaka?tab=readme-ov-file#models. | r941_min_high_g360 | Optional | CL, ONT | sars-cov-2 | +| consensus | **docker** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/staphb/artic:1.2.4-1.12.0 | Optional | CL, ONT | HIV, MPXV, WNV, flu, rsv_a, rsv_b, sars-cov-2 | +| consensus | **medaka_model** | String | In order to obtain the best results, the appropriate model must be set to match the sequencer's basecaller model; this string takes the format of {pore}_{device}_{caller variant}_{caller_version}. See the list of available models in the `artic_consensus` documentation section. See also https://github.com/nanoporetech/medaka?tab=readme-ov-file#models. 
| r941_min_high_g360 | Optional | CL, ONT | sars-cov-2 | | consensus | **memory** | Int | Amount of memory/RAM (in GB) to allocate to the task | 16 | Optional | CL, ONT | sars-cov-2 | | consensus_qc | **cpu** | Int | Number of CPUs to allocate to the task | 1 | Optional | CL, FASTA, ONT, PE, SE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | | consensus_qc | **disk_size** | Int | Amount of storage (in GB) to allocate to the task | 100 | Optional | CL, FASTA, ONT, PE, SE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | @@ -409,6 +411,8 @@ All TheiaCoV Workflows (not TheiaCoV_FASTA_Batch) | workflow name | **vadr_skip_length** | Int | Minimum assembly length (unambiguous) to run VADR | 10000 | Optional | FASTA, ONT, PE, SE | MPXV, WNV, flu, rsv_a, rsv_b, sars-cov-2 | | workflow name | **variant_min_freq** | Float | Minimum frequency for a variant to be reported in ivar outputs | 0.6 | Optional | PE, SE | HIV, MPXV, WNV, flu, rsv_a, rsv_b, sars-cov-2 | +
+ ??? toggle "TheiaCoV_FASTA_Batch_PHB Inputs" ##### TheiaCoV_FASTA_Batch Inputs {#theiacov-fasta-batch-inputs} @@ -612,7 +616,7 @@ All input reads are processed through "core tasks" in the TheiaCoV Illumina, ONT ??? task "`screen`: Total Raw Read Quantification and Genome Size Estimation" - The [`screen`](https://github.com/theiagen/public_health_bioinformatics/blob/main/tasks/quality_control/comparisons/task_screen.wdl) task ensures the quantity of sequence data is sufficient to undertake genomic analysis. It uses bash commands for quantification of reads and base pairs, and [mash](https://mash.readthedocs.io/en/latest/index.html) sketching to estimate the genome size and its coverage. At each step, the results are assessed relative to pass/fail criteria and thresholds that may be defined by optional user inputs. Samples that do not meet these criteria will not be processed further by the workflow: + The [`screen`](https://github.com/theiagen/public_health_bioinformatics/blob/main/tasks/quality_control/comparisons/task_screen.wdl) task ensures the quantity of sequence data is sufficient to undertake genomic analysis. It uses [`fastq-scan`](https://github.com/rpetit3/fastq-scan) and bash commands for quantification of reads and base pairs, and [mash](https://mash.readthedocs.io/en/latest/index.html) sketching to estimate the genome size and its coverage. At each step, the results are assessed relative to pass/fail criteria and thresholds that may be defined by optional user inputs. Samples that do not meet these criteria will not be processed further by the workflow: 1. Total number of reads: A sample will fail the read screening task if its total number of reads is less than or equal to `min_reads`. 2. The proportion of basepairs reads in the forward and reverse read files: A sample will fail the read screening if fewer than `min_proportion` basepairs are in either the reads1 or read2 files. 
@@ -800,6 +804,50 @@ All input reads are processed through "core tasks" in the TheiaCoV Illumina, ONT !!! info "" Read-trimming is performed on raw read data generated on the ClearLabs instrument and thus not a required step in the TheiaCoV_ClearLabs workflow. + + ??? toggle "Available `medaka` models" + The medaka models available in the default docker container are as follows: + + ``` bash + r103_fast_g507, r103_fast_snp_g507, r103_fast_variant_g507, r103_hac_g507, + r103_hac_snp_g507, r103_hac_variant_g507, r103_min_high_g345, r103_min_high_g360, + r103_prom_high_g360, r103_prom_snp_g3210, r103_prom_variant_g3210, r103_sup_g507, + r103_sup_snp_g507, r103_sup_variant_g507, r1041_e82_260bps_fast_g632, + r1041_e82_260bps_fast_variant_g632, r1041_e82_260bps_hac_g632, + r1041_e82_260bps_hac_v4.0.0, r1041_e82_260bps_hac_v4.1.0, + r1041_e82_260bps_hac_variant_g632, r1041_e82_260bps_hac_variant_v4.1.0, + r1041_e82_260bps_joint_apk_ulk_v5.0.0, r1041_e82_260bps_sup_g632, + r1041_e82_260bps_sup_v4.0.0, r1041_e82_260bps_sup_v4.1.0, + r1041_e82_260bps_sup_variant_g632, r1041_e82_260bps_sup_variant_v4.1.0, + r1041_e82_400bps_fast_g615, r1041_e82_400bps_fast_g632, + r1041_e82_400bps_fast_variant_g615, r1041_e82_400bps_fast_variant_g632, + r1041_e82_400bps_hac_g615, r1041_e82_400bps_hac_g632, r1041_e82_400bps_hac_v4.0.0, + r1041_e82_400bps_hac_v4.1.0, r1041_e82_400bps_hac_v4.2.0, r1041_e82_400bps_hac_v4.3.0, + r1041_e82_400bps_hac_v5.0.0, r1041_e82_400bps_hac_variant_g615, + r1041_e82_400bps_hac_variant_g632, r1041_e82_400bps_hac_variant_v4.1.0, + r1041_e82_400bps_hac_variant_v4.2.0, r1041_e82_400bps_hac_variant_v4.3.0, + r1041_e82_400bps_hac_variant_v5.0.0, r1041_e82_400bps_sup_g615, + r1041_e82_400bps_sup_v4.0.0, r1041_e82_400bps_sup_v4.1.0, r1041_e82_400bps_sup_v4.2.0, + r1041_e82_400bps_sup_v4.3.0, r1041_e82_400bps_sup_v5.0.0, + r1041_e82_400bps_sup_variant_g615, r1041_e82_400bps_sup_variant_v4.1.0, + r1041_e82_400bps_sup_variant_v4.2.0, r1041_e82_400bps_sup_variant_v4.3.0, 
+ r1041_e82_400bps_sup_variant_v5.0.0, r104_e81_fast_g5015, r104_e81_fast_variant_g5015, + r104_e81_hac_g5015, r104_e81_hac_variant_g5015, r104_e81_sup_g5015, r104_e81_sup_g610, + r104_e81_sup_variant_g610, r10_min_high_g303, r10_min_high_g340, r941_e81_fast_g514, + r941_e81_fast_variant_g514, r941_e81_hac_g514, r941_e81_hac_variant_g514, + r941_e81_sup_g514, r941_e81_sup_variant_g514, r941_min_fast_g303, r941_min_fast_g507, + r941_min_fast_snp_g507, r941_min_fast_variant_g507, r941_min_hac_g507, + r941_min_hac_snp_g507, r941_min_hac_variant_g507, r941_min_high_g303, r941_min_high_g330, + r941_min_high_g340_rle, r941_min_high_g344, r941_min_high_g351, r941_min_high_g360, + r941_min_sup_g507, r941_min_sup_snp_g507, r941_min_sup_variant_g507, r941_prom_fast_g303, + r941_prom_fast_g507, r941_prom_fast_snp_g507, r941_prom_fast_variant_g507, + r941_prom_hac_g507, r941_prom_hac_snp_g507, r941_prom_hac_variant_g507, + r941_prom_high_g303, r941_prom_high_g330, r941_prom_high_g344, r941_prom_high_g360, + r941_prom_high_g4011, r941_prom_snp_g303, r941_prom_snp_g322, r941_prom_snp_g360, + r941_prom_sup_g507, r941_prom_sup_snp_g507, r941_prom_sup_variant_g507, + r941_prom_variant_g303, r941_prom_variant_g322, r941_prom_variant_g360, + r941_sup_plant_g610, r941_sup_plant_variant_g610 + ``` General statistics about the assembly are generated with the `consensus_qc` task ([task_assembly_metrics.wdl](https://github.com/theiagen/public_health_bioinformatics/blob/main/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl)). @@ -948,6 +996,8 @@ All input reads are processed through "core tasks" in the TheiaCoV Illumina, ONT All TheiaCoV Workflows (not TheiaCoV_FASTA_Batch) +
+ | **Variable** | **Type** | **Description** | **Workflow** | |---|---|---|---| | abricate_flu_database | String | ABRicate database used for analysis | FASTA, ONT, PE | @@ -959,7 +1009,7 @@ All TheiaCoV Workflows (not TheiaCoV_FASTA_Batch) | aligned_bam | File | Primer-trimmed BAM file; generated during consensus assembly process | CL, ONT, PE, SE | | artic_docker | String | Docker image utilized for read trimming and consensus genome assembly | CL, ONT | | artic_version | String | Version of the Artic software utilized for read trimming and conesnsus genome assembly | CL, ONT | -| assembly_fasta | File | Consensus genome assembly; for lower quality flu samples, the output may state "Assembly could not be generated" when there is too little and/or too low quality data for IRMA to produce an assembly. Contigs will be ordered from smallest to largest when IRMA is used. | CL, ONT, PE, SE | +| assembly_fasta | File | Consensus genome assembly; for lower quality flu samples, the output may state "Assembly could not be generated" when there is too little and/or too low quality data for IRMA to produce an assembly. Contigs will be ordered from largest to smallest when IRMA is used. | CL, ONT, PE, SE | | assembly_length_unambiguous | Int | Number of unambiguous basecalls within the consensus assembly | CL, FASTA, ONT, PE, SE | | assembly_mean_coverage | Float | Mean sequencing depth throughout the consensus assembly. Generated after performing primer trimming and calculated using the SAMtools coverage command | CL, ONT, PE, SE | | assembly_method | String | Method employed to generate consensus assembly | CL, FASTA, ONT, PE, SE | @@ -1159,6 +1209,8 @@ All TheiaCoV Workflows (not TheiaCoV_FASTA_Batch) | vadr_num_alerts | String | Number of fatal alerts as determined by VADR | CL, FASTA, ONT, PE, SE | | variants_from_ref_vcf | File | Number of variants relative to the reference genome | CL | +
+ ??? toggle "TheiaCoV_FASTA_Batch_PHB Outputs" ##### TheiaCoV_FASTA_Batch Outputs {#theiacov-fasta-batch-outputs} @@ -1173,4 +1225,4 @@ All TheiaCoV Workflows (not TheiaCoV_FASTA_Batch) | nextclade_tsv | File | Output Nextclade TSV file that contains results for all samples included in the workflow | | pango_lineage_report | File | Output Pangolin CSV file that contains results for all samples included in the workflow | | theiacov_fasta_batch_analysis_date | String | Date that the workflow was run. | - | theiacov_fasta_batch_version | String | Version of the workflow that was used. | + | theiacov_fasta_batch_version | String | Version of the workflow that was used. | \ No newline at end of file diff --git a/docs/workflows/genomic_characterization/theiaeuk.md b/docs/workflows/genomic_characterization/theiaeuk.md index a594dce3b..19141cd05 100644 --- a/docs/workflows/genomic_characterization/theiaeuk.md +++ b/docs/workflows/genomic_characterization/theiaeuk.md @@ -1,4 +1,4 @@ -# TheiaEuk +# TheiaEuk Workflow Series ## Quick Facts @@ -23,6 +23,8 @@ All input reads are processed through "core tasks" in each workflow. The core ta By default, the workflow anticipates 2 x 150bp reads (i.e. the input reads were generated using a 300-cycle sequencing kit). Modifications to the optional parameter for `trim_minlen` may be required to accommodate shorter read data, such as the 2 x 75bp reads generated using a 150-cycle sequencing kit. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | theiaeuk_pe | **read1** | File | Unprocessed Illumina forward read file | | Required | @@ -173,6 +175,8 @@ All input reads are processed through "core tasks" in each workflow. The core ta | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Workflow tasks (performed for all taxa) ??? task "`versioning`: Version capture for TheiaEuk" @@ -187,7 +191,7 @@ All input reads are processed through "core tasks" in each workflow. The core ta ??? task "`screen`: Total Raw Read Quantification and Genome Size Estimation" - The [`screen`](https://github.com/theiagen/public_health_bioinformatics/blob/main/tasks/quality_control/comparisons/task_screen.wdl) task ensures the quantity of sequence data is sufficient to undertake genomic analysis. It uses bash commands for quantification of reads and base pairs, and [mash](https://mash.readthedocs.io/en/latest/index.html) sketching to estimate the genome size and its coverage. At each step, the results are assessed relative to pass/fail criteria and thresholds that may be defined by optional user inputs. Samples that do not meet these criteria will not be processed further by the workflow: + The [`screen`](https://github.com/theiagen/public_health_bioinformatics/blob/main/tasks/quality_control/comparisons/task_screen.wdl) task ensures the quantity of sequence data is sufficient to undertake genomic analysis. It uses [`fastq-scan`](https://github.com/rpetit3/fastq-scan) and bash commands for quantification of reads and base pairs, and [mash](https://mash.readthedocs.io/en/latest/index.html) sketching to estimate the genome size and its coverage. At each step, the results are assessed relative to pass/fail criteria and thresholds that may be defined by optional user inputs. Samples that do not meet these criteria will not be processed further by the workflow: 1. Total number of reads: A sample will fail the read screening task if its total number of reads is less than or equal to `min_reads`. 2. The proportion of basepairs reads in the forward and reverse read files: A sample will fail the read screening if fewer than `min_proportion` basepairs are in either the reads1 or read2 files. 
@@ -389,36 +393,57 @@ The TheiaEuk workflow automatically activates taxa-specific tasks after identifi Two tools are deployed when _Candida auris is_ identified. First, the Cladetyping tool is launched to determine the clade of the specimen by comparing the sequence to five clade-specific reference files. The output of the clade typing task will be used to specify the reference genome for the antifungal resistance detection tool. To detect mutations that may confer antifungal resistance, `Snippy` is used to find all variants relative to the clade-specific reference, then these variants are queried for product names associated with resistance according to the MARDy database (). - **Default reference genomes used for clade typing and antimicrobial resistance gene detection of C. auris** - - | Clade | Genome Accession | Assembly Name | Strain | NCBI Submitter | Included mutations in AMR genes (not comprehensive) | - | --- | --- | --- | --- | --- | --- | - | Candida auris Clade I | GCA_002759435.2 | Cand_auris_B8441_V2 | B8441 | Centers for Disease Control and Prevention | | - | Candida auris Clade II | GCA_003013715.2 | ASM301371v2 | B11220 | Centers for Disease Control and Prevention | | - | Candida auris Clade III | GCA_002775015.1 | Cand_auris_B11221_V1 | B11221 | Centers for Disease Control and Prevention | _ERG11_ V125A/F126L | - | Candida auris Clade IV | GCA_003014415.1 | Cand_auris_B11243 | B11243 | Centers for Disease Control and Prevention | _ERG11_ Y132F | - | Candida auris Clade V | GCA_016809505.1 | ASM1680950v1 | IFRC2087 | Centers for Disease Control and Prevention | | - The genes in which there are known resistance-conferring mutations for this pathogen are: - FKS1 - ERG11 (lanosterol 14-alpha demethylase) - FUR1 (uracil phosphoribosyltransferase) - Mutations in these genes that are known to confer resistance are shown below (source: MARDy database http://mardy.dide.ic.ac.uk/index.php) - - | **Organism** | **Found in** | **Gene name** | **Gene locus** 
| **AA mutation** | **Drug** | **Tandem repeat name** | **Tandem repeat sequence** | **Reference** | - | --- | --- | --- | --- | --- | --- | --- | --- | --- | - | **Candida auris** | **Human** | **ERG11** | | **Y132F** | **Fluconazole** | | | [**10.1093/cid/ciw691**](https://academic.oup.com/cid/article/64/2/134/2706620/Simultaneous-Emergence-of-Multidrug-Resistant) | - | **Candida auris** | **Human** | **ERG11** | | **K143R** | **Fluconazole** | | | [**10.1093/cid/ciw691**](https://academic.oup.com/cid/article/64/2/134/2706620/Simultaneous-Emergence-of-Multidrug-Resistant) | - | **Candida auris** | **Human** | **ERG11** | | **F126T** | **Fluconazole** | | | [**10.1093/cid/ciw691**](https://academic.oup.com/cid/article/64/2/134/2706620/Simultaneous-Emergence-of-Multidrug-Resistant) | - | **Candida auris** | **Human** | **FKS1** | | **S639P** | **Micafungin** | | | [**10.1016/j.diagmicrobio.2017.10.021**](https://www.sciencedirect.com/science/article/pii/S0732889317303498) | - | **Candida auris** | **Human** | **FKS1** | | **S639P** | **Caspofungin** | | | [**10.1016/j.diagmicrobio.2017.10.021**](https://www.sciencedirect.com/science/article/pii/S0732889317303498) | - | **Candida auris** | **Human** | **FKS1** | | **S639P** | **Anidulafungin** | | | [**10.1016/j.diagmicrobio.2017.10.021**](https://www.sciencedirect.com/science/article/pii/S0732889317303498) | - | **Candida auris** | **Human** | **FKS1** | | **S639F** | **Micafungin** | | | [**10.1093/jac/dkx480**](https://academic.oup.com/jac/advance-article/doi/10.1093/jac/dkx480/4794718) | - | **Candida auris** | **Human** | **FKS1** | | **S639F** | **Caspofungin** | | | [**10.1093/jac/dkx480**](https://academic.oup.com/jac/advance-article/doi/10.1093/jac/dkx480/4794718) | - | **Candida auris** | **Human** | **FKS1** | | **S639F** | **Anidulafungin** | | | [**10.1093/jac/dkx480**](https://academic.oup.com/jac/advance-article/doi/10.1093/jac/dkx480/4794718) | - | **Candida auris** | **Human** | **FUR1** | 
**CAMJ_004922** | **F211I** | **5-flucytosine** | | | [**https://doi.org/10.1038/s41426-018-0045-x**](https://www.nature.com/articles/s41426-018-0045-x) | + We query `Snippy` results to see if any mutations were identified in those genes. In addition, _C. auris_ automatically checks for the following loci. You will find the mutations next to the locus tag in the `theiaeuk_snippy_variants_hits` column; the corresponding gene names are as follows: + + | **TheiaEuk Search Term** | **Corresponding Gene Name** | + |---|---| + | B9J08_005340 | ERG6 | + | B9J08_000401 | FLO8 | + | B9J08_005343 | Hypothetical protein (PSK74852) | + | B9J08_003102 | MEC3 | + | B9J08_003737 | ERG3 | + | lanosterol.14-alpha.demethylase | ERG11 | + | uracil.phosphoribosyltransferase | FUR1 | + | FKS1 | FKS1 | + + For example, one sample may have the following output for the `theiaeuk_snippy_variants_hits` column: + + ```plaintext + lanosterol.14-alpha.demethylase: lanosterol 14-alpha demethylase (missense_variant c.428A>G p.Lys143Arg; C:266 T:0),B9J08_000401: hypothetical protein (stop_gained c.424C>T p.Gln142*; A:70 G:0) + ``` + + Based on this, we can tell that ERG11 has a missense variant at position 143 (Lysine to Arginine) and B9J08_000401 (which is FLO8) has a stop-gained variant at position 142 (Glutamine to Stop). + + ??? toggle "Default reference genomes used for clade typing and antimicrobial resistance gene detection of _C. 
auris_" + | Clade | Genome Accession | Assembly Name | Strain | NCBI Submitter | Included mutations in AMR genes (not comprehensive) | + | --- | --- | --- | --- | --- | --- | + | Candida auris Clade I | GCA_002759435.2 | Cand_auris_B8441_V2 | B8441 | Centers for Disease Control and Prevention | | + | Candida auris Clade II | GCA_003013715.2 | ASM301371v2 | B11220 | Centers for Disease Control and Prevention | | + | Candida auris Clade III | GCA_002775015.1 | Cand_auris_B11221_V1 | B11221 | Centers for Disease Control and Prevention | _ERG11_ V125A/F126L | + | Candida auris Clade IV | GCA_003014415.1 | Cand_auris_B11243 | B11243 | Centers for Disease Control and Prevention | _ERG11_ Y132F | + | Candida auris Clade V | GCA_016809505.1 | ASM1680950v1 | IFRC2087 | Centers for Disease Control and Prevention | | + + ??? toggle "Known resistance-conferring mutations for _Candida auris_" + Mutations in these genes that are known to confer resistance are shown below (source: MARDy database http://mardy.dide.ic.ac.uk/index.php) + + | **Organism** | **Found in** | **Gene name** | **Gene locus** | **AA mutation** | **Drug** | **Tandem repeat name** | **Tandem repeat sequence** | **Reference** | + | --- | --- | --- | --- | --- | --- | --- | --- | --- | + | **Candida auris** | **Human** | **ERG11** | | **Y132F** | **Fluconazole** | | | [**10.1093/cid/ciw691**](https://academic.oup.com/cid/article/64/2/134/2706620/Simultaneous-Emergence-of-Multidrug-Resistant) | + | **Candida auris** | **Human** | **ERG11** | | **K143R** | **Fluconazole** | | | [**10.1093/cid/ciw691**](https://academic.oup.com/cid/article/64/2/134/2706620/Simultaneous-Emergence-of-Multidrug-Resistant) | + | **Candida auris** | **Human** | **ERG11** | | **F126T** | **Fluconazole** | | | [**10.1093/cid/ciw691**](https://academic.oup.com/cid/article/64/2/134/2706620/Simultaneous-Emergence-of-Multidrug-Resistant) | + | **Candida auris** | **Human** | **FKS1** | | **S639P** | **Micafungin** | | | 
[**10.1016/j.diagmicrobio.2017.10.021**](https://www.sciencedirect.com/science/article/pii/S0732889317303498) | + | **Candida auris** | **Human** | **FKS1** | | **S639P** | **Caspofungin** | | | [**10.1016/j.diagmicrobio.2017.10.021**](https://www.sciencedirect.com/science/article/pii/S0732889317303498) | + | **Candida auris** | **Human** | **FKS1** | | **S639P** | **Anidulafungin** | | | [**10.1016/j.diagmicrobio.2017.10.021**](https://www.sciencedirect.com/science/article/pii/S0732889317303498) | + | **Candida auris** | **Human** | **FKS1** | | **S639F** | **Micafungin** | | | [**10.1093/jac/dkx480**](https://academic.oup.com/jac/advance-article/doi/10.1093/jac/dkx480/4794718) | + | **Candida auris** | **Human** | **FKS1** | | **S639F** | **Caspofungin** | | | [**10.1093/jac/dkx480**](https://academic.oup.com/jac/advance-article/doi/10.1093/jac/dkx480/4794718) | + | **Candida auris** | **Human** | **FKS1** | | **S639F** | **Anidulafungin** | | | [**10.1093/jac/dkx480**](https://academic.oup.com/jac/advance-article/doi/10.1093/jac/dkx480/4794718) | + | **Candida auris** | **Human** | **FUR1** | **CAMJ_004922** | **F211I** | **5-flucytosine** | | | [**https://doi.org/10.1038/s41426-018-0045-x**](https://www.nature.com/articles/s41426-018-0045-x) | ??? toggle "_Candida albicans_" @@ -451,6 +476,8 @@ The TheiaEuk workflow automatically activates taxa-specific tasks after identifi ### Outputs +
+ | **Variable** | **Type** | **Description** | |---|---|---| | cg_pipeline_docker | String | Docker file used for running CG-Pipeline on cleaned reads | @@ -508,3 +535,5 @@ The TheiaEuk workflow automatically activates taxa-specific tasks after identifi | seq_platform | String | Sequencing platform input by the user | | theiaeuk_illumina_pe_analysis_date | String | Date of TheiaEuk workflow execution | | theiaeuk_illumina_pe_version | String | TheiaEuk workflow version used | + +</div>
\ No newline at end of file diff --git a/docs/workflows/genomic_characterization/theiameta.md b/docs/workflows/genomic_characterization/theiameta.md index e9fd004e7..55c26d9a6 100644 --- a/docs/workflows/genomic_characterization/theiameta.md +++ b/docs/workflows/genomic_characterization/theiameta.md @@ -23,6 +23,8 @@ TheiaMeta can use one of two distinct methods for generating and processing the The TheiaMeta_Illumina_PE workflow processes Illumina paired-end (PE) reads generated for metagenomic characterization (typically by shotgun). By default, this workflow will assume that input reads were generated using a 300-cycle sequencing kit (i.e. 2 x 150 bp reads). Modifications to the optional parameter for `trim_minlen` may be required to accommodate shorter read data, such as 2 x 75bp reads generated using a 150-cycle sequencing kit. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | theiameta_illumina_pe | **read1** | File | Forward Illumina read in FASTQ file format | | Required | @@ -121,6 +123,8 @@ The TheiaMeta_Illumina_PE workflow processes Illumina paired-end (PE) reads ge | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Workflow Tasks ??? task "`versioning`: Version Capture for TheiaMeta" @@ -276,6 +280,8 @@ The TheiaMeta_Illumina_PE workflow processes Illumina paired-end (PE) reads ge ### Outputs +
+ | **Variable** | **Type** | **Description** | |---|---|---| | assembly_fasta | File | Final assembly (MAG) | @@ -350,6 +356,8 @@ The TheiaMeta_Illumina_PE workflow processes Illumina paired-end (PE) reads ge | trimmomatic_docker | String | Docker image of trimmomatic | | trimmomatic_version | String | Version of trimmomatic used | +
+ ## References > **Human read removal tool (HRRT)**: diff --git a/docs/workflows/genomic_characterization/theiaprok.md b/docs/workflows/genomic_characterization/theiaprok.md index 3e9329a3b..188a8a7c5 100644 --- a/docs/workflows/genomic_characterization/theiaprok.md +++ b/docs/workflows/genomic_characterization/theiaprok.md @@ -45,6 +45,8 @@ All input reads are processed through "[core tasks](#core-tasks-performed-for-al The TheiaProk_FASTA workflow takes in assembly files in FASTA format. +
+ | **Terra Task name** | **Variable** | **Type** | **Description** | **Default value** | **Terra Status** | **Workflow** | |---|---|---|---|---|---|---| | *workflow name | **samplename** | String | Name of sample to be analyzed | | Required | FASTA, ONT, PE, SE | @@ -569,6 +571,8 @@ All input reads are processed through "[core tasks](#core-tasks-performed-for-al | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | FASTA, ONT, PE, SE | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | FASTA, ONT, PE, SE | +
+ !!! tip "Skip Characterization" Ever wanted to skip characterization? Now you can! Set the optional input `perform_characterization` to **`false`** to only generate an assembly and run assembly QC. @@ -586,7 +590,7 @@ All input reads are processed through "[core tasks](#core-tasks-performed-for-al ??? task "`screen`: Total Raw Read Quantification and Genome Size Estimation" - The [`screen`](https://github.com/theiagen/public_health_bioinformatics/blob/main/tasks/quality_control/comparisons/task_screen.wdl) task ensures the quantity of sequence data is sufficient to undertake genomic analysis. It uses bash commands for quantification of reads and base pairs, and [mash](https://mash.readthedocs.io/en/latest/index.html) sketching to estimate the genome size and its coverage. At each step, the results are assessed relative to pass/fail criteria and thresholds that may be defined by optional user inputs. Samples that do not meet these criteria will not be processed further by the workflow: + The [`screen`](https://github.com/theiagen/public_health_bioinformatics/blob/main/tasks/quality_control/comparisons/task_screen.wdl) task ensures the quantity of sequence data is sufficient to undertake genomic analysis. It uses [`fastq-scan`](https://github.com/rpetit3/fastq-scan) and bash commands for quantification of reads and base pairs, and [mash](https://mash.readthedocs.io/en/latest/index.html) sketching to estimate the genome size and its coverage. At each step, the results are assessed relative to pass/fail criteria and thresholds that may be defined by optional user inputs. Samples that do not meet these criteria will not be processed further by the workflow: 1. Total number of reads: A sample will fail the read screening task if its total number of reads is less than or equal to `min_reads`. 2. 
The proportion of basepairs reads in the forward and reverse read files: A sample will fail the read screening if fewer than `min_proportion` basepairs are in either the reads1 or read2 files. @@ -1579,6 +1583,8 @@ The TheiaProk workflows automatically activate taxa-specific sub-workflows after ### Outputs +
+ | **Variable** | **Type** | **Description** | **Workflow** | |---|---|---|---| | abricate_abaum_database | String | Database of reference A. baumannii plasmid typing genes used for plasmid typing | FASTA, ONT, PE, SE | @@ -1976,4 +1982,6 @@ The TheiaProk workflows automatically activate taxa-specific sub-workflows after | ts_mlst_version | String | Version of Torsten Seemann’s MLST tool used | FASTA, ONT, PE, SE | | virulencefinder_docker | String | VirulenceFinder docker image used | FASTA, ONT, PE, SE | | virulencefinder_hits | String | Virulence genes detected by VirulenceFinder | FASTA, ONT, PE, SE | -| virulencefinder_report_tsv | File | Output TSV file created by VirulenceFinder | FASTA, ONT, PE, SE | \ No newline at end of file +| virulencefinder_report_tsv | File | Output TSV file created by VirulenceFinder | FASTA, ONT, PE, SE | + +</div>
\ No newline at end of file diff --git a/docs/workflows/genomic_characterization/vadr_update.md b/docs/workflows/genomic_characterization/vadr_update.md index 4743222fb..ceaa45fa8 100644 --- a/docs/workflows/genomic_characterization/vadr_update.md +++ b/docs/workflows/genomic_characterization/vadr_update.md @@ -29,6 +29,8 @@ Please note the default values are for SARS-CoV-2. This workflow runs on the sample level. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | vadr_update | **assembly_length_unambiguous** | Int | Number of unambiguous basecalls within the consensus assembly | | Required | @@ -44,6 +46,8 @@ This workflow runs on the sample level. | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Outputs | **Variable** | **Type** | **Description** | diff --git a/docs/workflows/phylogenetic_construction/augur.md b/docs/workflows/phylogenetic_construction/augur.md index a3300da5a..7ccf78d56 100644 --- a/docs/workflows/phylogenetic_construction/augur.md +++ b/docs/workflows/phylogenetic_construction/augur.md @@ -30,6 +30,8 @@ The Augur_Prep_PHB workflow takes assembly FASTA files and associated metadata f This workflow runs on the sample level. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | augur_prep | **assembly** | File | Assembly/consensus file (single FASTA file per sample) | | Required | @@ -48,6 +50,8 @@ This workflow runs on the sample level. | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ #### Augur_Prep Outputs | **Variable** | **Type** | **Description** | @@ -170,6 +174,8 @@ The Augur_PHB workflow takes in a ***set*** of SARS-CoV-2 (or any other viral This workflow runs on the set level. Please note that for every task, runtime parameters are modifiable (cpu, disk_size, docker, and memory); most of these values have been excluded from the table below for convenience. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | augur | **assembly_fastas** | Array[File] | An array of the assembly files to use; use either the HA or NA segment for flu samples | | Required | @@ -235,6 +241,8 @@ This workflow runs on the set level. Please note that for every task, runtime pa | mutation_context | **docker** | String | Docker image used for the mutation_context task that is specific to Mpox. Do not modify. | us-docker.pkg.dev/general-theiagen/theiagen/nextstrain-mpox-mutation-context:2024-06-27 | Do Not Modify, Optional | | mutation_context | **memory** | Int | Memory size in GB requested for the mutation_context task that is specific to Mpox. | 4 | Optional | +
+ ??? task "Workflow Tasks" ##### Augur Workflow Tasks {#augur-tasks} diff --git a/docs/workflows/phylogenetic_construction/core_gene_snp.md b/docs/workflows/phylogenetic_construction/core_gene_snp.md index 9931522d7..a79cdc225 100644 --- a/docs/workflows/phylogenetic_construction/core_gene_snp.md +++ b/docs/workflows/phylogenetic_construction/core_gene_snp.md @@ -22,6 +22,8 @@ For further detail regarding Pirate options, please see [PIRATE's documentation) This workflow runs on the set level. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | core_gene_snp_workflow | **cluster_name** | String | Name of sample set | | Required | @@ -84,6 +86,8 @@ This workflow runs on the set level. | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Workflow Tasks By default, the Core_Gene_SNP workflow will begin by analyzing the input sample set using [PIRATE](https://github.com/SionBayliss/PIRATE). Pirate takes in GFF3 files and classifies the genes into gene families by sequence identity, outputting a pangenome summary file. The workflow will instruct Pirate to create core gene and pangenome alignments using this gene family data. Setting the "align" input variable to false will turn off this behavior, and the workflow will output only the pangenome summary. The workflow will then use the core gene alignment from `Pirate` to infer a phylogenetic tree using `IQ-TREE`. It will also produce an SNP distance matrix from this alignment using [snp-dists](https://github.com/tseemann/snp-dists). This behavior can be turned off by setting the `core_tree` input variable to false. The workflow will not create a pangenome tree or SNP-matrix by default, but this behavior can be turned on by setting the `pan_tree` input variable to true. @@ -98,6 +102,8 @@ By default, this task appends a Phandango coloring tag to color all items from t ### Outputs +
+ | **Variable** | **Type** | **Description** | |---|---|---| | core_gene_snp_wf_analysis_date | String | Date of analysis using Core_Gene_SNP workflow | @@ -118,6 +124,8 @@ By default, this task appends a Phandango coloring tag to color all items from t | pirate_snp_dists_version | String | Version of snp-dists used | | pirate_summarized_data | File | The presence/absence matrix generated by the summarize_data task from the list of columns provided | +
+ ## References >Sion C Bayliss, Harry A Thorpe, Nicola M Coyle, Samuel K Sheppard, Edward J Feil, PIRATE: A fast and scalable pangenomics toolbox for clustering diverged orthologues in bacteria, *GigaScience*, Volume 8, Issue 10, October 2019, giz119,  diff --git a/docs/workflows/phylogenetic_construction/czgenepi_prep.md b/docs/workflows/phylogenetic_construction/czgenepi_prep.md index 1c2b88804..27b0913b6 100644 --- a/docs/workflows/phylogenetic_construction/czgenepi_prep.md +++ b/docs/workflows/phylogenetic_construction/czgenepi_prep.md @@ -18,6 +18,8 @@ Variables with both the "Optional" and "Required" tag require the column (regard This workflow runs on the set level. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | czgenepi_prep | **sample_names** | Array[String] | The array of sample ids you want to prepare for CZ GEN EPI | | Required | @@ -46,6 +48,8 @@ This workflow runs on the set level. | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Outputs The concatenated_czgenepi_fasta and concatenated_czgenepi_metadata files can be uploaded directly to CZ GEN EPI without any adjustments. diff --git a/docs/workflows/phylogenetic_construction/find_shared_variants.md b/docs/workflows/phylogenetic_construction/find_shared_variants.md index 49908c0e0..2e4767c46 100644 --- a/docs/workflows/phylogenetic_construction/find_shared_variants.md +++ b/docs/workflows/phylogenetic_construction/find_shared_variants.md @@ -20,6 +20,8 @@ The primary intended input of the workflow is the `snippy_variants_results` outp All variant data included in the sample set should be generated from aligning sequencing reads to the **same reference genome**. If variant data was generated using different reference genomes, shared variants cannot be identified and results will be less useful. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | | --- | --- | --- | --- | --- | --- | | shared_variants_wf | **concatenated_file_name** | String | String of your choice to prefix output files | | Required | @@ -33,6 +35,8 @@ All variant data included in the sample set should be generated from aligning se | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Tasks ??? task "Concatenate Variants" diff --git a/docs/workflows/phylogenetic_construction/ksnp3.md b/docs/workflows/phylogenetic_construction/ksnp3.md index e54a0c27b..c749754ac 100644 --- a/docs/workflows/phylogenetic_construction/ksnp3.md +++ b/docs/workflows/phylogenetic_construction/ksnp3.md @@ -19,6 +19,8 @@ You can learn more about the kSNP3 workflow, including how to visualize the outp ### Inputs +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | ksnp3_workflow | **assembly_fasta** | Array[File] | The assembly files to be analyzed | | Required | @@ -62,6 +64,8 @@ You can learn more about the kSNP3 workflow, including how to visualize the outp | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Workflow Actions The `ksnp3` workflow is run on the set of assembly files to produce both pan-genome and core-genome phylogenies. This also results in alignment files which - are used by [`snp-dists`](https://github.com/tseemann/snp-dists) to produce a pairwise SNP distance matrix for both the pan-genome and core-genomes. @@ -86,6 +90,8 @@ If you fill out the `data_summary_*` and `sample_names` optional variables, you ### Outputs +
+ | **Variable** | **Type** | **Description** | |---|---|---| | ksnp3_core_snp_matrix | File | The SNP matrix made with the core genome; formatted for Phandango if `phandango_coloring` input is `true` | @@ -109,6 +115,8 @@ If you fill out the `data_summary_*` and `sample_names` optional variables, you | ksnp3_wf_analysis_date | String | The date the workflow was run | | ksnp3_wf_version | String | The version of the repository the workflow is hosted in | +
+ ## References >Shea N Gardner, Tom Slezak, Barry G. Hall, kSNP3.0: SNP detection and phylogenetic analysis of genomes without genome alignment or reference genome, *Bioinformatics*, Volume 31, Issue 17, 1 September 2015, Pages 2877–2878,  diff --git a/docs/workflows/phylogenetic_construction/lyve_set.md b/docs/workflows/phylogenetic_construction/lyve_set.md index 4476cf02b..c2eef8818 100644 --- a/docs/workflows/phylogenetic_construction/lyve_set.md +++ b/docs/workflows/phylogenetic_construction/lyve_set.md @@ -17,6 +17,8 @@ The Lyve_SET WDL workflow runs the [Lyve-SET](https://github.com/lskatz/lyve-SET ### Inputs +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | lyveset_workflow | **dataset_name** | String | Free text string used to label output files | | Required | @@ -45,6 +47,8 @@ The Lyve_SET WDL workflow runs the [Lyve-SET](https://github.com/lskatz/lyve-SET | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Workflow Actions The Lyve_SET WDL workflow is run using read data from a set of samples. The workflow will produce a pairwise SNP matrix for the sample set and a maximum likelihood phylogenetic tree. Details regarding the default implementation of Lyve_SET and optional modifications are listed below. diff --git a/docs/workflows/phylogenetic_construction/mashtree_fasta.md b/docs/workflows/phylogenetic_construction/mashtree_fasta.md index 54751e398..d8d5e79b5 100644 --- a/docs/workflows/phylogenetic_construction/mashtree_fasta.md +++ b/docs/workflows/phylogenetic_construction/mashtree_fasta.md @@ -16,6 +16,8 @@ This workflow also features an optional module, `summarize_data`, that creates a ### Inputs +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | mashtree_fasta | **assembly_fasta** | Array[File] | The set of assembly fastas | | Required | @@ -49,6 +51,8 @@ This workflow also features an optional module, `summarize_data`, that creates a | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Workflow Actions `MashTree_Fasta` is run on a set of assembly fastas and creates a phylogenetic tree and matrix. These outputs are passed to a task that will rearrange the matrix to match the order of the terminal ends in the phylogenetic tree. @@ -63,6 +67,8 @@ By default, this task appends a Phandango coloring tag to color all items from t ### Outputs +
+ | **Variable** | **Type** | **Description** | |---|---|---| | mashtree_docker | String | The Docker image used to run the mashtree task | @@ -74,6 +80,8 @@ By default, this task appends a Phandango coloring tag to color all items from t | mashtree_wf_analysis_date | String | The date the workflow was run | | mashtree_wf_version | String | The version of PHB the workflow is hosted in | +
+ ## References > Katz, L. S., Griswold, T., Morrison, S., Caravas, J., Zhang, S., den Bakker, H.C., Deng, X., and Carleton, H. A., (2019). Mashtree: a rapid comparison of whole genome sequence files. Journal of Open Source Software, 4(44), 1762,  diff --git a/docs/workflows/phylogenetic_construction/snippy_streamline.md b/docs/workflows/phylogenetic_construction/snippy_streamline.md index 744b59482..ed70f28be 100644 --- a/docs/workflows/phylogenetic_construction/snippy_streamline.md +++ b/docs/workflows/phylogenetic_construction/snippy_streamline.md @@ -65,6 +65,8 @@ To run Snippy_Streamline, either a reference genome must be provided (`reference - Using the core genome - `core_genome` = true (as default) +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | snippy_streamline | **read1** | Array[File] | The forward read files | | Required | @@ -133,6 +135,8 @@ To run Snippy_Streamline, either a reference genome must be provided (`reference | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Workflow Tasks For automatic reference selection by the workflow (optional): @@ -179,6 +183,8 @@ For all cases: ### Outputs +
+ | **Variable** | **Type** | **Description** | |---|---|---| | snippy_centroid_docker | String | Docker file used for Centroid | @@ -223,3 +229,5 @@ For all cases: | snippy_variants_snippy_docker | Array[String] | Docker file used for Snippy in the Snippy_Variants subworkflow | | snippy_variants_snippy_version | Array[String] | Version of Snippy_Tree subworkflow used | | snippy_wg_snp_matrix | File | CSV file of whole genome pairwise SNP distances between samples, calculated from the final alignment | + +</div>
\ No newline at end of file diff --git a/docs/workflows/phylogenetic_construction/snippy_streamline_fasta.md b/docs/workflows/phylogenetic_construction/snippy_streamline_fasta.md index 11f482891..0e9680518 100644 --- a/docs/workflows/phylogenetic_construction/snippy_streamline_fasta.md +++ b/docs/workflows/phylogenetic_construction/snippy_streamline_fasta.md @@ -39,6 +39,8 @@ The `Snippy_Streamline_FASTA` workflow is an all-in-one approach to generating a ### Inputs +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | snippy_streamline_fasta | **assembly_fasta** | Array[File] | The assembly files for your samples | | Required | @@ -107,8 +109,12 @@ The `Snippy_Streamline_FASTA` workflow is an all-in-one approach to generating a | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Outputs +
+ | **Variable** | **Type** | **Description** | |---|---|---| | snippy_centroid_docker | String | Docker file used for Centroid | @@ -151,3 +157,5 @@ The `Snippy_Streamline_FASTA` workflow is an all-in-one approach to generating a | snippy_variants_snippy_docker | Array[String] | Docker file used for Snippy in the Snippy_Variants subworkfow | | snippy_variants_snippy_version | Array[String] | Version of Snippy_Tree subworkflow used | | snippy_wg_snp_matrix | File | CSV file of whole genome pairwise SNP distances between samples, calculated from the final alignment | + +
diff --git a/docs/workflows/phylogenetic_construction/snippy_tree.md b/docs/workflows/phylogenetic_construction/snippy_tree.md index 86a19304c..4c9c7b02b 100644 --- a/docs/workflows/phylogenetic_construction/snippy_tree.md +++ b/docs/workflows/phylogenetic_construction/snippy_tree.md @@ -53,6 +53,8 @@ Sequencing data used in the Snippy_Tree workflow must: - Using the core genome - `core_genome` = true (as default) +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | snippy_tree_wf | **tree_name_updated** | String | Internal component, do not modify. Used for replacing spaces with underscores_ | | Do not modify | @@ -123,6 +125,8 @@ Sequencing data used in the Snippy_Tree workflow must: | wg_snp_dists | **disk_size** | Int | Amount of storage (in GB) to allocate to the task | 50 | Optional | | wg_snp_dists | **memory** | Int | Amount of memory/RAM (in GB) to allocate to the task | 2 | Optional | +
+ ### Workflow Tasks ??? task "Snippy" @@ -308,6 +312,8 @@ Sequencing data used in the Snippy_Tree workflow must: ### Outputs +
+ | **Variable** | **Type** | **Description** | |---|---|---| | snippy_cg_snp_matrix | File | CSV file of core genome pairwise SNP distances between samples, calculated from the final alignment | @@ -336,6 +342,8 @@ Sequencing data used in the Snippy_Tree workflow must: | snippy_tree_version | String | Version of Snippy_Tree workflow | | snippy_wg_snp_matrix | File | CSV file of whole genome pairwise SNP distances between samples, calculated from the final alignment | +
+ ## References > **Gubbins:** Croucher, Nicholas J., Andrew J. Page, Thomas R. Connor, Aidan J. Delaney, Jacqueline A. Keane, Stephen D. Bentley, Julian Parkhill, and Simon R. Harris. 2015. "Rapid Phylogenetic Analysis of Large Samples of Recombinant Bacterial Whole Genome Sequences Using Gubbins." Nucleic Acids Research 43 (3): e15. diff --git a/docs/workflows/phylogenetic_construction/snippy_variants.md b/docs/workflows/phylogenetic_construction/snippy_variants.md index b1fc18885..b62d1fbb3 100644 --- a/docs/workflows/phylogenetic_construction/snippy_variants.md +++ b/docs/workflows/phylogenetic_construction/snippy_variants.md @@ -29,6 +29,8 @@ The `Snippy_Variants` workflow aligns single-end or paired-end reads (in FASTQ f !!! info "Query String" The query string can be a gene or any other annotation that matches the GenBank file/output VCF **EXACTLY** +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | snippy_variants_wf | **reference_genome_file** | File | Reference genome (GenBank file or fasta) | | Required | @@ -54,6 +56,8 @@ The `Snippy_Variants` workflow aligns single-end or paired-end reads (in FASTQ f | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Workflow Tasks `Snippy_Variants` uses the snippy tool to align reads to the reference and call SNPs, MNPs and INDELs according to optional input parameters. The output includes a file of variants that is then queried using the `grep` bash command to identify any mutations in specified genes or annotations of interest. The query string MUST match the gene name or annotation as specified in the GenBank file and provided in the output variant file in the `snippy_results` column. @@ -66,6 +70,8 @@ The `Snippy_Variants` workflow aligns single-end or paired-end reads (in FASTQ f !!! warning "Note on coverage calculations" The outputs from `samtools coverage` (found in the `snippy_variants_coverage_tsv` file) may differ from the `snippy_variants_percent_ref_coverage` due to different calculation methods. `samtools coverage` computes genome-wide coverage metrics (e.g., the proportion of bases covered at depth ≥ 1), while `snippy_variants_percent_ref_coverage` uses a user-defined minimum coverage threshold (default is 10), calculating the proportion of the reference genome with a depth greater than or equal to this threshold. +
+ | **Variable** | **Type** | **Description** | |---|---|---| | snippy_variants_bai | File | Indexed bam file of the reads aligned to the reference | @@ -85,3 +91,5 @@ The `Snippy_Variants` workflow aligns single-end or paired-end reads (in FASTQ f | snippy_variants_summary | File | A summary TXT fie showing the number of mutations identified for each mutation type | | snippy_variants_version | String | Version of Snippy used | | snippy_variants_wf_version | String | Version of Snippy_Variants used | + +
\ No newline at end of file diff --git a/docs/workflows/phylogenetic_placement/samples_to_ref_tree.md b/docs/workflows/phylogenetic_placement/samples_to_ref_tree.md index 308b02b60..92447e25e 100644 --- a/docs/workflows/phylogenetic_placement/samples_to_ref_tree.md +++ b/docs/workflows/phylogenetic_placement/samples_to_ref_tree.md @@ -17,6 +17,8 @@ However, nextclade can be used on any organism as long as an an existing, high-q ### Inputs +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | nextclade_addToRefTree | **assembly_fasta** | File | A fasta file with query sequence(s) to be placed onto the global tree | | Required | @@ -34,8 +36,12 @@ However, nextclade can be used on any organism as long as an an existing, high-q | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Outputs +
+ | **Variable** | **Type** | **Description** | |---|---|---| | treeUpdate_auspice_json | File | Phylogenetic tree with user placed samples | @@ -45,3 +51,5 @@ However, nextclade can be used on any organism as long as an an existing, high-q | treeUpdate_nextclade_version | String | Nextclade version used | | samples_to_ref_tree_analysis_date | String | Date of analysis | | samples_to_ref_tree_version | String | Version of the Public Health Bioinformatics (PHB) repository used | + +
diff --git a/docs/workflows/phylogenetic_placement/usher.md b/docs/workflows/phylogenetic_placement/usher.md index b2a016b42..ffe0cf1be 100644 --- a/docs/workflows/phylogenetic_placement/usher.md +++ b/docs/workflows/phylogenetic_placement/usher.md @@ -14,6 +14,8 @@ While this workflow is technically a set-level workflow, it works on the sample-level too. When run on the set-level, the samples are placed with respect to each other. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | usher_workflow | **assembly_fasta** | Array[File] | The assembly files for the samples you want to place on the pre-existing; can either be a set of samples, an individual sample, or multiple individual samples | | Required | @@ -29,8 +31,12 @@ While this workflow is technically a set-level workflow, it works on the sample- | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Outputs +
+ | **Variable** | **Type** | **Description** | |---|---|---| | usher_clades | File | The clades predicted for the samples | @@ -41,3 +47,5 @@ While this workflow is technically a set-level workflow, it works on the sample- | usher_subtrees | Array[File] | An array of subtrees where your samples have been placed | | usher_uncondensed_tree | File | The entire global tree with your samples included (warning: may be a very large file if the organism is "sars-cov-2") | | usher_version | String | The version of UShER used | + +
diff --git a/docs/workflows/public_data_sharing/mercury_prep_n_batch.md b/docs/workflows/public_data_sharing/mercury_prep_n_batch.md index 77efad884..459d3be88 100644 --- a/docs/workflows/public_data_sharing/mercury_prep_n_batch.md +++ b/docs/workflows/public_data_sharing/mercury_prep_n_batch.md @@ -52,7 +52,7 @@ To help users collect all required metadata, we have created the following Excel The `using_clearlabs_data` and `using_reads_dehosted` arguments change the default values for the `read1_column_name`, `assembly_fasta_column_name`, and `assembly_mean_coverage_column_name` metadata columns. The default values are shown in the table below in addition to what they are changed to depending on what arguments are used. - | Variable | Default Value | with `using_clearlabs_data` | with `using_reads_dehosted` | with both  `using_clearlabs_data` ***and*** `using_reads_dehosted` | + | Variable | Default Value | with `using_clearlabs_data` | with `using_reads_dehosted` | with both  `using_clearlabs_data` **_and_** `using_reads_dehosted` | | --- | --- | --- | --- | --- | | `read1_column_name` | `"read1_dehosted"` | `"clearlabs_fastq_gz"` | `"reads_dehosted"` | `"reads_dehosted"` | | `assembly_fasta_column_name` | `"assembly_fasta"` | `"clearlabs_fasta"` | `"assembly_fasta"` | `"clearlabs_fasta"` | @@ -60,14 +60,19 @@ To help users collect all required metadata, we have created the following Excel ### Inputs +!!! tip "Use the sample table for the `terra_table_name` input" + Make sure your entry for `terra_table_name` is for the _sample_ table! While the root entity needs to be the set table, the input value for `terra_table_name` should be the sample table. + This workflow runs on the set-level. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | mercury_prep_n_batch | **gcp_bucket_uri** | String | Google bucket where your SRA reads will be temporarily stored before transferring to SRA. Example: "gs://theiagen_sra_transfer" | | Required | | mercury_prep_n_batch | **sample_names** | Array[String] | The samples you want to submit | | Required | -| mercury_prep_n_batch | **terra_project_name** | String | The name of your Terra project. You can find this information in the URL of the webpage of your Terra dashboard. For example, if your URL contains #workspaces/example/my_workspace/ then your project name is example | | Required | -| mercury_prep_n_batch | **terra_table_name** | String | The name of the Terra table where your samples can be found. Do not include the entity: prefix or the _id suffix, just the name of the table as listed in the sidebar on lefthand side of the Terra Data tab. | | Required | +| mercury_prep_n_batch | **terra_project_name** | String | The name of your Terra project. You can find this information in the URL of the webpage of your Terra dashboard. For example, if your URL contains `#workspaces/example/my_workspace/` then your project name is `example` | | Required | +| mercury_prep_n_batch | **terra_table_name** | String | The name of the Terra table where your **samples** can be found. Do not include the `entity:` prefix, the `_id` suffix, or the `_set_id` suffix, just the name of the sample-level data table as listed in the sidebar on lefthand side of the Terra Data tab. | | Required | | mercury_prep_n_batch | **terra_workspace_name** | String | The name of your Terra workspace where your samples can be found. 
For example, if your URL contains #workspaces/example/my_workspace/ then your workspace name is my_workspace
+ ### Outputs +
+ | **Variable** | **Type** | **Description** | |---|---|---| | bankit_sqn_to_email | File | **Only for mpox submission**: the sqn file that you will use to submit mpox assembly files to NCBI via email | @@ -117,6 +126,8 @@ This workflow runs on the set-level. | mercury_script_version | String | Version of the Mercury tool that was used in this workflow | | sra_metadata | File | SRA metadata TSV file for upload | +
+ ???+ toggle "An example excluded_samples.tsv file" ##### An example excluded_samples.tsv file {#example-excluded-samples} diff --git a/docs/workflows/public_data_sharing/terra_2_gisaid.md b/docs/workflows/public_data_sharing/terra_2_gisaid.md index 902521641..888177500 100644 --- a/docs/workflows/public_data_sharing/terra_2_gisaid.md +++ b/docs/workflows/public_data_sharing/terra_2_gisaid.md @@ -28,6 +28,8 @@ The optional variable `frameshift_notification` has three options that correspon This workflow runs on the sample level. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | Terra_2_GISAID | **client_id** | String | This value should be filled with the client-ID provided by GISAID | | Required | @@ -43,8 +45,12 @@ This workflow runs on the sample level. | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Outputs +
+ | **Variable** | **Type** | **Description** | |---|---|---| | failed_uploads | Boolean | The metadata for any failed uploads | @@ -52,3 +58,5 @@ This workflow runs on the sample level. | gisaid_logs | File | The log files regarding the submission | | terra_2_gisaid_analysis_date | String | The date of the analysis | | terra_2_gisaid_version | String | The version of the PHB repository that this workflow is hosted in | + +
diff --git a/docs/workflows/public_data_sharing/terra_2_ncbi.md b/docs/workflows/public_data_sharing/terra_2_ncbi.md index ac09caae8..0fa48e50e 100644 --- a/docs/workflows/public_data_sharing/terra_2_ncbi.md +++ b/docs/workflows/public_data_sharing/terra_2_ncbi.md @@ -103,6 +103,8 @@ This workflow runs on set-level data tables. !!! info "Production Submissions" Please note that an optional Boolean variable, `submit_to_production`, is **required** for a production submission. +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | | --- | --- | --- | --- | --- | --- | | Terra_2_NCBI | **bioproject** | String | BioProject accession that the samples will be submitted to | | Required | @@ -143,6 +145,8 @@ This workflow runs on set-level data tables. | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ??? task "Workflow Tasks" ##### Workflow Tasks {#workflow-tasks} @@ -178,6 +182,8 @@ If the workflow ends unsuccessfully, no outputs will be shown on Terra and the ` The output files contain information mostly for debugging purposes. Additionally, if your submission is successful, the point of contact for the submission should also receive an email from NCBI notifying them of their submission success. +
+ | Variable | Description | Type | | --- | --- | --- | | biosample_failures | Text file listing samples that failed BioSample submission | File | @@ -193,6 +199,8 @@ The output files contain information mostly for debugging purposes. Additionally | terra_2_ncbi_analysis_date | Date that the workflow was run | String | | terra_2_ncbi_version | Version of the PHB repository where the workflow is hosted | String | +
+ ???+ toggle "An example excluded_samples.tsv file" ##### An example excluded_samples.tsv file {#example-excluded-samples} diff --git a/docs/workflows/standalone/gambit_query.md b/docs/workflows/standalone/gambit_query.md index 5d6dd3c44..095b6e463 100644 --- a/docs/workflows/standalone/gambit_query.md +++ b/docs/workflows/standalone/gambit_query.md @@ -12,6 +12,8 @@ The GAMBIT_Query_PHB workflow performs taxon assignment of a genome assembly usi ### Inputs +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | gambit_query | **assembly_fasta** | File | Assembly file in FASTA format | | Required | @@ -23,6 +25,8 @@ The GAMBIT_Query_PHB workflow performs taxon assignment of a genome assembly usi | gambit | **gambit_db_genomes** | File | Database of metadata for assembled query genomes; requires complementary signatures file. If not provided, uses default database "/gambit-db" | "gs://gambit-databases-rp/2.0.0/gambit-metadata-2.0.0-20240628.gdb" | Optional | | gambit | **gambit_db_signatures** | File | Signatures file; requires complementary genomes file. If not specified, the file from the docker container will be used. | "gs://gambit-databases-rp/2.0.0/gambit-signatures-2.0.0-20240628.gs" | Optional | +
+ ### Workflow Tasks [`GAMBIT`](https://github.com/jlumpe/gambit) determines the taxon of the genome assembly using a k-mer based approach to match the assembly sequence to the closest complete genome in a database, thereby predicting its identity. Sometimes, GAMBIT can confidently designate the organism to the species level. Other times, it is more conservative and assigns it to a higher taxonomic rank. @@ -40,6 +44,8 @@ For additional details regarding the GAMBIT tool and a list of available GAMBIT ### Outputs +
+ | **Variable** | **Type** | **Description** | |---|---|---| | gambit_closest_genomes | File | CSV file listing genomes in the GAMBIT database that are most similar to the query assembly | @@ -50,6 +56,8 @@ For additional details regarding the GAMBIT tool and a list of available GAMBIT | gambit_query_wf_analysis_date | String | Date of analysis | | gambit_query_wf_version | String | PHB repository version | | gambit_report | File | GAMBIT report in a machine-readable format | -| gambit_version | String | Version of gambit software used +| gambit_version | String | Version of gambit software used | + +
> GAMBIT (Genomic Approximation Method for Bacterial Identification and Tracking): A methodology to rapidly leverage whole genome sequencing of bacterial isolates for clinical identification. Lumpe et al. PLOS ONE, 2022. DOI: [10.1371/journal.pone.0277575](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0277575) diff --git a/docs/workflows/standalone/kraken2.md b/docs/workflows/standalone/kraken2.md index ffef97db3..df36e56a1 100644 --- a/docs/workflows/standalone/kraken2.md +++ b/docs/workflows/standalone/kraken2.md @@ -30,6 +30,8 @@ Besides the data input types, there are minimal differences between these two wo #### Suggested databases +
+ | Database name | Database Description | Suggested Applications | GCP URI (for usage in Terra) | Source | Database Size (GB) | Date of Last Update | | --- | --- | --- | --- | --- | --- | --- | | **Kalamari v5.1** | Kalamari is a database of complete public assemblies, that has been fine-tuned for enteric pathogens and is backed by trusted institutions. [Full list available here ( in chromosomes.tsv and plasmids.tsv)](https://github.com/lskatz/Kalamari/tree/master/src) | Single-isolate enteric bacterial pathogen analysis (Salmonella, Escherichia, Shigella, Listeria, Campylobacter, Vibrio, Yersinia) | **`gs://theiagen-large-public-files-rp/terra/databases/kraken2/kraken2.kalamari_5.1.tar.gz`** | ‣ | 1.5 | 18/5/2022 | @@ -40,8 +42,12 @@ Besides the data input types, there are minimal differences between these two wo | **EuPathDB48** | Eukaryotic pathogen genomes with contaminants removed. [Full list available here](https://genome-idx.s3.amazonaws.com/kraken/k2_eupathdb48_20201113/EuPathDB48_Contents.txt) | Eukaryotic organisms (Candida spp., Aspergillus spp., etc) | **`gs://theiagen-public-files-rp/terra/theiaprok-files/k2_eupathdb48_20201113.tar.gz`** | https://benlangmead.github.io/aws-indexes/k2 | 30.3 | 13/11/2020 | | **EuPathDB48** | Eukaryotic pathogen genomes with contaminants removed. [Full list available here](https://genome-idx.s3.amazonaws.com/kraken/k2_eupathdb48_20201113/EuPathDB48_Contents.txt) | Eukaryotic organisms (Candida spp., Aspergillus spp., etc) | **`gs://theiagen-large-public-files-rp/terra/databases/kraken/k2_eupathdb48_20230407.tar.gz`** | https://benlangmead.github.io/aws-indexes/k2 | 11 | 7/4/2023 | +
+ ### Inputs +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | **Workflow** | |---|---|---|---|---|---|---| | *workflow_name | **kraken2_db** | File | A Kraken2 database in .tar.gz format | | Required | ONT, PE, SE | @@ -67,8 +73,12 @@ Besides the data input types, there are minimal differences between these two wo | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | ONT, PE, SE | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | ONT, PE, SE | +
+ ### Outputs +
+ | **Variable** | **Type** | **Description** | |---|---|---| | kraken2_classified_read1 | File | FASTQ file of classified forward/R1 reads | @@ -85,6 +95,8 @@ Besides the data input types, there are minimal differences between these two wo | krona_html | File | HTML report of krona with visualisation of taxonomic classification of reads (if PE or SE) | | krona_version | String | krona version (if PE or SE) | +
+ #### Interpretation of results The most important outputs of the Kraken2 workflows are the `kraken2_report` files. These will include a breakdown of the number of sequences assigned to a particular taxon, and the percentage of reads assigned. [A complete description of the report format can be found here](https://github.com/DerrickWood/kraken2/blob/master/docs/MANUAL.markdown#standard-kraken-output-format). diff --git a/docs/workflows/standalone/ncbi_amrfinderplus.md b/docs/workflows/standalone/ncbi_amrfinderplus.md index 237e79039..38def0e99 100644 --- a/docs/workflows/standalone/ncbi_amrfinderplus.md +++ b/docs/workflows/standalone/ncbi_amrfinderplus.md @@ -19,6 +19,8 @@ You can check if a gene or point mutation is in the AMRFinderPlus database [here ### Inputs +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | amrfinderplus_wf | **assembly** | File | Genome assembly file in FASTA format. Can be generated by TheiaProk workflow or other bioinformatics workflows. | | Required | @@ -35,8 +37,12 @@ You can check if a gene or point mutation is in the AMRFinderPlus database [here | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Outputs +
+ | **Variable** | **Type** | **Description** | |---|---|---| | amrfinderplus_all_report | File | Output TSV file from AMRFinderPlus (described [here](https://github.com/ncbi/amr/wiki/Running-AMRFinderPlus#fields)) | @@ -54,6 +60,8 @@ You can check if a gene or point mutation is in the AMRFinderPlus database [here | amrfinderplus_wf_analysis_date | String | Date of analysis | | amrfinderplus_wf_version | String | Version of PHB used for the analysis | +
+ ## References >Feldgarden M, Brover V, Gonzalez-Escalona N, Frye JG, Haendiges J, Haft DH, Hoffmann M, Pettengill JB, Prasad AB, Tillman GE, Tyson GH, Klimke W. AMRFinderPlus and the Reference Gene Catalog facilitate examination of the genomic links among antimicrobial resistance, stress response, and virulence. Sci Rep. 2021 Jun 16;11(1):12728. doi: 10.1038/s41598-021-91456-0. PMID: 34135355; PMCID: PMC8208984. diff --git a/docs/workflows/standalone/ncbi_scrub.md b/docs/workflows/standalone/ncbi_scrub.md index e5e4bd6eb..0ae60c49b 100644 --- a/docs/workflows/standalone/ncbi_scrub.md +++ b/docs/workflows/standalone/ncbi_scrub.md @@ -16,6 +16,8 @@ There are three Kraken2 workflows: ### Inputs +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | **Workflow** | |---|---|---|---|---|---|---| | dehost_pe or dehost_se | **read1** | File | | | Required | PE, SE | @@ -35,6 +37,8 @@ There are three Kraken2 workflows: | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | PE, SE | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | PE, SE | +
+ ### Workflow Tasks This workflow is composed of two tasks, one to dehost the input reads and another to screen the clean reads with kraken2 and the viral+human database. @@ -69,6 +73,8 @@ This workflow is composed of two tasks, one to dehost the input reads and anothe ### Outputs +
+ | **Variable** | **Type** | **Description** | **Workflow** | |---|---|---|---| | kraken_human_dehosted | Float | Percent of human read data detected using the Kraken2 software after host removal | PE, SE | @@ -82,3 +88,4 @@ This workflow is composed of two tasks, one to dehost the input reads and anothe | read1_dehosted | File | Dehosted forward reads | PE, SE | | read2_dehosted | File | Dehosted reverse reads | PE | +
\ No newline at end of file diff --git a/docs/workflows/standalone/rasusa.md b/docs/workflows/standalone/rasusa.md index 055b235f5..38052ff41 100644 --- a/docs/workflows/standalone/rasusa.md +++ b/docs/workflows/standalone/rasusa.md @@ -27,6 +27,8 @@ RASUSA functions to randomly downsample the number of raw reads to a user-define ### Inputs +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Attribute** | **Terra Status** | |---|---|---|---|---|---| | rasusa_workflow | **coverage** | Float | Use to specify the desired coverage of reads after downsampling; actual coverage of subsampled reads will not be exact and may be slightly higher; always check the estimated clean coverage after performing downstream workflows to verify coverage values, when necessary | | Required | @@ -45,8 +47,12 @@ RASUSA functions to randomly downsample the number of raw reads to a user-define | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Outputs +
+ | **Variable** | **Type** | **Description** | |---|---|---| | rasusa_version | String | Version of RASUSA used for the analysis | @@ -55,6 +61,8 @@ RASUSA functions to randomly downsample the number of raw reads to a user-define | read1_subsampled | File | New read1 FASTQ files downsampled to desired coverage | | read2_subsampled | File | New read2 FASTQ files downsampled to desired coverage | +
+ !!! tip "Don't Forget!" Remember to use the subsampled reads in downstream analyses with `this.read1_subsampled` and `this.read2_subsampled` inputs. diff --git a/docs/workflows/standalone/rename_fastq.md b/docs/workflows/standalone/rename_fastq.md index 19ec4b4a6..d8d7196ba 100644 --- a/docs/workflows/standalone/rename_fastq.md +++ b/docs/workflows/standalone/rename_fastq.md @@ -12,6 +12,8 @@ This sample-level workflow receives a read file or a pair of read files (FASTQ), ### Inputs +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | rename_fastq_files | **new_filename** | String | New name for the FASTQ file(s) | | Required | @@ -24,6 +26,8 @@ This sample-level workflow receives a read file or a pair of read files (FASTQ), | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Outputs If a reverse read (`read2`) is provided, the files get renamed to the provided `new_filename` input with the notation `_R1.fastq.gz` and `_R2.fastq.gz`. If only `read1` is provided, the file is renamed to `.fastq.gz`. diff --git a/docs/workflows/standalone/tbprofiler_tngs.md b/docs/workflows/standalone/tbprofiler_tngs.md index 3e505cabb..d0061fdd7 100644 --- a/docs/workflows/standalone/tbprofiler_tngs.md +++ b/docs/workflows/standalone/tbprofiler_tngs.md @@ -12,6 +12,8 @@ This workflow is still in experimental research stages. Documentation is minimal ### Inputs +
+ | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | |---|---|---|---|---|---| | tbprofiler_tngs | **read1** | File | Illumina forward read file in FASTQ file format (compression optional) | | Required | @@ -62,8 +64,12 @@ This workflow is still in experimental research stages. Documentation is minimal | version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | | version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | +
+ ### Terra Outputs +
+ | **Variable** | **Type** | **Description** | |---|---|---| | tbp_parser_average_genome_depth | Float | The mean depth of coverage across all target regions included in the analysis | @@ -95,3 +101,5 @@ This workflow is still in experimental research stages. Documentation is minimal | trimmomatic_read2_trimmed | File | The read2 file post trimming | | trimmomatic_stats | File | The read trimming statistics | | trimmomatic_version | String | The version of trimmomatic used in this analysis | + +
diff --git a/docs/workflows/standalone/theiavalidate.md b/docs/workflows/standalone/theiavalidate.md index 2dfd5b2b6..a9c051f9c 100644 --- a/docs/workflows/standalone/theiavalidate.md +++ b/docs/workflows/standalone/theiavalidate.md @@ -39,6 +39,8 @@ If a column consists of only GCP URIs (Google Cloud file paths), the files will ### Inputs +
+ Please note that all string inputs **must** be enclosed in quotation marks; for example, "column1,column2" or "workspace1" | **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | @@ -62,6 +64,8 @@ Please note that all string inputs **must** be enclosed in quotation marks; for | export_two_tsvs | **cpu** | Int | Number of CPUs to allocate to the task | 1 | Optional | | export_two_tsvs | **disk_size** | Int | Amount of storage (in GB) to allocate to the task | 10 | Optional | +
+ The optional `validation_criteria_tsv` file takes the following format (tab-delimited; _a header line is required_): ```text linenums="1" @@ -95,6 +99,8 @@ Please note that the name in the **second column** will be displayed and used in ### Outputs +
+ | **Variable** | **Type** | **Description** | |---|---|---| | theiavalidate_criteria_differences | File | A TSV file that lists only the differences that fail to meet the validation criteria | @@ -108,6 +114,8 @@ Please note that the name in the **second column** will be displayed and used in | theiavalidate_version | String | The version of the TheiaValidate Python Docker | | theiavalidate_wf_version | String | The version of the PHB repository | +
+ ### Example Data and Outputs To help demonstrate how TheiaValidate works, please observe the following example and outputs: diff --git a/docs/workflows_overview/workflows_alphabetically.md b/docs/workflows_overview/workflows_alphabetically.md index 128e9bee7..5195dc0f5 100644 --- a/docs/workflows_overview/workflows_alphabetically.md +++ b/docs/workflows_overview/workflows_alphabetically.md @@ -6,6 +6,8 @@ title: Alphabetical Workflows --- +
+ | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| | [**Assembly_Fetch**](../workflows/data_import/assembly_fetch.md) | Download assemblies from NCBI, after optionally identifying the closest RefSeq reference genome to your own draft assembly | Any taxa | Sample-level | Yes | v1.3.0 | [Assembly_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Assembly_Fetch_PHB:main?tab=info) | @@ -49,6 +51,8 @@ title: Alphabetical Workflows | [**Usher_PHB**](../workflows/genomic_characterization/vadr_update.md)| Update VADR assignments | HAV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level | Yes | v1.2.1 | [VADR_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/VADR_Update_PHB:main?tab=info) | | [**Zip_Column_Content**](../workflows/data_export/zip_column_content.md)| Zip contents of a specified Terra data table column for many samples ("entities") | Any taxa | Set-level | Yes | v2.1.0 | [Zip_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Zip_Column_Content_PHB:main?tab=info) | +
+ *[Sample-level]: This workflow is run once for each sample *[Set-level]: This workflow is run once on a group of samples diff --git a/docs/workflows_overview/workflows_kingdom.md b/docs/workflows_overview/workflows_kingdom.md index ab8575a64..c9c001ec6 100644 --- a/docs/workflows_overview/workflows_kingdom.md +++ b/docs/workflows_overview/workflows_kingdom.md @@ -8,6 +8,7 @@ title: Workflows by Kingdom ### Any Taxa +
| **Name** | **Description** | **Taxa** | **Workflow Level** | **Command-line Compatible**[^1] | **Last known changes** | **Dockstore** | |---|---|---|---|---|---|---| @@ -25,8 +26,12 @@ title: Workflows by Kingdom | [**Transfer_Column_Content**](../workflows/data_export/transfer_column_content.md)| Transfer contents of a specified Terra data table column for many samples ("entities") to a GCP storage bucket location | Any taxa | Set-level | Yes | v1.3.0 | [Transfer_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Transfer_Column_Content_PHB:main?tab=info) | | [**Zip_Column_Content**](../workflows/data_export/zip_column_content.md)| Zip contents of a specified Terra data table column for many samples ("entities") | Any taxa | Set-level | Yes | v2.1.0 | [Zip_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Zip_Column_Content_PHB:main?tab=info) | +
+ ### Bacteria +
+ | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| | [**Core_Gene_SNP**](../workflows/phylogenetic_construction/core_gene_snp.md) | Pangenome analysis | Bacteria | Set-level | Some optional features incompatible, Yes | v2.1.0 | [Core_Gene_SNP_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Core_Gene_SNP_PHB:main?tab=info) | @@ -44,8 +49,12 @@ title: Workflows by Kingdom | [**Terra_2_NCBI**](../workflows/public_data_sharing/terra_2_ncbi.md)| Upload of sequence data to NCBI | Bacteria, Mycotics, Viral | Set-level | No | v2.1.0 | [Terra_2_NCBI_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_NCBI_PHB:main?tab=info) | | [**TheiaProk Workflow Series**](../workflows/genomic_characterization/theiaprok.md) | Bacterial genome assembly, QC and characterization from WGS data | Bacteria | Sample-level | Some optional features incompatible, Yes | v2.2.0 | [TheiaProk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_PE_PHB:main?tab=info), [TheiaProk_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_SE_PHB:main?tab=info), [TheiaProk_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_ONT_PHB:main?tab=info), [TheiaProk_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_FASTA_PHB:main?tab=info) | +
+ ### Mycotics +
+ | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| | [**Cauris_CladeTyper**](../workflows/standalone/cauris_cladetyper.md)| C. auris clade assignment | Mycotics | Sample-level | Yes | v1.0.0 | [Cauris_CladeTyper_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Cauris_CladeTyper_PHB:main?tab=info) | @@ -58,8 +67,11 @@ title: Workflows by Kingdom | [**Terra_2_NCBI**](../workflows/public_data_sharing/terra_2_ncbi.md)| Upload of sequence data to NCBI | Bacteria, Mycotics, Viral | Set-level | No | v2.1.0 | [Terra_2_NCBI_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_NCBI_PHB:main?tab=info) | | [**TheiaEuk**](../workflows/genomic_characterization/theiaeuk.md) | Mycotic genome assembly, QC and characterization from WGS data | Mycotics | Sample-level | Some optional features incompatible, Yes | v2.0.1 | [TheiaEuk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaEuk_Illumina_PE_PHB:main?tab=info) | +
+ ### Viral +
| **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| @@ -79,6 +91,8 @@ title: Workflows by Kingdom | [**Usher_PHB**](../workflows/phylogenetic_placement/usher.md)| Use UShER to rapidly and accurately place your samples on any existing phylogenetic tree | Monkeypox virus, SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.1.0 | [Usher_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Usher_PHB:main?tab=info) | | [**VADR_Update**](../workflows/genomic_characterization/vadr_update.md)| Update VADR assignments | HAV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level | Yes | v1.2.1 | [VADR_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/VADR_Update_PHB:main?tab=info) | +
+ *[Sample-level]: This workflow is run once for each sample *[Set-level]: This workflow is run once on a group of samples diff --git a/docs/workflows_overview/workflows_type.md b/docs/workflows_overview/workflows_type.md index 5e7e024b6..375c73e71 100644 --- a/docs/workflows_overview/workflows_type.md +++ b/docs/workflows_overview/workflows_type.md @@ -8,6 +8,8 @@ title: Workflows by Type ### Data Import +
+ | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| | [**Assembly_Fetch**](../workflows/data_import/assembly_fetch.md) | Download assemblies from NCBI, after optionally identifying the closest RefSeq reference genome to your own draft assembly | Any taxa | Sample-level | Yes | v1.3.0 | [Assembly_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Assembly_Fetch_PHB:main?tab=info) | @@ -15,8 +17,12 @@ title: Workflows by Type | [**Create_Terra_Table**](../workflows/data_import/create_terra_table.md)| Upload data to Terra and then run this workflow to have the table automatically created | Any taxa | | Yes | v2.2.0 | [Create_Terra_Table_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Create_Terra_Table_PHB:main?tab=info) | | [**SRA_Fetch**](../workflows/data_import/sra_fetch.md)| Import publicly available reads from SRA using SRR#, ERR# or DRR# | Any taxa | Sample-level | Yes | v2.2.0 | [SRA_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/SRA_Fetch_PHB:main?tab=info) | +
+ ### Genomic Characterization +
+ | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| | [**Freyja Workflow Series**](../workflows/genomic_characterization/freyja.md)| Recovers relative lineage abundances from mixed sample data and generates visualizations | SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.2.0 | [Freyja_FASTQ_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_FASTQ_PHB:main?tab=info), [Freyja_Plot_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Plot_PHB:main?tab=info), [Freyja_Dashboard_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Dashboard_PHB:main?tab=info), [Freyja_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Update_PHB:main?tab=info) | @@ -28,8 +34,12 @@ title: Workflows by Type | [**TheiaProk Workflow Series**](../workflows/genomic_characterization/theiaprok.md) | Bacterial genome assembly, QC and characterization from WGS data | Bacteria | Sample-level | Some optional features incompatible, Yes | v2.2.0 | [TheiaProk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_PE_PHB:main?tab=info), [TheiaProk_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_SE_PHB:main?tab=info), [TheiaProk_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_ONT_PHB:main?tab=info), [TheiaProk_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_FASTA_PHB:main?tab=info) | | [**VADR_Update**](../workflows/genomic_characterization/vadr_update.md)| Update VADR assignments | HAV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | 
Sample-level | Yes | v1.2.1 | [VADR_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/VADR_Update_PHB:main?tab=info) | +
+ ### Phylogenetic Construction +
+ | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| | [**Augur**](../workflows/phylogenetic_construction/augur.md) | Phylogenetic analysis for viral pathogens | Viral | Sample-level, Set-level | Yes | v2.1.0 | [Augur_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_Prep_PHB:main?tab=info), [Augur_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_PHB:main?tab=info) | @@ -44,31 +54,47 @@ title: Workflows by Type | [**Snippy_Tree**](../workflows/phylogenetic_construction/snippy_tree.md)| SNP-based phylogenetic analysis from reads, with option to mask recombination | Bacteria | Set-level | Some optional features incompatible, Yes | v2.1.0 | [Snippy_Tree_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Tree_PHB:main?tab=info) | | [**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.2.0 | [Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | +
+ ### Phylogenetic Placement +
+ | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| | [**Samples_to_Ref_Tree**](../workflows/phylogenetic_placement/samples_to_ref_tree.md)| Use Nextclade to rapidly and accurately place your samples on any existing phylogenetic tree | Monkeypox virus, SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.1.0 | [Samples_to_Ref_Tree_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Samples_to_Ref_Tree_PHB:main?tab=info) | | [**Usher_PHB**](../workflows/phylogenetic_placement/usher.md)| Use UShER to rapidly and accurately place your samples on any existing phylogenetic tree | Monkeypox virus, SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.1.0 | [Usher_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Usher_PHB:main?tab=info) | +
+ ### Public Data Sharing +
+ | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| | [**Mercury_Prep_N_Batch**](../workflows/public_data_sharing/mercury_prep_n_batch.md)| Prepare metadata and sequence data for submission to NCBI and GISAID | Influenza, Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v2.2.0 | [Mercury_Prep_N_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Mercury_Prep_N_Batch_PHB:main?tab=info) | | [**Terra_2_GISAID**](../workflows/public_data_sharing/terra_2_gisaid.md)| Upload of assembly data to GISAID | SARS-CoV-2, Viral | Set-level | Yes | v1.2.1 | [Terra_2_GISAID_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_GISAID_PHB:main?tab=info) | | [**Terra_2_NCBI**](../workflows/public_data_sharing/terra_2_ncbi.md)| Upload of sequence data to NCBI | Bacteria, Mycotics, Viral | Set-level | No | v2.1.0 | [Terra_2_NCBI_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_NCBI_PHB:main?tab=info) | +
+ ### Exporting Data from Terra +
+ | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| | [**Concatenate_Column_Content**](../workflows/data_export/concatenate_column_content.md) | Concatenate contents of a specified Terra data table column for many samples ("entities") | Any taxa | Set-level | Yes | v2.1.0 | [Concatenate_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Concatenate_Column_Content_PHB:main?tab=info) | | [**Transfer_Column_Content**](../workflows/data_export/transfer_column_content.md)| Transfer contents of a specified Terra data table column for many samples ("entities") to a GCP storage bucket location | Any taxa | Set-level | Yes | v1.3.0 | [Transfer_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Transfer_Column_Content_PHB:main?tab=info) | | [**Zip_Column_Content**](../workflows/data_export/zip_column_content.md)| Zip contents of a specified Terra data table column for many samples ("entities") | Any taxa | Set-level | Yes | v2.1.0 | [Zip_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Zip_Column_Content_PHB:main?tab=info) | +
+ ### Standalone +
+ | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| | [**Cauris_CladeTyper**](../workflows/standalone/cauris_cladetyper.md)| C. auris clade assignment | Mycotics | Sample-level | Yes | v1.0.0 | [Cauris_CladeTyper_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Cauris_CladeTyper_PHB:main?tab=info) | @@ -81,6 +107,8 @@ title: Workflows by Type | [**TBProfiler_tNGS**](../workflows/standalone/tbprofiler_tngs.md)| Performs in silico antimicrobial susceptibility testing on Mycobacterium tuberculosis targeted-NGS samples with TBProfiler and tbp-parser | Bacteria, TB | Sample-level | Yes | v2.0.0 | [TBProfiler_tNGS_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TBProfiler_tNGS_PHB:smw-tngs-tbprofiler-dev?tab=info) | | [**TheiaValidate**](../workflows/standalone/theiavalidate.md)| This workflow performs basic comparisons between user-designated columns in two separate tables. | Any taxa | | No | v2.0.0 | [TheiaValidate_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaValidate_PHB:main?tab=info) | +
+ *[Sample-level]: This workflow is run once for each sample *[Set-level]: This workflow is run once on a group of samples diff --git a/mkdocs.yml b/mkdocs.yml index fbe6df019..6c35901bc 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -23,7 +23,7 @@ nav: - Freyja Workflow Series: workflows/genomic_characterization/freyja.md - Pangolin_Update: workflows/genomic_characterization/pangolin_update.md - TheiaCoV Workflow Series: workflows/genomic_characterization/theiacov.md - - TheiaEuk: workflows/genomic_characterization/theiaeuk.md + - TheiaEuk Workflow Series: workflows/genomic_characterization/theiaeuk.md - TheiaMeta: workflows/genomic_characterization/theiameta.md - TheiaMeta_Panel: workflows/genomic_characterization/theiameta_panel.md - TheiaProk Workflow Series: workflows/genomic_characterization/theiaprok.md @@ -101,7 +101,7 @@ nav: - NCBI-AMRFinderPlus: workflows/standalone/ncbi_amrfinderplus.md - Snippy_Variants: workflows/phylogenetic_construction/snippy_variants.md - Terra_2_NCBI: workflows/public_data_sharing/terra_2_ncbi.md - - TheiaEuk: workflows/genomic_characterization/theiaeuk.md + - TheiaEuk Workflow Series: workflows/genomic_characterization/theiaeuk.md - Viral: - Augur: workflows/phylogenetic_construction/augur.md - CZGenEpi_Prep: workflows/phylogenetic_construction/czgenepi_prep.md @@ -151,7 +151,7 @@ nav: - Terra_2_GISAID: workflows/public_data_sharing/terra_2_gisaid.md - Terra_2_NCBI: workflows/public_data_sharing/terra_2_ncbi.md - TheiaCoV Workflow Series: workflows/genomic_characterization/theiacov.md - - TheiaEuk: workflows/genomic_characterization/theiaeuk.md + - TheiaEuk Workflow Series: workflows/genomic_characterization/theiaeuk.md - TheiaMeta: workflows/genomic_characterization/theiameta.md - TheiaMeta_Panel: workflows/genomic_characterization/theiameta_panel.md - TheiaProk Workflow Series: workflows/genomic_characterization/theiaprok.md @@ -233,11 +233,12 @@ plugins: # - section-index extra_javascript: - - 
https://unpkg.com/tablesort@5.3.0/dist/tablesort.min.js - - javascripts/tablesort.js + - https://unpkg.com/tablesort@5.3.0/dist/tablesort.min.js + - javascripts/tablesort.js + - javascripts/table-search.js extra_css: - - stylesheets/extra.css + - stylesheets/extra.css extra: social: @@ -254,4 +255,4 @@ extra: homepage: https://www.theiagen.com copyright: | - © 2022-2024 Theiagen Genomics \ No newline at end of file + © 2022-2024 Theiagen Genomics diff --git a/tasks/assembly/task_artic_consensus.wdl b/tasks/assembly/task_artic_consensus.wdl index 6e38334a1..8e2d174db 100644 --- a/tasks/assembly/task_artic_consensus.wdl +++ b/tasks/assembly/task_artic_consensus.wdl @@ -12,7 +12,7 @@ task consensus { Int memory = 16 Int disk_size = 100 String medaka_model = "r941_min_high_g360" - String docker = "us-docker.pkg.dev/general-theiagen/staphb/artic-ncov2019-epi2me" + String docker = "us-docker.pkg.dev/general-theiagen/staphb/artic:1.2.4-1.12.0" } String primer_name = basename(primer_bed) command <<< @@ -61,7 +61,13 @@ task consensus { # version control echo "Medaka via $(artic -v)" | tee VERSION echo "~{primer_name}" | tee PRIMER_NAME - artic minion --medaka --medaka-model ~{medaka_model} --normalise ~{normalise} --threads ~{cpu} --scheme-directory ./primer-schemes --read-file ~{read1} ${scheme_name} ~{samplename} + artic minion \ + --medaka \ + --medaka-model ~{medaka_model} \ + --normalise ~{normalise} \ + --threads ~{cpu} \ + --scheme-directory ./primer-schemes \ + --read-file ~{read1} ${scheme_name} ~{samplename} gunzip -f ~{samplename}.pass.vcf.gz # clean up fasta header diff --git a/tasks/quality_control/comparisons/task_screen.wdl b/tasks/quality_control/comparisons/task_screen.wdl index 8cc6f58e0..3934d0b7a 100644 --- a/tasks/quality_control/comparisons/task_screen.wdl +++ b/tasks/quality_control/comparisons/task_screen.wdl @@ -20,6 +20,9 @@ task check_reads { Int cpu = 1 } command <<< + # just in case anything fails, throw an error + set -euo pipefail + 
flag="PASS" # initalize estimated genome length @@ -34,13 +37,13 @@ task check_reads { fi # check one: number of reads - read1_num=`eval "$cat_reads ~{read1}" | awk '{s++}END{print s/4}'` - read2_num=`eval "$cat_reads ~{read2}" | awk '{s++}END{print s/4}'` - # awk '{s++}END{print s/4' counts the number of lines and divides them by 4 - # key assumption: in fastq there will be four lines per read - # sometimes fastqs do not have 4 lines per read, so this might fail one day + read1_num=$($cat_reads ~{read1} | fastq-scan | grep 'read_total' | sed 's/[^0-9]*\([0-9]\+\).*/\1/') + read2_num=$($cat_reads ~{read2} | fastq-scan | grep 'read_total' | sed 's/[^0-9]*\([0-9]\+\).*/\1/') + echo "DEBUG: Number of reads in R1: ${read1_num}" + echo "DEBUG: Number of reads in R2: ${read2_num}" reads_total=$(expr $read1_num + $read2_num) + echo "DEBUG: Number of reads total in R1 and R2: ${reads_total}" if [ "${reads_total}" -le "~{min_reads}" ]; then flag="FAIL; the total number of reads is below the minimum of ~{min_reads}" @@ -51,13 +54,11 @@ task check_reads { # checks two and three: number of basepairs and proportion of sequence if [ "${flag}" == "PASS" ]; then # count number of basepairs - # this only works if the fastq has 4 lines per read, so this might fail one day - read1_bp=`eval "${cat_reads} ~{read1}" | paste - - - - | cut -f2 | tr -d '\n' | wc -c` - read2_bp=`eval "${cat_reads} ~{read2}" | paste - - - - | cut -f2 | tr -d '\n' | wc -c` - # paste - - - - (print 4 consecutive lines in one row, tab delimited) - # cut -f2 print only the second column (the second line of the fastq 4-line) - # tr -d '\n' removes line endings - # wc -c counts characters + # using fastq-scan to count the number of basepairs in each fastq + read1_bp=$(eval "${cat_reads} ~{read1}" | fastq-scan | grep 'total_bp' | sed 's/[^0-9]*\([0-9]\+\).*/\1/') + read2_bp=$(eval "${cat_reads} ~{read2}" | fastq-scan | grep 'total_bp' | sed 's/[^0-9]*\([0-9]\+\).*/\1/') + echo "DEBUG: Number of basepairs in R1: 
$read1_bp" + echo "DEBUG: Number of basepairs in R2: $read2_bp" # set proportion variables for easy comparison # removing the , 2) to make these integers instead of floats @@ -147,7 +148,8 @@ task check_reads { flag="FAIL; the estimated coverage (${estimated_coverage}) is less than the minimum of ~{min_coverage}x" else flag="PASS" - echo $estimated_genome_length | tee EST_GENOME_LENGTH + echo ${estimated_genome_length} | tee EST_GENOME_LENGTH + echo "DEBUG: estimated_genome_length: ${estimated_genome_length}" fi fi fi @@ -190,6 +192,9 @@ task check_reads_se { Int cpu = 1 } command <<< + # just in case anything fails, throw an error + set -euo pipefail + flag="PASS" # initalize estimated genome length @@ -203,11 +208,9 @@ task check_reads_se { cat_reads="cat" fi - # check one: number of reads - read1_num=`eval "$cat_reads ~{read1}" | awk '{s++}END{print s/4}'` - # awk '{s++}END{print s/4' counts the number of lines and divides them by 4 - # key assumption: in fastq there will be four lines per read - # sometimes fastqs do not have 4 lines per read, so this might fail one day + # check one: number of reads via fastq-scan + read1_num=$($cat_reads ~{read1} | fastq-scan | grep 'read_total' | sed 's/[^0-9]*\([0-9]\+\).*/\1/') + echo "DEBUG: Number of reads in R1: ${read1_num}" if [ "${read1_num}" -le "~{min_reads}" ] ; then flag="FAIL; the number of reads (${read1_num}) is below the minimum of ~{min_reads}" @@ -218,12 +221,9 @@ task check_reads_se { # checks two and three: number of basepairs and proportion of sequence if [ "${flag}" == "PASS" ]; then # count number of basepairs - # this only works if the fastq has 4 lines per read, so this might fail one day - read1_bp=`eval "${cat_reads} ~{read1}" | paste - - - - | cut -f2 | tr -d '\n' | wc -c` - # paste - - - - (print 4 consecutive lines in one row, tab delimited) - # cut -f2 print only the second column (the second line of the fastq 4-line) - # tr -d '\n' removes line endings - # wc -c counts characters + # using 
fastq-scan to count the number of basepairs in each fastq + read1_bp=$(eval "${cat_reads} ~{read1}" | fastq-scan | grep 'total_bp' | sed 's/[^0-9]*\([0-9]\+\).*/\1/') + echo "DEBUG: Number of basepairs in R1: $read1_bp" if [ "$flag" == "PASS" ] ; then if [ "${read1_bp}" -le "~{min_basepairs}" ] ; then @@ -309,7 +309,8 @@ task check_reads_se { fi echo $flag | tee FLAG - echo $estimated_genome_length | tee EST_GENOME_LENGTH + echo ${estimated_genome_length} | tee EST_GENOME_LENGTH + echo "DEBUG: estimated_genome_length: ${estimated_genome_length}" >>> output { String read_screen = read_string("FLAG") diff --git a/tasks/utilities/submission/task_mercury.wdl b/tasks/utilities/submission/task_mercury.wdl index 51ce839e1..d445e92aa 100644 --- a/tasks/utilities/submission/task_mercury.wdl +++ b/tasks/utilities/submission/task_mercury.wdl @@ -29,6 +29,9 @@ task mercury { volatile: true } command <<< + #set -euo pipefail to avoid silent failure + set -euo pipefail + python3 /mercury/mercury/mercury.py -v | tee VERSION python3 /mercury/mercury/mercury.py \ diff --git a/tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml b/tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml index 48ffe30c9..599fec45e 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml @@ -17,7 +17,7 @@ - wf_theiacov_clearlabs_miniwdl files: - path: miniwdl_run/call-consensus/command - md5sum: a8e200703dedf732b45dd92b0af15f1c + md5sum: b19d5ce485c612036064c07f0a1d6a18 - path: miniwdl_run/call-consensus/inputs.json contains: ["read1", "samplename", "fastq"] - path: miniwdl_run/call-consensus/outputs.json diff --git a/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml b/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml index b1bb6da13..4c7542334 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml @@ -138,7 +138,7 @@ md5sum: 
7c5aba41f53293b712fd86d08ed5b36e # clean read screen - path: miniwdl_run/call-clean_check_reads/command - md5sum: e18830c68993b2837d1da29ce55d2de8 + md5sum: aeeb107f328ccd7d2d805dc5990b24ac - path: miniwdl_run/call-clean_check_reads/inputs.json contains: ["read1", "read2", "organism"] - path: miniwdl_run/call-clean_check_reads/outputs.json diff --git a/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml b/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml index 9af5b61c9..0742c19a9 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml @@ -94,7 +94,7 @@ md5sum: 9a089b8920e55c9cc7bc8cd7d18f9a8e # clean read screen - path: miniwdl_run/call-clean_check_reads/command - md5sum: aec6c57452ddff84c325601a780605d2 + md5sum: 80a361915a627e86743baacfc383b2b5 - path: miniwdl_run/call-clean_check_reads/inputs.json contains: ["read1", "organism"] - path: miniwdl_run/call-clean_check_reads/outputs.json diff --git a/tests/workflows/theiacov/test_wf_theiacov_ont.yml b/tests/workflows/theiacov/test_wf_theiacov_ont.yml index 1772e16b4..6077323b9 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_ont.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_ont.yml @@ -17,7 +17,7 @@ - wf_theiacov_ont_miniwdl files: - path: miniwdl_run/call-clean_check_reads/command - md5sum: 2517c5cf87db0af66663ffb5e69f6b4f + md5sum: 8078e96573428b712ddcb06517333cf5 - path: miniwdl_run/call-clean_check_reads/inputs.json - path: miniwdl_run/call-clean_check_reads/outputs.json - path: miniwdl_run/call-clean_check_reads/stderr.txt @@ -31,7 +31,7 @@ - path: miniwdl_run/call-clean_check_reads/work/_miniwdl_inputs/0/artic_ncov2019_ont.fastq md5sum: d41d8cd98f00b204e9800998ecf8427e - path: miniwdl_run/call-consensus/command - md5sum: 056563d18294928fef5238bac7213791 + md5sum: 362dccda19ecadf377d5cd5872946ddd - path: miniwdl_run/call-consensus/inputs.json contains: ["read1_clean", "samplename", "fastq"] - path: 
miniwdl_run/call-consensus/outputs.json @@ -45,7 +45,7 @@ - path: miniwdl_run/call-consensus/work/REFERENCE_GENOME md5sum: 0e6efd549c8773f9a2f7a3e82619ee61 - path: miniwdl_run/call-consensus/work/VERSION - md5sum: f3528ff85409c70100063c55ad75612b + md5sum: 394e07bc6788e025ac35254411db107c - path: miniwdl_run/call-consensus/work/_miniwdl_inputs/0/artic-v3.primers.bed md5sum: d41d8cd98f00b204e9800998ecf8427e - path: miniwdl_run/call-consensus/work/_miniwdl_inputs/0/artic_ncov2019_ont.fastq @@ -64,8 +64,6 @@ - path: miniwdl_run/call-consensus/work/ont.fastq.gz - path: miniwdl_run/call-consensus/work/ont.medaka.consensus.fasta md5sum: d36b7c665aa4127f0a6e8dbc562eea3e - - path: miniwdl_run/call-consensus/work/ont.merged.gvcf.vcf.gz - - path: miniwdl_run/call-consensus/work/ont.merged.gvcf.vcf.gz.tbi - path: miniwdl_run/call-consensus/work/ont.merged.vcf.gz - path: miniwdl_run/call-consensus/work/ont.merged.vcf.gz.tbi - path: miniwdl_run/call-consensus/work/ont.minion.log.txt @@ -73,20 +71,15 @@ - path: miniwdl_run/call-consensus/work/ont.pass.vcf.gz.tbi - path: miniwdl_run/call-consensus/work/ont.preconsensus.fasta md5sum: b68f4ee4abc9fc16215204d0ff754bb8 - - path: miniwdl_run/call-consensus/work/ont.preconsensus.fasta.fai - md5sum: 4ca7d9fd06b9cdf379c2cf02b9fd6d0e - path: miniwdl_run/call-consensus/work/ont.primers.vcf - path: miniwdl_run/call-consensus/work/ont.primersitereport.txt - md5sum: cffee67632a262eeb947cea9cee0b4c1 + md5sum: dab514423a8fb7b59ab7870ad8c3b4cf - path: miniwdl_run/call-consensus/work/ont.primertrimmed.rg.sorted.bam - path: miniwdl_run/call-consensus/work/ont.primertrimmed.rg.sorted.bam.bai - path: miniwdl_run/call-consensus/work/ont.sorted.bam - path: miniwdl_run/call-consensus/work/ont.sorted.bam.bai - path: miniwdl_run/call-consensus/work/ont.trimmed.rg.sorted.bam - path: miniwdl_run/call-consensus/work/ont.trimmed.rg.sorted.bam.bai - - path: miniwdl_run/call-consensus/work/ont.vcfcheck.log - - path: 
miniwdl_run/call-consensus/work/ont.vcfreport.txt - md5sum: 69131186223267b3ae6621cb8ef4eecd - path: miniwdl_run/call-consensus/work/primer-schemes/SARS-CoV-2/Vuser/SARS-CoV-2.reference.fasta md5sum: b9b67235a2d9d0b0d7f531166ffefd41 - path: miniwdl_run/call-consensus/work/primer-schemes/SARS-CoV-2/Vuser/SARS-CoV-2.reference.fasta.fai @@ -219,7 +212,7 @@ md5sum: d41d8cd98f00b204e9800998ecf8427e - path: miniwdl_run/call-pangolin4/work/ont.pangolin_report.csv - path: miniwdl_run/call-raw_check_reads/command - md5sum: 1858d98f3c15904be0c219b867727048 + md5sum: 5df9a70c852960f82c84c7da611cd177 - path: miniwdl_run/call-raw_check_reads/inputs.json - path: miniwdl_run/call-raw_check_reads/outputs.json - path: miniwdl_run/call-raw_check_reads/stderr.txt diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index d2cedb29b..91ae801b7 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -201,7 +201,7 @@ - path: miniwdl_run/call-cg_pipeline_raw/work/test_readMetrics.tsv contains: ["File", "fastq", "coverage"] - path: miniwdl_run/call-clean_check_reads/command - md5sum: 8235bc815fe3b57471f97474bff3e3f7 + md5sum: f28c2b4597398988bc9016505425f6f0 - path: miniwdl_run/call-clean_check_reads/inputs.json contains: ["read1", "fastq", "skip_screen", "true"] - path: miniwdl_run/call-clean_check_reads/outputs.json @@ -364,7 +364,7 @@ - path: miniwdl_run/call-quast/work/transposed_report.txt contains: ["Assembly", "length", "contigs", "test"] - path: miniwdl_run/call-raw_check_reads/command - md5sum: fd8c392c24e4e49859bdd006163db646 + md5sum: 0e417649675499e2be549ce82e02704c - path: miniwdl_run/call-raw_check_reads/inputs.json contains: ["read1", "fastq", "skip_screen", "true"] - path: miniwdl_run/call-raw_check_reads/outputs.json @@ -581,7 +581,7 @@ - path: 
miniwdl_run/wdl/tasks/quality_control/basic_statistics/task_quast.wdl contains: ["version", "quast", "output"] - path: miniwdl_run/wdl/tasks/quality_control/comparisons/task_screen.wdl - md5sum: 75631c4db89792a939e4d872adc05b86 + md5sum: adb43c5bf0a83b9e2ef7669ed2d1760f - path: miniwdl_run/wdl/tasks/quality_control/read_filtering/task_trimmomatic.wdl contains: ["version", "trimmomatic", "output"] - path: miniwdl_run/wdl/tasks/species_typing/escherichia_shigella/task_ectyper.wdl @@ -633,7 +633,7 @@ - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl md5sum: 6d9dd969e2144ca23f2a0e101e6b6966 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl - md5sum: 8ab95440118d06dd3c07765a19e876e7 + md5sum: 670f990128063eb3c7b3fa49302f08b7 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl contains: ["version", "QC", "output"] - path: miniwdl_run/workflow.log diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index d19f1319e..82f9a9a74 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -194,7 +194,7 @@ - path: miniwdl_run/call-cg_pipeline_raw/work/test_readMetrics.tsv contains: ["File", "fastq", "coverage"] - path: miniwdl_run/call-clean_check_reads/command - md5sum: 1ed4ae1859ade5045c01c92dfe54899c + md5sum: 51f799af8609c440d51e0046ebc030e8 - path: miniwdl_run/call-clean_check_reads/inputs.json contains: ["read1", "fastq", "skip_screen", "true"] - path: miniwdl_run/call-clean_check_reads/outputs.json @@ -203,7 +203,7 @@ md5sum: d41d8cd98f00b204e9800998ecf8427e - path: miniwdl_run/call-clean_check_reads/stderr.txt.offset - path: miniwdl_run/call-clean_check_reads/stdout.txt - md5sum: 9e807ce699271c3f647c7594df2b5b0a + md5sum: 73809db947c875bf81acccaad76a57d5 - path: miniwdl_run/call-clean_check_reads/task.log contains: ["wdl", "theiaprok_illumina_se", 
"check_reads", "done"] - path: miniwdl_run/call-clean_check_reads/work/EST_GENOME_LENGTH @@ -355,7 +355,7 @@ - path: miniwdl_run/call-quast/work/transposed_report.txt contains: ["Assembly", "length", "contigs", "test"] - path: miniwdl_run/call-raw_check_reads/command - md5sum: 3f6bf93b769ef9b152f005195cf2502e + md5sum: 7594079920cccbcf01d027547f54ccc2 - path: miniwdl_run/call-raw_check_reads/inputs.json contains: ["read1", "fastq", "skip_screen", "true"] - path: miniwdl_run/call-raw_check_reads/outputs.json @@ -364,7 +364,7 @@ md5sum: d41d8cd98f00b204e9800998ecf8427e - path: miniwdl_run/call-raw_check_reads/stderr.txt.offset - path: miniwdl_run/call-raw_check_reads/stdout.txt - md5sum: 9e807ce699271c3f647c7594df2b5b0a + md5sum: 73809db947c875bf81acccaad76a57d5 - path: miniwdl_run/call-raw_check_reads/task.log contains: ["wdl", "theiaprok_illumina_se", "check_reads", "done"] - path: miniwdl_run/call-raw_check_reads/work/EST_GENOME_LENGTH @@ -526,7 +526,7 @@ - path: miniwdl_run/wdl/tasks/gene_typing/drug_resistance/task_resfinder.wdl md5sum: 27528633723303b462d095b642649453 - path: miniwdl_run/wdl/tasks/gene_typing/variant_detection/task_snippy_variants.wdl - md5sum: 284ce680b52e7e1c1753537b344fa161 + md5sum: 3b9e04569d7e856dcc649b7726b306b7 - path: miniwdl_run/wdl/tasks/quality_control/read_filtering/task_bbduk.wdl md5sum: aec6ef024d6dff31723f44290f6b9cf5 - path: miniwdl_run/wdl/tasks/quality_control/advanced_metrics/task_busco.wdl @@ -544,7 +544,7 @@ - path: miniwdl_run/wdl/tasks/quality_control/basic_statistics/task_quast.wdl contains: ["version", "quast", "output"] - path: miniwdl_run/wdl/tasks/quality_control/comparisons/task_screen.wdl - md5sum: 75631c4db89792a939e4d872adc05b86 + md5sum: adb43c5bf0a83b9e2ef7669ed2d1760f - path: miniwdl_run/wdl/tasks/quality_control/read_filtering/task_trimmomatic.wdl contains: ["version", "trimmomatic", "output"] - path: miniwdl_run/wdl/tasks/species_typing/escherichia_shigella/task_ectyper.wdl @@ -596,7 +596,7 @@ - path: 
miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl md5sum: 5aa25e4fad466f92c96a7c138aca0d20 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl - md5sum: 8ab95440118d06dd3c07765a19e876e7 + md5sum: 670f990128063eb3c7b3fa49302f08b7 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl md5sum: d11bfe33fdd96eab28892be5a01c1c7d - path: miniwdl_run/workflow.log diff --git a/workflows/utilities/wf_merlin_magic.wdl b/workflows/utilities/wf_merlin_magic.wdl index 690086a60..1d6914e79 100644 --- a/workflows/utilities/wf_merlin_magic.wdl +++ b/workflows/utilities/wf_merlin_magic.wdl @@ -631,7 +631,7 @@ workflow merlin_magic { samplename = samplename, snippy_variants_results = snippy_cauris.snippy_variants_results, reference = cladetyper.clade_spec_ref, - query_gene = select_first([snippy_query_gene, "FKS1,lanosterol.14-alpha.demethylase,uracil.phosphoribosyltransferase"]), + query_gene = select_first([snippy_query_gene, "FKS1,lanosterol.14-alpha.demethylase,uracil.phosphoribosyltransferase,B9J08_005340,B9J08_000401,B9J08_003102,B9J08_003737,B9J08_005343"]), docker = snippy_gene_query_docker_image } } @@ -976,7 +976,7 @@ workflow merlin_magic { String snippy_variants_summary = select_first([snippy_cauris.snippy_variants_summary, snippy_afumigatus.snippy_variants_summary, snippy_crypto.snippy_variants_summary, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) String snippy_variants_num_reads_aligned = select_first([snippy_cauris.snippy_variants_num_reads_aligned, snippy_afumigatus.snippy_variants_num_reads_aligned, snippy_crypto.snippy_variants_num_reads_aligned, "No matching taxon detected"]) String snippy_variants_coverage_tsv = select_first([snippy_cauris.snippy_variants_coverage_tsv, snippy_afumigatus.snippy_variants_coverage_tsv, snippy_crypto.snippy_variants_coverage_tsv, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) - String snippy_variants_num_variants = 
select_first([snippy_cauris.snippy_variants_num_variants, snippy_afumigatus.snippy_variants_num_variants, snippy_crypto.snippy_variants_num_reads_aligned, "No matching taxon detected"]) + String snippy_variants_num_variants = select_first([snippy_cauris.snippy_variants_num_variants, snippy_afumigatus.snippy_variants_num_variants, snippy_crypto.snippy_variants_num_variants, "No matching taxon detected"]) String snippy_variants_percent_ref_coverage = select_first([snippy_cauris.snippy_variants_percent_ref_coverage, snippy_afumigatus.snippy_variants_percent_ref_coverage, snippy_crypto.snippy_variants_percent_ref_coverage, "No matching taxon detected"]) } } \ No newline at end of file