From c64b57bad4da4cdbf4adb1a7751271df34f69271 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Mon, 9 Sep 2024 16:36:16 -0500 Subject: [PATCH 01/40] Added percentage_mapped_reads output to ivar_consensus.wdl and updated stats_n_coverage task --- .../basic_statistics/task_assembly_metrics.wdl | 15 ++++++++++++++- workflows/utilities/wf_ivar_consensus.wdl | 4 ++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl b/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl index 9fe5f843b..d302d087b 100644 --- a/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl +++ b/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl @@ -14,11 +14,11 @@ task stats_n_coverage { samtools --version | head -n1 | tee VERSION samtools stats ~{bamfile} > ~{samplename}.stats.txt - samtools coverage ~{bamfile} -m -o ~{samplename}.cov.hist samtools coverage ~{bamfile} -o ~{samplename}.cov.txt samtools flagstat ~{bamfile} > ~{samplename}.flagstat.txt + # Extracting coverage, depth, meanbaseq, and meanmapq coverage=$(cut -f 6 ~{samplename}.cov.txt | tail -n 1) depth=$(cut -f 7 ~{samplename}.cov.txt | tail -n 1) meanbaseq=$(cut -f 8 ~{samplename}.cov.txt | tail -n 1) @@ -33,6 +33,18 @@ task stats_n_coverage { echo $depth | tee DEPTH echo $meanbaseq | tee MEANBASEQ echo $meanmapq | tee MEANMAPQ + + # Parsing flagstat for total and mapped reads + total_reads=$(grep "in total" ~{samplename}.flagstat.txt | cut -d " " -f 1) + mapped_reads=$(grep "mapped (" ~{samplename}.flagstat.txt | cut -d " " -f 1) + + if [ -z "$total_reads" ] ; then total_reads="1" ; fi # avoid division by zero + if [ -z "$mapped_reads" ] ; then mapped_reads="0" ; fi + + # Calculate percentage of mapped reads + percentage_mapped_reads=$(echo "scale=2; ($mapped_reads / $total_reads) * 100" | bc) + + echo $percentage_mapped_reads | tee PERCENTAGE_MAPPED_READS >>> output { String date = read_string("DATE") @@ -45,6 +57,7 @@ task stats_n_coverage { Float depth = read_string("DEPTH") Float meanbaseq = read_string("MEANBASEQ") Float meanmapq = read_string("MEANMAPQ") + Float percentage_mapped_reads = read_string("PERCENTAGE_MAPPED_READS") } runtime { docker: docker diff --git a/workflows/utilities/wf_ivar_consensus.wdl b/workflows/utilities/wf_ivar_consensus.wdl index 1da0fb5f6..67249a366 100644 --- a/workflows/utilities/wf_ivar_consensus.wdl +++ b/workflows/utilities/wf_ivar_consensus.wdl @@ -106,6 +106,10 @@ workflow ivar_consensus { String meanmapq_trim = select_first([stats_n_coverage_primtrim.meanmapq, stats_n_coverage.meanmapq,""]) String assembly_mean_coverage = select_first([stats_n_coverage_primtrim.depth, stats_n_coverage.depth,""]) String samtools_version_stats = stats_n_coverage.samtools_version + + # Assembly metrics + Float percentage_mapped_reads = select_first([stats_n_coverage_primtrim.percentage_mapped_reads, stats_n_coverage.percentage_mapped_reads, 0.0]) + } } \ No newline at end of file From 93e2d897dcad39177e19661ca416bc8b0b9ff764 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Tue, 10 Sep 2024 07:49:56 -0500 Subject: [PATCH 02/40] update mapped reads trying read_float --- .../basic_statistics/task_assembly_metrics.wdl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl b/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl index d302d087b..aac611526 100644 --- a/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl +++ b/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl @@ -38,12 +38,15 @@ task stats_n_coverage { total_reads=$(grep "in total" ~{samplename}.flagstat.txt | cut -d " " -f 1) mapped_reads=$(grep "mapped (" ~{samplename}.flagstat.txt | cut -d " " -f 1) - if [ -z "$total_reads" ] ; then total_reads="1" ; fi # avoid division by zero - if [ -z "$mapped_reads" ] ; then mapped_reads="0" ; fi + if [ -z "$total_reads" ]; then total_reads="1"; fi # avoid division by zero + if [ -z "$mapped_reads" ]; then mapped_reads="0"; fi # Calculate percentage of mapped reads percentage_mapped_reads=$(echo "scale=2; ($mapped_reads / $total_reads) * 100" | bc) + # Default to 0.0 if calculation fails + if [ -z "$percentage_mapped_reads" ]; then percentage_mapped_reads="0.0"; fi + echo $percentage_mapped_reads | tee PERCENTAGE_MAPPED_READS >>> output { @@ -57,7 +60,7 @@ task stats_n_coverage { Float depth = read_string("DEPTH") Float meanbaseq = read_string("MEANBASEQ") Float meanmapq = read_string("MEANMAPQ") - Float percentage_mapped_reads = read_string("PERCENTAGE_MAPPED_READS") + Float percentage_mapped_reads = read_float("PERCENTAGE_MAPPED_READS") } runtime { docker: docker From 5298eff6a2289477b0ff3cc3de95aebbc58be5ab Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Tue, 10 Sep 2024 08:13:20 -0500 Subject: [PATCH 03/40] get read numbers from stats file --- .../basic_statistics/task_assembly_metrics.wdl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl b/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl index aac611526..bc7d060fb 100644 --- a/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl +++ b/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl @@ -34,19 +34,21 @@ task stats_n_coverage { echo $meanbaseq | tee MEANBASEQ echo $meanmapq | tee MEANMAPQ - # Parsing flagstat for total and mapped reads - total_reads=$(grep "in total" ~{samplename}.flagstat.txt | cut -d " " -f 1) - mapped_reads=$(grep "mapped (" ~{samplename}.flagstat.txt | cut -d " " -f 1) + # Parsing stats.txt for total and mapped reads + total_reads=$(grep "^SN" ~{samplename}.stats.txt | grep "raw total sequences:" | cut -f 3) + mapped_reads=$(grep "^SN" ~{samplename}.stats.txt | grep "reads mapped:" | cut -f 3) - if [ -z "$total_reads" ]; then total_reads="1"; fi # avoid division by zero + # Check for empty values and set defaults to avoid errors + if [ -z "$total_reads" ]; then total_reads="1"; fi # Avoid division by zero if [ -z "$mapped_reads" ]; then mapped_reads="0"; fi - # Calculate percentage of mapped reads + # Calculate the percentage of mapped reads percentage_mapped_reads=$(echo "scale=2; ($mapped_reads / $total_reads) * 100" | bc) - # Default to 0.0 if calculation fails + # If the percentage calculation fails, default to 0.0 if [ -z "$percentage_mapped_reads" ]; then percentage_mapped_reads="0.0"; fi + # Output the result echo $percentage_mapped_reads | tee PERCENTAGE_MAPPED_READS >>> output { From 85a8cc8e3e69e61c46bc444710895e3c863c4518 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Tue, 10 Sep 2024 08:36:54 -0500 Subject: [PATCH 04/40] get read numbers from stats filev2 --- .../basic_statistics/task_assembly_metrics.wdl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl b/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl index bc7d060fb..fb889bbef 100644 --- a/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl +++ b/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl @@ -35,8 +35,8 @@ task stats_n_coverage { echo $meanmapq | tee MEANMAPQ # Parsing stats.txt for total and mapped reads - total_reads=$(grep "^SN" ~{samplename}.stats.txt | grep "raw total sequences:" | cut -f 3) - mapped_reads=$(grep "^SN" ~{samplename}.stats.txt | grep "reads mapped:" | cut -f 3) + total_reads=$(grep "^SN" ~{samplename}.stats.txt | grep "raw total sequences:" | awk '{print $4}') + mapped_reads=$(grep "^SN" ~{samplename}.stats.txt | grep "reads mapped:" | awk '{print $4}') # Check for empty values and set defaults to avoid errors if [ -z "$total_reads" ]; then total_reads="1"; fi # Avoid division by zero @@ -50,6 +50,16 @@ task stats_n_coverage { # Output the result echo $percentage_mapped_reads | tee PERCENTAGE_MAPPED_READS + + # Output header row (for CSV) + echo "Statistic,Value" > ~{samplename}_metrics.csv + + # Output each statistic as a row + echo "Coverage,$coverage" >> ~{samplename}_metrics.csv + echo "Depth,$depth" >> ~{samplename}_metrics.csv + echo "Mean Base Quality,$meanbaseq" >> ~{samplename}_metrics.csv + echo "Mean Mapping Quality,$meanmapq" >> ~{samplename}_metrics.csv + echo "Percentage Mapped Reads,$percentage_mapped_reads" >> ~{samplename}_metrics.csv >>> output { String date = read_string("DATE") @@ -63,6 +73,8 @@ task stats_n_coverage { Float meanbaseq = read_string("MEANBASEQ") Float meanmapq = read_string("MEANMAPQ") Float percentage_mapped_reads = read_float("PERCENTAGE_MAPPED_READS") + File metrics_csv = "~{samplename}_metrics.csv" + } runtime { docker: docker From 9c2cb18591c43273fbc03e3042afa0d5aec77c71 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Tue, 10 Sep 2024 10:26:22 -0500 Subject: [PATCH 05/40] change from bc to awk for calculation --- .../basic_statistics/task_assembly_metrics.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl b/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl index fb889bbef..ef74f74bb 100644 --- a/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl +++ b/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl @@ -35,8 +35,8 @@ task stats_n_coverage { echo $meanmapq | tee MEANMAPQ # Parsing stats.txt for total and mapped reads - total_reads=$(grep "^SN" ~{samplename}.stats.txt | grep "raw total sequences:" | awk '{print $4}') - mapped_reads=$(grep "^SN" ~{samplename}.stats.txt | grep "reads mapped:" | awk '{print $4}') + total_reads=$(grep "^SN" ~{samplename}.stats.txt | grep "raw total sequences:" | cut -f 3) + mapped_reads=$(grep "^SN" ~{samplename}.stats.txt | grep "reads mapped:" | cut -f 3) # Check for empty values and set defaults to avoid errors if [ -z "$total_reads" ]; then total_reads="1"; fi # Avoid division by zero From c57edc4ac3a3021d9988b41395848719c3324814 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Tue, 10 Sep 2024 10:50:36 -0500 Subject: [PATCH 06/40] update awk --- .../quality_control/basic_statistics/task_assembly_metrics.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl b/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl index ef74f74bb..d38b4d88c 100644 --- a/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl +++ b/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl @@ -43,7 +43,7 @@ task stats_n_coverage { if [ -z "$mapped_reads" ]; then mapped_reads="0"; fi # Calculate the percentage of mapped reads - percentage_mapped_reads=$(echo "scale=2; ($mapped_reads / $total_reads) * 100" | bc) + percentage_mapped_reads=$(awk "BEGIN {printf \"%.2f\", ($mapped_reads / $total_reads) * 100}") # If the percentage calculation fails, default to 0.0 if [ -z "$percentage_mapped_reads" ]; then percentage_mapped_reads="0.0"; fi From 76b62ce498567f7bccdf86502fd47dcc2fa7bba0 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Tue, 10 Sep 2024 11:10:42 -0500 Subject: [PATCH 07/40] metric output txt instead of csv --- .../basic_statistics/task_assembly_metrics.wdl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl b/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl index d38b4d88c..816bf337e 100644 --- a/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl +++ b/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl @@ -51,15 +51,16 @@ task stats_n_coverage { # Output the result echo $percentage_mapped_reads | tee PERCENTAGE_MAPPED_READS + #output all metrics in one txt file # Output header row (for CSV) - echo "Statistic,Value" > ~{samplename}_metrics.csv + echo "Statistic Value" > ~{samplename}_metrics.txt # Output each statistic as a row - echo "Coverage,$coverage" >> ~{samplename}_metrics.csv - echo "Depth,$depth" >> ~{samplename}_metrics.csv - echo "Mean Base Quality,$meanbaseq" >> ~{samplename}_metrics.csv - echo "Mean Mapping Quality,$meanmapq" >> ~{samplename}_metrics.csv - echo "Percentage Mapped Reads,$percentage_mapped_reads" >> ~{samplename}_metrics.csv + echo "Coverage $coverage" >> ~{samplename}_metrics.txt + echo "Depth $depth" >> ~{samplename}_metrics.txt + echo "Mean Base Quality $meanbaseq" >> ~{samplename}_metrics.txt + echo "Mean Mapping Quality $meanmapq" >> ~{samplename}_metrics.txt + echo "Percentage Mapped Reads $percentage_mapped_reads" >> ~{samplename}_metrics.txt >>> output { String date = read_string("DATE") @@ -73,7 +74,7 @@ task stats_n_coverage { Float meanbaseq = read_string("MEANBASEQ") Float meanmapq = read_string("MEANMAPQ") Float percentage_mapped_reads = read_float("PERCENTAGE_MAPPED_READS") - File metrics_csv = "~{samplename}_metrics.csv" + File metrics_txt = "~{samplename}_metrics.txt" } runtime { From e6afcdd213f514829863b3809ffee9deb93a45be Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Tue, 10 Sep 2024 12:57:08 -0500 Subject: [PATCH 08/40] reswitchack to read_string output t --- .../quality_control/basic_statistics/task_assembly_metrics.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl b/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl index 816bf337e..8323dd3f1 100644 --- a/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl +++ b/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl @@ -73,7 +73,7 @@ task stats_n_coverage { Float depth = read_string("DEPTH") Float meanbaseq = read_string("MEANBASEQ") Float meanmapq = read_string("MEANMAPQ") - Float percentage_mapped_reads = read_float("PERCENTAGE_MAPPED_READS") + Float percentage_mapped_reads = read_string("PERCENTAGE_MAPPED_READS") File metrics_txt = "~{samplename}_metrics.txt" } From dba965b95268738d91fc004adcafd38e7fa4511e Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Mon, 16 Sep 2024 15:46:04 -0500 Subject: [PATCH 09/40] percentage mapped reads based on trimmed bam file theiacov_ont --- workflows/theiacov/wf_theiacov_ont.wdl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/workflows/theiacov/wf_theiacov_ont.wdl b/workflows/theiacov/wf_theiacov_ont.wdl index 984934062..c8091d1d7 100644 --- a/workflows/theiacov/wf_theiacov_ont.wdl +++ b/workflows/theiacov/wf_theiacov_ont.wdl @@ -139,6 +139,11 @@ workflow theiacov_ont { samplename = samplename, bamfile = consensus.trim_sorted_bam } + # Calculate percentage of mapped reads using assembled_reads_percent task + call assembly_metrics.assembled_reads_percent { + input: + bam = consensus.trim_sorted_bam + } } # assembly via irma for flu organisms if (organism_parameters.standardized_organism == "flu") { @@ -427,5 +432,6 @@ workflow theiacov_ont { # QC_Check Results String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard + } } \ No newline at end of file From dd7b3e70931df1a933cf464dc0c07fc112a85709 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Tue, 17 Sep 2024 09:27:07 -0500 Subject: [PATCH 10/40] update theiacov-ont for mapped reads --- workflows/theiacov/wf_theiacov_ont.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/theiacov/wf_theiacov_ont.wdl b/workflows/theiacov/wf_theiacov_ont.wdl index c8091d1d7..c6b1c6f89 100644 --- a/workflows/theiacov/wf_theiacov_ont.wdl +++ b/workflows/theiacov/wf_theiacov_ont.wdl @@ -432,6 +432,6 @@ workflow theiacov_ont { # QC_Check Results String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard - + Float? percentage_mapped_reads = assembled_reads_percent.percentage_mapped } } \ No newline at end of file From f6d5393cefc6473c2e513762bf16611202343a45 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Wed, 2 Oct 2024 18:53:36 -0500 Subject: [PATCH 11/40] pass output ivar cons mapped reads to wf for terra output --- workflows/theiacov/wf_theiacov_illumina_pe.wdl | 3 ++- workflows/theiacov/wf_theiacov_illumina_se.wdl | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/workflows/theiacov/wf_theiacov_illumina_pe.wdl b/workflows/theiacov/wf_theiacov_illumina_pe.wdl index ece3c201a..213f1f80e 100644 --- a/workflows/theiacov/wf_theiacov_illumina_pe.wdl +++ b/workflows/theiacov/wf_theiacov_illumina_pe.wdl @@ -439,6 +439,7 @@ workflow theiacov_illumina_pe { # QC_Check Results String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard - + # Capture percentage_mapped_reads from ivar_consensus task + Float? percentage_mapped_reads = ivar_consensus.percentage_mapped_reads } } diff --git a/workflows/theiacov/wf_theiacov_illumina_se.wdl b/workflows/theiacov/wf_theiacov_illumina_se.wdl index fa1044c24..0f730024f 100644 --- a/workflows/theiacov/wf_theiacov_illumina_se.wdl +++ b/workflows/theiacov/wf_theiacov_illumina_se.wdl @@ -317,5 +317,7 @@ workflow theiacov_illumina_se { # QC_Check Results String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard + # Capture percentage_mapped_reads from ivar_consensus task + Float? percentage_mapped_reads = ivar_consensus.percentage_mapped_reads } } \ No newline at end of file From f888c65e2bbff903d58e6b644f53186a0172f471 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Wed, 2 Oct 2024 20:52:36 -0500 Subject: [PATCH 12/40] perc mapped reads output flu track PE, ONT and clearlabs and doc update --- docs/workflows/genomic_characterization/theiacov.md | 1 + workflows/theiacov/wf_theiacov_clearlabs.wdl | 8 ++++++++ workflows/theiacov/wf_theiacov_illumina_pe.wdl | 10 +++++++++- workflows/theiacov/wf_theiacov_ont.wdl | 8 ++++++++ 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/docs/workflows/genomic_characterization/theiacov.md b/docs/workflows/genomic_characterization/theiacov.md index 5849d2f08..5ce7225b4 100644 --- a/docs/workflows/genomic_characterization/theiacov.md +++ b/docs/workflows/genomic_characterization/theiacov.md @@ -1104,6 +1104,7 @@ All TheiaCoV Workflows (not TheiaCoV_FASTA_Batch) | pangolin_notes | String | Lineage notes as determined by Pangolin | CL, FASTA, ONT, PE, SE | | pangolin_versions | String | All Pangolin software and database versions | CL, FASTA, ONT, PE, SE | | percent_reference_coverage | Float | Percent coverage of the reference genome after performing primer trimming; calculated as assembly_length_unambiguous / length of the reference genome (SC2: 29903) x 100 | CL, FASTA, ONT, PE, SE | +| percentage_mapped_reads | String |Percentage of reads that successfully aligned to the reference genome. This value is calculated by number of mapped reads / total number of reads x 100. | ONT, PE, SE | | primer_bed_name | String | Name of the primer bed files used for primer trimming | CL, ONT, PE, SE | | primer_trimmed_read_percent | Float | Percentage of read data with primers trimmed as determined by iVar trim | PE, SE | | qc_check | String | The results of the QC Check task | CL, FASTA, ONT, PE, SE | diff --git a/workflows/theiacov/wf_theiacov_clearlabs.wdl b/workflows/theiacov/wf_theiacov_clearlabs.wdl index 8932c983f..6050447e6 100644 --- a/workflows/theiacov/wf_theiacov_clearlabs.wdl +++ b/workflows/theiacov/wf_theiacov_clearlabs.wdl @@ -86,6 +86,12 @@ workflow theiacov_clearlabs { samplename = samplename, bamfile = consensus.sorted_bam } + # Call to calculate percentage of mapped reads + call assembly_metrics.assembled_reads_percent { + input: + bam = consensus.sorted_bam + } + call consensus_qc_task.consensus_qc { input: assembly_fasta = consensus.consensus_seq, @@ -207,6 +213,8 @@ workflow theiacov_clearlabs { Int number_Degenerate = consensus_qc.number_Degenerate Int number_Total = consensus_qc.number_Total Float percent_reference_coverage = consensus_qc.percent_reference_coverage + # Percentage mapped reads + Float? percentage_mapped_reads = assembled_reads_percent.percentage_mapped # SC2 specific coverage outputs Float? sc2_s_gene_mean_coverage = gene_coverage.sc2_s_gene_depth Float? sc2_s_gene_percent_coverage = gene_coverage.sc2_s_gene_percent_coverage diff --git a/workflows/theiacov/wf_theiacov_illumina_pe.wdl b/workflows/theiacov/wf_theiacov_illumina_pe.wdl index 213f1f80e..dbe5fe3c4 100644 --- a/workflows/theiacov/wf_theiacov_illumina_pe.wdl +++ b/workflows/theiacov/wf_theiacov_illumina_pe.wdl @@ -144,7 +144,7 @@ workflow theiacov_illumina_pe { trim_primers = trim_primers } } - # perform flu-specific tasks + # for flu organisms call flu_track if (organism_parameters.standardized_organism == "flu") { call run_flu_track.flu_track { input: @@ -155,6 +155,12 @@ workflow theiacov_illumina_pe { seq_method = seq_method } } + # Calculate the percentage of mapped reads for flu samples + call assembly_metrics.stats_n_coverage as flu_stats_n_coverage { + input: + samplename = samplename, + bamfile = select_first([flu_track.irma_ha_bam, flu_track.irma_na_bam]) + } if (defined(ivar_consensus.assembly_fasta) || defined(flu_track.irma_assembly_fasta)) { call consensus_qc_task.consensus_qc { input: @@ -429,6 +435,8 @@ workflow theiacov_illumina_pe { String? flu_oseltamivir_resistance = flu_track.flu_oseltamivir_resistance String? flu_xofluza_resistance = flu_track.flu_xofluza_resistance String? flu_zanamivir_resistance = flu_track.flu_zanamivir_resistance + # Flu Track Outputs stats n coverage + Float? flu_percentage_mapped_reads = flu_stats_n_coverage.percentage_mapped_reads # HIV Outputs String? quasitools_version = quasitools_illumina_pe.quasitools_version String? quasitools_date = quasitools_illumina_pe.quasitools_date diff --git a/workflows/theiacov/wf_theiacov_ont.wdl b/workflows/theiacov/wf_theiacov_ont.wdl index c6b1c6f89..b8241c4aa 100644 --- a/workflows/theiacov/wf_theiacov_ont.wdl +++ b/workflows/theiacov/wf_theiacov_ont.wdl @@ -154,6 +154,11 @@ workflow theiacov_ont { standardized_organism = organism_parameters.standardized_organism, seq_method = seq_method } + call assembly_metrics.stats_n_coverage as flu_stats_n_coverage { + input: + samplename = samplename, + bamfile = select_first([flu_track.irma_ha_bam, flu_track.irma_na_bam]) + } } # nanoplot for basic QC metrics call nanoplot_task.nanoplot as nanoplot_raw { @@ -432,6 +437,9 @@ workflow theiacov_ont { # QC_Check Results String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard + # Percentage mapped reads for other organisms (non-flu) Float? percentage_mapped_reads = assembled_reads_percent.percentage_mapped + # Flu mapped reads outputs + Float? percentage_mapped_reads_flu = flu_stats_n_coverage.percentage_mapped_reads } } \ No newline at end of file From 92733eaf8cd58aeaef0b482ff62750f502c33464 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Wed, 2 Oct 2024 21:16:49 -0500 Subject: [PATCH 13/40] updated namings outputs cov_ONT and removed extra call assembly metrics --- workflows/theiacov/wf_theiacov_ont.wdl | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/workflows/theiacov/wf_theiacov_ont.wdl b/workflows/theiacov/wf_theiacov_ont.wdl index b8241c4aa..035db7e9e 100644 --- a/workflows/theiacov/wf_theiacov_ont.wdl +++ b/workflows/theiacov/wf_theiacov_ont.wdl @@ -139,11 +139,6 @@ workflow theiacov_ont { samplename = samplename, bamfile = consensus.trim_sorted_bam } - # Calculate percentage of mapped reads using assembled_reads_percent task - call assembly_metrics.assembled_reads_percent { - input: - bam = consensus.trim_sorted_bam - } } # assembly via irma for flu organisms if (organism_parameters.standardized_organism == "flu") { @@ -437,9 +432,11 @@ workflow theiacov_ont { # QC_Check Results String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard - # Percentage mapped reads for other organisms (non-flu) + # Percentage mapped reads for non-flu Float? percentage_mapped_reads = assembled_reads_percent.percentage_mapped + Float? percentage_mapped_reads_trimmed = stats_n_coverage_primtrim.percentage_mapped_reads # Flu mapped reads outputs - Float? percentage_mapped_reads_flu = flu_stats_n_coverage.percentage_mapped_reads + Float? percentage_mapped_reads_flu = stats_n_coverage.percentage_mapped_reads + } } \ No newline at end of file From 72db28547f64bd9947ec10abbb726af1b5db306e Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Thu, 3 Oct 2024 09:00:53 -0500 Subject: [PATCH 14/40] change naming output stat n coverage task --- workflows/theiacov/wf_theiacov_ont.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/workflows/theiacov/wf_theiacov_ont.wdl b/workflows/theiacov/wf_theiacov_ont.wdl index 035db7e9e..6d693024e 100644 --- a/workflows/theiacov/wf_theiacov_ont.wdl +++ b/workflows/theiacov/wf_theiacov_ont.wdl @@ -433,10 +433,9 @@ workflow theiacov_ont { String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard # Percentage mapped reads for non-flu - Float? percentage_mapped_reads = assembled_reads_percent.percentage_mapped + Float? percentage_mapped_reads = stats_n_coverage.percentage_mapped_reads Float? percentage_mapped_reads_trimmed = stats_n_coverage_primtrim.percentage_mapped_reads # Flu mapped reads outputs Float? percentage_mapped_reads_flu = stats_n_coverage.percentage_mapped_reads - } } \ No newline at end of file From 69b4666d23eba8ccff54b983e1d73ca7e365b0bd Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Thu, 3 Oct 2024 09:02:24 -0500 Subject: [PATCH 15/40] update flu mapped reads perc variable name --- workflows/theiacov/wf_theiacov_ont.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/theiacov/wf_theiacov_ont.wdl b/workflows/theiacov/wf_theiacov_ont.wdl index 6d693024e..2b525ddcb 100644 --- a/workflows/theiacov/wf_theiacov_ont.wdl +++ b/workflows/theiacov/wf_theiacov_ont.wdl @@ -436,6 +436,6 @@ workflow theiacov_ont { Float? percentage_mapped_reads = stats_n_coverage.percentage_mapped_reads Float? percentage_mapped_reads_trimmed = stats_n_coverage_primtrim.percentage_mapped_reads # Flu mapped reads outputs - Float? percentage_mapped_reads_flu = stats_n_coverage.percentage_mapped_reads + Float? percentage_mapped_reads_flu = flu_stats_n_coverage.percentage_mapped_reads } } \ No newline at end of file From b9320d7da6950fbdfee7f81023f9812c896d1018 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Thu, 3 Oct 2024 09:39:05 -0500 Subject: [PATCH 16/40] make theiacov_ont conditional output flu mapped reads --- workflows/theiacov/wf_theiacov_ont.wdl | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/workflows/theiacov/wf_theiacov_ont.wdl b/workflows/theiacov/wf_theiacov_ont.wdl index 2b525ddcb..6c766560c 100644 --- a/workflows/theiacov/wf_theiacov_ont.wdl +++ b/workflows/theiacov/wf_theiacov_ont.wdl @@ -432,10 +432,15 @@ workflow theiacov_ont { # QC_Check Results String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard - # Percentage mapped reads for non-flu - Float? percentage_mapped_reads = stats_n_coverage.percentage_mapped_reads - Float? percentage_mapped_reads_trimmed = stats_n_coverage_primtrim.percentage_mapped_reads - # Flu mapped reads outputs - Float? percentage_mapped_reads_flu = flu_stats_n_coverage.percentage_mapped_reads + # Non-flu specific outputs + if (organism_parameters.standardized_organism != "flu") { + Float? percentage_mapped_reads = stats_n_coverage.percentage_mapped_reads + Float? percentage_mapped_reads_trimmed = stats_n_coverage_primtrim.percentage_mapped_reads + } + + # Flu-specific outputs + if (organism_parameters.standardized_organism == "flu") { + Float? percentage_mapped_reads_flu = flu_stats_n_coverage.percentage_mapped_reads + } } } \ No newline at end of file From 0aeb2721d83bbf1087afe1317497b0e134b860f5 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Thu, 3 Oct 2024 09:55:26 -0500 Subject: [PATCH 17/40] wdl does not support if cond in output change to select first --- workflows/theiacov/wf_theiacov_ont.wdl | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/workflows/theiacov/wf_theiacov_ont.wdl b/workflows/theiacov/wf_theiacov_ont.wdl index 6c766560c..5f8ec5300 100644 --- a/workflows/theiacov/wf_theiacov_ont.wdl +++ b/workflows/theiacov/wf_theiacov_ont.wdl @@ -433,14 +433,11 @@ workflow theiacov_ont { String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard # Non-flu specific outputs - if (organism_parameters.standardized_organism != "flu") { - Float? percentage_mapped_reads = stats_n_coverage.percentage_mapped_reads - Float? percentage_mapped_reads_trimmed = stats_n_coverage_primtrim.percentage_mapped_reads - } + # Non-flu specific outputs + Float percentage_mapped_reads = select_first([stats_n_coverage.percentage_mapped_reads, ""]) + Float percentage_mapped_reads_trimmed = select_first([stats_n_coverage_primtrim.percentage_mapped_reads, ""]) # Flu-specific outputs - if (organism_parameters.standardized_organism == "flu") { - Float? percentage_mapped_reads_flu = flu_stats_n_coverage.percentage_mapped_reads - } + Float percentage_mapped_reads_flu = select_first([flu_stats_n_coverage.percentage_mapped_reads, ""]) } } \ No newline at end of file From 4846b1e4b7ad7e0f18c88ae67fbd5c40fbae4447 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Thu, 3 Oct 2024 09:55:44 -0500 Subject: [PATCH 18/40] wdl does not support if cond in output change to select first --- workflows/theiacov/wf_theiacov_ont.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/workflows/theiacov/wf_theiacov_ont.wdl b/workflows/theiacov/wf_theiacov_ont.wdl index 5f8ec5300..7c34a744b 100644 --- a/workflows/theiacov/wf_theiacov_ont.wdl +++ b/workflows/theiacov/wf_theiacov_ont.wdl @@ -434,10 +434,10 @@ workflow theiacov_ont { File? qc_standard = qc_check_task.qc_standard # Non-flu specific outputs # Non-flu specific outputs - Float percentage_mapped_reads = select_first([stats_n_coverage.percentage_mapped_reads, ""]) - Float percentage_mapped_reads_trimmed = select_first([stats_n_coverage_primtrim.percentage_mapped_reads, ""]) + Float percentage_mapped_reads = select_first([stats_n_coverage.percentage_mapped_reads, 0.0]) + Float percentage_mapped_reads_trimmed = select_first([stats_n_coverage_primtrim.percentage_mapped_reads, 0.0]) # Flu-specific outputs - Float percentage_mapped_reads_flu = select_first([flu_stats_n_coverage.percentage_mapped_reads, ""]) + Float percentage_mapped_reads_flu = select_first([flu_stats_n_coverage.percentage_mapped_reads, 0.0]) } } \ No newline at end of file From 33ab0268ed6126d5c63e56632d22c2e508ebe4dd Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Thu, 3 Oct 2024 10:44:09 -0500 Subject: [PATCH 19/40] combine flu and non flu into same mapped reads output --- workflows/theiacov/wf_theiacov_ont.wdl | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/workflows/theiacov/wf_theiacov_ont.wdl b/workflows/theiacov/wf_theiacov_ont.wdl index 7c34a744b..3cda26887 100644 --- a/workflows/theiacov/wf_theiacov_ont.wdl +++ b/workflows/theiacov/wf_theiacov_ont.wdl @@ -434,10 +434,6 @@ workflow theiacov_ont { File? qc_standard = qc_check_task.qc_standard # Non-flu specific outputs # Non-flu specific outputs - Float percentage_mapped_reads = select_first([stats_n_coverage.percentage_mapped_reads, 0.0]) - Float percentage_mapped_reads_trimmed = select_first([stats_n_coverage_primtrim.percentage_mapped_reads, 0.0]) - - # Flu-specific outputs - Float percentage_mapped_reads_flu = select_first([flu_stats_n_coverage.percentage_mapped_reads, 0.0]) + Float percentage_mapped_reads = select_first([flu_stats_n_coverage.percentage_mapped_reads, stats_n_coverage.percentage_mapped_reads, 0.0]) } } \ No newline at end of file From 7070c626306ee9fc13785ff9a83bb8e81d9a1a6e Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Mon, 21 Oct 2024 11:35:23 -0500 Subject: [PATCH 20/40] correct assembled reads call --- workflows/theiacov/wf_theiacov_clearlabs.wdl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/workflows/theiacov/wf_theiacov_clearlabs.wdl b/workflows/theiacov/wf_theiacov_clearlabs.wdl index 6050447e6..2f059a8db 100644 --- a/workflows/theiacov/wf_theiacov_clearlabs.wdl +++ b/workflows/theiacov/wf_theiacov_clearlabs.wdl @@ -86,10 +86,11 @@ workflow theiacov_clearlabs { samplename = samplename, bamfile = consensus.sorted_bam } - # Call to calculate percentage of mapped reads - call assembly_metrics.assembled_reads_percent { + # Use stats_n_coverage to calculate percentage_mapped_reads + call assembly_metrics.stats_n_coverage as stats_n_coverage { input: - bam = consensus.sorted_bam + samplename = samplename, + bamfile = consensus.sorted_bam } call consensus_qc_task.consensus_qc { From e23b19a3caba62551b1a4db7b1a409a38f58e362 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Mon, 21 Oct 2024 12:18:16 -0500 Subject: [PATCH 21/40] update mdsums --- tests/workflows/theiacov/test_wf_theiacov_ont.yml | 4 ++-- workflows/theiacov/wf_theiacov_clearlabs.wdl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/workflows/theiacov/test_wf_theiacov_ont.yml b/tests/workflows/theiacov/test_wf_theiacov_ont.yml index 1772e16b4..fd0cd375c 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_ont.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_ont.yml @@ -232,7 +232,7 @@ md5sum: 32c0be4fb7f3030bf9c74c0a836d4f2e - path: miniwdl_run/call-raw_check_reads/work/_miniwdl_inputs/0/ont.fastq.gz - path: miniwdl_run/call-stats_n_coverage/command - md5sum: 93414eacbbb9d7c4813bb54a8a507078 + md5sum: 194a32a05aca8867a2d400465838945d - path: miniwdl_run/call-stats_n_coverage/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-stats_n_coverage/outputs.json @@ -257,7 +257,7 @@ - path: miniwdl_run/call-stats_n_coverage/work/ont.flagstat.txt - path: miniwdl_run/call-stats_n_coverage/work/ont.stats.txt - path: miniwdl_run/call-stats_n_coverage_primtrim/command - md5sum: c6e7de70dfdbb1858229e44777b84110 + md5sum: 7246100df31a6508fe16757463e7c5a2 - path: miniwdl_run/call-stats_n_coverage_primtrim/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-stats_n_coverage_primtrim/outputs.json diff --git a/workflows/theiacov/wf_theiacov_clearlabs.wdl b/workflows/theiacov/wf_theiacov_clearlabs.wdl index 2f059a8db..1a57c6d47 100644 --- a/workflows/theiacov/wf_theiacov_clearlabs.wdl +++ b/workflows/theiacov/wf_theiacov_clearlabs.wdl @@ -215,7 +215,7 @@ workflow theiacov_clearlabs { Int number_Total = consensus_qc.number_Total Float percent_reference_coverage = consensus_qc.percent_reference_coverage # Percentage mapped reads - Float? percentage_mapped_reads = assembled_reads_percent.percentage_mapped + Float percentage_mapped_reads = stats_n_coverage.percentage_mapped_reads # SC2 specific coverage outputs Float? sc2_s_gene_mean_coverage = gene_coverage.sc2_s_gene_depth Float? sc2_s_gene_percent_coverage = gene_coverage.sc2_s_gene_percent_coverage From b79097309190e115d3fb3668b885fd14855b385e Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Mon, 21 Oct 2024 12:24:30 -0500 Subject: [PATCH 22/40] update clearlabs for statncov call --- workflows/theiacov/wf_theiacov_clearlabs.wdl | 7 ------- 1 file changed, 7 deletions(-) diff --git a/workflows/theiacov/wf_theiacov_clearlabs.wdl b/workflows/theiacov/wf_theiacov_clearlabs.wdl index 1a57c6d47..0368bef95 100644 --- a/workflows/theiacov/wf_theiacov_clearlabs.wdl +++ b/workflows/theiacov/wf_theiacov_clearlabs.wdl @@ -86,13 +86,6 @@ workflow theiacov_clearlabs { samplename = samplename, bamfile = consensus.sorted_bam } - # Use stats_n_coverage to calculate percentage_mapped_reads - call assembly_metrics.stats_n_coverage as stats_n_coverage { - input: - samplename = samplename, - bamfile = consensus.sorted_bam - } - call consensus_qc_task.consensus_qc { input: assembly_fasta = consensus.consensus_seq, From 58ac2ee6df49d9379626fd9aa9cc954a8462e84d Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Mon, 21 Oct 2024 12:45:23 -0500 Subject: [PATCH 23/40] float? --- workflows/theiacov/wf_theiacov_clearlabs.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/theiacov/wf_theiacov_clearlabs.wdl b/workflows/theiacov/wf_theiacov_clearlabs.wdl index 0368bef95..a21665d7a 100644 --- a/workflows/theiacov/wf_theiacov_clearlabs.wdl +++ b/workflows/theiacov/wf_theiacov_clearlabs.wdl @@ -208,7 +208,7 @@ workflow theiacov_clearlabs { Int number_Total = consensus_qc.number_Total Float percent_reference_coverage = consensus_qc.percent_reference_coverage # Percentage mapped reads - Float percentage_mapped_reads = stats_n_coverage.percentage_mapped_reads + Float? percentage_mapped_reads = stats_n_coverage.percentage_mapped_reads # SC2 specific coverage outputs Float? sc2_s_gene_mean_coverage = gene_coverage.sc2_s_gene_depth Float? sc2_s_gene_percent_coverage = gene_coverage.sc2_s_gene_percent_coverage From 45568de706de31b4b4b05747de83f8ae415cff88 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Mon, 21 Oct 2024 15:58:10 -0500 Subject: [PATCH 24/40] mdsums and pe wf update flu irma defined --- tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml | 4 ++-- tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml | 4 ++-- workflows/theiacov/wf_theiacov_illumina_pe.wdl | 6 ++++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml b/tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml index 48ffe30c9..0188cc8f2 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml @@ -318,7 +318,7 @@ - path: miniwdl_run/call-pangolin4/work/clearlabs.pangolin_report.csv md5sum: 151390c419b00ca44eb83e2bbfb96996 - path: miniwdl_run/call-stats_n_coverage/command - md5sum: 51da320ddc7de2ffeb263f0ddd85ced6 + md5sum: 976ccf8be4e09ec5df581c834b5d5792 - path: miniwdl_run/call-stats_n_coverage/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-stats_n_coverage/outputs.json @@ -350,7 +350,7 @@ - path: miniwdl_run/call-stats_n_coverage/work/clearlabs.stats.txt md5sum: bfed5344c91ce6f4db1f688cac0a3ab9 - path: miniwdl_run/call-stats_n_coverage_primtrim/command - md5sum: a84f90b8877babe54bf8c068d244fbe8 + md5sum: 5b211043f0514f94bbfd7a282d50d7c4 - path: miniwdl_run/call-stats_n_coverage_primtrim/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-stats_n_coverage_primtrim/outputs.json diff --git a/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml b/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml index 9af5b61c9..fb5311a2c 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml @@ -157,7 +157,7 @@ md5sum: 603c3cbc771ca910b96d3c032aafe7c9 # stats n coverage primer trim - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage_primtrim/command - md5sum: 67cac223adcf059a9dfaa9f28ed34f68 + md5sum: 4945b2eb1bb9c419cdb6da7e78e7229d - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage_primtrim/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage_primtrim/outputs.json @@ -240,7 +240,7 @@ md5sum: 03c5ecf22fdfdb6b240ac3880281a056 # stats n coverage - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage/command - md5sum: 3dacccb252429a0ff46c079a75a09377 + md5sum: 798a1a5da8115c0e3a2f0f7895816e62 - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage/outputs.json diff --git a/workflows/theiacov/wf_theiacov_illumina_pe.wdl b/workflows/theiacov/wf_theiacov_illumina_pe.wdl index dbe5fe3c4..f89975200 100644 --- a/workflows/theiacov/wf_theiacov_illumina_pe.wdl +++ b/workflows/theiacov/wf_theiacov_illumina_pe.wdl @@ -156,10 +156,12 @@ workflow theiacov_illumina_pe { } } # Calculate the percentage of mapped reads for flu samples - call assembly_metrics.stats_n_coverage as flu_stats_n_coverage { - input: + if (defined(flu_track.irma_ha_bam) || defined(flu_track.irma_na_bam)) { + call assembly_metrics.stats_n_coverage as flu_stats_n_coverage { + input: samplename = samplename, bamfile = select_first([flu_track.irma_ha_bam, flu_track.irma_na_bam]) + } } if (defined(ivar_consensus.assembly_fasta) || defined(flu_track.irma_assembly_fasta)) { call consensus_qc_task.consensus_qc { From 54541117a1df69aa0fe6a8f3cbb9ffd3af300a23 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Mon, 21 Oct 2024 16:12:20 -0500 Subject: [PATCH 25/40] mdsum pe --- tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml b/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml index b1bb6da13..0feefc993 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml @@ -205,7 +205,7 @@ md5sum: 511e696afe25f8b96a84d68ec5a8af8a # stats n coverage primer trim - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage_primtrim/command - md5sum: 260c3887be6d99b18caf6d3914c5737f + md5sum: eb1b72ffa6068223720fe4afd745cdf9 - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage_primtrim/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage_primtrim/outputs.json @@ -288,7 +288,7 @@ md5sum: 6c63395a125f8618334b8af2de4e2d88 # stats n coverage - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage/command - md5sum: 1ffac4cc3e9bdd84a0f9228e8e5ca5d9 + md5sum: 6b32ff212b080a57198d3f1b464bddda - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage/outputs.json From 6493640fafbf8612679e84ebc3f1fe47fc906405 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Tue, 22 Oct 2024 13:21:09 -0500 Subject: [PATCH 26/40] move to flue track --- workflows/theiacov/wf_theiacov_illumina_pe.wdl | 12 +----------- workflows/theiacov/wf_theiacov_ont.wdl | 11 +++-------- workflows/utilities/wf_flu_track.wdl | 11 ++++++++++- 3 files changed, 14 insertions(+), 20 deletions(-) diff --git a/workflows/theiacov/wf_theiacov_illumina_pe.wdl b/workflows/theiacov/wf_theiacov_illumina_pe.wdl index f89975200..f8ba2e025 100644 --- a/workflows/theiacov/wf_theiacov_illumina_pe.wdl +++ b/workflows/theiacov/wf_theiacov_illumina_pe.wdl @@ -155,14 +155,6 @@ workflow theiacov_illumina_pe { seq_method = seq_method } } - # Calculate the percentage of mapped reads for flu samples - if (defined(flu_track.irma_ha_bam) || defined(flu_track.irma_na_bam)) { - call assembly_metrics.stats_n_coverage as flu_stats_n_coverage { - input: - samplename = samplename, - bamfile = select_first([flu_track.irma_ha_bam, flu_track.irma_na_bam]) - } - } if (defined(ivar_consensus.assembly_fasta) || defined(flu_track.irma_assembly_fasta)) { call consensus_qc_task.consensus_qc { input: @@ -437,8 +429,6 @@ workflow theiacov_illumina_pe { String? flu_oseltamivir_resistance = flu_track.flu_oseltamivir_resistance String? flu_xofluza_resistance = flu_track.flu_xofluza_resistance String? flu_zanamivir_resistance = flu_track.flu_zanamivir_resistance - # Flu Track Outputs stats n coverage - Float? flu_percentage_mapped_reads = flu_stats_n_coverage.percentage_mapped_reads # HIV Outputs String? quasitools_version = quasitools_illumina_pe.quasitools_version String? quasitools_date = quasitools_illumina_pe.quasitools_date @@ -450,6 +440,6 @@ workflow theiacov_illumina_pe { String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard # Capture percentage_mapped_reads from ivar_consensus task - Float? percentage_mapped_reads = ivar_consensus.percentage_mapped_reads + Float percentage_mapped_reads = select_first([ivar_consensus.percentage_mapped_reads, flu_stats_n_coverage.percentage_mapped_reads]) } } diff --git a/workflows/theiacov/wf_theiacov_ont.wdl b/workflows/theiacov/wf_theiacov_ont.wdl index 3cda26887..983ba7516 100644 --- a/workflows/theiacov/wf_theiacov_ont.wdl +++ b/workflows/theiacov/wf_theiacov_ont.wdl @@ -149,12 +149,7 @@ workflow theiacov_ont { standardized_organism = organism_parameters.standardized_organism, seq_method = seq_method } - call assembly_metrics.stats_n_coverage as flu_stats_n_coverage { - input: - samplename = samplename, - bamfile = select_first([flu_track.irma_ha_bam, flu_track.irma_na_bam]) - } - } + } # nanoplot for basic QC metrics call nanoplot_task.nanoplot as nanoplot_raw { input: @@ -433,7 +428,7 @@ workflow theiacov_ont { String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard # Non-flu specific outputs - # Non-flu specific outputs - Float percentage_mapped_reads = select_first([flu_stats_n_coverage.percentage_mapped_reads, stats_n_coverage.percentage_mapped_reads, 0.0]) + Float percentage_mapped_reads = select_first([run_flu_track.flu_stats_n_coverage.percentage_mapped_reads, stats_n_coverage.percentage_mapped_reads]) + } } \ No newline at end of file diff --git a/workflows/utilities/wf_flu_track.wdl b/workflows/utilities/wf_flu_track.wdl index 71e8e952d..8d05543d9 100644 --- a/workflows/utilities/wf_flu_track.wdl +++ b/workflows/utilities/wf_flu_track.wdl @@ -112,6 +112,14 @@ workflow flu_track { docker = assembly_metrics_docker } } + # Calculate the percentage of mapped reads for flu samples + if (defined(irma.seg_ha_bam) || defined(irma.seg_na_bam)) { + call assembly_metrics.stats_n_coverage as flu_stats_n_coverage { + input: + samplename = samplename, + bamfile = select_first([irma.seg_ha_bam, irma.seg_na_bam]) + } + } # combine HA & NA assembly coverages String ha_na_assembly_coverage_string = "HA: " + select_first([ha_assembly_coverage.depth, ""]) + ", NA: " + select_first([na_assembly_coverage.depth, ""]) # ABRICATE will run if assembly is provided, or was generated with IRMA @@ -249,7 +257,8 @@ workflow flu_track { File? irma_mp_segment_fasta = irma.seg_mp_assembly File? irma_np_segment_fasta = irma.seg_np_assembly File? irma_ns_segment_fasta = irma.seg_ns_assembly - + # calulate mapped reads percentage for flu samples + Float? flu_percentage_mapped_reads = flu_stats_n_coverage.percentage_mapped_reads Array[File] irma_assemblies = irma.irma_assemblies Array[File] irma_vcfs = irma.irma_vcfs Array[File] irma_bams = irma.irma_bams From 56a4a306a3b17853ffa9fec9a3856bcd1e0cdb8f Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Tue, 22 Oct 2024 13:36:43 -0500 Subject: [PATCH 27/40] tidy output pe and ont --- workflows/theiacov/wf_theiacov_illumina_pe.wdl | 4 ++-- workflows/theiacov/wf_theiacov_ont.wdl | 2 +- workflows/utilities/wf_flu_track.wdl | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/workflows/theiacov/wf_theiacov_illumina_pe.wdl b/workflows/theiacov/wf_theiacov_illumina_pe.wdl index f8ba2e025..8ed61ac74 100644 --- a/workflows/theiacov/wf_theiacov_illumina_pe.wdl +++ b/workflows/theiacov/wf_theiacov_illumina_pe.wdl @@ -439,7 +439,7 @@ workflow theiacov_illumina_pe { # QC_Check Results String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard - # Capture percentage_mapped_reads from ivar_consensus task - Float percentage_mapped_reads = select_first([ivar_consensus.percentage_mapped_reads, flu_stats_n_coverage.percentage_mapped_reads]) + # Capture percentage_mapped_reads from ivar_consensus task or flu_track task + Float percentage_mapped_reads = select_first([ivar_consensus.percentage_mapped_reads, flu_track.percentage_mapped_reads]) } } diff --git a/workflows/theiacov/wf_theiacov_ont.wdl b/workflows/theiacov/wf_theiacov_ont.wdl index 983ba7516..7ae253db7 100644 --- a/workflows/theiacov/wf_theiacov_ont.wdl +++ b/workflows/theiacov/wf_theiacov_ont.wdl @@ -428,7 +428,7 @@ workflow theiacov_ont { String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard # Non-flu specific outputs - Float percentage_mapped_reads = select_first([run_flu_track.flu_stats_n_coverage.percentage_mapped_reads, stats_n_coverage.percentage_mapped_reads]) + Float percentage_mapped_reads = select_first([flu_track.percentage_mapped_reads, stats_n_coverage.percentage_mapped_reads]) } } \ No newline at end of file diff --git a/workflows/utilities/wf_flu_track.wdl b/workflows/utilities/wf_flu_track.wdl index 8d05543d9..13c6bc36e 100644 --- a/workflows/utilities/wf_flu_track.wdl +++ b/workflows/utilities/wf_flu_track.wdl @@ -257,14 +257,14 @@ workflow flu_track { File? irma_mp_segment_fasta = irma.seg_mp_assembly File? irma_np_segment_fasta = irma.seg_np_assembly File? irma_ns_segment_fasta = irma.seg_ns_assembly - # calulate mapped reads percentage for flu samples - Float? flu_percentage_mapped_reads = flu_stats_n_coverage.percentage_mapped_reads Array[File] irma_assemblies = irma.irma_assemblies Array[File] irma_vcfs = irma.irma_vcfs Array[File] irma_bams = irma.irma_bams File? irma_ha_bam = irma.seg_ha_bam File? irma_na_bam = irma.seg_na_bam String ha_na_assembly_coverage = ha_na_assembly_coverage_string + # calulate mapped reads percentage for flu samples + Float? percentage_mapped_reads = flu_stats_n_coverage.percentage_mapped_reads # GenoFLU outputs String? genoflu_version = genoflu.genoflu_version String? genoflu_genotype = genoflu.genoflu_genotype From 108612b6d6b75eab47344bc042fa0e9fb28241e6 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Thu, 24 Oct 2024 16:33:27 -0500 Subject: [PATCH 28/40] update strings and provide default values --- workflows/theiacov/wf_theiacov_clearlabs.wdl | 2 +- workflows/theiacov/wf_theiacov_illumina_pe.wdl | 2 +- workflows/theiacov/wf_theiacov_illumina_se.wdl | 2 +- workflows/theiacov/wf_theiacov_ont.wdl | 3 +-- workflows/utilities/wf_ivar_consensus.wdl | 4 +--- 5 files changed, 5 insertions(+), 8 deletions(-) diff --git a/workflows/theiacov/wf_theiacov_clearlabs.wdl b/workflows/theiacov/wf_theiacov_clearlabs.wdl index a21665d7a..0368bef95 100644 --- a/workflows/theiacov/wf_theiacov_clearlabs.wdl +++ b/workflows/theiacov/wf_theiacov_clearlabs.wdl @@ -208,7 +208,7 @@ workflow theiacov_clearlabs { Int number_Total = consensus_qc.number_Total Float percent_reference_coverage = consensus_qc.percent_reference_coverage # Percentage mapped reads - Float? percentage_mapped_reads = stats_n_coverage.percentage_mapped_reads + Float percentage_mapped_reads = stats_n_coverage.percentage_mapped_reads # SC2 specific coverage outputs Float? sc2_s_gene_mean_coverage = gene_coverage.sc2_s_gene_depth Float? sc2_s_gene_percent_coverage = gene_coverage.sc2_s_gene_percent_coverage diff --git a/workflows/theiacov/wf_theiacov_illumina_pe.wdl b/workflows/theiacov/wf_theiacov_illumina_pe.wdl index 8ed61ac74..3aa1694ef 100644 --- a/workflows/theiacov/wf_theiacov_illumina_pe.wdl +++ b/workflows/theiacov/wf_theiacov_illumina_pe.wdl @@ -440,6 +440,6 @@ workflow theiacov_illumina_pe { String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard # Capture percentage_mapped_reads from ivar_consensus task or flu_track task - Float percentage_mapped_reads = select_first([ivar_consensus.percentage_mapped_reads, flu_track.percentage_mapped_reads]) + String percentage_mapped_reads = select_first([ivar_consensus.percentage_mapped_reads, flu_track.percentage_mapped_reads,""]) } } diff --git a/workflows/theiacov/wf_theiacov_illumina_se.wdl b/workflows/theiacov/wf_theiacov_illumina_se.wdl index 0f730024f..4ae59f5dc 100644 --- a/workflows/theiacov/wf_theiacov_illumina_se.wdl +++ b/workflows/theiacov/wf_theiacov_illumina_se.wdl @@ -318,6 +318,6 @@ workflow theiacov_illumina_se { String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard # Capture percentage_mapped_reads from ivar_consensus task - Float? percentage_mapped_reads = ivar_consensus.percentage_mapped_reads + String? percentage_mapped_reads = ivar_consensus.percentage_mapped_reads } } \ No newline at end of file diff --git a/workflows/theiacov/wf_theiacov_ont.wdl b/workflows/theiacov/wf_theiacov_ont.wdl index 7ae253db7..e1614d013 100644 --- a/workflows/theiacov/wf_theiacov_ont.wdl +++ b/workflows/theiacov/wf_theiacov_ont.wdl @@ -428,7 +428,6 @@ workflow theiacov_ont { String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard # Non-flu specific outputs - Float percentage_mapped_reads = select_first([flu_track.percentage_mapped_reads, stats_n_coverage.percentage_mapped_reads]) - + String percentage_mapped_reads = select_first([flu_track.percentage_mapped_reads, stats_n_coverage.percentage_mapped_reads,""]) } } \ No newline at end of file diff --git a/workflows/utilities/wf_ivar_consensus.wdl b/workflows/utilities/wf_ivar_consensus.wdl index f412b23ff..4e7112943 100644 --- a/workflows/utilities/wf_ivar_consensus.wdl +++ b/workflows/utilities/wf_ivar_consensus.wdl @@ -155,8 +155,6 @@ workflow ivar_consensus { String samtools_version_stats = stats_n_coverage.samtools_version # Assembly metrics - Float percentage_mapped_reads = select_first([stats_n_coverage_primtrim.percentage_mapped_reads, stats_n_coverage.percentage_mapped_reads, 0.0]) - - + String percentage_mapped_reads = select_first([stats_n_coverage_primtrim.percentage_mapped_reads, stats_n_coverage.percentage_mapped_reads,""]) } } From 842691aa0cf90ff2f88f4ea9c2c9122912111301 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Thu, 31 Oct 2024 15:27:33 -0500 Subject: [PATCH 29/40] clean tab/spaces echo --- .../basic_statistics/task_assembly_metrics.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl b/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl index 8323dd3f1..359eb4ea1 100644 --- a/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl +++ b/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl @@ -53,14 +53,14 @@ task stats_n_coverage { #output all metrics in one txt file # Output header row (for CSV) - echo "Statistic Value" > ~{samplename}_metrics.txt + echo -e "Statistic\tValue" > ~{samplename}_metrics.txt # Output each statistic as a row - echo "Coverage $coverage" >> ~{samplename}_metrics.txt - echo "Depth $depth" >> ~{samplename}_metrics.txt - echo "Mean Base Quality $meanbaseq" >> ~{samplename}_metrics.txt - echo "Mean Mapping Quality $meanmapq" >> ~{samplename}_metrics.txt - echo "Percentage Mapped Reads $percentage_mapped_reads" >> ~{samplename}_metrics.txt + echo -e "Coverage\t$coverage" >> ~{samplename}_metrics.txt + echo -e "Depth\t$depth" >> ~{samplename}_metrics.txt + echo -e "Mean Base Quality\t$meanbaseq" >> ~{samplename}_metrics.txt + echo -e "Mean Mapping Quality\t$meanmapq" >> ~{samplename}_metrics.txt + echo -e "Percentage Mapped Reads\t$percentage_mapped_reads" >> ~{samplename}_metrics.txt >>> output { String date = read_string("DATE") From cd32e7497c2962d9d30da6468f04912c2bab2967 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Thu, 31 Oct 2024 15:48:17 -0500 Subject: [PATCH 30/40] my fav commit mdsums! --- tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml | 4 ++-- tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml | 4 ++-- tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml | 4 ++-- tests/workflows/theiacov/test_wf_theiacov_ont.yml | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml b/tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml index 0188cc8f2..e42083370 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml @@ -318,7 +318,7 @@ - path: miniwdl_run/call-pangolin4/work/clearlabs.pangolin_report.csv md5sum: 151390c419b00ca44eb83e2bbfb96996 - path: miniwdl_run/call-stats_n_coverage/command - md5sum: 976ccf8be4e09ec5df581c834b5d5792 + md5sum: 218acd850fc53050a663b1cdc856bdbe - path: miniwdl_run/call-stats_n_coverage/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-stats_n_coverage/outputs.json @@ -350,7 +350,7 @@ - path: miniwdl_run/call-stats_n_coverage/work/clearlabs.stats.txt md5sum: bfed5344c91ce6f4db1f688cac0a3ab9 - path: miniwdl_run/call-stats_n_coverage_primtrim/command - md5sum: 5b211043f0514f94bbfd7a282d50d7c4 + md5sum: eb5a87024061836b8c244b0cd050bb6c - path: miniwdl_run/call-stats_n_coverage_primtrim/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-stats_n_coverage_primtrim/outputs.json diff --git a/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml b/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml index 0feefc993..0d8d12351 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml @@ -205,7 +205,7 @@ md5sum: 511e696afe25f8b96a84d68ec5a8af8a # stats n coverage primer trim - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage_primtrim/command - md5sum: eb1b72ffa6068223720fe4afd745cdf9 + md5sum: cd4e1a2a33c10e9513e01c95eb641bdc - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage_primtrim/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage_primtrim/outputs.json @@ -288,7 +288,7 @@ md5sum: 6c63395a125f8618334b8af2de4e2d88 # stats n coverage - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage/command - md5sum: 6b32ff212b080a57198d3f1b464bddda + md5sum: b4e5c57051c06349d24b01d00d383733 - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage/outputs.json diff --git a/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml b/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml index fb5311a2c..d79e610c7 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml @@ -157,7 +157,7 @@ md5sum: 603c3cbc771ca910b96d3c032aafe7c9 # stats n coverage primer trim - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage_primtrim/command - md5sum: 4945b2eb1bb9c419cdb6da7e78e7229d + md5sum: e9aa93dda1866b935220de3a60a28455 - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage_primtrim/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage_primtrim/outputs.json @@ -240,7 +240,7 @@ md5sum: 03c5ecf22fdfdb6b240ac3880281a056 # stats n coverage - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage/command - md5sum: 798a1a5da8115c0e3a2f0f7895816e62 + md5sum: f1c13608d58271a8dcd476f442fc181c - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage/outputs.json diff --git a/tests/workflows/theiacov/test_wf_theiacov_ont.yml b/tests/workflows/theiacov/test_wf_theiacov_ont.yml index fd0cd375c..233cd6e4e 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_ont.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_ont.yml @@ -232,7 +232,7 @@ md5sum: 32c0be4fb7f3030bf9c74c0a836d4f2e - path: miniwdl_run/call-raw_check_reads/work/_miniwdl_inputs/0/ont.fastq.gz - path: miniwdl_run/call-stats_n_coverage/command - md5sum: 194a32a05aca8867a2d400465838945d + md5sum: 22559ad4e4c2af9c55c563551e95e819 - path: miniwdl_run/call-stats_n_coverage/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-stats_n_coverage/outputs.json @@ -257,7 +257,7 @@ - path: miniwdl_run/call-stats_n_coverage/work/ont.flagstat.txt - path: miniwdl_run/call-stats_n_coverage/work/ont.stats.txt - path: miniwdl_run/call-stats_n_coverage_primtrim/command - md5sum: 7246100df31a6508fe16757463e7c5a2 + md5sum: ac19abff17f090e6da63cee8b831b212 - path: miniwdl_run/call-stats_n_coverage_primtrim/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-stats_n_coverage_primtrim/outputs.json From f3b316fd013cabc0141c7f4d0b15d65b99db8744 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Mon, 4 Nov 2024 10:25:22 -0500 Subject: [PATCH 31/40] remove extra space because it was bothering me --- .../basic_statistics/task_assembly_metrics.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl b/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl index 359eb4ea1..762db23cf 100644 --- a/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl +++ b/tasks/quality_control/basic_statistics/task_assembly_metrics.wdl @@ -56,7 +56,7 @@ task stats_n_coverage { echo -e "Statistic\tValue" > ~{samplename}_metrics.txt # Output each statistic as a row - echo -e "Coverage\t$coverage" >> ~{samplename}_metrics.txt + echo -e "Coverage\t$coverage" >> ~{samplename}_metrics.txt echo -e "Depth\t$depth" >> ~{samplename}_metrics.txt echo -e "Mean Base Quality\t$meanbaseq" >> ~{samplename}_metrics.txt echo -e "Mean Mapping Quality\t$meanmapq" >> ~{samplename}_metrics.txt @@ -86,4 +86,4 @@ task stats_n_coverage { preemptible: 0 maxRetries: 3 } -} \ No newline at end of file +} From 979d63639298c6b250c5f43a6e7eaa2dc21b415b Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Mon, 4 Nov 2024 10:26:07 -0500 Subject: [PATCH 32/40] adding a space because i am crazy --- docs/workflows/genomic_characterization/theiacov.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/workflows/genomic_characterization/theiacov.md b/docs/workflows/genomic_characterization/theiacov.md index 5ce7225b4..a3002e125 100644 --- a/docs/workflows/genomic_characterization/theiacov.md +++ b/docs/workflows/genomic_characterization/theiacov.md @@ -1104,7 +1104,7 @@ All TheiaCoV Workflows (not TheiaCoV_FASTA_Batch) | pangolin_notes | String | Lineage notes as determined by Pangolin | CL, FASTA, ONT, PE, SE | | pangolin_versions | String | All Pangolin software and database versions | CL, FASTA, ONT, PE, SE | | percent_reference_coverage | Float | Percent coverage of the reference genome after performing primer trimming; calculated as assembly_length_unambiguous / length of the reference genome (SC2: 29903) x 100 | CL, FASTA, ONT, PE, SE | -| percentage_mapped_reads | String |Percentage of reads that successfully aligned to the reference genome. This value is calculated by number of mapped reads / total number of reads x 100. | ONT, PE, SE | +| percentage_mapped_reads | String | Percentage of reads that successfully aligned to the reference genome. This value is calculated by number of mapped reads / total number of reads x 100. | ONT, PE, SE | | primer_bed_name | String | Name of the primer bed files used for primer trimming | CL, ONT, PE, SE | | primer_trimmed_read_percent | Float | Percentage of read data with primers trimmed as determined by iVar trim | PE, SE | | qc_check | String | The results of the QC Check task | CL, FASTA, ONT, PE, SE | From 25a2da2b52a5e46e1b726a28786e37ec426681ec Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Mon, 4 Nov 2024 10:13:54 -0600 Subject: [PATCH 33/40] update flu output --- workflows/utilities/wf_flu_track.wdl | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/workflows/utilities/wf_flu_track.wdl b/workflows/utilities/wf_flu_track.wdl index 13c6bc36e..6a2b9fd9b 100644 --- a/workflows/utilities/wf_flu_track.wdl +++ b/workflows/utilities/wf_flu_track.wdl @@ -112,16 +112,12 @@ workflow flu_track { docker = assembly_metrics_docker } } - # Calculate the percentage of mapped reads for flu samples - if (defined(irma.seg_ha_bam) || defined(irma.seg_na_bam)) { - call assembly_metrics.stats_n_coverage as flu_stats_n_coverage { - input: - samplename = samplename, - bamfile = select_first([irma.seg_ha_bam, irma.seg_na_bam]) - } - } # combine HA & NA assembly coverages String ha_na_assembly_coverage_string = "HA: " + select_first([ha_assembly_coverage.depth, ""]) + ", NA: " + select_first([na_assembly_coverage.depth, ""]) + + # combine HA & NA mapped reads percentages + String ha_na_mapped_reads_percentage_string = "HA: " + select_first([ha_assembly_coverage.percentage_mapped_reads, ""]) + ", NA: " + select_first([na_assembly_coverage.percentage_mapped_reads, ""]) + # ABRICATE will run if assembly is provided, or was generated with IRMA if (defined(irma.irma_assemblies) && defined(irma.irma_assembly_fasta)){ call abricate.abricate_flu { @@ -264,7 +260,7 @@ workflow flu_track { File? irma_na_bam = irma.seg_na_bam String ha_na_assembly_coverage = ha_na_assembly_coverage_string # calulate mapped reads percentage for flu samples - Float? percentage_mapped_reads = flu_stats_n_coverage.percentage_mapped_reads + String percentage_mapped_reads = flu_stats_n_coverage.percentage_mapped_reads # GenoFLU outputs String? genoflu_version = genoflu.genoflu_version String? genoflu_genotype = genoflu.genoflu_genotype From 23ead27b7a30a2150e64468a6c3d68e422ef6569 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Mon, 4 Nov 2024 10:40:42 -0600 Subject: [PATCH 34/40] update outputs --- workflows/theiacov/wf_theiacov_illumina_pe.wdl | 2 +- workflows/theiacov/wf_theiacov_ont.wdl | 2 +- workflows/utilities/wf_flu_track.wdl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/workflows/theiacov/wf_theiacov_illumina_pe.wdl b/workflows/theiacov/wf_theiacov_illumina_pe.wdl index 3aa1694ef..5f5e5b651 100644 --- a/workflows/theiacov/wf_theiacov_illumina_pe.wdl +++ b/workflows/theiacov/wf_theiacov_illumina_pe.wdl @@ -440,6 +440,6 @@ workflow theiacov_illumina_pe { String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard # Capture percentage_mapped_reads from ivar_consensus task or flu_track task - String percentage_mapped_reads = select_first([ivar_consensus.percentage_mapped_reads, flu_track.percentage_mapped_reads,""]) + String percentage_mapped_reads = select_first([ivar_consensus.percentage_mapped_reads, flu_track.percentage_mapped_reads, ""]) } } diff --git a/workflows/theiacov/wf_theiacov_ont.wdl b/workflows/theiacov/wf_theiacov_ont.wdl index e1614d013..48db535e5 100644 --- a/workflows/theiacov/wf_theiacov_ont.wdl +++ b/workflows/theiacov/wf_theiacov_ont.wdl @@ -428,6 +428,6 @@ workflow theiacov_ont { String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard # Non-flu specific outputs - String percentage_mapped_reads = select_first([flu_track.percentage_mapped_reads, stats_n_coverage.percentage_mapped_reads,""]) + String percentage_mapped_reads = select_first([stats_n_coverage.percentage_mapped_reads, flu_track.ha_na_mapped_reads_percentage_string, ""]) } } \ No newline at end of file diff --git a/workflows/utilities/wf_flu_track.wdl b/workflows/utilities/wf_flu_track.wdl index 6a2b9fd9b..ddfd5e902 100644 --- a/workflows/utilities/wf_flu_track.wdl +++ b/workflows/utilities/wf_flu_track.wdl @@ -260,7 +260,7 @@ workflow flu_track { File? irma_na_bam = irma.seg_na_bam String ha_na_assembly_coverage = ha_na_assembly_coverage_string # calulate mapped reads percentage for flu samples - String percentage_mapped_reads = flu_stats_n_coverage.percentage_mapped_reads + String percentage_mapped_reads = ha_na_mapped_reads_percentage_string # GenoFLU outputs String? genoflu_version = genoflu.genoflu_version String? genoflu_genotype = genoflu.genoflu_genotype From 8b8aea1393e9a08d251301c4186dd00e9a328e4e Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Mon, 4 Nov 2024 11:12:57 -0600 Subject: [PATCH 35/40] remove string in name --- workflows/theiacov/wf_theiacov_ont.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/theiacov/wf_theiacov_ont.wdl b/workflows/theiacov/wf_theiacov_ont.wdl index 48db535e5..fe36b78a3 100644 --- a/workflows/theiacov/wf_theiacov_ont.wdl +++ b/workflows/theiacov/wf_theiacov_ont.wdl @@ -428,6 +428,6 @@ workflow theiacov_ont { String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard # Non-flu specific outputs - String percentage_mapped_reads = select_first([stats_n_coverage.percentage_mapped_reads, flu_track.ha_na_mapped_reads_percentage_string, ""]) + String percentage_mapped_reads = select_first([stats_n_coverage.percentage_mapped_reads, flu_track.ha_na_mapped_reads_percentage, ""]) } } \ No newline at end of file From 1b5882936054a0941ef1eb19e8f204546e43ba4c Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Mon, 4 Nov 2024 11:18:50 -0600 Subject: [PATCH 36/40] correctly name percentage mapped reads --- workflows/theiacov/wf_theiacov_ont.wdl | 2 +- workflows/utilities/wf_flu_track.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/theiacov/wf_theiacov_ont.wdl b/workflows/theiacov/wf_theiacov_ont.wdl index fe36b78a3..264ef843f 100644 --- a/workflows/theiacov/wf_theiacov_ont.wdl +++ b/workflows/theiacov/wf_theiacov_ont.wdl @@ -428,6 +428,6 @@ workflow theiacov_ont { String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard # Non-flu specific outputs - String percentage_mapped_reads = select_first([stats_n_coverage.percentage_mapped_reads, flu_track.ha_na_mapped_reads_percentage, ""]) + String percentage_mapped_reads = select_first([stats_n_coverage.percentage_mapped_reads, flu_track.ha_na_percentage_mapped_reads, ""]) } } \ No newline at end of file diff --git a/workflows/utilities/wf_flu_track.wdl b/workflows/utilities/wf_flu_track.wdl index ddfd5e902..84d906dcf 100644 --- a/workflows/utilities/wf_flu_track.wdl +++ b/workflows/utilities/wf_flu_track.wdl @@ -116,7 +116,7 @@ workflow flu_track { String ha_na_assembly_coverage_string = "HA: " + select_first([ha_assembly_coverage.depth, ""]) + ", NA: " + select_first([na_assembly_coverage.depth, ""]) # combine HA & NA mapped reads percentages - String ha_na_mapped_reads_percentage_string = "HA: " + select_first([ha_assembly_coverage.percentage_mapped_reads, ""]) + ", NA: " + select_first([na_assembly_coverage.percentage_mapped_reads, ""]) + String ha_na_percentage_mapped_reads = "HA: " + select_first([ha_assembly_coverage.percentage_mapped_reads, ""]) + ", NA: " + select_first([na_assembly_coverage.percentage_mapped_reads, ""]) # ABRICATE will run if assembly is provided, or was generated with IRMA if (defined(irma.irma_assemblies) && defined(irma.irma_assembly_fasta)){ From 85cc203d84d43dd34adfe720ec68c4975f0ef36d Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Mon, 4 Nov 2024 11:25:41 -0600 Subject: [PATCH 37/40] correct output in flu track wdl --- workflows/utilities/wf_flu_track.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/utilities/wf_flu_track.wdl b/workflows/utilities/wf_flu_track.wdl index 84d906dcf..6bb2f8c85 100644 --- a/workflows/utilities/wf_flu_track.wdl +++ b/workflows/utilities/wf_flu_track.wdl @@ -260,7 +260,7 @@ workflow flu_track { File? irma_na_bam = irma.seg_na_bam String ha_na_assembly_coverage = ha_na_assembly_coverage_string # calulate mapped reads percentage for flu samples - String percentage_mapped_reads = ha_na_mapped_reads_percentage_string + String percentage_mapped_reads = ha_na_percentage_mapped_reads # GenoFLU outputs String? genoflu_version = genoflu.genoflu_version String? genoflu_genotype = genoflu.genoflu_genotype From 5c5cc4f28e9400b5c6f5d74dbf0af924b414c8e0 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Mon, 4 Nov 2024 11:37:46 -0600 Subject: [PATCH 38/40] update mdsums --- tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml | 4 ++-- tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml | 4 ++-- tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml | 4 ++-- workflows/theiacov/wf_theiacov_ont.wdl | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml b/tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml index e42083370..c8570f30f 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml @@ -318,7 +318,7 @@ - path: miniwdl_run/call-pangolin4/work/clearlabs.pangolin_report.csv md5sum: 151390c419b00ca44eb83e2bbfb96996 - path: miniwdl_run/call-stats_n_coverage/command - md5sum: 218acd850fc53050a663b1cdc856bdbe + md5sum: ac020678f99ac145b11d3dbc7b9fe9ba - path: miniwdl_run/call-stats_n_coverage/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-stats_n_coverage/outputs.json @@ -350,7 +350,7 @@ - path: miniwdl_run/call-stats_n_coverage/work/clearlabs.stats.txt md5sum: bfed5344c91ce6f4db1f688cac0a3ab9 - path: miniwdl_run/call-stats_n_coverage_primtrim/command - md5sum: eb5a87024061836b8c244b0cd050bb6c + md5sum: 2974f886e1959cd5eaae5e495c91f7cc - path: miniwdl_run/call-stats_n_coverage_primtrim/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-stats_n_coverage_primtrim/outputs.json diff --git a/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml b/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml index 0d8d12351..011dc1b2e 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml @@ -205,7 +205,7 @@ md5sum: 511e696afe25f8b96a84d68ec5a8af8a # stats n coverage primer trim - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage_primtrim/command - md5sum: cd4e1a2a33c10e9513e01c95eb641bdc + md5sum: 1c61b89c2a94e87518a6679a04885341 - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage_primtrim/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage_primtrim/outputs.json @@ -288,7 +288,7 @@ md5sum: 6c63395a125f8618334b8af2de4e2d88 # stats n coverage - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage/command - md5sum: b4e5c57051c06349d24b01d00d383733 + md5sum: e49a297b1c0eb195a2acd80f00672668 - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage/outputs.json diff --git a/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml b/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml index d79e610c7..6e51077bb 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml @@ -157,7 +157,7 @@ md5sum: 603c3cbc771ca910b96d3c032aafe7c9 # stats n coverage primer trim - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage_primtrim/command - md5sum: e9aa93dda1866b935220de3a60a28455 + md5sum: affacdcfda48ad5e371a4510f19520bd - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage_primtrim/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage_primtrim/outputs.json @@ -240,7 +240,7 @@ md5sum: 03c5ecf22fdfdb6b240ac3880281a056 # stats n coverage - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage/command - md5sum: f1c13608d58271a8dcd476f442fc181c + md5sum: cb4de0e459b3fada21bcf08a8dbea89f - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage/outputs.json diff --git a/workflows/theiacov/wf_theiacov_ont.wdl b/workflows/theiacov/wf_theiacov_ont.wdl index 264ef843f..8f6a9311d 100644 --- a/workflows/theiacov/wf_theiacov_ont.wdl +++ b/workflows/theiacov/wf_theiacov_ont.wdl @@ -428,6 +428,6 @@ workflow theiacov_ont { String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard # Non-flu specific outputs - String percentage_mapped_reads = select_first([stats_n_coverage.percentage_mapped_reads, flu_track.ha_na_percentage_mapped_reads, ""]) + String percentage_mapped_reads = select_first([stats_n_coverage.percentage_mapped_reads, flu_track.percentage_mapped_reads, ""]) } } \ No newline at end of file From 1888803c12990267e82d3349d521f6a99278d20b Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Mon, 4 Nov 2024 11:49:14 -0600 Subject: [PATCH 39/40] primtrim output --- workflows/theiacov/wf_theiacov_ont.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/theiacov/wf_theiacov_ont.wdl b/workflows/theiacov/wf_theiacov_ont.wdl index 8f6a9311d..7d8d29ad4 100644 --- a/workflows/theiacov/wf_theiacov_ont.wdl +++ b/workflows/theiacov/wf_theiacov_ont.wdl @@ -428,6 +428,6 @@ workflow theiacov_ont { String? qc_check = qc_check_task.qc_check File? qc_standard = qc_check_task.qc_standard # Non-flu specific outputs - String percentage_mapped_reads = select_first([stats_n_coverage.percentage_mapped_reads, flu_track.percentage_mapped_reads, ""]) + String percentage_mapped_reads = select_first([stats_n_coverage_primtrim.percentage_mapped_reads, stats_n_coverage.percentage_mapped_reads, flu_track.percentage_mapped_reads, ""]) } } \ No newline at end of file From e81d7b9eedb45034378b3d0b09f991a6b62bb535 Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Mon, 4 Nov 2024 12:05:31 -0600 Subject: [PATCH 40/40] another mdsum --- tests/workflows/theiacov/test_wf_theiacov_ont.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/workflows/theiacov/test_wf_theiacov_ont.yml b/tests/workflows/theiacov/test_wf_theiacov_ont.yml index 233cd6e4e..fdff89406 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_ont.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_ont.yml @@ -232,7 +232,7 @@ md5sum: 32c0be4fb7f3030bf9c74c0a836d4f2e - path: miniwdl_run/call-raw_check_reads/work/_miniwdl_inputs/0/ont.fastq.gz - path: miniwdl_run/call-stats_n_coverage/command - md5sum: 22559ad4e4c2af9c55c563551e95e819 + md5sum: fbd85e82af1bbfaa734a13a9c1394300 - path: miniwdl_run/call-stats_n_coverage/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-stats_n_coverage/outputs.json @@ -257,7 +257,7 @@ - path: miniwdl_run/call-stats_n_coverage/work/ont.flagstat.txt - path: miniwdl_run/call-stats_n_coverage/work/ont.stats.txt - path: miniwdl_run/call-stats_n_coverage_primtrim/command - md5sum: ac19abff17f090e6da63cee8b831b212 + md5sum: 3689a902aa96e8c132e6ef4946699e61 - path: miniwdl_run/call-stats_n_coverage_primtrim/inputs.json contains: ["bamfile", "samplename"] - path: miniwdl_run/call-stats_n_coverage_primtrim/outputs.json