From 5be343354f716d77e9e4a0fb4a2ec10eb3bc00a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?In=C3=AAs=20Mendes?= Date: Thu, 29 Aug 2024 16:20:10 +0100 Subject: [PATCH] [TheiaCoV_Illumina_SE] Fix kraken2 raw input and expose kraken2 dehosted outputs (#597) * make kraken2 raw actually run on the raw reads; expose kraken2 dehosted outputs * update md5sums * update md5sum * kraken2 raw is now actually running on the raw reads * update CI --- .../workflows/theiacov/test_wf_theiacov_illumina_se.yml | 9 ++++----- .../theiaprok/test_wf_theiaprok_illumina_se.yml | 2 +- workflows/theiacov/wf_theiacov_illumina_se.wdl | 6 +++++- workflows/utilities/wf_read_QC_trim_se.wdl | 4 ++-- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml b/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml index 139cd3b39..37e1b629c 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml @@ -74,7 +74,7 @@ - path: miniwdl_run/call-read_QC_trim/call-fastq_scan_raw/work/VERSION # kraken2 - path: miniwdl_run/call-read_QC_trim/call-kraken2_theiacov_raw/command - md5sum: 0efd43fc9f7079a38e5b094245c97f59 + md5sum: ca22e45a62c5c26c4447cdafe75a26ab - path: miniwdl_run/call-read_QC_trim/call-kraken2_theiacov_raw/inputs.json contains: ["read1", "samplename"] - path: miniwdl_run/call-read_QC_trim/call-kraken2_theiacov_raw/outputs.json @@ -85,14 +85,13 @@ - path: miniwdl_run/call-read_QC_trim/call-kraken2_theiacov_raw/task.log contains: ["wdl", "theiacov_illumina_se", "kraken2_theiacov_raw", "done"] - path: miniwdl_run/call-read_QC_trim/call-kraken2_theiacov_raw/work/PERCENT_HUMAN - md5sum: 4fd4dcef994592f9865e9bc8807f32f4 + md5sum: 1576d5d341223ea9d44b0b8a213bb9da - path: miniwdl_run/call-read_QC_trim/call-kraken2_theiacov_raw/work/PERCENT_SC2 - md5sum: 73f7f6bf2257905cb4bee12d23247db3 + md5sum: 7cc2eb659e21f15fa902b11812eae1f6 - path: 
miniwdl_run/call-read_QC_trim/call-kraken2_theiacov_raw/work/PERCENT_TARGET_ORGANISM md5sum: 68b329da9893e34099c7d8ad5cb9c940 - - path: miniwdl_run/call-read_QC_trim/call-kraken2_theiacov_raw/work/_miniwdl_inputs/0/ERR6319327_1.clean.fastq.gz - path: miniwdl_run/call-read_QC_trim/call-kraken2_theiacov_raw/work/ERR6319327_kraken2_report.txt - md5sum: 3d1bab1c5040916642df5baf20a2d6bd + md5sum: 9a089b8920e55c9cc7bc8cd7d18f9a8e # clean read screen - path: miniwdl_run/call-clean_check_reads/command md5sum: aec6c57452ddff84c325601a780605d2 diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index 95dc53249..ac3b0e150 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -598,6 +598,6 @@ - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl md5sum: 359bc59b671ef3121f6b4ac589f0c80e - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl - md5sum: 51c696b5d122eb1bc7e03d31ccc82cc6 + md5sum: d11bfe33fdd96eab28892be5a01c1c7d - path: miniwdl_run/workflow.log contains: ["wdl", "theiaprok_illumina_se", "NOTICE", "done"] diff --git a/workflows/theiacov/wf_theiacov_illumina_se.wdl b/workflows/theiacov/wf_theiacov_illumina_se.wdl index 91f0a84ed..fa1044c24 100644 --- a/workflows/theiacov/wf_theiacov_illumina_se.wdl +++ b/workflows/theiacov/wf_theiacov_illumina_se.wdl @@ -232,12 +232,16 @@ workflow theiacov_illumina_se { File? read1_clean = read_QC_trim.read1_clean String? bbduk_docker = read_QC_trim.bbduk_docker # Read QC - kraken outputs + String? kraken_version = read_QC_trim.kraken_version Float? kraken_human = read_QC_trim.kraken_human Float? kraken_sc2 = read_QC_trim.kraken_sc2 String? kraken_target_organism = read_QC_trim.kraken_target_organism String? kraken_target_organism_name = read_QC_trim.kraken_target_organism_name - String? kraken_version = read_QC_trim.kraken_version File? 
kraken_report = read_QC_trim.kraken_report + Float? kraken_human_dehosted = read_QC_trim.kraken_human_dehosted + Float? kraken_sc2_dehosted = read_QC_trim.kraken_sc2_dehosted + String? kraken_target_organism_dehosted = read_QC_trim.kraken_target_organism_dehosted + File? kraken_report_dehosted = read_QC_trim.kraken_report_dehosted # Read Alignment - bwa outputs String? bwa_version = ivar_consensus.bwa_version String? samtools_version = ivar_consensus.samtools_version diff --git a/workflows/utilities/wf_read_QC_trim_se.wdl b/workflows/utilities/wf_read_QC_trim_se.wdl index 76aa3b625..d652014ce 100644 --- a/workflows/utilities/wf_read_QC_trim_se.wdl +++ b/workflows/utilities/wf_read_QC_trim_se.wdl @@ -99,7 +99,7 @@ workflow read_QC_trim_se { call kraken.kraken2_theiacov as kraken2_theiacov_raw { input: samplename = samplename, - read1 = bbduk_se.read1_clean, + read1 = read1, target_organism = target_organism } call kraken.kraken2_theiacov as kraken2_theiacov_dehosted { @@ -158,7 +158,7 @@ workflow read_QC_trim_se { File? fastqc_raw1_html = fastqc_raw.read1_fastqc_html File? fastqc_clean1_html = fastqc_clean.read1_fastqc_html - # kraken2 + # kraken2 - raw and dehosted String kraken_version = select_first([kraken2_theiacov_raw.version, kraken2_standalone.kraken2_version, ""]) Float? kraken_human = kraken2_theiacov_raw.percent_human Float? kraken_sc2 = kraken2_theiacov_raw.percent_sc2