From 7828f47734f80ba8b3b337a50ff25c04889d371b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?In=C3=AAs=20Mendes?= Date: Mon, 9 Oct 2023 19:01:21 +0100 Subject: [PATCH] KmerFinder to TheiaProk (#188) * skeleton on kmerfinder task * remove kmerfinder_db_name * still trying to get kmerfinder to run * is this working now?!? * first working version of kmerfinder task * parse top hit * make results output file optional * final version of task (for now..) * add kmerfinder_bacteria to theiaprok! * fix typo * pass empty file for tests - kmerfinder_db * fix input json * add kmerfinder query coverage to theiaprok SE, PE, FASTA and ONT * remove unneeded skip on kmerfinder (set to false by default) * fix ops * update md5sum (part1) * update md5sum (part2) * fix sister typo * missed a file * update CI * add kmerfinder_template_coverage to theiaprok output * expose database name on kmerfinder outputs * update md5sum * fix bug - headers being output to datatable --- tasks/taxon_id/task_kmerfinder.wdl | 67 +++++++++++++++++++ tasks/utilities/task_broad_terra_tools.wdl | 16 ++++- .../theiaprok/wf_theiaprok_illumina_pe.json | 3 +- .../theiaprok/wf_theiaprok_illumina_se.json | 3 +- .../test_wf_theiaprok_illumina_pe.yml | 4 +- .../test_wf_theiaprok_illumina_se.yml | 4 +- workflows/theiaprok/wf_theiaprok_fasta.wdl | 26 ++++++- .../theiaprok/wf_theiaprok_illumina_pe.wdl | 26 ++++++- .../theiaprok/wf_theiaprok_illumina_se.wdl | 26 ++++++- workflows/theiaprok/wf_theiaprok_ont.wdl | 26 ++++++- 10 files changed, 185 insertions(+), 16 deletions(-) create mode 100644 tasks/taxon_id/task_kmerfinder.wdl diff --git a/tasks/taxon_id/task_kmerfinder.wdl b/tasks/taxon_id/task_kmerfinder.wdl new file mode 100644 index 000000000..61b3bed78 --- /dev/null +++ b/tasks/taxon_id/task_kmerfinder.wdl @@ -0,0 +1,67 @@ +version 1.0 + +task kmerfinder_bacteria { + input { + File assembly + String samplename + File kmerfinder_db = 
"gs://theiagen-public-files-rp/terra/theiaprok-files/kmerfinder_bacteria_20230911.tar.gz" + String docker = "us-docker.pkg.dev/general-theiagen/biocontainers/kmerfinder:3.0.2--hdfd78af_0" + Int memory = 32 + Int cpu = 4 + Int disk_size = 100 + String kmerfinder_args = "" + } + command <<< + # Decompress the kmerfinder bacterial database + mkdir db + tar -C ./db/ -xzvf ~{kmerfinder_db} + + # Run kmerfinder + kmerfinder.py \ + -db ./db/bacteria/bacteria.ATG \ + -tax ./db/bacteria/bacteria.tax \ + -i ~{assembly} \ + -o ~{samplename} \ + ~{kmerfinder_args} + + # parse outputs + if [ ! -f ~{samplename}/results.txt ]; then + PF="No hit detected in database" + QC="No hit detected in database" + TC="No hit detected in database" + else + PF="$(cat ~{samplename}/results.txt | head -n 2 | tail -n 1 | cut -f 19)" + QC="$(cat ~{samplename}/results.txt | head -n 2 | tail -n 1 | cut -f 6)" + TC="$(cat ~{samplename}/results.txt | head -n 2 | tail -n 1 | cut -f 7)" + # String is empty or just contains the header + if [ "$PF" == "" ] || [ "$PF" == "Species" ]; then + PF="No hit detected in database" + QC="No hit detected in database" + TC="No hit detected in database" + fi + mv -v ~{samplename}/results.txt ~{samplename}_kmerfinder.tsv + fi + echo $PF | tee TOP_HIT + echo $QC | tee QC_METRIC + echo $TC | tee TEMPLATE_COVERAGE + + # extract database name + DB=$(basename ~{kmerfinder_db} | sed 's/\.tar\.gz$//') + echo $DB | tee DATABASE + >>> + output { + String kmerfinder_docker = docker + File? 
kmerfinder_results_tsv = "~{samplename}_kmerfinder.tsv" + String kmerfinder_top_hit = read_string("TOP_HIT") + String kmerfinder_query_coverage = read_string("QC_METRIC") + String kmerfinder_template_coverage = read_string("TEMPLATE_COVERAGE") + String kmerfinder_database = read_string("DATABASE") + } + runtime { + docker: docker + memory: "~{memory} GB" + cpu: cpu + disks: "local-disk ~{disk_size} SSD" + preemptible: 0 + } +} \ No newline at end of file diff --git a/tasks/utilities/task_broad_terra_tools.wdl b/tasks/utilities/task_broad_terra_tools.wdl index e21673a83..2ae737a5c 100644 --- a/tasks/utilities/task_broad_terra_tools.wdl +++ b/tasks/utilities/task_broad_terra_tools.wdl @@ -92,6 +92,12 @@ task export_taxon_tables { File? ani_output_tsv String? ani_top_species_match String? ani_mummer_version + String? kmerfinder_docker + File? kmerfinder_results_tsv + String? kmerfinder_top_hit + String? kmerfinder_query_coverage + String? kmerfinder_template_coverage + String? kmerfinder_database File? amrfinderplus_all_report File? amrfinderplus_amr_report File? amrfinderplus_stress_report @@ -169,7 +175,7 @@ task export_taxon_tables { String? lissero_serotype File? sistr_results File? sistr_allele_json - File? sister_allele_fasta + File? sistr_allele_fasta File? sistr_cgmlst String? sistr_version String? 
sistr_predicted_serotype @@ -495,7 +501,7 @@ task export_taxon_tables { "lissero_serotype": "~{lissero_serotype}", "sistr_results": "~{sistr_results}", "sistr_allele_json": "~{sistr_allele_json}", - "sister_allele_fasta": "~{sister_allele_fasta}", + "sistr_allele_fasta": "~{sistr_allele_fasta}", "sistr_cgmlst": "~{sistr_cgmlst}", "sistr_version": "~{sistr_version}", "sistr_predicted_serotype": "~{sistr_predicted_serotype}", @@ -589,6 +595,12 @@ task export_taxon_tables { "ani_output_tsv": "~{ani_output_tsv}", "ani_top_species_match": "~{ani_top_species_match}", "ani_mummer_version": "~{ani_mummer_version}", + "kmerfinder_docker": "~{kmerfinder_docker}", + "kmerfinder_results_tsv": "~{kmerfinder_results_tsv}", + "kmerfinder_top_hit": "~{kmerfinder_top_hit}", + "kmerfinder_query_coverage": "~{kmerfinder_query_coverage}", + "kmerfinder_template_coverage": "~{kmerfinder_template_coverage}", + "kmerfinder_database": "~{kmerfinder_database}", "resfinder_pheno_table": "~{resfinder_pheno_table}", "resfinder_pheno_table_species": "~{resfinder_pheno_table_species}", "resfinder_seqs": "~{resfinder_seqs}", diff --git a/tests/inputs/theiaprok/wf_theiaprok_illumina_pe.json b/tests/inputs/theiaprok/wf_theiaprok_illumina_pe.json index c5e3444a7..3dd186232 100644 --- a/tests/inputs/theiaprok/wf_theiaprok_illumina_pe.json +++ b/tests/inputs/theiaprok/wf_theiaprok_illumina_pe.json @@ -36,5 +36,6 @@ "theiaprok_illumina_pe.merlin_magic.cladetyper.ref_clade5_annotated" : "./tests/inputs/empty-for-test.txt", "theiaprok_illumina_pe.bakta.bakta_db" : "./tests/inputs/empty-for-test.txt", "theiaprok_illumina_pe.gambit.gambit_db_signatures" : "./tests/inputs/completely-empty-for-test.txt", - "theiaprok_illumina_pe.gambit.gambit_db_genomes" : "./tests/inputs/completely-empty-for-test.txt" + "theiaprok_illumina_pe.gambit.gambit_db_genomes" : "./tests/inputs/completely-empty-for-test.txt", + "theiaprok_illumina_pe.kmerfinder.kmerfinder_db" : "./tests/inputs/completely-empty-for-test.txt" } diff 
--git a/tests/inputs/theiaprok/wf_theiaprok_illumina_se.json b/tests/inputs/theiaprok/wf_theiaprok_illumina_se.json index 126792e6d..9e0c42ed7 100644 --- a/tests/inputs/theiaprok/wf_theiaprok_illumina_se.json +++ b/tests/inputs/theiaprok/wf_theiaprok_illumina_se.json @@ -35,5 +35,6 @@ "theiaprok_illumina_se.merlin_magic.cladetyper.ref_clade5_annotated" : "./tests/inputs/empty-for-test.txt", "theiaprok_illumina_se.bakta.bakta_db" : "./tests/inputs/empty-for-test.txt", "theiaprok_illumina_se.gambit.gambit_db_signatures" : "./tests/inputs/completely-empty-for-test.txt", - "theiaprok_illumina_se.gambit.gambit_db_genomes" : "./tests/inputs/completely-empty-for-test.txt" + "theiaprok_illumina_se.gambit.gambit_db_genomes" : "./tests/inputs/completely-empty-for-test.txt", + "theiaprok_illumina_se.kmerfinder.kmerfinder_db" : "./tests/inputs/completely-empty-for-test.txt" } diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index d5d7986c2..ee07b678f 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -632,9 +632,9 @@ - path: miniwdl_run/wdl/tasks/taxon_id/task_midas.wdl md5sum: 024971d1439dff7d59c0a26a824bd2c6 - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl - md5sum: 7cffaf4d159b65fbcf9091dd8477500a + md5sum: 43ef050bde1fb8755f38e697a1794918 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl - md5sum: ca8825e5cc5a6d910e0368fcb6992905 + md5sum: 6dc6c393281a19e8dcbcc15964b8e08a - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl md5sum: 2614fac622fa2035b80a7b220b1aed86 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index 33bb9d8b1..5d4873a19 100644 --- 
a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -600,9 +600,9 @@ - path: miniwdl_run/wdl/tasks/taxon_id/task_midas.wdl md5sum: 024971d1439dff7d59c0a26a824bd2c6 - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl - md5sum: 7cffaf4d159b65fbcf9091dd8477500a + md5sum: 43ef050bde1fb8755f38e697a1794918 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl - md5sum: 627a157d962453229f619da37e03e43d + md5sum: 24bfd35867f4ae864364e24195bf7f6f - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl md5sum: 2614fac622fa2035b80a7b220b1aed86 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl diff --git a/workflows/theiaprok/wf_theiaprok_fasta.wdl b/workflows/theiaprok/wf_theiaprok_fasta.wdl index 3d47f0ef2..2c4ce9f15 100644 --- a/workflows/theiaprok/wf_theiaprok_fasta.wdl +++ b/workflows/theiaprok/wf_theiaprok_fasta.wdl @@ -5,6 +5,7 @@ import "../../tasks/quality_control/task_quast.wdl" as quast_task import "../../tasks/quality_control/task_busco.wdl" as busco_task import "../../tasks/taxon_id/task_gambit.wdl" as gambit_task import "../../tasks/quality_control/task_mummer_ani.wdl" as ani_task +import "../../tasks/taxon_id/task_kmerfinder.wdl" as kmerfinder_task import "../../tasks/gene_typing/task_amrfinderplus.wdl" as amrfinderplus import "../../tasks/gene_typing/task_resfinder.wdl" as resfinder import "../../tasks/species_typing/task_ts_mlst.wdl" as ts_mlst_task @@ -35,6 +36,7 @@ workflow theiaprok_fasta { String terra_workspace="NA" # module options Boolean call_ani = false # by default do not call ANI task, but user has ability to enable this task if working with enteric pathogens or supply their own high-quality reference genome + Boolean call_kmerfinder = false Boolean call_resfinder = false String genome_annotation = "prokka" # options: "prokka" or "bakta" String? expected_taxon # allow user to provide organism (e.g. 
"Clostridioides_difficile") string to amrfinder. Useful when gambit does not predict the correct species @@ -66,6 +68,13 @@ workflow theiaprok_fasta { samplename = samplename } } + if (call_kmerfinder) { + call kmerfinder_task.kmerfinder_bacteria as kmerfinder { + input: + assembly = assembly_fasta, + samplename = samplename + } + } call amrfinderplus.amrfinderplus_nuc as amrfinderplus_task { input: assembly = assembly_fasta, @@ -166,6 +175,12 @@ workflow theiaprok_fasta { ani_output_tsv = ani.ani_output_tsv, ani_top_species_match = ani.ani_top_species_match, ani_mummer_version = ani.ani_mummer_version, + kmerfinder_docker = kmerfinder.kmerfinder_docker, + kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv, + kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, + kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage, + kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage, + kmerfinder_database = kmerfinder.kmerfinder_database, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -243,7 +258,7 @@ workflow theiaprok_fasta { lissero_serotype = merlin_magic.lissero_serotype, sistr_results = merlin_magic.sistr_results, sistr_allele_json = merlin_magic.sistr_allele_json, - sister_allele_fasta = merlin_magic.sistr_allele_fasta, + sistr_allele_fasta = merlin_magic.sistr_allele_fasta, sistr_cgmlst = merlin_magic.sistr_cgmlst, sistr_version = merlin_magic.sistr_version, sistr_predicted_serotype = merlin_magic.sistr_predicted_serotype, @@ -412,6 +427,13 @@ workflow theiaprok_fasta { File? ani_output_tsv = ani.ani_output_tsv String? ani_top_species_match = ani.ani_top_species_match String? ani_mummer_version = ani.ani_mummer_version + # kmerfinder outputs + String? kmerfinder_docker = kmerfinder.kmerfinder_docker + File? 
kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv + String? kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit + String? kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage + String? kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage + String? kmerfinder_database = kmerfinder.kmerfinder_database # NCBI-AMRFinderPlus Outputs File amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report @@ -499,7 +521,7 @@ workflow theiaprok_fasta { # Salmonella Typing File? sistr_results = merlin_magic.sistr_results File? sistr_allele_json = merlin_magic.sistr_allele_json - File? sister_allele_fasta = merlin_magic.sistr_allele_fasta + File? sistr_allele_fasta = merlin_magic.sistr_allele_fasta File? sistr_cgmlst = merlin_magic.sistr_cgmlst String? sistr_version = merlin_magic.sistr_version String? sistr_predicted_serotype = merlin_magic.sistr_predicted_serotype diff --git a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl index 46939b8ae..feae677c0 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl @@ -9,6 +9,7 @@ import "../../tasks/quality_control/task_screen.wdl" as screen import "../../tasks/quality_control/task_busco.wdl" as busco_task import "../../tasks/taxon_id/task_gambit.wdl" as gambit_task import "../../tasks/quality_control/task_mummer_ani.wdl" as ani_task +import "../../tasks/taxon_id/task_kmerfinder.wdl" as kmerfinder_task import "../../tasks/gene_typing/task_amrfinderplus.wdl" as amrfinderplus import "../../tasks/gene_typing/task_resfinder.wdl" as resfinder import "../../tasks/species_typing/task_ts_mlst.wdl" as ts_mlst_task @@ -53,6 +54,7 @@ workflow theiaprok_illumina_pe { Int trim_window_size = 10 # module options Boolean call_ani = false # by default do not call ANI task, but user has ability to enable this task if 
working with enteric pathogens or supply their own high-quality reference genome + Boolean call_kmerfinder = false Boolean call_resfinder = false String genome_annotation = "prokka" # options: "prokka" or "bakta" String? expected_taxon # allow user to provide organism (e.g. "Clostridioides_difficile") string to amrfinder. Useful when gambit does not predict the correct species # qc check parameters @@ -142,6 +144,13 @@ workflow theiaprok_illumina_pe { samplename = samplename } } + if (call_kmerfinder) { + call kmerfinder_task.kmerfinder_bacteria as kmerfinder { + input: + assembly = shovill_pe.assembly_fasta, + samplename = samplename + } + } call amrfinderplus.amrfinderplus_nuc as amrfinderplus_task { input: assembly = shovill_pe.assembly_fasta, @@ -291,6 +300,12 @@ workflow theiaprok_illumina_pe { ani_output_tsv = ani.ani_output_tsv, ani_top_species_match = ani.ani_top_species_match, ani_mummer_version = ani.ani_mummer_version, + kmerfinder_docker = kmerfinder.kmerfinder_docker, + kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv, + kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, + kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage, + kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage, + kmerfinder_database = kmerfinder.kmerfinder_database, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -368,7 +383,7 @@ workflow theiaprok_illumina_pe { lissero_serotype = merlin_magic.lissero_serotype, sistr_results = merlin_magic.sistr_results, sistr_allele_json = merlin_magic.sistr_allele_json, - sister_allele_fasta = merlin_magic.sistr_allele_fasta, + sistr_allele_fasta = merlin_magic.sistr_allele_fasta, sistr_cgmlst = merlin_magic.sistr_cgmlst, sistr_version = merlin_magic.sistr_version, sistr_predicted_serotype = merlin_magic.sistr_predicted_serotype, @@ 
-606,6 +621,13 @@ workflow theiaprok_illumina_pe { File? ani_output_tsv = ani.ani_output_tsv String? ani_top_species_match = ani.ani_top_species_match String? ani_mummer_version = ani.ani_mummer_version + # kmerfinder outputs + String? kmerfinder_docker = kmerfinder.kmerfinder_docker + File? kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv + String? kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit + String? kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage + String? kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage + String? kmerfinder_database = kmerfinder.kmerfinder_database # NCBI-AMRFinderPlus Outputs File? amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File? amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report @@ -721,7 +743,7 @@ workflow theiaprok_illumina_pe { # Salmonella Typing File? sistr_results = merlin_magic.sistr_results File? sistr_allele_json = merlin_magic.sistr_allele_json - File? sister_allele_fasta = merlin_magic.sistr_allele_fasta + File? sistr_allele_fasta = merlin_magic.sistr_allele_fasta File? sistr_cgmlst = merlin_magic.sistr_cgmlst String? sistr_version = merlin_magic.sistr_version String? 
sistr_predicted_serotype = merlin_magic.sistr_predicted_serotype diff --git a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl index 75cea72c9..8a269be90 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl @@ -9,6 +9,7 @@ import "../../tasks/quality_control/task_screen.wdl" as screen import "../../tasks/quality_control/task_busco.wdl" as busco_task import "../../tasks/taxon_id/task_gambit.wdl" as gambit_task import "../../tasks/quality_control/task_mummer_ani.wdl" as ani_task +import "../../tasks/taxon_id/task_kmerfinder.wdl" as kmerfinder_task import "../../tasks/gene_typing/task_amrfinderplus.wdl" as amrfinderplus import "../../tasks/gene_typing/task_resfinder.wdl" as resfinder import "../../tasks/species_typing/task_ts_mlst.wdl" as ts_mlst_task @@ -52,6 +53,7 @@ workflow theiaprok_illumina_se { Int trim_window_size = 4 # module options Boolean call_ani = false # by default do not call ANI task, but user has ability to enable this task if working with enteric pathogens or supply their own high-quality reference genome + Boolean call_kmerfinder = false Boolean call_resfinder = false String genome_annotation = "prokka" # options: "prokka" or "bakta" String? expected_taxon # allow user to provide organism (e.g. "Clostridioides_difficile") string to amrfinder. 
Useful when gambit does not predict the correct species @@ -135,6 +137,13 @@ workflow theiaprok_illumina_se { samplename = samplename } } + if (call_kmerfinder) { + call kmerfinder_task.kmerfinder_bacteria as kmerfinder { + input: + assembly = shovill_se.assembly_fasta, + samplename = samplename + } + } call amrfinderplus.amrfinderplus_nuc as amrfinderplus_task { input: assembly = shovill_se.assembly_fasta, @@ -264,6 +273,12 @@ workflow theiaprok_illumina_se { ani_output_tsv = ani.ani_output_tsv, ani_top_species_match = ani.ani_top_species_match, ani_mummer_version = ani.ani_mummer_version, + kmerfinder_docker = kmerfinder.kmerfinder_docker, + kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv, + kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, + kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage, + kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage, + kmerfinder_database = kmerfinder.kmerfinder_database, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -341,7 +356,7 @@ workflow theiaprok_illumina_se { lissero_serotype = merlin_magic.lissero_serotype, sistr_results = merlin_magic.sistr_results, sistr_allele_json = merlin_magic.sistr_allele_json, - sister_allele_fasta = merlin_magic.sistr_allele_fasta, + sistr_allele_fasta = merlin_magic.sistr_allele_fasta, sistr_cgmlst = merlin_magic.sistr_cgmlst, sistr_version = merlin_magic.sistr_version, sistr_predicted_serotype = merlin_magic.sistr_predicted_serotype, @@ -558,6 +573,13 @@ workflow theiaprok_illumina_se { File? ani_output_tsv = ani.ani_output_tsv String? ani_top_species_match = ani.ani_top_species_match String? ani_mummer_version = ani.ani_mummer_version + # kmerfinder outputs + String? kmerfinder_docker = kmerfinder.kmerfinder_docker + File? 
kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv + String? kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit + String? kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage + String? kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage + String? kmerfinder_database = kmerfinder.kmerfinder_database # NCBI-AMRFinderPlus Outputs File? amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File? amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report @@ -673,7 +695,7 @@ workflow theiaprok_illumina_se { # Salmonella Typing File? sistr_results = merlin_magic.sistr_results File? sistr_allele_json = merlin_magic.sistr_allele_json - File? sister_allele_fasta = merlin_magic.sistr_allele_fasta + File? sistr_allele_fasta = merlin_magic.sistr_allele_fasta File? sistr_cgmlst = merlin_magic.sistr_cgmlst String? sistr_version = merlin_magic.sistr_version String? sistr_predicted_serotype = merlin_magic.sistr_predicted_serotype diff --git a/workflows/theiaprok/wf_theiaprok_ont.wdl b/workflows/theiaprok/wf_theiaprok_ont.wdl index 80bac5920..5254f8640 100644 --- a/workflows/theiaprok/wf_theiaprok_ont.wdl +++ b/workflows/theiaprok/wf_theiaprok_ont.wdl @@ -9,6 +9,7 @@ import "../../tasks/quality_control/task_screen.wdl" as screen_task import "../../tasks/quality_control/task_busco.wdl" as busco_task import "../../tasks/taxon_id/task_gambit.wdl" as gambit_task import "../../tasks/quality_control/task_mummer_ani.wdl" as ani_task +import "../../tasks/taxon_id/task_kmerfinder.wdl" as kmerfinder_task import "../../tasks/gene_typing/task_amrfinderplus.wdl" as amrfinderplus_task import "../../tasks/gene_typing/task_resfinder.wdl" as resfinder_task import "../../tasks/species_typing/task_ts_mlst.wdl" as ts_mlst_task @@ -48,6 +49,7 @@ workflow theiaprok_ont { Int min_coverage = 5 # reduced from 10 because some institutions sequence at lower depth because of longer read length # module options Boolean call_ani = false # by 
default do not call ANI task, but user has ability to enable this task if working with enteric pathogens or supply their own high-quality reference genome + Boolean call_kmerfinder = false Boolean call_resfinder = false String genome_annotation = "prokka" # options: "prokka" or "bakta" String? expected_taxon # allow user to provide organism (e.g. "Clostridioides_difficile") string to amrfinder. Useful when gambit does not predict the correct species @@ -131,6 +133,13 @@ workflow theiaprok_ont { samplename = samplename } } + if (call_kmerfinder) { + call kmerfinder_task.kmerfinder_bacteria as kmerfinder { + input: + assembly = dragonflye.assembly_fasta, + samplename = samplename + } + } call amrfinderplus_task.amrfinderplus_nuc as amrfinderplus { input: assembly = dragonflye.assembly_fasta, @@ -262,6 +271,12 @@ workflow theiaprok_ont { ani_output_tsv = ani.ani_output_tsv, ani_top_species_match = ani.ani_top_species_match, ani_mummer_version = ani.ani_mummer_version, + kmerfinder_docker = kmerfinder.kmerfinder_docker, + kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv, + kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, + kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage, + kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage, + kmerfinder_database = kmerfinder.kmerfinder_database, amrfinderplus_all_report = amrfinderplus.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus.amrfinderplus_stress_report, @@ -329,7 +344,7 @@ workflow theiaprok_ont { lissero_serotype = merlin_magic.lissero_serotype, sistr_results = merlin_magic.sistr_results, sistr_allele_json = merlin_magic.sistr_allele_json, - sister_allele_fasta = merlin_magic.sistr_allele_fasta, + sistr_allele_fasta = merlin_magic.sistr_allele_fasta, sistr_cgmlst = merlin_magic.sistr_cgmlst, sistr_version = merlin_magic.sistr_version, sistr_predicted_serotype = 
merlin_magic.sistr_predicted_serotype, @@ -532,6 +547,13 @@ workflow theiaprok_ont { File? ani_output_tsv = ani.ani_output_tsv String? ani_top_species_match = ani.ani_top_species_match String? ani_mummer_version = ani.ani_mummer_version + # kmerfinder outputs + String? kmerfinder_docker = kmerfinder.kmerfinder_docker + File? kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv + String? kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit + String? kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage + String? kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage + String? kmerfinder_database = kmerfinder.kmerfinder_database # NCBI-AMRFinderPlus Outputs File? amrfinderplus_all_report = amrfinderplus.amrfinderplus_all_report File? amrfinderplus_amr_report = amrfinderplus.amrfinderplus_amr_report @@ -635,7 +657,7 @@ workflow theiaprok_ont { # Salmonella Typing File? sistr_results = merlin_magic.sistr_results File? sistr_allele_json = merlin_magic.sistr_allele_json - File? sister_allele_fasta = merlin_magic.sistr_allele_fasta + File? sistr_allele_fasta = merlin_magic.sistr_allele_fasta File? sistr_cgmlst = merlin_magic.sistr_cgmlst String? sistr_version = merlin_magic.sistr_version String? sistr_predicted_serotype = merlin_magic.sistr_predicted_serotype