From 9a298723781e7bd9489c165de005d7cac2bd289c Mon Sep 17 00:00:00 2001 From: kevinlibuit Date: Wed, 23 Jun 2021 20:02:31 -0700 Subject: [PATCH] Apollo_Illumina_PE & kSNP3 workflows (#7) * Update task_taxon_id.wdl * refine output tsv * increase compure resources * fix formatting * parse module reports to grab relevant outputs * change local dev workflow name * write to predicted taxon; format delta to two decimal places * fix syntax * print info for failed species predictions * print info for failed species predictions * print info for failed species predictions * fix syntax * update no-call message * split taxon to genus and species * write no-calls out as string * fix typo * rename to apollo * add ksnp3 workflow * Switch to gambit naming * Fix file path * fix typo * fix typo --- .dockstore.yml | 10 ++ tasks/task_phylo.wdl | 91 +++++++++++++++++++ tasks/task_taxon_id.wdl | 31 ++++--- ...pollo_pe.wdl => wf_apollo_illumina_pe.wdl} | 15 +-- workflows/wf_ksnp3.wdl | 28 ++++++ 5 files changed, 155 insertions(+), 20 deletions(-) create mode 100644 tasks/task_phylo.wdl rename workflows/{wf_apollo_pe.wdl => wf_apollo_illumina_pe.wdl} (86%) create mode 100644 workflows/wf_ksnp3.wdl diff --git a/.dockstore.yml b/.dockstore.yml index 9d88ef0e..3c80cdbd 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -5,3 +5,13 @@ workflows: primaryDescriptorPath: /workflows/wf_apollo_pe.wdl testParameterFiles: - empty.json + - name: kSNP3 + subclass: WDL + primaryDescriptorPath: /workflows/wf_ksnp3.wdl + testParameterFiles: + - empty.json + - name: Apollo_Illumina_PE + subclass: WDL + primaryDescriptorPath: /workflows/wf_apollo_illumina_pe.wdl + testParameterFiles: + - empty.json diff --git a/tasks/task_phylo.wdl b/tasks/task_phylo.wdl new file mode 100644 index 00000000..eee49e21 --- /dev/null +++ b/tasks/task_phylo.wdl @@ -0,0 +1,91 @@ +version 1.0 + +task ksnp3 { + + input { + Array[File] assembly_fasta + Array[String] samplename + String cluster_name + Int kmer_size = 19 + String docker_image = "staphb/ksnp3:3.1" + Int mem_size_gb = 8 + Int CPUs = 4 + } + + command <<< + + assembly_array=(~{sep=' ' assembly_fasta}) + assembly_array_len=$(echo "${#assembly_array[@]}") + samplename_array=(~{sep=' ' samplename}) + samplename_array_len=$(echo "${#samplename_array[@]}") + + # Ensure assembly, and samplename arrays are of equal length + if [ "$assembly_array_len" -ne "$samplename_array_len" ]; then + echo "Assembly array (length: $assembly_array_len) and samplename array (length: $samplename_array_len) are of unequal length." >&2 + exit 1 + fi + + # create file of filenames for kSNP3 input + date + + touch ksnp3_input.tsv + for index in ${!assembly_array[@]}; do + assembly=${assembly_array[$index]} + samplename=${samplename_array[$index]} + + echo -e "${assembly}\t${samplename}" >> ksnp3_input.tsv + done + + kSNP3 -in ksnp3_input.tsv -outdir ksnp3 -k ~{kmer_size} -core + + mv ksnp3/core_SNPs_matrix.fasta ~{cluster_name}_core_SNPs_matrix.fasta + mv ksnp3/tree.core.tre ~{cluster_name}_core.tree + + >>> + + output { + File ksnp3_matrix = "${cluster_name}_core_SNPs_matrix.fasta" + File ksnp3_tree = "${cluster_name}_core.tree" + String ksnp3_docker_image = docker_image + + } + + runtime { + docker: docker_image + memory: "~{mem_size_gb} GB" + cpu: CPUs + disks: "local-disk 100 SSD" + preemptible: 0 + } +} + +task snp_dists { + + input { + File alignment + String cluster_name + } + + command{ + # date and version control + date | tee DATE + snp-dists -v | tee VERSION + + snp-dists ${alignment} > ${cluster_name}_snp_distance_matrix.tsv + +} + + output { + String date = read_string("DATE") + String version = read_string("VERSION") + File snp_matrix = "${cluster_name}_snp_distance_matrix.tsv" + } + + runtime { + docker: "staphb/snp-dists:0.6.2" + memory: "2 GB" + cpu: 2 + disks: "local-disk 100 SSD" + preemptible: 0 + } +} diff --git a/tasks/task_taxon_id.wdl b/tasks/task_taxon_id.wdl index db83792c..cc081ecd 100644 --- a/tasks/task_taxon_id.wdl +++ b/tasks/task_taxon_id.wdl @@ -1,30 +1,34 @@ version 1.0 -task midas_nsphl { +task gambit { input { - File assembly - String samplename - String docker="theiagen/midas_nsphl:1.0.0" + File assembly + String samplename + String docker="theiagen/midas_nsphl:1.0.0" } command <<< # capture date and version date | tee DATE - midas query ~{assembly} | tail -n2 > ~{samplename}_midas_nsphl.csv + midas query ~{assembly} | tail -n2 > ~{samplename}_gambit.csv python3 <>> output { - File midas_nsphl_report = "~{samplename}_midas_nsphl.csv" - String midas_nsphl_docker = docker - String pipeline_date = read_string("DATE") - Float midas_delta = read_float("MIDAS_DELTA") - String predicted_genus = read_string("PREDICTED_GENUS") + File gambit_report = "~{samplename}_gambit.csv" + String gambit_docker = docker + String pipeline_date = read_string("DATE") + Float gambit_score = read_float("GAMBIT_SCORE") + Float gambit_delta = read_float("GAMBIT_DELTA") + String predicted_genus = read_string("PREDICTED_GENUS") String predicted_species = read_string("PREDICTED_SPECIES") } runtime { diff --git a/workflows/wf_apollo_pe.wdl b/workflows/wf_apollo_illumina_pe.wdl similarity index 86% rename from workflows/wf_apollo_pe.wdl rename to workflows/wf_apollo_illumina_pe.wdl index c77b4041..fd5c2d34 100644 --- a/workflows/wf_apollo_pe.wdl +++ b/workflows/wf_apollo_illumina_pe.wdl @@ -5,7 +5,7 @@ import "../tasks/task_qc_utils.wdl" as qc import "../tasks/task_taxon_id.wdl" as taxon_id import "../tasks/task_denovo_assembly.wdl" as assembly -workflow apollo_pe { +workflow apollo_illumina_pe { meta { description: "De-novo genome assembly, taxonomic ID, and QC of paired-end bacterial NGS data" } @@ -41,7 +41,7 @@ workflow apollo_pe { samplename = samplename, genome_length = quast.genome_length } - call taxon_id.midas_nsphl { + call taxon_id.gambit { input: assembly = shovill_pe.assembly_fasta, samplename = samplename @@ -75,11 +75,12 @@ workflow apollo_pe { Float r2_mean_q = cg_pipeline.r2_mean_q Float est_coverage = cg_pipeline.est_coverage - File midas_nsphl_report = midas_nsphl.midas_nsphl_report - String midas_nsphl_docker = midas_nsphl.midas_nsphl_docker - Float midas_delta = midas_nsphl.midas_delta - String predicted_genus = midas_nsphl.predicted_genus - String predicted_species = midas_nsphl.predicted_species + File gambit_report = gambit.gambit_report + String gambit_docker = gambit.gambit_docker + Float gambit_score = gambit.gambit_score + Float gambit_delta = gambit.gambit_delta + String predicted_genus = gambit.predicted_genus + String predicted_species = gambit.predicted_species } } diff --git a/workflows/wf_ksnp3.wdl b/workflows/wf_ksnp3.wdl new file mode 100644 index 00000000..54011fdd --- /dev/null +++ b/workflows/wf_ksnp3.wdl @@ -0,0 +1,28 @@ +version 1.0 + +import "../tasks/task_phylo.wdl" as phylo + +workflow ksnp3 { + input { + Array[File] assembly_fasta + Array[String] samplename + String cluster_name + } + call phylo.ksnp3 as ksnp3_task { + input: + assembly_fasta=assembly_fasta, + samplename=samplename, + cluster_name=cluster_name + } + call phylo.snp_dists { + input: + cluster_name = cluster_name, + alignment = ksnp3_task.ksnp3_matrix + } + + output { + File snp_matrix = snp_dists.snp_matrix + File ksnp3_tree = ksnp3_task.ksnp3_tree + String ksnp3_docker = ksnp3_task.ksnp3_docker_image + } +} \ No newline at end of file