# Review annotations for this patch. They are kept above the first "diff --git"
# header so that no hunk content and no hunk line count is touched.
#
# .dockstore.yml
#   Adds two workflow entries, both subclass WDL with empty.json as the test
#   parameter file: "kSNP3" -> /workflows/wf_ksnp3.wdl and
#   "Apollo_Illumina_PE" -> /workflows/wf_apollo_illumina_pe.wdl.
#   NOTE(review): the unchanged context entry still has
#   primaryDescriptorPath /workflows/wf_apollo_pe.wdl, and this same patch
#   renames that file to wf_apollo_illumina_pe.wdl. The entry's name is outside
#   the hunk, so confirm whether it should be removed or repointed.
#
# tasks/task_phylo.wdl (new file)
#   task ksnp3
#     Inputs: Array[File] assembly_fasta, Array[String] samplename,
#     String cluster_name, Int kmer_size (default 19), String docker_image
#     (default "staphb/ksnp3:3.1"), Int mem_size_gb (default 8), Int CPUs
#     (default 4).
#     Command: expands both arrays into bash arrays, prints an error to stderr
#     and exits 1 when their lengths differ, appends one
#     "<assembly>\t<samplename>" row per index to ksnp3_input.tsv, runs
#     "kSNP3 -in ksnp3_input.tsv -outdir ksnp3 -k <kmer_size> -core", then moves
#     ksnp3/core_SNPs_matrix.fasta and ksnp3/tree.core.tre to files prefixed
#     with cluster_name.
#     Outputs: ksnp3_matrix, ksnp3_tree, ksnp3_docker_image.
#     NOTE(review): CPUs is only referenced by runtime.cpu; it is not passed on
#     the kSNP3 command line. Confirm that is intended.
#     NOTE(review): the array expansions use sep=' ' without quoting, so a path
#     or sample name containing whitespace would be split. Presumably inputs
#     never contain spaces; verify against callers.
#   task snp_dists
#     Inputs: File alignment, String cluster_name.
#     Command: writes `date` to DATE and `snp-dists -v` to VERSION via tee, then
#     redirects `snp-dists <alignment>` to
#     <cluster_name>_snp_distance_matrix.tsv.
#     Outputs: date, version, snp_matrix.
#     NOTE(review): the docker image is hard-coded in runtime
#     ("staphb/snp-dists:0.6.2") and the command uses "command { ${...} }",
#     whereas task ksnp3 exposes docker_image as an input and uses
#     "command <<< ~{...} >>>". Consider aligning the two tasks.
#
# tasks/task_taxon_id.wdl
#   Renames task midas_nsphl to gambit and the report file suffix from
#   _midas_nsphl.csv to _gambit.csv.
#   The embedded Python now also writes GAMBIT_SCORE (top_score formatted to two
#   decimals) and writes the delta to GAMBIT_DELTA instead of MIDAS_DELTA.
#   Outputs are renamed to gambit_report, gambit_docker and gambit_delta, and a
#   new Float gambit_score is added.
#   NOTE(review): the docker default is still "theiagen/midas_nsphl:1.0.0" and
#   the command is still "midas query". Confirm both are meant to stay as-is.
#   NOTE(review): the context comment "#format delta to two decimal placesn"
#   carries a typo ("placesn") in the target file. It is a context line here, so
#   fixing it needs its own -/+ pair in a follow-up change.
#
# workflows/wf_apollo_pe.wdl -> workflows/wf_apollo_illumina_pe.wdl
#   Rename with similarity index 86%. Workflow apollo_pe becomes
#   apollo_illumina_pe, the call taxon_id.midas_nsphl becomes taxon_id.gambit,
#   and the workflow outputs are renamed to the gambit_* names with gambit_score
#   added.
#
# workflows/wf_ksnp3.wdl (new file)
#   Workflow ksnp3 imports ../tasks/task_phylo.wdl as phylo, calls phylo.ksnp3
#   (aliased ksnp3_task) and then phylo.snp_dists with
#   alignment = ksnp3_task.ksnp3_matrix.
#   Outputs: snp_matrix, ksnp3_tree, ksnp3_docker.
#   NOTE(review): the file ends without a trailing newline (see the
#   "\ No newline at end of file" marker).
#
diff --git a/.dockstore.yml b/.dockstore.yml index 9d88ef0e..3c80cdbd 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -5,3 +5,13 @@ workflows: primaryDescriptorPath: /workflows/wf_apollo_pe.wdl testParameterFiles: - empty.json + - name: kSNP3 + subclass: WDL + primaryDescriptorPath: /workflows/wf_ksnp3.wdl + testParameterFiles: + - empty.json + - name: Apollo_Illumina_PE + subclass: WDL + primaryDescriptorPath: /workflows/wf_apollo_illumina_pe.wdl + testParameterFiles: + - empty.json diff --git a/tasks/task_phylo.wdl b/tasks/task_phylo.wdl new file mode 100644 index 00000000..eee49e21 --- /dev/null +++ b/tasks/task_phylo.wdl @@ -0,0 +1,91 @@ +version 1.0 + +task ksnp3 { + + input { + Array[File] assembly_fasta + Array[String] samplename + String cluster_name + Int kmer_size = 19 + String docker_image = "staphb/ksnp3:3.1" + Int mem_size_gb = 8 + Int CPUs = 4 + } + + command <<< + + assembly_array=(~{sep=' ' assembly_fasta}) + assembly_array_len=$(echo "${#assembly_array[@]}") + samplename_array=(~{sep=' ' samplename}) + samplename_array_len=$(echo "${#samplename_array[@]}") + + # Ensure assembly, and samplename arrays are of equal length + if [ "$assembly_array_len" -ne "$samplename_array_len" ]; then + echo "Assembly array (length: $assembly_array_len) and samplename array (length: $samplename_array_len) are of unequal length." 
>&2 + exit 1 + fi + + # create file of filenames for kSNP3 input + date + + touch ksnp3_input.tsv + for index in ${!assembly_array[@]}; do + assembly=${assembly_array[$index]} + samplename=${samplename_array[$index]} + + echo -e "${assembly}\t${samplename}" >> ksnp3_input.tsv + done + + kSNP3 -in ksnp3_input.tsv -outdir ksnp3 -k ~{kmer_size} -core + + mv ksnp3/core_SNPs_matrix.fasta ~{cluster_name}_core_SNPs_matrix.fasta + mv ksnp3/tree.core.tre ~{cluster_name}_core.tree + + >>> + + output { + File ksnp3_matrix = "${cluster_name}_core_SNPs_matrix.fasta" + File ksnp3_tree = "${cluster_name}_core.tree" + String ksnp3_docker_image = docker_image + + } + + runtime { + docker: docker_image + memory: "~{mem_size_gb} GB" + cpu: CPUs + disks: "local-disk 100 SSD" + preemptible: 0 + } +} + +task snp_dists { + + input { + File alignment + String cluster_name + } + + command{ + # date and version control + date | tee DATE + snp-dists -v | tee VERSION + + snp-dists ${alignment} > ${cluster_name}_snp_distance_matrix.tsv + +} + + output { + String date = read_string("DATE") + String version = read_string("VERSION") + File snp_matrix = "${cluster_name}_snp_distance_matrix.tsv" + } + + runtime { + docker: "staphb/snp-dists:0.6.2" + memory: "2 GB" + cpu: 2 + disks: "local-disk 100 SSD" + preemptible: 0 + } +} diff --git a/tasks/task_taxon_id.wdl b/tasks/task_taxon_id.wdl index db83792c..cc081ecd 100644 --- a/tasks/task_taxon_id.wdl +++ b/tasks/task_taxon_id.wdl @@ -1,30 +1,34 @@ version 1.0 -task midas_nsphl { +task gambit { input { - File assembly - String samplename - String docker="theiagen/midas_nsphl:1.0.0" + File assembly + String samplename + String docker="theiagen/midas_nsphl:1.0.0" } command <<< # capture date and version date | tee DATE - midas query ~{assembly} | tail -n2 > ~{samplename}_midas_nsphl.csv + midas query ~{assembly} | tail -n2 > ~{samplename}_gambit.csv python3 <<CODE import csv #grab output genome length and number contigs by column header - with 
open("~{samplename}_midas_nsphl.csv",'r') as csv_file: + with open("~{samplename}_gambit.csv",'r') as csv_file: csv_reader = list(csv.DictReader(csv_file, delimiter=",")) for line in csv_reader: - with open("MIDAS_DELTA", 'wt') as midas_delta: + with open ("GAMBIT_SCORE", 'wt') as gambit_score: + top_score=float(line["top_score"]) + top_score="{:.2f}".format(top_score) + gambit_score.write(str(top_score)) + with open("GAMBIT_DELTA", 'wt') as gambit_delta: top_score=float(line["top_score"]) species_threshold=float(line["species_threshold"]) delta=top_score - species_threshold #format delta to two decimal placesn delta="{:.2f}".format(delta) - midas_delta.write(str(delta)) + gambit_delta.write(str(delta)) with open("PREDICTED_GENUS", 'wt') as predicted_genus: genus=line["predicted_genus"] if not genus: @@ -38,11 +42,12 @@ task midas_nsphl { CODE >>> output { - File midas_nsphl_report = "~{samplename}_midas_nsphl.csv" - String midas_nsphl_docker = docker - String pipeline_date = read_string("DATE") - Float midas_delta = read_float("MIDAS_DELTA") - String predicted_genus = read_string("PREDICTED_GENUS") + File gambit_report = "~{samplename}_gambit.csv" + String gambit_docker = docker + String pipeline_date = read_string("DATE") + Float gambit_score = read_float("GAMBIT_SCORE") + Float gambit_delta = read_float("GAMBIT_DELTA") + String predicted_genus = read_string("PREDICTED_GENUS") String predicted_species = read_string("PREDICTED_SPECIES") } runtime { diff --git a/workflows/wf_apollo_pe.wdl b/workflows/wf_apollo_illumina_pe.wdl similarity index 86% rename from workflows/wf_apollo_pe.wdl rename to workflows/wf_apollo_illumina_pe.wdl index c77b4041..fd5c2d34 100644 --- a/workflows/wf_apollo_pe.wdl +++ b/workflows/wf_apollo_illumina_pe.wdl @@ -5,7 +5,7 @@ import "../tasks/task_qc_utils.wdl" as qc import "../tasks/task_taxon_id.wdl" as taxon_id import "../tasks/task_denovo_assembly.wdl" as assembly -workflow apollo_pe { +workflow apollo_illumina_pe { meta { description: 
"De-novo genome assembly, taxonomic ID, and QC of paired-end bacterial NGS data" } @@ -41,7 +41,7 @@ workflow apollo_pe { samplename = samplename, genome_length = quast.genome_length } - call taxon_id.midas_nsphl { + call taxon_id.gambit { input: assembly = shovill_pe.assembly_fasta, samplename = samplename @@ -75,11 +75,12 @@ workflow apollo_pe { Float r2_mean_q = cg_pipeline.r2_mean_q Float est_coverage = cg_pipeline.est_coverage - File midas_nsphl_report = midas_nsphl.midas_nsphl_report - String midas_nsphl_docker = midas_nsphl.midas_nsphl_docker - Float midas_delta = midas_nsphl.midas_delta - String predicted_genus = midas_nsphl.predicted_genus - String predicted_species = midas_nsphl.predicted_species + File gambit_report = gambit.gambit_report + String gambit_docker = gambit.gambit_docker + Float gambit_score = gambit.gambit_score + Float gambit_delta = gambit.gambit_delta + String predicted_genus = gambit.predicted_genus + String predicted_species = gambit.predicted_species } } diff --git a/workflows/wf_ksnp3.wdl b/workflows/wf_ksnp3.wdl new file mode 100644 index 00000000..54011fdd --- /dev/null +++ b/workflows/wf_ksnp3.wdl @@ -0,0 +1,28 @@ +version 1.0 + +import "../tasks/task_phylo.wdl" as phylo + +workflow ksnp3 { + input { + Array[File] assembly_fasta + Array[String] samplename + String cluster_name + } + call phylo.ksnp3 as ksnp3_task { + input: + assembly_fasta=assembly_fasta, + samplename=samplename, + cluster_name=cluster_name + } + call phylo.snp_dists { + input: + cluster_name = cluster_name, + alignment = ksnp3_task.ksnp3_matrix + } + + output { + File snp_matrix = snp_dists.snp_matrix + File ksnp3_tree = ksnp3_task.ksnp3_tree + String ksnp3_docker = ksnp3_task.ksnp3_docker_image + } +} \ No newline at end of file