Skip to content

Commit

Permalink
Apollo_Illumina_PE & kSNP3 workflows (#7)
Browse files Browse the repository at this point in the history
* Update task_taxon_id.wdl

* refine output tsv

* increase compute resources

* fix formatting

* parse module reports to grab relevant outputs

* change local dev workflow name

* write to predicted taxon; format delta to two decimal places

* fix syntax

* print info for failed species predictions

* print info for failed species predictions

* print info for failed species predictions

* fix syntax

* update no-call message

* split taxon to genus and species

* write no-calls out as string

* fix typo

* rename to apollo

* add ksnp3 workflow

* Switch to gambit naming

* Fix file path

* fix typo

* fix typo
  • Loading branch information
kevinlibuit authored Jun 24, 2021
1 parent 4d0cb0f commit 9a29872
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 20 deletions.
10 changes: 10 additions & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,13 @@ workflows:
primaryDescriptorPath: /workflows/wf_apollo_pe.wdl
testParameterFiles:
- empty.json
- name: kSNP3
subclass: WDL
primaryDescriptorPath: /workflows/wf_ksnp3.wdl
testParameterFiles:
- empty.json
- name: Apollo_Illumina_PE
subclass: WDL
primaryDescriptorPath: /workflows/wf_apollo_illumina_pe.wdl
testParameterFiles:
- empty.json
91 changes: 91 additions & 0 deletions tasks/task_phylo.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
version 1.0

task ksnp3 {
  # Runs kSNP3 over a set of assemblies and exposes the core SNP matrix and
  # core tree it writes, renamed with the cluster_name prefix.
  #
  # Inputs:
  #   assembly_fasta - assembly FASTA files
  #   samplename     - sample labels; samplename[i] is paired with assembly_fasta[i]
  #   cluster_name   - prefix used for the output file names
  #   kmer_size      - value passed to kSNP3 via -k
  #   docker_image   - container image the command runs in
  #   mem_size_gb    - memory requested, in GB
  #   CPUs           - number of CPUs requested
  #
  # Outputs:
  #   ksnp3_matrix       - <cluster_name>_core_SNPs_matrix.fasta
  #   ksnp3_tree         - <cluster_name>_core.tree
  #   ksnp3_docker_image - the docker_image value that was used

  input {
    Array[File] assembly_fasta
    Array[String] samplename
    String cluster_name
    Int kmer_size = 19
    String docker_image = "staphb/ksnp3:3.1"
    Int mem_size_gb = 8
    Int CPUs = 4
  }

  command <<<

    # NOTE(review): both arrays are populated through shell word-splitting, so a
    # file path or sample name containing whitespace would be split into several
    # elements -- confirm callers never pass such values.
    assembly_array=(~{sep=' ' assembly_fasta})
    assembly_array_len=${#assembly_array[@]}
    samplename_array=(~{sep=' ' samplename})
    samplename_array_len=${#samplename_array[@]}

    # Ensure assembly and samplename arrays are of equal length
    if [ "$assembly_array_len" -ne "$samplename_array_len" ]; then
      echo "Assembly array (length: $assembly_array_len) and samplename array (length: $samplename_array_len) are of unequal length." >&2
      exit 1
    fi

    # create file of filenames for kSNP3 input
    date

    # One "<assembly path><TAB><sample name>" row per sample. printf is used
    # instead of `echo -e` so backslashes in a path or name are written
    # verbatim; redirecting the whole loop creates the file even when the
    # arrays are empty.
    for index in "${!assembly_array[@]}"; do
      printf '%s\t%s\n' "${assembly_array[$index]}" "${samplename_array[$index]}"
    done > ksnp3_input.tsv

    kSNP3 -in ksnp3_input.tsv -outdir ksnp3 -k ~{kmer_size} -core

    # Quote the destinations so a cluster_name containing whitespace stays a
    # single file name.
    mv ksnp3/core_SNPs_matrix.fasta "~{cluster_name}_core_SNPs_matrix.fasta"
    mv ksnp3/tree.core.tre "~{cluster_name}_core.tree"

  >>>

  output {
    File ksnp3_matrix = "~{cluster_name}_core_SNPs_matrix.fasta"
    File ksnp3_tree = "~{cluster_name}_core.tree"
    String ksnp3_docker_image = docker_image
  }

  runtime {
    docker: docker_image
    memory: "~{mem_size_gb} GB"
    cpu: CPUs
    disks: "local-disk 100 SSD"
    preemptible: 0
  }
}

task snp_dists {
  # Runs snp-dists on an alignment and captures its output as a TSV file.
  #
  # Inputs:
  #   alignment    - alignment file handed to snp-dists
  #   cluster_name - prefix used for the output file name
  #
  # Outputs:
  #   date       - output of `date` captured when the task ran
  #   version    - output of `snp-dists -v`
  #   snp_matrix - <cluster_name>_snp_distance_matrix.tsv

  input {
    File alignment
    String cluster_name
  }

  # Heredoc-style command with ~{} placeholders, matching the ksnp3 task in
  # this file and keeping WDL interpolation distinct from shell ${} syntax.
  command <<<
    # date and version control
    date | tee DATE
    snp-dists -v | tee VERSION

    # Both paths are quoted: an unquoted redirect target containing whitespace
    # fails in bash with "ambiguous redirect".
    snp-dists "~{alignment}" > "~{cluster_name}_snp_distance_matrix.tsv"

  >>>

  output {
    String date = read_string("DATE")
    String version = read_string("VERSION")
    File snp_matrix = "~{cluster_name}_snp_distance_matrix.tsv"
  }

  runtime {
    docker: "staphb/snp-dists:0.6.2"
    memory: "2 GB"
    cpu: 2
    disks: "local-disk 100 SSD"
    preemptible: 0
  }
}
31 changes: 18 additions & 13 deletions tasks/task_taxon_id.wdl
Original file line number Diff line number Diff line change
@@ -1,30 +1,34 @@
version 1.0

task midas_nsphl {
task gambit {
input {
File assembly
String samplename
String docker="theiagen/midas_nsphl:1.0.0"
File assembly
String samplename
String docker="theiagen/midas_nsphl:1.0.0"
}
command <<<
# capture date and version
date | tee DATE

midas query ~{assembly} | tail -n2 > ~{samplename}_midas_nsphl.csv
midas query ~{assembly} | tail -n2 > ~{samplename}_gambit.csv

python3 <<CODE
import csv
#grab output genome length and number contigs by column header
with open("~{samplename}_midas_nsphl.csv",'r') as csv_file:
with open("~{samplename}_gambit.csv",'r') as csv_file:
csv_reader = list(csv.DictReader(csv_file, delimiter=","))
for line in csv_reader:
with open("MIDAS_DELTA", 'wt') as midas_delta:
with open ("GAMBIT_SCORE", 'wt') as gambit_score:
top_score=float(line["top_score"])
top_score="{:.2f}".format(top_score)
gambit_score.write(str(top_score))
with open("GAMBIT_DELTA", 'wt') as gambit_delta:
top_score=float(line["top_score"])
species_threshold=float(line["species_threshold"])
delta=top_score - species_threshold
#format delta to two decimal places
delta="{:.2f}".format(delta)
midas_delta.write(str(delta))
gambit_delta.write(str(delta))
with open("PREDICTED_GENUS", 'wt') as predicted_genus:
genus=line["predicted_genus"]
if not genus:
Expand All @@ -38,11 +42,12 @@ task midas_nsphl {
CODE
>>>
output {
File midas_nsphl_report = "~{samplename}_midas_nsphl.csv"
String midas_nsphl_docker = docker
String pipeline_date = read_string("DATE")
Float midas_delta = read_float("MIDAS_DELTA")
String predicted_genus = read_string("PREDICTED_GENUS")
File gambit_report = "~{samplename}_gambit.csv"
String gambit_docker = docker
String pipeline_date = read_string("DATE")
Float gambit_score = read_float("GAMBIT_SCORE")
Float gambit_delta = read_float("GAMBIT_DELTA")
String predicted_genus = read_string("PREDICTED_GENUS")
String predicted_species = read_string("PREDICTED_SPECIES")
}
runtime {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import "../tasks/task_qc_utils.wdl" as qc
import "../tasks/task_taxon_id.wdl" as taxon_id
import "../tasks/task_denovo_assembly.wdl" as assembly

workflow apollo_pe {
workflow apollo_illumina_pe {
meta {
description: "De-novo genome assembly, taxonomic ID, and QC of paired-end bacterial NGS data"
}
Expand Down Expand Up @@ -41,7 +41,7 @@ workflow apollo_pe {
samplename = samplename,
genome_length = quast.genome_length
}
call taxon_id.midas_nsphl {
call taxon_id.gambit {
input:
assembly = shovill_pe.assembly_fasta,
samplename = samplename
Expand Down Expand Up @@ -75,11 +75,12 @@ workflow apollo_pe {
Float r2_mean_q = cg_pipeline.r2_mean_q
Float est_coverage = cg_pipeline.est_coverage

File midas_nsphl_report = midas_nsphl.midas_nsphl_report
String midas_nsphl_docker = midas_nsphl.midas_nsphl_docker
Float midas_delta = midas_nsphl.midas_delta
String predicted_genus = midas_nsphl.predicted_genus
String predicted_species = midas_nsphl.predicted_species
File gambit_report = gambit.gambit_report
String gambit_docker = gambit.gambit_docker
Float gambit_score = gambit.gambit_score
Float gambit_delta = gambit.gambit_delta
String predicted_genus = gambit.predicted_genus
String predicted_species = gambit.predicted_species

}
}
28 changes: 28 additions & 0 deletions workflows/wf_ksnp3.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
version 1.0

import "../tasks/task_phylo.wdl" as phylo

workflow ksnp3 {
  # Chains the two tasks imported from task_phylo.wdl: the ksnp3 task runs on
  # the supplied assemblies, and its matrix output is passed to snp_dists as
  # the alignment.

  input {
    Array[File] assembly_fasta
    Array[String] samplename
    String cluster_name
  }

  # Aliased to ksnp3_task so the call name differs from the workflow name.
  call phylo.ksnp3 as ksnp3_task {
    input:
      cluster_name = cluster_name,
      samplename = samplename,
      assembly_fasta = assembly_fasta
  }

  # Consumes the matrix file emitted by the ksnp3 task above.
  call phylo.snp_dists {
    input:
      alignment = ksnp3_task.ksnp3_matrix,
      cluster_name = cluster_name
  }

  output {
    # from snp_dists
    File snp_matrix = snp_dists.snp_matrix

    # from the ksnp3 task
    File ksnp3_tree = ksnp3_task.ksnp3_tree
    String ksnp3_docker = ksnp3_task.ksnp3_docker_image
  }
}

0 comments on commit 9a29872

Please sign in to comment.