Skip to content

Commit

Permalink
Merge branch 'main' into jro-snippy_tree-dev
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinlibuit authored Dec 31, 2022
2 parents c9abd14 + 19d40c6 commit 6781a8a
Show file tree
Hide file tree
Showing 8 changed files with 585 additions and 3 deletions.
7 changes: 6 additions & 1 deletion .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ version: 1.2
workflows:
- name: TheiaEuk_PE
subclass: WDL
primaryDescriptorPath: /workflows/wf_theiaeuk.wdl
primaryDescriptorPath: /workflows/wf_theiaeuk_pe.wdl
testParameterFiles:
- empty.json
- name: Cauris_CladeTyper
Expand All @@ -18,5 +18,10 @@ workflows:
- name: Snippy_Tree
subclass: WDL
primaryDescriptorPath: /workflows/wf_snippy_tree.wdl
testParameterFiles:
- empty.json
- name: Nullarbor
subclass: WDL
primaryDescriptorPath: /workflows/wf_nullarbor.wdl
testParameterFiles:
- empty.json
1 change: 1 addition & 0 deletions tasks/gene_typing/task_snippy_variants.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -90,5 +90,6 @@ task snippy_variants {
cpu: "~{cpus}"
disks: "local-disk 100 SSD"
preemptible: 0
maxRetries: 3
}
}
4 changes: 2 additions & 2 deletions tasks/quality_control/task_screen.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ task check_reads {
# First Pass; assuming average depth
mash sketch -o test -k 31 -m 3 -r ~{read1} ~{read2} > mash-output.txt 2>&1
grep "Estimated genome size:" mash-output.txt | \
awk '{if($4){printf("%d", $4)}} END {if (!NR) print "0"}' > genome_size_output
awk '{if($4){printf("%5.0f\n", $4)}} END {if (!NR) print "0"}' > genome_size_output
grep "Estimated coverage:" mash-output.txt | \
awk '{if($3){printf("%d", $3)}} END {if (!NR) print "0"}' > coverage_output
rm -rf test.msh
Expand All @@ -95,7 +95,7 @@ task check_reads {
fi
mash sketch -o test -k 31 ${M} -r ~{read1} ~{read2} > mash-output.txt 2>&1
grep "Estimated genome size:" mash-output.txt | \
awk '{if($4){printf("%d", $4)}} END {if (!NR) print "0"}' > genome_size_output
awk '{if($4){printf("%5.0f\n", $4)}} END {if (!NR) print "0"}' > genome_size_output
grep "Estimated coverage:" mash-output.txt | \
awk '{if($3){printf("%d", $3)}} END {if (!NR) print "0"}' > coverage_output
rm -rf test.msh
Expand Down
104 changes: 104 additions & 0 deletions tasks/task_nullarbor.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
version 1.0

task nullarbor_tsv {
#Inputs
input {
String run_name = "run1"
File ref_genome
Array[File] read1
Array[File] read2
Array[String] samplename
String tree_builder = "iqtree_fast"
String assembler = "skesa"
String taxoner = "kraken2"
String docker = "quay.io/biocontainers/nullarbor:2.0.20191013--hdfd78af_3"
Int memory = 128
Int cpu = 16
File kraken1_db = "gs://theiagen-public-files/terra/theiaprok-files/minikraken_20171019_8GB_kraken1.tgz"
File kraken2_db = "gs://theiagen-public-files/terra/theiaprok-files/k2_standard_8gb_20210517.tar.gz"
File centrifuge_db = "gs://theiagen-public-files/terra/theiaprok-files/p_compressed+h+v.tar.gz"
}
command <<<
# capture date and version
# Print and save date
date | tee DATE
# Print and save version
nullarbor.pl --version | tee VERSION
# untar taxoner dbs
mkdir k1_db
mkdir /cromwell_root/k2_db
mkdir cent_db
tar -C k1_db -xzvf ~{kraken1_db}
tar -C /cromwell_root/k2_db/ -xzvf ~{kraken2_db}
tar -C cent_db -xzvf ~{centrifuge_db}
mv cent_db/p_compressed+h+v.1.cf cent_db.1.cf
mv cent_db/p_compressed+h+v.2.cf cent_db.2.cf
mv cent_db/p_compressed+h+v.3.cf cent_db.3.cf
mv k1_db/minikraken_20171019_8GB/* k1_db/
# assign dbs for taxoners
export KRAKEN_DEFAULT_DB=k1_db
export KRAKEN2_DEFAULT_DB=/cromwell_root/k2_db/
export KRAKEN2_DB_PATH=/cromwell_root/k2_db/
export CENTRIFUGE_DEFAULT_DB=cent_db
echo four
read1_array=(~{sep=' ' read1})
read1_array_len=$(echo "${#read1_array[@]}")
read2_array=(~{sep=' ' read2})
read2_array_len=$(echo "${#read2_array[@]}")
samplename_array=(~{sep=' ' samplename})
samplename_array_len=$(echo "${#samplename_array[@]}")

# Ensure read, and samplename arrays are of equal length
if [ "$read1_array_len" -ne "$samplename_array_len" ]; then
echo "Read1 array array (length: $read1_array_len) and samplename array (length: $samplename_array_len) are of unequal length." >&2
exit 1
fi

if [ "$read2_array_len" -ne "$samplename_array_len" ]; then
echo "Read2 array (length: $read2_array_len) and samplename array (length: $samplename_array_len) are of unequal length." >&2
exit 1
fi

# create file of filenames for nullarbor input
touch nullarbor_input.tsv
for index in ${!read1_array[@]}; do
read1=${read1_array[$index]}
read2=${read2_array[$index]}
samplename=${samplename_array[$index]}

echo -e "${samplename}\t${read1}\t${read2}" >> nullarbor_input.tsv
done
# Run Nullarbor on the input assembly with the --all flag
nullarbor.pl \
--name ~{run_name} \
--ref ~{ref_genome} \
--input nullarbor_input.tsv \
--outdir nullarbor_outdir \
--assembler ~{assembler} \
--treebuilder ~{tree_builder} \
--taxoner ~{taxoner} \
--verbose
#Run makefile
make preview -C nullarbor_outdir/
nice make all -j 2 -l 4 -C nullarbor_outdir/ 2>&1 | tee -a nullarbor_outdir/nullarbor.log

echo "nullarbor complete,tarballing results now"

tar -cf - nullarbor_outdir/ | gzip -n --best > ~{run_name}.tar.gz

>>>
output {
String nullarbor_version = read_string("VERSION")
String nullarbor_docker = "~{docker}"
String analysis_date = read_string("DATE")
File nullarbor_report = "/cromwell_root/nullarbor_outdir/report/index.html"
File nullarbor_output_dir = "~{run_name}.tar.gz"
}
runtime {
docker: "~{docker}"
memory: "~{memory} GB"
cpu: cpu
disks: "local-disk 500 SSD"
preemptible: 0
}
}
164 changes: 164 additions & 0 deletions tasks/taxon_id/task_gambit.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,167 @@ task gambit {
preemptible: 0
}
}
task gambit_euk {
input {
File assembly
String samplename
String docker = "quay.io/staphb/gambit:0.5.0"
File gambit_db_genomes = "gs://theiagen-public-files/terra/candida_auris_refs/221006-theiagen-fungal-v0.1.db"
File gambit_db_signatures = "gs://theiagen-public-files/terra/candida_auris_refs/221006-theiagen-fungal-v0.1.h5"
}
# If "File" type is used Cromwell attempts to localize it, which fails because it doesn't exist yet.
String report_path = "~{samplename}_gambit.json"
String closest_genomes_path = "~{samplename}_gambit_closest.csv"
command <<<
# capture date and version
date | tee DATE
gambit --version | tee GAMBIT_VERSION
# set gambit reference dir; will assume that gambit genomes and signatures will be provided by user in tandem or not at all
if [[ ! -z "~{gambit_db_genomes}" ]]; then
echo "User gabmit db identified; ~{gambit_db_genomes} will be utilized for alignment"
gambit_db_version="$(basename -- '~{gambit_db_genomes}'); $(basename -- '~{gambit_db_signatures}')"
gambit_db_dir="${PWD}/gambit_database"
mkdir ${gambit_db_dir}
cp ~{gambit_db_genomes} ${gambit_db_dir}
cp ~{gambit_db_signatures} ${gambit_db_dir}
else
gambit_db_dir="/gambit-db"
gambit_db_version="unmodified from gambit container: ~{docker}"
fi
echo ${gambit_db_version} | tee GAMBIT_DB_VERSION
gambit -d ${gambit_db_dir} query -f json -o ~{report_path} ~{assembly}
python3 <<EOF
import json
import csv
def fmt_dist(d): return format(d, '.4f')
with open("~{report_path}") as f:
data = json.load(f)
(item,) = data['items']
predicted = item['predicted_taxon']
next_taxon = item['next_taxon']
closest = item['closest_genomes'][0]
with open('CLOSEST_DISTANCE', 'w') as f:
f.write(fmt_dist(closest['distance']))
# Predicted taxon
with open('PREDICTED_TAXON', 'w') as f:
if predicted is None:
f.write('NA')
elif predicted['name'] is None:
f.write('NA')
else:
f.write(predicted['name'])
with open('PREDICTED_TAXON_RANK', 'w') as f:
if predicted is None:
f.write('NA')
elif predicted['rank'] is None:
f.write('NA')
else:
f.write(predicted['rank'])
with open('PREDICTED_TAXON_THRESHOLD', 'w') as f:
if predicted is None:
f.write(fmt_dist(0))
elif predicted['distance_threshold'] is None:
f.write(fmt_dist(0))
else:
f.write(fmt_dist(predicted['distance_threshold']))
# Next taxon
with open('NEXT_TAXON', 'w') as f:
if next_taxon is None:
f.write('NA')
elif next_taxon['name'] is None:
f.write('NA')
else:
f.write(next_taxon['name'])
with open('NEXT_TAXON_RANK', 'w') as f:
if next_taxon is None:
f.write('NA')
elif next_taxon['rank'] is None:
f.write('NA')
else:
f.write(next_taxon['rank'])
with open('NEXT_TAXON_THRESHOLD', 'w') as f:
if next_taxon is None:
f.write(fmt_dist(0))
elif next_taxon['distance_threshold'] is None:
f.write(fmt_dist(0))
else:
f.write(fmt_dist(next_taxon['distance_threshold']))
# Table of closest genomes
with open('~{closest_genomes_path}', 'w', newline='') as f:
writer = csv.writer(f)
# Header
writer.writerow([
'distance',
'genome.description',
'genome.taxon.name',
'genome.taxon.rank',
'genome.taxon.threshold',
'matched.name',
'matched.rank',
'matched.distance_threshold',
])
for match in item['closest_genomes']:
genome = match['genome']
genome_taxon = genome['taxonomy'][0]
match_taxon = match['matched_taxon']
writer.writerow([
fmt_dist(match['distance']),
genome['description'],
genome_taxon['name'],
genome_taxon['rank'],
fmt_dist(genome_taxon['distance_threshold']),
'' if match_taxon is None else match_taxon['name'],
'' if match_taxon is None else match_taxon['rank'],
fmt_dist(0 if match_taxon is None else match_taxon['distance_threshold']),
])
EOF
# set merlin tags
predicted_taxon=$(cat PREDICTED_TAXON)
if [[ ${predicted_taxon} == *"Candida auris"* ]] ; then
merlin_tag="Candida auris"
elif [[ ${predicted_taxon} == *"Candida albicans"* ]] ; then
merlin_tag="Candida albicans"
elif [[ ${predicted_taxon} == *"Aspergillus fumigatus"* ]] ; then
merlin_tag="Aspergillus fumigatus"
elif [[ ${predicted_taxon} == *"Cryptococcus neoformans"* ]] ; then
merlin_tag="Cryptococcus neoformans"
else
merlin_tag="None"
fi
echo ${merlin_tag} | tee MERLIN_TAG
>>>
output {
File gambit_report_file = report_path
File gambit_closest_genomes_file = closest_genomes_path
String gambit_predicted_taxon = read_string("PREDICTED_TAXON")
String gambit_predicted_taxon_rank = read_string("PREDICTED_TAXON_RANK")
String gambit_next_taxon = read_string("NEXT_TAXON")
String gambit_next_taxon_rank = read_string("NEXT_TAXON_RANK")
String gambit_version = read_string("GAMBIT_VERSION")
String gambit_db_version = read_string("GAMBIT_DB_VERSION")
String merlin_tag = read_string("MERLIN_TAG")
String gambit_docker = docker
}
runtime {
docker: "~{docker}"
memory: "16 GB"
cpu: 8
disks: "local-disk 100 SSD"
preemptible: 0
}
}
Loading

0 comments on commit 6781a8a

Please sign in to comment.