diff --git a/.dockstore.yml b/.dockstore.yml index a7bf3fb12..b8255860f 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -15,6 +15,11 @@ workflows: primaryDescriptorPath: /workflows/wf_snippy_variants.wdl testParameterFiles: - empty.json + - name: Snippy_Tree + subclass: WDL + primaryDescriptorPath: /workflows/wf_snippy_tree.wdl + testParameterFiles: + - empty.json - name: Nullarbor subclass: WDL primaryDescriptorPath: /workflows/wf_nullarbor.wdl diff --git a/tasks/gene_typing/task_snippy_variants.wdl b/tasks/gene_typing/task_snippy_variants.wdl index 047ca9934..eba0af05d 100644 --- a/tasks/gene_typing/task_snippy_variants.wdl +++ b/tasks/gene_typing/task_snippy_variants.wdl @@ -8,8 +8,8 @@ task snippy_variants { String? query_gene String samplename String docker = "staphb/snippy:4.6.0" - Int cpus = 4 - Int memory = 16 + Int cpus = 8 + Int memory = 32 # Paramters # --map_qual: Minimum read mapping quality to consider (default '60') # --base_quality: Minimum base quality to consider (default '13') @@ -36,7 +36,7 @@ task snippy_variants { if [ -z "~{query_gene}" ]; then no_hit="NA: No query gene was provided" else - no_hit="No variants identified in quieried genes (~{query_gene})" + no_hit="No variants identified in queried genes (~{query_gene})" fi # call snippy snippy \ @@ -70,13 +70,15 @@ task snippy_variants { else echo "${no_hit}" > SNIPPY_VARIANT_HITS fi + # Compress output dir + tar -cvzf "./~{samplename}_snippy_variants_outdir.tar" "./~{samplename}" >>> output { String snippy_variants_version = read_string("VERSION") String snippy_variants_query = "~{query_gene}" String snippy_variants_hits = read_string("SNIPPY_VARIANT_HITS") + File snippy_variants_outdir_tarball = "./~{samplename}_snippy_variants_outdir.tar" File snippy_variants_gene_query_results = "./gene_query.csv" - Array[File] snippy_outputs = glob("~{samplename}/~{samplename}*") File snippy_variants_results = "~{samplename}/~{samplename}.csv" File snippy_variants_bam = "~{samplename}/~{samplename}.bam" File snippy_variants_bai ="~{samplename}/~{samplename}.bam.bai" diff --git a/tasks/phylogenetic_inference/task_gubbins.wdl b/tasks/phylogenetic_inference/task_gubbins.wdl new file mode 100644 index 000000000..c1f979022 --- /dev/null +++ b/tasks/phylogenetic_inference/task_gubbins.wdl @@ -0,0 +1,54 @@ +version 1.0 + +task gubbins { + input { + File alignment + String cluster_name + String docker = "sangerpathogens/gubbins" + Int? filter_percent = 25 #default is 25% + Int? iterations = 5 + String? tree_builder = "raxml" + String? tree_args + String? nuc_subst_model = "GTRGAMMA" + Int? bootstrap = 0 + String? outgroup + File? dates_file + } + command <<< + # date and version control + date | tee DATE + run_gubbins.py --version | tee VERSION + + run_gubbins.py \ + ~{alignment} \ + --prefix ~{cluster_name} \ + --filter-percentage ~{filter_percent} \ + --iterations ~{iterations} \ + --tree-builder ~{tree_builder} \ + ~{'--tree-args ' + tree_args} \ + ~{'--model ' + nuc_subst_model} \ + --bootstrap ~{bootstrap} \ + ~{'--outgroup ' + outgroup} \ + ~{'--date ' + dates_file} \ + --threads 2 + >>> + output { + String date = read_string("DATE") + String version = read_string("VERSION") + File gubbins_final_tree = "~{cluster_name}.final_tree.tre" + File gubbins_final_labelled_tree = "~{cluster_name}.node_labelled.final_tree.tre" + File gubbins_polymorphic_fasta = "~{cluster_name}.filtered_polymorphic_sites.fasta" + File gubbins_recombination_gff = "~{cluster_name}.recombination_predictions.gff" + File gubbins_branch_stats = "~{cluster_name}.per_branch_statistics.csv" + File? gubbins_timetree = "~{cluster_name}.final_tree.timetree.tre" + File? gubbins_timetree_stats = "~{cluster_name}.lsd.out" + } + runtime { + docker: "~{docker}" + memory: "32 GB" + cpu: 4 + disks: "local-disk 100 SSD" + preemptible: 0 + maxRetries: 1 + } +} diff --git a/tasks/phylogenetic_inference/task_iqtree.wdl b/tasks/phylogenetic_inference/task_iqtree.wdl index a0eb8d4bb..3583d669e 100644 --- a/tasks/phylogenetic_inference/task_iqtree.wdl +++ b/tasks/phylogenetic_inference/task_iqtree.wdl @@ -16,7 +16,7 @@ task iqtree { iqtree --version | grep version | sed 's/.*version/version/;s/ for Linux.*//' | tee VERSION numGenomes=`grep -o '>' ~{alignment} | wc -l` - if [ $numGenomes -gt 3 ] + if [ "$numGenomes" -gt 3 ] then cp ~{alignment} ./msa.fasta iqtree \ @@ -27,13 +27,13 @@ task iqtree { -alrt ~{alrt} \ ~{iqtree_opts} - cp msa.fasta.contree ~{cluster_name}_msa.tree + cp msa.fasta.contree ~{cluster_name}_iqtree.tree fi >>> output { String date = read_string("DATE") String version = read_string("VERSION") - File ml_tree = "~{cluster_name}_msa.tree" + File ml_tree = "~{cluster_name}_iqtree.tree" } runtime { docker: "~{docker}" diff --git a/tasks/phylogenetic_inference/task_snippy_core.wdl b/tasks/phylogenetic_inference/task_snippy_core.wdl new file mode 100644 index 000000000..e0c81a110 --- /dev/null +++ b/tasks/phylogenetic_inference/task_snippy_core.wdl @@ -0,0 +1,56 @@ +version 1.0 + +task snippy_core { + input { + Array[File] snippy_variants_outdir_tarball + Array[String] samplenames + String tree_name + String docker = "staphb/snippy:4.6.0" + File reference + File? bed_file + } + command <<< + # version control + snippy --version | head -1 | tee VERSION + + tarball_array=(~{sep=" " snippy_variants_outdir_tarball}) + samplename_array=(~{sep=" " samplenames}) + + # iteratively untar + for i in ${tarball_array[@]}; do tar -xf $i; done + + # run snippy core + snippy-core \ + --prefix ~{tree_name} \ + ~{'--mask ' + bed_file} \ + --ref ~{reference} \ + "${samplename_array[@]}" + + # run snippy clean + snippy-clean_full_aln \ + ~{tree_name}.full.aln > ~{tree_name}_snippy_clean_full.aln + + mv ~{tree_name}.aln ~{tree_name}_core.aln + mv ~{tree_name}.full.aln ~{tree_name}_full.aln + mv ~{tree_name}.tab ~{tree_name}_all_snps.tsv + mv ~{tree_name}.txt ~{tree_name}_snps_summary.txt + >>> + output { + String snippy_version = read_string("VERSION") + File snippy_core_alignment = "~{tree_name}_core.aln" + File snippy_full_alignment = "~{tree_name}_full.aln" + File snippy_full_alignment_clean = "~{tree_name}_snippy_clean_full.aln" + File snippy_ref = "~{tree_name}.ref.fa" + File snippy_core_tab = "~{tree_name}_all_snps.tsv" + File snippy_txt = "~{tree_name}_snps_summary.txt" + File snippy_vcf = "~{tree_name}.vcf" + String snippy_docker_image = docker + } + runtime { + docker: "~{docker}" + memory: "8 GB" + cpu: 4 + disks: "local-disk 100 SSD" + preemptible: 0 + } +} \ No newline at end of file diff --git a/tasks/phylogenetic_inference/task_snp_dists.wdl b/tasks/phylogenetic_inference/task_snp_dists.wdl index 1e7d22332..d871a8d10 100644 --- a/tasks/phylogenetic_inference/task_snp_dists.wdl +++ b/tasks/phylogenetic_inference/task_snp_dists.wdl @@ -94,13 +94,15 @@ task snp_dists { z.close() print "Matrix has been created in current directory as '~{cluster_name}_snp_distance_matrix.tsv.'" - CODE + CODE + cp snp-dists-molten-ordered.tsv ~{cluster_name}_snp-dists_list.tsv + cp snp-dists-matrix.tsv ~{cluster_name}_snp_distance_matrix.tsv >>> output { String date = read_string("DATE") String version = read_string("VERSION") File snp_matrix = "${cluster_name}_snp_distance_matrix.tsv" - File snp_dists_molten_ordered = "snp-dists-molten-ordered.tsv" + File snp_dists_molten_ordered = "${cluster_name}_snp-dists_list.tsv" } runtime { docker: "quay.io/staphb/snp-dists:0.8.2" diff --git a/tasks/utilities/task_broad_terra_tools.wdl b/tasks/utilities/task_broad_terra_tools.wdl index a046695d9..4290ecb44 100644 --- a/tasks/utilities/task_broad_terra_tools.wdl +++ b/tasks/utilities/task_broad_terra_tools.wdl @@ -177,6 +177,7 @@ task export_taxon_tables { String? midas_primary_genus String? midas_secondary_genus String? midas_secondary_genus_coverage + Array[File]? snippy_variants_outdir_tarball } command <<< @@ -206,9 +207,9 @@ task export_taxon_tables { if [ ! -z ${sample_table} ]; then # create single-entity sample data table ## header - echo -e "entity:${sample_table}_id\treads\tread1\tread2\tread1_clean\tread2_clean\trun_id\tcollection_date\toriginating_lab\tcity\tcounty\tzip\ttheiaprok_illumina_pe_version\ttheiaprok_illumina_pe_analysis_date\ttheiaprok_illumina_se_version\ttheiaprok_illumina_se_analysis_date\tseq_platform\tnum_reads_raw1\tnum_reads_raw2\tnum_reads_raw_pairs\tfastq_scan_version\tnum_reads_clean1\tnum_reads_clean2\tnum_reads_clean_pairs\ttrimmomatic_version\tbbduk_docker\tr1_mean_q\tr2_mean_q\tassembly_fasta\tcontigs_gfa\tshovill_pe_version\tshovill_se_version\tquast_report\tquast_version\tgenome_length\tnumber_contigs\tn50_value\tcg_pipeline_report\tcg_pipeline_docker\test_coverage\tgambit_report\tgambit_predicted_taxon\tgambit_predicted_taxon_rank\tgambit_closest_genomes\tgambit_version\tgambit_db_version\tgambit_docker\tbusco_version\tbusco_database\tbusco_results\tbusco_report\tts_mlst_results\tts_mlst_predicted_st\tts_mlst_pubmlst_scheme\tts_mlst_version\tserotypefinder_report\tserotypefinder_docker\tserotypefinder_serotype\tectyper_results\tectyper_version\tectyper_predicted_serotype\tlissero_results\tlissero_version\tlissero_serotype\tsistr_results\tsistr_allele_json\tsister_allele_fasta\tsistr_cgmlst\tsistr_version\tsistr_predicted_serotype\tseqsero2_report\tseqsero2_version\tseqsero2_predicted_antigenic_profile\tseqsero2_predicted_serotype\tseqsero2_predicted_contamination\tkleborate_output_file\tkleborate_version\tkleborate_docker\tkleborate_key_resistance_genes\tkleborate_genomic_resistance_mutations\tkleborate_mlst_sequence_type\tkleborate_klocus\tkleborate_ktype\tkleborate_olocus\tkleborate_otype\tkleborate_klocus_confidence\tkleborate_olocus_confidence\tkaptive_version\tkaptive_output_file_k\tkaptive_output_file_oc\tkaptive_k_locus\tkaptive_k_type\tkaptive_kl_confidence\tkaptive_oc_locus\tkaptive_ocl_confidence\tabricate_abaum_plasmid_tsv\tabricate_abaum_plasmid_type_genes\tabricate_database\tabricate_version\tabricate_docker\tlegsta_results\tlegsta_predicted_sbt\tlegsta_version\ttbprofiler_output_file\ttbprofiler_output_bam\ttbprofiler_output_bai\ttbprofiler_version\ttbprofiler_main_lineage\ttbprofiler_sub_lineage\ttbprofiler_dr_type\ttbprofiler_resistance_genes\tamrfinderplus_all_report\tamrfinderplus_amr_report\tamrfinderplus_stress_report\tamrfinderplus_virulence_report\tamrfinderplus_version\tamrfinderplus_db_version\tamrfinderplus_amr_genes\tamrfinderplus_stress_genes\tamrfinderplus_virulence_genes\tamrfinderplus_amr_classes\tamrfinderplus_amr_subclasses\tgenotyphi_report_tsv\tgenotyphi_mykrobe_json\tgenotyphi_version\tgenotyphi_species\tgenotyphi_st_probes_percent_coverage\tgenotyphi_final_genotype\tgenotyphi_genotype_confidence\tani_highest_percent\tani_highest_percent_bases_aligned\tani_output_tsv\tani_top_species_match\tani_mummer_version\tresfinder_pheno_table\tresfinder_pheno_table_species\tresfinder_seqs\tresfinder_results\tresfinder_pointfinder_pheno_table\tresfinder_pointfinder_results\tresfinder_db_version\tresfinder_docker\tprokka_gff\tprokka_gbk\tprokka_sqn\tplasmidfinder_plasmids\tplasmidfinder_results\tplasmidfinder_seqs\tplasmidfinder_docker\tplasmidfinder_db_version\tpbptyper_predicted_1A_2B_2X\tpbptyper_pbptype_predicted_tsv\tpbptyper_version\tpbptyper_docker\tpoppunk_gps_cluster\tpoppunk_gps_external_cluster_csv\tpoppunk_GPS_db_version\tpoppunk_version\tpoppunk_docker\tseroba_version\tseroba_docker\tseroba_serotype\tseroba_ariba_serotype\tseroba_ariba_identity\tseroba_details\tmidas_docker\tmidas_report\tmidas_primary_genus\tmidas_secondary_genus\tmidas_secondary_genus_coverage" > ~{samplename}_terra_table.tsv + echo -e "entity:${sample_table}_id\treads\tread1\tread2\tread1_clean\tread2_clean\trun_id\tcollection_date\toriginating_lab\tcity\tcounty\tzip\ttheiaprok_illumina_pe_version\ttheiaprok_illumina_pe_analysis_date\ttheiaprok_illumina_se_version\ttheiaprok_illumina_se_analysis_date\tseq_platform\tnum_reads_raw1\tnum_reads_raw2\tnum_reads_raw_pairs\tfastq_scan_version\tnum_reads_clean1\tnum_reads_clean2\tnum_reads_clean_pairs\ttrimmomatic_version\tbbduk_docker\tr1_mean_q\tr2_mean_q\tassembly_fasta\tcontigs_gfa\tshovill_pe_version\tshovill_se_version\tquast_report\tquast_version\tgenome_length\tnumber_contigs\tn50_value\tcg_pipeline_report\tcg_pipeline_docker\test_coverage\tgambit_report\tgambit_predicted_taxon\tgambit_predicted_taxon_rank\tgambit_closest_genomes\tgambit_version\tgambit_db_version\tgambit_docker\tbusco_version\tbusco_database\tbusco_results\tbusco_report\tts_mlst_results\tts_mlst_predicted_st\tts_mlst_pubmlst_scheme\tts_mlst_version\tserotypefinder_report\tserotypefinder_docker\tserotypefinder_serotype\tectyper_results\tectyper_version\tectyper_predicted_serotype\tlissero_results\tlissero_version\tlissero_serotype\tsistr_results\tsistr_allele_json\tsister_allele_fasta\tsistr_cgmlst\tsistr_version\tsistr_predicted_serotype\tseqsero2_report\tseqsero2_version\tseqsero2_predicted_antigenic_profile\tseqsero2_predicted_serotype\tseqsero2_predicted_contamination\tkleborate_output_file\tkleborate_version\tkleborate_docker\tkleborate_key_resistance_genes\tkleborate_genomic_resistance_mutations\tkleborate_mlst_sequence_type\tkleborate_klocus\tkleborate_ktype\tkleborate_olocus\tkleborate_otype\tkleborate_klocus_confidence\tkleborate_olocus_confidence\tkaptive_version\tkaptive_output_file_k\tkaptive_output_file_oc\tkaptive_k_locus\tkaptive_k_type\tkaptive_kl_confidence\tkaptive_oc_locus\tkaptive_ocl_confidence\tabricate_abaum_plasmid_tsv\tabricate_abaum_plasmid_type_genes\tabricate_database\tabricate_version\tabricate_docker\tlegsta_results\tlegsta_predicted_sbt\tlegsta_version\ttbprofiler_output_file\ttbprofiler_output_bam\ttbprofiler_output_bai\ttbprofiler_version\ttbprofiler_main_lineage\ttbprofiler_sub_lineage\ttbprofiler_dr_type\ttbprofiler_resistance_genes\tamrfinderplus_all_report\tamrfinderplus_amr_report\tamrfinderplus_stress_report\tamrfinderplus_virulence_report\tamrfinderplus_version\tamrfinderplus_db_version\tamrfinderplus_amr_genes\tamrfinderplus_stress_genes\tamrfinderplus_virulence_genes\tamrfinderplus_amr_classes\tamrfinderplus_amr_subclasses\tgenotyphi_report_tsv\tgenotyphi_mykrobe_json\tgenotyphi_version\tgenotyphi_species\tgenotyphi_st_probes_percent_coverage\tgenotyphi_final_genotype\tgenotyphi_genotype_confidence\tani_highest_percent\tani_highest_percent_bases_aligned\tani_output_tsv\tani_top_species_match\tani_mummer_version\tresfinder_pheno_table\tresfinder_pheno_table_species\tresfinder_seqs\tresfinder_results\tresfinder_pointfinder_pheno_table\tresfinder_pointfinder_results\tresfinder_db_version\tresfinder_docker\tprokka_gff\tprokka_gbk\tprokka_sqn\tplasmidfinder_plasmids\tplasmidfinder_results\tplasmidfinder_seqs\tplasmidfinder_docker\tplasmidfinder_db_version\tpbptyper_predicted_1A_2B_2X\tpbptyper_pbptype_predicted_tsv\tpbptyper_version\tpbptyper_docker\tpoppunk_gps_cluster\tpoppunk_gps_external_cluster_csv\tpoppunk_GPS_db_version\tpoppunk_version\tpoppunk_docker\tseroba_version\tseroba_docker\tseroba_serotype\tseroba_ariba_serotype\tseroba_ariba_identity\tseroba_details\tmidas_docker\tmidas_report\tmidas_primary_genus\tmidas_secondary_genus\tmidas_secondary_genus_coverage\tsnippy_variants_outdir_tarball" > ~{samplename}_terra_table.tsv ## TheiaProk Outs - echo -e "~{samplename}\t~{reads}\t~{read1}\t~{read2}\t~{read1_clean}\t~{read2_clean}\t~{run_id}\t~{collection_date}\t~{originating_lab}\t~{city}\t~{county}\t~{zip}\t~{theiaprok_illumina_pe_version}\t~{theiaprok_illumina_pe_analysis_date}\t~{theiaprok_illumina_se_version}\t~{theiaprok_illumina_se_analysis_date}\t~{seq_platform}\t~{num_reads_raw1}\t~{num_reads_raw2}\t~{num_reads_raw_pairs}\t~{fastq_scan_version}\t~{num_reads_clean1}\t~{num_reads_clean2}\t~{num_reads_clean_pairs}\t~{trimmomatic_version}\t~{bbduk_docker}\t~{r1_mean_q}\t~{r2_mean_q}\t~{assembly_fasta}\t~{contigs_gfa}\t~{shovill_pe_version}\t~{shovill_se_version}\t~{quast_report}\t~{quast_version}\t~{genome_length}\t~{number_contigs}\t~{n50_value}\t~{cg_pipeline_report}\t~{cg_pipeline_docker}\t~{est_coverage}\t~{gambit_report}\t~{gambit_predicted_taxon}\t~{gambit_predicted_taxon_rank}\t~{gambit_closest_genomes}\t~{gambit_version}\t~{gambit_db_version}\t~{gambit_docker}\t~{busco_version}\t~{busco_database}\t~{busco_results}\t~{busco_report}\t~{ts_mlst_results}\t~{ts_mlst_predicted_st}\t~{ts_mlst_pubmlst_scheme}\t~{ts_mlst_version}\t~{serotypefinder_report}\t~{serotypefinder_docker}\t~{serotypefinder_serotype}\t~{ectyper_results}\t~{ectyper_version}\t~{ectyper_predicted_serotype}\t~{lissero_results}\t~{lissero_version}\t~{lissero_serotype}\t~{sistr_results}\t~{sistr_allele_json}\t~{sister_allele_fasta}\t~{sistr_cgmlst}\t~{sistr_version}\t~{sistr_predicted_serotype}\t~{seqsero2_report}\t~{seqsero2_version}\t~{seqsero2_predicted_antigenic_profile}\t~{seqsero2_predicted_serotype}\t~{seqsero2_predicted_contamination}\t~{kleborate_output_file}\t~{kleborate_version}\t~{kleborate_docker}\t~{kleborate_key_resistance_genes}\t~{kleborate_genomic_resistance_mutations}\t~{kleborate_mlst_sequence_type}\t~{kleborate_klocus}\t~{kleborate_ktype}\t~{kleborate_olocus}\t~{kleborate_otype}\t~{kleborate_klocus_confidence}\t~{kleborate_olocus_confidence}\t~{kaptive_version}\t~{kaptive_output_file_k}\t~{kaptive_output_file_oc}\t~{kaptive_k_locus}\t~{kaptive_k_type}\t~{kaptive_kl_confidence}\t~{kaptive_oc_locus}\t~{kaptive_ocl_confidence}\t~{abricate_abaum_plasmid_tsv}\t~{abricate_abaum_plasmid_type_genes}\t~{abricate_database}\t~{abricate_version}\t~{abricate_docker}\t~{legsta_results}\t~{legsta_predicted_sbt}\t~{legsta_version}\t~{tbprofiler_output_file}\t~{tbprofiler_output_bam}\t~{tbprofiler_output_bai}\t~{tbprofiler_version}\t~{tbprofiler_main_lineage}\t~{tbprofiler_sub_lineage}\t~{tbprofiler_dr_type}\t~{tbprofiler_resistance_genes}\t~{amrfinderplus_all_report}\t~{amrfinderplus_amr_report}\t~{amrfinderplus_stress_report}\t~{amrfinderplus_virulence_report}\t~{amrfinderplus_version}\t~{amrfinderplus_db_version}\t~{amrfinderplus_amr_genes}\t~{amrfinderplus_stress_genes}\t~{amrfinderplus_virulence_genes}\t~{amrfinderplus_amr_classes}\t~{amrfinderplus_amr_subclasses}\t~{genotyphi_report_tsv}\t~{genotyphi_mykrobe_json}\t~{genotyphi_version}\t~{genotyphi_species}\t~{genotyphi_st_probes_percent_coverage}\t~{genotyphi_final_genotype}\t~{genotyphi_genotype_confidence}\t~{ani_highest_percent}\t~{ani_highest_percent_bases_aligned}\t~{ani_output_tsv}\t~{ani_top_species_match}\t~{ani_mummer_version}\t~{resfinder_pheno_table}\t~{resfinder_pheno_table_species}\t~{resfinder_seqs}\t~{resfinder_results}\t~{resfinder_pointfinder_pheno_table}\t~{resfinder_pointfinder_results}\t~{resfinder_db_version}\t~{resfinder_docker}\t~{prokka_gff}\t~{prokka_gbk}\t~{prokka_sqn}\t~{plasmidfinder_plasmids}\t~{plasmidfinder_results}\t~{plasmidfinder_seqs}\t~{plasmidfinder_docker}\t~{plasmidfinder_db_version}\t~{pbptyper_predicted_1A_2B_2X}\t~{pbptyper_pbptype_predicted_tsv}\t~{pbptyper_version}\t~{pbptyper_docker}\t~{poppunk_gps_cluster}\t~{poppunk_gps_external_cluster_csv}\t~{poppunk_GPS_db_version}\t~{poppunk_version}\t~{poppunk_docker}\t~{seroba_version}\t~{seroba_docker}\t~{seroba_serotype}\t~{seroba_ariba_serotype}\t~{seroba_ariba_identity}\t~{seroba_details}\t~{midas_docker}\t~{midas_report}\t~{midas_primary_genus}\t~{midas_secondary_genus}\t~{midas_secondary_genus_coverage}" >> ~{samplename}_terra_table.tsv + echo -e "~{samplename}\t~{reads}\t~{read1}\t~{read2}\t~{read1_clean}\t~{read2_clean}\t~{run_id}\t~{collection_date}\t~{originating_lab}\t~{city}\t~{county}\t~{zip}\t~{theiaprok_illumina_pe_version}\t~{theiaprok_illumina_pe_analysis_date}\t~{theiaprok_illumina_se_version}\t~{theiaprok_illumina_se_analysis_date}\t~{seq_platform}\t~{num_reads_raw1}\t~{num_reads_raw2}\t~{num_reads_raw_pairs}\t~{fastq_scan_version}\t~{num_reads_clean1}\t~{num_reads_clean2}\t~{num_reads_clean_pairs}\t~{trimmomatic_version}\t~{bbduk_docker}\t~{r1_mean_q}\t~{r2_mean_q}\t~{assembly_fasta}\t~{contigs_gfa}\t~{shovill_pe_version}\t~{shovill_se_version}\t~{quast_report}\t~{quast_version}\t~{genome_length}\t~{number_contigs}\t~{n50_value}\t~{cg_pipeline_report}\t~{cg_pipeline_docker}\t~{est_coverage}\t~{gambit_report}\t~{gambit_predicted_taxon}\t~{gambit_predicted_taxon_rank}\t~{gambit_closest_genomes}\t~{gambit_version}\t~{gambit_db_version}\t~{gambit_docker}\t~{busco_version}\t~{busco_database}\t~{busco_results}\t~{busco_report}\t~{ts_mlst_results}\t~{ts_mlst_predicted_st}\t~{ts_mlst_pubmlst_scheme}\t~{ts_mlst_version}\t~{serotypefinder_report}\t~{serotypefinder_docker}\t~{serotypefinder_serotype}\t~{ectyper_results}\t~{ectyper_version}\t~{ectyper_predicted_serotype}\t~{lissero_results}\t~{lissero_version}\t~{lissero_serotype}\t~{sistr_results}\t~{sistr_allele_json}\t~{sister_allele_fasta}\t~{sistr_cgmlst}\t~{sistr_version}\t~{sistr_predicted_serotype}\t~{seqsero2_report}\t~{seqsero2_version}\t~{seqsero2_predicted_antigenic_profile}\t~{seqsero2_predicted_serotype}\t~{seqsero2_predicted_contamination}\t~{kleborate_output_file}\t~{kleborate_version}\t~{kleborate_docker}\t~{kleborate_key_resistance_genes}\t~{kleborate_genomic_resistance_mutations}\t~{kleborate_mlst_sequence_type}\t~{kleborate_klocus}\t~{kleborate_ktype}\t~{kleborate_olocus}\t~{kleborate_otype}\t~{kleborate_klocus_confidence}\t~{kleborate_olocus_confidence}\t~{kaptive_version}\t~{kaptive_output_file_k}\t~{kaptive_output_file_oc}\t~{kaptive_k_locus}\t~{kaptive_k_type}\t~{kaptive_kl_confidence}\t~{kaptive_oc_locus}\t~{kaptive_ocl_confidence}\t~{abricate_abaum_plasmid_tsv}\t~{abricate_abaum_plasmid_type_genes}\t~{abricate_database}\t~{abricate_version}\t~{abricate_docker}\t~{legsta_results}\t~{legsta_predicted_sbt}\t~{legsta_version}\t~{tbprofiler_output_file}\t~{tbprofiler_output_bam}\t~{tbprofiler_output_bai}\t~{tbprofiler_version}\t~{tbprofiler_main_lineage}\t~{tbprofiler_sub_lineage}\t~{tbprofiler_dr_type}\t~{tbprofiler_resistance_genes}\t~{amrfinderplus_all_report}\t~{amrfinderplus_amr_report}\t~{amrfinderplus_stress_report}\t~{amrfinderplus_virulence_report}\t~{amrfinderplus_version}\t~{amrfinderplus_db_version}\t~{amrfinderplus_amr_genes}\t~{amrfinderplus_stress_genes}\t~{amrfinderplus_virulence_genes}\t~{amrfinderplus_amr_classes}\t~{amrfinderplus_amr_subclasses}\t~{genotyphi_report_tsv}\t~{genotyphi_mykrobe_json}\t~{genotyphi_version}\t~{genotyphi_species}\t~{genotyphi_st_probes_percent_coverage}\t~{genotyphi_final_genotype}\t~{genotyphi_genotype_confidence}\t~{ani_highest_percent}\t~{ani_highest_percent_bases_aligned}\t~{ani_output_tsv}\t~{ani_top_species_match}\t~{ani_mummer_version}\t~{resfinder_pheno_table}\t~{resfinder_pheno_table_species}\t~{resfinder_seqs}\t~{resfinder_results}\t~{resfinder_pointfinder_pheno_table}\t~{resfinder_pointfinder_results}\t~{resfinder_db_version}\t~{resfinder_docker}\t~{prokka_gff}\t~{prokka_gbk}\t~{prokka_sqn}\t~{plasmidfinder_plasmids}\t~{plasmidfinder_results}\t~{plasmidfinder_seqs}\t~{plasmidfinder_docker}\t~{plasmidfinder_db_version}\t~{pbptyper_predicted_1A_2B_2X}\t~{pbptyper_pbptype_predicted_tsv}\t~{pbptyper_version}\t~{pbptyper_docker}\t~{poppunk_gps_cluster}\t~{poppunk_gps_external_cluster_csv}\t~{poppunk_GPS_db_version}\t~{poppunk_version}\t~{poppunk_docker}\t~{seroba_version}\t~{seroba_docker}\t~{seroba_serotype}\t~{seroba_ariba_serotype}\t~{seroba_ariba_identity}\t~{seroba_details}\t~{midas_docker}\t~{midas_report}\t~{midas_primary_genus}\t~{midas_secondary_genus}\t~{midas_secondary_genus_coverage}\t~{snippy_variants_outdir_tarball}" >> ~{samplename}_terra_table.tsv # modify file paths to GCP URIs sed -i 's/\/cromwell_root\//gs:\/\//g' ~{samplename}_terra_table.tsv # export table diff --git a/workflows/wf_snippy_tree.wdl b/workflows/wf_snippy_tree.wdl new file mode 100644 index 000000000..3c5459c01 --- /dev/null +++ b/workflows/wf_snippy_tree.wdl @@ -0,0 +1,81 @@ +version 1.0 + +import "../tasks/phylogenetic_inference/task_snippy_core.wdl" as snippy_core +import "../tasks/phylogenetic_inference/task_iqtree.wdl" as iqtree +import "../tasks/phylogenetic_inference/task_snp_dists.wdl" as snp_dists +import "../tasks/phylogenetic_inference/task_gubbins.wdl" as gubbins +import "../tasks/task_versioning.wdl" as versioning + +workflow snippy_tree_wf { + meta { + description: "Perform phylogenetic tree inference using iqtree (default) or snp-dist" + } + input { + String tree_name + Array[File] snippy_variants_outdir_tarball + Array[String] samplenames + File reference + Boolean use_gubbins = false + } + call snippy_core.snippy_core { + input: + snippy_variants_outdir_tarball = snippy_variants_outdir_tarball, + samplenames = samplenames, + reference = reference, + tree_name = tree_name + } + if (use_gubbins) { + call gubbins.gubbins { + input: + alignment = snippy_core.snippy_full_alignment_clean, + cluster_name = tree_name + } + call snp_dists.snp_dists as snp_dists_gubbins { + input: + alignment = gubbins.gubbins_polymorphic_fasta, + cluster_name = tree_name + } + } + if (!use_gubbins) { + call iqtree.iqtree { + input: + alignment = snippy_core.snippy_full_alignment_clean, + cluster_name = tree_name + } + call snp_dists.snp_dists as snp_dists_iqtree { + input: + alignment = snippy_core.snippy_full_alignment_clean, + cluster_name = tree_name + } + } + call versioning.version_capture{ + input: + } + output { + String snippy_tree_version = version_capture.phbg_version + String snippy_tree_snippy_version = snippy_core.snippy_version + File snippy_tree_core_alignment = snippy_core.snippy_core_alignment + File snippy_tree_full_alignment = snippy_core.snippy_full_alignment + File snippy_tree_clean_full_alignment = snippy_core.snippy_full_alignment_clean + File snippy_tree_ref = snippy_core.snippy_ref + File snippy_tree_all_snps = snippy_core.snippy_core_tab + File snippy_tree_snps_summary = snippy_core.snippy_txt + File snippy_tree_vcf = snippy_core.snippy_vcf + File? snippy_tree_iqtree = iqtree.ml_tree + String? snippy_tree_iqtree_version = iqtree.version + String? snippy_tree_snpdists_gubbins_version = snp_dists_gubbins.version + File? snippy_tree_snpdists_gubbins_matrix = snp_dists_gubbins.snp_matrix + File? snippy_tree_snpdists_gubbins_list = snp_dists_gubbins.snp_dists_molten_ordered + String? snippy_tree_snpdists_iqtree_version = snp_dists_iqtree.version + File? snippy_tree_snpdists_iqtree_matrix = snp_dists_iqtree.snp_matrix + File? snippy_tree_snpdists_iqtree_list = snp_dists_iqtree.snp_dists_molten_ordered + File? snippy_tree_gubbins_tree = gubbins.gubbins_final_tree + File? snippy_tree_gubbins_labelled_tree = gubbins.gubbins_final_labelled_tree + File? snippy_tree_gubbins_polymorphic_fasta = gubbins.gubbins_polymorphic_fasta + File? snippy_tree_gubbins_recombination_gff = gubbins.gubbins_recombination_gff + File? snippy_tree_gubbins_branch_stats = gubbins.gubbins_branch_stats + String? snippy_tree_gubbins_version = gubbins.version + File? snippy_tree_gubbins_timetree = gubbins.gubbins_timetree + File? snippy_tree_gubbins_timetree_stats = gubbins.gubbins_timetree_stats + } +} \ No newline at end of file diff --git a/workflows/wf_snippy_variants.wdl b/workflows/wf_snippy_variants.wdl index 5bf96fb12..71328c3bf 100644 --- a/workflows/wf_snippy_variants.wdl +++ b/workflows/wf_snippy_variants.wdl @@ -26,12 +26,13 @@ workflow snippy_variants_wf { output { String snippy_variants_wf_version = version_capture.phbg_version String snippy_version = snippy_variants.snippy_variants_version - String snippy_variant_query = snippy_variants.snippy_variants_query - String snippy_variant_hits = snippy_variants.snippy_variants_hits - File snippy_variant_gene_query_results = snippy_variants.snippy_variants_gene_query_results + String snippy_variants_query = snippy_variants.snippy_variants_query + String snippy_variants_hits = snippy_variants.snippy_variants_hits + File snippy_variants_gene_query_results = snippy_variants.snippy_variants_gene_query_results File snippy_results = snippy_variants.snippy_variants_results File snippy_bam = snippy_variants.snippy_variants_bam File snippy_bai = snippy_variants.snippy_variants_bai File snippy_variants_summary = snippy_variants.snippy_variants_summary + File snippy_variants_outdir_tarball = snippy_variants.snippy_variants_outdir_tarball } }