From 21b5b769e01cea8532c38fec0d964ba68d4cbbc6 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Wed, 7 Dec 2022 15:16:17 +0000 Subject: [PATCH 01/33] added snippy_outputs from snippy_variants task to snippy_variants wf --- workflows/wf_snippy_variants.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/workflows/wf_snippy_variants.wdl b/workflows/wf_snippy_variants.wdl index 5bf96fb12..30a44612a 100644 --- a/workflows/wf_snippy_variants.wdl +++ b/workflows/wf_snippy_variants.wdl @@ -24,6 +24,7 @@ workflow snippy_variants_wf { input: } output { + Array[File] snippy_outputs = snippy_variant.snippy_outputs String snippy_variants_wf_version = version_capture.phbg_version String snippy_version = snippy_variants.snippy_variants_version String snippy_variant_query = snippy_variants.snippy_variants_query From a774c171ea3ff82c84067ed6e07225e51c8258b7 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Thu, 8 Dec 2022 09:58:03 +0000 Subject: [PATCH 02/33] added a snippy core task that runs snippy-core and snippy-clean_full_aln. --- .../task_snippy_core.wdl | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 tasks/phylogenetic_inference/task_snippy_core.wdl diff --git a/tasks/phylogenetic_inference/task_snippy_core.wdl b/tasks/phylogenetic_inference/task_snippy_core.wdl new file mode 100644 index 000000000..4c32afa12 --- /dev/null +++ b/tasks/phylogenetic_inference/task_snippy_core.wdl @@ -0,0 +1,44 @@ +version 1.0 + +task snippy_core { + input { + Array[File] snippy_outputs + String treename + String docker = "staphb/snippy:4.6.0" + File reference + File? 
bed_file + } + command <<< + # version control + snippy --version | head -1 | tee VERSION + + # run snippy core + snippy-core \ + --prefix ~{treename} \ + ~{'--mask ' + bed_file} \ + --ref ~{reference} \ + ~{sep=" " snippy_outputs} + + # run snippy clean + snippy-clean_full_aln \ + ~{treename}.full.aln > ~{treename}_snippy_clean_full.aln + + >>> + output { + File snippy_core_alignment = "~{treename}.aln" + File snippy_full_alignment = "~{treename}.full.aln" + File snippy_full_alignment_clean = "~{treename}.full.aln" + File snippy_ref = "~{treename}.ref.fa" + File snippy_core_tab = "~{treename}.tab" + File snippy_txt = "~{treename}.txt" + File snippy_vcf = "~{treename}.vcf" + String snippy_docker_image = docker + } + runtime { + docker: "~{docker}" + memory: "8 GB" + cpu: 4 + disks: "local-disk 100 SSD" + preemptible: 0 + } +} From 2a3a1531270285ed61c42b195dc34821f48a5f45 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Thu, 8 Dec 2022 10:05:26 +0000 Subject: [PATCH 03/33] added snippy_outputs into terra tools table --- tasks/utilities/task_broad_terra_tools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tasks/utilities/task_broad_terra_tools.wdl b/tasks/utilities/task_broad_terra_tools.wdl index a046695d9..eaf93debb 100644 --- a/tasks/utilities/task_broad_terra_tools.wdl +++ b/tasks/utilities/task_broad_terra_tools.wdl @@ -206,9 +206,9 @@ task export_taxon_tables { if [ ! 
-z ${sample_table} ]; then # create single-entity sample data table ## header - echo -e "entity:${sample_table}_id\treads\tread1\tread2\tread1_clean\tread2_clean\trun_id\tcollection_date\toriginating_lab\tcity\tcounty\tzip\ttheiaprok_illumina_pe_version\ttheiaprok_illumina_pe_analysis_date\ttheiaprok_illumina_se_version\ttheiaprok_illumina_se_analysis_date\tseq_platform\tnum_reads_raw1\tnum_reads_raw2\tnum_reads_raw_pairs\tfastq_scan_version\tnum_reads_clean1\tnum_reads_clean2\tnum_reads_clean_pairs\ttrimmomatic_version\tbbduk_docker\tr1_mean_q\tr2_mean_q\tassembly_fasta\tcontigs_gfa\tshovill_pe_version\tshovill_se_version\tquast_report\tquast_version\tgenome_length\tnumber_contigs\tn50_value\tcg_pipeline_report\tcg_pipeline_docker\test_coverage\tgambit_report\tgambit_predicted_taxon\tgambit_predicted_taxon_rank\tgambit_closest_genomes\tgambit_version\tgambit_db_version\tgambit_docker\tbusco_version\tbusco_database\tbusco_results\tbusco_report\tts_mlst_results\tts_mlst_predicted_st\tts_mlst_pubmlst_scheme\tts_mlst_version\tserotypefinder_report\tserotypefinder_docker\tserotypefinder_serotype\tectyper_results\tectyper_version\tectyper_predicted_serotype\tlissero_results\tlissero_version\tlissero_serotype\tsistr_results\tsistr_allele_json\tsister_allele_fasta\tsistr_cgmlst\tsistr_version\tsistr_predicted_serotype\tseqsero2_report\tseqsero2_version\tseqsero2_predicted_antigenic_profile\tseqsero2_predicted_serotype\tseqsero2_predicted_contamination\tkleborate_output_file\tkleborate_version\tkleborate_docker\tkleborate_key_resistance_genes\tkleborate_genomic_resistance_mutations\tkleborate_mlst_sequence_type\tkleborate_klocus\tkleborate_ktype\tkleborate_olocus\tkleborate_otype\tkleborate_klocus_confidence\tkleborate_olocus_confidence\tkaptive_version\tkaptive_output_file_k\tkaptive_output_file_oc\tkaptive_k_locus\tkaptive_k_type\tkaptive_kl_confidence\tkaptive_oc_locus\tkaptive_ocl_confidence\tabricate_abaum_plasmid_tsv\tabricate_abaum_plasmid_type_genes\tabricate_databa
se\tabricate_version\tabricate_docker\tlegsta_results\tlegsta_predicted_sbt\tlegsta_version\ttbprofiler_output_file\ttbprofiler_output_bam\ttbprofiler_output_bai\ttbprofiler_version\ttbprofiler_main_lineage\ttbprofiler_sub_lineage\ttbprofiler_dr_type\ttbprofiler_resistance_genes\tamrfinderplus_all_report\tamrfinderplus_amr_report\tamrfinderplus_stress_report\tamrfinderplus_virulence_report\tamrfinderplus_version\tamrfinderplus_db_version\tamrfinderplus_amr_genes\tamrfinderplus_stress_genes\tamrfinderplus_virulence_genes\tamrfinderplus_amr_classes\tamrfinderplus_amr_subclasses\tgenotyphi_report_tsv\tgenotyphi_mykrobe_json\tgenotyphi_version\tgenotyphi_species\tgenotyphi_st_probes_percent_coverage\tgenotyphi_final_genotype\tgenotyphi_genotype_confidence\tani_highest_percent\tani_highest_percent_bases_aligned\tani_output_tsv\tani_top_species_match\tani_mummer_version\tresfinder_pheno_table\tresfinder_pheno_table_species\tresfinder_seqs\tresfinder_results\tresfinder_pointfinder_pheno_table\tresfinder_pointfinder_results\tresfinder_db_version\tresfinder_docker\tprokka_gff\tprokka_gbk\tprokka_sqn\tplasmidfinder_plasmids\tplasmidfinder_results\tplasmidfinder_seqs\tplasmidfinder_docker\tplasmidfinder_db_version\tpbptyper_predicted_1A_2B_2X\tpbptyper_pbptype_predicted_tsv\tpbptyper_version\tpbptyper_docker\tpoppunk_gps_cluster\tpoppunk_gps_external_cluster_csv\tpoppunk_GPS_db_version\tpoppunk_version\tpoppunk_docker\tseroba_version\tseroba_docker\tseroba_serotype\tseroba_ariba_serotype\tseroba_ariba_identity\tseroba_details\tmidas_docker\tmidas_report\tmidas_primary_genus\tmidas_secondary_genus\tmidas_secondary_genus_coverage" > ~{samplename}_terra_table.tsv + echo -e 
"entity:${sample_table}_id\treads\tread1\tread2\tread1_clean\tread2_clean\trun_id\tcollection_date\toriginating_lab\tcity\tcounty\tzip\ttheiaprok_illumina_pe_version\ttheiaprok_illumina_pe_analysis_date\ttheiaprok_illumina_se_version\ttheiaprok_illumina_se_analysis_date\tseq_platform\tnum_reads_raw1\tnum_reads_raw2\tnum_reads_raw_pairs\tfastq_scan_version\tnum_reads_clean1\tnum_reads_clean2\tnum_reads_clean_pairs\ttrimmomatic_version\tbbduk_docker\tr1_mean_q\tr2_mean_q\tassembly_fasta\tcontigs_gfa\tshovill_pe_version\tshovill_se_version\tquast_report\tquast_version\tgenome_length\tnumber_contigs\tn50_value\tcg_pipeline_report\tcg_pipeline_docker\test_coverage\tgambit_report\tgambit_predicted_taxon\tgambit_predicted_taxon_rank\tgambit_closest_genomes\tgambit_version\tgambit_db_version\tgambit_docker\tbusco_version\tbusco_database\tbusco_results\tbusco_report\tts_mlst_results\tts_mlst_predicted_st\tts_mlst_pubmlst_scheme\tts_mlst_version\tserotypefinder_report\tserotypefinder_docker\tserotypefinder_serotype\tectyper_results\tectyper_version\tectyper_predicted_serotype\tlissero_results\tlissero_version\tlissero_serotype\tsistr_results\tsistr_allele_json\tsister_allele_fasta\tsistr_cgmlst\tsistr_version\tsistr_predicted_serotype\tseqsero2_report\tseqsero2_version\tseqsero2_predicted_antigenic_profile\tseqsero2_predicted_serotype\tseqsero2_predicted_contamination\tkleborate_output_file\tkleborate_version\tkleborate_docker\tkleborate_key_resistance_genes\tkleborate_genomic_resistance_mutations\tkleborate_mlst_sequence_type\tkleborate_klocus\tkleborate_ktype\tkleborate_olocus\tkleborate_otype\tkleborate_klocus_confidence\tkleborate_olocus_confidence\tkaptive_version\tkaptive_output_file_k\tkaptive_output_file_oc\tkaptive_k_locus\tkaptive_k_type\tkaptive_kl_confidence\tkaptive_oc_locus\tkaptive_ocl_confidence\tabricate_abaum_plasmid_tsv\tabricate_abaum_plasmid_type_genes\tabricate_database\tabricate_version\tabricate_docker\tlegsta_results\tlegsta_predicted_sbt\tlegsta_vers
ion\ttbprofiler_output_file\ttbprofiler_output_bam\ttbprofiler_output_bai\ttbprofiler_version\ttbprofiler_main_lineage\ttbprofiler_sub_lineage\ttbprofiler_dr_type\ttbprofiler_resistance_genes\tamrfinderplus_all_report\tamrfinderplus_amr_report\tamrfinderplus_stress_report\tamrfinderplus_virulence_report\tamrfinderplus_version\tamrfinderplus_db_version\tamrfinderplus_amr_genes\tamrfinderplus_stress_genes\tamrfinderplus_virulence_genes\tamrfinderplus_amr_classes\tamrfinderplus_amr_subclasses\tgenotyphi_report_tsv\tgenotyphi_mykrobe_json\tgenotyphi_version\tgenotyphi_species\tgenotyphi_st_probes_percent_coverage\tgenotyphi_final_genotype\tgenotyphi_genotype_confidence\tani_highest_percent\tani_highest_percent_bases_aligned\tani_output_tsv\tani_top_species_match\tani_mummer_version\tresfinder_pheno_table\tresfinder_pheno_table_species\tresfinder_seqs\tresfinder_results\tresfinder_pointfinder_pheno_table\tresfinder_pointfinder_results\tresfinder_db_version\tresfinder_docker\tprokka_gff\tprokka_gbk\tprokka_sqn\tplasmidfinder_plasmids\tplasmidfinder_results\tplasmidfinder_seqs\tplasmidfinder_docker\tplasmidfinder_db_version\tpbptyper_predicted_1A_2B_2X\tpbptyper_pbptype_predicted_tsv\tpbptyper_version\tpbptyper_docker\tpoppunk_gps_cluster\tpoppunk_gps_external_cluster_csv\tpoppunk_GPS_db_version\tpoppunk_version\tpoppunk_docker\tseroba_version\tseroba_docker\tseroba_serotype\tseroba_ariba_serotype\tseroba_ariba_identity\tseroba_details\tmidas_docker\tmidas_report\tmidas_primary_genus\tmidas_secondary_genus\tmidas_secondary_genus_coverage\tsnippy_outputs" > ~{samplename}_terra_table.tsv ## TheiaProk Outs - echo -e 
"~{samplename}\t~{reads}\t~{read1}\t~{read2}\t~{read1_clean}\t~{read2_clean}\t~{run_id}\t~{collection_date}\t~{originating_lab}\t~{city}\t~{county}\t~{zip}\t~{theiaprok_illumina_pe_version}\t~{theiaprok_illumina_pe_analysis_date}\t~{theiaprok_illumina_se_version}\t~{theiaprok_illumina_se_analysis_date}\t~{seq_platform}\t~{num_reads_raw1}\t~{num_reads_raw2}\t~{num_reads_raw_pairs}\t~{fastq_scan_version}\t~{num_reads_clean1}\t~{num_reads_clean2}\t~{num_reads_clean_pairs}\t~{trimmomatic_version}\t~{bbduk_docker}\t~{r1_mean_q}\t~{r2_mean_q}\t~{assembly_fasta}\t~{contigs_gfa}\t~{shovill_pe_version}\t~{shovill_se_version}\t~{quast_report}\t~{quast_version}\t~{genome_length}\t~{number_contigs}\t~{n50_value}\t~{cg_pipeline_report}\t~{cg_pipeline_docker}\t~{est_coverage}\t~{gambit_report}\t~{gambit_predicted_taxon}\t~{gambit_predicted_taxon_rank}\t~{gambit_closest_genomes}\t~{gambit_version}\t~{gambit_db_version}\t~{gambit_docker}\t~{busco_version}\t~{busco_database}\t~{busco_results}\t~{busco_report}\t~{ts_mlst_results}\t~{ts_mlst_predicted_st}\t~{ts_mlst_pubmlst_scheme}\t~{ts_mlst_version}\t~{serotypefinder_report}\t~{serotypefinder_docker}\t~{serotypefinder_serotype}\t~{ectyper_results}\t~{ectyper_version}\t~{ectyper_predicted_serotype}\t~{lissero_results}\t~{lissero_version}\t~{lissero_serotype}\t~{sistr_results}\t~{sistr_allele_json}\t~{sister_allele_fasta}\t~{sistr_cgmlst}\t~{sistr_version}\t~{sistr_predicted_serotype}\t~{seqsero2_report}\t~{seqsero2_version}\t~{seqsero2_predicted_antigenic_profile}\t~{seqsero2_predicted_serotype}\t~{seqsero2_predicted_contamination}\t~{kleborate_output_file}\t~{kleborate_version}\t~{kleborate_docker}\t~{kleborate_key_resistance_genes}\t~{kleborate_genomic_resistance_mutations}\t~{kleborate_mlst_sequence_type}\t~{kleborate_klocus}\t~{kleborate_ktype}\t~{kleborate_olocus}\t~{kleborate_otype}\t~{kleborate_klocus_confidence}\t~{kleborate_olocus_confidence}\t~{kaptive_version}\t~{kaptive_output_file_k}\t~{kaptive_output_file_oc}\t~{kaptive
_k_locus}\t~{kaptive_k_type}\t~{kaptive_kl_confidence}\t~{kaptive_oc_locus}\t~{kaptive_ocl_confidence}\t~{abricate_abaum_plasmid_tsv}\t~{abricate_abaum_plasmid_type_genes}\t~{abricate_database}\t~{abricate_version}\t~{abricate_docker}\t~{legsta_results}\t~{legsta_predicted_sbt}\t~{legsta_version}\t~{tbprofiler_output_file}\t~{tbprofiler_output_bam}\t~{tbprofiler_output_bai}\t~{tbprofiler_version}\t~{tbprofiler_main_lineage}\t~{tbprofiler_sub_lineage}\t~{tbprofiler_dr_type}\t~{tbprofiler_resistance_genes}\t~{amrfinderplus_all_report}\t~{amrfinderplus_amr_report}\t~{amrfinderplus_stress_report}\t~{amrfinderplus_virulence_report}\t~{amrfinderplus_version}\t~{amrfinderplus_db_version}\t~{amrfinderplus_amr_genes}\t~{amrfinderplus_stress_genes}\t~{amrfinderplus_virulence_genes}\t~{amrfinderplus_amr_classes}\t~{amrfinderplus_amr_subclasses}\t~{genotyphi_report_tsv}\t~{genotyphi_mykrobe_json}\t~{genotyphi_version}\t~{genotyphi_species}\t~{genotyphi_st_probes_percent_coverage}\t~{genotyphi_final_genotype}\t~{genotyphi_genotype_confidence}\t~{ani_highest_percent}\t~{ani_highest_percent_bases_aligned}\t~{ani_output_tsv}\t~{ani_top_species_match}\t~{ani_mummer_version}\t~{resfinder_pheno_table}\t~{resfinder_pheno_table_species}\t~{resfinder_seqs}\t~{resfinder_results}\t~{resfinder_pointfinder_pheno_table}\t~{resfinder_pointfinder_results}\t~{resfinder_db_version}\t~{resfinder_docker}\t~{prokka_gff}\t~{prokka_gbk}\t~{prokka_sqn}\t~{plasmidfinder_plasmids}\t~{plasmidfinder_results}\t~{plasmidfinder_seqs}\t~{plasmidfinder_docker}\t~{plasmidfinder_db_version}\t~{pbptyper_predicted_1A_2B_2X}\t~{pbptyper_pbptype_predicted_tsv}\t~{pbptyper_version}\t~{pbptyper_docker}\t~{poppunk_gps_cluster}\t~{poppunk_gps_external_cluster_csv}\t~{poppunk_GPS_db_version}\t~{poppunk_version}\t~{poppunk_docker}\t~{seroba_version}\t~{seroba_docker}\t~{seroba_serotype}\t~{seroba_ariba_serotype}\t~{seroba_ariba_identity}\t~{seroba_details}\t~{midas_docker}\t~{midas_report}\t~{midas_primary_genus}\t~{midas_
secondary_genus}\t~{midas_secondary_genus_coverage}" >> ~{samplename}_terra_table.tsv + echo -e "~{samplename}\t~{reads}\t~{read1}\t~{read2}\t~{read1_clean}\t~{read2_clean}\t~{run_id}\t~{collection_date}\t~{originating_lab}\t~{city}\t~{county}\t~{zip}\t~{theiaprok_illumina_pe_version}\t~{theiaprok_illumina_pe_analysis_date}\t~{theiaprok_illumina_se_version}\t~{theiaprok_illumina_se_analysis_date}\t~{seq_platform}\t~{num_reads_raw1}\t~{num_reads_raw2}\t~{num_reads_raw_pairs}\t~{fastq_scan_version}\t~{num_reads_clean1}\t~{num_reads_clean2}\t~{num_reads_clean_pairs}\t~{trimmomatic_version}\t~{bbduk_docker}\t~{r1_mean_q}\t~{r2_mean_q}\t~{assembly_fasta}\t~{contigs_gfa}\t~{shovill_pe_version}\t~{shovill_se_version}\t~{quast_report}\t~{quast_version}\t~{genome_length}\t~{number_contigs}\t~{n50_value}\t~{cg_pipeline_report}\t~{cg_pipeline_docker}\t~{est_coverage}\t~{gambit_report}\t~{gambit_predicted_taxon}\t~{gambit_predicted_taxon_rank}\t~{gambit_closest_genomes}\t~{gambit_version}\t~{gambit_db_version}\t~{gambit_docker}\t~{busco_version}\t~{busco_database}\t~{busco_results}\t~{busco_report}\t~{ts_mlst_results}\t~{ts_mlst_predicted_st}\t~{ts_mlst_pubmlst_scheme}\t~{ts_mlst_version}\t~{serotypefinder_report}\t~{serotypefinder_docker}\t~{serotypefinder_serotype}\t~{ectyper_results}\t~{ectyper_version}\t~{ectyper_predicted_serotype}\t~{lissero_results}\t~{lissero_version}\t~{lissero_serotype}\t~{sistr_results}\t~{sistr_allele_json}\t~{sister_allele_fasta}\t~{sistr_cgmlst}\t~{sistr_version}\t~{sistr_predicted_serotype}\t~{seqsero2_report}\t~{seqsero2_version}\t~{seqsero2_predicted_antigenic_profile}\t~{seqsero2_predicted_serotype}\t~{seqsero2_predicted_contamination}\t~{kleborate_output_file}\t~{kleborate_version}\t~{kleborate_docker}\t~{kleborate_key_resistance_genes}\t~{kleborate_genomic_resistance_mutations}\t~{kleborate_mlst_sequence_type}\t~{kleborate_klocus}\t~{kleborate_ktype}\t~{kleborate_olocus}\t~{kleborate_otype}\t~{kleborate_klocus_confidence}\t~{kleborate_olocus
_confidence}\t~{kaptive_version}\t~{kaptive_output_file_k}\t~{kaptive_output_file_oc}\t~{kaptive_k_locus}\t~{kaptive_k_type}\t~{kaptive_kl_confidence}\t~{kaptive_oc_locus}\t~{kaptive_ocl_confidence}\t~{abricate_abaum_plasmid_tsv}\t~{abricate_abaum_plasmid_type_genes}\t~{abricate_database}\t~{abricate_version}\t~{abricate_docker}\t~{legsta_results}\t~{legsta_predicted_sbt}\t~{legsta_version}\t~{tbprofiler_output_file}\t~{tbprofiler_output_bam}\t~{tbprofiler_output_bai}\t~{tbprofiler_version}\t~{tbprofiler_main_lineage}\t~{tbprofiler_sub_lineage}\t~{tbprofiler_dr_type}\t~{tbprofiler_resistance_genes}\t~{amrfinderplus_all_report}\t~{amrfinderplus_amr_report}\t~{amrfinderplus_stress_report}\t~{amrfinderplus_virulence_report}\t~{amrfinderplus_version}\t~{amrfinderplus_db_version}\t~{amrfinderplus_amr_genes}\t~{amrfinderplus_stress_genes}\t~{amrfinderplus_virulence_genes}\t~{amrfinderplus_amr_classes}\t~{amrfinderplus_amr_subclasses}\t~{genotyphi_report_tsv}\t~{genotyphi_mykrobe_json}\t~{genotyphi_version}\t~{genotyphi_species}\t~{genotyphi_st_probes_percent_coverage}\t~{genotyphi_final_genotype}\t~{genotyphi_genotype_confidence}\t~{ani_highest_percent}\t~{ani_highest_percent_bases_aligned}\t~{ani_output_tsv}\t~{ani_top_species_match}\t~{ani_mummer_version}\t~{resfinder_pheno_table}\t~{resfinder_pheno_table_species}\t~{resfinder_seqs}\t~{resfinder_results}\t~{resfinder_pointfinder_pheno_table}\t~{resfinder_pointfinder_results}\t~{resfinder_db_version}\t~{resfinder_docker}\t~{prokka_gff}\t~{prokka_gbk}\t~{prokka_sqn}\t~{plasmidfinder_plasmids}\t~{plasmidfinder_results}\t~{plasmidfinder_seqs}\t~{plasmidfinder_docker}\t~{plasmidfinder_db_version}\t~{pbptyper_predicted_1A_2B_2X}\t~{pbptyper_pbptype_predicted_tsv}\t~{pbptyper_version}\t~{pbptyper_docker}\t~{poppunk_gps_cluster}\t~{poppunk_gps_external_cluster_csv}\t~{poppunk_GPS_db_version}\t~{poppunk_version}\t~{poppunk_docker}\t~{seroba_version}\t~{seroba_docker}\t~{seroba_serotype}\t~{seroba_ariba_serotype}\t~{seroba_ariba_
identity}\t~{seroba_details}\t~{midas_docker}\t~{midas_report}\t~{midas_primary_genus}\t~{midas_secondary_genus}\t~{midas_secondary_genus_coverage}\t~{snippy_outputs}" >> ~{samplename}_terra_table.tsv # modify file paths to GCP URIs sed -i 's/\/cromwell_root\//gs:\/\//g' ~{samplename}_terra_table.tsv # export table From 30f799b77f95ca2b4da552fda4e29255bbb44fef Mon Sep 17 00:00:00 2001 From: jrotieno Date: Thu, 8 Dec 2022 10:10:20 +0000 Subject: [PATCH 04/33] declared snippy_outputs in the input block --- tasks/utilities/task_broad_terra_tools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/tasks/utilities/task_broad_terra_tools.wdl b/tasks/utilities/task_broad_terra_tools.wdl index eaf93debb..a85173f02 100644 --- a/tasks/utilities/task_broad_terra_tools.wdl +++ b/tasks/utilities/task_broad_terra_tools.wdl @@ -177,6 +177,7 @@ task export_taxon_tables { String? midas_primary_genus String? midas_secondary_genus String? midas_secondary_genus_coverage + Array[File]? snippy_outputs } command <<< From f8ad3c9f43d245c833aac258f251e4e63faac8e5 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Thu, 8 Dec 2022 10:13:01 +0000 Subject: [PATCH 05/33] minor change in the snippy_variants_wf --- workflows/wf_snippy_variants.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/wf_snippy_variants.wdl b/workflows/wf_snippy_variants.wdl index 30a44612a..16b98fcc5 100644 --- a/workflows/wf_snippy_variants.wdl +++ b/workflows/wf_snippy_variants.wdl @@ -24,7 +24,7 @@ workflow snippy_variants_wf { input: } output { - Array[File] snippy_outputs = snippy_variant.snippy_outputs + Array[File] snippy_outputs = snippy_variants.snippy_outputs String snippy_variants_wf_version = version_capture.phbg_version String snippy_version = snippy_variants.snippy_variants_version String snippy_variant_query = snippy_variants.snippy_variants_query From bc7197db66895471183fb1085445eea1a9163656 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Thu, 8 Dec 2022 10:49:57 +0000 Subject: 
[PATCH 06/33] trying to capture output directories --- tasks/gene_typing/task_snippy_variants.wdl | 1 + tasks/utilities/task_broad_terra_tools.wdl | 5 +++-- workflows/wf_snippy_variants.wdl | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tasks/gene_typing/task_snippy_variants.wdl b/tasks/gene_typing/task_snippy_variants.wdl index 635b8ff98..128e8e5e6 100644 --- a/tasks/gene_typing/task_snippy_variants.wdl +++ b/tasks/gene_typing/task_snippy_variants.wdl @@ -76,6 +76,7 @@ task snippy_variants { String snippy_variants_query = "~{query_gene}" String snippy_variants_hits = read_string("SNIPPY_VARIANT_HITS") File snippy_variants_gene_query_results = "./gene_query.csv" + Array[File] snippy_output_dirs = glob("../~{samplename}") Array[File] snippy_outputs = glob("~{samplename}/~{samplename}*") File snippy_variants_results = "~{samplename}/~{samplename}.csv" File snippy_variants_bam = "~{samplename}/~{samplename}.bam" diff --git a/tasks/utilities/task_broad_terra_tools.wdl b/tasks/utilities/task_broad_terra_tools.wdl index a85173f02..fdc5afecc 100644 --- a/tasks/utilities/task_broad_terra_tools.wdl +++ b/tasks/utilities/task_broad_terra_tools.wdl @@ -178,6 +178,7 @@ task export_taxon_tables { String? midas_secondary_genus String? midas_secondary_genus_coverage Array[File]? snippy_outputs + Array[File]? snippy_output_dirs } command <<< @@ -207,9 +208,9 @@ task export_taxon_tables { if [ ! 
-z ${sample_table} ]; then # create single-entity sample data table ## header - echo -e "entity:${sample_table}_id\treads\tread1\tread2\tread1_clean\tread2_clean\trun_id\tcollection_date\toriginating_lab\tcity\tcounty\tzip\ttheiaprok_illumina_pe_version\ttheiaprok_illumina_pe_analysis_date\ttheiaprok_illumina_se_version\ttheiaprok_illumina_se_analysis_date\tseq_platform\tnum_reads_raw1\tnum_reads_raw2\tnum_reads_raw_pairs\tfastq_scan_version\tnum_reads_clean1\tnum_reads_clean2\tnum_reads_clean_pairs\ttrimmomatic_version\tbbduk_docker\tr1_mean_q\tr2_mean_q\tassembly_fasta\tcontigs_gfa\tshovill_pe_version\tshovill_se_version\tquast_report\tquast_version\tgenome_length\tnumber_contigs\tn50_value\tcg_pipeline_report\tcg_pipeline_docker\test_coverage\tgambit_report\tgambit_predicted_taxon\tgambit_predicted_taxon_rank\tgambit_closest_genomes\tgambit_version\tgambit_db_version\tgambit_docker\tbusco_version\tbusco_database\tbusco_results\tbusco_report\tts_mlst_results\tts_mlst_predicted_st\tts_mlst_pubmlst_scheme\tts_mlst_version\tserotypefinder_report\tserotypefinder_docker\tserotypefinder_serotype\tectyper_results\tectyper_version\tectyper_predicted_serotype\tlissero_results\tlissero_version\tlissero_serotype\tsistr_results\tsistr_allele_json\tsister_allele_fasta\tsistr_cgmlst\tsistr_version\tsistr_predicted_serotype\tseqsero2_report\tseqsero2_version\tseqsero2_predicted_antigenic_profile\tseqsero2_predicted_serotype\tseqsero2_predicted_contamination\tkleborate_output_file\tkleborate_version\tkleborate_docker\tkleborate_key_resistance_genes\tkleborate_genomic_resistance_mutations\tkleborate_mlst_sequence_type\tkleborate_klocus\tkleborate_ktype\tkleborate_olocus\tkleborate_otype\tkleborate_klocus_confidence\tkleborate_olocus_confidence\tkaptive_version\tkaptive_output_file_k\tkaptive_output_file_oc\tkaptive_k_locus\tkaptive_k_type\tkaptive_kl_confidence\tkaptive_oc_locus\tkaptive_ocl_confidence\tabricate_abaum_plasmid_tsv\tabricate_abaum_plasmid_type_genes\tabricate_databa
se\tabricate_version\tabricate_docker\tlegsta_results\tlegsta_predicted_sbt\tlegsta_version\ttbprofiler_output_file\ttbprofiler_output_bam\ttbprofiler_output_bai\ttbprofiler_version\ttbprofiler_main_lineage\ttbprofiler_sub_lineage\ttbprofiler_dr_type\ttbprofiler_resistance_genes\tamrfinderplus_all_report\tamrfinderplus_amr_report\tamrfinderplus_stress_report\tamrfinderplus_virulence_report\tamrfinderplus_version\tamrfinderplus_db_version\tamrfinderplus_amr_genes\tamrfinderplus_stress_genes\tamrfinderplus_virulence_genes\tamrfinderplus_amr_classes\tamrfinderplus_amr_subclasses\tgenotyphi_report_tsv\tgenotyphi_mykrobe_json\tgenotyphi_version\tgenotyphi_species\tgenotyphi_st_probes_percent_coverage\tgenotyphi_final_genotype\tgenotyphi_genotype_confidence\tani_highest_percent\tani_highest_percent_bases_aligned\tani_output_tsv\tani_top_species_match\tani_mummer_version\tresfinder_pheno_table\tresfinder_pheno_table_species\tresfinder_seqs\tresfinder_results\tresfinder_pointfinder_pheno_table\tresfinder_pointfinder_results\tresfinder_db_version\tresfinder_docker\tprokka_gff\tprokka_gbk\tprokka_sqn\tplasmidfinder_plasmids\tplasmidfinder_results\tplasmidfinder_seqs\tplasmidfinder_docker\tplasmidfinder_db_version\tpbptyper_predicted_1A_2B_2X\tpbptyper_pbptype_predicted_tsv\tpbptyper_version\tpbptyper_docker\tpoppunk_gps_cluster\tpoppunk_gps_external_cluster_csv\tpoppunk_GPS_db_version\tpoppunk_version\tpoppunk_docker\tseroba_version\tseroba_docker\tseroba_serotype\tseroba_ariba_serotype\tseroba_ariba_identity\tseroba_details\tmidas_docker\tmidas_report\tmidas_primary_genus\tmidas_secondary_genus\tmidas_secondary_genus_coverage\tsnippy_outputs" > ~{samplename}_terra_table.tsv + echo -e 
"entity:${sample_table}_id\treads\tread1\tread2\tread1_clean\tread2_clean\trun_id\tcollection_date\toriginating_lab\tcity\tcounty\tzip\ttheiaprok_illumina_pe_version\ttheiaprok_illumina_pe_analysis_date\ttheiaprok_illumina_se_version\ttheiaprok_illumina_se_analysis_date\tseq_platform\tnum_reads_raw1\tnum_reads_raw2\tnum_reads_raw_pairs\tfastq_scan_version\tnum_reads_clean1\tnum_reads_clean2\tnum_reads_clean_pairs\ttrimmomatic_version\tbbduk_docker\tr1_mean_q\tr2_mean_q\tassembly_fasta\tcontigs_gfa\tshovill_pe_version\tshovill_se_version\tquast_report\tquast_version\tgenome_length\tnumber_contigs\tn50_value\tcg_pipeline_report\tcg_pipeline_docker\test_coverage\tgambit_report\tgambit_predicted_taxon\tgambit_predicted_taxon_rank\tgambit_closest_genomes\tgambit_version\tgambit_db_version\tgambit_docker\tbusco_version\tbusco_database\tbusco_results\tbusco_report\tts_mlst_results\tts_mlst_predicted_st\tts_mlst_pubmlst_scheme\tts_mlst_version\tserotypefinder_report\tserotypefinder_docker\tserotypefinder_serotype\tectyper_results\tectyper_version\tectyper_predicted_serotype\tlissero_results\tlissero_version\tlissero_serotype\tsistr_results\tsistr_allele_json\tsister_allele_fasta\tsistr_cgmlst\tsistr_version\tsistr_predicted_serotype\tseqsero2_report\tseqsero2_version\tseqsero2_predicted_antigenic_profile\tseqsero2_predicted_serotype\tseqsero2_predicted_contamination\tkleborate_output_file\tkleborate_version\tkleborate_docker\tkleborate_key_resistance_genes\tkleborate_genomic_resistance_mutations\tkleborate_mlst_sequence_type\tkleborate_klocus\tkleborate_ktype\tkleborate_olocus\tkleborate_otype\tkleborate_klocus_confidence\tkleborate_olocus_confidence\tkaptive_version\tkaptive_output_file_k\tkaptive_output_file_oc\tkaptive_k_locus\tkaptive_k_type\tkaptive_kl_confidence\tkaptive_oc_locus\tkaptive_ocl_confidence\tabricate_abaum_plasmid_tsv\tabricate_abaum_plasmid_type_genes\tabricate_database\tabricate_version\tabricate_docker\tlegsta_results\tlegsta_predicted_sbt\tlegsta_vers
ion\ttbprofiler_output_file\ttbprofiler_output_bam\ttbprofiler_output_bai\ttbprofiler_version\ttbprofiler_main_lineage\ttbprofiler_sub_lineage\ttbprofiler_dr_type\ttbprofiler_resistance_genes\tamrfinderplus_all_report\tamrfinderplus_amr_report\tamrfinderplus_stress_report\tamrfinderplus_virulence_report\tamrfinderplus_version\tamrfinderplus_db_version\tamrfinderplus_amr_genes\tamrfinderplus_stress_genes\tamrfinderplus_virulence_genes\tamrfinderplus_amr_classes\tamrfinderplus_amr_subclasses\tgenotyphi_report_tsv\tgenotyphi_mykrobe_json\tgenotyphi_version\tgenotyphi_species\tgenotyphi_st_probes_percent_coverage\tgenotyphi_final_genotype\tgenotyphi_genotype_confidence\tani_highest_percent\tani_highest_percent_bases_aligned\tani_output_tsv\tani_top_species_match\tani_mummer_version\tresfinder_pheno_table\tresfinder_pheno_table_species\tresfinder_seqs\tresfinder_results\tresfinder_pointfinder_pheno_table\tresfinder_pointfinder_results\tresfinder_db_version\tresfinder_docker\tprokka_gff\tprokka_gbk\tprokka_sqn\tplasmidfinder_plasmids\tplasmidfinder_results\tplasmidfinder_seqs\tplasmidfinder_docker\tplasmidfinder_db_version\tpbptyper_predicted_1A_2B_2X\tpbptyper_pbptype_predicted_tsv\tpbptyper_version\tpbptyper_docker\tpoppunk_gps_cluster\tpoppunk_gps_external_cluster_csv\tpoppunk_GPS_db_version\tpoppunk_version\tpoppunk_docker\tseroba_version\tseroba_docker\tseroba_serotype\tseroba_ariba_serotype\tseroba_ariba_identity\tseroba_details\tmidas_docker\tmidas_report\tmidas_primary_genus\tmidas_secondary_genus\tmidas_secondary_genus_coverage\tsnippy_outputs\tsnippy_output_dirs" > ~{samplename}_terra_table.tsv ## TheiaProk Outs - echo -e 
"~{samplename}\t~{reads}\t~{read1}\t~{read2}\t~{read1_clean}\t~{read2_clean}\t~{run_id}\t~{collection_date}\t~{originating_lab}\t~{city}\t~{county}\t~{zip}\t~{theiaprok_illumina_pe_version}\t~{theiaprok_illumina_pe_analysis_date}\t~{theiaprok_illumina_se_version}\t~{theiaprok_illumina_se_analysis_date}\t~{seq_platform}\t~{num_reads_raw1}\t~{num_reads_raw2}\t~{num_reads_raw_pairs}\t~{fastq_scan_version}\t~{num_reads_clean1}\t~{num_reads_clean2}\t~{num_reads_clean_pairs}\t~{trimmomatic_version}\t~{bbduk_docker}\t~{r1_mean_q}\t~{r2_mean_q}\t~{assembly_fasta}\t~{contigs_gfa}\t~{shovill_pe_version}\t~{shovill_se_version}\t~{quast_report}\t~{quast_version}\t~{genome_length}\t~{number_contigs}\t~{n50_value}\t~{cg_pipeline_report}\t~{cg_pipeline_docker}\t~{est_coverage}\t~{gambit_report}\t~{gambit_predicted_taxon}\t~{gambit_predicted_taxon_rank}\t~{gambit_closest_genomes}\t~{gambit_version}\t~{gambit_db_version}\t~{gambit_docker}\t~{busco_version}\t~{busco_database}\t~{busco_results}\t~{busco_report}\t~{ts_mlst_results}\t~{ts_mlst_predicted_st}\t~{ts_mlst_pubmlst_scheme}\t~{ts_mlst_version}\t~{serotypefinder_report}\t~{serotypefinder_docker}\t~{serotypefinder_serotype}\t~{ectyper_results}\t~{ectyper_version}\t~{ectyper_predicted_serotype}\t~{lissero_results}\t~{lissero_version}\t~{lissero_serotype}\t~{sistr_results}\t~{sistr_allele_json}\t~{sister_allele_fasta}\t~{sistr_cgmlst}\t~{sistr_version}\t~{sistr_predicted_serotype}\t~{seqsero2_report}\t~{seqsero2_version}\t~{seqsero2_predicted_antigenic_profile}\t~{seqsero2_predicted_serotype}\t~{seqsero2_predicted_contamination}\t~{kleborate_output_file}\t~{kleborate_version}\t~{kleborate_docker}\t~{kleborate_key_resistance_genes}\t~{kleborate_genomic_resistance_mutations}\t~{kleborate_mlst_sequence_type}\t~{kleborate_klocus}\t~{kleborate_ktype}\t~{kleborate_olocus}\t~{kleborate_otype}\t~{kleborate_klocus_confidence}\t~{kleborate_olocus_confidence}\t~{kaptive_version}\t~{kaptive_output_file_k}\t~{kaptive_output_file_oc}\t~{kaptive
_k_locus}\t~{kaptive_k_type}\t~{kaptive_kl_confidence}\t~{kaptive_oc_locus}\t~{kaptive_ocl_confidence}\t~{abricate_abaum_plasmid_tsv}\t~{abricate_abaum_plasmid_type_genes}\t~{abricate_database}\t~{abricate_version}\t~{abricate_docker}\t~{legsta_results}\t~{legsta_predicted_sbt}\t~{legsta_version}\t~{tbprofiler_output_file}\t~{tbprofiler_output_bam}\t~{tbprofiler_output_bai}\t~{tbprofiler_version}\t~{tbprofiler_main_lineage}\t~{tbprofiler_sub_lineage}\t~{tbprofiler_dr_type}\t~{tbprofiler_resistance_genes}\t~{amrfinderplus_all_report}\t~{amrfinderplus_amr_report}\t~{amrfinderplus_stress_report}\t~{amrfinderplus_virulence_report}\t~{amrfinderplus_version}\t~{amrfinderplus_db_version}\t~{amrfinderplus_amr_genes}\t~{amrfinderplus_stress_genes}\t~{amrfinderplus_virulence_genes}\t~{amrfinderplus_amr_classes}\t~{amrfinderplus_amr_subclasses}\t~{genotyphi_report_tsv}\t~{genotyphi_mykrobe_json}\t~{genotyphi_version}\t~{genotyphi_species}\t~{genotyphi_st_probes_percent_coverage}\t~{genotyphi_final_genotype}\t~{genotyphi_genotype_confidence}\t~{ani_highest_percent}\t~{ani_highest_percent_bases_aligned}\t~{ani_output_tsv}\t~{ani_top_species_match}\t~{ani_mummer_version}\t~{resfinder_pheno_table}\t~{resfinder_pheno_table_species}\t~{resfinder_seqs}\t~{resfinder_results}\t~{resfinder_pointfinder_pheno_table}\t~{resfinder_pointfinder_results}\t~{resfinder_db_version}\t~{resfinder_docker}\t~{prokka_gff}\t~{prokka_gbk}\t~{prokka_sqn}\t~{plasmidfinder_plasmids}\t~{plasmidfinder_results}\t~{plasmidfinder_seqs}\t~{plasmidfinder_docker}\t~{plasmidfinder_db_version}\t~{pbptyper_predicted_1A_2B_2X}\t~{pbptyper_pbptype_predicted_tsv}\t~{pbptyper_version}\t~{pbptyper_docker}\t~{poppunk_gps_cluster}\t~{poppunk_gps_external_cluster_csv}\t~{poppunk_GPS_db_version}\t~{poppunk_version}\t~{poppunk_docker}\t~{seroba_version}\t~{seroba_docker}\t~{seroba_serotype}\t~{seroba_ariba_serotype}\t~{seroba_ariba_identity}\t~{seroba_details}\t~{midas_docker}\t~{midas_report}\t~{midas_primary_genus}\t~{midas_
secondary_genus}\t~{midas_secondary_genus_coverage}\t~{snippy_outputs}" >> ~{samplename}_terra_table.tsv + echo -e "~{samplename}\t~{reads}\t~{read1}\t~{read2}\t~{read1_clean}\t~{read2_clean}\t~{run_id}\t~{collection_date}\t~{originating_lab}\t~{city}\t~{county}\t~{zip}\t~{theiaprok_illumina_pe_version}\t~{theiaprok_illumina_pe_analysis_date}\t~{theiaprok_illumina_se_version}\t~{theiaprok_illumina_se_analysis_date}\t~{seq_platform}\t~{num_reads_raw1}\t~{num_reads_raw2}\t~{num_reads_raw_pairs}\t~{fastq_scan_version}\t~{num_reads_clean1}\t~{num_reads_clean2}\t~{num_reads_clean_pairs}\t~{trimmomatic_version}\t~{bbduk_docker}\t~{r1_mean_q}\t~{r2_mean_q}\t~{assembly_fasta}\t~{contigs_gfa}\t~{shovill_pe_version}\t~{shovill_se_version}\t~{quast_report}\t~{quast_version}\t~{genome_length}\t~{number_contigs}\t~{n50_value}\t~{cg_pipeline_report}\t~{cg_pipeline_docker}\t~{est_coverage}\t~{gambit_report}\t~{gambit_predicted_taxon}\t~{gambit_predicted_taxon_rank}\t~{gambit_closest_genomes}\t~{gambit_version}\t~{gambit_db_version}\t~{gambit_docker}\t~{busco_version}\t~{busco_database}\t~{busco_results}\t~{busco_report}\t~{ts_mlst_results}\t~{ts_mlst_predicted_st}\t~{ts_mlst_pubmlst_scheme}\t~{ts_mlst_version}\t~{serotypefinder_report}\t~{serotypefinder_docker}\t~{serotypefinder_serotype}\t~{ectyper_results}\t~{ectyper_version}\t~{ectyper_predicted_serotype}\t~{lissero_results}\t~{lissero_version}\t~{lissero_serotype}\t~{sistr_results}\t~{sistr_allele_json}\t~{sister_allele_fasta}\t~{sistr_cgmlst}\t~{sistr_version}\t~{sistr_predicted_serotype}\t~{seqsero2_report}\t~{seqsero2_version}\t~{seqsero2_predicted_antigenic_profile}\t~{seqsero2_predicted_serotype}\t~{seqsero2_predicted_contamination}\t~{kleborate_output_file}\t~{kleborate_version}\t~{kleborate_docker}\t~{kleborate_key_resistance_genes}\t~{kleborate_genomic_resistance_mutations}\t~{kleborate_mlst_sequence_type}\t~{kleborate_klocus}\t~{kleborate_ktype}\t~{kleborate_olocus}\t~{kleborate_otype}\t~{kleborate_klocus_confidence}\
t~{kleborate_olocus_confidence}\t~{kaptive_version}\t~{kaptive_output_file_k}\t~{kaptive_output_file_oc}\t~{kaptive_k_locus}\t~{kaptive_k_type}\t~{kaptive_kl_confidence}\t~{kaptive_oc_locus}\t~{kaptive_ocl_confidence}\t~{abricate_abaum_plasmid_tsv}\t~{abricate_abaum_plasmid_type_genes}\t~{abricate_database}\t~{abricate_version}\t~{abricate_docker}\t~{legsta_results}\t~{legsta_predicted_sbt}\t~{legsta_version}\t~{tbprofiler_output_file}\t~{tbprofiler_output_bam}\t~{tbprofiler_output_bai}\t~{tbprofiler_version}\t~{tbprofiler_main_lineage}\t~{tbprofiler_sub_lineage}\t~{tbprofiler_dr_type}\t~{tbprofiler_resistance_genes}\t~{amrfinderplus_all_report}\t~{amrfinderplus_amr_report}\t~{amrfinderplus_stress_report}\t~{amrfinderplus_virulence_report}\t~{amrfinderplus_version}\t~{amrfinderplus_db_version}\t~{amrfinderplus_amr_genes}\t~{amrfinderplus_stress_genes}\t~{amrfinderplus_virulence_genes}\t~{amrfinderplus_amr_classes}\t~{amrfinderplus_amr_subclasses}\t~{genotyphi_report_tsv}\t~{genotyphi_mykrobe_json}\t~{genotyphi_version}\t~{genotyphi_species}\t~{genotyphi_st_probes_percent_coverage}\t~{genotyphi_final_genotype}\t~{genotyphi_genotype_confidence}\t~{ani_highest_percent}\t~{ani_highest_percent_bases_aligned}\t~{ani_output_tsv}\t~{ani_top_species_match}\t~{ani_mummer_version}\t~{resfinder_pheno_table}\t~{resfinder_pheno_table_species}\t~{resfinder_seqs}\t~{resfinder_results}\t~{resfinder_pointfinder_pheno_table}\t~{resfinder_pointfinder_results}\t~{resfinder_db_version}\t~{resfinder_docker}\t~{prokka_gff}\t~{prokka_gbk}\t~{prokka_sqn}\t~{plasmidfinder_plasmids}\t~{plasmidfinder_results}\t~{plasmidfinder_seqs}\t~{plasmidfinder_docker}\t~{plasmidfinder_db_version}\t~{pbptyper_predicted_1A_2B_2X}\t~{pbptyper_pbptype_predicted_tsv}\t~{pbptyper_version}\t~{pbptyper_docker}\t~{poppunk_gps_cluster}\t~{poppunk_gps_external_cluster_csv}\t~{poppunk_GPS_db_version}\t~{poppunk_version}\t~{poppunk_docker}\t~{seroba_version}\t~{seroba_docker}\t~{seroba_serotype}\t~{seroba_ariba_serotyp
e}\t~{seroba_ariba_identity}\t~{seroba_details}\t~{midas_docker}\t~{midas_report}\t~{midas_primary_genus}\t~{midas_secondary_genus}\t~{midas_secondary_genus_coverage}\t~{snippy_outputs}\t~{snippy_output_dirs}" >> ~{samplename}_terra_table.tsv # modify file paths to GCP URIs sed -i 's/\/cromwell_root\//gs:\/\//g' ~{samplename}_terra_table.tsv # export table diff --git a/workflows/wf_snippy_variants.wdl b/workflows/wf_snippy_variants.wdl index 16b98fcc5..d8a1f9702 100644 --- a/workflows/wf_snippy_variants.wdl +++ b/workflows/wf_snippy_variants.wdl @@ -25,6 +25,7 @@ workflow snippy_variants_wf { } output { Array[File] snippy_outputs = snippy_variants.snippy_outputs + Array[File] snippy_output_dirs = snippy_variants.snippy_output_dirs String snippy_variants_wf_version = version_capture.phbg_version String snippy_version = snippy_variants.snippy_variants_version String snippy_variant_query = snippy_variants.snippy_variants_query From 9ef69d72d915163265d37bb00093dbaabc9b713d Mon Sep 17 00:00:00 2001 From: frankambrosio3 Date: Thu, 8 Dec 2022 17:35:09 +0000 Subject: [PATCH 07/33] compress output dir from snippy --- tasks/gene_typing/task_snippy_variants.wdl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tasks/gene_typing/task_snippy_variants.wdl b/tasks/gene_typing/task_snippy_variants.wdl index 128e8e5e6..1d7294f22 100644 --- a/tasks/gene_typing/task_snippy_variants.wdl +++ b/tasks/gene_typing/task_snippy_variants.wdl @@ -8,8 +8,8 @@ task snippy_variants { String? 
query_gene String samplename String docker = "staphb/snippy:4.6.0" - Int cpus = 4 - Int memory = 16 + Int cpus = 8 + Int memory = 32 # Paramters # --map_qual: Minimum read mapping quality to consider (default '60') # --base_quality: Minimum base quality to consider (default '13') @@ -70,14 +70,17 @@ task snippy_variants { else echo "${no_hit}" > SNIPPY_VARIANT_HITS fi + # Compress output dir + tar -cvzf "./~{samplename}_snippy_outdir.tar" "./~{samplename}/*" >>> output { String snippy_variants_version = read_string("VERSION") String snippy_variants_query = "~{query_gene}" String snippy_variants_hits = read_string("SNIPPY_VARIANT_HITS") + File snippy_outdir_tarball = "./~{samplename}_snippy_outdir.tar" File snippy_variants_gene_query_results = "./gene_query.csv" Array[File] snippy_output_dirs = glob("../~{samplename}") - Array[File] snippy_outputs = glob("~{samplename}/~{samplename}*") + Array[File] snippy_outputs = glob("~{samplename}/~{samplename}*") File snippy_variants_results = "~{samplename}/~{samplename}.csv" File snippy_variants_bam = "~{samplename}/~{samplename}.bam" File snippy_variants_bai ="~{samplename}/~{samplename}.bam.bai" From 53b21bddca5ec72e1ca9d6e6ab0837078423666c Mon Sep 17 00:00:00 2001 From: frankambrosio3 Date: Thu, 8 Dec 2022 17:35:32 +0000 Subject: [PATCH 08/33] added snippyoutput dir to wf --- workflows/wf_snippy_variants.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/workflows/wf_snippy_variants.wdl b/workflows/wf_snippy_variants.wdl index d8a1f9702..d2a4757f3 100644 --- a/workflows/wf_snippy_variants.wdl +++ b/workflows/wf_snippy_variants.wdl @@ -24,8 +24,8 @@ workflow snippy_variants_wf { input: } output { - Array[File] snippy_outputs = snippy_variants.snippy_outputs - Array[File] snippy_output_dirs = snippy_variants.snippy_output_dirs + # Array[File] snippy_outputs = snippy_variants.snippy_outputs + # Array[File] snippy_output_dirs = snippy_variants.snippy_output_dirs String snippy_variants_wf_version = 
version_capture.phbg_version String snippy_version = snippy_variants.snippy_variants_version String snippy_variant_query = snippy_variants.snippy_variants_query @@ -35,5 +35,6 @@ workflow snippy_variants_wf { File snippy_bam = snippy_variants.snippy_variants_bam File snippy_bai = snippy_variants.snippy_variants_bai File snippy_variants_summary = snippy_variants.snippy_variants_summary + File snippy_outdir_tarball = snippy_variants.snippy_outdir_tarball } } From 3d7f634ba75815504ff0ec22f12429ef38f93611 Mon Sep 17 00:00:00 2001 From: frankambrosio3 Date: Thu, 8 Dec 2022 17:50:30 +0000 Subject: [PATCH 09/33] for loop added to untar and make input list --- tasks/phylogenetic_inference/task_snippy_core.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tasks/phylogenetic_inference/task_snippy_core.wdl b/tasks/phylogenetic_inference/task_snippy_core.wdl index 4c32afa12..65c96942d 100644 --- a/tasks/phylogenetic_inference/task_snippy_core.wdl +++ b/tasks/phylogenetic_inference/task_snippy_core.wdl @@ -2,7 +2,7 @@ version 1.0 task snippy_core { input { - Array[File] snippy_outputs + Array[File] snippy_output_tarball_array String treename String docker = "staphb/snippy:4.6.0" File reference @@ -12,6 +12,10 @@ task snippy_core { # version control snippy --version | head -1 | tee VERSION + tarball_array=(~{sep=" " snippy_output_tarball_array}) + + # iteratively untar and create list for input to snippy core + for i in ${tarball_array[@]}; do echo $i | cut -d "." 
-f1 >> ; do tar -xf $i; done # run snippy core snippy-core \ --prefix ~{treename} \ From 5321cb4fab5c5588a6dbfdb4f6eb62a77d640ab8 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 13 Dec 2022 10:34:33 +0000 Subject: [PATCH 10/33] fixed an error in the tarball file creation --- tasks/gene_typing/task_snippy_variants.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tasks/gene_typing/task_snippy_variants.wdl b/tasks/gene_typing/task_snippy_variants.wdl index 1d7294f22..d3abb4cb8 100644 --- a/tasks/gene_typing/task_snippy_variants.wdl +++ b/tasks/gene_typing/task_snippy_variants.wdl @@ -71,7 +71,7 @@ task snippy_variants { echo "${no_hit}" > SNIPPY_VARIANT_HITS fi # Compress output dir - tar -cvzf "./~{samplename}_snippy_outdir.tar" "./~{samplename}/*" + tar -cvzf "./~{samplename}_snippy_outdir.tar" "./~{samplename}" >>> output { String snippy_variants_version = read_string("VERSION") @@ -79,8 +79,8 @@ task snippy_variants { String snippy_variants_hits = read_string("SNIPPY_VARIANT_HITS") File snippy_outdir_tarball = "./~{samplename}_snippy_outdir.tar" File snippy_variants_gene_query_results = "./gene_query.csv" - Array[File] snippy_output_dirs = glob("../~{samplename}") - Array[File] snippy_outputs = glob("~{samplename}/~{samplename}*") + #Array[File] snippy_output_dirs = glob("../~{samplename}") + #Array[File] snippy_outputs = glob("~{samplename}/~{samplename}*") File snippy_variants_results = "~{samplename}/~{samplename}.csv" File snippy_variants_bam = "~{samplename}/~{samplename}.bam" File snippy_variants_bai ="~{samplename}/~{samplename}.bam.bai" From f0c92c9d8e6cb07f11aa59e76de2111798effe88 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 13 Dec 2022 21:10:27 +0000 Subject: [PATCH 11/33] take in an array of tar files from snippy_variants, untar, then run snippy-core and snippy-clean --- tasks/phylogenetic_inference/task_snippy_core.wdl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git
a/tasks/phylogenetic_inference/task_snippy_core.wdl b/tasks/phylogenetic_inference/task_snippy_core.wdl index 65c96942d..c60a5fdd3 100644 --- a/tasks/phylogenetic_inference/task_snippy_core.wdl +++ b/tasks/phylogenetic_inference/task_snippy_core.wdl @@ -3,6 +3,7 @@ version 1.0 task snippy_core { input { Array[File] snippy_output_tarball_array + Array[String] samplenames String treename String docker = "staphb/snippy:4.6.0" File reference @@ -13,15 +14,18 @@ task snippy_core { snippy --version | head -1 | tee VERSION tarball_array=(~{sep=" " snippy_output_tarball_array}) + samplename_array=(~{sep=" " samplenames}) + + # iteratively untar + #for i in ${tarball_array[@]}; do tar -xf $i && mv ./${i/_snippy_outdir.tar/} $i"_inputdir"; done + for i in ${tarball_array[@]}; do tar -xf $i; done - # iteratively untar and create list for input to snippy core - for i in ${tarball_array[@]}; do echo $i | cut -d "." -f1 >> ; do tar -xf $i; done # run snippy core snippy-core \ --prefix ~{treename} \ ~{'--mask ' + bed_file} \ --ref ~{reference} \ - ~{sep=" " snippy_outputs} + "${samplename_array[@]}" # run snippy clean snippy-clean_full_aln \ @@ -29,9 +33,10 @@ task snippy_core { >>> output { + String snippy_variants_version = read_string("VERSION") File snippy_core_alignment = "~{treename}.aln" File snippy_full_alignment = "~{treename}.full.aln" - File snippy_full_alignment_clean = "~{treename}.full.aln" + File snippy_full_alignment_clean = "~{treename}_snippy_clean_full.aln" File snippy_ref = "~{treename}.ref.fa" File snippy_core_tab = "~{treename}.tab" File snippy_txt = "~{treename}.txt" From 6c4fd203bd3b8ab36ff91737002ca98bcd557eef Mon Sep 17 00:00:00 2001 From: jrotieno Date: Thu, 15 Dec 2022 09:05:05 +0000 Subject: [PATCH 12/33] adding quotation around a variable --- tasks/phylogenetic_inference/task_iqtree.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/phylogenetic_inference/task_iqtree.wdl b/tasks/phylogenetic_inference/task_iqtree.wdl index 
a0eb8d4bb..a0d816d28 100644 --- a/tasks/phylogenetic_inference/task_iqtree.wdl +++ b/tasks/phylogenetic_inference/task_iqtree.wdl @@ -16,7 +16,7 @@ task iqtree { iqtree --version | grep version | sed 's/.*version/version/;s/ for Linux.*//' | tee VERSION numGenomes=`grep -o '>' ~{alignment} | wc -l` - if [ $numGenomes -gt 3 ] + if [ "$numGenomes" -gt 3 ] then cp ~{alignment} ./msa.fasta iqtree \ From aaa1d75a260c00d9724359034e7dd7c48a5aac3b Mon Sep 17 00:00:00 2001 From: jrotieno Date: Thu, 15 Dec 2022 09:05:39 +0000 Subject: [PATCH 13/33] A snippy tree workflow that calls iqtree and snp_dists tasks --- workflows/wf_snippy_tree.wdl | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 workflows/wf_snippy_tree.wdl diff --git a/workflows/wf_snippy_tree.wdl b/workflows/wf_snippy_tree.wdl new file mode 100644 index 000000000..5176e8833 --- /dev/null +++ b/workflows/wf_snippy_tree.wdl @@ -0,0 +1,32 @@ +version 1.0 + +import "../tasks/phylogenetic_inference/task_iqtree.wdl" as iq_tree +import "../tasks/phylogenetic_inference/task_snp_dists.wdl" as snpdists + +workflow snippy_tree_wf { + meta { + description: "Perform phylogenetic tree inference using iqtree (default) or snp-dist" + } + input { + File alignment + String cluster_name + + } + call iq_tree.iqtree { + input: + alignment = alignment, + cluster_name = cluster_name + } + call snpdists.snp_dists{ + input: + alignment = alignment, + cluster_name = cluster_name + } + output { + File snippy_iqtree = iqtree.ml_tree + String snippy_iqtree_version = iqtree.version + String snippy_snpdists_version = snp_dists.version + File snippy_snpdists_matrix = snp_dists.snp_matrix + File snippy_snpdists_molten_ordered = snp_dists.snp_dists_molten_ordered + } +} \ No newline at end of file From d44d5221115be52461bc37775b0dcc0756fc0c43 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Thu, 15 Dec 2022 13:00:45 +0000 Subject: [PATCH 14/33] variable name updates "variant" to "variants" --- 
workflows/wf_snippy_variants.wdl | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/workflows/wf_snippy_variants.wdl b/workflows/wf_snippy_variants.wdl index d2a4757f3..7c0a217ba 100644 --- a/workflows/wf_snippy_variants.wdl +++ b/workflows/wf_snippy_variants.wdl @@ -24,13 +24,11 @@ workflow snippy_variants_wf { input: } output { - # Array[File] snippy_outputs = snippy_variants.snippy_outputs - # Array[File] snippy_output_dirs = snippy_variants.snippy_output_dirs String snippy_variants_wf_version = version_capture.phbg_version String snippy_version = snippy_variants.snippy_variants_version - String snippy_variant_query = snippy_variants.snippy_variants_query - String snippy_variant_hits = snippy_variants.snippy_variants_hits - File snippy_variant_gene_query_results = snippy_variants.snippy_variants_gene_query_results + String snippy_variants_query = snippy_variants.snippy_variants_query + String snippy_variants_hits = snippy_variants.snippy_variants_hits + File snippy_variants_gene_query_results = snippy_variants.snippy_variants_gene_query_results File snippy_results = snippy_variants.snippy_variants_results File snippy_bam = snippy_variants.snippy_variants_bam File snippy_bai = snippy_variants.snippy_variants_bai From cb5e72953ef1bc384a7bf3cc9bb797c9793dc275 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Thu, 15 Dec 2022 13:01:39 +0000 Subject: [PATCH 15/33] removal of no longer needed and commented out code blocks --- tasks/gene_typing/task_snippy_variants.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/tasks/gene_typing/task_snippy_variants.wdl b/tasks/gene_typing/task_snippy_variants.wdl index d3abb4cb8..01401ce5c 100644 --- a/tasks/gene_typing/task_snippy_variants.wdl +++ b/tasks/gene_typing/task_snippy_variants.wdl @@ -79,8 +79,6 @@ task snippy_variants { String snippy_variants_hits = read_string("SNIPPY_VARIANT_HITS") File snippy_outdir_tarball = "./~{samplename}_snippy_outdir.tar" File snippy_variants_gene_query_results =
"./gene_query.csv" - #Array[File] snippy_output_dirs = glob("../~{samplename}") - #Array[File] snippy_outputs = glob("~{samplename}/~{samplename}*") File snippy_variants_results = "~{samplename}/~{samplename}.csv" File snippy_variants_bam = "~{samplename}/~{samplename}.bam" File snippy_variants_bai ="~{samplename}/~{samplename}.bam.bai" From 9caccc7983cfa53284d67eec0cd4975f9cb48011 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Thu, 15 Dec 2022 13:02:48 +0000 Subject: [PATCH 16/33] variable name changes "treename" to "tree_name" --- .../task_snippy_core.wdl | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tasks/phylogenetic_inference/task_snippy_core.wdl b/tasks/phylogenetic_inference/task_snippy_core.wdl index c60a5fdd3..47193b640 100644 --- a/tasks/phylogenetic_inference/task_snippy_core.wdl +++ b/tasks/phylogenetic_inference/task_snippy_core.wdl @@ -4,7 +4,7 @@ task snippy_core { input { Array[File] snippy_output_tarball_array Array[String] samplenames - String treename + String tree_name String docker = "staphb/snippy:4.6.0" File reference File? 
bed_file @@ -22,25 +22,25 @@ task snippy_core { # run snippy core snippy-core \ - --prefix ~{treename} \ + --prefix ~{tree_name} \ ~{'--mask ' + bed_file} \ --ref ~{reference} \ "${samplename_array[@]}" # run snippy clean snippy-clean_full_aln \ - ~{treename}.full.aln > ~{treename}_snippy_clean_full.aln + ~{tree_name}.full.aln > ~{tree_name}_snippy_clean_full.aln >>> output { - String snippy_variants_version = read_string("VERSION") - File snippy_core_alignment = "~{treename}.aln" - File snippy_full_alignment = "~{treename}.full.aln" - File snippy_full_alignment_clean = "~{treename}_snippy_clean_full.aln" - File snippy_ref = "~{treename}.ref.fa" - File snippy_core_tab = "~{treename}.tab" - File snippy_txt = "~{treename}.txt" - File snippy_vcf = "~{treename}.vcf" + String snippy_version = read_string("VERSION") + File snippy_core_alignment = "~{tree_name}.aln" + File snippy_full_alignment = "~{tree_name}.full.aln" + File snippy_full_alignment_clean = "~{tree_name}_snippy_clean_full.aln" + File snippy_ref = "~{tree_name}.ref.fa" + File snippy_core_tab = "~{tree_name}.tab" + File snippy_txt = "~{tree_name}.txt" + File snippy_vcf = "~{tree_name}.vcf" String snippy_docker_image = docker } runtime { From ee507d5039c9f5ce3ac2996e09917efd87bf2409 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Thu, 15 Dec 2022 13:03:35 +0000 Subject: [PATCH 17/33] added snippy_core task and versioning task --- workflows/wf_snippy_tree.wdl | 54 +++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/workflows/wf_snippy_tree.wdl b/workflows/wf_snippy_tree.wdl index 5176e8833..9f5bf4357 100644 --- a/workflows/wf_snippy_tree.wdl +++ b/workflows/wf_snippy_tree.wdl @@ -1,32 +1,54 @@ version 1.0 -import "../tasks/phylogenetic_inference/task_iqtree.wdl" as iq_tree -import "../tasks/phylogenetic_inference/task_snp_dists.wdl" as snpdists +import "../tasks/phylogenetic_inference/task_snippy_core.wdl" as snippy_core +import 
"../tasks/phylogenetic_inference/task_iqtree.wdl" as iqtree +import "../tasks/phylogenetic_inference/task_snp_dists.wdl" as snp_dists +import "../tasks/task_versioning.wdl" as versioning workflow snippy_tree_wf { meta { description: "Perform phylogenetic tree inference using iqtree (default) or snp-dist" } input { - File alignment - String cluster_name - + String tree_name + Array[File] snippy_output_tarball_array + Array[String] samplenames + File reference + } + call snippy_core.snippy_core { + input: + snippy_output_tarball_array = snippy_output_tarball_array, + samplenames = samplenames, + reference = reference, + tree_name = tree_name + } + call iqtree.iqtree { + input: + alignment = snippy_core.snippy_full_alignment_clean, + cluster_name = tree_name } - call iq_tree.iqtree { + call snp_dists.snp_dists{ input: - alignment = alignment, - cluster_name = cluster_name + alignment = snippy_core.snippy_full_alignment_clean, + cluster_name = tree_name } - call snpdists.snp_dists{ + call versioning.version_capture{ input: - alignment = alignment, - cluster_name = cluster_name } output { - File snippy_iqtree = iqtree.ml_tree - String snippy_iqtree_version = iqtree.version - String snippy_snpdists_version = snp_dists.version - File snippy_snpdists_matrix = snp_dists.snp_matrix - File snippy_snpdists_molten_ordered = snp_dists.snp_dists_molten_ordered + String snippy_tree_version = version_capture.phbg_version + String snippy_tree_snippy_version = snippy_core.snippy_version + File snippy_tree_alignment = snippy_core.snippy_core_alignment + File snippy_tree_full_alignment = snippy_core.snippy_full_alignment + File snippy_tree_full_alignment_clean = snippy_core.snippy_full_alignment_clean + File snippy_tree_ref = snippy_core.snippy_ref + File snippy_tree_tab = snippy_core.snippy_core_tab + File snippy_tree_txt = snippy_core.snippy_txt + File snippy_tree_vcf = snippy_core.snippy_vcf + File snippy_tree_iqtree = iqtree.ml_tree + String snippy_tree_iqtree_version = 
iqtree.version + String snippy_tree_snpdists_version = snp_dists.version + File snippy_tree_snpdists_matrix = snp_dists.snp_matrix + File snippy_tree_snpdists_molten_ordered = snp_dists.snp_dists_molten_ordered } } \ No newline at end of file From 137b5b290b88cbb4f679cab763971e894aafc15d Mon Sep 17 00:00:00 2001 From: jrotieno Date: Thu, 15 Dec 2022 14:02:00 +0000 Subject: [PATCH 18/33] adding snippy tree wf to dockstore --- .dockstore.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.dockstore.yml b/.dockstore.yml index 30eee24be..3bc33f593 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -13,5 +13,10 @@ workflows: - name: Snippy_Variants subclass: WDL primaryDescriptorPath: /workflows/wf_snippy_variants.wdl + testParameterFiles: + - empty.json +- name: Snippy_Tree + subclass: WDL + primaryDescriptorPath: /workflows/wf_snippy_tree.wdl testParameterFiles: - empty.json \ No newline at end of file From 08e90fe8236f175ad8195903169b229094b1d431 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Thu, 15 Dec 2022 14:10:44 +0000 Subject: [PATCH 19/33] typo correction --- .dockstore.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.dockstore.yml b/.dockstore.yml index 3bc33f593..314772792 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -15,7 +15,7 @@ workflows: primaryDescriptorPath: /workflows/wf_snippy_variants.wdl testParameterFiles: - empty.json -- name: Snippy_Tree + - name: Snippy_Tree subclass: WDL primaryDescriptorPath: /workflows/wf_snippy_tree.wdl testParameterFiles: From bcc2bc801540cac6cc474f2c48bac6ddbc5bbe89 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Sun, 18 Dec 2022 07:50:47 +0000 Subject: [PATCH 20/33] changes in variable and output file suffix names --- tasks/gene_typing/task_snippy_variants.wdl | 6 +++--- tasks/phylogenetic_inference/task_iqtree.wdl | 2 +- tasks/phylogenetic_inference/task_snippy_core.wdl | 12 ++++++------ tasks/phylogenetic_inference/task_snp_dists.wdl | 2 +- workflows/wf_snippy_tree.wdl | 14
+++++++------- workflows/wf_snippy_variants.wdl | 2 +- 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/tasks/gene_typing/task_snippy_variants.wdl b/tasks/gene_typing/task_snippy_variants.wdl index 01401ce5c..08dba3e56 100644 --- a/tasks/gene_typing/task_snippy_variants.wdl +++ b/tasks/gene_typing/task_snippy_variants.wdl @@ -36,7 +36,7 @@ task snippy_variants { if [ -z "~{query_gene}" ]; then no_hit="NA: No query gene was provided" else - no_hit="No variants identified in quieried genes (~{query_gene})" + no_hit="No variants identified in queried genes (~{query_gene})" fi # call snippy snippy \ @@ -71,13 +71,13 @@ task snippy_variants { echo "${no_hit}" > SNIPPY_VARIANT_HITS fi # Compress output dir - tar -cvzf "./~{samplename}_snippy_outdir.tar" "./~{samplename}" + tar -cvzf "./~{samplename}_snippy_variants_outdir.tar" "./~{samplename}" >>> output { String snippy_variants_version = read_string("VERSION") String snippy_variants_query = "~{query_gene}" String snippy_variants_hits = read_string("SNIPPY_VARIANT_HITS") - File snippy_outdir_tarball = "./~{samplename}_snippy_outdir.tar" + File snippy_variants_outdir_tarball = "./~{samplename}_snippy_variants_outdir.tar" File snippy_variants_gene_query_results = "./gene_query.csv" File snippy_variants_results = "~{samplename}/~{samplename}.csv" File snippy_variants_bam = "~{samplename}/~{samplename}.bam" diff --git a/tasks/phylogenetic_inference/task_iqtree.wdl b/tasks/phylogenetic_inference/task_iqtree.wdl index a0d816d28..a3c9135a6 100644 --- a/tasks/phylogenetic_inference/task_iqtree.wdl +++ b/tasks/phylogenetic_inference/task_iqtree.wdl @@ -33,7 +33,7 @@ task iqtree { output { String date = read_string("DATE") String version = read_string("VERSION") - File ml_tree = "~{cluster_name}_msa.tree" + File ml_tree = "~{cluster_name}_iqtree.tree" } runtime { docker: "~{docker}" diff --git a/tasks/phylogenetic_inference/task_snippy_core.wdl b/tasks/phylogenetic_inference/task_snippy_core.wdl index 
47193b640..1c380544c 100644 --- a/tasks/phylogenetic_inference/task_snippy_core.wdl +++ b/tasks/phylogenetic_inference/task_snippy_core.wdl @@ -2,7 +2,7 @@ version 1.0 task snippy_core { input { - Array[File] snippy_output_tarball_array + Array[File] snippy_variants_outdir_tarball Array[String] samplenames String tree_name String docker = "staphb/snippy:4.6.0" @@ -13,7 +13,7 @@ task snippy_core { # version control snippy --version | head -1 | tee VERSION - tarball_array=(~{sep=" " snippy_output_tarball_array}) + tarball_array=(~{sep=" " snippy_variants_outdir_tarball}) samplename_array=(~{sep=" " samplenames}) # iteratively untar @@ -34,12 +34,12 @@ task snippy_core { >>> output { String snippy_version = read_string("VERSION") - File snippy_core_alignment = "~{tree_name}.aln" - File snippy_full_alignment = "~{tree_name}.full.aln" + File snippy_core_alignment = "~{tree_name}_core.aln" + File snippy_full_alignment = "~{tree_name}_full.aln" File snippy_full_alignment_clean = "~{tree_name}_snippy_clean_full.aln" File snippy_ref = "~{tree_name}.ref.fa" - File snippy_core_tab = "~{tree_name}.tab" - File snippy_txt = "~{tree_name}.txt" + File snippy_core_tab = "~{tree_name}_all_snps.tsv" + File snippy_txt = "~{tree_name}_snps_summary.txt" File snippy_vcf = "~{tree_name}.vcf" String snippy_docker_image = docker } diff --git a/tasks/phylogenetic_inference/task_snp_dists.wdl b/tasks/phylogenetic_inference/task_snp_dists.wdl index 1e7d22332..a330de91e 100644 --- a/tasks/phylogenetic_inference/task_snp_dists.wdl +++ b/tasks/phylogenetic_inference/task_snp_dists.wdl @@ -100,7 +100,7 @@ task snp_dists { String date = read_string("DATE") String version = read_string("VERSION") File snp_matrix = "${cluster_name}_snp_distance_matrix.tsv" - File snp_dists_molten_ordered = "snp-dists-molten-ordered.tsv" + File snp_dists_molten_ordered = "${cluster_name}_snp-dists_list.tsv" } runtime { docker: "quay.io/staphb/snp-dists:0.8.2" diff --git a/workflows/wf_snippy_tree.wdl 
b/workflows/wf_snippy_tree.wdl index 9f5bf4357..075bbe0f0 100644 --- a/workflows/wf_snippy_tree.wdl +++ b/workflows/wf_snippy_tree.wdl @@ -11,13 +11,13 @@ workflow snippy_tree_wf { } input { String tree_name - Array[File] snippy_output_tarball_array + Array[File] snippy_variants_outdir_tarball Array[String] samplenames File reference } call snippy_core.snippy_core { input: - snippy_output_tarball_array = snippy_output_tarball_array, + snippy_variants_outdir_tarball = snippy_variants_outdir_tarball, samplenames = samplenames, reference = reference, tree_name = tree_name @@ -38,17 +38,17 @@ workflow snippy_tree_wf { output { String snippy_tree_version = version_capture.phbg_version String snippy_tree_snippy_version = snippy_core.snippy_version - File snippy_tree_alignment = snippy_core.snippy_core_alignment + File snippy_tree_core_alignment = snippy_core.snippy_core_alignment File snippy_tree_full_alignment = snippy_core.snippy_full_alignment - File snippy_tree_full_alignment_clean = snippy_core.snippy_full_alignment_clean + File snippy_tree_clean_full_alignment = snippy_core.snippy_full_alignment_clean File snippy_tree_ref = snippy_core.snippy_ref - File snippy_tree_tab = snippy_core.snippy_core_tab - File snippy_tree_txt = snippy_core.snippy_txt + File snippy_tree_all_snps = snippy_core.snippy_core_tab + File snippy_tree_snps_summary = snippy_core.snippy_txt File snippy_tree_vcf = snippy_core.snippy_vcf File snippy_tree_iqtree = iqtree.ml_tree String snippy_tree_iqtree_version = iqtree.version String snippy_tree_snpdists_version = snp_dists.version File snippy_tree_snpdists_matrix = snp_dists.snp_matrix - File snippy_tree_snpdists_molten_ordered = snp_dists.snp_dists_molten_ordered + File snippy_tree_snpdists_list = snp_dists.snp_dists_molten_ordered } } \ No newline at end of file diff --git a/workflows/wf_snippy_variants.wdl b/workflows/wf_snippy_variants.wdl index 7c0a217ba..71328c3bf 100644 --- a/workflows/wf_snippy_variants.wdl +++ 
b/workflows/wf_snippy_variants.wdl @@ -33,6 +33,6 @@ workflow snippy_variants_wf { File snippy_bam = snippy_variants.snippy_variants_bam File snippy_bai = snippy_variants.snippy_variants_bai File snippy_variants_summary = snippy_variants.snippy_variants_summary - File snippy_outdir_tarball = snippy_variants.snippy_outdir_tarball + File snippy_variants_outdir_tarball = snippy_variants.snippy_variants_outdir_tarball } } From 667c7d9366fb13e4278316224def1035ec06fa18 Mon Sep 17 00:00:00 2001 From: Emma Doughty Date: Mon, 19 Dec 2022 15:00:42 +0000 Subject: [PATCH 21/33] Update task_snippy_core.wdl Fixed alignment and removed commented-out line --- tasks/phylogenetic_inference/task_snippy_core.wdl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tasks/phylogenetic_inference/task_snippy_core.wdl b/tasks/phylogenetic_inference/task_snippy_core.wdl index 1c380544c..6cb371250 100644 --- a/tasks/phylogenetic_inference/task_snippy_core.wdl +++ b/tasks/phylogenetic_inference/task_snippy_core.wdl @@ -10,14 +10,13 @@ task snippy_core { File? 
bed_file } command <<< - # version control + # version control snippy --version | head -1 | tee VERSION tarball_array=(~{sep=" " snippy_variants_outdir_tarball}) samplename_array=(~{sep=" " samplenames}) - # iteratively untar - #for i in ${tarball_array[@]}; do tar -xf $i && mv ./${i/_snippy_outdir.tar/} $i"_inputdir"; done + # iteratively untar for i in ${tarball_array[@]}; do tar -xf $i; done # run snippy core From 2195d3979ac9883125cd7864396389b3b77c521a Mon Sep 17 00:00:00 2001 From: jrotieno Date: Mon, 19 Dec 2022 17:39:56 +0000 Subject: [PATCH 22/33] small variable edits --- tasks/utilities/task_broad_terra_tools.wdl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tasks/utilities/task_broad_terra_tools.wdl b/tasks/utilities/task_broad_terra_tools.wdl index fdc5afecc..4290ecb44 100644 --- a/tasks/utilities/task_broad_terra_tools.wdl +++ b/tasks/utilities/task_broad_terra_tools.wdl @@ -177,8 +177,7 @@ task export_taxon_tables { String? midas_primary_genus String? midas_secondary_genus String? midas_secondary_genus_coverage - Array[File]? snippy_outputs - Array[File]? snippy_output_dirs + Array[File]? snippy_variants_outdir_tarball } command <<< @@ -208,9 +207,9 @@ task export_taxon_tables { if [ ! 
-z ${sample_table} ]; then # create single-entity sample data table ## header - echo -e "entity:${sample_table}_id\treads\tread1\tread2\tread1_clean\tread2_clean\trun_id\tcollection_date\toriginating_lab\tcity\tcounty\tzip\ttheiaprok_illumina_pe_version\ttheiaprok_illumina_pe_analysis_date\ttheiaprok_illumina_se_version\ttheiaprok_illumina_se_analysis_date\tseq_platform\tnum_reads_raw1\tnum_reads_raw2\tnum_reads_raw_pairs\tfastq_scan_version\tnum_reads_clean1\tnum_reads_clean2\tnum_reads_clean_pairs\ttrimmomatic_version\tbbduk_docker\tr1_mean_q\tr2_mean_q\tassembly_fasta\tcontigs_gfa\tshovill_pe_version\tshovill_se_version\tquast_report\tquast_version\tgenome_length\tnumber_contigs\tn50_value\tcg_pipeline_report\tcg_pipeline_docker\test_coverage\tgambit_report\tgambit_predicted_taxon\tgambit_predicted_taxon_rank\tgambit_closest_genomes\tgambit_version\tgambit_db_version\tgambit_docker\tbusco_version\tbusco_database\tbusco_results\tbusco_report\tts_mlst_results\tts_mlst_predicted_st\tts_mlst_pubmlst_scheme\tts_mlst_version\tserotypefinder_report\tserotypefinder_docker\tserotypefinder_serotype\tectyper_results\tectyper_version\tectyper_predicted_serotype\tlissero_results\tlissero_version\tlissero_serotype\tsistr_results\tsistr_allele_json\tsister_allele_fasta\tsistr_cgmlst\tsistr_version\tsistr_predicted_serotype\tseqsero2_report\tseqsero2_version\tseqsero2_predicted_antigenic_profile\tseqsero2_predicted_serotype\tseqsero2_predicted_contamination\tkleborate_output_file\tkleborate_version\tkleborate_docker\tkleborate_key_resistance_genes\tkleborate_genomic_resistance_mutations\tkleborate_mlst_sequence_type\tkleborate_klocus\tkleborate_ktype\tkleborate_olocus\tkleborate_otype\tkleborate_klocus_confidence\tkleborate_olocus_confidence\tkaptive_version\tkaptive_output_file_k\tkaptive_output_file_oc\tkaptive_k_locus\tkaptive_k_type\tkaptive_kl_confidence\tkaptive_oc_locus\tkaptive_ocl_confidence\tabricate_abaum_plasmid_tsv\tabricate_abaum_plasmid_type_genes\tabricate_databa
se\tabricate_version\tabricate_docker\tlegsta_results\tlegsta_predicted_sbt\tlegsta_version\ttbprofiler_output_file\ttbprofiler_output_bam\ttbprofiler_output_bai\ttbprofiler_version\ttbprofiler_main_lineage\ttbprofiler_sub_lineage\ttbprofiler_dr_type\ttbprofiler_resistance_genes\tamrfinderplus_all_report\tamrfinderplus_amr_report\tamrfinderplus_stress_report\tamrfinderplus_virulence_report\tamrfinderplus_version\tamrfinderplus_db_version\tamrfinderplus_amr_genes\tamrfinderplus_stress_genes\tamrfinderplus_virulence_genes\tamrfinderplus_amr_classes\tamrfinderplus_amr_subclasses\tgenotyphi_report_tsv\tgenotyphi_mykrobe_json\tgenotyphi_version\tgenotyphi_species\tgenotyphi_st_probes_percent_coverage\tgenotyphi_final_genotype\tgenotyphi_genotype_confidence\tani_highest_percent\tani_highest_percent_bases_aligned\tani_output_tsv\tani_top_species_match\tani_mummer_version\tresfinder_pheno_table\tresfinder_pheno_table_species\tresfinder_seqs\tresfinder_results\tresfinder_pointfinder_pheno_table\tresfinder_pointfinder_results\tresfinder_db_version\tresfinder_docker\tprokka_gff\tprokka_gbk\tprokka_sqn\tplasmidfinder_plasmids\tplasmidfinder_results\tplasmidfinder_seqs\tplasmidfinder_docker\tplasmidfinder_db_version\tpbptyper_predicted_1A_2B_2X\tpbptyper_pbptype_predicted_tsv\tpbptyper_version\tpbptyper_docker\tpoppunk_gps_cluster\tpoppunk_gps_external_cluster_csv\tpoppunk_GPS_db_version\tpoppunk_version\tpoppunk_docker\tseroba_version\tseroba_docker\tseroba_serotype\tseroba_ariba_serotype\tseroba_ariba_identity\tseroba_details\tmidas_docker\tmidas_report\tmidas_primary_genus\tmidas_secondary_genus\tmidas_secondary_genus_coverage\tsnippy_outputs\tsnippy_output_dirs" > ~{samplename}_terra_table.tsv + echo -e 
"entity:${sample_table}_id\treads\tread1\tread2\tread1_clean\tread2_clean\trun_id\tcollection_date\toriginating_lab\tcity\tcounty\tzip\ttheiaprok_illumina_pe_version\ttheiaprok_illumina_pe_analysis_date\ttheiaprok_illumina_se_version\ttheiaprok_illumina_se_analysis_date\tseq_platform\tnum_reads_raw1\tnum_reads_raw2\tnum_reads_raw_pairs\tfastq_scan_version\tnum_reads_clean1\tnum_reads_clean2\tnum_reads_clean_pairs\ttrimmomatic_version\tbbduk_docker\tr1_mean_q\tr2_mean_q\tassembly_fasta\tcontigs_gfa\tshovill_pe_version\tshovill_se_version\tquast_report\tquast_version\tgenome_length\tnumber_contigs\tn50_value\tcg_pipeline_report\tcg_pipeline_docker\test_coverage\tgambit_report\tgambit_predicted_taxon\tgambit_predicted_taxon_rank\tgambit_closest_genomes\tgambit_version\tgambit_db_version\tgambit_docker\tbusco_version\tbusco_database\tbusco_results\tbusco_report\tts_mlst_results\tts_mlst_predicted_st\tts_mlst_pubmlst_scheme\tts_mlst_version\tserotypefinder_report\tserotypefinder_docker\tserotypefinder_serotype\tectyper_results\tectyper_version\tectyper_predicted_serotype\tlissero_results\tlissero_version\tlissero_serotype\tsistr_results\tsistr_allele_json\tsister_allele_fasta\tsistr_cgmlst\tsistr_version\tsistr_predicted_serotype\tseqsero2_report\tseqsero2_version\tseqsero2_predicted_antigenic_profile\tseqsero2_predicted_serotype\tseqsero2_predicted_contamination\tkleborate_output_file\tkleborate_version\tkleborate_docker\tkleborate_key_resistance_genes\tkleborate_genomic_resistance_mutations\tkleborate_mlst_sequence_type\tkleborate_klocus\tkleborate_ktype\tkleborate_olocus\tkleborate_otype\tkleborate_klocus_confidence\tkleborate_olocus_confidence\tkaptive_version\tkaptive_output_file_k\tkaptive_output_file_oc\tkaptive_k_locus\tkaptive_k_type\tkaptive_kl_confidence\tkaptive_oc_locus\tkaptive_ocl_confidence\tabricate_abaum_plasmid_tsv\tabricate_abaum_plasmid_type_genes\tabricate_database\tabricate_version\tabricate_docker\tlegsta_results\tlegsta_predicted_sbt\tlegsta_vers
ion\ttbprofiler_output_file\ttbprofiler_output_bam\ttbprofiler_output_bai\ttbprofiler_version\ttbprofiler_main_lineage\ttbprofiler_sub_lineage\ttbprofiler_dr_type\ttbprofiler_resistance_genes\tamrfinderplus_all_report\tamrfinderplus_amr_report\tamrfinderplus_stress_report\tamrfinderplus_virulence_report\tamrfinderplus_version\tamrfinderplus_db_version\tamrfinderplus_amr_genes\tamrfinderplus_stress_genes\tamrfinderplus_virulence_genes\tamrfinderplus_amr_classes\tamrfinderplus_amr_subclasses\tgenotyphi_report_tsv\tgenotyphi_mykrobe_json\tgenotyphi_version\tgenotyphi_species\tgenotyphi_st_probes_percent_coverage\tgenotyphi_final_genotype\tgenotyphi_genotype_confidence\tani_highest_percent\tani_highest_percent_bases_aligned\tani_output_tsv\tani_top_species_match\tani_mummer_version\tresfinder_pheno_table\tresfinder_pheno_table_species\tresfinder_seqs\tresfinder_results\tresfinder_pointfinder_pheno_table\tresfinder_pointfinder_results\tresfinder_db_version\tresfinder_docker\tprokka_gff\tprokka_gbk\tprokka_sqn\tplasmidfinder_plasmids\tplasmidfinder_results\tplasmidfinder_seqs\tplasmidfinder_docker\tplasmidfinder_db_version\tpbptyper_predicted_1A_2B_2X\tpbptyper_pbptype_predicted_tsv\tpbptyper_version\tpbptyper_docker\tpoppunk_gps_cluster\tpoppunk_gps_external_cluster_csv\tpoppunk_GPS_db_version\tpoppunk_version\tpoppunk_docker\tseroba_version\tseroba_docker\tseroba_serotype\tseroba_ariba_serotype\tseroba_ariba_identity\tseroba_details\tmidas_docker\tmidas_report\tmidas_primary_genus\tmidas_secondary_genus\tmidas_secondary_genus_coverage\tsnippy_variants_outdir_tarball" > ~{samplename}_terra_table.tsv ## TheiaProk Outs - echo -e 
"~{samplename}\t~{reads}\t~{read1}\t~{read2}\t~{read1_clean}\t~{read2_clean}\t~{run_id}\t~{collection_date}\t~{originating_lab}\t~{city}\t~{county}\t~{zip}\t~{theiaprok_illumina_pe_version}\t~{theiaprok_illumina_pe_analysis_date}\t~{theiaprok_illumina_se_version}\t~{theiaprok_illumina_se_analysis_date}\t~{seq_platform}\t~{num_reads_raw1}\t~{num_reads_raw2}\t~{num_reads_raw_pairs}\t~{fastq_scan_version}\t~{num_reads_clean1}\t~{num_reads_clean2}\t~{num_reads_clean_pairs}\t~{trimmomatic_version}\t~{bbduk_docker}\t~{r1_mean_q}\t~{r2_mean_q}\t~{assembly_fasta}\t~{contigs_gfa}\t~{shovill_pe_version}\t~{shovill_se_version}\t~{quast_report}\t~{quast_version}\t~{genome_length}\t~{number_contigs}\t~{n50_value}\t~{cg_pipeline_report}\t~{cg_pipeline_docker}\t~{est_coverage}\t~{gambit_report}\t~{gambit_predicted_taxon}\t~{gambit_predicted_taxon_rank}\t~{gambit_closest_genomes}\t~{gambit_version}\t~{gambit_db_version}\t~{gambit_docker}\t~{busco_version}\t~{busco_database}\t~{busco_results}\t~{busco_report}\t~{ts_mlst_results}\t~{ts_mlst_predicted_st}\t~{ts_mlst_pubmlst_scheme}\t~{ts_mlst_version}\t~{serotypefinder_report}\t~{serotypefinder_docker}\t~{serotypefinder_serotype}\t~{ectyper_results}\t~{ectyper_version}\t~{ectyper_predicted_serotype}\t~{lissero_results}\t~{lissero_version}\t~{lissero_serotype}\t~{sistr_results}\t~{sistr_allele_json}\t~{sister_allele_fasta}\t~{sistr_cgmlst}\t~{sistr_version}\t~{sistr_predicted_serotype}\t~{seqsero2_report}\t~{seqsero2_version}\t~{seqsero2_predicted_antigenic_profile}\t~{seqsero2_predicted_serotype}\t~{seqsero2_predicted_contamination}\t~{kleborate_output_file}\t~{kleborate_version}\t~{kleborate_docker}\t~{kleborate_key_resistance_genes}\t~{kleborate_genomic_resistance_mutations}\t~{kleborate_mlst_sequence_type}\t~{kleborate_klocus}\t~{kleborate_ktype}\t~{kleborate_olocus}\t~{kleborate_otype}\t~{kleborate_klocus_confidence}\t~{kleborate_olocus_confidence}\t~{kaptive_version}\t~{kaptive_output_file_k}\t~{kaptive_output_file_oc}\t~{kaptive
_k_locus}\t~{kaptive_k_type}\t~{kaptive_kl_confidence}\t~{kaptive_oc_locus}\t~{kaptive_ocl_confidence}\t~{abricate_abaum_plasmid_tsv}\t~{abricate_abaum_plasmid_type_genes}\t~{abricate_database}\t~{abricate_version}\t~{abricate_docker}\t~{legsta_results}\t~{legsta_predicted_sbt}\t~{legsta_version}\t~{tbprofiler_output_file}\t~{tbprofiler_output_bam}\t~{tbprofiler_output_bai}\t~{tbprofiler_version}\t~{tbprofiler_main_lineage}\t~{tbprofiler_sub_lineage}\t~{tbprofiler_dr_type}\t~{tbprofiler_resistance_genes}\t~{amrfinderplus_all_report}\t~{amrfinderplus_amr_report}\t~{amrfinderplus_stress_report}\t~{amrfinderplus_virulence_report}\t~{amrfinderplus_version}\t~{amrfinderplus_db_version}\t~{amrfinderplus_amr_genes}\t~{amrfinderplus_stress_genes}\t~{amrfinderplus_virulence_genes}\t~{amrfinderplus_amr_classes}\t~{amrfinderplus_amr_subclasses}\t~{genotyphi_report_tsv}\t~{genotyphi_mykrobe_json}\t~{genotyphi_version}\t~{genotyphi_species}\t~{genotyphi_st_probes_percent_coverage}\t~{genotyphi_final_genotype}\t~{genotyphi_genotype_confidence}\t~{ani_highest_percent}\t~{ani_highest_percent_bases_aligned}\t~{ani_output_tsv}\t~{ani_top_species_match}\t~{ani_mummer_version}\t~{resfinder_pheno_table}\t~{resfinder_pheno_table_species}\t~{resfinder_seqs}\t~{resfinder_results}\t~{resfinder_pointfinder_pheno_table}\t~{resfinder_pointfinder_results}\t~{resfinder_db_version}\t~{resfinder_docker}\t~{prokka_gff}\t~{prokka_gbk}\t~{prokka_sqn}\t~{plasmidfinder_plasmids}\t~{plasmidfinder_results}\t~{plasmidfinder_seqs}\t~{plasmidfinder_docker}\t~{plasmidfinder_db_version}\t~{pbptyper_predicted_1A_2B_2X}\t~{pbptyper_pbptype_predicted_tsv}\t~{pbptyper_version}\t~{pbptyper_docker}\t~{poppunk_gps_cluster}\t~{poppunk_gps_external_cluster_csv}\t~{poppunk_GPS_db_version}\t~{poppunk_version}\t~{poppunk_docker}\t~{seroba_version}\t~{seroba_docker}\t~{seroba_serotype}\t~{seroba_ariba_serotype}\t~{seroba_ariba_identity}\t~{seroba_details}\t~{midas_docker}\t~{midas_report}\t~{midas_primary_genus}\t~{midas_
secondary_genus}\t~{midas_secondary_genus_coverage}\t~{snippy_outputs}\t~{snippy_output_dirs}" >> ~{samplename}_terra_table.tsv + echo -e "~{samplename}\t~{reads}\t~{read1}\t~{read2}\t~{read1_clean}\t~{read2_clean}\t~{run_id}\t~{collection_date}\t~{originating_lab}\t~{city}\t~{county}\t~{zip}\t~{theiaprok_illumina_pe_version}\t~{theiaprok_illumina_pe_analysis_date}\t~{theiaprok_illumina_se_version}\t~{theiaprok_illumina_se_analysis_date}\t~{seq_platform}\t~{num_reads_raw1}\t~{num_reads_raw2}\t~{num_reads_raw_pairs}\t~{fastq_scan_version}\t~{num_reads_clean1}\t~{num_reads_clean2}\t~{num_reads_clean_pairs}\t~{trimmomatic_version}\t~{bbduk_docker}\t~{r1_mean_q}\t~{r2_mean_q}\t~{assembly_fasta}\t~{contigs_gfa}\t~{shovill_pe_version}\t~{shovill_se_version}\t~{quast_report}\t~{quast_version}\t~{genome_length}\t~{number_contigs}\t~{n50_value}\t~{cg_pipeline_report}\t~{cg_pipeline_docker}\t~{est_coverage}\t~{gambit_report}\t~{gambit_predicted_taxon}\t~{gambit_predicted_taxon_rank}\t~{gambit_closest_genomes}\t~{gambit_version}\t~{gambit_db_version}\t~{gambit_docker}\t~{busco_version}\t~{busco_database}\t~{busco_results}\t~{busco_report}\t~{ts_mlst_results}\t~{ts_mlst_predicted_st}\t~{ts_mlst_pubmlst_scheme}\t~{ts_mlst_version}\t~{serotypefinder_report}\t~{serotypefinder_docker}\t~{serotypefinder_serotype}\t~{ectyper_results}\t~{ectyper_version}\t~{ectyper_predicted_serotype}\t~{lissero_results}\t~{lissero_version}\t~{lissero_serotype}\t~{sistr_results}\t~{sistr_allele_json}\t~{sister_allele_fasta}\t~{sistr_cgmlst}\t~{sistr_version}\t~{sistr_predicted_serotype}\t~{seqsero2_report}\t~{seqsero2_version}\t~{seqsero2_predicted_antigenic_profile}\t~{seqsero2_predicted_serotype}\t~{seqsero2_predicted_contamination}\t~{kleborate_output_file}\t~{kleborate_version}\t~{kleborate_docker}\t~{kleborate_key_resistance_genes}\t~{kleborate_genomic_resistance_mutations}\t~{kleborate_mlst_sequence_type}\t~{kleborate_klocus}\t~{kleborate_ktype}\t~{kleborate_olocus}\t~{kleborate_otype}\t~{klebor
ate_klocus_confidence}\t~{kleborate_olocus_confidence}\t~{kaptive_version}\t~{kaptive_output_file_k}\t~{kaptive_output_file_oc}\t~{kaptive_k_locus}\t~{kaptive_k_type}\t~{kaptive_kl_confidence}\t~{kaptive_oc_locus}\t~{kaptive_ocl_confidence}\t~{abricate_abaum_plasmid_tsv}\t~{abricate_abaum_plasmid_type_genes}\t~{abricate_database}\t~{abricate_version}\t~{abricate_docker}\t~{legsta_results}\t~{legsta_predicted_sbt}\t~{legsta_version}\t~{tbprofiler_output_file}\t~{tbprofiler_output_bam}\t~{tbprofiler_output_bai}\t~{tbprofiler_version}\t~{tbprofiler_main_lineage}\t~{tbprofiler_sub_lineage}\t~{tbprofiler_dr_type}\t~{tbprofiler_resistance_genes}\t~{amrfinderplus_all_report}\t~{amrfinderplus_amr_report}\t~{amrfinderplus_stress_report}\t~{amrfinderplus_virulence_report}\t~{amrfinderplus_version}\t~{amrfinderplus_db_version}\t~{amrfinderplus_amr_genes}\t~{amrfinderplus_stress_genes}\t~{amrfinderplus_virulence_genes}\t~{amrfinderplus_amr_classes}\t~{amrfinderplus_amr_subclasses}\t~{genotyphi_report_tsv}\t~{genotyphi_mykrobe_json}\t~{genotyphi_version}\t~{genotyphi_species}\t~{genotyphi_st_probes_percent_coverage}\t~{genotyphi_final_genotype}\t~{genotyphi_genotype_confidence}\t~{ani_highest_percent}\t~{ani_highest_percent_bases_aligned}\t~{ani_output_tsv}\t~{ani_top_species_match}\t~{ani_mummer_version}\t~{resfinder_pheno_table}\t~{resfinder_pheno_table_species}\t~{resfinder_seqs}\t~{resfinder_results}\t~{resfinder_pointfinder_pheno_table}\t~{resfinder_pointfinder_results}\t~{resfinder_db_version}\t~{resfinder_docker}\t~{prokka_gff}\t~{prokka_gbk}\t~{prokka_sqn}\t~{plasmidfinder_plasmids}\t~{plasmidfinder_results}\t~{plasmidfinder_seqs}\t~{plasmidfinder_docker}\t~{plasmidfinder_db_version}\t~{pbptyper_predicted_1A_2B_2X}\t~{pbptyper_pbptype_predicted_tsv}\t~{pbptyper_version}\t~{pbptyper_docker}\t~{poppunk_gps_cluster}\t~{poppunk_gps_external_cluster_csv}\t~{poppunk_GPS_db_version}\t~{poppunk_version}\t~{poppunk_docker}\t~{seroba_version}\t~{seroba_docker}\t~{seroba_serotype}\
t~{seroba_ariba_serotype}\t~{seroba_ariba_identity}\t~{seroba_details}\t~{midas_docker}\t~{midas_report}\t~{midas_primary_genus}\t~{midas_secondary_genus}\t~{midas_secondary_genus_coverage}\t~{snippy_variants_outdir_tarball}" >> ~{samplename}_terra_table.tsv # modify file paths to GCP URIs sed -i 's/\/cromwell_root\//gs:\/\//g' ~{samplename}_terra_table.tsv # export table From 2b98047fa23e73fa959ab2d03e7ca1e661037964 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 20 Dec 2022 18:19:46 +0000 Subject: [PATCH 23/33] New gubbins task for phylogenetic tree inference --- tasks/phylogenetic_inference/task_gubbins.wdl | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 tasks/phylogenetic_inference/task_gubbins.wdl diff --git a/tasks/phylogenetic_inference/task_gubbins.wdl b/tasks/phylogenetic_inference/task_gubbins.wdl new file mode 100644 index 000000000..1f437b762 --- /dev/null +++ b/tasks/phylogenetic_inference/task_gubbins.wdl @@ -0,0 +1,36 @@ +version 1.0 + +task gubbins { + input { + File alignment + String cluster_name + String docker = "sangerpathogens/gubbins" + } + command <<< + # date and version control + date | tee DATE + run_gubbins.py --version | tee VERSION + + run_gubbins.py \ + ~{alignment} \ + --prefix ~{cluster_name} \ + --first-tree-builder fasttree + >>> + output { + String date = read_string("DATE") + String version = read_string("VERSION") + File gubbins_final_tree = "~{cluster_name}.final_tree.tre" + File gubbins_final_labelled_tree = "~{cluster_name}.node_labelled.final_tree.tre" + File gubbins_polymorphic_fasta = "~{cluster_name}.filtered_polymorphic_sites.fasta" + File gubbins_recombination_gff = "~{cluster_name}.recombination_predictions.gff" + File gubbins_branch_stats = "~{cluster_name}.per_branch_statistics.csv" + } + runtime { + docker: "~{docker}" + memory: "32 GB" + cpu: 4 + disks: "local-disk 100 SSD" + preemptible: 0 + maxRetries: 3 + } +} From cc5de44595c3b85d5303706efa27afdd95f01361 Mon Sep 17 00:00:00 2001 
From: jrotieno Date: Tue, 20 Dec 2022 18:20:22 +0000 Subject: [PATCH 24/33] Updated wf with gubbins task --- workflows/wf_snippy_tree.wdl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/workflows/wf_snippy_tree.wdl b/workflows/wf_snippy_tree.wdl index 075bbe0f0..fa49841d7 100644 --- a/workflows/wf_snippy_tree.wdl +++ b/workflows/wf_snippy_tree.wdl @@ -3,6 +3,7 @@ version 1.0 import "../tasks/phylogenetic_inference/task_snippy_core.wdl" as snippy_core import "../tasks/phylogenetic_inference/task_iqtree.wdl" as iqtree import "../tasks/phylogenetic_inference/task_snp_dists.wdl" as snp_dists +import "../tasks/phylogenetic_inference/task_gubbins.wdl" as gubbins import "../tasks/task_versioning.wdl" as versioning workflow snippy_tree_wf { @@ -31,6 +32,11 @@ workflow snippy_tree_wf { input: alignment = snippy_core.snippy_full_alignment_clean, cluster_name = tree_name + } + call gubbins.gubbins { + input: + alignment = snippy_core.snippy_full_alignment_clean, + cluster_name = tree_name } call versioning.version_capture{ input: @@ -50,5 +56,11 @@ workflow snippy_tree_wf { String snippy_tree_snpdists_version = snp_dists.version File snippy_tree_snpdists_matrix = snp_dists.snp_matrix File snippy_tree_snpdists_list = snp_dists.snp_dists_molten_ordered + File snippy_tree_gubbins_tree = gubbins.gubbins_final_tree + File snippy_tree_gubbins_labelled_tree = gubbins.gubbins_final_labelled_tree + File snippy_tree_gubbins_polymorphic_fasta = gubbins.gubbins_polymorphic_fasta + File snippy_tree_gubbins_recombination_gff = gubbins.gubbins_recombination_gff + File snippy_tree_gubbins_branch_stats = gubbins.gubbins_branch_stats + String snippy_tree_gubbins_version = gubbins.version } } \ No newline at end of file From 0579c50c90ce47c7353b07b14d7e921a4023681a Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 20 Dec 2022 18:21:05 +0000 Subject: [PATCH 25/33] output file name formatting --- tasks/phylogenetic_inference/task_iqtree.wdl | 2 +- 
tasks/phylogenetic_inference/task_snippy_core.wdl | 4 ++++ tasks/phylogenetic_inference/task_snp_dists.wdl | 4 +++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tasks/phylogenetic_inference/task_iqtree.wdl b/tasks/phylogenetic_inference/task_iqtree.wdl index a3c9135a6..3583d669e 100644 --- a/tasks/phylogenetic_inference/task_iqtree.wdl +++ b/tasks/phylogenetic_inference/task_iqtree.wdl @@ -27,7 +27,7 @@ task iqtree { -alrt ~{alrt} \ ~{iqtree_opts} - cp msa.fasta.contree ~{cluster_name}_msa.tree + cp msa.fasta.contree ~{cluster_name}_iqtree.tree fi >>> output { diff --git a/tasks/phylogenetic_inference/task_snippy_core.wdl b/tasks/phylogenetic_inference/task_snippy_core.wdl index 6cb371250..e0d334f4c 100644 --- a/tasks/phylogenetic_inference/task_snippy_core.wdl +++ b/tasks/phylogenetic_inference/task_snippy_core.wdl @@ -30,6 +30,10 @@ task snippy_core { snippy-clean_full_aln \ ~{tree_name}.full.aln > ~{tree_name}_snippy_clean_full.aln + mv ~{tree_name}.aln ~{tree_name}_core.aln + mv ~{tree_name}.full.aln ~{tree_name}_full.aln + mv ~{tree_name}.tab ~{tree_name}_all_snps.tsv + mv ~{tree_name}.txt ~{tree_name}_snps_summary.txt >>> output { String snippy_version = read_string("VERSION") diff --git a/tasks/phylogenetic_inference/task_snp_dists.wdl b/tasks/phylogenetic_inference/task_snp_dists.wdl index a330de91e..d871a8d10 100644 --- a/tasks/phylogenetic_inference/task_snp_dists.wdl +++ b/tasks/phylogenetic_inference/task_snp_dists.wdl @@ -94,7 +94,9 @@ task snp_dists { z.close() print "Matrix has been created in current directory as '~{cluster_name}_snp_distance_matrix.tsv.'" - CODE + CODE + cp snp-dists-molten-ordered.tsv ~{cluster_name}_snp-dists_list.tsv + cp snp-dists-matrix.tsv ~{cluster_name}_snp_distance_matrix.tsv >>> output { String date = read_string("DATE") From 30de4aede1faf968e23a7d93a1fd39aead0e05b7 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 20 Dec 2022 18:26:19 +0000 Subject: [PATCH 26/33] output file name cleanup --- 
tasks/phylogenetic_inference/task_iqtree.wdl | 2 +- tasks/phylogenetic_inference/task_snippy_core.wdl | 6 +++++- tasks/phylogenetic_inference/task_snp_dists.wdl | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tasks/phylogenetic_inference/task_iqtree.wdl b/tasks/phylogenetic_inference/task_iqtree.wdl index a3c9135a6..3583d669e 100644 --- a/tasks/phylogenetic_inference/task_iqtree.wdl +++ b/tasks/phylogenetic_inference/task_iqtree.wdl @@ -27,7 +27,7 @@ task iqtree { -alrt ~{alrt} \ ~{iqtree_opts} - cp msa.fasta.contree ~{cluster_name}_msa.tree + cp msa.fasta.contree ~{cluster_name}_iqtree.tree fi >>> output { diff --git a/tasks/phylogenetic_inference/task_snippy_core.wdl b/tasks/phylogenetic_inference/task_snippy_core.wdl index 6cb371250..e0c81a110 100644 --- a/tasks/phylogenetic_inference/task_snippy_core.wdl +++ b/tasks/phylogenetic_inference/task_snippy_core.wdl @@ -30,6 +30,10 @@ task snippy_core { snippy-clean_full_aln \ ~{tree_name}.full.aln > ~{tree_name}_snippy_clean_full.aln + mv ~{tree_name}.aln ~{tree_name}_core.aln + mv ~{tree_name}.full.aln ~{tree_name}_full.aln + mv ~{tree_name}.tab ~{tree_name}_all_snps.tsv + mv ~{tree_name}.txt ~{tree_name}_snps_summary.txt >>> output { String snippy_version = read_string("VERSION") @@ -49,4 +53,4 @@ task snippy_core { disks: "local-disk 100 SSD" preemptible: 0 } -} +} \ No newline at end of file diff --git a/tasks/phylogenetic_inference/task_snp_dists.wdl b/tasks/phylogenetic_inference/task_snp_dists.wdl index a330de91e..d871a8d10 100644 --- a/tasks/phylogenetic_inference/task_snp_dists.wdl +++ b/tasks/phylogenetic_inference/task_snp_dists.wdl @@ -94,7 +94,9 @@ task snp_dists { z.close() print "Matrix has been created in current directory as '~{cluster_name}_snp_distance_matrix.tsv.'" - CODE + CODE + cp snp-dists-molten-ordered.tsv ~{cluster_name}_snp-dists_list.tsv + cp snp-dists-matrix.tsv ~{cluster_name}_snp_distance_matrix.tsv >>> output { String date = read_string("DATE") From 
d8319f618db02f9354f3ab1137d3a8dd54cf6f4e Mon Sep 17 00:00:00 2001 From: jrotieno Date: Fri, 23 Dec 2022 10:32:31 +0000 Subject: [PATCH 27/33] additional gubbins outputs --- tasks/phylogenetic_inference/task_gubbins.wdl | 31 +++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/tasks/phylogenetic_inference/task_gubbins.wdl b/tasks/phylogenetic_inference/task_gubbins.wdl index 1f437b762..a414e0ef2 100644 --- a/tasks/phylogenetic_inference/task_gubbins.wdl +++ b/tasks/phylogenetic_inference/task_gubbins.wdl @@ -5,6 +5,18 @@ task gubbins { File alignment String cluster_name String docker = "sangerpathogens/gubbins" + Int? filter_percent = 25 #default is 25% + Int? iterations = 5 + String? first_tree_nuc_subst_model = "GTRGAMMA" + Array[String]? first_tree_args + String? tree_builder = "raxml" + Array[String]? tree_args + String? nuc_subst_model = "GTRGAMMA" + Array[String]? nuc_subst_model_args = ["4"] + String? best_nuc_subst_model + Array[String]? outgroup + Int? bootstrap = 0 + File? 
dates_file } command <<< # date and version control @@ -14,7 +26,20 @@ task gubbins { run_gubbins.py \ ~{alignment} \ --prefix ~{cluster_name} \ - --first-tree-builder fasttree + --first-tree-builder fasttree \ + --filter-percentage ~{filter_percent} \ + --iterations ~{iterations} \ + --first-model ~{first_tree_nuc_subst_model} \ + ~{'--first-tree-args ' + first_tree_args} \ + --tree-builder ~{tree_builder} \ + ~{'--tree-args ' + tree_args} \ + ~{'--best-model ' + best_nuc_subst_model} \ + --model ~{nuc_subst_model} \ + --model-args ~{sep="," nuc_subst_model_args} \ + --bootstrap ~{bootstrap} \ + ~{'--outgroup ' + outgroup} \ + ~{'--date ' + dates_file} \ + --threads 2 >>> output { String date = read_string("DATE") @@ -24,6 +49,8 @@ task gubbins { File gubbins_polymorphic_fasta = "~{cluster_name}.filtered_polymorphic_sites.fasta" File gubbins_recombination_gff = "~{cluster_name}.recombination_predictions.gff" File gubbins_branch_stats = "~{cluster_name}.per_branch_statistics.csv" + File? gubbins_timetree = "~{cluster_name}.final_tree.timetree.tre" + File? 
gubbins_timetree_stats = "~{cluster_name}.lsd.out" } runtime { docker: "~{docker}" @@ -31,6 +58,6 @@ task gubbins { cpu: 4 disks: "local-disk 100 SSD" preemptible: 0 - maxRetries: 3 + maxRetries: 1 } } From 9f1f69dd40d675bad037fa55d27ea119b57778a7 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Fri, 23 Dec 2022 10:32:57 +0000 Subject: [PATCH 28/33] configured user choice between gubbins and iqtree --- workflows/wf_snippy_tree.wdl | 63 ++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 24 deletions(-) diff --git a/workflows/wf_snippy_tree.wdl b/workflows/wf_snippy_tree.wdl index fa49841d7..3c5459c01 100644 --- a/workflows/wf_snippy_tree.wdl +++ b/workflows/wf_snippy_tree.wdl @@ -15,6 +15,7 @@ workflow snippy_tree_wf { Array[File] snippy_variants_outdir_tarball Array[String] samplenames File reference + Boolean use_gubbins = false } call snippy_core.snippy_core { input: @@ -23,20 +24,29 @@ workflow snippy_tree_wf { reference = reference, tree_name = tree_name } - call iqtree.iqtree { - input: - alignment = snippy_core.snippy_full_alignment_clean, - cluster_name = tree_name - } - call snp_dists.snp_dists{ - input: - alignment = snippy_core.snippy_full_alignment_clean, - cluster_name = tree_name + if (use_gubbins) { + call gubbins.gubbins { + input: + alignment = snippy_core.snippy_full_alignment_clean, + cluster_name = tree_name + } + call snp_dists.snp_dists as snp_dists_gubbins { + input: + alignment = gubbins.gubbins_polymorphic_fasta, + cluster_name = tree_name + } } - call gubbins.gubbins { - input: - alignment = snippy_core.snippy_full_alignment_clean, - cluster_name = tree_name + if (!use_gubbins) { + call iqtree.iqtree { + input: + alignment = snippy_core.snippy_full_alignment_clean, + cluster_name = tree_name + } + call snp_dists.snp_dists as snp_dists_iqtree { + input: + alignment = snippy_core.snippy_full_alignment_clean, + cluster_name = tree_name + } } call versioning.version_capture{ input: @@ -51,16 +61,21 @@ workflow snippy_tree_wf { 
File snippy_tree_all_snps = snippy_core.snippy_core_tab File snippy_tree_snps_summary = snippy_core.snippy_txt File snippy_tree_vcf = snippy_core.snippy_vcf - File snippy_tree_iqtree = iqtree.ml_tree - String snippy_tree_iqtree_version = iqtree.version - String snippy_tree_snpdists_version = snp_dists.version - File snippy_tree_snpdists_matrix = snp_dists.snp_matrix - File snippy_tree_snpdists_list = snp_dists.snp_dists_molten_ordered - File snippy_tree_gubbins_tree = gubbins.gubbins_final_tree - File snippy_tree_gubbins_labelled_tree = gubbins.gubbins_final_labelled_tree - File snippy_tree_gubbins_polymorphic_fasta = gubbins.gubbins_polymorphic_fasta - File snippy_tree_gubbins_recombination_gff = gubbins.gubbins_recombination_gff - File snippy_tree_gubbins_branch_stats = gubbins.gubbins_branch_stats - String snippy_tree_gubbins_version = gubbins.version + File? snippy_tree_iqtree = iqtree.ml_tree + String? snippy_tree_iqtree_version = iqtree.version + String? snippy_tree_snpdists_gubbins_version = snp_dists_gubbins.version + File? snippy_tree_snpdists_gubbins_matrix = snp_dists_gubbins.snp_matrix + File? snippy_tree_snpdists_gubbins_list = snp_dists_gubbins.snp_dists_molten_ordered + String? snippy_tree_snpdists_iqtree_version = snp_dists_iqtree.version + File? snippy_tree_snpdists_iqtree_matrix = snp_dists_iqtree.snp_matrix + File? snippy_tree_snpdists_iqtree_list = snp_dists_iqtree.snp_dists_molten_ordered + File? snippy_tree_gubbins_tree = gubbins.gubbins_final_tree + File? snippy_tree_gubbins_labelled_tree = gubbins.gubbins_final_labelled_tree + File? snippy_tree_gubbins_polymorphic_fasta = gubbins.gubbins_polymorphic_fasta + File? snippy_tree_gubbins_recombination_gff = gubbins.gubbins_recombination_gff + File? snippy_tree_gubbins_branch_stats = gubbins.gubbins_branch_stats + String? snippy_tree_gubbins_version = gubbins.version + File? snippy_tree_gubbins_timetree = gubbins.gubbins_timetree + File? 
snippy_tree_gubbins_timetree_stats = gubbins.gubbins_timetree_stats } } \ No newline at end of file From 3da6b9f65fb169a0e67bbb253002ae38c24736f6 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Fri, 23 Dec 2022 12:27:08 +0000 Subject: [PATCH 29/33] updated arguments --- tasks/phylogenetic_inference/task_gubbins.wdl | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/tasks/phylogenetic_inference/task_gubbins.wdl b/tasks/phylogenetic_inference/task_gubbins.wdl index a414e0ef2..5c6da3e7a 100644 --- a/tasks/phylogenetic_inference/task_gubbins.wdl +++ b/tasks/phylogenetic_inference/task_gubbins.wdl @@ -7,14 +7,10 @@ task gubbins { String docker = "sangerpathogens/gubbins" Int? filter_percent = 25 #default is 25% Int? iterations = 5 - String? first_tree_nuc_subst_model = "GTRGAMMA" - Array[String]? first_tree_args String? tree_builder = "raxml" - Array[String]? tree_args - String? nuc_subst_model = "GTRGAMMA" - Array[String]? nuc_subst_model_args = ["4"] - String? best_nuc_subst_model - Array[String]? outgroup + Array[String]? tree_args = [""] + String? nuc_subst_model = "GTRCAT" + Boolean? best_nuc_subst_model = false Int? bootstrap = 0 File? 
dates_file } @@ -26,18 +22,13 @@ task gubbins { run_gubbins.py \ ~{alignment} \ --prefix ~{cluster_name} \ - --first-tree-builder fasttree \ --filter-percentage ~{filter_percent} \ --iterations ~{iterations} \ - --first-model ~{first_tree_nuc_subst_model} \ - ~{'--first-tree-args ' + first_tree_args} \ --tree-builder ~{tree_builder} \ ~{'--tree-args ' + tree_args} \ - ~{'--best-model ' + best_nuc_subst_model} \ - --model ~{nuc_subst_model} \ - --model-args ~{sep="," nuc_subst_model_args} \ + ~{true="--best-model" false="" best_nuc_subst_model} \ + ~{'--model ' + nuc_subst_model} \ --bootstrap ~{bootstrap} \ - ~{'--outgroup ' + outgroup} \ ~{'--date ' + dates_file} \ --threads 2 >>> From 8167ca17b41c701a26ba27f2390334605c8a62b2 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Fri, 23 Dec 2022 12:31:10 +0000 Subject: [PATCH 30/33] variable updates --- tasks/phylogenetic_inference/task_gubbins.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/phylogenetic_inference/task_gubbins.wdl b/tasks/phylogenetic_inference/task_gubbins.wdl index 5c6da3e7a..85812d479 100644 --- a/tasks/phylogenetic_inference/task_gubbins.wdl +++ b/tasks/phylogenetic_inference/task_gubbins.wdl @@ -8,7 +8,7 @@ task gubbins { Int? filter_percent = 25 #default is 25% Int? iterations = 5 String? tree_builder = "raxml" - Array[String]? tree_args = [""] + String? tree_args = [""] String? nuc_subst_model = "GTRCAT" Boolean? best_nuc_subst_model = false Int? 
bootstrap = 0 From fdeb51c887f87f9a4b88dddb54b26a471ea722dc Mon Sep 17 00:00:00 2001 From: jrotieno Date: Fri, 23 Dec 2022 12:35:32 +0000 Subject: [PATCH 31/33] small updates --- tasks/phylogenetic_inference/task_gubbins.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/phylogenetic_inference/task_gubbins.wdl b/tasks/phylogenetic_inference/task_gubbins.wdl index 85812d479..2bce83d97 100644 --- a/tasks/phylogenetic_inference/task_gubbins.wdl +++ b/tasks/phylogenetic_inference/task_gubbins.wdl @@ -8,7 +8,7 @@ task gubbins { Int? filter_percent = 25 #default is 25% Int? iterations = 5 String? tree_builder = "raxml" - String? tree_args = [""] + String? tree_args String? nuc_subst_model = "GTRCAT" Boolean? best_nuc_subst_model = false Int? bootstrap = 0 From 45cebe6cca97689c5ad31e99c31dce92635f883c Mon Sep 17 00:00:00 2001 From: jrotieno Date: Fri, 23 Dec 2022 12:46:02 +0000 Subject: [PATCH 32/33] updates --- tasks/phylogenetic_inference/task_gubbins.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tasks/phylogenetic_inference/task_gubbins.wdl b/tasks/phylogenetic_inference/task_gubbins.wdl index 2bce83d97..a37167499 100644 --- a/tasks/phylogenetic_inference/task_gubbins.wdl +++ b/tasks/phylogenetic_inference/task_gubbins.wdl @@ -12,6 +12,7 @@ task gubbins { String? nuc_subst_model = "GTRCAT" Boolean? best_nuc_subst_model = false Int? bootstrap = 0 + String? outgroup File? 
dates_file } command <<< @@ -29,6 +30,7 @@ task gubbins { ~{true="--best-model" false="" best_nuc_subst_model} \ ~{'--model ' + nuc_subst_model} \ --bootstrap ~{bootstrap} \ + ~{'--outgroup ' + outgroup} \ ~{'--date ' + dates_file} \ --threads 2 >>> From c2438da9970c6c70022d61796648967d2ea8c698 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Wed, 28 Dec 2022 11:22:52 +0000 Subject: [PATCH 33/33] minor changes --- tasks/phylogenetic_inference/task_gubbins.wdl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tasks/phylogenetic_inference/task_gubbins.wdl b/tasks/phylogenetic_inference/task_gubbins.wdl index a37167499..c1f979022 100644 --- a/tasks/phylogenetic_inference/task_gubbins.wdl +++ b/tasks/phylogenetic_inference/task_gubbins.wdl @@ -9,8 +9,7 @@ task gubbins { Int? iterations = 5 String? tree_builder = "raxml" String? tree_args - String? nuc_subst_model = "GTRCAT" - Boolean? best_nuc_subst_model = false + String? nuc_subst_model = "GTRGAMMA" Int? bootstrap = 0 String? outgroup File? dates_file @@ -27,7 +26,6 @@ task gubbins { --iterations ~{iterations} \ --tree-builder ~{tree_builder} \ ~{'--tree-args ' + tree_args} \ - ~{true="--best-model" false="" best_nuc_subst_model} \ ~{'--model ' + nuc_subst_model} \ --bootstrap ~{bootstrap} \ ~{'--outgroup ' + outgroup} \