From 95c801428e7b7a76463cfd2b783d6795acba6744 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Fri, 25 Oct 2024 15:22:10 +0200 Subject: [PATCH 01/32] Here is the one-line commit message for the changes: feat: Add biWFA as default alignment approach with wflign as fallback --- src/common/wflign/src/alignment_printer.hpp | 103 ++++++++++++++++++ src/common/wflign/src/wflign_patch.hpp | 114 +++++--------------- 2 files changed, 130 insertions(+), 87 deletions(-) create mode 100644 src/common/wflign/src/alignment_printer.hpp diff --git a/src/common/wflign/src/alignment_printer.hpp b/src/common/wflign/src/alignment_printer.hpp new file mode 100644 index 00000000..a7bc378d --- /dev/null +++ b/src/common/wflign/src/alignment_printer.hpp @@ -0,0 +1,103 @@ +#ifndef ALIGNMENT_PRINTER_HPP +#define ALIGNMENT_PRINTER_HPP + +#include +#include +#include "wflign.hpp" + +namespace wflign { +namespace wavefront { + +void write_tag_and_md_string( + std::ostream &out, + const char *cigar_ops, + const int cigar_start, + const int cigar_end, + const int target_start, + const char *target, + const int64_t target_offset, + const int64_t target_pointer_shift); + +void write_alignment_sam( + std::ostream &out, + const alignment_t& patch_aln, + const std::string& query_name, + const uint64_t& query_total_length, + const uint64_t& query_offset, + const uint64_t& query_length, + const bool& query_is_rev, + const std::string& target_name, + const uint64_t& target_total_length, + const uint64_t& target_offset, + const uint64_t& target_length, + const float& min_identity, + const float& mashmap_estimated_identity, + const bool& no_seq_in_sam, + const bool& emit_md_tag, + const char* query, + const char* target, + const int64_t& target_pointer_shift); + +bool write_alignment_paf( + std::ostream& out, + const alignment_t& aln, + const std::string& query_name, + const uint64_t& query_total_length, + const uint64_t& query_offset, + const uint64_t& query_length, + const bool& query_is_rev, + const std::string& target_name, + const uint64_t& target_total_length, + const uint64_t& target_offset, + const uint64_t& target_length, + const float& min_identity, + const float& mashmap_estimated_identity, + const bool& with_endline = true, + const bool& is_rev_patch = false); + +void write_merged_alignment( + std::ostream &out, + const std::vector &trace, + wfa::WFAlignerGapAffine2Pieces& wf_aligner, + const wflign_penalties_t& convex_penalties, + const bool& emit_md_tag, + const bool& paf_format_else_sam, + const bool& no_seq_in_sam, + const char* query, + const std::string& query_name, + const uint64_t& query_total_length, + const uint64_t& query_offset, + const uint64_t& query_length, + const bool& query_is_rev, + const char* target, + const std::string& target_name, + const uint64_t& target_total_length, + const uint64_t& target_offset, + const uint64_t& target_length, + const float& min_identity, +#ifdef WFA_PNG_TSV_TIMING + const long& elapsed_time_wflambda_ms, + const uint64_t& num_alignments, + const uint64_t& num_alignments_performed, +#endif + const float& mashmap_estimated_identity, + const uint64_t& wflign_max_len_major, + const uint64_t& wflign_max_len_minor, + const int& erode_k, + const int64_t& chain_gap, + const int& max_patching_score, + const uint64_t& min_inversion_length, + const int& min_wf_length, + const int& max_dist_threshold, +#ifdef WFA_PNG_TSV_TIMING + const std::string* prefix_wavefront_plot_in_png, + const uint64_t& wfplot_max_size, + const bool& emit_patching_tsv, + std::ostream* out_patching_tsv, +#endif + const bool& with_endline = true); + +} // namespace wavefront +} // namespace wflign + +#endif diff --git a/src/common/wflign/src/wflign_patch.hpp b/src/common/wflign/src/wflign_patch.hpp index 7e197abb..e799ccda 100644 --- a/src/common/wflign/src/wflign_patch.hpp +++ b/src/common/wflign/src/wflign_patch.hpp @@ -18,6 +18,7 @@ #include "rkmh.hpp" #include "wflign.hpp" #include "wflign_alignment.hpp" +#include "alignment_printer.hpp" /* * Configuration @@ -46,6 +47,7 @@ namespace wflign { const uint16_t& step_size, wflign_extend_data_t* extend_data, alignment_t& aln); + void do_wfa_patch_alignment( const char* query, const uint64_t& j, @@ -59,9 +61,31 @@ namespace wflign { alignment_t& rev_aln, const int64_t& chain_gap, const int& max_patching_score, - const uint64_t& min_inversion_length, - bool ends_free); + const uint64_t& min_inversion_length); + + void do_biwfa_alignment( + const std::string& query_name, + char* const query, + const uint64_t query_total_length, + const uint64_t query_offset, + const uint64_t query_length, + const bool query_is_rev, + const std::string& target_name, + char* const target, + const uint64_t target_total_length, + const uint64_t target_offset, + const uint64_t target_length, + std::ostream& out, + const wflign_penalties_t& convex_penalties, + const bool emit_md_tag, + const bool paf_format_else_sam, + const bool no_seq_in_sam, + const float min_identity, + const uint64_t wflign_max_len_minor, + const float mashmap_estimated_identity); + void trim_alignment(alignment_t& aln); + std::vector do_progressive_wfa_patch_alignment( const char* query, const uint64_t& query_start, @@ -75,91 +99,7 @@ namespace wflign { const int& max_patching_score, const uint64_t& min_inversion_length, const int& erode_k); - void write_merged_alignment( - std::ostream &out, - const std::vector &trace, - wfa::WFAlignerGapAffine2Pieces& wf_aligner, - const wflign_penalties_t& convex_penalties, - const bool& emit_md_tag, - const bool& paf_format_else_sam, - const bool& no_seq_in_sam, - const char* query, - const std::string& query_name, - const uint64_t& query_total_length, - const uint64_t& query_offset, - const uint64_t& query_length, - const bool& query_is_rev, - const char* target, - const std::string& target_name, - const uint64_t& target_total_length, - const uint64_t& target_offset, - const uint64_t& target_length, - const float& min_identity, -#ifdef WFA_PNG_TSV_TIMING - const long& elapsed_time_wflambda_ms, - const uint64_t& num_alignments, - const uint64_t& num_alignments_performed, -#endif - const float& mashmap_estimated_identity, - const uint64_t& wflign_max_len_major, - const uint64_t& wflign_max_len_minor, - const int& erode_k, - const int64_t& chain_gap, - const int& max_patching_score, - const uint64_t& min_inversion_length, - const int& min_wf_length, - const int& max_dist_threshold, -#ifdef WFA_PNG_TSV_TIMING - const std::string* prefix_wavefront_plot_in_png, - const uint64_t& wfplot_max_size, - const bool& emit_patching_tsv, - std::ostream* out_patching_tsv, -#endif - const bool& with_endline = true); - void write_tag_and_md_string( - std::ostream &out, - const char *cigar_ops, - const int cigar_start, - const int cigar_end, - const int target_start, - const char *target, - const int64_t target_offset, - const int64_t target_pointer_shift); - void write_alignment_sam( - std::ostream &out, - const alignment_t& patch_aln, - const std::string& query_name, - const uint64_t& query_total_length, - const uint64_t& query_offset, - const uint64_t& query_length, - const bool& query_is_rev, - const std::string& target_name, - const uint64_t& target_total_length, - const uint64_t& target_offset, - const uint64_t& target_length, - const float& min_identity, - const float& mashmap_estimated_identity, - const bool& no_seq_in_sam, - const bool& emit_md_tag, - const char* query, - const char* target, - const int64_t& target_pointer_shift); - bool write_alignment_paf( - std::ostream& out, - const alignment_t& aln, - const std::string& query_name, - const uint64_t& query_total_length, - const uint64_t& query_offset, - const uint64_t& query_length, - const bool& query_is_rev, - const std::string& target_name, - const uint64_t& target_total_length, - const uint64_t& target_offset, - const uint64_t& target_length, // unused - const float& min_identity, - const float& mashmap_estimated_identity, - const bool& with_endline = true, - const bool& is_rev_patch = false); + double float2phred(const double& prob); void sort_indels(std::vector& v); From 3ffcab8abb4c3d66fec992be6b7f39c4ef79a765 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Fri, 25 Oct 2024 15:29:36 +0200 Subject: [PATCH 02/32] feat: Add biWFA alignment logic to wflign.cpp --- src/common/wflign/src/wflign.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index 3975f014..32c520aa 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -331,6 +331,21 @@ void WFlign::wflign_affine_wavefront( return; } + // Use biWFA for smaller sequences or very high identity matches + if (force_biwfa_alignment || + (query_length <= segment_length * 8 || target_length <= segment_length * 8) || + (mashmap_estimated_identity >= 0.99 && + query_length <= MAX_LEN_FOR_STANDARD_WFA && + target_length <= MAX_LEN_FOR_STANDARD_WFA)) { + + do_biwfa_alignment( + query_name, query, query_total_length, query_offset, query_length, query_is_rev, + target_name, target, target_total_length, target_offset, target_length, + *out, wfa_convex_penalties, emit_md_tag, paf_format_else_sam, no_seq_in_sam, + min_identity, wflign_max_len_minor, mashmap_estimated_identity); + return; + } + // Check if mashmap_estimated_identity == 1 to avoid division by zero, leading to a minhash_kmer_size of 8. // Such low value was leading to confusion in HORs alignments in the human centromeres (high runtime and memory usage, and wrong alignments) const int minhash_kmer_size = mashmap_estimated_identity == 1 ? 17 : std::max(8, std::min(17, (int)std::floor(1.0 / (1.0 - mashmap_estimated_identity)))); From d5765633492bdf12fca560f71fb9d811fea53f9f Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Fri, 25 Oct 2024 15:30:45 +0200 Subject: [PATCH 03/32] fix: Move wfa_convex_penalties declaration earlier --- src/common/wflign/src/wflign.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index 32c520aa..ce3aa3ba 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -332,6 +332,7 @@ void WFlign::wflign_affine_wavefront( } // Use biWFA for smaller sequences or very high identity matches + if (force_biwfa_alignment || (query_length <= segment_length * 8 || target_length <= segment_length * 8) || (mashmap_estimated_identity >= 0.99 && From b9c272720611ff56d3dfb97fd432a896e2efeb79 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Fri, 25 Oct 2024 15:31:58 +0200 Subject: [PATCH 04/32] fix: Move wfa_convex_penalties definition earlier --- src/common/wflign/src/wflign.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index ce3aa3ba..bf4eb416 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -509,6 +509,24 @@ void WFlign::wflign_affine_wavefront( std::chrono::steady_clock::now() - start_time).count(); #endif + // Set penalties for wfa convex + wflign_penalties_t wfa_convex_penalties; + if (wfa_patching_mismatch_score > 0 && wfa_patching_gap_opening_score1 > 0 && wfa_patching_gap_extension_score1 > 0 && wfa_patching_gap_opening_score2 > 0 && wfa_patching_gap_extension_score2 > 0){ + wfa_convex_penalties.match = 0; + wfa_convex_penalties.mismatch = wfa_patching_mismatch_score; + wfa_convex_penalties.gap_opening1 = wfa_patching_gap_opening_score1; + wfa_convex_penalties.gap_extension1 = wfa_patching_gap_extension_score1; + wfa_convex_penalties.gap_opening2 = wfa_patching_gap_opening_score2; + wfa_convex_penalties.gap_extension2 = wfa_patching_gap_extension_score2; + } else { + wfa_convex_penalties.match = 0; + wfa_convex_penalties.mismatch = 5; + wfa_convex_penalties.gap_opening1 = 8; + wfa_convex_penalties.gap_extension1 = 2; + wfa_convex_penalties.gap_opening2 = 49; + wfa_convex_penalties.gap_extension2 = 1; + } + // Free old aligner delete wf_aligner; From ba3f458ef44170d405a12b283f4d81153d338a38 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Fri, 25 Oct 2024 15:33:05 +0200 Subject: [PATCH 05/32] fix: move wfa_convex_penalties definition before first use --- src/common/wflign/src/wflign.cpp | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index bf4eb416..8f8eb6eb 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -509,23 +509,6 @@ void WFlign::wflign_affine_wavefront( std::chrono::steady_clock::now() - start_time).count(); #endif - // Set penalties for wfa convex - wflign_penalties_t wfa_convex_penalties; - if (wfa_patching_mismatch_score > 0 && wfa_patching_gap_opening_score1 > 0 && wfa_patching_gap_extension_score1 > 0 && wfa_patching_gap_opening_score2 > 0 && wfa_patching_gap_extension_score2 > 0){ - wfa_convex_penalties.match = 0; - wfa_convex_penalties.mismatch = wfa_patching_mismatch_score; - wfa_convex_penalties.gap_opening1 = wfa_patching_gap_opening_score1; - wfa_convex_penalties.gap_extension1 = wfa_patching_gap_extension_score1; - wfa_convex_penalties.gap_opening2 = wfa_patching_gap_opening_score2; - wfa_convex_penalties.gap_extension2 = wfa_patching_gap_extension_score2; - } else { - wfa_convex_penalties.match = 0; - wfa_convex_penalties.mismatch = 5; - wfa_convex_penalties.gap_opening1 = 8; - wfa_convex_penalties.gap_extension1 = 2; - wfa_convex_penalties.gap_opening2 = 49; - wfa_convex_penalties.gap_extension2 = 1; - } // Free old aligner delete wf_aligner; From 00b27a11d7686a9ee8e62fb41706b2205c9c5956 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Fri, 25 Oct 2024 15:33:24 +0200 Subject: [PATCH 06/32] fix: Set penalties for wfa convex in wflign.cpp --- src/common/wflign/src/wflign.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index 8f8eb6eb..afeb34f4 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -442,6 +442,24 @@ void WFlign::wflign_affine_wavefront( #ifdef WFA_PNG_TSV_TIMING const auto start_time = std::chrono::steady_clock::now(); #endif + // Set penalties for wfa convex + wflign_penalties_t wfa_convex_penalties; + if (wfa_patching_mismatch_score > 0 && wfa_patching_gap_opening_score1 > 0 && wfa_patching_gap_extension_score1 > 0 && wfa_patching_gap_opening_score2 > 0 && wfa_patching_gap_extension_score2 > 0){ + wfa_convex_penalties.match = 0; + wfa_convex_penalties.mismatch = wfa_patching_mismatch_score; + wfa_convex_penalties.gap_opening1 = wfa_patching_gap_opening_score1; + wfa_convex_penalties.gap_extension1 = wfa_patching_gap_extension_score1; + wfa_convex_penalties.gap_opening2 = wfa_patching_gap_opening_score2; + wfa_convex_penalties.gap_extension2 = wfa_patching_gap_extension_score2; + } else { + wfa_convex_penalties.match = 0; + wfa_convex_penalties.mismatch = 5; + wfa_convex_penalties.gap_opening1 = 8; + wfa_convex_penalties.gap_extension1 = 2; + wfa_convex_penalties.gap_opening2 = 49; + wfa_convex_penalties.gap_extension2 = 1; + } + if (force_biwfa_alignment || (query_length <= segment_length * 8 || target_length <= segment_length * 8) || (mashmap_estimated_identity >= 0.99 From 3307042770f3c8830fb5330c2e3b7c82b74214c7 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Sun, 27 Oct 2024 20:49:47 +0200 Subject: [PATCH 07/32] fix: Remove duplicate declaration of `wfa_convex_penalties` --- src/common/wflign/src/wflign.cpp | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index afeb34f4..250639e5 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -382,23 +382,6 @@ void WFlign::wflign_affine_wavefront( */ } - // Set penalties for wfa convex - wflign_penalties_t wfa_convex_penalties; - if (wfa_patching_mismatch_score > 0 && wfa_patching_gap_opening_score1 > 0 && wfa_patching_gap_extension_score1 > 0 && wfa_patching_gap_opening_score2 > 0 && wfa_patching_gap_extension_score2 > 0){ - wfa_convex_penalties.match = 0; - wfa_convex_penalties.mismatch = wfa_patching_mismatch_score; - wfa_convex_penalties.gap_opening1 = wfa_patching_gap_opening_score1; - wfa_convex_penalties.gap_extension1 = wfa_patching_gap_extension_score1; - wfa_convex_penalties.gap_opening2 = wfa_patching_gap_opening_score2; - wfa_convex_penalties.gap_extension2 = wfa_patching_gap_extension_score2; - } else { - wfa_convex_penalties.match = 0; - wfa_convex_penalties.mismatch = 5; - wfa_convex_penalties.gap_opening1 = 8; - wfa_convex_penalties.gap_extension1 = 2; - wfa_convex_penalties.gap_opening2 = 49; - wfa_convex_penalties.gap_extension2 = 1; - } // heuristic bound on the max mash dist, adaptive based on estimated // identity the goal here is to sparsify the set of alignments in the From ec4b7b91b89433464f8999ed7851e52fea2d195b Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Sun, 27 Oct 2024 20:50:21 +0200 Subject: [PATCH 08/32] fix: move declaration of wfa_convex_penalties before first use --- src/common/wflign/src/wflign.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index 250639e5..d8a02fba 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -425,6 +425,7 @@ void WFlign::wflign_affine_wavefront( #ifdef WFA_PNG_TSV_TIMING const auto start_time = std::chrono::steady_clock::now(); #endif + // Set penalties for wfa convex wflign_penalties_t wfa_convex_penalties; if (wfa_patching_mismatch_score > 0 && wfa_patching_gap_opening_score1 > 0 && wfa_patching_gap_extension_score1 > 0 && wfa_patching_gap_opening_score2 > 0 && wfa_patching_gap_extension_score2 > 0){ From 1bafd4ec809261237af2c4827d0055716487352f Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Mon, 28 Oct 2024 11:24:23 -0500 Subject: [PATCH 09/32] okify the declaration of convex penalties --- src/common/wflign/src/wflign.cpp | 36 ++++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index d8a02fba..0dd00992 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -331,6 +331,24 @@ void WFlign::wflign_affine_wavefront( return; } + // Set penalties for wfa convex + wflign_penalties_t wfa_convex_penalties; + if (wfa_patching_mismatch_score > 0 && wfa_patching_gap_opening_score1 > 0 && wfa_patching_gap_extension_score1 > 0 && wfa_patching_gap_opening_score2 > 0 && wfa_patching_gap_extension_score2 > 0){ + wfa_convex_penalties.match = 0; + wfa_convex_penalties.mismatch = wfa_patching_mismatch_score; + wfa_convex_penalties.gap_opening1 = wfa_patching_gap_opening_score1; + wfa_convex_penalties.gap_extension1 = wfa_patching_gap_extension_score1; + wfa_convex_penalties.gap_opening2 = wfa_patching_gap_opening_score2; + wfa_convex_penalties.gap_extension2 = wfa_patching_gap_extension_score2; + } else { + wfa_convex_penalties.match = 0; + wfa_convex_penalties.mismatch = 5; + wfa_convex_penalties.gap_opening1 = 8; + wfa_convex_penalties.gap_extension1 = 2; + wfa_convex_penalties.gap_opening2 = 49; + wfa_convex_penalties.gap_extension2 = 1; + } + // Use biWFA for smaller sequences or very high identity matches if (force_biwfa_alignment || @@ -426,24 +444,6 @@ void WFlign::wflign_affine_wavefront( const auto start_time = std::chrono::steady_clock::now(); #endif - // Set penalties for wfa convex - wflign_penalties_t wfa_convex_penalties; - if (wfa_patching_mismatch_score > 0 && wfa_patching_gap_opening_score1 > 0 && wfa_patching_gap_extension_score1 > 0 && wfa_patching_gap_opening_score2 > 0 && wfa_patching_gap_extension_score2 > 0){ - wfa_convex_penalties.match = 0; - wfa_convex_penalties.mismatch = wfa_patching_mismatch_score; - wfa_convex_penalties.gap_opening1 = wfa_patching_gap_opening_score1; - wfa_convex_penalties.gap_extension1 = wfa_patching_gap_extension_score1; - wfa_convex_penalties.gap_opening2 = wfa_patching_gap_opening_score2; - wfa_convex_penalties.gap_extension2 = wfa_patching_gap_extension_score2; - } else { - wfa_convex_penalties.match = 0; - wfa_convex_penalties.mismatch = 5; - wfa_convex_penalties.gap_opening1 = 8; - wfa_convex_penalties.gap_extension1 = 2; - wfa_convex_penalties.gap_opening2 = 49; - wfa_convex_penalties.gap_extension2 = 1; - } - if (force_biwfa_alignment || (query_length <= segment_length * 8 || target_length <= segment_length * 8) || (mashmap_estimated_identity >= 0.99 From 1ba06a39935bd757515fe7deca1e82fd5220ac1f Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Mon, 28 Oct 2024 11:24:53 -0500 Subject: [PATCH 10/32] fix: Add missing `do_biwfa_alignment()` function implementation --- src/common/wflign/src/wflign.cpp | 86 ++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index 0dd00992..c5e0f805 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -10,6 +10,92 @@ namespace wflign { namespace wavefront { +void do_biwfa_alignment( + const std::string& query_name, + char* const query, + const uint64_t query_total_length, + const uint64_t query_offset, + const uint64_t query_length, + const bool query_is_rev, + const std::string& target_name, + char* const target, + const uint64_t target_total_length, + const uint64_t target_offset, + const uint64_t target_length, + std::ostream& out, + const wflign_penalties_t& penalties, + const bool emit_md_tag, + const bool paf_format_else_sam, + const bool no_seq_in_sam, + const float min_identity, + const uint64_t wflign_max_len_minor, + const float mashmap_estimated_identity) { + + // Create WFA aligner with the provided penalties + wfa::WFAlignerGapAffine2Pieces wf_aligner( + 0, // match + penalties.mismatch, + penalties.gap_opening1, + penalties.gap_extension1, + penalties.gap_opening2, + penalties.gap_extension2, + wfa::WFAligner::Alignment, + wfa::WFAligner::MemoryUltralow); + + wf_aligner.setHeuristicNone(); + + // Perform the alignment + const int status = wf_aligner.alignEnd2End(target, (int)target_length, query, (int)query_length); + + if (status == 0) { // WF_STATUS_SUCCESSFUL + // Create alignment record + alignment_t aln; + aln.ok = true; + aln.j = 0; + aln.i = 0; + aln.query_length = query_length; + aln.target_length = target_length; + + // Copy alignment CIGAR + wflign_edit_cigar_copy(wf_aligner, &aln.edit_cigar); + + // Write alignment + if (paf_format_else_sam) { + write_alignment_paf( + out, + aln, + query_name, + query_total_length, + query_offset, + query_length, + query_is_rev, + target_name, + target_total_length, + target_offset, + target_length, + min_identity, + mashmap_estimated_identity); + } else { + write_sam_alignment( + out, + aln, + query_name, + query, + query_total_length, + query_offset, + query_length, + query_is_rev, + target_name, + target, + target_total_length, + target_offset, + target_length, + emit_md_tag, + no_seq_in_sam); + } + } +} + /* * Configuration */ From 4278901d4a85a250f6d17b7cfc405923f31898ac Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Mon, 28 Oct 2024 11:25:51 -0500 Subject: [PATCH 11/32] fix: Replace `write_sam_alignment` with `write_merged_alignment` --- src/common/wflign/src/wflign.cpp | 38 +++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index c5e0f805..0436f7e5 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -76,22 +76,48 @@ void do_biwfa_alignment( min_identity, mashmap_estimated_identity); } else { - write_sam_alignment( + write_merged_alignment( out, - aln, - query_name, + {&aln}, // Convert single alignment to vector + wf_aligner, + penalties, + emit_md_tag, + paf_format_else_sam, + no_seq_in_sam, query, + query_name, query_total_length, query_offset, query_length, query_is_rev, - target_name, target, + target_name, target_total_length, target_offset, target_length, - emit_md_tag, - no_seq_in_sam); + min_identity, +#ifdef WFA_PNG_TSV_TIMING + 0, // elapsed_time_wflambda_ms + 1, // num_alignments + 1, // num_alignments_performed +#endif + mashmap_estimated_identity, + wflign_max_len_minor, + wflign_max_len_minor, + 0, // erode_k + 0, // chain_gap + 0, // max_patching_score + 0, // min_inversion_length + MIN_WF_LENGTH, + 0 // wf_max_dist_threshold +#ifdef WFA_PNG_TSV_TIMING + , + nullptr, // prefix_wavefront_plot_in_png + 0, // wfplot_max_size + false, // emit_patching_tsv + nullptr // out_patching_tsv +#endif + ); } } } From ed9b5ab51d37ccced67171c90aca362a4fa85271 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Mon, 28 Oct 2024 11:26:20 -0500 Subject: [PATCH 12/32] fix: feat: Replace `write_sam_alignment` with `write_merged_alignment` and adapt parameters --- src/common/wflign/src/wflign.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index 0436f7e5..a0f3eaff 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -10,6 +10,11 @@ namespace wflign { namespace wavefront { +/* +* Configuration +*/ +#define MIN_WF_LENGTH 256 + void do_biwfa_alignment( const std::string& query_name, char* const query, @@ -126,7 +131,6 @@ void do_biwfa_alignment( * Configuration */ #define MAX_LEN_FOR_STANDARD_WFA 1000 -#define MIN_WF_LENGTH 256 /* * Utils From c11e239104a8683394c0f878afa96b9cff0e93ee Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Mon, 28 Oct 2024 11:35:13 -0500 Subject: [PATCH 13/32] fix: reverse logic to make biWFA the default aligner --- src/common/wflign/src/wflign.cpp | 10 +++------- src/interface/parse_args.hpp | 2 +- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index a0f3eaff..83ed3ed9 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -158,7 +158,7 @@ void clean_up_sketches(std::vector*> &sketches) { WFlign::WFlign( const uint16_t segment_length, const float min_identity, - const bool force_biwfa_alignment, + const bool force_wflign, const int wfa_mismatch_score, const int wfa_gap_opening_score, const int wfa_gap_extension_score, @@ -184,7 +184,7 @@ WFlign::WFlign( this->segment_length = segment_length; this->min_identity = min_identity; - this->force_biwfa_alignment = force_biwfa_alignment; + this->force_wflign = force_wflign; this->wfa_mismatch_score = wfa_mismatch_score; this->wfa_gap_opening_score = wfa_gap_opening_score; @@ -560,11 +560,7 @@ void WFlign::wflign_affine_wavefront( const auto start_time = std::chrono::steady_clock::now(); #endif - if (force_biwfa_alignment || - (query_length <= segment_length * 8 || target_length <= segment_length * 8) || - (mashmap_estimated_identity >= 0.99 - && query_length <= MAX_LEN_FOR_STANDARD_WFA && target_length <= MAX_LEN_FOR_STANDARD_WFA) - ) { + if (!force_wflign) { wfa::WFAlignerGapAffine2Pieces* wf_aligner = new wfa::WFAlignerGapAffine2Pieces( 0, diff --git a/src/interface/parse_args.hpp b/src/interface/parse_args.hpp index a59f824e..2cd5e25b 100644 --- a/src/interface/parse_args.hpp +++ b/src/interface/parse_args.hpp @@ -111,7 +111,7 @@ void parse_args(int argc, args::Group alignment_opts(parser, "[ Alignment Options ]"); args::ValueFlag align_input_paf(alignment_opts, "FILE", "derive precise alignments for this input PAF", {'i', "input-paf"}); - args::Flag force_biwfa_alignment(alignment_opts, "force-biwfa", "force alignment with biWFA for all sequence pairs", {'I', "force-biwfa"}); + args::Flag force_wflign(alignment_opts, "force-wflign", "force alignment with WFLign instead of the default biWFA", {'I', "force-wflign"}); args::ValueFlag wflambda_segment_length(alignment_opts, "N", "wflambda segment length: size (in bp) of segment mapped in hierarchical WFA problem [default: 256]", {'W', "wflamda-segment"}); args::ValueFlag wfa_score_params(alignment_opts, "mismatch,gap1,ext1", "score parameters for the wfa alignment (affine); match score is fixed at 0 [default: 2,3,1]", From 69a263123f0a6146d9c050a060cdca2eb9de4857 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Mon, 28 Oct 2024 11:35:48 -0500 Subject: [PATCH 14/32] feat: Add option to force WFLign alignment --- src/common/wflign/src/wflign.cpp | 6 +----- src/interface/parse_args.hpp | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index 83ed3ed9..a88c8c71 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -467,11 +467,7 @@ void WFlign::wflign_affine_wavefront( // Use biWFA for smaller sequences or very high identity matches - if (force_biwfa_alignment || - (query_length <= segment_length * 8 || target_length <= segment_length * 8) || - (mashmap_estimated_identity >= 0.99 && - query_length <= MAX_LEN_FOR_STANDARD_WFA && - target_length <= MAX_LEN_FOR_STANDARD_WFA)) { + if (!force_wflign) { do_biwfa_alignment( query_name, query, query_total_length, query_offset, query_length, query_is_rev, diff --git a/src/interface/parse_args.hpp b/src/interface/parse_args.hpp index 2cd5e25b..4315f0ca 100644 --- a/src/interface/parse_args.hpp +++ b/src/interface/parse_args.hpp @@ -371,7 +371,7 @@ void parse_args(int argc, align_parameters.emit_md_tag = args::get(emit_md_tag); align_parameters.sam_format = args::get(sam_format); align_parameters.no_seq_in_sam = args::get(no_seq_in_sam); - align_parameters.force_biwfa_alignment = args::get(force_biwfa_alignment); + align_parameters.force_wflign = args::get(force_wflign); map_parameters.split = !args::get(no_split); map_parameters.dropRand = false;//ToFix: !args::get(keep_ties); align_parameters.split = !args::get(no_split); From 9602bfd08d1609fb194807a0fff219326304539b Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Mon, 28 Oct 2024 11:36:47 -0500 Subject: [PATCH 15/32] feat: Add force_wflign variable to WFlign class --- src/common/wflign/src/wflign.cpp | 4 ++-- src/common/wflign/src/wflign.hpp | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index a88c8c71..7d651820 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -158,7 +158,7 @@ void clean_up_sketches(std::vector*> &sketches) { WFlign::WFlign( const uint16_t segment_length, const float min_identity, - const bool force_wflign, + const bool force_wflign_, const int wfa_mismatch_score, const int wfa_gap_opening_score, const int wfa_gap_extension_score, @@ -184,7 +184,7 @@ WFlign::WFlign( this->segment_length = segment_length; this->min_identity = min_identity; - this->force_wflign = force_wflign; + this->force_wflign = force_wflign_; this->wfa_mismatch_score = wfa_mismatch_score; this->wfa_gap_opening_score = wfa_gap_opening_score; diff --git a/src/common/wflign/src/wflign.hpp b/src/common/wflign/src/wflign.hpp index a868484c..e678fde2 100644 --- a/src/common/wflign/src/wflign.hpp +++ b/src/common/wflign/src/wflign.hpp @@ -88,6 +88,7 @@ namespace wflign { bool emit_patching_tsv; std::ostream* out_patching_tsv; #endif + bool force_wflign; bool merge_alignments; bool emit_md_tag; bool paf_format_else_sam; @@ -97,7 +98,7 @@ namespace wflign { WFlign( const uint16_t segment_length, const float min_identity, - const bool force_biwfa_alignment, + const bool force_wflign, const int wfa_mismatch_score, const int wfa_gap_opening_score, const int wfa_gap_extension_score, From beedda8ffe3d099358f4d53cf36b5bf5fbdcae26 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Mon, 28 Oct 2024 11:37:22 -0500 Subject: [PATCH 16/32] feat: Add force_wflign parameter to align::Parameters struct --- src/align/include/align_parameters.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/align/include/align_parameters.hpp b/src/align/include/align_parameters.hpp index 1b4cacd7..790989af 100644 --- a/src/align/include/align_parameters.hpp +++ b/src/align/include/align_parameters.hpp @@ -26,6 +26,7 @@ struct Parameters { uint16_t wflambda_segment_length; //segment length for wflambda bool force_biwfa_alignment; //force biwfa alignment + bool force_wflign; //force alignment with WFlign instead of the default biWFA int wfa_mismatch_score; int wfa_gap_opening_score; From 78154a3c0693122d5d8705ccf76fac163bc40d4c Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Mon, 28 Oct 2024 11:38:43 -0500 Subject: [PATCH 17/32] fix: fix incorrect logic for choosing between WFlign and biWFA --- src/common/wflign/src/wflign.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index 7d651820..db36a26f 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -467,7 +467,7 @@ void WFlign::wflign_affine_wavefront( // Use biWFA for smaller sequences or very high identity matches - if (!force_wflign) { + if (force_wflign) { do_biwfa_alignment( query_name, query, query_total_length, query_offset, query_length, query_is_rev, From da5aa07bbc870a102607c0774ff63f9dbc0da774 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Mon, 28 Oct 2024 11:40:51 -0500 Subject: [PATCH 18/32] fix: Correct the logic for force_wflign in wflign.cpp --- src/common/wflign/src/wflign.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index db36a26f..7d651820 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -467,7 +467,7 @@ void WFlign::wflign_affine_wavefront( // Use biWFA for smaller sequences or very high identity matches - if (force_wflign) { + if (!force_wflign) { do_biwfa_alignment( query_name, query, query_total_length, query_offset, query_length, query_is_rev, From 75d52f8cfc36a2faaee66f77473c31cb7f238e4a Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Mon, 28 Oct 2024 11:52:21 -0500 Subject: [PATCH 19/32] line --- src/common/wflign/src/wflign.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index 7d651820..4c203b7a 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -468,7 +468,6 @@ void WFlign::wflign_affine_wavefront( // Use biWFA for smaller sequences or very high identity matches if (!force_wflign) { - do_biwfa_alignment( query_name, query, query_total_length, query_offset, query_length, query_is_rev, target_name, target, target_total_length, target_offset, target_length, From bf6f63bebd5110974eed40668c75b8688c389d41 Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Mon, 28 Oct 2024 11:59:47 -0500 Subject: [PATCH 20/32] match asm20/defaults for mm2 in biwfa params --- src/common/wflign/src/wflign.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index 4c203b7a..7a292cd3 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -458,10 +458,10 @@ void WFlign::wflign_affine_wavefront( wfa_convex_penalties.gap_extension2 = wfa_patching_gap_extension_score2; } else { wfa_convex_penalties.match = 0; - wfa_convex_penalties.mismatch = 5; - wfa_convex_penalties.gap_opening1 = 8; + wfa_convex_penalties.mismatch = 6; + wfa_convex_penalties.gap_opening1 = 6; wfa_convex_penalties.gap_extension1 = 2; - wfa_convex_penalties.gap_opening2 = 49; + wfa_convex_penalties.gap_opening2 = 26; wfa_convex_penalties.gap_extension2 = 1; } From 92660ca9ede2aa3e2827f12582d823da128d0672 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Mon, 28 Oct 2024 12:01:43 -0500 Subject: [PATCH 21/32] fix: Set default chain gap to 2k --- src/interface/parse_args.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/interface/parse_args.hpp b/src/interface/parse_args.hpp index 4315f0ca..29047be3 100644 --- a/src/interface/parse_args.hpp +++ b/src/interface/parse_args.hpp @@ -428,8 +428,8 @@ void parse_args(int argc, map_parameters.chain_gap = l; align_parameters.chain_gap = l; } else { - map_parameters.chain_gap = 30000; - align_parameters.chain_gap = 30000; + map_parameters.chain_gap = 2000; + align_parameters.chain_gap = 2000; } if (max_mapping_length) { From a14e4b40f56b1689392ac535eeb9c126b00372b9 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Mon, 28 Oct 2024 12:02:05 -0500 Subject: [PATCH 22/32] feat: update help text for chain-gap default value --- src/interface/parse_args.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interface/parse_args.hpp b/src/interface/parse_args.hpp index 29047be3..49c23d3b 100644 --- a/src/interface/parse_args.hpp +++ b/src/interface/parse_args.hpp @@ -84,7 +84,7 @@ void parse_args(int argc, args::ValueFlag query_list(mapping_opts, "FILE", "file containing list of query sequence names", {'A', "query-list"}); args::Flag approx_mapping(mapping_opts, "approx-map", "skip base-level alignment, producing an approximate mapping in PAF", {'m',"approx-map"}); args::Flag no_split(mapping_opts, "no-split", "disable splitting of input sequences during mapping [default: enabled]", {'N',"no-split"}); - args::ValueFlag chain_gap(mapping_opts, "N", "chain mappings closer than this distance in query and target, sets approximate maximum variant length detectable in alignment [default: 30k]", {'c', "chain-gap"}); + args::ValueFlag chain_gap(mapping_opts, "N", "chain mappings closer than this distance in query and target, sets approximate maximum variant length detectable in alignment [default: 2k]", {'c', "chain-gap"}); args::ValueFlag max_mapping_length(mapping_opts, "N", "maximum length of a single mapping before breaking (inf to unset) [default: 50k]", {'P', "max-mapping-length"}); args::Flag drop_low_map_pct_identity(mapping_opts, "K", "drop mappings with estimated identity below --map-pct-id=%", {'K', "drop-low-map-id"}); args::ValueFlag overlap_threshold(mapping_opts, "F", "drop mappings overlapping more than fraction F with a higher scoring mapping [default: 0.5]", {'O', "overlap-threshold"}); From f8d9fa9c20127ee78703db1ab37a57a94247b9e3 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Mon, 28 Oct 2024 12:26:13 -0500 Subject: [PATCH 23/32] fix: Properly manage alignment_t objects in wflign --- src/common/wflign/src/wflign.cpp | 18 +++++++++--------- src/common/wflign/src/wflign_patch.cpp | 11 ++++++++++- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index 7a292cd3..60e1564d 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -53,16 +53,16 @@ void do_biwfa_alignment( const int status = wf_aligner.alignEnd2End(target, (int)target_length, query, (int)query_length); if (status == 0) { // WF_STATUS_SUCCESSFUL - // Create alignment record - alignment_t aln; - aln.ok = true; - aln.j = 0; - aln.i = 0; - aln.query_length = query_length; - aln.target_length = target_length; + // Create alignment record on heap + auto* aln = new alignment_t(); + aln->ok = true; + aln->j = 0; + aln->i = 0; + aln->query_length = query_length; + aln->target_length = target_length; // Copy alignment CIGAR - wflign_edit_cigar_copy(wf_aligner, &aln.edit_cigar); + wflign_edit_cigar_copy(wf_aligner, &aln->edit_cigar); // Write alignment if (paf_format_else_sam) { @@ -83,7 +83,7 @@ void do_biwfa_alignment( } else { write_merged_alignment( out, - {&aln}, // Convert single alignment to vector + {aln}, // Pass pointer to alignment wf_aligner, penalties, emit_md_tag, diff --git a/src/common/wflign/src/wflign_patch.cpp b/src/common/wflign/src/wflign_patch.cpp index de004ff0..9ec4bdd9 100644 --- a/src/common/wflign/src/wflign_patch.cpp +++ b/src/common/wflign/src/wflign_patch.cpp @@ -2015,8 +2015,17 @@ query_start : query_end) } } - // always clean up + // Clean up free(cigarv); + + // Clean up alignment objects if we own them + if (!paf_format_else_sam) { + for (auto* aln : trace) { + if (aln != nullptr) { + delete aln; + } + } + } if (!paf_format_else_sam) { From 907a16e2e70754c1b5aa792ac4887df2e838f224 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Mon, 28 Oct 2024 15:28:10 -0500 Subject: [PATCH 24/32] fix: Dereference alignment_t pointer before passing to write_alignment_paf --- src/common/wflign/src/wflign.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index 60e1564d..a8fd3cda 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -68,7 +68,7 @@ void do_biwfa_alignment( if (paf_format_else_sam) { write_alignment_paf( out, - aln, + *aln, // Dereference the pointer query_name, query_total_length, query_offset, From 263321c570e1524154f1629b699f42376d01de72 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Mon, 28 Oct 2024 15:40:34 -0500 Subject: [PATCH 25/32] fix: Use biWFA for smaller sequences or very high identity matches --- src/common/wflign/src/wflign.cpp | 59 ++++++++++---------------------- 1 file changed, 19 insertions(+), 40 deletions(-) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index a8fd3cda..777a45e4 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -53,22 +53,23 @@ void do_biwfa_alignment( const int status = wf_aligner.alignEnd2End(target, (int)target_length, query, (int)query_length); if (status == 0) { // WF_STATUS_SUCCESSFUL - // Create alignment record on heap - auto* aln = new alignment_t(); - aln->ok = true; - aln->j = 0; - aln->i = 0; - aln->query_length = query_length; - aln->target_length = target_length; + // Create alignment record on stack + alignment_t aln; + aln.ok = true; + aln.j = 0; + aln.i = 0; + aln.query_length = query_length; + aln.target_length = target_length; + aln.is_rev = false; // Copy alignment CIGAR - wflign_edit_cigar_copy(wf_aligner, &aln->edit_cigar); + wflign_edit_cigar_copy(wf_aligner, &aln.edit_cigar); // Write alignment if (paf_format_else_sam) { write_alignment_paf( out, - *aln, // Dereference the pointer + aln, query_name, query_total_length, query_offset, @@ -81,48 +82,26 @@ void do_biwfa_alignment( min_identity, mashmap_estimated_identity); } else { - write_merged_alignment( + // Write SAM output directly + write_alignment_sam( out, - {aln}, // Pass pointer to alignment - wf_aligner, - penalties, - emit_md_tag, - paf_format_else_sam, - no_seq_in_sam, - query, + aln, query_name, query_total_length, query_offset, query_length, query_is_rev, - target, target_name, target_total_length, target_offset, target_length, min_identity, -#ifdef WFA_PNG_TSV_TIMING - 0, // elapsed_time_wflambda_ms - 1, // num_alignments - 1, // num_alignments_performed -#endif mashmap_estimated_identity, - wflign_max_len_minor, - wflign_max_len_minor, - 0, // erode_k - 0, // chain_gap - 0, // max_patching_score - 0, // min_inversion_length - MIN_WF_LENGTH, - 0 // wf_max_dist_threshold -#ifdef WFA_PNG_TSV_TIMING - , - nullptr, // prefix_wavefront_plot_in_png - 0, // wfplot_max_size - false, // emit_patching_tsv - nullptr // out_patching_tsv -#endif - ); + no_seq_in_sam, + emit_md_tag, + query, + target, + 0); // No target pointer shift for biwfa } } } @@ -467,7 +446,7 @@ void WFlign::wflign_affine_wavefront( // Use biWFA for smaller sequences or very high identity matches - if (!force_wflign) { + if (!force_wflign && (query_length <= MAX_LEN_FOR_STANDARD_WFA || target_length <= MAX_LEN_FOR_STANDARD_WFA)) { do_biwfa_alignment( query_name, query, query_total_length, query_offset, query_length, query_is_rev, target_name, target, target_total_length, target_offset, target_length, From 62e6e9f83635f39c528d5449a7b925aac6a772d3 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Tue, 29 Oct 2024 10:40:56 -0500 Subject: [PATCH 26/32] fix: Remove redundant alignment cleanup --- src/common/wflign/src/wflign_patch.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/common/wflign/src/wflign_patch.cpp b/src/common/wflign/src/wflign_patch.cpp index 9ec4bdd9..bbbb6f92 100644 --- a/src/common/wflign/src/wflign_patch.cpp +++ b/src/common/wflign/src/wflign_patch.cpp @@ -2017,18 +2017,15 @@ query_start : query_end) // Clean up free(cigarv); - - // Clean up alignment objects if we own them + if (!paf_format_else_sam) { + // Clean up the trace alignments since we're done with them for (auto* aln : trace) { if (aln != nullptr) { delete aln; } } - } - - - if (!paf_format_else_sam) { + for (auto& patch_aln : multi_patch_alns) { write_alignment_sam( out, patch_aln, query_name, query_total_length, From e1884c0a37cfc349bf32cf6f7387631b6f0dea05 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Tue, 29 Oct 2024 11:44:24 -0500 Subject: [PATCH 27/32] fix: Prevent double-free of alignments in write_merged_alignment() --- src/common/wflign/src/wflign_patch.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/common/wflign/src/wflign_patch.cpp b/src/common/wflign/src/wflign_patch.cpp index bbbb6f92..8977ac01 100644 --- a/src/common/wflign/src/wflign_patch.cpp +++ b/src/common/wflign/src/wflign_patch.cpp @@ -2017,7 +2017,8 @@ query_start : query_end) // Clean up free(cigarv); - + + // Write SAM format alignments and clean up trace if (!paf_format_else_sam) { // Clean up the trace alignments since we're done with them for (auto* aln : trace) { @@ -2026,6 +2027,7 @@ query_start : query_end) } } + // Write the patch alignments for (auto& patch_aln : multi_patch_alns) { write_alignment_sam( out, patch_aln, query_name, query_total_length, @@ -2034,6 +2036,13 @@ query_start : query_end) min_identity, mashmap_estimated_identity, no_seq_in_sam, emit_md_tag, query, target, target_pointer_shift); } + + // Clean up patch alignments after writing + for (auto& patch_aln : multi_patch_alns) { + free(patch_aln.edit_cigar.cigar_ops); + patch_aln.edit_cigar.cigar_ops = nullptr; + } + multi_patch_alns.clear(); } else { // write how many reverse complement alignments were found //std::cerr << "got " << rev_patch_alns.size() << " rev patch alns" << std::endl; From 90177340a2bc4442910629f63d84191b6ba9c289 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Wed, 30 Oct 2024 10:27:33 -0500 Subject: [PATCH 28/32] refactor: Use biWFA alignment directly instead of WFline --- src/align/include/computeAlignments.hpp | 60 ++++++++----------------- 1 file changed, 19 insertions(+), 41 deletions(-) diff --git a/src/align/include/computeAlignments.hpp b/src/align/include/computeAlignments.hpp index 17c7f015..ee6c0855 100644 --- a/src/align/include/computeAlignments.hpp +++ b/src/align/include/computeAlignments.hpp @@ -242,49 +242,19 @@ std::string processAlignment(seq_record_t* rec) { skch::CommonFunc::reverseComplement(query_seq.data(), queryRegionStrand.data(), query_seq.size()); } - wflign::wavefront::WFlign wflign( - param.wflambda_segment_length, - param.min_identity, - param.force_biwfa_alignment, - param.wfa_mismatch_score, - param.wfa_gap_opening_score, - param.wfa_gap_extension_score, - param.wfa_patching_mismatch_score, - param.wfa_patching_gap_opening_score1, - param.wfa_patching_gap_extension_score1, - param.wfa_patching_gap_opening_score2, - param.wfa_patching_gap_extension_score2, - rec->currentRecord.mashmap_estimated_identity, - param.wflign_mismatch_score, - param.wflign_gap_opening_score, - param.wflign_gap_extension_score, - param.wflign_max_mash_dist, - param.wflign_min_wavefront_length, - param.wflign_max_distance_threshold, - param.wflign_max_len_major, - param.wflign_max_len_minor, - param.wflign_erode_k, - param.chain_gap, - param.wflign_min_inv_patch_len, - param.wflign_max_patching_score); + // Set up penalties for biWFA + wflign_penalties_t wfa_penalties; + wfa_penalties.match = 0; + wfa_penalties.mismatch = param.wfa_patching_mismatch_score; + wfa_penalties.gap_opening1 = param.wfa_patching_gap_opening_score1; + wfa_penalties.gap_extension1 = param.wfa_patching_gap_extension_score1; + wfa_penalties.gap_opening2 = param.wfa_patching_gap_opening_score2; + wfa_penalties.gap_extension2 = param.wfa_patching_gap_extension_score2; std::stringstream output; - wflign.set_output( - &output, -#ifdef WFA_PNG_TSV_TIMING - !param.tsvOutputPrefix.empty(), - nullptr, - param.prefix_wavefront_plot_in_png, - param.wfplot_max_size, - !param.path_patching_info_in_tsv.empty(), - nullptr, -#endif - true, // merge alignments - param.emit_md_tag, - !param.sam_format, - param.no_seq_in_sam); - wflign.wflign_affine_wavefront( + // Do direct biWFA alignment + wflign::wavefront::do_biwfa_alignment( rec->currentRecord.qId, queryRegionStrand.data(), rec->queryTotalLength, @@ -295,7 +265,15 @@ std::string processAlignment(seq_record_t* rec) { ref_seq_ptr, rec->refTotalLength, rec->currentRecord.rStartPos, - rec->currentRecord.rEndPos - rec->currentRecord.rStartPos); + rec->currentRecord.rEndPos - rec->currentRecord.rStartPos, + output, + wfa_penalties, + param.emit_md_tag, + !param.sam_format, + param.no_seq_in_sam, + param.min_identity, + param.wflign_max_len_minor, + rec->currentRecord.mashmap_estimated_identity); return output.str(); } From f4a106e208d42ac31ffa8aea08b7f759b397854f Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Wed, 30 Oct 2024 10:30:19 -0500 Subject: [PATCH 29/32] feat: declare do_biwfa_alignment function in wflign.hpp --- src/common/wflign/src/wflign.hpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/common/wflign/src/wflign.hpp b/src/common/wflign/src/wflign.hpp index e678fde2..bf6dd3f4 100644 --- a/src/common/wflign/src/wflign.hpp +++ b/src/common/wflign/src/wflign.hpp @@ -34,6 +34,27 @@ namespace wflign { namespace wavefront { + void do_biwfa_alignment( + const std::string& query_name, + char* const query, + const uint64_t query_total_length, + const uint64_t query_offset, + const uint64_t query_length, + const bool query_is_rev, + const std::string& target_name, + char* const target, + const uint64_t target_total_length, + const uint64_t target_offset, + const uint64_t target_length, + std::ostream& out, + const wflign_penalties_t& penalties, + const bool emit_md_tag, + const bool paf_format_else_sam, + const bool no_seq_in_sam, + const float min_identity, + const uint64_t wflign_max_len_minor, + const float mashmap_estimated_identity); + class WFlign { public: // WFlambda parameters From eb3b735e535b643abb320af62cd2f318e1548460 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Wed, 30 Oct 2024 10:35:14 -0500 Subject: [PATCH 30/32] fix: Comment out pt:Z:true and iv:Z:false tags in SAM and PAF output --- src/common/wflign/src/wflign_patch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/wflign/src/wflign_patch.cpp b/src/common/wflign/src/wflign_patch.cpp index 8977ac01..677887a4 100644 --- a/src/common/wflign/src/wflign_patch.cpp +++ b/src/common/wflign/src/wflign_patch.cpp @@ -2223,8 +2223,8 @@ void write_alignment_sam( << "gi:f:" << patch_gap_compressed_identity << "\t" << "bi:f:" << patch_block_identity << "\t" << "md:f:" << mashmap_estimated_identity << "\t" - << "pt:Z:true" << "\t" - << "iv:Z:" << (patch_aln.is_rev ? "true" : "false"); + //<< "pt:Z:true" << "\t" + //<< "iv:Z:" << (patch_aln.is_rev ? "true" : "false"); if (emit_md_tag) { out << "\t"; From e63c51fe0ea98b43b505a66c7069b1a9287e789d Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Wed, 30 Oct 2024 10:43:05 -0500 Subject: [PATCH 31/32] fix: Add missing semicolon in write_alignment_sam function --- src/common/wflign/src/wflign_patch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/wflign/src/wflign_patch.cpp b/src/common/wflign/src/wflign_patch.cpp index 677887a4..e129f5c1 100644 --- a/src/common/wflign/src/wflign_patch.cpp +++ b/src/common/wflign/src/wflign_patch.cpp @@ -2222,7 +2222,7 @@ void write_alignment_sam( << "NM:i:" << (patch_mismatches + patch_inserted_bp + patch_deleted_bp) << "\t" << "gi:f:" << patch_gap_compressed_identity << "\t" << "bi:f:" << patch_block_identity << "\t" - << "md:f:" << mashmap_estimated_identity << "\t" + << "md:f:" << mashmap_estimated_identity << "\t"; //<< "pt:Z:true" << "\t" //<< "iv:Z:" << (patch_aln.is_rev ? "true" : "false"); From 2fbdef1909769c3f7be4a5cfb76ad4558536c09b Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Wed, 30 Oct 2024 10:46:56 -0500 Subject: [PATCH 32/32] remove dup of biwfa alignment function --- src/common/wflign/src/wflign_patch.hpp | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/src/common/wflign/src/wflign_patch.hpp b/src/common/wflign/src/wflign_patch.hpp index e799ccda..4141f8b8 100644 --- a/src/common/wflign/src/wflign_patch.hpp +++ b/src/common/wflign/src/wflign_patch.hpp @@ -63,27 +63,6 @@ namespace wflign { const int& max_patching_score, const uint64_t& min_inversion_length); - void do_biwfa_alignment( - const std::string& query_name, - char* const query, - const uint64_t query_total_length, - const uint64_t query_offset, - const uint64_t query_length, - const bool query_is_rev, - const std::string& target_name, - char* const target, - const uint64_t target_total_length, - const uint64_t target_offset, - const uint64_t target_length, - std::ostream& out, - const wflign_penalties_t& convex_penalties, - const bool emit_md_tag, - const bool paf_format_else_sam, - const bool no_seq_in_sam, - const float min_identity, - const uint64_t wflign_max_len_minor, - const float mashmap_estimated_identity); - void trim_alignment(alignment_t& aln); std::vector do_progressive_wfa_patch_alignment(