Skip to content

Commit

Permalink
inverted patching works but big mess
Browse files Browse the repository at this point in the history
  • Loading branch information
ekg committed Jun 26, 2024
1 parent 02d21da commit b663abe
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 28 deletions.
45 changes: 45 additions & 0 deletions src/common/wflign/src/wflign_alignment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,51 @@ void wflign_edit_cigar_copy(
cigar_dst->end_offset = cigar_length;
memcpy(cigar_dst->cigar_ops,cigar_ops,cigar_length);
}

int calculate_alignment_score(const wflign_cigar_t& cigar, const wflign_penalties_t& penalties) {
int score = 0;
char prev_op = '\0';
int gap_length = 0;

auto process_gap = [&](char op, int length) {
switch (op) {
case 'M':
// Match is free (best case)
break;
case 'X':
score += length * penalties.mismatch;
break;
case 'I':
case 'D':
score += penalties.gap_opening1 + penalties.gap_extension1;
if (length > 1) {
score += std::min(
penalties.gap_extension1 * (length - 1),
penalties.gap_opening2 + penalties.gap_extension2 * (length - 1)
);
}
break;
}
};

for (int i = cigar.begin_offset; i <= cigar.end_offset; ++i) {
char op = (i < cigar.end_offset) ? cigar.cigar_ops[i] : '\0'; // '\0' to process last gap

if (op != prev_op || i == cigar.end_offset) {
if (gap_length > 0) {
process_gap(prev_op, gap_length);
}
gap_length = 1;
} else {
++gap_length;
}

prev_op = op;
}

return score;
}

/*
// No more necessary
bool hack_cigar(wfa::cigar_t &cigar, const char *query, const char *target,
Expand Down
2 changes: 2 additions & 0 deletions src/common/wflign/src/wflign_alignment.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,6 @@ void wflign_edit_cigar_copy(
wfa::WFAligner& wf_aligner,
wflign_cigar_t* const cigar_dst);

int calculate_alignment_score(const wflign_cigar_t& cigar, const wflign_penalties_t& penalties);

#endif /* WFLIGN_ALIGNMENT_HPP_ */
102 changes: 74 additions & 28 deletions src/common/wflign/src/wflign_patch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,14 +205,32 @@ void do_wfa_patch_alignment(
const int64_t& chain_gap,
const int& max_patching_score) {

const int max_score = max_patching_score ? max_patching_score :
const int max_score =
max_patching_score ? max_patching_score :
convex_penalties.gap_opening2 +
(convex_penalties.gap_extension1 * std::min(
(int)chain_gap,
(int)std::max(target_length, query_length)
)) + 64;

)) + 64;

/*
std::cerr << "doing wfa patch alignment with parameters:"
<< " query_length " << query_length
<< " target_length " << target_length
<< " chain_gap " << chain_gap
<< " max_patching_score " << max_patching_score
<< " max_score " << max_score
<< std::endl
<< "query " << std::string(query + j, query_length)
<< " target " << std::string(target + i, target_length)
<< std::endl;
*/

wf_aligner.setMaxAlignmentSteps(max_score);

int fwd_score = std::numeric_limits<int>::max();
int rev_score = std::numeric_limits<int>::max();

const int status = wf_aligner.alignEnd2End(target + i, target_length, query + j, query_length);
aln.ok = (status == WF_STATUS_ALG_COMPLETED);

Expand All @@ -234,36 +252,64 @@ void do_wfa_patch_alignment(
#endif

wflign_edit_cigar_copy(wf_aligner, &aln.edit_cigar);
fwd_score = calculate_alignment_score(aln.edit_cigar, convex_penalties);
//std::cerr << "forward score is " << fwd_score << std::endl;
}
// if the score is -int32 max, we can try to reverse complement the query
if (wf_aligner.getAlignmentScore() == -2147483648) {
// Try reverse complement alignment
std::string rev_comp_query = reverse_complement(std::string(query + j, query_length));
const int rev_status = wf_aligner.alignEnd2End(target + i, target_length, rev_comp_query.c_str(), query_length);
rev_aln.ok = (wf_aligner.getAlignmentScore() > -2147483648 && rev_status == WF_STATUS_ALG_COMPLETED);

if (rev_aln.ok) {
//std::cerr << "reverse complement alignment worked!" << std::endl;
// auto fwd_score = wf_aligner.getAlignmentScore(); // this is broken
// compute with
//int calculate_alignment_score(const char* cigar, const wflign_penalties_t& penalties) {
//if (wf_aligner.getAlignmentScore() == -2147483648) {
// Try reverse complement alignment
std::string rev_comp_query = reverse_complement(std::string(query + j, query_length));
const int rev_status = wf_aligner.alignEnd2End(target + i, target_length, rev_comp_query.c_str(), query_length);

//auto rev_score = wf_aligner.getAlignmentScore();
//rev_aln.ok = (rev_score > fwd_score && rev_status == WF_STATUS_ALG_COMPLETED);
rev_aln.ok = (rev_status == WF_STATUS_ALG_COMPLETED);

if (rev_aln.ok) {
//std::cerr << "reverse complement alignment worked!" << std::endl;
#ifdef VALIDATE_WFA_WFLIGN
if (!validate_cigar(wf_aligner.cigar, rev_comp_query.c_str(), target + i, query_length,
target_length, 0, 0)) {
std::cerr << "cigar failure at reverse complement alignment " << j << " " << i << std::endl;
unpack_display_cigar(wf_aligner.cigar, rev_comp_query.c_str(), target + i, query_length,
target_length, 0, 0);
std::cerr << ">query (reverse complement)" << std::endl
<< rev_comp_query << std::endl;
std::cerr << ">target" << std::endl
<< std::string(target + i, target_length) << std::endl;
assert(false);
}
if (!validate_cigar(wf_aligner.cigar, rev_comp_query.c_str(), target + i, query_length,
target_length, 0, 0)) {
std::cerr << "cigar failure at reverse complement alignment " << j << " " << i << std::endl;
unpack_display_cigar(wf_aligner.cigar, rev_comp_query.c_str(), target + i, query_length,
target_length, 0, 0);
std::cerr << ">query (reverse complement)" << std::endl
<< rev_comp_query << std::endl;
std::cerr << ">target" << std::endl
<< std::string(target + i, target_length) << std::endl;
assert(false);
}
#endif

wflign_edit_cigar_copy(wf_aligner, &rev_aln.edit_cigar);
rev_aln.j = j;
rev_aln.i = i;
rev_aln.query_length = query_length;
rev_aln.target_length = target_length;
}
wflign_edit_cigar_copy(wf_aligner, &rev_aln.edit_cigar);
rev_score = calculate_alignment_score(rev_aln.edit_cigar, convex_penalties);
//std::cerr << "reverse score is " << rev_score << std::endl;
rev_aln.j = j;
rev_aln.i = i;
rev_aln.query_length = query_length;
rev_aln.target_length = target_length;
}

if (rev_aln.ok && rev_score < fwd_score) {
rev_aln.ok = true;
aln.ok = false;
std::cerr << "got better score with reverse complement alignment" << std::endl
<< " query_length " << query_length
<< " target_length " << target_length
<< " chain_gap " << chain_gap
<< " max_patching_score " << max_patching_score
<< " max_score " << max_score
<< " fwd_score " << fwd_score
<< " rev_score " << rev_score
<< std::endl
<< "query " << std::string(query + j, query_length)
<< " target " << std::string(target + i, target_length)
<< std::endl;
} else {
rev_aln.ok = false;
}

aln.j = j;
Expand Down

0 comments on commit b663abe

Please sign in to comment.