Skip to content

Commit

Permalink
Merge pull request #246 from waveygang/forcably-biwfa
Browse files Browse the repository at this point in the history
optionally force global biwfa alignment of mappings
  • Loading branch information
ekg authored Jun 2, 2024
2 parents 35194d8 + 3e434f8 commit bb0d43d
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 1 deletion.
78 changes: 78 additions & 0 deletions scripts/make_align_test_case.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#!/bin/bash

# Function to display usage
usage() {
echo "Usage: $0 -p <paf_file> -f <fasta_file> -o <output_prefix>"
echo " -p, --paf PAF file"
echo " -f, --fasta FASTA file"
echo " -o, --output Output prefix for the new FASTA and adjusted PAF files"
exit 1
}

# Parse command-line arguments
PARSED_ARGUMENTS=$(getopt -a -n "$0" -o p:f:o: --long paf:,fasta:,output: -- "$@")
VALID_ARGUMENTS=$?
if [ "$VALID_ARGUMENTS" != "0" ]; then
usage
fi

eval set -- "$PARSED_ARGUMENTS"
while :
do
case "$1" in
-p | --paf) PAF_FILE="$2" ; shift 2 ;;
-f | --fasta) FASTA_FILE="$2" ; shift 2 ;;
-o | --output) OUTPUT_PREFIX="$2" ; shift 2 ;;
--) shift ; break ;;
*) usage ;;
esac
done

# Validate arguments
if [ -z "$PAF_FILE" ] || [ -z "$FASTA_FILE" ] || [ -z "$OUTPUT_PREFIX" ]; then
usage
fi

OUTPUT_FASTA="${OUTPUT_PREFIX}.fa"
OUTPUT_PAF="${OUTPUT_PREFIX}.paf"

# Extract the first alignment from the PAF file
read -r line < "$PAF_FILE"

# Parse the alignment fields
IFS=$'\t' read -r -a fields <<< "$line"

# Extract query and target information
query_name="${fields[0]}"
query_start="${fields[2]}"
query_end="${fields[3]}"
target_name="${fields[5]}"
target_start="${fields[7]}"
target_end="${fields[8]}"

# Extract sequences using samtools faidx
samtools faidx "$FASTA_FILE" "$query_name:$query_start-$query_end" > "$OUTPUT_FASTA"
samtools faidx "$FASTA_FILE" "$target_name:$target_start-$target_end" >> "$OUTPUT_FASTA"

qname=$(grep ^">" "$OUTPUT_FASTA" | head -n 1 | sed 's/^>//')
tname=$(grep ^">" "$OUTPUT_FASTA" | tail -n 1 | sed 's/^>//')

# Adjust the PAF file
awk -v qname=$qname \
-v tname=$tname \
'{
if (NF > 0) {
query_length = $4 - $3;
target_length = $9 - $8;
$1 = qname;
$6 = tname;
$2 = query_length;
$3 = 0;
$4 = query_length;
$7 = target_length;
$8 = 0;
$9 = target_length;
print $0;
}
}' "$PAF_FILE" | tr ' ' '\t' > "$OUTPUT_PAF"

2 changes: 2 additions & 0 deletions src/align/include/align_parameters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ struct Parameters {
//wflambda
uint16_t wflambda_segment_length; //segment length for wflambda

bool force_biwfa_alignment; //force biwfa alignment

int wfa_mismatch_score;
int wfa_gap_opening_score;
int wfa_gap_extension_score;
Expand Down
1 change: 1 addition & 0 deletions src/align/include/computeAlignments.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,7 @@ namespace align
wflign::wavefront::WFlign* wflign = new wflign::wavefront::WFlign(
param.wflambda_segment_length,
param.min_identity,
param.force_biwfa_alignment,
param.wfa_mismatch_score,
param.wfa_gap_opening_score,
param.wfa_gap_extension_score,
Expand Down
5 changes: 4 additions & 1 deletion src/common/wflign/src/wflign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ void clean_up_sketches(std::vector<std::vector<rkmh::hash_t>*> &sketches) {
WFlign::WFlign(
const uint16_t segment_length,
const float min_identity,
const bool force_biwfa_alignment,
const int wfa_mismatch_score,
const int wfa_gap_opening_score,
const int wfa_gap_extension_score,
Expand All @@ -66,6 +67,8 @@ WFlign::WFlign(
this->segment_length = segment_length;
this->min_identity = min_identity;

this->force_biwfa_alignment = force_biwfa_alignment;

this->wfa_mismatch_score = wfa_mismatch_score;
this->wfa_gap_opening_score = wfa_gap_opening_score;
this->wfa_gap_extension_score = wfa_gap_extension_score;
Expand Down Expand Up @@ -436,7 +439,7 @@ void WFlign::wflign_affine_wavefront(
#ifdef WFA_PNG_TSV_TIMING
const auto start_time = std::chrono::steady_clock::now();
#endif
if (
if (force_biwfa_alignment ||
(query_length <= segment_length * 8 || target_length <= segment_length * 8) ||
(mashmap_estimated_identity >= 0.99
&& query_length <= MAX_LEN_FOR_STANDARD_WFA && target_length <= MAX_LEN_FOR_STANDARD_WFA)
Expand Down
2 changes: 2 additions & 0 deletions src/common/wflign/src/wflign.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,12 @@ namespace wflign {
bool emit_md_tag;
bool paf_format_else_sam;
bool no_seq_in_sam;
bool force_biwfa_alignment;
// Setup
WFlign(
const uint16_t segment_length,
const float min_identity,
const bool force_biwfa_alignment,
const int wfa_mismatch_score,
const int wfa_gap_opening_score,
const int wfa_gap_extension_score,
Expand Down
2 changes: 2 additions & 0 deletions src/interface/parse_args.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ void parse_args(int argc,

args::Group alignment_opts(parser, "[ Alignment Options ]");
args::ValueFlag<std::string> align_input_paf(alignment_opts, "FILE", "derive precise alignments for this input PAF", {'i', "input-paf"});
args::Flag force_biwfa_alignment(alignment_opts, "force-biwfa", "force alignment with biWFA for all sequence pairs", {'I', "force-biwfa"});
args::Flag invert_filtering(alignment_opts, "A", "if an input PAF is specified, remove alignments with gap-compressed identity below --map-pct-id x 0.8, else keep all alignments "
"[default: if an input PAF is specified, keep all alignments, else remove alignments with gap-compressed identity below --map-pct-id x 0.8]",
{'O', "invert-filtering"});
Expand Down Expand Up @@ -365,6 +366,7 @@ void parse_args(int argc,
align_parameters.emit_md_tag = args::get(emit_md_tag);
align_parameters.sam_format = args::get(sam_format);
align_parameters.no_seq_in_sam = args::get(no_seq_in_sam);
align_parameters.force_biwfa_alignment = args::get(force_biwfa_alignment);
map_parameters.split = !args::get(no_split);
map_parameters.dropRand = false;//ToFix: !args::get(keep_ties);
align_parameters.split = !args::get(no_split);
Expand Down

0 comments on commit bb0d43d

Please sign in to comment.