Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

optionally force global biwfa alignment of mappings #246

Merged
merged 2 commits into from
Jun 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions scripts/make_align_test_case.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#!/bin/bash

# Function to display usage
usage() {
echo "Usage: $0 -p <paf_file> -f <fasta_file> -o <output_prefix>"
echo " -p, --paf PAF file"
echo " -f, --fasta FASTA file"
echo " -o, --output Output prefix for the new FASTA and adjusted PAF files"
exit 1
}

# Parse command-line arguments
PARSED_ARGUMENTS=$(getopt -a -n "$0" -o p:f:o: --long paf:,fasta:,output: -- "$@")
VALID_ARGUMENTS=$?
if [ "$VALID_ARGUMENTS" != "0" ]; then
usage
fi

eval set -- "$PARSED_ARGUMENTS"
while :
do
case "$1" in
-p | --paf) PAF_FILE="$2" ; shift 2 ;;
-f | --fasta) FASTA_FILE="$2" ; shift 2 ;;
-o | --output) OUTPUT_PREFIX="$2" ; shift 2 ;;
--) shift ; break ;;
*) usage ;;
esac
done

# Validate arguments
if [ -z "$PAF_FILE" ] || [ -z "$FASTA_FILE" ] || [ -z "$OUTPUT_PREFIX" ]; then
usage
fi

OUTPUT_FASTA="${OUTPUT_PREFIX}.fa"
OUTPUT_PAF="${OUTPUT_PREFIX}.paf"

# Extract the first alignment from the PAF file
read -r line < "$PAF_FILE"

# Parse the alignment fields
IFS=$'\t' read -r -a fields <<< "$line"

# Extract query and target information
query_name="${fields[0]}"
query_start="${fields[2]}"
query_end="${fields[3]}"
target_name="${fields[5]}"
target_start="${fields[7]}"
target_end="${fields[8]}"

# Extract sequences using samtools faidx
samtools faidx "$FASTA_FILE" "$query_name:$query_start-$query_end" > "$OUTPUT_FASTA"
samtools faidx "$FASTA_FILE" "$target_name:$target_start-$target_end" >> "$OUTPUT_FASTA"

qname=$(grep ^">" "$OUTPUT_FASTA" | head -n 1 | sed 's/^>//')
tname=$(grep ^">" "$OUTPUT_FASTA" | tail -n 1 | sed 's/^>//')

# Adjust the PAF file
awk -v qname=$qname \
-v tname=$tname \
'{
if (NF > 0) {
query_length = $4 - $3;
target_length = $9 - $8;
$1 = qname;
$6 = tname;
$2 = query_length;
$3 = 0;
$4 = query_length;
$7 = target_length;
$8 = 0;
$9 = target_length;
print $0;
}
}' "$PAF_FILE" | tr ' ' '\t' > "$OUTPUT_PAF"

2 changes: 2 additions & 0 deletions src/align/include/align_parameters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ struct Parameters {
//wflambda
uint16_t wflambda_segment_length; //segment length for wflambda

bool force_biwfa_alignment; //force biwfa alignment

int wfa_mismatch_score;
int wfa_gap_opening_score;
int wfa_gap_extension_score;
Expand Down
1 change: 1 addition & 0 deletions src/align/include/computeAlignments.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,7 @@ namespace align
wflign::wavefront::WFlign* wflign = new wflign::wavefront::WFlign(
param.wflambda_segment_length,
param.min_identity,
param.force_biwfa_alignment,
param.wfa_mismatch_score,
param.wfa_gap_opening_score,
param.wfa_gap_extension_score,
Expand Down
5 changes: 4 additions & 1 deletion src/common/wflign/src/wflign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ void clean_up_sketches(std::vector<std::vector<rkmh::hash_t>*> &sketches) {
WFlign::WFlign(
const uint16_t segment_length,
const float min_identity,
const bool force_biwfa_alignment,
const int wfa_mismatch_score,
const int wfa_gap_opening_score,
const int wfa_gap_extension_score,
Expand All @@ -66,6 +67,8 @@ WFlign::WFlign(
this->segment_length = segment_length;
this->min_identity = min_identity;

this->force_biwfa_alignment = force_biwfa_alignment;

this->wfa_mismatch_score = wfa_mismatch_score;
this->wfa_gap_opening_score = wfa_gap_opening_score;
this->wfa_gap_extension_score = wfa_gap_extension_score;
Expand Down Expand Up @@ -436,7 +439,7 @@ void WFlign::wflign_affine_wavefront(
#ifdef WFA_PNG_TSV_TIMING
const auto start_time = std::chrono::steady_clock::now();
#endif
if (
if (force_biwfa_alignment ||
(query_length <= segment_length * 8 || target_length <= segment_length * 8) ||
(mashmap_estimated_identity >= 0.99
&& query_length <= MAX_LEN_FOR_STANDARD_WFA && target_length <= MAX_LEN_FOR_STANDARD_WFA)
Expand Down
2 changes: 2 additions & 0 deletions src/common/wflign/src/wflign.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,12 @@ namespace wflign {
bool emit_md_tag;
bool paf_format_else_sam;
bool no_seq_in_sam;
bool force_biwfa_alignment;
// Setup
WFlign(
const uint16_t segment_length,
const float min_identity,
const bool force_biwfa_alignment,
const int wfa_mismatch_score,
const int wfa_gap_opening_score,
const int wfa_gap_extension_score,
Expand Down
2 changes: 2 additions & 0 deletions src/interface/parse_args.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ void parse_args(int argc,

args::Group alignment_opts(parser, "[ Alignment Options ]");
args::ValueFlag<std::string> align_input_paf(alignment_opts, "FILE", "derive precise alignments for this input PAF", {'i', "input-paf"});
args::Flag force_biwfa_alignment(alignment_opts, "force-biwfa", "force alignment with biWFA for all sequence pairs", {'I', "force-biwfa"});
args::Flag invert_filtering(alignment_opts, "A", "if an input PAF is specified, remove alignments with gap-compressed identity below --map-pct-id x 0.8, else keep all alignments "
"[default: if an input PAF is specified, keep all alignments, else remove alignments with gap-compressed identity below --map-pct-id x 0.8]",
{'O', "invert-filtering"});
Expand Down Expand Up @@ -365,6 +366,7 @@ void parse_args(int argc,
align_parameters.emit_md_tag = args::get(emit_md_tag);
align_parameters.sam_format = args::get(sam_format);
align_parameters.no_seq_in_sam = args::get(no_seq_in_sam);
align_parameters.force_biwfa_alignment = args::get(force_biwfa_alignment);
map_parameters.split = !args::get(no_split);
map_parameters.dropRand = false;//ToFix: !args::get(keep_ties);
align_parameters.split = !args::get(no_split);
Expand Down
Loading