From f4b929d897613413f00825857764d7cb78bdbe7e Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Thu, 14 Nov 2024 14:13:35 -0600 Subject: [PATCH 1/3] feat: Change default overlap threshold to 1.0 with short-circuit optimization --- src/interface/parse_args.hpp | 2 +- src/map/include/filter.hpp | 44 ++++++++++++++++++++---------------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/interface/parse_args.hpp b/src/interface/parse_args.hpp index ec2cb2ab..b96a23f7 100644 --- a/src/interface/parse_args.hpp +++ b/src/interface/parse_args.hpp @@ -364,7 +364,7 @@ void parse_args(int argc, if (overlap_threshold) { map_parameters.overlap_threshold = args::get(overlap_threshold); } else { - map_parameters.overlap_threshold = 0.5; + map_parameters.overlap_threshold = 1.0; } if (kmer_size) { diff --git a/src/map/include/filter.hpp b/src/map/include/filter.hpp index d8b1c3c7..324a984a 100644 --- a/src/map/include/filter.hpp +++ b/src/map/include/filter.hpp @@ -112,16 +112,19 @@ namespace skch } auto kit = it; - // Check for overlaps and mark bad if necessary - for ( ; it != L.end(); it++) { - if (it == L.begin()) continue; - int idx = *it; - for (auto it2 = L.begin(); it2 != kit; it2++) { - double overlap = get_overlap(idx, *it2); - if (overlap > overlapThreshold) { - vec[idx].overlapped = 1; // Mark as bad if it overlaps >50% with the best mapping - vec[idx].discard = 1; - break; + // Skip overlap checking if threshold is 1.0 (allow all overlaps) + if (overlapThreshold < 1.0) { + // Check for overlaps and mark bad if necessary + for ( ; it != L.end(); it++) { + if (it == L.begin()) continue; + int idx = *it; + for (auto it2 = L.begin(); it2 != kit; it2++) { + double overlap = get_overlap(idx, *it2); + if (overlap > overlapThreshold) { + vec[idx].overlapped = 1; // Mark as bad if overlaps more than threshold + vec[idx].discard = 1; + break; + } } } } @@ -393,15 +396,18 @@ namespace skch } auto kit = it; - // Check for overlaps and mark bad if necessary - for ( ; it != L.end(); it++) { - if (it == L.begin()) continue; - int idx = *it; - for (auto it2 = L.begin(); it2 != kit; it2++) { - if (get_overlap(idx, *it2) > overlapThreshold) { - vec[idx].overlapped = 1; // Mark as bad if it overlaps >50% with the best mapping - vec[idx].discard = 1; - break; + // Skip overlap checking if threshold is 1.0 (allow all overlaps) + if (overlapThreshold < 1.0) { + // Check for overlaps and mark bad if necessary + for ( ; it != L.end(); it++) { + if (it == L.begin()) continue; + int idx = *it; + for (auto it2 = L.begin(); it2 != kit; it2++) { + if (get_overlap(idx, *it2) > overlapThreshold) { + vec[idx].overlapped = 1; // Mark as bad if overlaps more than threshold + vec[idx].discard = 1; + break; + } } } } From 09e4c08751a8666246a9d360e67dd86bda75224b Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Thu, 14 Nov 2024 14:16:57 -0600 Subject: [PATCH 2/3] refactor: Update overlap threshold description and default value --- src/interface/parse_args.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interface/parse_args.hpp b/src/interface/parse_args.hpp index b96a23f7..b0752655 100644 --- a/src/interface/parse_args.hpp +++ b/src/interface/parse_args.hpp @@ -93,7 +93,7 @@ void parse_args(int argc, args::Flag no_split(mapping_opts, "no-split", "map each sequence in one piece", {'N',"no-split"}); args::ValueFlag chain_gap(mapping_opts, "INT", "chain gap: max distance to chain mappings [2k]", {'c', "chain-gap"}); args::ValueFlag max_mapping_length(mapping_opts, "INT", "target mapping length [50k]", {'P', "max-length"}); - args::ValueFlag overlap_threshold(mapping_opts, "FLOAT", "maximum mapping overlap fraction [0.5]", {'O', "overlap"}); + args::ValueFlag overlap_threshold(mapping_opts, "FLOAT", "maximum overlap with better mappings (1.0=keep all) [1.0]", {'O', "overlap"}); args::Flag no_filter(mapping_opts, "", "disable mapping filtering", {'f', "no-filter"}); args::Flag no_merge(mapping_opts, "", "disable merging of consecutive mappings", {'M', "no-merge"}); args::ValueFlag kmer_complexity(mapping_opts, "FLOAT", "minimum k-mer complexity threshold", {'J', "kmer-cmplx"}); From 12457f5f1d0cb3e58c6af35455961743a656c3fe Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Thu, 14 Nov 2024 14:19:36 -0600 Subject: [PATCH 3/3] style: Shorten "maximum" to "max" in overlap threshold help text --- src/interface/parse_args.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interface/parse_args.hpp b/src/interface/parse_args.hpp index b0752655..b9ac224d 100644 --- a/src/interface/parse_args.hpp +++ b/src/interface/parse_args.hpp @@ -93,7 +93,7 @@ void parse_args(int argc, args::Flag no_split(mapping_opts, "no-split", "map each sequence in one piece", {'N',"no-split"}); args::ValueFlag chain_gap(mapping_opts, "INT", "chain gap: max distance to chain mappings [2k]", {'c', "chain-gap"}); args::ValueFlag max_mapping_length(mapping_opts, "INT", "target mapping length [50k]", {'P', "max-length"}); - args::ValueFlag overlap_threshold(mapping_opts, "FLOAT", "maximum overlap with better mappings (1.0=keep all) [1.0]", {'O', "overlap"}); + args::ValueFlag overlap_threshold(mapping_opts, "FLOAT", "max overlap with better mappings (1.0=keep all) [1.0]", {'O', "overlap"}); args::Flag no_filter(mapping_opts, "", "disable mapping filtering", {'f', "no-filter"}); args::Flag no_merge(mapping_opts, "", "disable merging of consecutive mappings", {'M', "no-merge"}); args::ValueFlag kmer_complexity(mapping_opts, "FLOAT", "minimum k-mer complexity threshold", {'J', "kmer-cmplx"});