Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make sure GAMP surjection detects when you have the wrong graph #4450

Merged
merged 1 commit into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions src/subcommand/surject_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,27 @@ static void ensure_alignment_is_for_graph(const Alignment& aln, const HandleGrap
}
}

/// If the given multipath alignment doesn't make sense against the given graph (i.e.
/// doesn't agree with the nodes in the graph), print a message and stop the
/// program. Is thread-safe.
static void ensure_alignment_is_for_graph(const MultipathAlignment& aln, const HandleGraph& graph) {
// For multipaht alignments we just check node existence.
for (auto& subpath : aln.subpath()) {
for (auto& mapping : subpath.path().mapping()) {
nid_t node_id = mapping.position().node_id();
if (!graph.has_node(node_id)) {
// Something is wrong with this alignment.
#pragma omp critical (cerr)
{
std::cerr << "error:[vg surject] MultipathAlignment " << aln.name() << " cannot be interpreted against this graph: node " << node_id << " does not exist!" << std::endl;
std::cerr << "Make sure that you are using the same graph that the reads were mapped to!" << std::endl;
}
exit(1);
}
}
}
}

int main_surject(int argc, char** argv) {

if (argc == 2) {
Expand Down Expand Up @@ -554,6 +575,11 @@ int main_surject(int argc, char** argv) {

exit(1);
}

if (validate) {
ensure_alignment_is_for_graph(src1, *xgidx);
ensure_alignment_is_for_graph(src2, *xgidx);
}

// convert out of protobuf
multipath_alignment_t mp_src1, mp_src2;
Expand Down Expand Up @@ -661,6 +687,10 @@ int main_surject(int argc, char** argv) {
watchdog->check_in(thread_num, src.name());
}

if (validate) {
ensure_alignment_is_for_graph(src, *xgidx);
}

multipath_alignment_t mp_src;
from_proto_multipath_alignment(src, mp_src);

Expand Down
9 changes: 8 additions & 1 deletion src/surjector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,14 @@
namespace vg {

using namespace std;


/**
* Widget to surject alignments down to linear paths in the graph.
*
* Assumes the alignments actually go with the graph; the caller is
* repsonsible for ensuring that e.g. all nodes referenced by the
* alignments actually exist.
*/
class Surjector : public AlignerClient {
public:

Expand Down
12 changes: 10 additions & 2 deletions test/t/15_vg_surject.t
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ BASH_TAP_ROOT=../deps/bash-tap
PATH=../bin:$PATH # for vg


plan tests 48
plan tests 52

vg construct -r small/x.fa >j.vg
vg index -x j.xg j.vg
Expand Down Expand Up @@ -196,7 +196,15 @@ is "$(vg surject -x x.xg -s -m -t 1 mapped.gamp | grep -v '@' | wc -l)" 40 "GAMP
is "$(vg surject -x x.xg -M -m -s -t 1 mapped.gamp | grep -v '@' | wc -l)" 80 "GAMP surject can return multimappings"
is "$(vg surject -x x.xg -M -m -s -i -t 1 mapped.gamp | grep -v '@' | wc -l)" 80 "GAMP surject can return multimappings"

rm x.vg x.pathdup.vg x.xg x.gcsa x.gcsa.lcp x.gam mapped.gam mapped.gamp
vg construct -r tiny/tiny.fa > tiny.vg
vg surject -x tiny.vg -s -t 1 mapped.gam >/dev/null 2>err.txt
is "${?}" "1" "Surjection fails when using the wrong graph for GAM"
is "$(cat err.txt | grep 'cannot be interpreted' | wc -l)" "1" "Surjection of GAM to the wrong graph reports the problem"
vg surject -x tiny.vg -s -t 1 -m mapped.gamp >/dev/null 2>err.txt
is "${?}" "1" "Surjection fails when using the wrong graph for GAMP"
is "$(cat err.txt | grep 'cannot be interpreted' | wc -l)" "1" "Surjection of GAMP to the wrong graph reports the problem"

rm x.vg x.pathdup.vg x.xg x.gcsa x.gcsa.lcp x.gam mapped.gam mapped.gamp tiny.vg err.txt

is "$(vg surject -p CHM13#0#chr8 -x surject/opposite_strands.gfa --prune-low-cplx --sam-output --gaf-input surject/opposite_strands.gaf | grep -v "^@" | cut -f3-12 | sort | uniq | wc -l)" "1" "vg surject low compelxity pruning gets the same alignment regardless of read orientation"

Loading