Skip to content

Commit

Permalink
Merge pull request #4453 from xchang1/ziptree-bug
Browse files Browse the repository at this point in the history
Ziptree bug
  • Loading branch information
xchang1 authored Nov 24, 2024
2 parents b05cbd3 + b25d730 commit 0303290
Show file tree
Hide file tree
Showing 4 changed files with 461 additions and 49 deletions.
2 changes: 2 additions & 0 deletions src/snarl_distance_index.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ using namespace sdsl;
using namespace handlegraph;
using namespace bdsg;

//TODO: If anyone ever remakes the distance index, it would be really helpful for the multicomponent chains to know the lengths of each component

//Minimum distance taking a pos instead of id/orientation/offset
size_t minimum_distance(const SnarlDistanceIndex& distance_index, pos_t pos1, pos_t pos2,
bool unoriented_distance = false, const HandleGraph* graph=nullptr);
Expand Down
345 changes: 344 additions & 1 deletion src/unittest/zip_code_tree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1224,6 +1224,256 @@ namespace unittest {
zip_forest.validate_zip_forest(distance_index, &seeds, 4);
}
}
TEST_CASE( "zip tree bubble nested in inversion", "[zip_tree]" ) {

VG graph;

Node* n1 = graph.create_node("GCAAAAAAAAAA");
Node* n2 = graph.create_node("GCAA");
Node* n3 = graph.create_node("GCAG");
Node* n4 = graph.create_node("GCA");
Node* n5 = graph.create_node("GCAAAAAAAAAAAAAAA");

Edge* e1 = graph.create_edge(n1, n2);
Edge* e2 = graph.create_edge(n1, n4, false, true);
Edge* e3 = graph.create_edge(n2, n3);
Edge* e4 = graph.create_edge(n2, n4);
Edge* e5 = graph.create_edge(n2, n5, true, false);
Edge* e6 = graph.create_edge(n3, n4);
Edge* e7 = graph.create_edge(n4, n5);

IntegratedSnarlFinder snarl_finder(graph);
SnarlDistanceIndex distance_index;
fill_in_distance_index(&distance_index, &graph, &snarl_finder);



//graph.to_dot(cerr);

SECTION( "Traverse nested chain forwards but the orientation of the chain is backwards in the snarl tree" ) {

vector<std::pair<pos_t, size_t>> positions;
positions.emplace_back(make_pos_t(1, false, 0), 0);
positions.emplace_back(make_pos_t(1, false, 5), 5);
positions.emplace_back(make_pos_t(2, false, 0), 7);
positions.emplace_back(make_pos_t(3, false, 0), 9);
positions.emplace_back(make_pos_t(4, false, 0), 12);
positions.emplace_back(make_pos_t(5, false, 0), 15);
positions.emplace_back(make_pos_t(5, false, 4), 19);
//all are in the same cluster
vector<SnarlDistanceIndexClusterer::Seed> seeds;
vector<MinimizerMapper::Minimizer> minimizers;
for (size_t i= 0 ; i < positions.size() ; i++ ) {
auto pos = positions[i];
ZipCode zipcode;
zipcode.fill_in_zipcode(distance_index, pos.first);
zipcode.fill_in_full_decoder();
seeds.push_back({ pos.first, i, zipcode});

minimizers.emplace_back();
minimizers.back().value.offset = pos.second;
minimizers.back().value.is_reverse = false;
}

VectorView<MinimizerMapper::Minimizer> minimizer_vector(minimizers);


ZipCodeForest zip_forest;
zip_forest.fill_in_forest(seeds, minimizer_vector, distance_index, std::numeric_limits<size_t>::max());
REQUIRE(zip_forest.trees.size() == 1);
zip_forest.print_self(&seeds, &minimizer_vector);
zip_forest.validate_zip_forest(distance_index, &seeds);


bool chain_is_reversed = distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) !=
distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n2->id()));
if (chain_is_reversed) {

vector<size_t> seed_order;
for (size_t i = 0 ; i < zip_forest.trees[0].get_tree_size() ; i++) {
if (zip_forest.trees[0].get_item_at_index(i).get_type() == ZipCodeTree::SEED) {
seed_order.emplace_back(zip_forest.trees[0].get_item_at_index(i).get_value());
}
}
//The seeds should be in the same order as the original list of seeds, but the orientation depends on the orientation of the top-level chain so either way is fine
if (seed_order.front() == 0) {
for (size_t i = 0 ; i < seed_order.size() ; i++) {
REQUIRE(seed_order[i] == i);
}
} else if (seed_order.front() == 5) {
for (size_t i = 0 ; i < seed_order.size() ; i++) {
REQUIRE(seed_order[i] == 5-i);
}
} else {
REQUIRE((seed_order.front() == 0 || seed_order.front() == 5));
}
} else {
//This unit test is for testing the nested chain going backwards so if it isn't it should be rewritten
//Chain 2->4 should be traversed backwards in the snarl tree
//It doesn't matter which direction chain 1->5 is going
cerr << "This test isn't testing the thing its supposed to test because the snarl finder put the chain in a different orientation. So it should probably be rewritten" << endl;
}

}
SECTION( "Traverse nested chain backwards but the orientation of the chain is backwards in the snarl tree" ) {

vector<std::pair<pos_t, size_t>> positions;
positions.emplace_back(make_pos_t(1, false, 0), 0);
positions.emplace_back(make_pos_t(1, false, 5), 5);
positions.emplace_back(make_pos_t(4, false, 0), 7);
positions.emplace_back(make_pos_t(3, false, 0), 9);
positions.emplace_back(make_pos_t(2, false, 0), 12);
positions.emplace_back(make_pos_t(5, false, 0), 15);
positions.emplace_back(make_pos_t(5, false, 4), 19);
//all are in the same cluster
vector<SnarlDistanceIndexClusterer::Seed> seeds;
vector<MinimizerMapper::Minimizer> minimizers;
for (size_t i= 0 ; i < positions.size() ; i++ ) {
auto pos = positions[i];
ZipCode zipcode;
zipcode.fill_in_zipcode(distance_index, pos.first);
zipcode.fill_in_full_decoder();
seeds.push_back({ pos.first, i, zipcode});

minimizers.emplace_back();
minimizers.back().value.offset = pos.second;
minimizers.back().value.is_reverse = false;
}

VectorView<MinimizerMapper::Minimizer> minimizer_vector(minimizers);


ZipCodeForest zip_forest;
zip_forest.fill_in_forest(seeds, minimizer_vector, distance_index, std::numeric_limits<size_t>::max());
REQUIRE(zip_forest.trees.size() == 1);
zip_forest.print_self(&seeds, &minimizer_vector);
zip_forest.validate_zip_forest(distance_index, &seeds);


bool chain_is_reversed = distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) !=
distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n2->id()));
if (chain_is_reversed) {

vector<size_t> seed_order;
for (size_t i = 0 ; i < zip_forest.trees[0].get_tree_size() ; i++) {
if (zip_forest.trees[0].get_item_at_index(i).get_type() == ZipCodeTree::SEED) {
seed_order.emplace_back(zip_forest.trees[0].get_item_at_index(i).get_value());
}
}
//The seeds should be in the same order as the original list of seeds, but the orientation depends on the orientation of the top-level chain so either way is fine
if (seed_order.front() == 0) {
for (size_t i = 0 ; i < seed_order.size() ; i++) {
REQUIRE(seed_order[i] == i);
}
} else if (seed_order.front() == 5) {
for (size_t i = 0 ; i < seed_order.size() ; i++) {
REQUIRE(seed_order[i] == 5-i);
}
} else {
REQUIRE((seed_order.front() == 0 || seed_order.front() == 5));
}
} else {
//This unit test is for testing the nested chain going backwards so if it isn't it should be rewritten
//Chain 2->4 should be traversed backwards in the snarl tree
//It doesn't matter which direction chain 1->5 is going
cerr << "This test isn't testing the thing its supposed to test because the snarl finder put the chain in a different orientation. So it should probably be rewritten" << endl;
}

}

}
TEST_CASE( "zip tree bubble nested in cyclic snarl", "[zip_tree]" ) {

VG graph;

Node* n1 = graph.create_node("GCAAAAAAAAAA");
Node* n2 = graph.create_node("GCAA");
Node* n3 = graph.create_node("GCAG");
Node* n4 = graph.create_node("GCA");
Node* n5 = graph.create_node("GCAAAAAAAAAAAAAAA");

Edge* e1 = graph.create_edge(n1, n2);
Edge* e2 = graph.create_edge(n1, n5);
Edge* e3 = graph.create_edge(n2, n3);
Edge* e4 = graph.create_edge(n2, n4);
Edge* e5 = graph.create_edge(n5, n5, true, false);
Edge* e6 = graph.create_edge(n3, n4);
Edge* e7 = graph.create_edge(n4, n5);

IntegratedSnarlFinder snarl_finder(graph);
SnarlDistanceIndex distance_index;
fill_in_distance_index(&distance_index, &graph, &snarl_finder);



//graph.to_dot(cerr);

SECTION( "Traverse nested chain forwards but the orientation of the chain is backwards in the snarl tree" ) {

vector<std::pair<pos_t, size_t>> positions;
positions.emplace_back(make_pos_t(1, false, 0), 0);
positions.emplace_back(make_pos_t(1, false, 5), 5);
positions.emplace_back(make_pos_t(2, false, 0), 7);
positions.emplace_back(make_pos_t(3, false, 0), 9);
positions.emplace_back(make_pos_t(4, false, 0), 12);
positions.emplace_back(make_pos_t(5, false, 0), 15);
positions.emplace_back(make_pos_t(5, false, 4), 19);
//all are in the same cluster
vector<SnarlDistanceIndexClusterer::Seed> seeds;
vector<MinimizerMapper::Minimizer> minimizers;
for (size_t i= 0 ; i < positions.size() ; i++ ) {
auto pos = positions[i];
ZipCode zipcode;
zipcode.fill_in_zipcode(distance_index, pos.first);
zipcode.fill_in_full_decoder();
seeds.push_back({ pos.first, i, zipcode});

minimizers.emplace_back();
minimizers.back().value.offset = pos.second;
minimizers.back().value.is_reverse = false;
}

VectorView<MinimizerMapper::Minimizer> minimizer_vector(minimizers);


ZipCodeForest zip_forest;
zip_forest.fill_in_forest(seeds, minimizer_vector, distance_index, std::numeric_limits<size_t>::max());
REQUIRE(zip_forest.trees.size() == 1);
zip_forest.print_self(&seeds, &minimizer_vector);
zip_forest.validate_zip_forest(distance_index, &seeds);


bool chain_is_reversed = distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) !=
distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n2->id()));
if (chain_is_reversed) {

vector<size_t> seed_order;
for (size_t i = 0 ; i < zip_forest.trees[0].get_tree_size() ; i++) {
if (zip_forest.trees[0].get_item_at_index(i).get_type() == ZipCodeTree::SEED) {
seed_order.emplace_back(zip_forest.trees[0].get_item_at_index(i).get_value());
}
}
//The seeds should be in the same order as the original list of seeds, but the orientation depends on the orientation of the top-level chain so either way is fine
if (seed_order.front() == 0) {
for (size_t i = 0 ; i < seed_order.size() ; i++) {
REQUIRE(seed_order[i] == i);
}
} else if (seed_order.front() == 5) {
for (size_t i = 0 ; i < seed_order.size() ; i++) {
REQUIRE(seed_order[i] == 5-i);
}
} else {
REQUIRE((seed_order.front() == 0 || seed_order.front() == 5));
}
} else {
//This unit test is for testing the nested chain going backwards so if it isn't it should be rewritten
//Chain 2->4 should be traversed backwards in the snarl tree
//It doesn't matter which direction chain 1->5 is going
cerr << "This test isn't testing the thing its supposed to test because the snarl finder put the chain in a different orientation. So it should probably be rewritten" << endl;
}

}
}
TEST_CASE( "zip tree snarl with inversion", "[zip_tree]" ) {

VG graph;
Expand Down Expand Up @@ -2953,9 +3203,102 @@ namespace unittest {
zip_forest.validate_zip_forest(dist_index, &seeds, 100);
}
}
TEST_CASE( "zipcode tree multicomponent chain nested in irregular snarl",
"[zip_tree]" ) {
VG graph;

Node* n1 = graph.create_node("GCAAAAAAAAAAAAAAAAAAAAAAAAA");
Node* n2 = graph.create_node("T");
Node* n3 = graph.create_node("G");
Node* n4 = graph.create_node("CTGA");
Node* n5 = graph.create_node("GCA");
Node* n6 = graph.create_node("T");
Node* n7 = graph.create_node("T");
Node* n8 = graph.create_node("TTTTTTTTT");
Node* n9 = graph.create_node("TTTTTTTTT");
Node* n10 = graph.create_node("GCAAAAAAAAAAAAA");
Node* n11 = graph.create_node("TTT");
Node* n12 = graph.create_node("GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG");
Node* n13 = graph.create_node("GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG");

Edge* e1 = graph.create_edge(n1, n2);
Edge* e2 = graph.create_edge(n1, n12);
Edge* e3 = graph.create_edge(n2, n3);
Edge* e4 = graph.create_edge(n2, n10);
Edge* e5 = graph.create_edge(n3, n4);
Edge* e6 = graph.create_edge(n3, n5);
Edge* e7 = graph.create_edge(n5, n6);
Edge* e8 = graph.create_edge(n6, n7, true, false);
Edge* e9 = graph.create_edge(n7, n8);
Edge* e10 = graph.create_edge(n7, n9);
Edge* e11 = graph.create_edge(n8, n9);
Edge* e12 = graph.create_edge(n9, n11);
Edge* e13 = graph.create_edge(n10, n11);
Edge* e14 = graph.create_edge(n10, n10, false, true);
Edge* e15 = graph.create_edge(n11, n12);
Edge* e16 = graph.create_edge(n12, n13);

IntegratedSnarlFinder snarl_finder(graph);
SnarlDistanceIndex dist_index;
fill_in_distance_index(&dist_index, &graph, &snarl_finder);

//graph.to_dot(cerr);

SECTION( "Cross unreachable chain" ) {

vector<pair<pos_t, size_t>> positions;
positions.emplace_back(make_pos_t(n3->id(), false, 0), 0);
positions.emplace_back(make_pos_t(n4->id(), false, 0), 0);
positions.emplace_back(make_pos_t(n5->id(), false, 1), 1);
positions.emplace_back(make_pos_t(n6->id(), false, 0), 2);
positions.emplace_back(make_pos_t(n7->id(), false, 0), 3);
positions.emplace_back(make_pos_t(n8->id(), false, 0), 4);
positions.emplace_back(make_pos_t(n9->id(), false, 0), 5);

vector<SnarlDistanceIndexClusterer::Seed> seeds;
vector<MinimizerMapper::Minimizer> minimizers;

for (size_t i = 0 ; i < positions.size() ; ++i) {
auto pos = positions[i];
ZipCode zipcode;
zipcode.fill_in_zipcode(dist_index, pos.first);
zipcode.fill_in_full_decoder();
seeds.push_back({ pos.first, i, zipcode});

minimizers.emplace_back();
minimizers.back().value.offset = pos.second;
minimizers.back().value.is_reverse = false;
}
VectorView<MinimizerMapper::Minimizer> minimizer_vector(minimizers);


ZipCodeForest zip_forest;
zip_forest.fill_in_forest(seeds, minimizer_vector, dist_index, 100, 100);
zip_forest.print_self(&seeds, &minimizer_vector);
zip_forest.validate_zip_forest(dist_index, &seeds, 100);
vector<size_t> seed_order;
for (size_t i = 0 ; i < zip_forest.trees[0].get_tree_size() ; i++) {
if (zip_forest.trees[0].get_item_at_index(i).get_type() == ZipCodeTree::SEED) {
seed_order.emplace_back(zip_forest.trees[0].get_item_at_index(i).get_value());
}
}
//The seeds should be in order of the chain, which is the order I put them in
if (seed_order.front() == 0) {
for (size_t i = 0 ; i < seed_order.size() ; i++) {
REQUIRE(seed_order[i] == i);
}
} else if (seed_order.front() == 6) {
for (size_t i = 0 ; i < seed_order.size() ; i++) {
REQUIRE(seed_order[i] == 6-i);
}
} else {
REQUIRE((seed_order.front() == 0 || seed_order.front() == 6));
}
}
}

//TODO: we can't deal with this properly yet
//TEST_CASE( "Looping chain zipcode tree", "[zip_tree][bug]" ) {
//TEST_CASE( "Looping chain zipcode tree", "[zip_tree]" ) {
// //TODO: This might change but it's a chain 2rev->2rev
// VG graph;

Expand Down
Loading

1 comment on commit 0303290

@adamnovak
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch lr-giraffe. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 25106 seconds

Please sign in to comment.