-
Notifications
You must be signed in to change notification settings - Fork 3
/
UnitigGraph.h
171 lines (149 loc) · 6.38 KB
/
UnitigGraph.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#ifndef UNIG_H
#define UNIG_H
#include <boost/graph/graphviz.hpp>
#include <boost/graph/copy.hpp>
#include <boost/graph/connected_components.hpp>
#include <boost/graph/graph_traits.hpp>
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/subgraph.hpp>
#include <boost/config.hpp>
#include <boost/graph/graph_utility.hpp>
#include <boost/graph/filtered_graph.hpp>
#include <boost/range/combine.hpp>
#include "deBruijnGraph.h"
#include <unordered_set>
#include <algorithm> // heap
#include <queue>
struct EdgeProperties;
struct VertexProperties;
struct Capacity {
float first;
float last;
float min;
float max;
float avg;
float length;
friend std::ostream& operator<<(std::ostream& os, const Capacity& cap)
{
return os << "(" << cap.avg << ", " << cap.first << "/" << cap.last << ", (length: " << cap.length << "), (min: " << cap.min << ", max: " << cap.max << ")";
}
};
typedef boost::adjacency_list<boost::listS, boost::listS, boost::bidirectionalS,
VertexProperties,
EdgeProperties> UGraph;
typedef typename boost::graph_traits<UGraph>::vertex_descriptor UVertex;
typedef typename boost::graph_traits<UGraph>::edge_descriptor UEdge;
typedef boost::graph_traits<UGraph>::vertex_iterator uvertex_iter;
// output operator for std::vector
template<class T>
inline std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
{
os << "[ ";
for (auto&& elem : v)
{
os << elem << " ";
}
os << "]";
return os;
}
struct Visits {
std::vector<unsigned int> visits;
friend std::ostream& operator<<(std::ostream& os, const Visits& v)
{
os << "[ ";
for (auto&& elem : v.visits)
{
os << elem << " ";
}
os << "]";
return os;
};
};
struct EdgeProperties {
std::string name;
float capacity;
float residual_capacity;
Capacity cap_info;
Capacity residual_cap_info;
bool visited;
std::vector<unsigned int> visits;
Visits v; //DEBUG ONLY
unsigned int last_visit;
UEdge prev; // edge on path forwards
UEdge next; // edge on path backwards
float fatness; //fatness of the path going through edges forwards
unsigned int distance; //distance from seed forwards
};
struct VertexProperties {
unsigned int index;
unsigned int scc;
unsigned int cc;
std::string name;
unsigned int tarjan_index;
bool onStack;
int visiting_time;
};
typedef std::vector<UVertex> Connected_Component; // to distinguish from regular std::vector<UVertex>
// wrapper around the unitig graph defined above
class UnitigGraph {
public:
// create a UnitigGraph from a dBg and its unbalanced vertices
UnitigGraph(deBruijnGraph&, std::string, std::string, float, unsigned int, unsigned int, int, bool, bool);
UnitigGraph(); // debug
~UnitigGraph();
void set_debug();
void debug(); // debug information
void assemble(std::string, float err, std::string contigs, bool two_strain);
void printGraph(std::ostream&, unsigned int cc);
void dijkstra(UEdge seed, bool residual, bool local, unsigned int cc);
std::vector<UEdge> greedy(UEdge seed, bool residual, bool local, unsigned int cc);
private:
void connectUnbalanced(Vertex*, unsigned int*, std::string, deBruijnGraph&, float, float threshold);
std::vector<std::pair<Vertex*,std::string> > addNeighbours(std::string& curr, const std::vector<char>&, const std::vector<char>&, deBruijnGraph&, unsigned int*, UVertex&, float threshold, float error);
std::pair<Vertex*,std::string> buildEdge(UVertex, Vertex*, std::string, std::string&, unsigned int*, float, float, deBruijnGraph&, float, float threshold);
std::pair<Vertex*,std::string> buildEdgeReverse(UVertex, Vertex*, std::string, std::string&, unsigned int*, float, float, deBruijnGraph&, float, float threshold);
UVertex addVertex(unsigned int*, std::string name, unsigned int ccc);
std::vector<UEdge> get_sources(unsigned int cc);
std::pair<std::string, float> calculate_contigs(std::vector<UEdge>&, std::vector<float>&, unsigned int cc);
float reduce_flow(std::vector<UEdge>&, std::set<unsigned int>&, unsigned int cc, bool init, bool theoretical);
std::vector<UEdge> find_fattest_path(UEdge seed, unsigned int cc);
std::vector<UEdge> fixFlow(UEdge, unsigned int cc);
//UEdge getSeed() const;
std::vector<float> calculate_thresholds(deBruijnGraph&, std::string, unsigned int);
std::vector<float> get_thresholds(std::vector<std::map<unsigned int, unsigned int>>& cov_distr, std::string, unsigned int);
std::pair<float, float> finite_difference(std::vector<float>);
std::vector<float> rolling(std::vector<float>& in, unsigned int len);
std::vector<float> cummin(std::vector<float>& in, unsigned int pos);
std::vector<UEdge> blockPath(UEdge, unsigned int, unsigned int cc);
std::pair<std::vector<UEdge>, std::vector<float>> find_paths(unsigned int cc, bool two_strain);
std::pair<UEdge, bool> checkUnvisitedEdges(UEdge, unsigned int cc);
std::pair<UEdge, bool> getUnvisitedEdge(const std::vector<UEdge>&, unsigned int cc);
float remove_non_unique_paths(std::vector<std::vector<UEdge>>&, std::vector<UEdge>&, unsigned int, unsigned int, unsigned int cc);
std::pair<UEdge, float> get_target(UEdge, bool, unsigned int cc);
std::pair<UEdge, float> get_furthest_target(UEdge, unsigned int cc);
UEdge get_next_source(unsigned int cc);
void cleanPath(std::vector<UEdge>&, std::vector<UEdge>&, unsigned int cc);
void cleanGraph(unsigned int cc, float err);
void removeStableSets(unsigned int cc);
void removeShortPaths(unsigned int cc);
void removeEmpty(unsigned int cc);
void contractPaths(unsigned int cc);
void removeLowEdges(unsigned int cc, float error_rate);
bool hasRelevance(unsigned int cc);
void unvisit(unsigned int cc);
float in_capacity(UVertex, unsigned int cc);
float out_capacity(UVertex, unsigned int cc);
bool test_hypothesis(float to_test_num, float to_test_denom, float h0, float threshold);
unsigned int cc_; // used to mark the CC's. Since some of them might be deleted later on, does not represent the number of cc's
unsigned int k_;
std::vector<UGraph*> graphs_;
std::vector<std::unordered_map<unsigned int, UVertex>> graph_map_;
std::vector<float> thresholds_; // TODO
std::string logfile_;
unsigned int filter_length_;
int thresh_;
bool long_;
bool true_;
bool debug_;
};
#endif