forked from diepthihoang/mpboot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcandidateset.h
175 lines (143 loc) · 4.29 KB
/
candidateset.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
/*
* candidateset.h
*
* Created on: Jun 1, 2014
* Author: Tung Nguyen
*/
#ifndef CANDIDATESET_H_
#define CANDIDATESET_H_
#include "tools.h"
#include "alignment.h"
#include <stack>
struct CandidateTree {
string tree; // with branch length
string topology; // tree topology WITHOUT branch lengths and WITH TAXON ID (instead of taxon names) for sorting purpose
double score; // log-likelihood under ML or parsimony score
};
/**
* Candidate tree set, sorted in ascending order of scores, i.e. the last element is the highest scoring tree
*/
class CandidateSet : public multimap<double, CandidateTree> {
public:
/**
* constructor
*/
CandidateSet(int limit, int max_candidates, Alignment *aln);
CandidateSet();
/**
* return randomly one candidate tree from max_candidate
*/
string getRandCandTree();
/**
* return the next parent tree for reproduction.
* Here we always maintain a list of candidate trees which have not
* been used for reproduction. If all candidate trees have been used, we select the
* current best trees as the new parent trees
*/
string getNextCandTree();
/**
* Replace an existing tree in the candidate set
* @param tree the new tree string that will replace the existing tree
* @param score the score of the new tree
* @return true if the topology of \a tree exist in the candidate set
*/
bool replaceTree(string tree, double score);
/**
* create the parent tree set containing top trees
*/
void initParentTrees();
/**
* update / insert tree into set of score is higher than lowest-scoring tree
* @return false if the tree topology already exists
*
*/
bool update(string tree, double score);
/**
* print score of max_candidates best trees
*
* @param numScore
* Number of best scores to print out starting from the highest
*/
vector<double> getBestScores(int numBestScores = 0);
/**
* Return the worst score in the candidate tree set
* @return
*/
double getWorstScore();
/**
* Return \a numTree best tree strings
* @param numTree number of best trees
* @return a list of tree
*/
vector<string> getHighestScoringTrees(int numTree = 0);
/**
* get tree(s) with highest score. More than one tree is
* returned if there are multiple optima.
* @return a vector containing optimal trees
*/
vector<string> getEquallyOptimalTrees();
/**
* destructor
*/
virtual ~CandidateSet();
/**
* hard limit for number of trees (typically superset of candidate set)
*/
int maxCandidates;
/**
* best score in the set
*/
double bestScore;
/**
* maximum number of candidate trees
*/
int popSize;
/** index of tree topologies in set
*
*/
StringDoubleHashMap topologies;
/**
* Trees used for reproduction
*/
stack<string> parentTrees;
/**
* pointer to alignment, just to assign correct IDs for taxa
*/
Alignment *aln;
/**
* check if tree topology WITHOUT branch length exist in the candidate set?
*/
bool treeTopologyExist(string topo);
/**
* check if tree topology WITH branch length exist in the candidate set?
*/
bool treeExist(string tree);
/**
* return a unique topology (sorted by taxon names, rooted at taxon with alphabetically smallest name) without branch lengths
*/
string getTopology(string tree);
/**
* Empty the candidate set
*/
void clear();
/**
* Return a CandidateSet containing \a numTrees of current best candidate trees
* @param numTrees
* @return
*/
CandidateSet getBestCandidateTrees(int numTrees);
/*
* Diep added
* Copy all tree strings in current candidate set into vector<string> candidateTreeVec
*/
void copyToCandidateVec(); // Diep added to avoid bias in support values for big group
/*
* Diep added
* Return a random tree stored in vector<string> candidateTreeVec
* Somehow this works for MP better than getRandCandTree
*/
string getRandCandVecTree();
private:
vector<string> candidateTreeVec; // Diep added to avoid bias in support values for big group
};
#endif /* CANDIDATESET_H_ */