-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.h
325 lines (255 loc) · 9.29 KB
/
config.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
#ifndef __CONFIG_H__
#define __CONFIG_H__
// FILESEP: path separator string used when composing file-system paths.
// It is a backslash when WIN32 is defined and a forward slash otherwise.
#ifdef WIN32
#define FILESEP "\\"
#else
#define FILESEP "/"
#endif
#include <boost/progress.hpp>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/json_parser.hpp>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/json_parser.hpp>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/filesystem.hpp>
#include <unordered_map>
#include <list>
using namespace boost;
using namespace boost::property_tree;
// Default value of Config::alpha.
const double ALPHA_DEFAULT = 0.2;
// NOTE(review): not referenced in this header; presumably a default number
// of queries used elsewhere -- confirm against callers.
const int NUM_OF_QUERY = 20;

// String identifiers. Presumably these are the values compared against
// Config::action (what to do) -- TODO confirm against the command-line parser.
const string STATISTIC = "statistic";
const string QUERY = "query";
const string GEN_SS_QUERY = "generate-ss-query";
const string TOPK = "topk";
const string BUILD = "build";
const string GEN_EXACT_TOPK = "gen-exact-topk";
const string GEN_EXACT_SELF = "gen-exact-self";
//const string ASSVERSION = "version";

// String identifiers. Presumably these are the values compared against
// Config::algo (which algorithm to run) -- TODO confirm against the caller.
const string BIPPR = "bippr";
const string FORA = "fora";
const string FWDPUSH = "fwdpush";
const string FORA_MC="fora_mc";
const string MC = "montecarlo";
const string FBRW ="fbrw";
const string FLOS = "flos";
// NOTE(review): the literal is spelled "gobal_iteration" (not "global");
// it is a runtime string, so any rename must be coordinated with its users.
const string GI = "gobal_iteration";
const string DNE = "dne";
const string FORA_BIPPR = "fora_bippr";
// Used for timing.
// Integer identifiers of the individual timers. Presumably each value is a
// slot index into Timer::timeUsed (Saver::save_json iterates that container
// by integer index) -- TODO confirm against the Timer definition.
// NOTE(review): "RONDOM_WALK" / "RONDOM_WALK2" are spelled this way in the
// identifiers themselves; they are left unchanged because callers use them.
const int MC_QUERY = 1;
const int BIPPR_QUERY = 2;
const int FORA_QUERY = 3;
const int HUBPPR_QUERY = 4;
const int FWD_LU = 5;
const int RONDOM_WALK = 6;
const int SOURCE_DIST = 7;
const int SORT_MAP = 8;
const int BWD_LU = 9;
const int PI_QUERY = 10;
const int MC_QUERY2 = 11;
const int FORA_MC_QUERY = 12;
const int RONDOM_WALK2 = 13;
const int MC_DHT_QUERY = 14;
const int FB_QUERY = 15;
const int FLOS_QUERY = 16;
const int GI_QUERY = 17;
const int DNE_QUERY = 18;
const int FBRAW_QUERY = 19;
const int PI_QUERY_SELF = 20;
// Note the gap: this identifier is 111, not 21.
const int DFS_CYCLE =111;
// Cost constants. NOTE(review): not referenced in this header; presumably
// relative cost weights used by the query algorithms -- confirm at call sites.
const double RG_COST = 1;
// 0.2 for webstanford, 0.15 for dblp, 0.126 for pokec, 0.128 for LP, 0.097 for orkut
// const double SG_PUSH_COST = 0.125;
// const double SG_RW_COST = 1.0;
const double SG_RW_COST = 8.0;

// Default value of Config::exe_result_dir.
// With WIN32 defined it is "../../"; otherwise it is "./" followed by
// FILESEP, i.e. ".//".
#ifdef WIN32
const string parent_folder = "../../";
#else
const string parent_folder = string("./") + FILESEP;
#endif
// Pair of int -> double maps. NOTE(review): presumably the (estimate,
// residual) maps of a hub's backward index -- confirm against users.
typedef pair<map<int, double>, map<int, double>> HubBwdidx;
// pi residual
// A double paired with a HubBwdidx.
typedef pair<double, HubBwdidx> HubBwdidxWithResidual;
// typedef pair<unordered_map<int, double>, unordered_map<int, double>> Bwdidx;
// Backward / forward index: a pair of iMap<double>. iMap is declared
// elsewhere; Bwdidx and Fwdidx are the same underlying type.
typedef pair<iMap<double>, iMap<double>> Bwdidx;
typedef pair<iMap<double>, iMap<double>> Fwdidx;
typedef std::vector< std::vector<int> > RwIdx; //random walk idx
/// Run-time settings of one execution.
///
/// Every data member carries an in-class initializer (or is a string, which
/// default-constructs to empty), so a default-constructed Config -- including
/// a non-static one -- can be passed to get_data() without reading
/// indeterminate values.
class Config {
public:
    string graph_alias;
    string graph_location;
    string action = ""; // query/generate index, etc..
    // NOTE(review): machine-specific default; presumably overridden at start-up.
    string prefix = "d:\\dropbox\\research\\data\\";
    string version = "vector";
    string exe_result_dir = parent_folder;

    /// Returns the folder of the current graph: prefix + graph_alias + FILESEP.
    string get_graph_folder() const {
        return prefix + graph_alias + FILESEP;
    }

    bool multithread = false;
    bool with_rw_idx = false;
    bool query_high_degree = false;
    bool NDCG = true;
    // int num_rw = 10;

    // omega and rmax used to have no initializer although get_data() reads
    // them; they are now zero-initialized like the other numeric settings.
    double omega = 0; // 1/omega omega = # of random walk
    double rmax = 0; // identical to r_max

    unsigned int query_size = 1000;
    unsigned int max_iter_num = 100;
    double pfail = 0;
    double dbar = 0;
    double epsilon = 0;
    double delta = 0;
    unsigned int k = 500;
    double ppr_decay_alpha = 0.77;
    double rw_cost_ratio = 8.0;//8.0;
    double rmax_scale = 1.0;
    double multithread_param = 1.0;
    string algo;
    double alpha = ALPHA_DEFAULT;
    string exact_pprs_folder;
    unsigned int hub_space_consum = 1;

    /// Serializes a subset of the settings into a property tree.
    ///
    /// The keys are the literal strings below; Saver::save_json stores the
    /// returned tree under the "config" child of its output document.
    /// @return a new ptree; this object is not modified.
    ptree get_data() const {
        ptree data;
        data.put("graph_alias", graph_alias);
        data.put("action", action);
        data.put("alpha", alpha);
        data.put("pfail", pfail);
        data.put("epsilon", epsilon);
        data.put("delta", delta);
        data.put("idx", with_rw_idx);
        data.put("query high degree", query_high_degree);
        // data.put("avg-idx-count", num_rw);
        data.put("k", k);
        data.put("rand-walk & push cost ratio", rw_cost_ratio);
        data.put("query-size", query_size);
        data.put("algo", algo);
        data.put("rmax", rmax);
        data.put("rmax-scale", rmax_scale);
        data.put("omega", omega);
        data.put("result-dir", exe_result_dir);
        return data;
    }
};
/// Accumulated measurements of one execution.
///
/// All members are zero-initialized so that a default-constructed Result
/// (including a non-static one) never exposes indeterminate values to
/// get_data(). Previously no member had an initializer.
class Result {
public:
    int n = 0;
    long long m = 0;
    double avg_query_time = 0;

    double total_mem_usage = 0;
    double total_time_usage = 0;

    double num_randwalk = 0;
    double num_rw_idx_use = 0;
    double hit_idx_ratio = 0;

    double randwalk_time = 0;
    double randwalk_time_ratio = 0;

    double propagation_time = 0;
    double propagation_time_ratio = 0;

    double source_dist_time = 0;
    double source_dist_time_ratio = 0;

    double topk_sort_time = 0;
    double topk_sort_time_ratio = 0;

    // double topk_precision;
    // The topk_* values are written by get_data() divided by
    // real_topk_source_count.
    double topk_max_abs_err = 0;
    double topk_avg_abs_err = 0;
    double topk_max_relative_err = 0;
    double topk_avg_relative_err = 0;
    double topk_precision = 0;
    double topk_recall = 0;
    double topk_NDCG = 0;
    // double topk_max_add_err;
    // double topk_avg_add_err;

    int real_topk_source_count = 0;

    /// Serializes the measurements into a property tree.
    ///
    /// The keys are the literal strings below; Saver::save_json stores the
    /// returned tree under the "result" child of its output document.
    ///
    /// The top-k entries are each divided by real_topk_source_count using
    /// floating-point division. When that count is 0 the quotient is NaN or
    /// infinity rather than a crash; this is unchanged from the original.
    /// @return a new ptree; this object is not modified.
    ptree get_data() const {
        // Same arithmetic as the original `value / real_topk_source_count`.
        const auto per_source = [this](double total) {
            return total / real_topk_source_count;
        };

        ptree data;
        data.put("n", n);
        data.put("m", m);

        data.put("avg query time(s/q)", avg_query_time);

        data.put("total memory usage(MB)", total_mem_usage);

        data.put("total time usage(s)", total_time_usage);
        data.put("total time on rand-walks(s)", randwalk_time);
        data.put("total time on propagation(s)", propagation_time);
        // data.put("total time on source distribution(s)", source_dist_time);
        data.put("total time on sorting top-k ppr(s)", topk_sort_time);

        data.put("total time ratio on rand-walks(%)", randwalk_time_ratio);
        data.put("total time ratio on propagation(%)", propagation_time_ratio);
        // data.put("total time ratio on source distribution(%)", source_dist_time_ratio);
        // data.put("total time ratio on sorting top-k ppr(%)", topk_sort_time_ratio);

        data.put("total number of rand-walks", num_randwalk);
        data.put("total number of rand-walk idx used", num_rw_idx_use);
        data.put("total usage ratio of rand-walk idx", hit_idx_ratio);

        // data.put("avg topk precision", topk_precision);
        data.put("topk max absolute error", per_source(topk_max_abs_err));
        data.put("topk avg absolute error", per_source(topk_avg_abs_err));
        data.put("topk max relative error", per_source(topk_max_relative_err));
        data.put("topk avg relative error", per_source(topk_avg_relative_err));
        data.put("topk precision", per_source(topk_precision));
        data.put("topk recall", per_source(topk_recall));
        data.put("topk NDCG", per_source(topk_NDCG));
        return data;
    }
};
// Global objects; declared here, defined in another translation unit.
// `config` is the instance read and updated by Saver::get_time_path().
extern Config config;
// NOTE(review): the purpose of a second configuration is not visible in this
// header -- confirm at the definition site.
extern Config config2;
extern Result result;

// Declarations only; the definitions live elsewhere.
// NOTE(review): presumably exists_test reports whether the file `name`
// exists, and assert_file_exist aborts/reports using `desc` when `name` is
// missing -- confirm against the implementations.
bool exists_test(const std::string &name);
void assert_file_exist(string desc, string name);
/// Helpers that write the configuration, the results and the timers of one
/// execution into a JSON file.
namespace Saver {
    /// Returns the current local time formatted as "YYYY-MM-DD HH:MM:SS".
    /// Returns an empty string if the local time cannot be obtained.
    static string get_current_time_str() {
        time_t rawtime;
        time(&rawtime);

        // localtime() may return a null pointer; passing that to strftime()
        // would be undefined behaviour, so bail out instead.
        const struct tm *timeinfo = localtime(&rawtime);
        if (timeinfo == NULL)
            return string();

        char buffer[80] = {0};
        strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S", timeinfo);
        return string(buffer);
    }

    /// Builds the output path (without extension) for the current run and
    /// makes sure its directory exists.
    ///
    /// Side effect: the global `config.exe_result_dir` is extended so that it
    /// ends with FILESEP followed by "execution/". The extension is applied
    /// at most once: the original appended "execution/" on every call, so a
    /// second call produced ".../execution/execution/". A directory that
    /// already ends with FILESEP "execution/" is therefore used as is.
    ///
    /// The file name is
    ///   <graph_alias>.<action>.<algo>[.<rw_cost_ratio>].<with_idx|without_idx>.k-<k>.rmax-<rmax_scale>[.query_high_degree]
    /// where the rw_cost_ratio part is only present when algo is "assppr".
    static string get_time_path() {
        // using namespace boost::posix_time;
        // auto tm = second_clock::local_time();
        const string execution_subdir = "execution/";
        const string already_extended = string(FILESEP) + execution_subdir;

        if (!boost::algorithm::ends_with(config.exe_result_dir, already_extended)) {
            if (!boost::algorithm::ends_with(config.exe_result_dir, FILESEP))
                config.exe_result_dir += FILESEP;
            config.exe_result_dir += execution_subdir;
        }

        if (!boost::filesystem::exists(config.exe_result_dir)) {
            boost::filesystem::path dir(config.exe_result_dir);
            boost::filesystem::create_directories(dir);
        }

        string filename = config.graph_alias + "." + config.action + "." + config.algo;

        if (config.algo == "assppr")
            filename = filename + "." + to_string(static_cast<int>(config.rw_cost_ratio));

        const string idx_flag = config.with_rw_idx ? "with_idx" : "without_idx";
        filename = filename + "." + idx_flag + ".";

        filename += "k-" + to_string(config.k) + ".";
        filename += "rmax-" + to_string(config.rmax_scale);
        filename += config.query_high_degree ? ".query_high_degree" : "";

        return config.exe_result_dir + filename;
        // return config.exe_result_dir + to_iso_string(tm);
    }

    // Document that is filled by init() and save_json().
    // NOTE: this is a `static` variable in a header, so every translation
    // unit that includes the header gets its own copy; init() and save_json()
    // only share "start_time" when called from the same translation unit.
    static ptree combine;

    /// Records the current time as "start_time" in the output document.
    static void init() {
        combine.put("start_time", get_current_time_str());
    }

    /// Writes the run summary to "<get_time_path()>.json".
    ///
    /// @param config  settings stored under the "config" child.
    /// @param result  measurements stored under the "result" child.
    /// @param args    command-line arguments; args[0] is skipped and the rest
    ///                are joined, each preceded by a space, into "command_line".
    ///
    /// The output path is derived from the global `config` (see
    /// get_time_path()), not from the `config` parameter. Timers whose value
    /// is not positive are omitted from the "timer" child.
    /// NOTE(review): the stream is not checked after opening; behaviour on an
    /// unwritable path is whatever write_json does with a failed stream.
    static void save_json(Config &config, Result &result, vector<string> args) {
        ofstream fout(get_time_path() + ".json");

        string command_line = "";
        // size_t index: avoids the signed/unsigned comparison with size().
        for (size_t i = 1; i < args.size(); ++i) {
            command_line += " " + args[i];
        }

        combine.put("end_time", get_current_time_str());
        combine.put("command_line", command_line);
        combine.put_child("config", config.get_data());
        combine.put_child("result", result.get_data());

        ptree timer;
        for (int i = 0; i < (int) Timer::timeUsed.size(); i++) {
            if (Timer::timeUsed[i] > 0) {
                timer.put(to_str(i), Timer::timeUsed[i] / TIMES_PER_SEC);
            }
        }
        combine.put_child("timer", timer);

        write_json(fout, combine, true);
    }
} // namespace Saver
#endif