Skip to content

Commit 84e5822

Browse files
committed
Adding FaultyRank core
1 parent 66d68a2 commit 84e5822

File tree

4 files changed

+322
-1
lines changed

4 files changed

+322
-1
lines changed

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ Run FaultyRank on the test dataset.
159159

160160
``` diff
161161
$ cd FaultyRank/core
162-
$ ./faultyrank_core -N 4 -E 6 -f FaultyRank/data/test_graph.txt
162+
$ ./faultyrank -N 4 -E 6 -f FaultyRank/data/test_graph.txt
163163
```
164164

165165
# Contact

core/benchmark.sh

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#!/bin/bash
# Benchmark driver: rebuilds the project, then runs the FaultyRank binary
# under taskset with a single OpenMP thread.

export OMP_NUM_THREADS=1
echo "$OMP_NUM_THREADS"
# Stop here when the build fails so a stale or missing binary is never run.
make clean && make || { echo "build failed" >&2; exit 1; }

# The public-dataset runs below are disabled. They are commented out with '#'
# because bash has no ''' block comment: a ''' ... ''' span is parsed as one
# quoted word that the shell then tries to execute as a command.
#
# # Dataset: amazon
# echo "~~~~~~~~~~~~~~~~~~~~~~~~~<DATASET: AMAZON>~~~~~~~~~~~~~~~~~~~~~~~~~"
# echo
# echo "<<<<<<<<<ALGO: FAULTY-RANK-PUSH>>>>>>>>>"
# taskset --cpu-list 0-70:2 ./faulty_rank_push -N 403394 -E 3387388 -f /home/aislam6/dataset-faultyrank/amazon0601.el
# taskset --cpu-list 0-70:2 ./faulty_rank_push -N 403394 -E 4886816 -f /home/aislam6/dataset-faultyrank/amazon0601-un.el
#
# # Dataset: skitter
# echo "~~~~~~~~~~~~~~~~~~~~~~~~~<DATASET: SKITTER>~~~~~~~~~~~~~~~~~~~~~~~~~"
# echo
# echo "<<<<<<<<<ALGO: FAULTY-RANK-PUSH>>>>>>>>>"
# taskset --cpu-list 0-70:2 ./faulty_rank_push -N 1696415 -E 11095298 -f /home/aislam6/dataset-faultyrank/as-skitter.el
# taskset --cpu-list 0-70:2 ./faulty_rank_push -N 1696415 -E 22190596 -f /home/aislam6/dataset-faultyrank/as-skitter-un.el
#
# # Dataset: road
# echo "~~~~~~~~~~~~~~~~~~~~~~~~~<DATASET: ROAD>~~~~~~~~~~~~~~~~~~~~~~~~~"
# echo
# echo "<<<<<<<<<ALGO: FAULTY-RANK-PUSH>>>>>>>>>"
# taskset --cpu-list 0-70:2 ./faulty_rank_push -N 1971281 -E 5533214 -f /home/aislam6/dataset-faultyrank/roadNet-CA.el
#
# # Dataset: livejournal
# echo "~~~~~~~~~~~~~~~~~~~~~~~~~<DATASET: LIVEJOURNAL>~~~~~~~~~~~~~~~~~~~~~~~~~"
# echo
# echo "<<<<<<<<<ALGO: FAULTY-RANK-PUSH>>>>>>>>>"
# taskset --cpu-list 0-70:2 ./faulty_rank_push -N 4847571 -E 68993773 -f /home/aislam6/dataset-faultyrank/soc-LiveJournal1.el
# taskset --cpu-list 0-70:2 ./faulty_rank_push -N 4847571 -E 85702474 -f /home/aislam6/dataset-faultyrank/soc-LiveJournal1-un.el

# Dataset: generated-random
echo "~~~~~~~~~~~~~~~~~~~~~~~~~<DATASET: RANDOM>~~~~~~~~~~~~~~~~~~~~~~~~~"
echo
echo "<<<<<<<<<ALGO: FAULTY-RANK-PUSH>>>>>>>>>"
taskset --cpu-list 0-70:2 ./faultyrank -N 578072 -E 595666 -f /path_to_final_graph/final_graph.txt -u /path_to_final_unfilled/final_unfilled.txt

core/command_line.h

+139
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
// Copyright (c) 2022, The DIR-LAB of the University of North Carolina at Charlotte
2+
// See LICENSE.txt for license details
3+
4+
#ifndef COMMAND_LINE_H_
5+
#define COMMAND_LINE_H_
6+
7+
#include <getopt.h>
8+
9+
#include <algorithm>
10+
#include <cinttypes>
11+
#include <iostream>
12+
#include <string>
13+
#include <type_traits>
14+
#include <vector>
15+
16+
17+
18+
/*
19+
FaultyRank
20+
Class: CLBase
21+
Author: Raqib Islam
22+
23+
Handles command line argument parsing
24+
- Through inheritance, can add more options to object
25+
- For example, most kernels will use CLApp
26+
*/
27+
28+
29+
// Base command-line parser.
//
// Recognised options: -h, -N <nodes>, -E <edges>, -f <graph file>,
// -u <unfilled-property file>, -i <max iterations>, -t <tolerance>.
// Derived classes add options by appending to get_args_, registering help
// lines with AddHelpLine(), and overriding HandleArg().
class CLBase {
 protected:
  int argc_;
  char** argv_;
  // getopt option string; a ':' after a letter means the option takes a value.
  // 'h' is listed so that "-h" reaches HandleArg() instead of being reported
  // by getopt as an unknown option.
  std::string get_args_ = "hN:E:f:u:i:t:";
  std::vector<std::string> help_strings_;

  int64_t num_nodes_ = 0;
  int64_t num_edges_ = 0;
  std::string input_filename_ = "";
  std::string up_filename_ = "";
  int max_iters_ = 10;
  double tolerance_ = 1e-4;

  // Formats one help line ("-<opt> <arg>: text [default]") and stores it for
  // PrintUsage(). snprintf truncates the line to kBufLen - 1 characters.
  void AddHelpLine(char opt, std::string opt_arg, std::string text,
                   std::string def = "") {
    const int kBufLen = 100;
    char buf[kBufLen];
    if (opt_arg != "")
      opt_arg = "<" + opt_arg + ">";
    if (def != "")
      def = "[" + def + "]";
    snprintf(buf, kBufLen, " -%c %-9s: %-54s%10s", opt, opt_arg.c_str(),
             text.c_str(), def.c_str());
    help_strings_.push_back(buf);
  }

 public:
  CLBase(int argc, char** argv) : argc_(argc), argv_(argv) {
    AddHelpLine('h', "", "print this help message");
    // -N and -E are parsed as integers in HandleArg(), so describe them as
    // counts rather than as file names.
    AddHelpLine('N', "num_nodes", "number of vertices in the graph");
    AddHelpLine('E', "num_edges", "number of edges in the graph");
    AddHelpLine('f', "input_file", "load graph from file");
    AddHelpLine('u', "unfilled_property_file", "load unfilled property vertices from file");
    AddHelpLine('i', "i", "perform at most i iterations",std::to_string(max_iters_));
    AddHelpLine('t', "t", "use tolerance t", std::to_string(tolerance_));
  }

  // HandleArg() is virtual, so give the base a virtual destructor as well.
  virtual ~CLBase() = default;

  // Runs getopt over argv and dispatches each option to HandleArg().
  // Returns false (after printing a message) when no input file was given or
  // when the node or edge count is still zero; returns true otherwise.
  bool ParseArgs() {
    signed char c_opt;
    extern char *optarg;          // from and for getopt
    while ((c_opt = getopt(argc_, argv_, get_args_.c_str())) != -1) {
      HandleArg(c_opt, optarg);
    }
    if (input_filename_ == "") {
      std::cout << "No graph input specified. (Use -h for help)" << std::endl;
      return false;
    }
    if (!num_nodes_ || !num_edges_) {
      std::cout << "Need to specify graph properties. (Use -h for help)" << std::endl;
      return false;
    }
    return true;
  }

  // Stores the value of one parsed option. Options not listed here (including
  // getopt's '?' for unknown options) are ignored.
  virtual void HandleArg(signed char opt, char* opt_arg) {
    switch (opt) {
      case 'h': PrintUsage();                             break;
      case 'N': num_nodes_ = atol(opt_arg);               break;
      case 'E': num_edges_ = atol(opt_arg);               break;
      case 'f': input_filename_ = std::string(opt_arg);   break;
      case 'u': up_filename_ = std::string(opt_arg);      break;
      case 'i': max_iters_ = atoi(opt_arg);               break;
      case 't': tolerance_ = std::stod(opt_arg);          break;
    }
  }

  // Prints every registered help line and terminates the process with status 0.
  void PrintUsage() {
    for (const std::string& h : help_strings_)
      std::cout << h << std::endl;
    std::exit(0);
  }

  int64_t num_nodes() const { return num_nodes_; }
  int64_t num_edges() const { return num_edges_; }
  std::string input_filename() const { return input_filename_; }
  std::string up_filename() const { return up_filename_; }
  // True when a -u file name was supplied.
  bool has_unfilled_property() const { return (up_filename_ != ""); }
  int max_iters() const { return max_iters_; }
  double tolerance() const { return tolerance_; }
};
109+
110+
111+
112+
class CLApp : public CLBase {
113+
bool do_analysis_ = false;
114+
int num_trials_ = 16;
115+
bool do_verify_ = false;
116+
117+
public:
118+
CLApp(int argc, char** argv) : CLBase(argc, argv) {
119+
get_args_ += "an:v";
120+
AddHelpLine('a', "", "output analysis of last run", "false");
121+
AddHelpLine('n', "n", "perform n trials", std::to_string(num_trials_));
122+
AddHelpLine('v', "", "verify the output of each run", "false");
123+
}
124+
125+
void HandleArg(signed char opt, char* opt_arg) override {
126+
switch (opt) {
127+
case 'a': do_analysis_ = true; break;
128+
case 'n': num_trials_ = atoi(opt_arg); break;
129+
case 'v': do_verify_ = true; break;
130+
default: CLBase::HandleArg(opt, opt_arg);
131+
}
132+
}
133+
134+
bool do_analysis() const { return do_analysis_; }
135+
int num_trials() const { return num_trials_; }
136+
bool do_verify() const { return do_verify_; }
137+
};
138+
139+
#endif // COMMAND_LINE_H_

core/faultyrank.cpp

+142
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
//
2+
// Created by Islam, Abdullah Al Raqibul on 9/6/22.
3+
//
4+
5+
#include "graph.h"
6+
#include "command_line.h"
7+
8+
/// small file
9+
//#define INPUT_GRAPH "test-graphs/lma_c.txt"
10+
//#define UNFILLED_PROPERTY_FILE "test-graphs/up_lma_c.txt"
11+
//#define NUM_VERTICES 4
12+
//#define NUM_EDGES 5
13+
//#define MAX_ITERATION 20
14+
15+
typedef float ScoreT;
16+
using namespace std::chrono;
17+
18+
/// Sugar for printing the rank-values (@id_rank and @p_rank) after @iter iterations
19+
void print_ranks(const Graph &g, const Graph &rg, int iter, vector<ScoreT>& id_rank, vector<ScoreT>& p_rank) {
20+
cout << "\nafter " << iter << " iteration" << endl;
21+
cout << "id-rank: ";
22+
for(NodeID u=0; u<g.num_nodes(); u+=1) cout << id_rank[u] << " ";
23+
cout << endl;
24+
25+
cout << "p-rank: ";
26+
for(NodeID u=0; u<rg.num_nodes(); u+=1) cout << p_rank[u] << " ";
27+
cout << endl;
28+
}
29+
30+
void print_graph_property(const Graph &g, const Graph &rg) {
31+
int64_t count_isolated_nodes = 0;
32+
int64_t count_zero_in_g = 0;
33+
int64_t count_zero_in_rg = 0;
34+
35+
for (NodeID u=0; u < g.num_nodes(); u++) {
36+
if(g.out_degree(u) == 0 && rg.out_degree(u) == 0) count_isolated_nodes += 1;
37+
else if(g.out_degree(u)) count_zero_in_g += 1;
38+
else if(rg.out_degree(u)) count_zero_in_rg += 1;
39+
else {
40+
cout << "ERROR: Should not happen!" << endl;
41+
exit(-1);
42+
}
43+
}
44+
45+
cout << "The graph have: " << count_isolated_nodes << " isolated nodes, ";
46+
cout << count_zero_in_g << " zero-degree nodes in G, and ";
47+
cout << count_zero_in_rg << " zero-degree nodes in RG" << endl;
48+
}
49+
50+
void FaultyPageRank(const Graph &g, const Graph &rg,
51+
int max_iters, double epsilon = 0) {
52+
const ScoreT init_score = 1.0f;
53+
const ScoreT zero_score = 0.0f;
54+
55+
vector<ScoreT> p_rank_prev(g.num_nodes(), init_score);
56+
vector<ScoreT> id_rank(g.num_nodes(),zero_score);
57+
vector<ScoreT> p_rank(g.num_nodes(),zero_score);
58+
59+
/// going to update @id_rank values from Graph @G
60+
for (int iter=0; iter < max_iters; iter+=1) {
61+
///update id rank
62+
for (NodeID u=0; u < g.num_nodes(); u++) {
63+
if (g.out_degree(u) == 0) {
64+
cout<<"Sink node in G"<<u<<endl;
65+
ScoreT share = p_rank_prev[u] / (g.num_nodes() - 1);
66+
for(NodeID v=0; v < g.num_nodes(); v++) {
67+
if (u != v) id_rank[v] += share;
68+
}
69+
}
70+
else {
71+
cout<<"Nodes except sink node in G"<<u<<endl;
72+
ScoreT share = p_rank_prev[u] / g.out_degree(u);
73+
for (EdgeItem neigh: g.graph_[u].neighbors) {
74+
id_rank[neigh.v] += share;
75+
}
76+
}
77+
}
78+
///update property rank
79+
for (NodeID u=0; u < rg.num_nodes(); u++) {
80+
if (rg.out_degree(u) == 0) {
81+
cout<<"Sink node in RG"<<u<<endl;
82+
ScoreT share = id_rank[u] / (rg.num_nodes() - 1);
83+
for(NodeID v=0; v < rg.num_nodes(); v++) {
84+
if (u != v) p_rank[v] += share;
85+
}
86+
}
87+
else if (rg.out_degree(u) > rg.out_degree_paired(u) && !rg.graph_[u].unfilled_property_flag && rg.out_degree_paired(u)) {
88+
cout<<"Weighted node in RG"<<u<<endl;
89+
ScoreT share = id_rank[u] / rg.out_degree_paired(u);
90+
for (EdgeItem neigh: rg.graph_[u].neighbors) {
91+
if(neigh.paired_flag) p_rank[neigh.v] += share;
92+
}
93+
}
94+
else {
95+
cout<<"remaining node in RG"<<u<<endl;
96+
ScoreT share = id_rank[u] / rg.out_degree(u);
97+
for (EdgeItem neigh: rg.graph_[u].neighbors) {
98+
p_rank[neigh.v] += share;
99+
}
100+
}
101+
}
102+
103+
print_ranks(g, rg, iter, id_rank, p_rank);
104+
//#pragma omp parallel for
105+
for (NodeID u=0; u < g.num_nodes(); u++) {
106+
p_rank_prev[u] = p_rank[u];
107+
p_rank[u] = 0.0;
108+
id_rank[u] = 0.0;
109+
}
110+
111+
cout << "Done " << iter << " iteration." << endl;
112+
}
113+
}
114+
115+
int main(int argc, char* argv[]) {
116+
CLApp cli(argc, argv);
117+
if (!cli.ParseArgs()) return -1;
118+
119+
/// building the graph from @file
120+
auto start_g = std::chrono::high_resolution_clock::now();
121+
Graph G(cli.input_filename(), cli.num_nodes(), cli.num_edges());
122+
G.PrintTopology();
123+
auto end_g = std::chrono::high_resolution_clock::now();
124+
double duration_g = std::chrono::duration_cast<std::chrono::nanoseconds>(end_g - start_g).count();
125+
cout << "time to build the original graph: " << duration_g/1000000000 << " seconds." << endl;
126+
127+
/// building the reverse graph of Graph @G
128+
auto start_rg = std::chrono::high_resolution_clock::now();
129+
Graph RG(G);
130+
if(cli.has_unfilled_property()) RG.mark_unfilled_property_vertices(cli.up_filename());
131+
RG.PrintTopology();
132+
auto end_rg = std::chrono::high_resolution_clock::now();
133+
double duration_rg = std::chrono::duration_cast<std::chrono::nanoseconds>(end_rg - start_rg).count();
134+
cout << "time to build the reverse graph: " << duration_rg/1000000000 << " seconds." << endl;
135+
136+
/// call faulty page-rank algorithm
137+
auto start = std::chrono::high_resolution_clock::now();
138+
FaultyPageRank(G, RG, cli.max_iters());
139+
auto end = std::chrono::high_resolution_clock::now();
140+
double duration = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
141+
cout << "time to run the faulty-rank algorithm: " << duration/1000000000 << " seconds." << endl;
142+
}

0 commit comments

Comments
 (0)