-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgraph.py
129 lines (100 loc) · 4.29 KB
/
graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import numpy as np
import scipy.sparse as sp
import heapdict
class Graph:
def __init__(self):
""" Initialize with an empty edge dictionary. """
self.edges = {}
self.nodes = set()
self.A = False
self.nlist = []
def add_edges(self, edges_list):
""" Add a list of edges to the network.
Args:
edges_list: list of (a,b) tuples, where a->b is an edge to add
"""
for left, right in edges_list:
if left in self.edges:
self.edges[left].update({right: 1.0})
else:
self.nodes.add(left)
self.edges[left] = {right: 1.0}
if right not in self.edges:
self.nodes.add(right)
self.edges[right] = {}
def get_neighbors(self, node):
neighbors = set(self.edges[node].keys())
return neighbors
def shortest_path(self, source):
""" Compute the single-source shorting path.
This function uses Djikstra's algorithm to compute the distance from
source to all other nodes in the network.
Args:
source: node index for the source
Returns: tuple: dist, path
dist: dictionary of node:distance values for each node in the graph,
where distance denotes the shortest path distance from source
path: dictionary of node:prev_node values, where prev_node indicates
the previous node on the path from source to node
"""
Q = self.nodes.copy()
dist = {}
path = {}
for vertex in Q:
dist[vertex] = float("inf")
path[vertex] = None
# Distance from source to itself
dist[source] = 0
hdist = heapdict.heapdict(dist)
while Q:
u = hdist.popitem()[0]
Q.remove(u)
neighbors = self.get_neighbors(u)
neighbors = neighbors.intersection(Q)
for neighbor in neighbors:
alt = dist[u] + 1
if alt < dist[neighbor]:
dist[neighbor] = alt
hdist[neighbor] = alt
path[neighbor] = u
return dist, path
def adjacency_matrix(self):
""" Compute an adjacency matrix form of the graph.
Returns: tuple (A, nodes)
A: a sparse matrix in COO form that represents the adjacency matrix
for the graph (i.e., A[j,i] = 1 iff there is an edge i->j)
NOTE: be sure you have this ordering correct!
nodes: a list of nodes indicating the node key corresponding to each
index of the A matrix
"""
nodes = list(self.nodes)
nodeIndexDict = {node: i for i, node in enumerate(nodes)}
col = np.array([nodeIndexDict[src_key] for src_key, valdict in self.edges.items() for trgt_key in valdict.keys()])
row = np.array([nodeIndexDict[trgt_key] for src_key, valdict in self.edges.items() for trgt_key in valdict.keys()])
data = np.ones(len(row), dtype=np.int)
A = sp.coo_matrix((data, (row, col)))
self.A = A
self.nlist = nodes
return A, nodes
def pagerank(self, d=0.85, iters=100):
""" Compute the PageRank score for each node in the network.
Compute PageRank scores using the power method.
Args:
d: 1 - random restart factor
iters: maximum number of iterations of power method
Returns: dict ranks
ranks: a dictionary of node:importance score, for each node in the
network (larger score means higher rank)
"""
A, nlist = self.adjacency_matrix()
sumA = np.array(A.sum(axis=0))[0]
invSumA = 1.0 / sumA
xpd_invSumA = np.array([invSumA[col] for col in A.col])
A.data = A.data * xpd_invSumA * d
# intialize ranks to uniform dist
rank = np.array([1.0 / len(nlist)] * len(nlist))
for i in range(iters):
rank = A.dot(rank) + (1 - d) * 1.0 / len(nlist)
rank = rank / sum(rank)
ranks = dict(zip(nlist, rank))
return ranks