-
Notifications
You must be signed in to change notification settings - Fork 0
/
ppr.py
107 lines (86 loc) · 3.67 KB
/
ppr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import numba
import numpy as np
import scipy.sparse as sp
@numba.njit(cache=True, locals={'_val': numba.float32, 'res': numba.float32, 'res_vnode': numba.float32})
def _calc_ppr_node(inode, indptr, indices, deg, alpha, epsilon):
alpha_eps = alpha * epsilon
f32_0 = numba.float32(0)
p = {inode: f32_0}
r = {}
r[inode] = alpha
q = [inode]
while len(q) > 0:
unode = q.pop()
res = r[unode] if unode in r else f32_0
if unode in p:
p[unode] += res
else:
p[unode] = res
r[unode] = f32_0
_val = (1 - alpha) * res / deg[unode]
for vnode in indices[indptr[unode]:indptr[unode + 1]]:
if vnode in r:
r[vnode] += _val
else:
r[vnode] = _val
res_vnode = r[vnode] if vnode in r else f32_0
if res_vnode >= alpha_eps * deg[vnode]:
if vnode not in q:
q.append(vnode)
return list(p.keys()), list(p.values())
@numba.njit(cache=True)
def calc_ppr(indptr, indices, deg, alpha, epsilon, nodes):
js = []
vals = []
for i, node in enumerate(nodes):
j, val = _calc_ppr_node(node, indptr, indices, deg, alpha, epsilon)
js.append(j)
vals.append(val)
return js, vals
@numba.njit(cache=True, parallel=True)
def calc_ppr_topk_parallel(indptr, indices, deg, alpha, epsilon, nodes, topk):
js = [np.zeros(0, dtype=np.int64)] * len(nodes)
vals = [np.zeros(0, dtype=np.float32)] * len(nodes)
for i in numba.prange(len(nodes)):
j, val = _calc_ppr_node(nodes[i], indptr, indices, deg, alpha, epsilon)
j_np, val_np = np.array(j), np.array(val)
idx_topk = np.argsort(val_np)[-topk:]
js[i] = j_np[idx_topk]
vals[i] = val_np[idx_topk]
return js, vals
def ppr_topk(adj_matrix, alpha, epsilon, nodes, topk):
"""Calculate the PPR matrix approximately using Anderson."""
out_degree = np.sum(adj_matrix > 0, axis=1).A1
nnodes = adj_matrix.shape[0]
neighbors, weights = calc_ppr_topk_parallel(adj_matrix.indptr, adj_matrix.indices, out_degree,
numba.float32(alpha), numba.float32(epsilon), nodes, topk)
return construct_sparse(neighbors, weights, (len(nodes), nnodes))
def construct_sparse(neighbors, weights, shape):
i = np.repeat(np.arange(len(neighbors)), np.fromiter(map(len, neighbors), dtype=np.int))
j = np.concatenate(neighbors)
return sp.coo_matrix((np.concatenate(weights), (i, j)), shape)
def topk_ppr_matrix(adj_matrix, alpha, eps, idx, topk, normalization='row'):
"""Create a sparse matrix where each node has up to the topk PPR neighbors and their weights."""
topk_matrix = ppr_topk(adj_matrix, alpha, eps, idx, topk).tocsr()
if normalization == 'sym':
# Assume undirected (symmetric) adjacency matrix
deg = adj_matrix.sum(1).A1
deg_sqrt = np.sqrt(np.maximum(deg, 1e-12))
deg_inv_sqrt = 1. / deg_sqrt
row, col = topk_matrix.nonzero()
# assert np.all(deg[idx[row]] > 0)
# assert np.all(deg[col] > 0)
topk_matrix.data = deg_sqrt[idx[row]] * topk_matrix.data * deg_inv_sqrt[col]
elif normalization == 'col':
# Assume undirected (symmetric) adjacency matrix
deg = adj_matrix.sum(1).A1
deg_inv = 1. / np.maximum(deg, 1e-12)
row, col = topk_matrix.nonzero()
# assert np.all(deg[idx[row]] > 0)
# assert np.all(deg[col] > 0)
topk_matrix.data = deg[idx[row]] * topk_matrix.data * deg_inv[col]
elif normalization == 'row':
pass
else:
raise ValueError(f"Unknown PPR normalization: {normalization}")
return topk_matrix