-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSpectralData.py
153 lines (129 loc) · 5.28 KB
/
SpectralData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# -*- coding: utf-8 -*-
import numpy as np
import networkx as nx
from sklearn.cluster import KMeans
# import scipy as sc
from scipy.sparse import csr_matrix, spdiags
from scipy.sparse.linalg import eigsh
class SpectralData:
def __init__(self):
self.graph = nx.Graph()
self.size = 0
self.normv = []
self.clusterNum = 0
self.clusters = {}
self.mu = []
def buildProjections(self):
print('Building projections')
# adj
adj = nx.adjacency_matrix(self.graph, weight='weight')
# deg
deg = adj.sum(axis=0)
# choix du nombre de dimensions
k_dim = min(int(0.9 * adj.shape[0]), 30)
# calcul du laplacien
lap = nx.laplacian_matrix(self.graph, weight='weight').asfptype()
lap *= -1
# Rescaling ?
diag = np.array(deg[0]).astype(float)
m = spdiags(diag, 0, diag.shape[1], diag.shape[1])
# calcul des valeurs propres
try:
lambdas, maps = eigsh(A=lap, M=m, k=k_dim, sigma=1.0, which='LM')
self.normv = maps # /np.amax(maps)
print('Projections built')
return True
except:
print('Cannot build projections')
return False
def cluster_nodes(self):
estimator = KMeans(n_clusters=self.clusterNum, init='k-means++', precompute_distances=True, n_init=20,
max_iter=1000)
# estimator = KMeans(n_clusters=2, init='k-means++', precompute_distances=True, n_init=20,max_iter=1000)
estimated_labels = estimator.fit_predict(self.normv)
nodes = self.graph.nodes()
self.clusters = {}
for i in range(0, self.clusterNum):
k = [] # liste des noeuds du meme cluster
for j in range(len(estimated_labels)):
if estimated_labels[j] == i:
k.append(j)
if len(k) > 0:
labels = [nodes[index] for index in k]
center_node = self.set_cluster_center(labels, method='centrality')
self.clusters[center_node[0]] = {'members': labels, 'size': len(labels), 'center': center_node}
for label in labels:
self.graph.node[label]['ClusterName'] = center_node[0]
else:
print('Empty cluster')
def set_cluster_center(self, members, method):
subgraph = self.graph.subgraph(members)
if len(subgraph) > 1:
max_value = 0
max_node_id = ''
if method == 'centrality':
# Searching for node with maximal centrality
s = 1.0 / (len(subgraph) - 1.0)
centrality = dict((n, d * s) for n, d in subgraph.degree_iter())
for key in centrality:
if centrality[key] > max_value:
max_value = centrality[key]
max_node_id = key
center_node = [max_node_id, max_value]
elif method == 'degree':
# Searching for node with max degree
for key in members:
if self.graph.degree(key) > max_value:
max_value = self.graph.degree(key)
max_node_id = key
center_node = [max_node_id, max_value]
else:
center_node = [subgraph.nodes()[0], 0]
return center_node
def clusterize(self):
# print('Clustering')
self.cluster_nodes()
print('Clustering OK')
def saveGraph(self, name='result.gml'):
nx.write_gml(self.graph, name)
print('Graph saved')
def fullProcess(self):
print('Starting full process')
self.buildProjections()
self.clusterize()
print('End of process')
def compareClusters(clusteredGraphs):
Adj = {}
for subgraph in clusteredGraphs:
adjdict = dict()
# Iterating through nodes
for startnode in subgraph.node.keys():
# Checking if node exists in Adj
if startnode in Adj:
adjdict = Adj[startnode]
if 'ClusterName' in subgraph.node[startnode]:
current_cluster = subgraph.node[startnode]['ClusterName']
for endnode in subgraph.node.keys():
if startnode != endnode:
if 'ClusterName' in subgraph.node[endnode]:
if subgraph.node[endnode]['ClusterName'] == current_cluster:
if endnode in adjdict:
adjdict[endnode] += 1
else:
adjdict[endnode] = 1
Adj[startnode] = adjdict
return Adj
def adjToGraph(adj, nodeList, idLabel):
resGraph = nx.Graph()
# Adding nodes
for elem in nodeList:
current_id_node = elem[idLabel] # getting node id
resGraph.add_node(current_id_node, elem)
# for key in elem.keys():
# resGraph.node[current_id_node][key] = elem[key]
# Creating edges from Adj
for xnode in adj.keys():
for ynode in adj[xnode].keys():
if not resGraph.has_edge(str(xnode), str(ynode)) and not ynode == xnode:
resGraph.add_edge(str(xnode), str(ynode), {'weight': adj[xnode][ynode]})
return resGraph