-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathK-Means.py
94 lines (83 loc) · 2.71 KB
/
K-Means.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import numpy as np
import sys
import matplotlib.pyplot as plt
class KMeans(object):
    """K-means clustering over a list of equal-length numeric points.

    The first ``cluster_num`` points of the dataset seed the cluster centers.
    ``fit`` then alternates between assigning every point to its nearest
    center and recomputing each center as the mean of its members, until the
    centers stop changing.
    """

    def __init__(self, dataset, cluster_num=2):
        """
        :param dataset: list of points, e.g. ``[[x1, y1], [x2, y2], ...]``
        :param cluster_num: number of clusters (and of cluster centers)
        :raises ValueError: if ``cluster_num`` is smaller than 1 or larger
            than the number of points, because the initial centers are taken
            from the first ``cluster_num`` points
        """
        if cluster_num < 1:
            raise ValueError("cluster_num must be at least 1")
        if len(dataset) < cluster_num:
            raise ValueError(
                "cluster_num (%d) exceeds the number of points (%d)"
                % (cluster_num, len(dataset))
            )
        self.dataset = dataset
        # Number of cluster centers.
        self.cluster_num = cluster_num
        # Use the first N points as the initial centers. They are copied into
        # plain lists so the centers never alias rows of the dataset and can
        # be compared with the list-based centers produced by ``_center``.
        self.center_points = [list(point) for point in dataset[0:cluster_num]]
        # Filled by ``fit``: {cluster index: [points assigned to it]}.
        self.result = {}

    def fit(self):
        """Cluster the dataset, iterating until the centers no longer change.

        :return: ``{cluster index: [[point 1], [point 2], ...]}``; the same
            dict is also stored in ``self.result``
        """
        while True:
            # Start every pass with empty clusters.
            for i in range(self.cluster_num):
                self.result[i] = []
            # Assignment step: put each point into the cluster of its
            # nearest center.
            for data in self.dataset:
                self.result[self._nearest_center(data)].append(data)
            # Update step: recompute every center from its members.
            new_center = []
            for i in range(self.cluster_num):
                members = self.result[i]
                if members:
                    new_center.append(self._center(members))
                else:
                    # An empty cluster has no mean; keep its previous center.
                    new_center.append(self.center_points[i])
            # Converged once a full pass leaves every center unchanged.
            if new_center == self.center_points:
                return self.result
            self.center_points = new_center

    def _nearest_center(self, point):
        """Return the index of the center closest to ``point``.

        On ties the center with the lowest index wins.
        """
        best_index = 0
        # Infinity rather than ``sys.maxsize`` so that arbitrarily large
        # (finite) distances are still compared correctly.
        best_distance = float("inf")
        for i, center in enumerate(self.center_points):
            distance = self._distance(point, center)
            if distance < best_distance:
                best_index = i
                best_distance = distance
        return best_index

    @staticmethod
    def _distance(p1, p2):
        """Euclidean distance between two points of the same dimension."""
        squared = sum(pow(a - b, 2) for a, b in zip(p1, p2))
        return pow(squared, 0.5)

    @staticmethod
    def _center(_list):
        """Return the coordinate-wise mean of the points in ``_list`` as a list."""
        return np.array(_list).mean(axis=0).tolist()
if __name__ == '__main__':
    # Load the sample points: one comma-separated point per line, preceded by
    # a header row. 'utf-8-sig' drops a leading byte-order mark if present.
    points = []
    with open("./xclara.csv", encoding='utf-8-sig') as f:
        next(f, None)  # skip the header row
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at the end).
                continue
            points.append([float(field) for field in line.split(",")])

    km = KMeans(points, 3)
    km.fit()
    print(km.result)

    # Draw each cluster in its own color. Only the marker is given in the
    # format string: also naming a color there ('.b') would be overridden by
    # the ``color`` keyword and makes matplotlib emit a warning.
    colors = ['#0dceda', '#1fab89', '#8971d0']
    for k, members in km.result.items():
        xs = [x for x, _ in members]
        ys = [y for _, y in members]
        # Wrap around the palette so a larger cluster count cannot index
        # past its end.
        plt.plot(xs, ys, '.', color=colors[k % len(colors)])

    # Draw the cluster centers as black crosses.
    center_xs = [x for x, _ in km.center_points]
    center_ys = [y for _, y in km.center_points]
    plt.plot(center_xs, center_ys, '+', color='black')
    plt.show()