-
Notifications
You must be signed in to change notification settings - Fork 0
/
Kmeans
43 lines (40 loc) · 1.46 KB
/
Kmeans
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from numpy import *
import matplotlib.pyplot as plt
import operator
# Query point (x, y) that the script at the bottom of the file classifies.
inx = [1, 3]
def createDataSet(show=True):
    """Create the toy training set and, optionally, plot it.

    Args:
        show: When true (the default, matching the original behaviour), draw
            the samples with matplotlib and call ``plt.show()``. Pass
            ``False`` to get the data without touching matplotlib at all.

    Returns:
        A ``(group, labels)`` tuple. ``group`` is a 4x2 array with one
        ``(x, y)`` sample per row; ``labels`` is a list holding the class
        name of each row.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 1.1]])
    labels = ["A", "A", "B", "B"]

    if show:
        plt.figure()
        # Plot every class straight from ``group``/``labels`` so the picture
        # cannot drift from the data that is actually returned.
        for label, style in (("A", "ro"), ("B", "bo")):
            rows = [i for i, name in enumerate(labels) if name == label]
            plt.plot(group[rows, 0], group[rows, 1], style)
        # Yellow star at (1, 3): the same coordinates as the module-level
        # query point ``inx``.
        plt.plot(1, 3, 'y*')
        plt.show()

    return group, labels
def classify(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    The Euclidean distance between ``inX`` and every row of ``dataSet`` is
    computed, the ``k`` closest rows are selected, and the label that occurs
    most often among them is returned.

    Args:
        inX: The query point, with one value per column of ``dataSet``.
        dataSet: 2-D array of training samples, one sample per row.
        labels: Sequence of class labels; ``labels[i]`` is the label of
            ``dataSet[i]``.
        k: Number of nearest neighbours that vote. Must satisfy
            ``1 <= k <= dataSet.shape[0]``.

    Returns:
        The label with the most votes. On a tie, the tied label that was
        voted for first (i.e. by the closer neighbour) wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            training samples.
    """
    dataSetSize = dataSet.shape[0]  # number of training samples
    if not 1 <= k <= dataSetSize:
        raise ValueError(
            "k must be between 1 and the number of training samples "
            "(%d), got %r" % (dataSetSize, k)
        )

    # Broadcasting subtracts the query point from every row, so there is no
    # need to materialise a tiled copy of inX.
    diffMat = asarray(inX) - dataSet
    distances = (diffMat ** 2).sum(axis=1) ** 0.5  # Euclidean distance per row

    # Row indices of the k closest samples, nearest first.
    nearest = distances.argsort()[:k]

    classCount = {}
    for idx in nearest:
        voteLabel = labels[idx]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    # max() returns the first maximal item in insertion order, which is the
    # same winner a stable descending sort of the counts would put first.
    return max(classCount.items(), key=operator.itemgetter(1))[0]
# Build the training data (createDataSet also draws the scatter plot), then
# classify the query point with its 3 nearest neighbours and print the label.
group, labels = createDataSet()
res = classify(inx, group, labels, k=3)
print(res)