-
Notifications
You must be signed in to change notification settings - Fork 4
/
kmedoids.m
112 lines (80 loc) · 3.33 KB
/
kmedoids.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
%
% This function applies a k-medoid algorithm for the given distance matrix
% Author: Celine Scheidt
% Date: April 2009
% Updated: February 2013
function Clustering = kmedoids(D,nbclusters,nbrep)
%% Input Parameters
% - D: distance matrix to use
% - nbclusters: number of clusters to construct
% - nbrep: (optional). Number of kmedoid to be performed. The best cluster
% configuration is returned
%% Output Parameters
% - Clustering: Results of the clustering, which contains:
% - label: is a vector of length Nbmodels, which contains the cluster index that each model belongs to
% - medoids: is a vector of length nbcluster containing the index
% of the medoids
% - weights: is a vector of ength nbcluster containing the number
% of models in each cluster.
%
% Reference: http://en.wikipedia.org/wiki/K-medoids
% Kaufman, L. and Rousseeuw, P.J. (1987), Clustering by means of Medoids,
% in Statistical Data Analysis Based on the –Norm and Related Methods,
% edited by Y. Dodge, North-Holland, 405–416.
if size(D,1) == 1
D = squareform(D);
end
maxIterations = 50;
npoints = size(D,1);
minDistBest = Inf;
if nargin < 3
nbrep = 1;
end
for iter = 1:nbrep % nbrep clustering are performed, the best is returned
% 1. Initalize: randonly select nbclusters of the npoints data points as the medoids
initMedoids = randsample(npoints,nbclusters);
% 2. Associate each data point to the closest medoid
[minDistInit, label] = min(D(initMedoids,:));
currentMedoids = initMedoids;
minDistCurrent = minDistInit;
label_prev = NaN(1,npoints);
nbIter = 0;
while any(label ~= label_prev) && nbIter < maxIterations % while cluster configuration is changing and maxIteration not reached
label_prev = label;
% 3. For each medoid m
for m = 1:nbclusters
NoMedoid = setdiff(1:npoints,currentMedoids);
NewMedoids = currentMedoids;
% For each non-medoid data point o
for o = 1:length(NoMedoid)
% Swap m and o and compute the cost of the configuration
NewMedoids(m) = NoMedoid(o);
[minDist, label] = min(D(NewMedoids,:));
cost = sum(minDist) - sum(minDistCurrent);
if cost < 0 % 4. Select the configuration with the lowest cost
currentMedoids(m) = NoMedoid(o);
[minDistCurrent, label] = min(D(currentMedoids,:));
end
end
end
nbIter = nbIter+1;
end
currentMedoids = sort(currentMedoids);
[minDist, label] = min(D(currentMedoids,:));
% Return the best clustering configuration among the nbrep tested
if sum(minDist) < sum(minDistBest)
fprintf('minDist % .2f, iter %i \n',sum(minDist),iter)
minDistBest = minDist;
labelBest = label;
currentMedoidsBest = currentMedoids;
end
end
%% Once the medoids are defined, store the outputs
weights = zeros(nbclusters,1);
for i = 1:nbclusters
weights(i) = sum(labelBest == i);
end
Clustering.T = labelBest;
Clustering.medoids = currentMedoidsBest';
Clustering.weights = weights;
end