-
Notifications
You must be signed in to change notification settings - Fork 0
/
analyseBestBlindClustering.m
159 lines (113 loc) · 3.8 KB
/
analyseBestBlindClustering.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
% Start from a clean slate: close every figure and wipe the workspace.
close all
clear all

% File name pattern of the saved results; %d is replaced by the number of
% clusters.
fMask = 'SAVE/BestBlind/BestFeaturesForBlindClustering-%d-clusters-17-features.mat';

% Cluster counts for which a result file is loaded.
nClust = 2:9;

% data(k) holds the contents of the result file for nClust(k) clusters.
for k = 1:numel(nClust)
    resultFile = sprintf(fMask, nClust(k));
    data(k) = load(resultFile);
end
% Summarise the silhouette values: row = cluster count (index into nClust),
% column = number of features in the feature set.
for c = 1:numel(nClust)
    res = data(c);

    % Size of every evaluated feature set for this cluster count.
    for s = 1:numel(res.silhouetteList)
        nFeatures{c}(s) = numel(res.featIdx{s});
    end

    for k = 1:res.nFeatMax
        % All feature sets that contain exactly k features.
        idx  = find(nFeatures{c} == k);
        vals = res.silhouetteList(idx);

        minSilhouette(c,k)    = min(vals);
        medianSilhouette(c,k) = median(vals);

        % Remember which feature set reached the maximum; maxIdx indexes
        % into featIdx / silhouetteList of the corresponding data entry.
        [maxSilhouette(c,k), best] = max(vals);
        maxIdx(c,k) = idx(best);
    end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Interestingly, there is a steep drop in silhouette value after 3 features.
% Which are the first 3? For every cluster count, print the names of the
% features in the 3-feature set with the highest silhouette value.
for c = 1:numel(nClust)
    bestSet  = maxIdx(c,3);
    featLst  = data(c).featIdx{bestSet};
    allNames = data(c).allFeatureNames;

    fprintf('%d clusters: ', nClust(c))
    for f = 1:numel(featLst)
        fprintf('%s ', allNames{featLst(f)})
    end
    fprintf('\n')
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Figure: silhouette value against the number of features, one colour per
% cluster count. Solid lines are the median over all feature sets of a
% given size, dashed lines the maximum.

% One RGB row (0-255, rescaled to 0-1) per entry of nClust.
colors = [254,232,200;
          253,212,158;
          253,187,132;
          252,141,89;
          239,101,72;
          215,48,31;
          179,0,0;
          127,0,0] / 255;

% Line width per entry of nClust; the fourth entry is drawn thicker.
LW = [2 2 2 4 2 2 2 2];

figure
for i = 1:numel(nClust)
    xFeat    = 1:data(i).nFeatMax;
    lineOpts = {'color', colors(i,:), 'linewidth', LW(i)};

    % Only the median line is kept as a handle for the legend.
    p(i) = plot(xFeat, medianSilhouette(i,:), lineOpts{:}, 'linestyle','-');
    hold on

    % Minimum curve is disabled (kept for reference):
    % plot(xFeat, minSilhouette(i,:), ...
    %      'color', colors(i,:),'linestyle','--');

    plot(xFeat, maxSilhouette(i,:), lineOpts{:}, 'linestyle','--');

    pLeg{i} = sprintf('%d cluster', nClust(i));
end

legend(p,pLeg);
xlabel('Number of features','fontsize',24)
ylabel('Silhouette value','fontsize',24)
set(gca,'fontsize',24)
box off

saveas(gcf,'FIGS/BestBlindClustering-silhouette.pdf','pdf')
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot along the other axis: silhouette value against the number of
% clusters, with one line per feature-set size. Two figures are produced,
% one for the maximum and one for the median silhouette value, and each is
% saved as a PDF under FIGS/.

% Palette of RGB rows (0-255, rescaled to 0-1).
color2 = [166,206,227
          31,120,180
          178,223,138
          51,160,44
          251,154,153
          227,26,28
          253,191,111
          255,127,0
          202,178,214
          106,61,154
          255,255,153
          177,89,40]/255;
LS = {'-','--'};
nColor2 = size(color2,1);

% Statistic to plot, paired with the tag that appears in the y-axis label
% and in the output file name.
silhouetteStat = {maxSilhouette, medianSilhouette};
statName       = {'max', 'median'};

for k = 1:numel(silhouetteStat)
    figure
    for j = 1:data(1).nFeatMax
        % Walk through the whole palette before moving on to the next line
        % style. The style used to switch after 11 lines although the
        % palette has 12 colours, so the two cycles were out of step.
        % Both indices wrap around, which keeps them in range for any
        % number of features.
        colorIdx = mod(j-1, nColor2) + 1;
        styleIdx = mod(ceil(j/nColor2) - 1, numel(LS)) + 1;

        p2(j) = plot(nClust, silhouetteStat{k}(:,j), ...
                     'linewidth', 2, ...
                     'linestyle', LS{styleIdx}, ...
                     'color', color2(colorIdx,:));
        hold on
        pLeg2{j} = sprintf('%d features',j);
    end

    legend(p2,pLeg2,'location','northeastoutside')
    xlabel('Number of clusters','fontsize',24)
    ylabel(sprintf('Silhouette value (%s)', statName{k}),'fontsize',24)
    set(gca,'fontsize',18)
    box off

    saveas(gcf, ...
           sprintf('FIGS/BestBlindClustering-silhouette-%s.pdf', statName{k}), ...
           'pdf')
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% We want to find the feature set that gave the best two cluster and
% five cluster answers, and show the corresponding confusion matrix
% for those two.
%
% Only the feature names are looked up below; the confusion matrix itself
% is not computed by this code.
%
% feature2 / feature5 are cell arrays holding the names of ALL features in
% the best set. Parenthesis indexing is required for that: featIdx{...} may
% be a vector, and brace indexing allFeatureNames with a vector produces a
% comma-separated list that a single-output assignment cannot capture.

% data(1) is expected to be the two cluster result.
assert(data(1).nClusters == 2)
[maxVal2,maxIdx2] = max(data(1).silhouetteList);
feature2 = data(1).allFeatureNames(data(1).featIdx{maxIdx2});

% data(4) is expected to be the five cluster result.
assert(data(4).nClusters == 5)
[maxVal5,maxIdx5] = max(data(4).silhouetteList);
feature5 = data(4).allFeatureNames(data(4).featIdx{maxIdx5});