-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathnFold_C45.m
77 lines (67 loc) · 2.12 KB
/
nFold_C45.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
function stats = nFold_C45(data_set,data_labels,shuffle,n,nIter,Subsets)
% NFOLD_C45  n-fold cross-validation using the 'C45' option of Classify.
%
% For every iteration the data is split into folds, each fold is passed to
% Classify('C45', ...), and the per-fold confusion counts are summed into
% one set of statistics for that iteration.
%
% Input:
%   data_set:    data; rows are selected with the fold indices
%   data_labels: labels, every entry must be 1 or -1
%   shuffle:     1 for random, 0 for non-random partition (Default: 1);
%                forwarded to GenerateSubsets
%   n:           number of folds in the cross-validation (Default: 3, or
%                the number of folds found in Subsets when Subsets is
%                given and n is [])
%   nIter:       number of cross-validation iterations (Default: 1)
%   Subsets:     optional pre-built partitions, one entry per iteration.
%                Either a cell array of structs or a struct array; each
%                entry has fields 'training' and 'testing', both cell
%                arrays with one index set per fold.
%                (Default: generated with GenerateSubsets('nFold', ...))
%
%   Passing [] for shuffle, n, nIter or Subsets selects the default.
%
% Output:
%   stats: 1-by-nIter struct array with fields
%            tp, tn, fp, fn      - counts summed over all folds
%            acc, ppv, sens, spec - derived from the summed counts
%                                   (NaN when a denominator is zero)
%            subsets.training/.testing - the partitions that were used
%
% Errors:
%   'Labels must be 1 and -1' when any label is not 1 or -1.
%   An explanatory error when Subsets has fewer entries than nIter or
%   fewer folds than n.

% ---- defaults (a missing argument and [] are treated alike) ----
if nargin < 6
    Subsets = {};
end
if nargin < 5 || isempty(nIter)
    nIter = 1;
end
if nargin < 4
    n = [];
end
if nargin < 3 || isempty(shuffle)
    shuffle = 1; % randomized
end

% Accept a struct array (e.g. collected from a previous stats output) by
% turning it into the cell-of-structs form used below.
if isstruct(Subsets)
    Subsets = num2cell(Subsets);
end
useOwnSubsets = ~isempty(Subsets);

if isempty(n) && ~useOwnSubsets
    n = 3; % 3-fold cross-validation
end

% ---- validation ----
% Flatten first: on a matrix, any() would return a vector and "if" would
% only fire when every column contains an invalid label.
labelVec = data_labels(:);
if any(labelVec ~= 1 & labelVec ~= -1)
    error('Labels must be 1 and -1');
end
if useOwnSubsets && numel(Subsets) < nIter
    error('Subsets holds %d entries but nIter is %d; one entry per iteration is required', ...
        numel(Subsets), nIter);
end

stats = struct;
for j = 1:nIter
    fprintf('Iteration: %i\n', j);

    % Obtain the partitions for this iteration.
    if useOwnSubsets
        tra = Subsets{j}.training;
        tes = Subsets{j}.testing;
        if isempty(n)
            nFolds = min(numel(tra), numel(tes));
        else
            nFolds = n;
        end
        if nFolds > numel(tra) || nFolds > numel(tes)
            error('n is %d but Subsets{%d} only provides %d training / %d testing folds', ...
                nFolds, j, numel(tra), numel(tes));
        end
    else
        [tra, tes] = GenerateSubsets('nFold', data_set, data_labels, shuffle, n);
        nFolds = n;
    end

    % Confusion counts accumulated over the folds of this iteration.
    Ttp = 0; Ttn = 0; Tfp = 0; Tfn = 0;
    for i = 1:nFolds
        fprintf('Fold # %i\n', i);

        training_set    = data_set(tra{i},:);
        testing_set     = data_set(tes{i},:);
        training_labels = data_labels(tra{i});
        testing_labels  = data_labels(tes{i});

        % Second output (method string) is not needed here.
        [temp_stats, ~] = Classify('C45', training_set, testing_set, training_labels, testing_labels);

        Ttp = Ttp + temp_stats.tp;
        Ttn = Ttn + temp_stats.tn;
        Tfp = Tfp + temp_stats.fp;
        Tfn = Tfn + temp_stats.fn;
    end

    % output statistics
    stats(j).tp = Ttp;
    stats(j).tn = Ttn;
    stats(j).fp = Tfp;
    stats(j).fn = Tfn;
    stats(j).acc  = (Ttp+Ttn)/(Ttp+Ttn+Tfp+Tfn);
    stats(j).ppv  = Ttp/(Ttp+Tfp);
    stats(j).sens = Ttp/(Ttp+Tfn);
    stats(j).spec = Ttn/(Tfp+Ttn);
    stats(j).subsets.training = tra;
    stats(j).subsets.testing  = tes;
end