-
Notifications
You must be signed in to change notification settings - Fork 14
/
transformData.m
68 lines (63 loc) · 2.36 KB
/
transformData.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
function transf_data = transformData(data,varTypes)
% TRANSFORMDATA  Discretize / re-encode every column of a data matrix.
%
%   transf_data = transformData(data,varTypes)
%
% Inputs
%   data     : nObs-by-nVars numeric matrix, one column per attribute.
%   varTypes : vector with one entry per column of data:
%                0 -> real-valued attribute, quantized with quantizeVariable
%                1 -> categorical attribute, re-coded as 0 .. (nCategories-1)
%
% Output
%   transf_data : nObs-by-nVars double matrix with the transformed columns.
%
% This function transforms the data by
% 1) mapping the values of categorical attributes to integer codes between
%    0 and (number of categories - 1), following the sorted order of the
%    distinct values. NaN entries are left as NaN.
%
% 2) quantizing real-valued attributes using nBins number of bins;
%    modify the value of nBins for sparser/denser discretization
%       quantType = 'equalfreq'  <-- each bin has same num. observations
%       quantType = 'equalwidth' <-- each bin has same width
%    (quantizeVariable is defined outside this file; the meaning of the
%    quantType options is taken from the original header comment.)
%
% An error is raised if a varTypes entry is neither 0 nor 1, or if varTypes
% has fewer entries than data has columns.
%
%
% Copyright 2015 Riccardo Taormina ([email protected]),
% Gulsah Karakaya ([email protected];),
% Stefano Galelli ([email protected]),
% and Selin Damla Ahipasaoglu ([email protected];.
%
% Please refer to README.txt for further information.
%
%
% This file is part of Matlab-Multi-objective-Feature-Selection.
%
% Matlab-Multi-objective-Feature-Selection is free software: you can redistribute
% it and/or modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation, either version 3 of the
% License, or (at your option) any later version.
%
% This code is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with MATLAB_IterativeInputSelection.
% If not, see <http://www.gnu.org/licenses/>.
%

% discretization options
nBins     = 20;
quantType = 'equalwidth';

% initialize output array
[nObs,nVars] = size(data);
transf_data  = zeros(nObs,nVars);

% fail early with a clear message instead of an index-out-of-bounds error
if numel(varTypes) < nVars
    error('varTypes has %d entries but data has %d columns.', ...
        numel(varTypes),nVars);
end

% loop through all variables
for i = 1 : nVars
    % get current attribute
    attr     = data(:,i);
    attrType = varTypes(i);

    % transform accordingly with its varType
    if attrType == 0
        % real-valued, discretize
        transf_data(:,i) = quantizeVariable(attr,nBins,quantType);
    elseif attrType == 1
        % categorical data, map them between 0 and num. categories - 1.
        % The codes are taken from the index output of unique rather than
        % by overwriting attr in place: in-place relabelling merges
        % categories whenever an already assigned code equals a category
        % value that has not been processed yet (e.g. [-1 0 1]).
        codes = nan(nObs,1);
        valid = ~isnan(attr);   % NaN never matches a category; keep as NaN
        if any(valid)
            [~,~,catIdx] = unique(attr(valid));
            codes(valid) = catIdx(:) - 1;
        end
        transf_data(:,i) = codes;
    else
        error('Attribute num#%d, type not recognized!',i);
    end
end