function [beta,CCMIKI] = Chapter_05_LARS(X, y, method, stop, useGram, Gram, trace)
% LARS The LARS algorithm for performing LAR or LASSO.
% BETA = LARS(X, Y) performs least angle regression on the variables in
% X to approximate the response Y. Variables X are assumed to be
% normalized (zero mean, unit length), the response Y is assumed to be
% centered.
% BETA = LARS(X, Y, METHOD), where METHOD is either 'LARS' or 'LASSO',
% determines whether least angle regression or lasso regression should
% be performed.
% BETA = LARS(X, Y, METHOD, STOP) with nonzero STOP will perform least
% angle or lasso regression with early stopping. If STOP is negative,
% the absolute value of STOP determines the desired number of variables. If
% STOP is positive, it corresponds to an upper bound on the L1-norm of
% the BETA coefficients.
% BETA = LARS(X, Y, METHOD, STOP, USEGRAM) specifies whether the Gram
% matrix X'X should be calculated (USEGRAM = 1) or not (USEGRAM = 0).
% Calculation of the Gram matrix is suitable for low-dimensional
% problems. By default, the Gram matrix is calculated.
% BETA = LARS(X, Y, METHOD, STOP, USEGRAM, GRAM) makes it possible to
% supply a pre-computed Gram matrix. Set USEGRAM to 1 to enable. If no
% Gram matrix is available, exclude argument or set GRAM = [].
% BETA = LARS(X, Y, METHOD, STOP, USEGRAM, GRAM, TRACE) with nonzero
% TRACE will print the adding and subtracting of variables as all
% LARS/lasso solutions are found.
% Returns BETA where each row contains the predictor coefficients of
% one iteration. A suitable row is chosen using e.g. cross-validation,
% possibly including interpolation to achieve sub-iteration accuracy.
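% The second output CCMIKI records the maximal absolute correlation C
% over the inactive set at each iteration.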
%
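% Example (a minimal sketch with synthetic data, illustrating the
% preprocessing the function assumes; not taken from the reference):
%   X = randn(50, 10);
%   X = X - repmat(mean(X), 50, 1);           % center columns
%   X = X ./ repmat(sqrt(sum(X.^2)), 50, 1);  % scale columns to unit length
%   y = X(:,1) - 2*X(:,3) + 0.1*randn(50, 1);
%   y = y - mean(y);                          % center the response
%   beta = Chapter_05_LARS(X, y, 'lasso');
%   plot(sum(abs(beta), 2), beta);            % coefficient paths vs. L1 norm
%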
% Author: Karl Skoglund, IMM, DTU, [email protected]
% Reference: 'Least Angle Regression' by Bradley Efron et al., 2004.
% Input checking
% Set default values.
if nargin < 7
trace = 0;
end
if nargin < 6
Gram = [];
end
if nargin < 5
useGram = 1;
end
if nargin < 4
stop = 0;
end
if nargin < 3
method = 'lars';
end
if strcmpi(method, 'lasso')
lasso = 1;
else
lasso = 0;
end
% LARS variable setup
[n, p] = size(X);
CCMIKI = zeros(n,1); counterM = 1; % CCMIKI stores the max absolute correlation C found at each iteration
nvars = min(n-1,p); % Maximum number of variables that can be active at once
maxk = 8*nvars; % Maximum number of iterations
if stop == 0
beta = zeros(2*nvars, p);
elseif stop < 0
beta = zeros(2*round(-stop), p);
else
beta = zeros(100, p);
end
mu = zeros(n, 1); % current "position" as LARS travels towards lsq solution
I = 1:p; % inactive set
A = []; % active set
% Calculate Gram matrix if necessary
if isempty(Gram) && useGram
Gram = X'*X; % Precomputation of the Gram matrix. Fast but memory consuming.
end
if ~useGram
R = []; % Cholesky factorization R'R = X'X where R is upper triangular
end
lassocond = 0; % LASSO condition boolean
stopcond = 0; % Early stopping condition boolean
k = 0; % Iteration count
vars = 0; % Current number of variables
if trace
fprintf('Step\tAdded\tDropped\t\tActive set size\n');
end
% LARS main loop
while vars < nvars && ~stopcond && k < maxk
k = k + 1;
c = X'*(y - mu);
[C, j] = max(abs(c(I)));
CCMIKI(counterM)=C; counterM=counterM+1;
% disp(C);
j = I(j);
if ~lassocond % if a variable has been dropped, do one iteration with this configuration (don't add new one right away)
if ~useGram
R = cholinsert(R,X(:,j),X(:,A));
end
A = [A j];
I(I == j) = [];
vars = vars + 1;
if trace
fprintf('%d\t\t%d\t\t\t\t\t%d\n', k, j, vars);
end
end
s = sign(c(A)); % get the signs of the correlations
if useGram
S = s*ones(1,vars);
GA1 = (Gram(A,A).*S'.*S)\ones(vars,1); % solve directly rather than forming the explicit inverse
AA = 1/sqrt(sum(GA1));
w = AA*GA1.*s; % weights applied to each active variable to get equiangular direction
else
GA1 = R\(R'\s);
AA = 1/sqrt(sum(GA1.*s));
w = AA*GA1;
end
u = X(:,A)*w; % equiangular direction (unit vector)
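% By construction X(:,A)'*u = AA*s: the equiangular vector u has the same
% correlation AA with every sign-corrected active predictor.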
if vars == nvars % if all variables active, go all the way to the lsq solution
gamma = C/AA;
else
a = X'*u; % correlation between each variable and the equiangular vector
temp = [(C - c(I))./(AA - a(I)); (C + c(I))./(AA + a(I))];
gamma = min([temp(temp > 0); C/AA]); % smallest positive step at which an inactive variable ties the active correlation
end
% LASSO modification
if lasso
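% If an active coefficient would hit zero before the full step gamma,
% the path stops there instead and that variable is flagged for removal
% (the sign restriction that distinguishes the lasso path; Efron et al., sec. 3).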
lassocond = 0;
temp = -beta(k,A)./w'; % step at which each active coefficient would cross zero
gamma_tilde = min([temp(temp > 0) gamma]);
j = find(temp == gamma_tilde);
if gamma_tilde < gamma
gamma = gamma_tilde;
lassocond = 1;
end
end
mu = mu + gamma*u;
if size(beta,1) < k+1
beta = [beta; zeros(size(beta,1), p)];
end
beta(k+1,A) = beta(k,A) + gamma*w';
% Early stopping at specified bound on L1 norm of beta
if stop > 0
t2 = sum(abs(beta(k+1,:)));
if t2 >= stop
t1 = sum(abs(beta(k,:)));
s = (stop - t1)/(t2 - t1); % interpolation factor 0 < s < 1
beta(k+1,:) = beta(k,:) + s*(beta(k+1,:) - beta(k,:));
stopcond = 1;
end
end
% If LASSO condition satisfied, drop variable from active set
if lassocond == 1
if ~useGram
R = choldelete(R,j);
end
I = [I A(j)];
A(j) = [];
vars = vars - 1;
if trace
fprintf('%d\t\t\t\t%d\t\t\t%d\n', k, j, vars);
end
end
% Early stopping at specified number of variables
if stop < 0
stopcond = vars >= -stop;
end
end
% trim beta
if size(beta,1) > k+1
beta(k+2:end, :) = [];
end
if k == maxk
disp('LARS warning: Forced exit. Maximum number of iterations reached.');
end
% Fast Cholesky insert and remove functions
% Updates R in a Cholesky factorization R'R = X'X of a data matrix X. R is
% the current R matrix to be updated. x is a column vector representing the
% variable to be added and X is the data matrix containing the currently
% active variables (not including x).
function R = cholinsert(R, x, X)
diag_k = x'*x; % diagonal element k in X'X matrix
if isempty(R)
R = sqrt(diag_k);
else
col_k = x'*X; % elements of column k in X'X matrix
R_k = R'\col_k'; % R'R_k = (X'X)_k, solve for R_k
R_kk = sqrt(diag_k - R_k'*R_k); % norm(x'x) = norm(R'*R), find last element by exclusion
R = [R R_k; [zeros(1,size(R,2)) R_kk]]; % update R
end
% Deletes a variable from the X'X matrix in a Cholesky factorisation R'R =
% X'X. Returns the downdated R. This function is just a stripped version of
% Matlab's qrdelete.
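% After removing column j, R is upper Hessenberg; the Givens rotations
% computed by planerot restore the upper triangular form column by column.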
function R = choldelete(R,j)
R(:,j) = []; % remove column j
n = size(R,2);
for k = j:n
p = k:k+1;
[G,R(p,k)] = planerot(R(p,k)); % remove extra element in column
if k < n
R(p,k+1:n) = G*R(p,k+1:n); % adjust rest of row
end
end
R(end,:) = []; % remove the zeroed-out row
% To do
%
% There is a modification that turns least angle regression into stagewise
% (epsilon) regression. This has not been implemented.