ExTwoFunctions.m
classdef ExTwoFunctions
properties
end
methods(Static)
function clear()
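% Convenience reset. Note: *clear* inside a function only clears the
% function's own workspace, not the caller's; *close all* and *clc*
% still take effect globally.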
clear;
close all;
clc;
end
function [beta, costArray, i] = gradientDescent (features, y, a)
% *Gradient Descent* Function
% This function returns the $\beta$ vector according to the features and
% solution matrix given. It uses gradient descent to calculate $\beta$.
% This is a generic function and can be used for any number of features.
%
% Inputs:
%
% # features: The feature matrix where each row contains one sample entry
% and each column contains unique features. Note: *ones* vector should not
% be concatenated, as it will be done here.
% # y: The solution vector, where each row contains the solution for the
% corresponding sample entry.
% # a: The learning rate $\alpha$.
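%
% Usage sketch (hypothetical data; the learning rate 0.001 is chosen for
% this toy set and is not a general recommendation):
%
%   x = (1:10)';
%   y = 1 + 2*x + 0.5*randn(10,1);
%   [beta, costs, iters] = ExTwoFunctions.gradientDescent(x, y, 0.001);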
%%
% Converting the feature matrix to 'X' by adding *ones* vector
[rows, columns] = size(features);
X = [ones(rows,1) features];
%% Initializing $\beta$, denoted by b
% $\alpha$, denoted by a, is passed in as an argument. Change the value
% below to start gradient descent from a different $\beta$.
b = 0;
%%
% $\beta$ value using normal equation and the resultant cost:
bNormal = ExTwoFunctions.normalEquation(features,y);
costNormal = ExTwoFunctions.mseMultiFeature(features,y,bNormal);
%%
% Performing Gradient Descent
bOld = ones((columns+1), 1)*b; % The initial (assumed) beta value
prevCost = ExTwoFunctions.mseMultiFeature(features,y,bOld); % Holds the cost from the previous iteration
newCost = prevCost; % Will contain the current cost value in the loop
bNew = bOld; % Will contain the current beta value in the loop
i = 1; % Iterator
costArray(1,:) = [1 prevCost]; % Returned, can be used to plot changes in the cost with iterations
loopCondition = true;
while loopCondition
i = i+1;
prevCost = newCost;
bOld = bNew;
bNew = bOld - a * X' * (X * bOld - y);
newCost = ExTwoFunctions.mseMultiFeature(features,y,bNew);
costArray(i,:) = [i newCost];
% Stopping condition: stop when the current cost is within 1% of the
% cost achieved by the normal equation
if abs(newCost - costNormal) < (1/100)*costNormal
loopCondition = false;
end
% The following commented lines can be used for debugging, to see how
% many iterations have occurred and how sharply the cost is
% decreasing.
% if rem(i,100000) == 0
% i
% newCost
% end
end
beta = bNew;
end
function [cost] = mseMultiFeature(features, y, b)
% *Mean Squared Error* Function
% This function calculates the Mean Squared Error of a linear regression
% fit on any dataset.
%
% Note that the mean squared error formula used is the following:
%
% $\frac{1}{n} \sum_{i=1}^{n} \left( h_{\beta}(x_{i}) - y_{i} \right)^{2}$
%
% where n is the number of samples and, for 'p' features, $h_{\beta}(x)$ is:
%
% $\beta_{0} + \beta_{1} x_{1} + \beta_{2} x_{2} + ... + \beta_{p} x_{p}$
%
% Inputs:
%
% # features: A matrix with each test sample in one row and each feature
% in one column (without the ones vector)
% # y: Solution for each test sample.
% # b: The value of $\beta$
%
% Output: The mean squared error
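%
% Usage sketch (hypothetical numbers): for features = [1; 2], y = [2; 4]
% and b = [0; 2] the fit is exact, so the MSE is 0:
%
%   c = ExTwoFunctions.mseMultiFeature([1; 2], [2; 4], [0; 2]);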
% Adding one extra column of ones in the feature matrix
[rows, ~] = size(features);
X = [ones(rows,1) features];
% Calculating the MSE
cost = sum((X * b - y) .^ 2) / rows;
end
function [beta] = normalEquation(features, y)
% *NormalEquation* Function
% This function returns the $\beta$ vector according to the features and solution
% matrix given. This is a generic function and can be used for any number
% of features.
%
% Inputs:
%
% # features: The feature matrix where each row contains one sample entry
% and each column contains unique features. Note: *ones* vector should not
% be concatenated, as it will be done here.
% # y: The solution vector, where each row contains the solution for the
% corresponding sample entry.
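%
% Usage sketch (hypothetical data): recover the coefficients of
% y = 1 + 2x exactly:
%
%   x = (1:5)';
%   b = ExTwoFunctions.normalEquation(x, 1 + 2*x);  % b is approx [1; 2]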
% Converting the feature matrix to 'X' by adding *ones* vector
[rows, ~] = size(features);
X = [ones(rows,1) features];
% Now implementing the normal equation, $\beta = (X^{T} X)^{-1} X^{T} y$
% (using pinv for numerical stability)
beta = pinv(X' * X) * X' * y;
end
function [data] = normalizeData(features)
% *Normalize Data* Function
% This function normalizes the data given. It uses the following formula to
% normalize:
%
% $\left( \frac{X - \mu}{\sigma}\right)$
%
% Where:
%
% * $\mu$ = mean, and
% * $\sigma$ = standard deviation
%
% Steps:
%
% # Calculating mean across each column
% # Repeating the mean vector for each row in the features matrix
% # Calculating the standard deviation across each column
% # Repeating the standard deviation vector for each row in the
% features matrix
% # Subtracting the mean matrix from the features matrix and then
% dividing element-wise by the standard deviation matrix
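%
% Usage sketch: each column of the result has mean 0 and unit (sample)
% standard deviation, e.g.
%
%   Z = ExTwoFunctions.normalizeData([1 10; 2 20; 3 30]);
%   % Z = [-1 -1; 0 0; 1 1]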
[rows,~] = size(features);
if rows == 1
data = features;
return
end
data = (features - repmat(mean(features), rows, 1)) ./ repmat(std(features), rows, 1);
end
function [cost] = polyCost (features, y, b, d)
% *Mean Squared Error Calculator for Polynomial Regression*
% Inputs:
%
% # features: the single feature vector (a column vector)
% # y: the solution vector (a column vector)
% # b: the $\beta$ value (a row vector of length d+1)
% # d: the maximum degree of the polynomial
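%
% Usage sketch (hypothetical numbers): for y = x.^2 with b = [0 0 1]
% and d = 2 the polynomial matches y exactly, so the cost is 0:
%
%   c = ExTwoFunctions.polyCost((1:4)', ((1:4)').^2, [0 0 1], 2);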
x = features';
[~,samples] = size(x);
yM = repmat(x,d+1,1);
for k = 0:d
yM(k+1,:) = yM(k+1,:) .^ k;
end
betaMat = repmat(b', 1, samples);
yM = yM .* betaMat;
yM = sum(yM);
y = y';
yM = yM - y;
yM = yM .^ 2;
yM = mean(yM);
cost = yM;
end
function [cost] = costLogistic (X, y, b)
% *COST FUNCTION* For logistic regression
% Inputs:
%
% # X: Features Matrix including the ones vector concatenated in the first
% column
% # y: The solution vector
% # b: The value of beta
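%
% Usage sketch (hypothetical numbers): with b = 0 the sigmoid outputs
% 0.5 everywhere, so the cost is -log(0.5) = log(2):
%
%   X = [ones(4,1) (1:4)'];
%   c = ExTwoFunctions.costLogistic(X, [0;0;1;1], zeros(2,1));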
[n, ~] = size(X);
mulVal = X * b;
cost = (-1/n)*(y'*log(ExTwoFunctions.sigmoid(mulVal))+(1-y)'*log(1-ExTwoFunctions.sigmoid(mulVal)));
end
function [b,costArray, i] = logisticGradient (X,y,a)
% *GRADIENT DESCENT FOR LOGISTIC REGRESSION*
%
% Inputs:
%
% # X: The features matrix with the ones vector concatenated in the
% first column.
% # y: The solution labels.
% # a: The learning rate $\alpha$.
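%
% Usage sketch (hypothetical separable data; alpha = 0.1 chosen for this
% toy set):
%
%   X = [ones(4,1) (1:4)'];
%   [b, costs, iters] = ExTwoFunctions.logisticGradient(X, [0;0;1;1], 0.1);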
% featureCount will be the total number of features + 1, as X has a
% ones vector concatenated in the first column
[n, featureCount] = size(X);
% Initializing variables for the loop
bNew = zeros(featureCount, 1);
loopCondition = true;
i = 1;
newCost = ExTwoFunctions.costLogistic(X,y,bNew);
costArray(1,:) = [1, newCost];
while loopCondition
i = i+1;
bOld = bNew;
prevCost = newCost;
bNew = bOld - (a/n)*(X')*((ExTwoFunctions.sigmoid(X*bOld))-y);
newCost = ExTwoFunctions.costLogistic(X,y,bNew);
costArray(i,:) = [i, newCost];
if (prevCost - newCost) < 0.0000005
loopCondition = false;
end
% a = a+a/9000000;
% if rem(i,1000) == 0
% i
% newCost
% end
%
% if i == 500000
% loopCondition = false;
% end
end
b = bNew;
end
function g = sigmoid(z)
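% *Sigmoid* Function: $g(z) = \frac{1}{1 + e^{-z}}$, applied
% element-wise to z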
g = 1./(1+exp(-z));
end
function [jVal, gradient] = costFunctionFminunc (b, football_X, y)
% *Cost function for fminunc()*
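%
% Usage sketch (hypothetical data; optimset/fminunc require the
% Optimization Toolbox):
%
%   options = optimset('GradObj', 'on', 'MaxIter', 400);
%   initB = zeros(size(football_X, 2) + 1, 1);
%   [b, cost] = fminunc(@(t) ...
%       ExTwoFunctions.costFunctionFminunc(t, football_X, y), initB, options);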
[n, ~] = size(football_X);
X = [ones(n,1) football_X];
mulVal = X * b;
jVal = (-1/n)*(y'*log(ExTwoFunctions.sigmoid(mulVal))+(1-y)'*log(1-ExTwoFunctions.sigmoid(mulVal)));
gradient = (1/n)*(X')*(ExTwoFunctions.sigmoid(X*b)-y);
end
function out = mapFeature(X1, X2, D)
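% *Map Feature* Function
% Maps the two feature vectors X1 and X2 to all polynomial terms
% $X1^{i-j} X2^{j}$ up to degree D, with a leading ones column;
% e.g. D = 2 gives [1, X1, X2, X1.^2, X1.*X2, X2.^2]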
out = ones(size(X1(:,1)));
for i = 1:D
for j = 0:i
out(:, end+1) = (X1.^(i-j)).*(X2.^j);
end
end
end
function [jVal, gradient] = costFunctionFminuncReg (b, X, y, lambda)
% *Cost function for fminunc()*
% Inputs:
% # b: The beta value
% # X: The features matrix without ones vector
% # y: The solution vector
% # lambda: The regularization constant
%
% Output:
%
% # jVal: The cost
% # gradient: The gradient
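%
% Usage sketch (hypothetical data; lambda = 1 is an arbitrary choice):
%
%   options = optimset('GradObj', 'on', 'MaxIter', 400);
%   initB = zeros(size(X, 2) + 1, 1);
%   [b, cost] = fminunc(@(t) ...
%       ExTwoFunctions.costFunctionFminuncReg(t, X, y, 1), initB, options);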
[n, ~] = size(X);
X = [ones(n,1) X];
mulVal = X * b;
beta_reg = b(2:end);
jVal = ((-1/n)*(y'*log(ExTwoFunctions.sigmoid(mulVal))+(1-y)'*log(1-ExTwoFunctions.sigmoid(mulVal)))) + lambda/(2*n)*(beta_reg'*beta_reg);
gradient = ((1/n)*(X')*(ExTwoFunctions.sigmoid(X*b)-y)) + lambda/n*[0; beta_reg];
end
function images = loadMNISTImages(filename)
%loadMNISTImages returns a [#pixels x #examples] matrix containing the
%raw MNIST images, with pixel values rescaled to [0,1]
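%
% Usage sketch (assuming the standard MNIST file name in the working
% directory):
%
%   images = ExTwoFunctions.loadMNISTImages('train-images-idx3-ubyte');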
fp = fopen(filename, 'rb');
assert(fp ~= -1, ['Could not open ', filename, '']);
magic = fread(fp, 1, 'int32', 0, 'ieee-be');
assert(magic == 2051, ['Bad magic number in ', filename, '']);
numImages = fread(fp, 1, 'int32', 0, 'ieee-be');
numRows = fread(fp, 1, 'int32', 0, 'ieee-be');
numCols = fread(fp, 1, 'int32', 0, 'ieee-be');
images = fread(fp, inf, 'unsigned char');
images = reshape(images, numCols, numRows, numImages);
images = permute(images,[2 1 3]);
fclose(fp);
% Reshape to #pixels x #examples
images = reshape(images, size(images, 1) * size(images, 2), size(images, 3));
% Convert to double and rescale to [0,1]
images = double(images) / 255;
end
function labels = loadMNISTLabels(filename)
%loadMNISTLabels returns a [number of MNIST images]x1 matrix containing
%the labels for the MNIST images
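%
% Usage sketch (assuming the standard MNIST file name in the working
% directory):
%
%   labels = ExTwoFunctions.loadMNISTLabels('train-labels-idx1-ubyte');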
fp = fopen(filename, 'rb');
assert(fp ~= -1, ['Could not open ', filename, '']);
magic = fread(fp, 1, 'int32', 0, 'ieee-be');
assert(magic == 2049, ['Bad magic number in ', filename, '']);
numLabels = fread(fp, 1, 'int32', 0, 'ieee-be');
labels = fread(fp, inf, 'unsigned char');
assert(size(labels,1) == numLabels, 'Mismatch in label count');
fclose(fp);
end
function [bestModels] = forwardSelection (X)
% *Forward Selection Algorithm*
% Inputs: X: The features matrix without the concatenated ones vector along
% with the last column containing the solution vector
% Output: A cell with one array per feature, each array containing the
% model (column indices) that was calculated to be the best of its size.
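%
% Usage sketch (hypothetical data; fitlm requires the Statistics and
% Machine Learning Toolbox):
%
%   data = [randn(50,3) randn(50,1)]; % 3 features + solution column
%   models = ExTwoFunctions.forwardSelection(data);
%   % models{2} holds the best two-feature model (column indices)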
y = X(:,end);
X(:,end) = [];
[~, totalFeatures] = size(X);
loopX = 1:totalFeatures;
bestModels = cell(1,totalFeatures); % A cell with multiple arrays
minCost = Inf;
for i = 1:totalFeatures
for j = 1:size(loopX,2)
if i == 1
currentModel = loopX(j);
else
currentModel = [bestModels{i-1} loopX(j)];
end
curX = X(:,currentModel);
mdl = fitlm(curX,y);
curB = mdl.Coefficients.Estimate;
curCost = ExTwoFunctions.mseMultiFeature(curX,y,curB);
if curCost < minCost
minCost = curCost;
bestModels{i} = currentModel;
end
end
newAddedIndex = bestModels{i}(1,end);
loopX = loopX(loopX ~= newAddedIndex);
minCost = Inf;
end
end
function [testError] = kFoldCrossValidation(X,k)
% *K-Fold Cross Validation*
% Inputs:
%
% # X: The features matrix (without the concatenated ones) combined with
% the solution in the last column
% # k: The number of samples per fold (the data is split into
% sampleCount/k chunks, each used once as the test set)
%
% Output: The summed test MSE across all folds
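%
% Usage sketch (hypothetical data): 100 samples with k = 20 gives
% 5 folds of 20 samples each:
%
%   data = [randn(100,3) randn(100,1)];
%   err = ExTwoFunctions.kFoldCrossValidation(data, 20);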
% Shuffling the test set
X = X(randperm(size(X,1)),:);
[sampleCount,~] = size(X);
flag = true; % True if the sample count is perfectly divisible by k
if (size(X,1)/k < 1)
error('Cannot divide features as k is more than total sample sets');
end
if rem(sampleCount,k) == 0
[R,C] = size(X);
n = R/k;
% Converting 2-D matrix to 3-D according to "k"
% Below line taken from : https://stackoverflow.com/questions/1390909/split-long-2d-matrix-into-the-third-dimension#1390941
X = permute(reshape(X',[C k R/k]),[2 1 3]);
else
flag = false;
n = ceil(sampleCount/k);
xResidue = X(k*(n-1)+1:end,:);
X(k*(n-1)+1:end,:) = [];
[R,C] = size(X);
X = permute(reshape(X',[C k R/k]),[2 1 3]);
n = n-1;
end
testError = 0;
for i = 1:n
xLoop = X;
if n==1 && flag == true
error('Error: Only one step. Cannot separate testing and training');
end
if n==1 && flag == false
xFlat = xResidue(:,1:end-1);
yFlat = xResidue(:,end);
testFlatX = X(:,1:end-1);
testFlatY = X(:,end);
else
xLoop(:,:,i) = [];
testFlatX = X(:,:,i);
testFlatY = testFlatX(:,end);
testFlatX(:,end) = [];
xFlat = permute(xLoop,[2,1,3]);
xFlat = xFlat(:,:);
xFlat = xFlat';
if flag == false
xFlat = [xFlat; xResidue];
end
yFlat = xFlat(:,end);
xFlat(:,end) = [];
end
mdl = fitlm(xFlat,yFlat);
mse = ExTwoFunctions.mseMultiFeature(testFlatX, testFlatY, mdl.Coefficients.Estimate);
testError = testError + mse;
end
if flag == false
xLoop = X;
testFlatX = xResidue(:,1:end-1);
testFlatY = xResidue(:,end);
xFlat = permute(xLoop,[2,1,3]);
xFlat = xFlat(:,:);
xFlat = xFlat';
yFlat = xFlat(:,end);
xFlat(:,end) = [];
mdl = fitlm(xFlat,yFlat);
mse = ExTwoFunctions.mseMultiFeature(testFlatX, testFlatY, mdl.Coefficients.Estimate);
testError = testError + mse;
end
end
function [jVal] = costLogisticReg (b, X, y, lambda)
% *Cost function for Regularization of Logistic Regression*
% Inputs:
% # b: The beta value
% # X: The features matrix without ones vector
% # y: The solution vector
% # lambda: The regularization constant
%
% Output:
%
% # jVal: The cost
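%
% Usage sketch (hypothetical numbers): with b = 0 the penalty term
% vanishes and the cost reduces to log(2), as in costLogistic:
%
%   c = ExTwoFunctions.costLogisticReg(zeros(2,1), (1:4)', [0;0;1;1], 1);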
[n, ~] = size(X);
X = [ones(n,1) X];
mulVal = X * b;
beta_reg = b(2:end);
jVal = ((-1/n)*(y'*log(ExTwoFunctions.sigmoid(mulVal))+(1-y)'*log(1-ExTwoFunctions.sigmoid(mulVal)))) + lambda/(2*n)*(beta_reg'*beta_reg);
end
function [b,costArray, i] = gradientDescentReg (X,y,a,lambda)
% *GRADIENT DESCENT FOR REGULARIZED LOGISTIC REGRESSION*
%
% Inputs:
%
% # X: The features matrix without the ones vector (it is
% concatenated inside this function).
% # y: The solution labels.
% # a: The learning rate $\alpha$.
% # lambda: The regularization constant.
% featureCount holds the number of raw features; the ones vector is
% concatenated below, so beta has featureCount+1 entries
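%
% Usage sketch (hypothetical data; alpha = 0.1 and lambda = 1 chosen
% for this toy set):
%
%   X = (1:4)';
%   [b, costs, iters] = ExTwoFunctions.gradientDescentReg(X, [0;0;1;1], 0.1, 1);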
[n, featureCount] = size(X);
X = [ones(n,1) X];
% Initializing variables for the loop
bNew = zeros(featureCount+1, 1);
loopCondition = true;
i = 1;
xTemp = X(:,2:end);
newCost = ExTwoFunctions.costLogisticReg(bNew,xTemp,y,lambda);
costArray(1,:) = [1, newCost];
while loopCondition
i = i+1;
bOld = bNew;
prevCost = newCost;
beta_reg = bOld(2:end);
bNew = bOld - a*((1/n)*(X')*((ExTwoFunctions.sigmoid(X*bOld))-y) + lambda/n*[0; beta_reg]);
xTemp = X(:,2:end);
newCost = ExTwoFunctions.costLogisticReg(bNew,xTemp,y,lambda);
costArray(i,:) = [i, newCost];
if (prevCost - newCost) < 0.0000005
loopCondition = false;
end
% a = a+a/9000000;
% if rem(i,1000) == 0
% i
% newCost
% end
% %
% if i == 300000
% loopCondition = false;
% ok = 6
% end
end
b = bNew;
end
function out = mapFeatureTwo (X,d)
% A Map-Feature like function for exercise 3: maps the single feature
% column vector X to polynomial terms, one column per power
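% e.g. mapFeatureTwo((1:3)', 2) returns [1 1 1; 1 2 4; 1 3 9]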
out = zeros(size(X,1), d+1);
for i = 0:d
out(:,i+1) = X.^i;
end
end
function out = mapGraph (X,d)
% A Map-Feature like function for exercise 3: evaluates
% $\sum_{k=0}^{d} x^{k}$ element-wise for the column vector X
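% e.g. mapGraph((1:2)', 2) returns [3; 7]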
x = repmat(X, 1, d+1);
p = repmat(0:d, size(x,1), 1);
out = sum(x .^ p, 2);
end
end
end