-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlasso_reg.py
121 lines (87 loc) · 4.11 KB
/
lasso_reg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import numpy as np
from math import sqrt
class lasso_regression:
    """
    LASSO regression solved with cyclical coordinate descent.

    Intended workflow:
        1. get_numpy_data                    -> feature/output arrays from a dataframe
        2. normalize_features                -> scale each feature column to unit 2-norm
        3. lasso_cyclical_coordinate_descent -> fit the weights
        4. predict_output / get_residual_sum_of_squares / get_rmse -> evaluate
    """
    # These run once at class-definition (import) time as a crude usage hint;
    # kept for backward compatibility with the original module's behavior.
    print('Following methods are available')
    print('1. get_numpy_data(data, features, output)')
    print('2. predict_output(feature_matrix, weights)')
    print('3. normalize_features(feature_matrix)')
    print('4. lasso_coordinate_descent_step(i, feature_matrix, output, weights, l1_penalty)')
    print('5. lasso_cyclical_coordinate_descent(feature_matrix, output, initial_weights, l1_penalty, tolerance)')
    print('6. get_residual_sum_of_squares(predictions, output)')
    print('7. get_rmse(predictions, output)')

    def get_numpy_data(self, data, features, output):
        """
        Build a feature matrix with a leading all-ones intercept column, plus
        the output vector, from a dataframe.

        Input: data (dataframe), features (list of feature column names),
               output (name of the target column)
        Returns: (features_array, output_array) as numpy arrays.
        """
        # BUGFIX: work on a copy — the original wrote a 'constant' column into
        # the caller's dataframe in place.
        data = data.copy()
        data['constant'] = 1
        features = ['constant'] + features
        features_array = np.array(data[features])
        output_array = np.array(data[output])
        return (features_array, output_array)

    def predict_output(self, feature_matrix, weights):
        """
        Input: np array of predicting features, weights
        Returns: predictions = feature_matrix . weights (dot product).
        """
        predictions = np.dot(feature_matrix, weights)
        return (predictions)

    def normalize_features(self, feature_matrix):
        """
        Scale each column of feature_matrix to unit Euclidean norm.

        Returns: (normalized_features, norms). Keep `norms` so fitted weights
        can be rescaled back to the original feature scale.
        """
        norms = np.linalg.norm(feature_matrix, axis=0)
        normalized_features = feature_matrix / norms
        return (normalized_features, norms)

    def lasso_coordinate_descent_step(self, i, feature_matrix, output, weights, l1_penalty):
        """
        One coordinate-descent update for weight i (soft-thresholding).
        Features are assumed normalized (see normalize_features).

        Input: i (coordinate index), feature_matrix (np array), output,
               weights (current weights), l1_penalty (L1 strength)
        Returns: new value for weights[i]; the intercept (i == 0) is not
                 regularized.
        """
        # BUGFIX: original called the bare name `predict_output`, which raises
        # NameError — the method must be invoked on self.
        prediction = self.predict_output(feature_matrix, weights)
        # ro[i] = SUM[ [feature_i]*(output - prediction + weight[i]*[feature_i]) ]
        ro_i = np.dot(feature_matrix[:, i], (output - prediction + weights[i] * feature_matrix[:, i]))
        if i == 0:  # intercept -- do not regularize
            new_weight_i = ro_i
        elif ro_i < -l1_penalty / 2.:
            new_weight_i = ro_i + l1_penalty / 2.
        elif ro_i > l1_penalty / 2.:
            new_weight_i = ro_i - l1_penalty / 2.
        else:
            new_weight_i = 0.
        return new_weight_i

    def lasso_cyclical_coordinate_descent(self, feature_matrix, output, initial_weights, l1_penalty, tolerance):
        """
        Cycle coordinate-descent steps over all weights until no single update
        moves any weight by `tolerance` or more.

        Input: feature_matrix, output, initial_weights, l1_penalty, tolerance
        Returns: fitted weights as a float numpy array.
        """
        # BUGFIX: force float dtype — with integer initial weights the
        # original silently truncated every fractional update.
        weights = np.array(initial_weights, dtype=float)
        converged = False
        while not converged:
            converged = True
            for i in range(len(weights)):
                old_weights_i = weights[i]  # old value, about to be overwritten
                # Uses the already-updated weights[0..i-1] and the old
                # weights[i..d-1] (standard cyclical coordinate descent).
                # BUGFIX: original called the bare function name (NameError);
                # the step is a method and must be invoked on self.
                weights[i] = self.lasso_coordinate_descent_step(i, feature_matrix, output, weights, l1_penalty)
                # any coordinate still moving >= tolerance keeps us iterating
                if abs(weights[i] - old_weights_i) >= tolerance:
                    converged = False
        return weights

    def get_residual_sum_of_squares(self, predictions, output):
        """
        Returns: RSS = sum((output - predictions)^2).
        """
        residual = output - predictions
        residual_squared = residual * residual
        RSS = residual_squared.sum()
        return (RSS)

    def get_rmse(self, predictions, output):
        """
        Returns: root-mean-squared error between predictions and output.
        """
        error = predictions - output
        val_err = np.dot(error, error)
        val_err = np.sqrt(val_err / len(output))
        return val_err