-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: multiple_reg.py
96 lines (63 loc) · 3.24 KB
/
multiple_reg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import numpy as np
from math import sqrt
class multiple_regression:
print('Following methods are avilable')
print('1. get_numpy_data(data, features, output)')
print('2. predict_output(feature_matrix, weights)')
print('3. feature_derivative(errors, feature)')
print('4. regression_gradient_descent(feature_matrix, output, initial_weights, step_size, tolerance)')
print('5. get_rmse(predictions, output)')
def get_numpy_data(self, data, features, output):
"""
Input: data (dataframe), features (list of features), output(output feature)
Returns feature matrix, output matrix
"""
# Add constant column to the data
data['constant'] = 1
features = ['constant'] + features
features_array = np.array(data[features])
output_array = np.array(data[output])
return(features_array, output_array)
def predict_output(self, feature_matrix, weights):
# predictions vector is the dot product of features and weights
predictions = np.dot(feature_matrix, weights)
return(predictions)
def feature_derivative(self, errors, feature):
"""
Find feature derivatives (applying gradient descent algortihm)
"""
# derivative = twice the dot product of error and features matrix
derivative = 2*np.dot(errors,feature)
return(derivative)
def regression_gradient_descent(self, feature_matrix, output, initial_weights, step_size, tolerance):
"""
Fit multiple regression model using gradient descent algorithm
Returns weights
"""
converged = False
weights = np.array(initial_weights) # converts initial_weights to a numpy array
while not converged:
# 1. Computer the predictions and errors using initial_weights
predictions = self.predict_output(feature_matrix, weights)
errors = predictions - output
gradient_sum_squares = 0 # initialize the gradient sum of squares
for i in range(len(weights)): # loop over each weight
# compute the derivative for weight[i]:
derivative = self.feature_derivative(errors, feature_matrix[:, i])
# add the squared value of the derivative to the gradient sum of squares (for assessing convergence)
gradient_sum_squares = gradient_sum_squares + (derivative * derivative)
# subtract the step size times the derivative from the current weight
weights[i] = weights[i] - step_size * derivative
# compute the square-root of the gradient sum of squares to get the gradient matnigude:
gradient_magnitude = sqrt(gradient_sum_squares)
if gradient_magnitude < tolerance:
converged = True
return(weights)
def get_rmse(self, predictions, output):
"""
calculate RMSE
"""
error = predictions - output
val_err = np.dot(error,error)
val_err = np.sqrt(val_err/len(output))
return val_err