Implement gradient descent in linear regression
cr-mao committed Aug 25, 2024
1 parent bfdc867 commit 2aaeed9
Showing 38 changed files with 3,111 additions and 2 deletions.
6 changes: 4 additions & 2 deletions README.md
@@ -37,15 +37,17 @@ jupyter notebook, numpy, pandas, matplotlib
  - [Data normalization and standardization](machinelearning/knn/07-FeatureScaling/FeatureScaling.ipynb)
  - [Standardization in sklearn](machinelearning/knn/08-ScalerinScikitLearn/ScalerInScikitLearn.ipynb)

- Linear regression
  - [Linear regression theory and formulas](machinelearning/02线性回归.md)
  - [Simple linear regression implementation](machinelearning/linearRegression/01-SimpleLinearRegressionImplementation/SimpleLinearRegressionImplementation.ipynb)
  - [Vectorized operations are more efficient](machinelearning/linearRegression/02-Vectorization/Vectorization.ipynb)
  - [Regression metrics: MSE vs. MAE](machinelearning/linearRegression/03-RegressionMetricsMSE-vs-MAE/RegressionMetricsMSE-vs-MAE.ipynb)
  - [The best metric for linear regression: R Squared](machinelearning/linearRegression/04-R-Squared/R-Squared.ipynb)
  - [Multiple linear regression via the normal equation](machinelearning/linearRegression/05-OurLinearRegression/OurLinearRegression.ipynb)
  - [Linear regression in sklearn](machinelearning/linearRegression/06-RegressionInScikitLlearn/RegressionInScikitlearn.ipynb)

- Gradient descent
  - [Simulating gradient descent (single variable)](machinelearning/gradientDescent/01-GradientDescentSimulations/01-GradientDescentSimulations.ipynb)
  - [Implementing gradient descent in linear regression](machinelearning/gradientDescent/02-ImplementGradientDescentInLinearRegression/02-ImplementGradientDescentInLinearRegression.ipynb)



114 changes: 114 additions & 0 deletions machinelearning/03梯度下降法.md
@@ -0,0 +1,114 @@
## Gradient Descent

Gradient descent is a search-based optimization method; its job is to minimize a loss function.

Gradient ascent is the mirror image: it maximizes a utility function.

![](images/ml_11.png)

Step taken at each iteration: -η times the derivative

- η is called the learning rate
- The value of η affects how quickly the optimum is reached
- A badly chosen η may fail to find the optimum at all
- η is a hyperparameter of gradient descent
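
In symbols, each iteration moves the parameter against the gradient, scaled by η:

$$\theta \leftarrow \theta - \eta\,\frac{dJ}{d\theta}$$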


Hyperparameter tuning here mostly means tuning η.

- Local optima vs. the global optimum.
- Not every function has a unique extremum (the curve may fall, then rise, then fall again, and so on)
- Remedies (see the sketch below):
  - Run gradient descent several times from random initial points
  - The initial point of gradient descent is itself another hyperparameter.
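
A minimal random-restart sketch (assuming the `gradient_descent` and `J` helpers defined later in this note, plus a design matrix `X_b` and targets `y` already in scope):

```python
import numpy as np

# Run gradient descent from several random starting points and keep the
# theta with the lowest loss -- a common remedy for local optima.
best_theta, best_loss = None, float('inf')
for _ in range(10):
    initial_theta = np.random.uniform(-10., 10., size=X_b.shape[1])
    theta = gradient_descent(X_b, y, initial_theta, eta=0.01)
    loss = J(theta, X_b, y)
    if loss < best_loss:
        best_theta, best_loss = theta, loss

print(best_theta, best_loss)
```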


The loss function of linear regression has a unique optimum, so none of these traps arise there.




![](images/ml_13.png)
![](images/ml_14.png)


#### Simulating gradient descent
[Code](gradientDescent/01-GradientDescentSimulations/01-GradientDescentSimulations.ipynb)
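
In miniature, the search loop looks like this (a toy quadratic loss invented for illustration; see the notebook above for the real thing):

```python
def J(theta):
    return (theta - 2.5) ** 2 - 1.   # a made-up one-variable loss

def dJ(theta):
    return 2 * (theta - 2.5)         # its derivative

theta, eta, epsilon = 0.0, 0.1, 1e-8
while True:
    gradient = dJ(theta)
    last_theta = theta
    theta -= eta * gradient
    if abs(J(theta) - J(last_theta)) < epsilon:
        break

print(theta)  # converges near 2.5, the minimum of this curve
```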



![](images/ml_15.png)


![](images/ml_16.png)

![](images/ml_12.png)

### Gradient descent in linear regression



![](images/ml_17.png)


![](images/ml_18.png)
Note: **an extra feature x0 ≡ 1 is padded in so that theta0 gets a matching term; the X_b(i) · theta in the figure below is the simplified, vectorized form.**



Summed this way, the larger m is, the larger the result grows, which makes no sense for a gradient. Dividing everything by m removes this dependence on the sample size.


![](images/ml_19.png)
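
Written out explicitly (this reconstructs what the figures above show), the objective and its gradient are:

$$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\left(y^{(i)} - X_b^{(i)}\theta\right)^2$$

$$\frac{\partial J}{\partial \theta_j} = \frac{2}{m}\sum_{i=1}^{m}\left(X_b^{(i)}\theta - y^{(i)}\right)X_j^{(i)},
\qquad
\nabla J(\theta) = \frac{2}{m}\,X_b^{T}\left(X_b\theta - y\right)$$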


#### Gradient descent implementation

```python
import numpy as np

# MSE loss: theta is the parameter vector, X_b the design matrix (n rows)
def J(theta, X_b, y):
    try:
        return np.sum((y - X_b.dot(theta)) ** 2) / len(X_b)
    except:
        # on numerical overflow, treat the loss as infinite
        return float('inf')

# derivative of J, i.e. the gradient -- the ∇J(theta) in the figures above
def dJ(theta, X_b, y):
    res = np.empty(len(theta))
    # row 0 is special: the intercept's pseudo-feature x0 is always 1
    res[0] = np.sum(X_b.dot(theta) - y)
    for i in range(1, len(theta)):
        # X_b[:, i] is column i, i.e. feature i across all samples
        res[i] = (X_b.dot(theta) - y).dot(X_b[:, i])
    return res * 2 / len(X_b)

# find the optimal theta by gradient descent
#   X_b           design matrix (with the leading column of ones)
#   y             target vector
#   initial_theta starting value for theta
#   eta           learning rate
#   n_iters       cap on the number of iterations
#   epsilon       tolerance on the change in loss
def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):
    theta = initial_theta
    cur_iter = 0

    while cur_iter < n_iters:
        # gradient at the current theta
        gradient = dJ(theta, X_b, y)
        last_theta = theta
        theta = theta - eta * gradient
        # stop once the loss barely changes between iterations
        if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
            break

        cur_iter += 1

    return theta
```
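
A quick sanity check on synthetic data (the data below is invented for illustration, not taken from the repo's notebooks):

```python
import numpy as np

np.random.seed(666)
x = 2 * np.random.random(size=100)             # one feature in [0, 2)
y = x * 3. + 4. + np.random.normal(size=100)   # true slope 3, intercept 4
X_b = np.hstack([np.ones((100, 1)), x.reshape(-1, 1)])

theta = gradient_descent(X_b, y, np.zeros(X_b.shape[1]), eta=0.01)
print(theta)  # approximately [4., 3.]
```

The loop in `dJ` can also be collapsed into the single vectorized expression `X_b.T.dot(X_b.dot(theta) - y) * 2. / len(y)`, which computes the same gradient much faster.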




@@ -0,0 +1,88 @@
```python
import numpy as np
from .metrics import r2_score

class LinearRegression:

    def __init__(self):
        """Initialize the Linear Regression model"""
        self.coef_ = None
        self.intercept_ = None
        self._theta = None

    def fit_normal(self, X_train, y_train):
        """Train the Linear Regression model on X_train, y_train via the normal equation"""
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"

        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        self._theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train)

        self.intercept_ = self._theta[0]
        self.coef_ = self._theta[1:]

        return self

    def fit_gd(self, X_train, y_train, eta=0.01, n_iters=1e4):
        """Train the Linear Regression model on X_train, y_train using gradient descent"""
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"

        def J(theta, X_b, y):
            try:
                return np.sum((y - X_b.dot(theta)) ** 2) / len(y)
            except:
                return float('inf')

        def dJ(theta, X_b, y):
            res = np.empty(len(theta))
            res[0] = np.sum(X_b.dot(theta) - y)
            for i in range(1, len(theta)):
                res[i] = (X_b.dot(theta) - y).dot(X_b[:, i])
            return res * 2 / len(X_b)

        def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):
            theta = initial_theta
            cur_iter = 0

            while cur_iter < n_iters:
                gradient = dJ(theta, X_b, y)
                last_theta = theta
                theta = theta - eta * gradient
                if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
                    break

                cur_iter += 1

            return theta

        # prepend the intercept column of ones to X_train
        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        # start every theta at 0
        initial_theta = np.zeros(X_b.shape[1])
        self._theta = gradient_descent(X_b, y_train, initial_theta, eta, n_iters)
        # intercept
        self.intercept_ = self._theta[0]
        # coefficients of all features
        self.coef_ = self._theta[1:]

        return self

    def predict(self, X_predict):
        """Given a dataset X_predict, return the vector of predictions"""
        assert self.intercept_ is not None and self.coef_ is not None, \
            "must fit before predict!"
        assert X_predict.shape[1] == len(self.coef_), \
            "the feature number of X_predict must be equal to X_train"

        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
        return X_b.dot(self._theta)

    def score(self, X_test, y_test):
        """Evaluate the current model's R^2 on X_test and y_test"""

        y_predict = self.predict(X_test)
        return r2_score(y_test, y_predict)

    def __repr__(self):
        return "LinearRegression()"
```
@@ -0,0 +1,47 @@
```python
import numpy as np
from .metrics import r2_score


class SimpleLinearRegression:

    def __init__(self):
        """Initialize the Simple Linear Regression model"""
        self.a_ = None
        self.b_ = None

    def fit(self, x_train, y_train):
        """Train the Simple Linear Regression model on x_train, y_train"""
        assert x_train.ndim == 1, \
            "Simple Linear Regressor can only solve single feature training data."
        assert len(x_train) == len(y_train), \
            "the size of x_train must be equal to the size of y_train"

        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)

        # vectorized least-squares estimates of slope and intercept
        self.a_ = (x_train - x_mean).dot(y_train - y_mean) / (x_train - x_mean).dot(x_train - x_mean)
        self.b_ = y_mean - self.a_ * x_mean

        return self

    def predict(self, x_predict):
        """Given a dataset x_predict, return the vector of predictions"""
        assert x_predict.ndim == 1, \
            "Simple Linear Regressor can only solve single feature training data."
        assert self.a_ is not None and self.b_ is not None, \
            "must fit before predict!"

        return np.array([self._predict(x) for x in x_predict])

    def _predict(self, x_single):
        """Given a single sample x, return its predicted value"""
        return self.a_ * x_single + self.b_

    def score(self, x_test, y_test):
        """Evaluate the current model's R^2 on x_test and y_test"""

        y_predict = self.predict(x_test)
        return r2_score(y_test, y_predict)

    def __repr__(self):
        return "SimpleLinearRegression()"
```
@@ -0,0 +1,59 @@
```python
import numpy as np
from math import sqrt
from collections import Counter
from .metrics import accuracy_score

class KNNClassifier:

    def __init__(self, k):
        """Initialize the kNN classifier"""
        assert k >= 1, "k must be valid"
        self.k = k
        self._X_train = None
        self._y_train = None

    def fit(self, X_train, y_train):
        """Train the kNN classifier on X_train and y_train"""
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"
        assert self.k <= X_train.shape[0], \
            "the size of X_train must be at least k."

        self._X_train = X_train
        self._y_train = y_train
        return self

    def predict(self, X_predict):
        """Given a dataset X_predict, return the vector of predictions"""
        assert self._X_train is not None and self._y_train is not None, \
            "must fit before predict!"
        assert X_predict.shape[1] == self._X_train.shape[1], \
            "the feature number of X_predict must be equal to X_train"

        y_predict = [self._predict(x) for x in X_predict]
        return np.array(y_predict)

    def _predict(self, x):
        """Given a single sample x, return its predicted label"""
        assert x.shape[0] == self._X_train.shape[1], \
            "the feature number of x must be equal to X_train"

        distances = [sqrt(np.sum((x_train - x) ** 2))
                     for x_train in self._X_train]
        nearest = np.argsort(distances)

        topK_y = [self._y_train[i] for i in nearest[:self.k]]
        votes = Counter(topK_y)

        return votes.most_common(1)[0][0]

    def score(self, X_test, y_test):
        """Evaluate the classifier's accuracy on X_test and y_test"""

        y_predict = self.predict(X_test)
        return accuracy_score(y_test, y_predict)

    def __repr__(self):
        return "KNN(k=%d)" % self.k
```


@@ -0,0 +1,38 @@
```python
import numpy as np
from math import sqrt


def accuracy_score(y_true, y_predict):
    """Compute the classification accuracy between y_true and y_predict"""
    assert len(y_true) == len(y_predict), \
        "the size of y_true must be equal to the size of y_predict"

    return np.sum(y_true == y_predict) / len(y_true)


def mean_squared_error(y_true, y_predict):
    """Compute the MSE between y_true and y_predict"""
    assert len(y_true) == len(y_predict), \
        "the size of y_true must be equal to the size of y_predict"

    return np.sum((y_true - y_predict) ** 2) / len(y_true)


def root_mean_squared_error(y_true, y_predict):
    """Compute the RMSE between y_true and y_predict"""

    return sqrt(mean_squared_error(y_true, y_predict))


def mean_absolute_error(y_true, y_predict):
    """Compute the MAE between y_true and y_predict"""
    assert len(y_true) == len(y_predict), \
        "the size of y_true must be equal to the size of y_predict"

    return np.sum(np.absolute(y_true - y_predict)) / len(y_true)


def r2_score(y_true, y_predict):
    """Compute the R Square between y_true and y_predict"""

    return 1 - mean_squared_error(y_true, y_predict) / np.var(y_true)
```
@@ -0,0 +1,26 @@
```python
import numpy as np


def train_test_split(X, y, test_ratio=0.2, seed=None):
    """Split X and y into X_train, X_test, y_train, y_test according to test_ratio"""
    assert X.shape[0] == y.shape[0], \
        "the size of X must be equal to the size of y"
    assert 0.0 <= test_ratio <= 1.0, \
        "test_ratio must be valid"

    if seed is not None:  # 'if seed:' would silently ignore seed=0
        np.random.seed(seed)

    shuffled_indexes = np.random.permutation(len(X))

    test_size = int(len(X) * test_ratio)
    test_indexes = shuffled_indexes[:test_size]
    train_indexes = shuffled_indexes[test_size:]

    X_train = X[train_indexes]
    y_train = y[train_indexes]

    X_test = X[test_indexes]
    y_test = y[test_indexes]

    return X_train, X_test, y_train, y_test
```
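
A hypothetical usage sketch:

```python
import numpy as np

X = np.arange(20).reshape(10, 2)
y = np.arange(10)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_ratio=0.2, seed=666)
print(X_train.shape, X_test.shape)  # (8, 2) (2, 2)
```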