Showing 38 changed files with 3,111 additions and 2 deletions.
## Gradient Descent

Gradient descent is a search-based optimization method; its job is to minimize a loss function.

Gradient ascent: maximizes a utility function.

![](images/ml_11.png)

Step size per move: -η

- η is called the learning rate.
- The value of η affects how quickly the optimum is reached.
- A badly chosen η may fail to reach the optimum at all.
- η is a hyperparameter of gradient descent.

Parameter tuning here largely means tuning η; the sketch below illustrates its effect.
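A minimal sketch (not from the original notes) comparing several learning rates on an assumed toy loss J(θ) = (θ - 2.5)² + 1; both the function and the η values are illustrative assumptions:

```python
import numpy as np

def J(theta):
    # toy loss with a single minimum at theta = 2.5 (assumed for illustration)
    return (theta - 2.5) ** 2 + 1

def dJ(theta):
    # derivative of the toy loss
    return 2 * (theta - 2.5)

for eta in (0.01, 0.1, 0.8, 1.1):  # assumed sample learning rates
    theta = 0.0
    for _ in range(100):
        theta -= eta * dJ(theta)   # one gradient-descent step
    print(f"eta={eta}: theta={theta:.4f}, J={J(theta):.4f}")

# eta=0.01 converges slowly (still short of the minimum after 100 steps),
# eta=0.1 converges quickly, eta=0.8 oscillates but still converges,
# and eta=1.1 diverges: J blows up, which is why eta is a key hyperparameter
```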
- Local optima vs. the global optimum.
- Not every function has a unique extremum (the curve may descend, rise, then descend and rise again, and so on).
- Solutions (see the sketch after this list):
  - Run gradient descent multiple times from randomly initialized starting points.
  - The initial point of gradient descent is itself another hyperparameter.
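A minimal sketch of the random-restart idea on an assumed non-convex toy loss; the function, the restart count, and the learning rate are all illustrative assumptions:

```python
import numpy as np

def J(theta):
    # non-convex toy loss with several local minima (assumed for illustration)
    return np.sin(3 * theta) + 0.1 * theta ** 2

def dJ(theta):
    # derivative of the toy loss
    return 3 * np.cos(3 * theta) + 0.2 * theta

best_theta, best_J = None, float('inf')
rng = np.random.default_rng(42)
for _ in range(10):                # 10 random restarts (assumed)
    theta = rng.uniform(-5, 5)     # random initial point: also a hyperparameter
    for _ in range(1000):
        theta -= 0.01 * dJ(theta)
    if J(theta) < best_J:          # keep the best local minimum found so far
        best_theta, best_J = theta, J(theta)

print(best_theta, best_J)
```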
The loss function of linear regression has a unique optimum.

![](images/ml_13.png)
![](images/ml_14.png)
#### Simulating Gradient Descent

[Code](gradientDescent/01-GradientDescentSimulations/01-GradientDescentSimulations.ipynb)

![](images/ml_15.png)

![](images/ml_16.png)

![](images/ml_12.png)

### Gradient Descent in Linear Regression
![](images/ml_17.png)

![](images/ml_18.png)

Note: **an extra feature x0 ≡ 1 is appended to pair with theta0; in the figure below, X_b(i) · θ is the simplified, vectorized form.**
Looked at this way, the larger m is, the larger the summed loss becomes, which makes no sense inside a gradient. We divide everything by m to factor out the sample size.
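Written out (this matches the figures and the 2/m factor in the code below), the normalized loss and its gradient are:

```latex
J(\theta) = \frac{1}{m} \sum_{i=1}^{m} \left( y^{(i)} - X^{(i)}_b \theta \right)^2

\nabla J(\theta) = \frac{2}{m}
\begin{pmatrix}
\sum_{i=1}^{m} \left( X^{(i)}_b \theta - y^{(i)} \right) \\
\sum_{i=1}^{m} \left( X^{(i)}_b \theta - y^{(i)} \right) X^{(i)}_1 \\
\vdots \\
\sum_{i=1}^{m} \left( X^{(i)}_b \theta - y^{(i)} \right) X^{(i)}_n
\end{pmatrix}
= \frac{2}{m} X_b^T \left( X_b \theta - y \right)
```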
![](images/ml_19.png)

#### Implementing Gradient Descent
```python
import numpy as np

# Mean squared error (MSE); theta is a vector, X_b is a matrix with n = len(X_b) rows
def J(theta, X_b, y):
    try:
        return np.sum((y - X_b.dot(theta)) ** 2) / len(X_b)
    except OverflowError:
        # on overflow (e.g. eta too large), report the largest possible loss
        return float('inf')

# Derivative of the loss, i.e. the gradient ∇J(theta) from the figure above
def dJ(theta, X_b, y):
    res = np.empty(len(theta))
    # row 0 is special: its feature x0 is always 1
    res[0] = np.sum(X_b.dot(theta) - y)
    for i in range(1, len(theta)):
        # X_b[:, i] selects the i-th column (the i-th feature)
        res[i] = (X_b.dot(theta) - y).dot(X_b[:, i])
    return res * 2 / len(X_b)

# Find the optimal theta by gradient descent
# initial_theta: starting value of theta
# n_iters: cap on the number of iterations
# eta: learning rate
# X_b: feature matrix (with the column of ones for the intercept)
# y: target values
# epsilon: tolerated change in loss between consecutive steps
def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):

    theta = initial_theta
    cur_iter = 0

    while cur_iter < n_iters:
        # step against the gradient
        gradient = dJ(theta, X_b, y)
        last_theta = theta
        theta = theta - eta * gradient
        if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
            break

        cur_iter += 1

    return theta
```
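A quick usage sketch of the functions above. The synthetic data, seed, and η here are illustrative assumptions, not values from the notebook; the loop-based dJ can also be replaced by the fully vectorized form from the formula above, shown at the end for comparison:

```python
import numpy as np

np.random.seed(666)
x = 2 * np.random.random(size=100)             # one synthetic feature
y = x * 3. + 4. + np.random.normal(size=100)   # underlying model: intercept 4, slope 3
X = x.reshape(-1, 1)

X_b = np.hstack([np.ones((len(X), 1)), X])     # prepend the x0 = 1 column
initial_theta = np.zeros(X_b.shape[1])

theta = gradient_descent(X_b, y, initial_theta, eta=0.01)
print(theta)  # should land close to [4, 3]

# fully vectorized gradient, equivalent to the loop version of dJ above
def dJ_vectorized(theta, X_b, y):
    return X_b.T.dot(X_b.dot(theta) - y) * 2. / len(X_b)
```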
...earning/gradientDescent/01-GradientDescentSimulations/01-GradientDescentSimulations.ipynb
489 additions, 0 deletions (large diff not rendered by default)

...mentGradientDescentInLinearRegression/02-ImplementGradientDescentInLinearRegression.ipynb
413 additions, 0 deletions (large diff not rendered by default)
.../gradientDescent/02-ImplementGradientDescentInLinearRegression/playML/LinearRegression.py
88 additions, 0 deletions
```python
import numpy as np
from .metrics import r2_score

class LinearRegression:

    def __init__(self):
        """Initialize the Linear Regression model"""
        self.coef_ = None
        self.intercept_ = None
        self._theta = None

    def fit_normal(self, X_train, y_train):
        """Train the Linear Regression model on X_train, y_train via the normal equation"""
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"

        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        self._theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train)

        self.intercept_ = self._theta[0]
        self.coef_ = self._theta[1:]

        return self

    def fit_gd(self, X_train, y_train, eta=0.01, n_iters=1e4):
        """Train the Linear Regression model on X_train, y_train using gradient descent"""
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"

        def J(theta, X_b, y):
            try:
                return np.sum((y - X_b.dot(theta)) ** 2) / len(y)
            except OverflowError:
                return float('inf')

        def dJ(theta, X_b, y):
            res = np.empty(len(theta))
            res[0] = np.sum(X_b.dot(theta) - y)
            for i in range(1, len(theta)):
                res[i] = (X_b.dot(theta) - y).dot(X_b[:, i])
            return res * 2 / len(X_b)

        def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):

            theta = initial_theta
            cur_iter = 0

            while cur_iter < n_iters:
                gradient = dJ(theta, X_b, y)
                last_theta = theta
                theta = theta - eta * gradient
                if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
                    break

                cur_iter += 1

            return theta

        # append the intercept column to X_train
        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        # start every theta at 0
        initial_theta = np.zeros(X_b.shape[1])
        self._theta = gradient_descent(X_b, y_train, initial_theta, eta, n_iters)
        # intercept
        self.intercept_ = self._theta[0]
        # coefficients of all features
        self.coef_ = self._theta[1:]

        return self

    def predict(self, X_predict):
        """Given a data set X_predict, return a vector of predictions for X_predict"""
        assert self.intercept_ is not None and self.coef_ is not None, \
            "must fit before predict!"
        assert X_predict.shape[1] == len(self.coef_), \
            "the feature number of X_predict must be equal to X_train"

        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
        return X_b.dot(self._theta)

    def score(self, X_test, y_test):
        """Evaluate the current model's accuracy on the test set X_test and y_test"""

        y_predict = self.predict(X_test)
        return r2_score(y_test, y_predict)

    def __repr__(self):
        return "LinearRegression()"
```
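A hedged usage sketch for this class. It assumes the playML package is on the import path; the synthetic data and the true parameters are illustrative assumptions:

```python
import numpy as np
from playML.LinearRegression import LinearRegression
from playML.model_selection import train_test_split

# synthetic data: y = X . [3, -2] + 5 + noise (assumed for illustration)
np.random.seed(42)
X = np.random.random(size=(200, 2))
y = X.dot(np.array([3., -2.])) + 5. + np.random.normal(0, 0.1, size=200)

X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)

reg = LinearRegression().fit_gd(X_train, y_train, eta=0.01, n_iters=1e4)
print(reg.intercept_, reg.coef_)  # should land near 5 and [3, -2]
print(reg.score(X_test, y_test))  # R^2 should be close to 1 on this easy data
```

On the same data, `fit_normal` should recover nearly identical parameters through the closed-form normal equation.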
...entDescent/02-ImplementGradientDescentInLinearRegression/playML/SimpleLinearRegression.py
47 additions, 0 deletions
```python
import numpy as np
from .metrics import r2_score


class SimpleLinearRegression:

    def __init__(self):
        """Initialize the Simple Linear Regression model"""
        self.a_ = None
        self.b_ = None

    def fit(self, x_train, y_train):
        """Train the Simple Linear Regression model on x_train, y_train"""
        assert x_train.ndim == 1, \
            "Simple Linear Regressor can only solve single feature training data."
        assert len(x_train) == len(y_train), \
            "the size of x_train must be equal to the size of y_train"

        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)

        self.a_ = (x_train - x_mean).dot(y_train - y_mean) / (x_train - x_mean).dot(x_train - x_mean)
        self.b_ = y_mean - self.a_ * x_mean

        return self

    def predict(self, x_predict):
        """Given a data set x_predict, return a vector of predictions for x_predict"""
        assert x_predict.ndim == 1, \
            "Simple Linear Regressor can only solve single feature training data."
        assert self.a_ is not None and self.b_ is not None, \
            "must fit before predict!"

        return np.array([self._predict(x) for x in x_predict])

    def _predict(self, x_single):
        """Given a single datum x, return its predicted value"""
        return self.a_ * x_single + self.b_

    def score(self, x_test, y_test):
        """Evaluate the current model's accuracy on the test set x_test and y_test"""

        y_predict = self.predict(x_test)
        return r2_score(y_test, y_predict)

    def __repr__(self):
        return "SimpleLinearRegression()"
```
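A minimal usage sketch (toy 1-D data, assumed):

```python
import numpy as np
from playML.SimpleLinearRegression import SimpleLinearRegression

x = np.array([1., 2., 3., 4., 5.])
y = np.array([1., 3., 2., 3., 5.])

reg = SimpleLinearRegression().fit(x, y)
print(reg.a_, reg.b_)               # closed-form slope and intercept (0.8 and 0.4 here)
print(reg.predict(np.array([6.])))  # 0.8 * 6 + 0.4 = 5.2
```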
Empty file.
machinelearning/gradientDescent/02-ImplementGradientDescentInLinearRegression/playML/kNN.py
59 additions, 0 deletions
```python
import numpy as np
from math import sqrt
from collections import Counter
from .metrics import accuracy_score

class KNNClassifier:

    def __init__(self, k):
        """Initialize the kNN classifier"""
        assert k >= 1, "k must be valid"
        self.k = k
        self._X_train = None
        self._y_train = None

    def fit(self, X_train, y_train):
        """Train the kNN classifier on X_train and y_train"""
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"
        assert self.k <= X_train.shape[0], \
            "the size of X_train must be at least k."

        self._X_train = X_train
        self._y_train = y_train
        return self

    def predict(self, X_predict):
        """Given a data set X_predict, return a vector of predictions for X_predict"""
        assert self._X_train is not None and self._y_train is not None, \
            "must fit before predict!"
        assert X_predict.shape[1] == self._X_train.shape[1], \
            "the feature number of X_predict must be equal to X_train"

        y_predict = [self._predict(x) for x in X_predict]
        return np.array(y_predict)

    def _predict(self, x):
        """Given a single datum x, return its predicted label"""
        assert x.shape[0] == self._X_train.shape[1], \
            "the feature number of x must be equal to X_train"

        distances = [sqrt(np.sum((x_train - x) ** 2))
                     for x_train in self._X_train]
        nearest = np.argsort(distances)

        topK_y = [self._y_train[i] for i in nearest[:self.k]]
        votes = Counter(topK_y)

        return votes.most_common(1)[0][0]

    def score(self, X_test, y_test):
        """Evaluate the current model's accuracy on the test set X_test and y_test"""

        y_predict = self.predict(X_test)
        return accuracy_score(y_test, y_predict)

    def __repr__(self):
        return "KNN(k=%d)" % self.k
```
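A small usage sketch for the classifier (toy 2-D points, assumed):

```python
import numpy as np
from playML.kNN import KNNClassifier

X_train = np.array([[1., 1.], [1.2, 0.8], [4., 4.2], [3.8, 4.]])
y_train = np.array([0, 0, 1, 1])

knn = KNNClassifier(k=3).fit(X_train, y_train)
print(knn.predict(np.array([[1.1, 1.0], [4.1, 4.1]])))  # expect [0 1]
```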
...elearning/gradientDescent/02-ImplementGradientDescentInLinearRegression/playML/metrics.py
38 additions, 0 deletions
```python
import numpy as np
from math import sqrt


def accuracy_score(y_true, y_predict):
    """Compute the classification accuracy between y_true and y_predict"""
    assert len(y_true) == len(y_predict), \
        "the size of y_true must be equal to the size of y_predict"

    return np.sum(y_true == y_predict) / len(y_true)


def mean_squared_error(y_true, y_predict):
    """Compute the MSE between y_true and y_predict"""
    assert len(y_true) == len(y_predict), \
        "the size of y_true must be equal to the size of y_predict"

    return np.sum((y_true - y_predict)**2) / len(y_true)


def root_mean_squared_error(y_true, y_predict):
    """Compute the RMSE between y_true and y_predict"""

    return sqrt(mean_squared_error(y_true, y_predict))


def mean_absolute_error(y_true, y_predict):
    """Compute the MAE between y_true and y_predict"""
    assert len(y_true) == len(y_predict), \
        "the size of y_true must be equal to the size of y_predict"

    return np.sum(np.absolute(y_true - y_predict)) / len(y_true)


def r2_score(y_true, y_predict):
    """Compute the R Square between y_true and y_predict"""

    return 1 - mean_squared_error(y_true, y_predict)/np.var(y_true)
```
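A quick check of the regression metrics against hand-computed values (toy numbers, assumed):

```python
import numpy as np
from playML.metrics import mean_squared_error, root_mean_squared_error, r2_score

y_true = np.array([1., 2., 3., 4.])
y_pred = np.array([1.1, 1.9, 3.2, 3.8])

print(mean_squared_error(y_true, y_pred))       # (0.01 + 0.01 + 0.04 + 0.04) / 4 = 0.025
print(root_mean_squared_error(y_true, y_pred))  # sqrt(0.025) ≈ 0.158
print(r2_score(y_true, y_pred))                 # 1 - 0.025 / 1.25 = 0.98
```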
...g/gradientDescent/02-ImplementGradientDescentInLinearRegression/playML/model_selection.py
26 additions, 0 deletions
```python
import numpy as np


def train_test_split(X, y, test_ratio=0.2, seed=None):
    """Split the data X and y into X_train, X_test, y_train, y_test by test_ratio"""
    assert X.shape[0] == y.shape[0], \
        "the size of X must be equal to the size of y"
    assert 0.0 <= test_ratio <= 1.0, \
        "test_ratio must be valid"

    # `is not None` so that a seed of 0 is honored as well
    if seed is not None:
        np.random.seed(seed)

    shuffled_indexes = np.random.permutation(len(X))

    test_size = int(len(X) * test_ratio)
    test_indexes = shuffled_indexes[:test_size]
    train_indexes = shuffled_indexes[test_size:]

    X_train = X[train_indexes]
    y_train = y[train_indexes]

    X_test = X[test_indexes]
    y_test = y[test_indexes]

    return X_train, X_test, y_train, y_test
```
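Usage sketch (shapes only; the data is assumed):

```python
import numpy as np
from playML.model_selection import train_test_split

X = np.random.random(size=(100, 3))
y = np.arange(100)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_ratio=0.25, seed=666)
print(X_train.shape, X_test.shape)  # (75, 3) (25, 3)
```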