-
Notifications
You must be signed in to change notification settings - Fork 1
/
lm_vs_dnn.py
90 lines (68 loc) · 2.35 KB
/
lm_vs_dnn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import shutil
import tensorflow as tf
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
# Column names used as model inputs and as the regression target. They are
# defined first so every later step refers to the same names.
FEATURES = ['LSTAT', 'RM']
LABEL = 'MEDV'

# Load the Boston housing data into a DataFrame and attach the target column.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this script presumably needs an older scikit-learn -- confirm the
# pinned version.
boston_dataset = load_boston()
boston = pd.DataFrame(boston_dataset.data, columns=boston_dataset.feature_names)
boston[LABEL] = boston_dataset.target

# Keep only the predictors listed in FEATURES, and the target as a Series.
X = boston[FEATURES]
Y = boston[LABEL]

# Hold out 20% of the rows for evaluation; random_state fixes the split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=5)

# Re-join features and label so each split is a single DataFrame, which is
# the shape train_input_fn / eval_input_fn take.
df_train = pd.concat([X_train, Y_train], axis=1)
df_eval = pd.concat([X_test, Y_test], axis=1)
def train_input_fn(df, num_epochs):
    """Build the input function used to train an estimator.

    Args:
        df: DataFrame containing the FEATURES columns and the LABEL column.
        num_epochs: Forwarded unchanged as ``num_epochs`` to
            ``tf.estimator.inputs.pandas_input_fn``.

    Returns:
        The object returned by ``tf.estimator.inputs.pandas_input_fn``; the
        script passes it as ``input_fn`` to an estimator's ``train``.
    """
    return tf.estimator.inputs.pandas_input_fn(
        # Feed only the model inputs: passing the whole frame would also
        # expose the LABEL column as an input feature.
        x=df[FEATURES],
        y=df[LABEL],
        batch_size=128,
        num_epochs=num_epochs,
        shuffle=True,
        queue_capacity=1000
    )
def eval_input_fn(df):
    """Build the input function used to evaluate an estimator.

    Args:
        df: DataFrame containing the FEATURES columns and the LABEL column.

    Returns:
        The object returned by ``tf.estimator.inputs.pandas_input_fn``,
        configured without shuffling; the script passes it as ``input_fn``
        to an estimator's ``evaluate``.
    """
    return tf.estimator.inputs.pandas_input_fn(
        # Feed only the model inputs: passing the whole frame would also
        # expose the LABEL column as an input feature.
        x=df[FEATURES],
        y=df[LABEL],
        batch_size=128,
        shuffle=False,
        queue_capacity=1000
    )
# def prediction_input_fn(df):
# return tf.estimator.inputs.pandas_input_fn(
# x = df,
# y = None,
# batch_size = 128,
# shuffle = False,
# queue_capacity = 1000
# )
def make_feature_cols():
    """Return a list with one numeric feature column per name in FEATURES."""
    columns = []
    for feature_name in FEATURES:
        columns.append(tf.feature_column.numeric_column(feature_name))
    return columns
def print_rmse(model, df):
    """Evaluate ``model`` on ``df`` and print the square root of its average loss.

    Args:
        model: Object whose ``evaluate(input_fn=...)`` returns a mapping that
            has an ``'average_loss'`` entry.
        df: DataFrame handed to ``eval_input_fn`` to build the input function.
    """
    eval_metrics = model.evaluate(input_fn=eval_input_fn(df))
    rmse = np.sqrt(eval_metrics['average_loss'])
    print('RMSE on dataset = {}'.format(rmse))
tf.logging.set_verbosity(tf.logging.INFO)
OUTDIR = 'boston_trained'

# Plot a histogram of the training target before fitting anything.
plt.hist(df_train['MEDV'])
plt.show()

# ---- Linear regressor ----
tf.logging.set_verbosity(tf.logging.INFO)
# Remove any previous contents of OUTDIR; a missing directory is not an error.
shutil.rmtree(OUTDIR, ignore_errors=True)
linear_model = tf.estimator.LinearRegressor(
    feature_columns=make_feature_cols(),
    model_dir=OUTDIR,
)
linear_train_fn = train_input_fn(df_train, num_epochs=10)
linear_model.train(input_fn=linear_train_fn)
# Disabled in the original script:
# predictions = model.predict(input_fn = prediction_input_fn(df_test))
print_rmse(linear_model, df_eval)
# Value recorded by the original author (presumably the printed RMSE):
# 12.391966819763184

# ---- DNN regressor with three hidden layers of 8 units ----
tf.logging.set_verbosity(tf.logging.INFO)
# The same OUTDIR is reused, so it is cleared again before this model is built.
shutil.rmtree(OUTDIR, ignore_errors=True)
dnn_model = tf.estimator.DNNRegressor(
    hidden_units=[8, 8, 8],
    feature_columns=make_feature_cols(),
    model_dir=OUTDIR,
)
dnn_train_fn = train_input_fn(df_train, num_epochs=100)
dnn_model.train(input_fn=dnn_train_fn)
print_rmse(dnn_model, df_eval)
# Value recorded by the original author (presumably the printed RMSE):
# 3.889941930770874