TrainAndExportCNN.py
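# Train a small CNN on MNIST with TensorFlow 1.x, then export the trained
# weights as constants in a frozen GraphDef (.pb) for inference.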
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
EXPORT_DIR = './modelForImage'
TRAIN_STEPS = 20000
PRINT_TRAIN_FREQ = 250
MODEL_NAME = 'model_graph_28.pb'
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
weights = {
# 2x2 conv, 1 input, 64 outputs
'wc1': weight_variable([2, 2, 1, 64]),
# 3x3 conv, 64 inputs, 64 outputs
'wc2': weight_variable([3, 3, 64, 64]),
    # 5x5 conv, 64 inputs, 10 outputs (applied with stride 2 below to downsample)
'wc3': weight_variable([5, 5, 64, 10]),
    # fully connected, 11 * 11 * 10 = 1210 inputs, 100 outputs
'wf1': weight_variable([11 * 11 * 10, 100]),
    # 100 inputs, 10 outputs (class prediction)
'out': weight_variable([100, 10])
}
biases = {
    'bc1': bias_variable([64]),
    'bc2': bias_variable([64]),
    'bc3': bias_variable([10]),
    'bf1': bias_variable([100]),
    'bout': bias_variable([10])
}
# Note: padding='VALID' adds no zero-padding, so each conv shrinks the output;
# padding='SAME' would keep the same spatial dimensions as lastLayer.
def getConvLayer(lastLayer, weight, bias, stride=1):
conv1 = tf.nn.conv2d(lastLayer, weight, strides=[1, stride, stride, 1], padding='VALID')
return tf.nn.relu(conv1 + bias)
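
# Output-size arithmetic for padding='VALID' (a reference sketch, not used by
# the training code below): out = floor((in - filter) / stride) + 1.
def validConvOutputSize(inputSize, filterSize, stride=1):
    return (inputSize - filterSize) // stride + 1

# e.g. validConvOutputSize(25, 5, 2) == 11, matching the conv3 shape comment.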
# fully connected layer with ReLU
def getFullyConnectedLayer(lastLayer, weight, bias):
    return tf.nn.relu(tf.matmul(lastLayer, weight) + bias)
# prints training accuracy; feeds keep_prob = 1.0 so dropout is disabled
def printAccuracy(accuracy, step, inputPlaceholder, correctLabelPlaceholder, inputs, correctLabels, keep_prob):
train_accuracy = accuracy.eval(
feed_dict={inputPlaceholder: inputs, correctLabelPlaceholder: correctLabels, keep_prob: 1.0})
print('step %d, training accuracy %g' % (step, train_accuracy))
def printShape(tensor):
print(tensor.shape)
def exportGraph(g, WC1, BC1, WC2, BC2, WC3, BC3, WF1, BF1, W_OUT, B_OUT):
with g.as_default():
x_input = tf.placeholder("float", shape=[None, 28, 28, 1], name="inputImage")
WC1 = tf.constant(WC1, name="WC1")
BC1 = tf.constant(BC1, name="BC1")
CONV1 = getConvLayer(x_input, WC1, BC1)
WC2 = tf.constant(WC2, name="WC2")
BC2 = tf.constant(BC2, name="BC2")
CONV2 = getConvLayer(CONV1, WC2, BC2)
WC3 = tf.constant(WC3, name="WC3")
BC3 = tf.constant(BC3, name="BC3")
CONV3 = getConvLayer(CONV2, WC3, BC3, 2)
CONV3_FLAT = tf.reshape(CONV3, [-1, 11 * 11 * 10])
WF1 = tf.constant(WF1, name="WF1")
BF1 = tf.constant(BF1, name="BF1")
FC1 = getFullyConnectedLayer(CONV3_FLAT, WF1, BF1)
W_OUT = tf.constant(W_OUT, name="W_OUT")
B_OUT = tf.constant(B_OUT, name="B_OUT")
OUTPUT = tf.nn.softmax(tf.matmul(FC1, W_OUT) + B_OUT, name="output")
        sess = tf.Session()
        init = tf.global_variables_initializer()
        sess.run(init)
graph_def = g.as_graph_def()
tf.train.write_graph(graph_def, EXPORT_DIR, MODEL_NAME, as_text=False)
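
# A minimal sketch of how the exported .pb could be loaded back for inference
# (not called by this script; the tensor names "inputImage:0" and "output:0"
# come from the ops named in exportGraph above):
def loadAndRunExportedGraph(imageBatch):
    with tf.gfile.GFile(EXPORT_DIR + '/' + MODEL_NAME, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as loadedGraph:
        tf.import_graph_def(graph_def, name='')
        with tf.Session(graph=loadedGraph) as sess:
            return sess.run('output:0', feed_dict={'inputImage:0': imageBatch})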
# load the MNIST dataset (downloaded to MNIST_data/ if not already present)
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
# each 28 x 28 MNIST image is flattened into a 784-element row
x = tf.placeholder(tf.float32, shape=[None, 784])
# reshape each 784-element row back into an image: [batch, height, width, channels]
x_image = tf.reshape(x, [-1, 28, 28, 1])
printShape(x_image)
# one-hot label vectors for the 10 digit classes (0-9)
yCorrectLabels = tf.placeholder(tf.float32, shape=[None, 10])
conv1 = getConvLayer(x_image, weights['wc1'], biases['bc1']) # 28 x 28 x 1 => 27 x 27 x 64
printShape(conv1)
conv2 = getConvLayer(conv1, weights['wc2'], biases['bc2']) # 27 x 27 x 64 => 25 x 25 x 64
printShape(conv2)
# conv with stride of 2 to reduce size (instead of pooling)
conv3 = getConvLayer(conv2, weights['wc3'], biases['bc3'], 2) # 25 x 25 x 64 => 11 x 11 x 10
printShape(conv3)
# flatten conv3 to connect to the fully connected layer next
conv3_flat = tf.reshape(conv3, [-1, 11 * 11 * 10]) # 11 x 11 x 10 => 1210
printShape(conv3_flat)
# Fully Connected Layer 1
fully_connected1 = getFullyConnectedLayer(conv3_flat, weights['wf1'], biases['bf1'])  # 1210 => 100
printShape(fully_connected1)
# placeholder for the dropout keep probability, so it can be set to 1.0 during evaluation
keep_prob = tf.placeholder(tf.float32)
fully_connected_drop1 = tf.nn.dropout(fully_connected1, keep_prob)
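# (tf.nn.dropout also scales the kept units by 1/keep_prob, so activations
# keep the same expected value when keep_prob is set to 1.0 at eval time)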
print("Dropout")
# fully connected layer 2
fully_connected2 = getFullyConnectedLayer(fully_connected_drop1, weights['out'], biases['bout'])  # 100 => 10
printShape(fully_connected2)
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=yCorrectLabels, logits=fully_connected2))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
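# a prediction counts as correct when the argmax of the logits matches the
# argmax of the one-hot label; accuracy is the mean over the batch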
correct_prediction = tf.equal(tf.argmax(fully_connected2, 1), tf.argmax(yCorrectLabels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
for i in range(TRAIN_STEPS):
batch = mnist.train.next_batch(50)
if i % PRINT_TRAIN_FREQ == 0:
printAccuracy(accuracy, i, x, yCorrectLabels, batch[0], batch[1], keep_prob)
train_step.run(feed_dict={x: batch[0], yCorrectLabels: batch[1], keep_prob: 0.5})
print('test accuracy %g' % accuracy.eval(
feed_dict={x: mnist.test.images, yCorrectLabels: mnist.test.labels, keep_prob: 1.0}))
WC1 = weights['wc1'].eval(sess)
BC1 = biases['bc1'].eval(sess)
WC2 = weights['wc2'].eval(sess)
BC2 = biases['bc2'].eval(sess)
WC3 = weights['wc3'].eval(sess)
BC3 = biases['bc3'].eval(sess)
WF1 = weights['wf1'].eval(sess)
BF1 = biases['bf1'].eval(sess)
W_OUT = weights['out'].eval(sess)
B_OUT = biases['bout'].eval(sess)
# Create new graph for exporting
g = tf.Graph()
exportGraph(g, WC1, BC1, WC2, BC2, WC3, BC3, WF1, BF1, W_OUT, B_OUT)
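
# Example use of the loader sketch above (hypothetical, not part of the
# original script): run the exported model on a few test images.
# predictions = loadAndRunExportedGraph(
#     mnist.test.images[:5].reshape(-1, 28, 28, 1))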