import tensorflow as tf
import numpy as np
# NOTE: scipy.misc.imresize was removed in SciPy 1.3; this needs an older SciPy (with Pillow).
from scipy.misc import imresize
from utils import variable_on_cpu, variable_with_weight_decay


class SGNet:
    # Class-level learning rate; subclasses may override it and define their own optimizer.
    lr = 1e-6
    # optimizer = tf.train.GradientDescentOptimizer(lr)
    def __init__(self, scope, vgg_conv_shape):
        """
        Base class for SGNet; defines the network structure.
        """
        self.scope = scope
        self.params = {
            'num_fms': 200,  # number of selected feature maps, the inputs of the network
            'wd': 0.5,       # L2 regularization coefficient
        }
        self.variables = []
        with tf.variable_scope(scope) as scope:
            self.pre_M = self._build_graph(vgg_conv_shape)
    def _build_graph(self, vgg_conv_shape):
        """
        Define the network structure.

        The first additional convolutional layer has 9x9 kernels and
        outputs 36 feature maps as the input to the next layer. The
        second additional convolutional layer has 5x5 kernels and
        outputs the foreground heat map of the input image. ReLU is
        the nonlinearity for both layers.

        Args:
            vgg_conv_shape: shape of the selected VGG feature map tensor,
                as (batch, height, width, num_fms).
        Returns:
            conv2: the predicted foreground heat map tensor.
        """
        self.variables = []
        self.kernel_weights = []
        out_num = vgg_conv_shape[-1]
        self.input_maps = tf.placeholder(tf.float32, shape=vgg_conv_shape,
                                         name='selected_maps')
        # assert vgg_conv_shape[-1] == self.params['num_fms']
        with tf.name_scope('conv1') as scope:
            kernel = tf.Variable(tf.truncated_normal([9, 9, out_num, 36],
                                                     dtype=tf.float32, stddev=1e-1),
                                 name='weights')
            conv = tf.nn.conv2d(self.input_maps, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.Variable(tf.constant(0.0, shape=[36], dtype=tf.float32),
                                 trainable=True, name='biases')
            out = tf.nn.bias_add(conv, biases)
            conv1 = tf.nn.relu(out, name=scope)
            self.variables += [kernel, biases]
            self.kernel_weights += [kernel]
            print(conv1.get_shape().as_list(), 'conv1 shape')
        with tf.name_scope('conv2') as scope:
            kernel = tf.Variable(tf.truncated_normal([5, 5, 36, 1],
                                                     dtype=tf.float32, stddev=1e-1),
                                 name='weights')
            conv = tf.nn.conv2d(conv1, kernel, [1, 1, 1, 1], padding='SAME')
            print(conv.get_shape().as_list(), 'conv shape')
            biases = tf.Variable(tf.constant(0.0, shape=[1], dtype=tf.float32),
                                 trainable=True, name='biases')
            out = tf.nn.bias_add(conv, biases)
            conv2 = tf.nn.relu(out, name=scope)
            self.variables += [kernel, biases]
            self.kernel_weights += [kernel]
        print('Shape of the output heat map for %s is %s'
              % (self.scope, conv2.get_shape().as_list()))
        return conv2
    def loss(self, gt_M):
        """Return the loss for the current network.

        Args:
            gt_M: ndarray, ground truth heat map.
        Returns:
            total_loss: mean squared error plus the weight-decay penalty.
        """
        assert isinstance(gt_M, np.ndarray)
        if len(gt_M.shape) == 2:
            # gt_M is a 2D mask
            gt_M = tf.constant(gt_M.reshape((1, gt_M.shape[0], gt_M.shape[1], 1)),
                               dtype=tf.float32)
        elif len(gt_M.shape) == 4:
            # gt_M is SGNet.pre_M
            gt_M = tf.constant(gt_M, dtype=tf.float32)
        else:
            raise ValueError('Unhandled input shape: {0}'.format(gt_M.shape))
        with tf.name_scope(self.scope) as scope:
            beta = tf.constant(self.params['wd'], name='beta')
            loss_rms = tf.reduce_mean(tf.squared_difference(gt_M, self.pre_M))
            loss_wd = [tf.reduce_mean(tf.square(w)) for w in self.kernel_weights]
            loss_wd = beta * tf.add_n(loss_wd)
            total_loss = loss_rms + loss_wd
        return total_loss
    @classmethod
    def edge_RP(cls):
        """
        Propose a series of ROIs along the edges of a given frame.
        This should be called when the particle confidence drops below
        a critical value, which possibly indicates object re-appearance.
        """
        pass

class GNet(SGNet):
    def __init__(self, scope, vgg_conv_shape):
        """
        GNet: parameters are fixed once trained on the first frame.
        """
        super(GNet, self).__init__(scope, vgg_conv_shape)


class SNet(SGNet):
    lr = 1e-2
    optimizer = tf.train.GradientDescentOptimizer(lr)

    def __init__(self, scope, vgg_conv_shape):
        """
        SNet: initialized in the first frame and finetuned during tracking.
        """
        super(SNet, self).__init__(scope, vgg_conv_shape)
    def adaptive_finetune(self, sess, best_M, feed_dict_s):
        """Finetune SNet with the best pre_M predicted by GNet.

        Args:
            best_M: array of shape (1, 14, 14, 1), i.e. gnet.pre_M.
        """
        # Upsample best_M to the SNet output resolution
        best_M_resized = imresize(best_M[0, :, :, 0], [28, 28], interp='bicubic')
        loss = self.loss(best_M_resized)
        train_op = SNet.optimizer.minimize(loss, var_list=self.variables)
        print('SNet adaptive finetune')
        for step in range(200):
            # Fetch order matters: train_op evaluates to None, so the
            # loss value is the second element.
            _, loss_ = sess.run([train_op, loss], feed_dict=feed_dict_s)
            print('loss: ', loss_)
    def discriminative_finetune(self, sess, conv4_3_t0, sgt_M, conv4_3_t,
                                pre_M_g, phi):
        """Discriminatively finetune SNet on the first frame
        (conv4_3_t0, sgt_M) and on the current frame (conv4_3_t), where
        GNet's prediction pre_M_g is weighted by (1 - phi).
        """
        # TODO: type and shape checks
        # Resize pre_M_g to the SNet output resolution
        pre_M_g = imresize(pre_M_g[0, :, :, 0], [28, 28], interp='bicubic')
        pre_M_g = tf.constant(pre_M_g[np.newaxis, :, :, np.newaxis], dtype=tf.float32)
        sgt_M = tf.constant(sgt_M, dtype=tf.float32)
        Loss_t0 = tf.reduce_mean(tf.squared_difference(sgt_M, self.pre_M))
        feed_dict_t0 = {self.input_maps: conv4_3_t0}
        train_op_t0 = SNet.optimizer.minimize(Loss_t0, var_list=self.variables)
        Loss_t = tf.reduce_sum((1 - phi) * tf.reduce_mean(
            tf.squared_difference(pre_M_g, self.pre_M)))
        feed_dict_t = {self.input_maps: conv4_3_t}
        # Minimize Loss_t here (the original minimized Loss_t0 twice, which
        # made the second training op a duplicate of the first).
        train_op_t = SNet.optimizer.minimize(Loss_t, var_list=self.variables)
        for step in range(200):
            _, loss_0 = sess.run([train_op_t0, Loss_t0], feed_dict_t0)
            _, loss_t = sess.run([train_op_t, Loss_t], feed_dict_t)
            print('Loss 0:', loss_0, 'Loss t:', loss_t)
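

if __name__ == '__main__':
    # Minimal usage sketch (assumed, not part of the original API): build an
    # SNet/GNet pair and run one forward pass on random inputs. The shapes
    # (1, 28, 28, 200) and (1, 14, 14, 200) are illustrative, matching the
    # 28x28 SNet and 14x14 GNet heat-map sizes used above; real inputs are
    # selected VGG conv feature maps.
    snet = SNet('snet', [1, 28, 28, 200])
    gnet = GNet('gnet', [1, 14, 14, 200])
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        fake_maps = np.random.rand(1, 28, 28, 200).astype(np.float32)
        heat_map = sess.run(snet.pre_M, feed_dict={snet.input_maps: fake_maps})
        print('SNet heat map shape:', heat_map.shape)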