'''
After training your model with the "VOS.py" file, you can evaluate the performance using this script.
It creates the object masks predicted by the trained model.
'''
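# Example invocation (the dataset and checkpoint paths come from the YAML config
# read in the __main__ block; both flags are optional and shown with their defaults):
#   python VOS_evaluate.py --batch_size 2 --scenario_name validation_results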
import math
import tensorflow as tf
import numpy as np
import time
from tensorflow import ConfigProto  # TF 1.x API (tf.compat.v1.ConfigProto under TF 2)
from utils.saver_mask import Saver_mask
from utils.vos_data_loader import Data_loader
from utils import data_generator
from networks.decoder import Decoder
from networks.lstm_initializer import LSTM_initializer
from networks.encoder import Encoder
from networks.unrolled_convLSTM import Unrolled_convLSTM
#######################################################################################################################
class My_network:
    def __init__(self, batch_size):
        self.my_graph = tf.Graph()
        self.batch_size = batch_size
        with self.my_graph.as_default():
            with tf.variable_scope("global_name_scope", reuse=tf.AUTO_REUSE):
                # Placeholders: the first frame with its mask (RGB + mask = 4 channels),
                # and the 7 RGB frames that are fed to the encoder
                self.input_images_initializer = tf.placeholder("float", [batch_size, 256, 448, 4])
                self.input_image_encoder = tf.placeholder("float", [batch_size, 7, 256, 448, 3])
                # Build the model operations
                self.init_model()
                # Initialize the graph
                self.init = tf.global_variables_initializer()
                self.saver = tf.train.Saver()  # (max_to_keep=0)
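    # Graph topology, as wired in init_model below (B = batch_size, F = 7 frames):
    #   [B, 256, 448, 4] first frame + mask -> LSTM_initializer -> (c0, h0)
    #   [B, F, 256, 448, 3] frames -> Encoder -> ConvLSTM(c0, h0) -> Decoder
    # The decoder yields per-frame logits (y_hat) and mask predictions (mask_out).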
    def init_model(self):
        # Get initial states for the LSTM from the first image and its mask
        lstm_initializer = LSTM_initializer()
        lstm_initializer.build(self.input_images_initializer)
        h_0 = lstm_initializer.h0
        c_0 = lstm_initializer.c0
        # Get the 7 frames and feed them to the encoder; fold the time axis into the
        # batch axis so the encoder sees a 4-D tensor: [B, F, H, W, C] -> [B*F, H, W, C]
        tmp_shape = self.input_image_encoder.shape
        input_image_encoder_unstacked = tf.reshape(self.input_image_encoder,
                                                   [tmp_shape[0] * tmp_shape[1], tmp_shape[2],
                                                    tmp_shape[3], tmp_shape[4]])
        encoder = Encoder()
        encoder.build(input_image_encoder_unstacked)
        encoder_output = encoder.conv6
        # Restore the time axis: the set of B batches and F frames to be fed to the ConvLSTM
        encoder_output_stacked = tf.reshape(encoder_output, [self.input_image_encoder.shape[0],
                                                             self.input_image_encoder.shape[1],
                                                             encoder_output.shape[1], encoder_output.shape[2],
                                                             encoder_output.shape[3]])
        # Feed the output of the encoder to the ConvLSTM
        conv_lstm = Unrolled_convLSTM()
        conv_lstm.build(encoder_output_stacked, c_0, h_0)
        lstm_output = conv_lstm.lstm_output
        # Fold batch and time again so the decoder also sees a 4-D tensor
        lstm_output_unstacked = tf.reshape(lstm_output, (lstm_output.shape[0] * lstm_output.shape[1],
                                                         lstm_output.shape[2], lstm_output.shape[3],
                                                         lstm_output.shape[4]))
        # Feed the output of the ConvLSTM to the decoder
        decoder = Decoder()
        decoder.build(lstm_output_unstacked)
        decoder_output = decoder.y_hat
        mask_output = decoder.mask_out
        # Unfold back to [B, F, H, W, C] so masks can be saved per frame
        self.decoder_output_unstacked = tf.reshape(decoder_output, (lstm_output.shape[0], lstm_output.shape[1],
                                                                    decoder_output.shape[1], decoder_output.shape[2],
                                                                    decoder_output.shape[3]))
        self.mask_output_unstacked = tf.reshape(mask_output, self.decoder_output_unstacked.shape)
    def graph_builder(self):
        return self.input_images_initializer, self.input_image_encoder, self.decoder_output_unstacked, \
               self.mask_output_unstacked, self.saver, self.my_graph
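# Illustrative smoke test (not part of the pipeline): building the graph and
# printing the recovered tensor shapes is a quick way to check the wiring before
# restoring a checkpoint; both outputs share the same [B, 7, ...] shape.
#   net = My_network(batch_size=2)
#   _, _, dec_out, msk_out, _, _ = net.graph_builder()
#   print(dec_out.shape, msk_out.shape)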
#######################################################################################################################
# Build the graph
def main(args):
    dataset_path = "../new_dataset_small/valid"  # produced by build_new_valid_dataset() in __main__
    scenario_name = args.scenario_name
    batch_size = args.batch_size
    saver_instance = Saver_mask(scenario_name, original_dataset_path)
    my_network = My_network(batch_size)
    input_images_initializer, input_image_encoder, decoder_output, mask_output, saver, my_graph \
        = my_network.graph_builder()
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.device("/device:GPU:0"):
        print("Reset the graph")
        tf.reset_default_graph()
        with tf.Session(config=config, graph=my_graph) as sess:
            # Restore variables from disk.
            saver.restore(sess, checkpoints_path)
            print("Model restored from: ", checkpoints_path)
            data_loader = Data_loader(dataset_path)
            object_list = np.array(data_loader.get_val_object_list())
            n_batch = math.floor(len(object_list) / batch_size) + 1  # +1 so the tail of the list is covered
            print("###DEBUG: Total object number is {:d}, n_batch is {:d}, batch size is: {:d}"
                  .format(len(object_list), n_batch, batch_size))
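            # One mini-batch per iteration; since n_batch rounds up, the final
            # iteration can overshoot the object list, and the tail branch below
            # re-reads the last batch_size objects so the feed stays full.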
            for idx in range(n_batch):
                start_time = time.time()
                start_i = idx * batch_size
                end_i = (idx + 1) * batch_size
                if end_i <= len(object_list):
                    batch_val_object_list = object_list[start_i:end_i]
                else:
                    batch_val_object_list = object_list[len(object_list) - batch_size:len(object_list)]
                assert len(batch_val_object_list) == batch_size
                input_initializer, input_encoder = data_loader.get_val_data_batch(batch_val_object_list)
                eta_read = time.time() - start_time
                start_time = time.time()
                val_out = sess.run([mask_output, decoder_output],
                                   feed_dict={input_images_initializer: input_initializer,
                                              input_image_encoder: input_encoder})
                eta_learn = time.time() - start_time
                print(" ###INFO: elapsed: read:{:f}s, inference:{:f}s, Progress = {:.2f}%, Batch #{:d}"
                      .format(eta_read, eta_learn, 100 * idx / n_batch, idx))
                saver_instance.store_masks(val_out, batch_val_object_list)
    print("done")
if __name__ == '__main__':
    import argparse
    from utils.config_reader import conf_reader
    # Read configurations from the YAML file
    configs = conf_reader()
    original_dataset_path = configs["configs"]["path"]  # Path to the original dataset
    checkpoints_path = configs["configs"]["checkpoints_path"]  # Path to the pre-saved checkpoint files
    # Read input arguments from the command line
    parser = argparse.ArgumentParser(description='Youtube Video Object Segmentation.')
    parser.add_argument('--batch_size', type=int, default=2, help='Size of the mini-batch.')
    parser.add_argument('--scenario_name', type=str, default="validation_results", help='Validation scenario name.')
    args = parser.parse_args()
    # Preprocess the dataset and save it to a new directory as new_dataset_small
    data_generator.build_new_valid_dataset(original_dataset_path)
    # Run the evaluation to create the output masks.
    main(args)