forked from intel/neural-compressor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
profiling_inc.py
143 lines (114 loc) · 4.16 KB
/
profiling_inc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# Profiling script for a TensorFlow SavedModel on the TFDS 'beans' dataset:
# measures accuracy, throughput, and batch-size-1 latency.
import tensorflow as tf
print("Tensorflow version {}".format(tf.__version__))
# Run tf.functions as compiled graphs (the default), not eagerly, for speed.
tf.config.run_functions_eagerly(False)
import numpy as np
import time
import argparse
import os
import json
import tensorflow_hub as hub
import tensorflow_datasets as tfds
# Target image width/height and class count used by preprocss()/load_dataset().
# NOTE(review): scale() below uses its own local 224x224 instead of these —
# the two preprocessing paths produce different input sizes; verify intent.
w=h=32
class_num=3
def scale(image, label):
    """Preprocess one (image, label) example.

    Casts the image to float32, rescales pixel values to [0, 1], resizes it
    to 224x224, and one-hot encodes the label over 3 classes.
    """
    # NOTE(review): these locals shadow the module-level w/h (32) and
    # class_num — this path resizes to 224x224, unlike preprocss().
    side_w = 224
    side_h = 224
    n_classes = 3
    pixels = tf.cast(image, tf.float32)
    pixels = pixels / 255.0
    return tf.image.resize(pixels, [side_w, side_h]), tf.one_hot(label, n_classes)
def val_data():
    """Load the TFDS 'beans' train split and preprocess it via scale().

    Returns a list of (image, one_hot_label) pairs.
    """
    splits, info = tfds.load(name = 'beans', with_info = True, as_supervised = True, split = ['train'])
    # A list split argument yields a list of datasets; [-1] is the single
    # 'train' dataset requested above.
    return [scale(img, lbl) for img, lbl in splits[-1]]
def load_raw_dataset():
    """Fetch the 'beans' train split as NumPy arrays.

    batch_size=-1 asks TFDS to return the entire split as one batch, which
    tfds.as_numpy then converts to (images, labels) NumPy pairs.
    """
    loaded, _info = tfds.load(name = 'beans', with_info = True, as_supervised = True,
                              split = ['train'], batch_size=-1)
    return tfds.as_numpy(loaded)
def preprocss(dataset):
    """Normalize and resize a (images, labels) batch for inference.

    Images are scaled to [0, 1] and resized to the module-level (w, h);
    labels are one-hot encoded over the module-level class_num classes.
    Returns a pair of NumPy arrays (inputs, one_hot_labels).
    """
    images, labels = dataset
    inputs = [
        tf.image.resize(tf.convert_to_tensor(img, dtype=tf.float32) / 255.0, [w, h])
        for img in images
    ]
    encoded = [tf.one_hot(lbl, class_num) for lbl in labels]
    return np.array(inputs), np.array(encoded)
def load_dataset():
    """Preprocess every split returned by load_raw_dataset()."""
    processed = []
    for raw_split in load_raw_dataset():
        processed.append(preprocss(raw_split))
    return processed
def calc_accuracy(predictions, labels):
    """Compute top-1 accuracy from model outputs and one-hot labels.

    Args:
        predictions: tensor exposing .numpy(), shape (N, num_classes) —
            raw model outputs (logits or probabilities).
        labels: array-like one-hot labels, shape (N, num_classes).

    Returns:
        Fraction of samples whose predicted class (argmax) matches the
        label class, as a plain Python float; 0 for an empty batch.
    """
    pred_ids = np.argmax(predictions.numpy(), axis=1)
    label_ids = np.argmax(labels, axis=1)
    # Guard against an empty batch before averaging.
    if len(pred_ids) == 0:
        return 0
    # Vectorized comparison replaces the original manual counting loop;
    # float() keeps the result JSON-serializable (np.float64 is not).
    return float(np.mean(pred_ids == label_ids))
def test_perf(pb_model_file, val_data):
    # Benchmark a TensorFlow SavedModel: full-batch accuracy and throughput,
    # then per-sample (batch-size 1) latency.
    #   pb_model_file: path to a SavedModel directory.
    #   val_data: (inputs, one_hot_labels) pair of NumPy arrays.
    # Returns (accuracy, throughput_fps, latency_ms).
    [x_test_np, label_test] = val_data
    q_model = tf.saved_model.load(pb_model_file)
    x_test = tf.convert_to_tensor(x_test_np)
    infer = q_model.signatures["serving_default"]
    times = 10
    bt = 0
    # First 20% of iterations are warmup and excluded from timing.
    warmup = int(times*0.2)
    for i in range(times):
        if i == warmup:
            # Start the clock once warmup iterations have completed.
            bt = time.time()
        res = infer(x_test)
    et = time.time()
    # The serving signature returns a dict; take its single output tensor.
    res = list(res.values())[0]
    accuracy = calc_accuracy(res, label_test)
    print('accuracy:', accuracy)
    # Timed iterations each process the whole batch of len(x_test) samples.
    throughput = len(x_test)*(times-warmup)/(et-bt)
    print('max throughput(fps):', throughput)
    # latency when BS=1
    times = 1
    bt = 0
    # int(1*0.2) == 0, so timing starts on the first (and only) pass.
    warmup = int(times*0.2)
    for i in range(times):
        if i == warmup:
            bt = time.time()
        # NOTE(review): inner loop reuses the name i, shadowing the outer
        # index — harmless with times == 1, but fragile if times changes.
        for i in range(len(x_test)):
            res = infer(tf.convert_to_tensor([x_test_np[i]]))
        #q_model.test_on_batch(val_data, verbose=0)
    et = time.time()
    # Average per-sample latency in milliseconds.
    latency = (et - bt) * 1000 / (times - warmup)/len(x_test)
    print('latency(ms):', latency)
    return accuracy, throughput, latency
def save_res(result):
    """Write an (accuracy, throughput, latency) triple to '<args.index>.json'.

    Reads the module-level args (parsed CLI options) for the output name.
    """
    accuracy, throughput, latency = result
    payload = {
        'accuracy': accuracy,
        'throughput': throughput,
        'latency': latency,
    }
    outfile = args.index + ".json"
    with open(outfile, 'w') as f:
        json.dump(payload, f)
    print("Save result to {}".format(outfile))
# ---- CLI entry point: parse options, pin threading env vars, run benchmark ----
parser = argparse.ArgumentParser()
parser.add_argument('--index', type=str, help='file name of output', required=True)
parser.add_argument('--input-graph', type=str, help='file name for graph', required=True)
# Thread counts are kept as strings because they go straight into os.environ.
parser.add_argument('--num-intra-threads', type=str, help='number of threads for an operator', required=False,
                    default="24" )
parser.add_argument('--num-inter-threads', type=str, help='number of threads across operators', required=False,
                    default="1")
parser.add_argument('--omp-num-threads', type=str, help='number of threads to use', required=False,
                    default="24")
args = parser.parse_args()
# OpenMP / TF threading knobs for CPU profiling.
# NOTE(review): TF is already imported above; these env vars are typically
# read when TF initializes its runtime — verify they still take effect here.
os.environ["KMP_BLOCKTIME"] = "1"
os.environ["KMP_SETTINGS"] = "0"
os.environ["OMP_NUM_THREADS"] = args.omp_num_threads
os.environ["TF_NUM_INTEROP_THREADS"] = args.num_inter_threads
os.environ["TF_NUM_INTRAOP_THREADS"] = args.num_intra_threads
#os.environ["DNNL_VERBOSE"] = "1"
# Load and preprocess the dataset, benchmark the model on the last split,
# and save {accuracy, throughput, latency} to '<index>.json'.
datasets = load_dataset()
print(len(datasets[-1]))
save_res(test_perf(args.input_graph, datasets[-1]))