forked from intel/neural-compressor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinc_quantize_model.py
81 lines (61 loc) · 2.39 KB
/
inc_quantize_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
"""
Environment Setting
Enable Intel Optimized TensorFlow 2.6.0 and newer by setting the environment variable TF_ENABLE_ONEDNN_OPTS=1.
This accelerates training and inference, and it is a mandatory requirement for using Intel® Neural Compressor to quantize an FP32 model or for deploying the quantized model.
"""
import neural_compressor as inc
print("neural_compressor version {}".format(inc.__version__))
import tensorflow as tf
print("tensorflow {}".format(tf.__version__))
from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion, TuningCriterion
from neural_compressor.data import DataLoader
from neural_compressor.quantization import fit
from neural_compressor import Metric
import mnist_dataset
class Dataset:
    """Indexable collection of test images and labels from ``mnist_dataset``.

    ``mnist_dataset.read_data()`` returns six values; only the fourth
    (stored as ``test_images``) and the sixth (stored as ``labels``) are kept.
    """

    def __init__(self):
        """Load the data once and keep the test images and their labels."""
        # The first three values and the fifth are not used by this class.
        (_train_x, _train_y, _train_labels,
         images, _test_y, test_labels) = mnist_dataset.read_data()
        self.labels = test_labels
        self.test_images = images

    def __len__(self):
        """Return how many test images are held."""
        return len(self.test_images)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair found at ``index``."""
        image = self.test_images[index]
        label = self.labels[index]
        return image, label
def _leading_int(component):
    """Return the integer value of one dot-separated version component.

    A component that ``int()`` accepts is converted as-is. Otherwise the
    leading run of ASCII digits is used, so suffixed components such as
    ``"0-rc1"`` or ``"0rc2"`` yield ``0``.

    Raises:
        ValueError: If the component does not start with a digit.
    """
    try:
        return int(component)
    except ValueError:
        pass
    # Strip the leading digits to find where the non-numeric suffix begins.
    suffix = component.lstrip("0123456789")
    digits = component[:len(component) - len(suffix)]
    if not digits:
        raise ValueError(
            "version component {!r} does not start with a digit".format(component)
        )
    return int(digits)


def ver2int(ver):
    """Convert a dotted version string into a single comparable number.

    The components are weighted by 100**2, 100**1 and 100**0, so
    ``"2.6.0"`` becomes ``20600``. Pre-release or build suffixes attached to
    a component (``"2.6.0-rc1"``, ``"2.11.0rc2"``) are ignored instead of
    raising.

    Args:
        ver: Version string with components separated by ``"."``.

    Returns:
        An ``int`` for versions with up to three components. Components
        beyond the third receive fractional weights (``100**-1``, ...), so
        longer versions produce a ``float``.

    Raises:
        ValueError: If a component does not start with a digit.
    """
    res = 0
    for i, part in enumerate(ver.split(".")):
        res += _leading_int(part) * (100 ** (2 - i))
    return res
def compare_ver(src, dst):
    """Compare two dotted version strings.

    Both strings are converted with ``ver2int`` and the resulting numbers
    are compared.

    Args:
        src: Version string on the left-hand side of the comparison.
        dst: Version string on the right-hand side of the comparison.

    Returns:
        ``1`` if ``src`` is newer than ``dst``, ``-1`` if it is older,
        and ``0`` if both convert to the same number.
    """
    left = ver2int(src)
    right = ver2int(dst)
    # Sign idiom: each comparison is a bool, and their difference is 1, -1 or 0.
    return (left > right) - (left < right)
def auto_tune(input_graph_path, batch_size):
    """Quantize a model with static post-training quantization.

    Args:
        input_graph_path: Value forwarded to ``fit`` as ``model``; this
            script passes the path of a frozen FP32 ``.pb`` file.
        batch_size: Batch size of the dataloader used for both calibration
            and evaluation.

    Returns:
        The object returned by ``neural_compressor.quantization.fit``; the
        caller in this script calls ``.save()`` on it.
    """
    # A single dataloader over ``Dataset`` feeds calibration and evaluation.
    loader = DataLoader(
        framework='tensorflow',
        dataset=Dataset(),
        batch_size=batch_size,
    )

    # Tuning is capped at 100 trials.
    trial_budget = TuningCriterion(max_trials=100)
    # Accuracy goal: higher is better, 'relative' criterion, tolerable_loss 0.01.
    accuracy_goal = AccuracyCriterion(
        higher_is_better=True,
        criterion='relative',
        tolerable_loss=0.01,
    )
    quant_config = PostTrainingQuantConfig(
        approach="static",
        tuning_criterion=trial_budget,
        accuracy_criterion=accuracy_goal,
    )

    # The evaluation metric is "topk" with k=1.
    return fit(
        model=input_graph_path,
        conf=quant_config,
        calib_dataloader=loader,
        eval_dataloader=loader,
        eval_metric=Metric(name="topk", k=1),
    )
# Input (frozen FP32 graph) and output (quantized graph) file names.
fp32_frozen_pb_file, int8_pb_file = "fp32_frozen.pb", "alexnet_int8_model.pb"
# Batch size handed to auto_tune for its dataloader.
batch_size = 200

# Quantize the FP32 model, then write the result to int8_pb_file.
q_model = auto_tune(fp32_frozen_pb_file, batch_size)
q_model.save(int8_pb_file)