-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.py
115 lines (93 loc) · 3.9 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import warnings
from collections import OrderedDict, namedtuple
import numpy as np
import tensorrt as trt
import torch
warnings.filterwarnings("ignore")
def test_model_jit(model, imsz, device=None):
    """Smoke-test a TorchScript model with an all-zeros image batch.

    Args:
        model: Path (or file-like object) of the TorchScript archive; it is
            handed to ``torch.jit.load`` unchanged.
        imsz: Iterable with the trailing input dimensions. The input tensor
            is built as ``(1, 3, *imsz)``, so this is presumably
            ``(height, width)``.
        device: Device used for both the weights and the input. ``None``
            (the default) selects CUDA when it is available and falls back
            to the CPU otherwise.

    Returns:
        Whatever the loaded module returns for the zero input.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # map_location keeps the weights on the same device as the input; without
    # it the module is restored to the device it was saved from, which may
    # not match the input tensor.
    net = torch.jit.load(model, map_location=device)
    im = torch.zeros(1, 3, *imsz, dtype=torch.float32, device=device)

    # Pure inference: no autograd graph is needed for a smoke test.
    with torch.no_grad():
        return net(im)


# Despite its name this is a helper that needs explicit arguments, not a
# pytest test case; opt out so pytest does not try to treat ``model`` and
# ``imsz`` as fixtures when this module is collected.
test_model_jit.__test__ = False
def test_model_onnx(model):
    """Smoke-test an ONNX model by running one all-zeros input through it.

    An ONNX Runtime session is created with the CUDA provider first and the
    CPU provider as fallback. Only the first declared input and the first
    declared output of the model are used.

    Args:
        model: Model path (or serialized model bytes) accepted by
            ``onnxruntime.InferenceSession``.

    Returns:
        The list returned by ``session.run`` for the first output.
    """
    import onnxruntime as rt  # optional dependency, only needed by this helper

    session = rt.InferenceSession(
        model,
        sess_options=rt.SessionOptions(),
        providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
    )
    input_meta = session.get_inputs()[0]
    output_name = session.get_outputs()[0].name

    # Dynamic axes are reported as None or as a symbolic name instead of an
    # integer; np.zeros cannot allocate those, so use extent 1 for them.
    input_shape = [
        dim if isinstance(dim, int) and dim > 0 else 1 for dim in input_meta.shape
    ]

    # NOTE(review): the input is assumed to be float32 — confirm for models
    # whose first input has another element type.
    im = np.zeros(input_shape, dtype=np.float32)
    return session.run([output_name], {input_meta.name: im})
def test_model_onnx_v2(model):
    """Validate an ONNX model file and print its graph.

    The model is loaded with ``onnx.load``, passed through
    ``onnx.checker.check_model`` (which raises on an invalid model), and a
    human-readable rendering of its graph is written to stdout.

    Args:
        model: Path or file-like object accepted by ``onnx.load``.
    """
    import onnx

    proto = onnx.load(model)
    onnx.checker.check_model(proto)
    graph_text = onnx.helper.printable_graph(proto.graph)
    print(graph_text)
class INFERENCE_TENSORRT:
    """Run inference with a serialized TensorRT engine using torch tensors as buffers.

    One torch tensor is allocated on ``device`` for every engine binding and
    its raw address is handed to TensorRT. ``infer`` swaps the address of the
    input binding for the caller's tensor and returns the output buffer.

    NOTE(review): the binding-index API used here (``num_bindings``,
    ``get_binding_name``, ``set_binding_shape``, ``execute_v2`` ...) is the
    TensorRT 8.x style interface — confirm the installed TensorRT version
    still provides it.

    Attributes set by ``__init__``:
        device: Device the binding buffers live on.
        fp16: True when an input binding uses float16.
        dynamic: True when requested by the caller or when an input binding
            has a ``-1`` dimension.
        bindings: ``OrderedDict`` mapping binding name to a ``Binding`` tuple
            ``(name, dtype, shape, data, ptr)``.
        binding_addrs: ``OrderedDict`` mapping binding name to device address,
            in engine binding order.
        batch_size: First dimension of the input binding shape.
    """

    def __init__(
        self,
        weight,
        device,
        dynamic: bool,
        input_name: str = "images",
        output_name: str = "output",
    ):
        """Deserialize the engine and allocate one buffer per binding.

        Args:
            weight: Path of the serialized TensorRT engine file.
            device: Torch device (or device string) for the binding buffers.
            dynamic: Initial value of the dynamic-shape flag; it is forced to
                True when the engine declares a ``-1`` input dimension.
            input_name: Name of the engine's input binding.
            output_name: Name of the engine's output binding.

        Raises:
            RuntimeError: If the engine cannot be deserialized.
            KeyError: If the engine has no binding called ``input_name``.
        """
        self.device = device
        self.fp16 = False
        self.dynamic = dynamic
        self.input_name = input_name
        self.output_name = output_name
        self.bindings = OrderedDict()
        self.Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
        self.logger = trt.Logger(trt.Logger.INFO)
        self.model = self._load_model(weight=weight)
        self.context = self.model.create_execution_context()
        self.binding_addrs, self.batch_size = self.allocate_buffers()

    def _load_model(self, weight):
        """Read ``weight`` from disk and deserialize it into a TensorRT engine."""
        with open(weight, "rb") as f, trt.Runtime(self.logger) as runtime:
            model = runtime.deserialize_cuda_engine(f.read())
        # Deserialization reports failure by returning None; fail here with a
        # clear message instead of an AttributeError further down.
        if model is None:
            raise RuntimeError(
                f"failed to deserialize TensorRT engine from {weight!r}"
            )
        return model

    def _binding(self, name):
        """Return the ``Binding`` called ``name`` or raise a descriptive KeyError."""
        try:
            return self.bindings[name]
        except KeyError:
            raise KeyError(
                f"engine has no binding named {name!r}; "
                f"available bindings: {list(self.bindings)}"
            ) from None

    def allocate_buffers(self):
        """Allocate a device buffer for every binding of the engine.

        Side effects: fills ``self.bindings`` and may set ``self.dynamic`` and
        ``self.fp16`` based on the input bindings.

        Returns:
            Tuple ``(binding_addrs, batch_size)``: the name -> device address
            mapping in binding order, and the first dimension of the input
            binding shape.

        Raises:
            KeyError: If the engine has no binding called ``self.input_name``.
        """
        for index in range(self.model.num_bindings):
            name = self.model.get_binding_name(index)
            dtype = trt.nptype(self.model.get_binding_dtype(index))
            if self.model.binding_is_input(index):
                if -1 in tuple(self.model.get_binding_shape(index)):  # dynamic
                    self.dynamic = True
                    # Size the buffers for the third entry of optimization
                    # profile 0 (presumably the profile's maximum shape).
                    self.context.set_binding_shape(
                        index, tuple(self.model.get_profile_shape(0, index)[2])
                    )
                if dtype == np.float16:
                    self.fp16 = True
            shape = tuple(self.context.get_binding_shape(index))
            buffer = torch.from_numpy(np.empty(shape, dtype=dtype)).to(self.device)
            self.bindings[name] = self.Binding(
                name, dtype, shape, buffer, int(buffer.data_ptr())
            )
        binding_addrs = OrderedDict((n, b.ptr) for n, b in self.bindings.items())
        batch_size = self._binding(self.input_name).shape[0]
        return binding_addrs, batch_size

    def infer(self, im):
        """Run the engine on ``im`` and return the output buffer.

        Args:
            im: Torch tensor holding the input batch. It is moved to
                ``self.device``, cast to the input binding's dtype and made
                contiguous when necessary; a tensor that already satisfies
                all three is used as-is.

        Returns:
            The tensor backing the output binding. The same tensor object is
            returned (and overwritten) on every call, so clone it if the
            result must survive the next ``infer``.

        Raises:
            AssertionError: If the input shape does not match the input
                binding shape.
            KeyError: If the input or output binding name is unknown.
        """
        in_binding = self._binding(self.input_name)

        # TensorRT only receives a raw address, so it cannot notice a tensor
        # that is on the wrong device, has the wrong element type (e.g. fp32
        # data for an fp16 engine) or is a strided view. Normalize first.
        im = im.to(device=self.device, dtype=in_binding.data.dtype).contiguous()

        if self.dynamic and im.shape != in_binding.shape:
            i_in, i_out = (
                self.model.get_binding_index(x)
                for x in (self.input_name, self.output_name)
            )
            self.context.set_binding_shape(i_in, im.shape)  # reshape if dynamic
            in_binding = in_binding._replace(shape=im.shape)
            self.bindings[self.input_name] = in_binding
            self._binding(self.output_name).data.resize_(
                tuple(self.context.get_binding_shape(i_out))
            )

        s = in_binding.shape
        # Raised explicitly rather than via `assert` so the guard also holds
        # under `python -O`; the exception type is unchanged for callers.
        if im.shape != s:
            raise AssertionError(
                f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
            )

        self.binding_addrs[self.input_name] = int(im.data_ptr())
        self.context.execute_v2(list(self.binding_addrs.values()))
        return self._binding(self.output_name).data
if __name__ == "__main__":
    # Demo: run one all-zeros 224x224 RGB batch through a local engine file.
    device = torch.device("cuda")

    # The instance is deliberately not called `trt`: that would rebind the
    # module-level `tensorrt` alias and break any later use of the module
    # (for example constructing a second INFERENCE_TENSORRT).
    engine = INFERENCE_TENSORRT(
        weight="./densenet121.engine", device=device, dynamic=False
    )
    data = torch.zeros((1, 3, 224, 224), dtype=torch.float32, device=device)
    y = engine.infer(im=data)
    print(y)