Commit 48c5e1a

tensorflow-lite: quantization cleanups

koush committed Dec 28, 2024
1 parent a6a986a commit 48c5e1a
Showing 3 changed files with 116 additions and 96 deletions.
78 changes: 44 additions & 34 deletions plugins/tensorflow-lite/package-lock.json

Generated file; diff not rendered.

126 changes: 70 additions & 56 deletions plugins/tensorflow-lite/src/tflite/__init__.py
@@ -223,67 +223,81 @@ def get_input_size(self) -> Tuple[int, int]:
async def detect_once(self, input: Image.Image, settings: Any, src_size, cvss):
def predict():
interpreter = self.interpreters[threading.current_thread().name]
if self.yolo:
tensor_index = input_details(interpreter, "index")

im = np.stack([input])
i = interpreter.get_input_details()[0]
if i["dtype"] == np.int8:
scale, zero_point = i["quantization"]
if scale == 0.003986024297773838 and zero_point == -128:
# fast path for scale ≈ 1/255 (0.003986024297773838 in this model)
im = im.view(np.int8)
im -= 128
else:
im = im.astype(np.float32) / (255.0 * scale)
im = (im + zero_point).astype(np.int8) # quantize to int8
else:
# this code path is unused.
im = im.astype(np.float32) / 255.0
interpreter.set_tensor(tensor_index, im)
interpreter.invoke()
output_details = interpreter.get_output_details()
input_scale = self.get_input_details()[0]
if self.scrypted_yolo_sep:
outputs = []
for index, output in enumerate(output_details):
o = interpreter.get_tensor(output["index"]).astype(np.float32)
scale, zero_point = output["quantization"]
o -= zero_point
o *= scale
outputs.append(o)

output = yolo_separate_outputs.decode_bbox(outputs, [input.width, input.height])
objs = yolo.parse_yolov9(output[0])
else:
output = output_details[0]
x = interpreter.get_tensor(output["index"])
if x.dtype == np.int8:
scale, zero_point = output["quantization"]
combined_scale = scale * input_scale
if self.scrypted_yolov10:
objs = yolo.parse_yolov10(
x[0],
scale=lambda v: (v - zero_point) * combined_scale,
confidence_scale=lambda v: (v - zero_point) * scale,
threshold_scale=lambda v: (v - zero_point) * scale,
)
else:
objs = yolo.parse_yolov9(
x[0],
scale=lambda v: (v - zero_point) * combined_scale,
confidence_scale=lambda v: (v - zero_point) * scale,
threshold_scale=lambda v: (v - zero_point) * scale,
)
else:
# this code path is unused.
objs = yolo.parse_yolov9(x[0], scale=lambda v: v * input_scale)
else:
if not self.yolo:
tflite_common.set_input(interpreter, input)
interpreter.invoke()
objs = detect.get_objects(
interpreter, score_threshold=0.2, image_scale=(1, 1)
)
return objs

tensor_index = input_details(interpreter, "index")

im = np.stack([input])
i = interpreter.get_input_details()[0]
if i["dtype"] == np.int8:
scale, zero_point = i["quantization"]
if scale == 0.003986024297773838 and zero_point == -128:
# fast path for scale ≈ 1/255 (0.003986024297773838 in this model)
im = im.view(np.int8)
im -= 128
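# note: viewing the uint8 frame as int8 and letting the in-place
# subtraction wrap around maps pixel p to p - 128, which is exactly
# the affine quantization for zero_point == -128, with no float round trip.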
else:
im = im.astype(np.float32) / (255.0 * scale)
im = (im + zero_point).astype(np.int8) # quantize to int8
else:
# this code path is unused.
im = im.astype(np.float32) / 255.0
interpreter.set_tensor(tensor_index, im)
interpreter.invoke()
output_details = interpreter.get_output_details()

# handle separate outputs for quantization accuracy
if self.scrypted_yolo_sep:
outputs = []
for output in output_details:
o = interpreter.get_tensor(output["index"]).astype(np.float32)
scale, zero_point = output["quantization"]
o -= zero_point
o *= scale
outputs.append(o)
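# each output tensor carries its own (scale, zero_point), so
# dequantizing per tensor here preserves accuracy before the heads
# are mixed in decode_bbox.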

output = yolo_separate_outputs.decode_bbox(outputs, [input.width, input.height])
if self.scrypted_yolov10:
objs = yolo.parse_yolov10(output[0])
else:
objs = yolo.parse_yolov9(output[0])
return objs

# this scale stuff can probably be optimized to dequantize ahead of time...
output = output_details[0]
x = interpreter.get_tensor(output["index"])
input_scale = self.get_input_details()[0]

# this non-quantized code path is unused but here for reference.
if x.dtype != np.int8 and x.dtype != np.int16:
if self.scrypted_yolov10:
objs = yolo.parse_yolov10(x[0], scale=lambda v: v * input_scale)
else:
objs = yolo.parse_yolov9(x[0], scale=lambda v: v * input_scale)
return objs

scale, zero_point = output["quantization"]
combined_scale = scale * input_scale
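# folding the input scale into the output dequantization scale lets the
# lambdas map raw int8 box values to final coordinates in one multiply.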
if self.scrypted_yolov10:
objs = yolo.parse_yolov10(
x[0],
scale=lambda v: (v - zero_point) * combined_scale,
confidence_scale=lambda v: (v - zero_point) * scale,
threshold_scale=lambda v: (v - zero_point) * scale,
)
else:
objs = yolo.parse_yolov9(
x[0],
scale=lambda v: (v - zero_point) * combined_scale,
confidence_scale=lambda v: (v - zero_point) * scale,
threshold_scale=lambda v: (v - zero_point) * scale,
)
return objs

objs = await asyncio.get_event_loop().run_in_executor(self.executor, predict)
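
For context, the (de)quantization above is the standard TFLite affine scheme, real = scale * (q - zero_point). A minimal sketch of the round trip, with helper names that are illustrative rather than from the plugin:

import numpy as np

def dequantize(q, scale, zero_point):
    # real value = scale * (quantized - zero_point)
    return (q.astype(np.float32) - zero_point) * scale

def quantize(x, scale, zero_point):
    # inverse mapping, clamped to the int8 range
    q = np.round(x / scale) + zero_point
    return np.clip(q, -128, 127).astype(np.int8)

# round trip at the scale checked in the fast path above
q = np.array([-128, 0, 127], dtype=np.int8)
x = dequantize(q, 0.003986024297773838, -128)
assert np.array_equal(quantize(x, 0.003986024297773838, -128), q)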
8 changes: 2 additions & 6 deletions plugins/tensorflow-lite/src/tflite/yolo_separate_outputs.py
@@ -1,5 +1,5 @@
import numpy as np

from common.softmax import softmax

class DFL:
def __init__(self, c1=16):
self.c1 = c1
@@ -12,10 +12,6 @@ def forward(self, x):
x = np.sum(self.conv_weights * x, axis=1)
return x.reshape(b, 4, a)

def softmax(x, axis=-1):
e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
return e_x / np.sum(e_x, axis=axis, keepdims=True)

def make_anchors(feats, strides, grid_cell_offset=0.5):
anchor_points, stride_tensor = [], []
assert feats is not None
@@ -59,7 +55,7 @@ def decode_bbox(preds, img_shape):
int(np.sqrt(img_shape[-2] * img_shape[-1] / preds[p].shape[1])) for p in pos if preds[p].shape[2] != 64]
dims = [(img_h // s, img_w // s) for s in strides]
fake_feats = [np.zeros((1, 1, h, w), dtype=preds[0].dtype) for h, w in dims]
anchors, strides = [x.transpose(0, 1) for x in make_anchors(fake_feats, strides, 0.5)] # generate anchors and strides
anchors, strides = make_anchors(fake_feats, strides, 0.5)

strides_tensor = strides.transpose(1, 0)
strides_tensor = np.expand_dims(strides_tensor, 0)
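
For background, the DFL box head decoded in this file predicts each of the four box sides as a distribution over c1 = 16 bins, and the decoded offset is that distribution's expected value; the softmax now imported from common.softmax matches the definition removed above. A rough sketch of the idea, not the plugin's exact code:

import numpy as np

def softmax(x, axis=-1):
    # numerically stable softmax (same as the removed local definition)
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / np.sum(e_x, axis=axis, keepdims=True)

def dfl_decode(x, c1=16):
    # x: (batch, 4 * c1, anchors) raw logits from the box head
    b, _, a = x.shape
    probs = softmax(x.reshape(b, 4, c1, a), axis=2)
    bins = np.arange(c1, dtype=np.float32).reshape(1, 1, c1, 1)
    # expected bin index per side -> (batch, 4, anchors)
    return np.sum(probs * bins, axis=2)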
