forked from analogdevicesinc/ai8x-synthesis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkbias.py
149 lines (132 loc) · 5.55 KB
/
kbias.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
###################################################################################################
# Copyright (C) 2018-2020 Maxim Integrated Products, Inc. All Rights Reserved.
#
# Maxim Integrated Products, Inc. Default Copyright Notice:
# https://www.maximintegrated.com/en/aboutus/legal/copyrights.html
#
# Written by RM
###################################################################################################
"""
Bias related functions
"""
import sys
import numpy as np
import tornadocnn as tc
from eprint import eprint
from utils import argmin
_INVALID_VALUE = -(2**63)
def combine(
b,
quantization,
start,
out_chan,
):
"""
When quantizing, combine multiple bias values `b` based on `quantization`. The first kernel
index is `start`. `out_chan` is used to determine whether to pad the result with zeros,
if necessary.
Returns the combined bias values.
"""
val = 0
for i in range(8 // quantization):
if start + i < out_chan:
this_bias = b[start + i] & (2**quantization-1)
val |= this_bias << (i * quantization)
return val
def load(
verbose, # pylint: disable=unused-argument
embedded_code,
apb,
layers,
bias,
quantization, # pylint: disable=unused-argument
group_map,
output_chan,
streaming,
debug, # pylint: disable=unused-argument
):
"""
Write `bias` values for the network to C code.
"""
# Bias: Each group has one bias memory (size BIAS_SIZE bytes). Use only the bias memory in
# one selected group for the layer, and only if the layer uses a bias. Keep track of the
# offsets so they can be programmed into the mask count register later.
if embedded_code:
bias_values = np.zeros((tc.dev.P_NUMGROUPS, tc.dev.BIAS_SIZE), dtype=np.int64)
group_bias_max = [0] * tc.dev.P_NUMGROUPS
bias_offs = [None] * layers
bias_group = [None] * layers
for ll in range(layers):
if bias[ll] is None:
continue
if len(bias[ll]) != output_chan[ll]:
eprint(f'Layer {ll}: output channel count {output_chan[ll]} does not match the number '
f'of bias values {len(bias[ll])}.')
sys.exit(1)
q = 8 # Fixed to 8 bits instead of quantization[ll]
qfactor = 8 // q
# Round up the divided length of bias values
# FIXME: Is it necessary to handle gaps in the next layer?
bias_len = (output_chan[ll] + qfactor-1) // qfactor
if ll == 0 and streaming[ll] and tc.dev.FIX_STREAM_BIAS:
# Work around a problem on AI85
bias_len += 1
if streaming[ll] and tc.dev.FIX_STREAM_BIAS:
eprint(f'Layer {ll} uses streaming and a bias. '
'THIS COMBINATION MIGHT NOT BE FUNCTIONING CORRECTLY!!!',
error=False)
# Pick the group with the least amount of data in it
group = argmin(group_bias_max[t] for t in group_map[ll])
if group_bias_max[group] + bias_len > tc.dev.BIAS_SIZE:
eprint(f'Layer {ll}: bias memory capacity exceeded - available groups: '
f'{group_map[ll]}, used so far: {group_bias_max}, needed: {bias_len}.')
sys.exit(1)
bias_group[ll] = group
bias_offs[ll] = group_bias_max[group]
# Each layer has output_channel number of bias values
i = 0
target_offs = 0
if ll == 0 and streaming[ll] and tc.dev.FIX_STREAM_BIAS:
# Work around a problem on AI85
if not embedded_code:
apb.write_bias(group, bias_offs[ll], 0)
else:
# Store for later
bias_values[group][bias_offs[ll]] = 0
target_offs += 1
while i < output_chan[ll]:
b = combine(bias[ll], q, i, output_chan[ll])
if not embedded_code:
apb.write_bias(group, bias_offs[ll] + target_offs, b)
else:
# Store for later
bias_values[group][bias_offs[ll] + target_offs] = b & 0xff
i += qfactor
target_offs += 1
group_bias_max[group] += bias_len
if embedded_code:
if max(group_bias_max) > 0:
# At least one bias value exists, output defines
for group in range(tc.dev.P_NUMGROUPS):
if group_bias_max[group] == 0:
continue # but not for this group
apb.output_define(bias_values[group][:group_bias_max[group]], f'BIAS_{group}',
'0x%02x', 16)
# Output variables
for group in range(tc.dev.P_NUMGROUPS):
if group_bias_max[group] == 0:
continue
apb.output(f'static const uint8_t bias_{group}[] = BIAS_{group};\n')
apb.output('\n')
# Finally, create function and do memcpy()
apb.output('void memcpy_8to32(uint32_t *dst, const uint8_t *src, size_t n)\n{\n')
apb.output(' while (n-- > 0) {\n *dst++ = *src++;\n }\n}\n\n')
apb.output('void load_bias(void)\n{\n')
for group in range(tc.dev.P_NUMGROUPS):
if group_bias_max[group] == 0:
continue
addr = apb.apb_base + tc.dev.C_GROUP_OFFS*group + tc.dev.C_BRAM_BASE
apb.output(f' memcpy_8to32((uint32_t *) 0x{addr:08x}, bias_{group}, '
f'sizeof(uint8_t) * {group_bias_max[group]});\n')
apb.output('}\n\n')
return bias_offs, bias_group, group_bias_max