-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhfsort.py
330 lines (257 loc) · 11.9 KB
/
hfsort.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
#!/usr/bin/env python
import signal
import logging
from sys import exit
from argparse import ArgumentParser
from src.common.classes import Node, Predecessor
from src.common.functions import create_input_file_generator, exit_handler, \
is_hex
from src.ReportParser import PerfReportParser
from src.C3Algorithm import HFSorter
from src.FileWriter import write_sorted_list, write_to_linker_template, \
write_linker_script
from src.SystemmapParser import get_kallsyms_output, parse_systemmap
def get_size_list(args):
    """
    Load the symbol sizes from the source selected on the command line.

    Args:
        args: Parsed arguments; the attributes ``kallsyms`` and ``sizefile``
            are read.

    Returns:
        Whatever ``parse_systemmap`` produced for the selected source, or
        None when neither ``--kallsyms`` nor ``--sizefile`` is set. Exits the
        program if the kallsyms output yields nothing usable.
    """
    if args.kallsyms:
        print("Get and parse /proc/kallsyms...")
        sizes = parse_systemmap(get_kallsyms_output(), size_map=True)
        if not sizes:
            exit("ERROR No valid output from /proc/kallsyms. "
                 "Check for problems.")
    else:
        sizes = None

    if args.sizefile:
        print(f"Parse sizefile '{args.sizefile}'...")
        # A sizefile, when given, replaces anything collected so far.
        sizes = parse_systemmap(
            create_input_file_generator(args.sizefile), nm_s=True)

    return sizes
def get_symbol_size(element, symbol_size_list, ta_sy, debug):
    """
    Determine the size of the symbol ``ta_sy``.

    The size list is consulted first (if one was loaded); when the symbol is
    not listed there, the "symbol_size" value of the report element is used
    as a fallback.

    Args:
        element: Report entry offering ``get``; its "symbol_size" value is
            the fallback source for the size.
        symbol_size_list: Mapping from symbol name to symbol size, or a falsy
            value if no size list is available.
        ta_sy: Name of the target symbol to look up.
        debug: Tuple ``(not_found_size_symbol, debug_flag)`` holding the
            running count of symbols missing from the size list and the
            debug switch.

    Returns:
        A tuple ``(symbol_size, not_found_size_symbol)``. ``symbol_size`` is
        an int, or None if no usable size could be determined. The counter is
        always handed back (incremented only in debug mode when the symbol is
        missing from the size list), so the caller never loses its running
        total.
    """
    not_found_size_symbol, debug = debug

    symbol_size = None
    if symbol_size_list:
        symbol_size = symbol_size_list.get(ta_sy)
        if symbol_size is None and debug:
            not_found_size_symbol += 1
            print(f"DEBUG Symbol size of '{ta_sy}' not found in the "
                  f"size list. Trying to get the information from the element "
                  f"itself."
                  )

    if symbol_size is None:
        # No size list, or the symbol is not listed there: fall back to the
        # size information carried by the element itself.
        symbol_size = element.get("symbol_size")

    try:
        return int(symbol_size), not_found_size_symbol
    except (TypeError, ValueError):
        # Missing (None) or non-numeric (e.g. "unknown") size information.
        # Report "no size" instead of crashing; the counter is preserved.
        if debug:
            print(f"DEBUG Symbol size of '{ta_sy}' could not be "
                  f"determined.")
        return None, not_found_size_symbol
def _select_report_section(sections):
    """
    Return the first section whose event name mentions "cycles" or
    "instructions", or None if no section qualifies.
    """
    for candidate in sections:
        # A missing event name is treated as an empty one so the substring
        # tests below cannot fail on None.
        event_name = candidate.header.get("event_name") or ""
        if "cycles" in event_name or "instructions" in event_name:
            return candidate
    return None


def parse_and_combine_information_from_multiple_files(args, report_list):
    """
    Combine the report information from the report list into usable information
    for the C3 heuristic.

    The first section whose event name contains "cycles" or "instructions" is
    selected. Each distinct target symbol in that section is examined once:
    hex-valued symbols and symbols without a usable size are skipped, for all
    others a node is created from the section's entries.

    Args:
        args: Parsed arguments. ``loglevel`` controls the debug/info output;
            the object is also passed on to ``get_size_list``.
        report_list: Parsed report offering ``sections``; every section is
            expected to provide ``header`` (with "event_name" and
            "total_samples") and ``values`` (entries offering ``get``).

    Returns:
        A list of nodes containing the collected information.

    Exits the program if the report has no sections or none of them is
    suitable.

    Note: the "missing sizes" statistic is only maintained in debug mode.
    """
    if not report_list.sections:
        exit("ERROR: No section available.")

    section = _select_report_section(report_list.sections)
    if section is None:
        exit("ERROR: No suitable section found")

    debug = args.loglevel == logging.DEBUG
    node_list = []
    # A set gives constant-time "already examined" checks.
    examined_nodes = set()
    total_samples = int(section.header.get("total_samples"))
    symbol_size_list = get_size_list(args)

    # debugging statistics
    not_found_size_symbols, correct_symbols, hex_symbols = 0, 0, 0

    for element in section.values:
        ta_sy = element.get("target_symbol")

        # Every target symbol is looked at exactly once, whatever the outcome.
        if ta_sy in examined_nodes:
            continue
        examined_nodes.add(ta_sy)

        # We only want valid symbol names; hex values are corrupted data.
        if is_hex(ta_sy):
            hex_symbols += 1
            continue
        correct_symbols += 1

        symbol_size, reported_count = get_symbol_size(
            element, symbol_size_list, ta_sy,
            (not_found_size_symbols, debug))

        # The lookup may already have counted this symbol as missing; it may
        # also hand back None for the counter, in which case the running
        # total must be kept as it is.
        already_counted = (reported_count is not None
                           and reported_count > not_found_size_symbols)
        if reported_count is not None:
            not_found_size_symbols = reported_count

        if symbol_size in (0, "unknown", None):
            if debug:
                if not already_counted:
                    not_found_size_symbols += 1
                print(f"DEBUG Symbol size of '{ta_sy}' not found in the "
                      f"size list.")
            continue

        node_list.append(
            create_new_node(section.values, symbol_size, ta_sy, total_samples))

    if args.loglevel in [logging.INFO, logging.DEBUG]:
        # Guard against a report without any valid symbol line.
        if correct_symbols:
            missing_ratio = (not_found_size_symbols / correct_symbols) * 100
        else:
            missing_ratio = 0.0
        print(f"INFO Total amount of hex lines: {hex_symbols}\n"
              f"INFO Total number of target symbols with missing sizes in "
              f"the sizefile: "
              f"{not_found_size_symbols}\n"
              f"INFO Total number of valid lines before getting the size: "
              f"{correct_symbols}\n"
              f"INFO Percentage of target symbols with missing sizes to "
              f"valid lines: "
              f"{missing_ratio:.2f}%")

    return node_list
def create_new_node(values, symbol_size, ta_sy, total_samples):
    """
    Build the node for ``ta_sy`` from all report entries that target it.

    Every entry whose "target_symbol" equals ``ta_sy`` contributes its
    samples to the node; if such an entry comes from a different source
    symbol, it is additionally registered as a predecessor.
    """
    new_node = Node(function_name=ta_sy, size=symbol_size)

    # Lazily walk only the entries that have ta_sy as their target.
    incoming = (entry for entry in values
                if entry.get("target_symbol") == ta_sy)
    for entry in incoming:
        new_node.add_samples(entry.get("samples"), total_samples)
        if entry.get("source_symbol") == ta_sy:
            # Source equals target: not recorded as a predecessor.
            continue
        new_node.add_predecessor(Predecessor(entry, total_samples))

    return new_node
def start(args):
    """
    Run the complete workflow: parse the report, create the nodes, sort them
    and write the requested output files.

    Args:
        args: Parsed arguments; ``report``, ``field_separator``, ``loglevel``,
            ``linker_script`` and ``template`` are read here.
    """
    print("Parse report...")
    report_lines = create_input_file_generator(args.report)
    parsed_report = PerfReportParser(args.field_separator).parse(report_lines)

    print("Create nodes...")
    nodes = parse_and_combine_information_from_multiple_files(
        args, parsed_report)
    sorter = HFSorter(args, nodes)

    print("Sorting...")
    ordered = sorter.sort()
    write_sorted_list(ordered)

    debug = args.loglevel == logging.DEBUG
    if debug:
        # remove debugging characters before the list goes into linker files
        cleaned = []
        for line in ordered:
            cleaned.append(line.lstrip("#").lstrip("+").strip())
        ordered = cleaned

    # Each writer gets its own copy of the list.
    if args.linker_script:
        write_linker_script(ordered.copy())
    if args.template:
        write_to_linker_template(args.template, ordered.copy(), debug)
def _build_argument_parser():
    """Create the command-line parser with all hfsort options."""
    cli = ArgumentParser(
        description='Heuristic Sort - Sorts a single report file into a sorted '
                    'list using the C3 heuristic. Outputs the sorted list to '
                    'the "sorted" file.'
    )

    # General options. Keyword arguments that merely restate argparse's
    # defaults (type=str, default=None, default=False for store_true, a dest
    # equal to the long option name) are left out.
    cli.add_argument(
        '-r', '--report',
        required=True,
        help="Specify the report file containing observed "
             "samples for each caller-callee call. The fields "
             "samples, source_symbol and target_symbol are "
             "required.")
    cli.add_argument(
        '-l', '--linker-script',
        action="store_true",
        help="Output the sorted list in a simple linker script "
             "format. Inserts the symbols in the "
             "form *(.text.symbol).")
    cli.add_argument(
        '-t', '--template',
        help="Integrate the sorted list into a template "
             "vmlinux.lds file. Inserts the symbols in the "
             "form *(.text.symbol) at the end of the "
             ".text section ending in *(.text*). If debug "
             "is enabled include the symbols "
             '__hfsort_start and __hfsort_end for use '
             'in similarity.py.')

    tuning_group = cli.add_argument_group("Fine-Tuning")
    tuning_group.add_argument(
        '-f', '--field-separator',
        default='$',
        help="Specify the field seperator which was used "
             "in the report file. "
             "Default: $")
    tuning_group.add_argument(
        '-p', '--min-probability',
        dest='k_min_prob',
        type=float,
        help="Set the minimum probability for an arc to "
             "be considered relevant. The weight of an "
             "arc is calculated by dividing its own "
             "number of samples by the total number of "
             "samples. "
             "Default: 0.1")
    tuning_group.add_argument(
        '-P', '--page-size',
        dest='pagesize',
        type=int,
        default=4096,
        help="Set the page size according to which the "
             "C3 heuristic sets the maximum "
             "cluster size. "
             "Default: 4096 [byte]")

    optional_group = cli.add_argument_group("Optional")
    optional_group.add_argument(
        '-k', '--kallsyms',
        action="store_true",
        help="Use the output from /proc/kallsyms to "
             "calculate the symbol sizes from. However, it "
             "provides only an upper bound for the symbol "
             "size, i.e. it is not bit precise.")
    optional_group.add_argument(
        '-S', '--sizefile',
        help="Specify a generated symbol size file that "
             "represents precise symbol sizes of all symbols "
             "in the kernel. The file can be created using "
             "the command 'nm -S vmlinux > sizefile'.")
    # -v and -d share one destination; without either flag it stays None.
    optional_group.add_argument(
        '-v', '--verbose',
        action="store_const",
        const=logging.INFO,
        dest="loglevel",
        help="Enable verbose statements.")
    optional_group.add_argument(
        '-d', '--debug',
        action="store_const",
        const=logging.DEBUG,
        dest="loglevel",
        help="Enable debugging statements and debugging "
             "information in files.")
    return cli


def main():
    """
    Command-line entry point: install the SIGINT handler, parse the
    arguments, reject the combination of --kallsyms and --sizefile, and hand
    the arguments over to ``start``.
    """
    signal.signal(signal.SIGINT, exit_handler)

    args = _build_argument_parser().parse_args()

    both_size_sources = args.kallsyms and args.sizefile
    if both_size_sources:
        exit("--sizefile and --kallsyms can't be used together. "
             "Only one option can be used to enhance the information.")

    start(args)
# Start the command-line interface only when this file is executed as a
# script; importing it as a module does not run anything.
if __name__ == "__main__":
    main()