-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcbench.py
350 lines (310 loc) · 12.9 KB
/
cbench.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
#!/usr/bin/python3
# A compile time benchmark for the ninja build system.
#
# SPDX-License-Identifier: MIT
# See LICENSE for the full text of the license
import os
import math
import json
import datetime
import argparse
# some global values
version_key ='cbench'
file_version_key ='cbench_file_version'
files_key = 'files'
version ='0.1.0'
file_version ='1'
output_key = 'output'
mean_key = 'mean'
stddev_key = 'stddev'
durations_key = 'durations'
tag_key = 'tag'
timestamp_key = 'timestamp'
output_filename = '.cbench_baseline.json'
jobs_key = 'jobs'
path_key = 'path'
class parsed_log:
def __init__(self, files):
"""Constructs a new class instance for a parsed log"""
self.files = files
def total_time(self):
"""Returns the total compilation time in ms"""
duration = 0
for f in self.files:
duration += self.files[f].duration
return duration
def executed_commands(self):
"""Returns the number of executed commands"""
return len(self.files)
class file_data:
def __init__(self, start: str, end: str, timestamp: str, output: str):
"""Constructs a new file record.
Arguments:
start -- Start time of compilation in ms
end -- End time of compilation in ms
timestamp -- timestamp of output in ns
output -- output file
"""
self.name = output
self.start = int(start)
self.end = int(end)
self.timestamp = int(timestamp)
self.duration = self.end - self.start
def update(self, start: str, end: str, timestamp: str, output: str):
new_ts = int(timestamp)
if new_ts>self.timestamp and output==self.name:
self.start = int(start)
self.end = int(end)
self.timestamp = int(timestamp)
self.duration = self.end - self.start
elif output!=self.name:
raise RuntimeError("Cannot update file '{}' with '{}'!".format(self.name,output))
def __str__(self) -> str:
return "{{'duration_ms':{}}}".format(self.duration)
def __repr__(self) -> str:
return str(self)
def parse_ninja_log(path: str) -> parsed_log:
"""Parses the ninja log file and extracts the compilation times."""
ninja_log = open(path, "r")
lines = ninja_log.readlines()
first = lines.pop(0)
if not "# ninja log v5" in first:
raise RuntimeError("Failed to open '{}': the file does not seem to be a ninja log!\nExpected '# ninja log v5', got '{}'".format(path, first))
file_dict = {}
for line in lines:
data = line.split()
if len(data)!=5:
raise RuntimeError("Failed to open '{}': the file does not seem to be a ninja log!".format(path))
start = data[0]
end = data[1]
timestamp = data[2]
output = data[3]
if timestamp=='0':
continue
if output in file_dict:
file_dict[output].update(start,end,timestamp,output)
else:
file = file_data(start,end,timestamp,output)
file_dict[output] = file
return parsed_log(file_dict)
def process_baseline_data(args, passes : list):
rv = []
first = passes.pop(0)
for f in first.files:
mean = first.files[f].duration
sqrmean = mean*mean
count = 1.0
durations = [mean]
for p in passes:
if f in p.files:
mean += p.files[f].duration
sqrmean += p.files[f].duration*p.files[f].duration
count += 1.0
durations.append(p.files[f].duration)
vals = {}
mean = mean/count
vals[output_key] = f
vals[mean_key] = mean
vals[stddev_key] = math.sqrt( (sqrmean - count*(mean*mean))/(count-1.0))
vals[durations_key] = durations
rv.append(vals)
rv.sort(key=lambda x : x[mean_key])
dict = {}
dict[version_key] = version
dict[file_version_key] = file_version
dict[tag_key] = args.tag
dict[jobs_key] = args.jobs
dict[path_key] = args.workdir
dict[timestamp_key] = "{}".format(datetime.datetime.now())
dict[files_key] = rv
return dict
def write_json(report : dict, output_path : str ='cbench_baseline.json'):
json_object = json.dumps(report, indent=4)
with open(output_path, "w") as outfile:
outfile.write(json_object)
def read_json(input_path : str ='cbench_baseline.json') -> dict:
with open(input_path, "r") as infile:
txt = infile.read()
data = json.loads(txt)
if not file_version_key in data or not files_key in data or not data[file_version_key]==file_version:
raise RuntimeError("File '{}' is not a baseline dictionary!".format(input_path))
return data
def run_compiler(jobs: int):
os.system('ninja clean')
os.system('ninja -j{}'.format(jobs))
return parse_ninja_log('.ninja_log')
def create_baseline(args) -> dict:
passes = []
count = args.baseline
for i in range(0,count):
parsed = run_compiler(args.jobs)
print("Run {}/{} : Duration: {} ms for {} commands".format(i+1,count,parsed.total_time(),parsed.executed_commands()))
passes.append(parsed)
report = process_baseline_data(args,passes)
print_baseline(report,args)
write_json(report)
return report
def print_baseline(baseline:dict, args):
print("\n\n\n===========================================================")
print("Compilation baseline:\n{}".format(datetime.datetime.now()))
if args.tag is not None:
print("Tag: '{}'".format(args.tag))
print("{:30} : {}\n""".format('Duration (std dev)', 'File'))
files = baseline['files']
for f in files:
print("{:10.2f} ms (+-{:10.2f} ms) : {}".format(f[mean_key],f[stddev_key],f[output_key]))
def find_in_baseline(baseline:dict, name):
files = baseline['files']
for f in files:
if f[output_key] == name:
return f
return None
diff_ms_key = 'diff_ms'
diff_pct_key = 'diff_pct'
sig_key = 'sig'
def compare_baseline(baseline:dict, current):
rv = []
for f in current.files:
file = current.files[f]
bs = find_in_baseline(baseline,file.name)
if bs is None:
print("skipping "+file.name + " : not in baseline!")
continue
vals = {}
diff_ms = file.duration - bs[mean_key]
vals[output_key] = file.name
vals[mean_key] = bs[mean_key]
vals[stddev_key] = bs[stddev_key]
vals[diff_ms_key] = diff_ms
if bs[mean_key]>0:
vals[diff_pct_key] = diff_ms/bs[mean_key]*100
else:
vals[diff_pct_key] = 0
if bs[stddev_key]>0:
vals[sig_key] = abs(diff_ms)/bs[stddev_key]
else:
vals[sig_key] = abs(diff_ms)
rv.append(vals)
rv.sort(key=lambda x : x[sig_key])
return rv
def compute_total_changes(changes:list):
"""Computes the total change of the compile time and its significance.
The assumption is that the uncertainties are not correlated.
Returns the total time in s, change to previous in s, and standard dev of total."""
total_sum = 0
total_diff = 0
total_var = 0
for r in changes:
total_sum += r[mean_key]
total_diff += r[diff_ms_key]
total_var += r[stddev_key]*r[stddev_key]
total_var = math.sqrt(total_var)
return total_sum/1000.0, total_diff/1000.0, total_var/1000.0
def compute_probability(x:float):
"""Compute the chance that a random value is outside of the interval [mean-x*sigma,mean+x*sigma]
when assuming a gaussian distribution.
Arguments:
x -- diff from mean in standard deviations"""
return 1.0 - math.erf(x/(math.sqrt(2)))
def print_significant_changes(baseline:dict, changes:list, args):
limit = args.sigma
print("""
Compile time benchmark report.
cbench {}
{}
Significant changes larger than {} stddevs
Change [ms] (percent, stddevs) : file
================================================
""".format(version,datetime.datetime.now(),limit))
for r in changes:
if r[sig_key]>limit:
print("{:10.1f} ms ({:5.1f} %, {:5.1f}) : {}".format(r[diff_ms_key],r[diff_pct_key],r[sig_key],r[output_key]))
total, diff, stddev = compute_total_changes(changes)
print("\n================================================")
print("Baseline : {:10.1f} s".format(total))
print("Standard dev : {:10.1f} s".format(stddev))
print("created : {}".format(baseline[timestamp_key]))
print("baseline jobs: {}".format(baseline[jobs_key]))
if baseline[tag_key] is not None:
print("old tag : {:10}".format(baseline[tag_key]))
print("New : {:10.1f} s".format(total + diff))
if args.tag is not None:
print("New tag : {:10}".format(args.tag))
print("Change : {:10.1f} s".format(diff))
print("jobs : {}".format(args.jobs))
if stddev>0:
sig = abs(diff)/stddev
print("Change/stddev: {:10.1f}".format(sig))
p = compute_probability(sig)
print("p-value : {:10.5f}".format(p))
print("(a p-value smaller than 0.05 indicates a significant change)")
if baseline[jobs_key]!=args.jobs:
print("warning: different numbers of jobs may lead to noisy measurements!")
else:
print("Error while computing significance! Standard deviation is 0!")
def run_benchmark(args):
try:
baseline = read_json()
current = run_compiler(args.jobs)
result = compare_baseline(baseline, current)
print_significant_changes(baseline, result, args)
except FileNotFoundError:
print("Failed to read baseline data.")
def get_def_job_count() -> int:
"""Returns the number of cpu cores minus 2 or 1 if there are too few cores."""
cnt = os.cpu_count()
if cnt>2:
return cnt-2
else:
return 1
def dir_path(string: str):
"""Checks if a given string is a valid directory."""
if os.path.isdir(string):
return string
else:
raise NotADirectoryError(string)
def print_version():
"""Prints the version and exits the program."""
print(version)
exit(0)
def get_commandline_arguments():
""""Parses the arguments from the command line and performs error checks.
Returns the arguments as python object on success."""
jobs = get_def_job_count()
parser = argparse.ArgumentParser(prog = 'cbench',
description = 'A benchmark to measure and improve compilation times in conjunction with the ninja build system. The program will execute "ninja clean" and "ninja -j J" in the given working directory and parse the .ninja_log file afterwards. Create a baseline by running e.g. "cbench -b 5" first, then modify your code and see the compilation speed changes with "cbench".')
parser.add_argument('workdir', type=dir_path, default='.', nargs='?', help="the directory where the ninja commands are executed.")
parser.add_argument('-j', '--jobs', type=int, default=jobs, metavar='J',
help="""the number of jobs to spawn when compiling. Less parallel jobs reduce the noise. You should set this value to no more than your physical core count (no hyperthreading). J must be an integer larger than 1. [defaults to {}]""".format(jobs))
parser.add_argument('-b', '--baseline', type=int, metavar='N', help='creates a baseline by running the compilation N times. N must be an integer larger than 2. If the argument is omitted, then the current compilation will be comared to the baseline.')
parser.add_argument('-s', '--sigma', type=float, default=3.0, metavar='S', help='the required change to the compilation time as S*sigma (standard deviations) of a file so that it is added to the report. Must be a float. [defaults to 3.0]')
parser.add_argument('--tag', type=str, metavar='TAG', help='Adds a tag to the generated report.')
parser.add_argument('--version', action='store_true', default=False, help='prints the program version')
try:
args = parser.parse_args()
if args.version:
print_version()
if args.jobs < 1:
print("error: The argument to '-j' must be a positive integer!")
exit(-1)
os.chdir(args.workdir)
if not os.path.exists('rules.ninja'):
print("error: file {}/rules.ninja does no exist! given path is not a build directory for ninja!".format(args.workdir))
exit(-1)
return args
except NotADirectoryError as inst:
print("error: The given path '{}' is not a directory!".format(inst))
exit(-1)
def main():
args = get_commandline_arguments()
if args.baseline is not None:
print("Creating compilation baseline ...")
if args.baseline<3:
print("error: You should at least run the compilation 3 times to get an estimation of the variance of the baseline! Please pass '-b 3' as argument!")
exit(-1)
create_baseline(args)
else:
print("Running compilation benchmark ...")
run_benchmark(args)
if __name__ == '__main__':
main()