Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Collect instrumentation data from correct dir. #45

Merged
merged 4 commits into from
Apr 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 37 additions & 29 deletions bin/plot_krun_results
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ Plot data from Krun results file(s).

import argparse
import datetime
import glob
import math
import matplotlib
matplotlib.use('Agg')
Expand Down Expand Up @@ -182,11 +183,11 @@ def get_instr_data(key, machine, instr_dir, pexec_idxs):
for pexec_idx in pexec_idxs:
file_ = os.path.join(instr_dir, "%s__%s__%s__%s.json.bz2" %
(bench, vm, variant, pexec_idx))
print("Loading: %s" % file_)
print('Loading: %s' % file_)
try:
js = read_krun_results_file(file_)
except IOError:
print("WARNING: Missing instrumentation data for: %s:%s:%s" % \
print('WARNING: Missing instrumentation data for: %s:%s:%s' % \
(machine, key, pexec_idx))
ret.append(None) # missing instr data
continue
Expand Down Expand Up @@ -317,8 +318,8 @@ def main(is_interactive, data_dcts, plot_titles, window_size, outfile,
try:
for index, page in enumerate(pages):
bmark, vm, mc = all_subplot_titles[index][0].split(', ')[:3]
print 'Plotting %s: %s (%s) on page %02d of %02d.' % \
(mc, bmark, vm, index + 1, len(pages))
print('Plotting %s: %s (%s) on page %02d of %02d.' % \
(mc, bmark, vm, index + 1, len(pages)))

# Strip out indices where the benchmark crashed.
def only_uncrashed(data):
Expand All @@ -334,8 +335,8 @@ def main(is_interactive, data_dcts, plot_titles, window_size, outfile,
ret.append([])
else:
if data == page: # Stops repeated printing of warning.
print("WARNING: requested pexec crashed: "
"%s, %s, %s, %s" % (mc, bmark, vm, i))
print('WARNING: requested pexec crashed: '
'%s, %s, %s, %s' % (mc, bmark, vm, i))
return ret

wct_page = only_uncrashed(page)
Expand Down Expand Up @@ -867,7 +868,7 @@ def draw_page(is_interactive, executions, cycles_executions,

n_execs = len(executions)
if n_execs == 0:
print("WARNING: empty page")
print('WARNING: empty page')
return None

n_rows = int(math.ceil(float(len(executions)) / MAX_SUBPLOTS_PER_ROW))
Expand Down Expand Up @@ -1031,7 +1032,8 @@ def set_pdf_metadata(pdf_document):


def get_data_dictionaries(json_files, benchmarks=[], wallclock_only=False,
outliers=False, unique_outliers=False, changepoints=False):
outliers=False, unique_outliers=False, changepoints=False,
instr_dir=None):
"""Read a list of BZipped JSON files and return their contents as a
dictionaries of key -> machine name -> results.

Expand Down Expand Up @@ -1109,18 +1111,8 @@ def get_data_dictionaries(json_files, benchmarks=[], wallclock_only=False,
machine = machine.split('.')[0]
machine_name = pretty_print_machine(machine)

# Is there instrumentation data with this results file?
instr_data = False
filename_root = filename[:-len('_results.json.bz2')]
instr_dir = filename_root + '_instr_data'
if 'instr_data' not in data_dictionary:
data_dictionary['instr_data'] = dict()
if os.path.isdir(instr_dir):
print 'Collecting instrumentation data for %s from %s.' % \
(filename, instr_dir)
instr_data = True
else:
print 'No VM instrumentation data is available.'

# Collect any results requested from this file.
if benchmarks == []: # Chart all available data from this file.
Expand All @@ -1141,14 +1133,14 @@ def get_data_dictionaries(json_files, benchmarks=[], wallclock_only=False,
data_dictionary['common_outliers'][key] = dict()
data_dictionary['unique_outliers'][key] = dict()
data_dictionary['data'][key][machine] = data['wallclock_times'][key]
print ('Found: %s:%s (%d executions).' % (machine, key,
len(data['wallclock_times'][key])))
print('Found: %s:%s (%d executions).' % (machine, key,
len(data['wallclock_times'][key])))
if wallclock_only:
data_dictionary['cycles_counts'][key][machine] = None
data_dictionary['instr_data'][key][machine] = None
else:
data_dictionary['cycles_counts'][key][machine] = data['core_cycle_counts'][key]
if instr_data:
if instr_dir:
data_dictionary['instr_data'][key][machine] = \
get_instr_data(
key, machine, instr_dir,
Expand Down Expand Up @@ -1216,8 +1208,7 @@ def get_data_dictionaries(json_files, benchmarks=[], wallclock_only=False,
# Hope the key appears in another file, checked below.
continue
if len(data['wallclock_times'][key]) == 0:
print('WARNING: Skipping: %s from %s (no executions)' %
(key, machine))
print('WARNING: Skipping: %s from %s (no executions)' % (key, machine))
if machine not in skipped_keys:
skipped_keys[machine] = list()
skipped_keys[machine].append(key)
Expand Down Expand Up @@ -1284,11 +1275,11 @@ def get_data_dictionaries(json_files, benchmarks=[], wallclock_only=False,
'%g process executions for the benchmark.' %
(p_exec, key, machine, len(data['wallclock_times'][key])))
# Add run sequence to data dictionary.
print 'Adding run sequence to ', key, machine
print('Adding run sequence to ', key, machine)
data_dictionary['data'][key][machine].append(data['wallclock_times'][key][p_exec])
if not wallclock_only:
data_dictionary['cycles_counts'][key][machine].append(data['core_cycle_counts'][key][p_exec])
if instr_data:
if instr_dir:
data_dictionary['instr_data'][key][machine].append(
get_instr_data(key, machine, instr_dir, [p_exec])[0])
else:
Expand Down Expand Up @@ -1351,6 +1342,11 @@ def create_cli_parser():
default=[],
type=str,
help='One or more Krun result files.')
parser.add_argument('--instr-dir',
action='store',
default=None,
type=str,
help='A directory containing VM instrumentation data.')
parser.add_argument('--outfile', '-o',
action='store',
dest='outfile',
Expand Down Expand Up @@ -1542,7 +1538,7 @@ if __name__ == '__main__':
core_cycles = [int(cycle) for cycle in cycles_str]
except ValueError:
fatal_error('invalid --core-cycles argument')
print 'Plotting cycle counts for core(s): %s' % ','.join([str(core) for core in core_cycles])
print('Plotting cycle counts for core(s): %s' % ','.join([str(core) for core in core_cycles]))
else:
core_cycles = None

Expand Down Expand Up @@ -1570,6 +1566,18 @@ if __name__ == '__main__':
options.inset_xlimits[1] > options.xlimits[1]:
fatal_error('--inset-xlimits range must be inside --xlimits range')

if not options.instr_dir:
print('No VM instrumentation data is available.')
else:
if not os.path.exists(options.instr_dir):
fatal_error('%s (VM instrumentation data directory) does not exist.' %
options.instr_dir)
elif not os.path.isdir(options.instr_dir):
fatal_error('%s (VM instrumentation data directory) is not a directory.' %
options.instr_dir)
else:
print('Collecting instrumentation data from %s.' % options.instr_dir)

# Smaller fonts for on-screen plots.
if options.outfile is None:
TICK_FONTSIZE = 12
Expand All @@ -1580,7 +1588,8 @@ if __name__ == '__main__':
data, plot_titles = get_data_dictionaries(options.json_files[0],
options.benchmarks, options.wallclock,
options.outliers, options.unique_outliers,
options.changepoints or options.changepoint_means)
options.changepoints or options.changepoint_means,
options.instr_dir)

# Find the number of in-proc iterations in a non-crashed pexec
# Assumes we use the same number of in-proc iterations for all pexecs.
Expand All @@ -1595,8 +1604,7 @@ if __name__ == '__main__':
iter_lens = len(pexec)
raise StopIteration() # to break out of all loops at once
else:
print('could not find a non-crashing pexec')
sys.exit(1)
fatal_error('Could not find a non-crashing pexec')
except StopIteration:
pass # good, we found some non-crash data

Expand Down
25 changes: 22 additions & 3 deletions bin/warmup_stats
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,9 @@ def create_arg_parser():
help='Virtual machine under test (in title-case).')
parser.add_argument('--uname', '-u', dest='uname', action='store', default='',
type=str, help='Full output of `uname -a` from benchmarking machine.')
parser.add_argument('--instr-dir', dest='instr_dir', action='store', default='',
type=str, help=('Directory containing instrumentation data. '
'Only useful when generating plots.'))
# What output file format should be generated?
format_group = parser.add_mutually_exclusive_group(required=True)
format_group.add_argument('--html', dest='type_html', action='store_true', default=False,
Expand Down Expand Up @@ -265,6 +268,8 @@ class BenchmarkFile(object):
# same number of iterations for all pexecs.
data = read_krun_results_file(filename)
found_full_pexec = False
if 'window_size' in data:
self.window = data['window_size']
for bench in data['wallclock_times']:
if found_full_pexec:
break
Expand Down Expand Up @@ -352,6 +357,15 @@ def main(options):
fatal('--uname or -u must be used with CSV input files.')
if options.output_diff and len(input_files) != 2:
fatal('--output-diff expects exactly 2 CSV input files.')
if options.instr_dir:
if not os.path.exists(options.instr_dir):
fatal('%s (VM instrumentation data directory) does not exist.' % options.instr_dir)
elif not os.path.isdir(options.instr_dir):
fatal('%s (VM instrumentation data directory) is not a directory.' % options.instr_dir)
else:
debug('Collecting instrumentation data from %s.' % options.instr_dir)
else:
debug('No VM instrumentation data is available.')
python_path, pypy_path, pdflatex_path, r_path = check_environment(need_latex=need_latex,
need_plots=need_plots)
info('Processing input files, converting to Krun JSON if necessary.')
Expand Down Expand Up @@ -420,9 +434,14 @@ def main(options):
'same number of iterations.' %
(bm.csv_filename, bm.iterations, iterations))
sys.exit(1)
cli = [python_path, SCRIPT_PLOT_KRUN_RESULTS, '--with-changepoint-means',
'--with-outliers', '-o', options.output_plots, '-w', str(window),
' '.join(input_files)]
if options.instr_dir:
cli = [python_path, SCRIPT_PLOT_KRUN_RESULTS, '--with-changepoint-means',
'--with-outliers', '-o', options.output_plots, '-w', str(window),
'--instr-dir', options.instr_dir, ' '.join(input_files)]
else:
cli = [python_path, SCRIPT_PLOT_KRUN_RESULTS, '--with-changepoint-means',
'--with-outliers', '-o', options.output_plots, '-w', str(window),
' '.join(input_files)]
debug('Running: %s' % ' '.join(cli))
_ = subprocess.check_output(' '.join(cli), shell=True)
debug('Written out: %s' % options.output_plots)
Expand Down