Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Collect instrumentation data from correct dir. #45

Merged
merged 4 commits into from
Apr 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 37 additions & 29 deletions bin/plot_krun_results
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ Plot data from Krun results file(s).

import argparse
import datetime
import glob
import math
import matplotlib
matplotlib.use('Agg')
Expand Down Expand Up @@ -182,11 +183,11 @@ def get_instr_data(key, machine, instr_dir, pexec_idxs):
for pexec_idx in pexec_idxs:
file_ = os.path.join(instr_dir, "%s__%s__%s__%s.json.bz2" %
(bench, vm, variant, pexec_idx))
print("Loading: %s" % file_)
print('Loading: %s' % file_)
try:
js = read_krun_results_file(file_)
except IOError:
print("WARNING: Missing instrumentation data for: %s:%s:%s" % \
print('WARNING: Missing instrumentation data for: %s:%s:%s' % \
(machine, key, pexec_idx))
ret.append(None) # missing instr data
continue
Expand Down Expand Up @@ -317,8 +318,8 @@ def main(is_interactive, data_dcts, plot_titles, window_size, outfile,
try:
for index, page in enumerate(pages):
bmark, vm, mc = all_subplot_titles[index][0].split(', ')[:3]
print 'Plotting %s: %s (%s) on page %02d of %02d.' % \
(mc, bmark, vm, index + 1, len(pages))
print('Plotting %s: %s (%s) on page %02d of %02d.' % \
(mc, bmark, vm, index + 1, len(pages)))

# Strip out indices where the benchmark crashed.
def only_uncrashed(data):
Expand All @@ -334,8 +335,8 @@ def main(is_interactive, data_dcts, plot_titles, window_size, outfile,
ret.append([])
else:
if data == page: # Stops repeated printing of warning.
print("WARNING: requested pexec crashed: "
"%s, %s, %s, %s" % (mc, bmark, vm, i))
print('WARNING: requested pexec crashed: '
'%s, %s, %s, %s' % (mc, bmark, vm, i))
return ret

wct_page = only_uncrashed(page)
Expand Down Expand Up @@ -867,7 +868,7 @@ def draw_page(is_interactive, executions, cycles_executions,

n_execs = len(executions)
if n_execs == 0:
print("WARNING: empty page")
print('WARNING: empty page')
return None

n_rows = int(math.ceil(float(len(executions)) / MAX_SUBPLOTS_PER_ROW))
Expand Down Expand Up @@ -1031,7 +1032,8 @@ def set_pdf_metadata(pdf_document):


def get_data_dictionaries(json_files, benchmarks=[], wallclock_only=False,
outliers=False, unique_outliers=False, changepoints=False):
outliers=False, unique_outliers=False, changepoints=False,
instr_dir=None):
"""Read a list of BZipped JSON files and return their contents as a
dictionaries of key -> machine name -> results.

Expand Down Expand Up @@ -1109,18 +1111,8 @@ def get_data_dictionaries(json_files, benchmarks=[], wallclock_only=False,
machine = machine.split('.')[0]
machine_name = pretty_print_machine(machine)

# Is there instrumentation data with this results file?
instr_data = False
filename_root = filename[:-len('_results.json.bz2')]
instr_dir = filename_root + '_instr_data'
if 'instr_data' not in data_dictionary:
data_dictionary['instr_data'] = dict()
if os.path.isdir(instr_dir):
print 'Collecting instrumentation data for %s from %s.' % \
(filename, instr_dir)
instr_data = True
else:
print 'No VM instrumentation data is available.'

# Collect any results requested from this file.
if benchmarks == []: # Chart all available data from this file.
Expand All @@ -1141,14 +1133,14 @@ def get_data_dictionaries(json_files, benchmarks=[], wallclock_only=False,
data_dictionary['common_outliers'][key] = dict()
data_dictionary['unique_outliers'][key] = dict()
data_dictionary['data'][key][machine] = data['wallclock_times'][key]
print ('Found: %s:%s (%d executions).' % (machine, key,
len(data['wallclock_times'][key])))
print('Found: %s:%s (%d executions).' % (machine, key,
len(data['wallclock_times'][key])))
if wallclock_only:
data_dictionary['cycles_counts'][key][machine] = None
data_dictionary['instr_data'][key][machine] = None
else:
data_dictionary['cycles_counts'][key][machine] = data['core_cycle_counts'][key]
if instr_data:
if instr_dir:
data_dictionary['instr_data'][key][machine] = \
get_instr_data(
key, machine, instr_dir,
Expand Down Expand Up @@ -1216,8 +1208,7 @@ def get_data_dictionaries(json_files, benchmarks=[], wallclock_only=False,
# Hope the key appears in another file, checked below.
continue
if len(data['wallclock_times'][key]) == 0:
print('WARNING: Skipping: %s from %s (no executions)' %
(key, machine))
print('WARNING: Skipping: %s from %s (no executions)' % (key, machine))
if machine not in skipped_keys:
skipped_keys[machine] = list()
skipped_keys[machine].append(key)
Expand Down Expand Up @@ -1284,11 +1275,11 @@ def get_data_dictionaries(json_files, benchmarks=[], wallclock_only=False,
'%g process executions for the benchmark.' %
(p_exec, key, machine, len(data['wallclock_times'][key])))
# Add run sequence to data dictionary.
print 'Adding run sequence to ', key, machine
print('Adding run sequence to ', key, machine)
data_dictionary['data'][key][machine].append(data['wallclock_times'][key][p_exec])
if not wallclock_only:
data_dictionary['cycles_counts'][key][machine].append(data['core_cycle_counts'][key][p_exec])
if instr_data:
if instr_dir:
data_dictionary['instr_data'][key][machine].append(
get_instr_data(key, machine, instr_dir, [p_exec])[0])
else:
Expand Down Expand Up @@ -1351,6 +1342,11 @@ def create_cli_parser():
default=[],
type=str,
help='One or more Krun result files.')
parser.add_argument('--instr-dir',
action='store',
default=None,
type=str,
help='A directory containing VM instrumentation data.')
parser.add_argument('--outfile', '-o',
action='store',
dest='outfile',
Expand Down Expand Up @@ -1542,7 +1538,7 @@ if __name__ == '__main__':
core_cycles = [int(cycle) for cycle in cycles_str]
except ValueError:
fatal_error('invalid --core-cycles argument')
print 'Plotting cycle counts for core(s): %s' % ','.join([str(core) for core in core_cycles])
print('Plotting cycle counts for core(s): %s' % ','.join([str(core) for core in core_cycles]))
else:
core_cycles = None

Expand Down Expand Up @@ -1570,6 +1566,18 @@ if __name__ == '__main__':
options.inset_xlimits[1] > options.xlimits[1]:
fatal_error('--inset-xlimits range must be inside --xlimits range')

if not options.instr_dir:
print('No VM instrumentation data is available.')
else:
if not os.path.exists(options.instr_dir):
fatal_error('%s (VM instrumentation data directory) does not exist.' %
options.instr_dir)
elif not os.path.isdir(options.instr_dir):
fatal_error('%s (VM instrumentation data directory) is not a directory.' %
options.instr_dir)
else:
print('Collecting instrumentation data from %s.' % options.instr_dir)

# Smaller fonts for on-screen plots.
if options.outfile is None:
TICK_FONTSIZE = 12
Expand All @@ -1580,7 +1588,8 @@ if __name__ == '__main__':
data, plot_titles = get_data_dictionaries(options.json_files[0],
options.benchmarks, options.wallclock,
options.outliers, options.unique_outliers,
options.changepoints or options.changepoint_means)
options.changepoints or options.changepoint_means,
options.instr_dir)

# Find the number of in-proc iterations in a non-crashed pexec
# Assumes we use the same number of in-proc iterations for all pexecs.
Expand All @@ -1595,8 +1604,7 @@ if __name__ == '__main__':
iter_lens = len(pexec)
raise StopIteration() # to break out of all loops at once
else:
print('could not find a non-crashing pexec')
sys.exit(1)
fatal_error('Could not find a non-crashing pexec')
except StopIteration:
pass # good, we found some non-crash data

Expand Down
25 changes: 22 additions & 3 deletions bin/warmup_stats
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,9 @@ def create_arg_parser():
help='Virtual machine under test (in title-case).')
parser.add_argument('--uname', '-u', dest='uname', action='store', default='',
type=str, help='Full output of `uname -a` from benchmarking machine.')
parser.add_argument('--instr-dir', dest='instr_dir', action='store', default='',
type=str, help=('Directory containing instrumentation data. '
'Only useful when generating plots.'))
# What output file format should be generated?
format_group = parser.add_mutually_exclusive_group(required=True)
format_group.add_argument('--html', dest='type_html', action='store_true', default=False,
Expand Down Expand Up @@ -265,6 +268,8 @@ class BenchmarkFile(object):
# same number of iterations for all pexecs.
data = read_krun_results_file(filename)
found_full_pexec = False
if 'window_size' in data:
self.window = data['window_size']
for bench in data['wallclock_times']:
if found_full_pexec:
break
Expand Down Expand Up @@ -352,6 +357,15 @@ def main(options):
fatal('--uname or -u must be used with CSV input files.')
if options.output_diff and len(input_files) != 2:
fatal('--output-diff expects exactly 2 CSV input files.')
if options.instr_dir:
if not os.path.exists(options.instr_dir):
fatal('%s (VM instrumentation data directory) does not exist.' % options.instr_dir)
elif not os.path.isdir(options.instr_dir):
fatal('%s (VM instrumentation data directory) is not a directory.' % options.instr_dir)
else:
debug('Collecting instrumentation data from %s.' % options.instr_dir)
else:
debug('No VM instrumentation data is available.')
python_path, pypy_path, pdflatex_path, r_path = check_environment(need_latex=need_latex,
need_plots=need_plots)
info('Processing input files, converting to Krun JSON if necessary.')
Expand Down Expand Up @@ -420,9 +434,14 @@ def main(options):
'same number of iterations.' %
(bm.csv_filename, bm.iterations, iterations))
sys.exit(1)
cli = [python_path, SCRIPT_PLOT_KRUN_RESULTS, '--with-changepoint-means',
'--with-outliers', '-o', options.output_plots, '-w', str(window),
' '.join(input_files)]
if options.instr_dir:
cli = [python_path, SCRIPT_PLOT_KRUN_RESULTS, '--with-changepoint-means',
'--with-outliers', '-o', options.output_plots, '-w', str(window),
'--instr-dir', options.instr_dir, ' '.join(input_files)]
else:
cli = [python_path, SCRIPT_PLOT_KRUN_RESULTS, '--with-changepoint-means',
'--with-outliers', '-o', options.output_plots, '-w', str(window),
' '.join(input_files)]
debug('Running: %s' % ' '.join(cli))
_ = subprocess.check_output(' '.join(cli), shell=True)
debug('Written out: %s' % options.output_plots)
Expand Down