From f8be43746ed61acd784021848c81fb301f353d13 Mon Sep 17 00:00:00 2001
From: Stan Brubaker <120737309+stanbrub@users.noreply.github.com>
Date: Fri, 22 Nov 2024 15:54:19 -0700
Subject: [PATCH] feat: Adhoc Dashboard Working Prototype (#387)

---
 .../tests/standard/StandardTestRunner.java   |   2 +-
 .../run/profile/queries/adhoc_tables.dh.py   |  61 ----
 .../queries/dashboards/adhoc_dashboard.dh.py | 190 +++++++++++
 .../dashboards/benchmark_functions.dh.py     | 294 ++++++++++++++++++
 4 files changed, 485 insertions(+), 62 deletions(-)
 delete mode 100644 src/main/resources/io/deephaven/benchmark/run/profile/queries/adhoc_tables.dh.py
 create mode 100644 src/main/resources/io/deephaven/benchmark/run/profile/queries/dashboards/adhoc_dashboard.dh.py
 create mode 100644 src/main/resources/io/deephaven/benchmark/run/profile/queries/dashboards/benchmark_functions.dh.py

diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java
index a495ec9..7a0f939 100644
--- a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java
+++ b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java
@@ -388,7 +388,7 @@ void addServiceLog(Bench api) {
             return;
         api.log().add("deephaven-engine", logText);
         var metrics = new Metrics(Timer.now(), "test-runner", "teardown.services");
-        metrics.set("log", timer.duration().toMillis(), "standard");
+        metrics.set("log", timer.duration().toMillis() / 1000.0, "standard");
         api.metrics().add(metrics);
     }
 
diff --git a/src/main/resources/io/deephaven/benchmark/run/profile/queries/adhoc_tables.dh.py b/src/main/resources/io/deephaven/benchmark/run/profile/queries/adhoc_tables.dh.py
deleted file mode 100644
index aac527b..0000000
--- a/src/main/resources/io/deephaven/benchmark/run/profile/queries/adhoc_tables.dh.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# Copyright (c) 2023-2024 Deephaven Data Labs and Patent Pending
-#
-# Supporting Deephaven queries to use the benchmark_snippet to investigate changes between two or more
-# adhoc benchmark set runs
-# - Make a table containing rates and diffs for the given benchmark sets
-# - Expects the following arguments set in globals() before execution
-#   - benchmark_sets_arg = []  # The set names to run including user (ex. ['user1/myset1','user1/myset2')
-#   - benchmark_set_runs_arg = 5  # Number of runs to load from each set (Can be greater than available)
-# Requirements: Deephaven 0.32.0 or greater
-
-from urllib.request import urlopen; import os, re
-
-benchmark_sets_arg = ['stanbrub/v0.34.0','stanbrub/v0.35.0']
-benchmark_max_sets_arg = 10
-
-# benchmark_sets_prefix = os.path.commonprefix(benchmark_sets_arg)
-benchmark_sets_prefix = 'stanbrub/v'
-
-result = None
-first_set = None
-for benchmark_set in benchmark_sets_arg:
-    root = 'file:///data' if os.path.exists('/data/deephaven-benchmark') else 'https://storage.googleapis.com'
-    with urlopen(root + '/deephaven-benchmark/benchmark_tables.dh.py') as r:
-        benchmark_storage_uri_arg = root + '/deephaven-benchmark'
-        benchmark_category_arg ='adhoc'
-        benchmark_actor_filter_arg = os.path.dirname(benchmark_set)
-        benchmark_set_filter_arg = os.path.basename(benchmark_set)
-        benchmark_metric_props_arg = ['data.file.size']
-        exec(r.read().decode(), globals(), locals())
-
-    set_name = normalize_name(benchmark_set.replace(benchmark_sets_prefix,''))
-    tbl = bench_results_sets.group_by(['benchmark_name']) \
-        .view(['Benchmark=benchmark_name',
-            'Variability__' + set_name + '=(float)rstd(merge_arrays("long",set_op_rates)) / 100.0',
-            'Rate__' + set_name + '=(long)median(op_rate)',
-            'DataSize__' + set_name + '=(long)median(data_file_size)'])
-    if result is None:
-        result = tbl
-        first_set = set_name
-    else:
-        first_rate = 'Rate__' + first_set
-        curr_rate = 'Rate__' + set_name
-        result = result.join(tbl, on=['Benchmark'], joins=['Variability__' + set_name, curr_rate, 'DataSize__' + set_name])
-        result = result.update_view([
-            'Change__' + set_name + '=(float)gain(' + first_rate + ',' + curr_rate + ') / 100.0'
-        ])
-
-bench_results = bench_metrics = bench_platforms = bench_metrics_diff = None
-bench_results_diff = bench_results_change = tbl = None
-
-column_formats = []
-for col in result.columns:
-    n = col.name
-    if n.startswith('Variability') or n.startswith('Change'):
-        column_formats.append(n + '=Decimal(`0.0%`)')
-    if n.startswith('Rate'):
-        column_formats.append(n + '=Decimal(`###,##0`)')
-
-adhoc_set_compare = result.format_columns(column_formats)
-result = None
-
diff --git a/src/main/resources/io/deephaven/benchmark/run/profile/queries/dashboards/adhoc_dashboard.dh.py b/src/main/resources/io/deephaven/benchmark/run/profile/queries/dashboards/adhoc_dashboard.dh.py
new file mode 100644
index 0000000..20ce9ed
--- /dev/null
+++ b/src/main/resources/io/deephaven/benchmark/run/profile/queries/dashboards/adhoc_dashboard.dh.py
@@ -0,0 +1,190 @@
+# Copyright (c) 2022-2024 Deephaven Data Labs and Patent Pending
+#
+# Deephaven Adhoc dashboard for visualizing benchmark data captured with Github adhoc workflows.
+# The dashboard shows benchmark rates, metrics (e.g. GC, Compile, Heap), and platform comparisons
+# (e.g. java version, hardware model, python versions) between data sets.
+#
+# Requirements: Deephaven 0.36.1 or greater
+#
+# ruff: noqa: F821
+from urllib.request import urlopen; import os
+from deephaven import ui, merge
+from deephaven.ui import use_memo, use_state
+from deephaven.plot.figure import Figure
+from deephaven.plot import PlotStyle
+
+root = 'file:///nfs' if os.path.exists('/nfs/deephaven-benchmark') else 'https://storage.googleapis.com'
+with urlopen(f'{root}/deephaven-benchmark/benchmark_functions.dh.py') as r:
+    exec(r.read().decode(), globals(), locals())
+    storage_uri = f'{root}/deephaven-benchmark'
+
+def use_dashboard_input():
+    actor, set_actor = use_state('')
+    prefix, set_prefix = use_state('')
+    user_input, set_user_input = use_state({'actor':'','prefix':''})
+
+    def update_user_input():
+        set_user_input({'actor':actor,'prefix':prefix})
+
+    input_panel = ui.flex(
+        ui.text_field(label='Actor', label_position='side', value=actor, on_change=set_actor),
+        ui.text_field(label='Set Label', label_position='side', value=prefix, on_change=set_prefix),
+        ui.button('Apply', on_press=lambda: update_user_input()),
+        direction="row"
+    )
+    return user_input, input_panel
+
+def use_benchmark_chart(result_table, row_selection, user_input):
+    actor = user_input['actor']; prefix = user_input['prefix']
+
+    setids = get_setids(result_table)
+    setprefix = f'{actor}/{prefix}'
+    selected_benchmark = row_selection['Benchmark']['value']
+    ui_figure = Figure()
+    for setid in setids:
+        setcol = normalize_column_name(setprefix,setid)
+        chart_table = result_table.where([f'benchmark_name=`{selected_benchmark}`',f'set_id=`{setid}`']) \
+            .sort(['timestamp']).update('run=i+1')
+        ui_figure = ui_figure.plot_xy(series_name=setcol, t=chart_table, x="run", y="op_rate")
+    return ui.flex(ui_figure.show())
+
+def use_metrics_combo(prop_table, row_selection):
+    option, set_option = ui.use_state(None)
+    selected_benchmark = row_selection['Benchmark']['value']
+    prop_table = prop_table.where(f'benchmark_name=`{selected_benchmark}`')
+
+    items = [ui.item(r['label'],key=r['key']) for r in get_property_list_keys(prop_table).iter_dict()]
+    return option,ui.combo_box(items,label='Name',selected_key=option,on_change=set_option,label_position='side')
+
+def use_metrics_chart(metrics_table,row_selection,metric_selection,user_input):
+    actor = user_input['actor']; prefix = user_input['prefix']
+    setids = get_setids(metrics_table)
+    setprefix = f'{actor}/{prefix}'
+    selected_benchmark = row_selection['Benchmark']['value']
+    metrics_table = metrics_table.where([f'benchmark_name=`{selected_benchmark}`',f'name=`{metric_selection}`'])
+    ui_figure = Figure()
+    for i, setid in enumerate(setids):
+        if i == 0:
+            ui_figure = ui_figure.axes(plot_style=PlotStyle.BAR).chart_title(title=f'{selected_benchmark} {metric_selection}')
+        setcol = normalize_column_name(setprefix,setid)
+        chart_table = metrics_table.where([f'benchmark_name=`{selected_benchmark}`',f'set_id=`{setid}`']) \
+            .sort(['timestamp']).update('run=i+1')
+        ui_figure = ui_figure.plot_cat(series_name=setcol, t=chart_table, category="run", y="value")
+    return ui.flex(ui_figure.show())
+
+def load_table_memo(table_name, parent_table, user_input):
+    table_func = globals()[f'load_{table_name}_tables']
+    return use_memo(
+        lambda: table_func(parent_table,user_input['actor'], user_input['prefix']), [user_input])
+
+@ui.component
+def adhoc_dashboard():
+    user_input,input_form = use_dashboard_input()
+    row_selection,set_row_selection = use_state({'Benchmark':{'value':''}})
+    set_table,result_table = load_table_memo('results', None, user_input)
+    otherdiff,jardiff,pydiff = load_table_memo('diffs', result_table, user_input)
+    runner_metrics, engine_metrics = load_table_memo('metrics', result_table, user_input)
+    benchmark_chart = use_benchmark_chart(result_table,row_selection,user_input)
+    selected_runner_metric,runner_metrics_combo = use_metrics_combo(runner_metrics,row_selection)
+    selected_engine_metric,engine_metrics_combo = use_metrics_combo(engine_metrics,row_selection)
+    runner_metrics_chart = use_metrics_chart(runner_metrics,row_selection,selected_runner_metric,user_input)
+    engine_metrics_chart = use_metrics_chart(engine_metrics,row_selection,selected_engine_metric,user_input)
+
+    return ui.column([
+        ui.row(ui.panel(input_form, title='Data Set'), height='9'),
+        ui.row(
+            ui.panel(ui.table(set_table, on_row_press=set_row_selection, density='regular'), title='Benchmark Comparison'),
+            ui.stack(
+                ui.panel(ui.table(otherdiff, density='regular'), title='Other Changes'),
+                ui.panel(ui.table(jardiff, density='regular'), title='Jar Changes'),
+                ui.panel(ui.table(pydiff, density='regular'), title='Python Changes')
+            ),
+            height='55'),
+        ui.row(
+            ui.stack(
+                ui.panel(benchmark_chart, title='Run Rates'),
+            ),
+            ui.stack(
+                ui.panel(ui.flex(engine_metrics_combo,engine_metrics_chart,direction='column'), title='Engine Metrics'),
+                ui.panel(ui.flex(runner_metrics_combo,runner_metrics_chart,direction='column'), title='Runner Metrics'),
+                activeItemIndex=0
+            ),
+            height='36')
+    ])
+
+Adhoc_Dashboard = ui.dashboard(adhoc_dashboard())
+
+def normalize_column_name(prefix, text):
+    text = re.sub('^.*/','',text[len(prefix):])
+    text = normalize_name(text)
+    return re.sub('^_+','',text)
+
+def get_property_list_keys(table):
+    return table.select_distinct(['name']).view(['key=name','label=name']).sort(['label'])
+
+def get_setids(bench_results):
+    setids = bench_results.select_distinct(['set_id']).sort_descending(['set_id'])
+    return [row.set_id for row in setids.iter_tuple()]
+
+def load_results_tables(parent_table, actor, prefix):
+    bench_result_sets,bench_results = load_table_or_empty('result_sets',storage_uri,'adhoc',actor, prefix)
+
+    setids = get_setids(bench_result_sets)
+    setprefix = f'{actor}/{prefix}'
+
+    bench = bench_result_sets.select_distinct(['Benchmark=benchmark_name'])
+    rate1 = None
+    for setid in setids:
+        setcol = normalize_column_name(setprefix,setid)
+        varcol = 'Var_' + setcol
+        ratecol = 'Rate_' + setcol
+        changecol = 'Change_' + setcol
+        right = bench_result_sets.where(['set_id=`' + setid + '`'])
+        bench = bench.natural_join(right,on=['Benchmark=benchmark_name'], \
+            joins=[varcol+'=variability', ratecol+'=op_rate'])
+        if rate1 is None:
+            rate1 = ratecol
+        else:
+            bench = bench.update([changecol + '=(float)gain(' + rate1 + ',' + ratecol + ')'])
+    bench = format_columns(bench, pct_cols=('Var_','Change_'), int_cols=('Rate'))
+    return bench, bench_results
+
+def load_diffs_tables(parent_table, actor, prefix):
+    bench_platform = load_table_or_empty('platform',storage_uri,'adhoc',actor,prefix)
+    setids = get_setids(parent_table)
+    setprefix = f'{actor}/{prefix}'
+
+    jointbl = bench_platform.where(['origin=`deephaven-engine`']).first_by(['set_id','name'])
+    platdiff = jointbl.select_distinct(['name','value']).group_by(['name']) \
+        .where(['value.size() > 1']).view(['Name=name'])
+
+    for setid in setids:
+        setcol = normalize_column_name(setprefix,setid)
+        right = jointbl.where(['set_id=`' + setid + '`'])
+        platdiff = platdiff.natural_join(right,on=['Name=name'], joins=['Val_'+setcol+'=value'])
+
+    jardiff = merge([
+        platdiff.where(['Name=`deephaven.version`']),
+        platdiff.where(['Name=`dependency.jar.size`']),
+        platdiff.where(['Name.endsWith(`.jar`)'])
+    ])
+
+    pydiff = merge([
+        platdiff.where(['Name=`python.version`']),
+        platdiff.where(['Name=`dependency.python.size`']),
+        platdiff.where(['Name.endsWith(`.py`)'])
+    ])
+
+    otherdiff = platdiff.where_not_in(merge([jardiff,pydiff]), cols=['Name'])
+    jardiff = jardiff.update(['Name=Name.replaceAll(`[.]jar$`,``)'])
+    pydiff = pydiff.update(['Name=Name.replaceAll(`[.]py$`,``)'])
+    return otherdiff, jardiff, pydiff
+
+def load_metrics_tables(parent_table, actor, prefix):
+    bench_metrics = load_table_or_empty('metrics',storage_uri,'adhoc',actor,prefix)
+
+    runnerdiff = bench_metrics.where(['origin=`test-runner`'])
+    enginediff = bench_metrics.where(['origin=`deephaven-engine`'])
+
+    return runnerdiff, enginediff
+
diff --git a/src/main/resources/io/deephaven/benchmark/run/profile/queries/dashboards/benchmark_functions.dh.py b/src/main/resources/io/deephaven/benchmark/run/profile/queries/dashboards/benchmark_functions.dh.py
new file mode 100644
index 0000000..be36215
--- /dev/null
+++ b/src/main/resources/io/deephaven/benchmark/run/profile/queries/dashboards/benchmark_functions.dh.py
@@ -0,0 +1,294 @@
+# Copyright (c) 2022-2024 Deephaven Data Labs and Patent Pending
+#
+# Deephaven python functions to support Benchmark Dashboards. These functions produce basic tables,
+# format strings, and do calculations. The data for creating tables is downloaded and cached from
+# either the Deephaven Benchmark GCloud bucket or from NFS on one of Deephaven's demos servers.
+#
+# Requirements: Deephaven 0.36.1 or greater
+
+import os, re, glob, jpy
+import deephaven.dtypes as dht
+from deephaven import read_csv, merge, agg, empty_table, input_table, dtypes as dht
+from urllib.request import urlopen, urlretrieve
+from numpy import typing as npt
+
+# Convert the given name to a name suitable for a DH column name
+def normalize_name(name):
+    name = name.replace('/','__')
+    return re.sub('[^A-Za-z0-9_$]', '_', name)
+
+# Get the latest GCloud run_ids for the benchmark category up to max_runs
+def get_remote_children(parent_uri, category, max_runs=10):
+    run_ids = []
+    search_uri = parent_uri + '?delimiter=/&prefix=' + category + '/' + '&max-keys=10000'
+    with urlopen(search_uri) as r:
+        text = r.read().decode()
+        for run_id in re.findall('{}/([^/><]+)/'.format(category), text, re.MULTILINE):
+            run_ids.append(run_id)
+    run_ids.sort(reverse=True)
+    return run_ids[:max_runs]
+
+# Get the file-based children of the given parent/category directory
+def get_local_children(parent_uri, category, max_runs=10):
+    run_ids = []
+    root_path = parent_uri.replace('file:///','/')
+    for run_id in os.listdir(os.path.normpath(os.path.join(root_path, category))):
+        run_ids.append(run_id)
+    run_ids.sort(reverse=True)
+    return run_ids[:max_runs]
+
+# Get the children of the given storage/category uri
+def get_children(storage_uri, category, max_runs):
+    if storage_uri.startswith('http'):
+        return get_remote_children(storage_uri, category, max_runs)
+    else:
+        return get_local_children(storage_uri, category, max_runs)
+
+# Get the paths for benchmark run data that match the given filters
+def get_run_paths(storage_uri, category, actor_filter, set_filter, max_sets):
+    actor_filter = actor_filter if actor_filter else get_default_actor_filter(category)
+    set_filter = set_filter if set_filter else get_default_set_filter(category)
+    set_matcher = re.compile(set_filter)
+    actor_matcher = re.compile(actor_filter)
+    run_matcher = re.compile('run-[0-9A-Za-z]+')
+    benchmark_sets = []
+    for actor in get_children(storage_uri, category, 1000):
+        if actor_matcher.match(actor):
+            for set_label in get_children(storage_uri, category + '/' + actor, 1000):
+                if set_matcher.match(set_label):
+                    benchmark_sets.append(actor + '/' + set_label)
+    benchmark_sets.sort(reverse=True)
+    benchmark_sets = benchmark_sets[:max_sets]
+    benchmark_runs = []
+    for set_path in benchmark_sets:
+        for run_id in get_children(storage_uri, category + '/' + set_path, 1000):
+            if run_matcher.match(run_id):
+                benchmark_runs.append(set_path + '/' + run_id)
+    return benchmark_runs
+
+# Cache an HTTP url into a local directory and return the local path
+def cache_remote_csv(uri):
+    try:
+        out_path = re.sub('^http.*/deephaven-benchmark/', '/data/deephaven-benchmark/', uri)
+        os.makedirs(os.path.dirname(out_path), mode=0o777, exist_ok=True)
+    except Exception as ex:
+        print('Error downloading file:', out_path, ':', ex)
+        return uri
+    try:
+        out_path_gz = out_path + '.gz'
+        if os.path.exists(out_path_gz): return out_path_gz
+        urlretrieve(uri + '.gz', out_path_gz)
+        print('Cache', uri + '.gz')
+        return out_path_gz
+    except Exception:
+        try:
+            if os.path.exists(out_path): return out_path
+            urlretrieve(uri, out_path)
+            print('Cache', uri)
+            return out_path
+        except Exception as ex:
+            print('Error caching file:', out_path, ':', ex)
+            return uri
+
+# Read csv into a table (Currently, pandas is used for gzipped csv)
+def dh_read_csv(uri, convert_func):
+    uri = uri.replace('file:///','/')
+    uri = cache_remote_csv(uri) if uri.startswith('http') else uri
+    try:
+        tbl = read_csv(uri + '.gz')
+        tbl = convert_func(tbl)
+        print('Load ' + uri + '.gz')
+    except Exception:
+        tbl = read_csv(uri)
+        tbl = convert_func(tbl)
+        print('Load ' + uri)
+    return tbl
+
+# Merge together benchmark runs from the GCloud bucket for the same csv (e.g. benchmark_results.csv)
+def merge_run_tables(parent_uri, run_ids, category, csv_file_name, convert_func):
+    tables = []
+    for run_id in run_ids:
+        table_uri = parent_uri + '/' + category + '/' + run_id + '/' + csv_file_name
+        table_csv = dh_read_csv(table_uri, convert_func)
+        set_id = os.path.dirname(run_id)
+        run_id = os.path.basename(run_id)
+        table_csv = table_csv.update_view(['set_id = "' + set_id + '"', 'run_id = "' + run_id + '"'])
+        tables.append(table_csv)
+    return merge(tables)
+
+# Do any conversions of type or column name needed from benchmark-results.csv
+def convert_result(table):
+    return table.view(['benchmark_name','origin','timestamp=(long)timestamp','test_duration=(double)test_duration',
+        'op_duration=(double)op_duration','op_rate=(long)op_rate','row_count=(long)row_count'])
+
+# Do any conversions of type or column name needed from benchmark-metrics.csv
+def convert_metric(table):
+    return table.view(['benchmark_name','origin','timestamp=(long)timestamp','name',
+        'value=(double)value','note'])
+
+# Do any conversions of type or column name needed from benchmark-platform.csv
+def convert_platform(table):
+    return table.view(['origin','name','value'])
+
+# Get the default actor filter depending on the given category
+def get_default_actor_filter(category):
+    if category in ['release','nightly','compare']: return 'deephaven'
+    return '.+'
+
+# Get the default set filter depending on the given category
+def get_default_set_filter(category):
+    if category in ['release','compare']: return '[0-9]{2}[.][0-9]{3}[.][0-9]{2}'  # ##.###.##
+    if category in ['nightly']: return '[0-9]{4}([-][0-9]{2}){2}'  # yyyy-mm-dd
+    return '.+'
+
+def empty_bench_results():
+    return input_table({'benchmark_name':dht.string,'origin':dht.string,'timestamp':dht.int64,
+        'test_duration':dht.float64,'op_duration':dht.float64,'op_rate':dht.int64,
+        'row_count':dht.int64,'set_id':dht.string,'run_id':dht.string})

+def empty_bench_result_sets():
+    sets = input_table({'benchmark_name':dht.string,'origin':dht.string,'timestamp':dht.int64,
+        'test_duration':dht.float64,'set_op_rates':dht.int64_array,'op_duration':dht.float64,
+        'op_rate':dht.int64,'row_count':dht.int64,'variability':dht.float32,'set_id':dht.string,
+        'run_id':dht.string,'set_count':dht.int64,'deephaven_version':dht.string})
+    return sets, empty_bench_results()
+
+def empty_bench_platform():
+    return input_table({'origin':dht.string,'name':dht.string,'value':dht.string,
+        'set_id':dht.string,'run_id':dht.string})
+
+def empty_bench_metrics():
+    return input_table({'benchmark_name':dht.string,'origin':dht.string,'timestamp':dht.int64,
+        'name':dht.string,'value':dht.float64,'note':dht.string, 'set_id':dht.string,
+        'run_id':dht.string})
+
+# Load all benchmark-results.csv data collected from the given storage, category, and filters
+def load_bench_results(storage_uri, category='adhoc', actor_filter=None, set_filter=None):
+    run_ids = get_run_paths(storage_uri, category, actor_filter, set_filter, 100)
+    return merge_run_tables(storage_uri, run_ids, category, 'benchmark-results.csv', convert_result)
+
+# Load all benchmark-metrics.csv data collected from the given storage, category, and filters
+def load_bench_metrics(storage_uri, category='adhoc', actor_filter=None, set_filter=None):
+    run_ids = get_run_paths(storage_uri, category, actor_filter, set_filter, 100)
+    return merge_run_tables(storage_uri, run_ids, category, 'benchmark-metrics.csv', convert_metric)
+
+# Load all benchmark-platform.csv data collected from the given storage, category, and filters
+def load_bench_platform(storage_uri, category='adhoc', actor_filter=None, set_filter=None):
+    run_ids = get_run_paths(storage_uri, category, actor_filter, set_filter, 100)
+    return merge_run_tables(storage_uri, run_ids, category, 'benchmark-platform.csv', convert_platform)
+
+# Load all benchmark-results.csv data collected from the given storage, category, and filters by set
+# Sets contain one or more runs for each benchmark. This function loads the median run by rate for each benchmark
+def load_bench_result_sets(storage_uri, category='adhoc', actor_filter=None, set_filter=None):
+    bench_results = load_bench_results(storage_uri,category,actor_filter,set_filter)
+    bench_results_sets = bench_results.sort(['benchmark_name','origin','set_id','op_rate']) \
+        .group_by(['benchmark_name','origin','set_id']) \
+        .view(['benchmark_name','origin','timestamp=(long)mid_item(timestamp)','test_duration=(double)mid_item(test_duration)',
+            'set_op_rates=op_rate','op_duration=(double)mid_item(op_duration)','op_rate=(long)mid_item(op_rate)',
+            'row_count=(long)mid_item(row_count)','variability=(float)rstd(set_op_rates)','set_id',
+            'run_id=(String)mid_item(run_id)','set_count=count(set_op_rates)'])
+    # Attach columns for specified metrics and platform properties
+    #local_platform_props = []
+    #local_metric_props = []
+    #bench_results_sets = add_platform_values(bench_results_sets, ['deephaven.version'] + platform_props, local_platform_props)
+    #bench_results_sets = add_metric_values(bench_results_sets, metric_props, local_metric_props)
+    return bench_results_sets, bench_results
+
+def load_table_or_empty(table_name, storage_uri, category='adhoc', actor_filter='', set_filter=''):
+    actor = actor_filter.strip(); prefix = set_filter.strip()
+    if actor and prefix:
+        return globals()[f'load_bench_{table_name}'](storage_uri, category, actor, prefix)
+    return globals()[f'empty_bench_{table_name}']()
+
+# Add columns for the specified platform properties
+def add_platform_values(table, pnames=[], cnames = []):
+    pnames = list(dict.fromkeys(pnames))
+    for pname in pnames:
+        new_pname = normalize_name(pname)
+        cnames.append(new_pname)
+        single_platforms = bench_platforms.where(['name=pname']).first_by(['set_id','run_id','origin'])
+        table = table.natural_join(
+            single_platforms, on=['set_id','run_id','origin'], joins=[new_pname+'=value']
+        )
+    return table
+
+# Add columns for the specified metric properties
+def add_metric_values(table, pnames=[], cnames=[]):
+    pnames = list(dict.fromkeys(pnames))
+    for pname in pnames:
+        new_pname = normalize_name(pname)
+        cnames.append(new_pname)
+        single_metrics = bench_metrics.where(['name=pname']).first_by(['benchmark_name','set_id','run_id','origin'])
+        table = table.natural_join(
+            single_metrics, on=['benchmark_name','set_id','run_id','origin'], joins=[new_pname+'=value']
+        )
+    return table
+
+# Format column values for percent or integral depending on the start of the name
+def format_columns(table,pct_cols=(),int_cols=()):
+    column_formats = []
+    for col in table.columns:
+        n = col.name
+        if n.startswith(pct_cols):
+            column_formats.append(n + '=Decimal(`0.0%`)')
+        if n.startswith(int_cols):
+            column_formats.append(n + '=Decimal(`###,##0`)')
+    return table.format_columns(column_formats)
+
+import statistics
+# Get a percentage standard deviation for the given list of rates
+def rstd(rates) -> float:
+    rates = [i for i in rates if i >= 0]
+    mean = statistics.mean(rates)
+    return (statistics.pstdev(rates) / mean) if mean != 0 else 0.0
+
+# Get the zscore of one rate against a list of rates
+def zscore(rate, rates) -> float:
+    rates = [i for i in rates if i >= 0]
+    std = statistics.pstdev(rates)
+    return ((rate - statistics.mean(rates)) / std) if std != 0 else 0.0
+
+# Get the probability that the zscore lacks confidence (lower is better)
+def zprob(zscore) -> float:
+    lower = -abs(zscore)
+    upper = abs(zscore)
+    return 1 - (statistics.NormalDist().cdf(upper) - statistics.NormalDist().cdf(lower))
+
+from array import array
+# Get the percent change between the last rate in a list and the avg of the previous rates
+def rchange(rates) -> float:
+    rates = array('l', rates)
+    if(len(rates) < 2): return 0.0
+    m = statistics.mean(rates[:-1])
+    return (rates[-1] - m) / m
+
+# Get the percentage gain between two values
+def gain(start, end) -> float:
+    return (end - start) / start
+
+# Format a list of rates to make them easier to read in a DHC table
+def format_rates(rates):
+    return ' '.join("{:,}".format(r) for r in rates)
+
+# Truncate text to the given size. Add '...' for truncated text.
+def truncate(text, size):
+    if len(text) < size - 3: return text
+    return text[:size-3] + '...'
+
+# Get the middle item of the array
+def mid_item(arr):
+    n = len(arr)
+    return arr[n // 2]
+
+# Get the last item of the array
+def last_item(arr):
+    return arr[-1]
+
+# Merge the elements of an array of arrays into a single typed array
+def merge_arrays(type_str, arrs):
+    final_arr = []
+    for arr in arrs:
+        for i in arr.copyToArray():
+            final_arr.append(i)
+    return jpy.array(type_str, final_arr)
+
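
The new functions file can also be exercised on its own, outside the dashboard UI, using the same bootstrap pattern adhoc_dashboard.dh.py uses above. The sketch below is illustrative rather than part of the patch: it assumes a Deephaven 0.36.1+ console with outbound access to the public GCloud bucket, that benchmark_functions.dh.py has been published at the bucket root the dashboard's own loader expects, and the actor and set label are placeholders.

# Sketch (not part of the patch): load the shared functions and pull adhoc benchmark tables directly.
from urllib.request import urlopen

root = 'https://storage.googleapis.com'
storage_uri = f'{root}/deephaven-benchmark'
with urlopen(f'{storage_uri}/benchmark_functions.dh.py') as r:
    exec(r.read().decode(), globals(), locals())

# 'some_user' and 'myset' are placeholders for a real GitHub actor and adhoc set label.
bench_result_sets, bench_results = load_bench_result_sets(storage_uri, 'adhoc', 'some_user', 'myset')
bench_platform = load_bench_platform(storage_uri, 'adhoc', 'some_user', 'myset')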
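
Column naming in the Benchmark Comparison table is easier to follow with a concrete trace. With adhoc_dashboard.dh.py loaded (it defines normalize_column_name and pulls in normalize_name), made-up actor and set-label values behave as shown below; get_setids returns set ids in descending order, and the first one becomes the Rate baseline that later Change_ columns are computed against.

# Hypothetical values: actor 'alice', set label prefix 'v0.3', two stored set ids.
setprefix = 'alice/v0.3'
print(normalize_column_name(setprefix, 'alice/v0.37.0'))  # -> '7_0', giving columns Var_7_0 and Rate_7_0
print(normalize_column_name(setprefix, 'alice/v0.36.0'))  # -> '6_0', giving Var_6_0, Rate_6_0, Change_6_0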
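
The statistics helpers at the bottom of benchmark_functions.dh.py are plain Python and can be sanity-checked without a Deephaven server, with the file loaded as in the first sketch or its tail pasted into any Python session. The numbers below are invented for illustration; rstd and gain return fractions, which the dashboard renders as percentages via format_columns.

# Illustration only: invented op rates for one benchmark across five runs.
rates = [980_000, 1_010_000, 995_000, 1_005_000, 990_000]
print(rstd(rates))                       # relative standard deviation as a fraction (shown as 0.0% in tables)
print(zscore(1_050_000, rates))          # distance of a new rate from the sample, in standard deviations
print(zprob(zscore(1_050_000, rates)))   # two-tailed probability; lower suggests a real change
print(gain(rates[0], rates[-1]))         # fractional change between two rates (shown as a percent column)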