Commit
Closes Bears-R-Us#3839: new benchmarks to output performance graph format
ajpotts committed Nov 21, 2024
1 parent 15bd176 commit e60ebde
Showing 46 changed files with 1,022 additions and 4 deletions.
1 change: 1 addition & 0 deletions .github/workflows/CI.yml
@@ -229,6 +229,7 @@ jobs:
pip install pytest-benchmark==4.0.0
make install-iconv
echo "\$(eval \$(call add-path,/usr/lib/x86_64-linux-gnu/hdf5/serial/))" >> Makefile.paths
mkdir -p benchmark_v2/data
- name: Install Chapel frontend bindings
run: |
(cd $CHPL_HOME/tools/chapel-py && python3 -m pip install .)
3 changes: 3 additions & 0 deletions .gitignore
@@ -46,3 +46,6 @@ arkouda/*.pyi
arkouda/numpy/*.pyi
arkouda/scipy/*.pyi
arkouda/scipy/stats/*.pyi
benchmark_v2/data/*
benchmark_v2/datdir/*.dat
benchmark_v2/datdir/html/*
8 changes: 7 additions & 1 deletion Makefile
@@ -581,9 +581,15 @@ test-clean:
$(RM) $(TEST_TARGETS) $(addsuffix _real,$(TEST_TARGETS))

size_bm = 10**8
DATE := $(shell date '+%Y_%m_%d_%H_%M_%S')
out=benchmark_v2/data/benchmark_stats_$(DATE).json
.PHONY: benchmark
benchmark:
python3 -m pytest -c benchmark.ini --benchmark-autosave --benchmark-storage=file://benchmark_v2/.benchmarks --size=$(size_bm)
mkdir -p benchmark_v2/data
python3 -m pytest -c benchmark.ini --benchmark-autosave --benchmark-storage=file://benchmark_v2/.benchmarks --size=$(size_bm) --benchmark-json=$(out)
python3 benchmark_v2/reformat_benchmark_results.py --benchmark-data $(out)



version:
@echo $(VERSION);
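For context on the new `benchmark` target: pytest-benchmark's `--benchmark-json` flag writes a single JSON file whose `benchmarks` list carries, per test, the `group`, `name`, `stats` (including `mean`), and any `extra_info` the benchmark recorded (such as `transfer_rate`). A minimal sketch of inspecting that file; the timestamped path below is an assumed example, not output from a real run:

```python
# Sketch: inspect the JSON written by `make benchmark` via --benchmark-json.
# The "benchmarks"/"stats"/"extra_info" layout follows pytest-benchmark's output;
# the file name is an assumed example of the timestamped path the Makefile builds.
import json

with open("benchmark_v2/data/benchmark_stats_2024_11_21_12_00_00.json") as f:
    results = json.load(f)

for bench in results["benchmarks"]:
    # e.g. group="GroupBy.aggregate", name="bench_aggregate[sum]"
    print(bench["group"], bench["name"],
          bench["stats"]["mean"],
          bench.get("extra_info", {}).get("transfer_rate"))
```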
2 changes: 1 addition & 1 deletion benchmark_v2/aggregate_benchmark.py
@@ -27,7 +27,7 @@ def run_agg(g, vals, op):
@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="GroupBy.aggregate")
@pytest.mark.parametrize("op", ak.GroupBy.Reductions)
def bench_aggs(benchmark, op):
def bench_aggregate(benchmark, op):
if op in ["any", "all"]:
g, vals = setup_agg("bool")
else:
4 changes: 2 additions & 2 deletions benchmark_v2/argsort_benchmark.py
@@ -5,7 +5,7 @@

TYPES = ("int64", "uint64", "float64", "str")


@pytest.mark.benchmark(group="arkouda_argsort")
@pytest.mark.skip_correctness_only(True)
@pytest.mark.parametrize("dtype", TYPES)
def bench_argsort(benchmark, dtype):
@@ -40,7 +40,7 @@ def bench_argsort(benchmark, dtype):
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.benchmark(group="numpy_argsort")
@pytest.mark.skip_numpy(False)
@pytest.mark.skip_correctness_only(True)
@pytest.mark.parametrize("dtype", TYPES)
1 change: 1 addition & 0 deletions benchmark_v2/datdir/configs/field_lookup_map.json
@@ -0,0 +1 @@
{"argsort": {"Average rate =": {"group": "", "name": "bench_argsort", "benchmark_name": "argsort", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": "bench_argsort\\[[\\w\\d]*\\]"}, "Average time =": {"group": "", "name": "bench_argsort", "benchmark_name": "argsort", "lookup_path": ["stats", "mean"], "lookup_regex": "bench_argsort\\[[\\w\\d]*\\]"}}, "coargsort": {"Average rate =": {"group": "", "name": "bench_coargsort", "benchmark_name": "coargsort", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": "bench_coargsort\\[[\\w\\d]*\\]"}, "Average time =": {"group": "", "name": "bench_coargsort", "benchmark_name": "coargsort", "lookup_path": ["stats", "mean"], "lookup_regex": "bench_coargsort\\[[\\w\\d]*\\]"}, "1-array Average rate =": {"group": "Arkouda_CoArgSort", "name": "", "benchmark_name": "coargsort", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": "bench_coargsort\\[[\\w\\d]*-1\\]"}, "1-array Average time =": {"group": "Arkouda_CoArgSort", "name": "", "benchmark_name": "coargsort", "lookup_path": ["stats", "mean"], "lookup_regex": "bench_coargsort\\[[\\w\\d]*-1\\]"}, "2-array Average rate =": {"group": "Arkouda_CoArgSort", "name": "", "benchmark_name": "coargsort", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": "bench_coargsort\\[[\\w\\d]*-2\\]"}, "2-array Average time =": {"group": "Arkouda_CoArgSort", "name": "", "benchmark_name": "coargsort", "lookup_path": ["stats", "mean"], "lookup_regex": "bench_coargsort\\[[\\w\\d]*-2\\]"}, "8-array Average rate =": {"group": "Arkouda_CoArgSort", "name": "", "benchmark_name": "coargsort", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": "bench_coargsort\\[[\\w\\d]*-8\\]"}, "8-array Average time =": {"group": "Arkouda_CoArgSort", "name": "", "benchmark_name": "coargsort", "lookup_path": ["stats", "mean"], "lookup_regex": "bench_coargsort\\[[\\w\\d]*-8\\]"}, "16-array Average rate =": {"group": "Arkouda_CoArgSort", "name": "", "benchmark_name": "coargsort", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": "bench_coargsort\\[[\\w\\d]*-16\\]"}, "16-array Average time =": {"group": "Arkouda_CoArgSort", "name": "", "benchmark_name": "coargsort", "lookup_path": ["stats", "mean"], "lookup_regex": "bench_coargsort\\[[\\w\\d]*-16\\]"}}, "aggregate": {"Average rate =": {"group": "", "name": "bench_aggregate", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": "bench_aggregate\\[[\\w\\d]*\\]"}, "Average time =": {"group": "", "name": "bench_aggregate", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": "bench_aggregate\\[[\\w\\d]*\\]"}, "Aggregate prod Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[prod]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate prod Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[prod]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate sum Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[sum]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate sum Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[sum]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate mean Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[mean]", "benchmark_name": 
"aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate mean Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[mean]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate min Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[min]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate min Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[min]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate max Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[max]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate max Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[max]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate argmin Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[argmin]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate argmin Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[argmin]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate argmax Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[argmax]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate argmax Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[argmax]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate any Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[any]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate any Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[any]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate all Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[all]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate all Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[all]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate xor Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[xor]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate xor Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[xor]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate and Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[and]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate and Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[and]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate or Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[or]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate or Average time =": {"group": "GroupBy.aggregate", 
"name": "bench_aggregate[or]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate nunique Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[nunique]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate nunique Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[nunique]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}}}
10 changes: 10 additions & 0 deletions benchmark_v2/graph_infra/GRAPHLIST
@@ -0,0 +1,10 @@
# suite: Benchmarks
arkouda.graph
# suite: String Benchmarks
arkouda-string.graph
# suite: Build Stats
arkouda-comp.graph
# suite: Sort Cases
arkouda-sort-cases.graph
# suite: Bigint Benchmarks
arkouda-bigint.graph
4 changes: 4 additions & 0 deletions benchmark_v2/graph_infra/IO.perfkeys
@@ -0,0 +1,4 @@
write Average time HDF5 =
write Average rate HDF5 =
read Average time HDF5 =
read Average rate HDF5 =
16 changes: 16 additions & 0 deletions benchmark_v2/graph_infra/README.md
@@ -0,0 +1,16 @@
This directory contains the infrastructure required by the Chapel graphing scripts.

- .perfkeys files contain the strings to search for in benchmark output. The
  values found for these keys are then stored in .dat files.
- .graph files contain the graph information (title, perfkeys, graphkeys, .dat
file)
- The GRAPHLIST file is a meta file that lists the .graph files

Benchmark output and a .perfkeys file are used by `computePerfStats` to create or
append to a .dat file. `genGraphs` then takes the .dat files and the meta
information in the .graph files to generate interactive graphs. To view the
graphs locally:

cd benchmark_v2/datdir/html
python3 -m http.server 8000
open http://localhost:8000/ (or navigate to localhost:8000 in your browser)
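To make the pipeline above concrete, here is an illustrative parse of one `.graph` stanza (blank-line-separated `key: value` pairs, with comma-separated lists for `perfkeys`, `graphkeys`, and `files`). The field names come from the `.graph` files added in this commit, but the parser itself is only a sketch and is not the Chapel `genGraphs` tool:

```python
# Illustrative parser for a .graph file: stanzas are separated by blank lines
# and contain "key: value" pairs, e.g. arkouda-bigint.graph below.
def parse_graph_file(path):
    stanzas, current = [], {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line:                      # blank line ends a stanza
                if current:
                    stanzas.append(current)
                    current = {}
                continue
            if line.startswith("#"):          # skip comment lines
                continue
            key, _, value = line.partition(":")
            if key in ("perfkeys", "graphkeys", "files"):
                current[key] = [v.strip() for v in value.split(",")]
            else:
                current[key] = value.strip()
    if current:
        stanzas.append(current)
    return stanzas

# Example: parse_graph_file("benchmark_v2/graph_infra/arkouda-bigint.graph")
# yields dicts with keys perfkeys, graphkeys, files, graphtitle, ylabel.
```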
25 changes: 25 additions & 0 deletions benchmark_v2/graph_infra/aggregate.perfkeys
@@ -0,0 +1,25 @@
Aggregate sum Average rate =
Aggregate prod Average rate =
Aggregate mean Average rate =
Aggregate min Average rate =
Aggregate max Average rate =
Aggregate argmin Average rate =
Aggregate argmax Average rate =
Aggregate any Average rate =
Aggregate all Average rate =
Aggregate xor Average rate =
Aggregate and Average rate =
Aggregate or Average rate =
Aggregate nunique Average rate =
Aggregate sum Average time =
Aggregate prod Average time =
Aggregate min Average time =
Aggregate max Average time =
Aggregate argmin Average time =
Aggregate argmax Average time =
Aggregate any Average time =
Aggregate all Average time =
Aggregate xor Average time =
Aggregate and Average time =
Aggregate or Average time =
Aggregate nunique Average time =
29 changes: 29 additions & 0 deletions benchmark_v2/graph_infra/arkouda-bigint.graph
@@ -0,0 +1,29 @@
perfkeys: bigint_from_uint_arrays Average rate =, bigint_to_uint_arrays Average rate =
graphkeys: bigint_from_uint_arrays GiB/s, bigint_to_uint_arrays GiB/s
files: bigint_conversion.dat, bigint_conversion.dat
graphtitle: Bigint Conversion Performance
ylabel: Performance (GiB/s)

perfkeys: Average bigint stream rate =
graphkeys: bigint stream GiB/s
files: bigint_stream.dat
graphtitle: Bigint Stream Performance
ylabel: Performance (GiB/s)

perfkeys: Average bigint AND rate =, Average bigint OR rate =, Average bigint SHIFT rate =
graphkeys: bigint AND GiB/s, bigint OR GiB/s, bigint SHIFT GiB/s
files: bigint_bitwise_binops.dat, bigint_bitwise_binops.dat, bigint_bitwise_binops.dat
graphtitle: Bigint Bitwise Binops Performance
ylabel: Performance (GiB/s)

perfkeys: 1-array Average rate =, 2-array Average rate =, 8-array Average rate =, 16-array Average rate =
graphkeys: 1 array Groupby GiB/s, 2 array Groupby GiB/s, 8 array Groupby GiB/s, 16 array Groupby GiB/s
files: bigint_groupby.dat, bigint_groupby.dat, bigint_groupby.dat, bigint_groupby.dat
graphtitle: Bigint Groupby Performance
ylabel: Performance (GiB/s)

perfkeys: to_ndarray Average rate =, ak.array Average rate =
graphkeys: to_ndarray GiB/s, ak.array GiB/s
files: bigint_array_transfer.dat, bigint_array_transfer.dat
graphtitle: Bigint Array Transfer Performance
ylabel: Performance (GiB/s)
11 changes: 11 additions & 0 deletions benchmark_v2/graph_infra/arkouda-comp.graph
@@ -0,0 +1,11 @@
perfkeys: total time :
graphkeys: Compile Time
files: comp-time.dat
graphtitle: Build Time
ylabel: Time (sec)

perfkeys: Statements emitted:
graphkeys: Statements Emitted
files: emitted-code-size.dat
graphtitle: Emitted Code Size
ylabel: Statements
41 changes: 41 additions & 0 deletions benchmark_v2/graph_infra/arkouda-sort-cases.graph
@@ -0,0 +1,41 @@
perfkeys: uniform int64 16-bit RadixSortLSD average rate =, uniform int64 32-bit RadixSortLSD average rate =, uniform int64 64-bit RadixSortLSD average rate =, uniform float64 RadixSortLSD average rate =, uniform int64 16-bit TwoArrayRadixSort average rate =, uniform int64 32-bit TwoArrayRadixSort average rate =, uniform int64 64-bit TwoArrayRadixSort average rate =, uniform float64 TwoArrayRadixSort average rate =
graphkeys: 16-bit LSD, 32-bit LSD, 64-bit LSD, float64 LSD, 16-bit MSD, 32-bit MSD, 64-bit MSD, float64 MSD
files: sort-cases.dat, sort-cases.dat, sort-cases.dat, sort-cases.dat, sort-cases.dat, sort-cases.dat, sort-cases.dat, sort-cases.dat
graphtitle: Uniformly Distributed Data
ylabel: Performance (GiB/s)

perfkeys: power-law int64 RadixSortLSD average rate =, power-law float64 RadixSortLSD average rate =, power-law int64 TwoArrayRadixSort average rate =, power-law float64 TwoArrayRadixSort average rate =
graphkeys: int64 32-bit LSD, float64 LSD, int64 32-bit MSD, float64 MSD
files: sort-cases.dat, sort-cases.dat, sort-cases.dat, sort-cases.dat
graphtitle: Power-Law Distributed Data
ylabel: Performance (GiB/s)

perfkeys: RMAT int64 RadixSortLSD average rate =, RMAT int64 TwoArrayRadixSort average rate =
graphkeys: RMAT int64 LSD, RMAT int64 MSD
files: sort-cases.dat, sort-cases.dat
graphtitle: RMAT-Generated Edges (coargsort)
ylabel: Performance (GiB/s)

perfkeys: block-sorted concat int64 RadixSortLSD average rate =, block-sorted interleaved int64 RadixSortLSD average rate =, block-sorted concat int64 TwoArrayRadixSort average rate =, block-sorted interleaved int64 TwoArrayRadixSort average rate =
graphkeys: concatenated 32-bit LSD, interleaved 32-bit LSD, concatenated 32-bit MSD, interleaved 32-bit MSD
files: sort-cases.dat, sort-cases.dat, sort-cases.dat, sort-cases.dat
graphtitle: Combinations of Sorted Arrays
ylabel: Performance (GiB/s)

perfkeys: refinement int64 RadixSortLSD average rate =, refinement int64 TwoArrayRadixSort average rate =
graphkeys: 2*int64 LSD, 2*int64 MSD
files: sort-cases.dat, sort-cases.dat
graphtitle: Refinement of Sorted Array with an Unsorted Array (coargsort)
ylabel: Performance (GiB/s)

perfkeys: datetime64[ns] RadixSortLSD average rate =, datetime64[ns] TwoArrayRadixSort average rate =
graphkeys: datetime64[ns] LSD, datetime64[ns] MSD
files: sort-cases.dat, sort-cases.dat
graphtitle: Simulated Timestamps with 1-Second Precision Stored as datetime64[ns]
ylabel: Performance (GiB/s)

perfkeys: IP-like 2*int64 RadixSortLSD average rate =, IP-like 2*int64 TwoArrayRadixSort average rate =
graphkeys: IP-like LSD, IP-like MSD
files: sort-cases.dat, sort-cases.dat
graphtitle: Simulated 90/10 Mix of IPv4/IPv6 Addresses (coargsort)
ylabel: Performance (GiB/s)
59 changes: 59 additions & 0 deletions benchmark_v2/graph_infra/arkouda-string.graph
@@ -0,0 +1,59 @@
perfkeys: Average rate =
graphkeys: Argsort GiB/s
files: str-argsort.dat
graphtitle: String Argsort Performance
ylabel: Performance (GiB/s)

perfkeys: 1-array Average rate =, 2-array Average rate =, 8-array Average rate =, 16-array Average rate =
graphkeys: 1 array Coargsort GiB/s, 2 array Coargsort GiB/s, 8 array Coargsort GiB/s, 16 array Coargsort GiB/s
files: str-coargsort.dat, str-coargsort.dat, str-coargsort.dat, str-coargsort.dat
graphtitle: String Coargsort Performance
ylabel: Performance (GiB/s)

perfkeys: 1-array Average rate =, 2-array Average rate =, 8-array Average rate =, 16-array Average rate =
graphkeys: 1 array Groupby GiB/s, 2 array Groupby GiB/s, 8 array Groupby GiB/s, 16 array Groupby GiB/s
files: str-groupby.dat, str-groupby.dat, str-groupby.dat, str-groupby.dat
graphtitle: String Groupby Performance
ylabel: Performance (GiB/s)

perfkeys: small str array Average rate =, medium str array Average rate =, big str array Average rate =
graphkeys: small str array GiB/s, medium str array GiB/s, big str array GiB/s
files: small-str-groupby.dat, small-str-groupby.dat, small-str-groupby.dat
graphtitle: Small String Groupby Performance
ylabel: Performance (GiB/s)

perfkeys: Average rate =
graphkeys: Gather GiB/s
files: str-gather.dat
graphtitle: String Gather Performance
ylabel: Performance (GiB/s)

perfkeys: Medium average rate =, Large average rate =
graphkeys: Medium GiB/s, Large GiB/s
files: str-in1d.dat, str-in1d.dat
graphtitle: String in1d Performance
ylabel: Performance (GiB/s)

perfkeys: Hashing good locality Average rate =, Hashing poor locality Average rate =
graphkeys: Good Locality, Poor Locality
files: str-locality.dat, str-locality.dat
graphtitle: String Hashing Performance
ylabel: Performance (GiB/s)

perfkeys: Regex searching good locality Average rate =, Regex searching poor locality Average rate =
graphkeys: Good Locality, Poor Locality
files: str-locality.dat, str-locality.dat
graphtitle: String Regex Search Performance
ylabel: Performance (GiB/s)

perfkeys: Casting good locality Average rate =, Casting poor locality Average rate =
graphkeys: Good Locality, Poor Locality
files: str-locality.dat, str-locality.dat
graphtitle: String cast-to-float Performance
ylabel: Performance (GiB/s)

perfkeys: Comparing to scalar good locality Average rate =, Comparing to scalar poor locality Average rate =
graphkeys: Good Locality, Poor Locality
files: str-locality.dat, str-locality.dat
graphtitle: String compare-vs-scalar Performance
ylabel: Performance (GiB/s)