diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 212e14e40e..51f2edfe72 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -229,6 +229,7 @@ jobs: pip install pytest-benchmark==4.0.0 make install-iconv echo "\$(eval \$(call add-path,/usr/lib/x86_64-linux-gnu/hdf5/serial/))" >> Makefile.paths + mkdir -p benchmark_v2/data - name: Install Chapel frontend bindings run: | (cd $CHPL_HOME/tools/chapel-py && python3 -m pip install .) diff --git a/.gitignore b/.gitignore index 2ec97a4262..6d546cc0fe 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,6 @@ arkouda/*.pyi arkouda/numpy/*.pyi arkouda/scipy/*.pyi arkouda/scipy/stats/*.pyi +benchmark_v2/data/* +benchmark_v2/datdir/*.dat +benchmark_v2/datdir/html/* diff --git a/Makefile b/Makefile index 5244afb2fa..c6737cb513 100644 --- a/Makefile +++ b/Makefile @@ -581,9 +581,15 @@ test-clean: $(RM) $(TEST_TARGETS) $(addsuffix _real,$(TEST_TARGETS)) size_bm = 10**8 +DATE := $(shell date '+%Y_%m_%d_%H_%M_%S') +out=benchmark_v2/data/benchmark_stats_$(DATE).json .PHONY: benchmark benchmark: - python3 -m pytest -c benchmark.ini --benchmark-autosave --benchmark-storage=file://benchmark_v2/.benchmarks --size=$(size_bm) + mkdir -p benchmark_v2/data + python3 -m pytest -c benchmark.ini --benchmark-autosave --benchmark-storage=file://benchmark_v2/.benchmarks --size=$(size_bm) --benchmark-json=$(out) + python3 benchmark_v2/reformat_benchmark_results.py --benchmark-data $(out) + + version: @echo $(VERSION); diff --git a/benchmark_v2/aggregate_benchmark.py b/benchmark_v2/aggregate_benchmark.py index 7e94dc0db4..5557837323 100644 --- a/benchmark_v2/aggregate_benchmark.py +++ b/benchmark_v2/aggregate_benchmark.py @@ -27,7 +27,7 @@ def run_agg(g, vals, op): @pytest.mark.skip_correctness_only(True) @pytest.mark.benchmark(group="GroupBy.aggregate") @pytest.mark.parametrize("op", ak.GroupBy.Reductions) -def bench_aggs(benchmark, op): +def bench_aggregate(benchmark, op): if op in ["any", "all"]: g, vals = setup_agg("bool") else: diff --git a/benchmark_v2/argsort_benchmark.py b/benchmark_v2/argsort_benchmark.py index 9dd53fcb4e..697ac48ff3 100644 --- a/benchmark_v2/argsort_benchmark.py +++ b/benchmark_v2/argsort_benchmark.py @@ -5,7 +5,7 @@ TYPES = ("int64", "uint64", "float64", "str") - +@pytest.mark.benchmark(group="arkouda_argsort") @pytest.mark.skip_correctness_only(True) @pytest.mark.parametrize("dtype", TYPES) def bench_argsort(benchmark, dtype): @@ -40,7 +40,7 @@ def bench_argsort(benchmark, dtype): (nbytes / benchmark.stats["mean"]) / 2**30 ) - +@pytest.mark.benchmark(group="numpy_argsort") @pytest.mark.skip_numpy(False) @pytest.mark.skip_correctness_only(True) @pytest.mark.parametrize("dtype", TYPES) diff --git a/benchmark_v2/datdir/configs/field_lookup_map.json b/benchmark_v2/datdir/configs/field_lookup_map.json new file mode 100644 index 0000000000..ace252e6a6 --- /dev/null +++ b/benchmark_v2/datdir/configs/field_lookup_map.json @@ -0,0 +1 @@ +{"argsort": {"Average rate =": {"group": "", "name": "bench_argsort", "benchmark_name": "argsort", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": "bench_argsort\\[[\\w\\d]*\\]"}, "Average time =": {"group": "", "name": "bench_argsort", "benchmark_name": "argsort", "lookup_path": ["stats", "mean"], "lookup_regex": "bench_argsort\\[[\\w\\d]*\\]"}}, "coargsort": {"Average rate =": {"group": "", "name": "bench_coargsort", "benchmark_name": "coargsort", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": "bench_coargsort\\[[\\w\\d]*\\]"}, "Average time =": 
{"group": "", "name": "bench_coargsort", "benchmark_name": "coargsort", "lookup_path": ["stats", "mean"], "lookup_regex": "bench_coargsort\\[[\\w\\d]*\\]"}, "1-array Average rate =": {"group": "Arkouda_CoArgSort", "name": "", "benchmark_name": "coargsort", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": "bench_coargsort\\[[\\w\\d]*-1\\]"}, "1-array Average time =": {"group": "Arkouda_CoArgSort", "name": "", "benchmark_name": "coargsort", "lookup_path": ["stats", "mean"], "lookup_regex": "bench_coargsort\\[[\\w\\d]*-1\\]"}, "2-array Average rate =": {"group": "Arkouda_CoArgSort", "name": "", "benchmark_name": "coargsort", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": "bench_coargsort\\[[\\w\\d]*-2\\]"}, "2-array Average time =": {"group": "Arkouda_CoArgSort", "name": "", "benchmark_name": "coargsort", "lookup_path": ["stats", "mean"], "lookup_regex": "bench_coargsort\\[[\\w\\d]*-2\\]"}, "8-array Average rate =": {"group": "Arkouda_CoArgSort", "name": "", "benchmark_name": "coargsort", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": "bench_coargsort\\[[\\w\\d]*-8\\]"}, "8-array Average time =": {"group": "Arkouda_CoArgSort", "name": "", "benchmark_name": "coargsort", "lookup_path": ["stats", "mean"], "lookup_regex": "bench_coargsort\\[[\\w\\d]*-8\\]"}, "16-array Average rate =": {"group": "Arkouda_CoArgSort", "name": "", "benchmark_name": "coargsort", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": "bench_coargsort\\[[\\w\\d]*-16\\]"}, "16-array Average time =": {"group": "Arkouda_CoArgSort", "name": "", "benchmark_name": "coargsort", "lookup_path": ["stats", "mean"], "lookup_regex": "bench_coargsort\\[[\\w\\d]*-16\\]"}}, "aggregate": {"Average rate =": {"group": "", "name": "bench_aggregate", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": "bench_aggregate\\[[\\w\\d]*\\]"}, "Average time =": {"group": "", "name": "bench_aggregate", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": "bench_aggregate\\[[\\w\\d]*\\]"}, "Aggregate prod Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[prod]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate prod Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[prod]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate sum Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[sum]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate sum Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[sum]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate mean Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[mean]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate mean Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[mean]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate min Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[min]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate min Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[min]", "benchmark_name": "aggregate", 
"lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate max Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[max]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate max Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[max]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate argmin Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[argmin]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate argmin Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[argmin]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate argmax Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[argmax]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate argmax Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[argmax]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate any Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[any]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate any Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[any]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate all Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[all]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate all Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[all]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate xor Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[xor]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate xor Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[xor]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate and Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[and]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate and Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[and]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate or Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[or]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate or Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[or]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}, "Aggregate nunique Average rate =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[nunique]", "benchmark_name": "aggregate", "lookup_path": ["extra_info", "transfer_rate"], "lookup_regex": ""}, "Aggregate nunique Average time =": {"group": "GroupBy.aggregate", "name": "bench_aggregate[nunique]", "benchmark_name": "aggregate", "lookup_path": ["stats", "mean"], "lookup_regex": ""}}} \ No newline at end of file diff --git a/benchmark_v2/graph_infra/GRAPHLIST 
b/benchmark_v2/graph_infra/GRAPHLIST new file mode 100644 index 0000000000..1265d25929 --- /dev/null +++ b/benchmark_v2/graph_infra/GRAPHLIST @@ -0,0 +1,10 @@ +# suite: Benchmarks +arkouda.graph +# suite: String Benchmarks +arkouda-string.graph +# suite: Build Stats +arkouda-comp.graph +# suite: Sort Cases +arkouda-sort-cases.graph +# suite: Bigint Benchmarks +arkouda-bigint.graph \ No newline at end of file diff --git a/benchmark_v2/graph_infra/IO.perfkeys b/benchmark_v2/graph_infra/IO.perfkeys new file mode 100644 index 0000000000..7a58e398d0 --- /dev/null +++ b/benchmark_v2/graph_infra/IO.perfkeys @@ -0,0 +1,4 @@ +write Average time HDF5 = +write Average rate HDF5 = +read Average time HDF5 = +read Average rate HDF5 = \ No newline at end of file diff --git a/benchmark_v2/graph_infra/README.md b/benchmark_v2/graph_infra/README.md new file mode 100644 index 0000000000..218020e56d --- /dev/null +++ b/benchmark_v2/graph_infra/README.md @@ -0,0 +1,16 @@ +This directory contains infrastructure required by the Chapel graphing scripts. + +- .perfkeys files contain the strings to search for in benchmark output. These + keys are then stored in .dat files. +- .graph files contain the graph information (title, perfkeys, graphkeys, .dat + file) +- The GRAPHLIST file is a meta file that lists the .graph files + +Benchmark output and a .perfkeys file are used by `computePerfStats` to create or +append to a .dat file. `genGraphs` then takes the .dat files and the meta +information in the .graph file to generate interactive graphs. To view the +graphs locally you can do: + + cd benchmark_v2/datdir/html + python3 -m http.server 8000 + open http://localhost:8000/ (or navigate to localhost:8000 in your browser) diff --git a/benchmark_v2/graph_infra/aggregate.perfkeys b/benchmark_v2/graph_infra/aggregate.perfkeys new file mode 100644 index 0000000000..f9516809c8 --- /dev/null +++ b/benchmark_v2/graph_infra/aggregate.perfkeys @@ -0,0 +1,26 @@ +Aggregate sum Average rate = +Aggregate prod Average rate = +Aggregate mean Average rate = +Aggregate min Average rate = +Aggregate max Average rate = +Aggregate argmin Average rate = +Aggregate argmax Average rate = +Aggregate any Average rate = +Aggregate all Average rate = +Aggregate xor Average rate = +Aggregate and Average rate = +Aggregate or Average rate = +Aggregate nunique Average rate = +Aggregate sum Average time = +Aggregate prod Average time = +Aggregate mean Average time = +Aggregate min Average time = +Aggregate max Average time = +Aggregate argmin Average time = +Aggregate argmax Average time = +Aggregate any Average time = +Aggregate all Average time = +Aggregate xor Average time = +Aggregate and Average time = +Aggregate or Average time = +Aggregate nunique Average time = diff --git a/benchmark_v2/graph_infra/arkouda-bigint.graph b/benchmark_v2/graph_infra/arkouda-bigint.graph new file mode 100644 index 0000000000..2cf144c676 --- /dev/null +++ b/benchmark_v2/graph_infra/arkouda-bigint.graph @@ -0,0 +1,29 @@ +perfkeys: bigint_from_uint_arrays Average rate =, bigint_to_uint_arrays Average rate = +graphkeys: bigint_from_uint_arrays GiB/s, bigint_to_uint_arrays GiB/s +files: bigint_conversion.dat, bigint_conversion.dat +graphtitle: Bigint Conversion Performance +ylabel: Performance (GiB/s) + +perfkeys: Average bigint stream rate = +graphkeys: bigint stream GiB/s +files: bigint_stream.dat +graphtitle: Bigint Stream Performance +ylabel: Performance (GiB/s) + +perfkeys: Average bigint AND rate =, Average bigint OR rate =, Average bigint SHIFT rate = +graphkeys: bigint AND GiB/s, bigint OR
GiB/s, bigint SHIFT GiB/s +files: bigint_bitwise_binops.dat, bigint_bitwise_binops.dat, bigint_bitwise_binops.dat +graphtitle: Bigint Bitwise Binops Performance +ylabel: Performance (GiB/s) + +perfkeys: 1-array Average rate =, 2-array Average rate =, 8-array Average rate =, 16-array Average rate = +graphkeys: 1 array Groupby GiB/s, 2 array Groupby GiB/s, 8 array Groupby GiB/s, 16 array Groupby GiB/s +files: bigint_groupby.dat, bigint_groupby.dat, bigint_groupby.dat, bigint_groupby.dat +graphtitle: Bigint Groupby Performance +ylabel: Performance (GiB/s) + +perfkeys: to_ndarray Average rate =, ak.array Average rate = +graphkeys: to_ndarray GiB/s, ak.array GiB/s +files: bigint_array_transfer.dat, bigint_array_transfer.dat +graphtitle: Bigint Array Transfer Performance +ylabel: Performance (GiB/s) diff --git a/benchmark_v2/graph_infra/arkouda-comp.graph b/benchmark_v2/graph_infra/arkouda-comp.graph new file mode 100644 index 0000000000..a206f36060 --- /dev/null +++ b/benchmark_v2/graph_infra/arkouda-comp.graph @@ -0,0 +1,11 @@ +perfkeys: total time : +graphkeys: Compile Time +files: comp-time.dat +graphtitle: Build Time +ylabel: Time (sec) + +perfkeys: Statements emitted: +graphkeys: Statements Emitted +files: emitted-code-size.dat +graphtitle: Emitted Code Size +ylabel: Statements diff --git a/benchmark_v2/graph_infra/arkouda-sort-cases.graph b/benchmark_v2/graph_infra/arkouda-sort-cases.graph new file mode 100644 index 0000000000..804a9d284f --- /dev/null +++ b/benchmark_v2/graph_infra/arkouda-sort-cases.graph @@ -0,0 +1,41 @@ +perfkeys: uniform int64 16-bit RadixSortLSD average rate =, uniform int64 32-bit RadixSortLSD average rate =, uniform int64 64-bit RadixSortLSD average rate =, uniform float64 RadixSortLSD average rate =, uniform int64 16-bit TwoArrayRadixSort average rate =, uniform int64 32-bit TwoArrayRadixSort average rate =, uniform int64 64-bit TwoArrayRadixSort average rate =, uniform float64 TwoArrayRadixSort average rate = +graphkeys: 16-bit LSD, 32-bit LSD, 64-bit LSD, float64 LSD, 16-bit MSD, 32-bit MSD, 64-bit MSD, float64 MSD +files: sort-cases.dat, sort-cases.dat, sort-cases.dat, sort-cases.dat, sort-cases.dat, sort-cases.dat, sort-cases.dat, sort-cases.dat +graphtitle: Uniformly Distributed Data +ylabel: Performance (GiB/s) + +perfkeys: power-law int64 RadixSortLSD average rate =, power-law float64 RadixSortLSD average rate =, power-law int64 TwoArrayRadixSort average rate =, power-law float64 TwoArrayRadixSort average rate = +graphkeys: int64 32-bit LSD, float64 LSD, int64 32-bit MSD, float64 MSD +files: sort-cases.dat, sort-cases.dat, sort-cases.dat, sort-cases.dat +graphtitle: Power-Law Distributed Data +ylabel: Performance (GiB/s) + +perfkeys: RMAT int64 RadixSortLSD average rate =, RMAT int64 TwoArrayRadixSort average rate = +graphkeys: RMAT int64 LSD, RMAT int64 MSD +files: sort-cases.dat, sort-cases.dat +graphtitle: RMAT-Generated Edges (coargsort) +ylabel: Performance (GiB/s) + +perfkeys: block-sorted concat int64 RadixSortLSD average rate =, block-sorted interleaved int64 RadixSortLSD average rate =, block-sorted concat int64 TwoArrayRadixSort average rate =, block-sorted interleaved int64 TwoArrayRadixSort average rate = +graphkeys: concatenated 32-bit LSD, interleaved 32-bit LSD, concatenated 32-bit MSD, interleaved 32-bit MSD +files: sort-cases.dat, sort-cases.dat, sort-cases.dat, sort-cases.dat +graphtitle: Combinations of Sorted Arrays +ylabel: Performance (GiB/s) + +perfkeys: refinement int64 RadixSortLSD average rate =, refinement int64 TwoArrayRadixSort
average rate = +graphkeys: 2*int64 LSD, 2*int64 MSD +files: sort-cases.dat, sort-cases.dat +graphtitle: Refinement of Sorted Array with an Unsorted Array (coargsort) +ylabel: Performance (GiB/s) + +perfkeys: datetime64[ns] RadixSortLSD average rate =, datetime64[ns] TwoArrayRadixSort average rate = +graphkeys: datetime64[ns] LSD, datetime64[ns] MSD +files: sort-cases.dat, sort-cases.dat +graphtitle: Simulated Timestamps with 1-Second Precision Stored as datetime64[ns] +ylabel: Performance (GiB/s) + +perfkeys: IP-like 2*int64 RadixSortLSD average rate =, IP-like 2*int64 TwoArrayRadixSort average rate = +graphkeys: IP-like LSD, IP-like MSD +files: sort-cases.dat, sort-cases.dat +graphtitle: Simulated 90/10 Mix of IPv4/IPv6 Addresses (coargsort) +ylabel: Performance (GiB/s) diff --git a/benchmark_v2/graph_infra/arkouda-string.graph b/benchmark_v2/graph_infra/arkouda-string.graph new file mode 100644 index 0000000000..ef29bbc3f3 --- /dev/null +++ b/benchmark_v2/graph_infra/arkouda-string.graph @@ -0,0 +1,59 @@ +perfkeys: Average rate = +graphkeys: Argsort GiB/s +files: str-argsort.dat +graphtitle: String Argsort Performance +ylabel: Performance (GiB/s) + +perfkeys: 1-array Average rate =, 2-array Average rate =, 8-array Average rate =, 16-array Average rate = +graphkeys: 1 array Coargsort GiB/s, 2 array Coargsort GiB/s, 8 array Coargsort GiB/s, 16 array Coargsort GiB/s +files: str-coargsort.dat, str-coargsort.dat, str-coargsort.dat, str-coargsort.dat +graphtitle: String Coargsort Performance +ylabel: Performance (GiB/s) + +perfkeys: 1-array Average rate =, 2-array Average rate =, 8-array Average rate =, 16-array Average rate = +graphkeys: 1 array Groupby GiB/s, 2 array Groupby GiB/s, 8 array Groupby GiB/s, 16 array Groupby GiB/s +files: str-groupby.dat, str-groupby.dat, str-groupby.dat, str-groupby.dat +graphtitle: String Groupby Performance +ylabel: Performance (GiB/s) + +perfkeys: small str array Average rate =, medium str array Average rate =, big str array Average rate = +graphkeys: small str array GiB/s, medium str array GiB/s, big str array GiB/s +files: small-str-groupby.dat, small-str-groupby.dat, small-str-groupby.dat +graphtitle: Small String Groupby Performance +ylabel: Performance (GiB/s) + +perfkeys: Average rate = +graphkeys: Gather GiB/s +files: str-gather.dat +graphtitle: String Gather Performance +ylabel: Performance (GiB/s) + +perfkeys: Medium average rate =, Large average rate = +graphkeys: Medium GiB/s, Large GiB/s +files: str-in1d.dat, str-in1d.dat +graphtitle: String in1d Performance +ylabel: Performance (GiB/s) + +perfkeys: Hashing good locality Average rate =, Hashing poor locality Average rate = +graphkeys: Good Locality, Poor Locality +files: str-locality.dat, str-locality.dat +graphtitle: String Hashing Performance +ylabel: Performance (GiB/s) + +perfkeys: Regex searching good locality Average rate =, Regex searching poor locality Average rate = +graphkeys: Good Locality, Poor Locality +files: str-locality.dat, str-locality.dat +graphtitle: String Regex Search Performance +ylabel: Performance (GiB/s) + +perfkeys: Casting good locality Average rate =, Casting poor locality Average rate = +graphkeys: Good Locality, Poor Locality +files: str-locality.dat, str-locality.dat +graphtitle: String cast-to-float Performance +ylabel: Performance (GiB/s) + +perfkeys: Comparing to scalar good locality Average rate =, Comparing to scalar poor locality Average rate = +graphkeys: Good Locality, Poor Locality +files: str-locality.dat, str-locality.dat +graphtitle: String
compare-vs-scalar Performance +ylabel: Performance (GiB/s) diff --git a/benchmark_v2/graph_infra/arkouda.graph b/benchmark_v2/graph_infra/arkouda.graph new file mode 100644 index 0000000000..d6d31cb489 --- /dev/null +++ b/benchmark_v2/graph_infra/arkouda.graph @@ -0,0 +1,149 @@ +perfkeys: Average rate = +graphkeys: Stream GiB/s +files: stream.dat +graphtitle: Stream Performance +ylabel: Performance (GiB/s) + +perfkeys: Average rate = +graphkeys: Argsort GiB/s +files: argsort.dat +graphtitle: Argsort Performance +ylabel: Performance (GiB/s) + +perfkeys: 1-array Average rate =, 2-array Average rate =, 8-array Average rate =, 16-array Average rate = +graphkeys: 1 array Coargsort GiB/s, 2 array Coargsort GiB/s, 8 array Coargsort GiB/s, 16 array Coargsort GiB/s +files: coargsort.dat, coargsort.dat, coargsort.dat, coargsort.dat +graphtitle: Coargsort Performance +ylabel: Performance (GiB/s) + +perfkeys: 1-array Average rate =, 2-array Average rate =, 8-array Average rate =, 16-array Average rate = +graphkeys: 1 array Groupby GiB/s, 2 array Groupby GiB/s, 8 array Groupby GiB/s, 16 array Groupby GiB/s +files: groupby.dat, groupby.dat, groupby.dat, groupby.dat +graphtitle: Groupby Performance +ylabel: Performance (GiB/s) + +perfkeys: Aggregate sum Average rate =, Aggregate xor Average rate =, Aggregate and Average rate =, Aggregate or Average rate =, Aggregate mean Average rate = +graphkeys: sum (GiB/s), xor (GiB/s), and (GiB/s), or (GiB/s), mean (GiB/s) +files: aggregate.dat, aggregate.dat, aggregate.dat, aggregate.dat, aggregate.dat +graphtitle: Grouped Aggregation Performance (Tier 1) +ylabel: Performance (GiB/s) + +perfkeys: Aggregate min Average rate =, Aggregate max Average rate =, Aggregate argmin Average rate =, Aggregate argmax Average rate =, Aggregate prod Average rate = +graphkeys: min (GiB/s), max (GiB/s), argmin (GiB/s), argmax (GiB/s), prod (GiB/s) +files: aggregate.dat, aggregate.dat, aggregate.dat, aggregate.dat, aggregate.dat +graphtitle: Grouped Aggregation Performance (Tier 2) +ylabel: Performance (GiB/s) + +perfkeys: Aggregate any Average rate =, Aggregate all Average rate =, Aggregate nunique Average rate = +graphkeys: any (GiB/s), all (GiB/s), nunique (GiB/s) +files: aggregate.dat, aggregate.dat, aggregate.dat +graphtitle: Grouped Aggregation Performance (Tier 3) +ylabel: Performance (GiB/s) + +perfkeys: Average rate = +graphkeys: Gather GiB/s +files: gather.dat +graphtitle: Gather Performance +ylabel: Performance (GiB/s) + +perfkeys: Average rate = +graphkeys: Scatter GiB/s +files: scatter.dat +graphtitle: Scatter Performance +ylabel: Performance (GiB/s) + +perfkeys: cumsum Average rate =, cumprod Average rate = +graphkeys: Cumsum GiB/s, Cumprod GiB/s +files: scan.dat, scan.dat +graphtitle: Scan Performance +ylabel: Performance (GiB/s) + +perfkeys: sum Average rate =, prod Average rate =, min Average rate =, max Average rate = +graphkeys: Sum GiB/s, Prod GiB/s, Min GiB/s, Max GiB/s +files: reduce.dat, reduce.dat, reduce.dat, reduce.dat +graphtitle: Reduce Performance +ylabel: Performance (GiB/s) + +perfkeys: Medium average rate =, Large average rate = +graphkeys: Medium GiB/s, Large GiB/s +files: in1d.dat, in1d.dat +graphtitle: in1d Performance +ylabel: Performance (GiB/s) + +perfkeys: intersect1d Average rate =, union1d Average rate =, setxor1d Average rate =, setdiff1d Average rate = +graphkeys: Intersect GiB/s, Union GiB/s, Xor GiB/s, Diff GiB/s +files: setops.dat, setops.dat, setops.dat, setops.dat +graphtitle: Set Operations Performance +ylabel: Performance (GiB/s) + 
+perfkeys: zeros Average rate =, ones Average rate =, randint Average rate = +graphkeys: Zeros GiB/s, Ones GiB/s, Randint GiB/s +files: array_create.dat, array_create.dat, array_create.dat +graphtitle: Array Creation Performance +ylabel: Performance (GiB/s) + +perfkeys: to_ndarray Average rate =, ak.array Average rate = +graphkeys: to_ndarray GiB/s, ak.array GiB/s +files: array_transfer.dat, array_transfer.dat +graphtitle: Array Transfer Performance +ylabel: Performance (GiB/s) + +perfkeys: write Average rate HDF5 =, read Average rate HDF5 = +graphkeys: Write GiB/s, Read GiB/s +files: IO.dat, IO.dat +graphtitle: IO Performance +ylabel: Performance (GiB/s) + +perfkeys: write Average rate HDF5 =, read Average rate HDF5 = +graphkeys: Write GiB/s, Read GiB/s +files: multiIO.dat, multiIO.dat +graphtitle: HDF5 10 Files/Loc IO Performance +ylabel: Performance (GiB/s) + +perfkeys: write Average rate none =, write Average rate snappy =, write Average rate gzip =, write Average rate brotli =, write Average rate zstd =, write Average rate lz4 =, read Average rate none =, read Average rate snappy =, read Average rate gzip =, read Average rate brotli =, read Average rate zstd =, read Average rate lz4 = +graphkeys: Write no compression GiB/s, Write snappy GiB/s, Write gzip GiB/s, Write brotli GiB/s, Write zstd GiB/s, Write lz4 GiB/s, Read no compression GiB/s, Read snappy GiB/s, Read gzip GiB/s, Read brotli GiB/s, Read zstd GiB/s, Read lz4 GiB/s +repeat-files: parquetIO.dat +graphtitle: Parquet IO Performance +ylabel: Performance (GiB/s) + +perfkeys: write Average rate none =, write Average rate snappy =, write Average rate gzip =, write Average rate brotli =, write Average rate zstd =, write Average rate lz4 =, read Average rate none =, read Average rate snappy =, read Average rate gzip =, read Average rate brotli =, read Average rate zstd =, read Average rate lz4 = +graphkeys: Write no compression GiB/s, Write snappy GiB/s, Write gzip GiB/s, Write brotli GiB/s, Write zstd GiB/s, Write lz4 GiB/s, Read no compression GiB/s, Read snappy GiB/s, Read gzip GiB/s, Read brotli GiB/s, Read zstd GiB/s, Read lz4 GiB/s +repeat-files: parquetMultiIO.dat +graphtitle: Parquet 10 Files/Loc IO Performance +ylabel: Performance (GiB/s) + +perfkeys: write Average rate CSV =, read Average rate CSV = +graphkeys: Write GiB/s, Read GiB/s +repeat-files: csvIO.dat +graphtitle: CSV IO Performance +ylabel: Performance (GiB/s) + +perfkeys: non-regex with literal substring Average rate =, regex with literal substring Average rate =, regex with pattern Average rate = +graphkeys: non-regex with literal substring GiB/s, regex with literal substring GiB/s, regex with pattern GiB/s +files: substring_search.dat, substring_search.dat, substring_search.dat +graphtitle: Substring Search Performance +ylabel: Performance (GiB/s) + +perfkeys: non-regex flatten with literal delimiter Average rate =, regex flatten with literal delimiter Average rate =, regex flatten with pattern delimiter Average rate = +graphkeys: non-regex flatten with literal delimiter GiB/s, regex flatten with literal delimiter GiB/s, regex flatten with pattern delimiter GiB/s +files: flatten.dat, flatten.dat, flatten.dat +graphtitle: Flatten Performance +ylabel: Performance (GiB/s) + +perfkeys: _get_head_tail_server Average time =, _get_head_tail Average time = +graphkeys: _get_head_tail_server Average time s, _get_head_tail Average time s +files: dataframe.dat, dataframe.dat +graphtitle: DataFrame Display Performance +ylabel: Time (Seconds) + +perfkeys: Average idna 
encode rate =, Average idna decode rate =, Average ascii encode rate =, Average ascii decode rate = +graphkeys: idna encode GiB/s, idna decode GiB/s, ascii encode GiB/s, ascii decode GiB/s +files: encode.dat, encode.dat, encode.dat, encode.dat +graphtitle: Encode/Decode Performance +ylabel: Performance (GiB/s) + +perfkeys: Average rate = +graphkeys: Noop ops/s +files: noop.dat +graphtitle: Noop Performance +ylabel: Performance (ops/s) diff --git a/benchmark_v2/graph_infra/array_create.perfkeys b/benchmark_v2/graph_infra/array_create.perfkeys new file mode 100644 index 0000000000..cb5589ecb7 --- /dev/null +++ b/benchmark_v2/graph_infra/array_create.perfkeys @@ -0,0 +1,6 @@ +zeros Average time = +zeros Average rate = +ones Average time = +ones Average rate = +randint Average time = +randint Average rate = diff --git a/benchmark_v2/graph_infra/array_transfer.perfkeys b/benchmark_v2/graph_infra/array_transfer.perfkeys new file mode 100644 index 0000000000..d8adc0c0c9 --- /dev/null +++ b/benchmark_v2/graph_infra/array_transfer.perfkeys @@ -0,0 +1,4 @@ +to_ndarray Average time = +to_ndarray Average rate = +ak.array Average time = +ak.array Average rate = diff --git a/benchmark_v2/graph_infra/bigint_array_transfer.perfkeys b/benchmark_v2/graph_infra/bigint_array_transfer.perfkeys new file mode 100644 index 0000000000..d8adc0c0c9 --- /dev/null +++ b/benchmark_v2/graph_infra/bigint_array_transfer.perfkeys @@ -0,0 +1,4 @@ +to_ndarray Average time = +to_ndarray Average rate = +ak.array Average time = +ak.array Average rate = diff --git a/benchmark_v2/graph_infra/bigint_bitwise_binops.perfkeys b/benchmark_v2/graph_infra/bigint_bitwise_binops.perfkeys new file mode 100644 index 0000000000..ac72fe9c30 --- /dev/null +++ b/benchmark_v2/graph_infra/bigint_bitwise_binops.perfkeys @@ -0,0 +1,6 @@ +Average bigint AND time = +Average bigint AND rate = +Average bigint OR time = +Average bigint OR rate = +Average bigint SHIFT time = +Average bigint SHIFT rate = diff --git a/benchmark_v2/graph_infra/bigint_conversion.perfkeys b/benchmark_v2/graph_infra/bigint_conversion.perfkeys new file mode 100644 index 0000000000..1b7bb53efc --- /dev/null +++ b/benchmark_v2/graph_infra/bigint_conversion.perfkeys @@ -0,0 +1,4 @@ +bigint_from_uint_arrays Average time = +bigint_from_uint_arrays Average rate = +bigint_to_uint_arrays Average time = +bigint_to_uint_arrays Average rate = diff --git a/benchmark_v2/graph_infra/bigint_groupby.perfkeys b/benchmark_v2/graph_infra/bigint_groupby.perfkeys new file mode 100644 index 0000000000..8ace5cd38a --- /dev/null +++ b/benchmark_v2/graph_infra/bigint_groupby.perfkeys @@ -0,0 +1,8 @@ +1-array Average time = +1-array Average rate = +2-array Average time = +2-array Average rate = +8-array Average time = +8-array Average rate = +16-array Average time = +16-array Average rate = diff --git a/benchmark_v2/graph_infra/bigint_stream.perfkeys b/benchmark_v2/graph_infra/bigint_stream.perfkeys new file mode 100644 index 0000000000..7e75908c69 --- /dev/null +++ b/benchmark_v2/graph_infra/bigint_stream.perfkeys @@ -0,0 +1,2 @@ +Average bigint stream time = +Average bigint stream rate = diff --git a/benchmark_v2/graph_infra/coargsort.perfkeys b/benchmark_v2/graph_infra/coargsort.perfkeys new file mode 100644 index 0000000000..8ace5cd38a --- /dev/null +++ b/benchmark_v2/graph_infra/coargsort.perfkeys @@ -0,0 +1,8 @@ +1-array Average time = +1-array Average rate = +2-array Average time = +2-array Average rate = +8-array Average time = +8-array Average rate = +16-array Average time = +16-array Average 
rate = diff --git a/benchmark_v2/graph_infra/comp-time.perfkeys b/benchmark_v2/graph_infra/comp-time.perfkeys new file mode 100644 index 0000000000..2176642498 --- /dev/null +++ b/benchmark_v2/graph_infra/comp-time.perfkeys @@ -0,0 +1 @@ +total time : diff --git a/benchmark_v2/graph_infra/csvIO.perfkeys b/benchmark_v2/graph_infra/csvIO.perfkeys new file mode 100644 index 0000000000..6b3e1482f1 --- /dev/null +++ b/benchmark_v2/graph_infra/csvIO.perfkeys @@ -0,0 +1,4 @@ +write Average time CSV = +write Average rate CSV = +read Average time CSV = +read Average rate CSV = \ No newline at end of file diff --git a/benchmark_v2/graph_infra/dataframe.perfkeys b/benchmark_v2/graph_infra/dataframe.perfkeys new file mode 100644 index 0000000000..1aac236e14 --- /dev/null +++ b/benchmark_v2/graph_infra/dataframe.perfkeys @@ -0,0 +1,4 @@ +_get_head_tail_server Average time = +_get_head_tail_server Average rate = +_get_head_tail Average time = +_get_head_tail Average rate = \ No newline at end of file diff --git a/benchmark_v2/graph_infra/emitted-code-size.perfkeys b/benchmark_v2/graph_infra/emitted-code-size.perfkeys new file mode 100644 index 0000000000..883cf0bc52 --- /dev/null +++ b/benchmark_v2/graph_infra/emitted-code-size.perfkeys @@ -0,0 +1 @@ +Statements emitted: diff --git a/benchmark_v2/graph_infra/encode.perfkeys b/benchmark_v2/graph_infra/encode.perfkeys new file mode 100644 index 0000000000..c09495120c --- /dev/null +++ b/benchmark_v2/graph_infra/encode.perfkeys @@ -0,0 +1,8 @@ +Average idna encode time = +Average idna encode rate = +Average ascii encode time = +Average ascii encode rate = +Average idna decode time = +Average idna decode rate = +Average ascii decode time = +Average ascii decode rate = \ No newline at end of file diff --git a/benchmark_v2/graph_infra/flatten.perfkeys b/benchmark_v2/graph_infra/flatten.perfkeys new file mode 100644 index 0000000000..6048f1fc2f --- /dev/null +++ b/benchmark_v2/graph_infra/flatten.perfkeys @@ -0,0 +1,6 @@ +non-regex flatten with literal delimiter Average time = +non-regex flatten with literal delimiter Average rate = +regex flatten with literal delimiter Average time = +regex flatten with literal delimiter Average rate = +regex flatten with pattern delimiter Average time = +regex flatten with pattern delimiter Average rate = diff --git a/benchmark_v2/graph_infra/groupby.perfkeys b/benchmark_v2/graph_infra/groupby.perfkeys new file mode 100644 index 0000000000..8ace5cd38a --- /dev/null +++ b/benchmark_v2/graph_infra/groupby.perfkeys @@ -0,0 +1,8 @@ +1-array Average time = +1-array Average rate = +2-array Average time = +2-array Average rate = +8-array Average time = +8-array Average rate = +16-array Average time = +16-array Average rate = diff --git a/benchmark_v2/graph_infra/in1d.perfkeys b/benchmark_v2/graph_infra/in1d.perfkeys new file mode 100644 index 0000000000..4811158832 --- /dev/null +++ b/benchmark_v2/graph_infra/in1d.perfkeys @@ -0,0 +1,4 @@ +Medium average time = +Medium average rate = +Large average time = +Large average rate = diff --git a/benchmark_v2/graph_infra/multiIO.perfkeys b/benchmark_v2/graph_infra/multiIO.perfkeys new file mode 100644 index 0000000000..7a58e398d0 --- /dev/null +++ b/benchmark_v2/graph_infra/multiIO.perfkeys @@ -0,0 +1,4 @@ +write Average time HDF5 = +write Average rate HDF5 = +read Average time HDF5 = +read Average rate HDF5 = \ No newline at end of file diff --git a/benchmark_v2/graph_infra/parquetIO.perfkeys b/benchmark_v2/graph_infra/parquetIO.perfkeys new file mode 100644 index 0000000000..8fd696dc1f 
--- /dev/null +++ b/benchmark_v2/graph_infra/parquetIO.perfkeys @@ -0,0 +1,24 @@ +write Average time none = +write Average rate none = +write Average time snappy = +write Average rate snappy = +write Average time gzip = +write Average rate gzip = +write Average time brotli = +write Average rate brotli = +write Average time zstd = +write Average rate zstd = +write Average time lz4 = +write Average rate lz4 = +read Average time none = +read Average rate none = +read Average time snappy = +read Average rate snappy = +read Average time gzip = +read Average rate gzip = +read Average time brotli = +read Average rate brotli = +read Average time zstd = +read Average rate zstd = +read Average time lz4 = +read Average rate lz4 = diff --git a/benchmark_v2/graph_infra/parquetMultiIO.perfkeys b/benchmark_v2/graph_infra/parquetMultiIO.perfkeys new file mode 100644 index 0000000000..8fd696dc1f --- /dev/null +++ b/benchmark_v2/graph_infra/parquetMultiIO.perfkeys @@ -0,0 +1,24 @@ +write Average time none = +write Average rate none = +write Average time snappy = +write Average rate snappy = +write Average time gzip = +write Average rate gzip = +write Average time brotli = +write Average rate brotli = +write Average time zstd = +write Average rate zstd = +write Average time lz4 = +write Average rate lz4 = +read Average time none = +read Average rate none = +read Average time snappy = +read Average rate snappy = +read Average time gzip = +read Average rate gzip = +read Average time brotli = +read Average rate brotli = +read Average time zstd = +read Average rate zstd = +read Average time lz4 = +read Average rate lz4 = diff --git a/benchmark_v2/graph_infra/perfkeys b/benchmark_v2/graph_infra/perfkeys new file mode 100644 index 0000000000..bb7c261e03 --- /dev/null +++ b/benchmark_v2/graph_infra/perfkeys @@ -0,0 +1,2 @@ +Average time = +Average rate = diff --git a/benchmark_v2/graph_infra/reduce.perfkeys b/benchmark_v2/graph_infra/reduce.perfkeys new file mode 100644 index 0000000000..5673ad468f --- /dev/null +++ b/benchmark_v2/graph_infra/reduce.perfkeys @@ -0,0 +1,8 @@ +sum Average time = +sum Average rate = +prod Average time = +prod Average rate = +min Average time = +min Average rate = +max Average time = +max Average rate = diff --git a/benchmark_v2/graph_infra/scan.perfkeys b/benchmark_v2/graph_infra/scan.perfkeys new file mode 100644 index 0000000000..185dfff27d --- /dev/null +++ b/benchmark_v2/graph_infra/scan.perfkeys @@ -0,0 +1,4 @@ +cumsum Average time = +cumsum Average rate = +cumprod Average time = +cumprod Average rate = diff --git a/benchmark_v2/graph_infra/setops.perfkeys b/benchmark_v2/graph_infra/setops.perfkeys new file mode 100644 index 0000000000..18f08f230d --- /dev/null +++ b/benchmark_v2/graph_infra/setops.perfkeys @@ -0,0 +1,8 @@ +intersect1d Average time = +intersect1d Average rate = +union1d Average time = +union1d Average rate = +setxor1d Average time = +setxor1d Average rate = +setdiff1d Average time = +setdiff1d Average rate = \ No newline at end of file diff --git a/benchmark_v2/graph_infra/small-str-groupby.perfkeys b/benchmark_v2/graph_infra/small-str-groupby.perfkeys new file mode 100644 index 0000000000..c0aed2d861 --- /dev/null +++ b/benchmark_v2/graph_infra/small-str-groupby.perfkeys @@ -0,0 +1,6 @@ +small str array Average time = +small str array Average rate = +medium str array Average time = +medium str array Average rate = +big str array Average time = +big str array Average rate = diff --git a/benchmark_v2/graph_infra/sort-cases.perfkeys 
b/benchmark_v2/graph_infra/sort-cases.perfkeys new file mode 100644 index 0000000000..abe56ee5ef --- /dev/null +++ b/benchmark_v2/graph_infra/sort-cases.perfkeys @@ -0,0 +1,48 @@ +uniform int64 16-bit RadixSortLSD average time = +uniform int64 16-bit RadixSortLSD average rate = +uniform int64 16-bit TwoArrayRadixSort average time = +uniform int64 16-bit TwoArrayRadixSort average rate = +uniform int64 32-bit RadixSortLSD average time = +uniform int64 32-bit RadixSortLSD average rate = +uniform int64 32-bit TwoArrayRadixSort average time = +uniform int64 32-bit TwoArrayRadixSort average rate = +uniform int64 64-bit RadixSortLSD average time = +uniform int64 64-bit RadixSortLSD average rate = +uniform int64 64-bit TwoArrayRadixSort average time = +uniform int64 64-bit TwoArrayRadixSort average rate = +uniform float64 RadixSortLSD average time = +uniform float64 RadixSortLSD average rate = +uniform float64 TwoArrayRadixSort average time = +uniform float64 TwoArrayRadixSort average rate = +power-law float64 RadixSortLSD average time = +power-law float64 RadixSortLSD average rate = +power-law float64 TwoArrayRadixSort average time = +power-law float64 TwoArrayRadixSort average rate = +power-law int64 RadixSortLSD average time = +power-law int64 RadixSortLSD average rate = +power-law int64 TwoArrayRadixSort average time = +power-law int64 TwoArrayRadixSort average rate = +RMAT int64 RadixSortLSD average time = +RMAT int64 RadixSortLSD average rate = +RMAT int64 TwoArrayRadixSort average time = +RMAT int64 TwoArrayRadixSort average rate = +block-sorted concat int64 RadixSortLSD average time = +block-sorted concat int64 RadixSortLSD average rate = +block-sorted concat int64 TwoArrayRadixSort average time = +block-sorted concat int64 TwoArrayRadixSort average rate = +block-sorted interleaved int64 RadixSortLSD average time = +block-sorted interleaved int64 RadixSortLSD average rate = +block-sorted interleaved int64 TwoArrayRadixSort average time = +block-sorted interleaved int64 TwoArrayRadixSort average rate = +refinement int64 RadixSortLSD average time = +refinement int64 RadixSortLSD average rate = +refinement int64 TwoArrayRadixSort average time = +refinement int64 TwoArrayRadixSort average rate = +datetime64[ns] RadixSortLSD average time = +datetime64[ns] RadixSortLSD average rate = +datetime64[ns] TwoArrayRadixSort average time = +datetime64[ns] TwoArrayRadixSort average rate = +IP-like 2*int64 RadixSortLSD average time = +IP-like 2*int64 RadixSortLSD average rate = +IP-like 2*int64 TwoArrayRadixSort average time = +IP-like 2*int64 TwoArrayRadixSort average rate = \ No newline at end of file diff --git a/benchmark_v2/graph_infra/str-coargsort.perfkeys b/benchmark_v2/graph_infra/str-coargsort.perfkeys new file mode 100644 index 0000000000..8ace5cd38a --- /dev/null +++ b/benchmark_v2/graph_infra/str-coargsort.perfkeys @@ -0,0 +1,8 @@ +1-array Average time = +1-array Average rate = +2-array Average time = +2-array Average rate = +8-array Average time = +8-array Average rate = +16-array Average time = +16-array Average rate = diff --git a/benchmark_v2/graph_infra/str-groupby.perfkeys b/benchmark_v2/graph_infra/str-groupby.perfkeys new file mode 100644 index 0000000000..8ace5cd38a --- /dev/null +++ b/benchmark_v2/graph_infra/str-groupby.perfkeys @@ -0,0 +1,8 @@ +1-array Average time = +1-array Average rate = +2-array Average time = +2-array Average rate = +8-array Average time = +8-array Average rate = +16-array Average time = +16-array Average rate = diff --git 
a/benchmark_v2/graph_infra/str-in1d.perfkeys b/benchmark_v2/graph_infra/str-in1d.perfkeys new file mode 100644 index 0000000000..4811158832 --- /dev/null +++ b/benchmark_v2/graph_infra/str-in1d.perfkeys @@ -0,0 +1,4 @@ +Medium average time = +Medium average rate = +Large average time = +Large average rate = diff --git a/benchmark_v2/graph_infra/str-locality.perfkeys b/benchmark_v2/graph_infra/str-locality.perfkeys new file mode 100644 index 0000000000..03795d505b --- /dev/null +++ b/benchmark_v2/graph_infra/str-locality.perfkeys @@ -0,0 +1,16 @@ +Hashing good locality Average time = +Hashing good locality Average rate = +Hashing poor locality Average time = +Hashing poor locality Average rate = +Regex searching good locality Average time = +Regex searching good locality Average rate = +Regex searching poor locality Average time = +Regex searching poor locality Average rate = +Casting good locality Average time = +Casting good locality Average rate = +Casting poor locality Average time = +Casting poor locality Average rate = +Comparing to scalar good locality Average time = +Comparing to scalar good locality Average rate = +Comparing to scalar poor locality Average time = +Comparing to scalar poor locality Average rate = diff --git a/benchmark_v2/graph_infra/substring_search.perfkeys b/benchmark_v2/graph_infra/substring_search.perfkeys new file mode 100644 index 0000000000..f38ef0f768 --- /dev/null +++ b/benchmark_v2/graph_infra/substring_search.perfkeys @@ -0,0 +1,6 @@ +non-regex with literal substring Average time = +non-regex with literal substring Average rate = +regex with literal substring Average time = +regex with literal substring Average rate = +regex with pattern Average time = +regex with pattern Average rate = diff --git a/benchmark_v2/reformat_benchmark_results.py b/benchmark_v2/reformat_benchmark_results.py new file mode 100644 index 0000000000..181e1788f4 --- /dev/null +++ b/benchmark_v2/reformat_benchmark_results.py @@ -0,0 +1,415 @@ +#!/usr/bin/env python3 +""" +This script reformats pytest-benchmark JSON results into the .dat files +expected by the Chapel graphing infrastructure and then generates graphs +from them. Graphing requires that $CHPL_HOME points to a valid Chapel +directory.
+""" +import argparse +import csv +import json +import logging +import os +import re +import subprocess +import sys +from datetime import datetime +from typing import Union + +benchmark_dir = os.path.dirname(__file__) +util_dir = os.path.join(benchmark_dir, "..", "server_util", "test") +sys.path.insert(0, os.path.abspath(util_dir)) + +logging.basicConfig(level=logging.INFO) + +BENCHMARKS = [ + # "stream", + "argsort", + "coargsort", + # "groupby", + "aggregate", + # "gather", + # "scatter", + # "reduce", + # "in1d", + # "scan", + # "noop", + # "setops", + # "array_create", + # "array_transfer", + # "IO", + # "csvIO", + # "small-str-groupby", + # "str-argsort", + # "str-coargsort", + # "str-groupby", + # "str-gather", + # "str-in1d", + # "substring_search", + # "split", + # "sort-cases", + # "multiIO", + # "str-locality", + # "dataframe", + # "encode", + # "bigint_conversion", + # "bigint_stream", + # "bigint_bitwise_binops", + # "bigint_groupby", + # "bigint_array_transfer", +] + +if os.getenv("ARKOUDA_SERVER_PARQUET_SUPPORT"): + BENCHMARKS.append("parquetIO") + BENCHMARKS.append("parquetMultiIO") + + +def get_chpl_util_dir(): + """Get the Chapel directory that contains graph generation utilities.""" + CHPL_HOME = os.getenv("CHPL_HOME") + if not CHPL_HOME: + logging.error("$CHPL_HOME not set") + sys.exit(1) + chpl_util_dir = os.path.join(CHPL_HOME, "util", "test") + if not os.path.isdir(chpl_util_dir): + logging.error("{} does not exist".format(chpl_util_dir)) + sys.exit(1) + return chpl_util_dir + + +def generate_graphs(args): + """ + Generate graphs using the existing .dat files and graph infrastructure. + """ + genGraphs = os.path.join(get_chpl_util_dir(), "genGraphs") + cmd = [ + genGraphs, + "--perfdir", + args.dat_dir, + "--outdir", + args.graph_dir, + "--graphlist", + os.path.join(args.graph_infra, "GRAPHLIST"), + "--testdir", + args.graph_infra, + "--alttitle", + "Arkouda Performance Graphs", + ] + + if args.platform_name: + cmd += ["--name", args.platform_name] + if args.configs: + cmd += ["--configs", args.configs] + if args.start_date: + cmd += ["--startdate", args.start_date] + if args.annotations: + cmd += ["--annotate", args.annotations] + + subprocess.check_output(cmd) + + +def create_parser(): + parser = argparse.ArgumentParser(description=__doc__) + + parser.add_argument( + "--dat-dir", + default=os.path.join(benchmark_dir, "datdir"), + help="Directory with .dat files stored", + ) + parser.add_argument("--graph-dir", help="Directory to place generated graphs") + parser.add_argument( + "--graph-infra", + default=os.path.join(benchmark_dir, "graph_infra"), + help="Directory containing graph infrastructure", + ) + parser.add_argument("--platform-name", default="", help="Test platform name") + parser.add_argument("--description", default="", help="Description of this configuration") + parser.add_argument("--annotations", default="", help="File containing annotations") + parser.add_argument("--configs", help="comma seperate list of configurations") + parser.add_argument("--start-date", help="graph start date") + parser.add_argument("--benchmark-data", help="the benchnmark output data in json format.") + + return parser + + +def get_header_dict(directory_path): + """Get a dictionary there the keys are the benchmark name and the values are list of header fields for the .dat file.""" + headers = read_files_in_directory(directory_path) + for benchmark_name in BENCHMARKS: + if benchmark_name not in headers.keys(): + headers[benchmark_name] = ["Average time =", "Average rate ="] + + 
for key in headers.keys(): + headers[key].insert(0, "# Date") + + return headers + + +# This algorithm for reading files into a dictionary was generated with assistance from Perplexity AI (2024). +def read_files_in_directory(directory_path): + """Get a dictionary where the keys are benchmark names and the values are lists of header fields for the .dat file. + Only header fields from the .perfkeys files in the graph_infra directory are retrieved.""" + # Dictionary to store file names and their contents + file_contents = {} + + # Iterate through all files in the directory + for filename in os.listdir(directory_path): + file_path = os.path.join(directory_path, filename) + + # Check if it's a file (not a directory) + if (".perfkeys" in file_path) and os.path.isfile(file_path): + try: + # Open and read the file + with open(file_path, "r", encoding="utf-8") as file: + # Read all lines and store them in a list + lines = file.readlines() + + # Strip newline characters from each line + lines = [line.strip() for line in lines] + + # Add the file name and its contents to the dictionary + # (allow hyphens so names like "small-str-groupby" are captured whole) + key = re.search(r"([\w\-]+)\.perfkeys", filename)[1] + file_contents[key] = lines + except Exception as e: + print(f"Error reading file {filename}: {str(e)}") + + return file_contents + + +def get_nested_value(data: dict, keys: list): + """Look up a value in a dictionary using a list of keys.""" + for key in keys: + if isinstance(data, dict): + data = data.get(key, {}) + else: + return None + return data if data != {} else None + + +def get_value(field: str, benchmark_name: str, field_lookup_map: dict, benchmark_data): + """Get the value of a field for a benchmark using the field_lookup_map and benchmark_data in pytest-benchmark JSON format.""" + regex_str = None + if ( + field_lookup_map.get(benchmark_name).get(field) is not None + and isinstance(field_lookup_map.get(benchmark_name).get(field).get("lookup_regex"), str) + and field_lookup_map.get(benchmark_name).get(field).get("lookup_regex") != "" + ): + regex_str = field_lookup_map.get(benchmark_name).get(field).get("lookup_regex") + + lookup_path = None + if field_lookup_map.get(benchmark_name).get(field) is not None and isinstance( + field_lookup_map.get(benchmark_name).get(field).get("lookup_path"), list + ): + lookup_path = field_lookup_map.get(benchmark_name).get(field).get("lookup_path") + + if field == "# Date": + date_str = benchmark_data["datetime"] + return datetime.fromisoformat(date_str).strftime("%m/%d/%y") + elif regex_str is not None and regex_str != "": + return compute_average(regex_str, lookup_path, benchmark_data) + elif benchmark_name in field_lookup_map.keys() and field in field_lookup_map[benchmark_name].keys(): + group = field_lookup_map[benchmark_name][field]["group"] + name = field_lookup_map[benchmark_name][field]["name"] + lookup_path = field_lookup_map[benchmark_name][field]["lookup_path"] + + for benchmark in benchmark_data["benchmarks"]: + if (benchmark["group"] == group) and (benchmark["name"] == name): + value = get_nested_value(benchmark, lookup_path) + return get_float_value(value) + + print(f"Could not get value for {field} in {benchmark_name} data.") + return -1.0 + + +def compute_average(benchmark_name_regex: str, keys: list, benchmark_data): + """Compute the average value of a statistic, using a regex on the benchmark name to determine which values to use.""" + total = 0.0 + N = 0 + for benchmark in benchmark_data["benchmarks"]: + if re.match(benchmark_name_regex, benchmark["name"]): + value = get_float_value(get_nested_value(benchmark,
keys)) + total += value + N += 1 + if N > 0: + return total / N + else: + print(f"Could not compute average over {benchmark_name_regex}.") + return -1.0 + + +def get_float_value(value: Union[float, str]): + if isinstance(value, str): + # Extract the leading float from a string value. + return float(re.search(r"[\d\.]+", value)[0]) + elif isinstance(value, float): + return value + else: + raise TypeError("In get_float_value, value must be a float or string.") + + +def gen_lookup_map(write=False, out_file="field_lookup_map.json"): + """Temporary helper that generates the lookup dictionary and saves it to a file when write=True.""" + field_lookup_map = {} + for benchmark_name in BENCHMARKS: + field_lookup_map[benchmark_name] = {} + + field_lookup_map[benchmark_name]["Average rate ="] = get_lookup_dict( + name="bench_" + benchmark_name, + benchmark_name=benchmark_name, + lookup_path=[ + "extra_info", + "transfer_rate", + ], + lookup_regex="bench_" + benchmark_name + r"\[[\w\d]*\]", + ) + + field_lookup_map[benchmark_name]["Average time ="] = get_lookup_dict( + name="bench_" + benchmark_name, + benchmark_name=benchmark_name, + lookup_path=[ + "stats", + "mean", + ], + lookup_regex="bench_" + benchmark_name + r"\[[\w\d]*\]", + ) + + for op in [ + "prod", + "sum", + "mean", + "min", + "max", + "argmin", + "argmax", + "any", + "all", + "xor", + "and", + "or", + "nunique", + ]: + + field_lookup_map["aggregate"][f"Aggregate {op} Average rate ="] = get_lookup_dict( + group="GroupBy.aggregate", + name=f"bench_aggregate[{op}]", + benchmark_name="aggregate", + lookup_path=[ + "extra_info", + "transfer_rate", + ], + ) + + field_lookup_map["aggregate"][f"Aggregate {op} Average time ="] = get_lookup_dict( + group="GroupBy.aggregate", + name=f"bench_aggregate[{op}]", + benchmark_name="aggregate", + lookup_path=[ + "stats", + "mean", + ], + ) + + for num in [1, 2, 8, 16]: + + field_lookup_map["coargsort"][f"{num}-array Average rate ="] = get_lookup_dict( + group="Arkouda_CoArgSort", + benchmark_name="coargsort", + lookup_path=["extra_info", "transfer_rate"], + lookup_regex=f"bench_coargsort\\[[\\w\\d]*-{num}\\]", + ) + + field_lookup_map["coargsort"][f"{num}-array Average time ="] = get_lookup_dict( + group="Arkouda_CoArgSort", + benchmark_name="coargsort", + lookup_path=[ + "stats", + "mean", + ], + lookup_regex=f"bench_coargsort\\[[\\w\\d]*-{num}\\]", + ) + + if write: + with open(out_file, "w") as fp: + json.dump(field_lookup_map, fp) + + return field_lookup_map + + +def get_lookup_dict(group="", name="", benchmark_name="", lookup_path=[], lookup_regex=""): + """Populate the lookup dictionary fields and return a dictionary.""" + ret_dict = { + "group": group, + "name": name, + "benchmark_name": benchmark_name, + "lookup_path": lookup_path, + "lookup_regex": lookup_regex, + } + return ret_dict + + +# Example invocation: ./benchmark_v2/reformat_benchmark_results.py --benchmark-data <results.json> +def main(): + parser = create_parser() + args, client_args = parser.parse_known_args() + args.graph_dir = args.graph_dir or os.path.join(args.dat_dir, "html") + configs_dir = os.path.join(args.dat_dir, "configs") + benchmark_data_path = args.benchmark_data + + os.makedirs(configs_dir, exist_ok=True) + + lookup_map_path = configs_dir + "/field_lookup_map.json" + + # TODO: remove gen_lookup_map + gen_lookup_map(True, lookup_map_path) + + with open(lookup_map_path, "r") as file: + field_lookup_map = json.load(file) + + headers = get_header_dict(args.graph_infra) + + # Load benchmark data + with open(benchmark_data_path, "r") as file: + benchmark_data = json.load(file) + + # Convert benchmark data
to output rows, in dictionary format with benchmark names as the keys. + out_data = {} + + for benchmark_name in BENCHMARKS: + if benchmark_name not in headers.keys(): + print(f"Could not find headers for {benchmark_name}.") + else: + header = headers[benchmark_name] + row = [ + get_value(field, benchmark_name, field_lookup_map, benchmark_data) + for field in header + ] + + if benchmark_name in out_data.keys() and isinstance(out_data[benchmark_name], list): + out_data[benchmark_name].append(row) + else: + out_data[benchmark_name] = [row] + + # Write the outputs to .dat files + for benchmark_name in BENCHMARKS: + if benchmark_name not in out_data.keys(): + print(f"Could not generate {benchmark_name}.dat; skipping...") + continue + + data_file = args.dat_dir + f"/{benchmark_name}.dat" + header = headers[benchmark_name] + + if not os.path.exists(data_file): + with open(data_file, "a", newline="") as file: + writer = csv.writer(file, delimiter="\t") + writer.writerow(header) + + if out_data[benchmark_name] is not None: + with open(data_file, "a", newline="") as file: + writer = csv.writer(file, delimiter="\t") + writer.writerows(out_data[benchmark_name]) + + generate_graphs(args) + + +if __name__ == "__main__": + main()
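
For reviewers: a minimal sketch of how the lookup machinery above resolves a perfkey against pytest-benchmark JSON output. The sample data is illustrative only; the field shapes mirror what get_value/compute_average read, but the numbers and the "GiB/sec" string format are assumptions, not real benchmark output.

    import re

    # Illustrative slice of a pytest-benchmark results file (hypothetical values).
    benchmark_data = {
        "benchmarks": [
            {"name": "bench_argsort[int64]", "group": None,
             "stats": {"mean": 0.42}, "extra_info": {"transfer_rate": "1.9 GiB/sec"}},
            {"name": "bench_argsort[float64]", "group": None,
             "stats": {"mean": 0.58}, "extra_info": {"transfer_rate": "1.4 GiB/sec"}},
        ]
    }

    # For argsort's "Average rate =" perfkey, the lookup_regex selects every
    # parametrized variant and the values are averaged, as compute_average() does.
    pattern = r"bench_argsort\[[\w\d]*\]"
    rates = [
        float(re.search(r"[\d\.]+", b["extra_info"]["transfer_rate"])[0])
        for b in benchmark_data["benchmarks"]
        if re.match(pattern, b["name"])
    ]
    print(sum(rates) / len(rates))  # 1.65 -> one column of the argsort.dat row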