perf(python): Add python benchmark test (#1916)

## What does this PR do? Rewrote [pyperformance bm_pickle](https://github.com/python/pyperformance/tree/main/pyperformance/data-files/benchmarks/bm_pickle) using fury as a benchmark test for pyfury. ## Related issues  ## Does this PR introduce any user-facing change?  - [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change? ## Benchmark  ``` ..................... fury_dict: Mean +- std dev: 278 us +- 3 us ..................... fury_dict_group: Mean +- std dev: 302 us +- 8 us ..................... fury_tuple: Mean +- std dev: 262 us +- 4 us ..................... fury_list: Mean +- std dev: 279 us +- 4 us ```
apache · Oct 30, 2024 · baeff52 · baeff52
1 parent e087481
commit baeff52
Show file tree

Hide file tree

Showing 4 changed files with 150 additions and 0 deletions.
diff --git a/integration_tests/README.md b/integration_tests/README.md
@@ -4,5 +4,6 @@
 - [latest_jdk_tests](latest_jdk_tests): test latest jdk.
 - [graalvm_tests](graalvm_tests): test graalvm native image support.
 - [jpms_tests](jpms_tests): test JPMS module names.
+- [cpython_benchmark](cpython_benchmark): fury CPython microbenchmark.
 
 > Note that this integration_tests is not designed as a maven multi-module project on purpose, so we can introduce features of higher jdk version without breaking compilation for lower jdk, and add integration tests for other languages.
diff --git a/integration_tests/cpython_benchmark/README.md b/integration_tests/cpython_benchmark/README.md
@@ -0,0 +1,27 @@
+# Fury CPython Benchmark
+
+Microbenchmark for Fury serialization in CPython.
+
+## Benchmark
+
+Step 1: Install Fury (the `pyfury` package) into your Python environment
+
+Step 2: Install the dependencies required for the benchmark script
+
+```bash
+pip install -r requirements.txt
+```
+
+Step 3: Execute the benchmark script
+
+```bash
+python fury_benchmark.py
+```
+
+`--affinity CPU_LIST` specifies the CPU affinity for worker processes
+
+`-o FILENAME, --output FILENAME` writes the results, encoded as JSON, into FILENAME
+
+`--profile PROFILE` collects profile data using cProfile and writes it to the given file
+
+`--help` lists the remaining `pyperf` options
diff --git a/integration_tests/cpython_benchmark/fury_benchmark.py b/integration_tests/cpython_benchmark/fury_benchmark.py
@@ -0,0 +1,121 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import datetime
+import random
+import sys
+from pyfury import Fury, Language
+import pyperf
+
+
+# The benchmark case is rewritten from pyperformance bm_pickle
+# https://github.com/python/pyperformance/blob/main/pyperformance/data-files/benchmarks/bm_pickle/run_benchmark.py
+
+# Sample payload: a flat mapping with str keys whose values are ints, strs,
+# one datetime.date ("birthday") and one list of strs ("tags").
+DICT = {
+    "ads_flags": 0,
+    "age": 18,
+    "birthday": datetime.date(1980, 5, 7),
+    "bulletin_count": 0,
+    "comment_count": 0,
+    "country": "BR",
+    "encrypted_id": "G9urXXAJwjE",
+    "favorite_count": 9,
+    "first_name": "",
+    "flags": 412317970704,
+    "friend_count": 0,
+    "gender": "m",
+    "gender_for_display": "Male",
+    "id": 302935349,
+    "is_custom_profile_icon": 0,
+    "last_name": "",
+    "locale_preference": "pt_BR",
+    "member": 0,
+    "tags": ["a", "b", "c", "d", "e", "f", "g"],
+    "profile_foo_id": 827119638,
+    "secure_encrypted_id": "Z_xxx2dYx3t4YAdnmfgyKw",
+    "session_number": 2,
+    "signup_id": "201-19225-223",
+    "status": "A",
+    "theme": 1,
+    "time_created": 1225237014,
+    "time_updated": 1233134493,
+    "unread_message_count": 0,
+    "user_group": "0",
+    "username": "collinwinter",
+    "play_count": 9,
+    "view_count": 7,
+    "zip": "",
+}
+
+# Sample payload: a two-element tuple holding a list of 20 ints and the int 60.
+TUPLE = (
+    [
+        265867233,
+        265868503,
+        265252341,
+        265243910,
+        265879514,
+        266219766,
+        266021701,
+        265843726,
+        265592821,
+        265246784,
+        265853180,
+        45526486,
+        265463699,
+        265848143,
+        265863062,
+        265392591,
+        265877490,
+        265823665,
+        265828884,
+        265753032,
+    ],
+    60,
+)
+
+
+# Sample payload: 10 rows, each a pair of separate lists containing 0..9.
+LIST = [[list(range(10)), list(range(10))] for _ in range(10)]
+
+
+def mutate_dict(orig_dict, random_source):
+    new_dict = dict(orig_dict)
+    for key, value in new_dict.items():
+        rand_val = random_source.random() * sys.maxsize
+        if isinstance(key, (int, bytes, str)):
+            new_dict[key] = type(key)(rand_val)
+    return new_dict
+
+
+# Generator created with a fixed seed (5), so DICT_GROUP is built from the
+# same random sequence on every run.
+random_source = random.Random(5)
+# Sample payload: a list of three mutated copies of DICT, all drawing from the
+# shared generator above.
+DICT_GROUP = [mutate_dict(DICT, random_source) for _ in range(3)]
+
+
+def fury_python_object(obj):
+    fury = Fury(language=Language.PYTHON, ref_tracking=True)
+    binary = fury.serialize(obj)
+    fury.deserialize(binary)
+
+
+def micro_benchmark():
+    runner = pyperf.Runner()
+    runner.bench_func("fury_dict", fury_python_object, DICT)
+    runner.bench_func("fury_dict_group", fury_python_object, DICT_GROUP)
+    runner.bench_func("fury_tuple", fury_python_object, TUPLE)
+    runner.bench_func("fury_list", fury_python_object, LIST)
+
+
+# Run the benchmarks only when this file is executed as a script.
+if __name__ == "__main__":
+    micro_benchmark()
diff --git a/integration_tests/cpython_benchmark/requirements.txt b/integration_tests/cpython_benchmark/requirements.txt
@@ -0,0 +1 @@
+pyperf