forked from ijl/orjson
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pynumpy
executable file
·133 lines (103 loc) · 3.26 KB
/
pynumpy
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/env python3
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import gc
import io
import json
import os
import sys
import time
from timeit import timeit
import numpy
import psutil
import rapidjson
import simplejson
from memory_profiler import memory_usage
from tabulate import tabulate
import orjson
# Restrict this process to CPUs 0 and 1 for the duration of the benchmark.
os.sched_setaffinity(os.getpid(), {0, 1})

# sys.argv[0] is always the script name, so a user-supplied kind is present
# only when argv has more than one entry. Falling back to "" routes a missing
# argument to the usage message below instead of raising IndexError.
kind = sys.argv[1] if len(sys.argv) > 1 else ""

# Build the random input array for the requested element type.
if kind == "int32":
    array = numpy.random.randint(((2 ** 31) - 1), size=(100000, 100), dtype=numpy.int32)
elif kind == "float64":
    array = numpy.random.random(size=(50000, 100))
    assert array.dtype == numpy.float64
elif kind == "bool":
    array = numpy.random.choice((True, False), size=(100000, 200))
elif kind == "int8":
    array = numpy.random.randint(((2 ** 7) - 1), size=(100000, 100), dtype=numpy.int8)
elif kind == "uint8":
    array = numpy.random.randint(((2 ** 8) - 1), size=(100000, 100), dtype=numpy.uint8)
else:
    print("usage: pynumpy (bool|int32|float64|int8|uint8)")
    sys.exit(1)

# Handle on a process (psutil.Process() with no pid); used later in the file
# to read the baseline RSS.
proc = psutil.Process()
def default(__obj):
    """Fallback serializer for objects a JSON encoder cannot handle natively.

    Converts a ``numpy.ndarray`` to nested Python lists via ``tolist()``.

    Raises:
        TypeError: if ``__obj`` is not a ``numpy.ndarray``. Without this, the
            function would implicitly return ``None`` and the encoder would
            silently emit ``null`` for any unsupported object.
    """
    if isinstance(__obj, numpy.ndarray):
        return __obj.tolist()
    raise TypeError(
        f"Object of type {type(__obj).__name__} is not JSON serializable"
    )
# Column titles of the result table.
headers = ("Library", "Latency (ms)", "RSS diff (MiB)", "vs. orjson")

# Libraries to benchmark, in order; each one is resolved to a module-level
# callable (or None) named "<library>_dumps".
LIBRARIES = ("orjson", "ujson", "rapidjson", "simplejson", "json")

# Number of calls timed per library.
ITERATIONS = 10


def orjson_dumps():
    """Serialize the benchmark array with orjson's native numpy support."""
    return orjson.dumps(array, option=orjson.OPT_SERIALIZE_NUMPY)


# No ujson callable is provided; the benchmark loop checks for None.
ujson_dumps = None


def rapidjson_dumps():
    """Serialize the benchmark array with rapidjson and encode to UTF-8 bytes."""
    return rapidjson.dumps(array, default=default).encode("utf-8")


def simplejson_dumps():
    """Serialize the benchmark array with simplejson and encode to UTF-8 bytes."""
    return simplejson.dumps(array, default=default).encode("utf-8")


def json_dumps():
    """Serialize the benchmark array with the stdlib json and encode to UTF-8 bytes."""
    return json.dumps(array, default=default).encode("utf-8")


# Report the size of the orjson output for the chosen kind.
output_in_mib = len(orjson_dumps()) / 1024 / 1024
print(f"{output_in_mib:,.1f}MiB {kind} output (orjson)")

# Collect garbage, then record the RSS (in MiB) as the baseline.
gc.collect()
mem_before = proc.memory_full_info().rss / 1024 / 1024
def per_iter_latency(val):
    """Scale ``val`` by 1000 and divide by ``ITERATIONS``.

    Returns ``None`` unchanged when ``val`` is ``None``.
    """
    return None if val is None else (val * 1000) / ITERATIONS
def test_correctness(func):
    """Return whether the output of ``func()``, parsed by orjson, equals ``array.tolist()``."""
    decoded = orjson.loads(func())
    expected = array.tolist()
    return decoded == expected
# One row per library: (name, latency, RSS diff, ratio vs. orjson).
table = []
for lib_name in LIBRARIES:
    gc.collect()
    print(f"{lib_name}...")

    # Resolve the module-level "<library>_dumps" callable by name.
    func = locals()[f"{lib_name}_dumps"]
    if func is not None:
        total_latency = timeit(func, number=ITERATIONS)
        latency = per_iter_latency(total_latency)
        time.sleep(1)
        # NOTE(review): latency has been multiplied by 1000 in
        # per_iter_latency; confirm the unit memory_usage expects for timeout.
        samples = memory_usage((func,), interval=0.001, timeout=latency * 2)
        mem = max(samples)
        correct = test_correctness(func)
    else:
        # Nothing to run for this library: the row is left blank.
        total_latency = latency = mem = None
        correct = False

    # orjson is the reference; every other latency is expressed relative to it.
    if lib_name == "orjson":
        orjson_latency = latency
        compared_to_orjson = 1
    elif latency:
        compared_to_orjson = latency / orjson_latency
    else:
        compared_to_orjson = None

    # Incorrect (or skipped) results are not reported.
    if not correct:
        latency, mem = None, 0

    mem_diff = mem - mem_before

    latency_cell = f"{latency:,.0f}" if latency else ""
    mem_cell = f"{mem_diff:,.0f}" if mem else ""
    ratio_cell = (
        f"{compared_to_orjson:,.1f}" if (latency and compared_to_orjson) else ""
    )
    table.append((lib_name, latency_cell, mem_cell, ratio_cell))
# Render the rows as a "grid" table, then rewrite its characters.
rendered = tabulate(table, headers, tablefmt="grid") + "\n"

# Applied strictly in this order; later substitutions depend on earlier ones.
# Note that the first step removes every "-" in the text, including any that
# appear inside cell values.
substitutions = (
    ("-", ""),
    ("*", "-"),
    ("=", "-"),
    ("+", "|"),
    ("|||||", ""),
    ("\n\n", "\n"),
)
for old, new in substitutions:
    rendered = rendered.replace(old, new)

print(rendered)