Merge pull request #104 from itamarst/81-benchmark
Basic benchmarking works now
exarkun authored Oct 23, 2023
2 parents 7c857df + 6b2fa0c commit f2d7d29
Showing 2 changed files with 47 additions and 36 deletions.
28 changes: 10 additions & 18 deletions README.rst
@@ -115,24 +115,16 @@ Performance
To run the benchmarks, execute the included bench/bench_zfec.py script with
optional --k= and --m= arguments.
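For example (an illustrative invocation, not part of this commit): ``python bench/bench_zfec.py --k=3 --m=10``.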

On my Athlon 64 2.4 GHz workstation (running Linux), the "zfec" command-line
tool encoded a 160 MB file with m=100, k=94 (about 6% redundancy) in 3.9
seconds, where the "par2" tool encoded the file with about 6% redundancy in
27 seconds. zfec encoded the same file with m=12, k=6 (100% redundancy) in
4.1 seconds, where par2 encoded it with about 100% redundancy in 7 minutes
and 56 seconds.

The underlying C library in benchmark mode encoded from a file at about 4.9
million bytes per second and decoded at about 5.8 million bytes per second.

On Peter's fancy Intel Mac laptop (2.16 GHz Core Duo), it encoded from a file
at about 6.2 million bytes per second.

On my even fancier Intel Mac laptop (2.33 GHz Core Duo), it encoded from a
file at about 6.8 million bytes per second.

On my old PowerPC G4 867 MHz Mac laptop, it encoded from a file at about 1.3
million bytes per second.
Here are the results for an i7-12700K:

```
measuring encoding of data with K=3, M=10, encoding 1000000 bytes 1000 times in a row...
Average MB/s: 364
measuring decoding of primary-only data with K=3, M=10, 1000 times in a row...
Average MB/s: 1894750
measuring decoding of secondary-only data with K=3, M=10, 1000 times in a row...
Average MB/s: 3298
```
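The primary-only figure is enormous because handing the decoder all K primary blocks is essentially a pass-through (the primary blocks come back unchanged), which is why it dwarfs the secondary-only case, where real reconstruction happens. Below is a minimal standalone sketch (not part of this commit) of the same style of measurement, assuming the Encoder/Decoder API exactly as the benchmark script below uses it:

```
import os
from time import time

from zfec import Encoder, Decoder

K, M, SIZE, REPS = 3, 10, 10**6, 100
blocksize = -(-SIZE // K)  # ceiling division, like mathutil.div_ceil
d = os.urandom(SIZE)
ds = [d[i * blocksize:(i + 1) * blocksize] for i in range(K)]
ds[-1] += b"\x00" * (len(ds[-2]) - len(ds[-1]))  # zero-pad the short last block

enc = Encoder(K, M)
start = time()
for _ in range(REPS):
    blocks = enc.encode(ds)  # K primary blocks followed by M-K secondary blocks
elapsed = (time() - start) / REPS
print("encode MB/s: %.1f" % ((SIZE / (1024 * 1024)) / elapsed))

# Decoding from the K primary blocks is nearly free, hence the huge
# primary-only number reported above.
dec = Decoder(K, M)
recovered = b"".join(dec.decode(blocks[:K], list(range(K))))
assert recovered[:SIZE] == d
```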

Here is a paper analyzing the performance of various erasure codes and their
implementations, including zfec:
55 changes: 37 additions & 18 deletions bench/bench_zfec.py
@@ -1,9 +1,8 @@
from zfec import easyfec, Encoder, filefec
from zfec import easyfec, Encoder, filefec, Decoder
from pyutil import mathutil

import os, sys

from pyutil import benchutil
from time import time

FNAME="benchrandom.data"

@@ -16,7 +15,7 @@ def donothing(results, reslenthing):
K=3
M=10

d = ""
d = b""
ds = []
easyfecenc = None
fecenc = None
@@ -30,12 +29,12 @@ def _make_new_rand_data(size, k, m):
    blocksize = mathutil.div_ceil(size, k)
    for i in range(k):
        ds[i] = d[i*blocksize:(i+1)*blocksize]
    ds[-1] = ds[-1] + "\x00" * (len(ds[-2]) - len(ds[-1]))
    ds[-1] = ds[-1] + b"\x00" * (len(ds[-2]) - len(ds[-1]))
    easyfecenc = easyfec.Encoder(k, m)
    fecenc = Encoder(k, m)

import sha
hashers = [ sha.new() for i in range(M) ]
from hashlib import sha256
hashers = [ sha256() for i in range(M) ]
def hashem(results, reslenthing):
    for i, result in enumerate(results):
        hashers[i].update(result)
@@ -77,26 +76,46 @@ def _encode_data_fec(N):

def bench(k, m):
    SIZE = 10**6
    MAXREPS = 64
    MAXREPS = 1000
    # for f in [_encode_file_stringy_easyfec, _encode_file_stringy, _encode_file, _encode_file_not_really,]:
    # for f in [_encode_file,]:
    # for f in [_encode_file_not_really, _encode_file_not_really_and_hash, _encode_file, _encode_file_and_hash,]:
    # for f in [_encode_data_not_really, _encode_data_easyfec, _encode_data_fec,]:
    print "measuring encoding of data with K=%d, M=%d, reporting results in nanoseconds per byte after encoding %d bytes %d times in a row..." % (k, m, SIZE, MAXREPS)
    print("measuring encoding of data with K=%d, M=%d, encoding %d bytes %d times in a row..." % (k, m, SIZE, MAXREPS))
    # for f in [_encode_data_fec, _encode_data_not_really]:
    for f in [_encode_data_fec]:
        def _init_func(size):
            return _make_new_rand_data(size, k, m)
        for BSIZE in [SIZE]:
            results = benchutil.rep_bench(f, n=BSIZE, initfunc=_init_func, MAXREPS=MAXREPS, MAXTIME=None, UNITS_PER_SECOND=1000000000)
    print "and now represented in MB/s..."
    print
    best = results['best']
    mean = results['mean']
    worst = results['worst']
    print "best: % 4.3f MB/sec" % (10**3 / best)
    print "mean: % 4.3f MB/sec" % (10**3 / mean)
    print "worst: % 4.3f MB/sec" % (10**3 / worst)
            _init_func(BSIZE)
            start = time()
            for _ in range(MAXREPS):
                f(BSIZE)
            elapsed = (time() - start) / MAXREPS
            print("Average MB/s:", (BSIZE / (1024 * 1024)) / elapsed)

    print("measuring decoding of primary-only data with K=%d, M=%d, %d times in a row..." % (k, m, MAXREPS))
    blocks = fecenc.encode(ds)
    sharenums = list(range(len(blocks)))
    decer = Decoder(k, m)
    start = time()
    for _ in range(MAXREPS):
        decer.decode(blocks[:k], sharenums[:k])
    assert b"".join(decer.decode(blocks[:k], sharenums[:k]))[:SIZE] == b"".join(ds)[:SIZE]
    elapsed = (time() - start) / MAXREPS
    print("Average MB/s:", (sum(len(b) for b in blocks) / (1024 * 1024)) / elapsed)

    print("measuring decoding of secondary-only data with K=%d, M=%d, %d times in a row..." % (k, m, MAXREPS))
    blocks = fecenc.encode(ds)
    sharenums = list(range(len(blocks)))
    decer = Decoder(k, m)
    start = time()
    for _ in range(MAXREPS):
        decer.decode(blocks[k:k+k], sharenums[k:k+k])
    assert b"".join(decer.decode(blocks[k:k+k], sharenums[k:k+k]))[:SIZE] == b"".join(ds)[:SIZE]
    elapsed = (time() - start) / MAXREPS
    print("Average MB/s:", (sum(len(b) for b in blocks) / (1024 * 1024)) / elapsed)


k = K
m = M
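The benchmark times the two extreme share patterns, all-primary and all-secondary; an erasure code like zfec can decode from any K distinct shares, mixed freely. A short sketch of the mixed case (illustrative, not part of this commit, using the same Encoder/Decoder API as the benchmark above):

```
from zfec import Encoder, Decoder

K, M = 3, 10
ds = [b"ham", b"spa", b"m!!"]      # K equal-length primary blocks
blocks = Encoder(K, M).encode(ds)  # all M shares, numbered 0..M-1
# recover the original from one primary share and two secondary shares
picked, nums = [blocks[0], blocks[4], blocks[7]], [0, 4, 7]
assert b"".join(Decoder(K, M).decode(picked, nums)) == b"".join(ds)
```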
