Skip to content

Commit

Permalink
Merge pull request #15 from Jille/benchmark
Browse files Browse the repository at this point in the history
Add a script to benchmark and add to the README
  • Loading branch information
bwesterb authored Jul 22, 2024
2 parents 3b33e86 + 14b5ce4 commit a9e80c1
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 10 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Benchmark output files
asm
naive
purego
34 changes: 34 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,37 @@ func main() {
```

Makes use of AVX2 on AMD64 and NEON on ARM64.

## Benchmarks

Created using `./benchmark.sh`.

This shows three benchmarks:

* `naive` is a simple for loop doing one byte at a time.
* `purego` are our slightly optimized versions that work on uint64s instead of bytes.
* `asm` are the assembly implementations (AVX2 in the amd64 results below) and the reason to use this library.

```
goos: linux
goarch: amd64
pkg: github.com/bwesterb/go-and
cpu: 13th Gen Intel(R) Core(TM) i9-13900
│ naive │ purego │ asm │
│ sec/op │ sec/op vs base │ sec/op vs base │
And-32 273.05µ ± 5% 64.48µ ± 2% -76.39% (p=0.000 n=10) 21.88µ ± 1% -91.99% (p=0.000 n=10)
Or-32 274.70µ ± 6% 64.36µ ± 1% -76.57% (p=0.000 n=10) 21.81µ ± 1% -92.06% (p=0.000 n=10)
AndNot-32 310.78µ ± 2% 71.01µ ± 2% -77.15% (p=0.000 n=10) 21.83µ ± 1% -92.98% (p=0.000 n=10)
Memset-32 167.77µ ± 0% 167.55µ ± 0% -0.13% (p=0.002 n=10) 15.88µ ± 1% -90.53% (p=0.000 n=10)
Popcnt-32 126.84µ ± 0% 71.42µ ± 1% -43.69% (p=0.000 n=10) 32.48µ ± 1% -74.40% (p=0.000 n=10)
geomean 218.3µ 81.18µ -62.82% 22.18µ -89.84%
│ naive │ purego │ asm │
│ B/s │ B/s vs base │ B/s vs base │
And-32 3.411Gi ± 5% 14.444Gi ± 2% +323.45% (p=0.000 n=10) 42.560Gi ± 1% +1147.72% (p=0.000 n=10)
Or-32 3.391Gi ± 7% 14.470Gi ± 1% +326.78% (p=0.000 n=10) 42.708Gi ± 1% +1159.61% (p=0.000 n=10)
AndNot-32 2.997Gi ± 2% 13.116Gi ± 2% +337.68% (p=0.000 n=10) 42.665Gi ± 1% +1323.72% (p=0.000 n=10)
Memset-32 5.551Gi ± 0% 5.559Gi ± 0% +0.13% (p=0.002 n=10) 58.642Gi ± 1% +956.36% (p=0.000 n=10)
Popcnt-32 7.342Gi ± 0% 13.040Gi ± 1% +77.60% (p=0.000 n=10) 28.677Gi ± 1% +290.57% (p=0.000 n=10)
geomean 4.266Gi 11.47Gi +168.93% 41.98Gi +884.14%
```
80 changes: 70 additions & 10 deletions and_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,25 @@ import (
"testing"
)

func testAgainstGeneric(t *testing.T, fancy, generic func(dst, a, b []byte), size int) {
// andNaive is the byte-at-a-time reference for And: for each index i of dst
// it stores a[i] & b[i]. It is kept deliberately simple so it can act as the
// oracle in tests and as the baseline in benchmarks. a and b must be at least
// as long as dst.
func andNaive(dst, a, b []byte) {
	for idx := 0; idx < len(dst); idx++ {
		lhs, rhs := a[idx], b[idx]
		dst[idx] = lhs & rhs
	}
}

// orNaive is the byte-at-a-time reference for Or: for each index i of dst it
// stores a[i] | b[i]. It is kept deliberately simple so it can act as the
// oracle in tests and as the baseline in benchmarks. a and b must be at least
// as long as dst.
func orNaive(dst, a, b []byte) {
	for idx := 0; idx < len(dst); idx++ {
		lhs, rhs := a[idx], b[idx]
		dst[idx] = lhs | rhs
	}
}

// andNotNaive is the byte-at-a-time reference for AndNot: for each index i of
// dst it stores (^a[i]) & b[i], i.e. the bits of b that are not set in a. It
// is kept deliberately simple so it can act as the oracle in tests and as the
// baseline in benchmarks. a and b must be at least as long as dst.
func andNotNaive(dst, a, b []byte) {
	for idx := 0; idx < len(dst); idx++ {
		mask, val := a[idx], b[idx]
		dst[idx] = val &^ mask
	}
}

func testAgainst(t *testing.T, fancy, generic func(dst, a, b []byte), size int) {
a := make([]byte, size)
b := make([]byte, size)
c1 := make([]byte, size)
Expand All @@ -25,32 +43,38 @@ func testAgainstGeneric(t *testing.T, fancy, generic func(dst, a, b []byte), siz
}
}

func TestAndAgainstGeneric(t *testing.T) {
func TestAnd(t *testing.T) {
for i := 0; i < 20; i++ {
size := 1 << i
testAgainstGeneric(t, And, andGeneric, size)
testAgainst(t, And, andNaive, size)
testAgainst(t, andGeneric, andNaive, size)
for j := 0; j < 10; j++ {
testAgainstGeneric(t, And, andGeneric, size+rand.IntN(100))
testAgainst(t, And, andNaive, size+rand.IntN(100))
testAgainst(t, andGeneric, andNaive, size+rand.IntN(100))
}
}
}

func TestOrAgainstGeneric(t *testing.T) {
func TestOr(t *testing.T) {
for i := 0; i < 20; i++ {
size := 1 << i
testAgainstGeneric(t, Or, orGeneric, size)
testAgainst(t, Or, orNaive, size)
testAgainst(t, orGeneric, orNaive, size)
for j := 0; j < 10; j++ {
testAgainstGeneric(t, Or, orGeneric, size+rand.IntN(100))
testAgainst(t, Or, orNaive, size+rand.IntN(100))
testAgainst(t, orGeneric, orNaive, size+rand.IntN(100))
}
}
}

func TestAndNotAgainstGeneric(t *testing.T) {
func TestAndNot(t *testing.T) {
for i := 0; i < 20; i++ {
size := 1 << i
testAgainstGeneric(t, AndNot, andNotGeneric, size)
testAgainst(t, AndNot, andNotNaive, size)
testAgainst(t, andNotGeneric, andNotNaive, size)
for j := 0; j < 10; j++ {
testAgainstGeneric(t, AndNot, andNotGeneric, size+rand.IntN(100))
testAgainst(t, AndNot, andNotNaive, size+rand.IntN(100))
testAgainst(t, andNotGeneric, andNotNaive, size+rand.IntN(100))
}
}
}
Expand Down Expand Up @@ -79,6 +103,18 @@ func BenchmarkAndGeneric(b *testing.B) {
}
}

// BenchmarkAndNaive times andNaive on two 1000000-byte buffers, with the
// first buffer serving as both destination and left operand. The timer is
// stopped while the buffers are allocated, and SetBytes records the bytes
// processed per iteration so throughput is reported.
func BenchmarkAndNaive(b *testing.B) {
	const size = 1000000

	b.StopTimer()
	dst := make([]byte, size)
	src := make([]byte, size)
	b.SetBytes(int64(size))
	b.StartTimer()

	for n := 0; n < b.N; n++ {
		andNaive(dst, dst, src)
	}
}

func BenchmarkOr(b *testing.B) {
b.StopTimer()
size := 1000000
Expand All @@ -103,6 +139,18 @@ func BenchmarkOrGeneric(b *testing.B) {
}
}

// BenchmarkOrNaive times orNaive on two 1000000-byte buffers, with the first
// buffer serving as both destination and left operand. The timer is stopped
// while the buffers are allocated, and SetBytes records the bytes processed
// per iteration so throughput is reported.
func BenchmarkOrNaive(b *testing.B) {
	const size = 1000000

	b.StopTimer()
	dst := make([]byte, size)
	src := make([]byte, size)
	b.SetBytes(int64(size))
	b.StartTimer()

	for n := 0; n < b.N; n++ {
		orNaive(dst, dst, src)
	}
}

func BenchmarkAndNot(b *testing.B) {
b.StopTimer()
size := 1000000
Expand All @@ -126,3 +174,15 @@ func BenchmarkAndNotGeneric(b *testing.B) {
andNotGeneric(a, a, bb)
}
}

// BenchmarkAndNotNaive times andNotNaive on two 1000000-byte buffers, with
// the first buffer serving as both destination and first operand. The timer
// is stopped while the buffers are allocated, and SetBytes records the bytes
// processed per iteration so throughput is reported.
func BenchmarkAndNotNaive(b *testing.B) {
	const size = 1000000

	b.StopTimer()
	dst := make([]byte, size)
	src := make([]byte, size)
	b.SetBytes(int64(size))
	b.StartTimer()

	for n := 0; n < b.N; n++ {
		andNotNaive(dst, dst, src)
	}
}
9 changes: 9 additions & 0 deletions benchmark.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/sh
# Runs the benchmarks in three configurations (asm, purego, naive) and then
# compares the results with benchstat. Each run's output is echoed to stdout
# and saved via tee to a file named after the configuration, in the current
# directory.

# -e: exit as soon as a command fails; -x: print each command before running it.
# NOTE(review): every go test below runs inside a pipeline, whose exit status is
# that of its last command (tee), so -e may not stop on a failing go test --
# confirm whether that matters here.
set -ex

# -run=^# presumably matches no test name, so only benchmarks are executed.
# ^Benchmark.{2,6}$ only matches benchmarks with 2 to 6 characters after the
# "Benchmark" prefix (And, Or, AndNot, Memset, Popcnt). It is a hack to skip
# the longer .+Generic (and .+Naive) benchmarks.
go test -run=^# -count=10 -bench="^Benchmark.{2,6}$" | tee asm
# Same benchmarks, built with the purego tag.
go test -run=^# -count=10 -bench="^Benchmark.{2,6}$" -tags purego | tee purego
# Naive baselines: sed strips the "Naive" suffix from the output so the
# benchmark names line up with those in the other two files.
go test -run=^# -count=10 -bench="^Benchmark.{2,6}Naive$" | sed --unbuffered 's/Naive//g' | tee naive
# Compare the three result files; naive is given first (the README output
# shows it as the base column the other two are compared against).
go run golang.org/x/perf/cmd/benchstat@latest naive purego asm
17 changes: 17 additions & 0 deletions memset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ import (
"testing"
)

// memsetNaive is the byte-at-a-time reference for Memset: it stores b into
// every element of dst. It is kept deliberately simple so it can serve as the
// baseline in benchmarks.
func memsetNaive(dst []byte, b byte) {
	for idx := 0; idx < len(dst); idx++ {
		dst[idx] = b
	}
}

func testMemset(t *testing.T, size int) {
a := make([]byte, size)
Memset(a, 0xff)
Expand Down Expand Up @@ -46,3 +52,14 @@ func BenchmarkMemsetGeneric(b *testing.B) {
memsetGeneric(a, 0xff)
}
}

// BenchmarkMemsetNaive times memsetNaive filling a 1000000-byte buffer with
// 0xff. The timer is stopped while the buffer is allocated, and SetBytes
// records the bytes processed per iteration so throughput is reported.
func BenchmarkMemsetNaive(b *testing.B) {
	const size = 1000000

	b.StopTimer()
	buf := make([]byte, size)
	b.SetBytes(int64(size))
	b.StartTimer()

	for n := 0; n < b.N; n++ {
		memsetNaive(buf, 0xff)
	}
}
20 changes: 20 additions & 0 deletions popcnt_test.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
package and

import (
"math/bits"
"math/rand/v2"
"testing"
)

// popcntNaive is the byte-at-a-time reference for Popcnt: it returns the
// total number of set bits in a, summing bits.OnesCount8 over every byte.
// It returns 0 for an empty or nil slice.
func popcntNaive(a []byte) int {
	total := 0
	for _, octet := range a {
		total += bits.OnesCount8(octet)
	}
	return total
}

func testPopcntAgainstGeneric(t *testing.T, size int) {
a := make([]byte, size)
rng := rand.New(rand.NewPCG(0, 0))
Expand Down Expand Up @@ -49,3 +58,14 @@ func BenchmarkPopcntGeneric(b *testing.B) {
_ = popcntGeneric(a)
}
}

// BenchmarkPopcntNaive times popcntNaive over a 1000000-byte buffer; the
// returned count is discarded. The timer is stopped while the buffer is
// allocated, and SetBytes records the bytes processed per iteration so
// throughput is reported.
func BenchmarkPopcntNaive(b *testing.B) {
	const size = 1000000

	b.StopTimer()
	buf := make([]byte, size)
	b.SetBytes(int64(size))
	b.StartTimer()

	for n := 0; n < b.N; n++ {
		_ = popcntNaive(buf)
	}
}

0 comments on commit a9e80c1

Please sign in to comment.