From afeb21f7d255cb22eb46b1961e2cba9c7ed20ccf Mon Sep 17 00:00:00 2001 From: Matthew Rothenberg Date: Fri, 29 Dec 2023 13:01:49 -0500 Subject: [PATCH] refactor: uint64 backing, math/rand/v2, and remove PickSource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This a first step in refactoring towards a v3 release. A major semantic version release is required in order to let us fully remove the deprecated PickSource from our API. Switches to using the new math/rand/v2 module. Paving the way for the future. Removes PickSource method: As planned, this will remove the previously deprecated PickSource method that uses v1 of math/rand sources. Custom randomness source should be reintroduced in a future version using a different methodology (e.g. setting on the Chooser instead of with each function call). Switches the Chooser backing from being a system int to a uint64. This should result in defined behavior across 32 and 64 bit systems (with the potential for some performance regressions on 32 bit systems, which I consider an acceptable tradeoff). Internal implementation: removes the custom searchInts binary sort that was present for performance (ala github.com/mroth/xsort)in favor of slices.BinarySearch which is available as of go1.21 and hits acceptable performance as of go1.22. goos: darwin goarch: arm64 pkg: github.com/mroth/weightedrand/v2 │ v2-main.txt │ v3-dev1.txt │ │ sec/op │ sec/op vs base │ NewChooser/size=1e1-8 132.7n ± 0% 132.9n ± 0% ~ (p=0.314 n=6) NewChooser/size=1e2-8 476.1n ± 1% 472.8n ± 0% -0.70% (p=0.002 n=6) NewChooser/size=1e3-8 3.406µ ± 0% 3.412µ ± 0% ~ (p=0.379 n=6) NewChooser/size=1e4-8 31.19µ ± 1% 31.03µ ± 0% -0.51% (p=0.002 n=6) NewChooser/size=1e5-8 296.6µ ± 0% 295.9µ ± 0% ~ (p=0.394 n=6) NewChooser/size=1e6-8 2.843m ± 1% 2.843m ± 1% ~ (p=0.485 n=6) NewChooser/size=1e7-8 35.83m ± 1% 35.92m ± 1% ~ (p=0.485 n=6) Pick/size=1e1-8 22.49n ± 8% 20.28n ± 9% -9.80% (p=0.015 n=6) Pick/size=1e2-8 35.26n ± 2% 32.82n ± 2% -6.92% (p=0.002 n=6) Pick/size=1e3-8 48.41n ± 1% 45.38n ± 1% -6.26% (p=0.002 n=6) Pick/size=1e4-8 63.30n ± 1% 60.23n ± 2% -4.85% (p=0.002 n=6) Pick/size=1e5-8 85.92n ± 1% 82.53n ± 1% -3.95% (p=0.002 n=6) Pick/size=1e6-8 111.5n ± 1% 107.3n ± 4% -3.72% (p=0.013 n=6) Pick/size=1e7-8 240.7n ± 2% 233.2n ± 1% -3.10% (p=0.002 n=6) PickParallel/size=1e1-8 2.982n ± 6% 2.760n ± 5% -7.43% (p=0.009 n=6) PickParallel/size=1e2-8 4.679n ± 1% 4.360n ± 2% -6.81% (p=0.002 n=6) PickParallel/size=1e3-8 6.422n ± 2% 6.059n ± 1% -5.66% (p=0.002 n=6) PickParallel/size=1e4-8 8.463n ± 0% 8.114n ± 58% ~ (p=0.058 n=6) PickParallel/size=1e5-8 11.55n ± 3% 11.06n ± 0% -4.24% (p=0.002 n=6) PickParallel/size=1e6-8 14.98n ± 0% 14.40n ± 0% -3.87% (p=0.002 n=6) PickParallel/size=1e7-8 34.70n ± 0% 33.71n ± 0% -2.82% (p=0.002 n=6) PickSourceParallel/size=1e1-8 2.752n ± 10% PickSourceParallel/size=1e2-8 4.369n ± 2% PickSourceParallel/size=1e3-8 5.989n ± 1% PickSourceParallel/size=1e4-8 7.991n ± 2% PickSourceParallel/size=1e5-8 11.28n ± 0% PickSourceParallel/size=1e6-8 14.59n ± 0% PickSourceParallel/size=1e7-8 33.86n ± 0% geomean 120.0n 279.7n -3.59% ¹ ¹ benchmark set differs from baseline; geomeans may not be comparable --- weightedrand.go | 74 +++++---------------------------- weightedrand_test.go | 98 +++++++++++++++----------------------------- 2 files changed, 44 insertions(+), 128 deletions(-) diff --git a/weightedrand.go b/weightedrand.go index ec9a73b..09b9042 100644 --- a/weightedrand.go +++ b/weightedrand.go @@ -10,7 +10,9 @@ package weightedrand import ( "errors" - "math/rand" + "math" + "math/rand/v2" + "slices" "sort" ) @@ -33,8 +35,8 @@ func NewChoice[T any, W integer](item T, weight W) Choice[T, W] { // performance on repeated calls for weighted random selection. type Chooser[T any, W integer] struct { data []Choice[T, W] - totals []int - max int + totals []uint64 + max uint64 } // NewChooser initializes a new Chooser for picking from the provided choices. @@ -43,20 +45,15 @@ func NewChooser[T any, W integer](choices ...Choice[T, W]) (*Chooser[T, W], erro return choices[i].Weight < choices[j].Weight }) - totals := make([]int, len(choices)) - runningTotal := 0 + totals := make([]uint64, len(choices)) + var runningTotal uint64 for i, c := range choices { if c.Weight < 0 { continue // ignore negative weights, can never be picked } - // case of single ~uint64 or similar value that exceeds maxInt on its own - if uint64(c.Weight) >= maxInt { - return nil, errWeightOverflow - } - - weight := int(c.Weight) // convert weight to int for internal counter usage - if (maxInt - runningTotal) <= weight { + weight := uint64(c.Weight) // convert weight to uint64 for internal counter usage + if (math.MaxUint64 - runningTotal) <= weight { return nil, errWeightOverflow } runningTotal += weight @@ -70,11 +67,6 @@ func NewChooser[T any, W integer](choices ...Choice[T, W]) (*Chooser[T, W], erro return &Chooser[T, W]{data: choices, totals: totals, max: runningTotal}, nil } -const ( - intSize = 32 << (^uint(0) >> 63) // cf. strconv.IntSize - maxInt = 1<<(intSize-1) - 1 - maxUint64 = 1<<64 - 1 -) // Possible errors returned by NewChooser, preventing the creation of a Chooser // with unsafe runtime states. @@ -93,51 +85,7 @@ var ( // // Utilizes global rand as the source of randomness. Safe for concurrent usage. func (c Chooser[T, W]) Pick() T { - r := rand.Intn(c.max) + 1 - i := searchInts(c.totals, r) - return c.data[i].Item -} - -// PickSource returns a single weighted random Choice.Item from the Chooser, -// utilizing the provided *rand.Rand source rs for randomness. -// -// The primary use-case for this is avoid lock contention from the global random -// source if utilizing Chooser(s) from multiple goroutines in extremely -// high-throughput situations. -// -// It is the responsibility of the caller to ensure the provided rand.Source is -// free from thread safety issues. -// -// Deprecated: Since go1.21 global rand no longer suffers from lock contention -// when used in multiple high throughput goroutines, as long as you don't -// manually seed it. Use [Chooser.Pick] instead. -func (c Chooser[T, W]) PickSource(rs *rand.Rand) T { - r := rs.Intn(c.max) + 1 - i := searchInts(c.totals, r) + r := rand.Uint64N(c.max) + 1 + i, _ := slices.BinarySearch(c.totals, r) return c.data[i].Item } - -// The standard library sort.SearchInts() just wraps the generic sort.Search() -// function, which takes a function closure to determine truthfulness. However, -// since this function is utilized within a for loop, it cannot currently be -// properly inlined by the compiler, resulting in non-trivial performance -// overhead. -// -// Thus, this is essentially manually inlined version. In our use case here, it -// results in a significant throughput increase for Pick. -// -// See also github.com/mroth/xsort. -func searchInts(a []int, x int) int { - // Possible further future optimization for searchInts via SIMD if we want - // to write some Go assembly code: http://0x80.pl/articles/simd-search.html - i, j := 0, len(a) - for i < j { - h := int(uint(i+j) >> 1) // avoid overflow when computing h - if a[h] < x { - i = h + 1 - } else { - j = h - } - } - return i -} diff --git a/weightedrand_test.go b/weightedrand_test.go index 6dc642b..cda03ff 100644 --- a/weightedrand_test.go +++ b/weightedrand_test.go @@ -3,10 +3,8 @@ package weightedrand import ( "fmt" "math" - "math/rand" - "sync" + "math/rand/v2" "testing" - "time" ) /****************************************************************************** @@ -41,37 +39,42 @@ const ( func TestNewChooser(t *testing.T) { tests := []struct { name string - cs []Choice[rune, int] + cs []Choice[rune, int64] wantErr error }{ { name: "zero choices", - cs: []Choice[rune, int]{}, + cs: []Choice[rune, int64]{}, wantErr: errNoValidChoices, }, { name: "no choices with positive weight", - cs: []Choice[rune, int]{{Item: 'a', Weight: 0}, {Item: 'b', Weight: 0}}, + cs: []Choice[rune, int64]{{Item: 'a', Weight: 0}, {Item: 'b', Weight: 0}}, wantErr: errNoValidChoices, }, { name: "choice with weight equals 1", - cs: []Choice[rune, int]{{Item: 'a', Weight: 1}}, + cs: []Choice[rune, int64]{{Item: 'a', Weight: 1}}, wantErr: nil, }, { - name: "weight overflow", - cs: []Choice[rune, int]{{Item: 'a', Weight: maxInt/2 + 1}, {Item: 'b', Weight: maxInt/2 + 1}}, + name: "weight overflow", + cs: []Choice[rune, int64]{ + {Item: 'a', Weight: math.MaxInt64/2 + 1}, + {Item: 'b', Weight: math.MaxInt64/2 + 1}, + {Item: 'c', Weight: math.MaxInt64/2 + 1}, + {Item: 'd', Weight: math.MaxInt64/2 + 1}, + }, wantErr: errWeightOverflow, }, { name: "nominal case", - cs: []Choice[rune, int]{{Item: 'a', Weight: 1}, {Item: 'b', Weight: 2}}, + cs: []Choice[rune, int64]{{Item: 'a', Weight: 1}, {Item: 'b', Weight: 2}}, wantErr: nil, }, { name: "negative weight case", - cs: []Choice[rune, int]{{Item: 'a', Weight: 3}, {Item: 'b', Weight: -2}}, + cs: []Choice[rune, int64]{{Item: 'a', Weight: 3}, {Item: 'b', Weight: -2}}, wantErr: nil, }, } @@ -96,8 +99,24 @@ func TestNewChooser(t *testing.T) { wantErr error }{ { - name: "weight overflow from single uint64 exceeding system maxInt", - cs: []Choice[rune, uint64]{{Item: 'a', Weight: maxInt + 1}}, + name: "single uint64 equalling MaxUint64", + cs: []Choice[rune, uint64]{{Item: 'a', Weight: math.MaxUint64}}, + wantErr: errWeightOverflow, + }, + { + name: "single uint64 equalling MaxUint64 and a zero weight", + cs: []Choice[rune, uint64]{ + {Item: 'a', Weight: math.MaxUint64}, + {Item: 'b', Weight: 0}, + }, + wantErr: errWeightOverflow, + }, + { + name: "multiple uint64s with sum MaxUint64", + cs: []Choice[rune, uint64]{ + {Item: 'a', Weight: math.MaxUint64/2 + 1}, + {Item: 'b', Weight: math.MaxUint64/2 + 1}, + }, wantErr: errWeightOverflow, }, } @@ -139,38 +158,6 @@ func TestChooser_Pick(t *testing.T) { verifyFrequencyCounts(t, counts, choices) } -// TestChooser_PickSource is the same test methodology as TestChooser_Pick, but -// here we use the PickSource method and access the same chooser concurrently -// from multiple different goroutines, each providing its own source of -// randomness. -func TestChooser_PickSource(t *testing.T) { - choices := mockFrequencyChoices(t, testChoices) - chooser, err := NewChooser(choices...) - if err != nil { - t.Fatal(err) - } - t.Log("totals in chooser", chooser.totals) - - counts1 := make(map[int]int) - counts2 := make(map[int]int) - var wg sync.WaitGroup - wg.Add(2) - checker := func(counts map[int]int) { - defer wg.Done() - rs := rand.New(rand.NewSource(time.Now().UTC().UnixNano())) - for i := 0; i < testIterations/2; i++ { - c := chooser.PickSource(rs) - counts[c]++ - } - } - go checker(counts1) - go checker(counts2) - wg.Wait() - - verifyFrequencyCounts(t, counts1, choices) - verifyFrequencyCounts(t, counts2, choices) -} - // Similar to what is used in randutil test, but in randomized order to avoid // any issues with algorithms that are accidentally dependant on presorted data. func mockFrequencyChoices(t *testing.T, n int) []Choice[int, int] { @@ -259,30 +246,11 @@ func BenchmarkPickParallel(b *testing.B) { } } -func BenchmarkPickSourceParallel(b *testing.B) { - for n := BMMinChoices; n <= BMMaxChoices; n *= 10 { - b.Run(fmt.Sprintf("size=%s", fmt1eN(n)), func(b *testing.B) { - choices := mockChoices(n) - chooser, err := NewChooser(choices...) - if err != nil { - b.Fatal(err) - } - b.ResetTimer() - b.RunParallel(func(pb *testing.PB) { - rs := rand.New(rand.NewSource(time.Now().UTC().UnixNano())) - for pb.Next() { - _ = chooser.PickSource(rs) - } - }) - }) - } -} - func mockChoices(n int) []Choice[rune, int] { choices := make([]Choice[rune, int], 0, n) for i := 0; i < n; i++ { s := '🥑' - w := rand.Intn(10) + w := rand.IntN(10) c := NewChoice(s, w) choices = append(choices, c) }