From afeb21f7d255cb22eb46b1961e2cba9c7ed20ccf Mon Sep 17 00:00:00 2001
From: Matthew Rothenberg <mrothenberg@gmail.com>
Date: Fri, 29 Dec 2023 13:01:49 -0500
Subject: [PATCH] refactor: uint64 backing, math/rand/v2, and remove PickSource
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This a first step in refactoring towards a v3 release.  A major semantic
version release is required in order to let us fully remove the
deprecated PickSource from our API.

Switches to using the new math/rand/v2 module.  Paving the way for the
future.

Removes PickSource method: As planned, this will remove the previously
deprecated PickSource method that uses v1 of math/rand sources. Custom
randomness source should be reintroduced in a future version using a
different methodology (e.g. setting on the Chooser instead of with each
function call).

Switches the Chooser backing from being a system int to a uint64. This
should result in defined behavior across 32 and 64 bit systems (with
the potential for some performance regressions on 32 bit systems, which
I consider an acceptable tradeoff).

Internal implementation: removes the custom searchInts binary sort that
was present for performance (ala github.com/mroth/xsort)in favor of
slices.BinarySearch which is available as of go1.21 and hits acceptable
performance as of go1.22.

goos: darwin
goarch: arm64
pkg: github.com/mroth/weightedrand/v2
                              │ v2-main.txt  │             v3-dev1.txt              │
                              │    sec/op    │    sec/op     vs base                │
NewChooser/size=1e1-8           132.7n ±  0%   132.9n ±  0%       ~ (p=0.314 n=6)
NewChooser/size=1e2-8           476.1n ±  1%   472.8n ±  0%  -0.70% (p=0.002 n=6)
NewChooser/size=1e3-8           3.406µ ±  0%   3.412µ ±  0%       ~ (p=0.379 n=6)
NewChooser/size=1e4-8           31.19µ ±  1%   31.03µ ±  0%  -0.51% (p=0.002 n=6)
NewChooser/size=1e5-8           296.6µ ±  0%   295.9µ ±  0%       ~ (p=0.394 n=6)
NewChooser/size=1e6-8           2.843m ±  1%   2.843m ±  1%       ~ (p=0.485 n=6)
NewChooser/size=1e7-8           35.83m ±  1%   35.92m ±  1%       ~ (p=0.485 n=6)
Pick/size=1e1-8                 22.49n ±  8%   20.28n ±  9%  -9.80% (p=0.015 n=6)
Pick/size=1e2-8                 35.26n ±  2%   32.82n ±  2%  -6.92% (p=0.002 n=6)
Pick/size=1e3-8                 48.41n ±  1%   45.38n ±  1%  -6.26% (p=0.002 n=6)
Pick/size=1e4-8                 63.30n ±  1%   60.23n ±  2%  -4.85% (p=0.002 n=6)
Pick/size=1e5-8                 85.92n ±  1%   82.53n ±  1%  -3.95% (p=0.002 n=6)
Pick/size=1e6-8                 111.5n ±  1%   107.3n ±  4%  -3.72% (p=0.013 n=6)
Pick/size=1e7-8                 240.7n ±  2%   233.2n ±  1%  -3.10% (p=0.002 n=6)
PickParallel/size=1e1-8         2.982n ±  6%   2.760n ±  5%  -7.43% (p=0.009 n=6)
PickParallel/size=1e2-8         4.679n ±  1%   4.360n ±  2%  -6.81% (p=0.002 n=6)
PickParallel/size=1e3-8         6.422n ±  2%   6.059n ±  1%  -5.66% (p=0.002 n=6)
PickParallel/size=1e4-8         8.463n ±  0%   8.114n ± 58%       ~ (p=0.058 n=6)
PickParallel/size=1e5-8         11.55n ±  3%   11.06n ±  0%  -4.24% (p=0.002 n=6)
PickParallel/size=1e6-8         14.98n ±  0%   14.40n ±  0%  -3.87% (p=0.002 n=6)
PickParallel/size=1e7-8         34.70n ±  0%   33.71n ±  0%  -2.82% (p=0.002 n=6)
PickSourceParallel/size=1e1-8   2.752n ± 10%
PickSourceParallel/size=1e2-8   4.369n ±  2%
PickSourceParallel/size=1e3-8   5.989n ±  1%
PickSourceParallel/size=1e4-8   7.991n ±  2%
PickSourceParallel/size=1e5-8   11.28n ±  0%
PickSourceParallel/size=1e6-8   14.59n ±  0%
PickSourceParallel/size=1e7-8   33.86n ±  0%
geomean                         120.0n         279.7n        -3.59%               ¹
¹ benchmark set differs from baseline; geomeans may not be comparable
---
 weightedrand.go      | 74 +++++----------------------------
 weightedrand_test.go | 98 +++++++++++++++-----------------------------
 2 files changed, 44 insertions(+), 128 deletions(-)

diff --git a/weightedrand.go b/weightedrand.go
index ec9a73b..09b9042 100644
--- a/weightedrand.go
+++ b/weightedrand.go
@@ -10,7 +10,9 @@ package weightedrand
 
 import (
 	"errors"
-	"math/rand"
+	"math"
+	"math/rand/v2"
+	"slices"
 	"sort"
 )
 
@@ -33,8 +35,8 @@ func NewChoice[T any, W integer](item T, weight W) Choice[T, W] {
 // performance on repeated calls for weighted random selection.
 type Chooser[T any, W integer] struct {
 	data   []Choice[T, W]
-	totals []int
-	max    int
+	totals []uint64
+	max    uint64
 }
 
 // NewChooser initializes a new Chooser for picking from the provided choices.
@@ -43,20 +45,15 @@ func NewChooser[T any, W integer](choices ...Choice[T, W]) (*Chooser[T, W], erro
 		return choices[i].Weight < choices[j].Weight
 	})
 
-	totals := make([]int, len(choices))
-	runningTotal := 0
+	totals := make([]uint64, len(choices))
+	var runningTotal uint64
 	for i, c := range choices {
 		if c.Weight < 0 {
 			continue // ignore negative weights, can never be picked
 		}
 
-		// case of single ~uint64 or similar value that exceeds maxInt on its own
-		if uint64(c.Weight) >= maxInt {
-			return nil, errWeightOverflow
-		}
-
-		weight := int(c.Weight) // convert weight to int for internal counter usage
-		if (maxInt - runningTotal) <= weight {
+		weight := uint64(c.Weight) // convert weight to uint64 for internal counter usage
+		if (math.MaxUint64 - runningTotal) <= weight {
 			return nil, errWeightOverflow
 		}
 		runningTotal += weight
@@ -70,11 +67,6 @@ func NewChooser[T any, W integer](choices ...Choice[T, W]) (*Chooser[T, W], erro
 	return &Chooser[T, W]{data: choices, totals: totals, max: runningTotal}, nil
 }
 
-const (
-	intSize   = 32 << (^uint(0) >> 63) // cf. strconv.IntSize
-	maxInt    = 1<<(intSize-1) - 1
-	maxUint64 = 1<<64 - 1
-)
 
 // Possible errors returned by NewChooser, preventing the creation of a Chooser
 // with unsafe runtime states.
@@ -93,51 +85,7 @@ var (
 //
 // Utilizes global rand as the source of randomness. Safe for concurrent usage.
 func (c Chooser[T, W]) Pick() T {
-	r := rand.Intn(c.max) + 1
-	i := searchInts(c.totals, r)
-	return c.data[i].Item
-}
-
-// PickSource returns a single weighted random Choice.Item from the Chooser,
-// utilizing the provided *rand.Rand source rs for randomness.
-//
-// The primary use-case for this is avoid lock contention from the global random
-// source if utilizing Chooser(s) from multiple goroutines in extremely
-// high-throughput situations.
-//
-// It is the responsibility of the caller to ensure the provided rand.Source is
-// free from thread safety issues.
-//
-// Deprecated: Since go1.21 global rand no longer suffers from lock contention
-// when used in multiple high throughput goroutines, as long as you don't
-// manually seed it. Use [Chooser.Pick] instead.
-func (c Chooser[T, W]) PickSource(rs *rand.Rand) T {
-	r := rs.Intn(c.max) + 1
-	i := searchInts(c.totals, r)
+	r := rand.Uint64N(c.max) + 1
+	i, _ := slices.BinarySearch(c.totals, r)
 	return c.data[i].Item
 }
-
-// The standard library sort.SearchInts() just wraps the generic sort.Search()
-// function, which takes a function closure to determine truthfulness. However,
-// since this function is utilized within a for loop, it cannot currently be
-// properly inlined by the compiler, resulting in non-trivial performance
-// overhead.
-//
-// Thus, this is essentially manually inlined version.  In our use case here, it
-// results in a significant throughput increase for Pick.
-//
-// See also github.com/mroth/xsort.
-func searchInts(a []int, x int) int {
-	// Possible further future optimization for searchInts via SIMD if we want
-	// to write some Go assembly code: http://0x80.pl/articles/simd-search.html
-	i, j := 0, len(a)
-	for i < j {
-		h := int(uint(i+j) >> 1) // avoid overflow when computing h
-		if a[h] < x {
-			i = h + 1
-		} else {
-			j = h
-		}
-	}
-	return i
-}
diff --git a/weightedrand_test.go b/weightedrand_test.go
index 6dc642b..cda03ff 100644
--- a/weightedrand_test.go
+++ b/weightedrand_test.go
@@ -3,10 +3,8 @@ package weightedrand
 import (
 	"fmt"
 	"math"
-	"math/rand"
-	"sync"
+	"math/rand/v2"
 	"testing"
-	"time"
 )
 
 /******************************************************************************
@@ -41,37 +39,42 @@ const (
 func TestNewChooser(t *testing.T) {
 	tests := []struct {
 		name    string
-		cs      []Choice[rune, int]
+		cs      []Choice[rune, int64]
 		wantErr error
 	}{
 		{
 			name:    "zero choices",
-			cs:      []Choice[rune, int]{},
+			cs:      []Choice[rune, int64]{},
 			wantErr: errNoValidChoices,
 		},
 		{
 			name:    "no choices with positive weight",
-			cs:      []Choice[rune, int]{{Item: 'a', Weight: 0}, {Item: 'b', Weight: 0}},
+			cs:      []Choice[rune, int64]{{Item: 'a', Weight: 0}, {Item: 'b', Weight: 0}},
 			wantErr: errNoValidChoices,
 		},
 		{
 			name:    "choice with weight equals 1",
-			cs:      []Choice[rune, int]{{Item: 'a', Weight: 1}},
+			cs:      []Choice[rune, int64]{{Item: 'a', Weight: 1}},
 			wantErr: nil,
 		},
 		{
-			name:    "weight overflow",
-			cs:      []Choice[rune, int]{{Item: 'a', Weight: maxInt/2 + 1}, {Item: 'b', Weight: maxInt/2 + 1}},
+			name: "weight overflow",
+			cs: []Choice[rune, int64]{
+				{Item: 'a', Weight: math.MaxInt64/2 + 1},
+				{Item: 'b', Weight: math.MaxInt64/2 + 1},
+				{Item: 'c', Weight: math.MaxInt64/2 + 1},
+				{Item: 'd', Weight: math.MaxInt64/2 + 1},
+			},
 			wantErr: errWeightOverflow,
 		},
 		{
 			name:    "nominal case",
-			cs:      []Choice[rune, int]{{Item: 'a', Weight: 1}, {Item: 'b', Weight: 2}},
+			cs:      []Choice[rune, int64]{{Item: 'a', Weight: 1}, {Item: 'b', Weight: 2}},
 			wantErr: nil,
 		},
 		{
 			name:    "negative weight case",
-			cs:      []Choice[rune, int]{{Item: 'a', Weight: 3}, {Item: 'b', Weight: -2}},
+			cs:      []Choice[rune, int64]{{Item: 'a', Weight: 3}, {Item: 'b', Weight: -2}},
 			wantErr: nil,
 		},
 	}
@@ -96,8 +99,24 @@ func TestNewChooser(t *testing.T) {
 		wantErr error
 	}{
 		{
-			name:    "weight overflow from single uint64 exceeding system maxInt",
-			cs:      []Choice[rune, uint64]{{Item: 'a', Weight: maxInt + 1}},
+			name:    "single uint64 equalling MaxUint64",
+			cs:      []Choice[rune, uint64]{{Item: 'a', Weight: math.MaxUint64}},
+			wantErr: errWeightOverflow,
+		},
+		{
+			name: "single uint64 equalling MaxUint64 and a zero weight",
+			cs: []Choice[rune, uint64]{
+				{Item: 'a', Weight: math.MaxUint64},
+				{Item: 'b', Weight: 0},
+			},
+			wantErr: errWeightOverflow,
+		},
+		{
+			name: "multiple uint64s with sum MaxUint64",
+			cs: []Choice[rune, uint64]{
+				{Item: 'a', Weight: math.MaxUint64/2 + 1},
+				{Item: 'b', Weight: math.MaxUint64/2 + 1},
+			},
 			wantErr: errWeightOverflow,
 		},
 	}
@@ -139,38 +158,6 @@ func TestChooser_Pick(t *testing.T) {
 	verifyFrequencyCounts(t, counts, choices)
 }
 
-// TestChooser_PickSource is the same test methodology as TestChooser_Pick, but
-// here we use the PickSource method and access the same chooser concurrently
-// from multiple different goroutines, each providing its own source of
-// randomness.
-func TestChooser_PickSource(t *testing.T) {
-	choices := mockFrequencyChoices(t, testChoices)
-	chooser, err := NewChooser(choices...)
-	if err != nil {
-		t.Fatal(err)
-	}
-	t.Log("totals in chooser", chooser.totals)
-
-	counts1 := make(map[int]int)
-	counts2 := make(map[int]int)
-	var wg sync.WaitGroup
-	wg.Add(2)
-	checker := func(counts map[int]int) {
-		defer wg.Done()
-		rs := rand.New(rand.NewSource(time.Now().UTC().UnixNano()))
-		for i := 0; i < testIterations/2; i++ {
-			c := chooser.PickSource(rs)
-			counts[c]++
-		}
-	}
-	go checker(counts1)
-	go checker(counts2)
-	wg.Wait()
-
-	verifyFrequencyCounts(t, counts1, choices)
-	verifyFrequencyCounts(t, counts2, choices)
-}
-
 // Similar to what is used in randutil test, but in randomized order to avoid
 // any issues with algorithms that are accidentally dependant on presorted data.
 func mockFrequencyChoices(t *testing.T, n int) []Choice[int, int] {
@@ -259,30 +246,11 @@ func BenchmarkPickParallel(b *testing.B) {
 	}
 }
 
-func BenchmarkPickSourceParallel(b *testing.B) {
-	for n := BMMinChoices; n <= BMMaxChoices; n *= 10 {
-		b.Run(fmt.Sprintf("size=%s", fmt1eN(n)), func(b *testing.B) {
-			choices := mockChoices(n)
-			chooser, err := NewChooser(choices...)
-			if err != nil {
-				b.Fatal(err)
-			}
-			b.ResetTimer()
-			b.RunParallel(func(pb *testing.PB) {
-				rs := rand.New(rand.NewSource(time.Now().UTC().UnixNano()))
-				for pb.Next() {
-					_ = chooser.PickSource(rs)
-				}
-			})
-		})
-	}
-}
-
 func mockChoices(n int) []Choice[rune, int] {
 	choices := make([]Choice[rune, int], 0, n)
 	for i := 0; i < n; i++ {
 		s := '🥑'
-		w := rand.Intn(10)
+		w := rand.IntN(10)
 		c := NewChoice(s, w)
 		choices = append(choices, c)
 	}