Skip to content

Commit

Permalink
Merge pull request #2 from k8gb-io/handle-zero-records
Browse files Browse the repository at this point in the history
Handling zero PDF items
  • Loading branch information
kuritka authored Aug 25, 2022
2 parents a725274 + abd9519 commit 5fdf4f1
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 33 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tag.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,5 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
RELEASE_BRANCHES: main
WITH_V: true
DEFAULT_BUMP: patch
DEFAULT_BUMP: minor
# PRERELEASE_SUFFIX: beta
19 changes: 15 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ go get github.com/k8gb-io/go-weight-shuffling
// the pdf values must add up to 100 (percent) in total.
pdf := []int{30, 40, 20, 10}
// handle error in real code
ws, _ := NewWS(pdf)
ws, _ := gows.NewWS(pdf)
// the index is selected from the probability determined by the pdf
index := ws.Pick()
```
Expand All @@ -42,15 +42,15 @@ while it will return 0 or 2 in about 10 out of 100 cases.

**The only condition is that the sum of all values in the PDF is always equal to 100!**

## PickVector() Usage
## PickVector(Settings) Usage

```go
// the pdf values must add up to 100 (percent) in total.
pdf := []int{30, 40, 20, 10}
// handle error in real code
ws, _ := NewWS(pdf)
ws, _ := gows.NewWS(pdf)
// the result is a slice of indexes that is "probably" sorted by probability
indexes := wrr.PickVector()
indexes := ws.PickVector(gows.KeepIndexesForZeroPDF)
```

A bit more complex case is when you need to shuffle the indexes in the array to match the PDF instead of one element.
Expand All @@ -66,6 +66,17 @@ vector. For example, for `PDF={30,40,20,10}` the result will be like this:
the function returns an index slice such that index0 will be represented in the zero position in about 30% of cases,
index1 will be in the first position in about 40% of cases, etc.

### PickVector settings argument
The `Settings` argument defines how the `PickVector` function returns indexes. Imagine you have
a PDF for three different parts and you set one of them to 0 (effectively turning it off, because
the probability of picking this index is 0). There is no universal answer to what should happen
with such an index; each use case requires different behavior. Currently two behaviors are defined:

- `KeepIndexesForZeroPDF` keeps the indexes of zero-PDF elements; e.g. for `pdf=[0,50,50,0,0,0]` it returns `[1,2,0,3,4,5]` or `[2,1,0,3,4,5]`
- `IgnoreIndexesForZeroPDF` filters out the indexes of zero-PDF elements; e.g. for `pdf=[0,50,50,0,0,0]` it returns `[1,2]` or `[2,1]`

Translated with www.DeepL.com/Translator (free version)

## Examples
This library is ideal for Weight RoundRobin. Imagine you need to balance these addresses (can be applied to whole groups
of addresses):
Expand Down
80 changes: 53 additions & 27 deletions gows/ws.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,54 +22,70 @@ import (

// WS Weight Round Robin Algorithm
type WS struct {
pdf []int
index100 int
pdf touples
filteredPDFFromZeroElements touples
}

// touples is a list of (index, percentage) pairs. In NewWS, index is the
// element's position in the input pdf and percentage is its value; getCDF
// reuses the same type to hold cumulative percentages.
type touples []struct {
index int
percentage int
}

// Settings defines how the PickVector function returns indexes of
// elements whose pdf value is 0.
type Settings int

const (
// KeepIndexesForZeroPDF keeps the indexes of zero pdf elements in the result.
// It is the zero value of Settings.
// e.g. for pdf=[0,50,50,0,0,0] PickVector may return [1,2,0,3,4,5] or [2,1,0,3,4,5]
KeepIndexesForZeroPDF Settings = iota

// IgnoreIndexesForZeroPDF filters out the indexes of zero pdf elements.
// e.g. for pdf=[0,50,50,0,0,0] PickVector may return [1,2] or [2,1]
IgnoreIndexesForZeroPDF
)

// NewWS instantiate weight round robin
func NewWS(pdf []int) (wrr *WS, err error) {
r := 0
max100 := -1
wrr = new(WS)
for i, v := range pdf {
if v == 100 {
max100 = i
}
r += v
if v < 0 || v > 100 {
return wrr, fmt.Errorf("value %v out of range [0;100]", v)
}
t := struct {
index int
percentage int
}{i, v}
if v != 0 {
wrr.filteredPDFFromZeroElements = append(wrr.filteredPDFFromZeroElements, t)
}
wrr.pdf = append(wrr.pdf, t)
}
if r != 100 {
return wrr, fmt.Errorf("sum of pdf elements must be equal to 100 perent")
}
rand.Seed(time.Now().UnixNano())
wrr = new(WS)
wrr.pdf = pdf
wrr.index100 = max100
return wrr, nil
}

// PickVector returns slice shuffled by pdf distribution.
// The item with the highest probability will occur more often
// at the position that has the highest probability in the PDF
// see README.md
func (w *WS) PickVector() (indexes []int) {
if w.index100 != -1 {
return w.handle100()
}

pdf := make([]int, len(w.pdf))
copy(pdf, w.pdf)
func (w *WS) PickVector(settings Settings) (indexes []int) {
pdf := make(touples, len(w.filteredPDFFromZeroElements))
copy(pdf, w.filteredPDFFromZeroElements)
balance := 100
for i := 0; i < len(pdf); i++ {
cdf := w.getCDF(pdf)
index := w.pick(cdf, balance)
indexes = append(indexes, index)

balance -= pdf[index]
pdf[index] = 0
balance -= pdf[index].percentage
pdf[index].percentage = 0
}
return indexes
return w.indexes(settings, indexes)
}

// Pick returns one index with probability given by pdf
Expand All @@ -80,32 +96,42 @@ func (w *WS) Pick() int {
}

// pick one index
func (w *WS) pick(cdf []int, n int) int {
func (w *WS) pick(cdf touples, n int) int {
r := rand.Intn(n)
index := 0
for r >= cdf[index] {
for r >= cdf[index].percentage {
index++
}
return index
}

func (w *WS) getCDF(pdf []int) (cdf []int) {
func (w *WS) getCDF(pdf touples) (cdf touples) {
// prepare cdf
for i := 0; i < len(pdf); i++ {
cdf = append(cdf, 0)
cdf = append(cdf, struct {
index int
percentage int
}{index: 0, percentage: 0})
}
cdf[0] = pdf[0]
for i := 1; i < len(pdf); i++ {
cdf[i] = cdf[i-1] + pdf[i]
cdf[i].percentage = cdf[i-1].percentage + pdf[i].percentage
}
return cdf
}

// there is no reason to calculate CDF and recompute PDF's if some field has 100%
func (w *WS) handle100() (indexes []int) {
func (w *WS) indexes(settings Settings, calculatedIndexes []int) (indexes []int) {
if settings == IgnoreIndexesForZeroPDF {
for _, v := range calculatedIndexes {
indexes = append(indexes, w.filteredPDFFromZeroElements[v].index)
}
return indexes
}
for i := 0; i < len(w.pdf); i++ {
indexes = append(indexes, i)
}
indexes[0], indexes[w.index100] = indexes[w.index100], indexes[0]
for i, v := range calculatedIndexes {
indexes[i], indexes[w.filteredPDFFromZeroElements[v].index] = indexes[w.filteredPDFFromZeroElements[v].index], indexes[i]
}
return indexes
}
70 changes: 69 additions & 1 deletion gows/ws_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ func TestInit(t *testing.T) {
{"hundred", []int{0, 0, 100}, true},
{"hundred", []int{100, 0}, true},
{"hundred", []int{100}, true},
{"50 50 0", []int{50, 50, 0}, true},
{"50 50 0 0", []int{50, 50, 0, 0}, true},
{"50 0 50 0", []int{50, 0, 50, 0}, true},
{"0 50 0 50", []int{0, 50, 0, 50}, true},
{"50 0 0 50", []int{50, 0, 0, 50}, true},
{"0 0 50 0 0 50 0 0", []int{0, 0, 50, 0, 0, 50, 0, 0}, true},
}
for _, test := range tests {
t.Run(fmt.Sprintf("%s: %v", test.name, test.pdf), func(t *testing.T) {
Expand Down Expand Up @@ -67,6 +73,7 @@ func TestPick(t *testing.T) {
{"multiple zeros", []int{100, 0, 0}, 0},
{"multiple zeros", []int{0, 100, 0}, 0},
{"multiple zeros", []int{0, 0, 100}, 0},
{"50 50 0", []int{50, 50, 0}, 20},
}

for _, test := range tests {
Expand Down Expand Up @@ -104,6 +111,12 @@ func TestPickVector(t *testing.T) {
{"multiple zeros", []int{0, 100, 0}, 0},
{"multiple zeros", []int{0, 0, 100}, 0},
{"multiple zeros", []int{0, 0, 0, 100, 0, 0}, 0},
{"50 50 0", []int{50, 50, 0}, 5},
{"50 50 0 0", []int{50, 50, 0, 0}, 5},
{"50 0 50 0", []int{50, 0, 50, 0}, 5},
{"0 50 0 50", []int{0, 50, 0, 50}, 5},
{"50 0 0 50", []int{50, 0, 0, 50}, 5},
{"0 0 50 0 0 50 0 0", []int{0, 0, 50, 0, 0, 50, 0, 0}, 5},
}
for _, test := range tests {
t.Run(fmt.Sprintf("%s: %v", test.name, test.pdf), func(t *testing.T) {
Expand All @@ -117,7 +130,7 @@ func TestPickVector(t *testing.T) {

for i := 0; i < n; i++ {

indexes := wrr.PickVector()
indexes := wrr.PickVector(KeepIndexesForZeroPDF)
for _, v := range indexes {
assert.True(t, v >= 0 && v < len(test.pdf), "Pick returned index out of range")
}
Expand All @@ -140,6 +153,61 @@ func TestPickVector(t *testing.T) {
}
}

// TestSettings checks that PickVector returns exactly the expected set of
// index values (in any order) for each Settings value.
func TestSettings(t *testing.T) {
	tests := []struct {
		name                string
		pdf                 []int
		expectedIndexValues []int
		settings            Settings
	}{
		{"happy distribution -Ignore", []int{30, 40, 20, 10}, []int{0, 1, 2, 3}, IgnoreIndexesForZeroPDF},
		{"happy distribution - Keep", []int{30, 40, 20, 10}, []int{0, 1, 2, 3}, KeepIndexesForZeroPDF},

		// Use the named constants so each case exercises the setting its
		// name claims; a literal 0 always means KeepIndexesForZeroPDF.
		{"one element - Ignore", []int{100}, []int{0}, IgnoreIndexesForZeroPDF},
		{"one element - Keep", []int{100}, []int{0}, KeepIndexesForZeroPDF},

		{"one zero - Ignore", []int{100, 0}, []int{0}, IgnoreIndexesForZeroPDF},
		{"one zero - Keep ", []int{100, 0}, []int{0, 1}, KeepIndexesForZeroPDF},
		{"0 100 0 - Ignore", []int{0, 100, 0}, []int{1}, IgnoreIndexesForZeroPDF},
		{"0 100 0 - Keep ", []int{0, 100, 0}, []int{0, 1, 2}, KeepIndexesForZeroPDF},

		{"0 50 0 50 - Ignore ", []int{0, 50, 0, 50}, []int{1, 3}, IgnoreIndexesForZeroPDF},
		{"0 50 0 50 - Keep ", []int{0, 50, 0, 50}, []int{0, 1, 2, 3}, KeepIndexesForZeroPDF},

		{"0 0 50 0 0 50 0 0 - Ignore", []int{0, 0, 50, 0, 0, 50, 0, 0}, []int{2, 5}, IgnoreIndexesForZeroPDF},
		{"0 0 50 0 0 50 0 0 - Keep", []int{0, 0, 50, 0, 0, 50, 0, 0}, []int{0, 1, 2, 3, 4, 5, 6, 7}, KeepIndexesForZeroPDF},
	}
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			wrr, err := NewWS(test.pdf)
			require.NoError(t, err)
			// PickVector is randomized, so call it more than once per case.
			for x := 0; x < 2; x++ {
				indexes := wrr.PickVector(test.settings)
				assert.True(t, containsValues(indexes, test.expectedIndexValues), "%v %v", indexes, test.expectedIndexValues)
			}
		})
	}
}

// containsValues reports whether slices a and b hold the same values with the
// same multiplicities. The values may appear in a different order, but every
// value must be present in both slices the same number of times.
func containsValues(a, b []int) bool {
	if len(a) != len(b) {
		return false
	}
	// Count occurrences in a, then cancel them out with b. Counting both
	// slices into one tally and expecting "2" breaks on duplicates: [1,1] vs
	// [2,2] would look equal and [1,1] vs [1,1] would not.
	counts := make(map[int]int, len(a))
	for _, v := range a {
		counts[v]++
	}
	for _, v := range b {
		counts[v]--
	}
	for _, c := range counts {
		if c != 0 {
			return false
		}
	}
	return true
}

func sum(result []int) (sum int) {
for _, v := range result {
sum += v
Expand Down

0 comments on commit 5fdf4f1

Please sign in to comment.