Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
sgreben committed Oct 5, 2024
0 parents commit 9de5c16
Show file tree
Hide file tree
Showing 16 changed files with 839 additions and 0 deletions.
11 changes: 11 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file

version: 2
updates:
- package-ecosystem: "gomod" # See documentation for possible values
directory: "/" # Location of package manifests
schedule:
interval: "daily"
30 changes: 30 additions & 0 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# This workflow will build a golang project
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go

name: Go

on:
push:
branches: ["main"]
pull_request:
branches: ["main"]

jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: "1.23"

- name: Build
run: go build -v ./...

- name: Test
run: go test -v ./...

- name: Coverage
run: go test -v -cover ./...
53 changes: 53 additions & 0 deletions .github/workflows/gocover.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
name: Go coverage badge # The name of the workflow that will appear on Github

on:
push:
branches: [main]
# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:

jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
go: [1.23]
steps:
- uses: actions/checkout@v4

- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: ${{ matrix.go }}

- name: Build
run: go install

- name: Test
run: |
go test -v -cover ./... -coverprofile coverage.out -coverpkg ./...
go tool cover -func coverage.out -o coverage.out # Replaces coverage.out with the analysis of coverage.out
- name: Go Coverage Badge
uses: tj-actions/coverage-badge-go@v2
if: ${{ runner.os == 'Linux' && matrix.go == '1.23' }} # Runs this on only one of the ci builds.
with:
green: 80
filename: coverage.out
link: https://github.com/keilerkonzept/bitknn/actions/workflows/gocover.yaml

- uses: stefanzweifel/git-auto-commit-action@v5
id: auto-commit-action
with:
commit_message: Apply Code Coverage Badge
skip_fetch: true
skip_checkout: true
file_pattern: ./README.md

- name: Push Changes
if: steps.auto-commit-action.outputs.changes_detected == 'true'
uses: ad-m/github-push-action@master
with:
github_token: ${{ github.token }}
branch: ${{ github.ref }}
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.out
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 KEILERKONZEPT UG (haftungsbeschränkt)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
79 changes: 79 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# bitknn

[![Go Reference](https://pkg.go.dev/badge/github.com/keilerkonzept/bitknn.svg)](https://pkg.go.dev/github.com/keilerkonzept/bitknn)
[![Go Report Card](https://goreportcard.com/badge/github.com/keilerkonzept/bitknn)](https://goreportcard.com/report/github.com/keilerkonzept/bitknn)


```go
import "github.com/keilerkonzept/bitknn"
```

`bitknn` is a fast k-nearest neighbors (k-NN) library for `uint64`s, using Hamming distance to measure similarity.

If you need to classify **binary feature vectors that fit into `uint64`s**, this library might be useful. It is fast mainly because we can use cheap bitwise ops (XOR + POPCNT) to calculate distances between `uint64` values. For smaller datasets, the performance of the [neighbor heap](heap.go) is also relevant, so that part has been tuned as well.

You can optionally weight class votes by distance, or specify different vote values per data point.


**Contents**
- [Usage](#usage)
- [Options](#options)
- [Benchmarks](#benchmarks)
- [License](#license)

## Usage

```go
package main

import (
"fmt"
"github.com/keilerkonzept/bitknn"
)

func main() {
// feature vectors packed into uint64s
data := []uint64{0b101010, 0b111000, 0b000111}
// class labels
labels := []int{0, 1, 1}

model := bitknn.Fit(data, labels, 2, bitknn.WithLinearDecay())

// one vote counter per class
votes := make([]float64, 2)
model.Predict1(0b101011, votes)

fmt.Println("Votes:", votes)
}
```

## Options

- `WithLinearDecay()`: Apply linear distance weighting (`1 / (1 + dist)`).
- `WithQuadraticDecay()`: Apply quadratic distance weighting (`1 / (1 + dist^2)`).
- `WithDistanceWeightFunc(f func(dist int) float64)`: Use a custom distance weighting function.
- `WithValues(values []float64)`: Assign specific vote values for each data point.

## Benchmarks

```
goos: darwin
goarch: arm64
pkg: github.com/keilerkonzept/bitknn
cpu: Apple M1 Pro
```

| op | N | k | iters | ns/op | B/op | allocs/op |
|------------|---------|-----|---------|--------------|------|-----------|
| `Predict1` | 100 | 3 | 8308794 | 121.4 ns/op | 0 | 0 |
| `Predict1` | 100 | 10 | 4707778 | 269.7 ns/op | 0 | 0 |
| `Predict1` | 100 | 100 | 2255380 | 549.2 ns/op | 0 | 0 |
| `Predict1` | 1000 | 3 | 1693364 | 659.3 ns/op | 0 | 0 |
| `Predict1` | 1000 | 10 | 1220426 | 1005 ns/op | 0 | 0 |
| `Predict1` | 1000 | 100 | 345151 | 3560 ns/op | 0 | 0 |
| `Predict1` | 1000000 | 3 | 2076 | 566647 ns/op | 0 | 0 |
| `Predict1` | 1000000 | 10 | 2112 | 568787 ns/op | 0 | 0 |
| `Predict1` | 1000000 | 100 | 2066 | 587827 ns/op | 0 | 0 |

## License

MIT License
5 changes: 5 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
module github.com/keilerkonzept/bitknn

go 1.23.0

require github.com/google/go-cmp v0.6.0
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
78 changes: 78 additions & 0 deletions heap.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package bitknn

import "unsafe"

// neighborHeap is a binary max-heap keyed by distance. It is stored in two
// parallel slices: indices[i] is the data point index that belongs to
// distances[i]. The largest distance among the heap elements sits at
// position 0, so the current worst of the kept neighbors is always at the root.
//
// Only the first len slots are part of the heap. pushpop additionally uses the
// final slot of both slices as scratch space (via lastDistance / lastIndex)
// and treats position len as that slot, so it must only be called when
// len == len(distances)-1 == len(indices)-1.
type neighborHeap struct {
	// distances holds the heap keys; the first len entries are heap-ordered.
	distances []int

	// lastDistance points at the final element of distances, the scratch slot
	// written by pushpop. It is nil when distances is empty.
	lastDistance *int

	// indices holds the data point index paired with each entry of distances.
	indices []int

	// lastIndex points at the final element of indices, the scratch slot
	// written by pushpop. It is nil when indices is empty.
	lastIndex *int

	// len is the number of elements currently in the heap.
	len int
}

// unsafeSizeofInt is the size of an int in bytes; it is the element stride
// used for the pointer arithmetic in makeNeighborHeap.
const unsafeSizeofInt = unsafe.Sizeof(int(0))

// makeNeighborHeap returns an empty heap (len == 0) backed by the given
// slices. The slices are used in place, not copied.
//
// lastDistance and lastIndex are set to the address of the final element of
// the respective slice. For an empty (or nil) slice there is no final element:
// the pointer is left nil instead of being computed from a wrapped-around
// offset, which would point outside the allocation.
func makeNeighborHeap(distances, indices []int) neighborHeap {
	h := neighborHeap{
		distances: distances,
		indices:   indices,
	}
	if n := len(distances); n > 0 {
		h.lastDistance = (*int)(unsafe.Add(unsafe.Pointer(unsafe.SliceData(distances)), unsafeSizeofInt*uintptr(n-1)))
	}
	if n := len(indices); n > 0 {
		h.lastIndex = (*int)(unsafe.Add(unsafe.Pointer(unsafe.SliceData(indices)), unsafeSizeofInt*uintptr(n-1)))
	}
	return h
}

// swap exchanges the elements at positions i and j in both parallel slices.
func (me *neighborHeap) swap(i, j int) {
	me.distances[i], me.distances[j] = me.distances[j], me.distances[i]
	me.indices[i], me.indices[j] = me.indices[j], me.indices[i]
}

// less reports whether the element at i has a strictly larger distance than
// the element at j. The comparison is inverted on purpose: running the usual
// sift-up / sift-down procedures with it yields a max-heap.
func (me *neighborHeap) less(i, j int) bool {
	return me.distances[i] > me.distances[j]
}

// pushpop inserts (value, index) and evicts the element with the largest
// distance, leaving the heap size unchanged. The evicted element ends up in
// the scratch slot at position me.len.
//
// It writes through lastDistance / lastIndex and then addresses that slot as
// position me.len, so it requires me.len == len(distances)-1.
func (me *neighborHeap) pushpop(value int, index int) {
	n := me.len
	// Place the new element in the scratch slot and sift it up as if the heap
	// had n+1 elements.
	*me.lastDistance = value
	*me.lastIndex = index
	me.up(n)
	// Move the maximum (root) out of the heap into the scratch slot.
	me.swap(0, n)

	// Inlined sift-down of the new root over the first n elements.
	i := 0
	for {
		l := 2*i + 1         // Left child
		if l >= n || l < 0 { // No left child (l < 0 guards against int overflow)
			break
		}
		j := l
		if r := l + 1; r < n && me.less(r, l) { // Right child exists and has the larger distance
			j = r
		}
		if !me.less(j, i) { // Largest child is not larger than the parent: heap property holds
			break
		}
		me.swap(i, j) // Swap parent with its largest child
		i = j         // Continue pushing down
	}
}

// push appends (value, index) at position me.len, grows the heap by one and
// restores the heap property. The backing slices must have room at that
// position; otherwise the slice index panics.
func (me *neighborHeap) push(value int, index int) {
	n := me.len
	me.distances[n] = value
	me.indices[n] = index
	me.len = n + 1
	me.up(n)
}

// up sifts the element at position i towards the root until its parent's
// distance is at least as large as its own.
func (me *neighborHeap) up(i int) {
	for {
		p := (i - 1) / 2              // Parent index; for i == 0 this truncates to 0
		if p == i || !me.less(i, p) { // i is the root, or it is not larger than its parent
			break
		}
		me.swap(p, i) // Swap child with parent
		i = p         // Continue moving up
	}
}
97 changes: 97 additions & 0 deletions heap_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package bitknn

import (
"testing"
)

// TestMakeNeighborHeap checks that makeNeighborHeap aims lastDistance and
// lastIndex at the final element of the respective backing slice.
func TestMakeNeighborHeap(t *testing.T) {
	h := makeNeighborHeap([]int{10, 20, 30}, []int{1, 2, 3})

	if got := *h.lastDistance; got != 30 {
		t.Errorf("Expected lastDistance to be 30, got %d", got)
	}
	if got := *h.lastIndex; got != 3 {
		t.Errorf("Expected lastIndex to be 3, got %d", got)
	}
}

// TestNeighborHeapSwap checks that swap exchanges the two positions in both
// the distances and the indices slice.
func TestNeighborHeapSwap(t *testing.T) {
	h := neighborHeap{
		distances: []int{10, 20, 30},
		indices:   []int{1, 2, 3},
	}

	h.swap(0, 2)

	if d := h.distances; !(d[0] == 30 && d[2] == 10) {
		t.Errorf("Swap failed on distances, got %v", d)
	}
	if ix := h.indices; !(ix[0] == 3 && ix[2] == 1) {
		t.Errorf("Swap failed on indices, got %v", ix)
	}
}

// TestNeighborHeapLess checks the inverted ordering used by the heap: less is
// true exactly when the first position holds the larger distance.
func TestNeighborHeapLess(t *testing.T) {
	h := neighborHeap{
		distances: []int{10, 20, 30},
		indices:   []int{1, 2, 3},
	}
	const nearPos, farPos = 0, 2

	if ok := h.less(farPos, nearPos); !ok {
		t.Errorf("Expected less(2, 0) to be true, got false")
	}

	if ok := h.less(nearPos, farPos); ok {
		t.Errorf("Expected less(0, 2) to be false, got true")
	}
}

// TestNeighborHeapPushPop checks a single pushpop on a full heap of three
// elements: the new element enters the heap and the previous maximum is moved
// to the scratch slot at the end.
func TestNeighborHeapPushPop(t *testing.T) {
	h := makeNeighborHeap(
		[]int{30, 20, 10, 0},
		[]int{1, 2, 3, 0},
	)
	h.len = 3

	h.pushpop(25, 4)

	// Heap part first, evicted element last.
	wantDistances := []int{25, 20, 10, 30}
	wantIndices := []int{4, 2, 3, 1}
	for pos, wantDist := range wantDistances {
		if got := h.distances[pos]; got != wantDist {
			t.Errorf("Expected distance at %d to be %d, got %d", pos, wantDist, got)
		}
		wantIdx := wantIndices[pos]
		if got := h.indices[pos]; got != wantIdx {
			t.Errorf("Expected index at %d to be %d, got %d", pos, wantIdx, got)
		}
	}
}

// TestNeighborHeapPush fills a three-element heap with push and then streams
// four smaller candidates through pushpop. Afterwards the heap holds the three
// smallest distances seen (6, 7, 8), so the root must be distance 8, which was
// inserted with index 1.
func TestNeighborHeapPush(t *testing.T) {
	heap := makeNeighborHeap(
		make([]int, 4),
		make([]int, 4),
	)

	heap.push(10, 3)
	heap.push(15, 5)
	heap.push(25, 6)
	heap.pushpop(9, 3)
	heap.pushpop(7, 2)
	heap.pushpop(8, 1)
	heap.pushpop(6, 0)

	// The expected values are shared between the check and the message so the
	// failure output cannot drift from what is actually asserted.
	const wantDistance, wantIndex = 8, 1
	if heap.distances[0] != wantDistance {
		t.Errorf("Expected root distance to be %d, got %d", wantDistance, heap.distances[0])
	}
	if heap.indices[0] != wantIndex {
		t.Errorf("Expected root index to be %d, got %d", wantIndex, heap.indices[0])
	}
}
Loading

0 comments on commit 9de5c16

Please sign in to comment.