-
Notifications
You must be signed in to change notification settings - Fork 12
/
clusters.go
103 lines (80 loc) · 2.75 KB
/
clusters.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
// Package clusters provides abstract definitions of clusterers as well as
// their implementations.
package clusters
import (
"math"
)
// DistanceFunc is the signature shared by distance measures: it receives two
// n-dimensional vectors and returns the distance between them as a float64.
type DistanceFunc func(a, b []float64) float64
// Online groups the parameters that configure online learning in
// clustering algorithms.
type Online struct {
	// Alpha is a floating-point tuning parameter (presumably the learning
	// rate; confirm against the algorithm implementations).
	Alpha float64

	// Dimension is an integer parameter (presumably the number of components
	// of each observation; confirm against the algorithm implementations).
	Dimension int
}
// HCEvent carries an intermediate result of a hard clustering algorithm.
// Events are transmitted periodically to the caller during online learning.
type HCEvent struct {
	// Cluster is a cluster number (presumably the cluster that Observation
	// was assigned to; confirm against the algorithm implementations).
	Cluster int

	// Observation is the data point this event refers to.
	Observation []float64
}
// Clusterer describes the learning operation that every clustering
// algorithm has in common.
type Clusterer interface {
	// Learn trains the algorithm on the given dataset and reports any
	// failure through the returned error.
	Learn(data [][]float64) error
}
// HardClusterer defines a set of operations for hard clustering algorithms
type HardClusterer interface {
// Sizes returns sizes of respective clusters
Sizes() []int
// Guesses returns mapping from data point indices to cluster numbers. Clusters' numbering begins at 1.
Guesses() []int
// Predict returns number of cluster to which the observation would be assigned
Predict(observation []float64) int
// IsOnline tells the algorithm supports online learning
IsOnline() bool
// WithOnline configures the algorithms for online learning with given parameters
WithOnline(Online) HardClusterer
// Online begins the process of online training of an algorithm. Observations are sent on the observations channel,
// once no more are expected an empty struct needs to be sent on done channel. Caller receives intermediate results of computation via
// the returned channel.
Online(observations chan []float64, done chan struct{}) chan *HCEvent
// Implement common operation
Clusterer
}
// Estimator describes a computation that determines an optimal number of
// clusters for a dataset.
type Estimator interface {
	// Estimate returns the expected number of clusters in the dataset, or
	// an error.
	Estimate(data [][]float64) (int, error)
}
// Importer describes the operation of loading a dataset from an external
// file.
type Importer interface {
	// Import fetches the data from file. The start and end arguments let
	// the caller choose the span of data columns to import; both bounds are
	// inclusive.
	Import(file string, start, end int) ([][]float64, error)
}
var (
	// EuclideanDistance returns the Euclidean (L2) distance between a and b,
	// i.e. the square root of the sum of squared component differences.
	//
	// If the vectors differ in length, only the components they have in
	// common (the shorter length) are compared, so the result is the same
	// regardless of argument order and the call never panics.
	EuclideanDistance = func(a, b []float64) float64 {
		return math.Sqrt(sumSquaredDiffs(a, b))
	}

	// EuclideanDistanceSquared returns the squared Euclidean distance
	// between a and b. It skips the square root, which makes it suitable
	// when distances only need to be compared with one another.
	//
	// If the vectors differ in length, only the components they have in
	// common (the shorter length) are compared.
	EuclideanDistanceSquared = func(a, b []float64) float64 {
		return sumSquaredDiffs(a, b)
	}
)

// sumSquaredDiffs adds up (a[i]-b[i])^2 over the indices present in both
// slices. It returns 0 when either slice is empty.
func sumSquaredDiffs(a, b []float64) float64 {
	// Bound the loop by the shorter slice so a length mismatch cannot
	// index out of range.
	n := len(a)
	if len(b) < n {
		n = len(b)
	}

	var sum float64
	for i := 0; i < n; i++ {
		d := a[i] - b[i]
		sum += d * d
	}
	return sum
}