forked from KevinCoble/AIToolbox
-
Notifications
You must be signed in to change notification settings - Fork 0
/
MachineLearningProtocols.swift
357 lines (299 loc) · 12.1 KB
/
MachineLearningProtocols.swift
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
//
// MachineLearningProtocols.swift
// AIToolbox
//
// Created by Kevin Coble on 1/16/16.
// Copyright © 2016 Kevin Coble. All rights reserved.
//
import Foundation
/// Error values available to the machine-learning code in this module.
///
/// None of the cases carries an associated value, so the case name is the
/// only description of the failure.
public enum MachineLearningError: Error {
    // Cases named after the data being supplied
    case dataNotRegression, dataNotClassification, dataWrongDimension, notEnoughData

    // Cases named after the model's kind or state
    case modelNotRegression, modelNotClassification, notTrained, initializationError

    // Cases named after the training run
    case didNotConverge, continuationNotSupported, operationTimeout, continueTrainingClassesNotSame
}
/// The kind of output data a data set carries.
public enum DataSetType {
    /// Regression data.
    case regression

    /// Classification data.
    case classification

    /// Has both real array and class (SVM predict values, etc.).
    case realAndClass
}
/// Machine learning data set provider.
///
/// The protocol is class-bound, so every adopter is a reference type.
public protocol MLDataSet: class {
    /// Which kind of data (see `DataSetType`) the set holds.
    var dataType: DataSetType { get }

    /// Dimension of the input vectors.
    var inputDimension: Int { get }

    /// Dimension of the output vectors.
    var outputDimension: Int { get }

    /// Number of entries; the extensions in this file visit indices `0..<size`.
    var size: Int { get }

    /// Optional data that can be temporarily added by methods using the data set.
    var optionalData: AnyObject? { get set }

    /// Returns the input vector for the entry at `index`; may throw.
    func getInput(_ index: Int) throws -> [Double]
}
/// Machine learning regression data set provider.
///
/// Refines `MLDataSet` with access to a `[Double]` output vector per entry.
public protocol MLRegressionDataSet: MLDataSet {
    /// Returns the output vector for the entry at `index`; may throw.
    func getOutput(_ index: Int) throws -> [Double]

    /// Supplies `newOutput` as the output vector for the entry at `index`; may throw.
    func setOutput(_ index: Int, newOutput: [Double]) throws
}
/// Machine learning classification data set provider.
///
/// Refines `MLDataSet` with access to an integer class label per entry.
public protocol MLClassificationDataSet: MLDataSet {
    /// Returns the class label for the entry at `index`; may throw.
    func getClass(_ index: Int) throws -> Int

    /// Supplies `newClass` as the class label for the entry at `index`; may throw.
    func setClass(_ index: Int, newClass: Int) throws
}
/// Machine learning classification AND regression data set provider
/// (for classes that can do both).
///
/// Declares no requirements of its own; it only combines the two refined protocols.
public protocol MLCombinedDataSet: MLRegressionDataSet, MLClassificationDataSet {}
/// Interface of a model that maps an input vector to an integer class label.
public protocol Classifier {
    // MARK: Dimensions

    func getInputDimension() -> Int

    /// May only be valid after training.
    func getParameterDimension() -> Int

    /// May only be valid after training.
    func getNumberOfClasses() -> Int

    // MARK: Parameters

    func setParameters(_ parameters: [Double]) throws

    /// Takes a function that produces a `[Double]` from a training data set.
    /// The argument is implicitly unwrapped, so `nil` can be passed.
    func setCustomInitializer(_ function: ((_ trainData: MLDataSet)->[Double])!)

    func getParameters() throws -> [Double]

    // MARK: Training

    func trainClassifier(_ trainData: MLClassificationDataSet) throws

    /// Trains without initializing parameters first.
    func continueTrainingClassifier(_ trainData: MLClassificationDataSet) throws

    // MARK: Classification

    /// Returns the class label for a single input vector.
    func classifyOne(_ inputs: [Double]) throws -> Int

    /// Classifies a whole data set. Nothing is returned, so results are
    /// presumably written back into `testData` - confirm against implementations.
    func classify(_ testData: MLClassificationDataSet) throws
}
/// Interface of a model that maps an input vector to a vector of real outputs.
public protocol Regressor {
    // MARK: Dimensions

    func getInputDimension() -> Int
    func getOutputDimension() -> Int
    func getParameterDimension() -> Int

    // MARK: Parameters

    func setParameters(_ parameters: [Double]) throws

    /// Takes a function that produces a `[Double]` from a training data set.
    /// The argument is implicitly unwrapped, so `nil` can be passed.
    func setCustomInitializer(_ function: ((_ trainData: MLDataSet)->[Double])!)

    func getParameters() throws -> [Double]

    // MARK: Training

    func trainRegressor(_ trainData: MLRegressionDataSet) throws

    /// Trains without initializing parameters first.
    func continueTrainingRegressor(_ trainData: MLRegressionDataSet) throws

    // MARK: Prediction

    /// Returns the predicted output vector for a single input vector.
    func predictOne(_ inputs: [Double]) throws -> [Double]

    /// Runs prediction over a whole data set. Nothing is returned, so results are
    /// presumably written back into `testData` - confirm against implementations.
    func predict(_ testData: MLRegressionDataSet) throws
}
/// Interface of a parameterized equation that can report its outputs and gradient.
public protocol NonLinearEquation {
    /// If output dimension > 1, parameters is a matrix with each row the
    /// parameters for one of the outputs.
    var parameters: [Double] { get set }

    // MARK: Dimensions

    func getInputDimension() -> Int
    func getOutputDimension() -> Int

    /// This must be an integer multiple of output dimension.
    func getParameterDimension() -> Int

    // MARK: Parameters

    func setParameters(_ parameters: [Double]) throws

    // MARK: Evaluation

    /// Returns vector outputs sized for outputs.
    func getOutputs(_ inputs: [Double]) throws -> [Double]

    /// Returns vector gradient sized for parameters - can be stubbed for
    /// ParameterDelta method.
    func getGradient(_ inputs: [Double]) throws -> [Double]
}
extension MLDataSet {
    /// Returns every index of the data set (`0..<size`) in a random order.
    ///
    /// The order is produced by a Fisher-Yates shuffle: position `i` is exchanged
    /// with a randomly chosen position in `i..<size`. Randomness comes from
    /// `arc4random_uniform`, or from `random()` on Linux (where the modulo
    /// reduction is slightly biased).
    ///
    /// - Returns: A permutation of `0..<size`; empty when the set has no entries.
    public func getRandomIndexSet() -> [Int] {
        // Read the size once so the index list and the shuffle bounds always agree
        let count = size

        // Get the ordered array of indices
        var shuffledArray = Array(0..<count)

        // Empty and single-element collections don't shuffle
        if count < 2 { return shuffledArray }

        // Shuffle
        for i in 0..<count - 1 {
#if os(Linux)
            let j = Int(random() % (count - i)) + i
#else
            let j = Int(arc4random_uniform(UInt32(count - i))) + i
#endif
            guard i != j else { continue }

            // Exchange through a temporary: swap(&a[i], &a[j]) takes two overlapping
            // inout accesses to the same array, which exclusivity enforcement rejects.
            let displaced = shuffledArray[i]
            shuffledArray[i] = shuffledArray[j]
            shuffledArray[j] = displaced
        }

        return shuffledArray
    }
}
extension MLRegressionDataSet {
    /// Finds the minimum and maximum of every input dimension over the whole data set.
    ///
    /// This is a best-effort scan: an entry whose input cannot be read is skipped,
    /// and a vector shorter than `inputDimension` only contributes the components
    /// it actually has.
    ///
    /// - Returns: One `(minimum, maximum)` tuple per input dimension. A dimension for
    ///   which no value was seen keeps the initial `(+infinity, -infinity)`.
    public func getInputRange() -> [(minimum: Double, maximum: Double)] {
        // Allocate the array of tuples
        var results: [(minimum: Double, maximum: Double)] = Array(repeating: (minimum: Double.infinity, maximum: -Double.infinity), count: inputDimension)

        // Go through each input
        for index in 0..<size {
            // Error getting input array - skip this entry
            guard let input = try? getInput(index) else { continue }

            // Go through each dimension, never reading past the end of either array
            for dimension in 0..<min(results.count, input.count) {
                if input[dimension] < results[dimension].minimum { results[dimension].minimum = input[dimension] }
                if input[dimension] > results[dimension].maximum { results[dimension].maximum = input[dimension] }
            }
        }

        return results
    }

    /// Finds the minimum and maximum of every output dimension over the whole data set.
    ///
    /// This is a best-effort scan: an entry whose output cannot be read is skipped,
    /// and a vector shorter than `outputDimension` only contributes the components
    /// it actually has.
    ///
    /// - Returns: One `(minimum, maximum)` tuple per output dimension. A dimension for
    ///   which no value was seen keeps the initial `(+infinity, -infinity)`.
    public func getOutputRange() -> [(minimum: Double, maximum: Double)] {
        // Allocate the array of tuples
        var results: [(minimum: Double, maximum: Double)] = Array(repeating: (minimum: Double.infinity, maximum: -Double.infinity), count: outputDimension)

        // Go through each output
        for index in 0..<size {
            // Error getting output array - skip this entry
            guard let outputs = try? getOutput(index) else { continue }

            // Go through each dimension, never reading past the end of either array
            for dimension in 0..<min(results.count, outputs.count) {
                if outputs[dimension] < results[dimension].minimum { results[dimension].minimum = outputs[dimension] }
                if outputs[dimension] > results[dimension].maximum { results[dimension].maximum = outputs[dimension] }
            }
        }

        return results
    }

    /// Returns the first output value of the entry at `index`.
    ///
    /// - Parameter index: Entry to read; must lie in `0..<size`.
    /// - Returns: The first component of the output vector, or `nil` when the index
    ///   is out of range, the output cannot be read, or the output vector is empty.
    public func singleOutput(_ index: Int) -> Double? {
        // Validate the index
        guard index >= 0 && index < size else { return nil }

        // Get the data; a read error is reported as nil
        guard let outputs = try? getOutput(index) else { return nil }

        // `first` is nil for an empty vector, where subscripting [0] would trap
        return outputs.first
    }
}
extension MLClassificationDataSet {
    /// Finds the minimum and maximum of every input dimension over the whole data set.
    ///
    /// This is a best-effort scan: an entry whose input cannot be read is skipped,
    /// and a vector shorter than `inputDimension` only contributes the components
    /// it actually has.
    ///
    /// - Returns: One `(minimum, maximum)` tuple per input dimension. A dimension for
    ///   which no value was seen keeps the initial `(+infinity, -infinity)`.
    public func getInputRange() -> [(minimum: Double, maximum: Double)] {
        // Allocate the array of tuples
        var results: [(minimum: Double, maximum: Double)] = Array(repeating: (minimum: Double.infinity, maximum: -Double.infinity), count: inputDimension)

        // Go through each input
        for index in 0..<size {
            // Error getting input array - skip this entry
            guard let input = try? getInput(index) else { continue }

            // Go through each dimension, never reading past the end of either array
            for dimension in 0..<min(results.count, input.count) {
                if input[dimension] < results[dimension].minimum { results[dimension].minimum = input[dimension] }
                if input[dimension] > results[dimension].maximum { results[dimension].maximum = input[dimension] }
            }
        }

        return results
    }

    /// Groups the entries of the data set by class label.
    ///
    /// If `optionalData` already holds a `ClassificationData`, that object is returned
    /// as-is. Otherwise a new grouping is built by reading the class of every entry.
    /// This method never assigns `optionalData`; a caller that wants the result
    /// reused by later calls has to store it there.
    ///
    /// - Returns: The labels found (in order of first appearance), with the count and
    ///   the entry indices for each label.
    /// - Throws: `DataTypeError.invalidDataType` when `dataType` is `.regression`, or
    ///   any error thrown by `getClass(_:)`.
    public func groupClasses() throws -> ClassificationData {
        if dataType == .regression { throw DataTypeError.invalidDataType }

        // If the data already has classification data, skip
        if let existing = optionalData as? ClassificationData { return existing }

        // Create a classification data addendum
        let classificationData = ClassificationData()

        // Get the different data labels
        for index in 0..<size {
            let thisClass = try getClass(index)
            if let classIndex = classificationData.foundLabels.index(of: thisClass) {
                // Class label found, increment count
                classificationData.classCount[classIndex] += 1
                // Add offset of data point
                classificationData.classOffsets[classIndex].append(index)
            } else {
                // Class label not found, add it
                classificationData.foundLabels.append(thisClass)
                classificationData.classCount.append(1)             // Start count at 1 - this instance
                classificationData.classOffsets.append([index])     // First offset is this point
            }
        }

        // Hand the grouping back; optionalData is left untouched
        return classificationData
    }

    /// Returns the class label of the entry at `index` as a `Double`.
    ///
    /// - Parameter index: Entry to read; must lie in `0..<size`.
    /// - Returns: The label converted to `Double`, or `nil` when the index is out of
    ///   range or the class cannot be read.
    public func singleOutput(_ index: Int) -> Double? {
        // Validate the index
        guard index >= 0 && index < size else { return nil }

        // Get the data; a read error is reported as nil
        guard let outputClass = try? getClass(index) else { return nil }
        return Double(outputClass)
    }
}
extension MLCombinedDataSet {
    /// Finds the minimum and maximum of every input dimension over the whole data set.
    ///
    /// Both `MLRegressionDataSet` and `MLClassificationDataSet` supply a default
    /// `getInputRange()`, which makes the call ambiguous on a combined data set.
    /// This overload resolves the ambiguity by forwarding to the regression
    /// version; the two defaults read the same inputs.
    ///
    /// - Returns: One `(minimum, maximum)` tuple per input dimension.
    public func getInputRange() -> [(minimum: Double, maximum: Double)] {
        // Viewing self through one refined protocol leaves a single candidate
        let regressionView: MLRegressionDataSet = self
        return regressionView.getInputRange()
    }

    /// Returns a single output value for the entry at `index`.
    ///
    /// For `.regression` data this is the first component of the output vector.
    /// For `.classification` and `.realAndClass` data it is the class label
    /// converted to `Double` (class is a single output).
    ///
    /// - Parameter index: Entry to read; must lie in `0..<size`.
    /// - Returns: The value, or `nil` when the index is out of range, the data
    ///   cannot be read, or a regression output vector is empty.
    public func singleOutput(_ index: Int) -> Double? {
        // Validate the index
        guard index >= 0 && index < size else { return nil }

        // Get the data
        do {
            switch dataType {
            case .regression:
                // `first` is nil for an empty vector, where subscripting [0] would trap
                return try getOutput(index).first
            case .classification, .realAndClass:
                let outputClass = try getClass(index)
                return Double(outputClass)
            }
        } catch {
            // Index error
            return nil
        }
    }
}
extension Classifier {
    /// Calculate the share of a test data set that the classifier labels correctly.
    ///
    /// Despite the name, the result is a fraction between 0 and 1; it is not
    /// scaled to 0-100.
    ///
    /// - Parameter testData: Data set whose `dataType` must be `.classification`.
    /// - Returns: Number of correctly classified entries divided by `testData.size`.
    /// - Throws: `DataTypeError.invalidDataType` for a non-classification set,
    ///   `MachineLearningError.notEnoughData` for an empty set, or any error thrown
    ///   while reading the data or classifying an entry.
    public func getClassificationPercentage(_ testData: DataSet) throws -> Double {
        // Verify the data set is the right type
        if testData.dataType != .classification { throw DataTypeError.invalidDataType }

        // An empty set has no defined accuracy; dividing 0 by 0 would give NaN
        let sampleCount = testData.size
        guard sampleCount > 0 else { throw MachineLearningError.notEnoughData }

        var countCorrect = 0

        // Do for the entire test set
        for index in 0..<sampleCount {
            // Compare the classifier's answer with the expected class
            let inputs = try testData.getInput(index)
            let result = try classifyOne(inputs)
            let expectedClass = try testData.getClass(index)
            if result == expectedClass { countCorrect += 1 }
        }

        // Calculate the fraction correct
        return Double(countCorrect) / Double(sampleCount)
    }
}
extension Regressor {
    /// Calculate the total absolute value of error on a regressor using a test data set.
    ///
    /// For every entry, each predicted component is compared with the expected
    /// output component at the same position and the absolute differences are summed.
    ///
    /// - Parameter testData: Data set whose `dataType` must be `.regression`.
    /// - Returns: The sum of absolute differences over all entries; `0.0` for an empty set.
    /// - Throws: `DataTypeError.invalidDataType` for a non-regression set,
    ///   `MachineLearningError.dataWrongDimension` when an expected output vector is
    ///   shorter than the prediction, or any error thrown while reading the data or
    ///   predicting.
    public func getTotalAbsError(_ testData: DataSet) throws -> Double {
        // Verify the data set is the right type
        if testData.dataType != .regression { throw DataTypeError.invalidDataType }

        var sum = 0.0

        // Do for the entire test set
        for index in 0..<testData.size {
            // Get the results of a prediction
            let inputs = try testData.getInput(index)
            let results = try predictOne(inputs)

            // Nothing to compare for an empty prediction; the expected output is not fetched
            if results.isEmpty { continue }

            // Fetch the expected output once per entry rather than once per component
            let outputs = try testData.getOutput(index)

            // Every predicted component needs an expected value to compare against
            guard outputs.count >= results.count else { throw MachineLearningError.dataWrongDimension }

            // Sum up the differences
            for (predicted, expected) in zip(results, outputs) {
                sum += abs(predicted - expected)
            }
        }

        return sum
    }
}
/// Grouping of a classification data set's entries by class label.
///
/// `groupClasses()` fills the three arrays in step, so position `i` in each of
/// them refers to the same label.
open class ClassificationData {
    /// The distinct class labels.
    var foundLabels = [Int]()

    /// How many entries carry each label.
    var classCount = [Int]()

    /// The data set indices of the entries carrying each label.
    var classOffsets = [[Int]]()

    /// Number of distinct labels recorded.
    var numClasses: Int {
        get { return foundLabels.count }
    }
}