generaliz.js
"use stricts";
/**
* Reinforcement Learning Generalisation Module
* @author StarColon Projects
*/
var generaliz = {};
var State = require('./state.js');
var colors = require('colors');
/**
 * Fit a linear reward approximation: R' = ϴX
 * X : state vector (prefixed with 1 for the bias term)
 * R' : estimated reward
 * ϴ : linear (affine) coefficient vector
 *
 * @param {Array} states - list of #State instances
 * @param {Array} rewards - list of numeric rewards (in sync with @states)
 * @param {Number} maxIters - number of iterations to run
 * @param {Number} alpha - learning rate (non-negative)
 * @param {String} method - fitting method ('GD' for gradient descent)
 */
generaliz.fit = function(states,rewards,maxIters,alpha,method){
    // Initialise linear coefficient vector (bias + one weight per state dimension)
    var dim = states[0].state.length;
    var ϴ = new Array(dim+1).fill(1);
    // Iterate until ϴ converges
    if (method=='GD')
        return gradientDescent(ϴ,states,rewards,maxIters,alpha);
    else
        return ϴ;
}
generaliz.estimate = function(ϴ){
    return function(state){
        // R' = θ0 + Σ θi·si, where θ0 is the bias term
        return state.state.reduce((sum,s,i) => sum + s*ϴ[i+1], ϴ[0]);
    }
}
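/**
 * Minimal usage sketch (hypothetical data, for illustration only).
 * It assumes `new State([...])` yields an instance whose `.state` is that
 * numeric array — the actual constructor in state.js may differ.
 *
 *   var states  = [new State([0,1]), new State([1,0]), new State([1,1])];
 *   var rewards = [0.2, 0.5, 0.9];
 *   var ϴ       = generaliz.fit(states, rewards, 100, 0.01, 'GD');
 *   var predict = generaliz.estimate(ϴ);
 *   predict(new State([1,1])); // estimated reward for that state
 */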
/**
* Iteratively minimising the error function
*/
function gradientDescent(ϴ,states,rewards,nIters,alpha){
    if (nIters<=0)
        return ϴ;
    console.log(`Iteration #${nIters}`.green);
    console.log(' ϴ : '.yellow, ϴ);
    // Simple linear approximation (M = dim(state)):
    // R'(ϴ,S) = θ0 + θ1·s1 + θ2·s2 + ... + θM·sM
    // Approximation squared error (scalar, N = number of samples):
    // E(ϴ,S) = (1/N) Σ (R(S) - R'(ϴ,S))², R is the actual reward
    // Gradient of the error:
    // ∇E(ϴ,S) = ∂E(ϴ,S)/∂ϴ
    var G = gradientE(ϴ,states,rewards);
    // Update the linear coefficients along the negative gradient
    ϴ = ϴ.map((θ,i) => θ-alpha*G[i]);
    // Repeat
    return gradientDescent(ϴ,states,rewards,nIters-1,alpha);
}
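/**
 * Gradient that `gradientE` below computes, spelled out from the error above:
 *   E(ϴ)   = (1/N) Σn (R(Sn) - ϴ·Sn)²,   Sn = [1, s1, ..., sM]
 *   ∂E/∂θi = (-2/N) Σn (R(Sn) - ϴ·Sn) · Sn[i]
 * Each sample contributes its residual weighted by its i-th component;
 * the (-2/N) factor is applied once per coefficient in `gradient`.
 */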
// Gradient of the error w.r.t. the linear coefficients ϴ
function gradientE(ϴ,states,rewards){
    var N = parseFloat(states.length);
    // Contribution of the n-th sample to ∂E/∂θi (residual weighted by Sn[i])
    function E(n,i){
        var Sn = [1].concat(states[n].state); // Sn[0] = 1 (bias term)
        return Sn[i]*(rewards[n] - ϴ.reduce((sum,θ,k) => sum+θ*Sn[k],0));
    }
    // Sum of the per-sample contributions for coefficient i
    function sumError(i){
        var e = 0;
        for (var n=0; n<N; n++) e += E(n,i);
        return e;
    }
    // ∂E/∂θi = (-2/N) Σn E(n,i)
    function gradient(θ,i){
        return (-2/N)*sumError(i);
    }
    var G = ϴ.map(gradient);
    console.log(' ∇ : '.yellow,G);
    return G;
}
module.exports = generaliz;