-
Notifications
You must be signed in to change notification settings - Fork 26
/
BinaryStrategy.py
67 lines (56 loc) · 1.86 KB
/
BinaryStrategy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import numpy as np
class BinaryStrategy(object):
    """
    Thompson-sampling-style binary strategy selector with epsilon exploration.

    Each of the K arms has D features; a Beta(alpha, beta) posterior is kept
    per (arm, feature). Each round, weights are sampled from the posteriors,
    arms are scored against the round's feature vector, and the winner's
    posterior is updated by the observed reward.

    Defaults:
        K=2 arms
        D=2 features/arm
        epsilon=0.05 exploration probability
    """
    def __init__(self, K=2, D=2, epsilon=0.05):
        self.K = K
        self.D = D
        self.epsilon = epsilon
        # Beta-distribution parameters per (arm, feature), initialized to the
        # uniform prior Beta(1, 1). Kept as float arrays: the in-place updates
        # below add float feature vectors, which an int array rejects under
        # numpy's same_kind casting rule for in-place ufuncs.
        self.alpha = np.ones((K, D))
        self.beta = np.ones((K, D))

    def simulate(self, features, rewards, weights):
        """
        Run the strategy over N rounds of (features, rewards).

        Args:
            features: array-like; features[i] is the length-D feature vector
                for round i.
            rewards: array with K*N entries total; rewards[i] is the length-K
                per-arm reward vector for round i.
            weights: (K, D) reference weights, used only for the RMSE
                diagnostic against the sampled weights.

        Returns:
            (regret, rmse): two (N, 1) arrays holding, per round, the absolute
            difference between the best sampled score and the realized reward,
            and the RMSE between the sampled weights and `weights`.
        """
        # Floor division: rewards.size / K is a float in Python 3 and would
        # break both range() and np.zeros() below.
        N = rewards.size // self.K
        regret = np.zeros((N, 1))
        rmse = np.zeros((N, 1))
        for i in range(N):
            S = np.zeros((self.K, self.D))  # weights sampled this round
            F = features[i]
            R = rewards[i]
            armOpt = 0
            armMax = 0.
            # Thompson sampling: draw each weight from its current Beta
            # posterior and score every arm on this round's features.
            for k in range(self.K):
                armSum = 0.0
                for d in range(self.D):
                    s = np.random.beta(self.alpha[k, d], self.beta[k, d])
                    S[k, d] = s
                    armSum += s * F[d]
                if armSum > armMax:
                    armMax = armSum
                    armOpt = k
            # With probability epsilon, explore a uniformly random *other*
            # arm. Guard on K > 1: with a single arm there is no alternative
            # and the rejection loop below would never terminate.
            if self.K > 1 and np.random.uniform() <= self.epsilon:
                armAlt = armOpt
                while armAlt == armOpt:
                    armAlt = int(np.random.uniform() * self.K)
                armOpt = armAlt
            armReward = R[armOpt]
            regret[i] = abs(armMax - armReward)
            rmse[i] = np.sqrt(np.mean((weights - S) ** 2))
            # Positive reward reinforces the success counts (alpha); anything
            # else reinforces the failure counts (beta).
            if armReward > 0:
                self.alpha[armOpt] += F
            else:
                self.beta[armOpt] += F
        return regret, rmse