-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathftl.py
29 lines (24 loc) · 770 Bytes
/
ftl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from .randmax import randmax
import numpy as np
from .base_mab import BaseMAB
class FTL(BaseMAB):
    """Follow the leader (a.k.a. greedy strategy).

    Tracks, for every arm, how many times it has been drawn and the sum of
    the rewards it has produced. Once every arm has been drawn at least
    once, arms are scored by their empirical mean reward.

    Parameters
    ----------
    nbArms : int
        Number of arms of the bandit.
    """

    def __init__(self, nbArms):
        self.nbArms = nbArms
        # Begin with empty per-arm statistics.
        self.clear()

    def clear(self):
        """Reset the per-arm cumulative rewards and draw counts to zero."""
        self.cumRewards = np.zeros(self.nbArms)
        self.nbDraws = np.zeros(self.nbArms)

    def chooseArmToPlay(self):
        """Select the next arm from the current statistics.

        While at least one arm has a draw count of zero, the score vector is
        the negated draw counts, so undrawn arms (score 0) hold the largest
        score. Afterwards the score vector is the empirical mean reward of
        each arm.

        Returns
        -------
        The result of ``randmax`` applied to the score vector.
        NOTE(review): presumably the index of a maximal entry with ties
        broken at random -- confirm against the ``randmax`` helper.
        """
        if min(self.nbDraws) == 0:
            # Some arm is still undrawn; this branch also keeps the
            # division below from ever seeing a zero count.
            scores = -self.nbDraws
        else:
            scores = self.cumRewards / self.nbDraws
        return randmax(scores)

    def receiveReward(self, arm, reward):
        """Record the outcome of one draw.

        Parameters
        ----------
        arm : int
            Index of the arm that was played.
        reward : float
            Reward obtained from that arm; added to its cumulative reward.
        """
        self.cumRewards[arm] += reward
        self.nbDraws[arm] += 1