-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathql_adv.py
57 lines (54 loc) · 1.8 KB
/
ql_adv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import random
class maze:
    """Tabular Q-learning path finder over states numbered ``0 .. a-1``.

    An action is "go to state j", so every state has ``a`` actions and
    taking action ``j`` always lands in state ``j`` (see ``new_s``).
    Whether a move is desirable is expressed only through the reward
    table built in ``train``: moves listed in ``ld[i]`` are rewarded,
    every other move is penalised.

    Args:
        a: Number of states (and therefore of actions per state).
        ld: Indexable by state; ``ld[i]`` is the container of states that
            may be entered from state ``i``.
        ins: Start state of the path returned by ``main``.
        fs: Goal state.
    """

    def __init__(self, a, ld, ins, fs):
        self.a = a          # number of actions per state
        self.s = a          # number of states (rows of the Q-table)
        self.l = []         # reward table l[state][action]; filled by train()
        self.ld = ld        # allowed moves per state
        self.lr = 0.1       # learning rate of the Q-update
        self.gamma = 0.6    # discount factor of the Q-update
        self.q = []         # Q-table q[state][action]; filled by train()
        self.ins = ins
        self.fs = fs

    def new_s(self, s, a):
        """Return the state reached from ``s`` by action ``a`` (always ``a``)."""
        return a

    def calc(self, s, a):
        """Return the updated Q-value for taking action ``a`` in state ``s``.

        Blends the current estimate with the immediate reward plus the
        discounted best value of the successor state, weighted by ``lr``.
        Reads ``self.q`` and ``self.l``; does not modify either.
        """
        lr, gamma = self.lr, self.gamma
        best_next = max(self.q[self.new_s(s, a)])
        return (1 - lr) * self.q[s][a] + lr * (self.l[s][a] + gamma * best_next)

    def _build_rewards(self, fs):
        """Return the reward table for goal ``fs``.

        -1 for a move not listed in ``ld``, 0.1 for a listed move and 1 for
        a listed move that enters the goal.
        """
        rewards = []
        for i in range(self.a):
            allowed = self.ld[i]
            row = []
            for j in range(self.a):
                if j not in allowed:
                    row.append(-1)
                elif j == fs:
                    row.append(1)
                else:
                    row.append(0.1)
            rewards.append(row)
        return rewards

    def _greedy_path(self, ins, fs):
        """Follow the arg-max action of each Q-row from ``ins`` until ``fs``."""
        if ins == fs:
            # Already at the goal: there is nothing to walk.
            return [fs]
        path = [ins]
        seen = {ins}
        cur = ins
        while True:
            row = self.q[cur]
            cur = row.index(max(row))
            if cur == fs:
                path.append(fs)
                return path
            if cur in seen:
                # The greedy policy is deterministic, so a revisited state
                # means it would loop forever without reaching the goal.
                raise RuntimeError(
                    "greedy policy revisits state %r without reaching goal %r; "
                    "the goal may be unreachable or training did not converge"
                    % (cur, fs)
                )
            seen.add(cur)
            path.append(cur)

    def train(self, ins, fs):
        """Learn the Q-table for goal ``fs`` and return the greedy path.

        Rebuilds ``self.l`` and ``self.q`` from scratch, runs ``a * 100``
        episodes of ``a * 100`` epsilon-greedy updates each (epsilon 0.05),
        then extracts the greedy path.

        Args:
            ins: State the returned path starts from.
            fs: Goal state.

        Returns:
            List of states beginning with ``ins`` and ending with ``fs``;
            ``[fs]`` when ``ins == fs``.

        Raises:
            RuntimeError: If the learned greedy policy cycles before
                reaching ``fs``.
        """
        n_actions = self.a
        self.l = self._build_rewards(fs)
        self.q = [[0] * n_actions for _ in range(self.s)]

        episodes = steps = n_actions * 100
        eps = 0.05
        for _ in range(episodes):
            state = random.randint(0, self.s - 1)
            if state == fs:
                # The goal row is never updated; its values stay at 0.
                continue
            row = self.q[state]
            # All updates of an episode are applied to the sampled state;
            # the agent is not walked through the graph.
            for _ in range(steps):
                if random.random() > eps:
                    action = row.index(max(row))             # exploit
                else:
                    action = random.randint(0, n_actions - 1)  # explore
                row[action] = self.calc(state, action)

        return self._greedy_path(ins, fs)

    def main(self):
        """Train with the start and goal given at construction; return the path."""
        return self.train(self.ins, self.fs)