@@ -37,6 +37,7 @@ def stack(flat, layers=16):
37
37
38
38
class Game2048Env (gym .Env ): # directions 0, 1, 2, 3 are up, right, down, left
39
39
metadata = {'render.modes' : ['human' , 'ansi' ]}
40
+ max_steps = 10000
40
41
41
42
def __init__ (self ):
42
43
# Definitions for game. Board must be square.
@@ -56,6 +57,9 @@ def __init__(self):
56
57
self .set_illegal_move_reward (0. )
57
58
self .set_max_tile (None )
58
59
60
+ self .max_illegal = 50 # max number of illegal actions
61
+ self .num_illegal = 0
62
+
59
63
# Initialise seed
60
64
self .seed ()
61
65
@@ -111,8 +115,14 @@ def step(self, action):
111
115
except IllegalMove as e :
112
116
logging .debug ("Illegal move" )
113
117
info ['illegal_move' ] = True
114
- done = False
118
+ if self .steps > self .max_steps :
119
+ done = True
120
+ else :
121
+ done = False
115
122
reward = self .illegal_move_reward
123
+ self .num_illegal += 1
124
+ if self .num_illegal >= self .max_illegal : # end the episode once the illegal-move limit is reached
125
+ done = True
116
126
117
127
info = self ._get_info (info )
118
128
@@ -123,6 +133,7 @@ def reset(self):
123
133
self .Matrix = np .zeros ((self .h , self .w ), np .int )
124
134
self .score = 0
125
135
self .steps = 0
136
+ self .num_illegal = 0
126
137
127
138
logging .debug ("Adding tiles" )
128
139
self .add_tile ()
@@ -272,6 +283,9 @@ def isend(self):
272
283
273
284
if self .max_tile is not None and self .highest () == self .max_tile :
274
285
return True
286
+
287
+ if self .steps >= self .max_steps :
288
+ return True
275
289
276
290
for direction in range (4 ):
277
291
try :
0 commit comments