forked from voice32/2048_RL
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgame.py
296 lines (240 loc) · 10.4 KB
/
game.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
import pickle

import matplotlib.pyplot as plt
import numpy as np
# Integer codes for the four moves compared against in Game.step and
# Game.virtual_step (up=0, down=1, left=2, right=3).
ACTION_UP, ACTION_DOWN, ACTION_LEFT, ACTION_RIGHT = range(4)

# Directory into which Game.save_best_game_history writes its pickle file.
base_dir = '.'
class Game():
    """2048 game environment.

    Holds a square board of tile values (0 marks an empty cell), applies
    moves to it, computes rewards for merges and records every step in
    ``self.history``. Call ``reset`` before the first ``step``: the board
    and all per-episode attributes are created there, not in ``__init__``.
    """

    # Background colour of a cell for each tile value shown by draw_board.
    _CELL_COLORS = {
        0: '#FFFFFF',
        2: '#EEE4DA',
        4: '#ECE0C8',
        8: '#ECB280',
        16: '#EC8D53',
        32: '#F57C5F',
        64: '#E95937',
        128: '#F3D96B',
        256: '#F2D04A',
        512: '#E5BF2E',
        1024: '#E2B814',
        2048: '#EBC502',
        4096: '#00A2D8',
        8192: '#9ED682',
        16384: '#9ED682',
        32768: '#9ED682',
        65536: '#9ED682',
        131072: '#9ED682',
    }
    # Colour used for tile values missing from the table above (same colour
    # as the highest listed tiles), so drawing never fails on a large tile.
    _FALLBACK_CELL_COLOR = '#9ED682'

    def __init__(self, size = 4, seed = 42, negative_reward = -10, reward_mode='log2', cell_move_penalty = 0.1):
        """ Stores the game configuration and seeds NumPy's global RNG

        Params
        ======
            size (int): board dimension (the board is size x size)
            seed (int): value passed to np.random.seed
            negative_reward (number): reward assigned by step() when the action does not change the board
            reward_mode (str): 'log2' rewards log2 of each merged tile; any other value rewards the merged tile value itself
            cell_move_penalty (number): multiplied by a tile's value and accumulated each time shift() moves that tile
        """
        self.board_dim = size           # board dimension
        self.state_size = size * size   # total number of cells
        self.action_size = 4            # number of available actions
        np.random.seed(seed)
        self.best_game_history = []
        self.negative_reward = negative_reward
        self.reward_mode = reward_mode
        self.cell_move_penalty = cell_move_penalty

    def save_best_game_history(self):
        """ Copies the current history into best_game_history and pickles it to <base_dir>/best_game_hist.pkl """
        # NOTE: list.copy() is shallow; the per-step dicts are shared with self.history.
        self.best_game_history = self.history.copy()
        with open(base_dir+'/best_game_hist.pkl', 'wb') as f:
            pickle.dump(self.best_game_history, f)

    def reset(self, init_fields = 2, step_penalty = 0, bootstrapping = False):
        """ Initializes the board and all per-episode state

        Params
        ======
            init_fields (int): how many fields to fill initially
            step_penalty (int): the cost of an action (stored here; subtracted from the reward by virtual_step)
            bootstrapping (bool): whether to create a new (initial) board or simulate some intermediate game state
        """
        self.game_board = np.zeros((self.board_dim, self.board_dim))

        if not bootstrapping:
            for _ in range(init_fields):
                self.fill_random_empty_cell()
        else:
            self.random_board()

        self.score = np.sum(self.game_board)
        self.reward = 0
        self.current_cell_move_penalty = 0
        self.done = False
        # Defined up front so the attribute can be read before the first step().
        self.moved = False
        self.steps = 0
        self.rewards_list = []
        self.scores_list = []
        self.step_penalty = step_penalty

        # The first history entry describes the initial board (action -1, no old board).
        self.history = []
        self.history.append({
            'action': -1,
            'new_board': self.game_board.copy(),
            'old_board': None,
            'score': self.score,
            'reward': self.reward
        })

    def shift(self, board):
        """ Shifts all cells to the left and gathers penalties if needed

        Returns a new float array; ``board`` itself is not modified. For every
        non-zero cell that ends up in a different column,
        ``cell_move_penalty * value`` is added to ``current_cell_move_penalty``.
        """
        shifted_board = np.zeros((board.shape[0], board.shape[1]))
        for i, row in enumerate(board):
            idx = 0  # next free column in the output row
            for iv, v in enumerate(row):
                if v != 0:
                    shifted_board[i, idx] = v
                    if iv != idx:
                        self.current_cell_move_penalty += self.cell_move_penalty * v
                    idx += 1
        return shifted_board

    def calc_board(self, board):
        """ Calculate all cell mergers and return the new state of the board

        Performs a left move: shift, merge equal neighbours left-to-right,
        shift again. Resets and then accumulates ``self.reward`` and
        ``self.current_cell_move_penalty`` as a side effect.
        """
        self.reward = 0
        self.current_cell_move_penalty = 0

        # shift() returns a fresh array, so merging in place is safe here.
        merged_board = self.shift(board)
        for row in merged_board:
            for i in range(len(row) - 1):
                if row[i] != 0 and row[i] == row[i + 1]:
                    row[i] = row[i] * 2
                    # Zeroing the right neighbour prevents a freshly merged
                    # tile from merging again within the same move.
                    row[i + 1] = 0
                    if self.reward_mode == 'log2':
                        self.reward += np.log2(row[i])
                    else:
                        self.reward += row[i]

        # Close the gaps left behind by the merges.
        return self.shift(merged_board)

    def current_state(self):
        """ Returns a flattened array of board cell values """
        return np.reshape(self.game_board.copy(), -1)

    def _apply_action(self, board, action):
        """ Returns the board produced by ``action`` (before a new tile is added), or None for an unknown action

        Every direction is reduced to the left move implemented by
        calc_board by flipping/transposing the board before and after.
        """
        if action == ACTION_LEFT:
            return self.calc_board(board)
        if action == ACTION_RIGHT:
            return np.flip(self.calc_board(np.flip(board, axis=1)), axis=1)
        if action == ACTION_UP:
            # Transposing turns columns into rows; the axis-0 flips only
            # reverse the order in which rows are processed.
            return np.transpose(
                np.flip(
                    self.calc_board(np.flip(np.transpose(board), axis=0)), axis=0))
        if action == ACTION_DOWN:
            return np.transpose(
                np.flip(
                    self.calc_board(np.flip(np.transpose(board), axis=1)), axis=1))
        return None

    def step(self, action, action_values=None):
        """ Applies the selected action to the board

        Params
        ======
            action (int): one of the ACTION_* codes
            action_values: stored as-is in the history entry for this step

        Returns the tuple (game_board, reward, done). ``game_board`` is the
        live board array, not a copy. An unknown action returns
        (game_board, 0, done) without counting a step or touching history.
        """
        old_board = self.game_board.copy()
        temp_board = self._apply_action(self.game_board.copy(), action)

        if temp_board is None:  # just in case it happens
            return (self.game_board, 0, self.done)

        if not np.array_equal(self.game_board, temp_board):
            # Fill an empty cell with a new value
            self.game_board = temp_board.copy()
            self.fill_random_empty_cell()

            # Reward is the sum of the merge rewards minus the cell move penalty
            self.reward = self.reward - self.current_cell_move_penalty
            self.score = np.sum(self.game_board)
            self.done = self.check_is_done()
            self.moved = True
        else:
            # The action did not change the board
            self.reward = self.negative_reward
            self.moved = False

        self.steps += 1
        self.rewards_list.append(self.reward)

        # Save the new state
        self.history.append({
            'action': action,
            'action_values': action_values,
            'old_board': old_board,
            'new_board': self.game_board.copy(),
            'score': self.score,
            'reward': self.reward
        })

        return (self.game_board, self.reward, self.done)

    def virtual_step(self, action):
        """ Computes the outcome of an action without changing the board

        Returns (new_board, reward, done) where ``new_board`` has no new
        random tile added. NOTE: although game_board is left untouched,
        self.reward, self.current_cell_move_penalty, self.score and
        self.done are overwritten by this call.
        """
        new_game_board = self._apply_action(self.game_board, action)

        if new_game_board is None:  # just in case it happens
            return (self.game_board, 0, self.done)

        self.reward = self.reward - self.step_penalty
        self.score = np.sum(self.game_board)
        self.done = self.check_is_done(new_game_board)

        return (new_game_board, self.reward, self.done)

    def check_is_done(self, board = None):
        """ Check if the game is over

        Params
        ======
            board (array): board to inspect; defaults to the current game board

        Returns True only when there is no empty cell and no pair of equal
        horizontally or vertically adjacent cells.
        """
        if board is None:
            board = self.game_board
        board = np.asarray(board)

        # If there is at least one cell with 0, then the game is not over
        if not np.all(board):
            return False

        # Equal horizontal neighbours can still be merged
        if np.any(board[:, :-1] == board[:, 1:]):
            return False

        # Equal vertical neighbours can still be merged
        if np.any(board[:-1, :] == board[1:, :]):
            return False

        # There are no equal adjacent cells, the game is over
        return True

    def print_board(self, transpose = False):
        """ Deprecated """
        if not transpose:
            print(self.game_board)
        else:
            print(np.transpose(self.game_board))

    def fill_random_empty_cell(self, playing=True):
        """ Finds an empty cell and fills it with 2 or 4 with 90/10% probability respectively (as per game rules on Wikipedia)

        Params
        ======
            playing (bool): True for a regular game (2 or 4 only); False for bootstrapping, where any power of two from 2**1 to 2**16 may be placed
        """
        # If all cells are filled, there is no place to put a new value, just pass
        if np.all(self.game_board):
            return

        # Pick a cell and keep re-picking until an empty one is found.
        # The order of the RNG calls is kept as-is so seeded runs stay reproducible.
        x = np.random.randint(self.board_dim)
        y = np.random.randint(self.board_dim)
        while self.game_board[x, y] != 0:
            x = np.random.randint(self.board_dim)
            y = np.random.randint(self.board_dim)

        if playing:
            # If it is a regular game, only values 2 and 4 are allowed
            self.game_board[x, y] = np.random.choice([2, 4], p=[0.9, 0.1])
        else:
            # Otherwise it is a bootstrapping game: any value is allowed, with
            # linearly decreasing weights (normalised to sum to 1) so that
            # larger tiles are less likely.
            weights = np.linspace(1, 0.001, 16)
            self.game_board[x, y] = np.random.choice(
                [2**i for i in range(1, 17)], p=weights / np.sum(weights))

    def draw_board(self, board = None, title = 'Current game'):
        """ Draws a colored game board

        Params
        ======
            board (array): board to draw; defaults to the current game board
            title (str): figure title
        """
        if board is None:
            board = self.game_board

        ncols = self.board_dim
        nrows = self.board_dim

        # create the plots: one subplot per cell, in row-major order
        fig = plt.figure(figsize=(3,3))
        plt.suptitle(title)
        axes = [fig.add_subplot(nrows, ncols, index) for index in range(1, nrows * ncols + 1)]

        # add some data
        v = np.reshape(board, -1)
        for i, ax in enumerate(axes):
            value = int(v[i])
            ax.text(0.5, 0.5, str(value), horizontalalignment='center', verticalalignment='center')
            ax.set_facecolor(self._CELL_COLORS.get(value, self._FALLBACK_CELL_COLOR))
            # remove the x and y ticks
            ax.set_xticks([])
            ax.set_yticks([])

        plt.show()

    def random_board(self):
        """ Creates a randomly filled board for bootstrapping """
        # Define how many cells we want to fill (between 4 and 15)
        num_filled_cells = np.random.randint(12) + 4

        # Fill these cells
        for _ in range(num_filled_cells):
            self.fill_random_empty_cell(playing=False)