-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAI_GridWorld.py
124 lines (102 loc) · 3.8 KB
/
AI_GridWorld.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# Grid World: AI-controlled play
# Instructions:
# Move up, down, left, or right to move the character. The
# objective is to find the key and get to the door
#
# Control:
# arrows : Move up, down, left, or right
# s : Toggle slow play
# a : Toggle AI player
# d : Toggle rendering
# r : Restart game
# q / ESC : Quit
from GridWorld import GridWorld
import numpy as np
import pygame
from collections import defaultdict
# Experiment script: repeatedly trains a fresh tabular Q-learning agent on
# GridWorld and prints, for each observation, how many finished episodes
# ("attempts") were used before the agent achieved a streak of consecutive
# wins. A human can take over with the arrow keys after pressing 'a'.

# Experiment parameters
N_OBSERVATIONS = 200   # number of independent training runs to measure
WINS_REQUIRED = 10     # consecutive wins that end one observation
WIN_SCORE = 100        # an episode counts as a win when env.score exceeds this
GAMMA = 0.18           # discount factor applied to the next state's best Q-value

# Initialize the environment
env = GridWorld()
env.reset()
x, y, has_key = env.get_state()

# Definitions and default settings
actions = ["left", "right", "up", "down"]
exit_program = False
action_taken = False
slow = False
runai = True
render = False
done = False

# Game clock
clock = pygame.time.Clock()

# Outer loop: one iteration per observation we want to record
for _ in range(N_OBSERVATIONS):
    consecutive_wins = 0
    attemps_used = 0
    # Every observation starts from an empty Q-table: each unseen state
    # (x, y, has_key) gets one zero-initialised Q-value per entry in `actions`.
    q_table = defaultdict(lambda: [0, 0, 0, 0])

    # Game loop for the current observation; runs until WINS_REQUIRED
    # consecutive wins have been made or the user asks to quit.
    while not exit_program:
        if render:
            env.render()

        # Slow down rendering to 5 fps
        if slow and runai:
            clock.tick(5)

        # Automatic reset environment in AI mode
        if done and runai:
            env.reset()
            x, y, has_key = env.get_state()

        # Process game events
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                exit_program = True
            if event.type == pygame.KEYDOWN:
                if event.key in [pygame.K_ESCAPE, pygame.K_q]:
                    exit_program = True
                if event.key == pygame.K_UP:
                    best_action, action_taken = "up", True
                if event.key == pygame.K_DOWN:
                    best_action, action_taken = "down", True
                if event.key == pygame.K_RIGHT:
                    best_action, action_taken = "right", True
                if event.key == pygame.K_LEFT:
                    best_action, action_taken = "left", True
                if event.key == pygame.K_r:
                    env.reset()
                    # Re-read the state after a manual restart; otherwise the
                    # next Q-update would be keyed on the pre-reset position.
                    x, y, has_key = env.get_state()
                if event.key == pygame.K_d:
                    render = not render
                if event.key == pygame.K_s:
                    slow = not slow
                if event.key == pygame.K_a:
                    runai = not runai
                    clock.tick(5)

        # AI controller (enable/disable by pressing 'a')
        if runai:
            # Q-values of the state we are about to leave. This is the list
            # object stored in q_table, so updating it in place below is
            # enough to update the table.
            tile_actions = q_table[(x, y, has_key)]

            # Greedy choice: index of the highest Q-value for this state
            best_action = np.argmax(tile_actions)

            # Do the best action
            (x, y, has_key), reward, done = env.step(actions[best_action])

            # Set the Q-value of the chosen action to the sum of:
            # - the reward we got
            # - the gamma-adjusted max Q-value of the state we arrived at
            tile_actions[best_action] = reward + GAMMA * max(q_table[(x, y, has_key)])

            # Count the consecutive wins we have; any finished episode that
            # is not a win resets the streak.
            if done and env.score > WIN_SCORE:
                consecutive_wins += 1
            elif done:
                consecutive_wins = 0

            # Streak reached: this observation is complete.
            # NOTE(review): the break happens before the increment below, so
            # the final winning episode is not included in attemps_used --
            # confirm this is the intended way to count.
            if consecutive_wins == WINS_REQUIRED:
                break

            if done:
                attemps_used += 1

        # Human controller
        else:
            if action_taken:
                (x, y, has_key), reward, done = env.step(best_action)
                action_taken = False

    # The user asked to quit: stop the whole experiment instead of running
    # (and printing a meaningless count for) every remaining observation.
    if exit_program:
        break

    print(attemps_used)

env.close()