model_player.py (155 lines, 118 loc, 4.39 KB)
from Environment import Entity
import pygame
import os
import random
import time as t
import numpy as np
import matplotlib.pyplot as plt
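# (random, time and matplotlib are imported but not used in this playback script)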
# Centers window
x, y = 300, 70
os.environ['SDL_VIDEO_WINDOW_POS'] = "%d,%d" % (x, y)
# Game window presets
FPS = 6
WIDTH, HEIGHT = 800, 600
grey = (37, 35, 35)
red = (255, 0, 0)
white = (240, 240, 240)
gold = (250, 210, 20)
orange = (240, 150, 20)
# players and render presets
CELL_WIDTH = 60
DIM = Entity.DIM # dimensions
THICKNESS = 3
Entity.INIT_VARS = (2, DIM**2-1, 4)
# Bellman equation constants
DISCOUNT = 0.95
LEARNING_RATE = 0.1
MAX_STEPS = 20
EPISODES = 450_000
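# These constants parameterize the standard Q-learning (Bellman) update that
# produced the saved table loaded below. A sketch of that update (the actual
# update lives in the training script, not in this file):
#   new_q = (1 - LEARNING_RATE) * current_q + LEARNING_RATE * (reward + DISCOUNT * max_future_q)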
# initialize environment and players
Entity.initialize_players()
Entity.init_class(MAX_STEPS) # sets max steps before simulation terminates.
player = Entity.entities[0]
action_dic = {
    0: "up",
    1: "right",
    2: "down",
    3: "left",
}
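# Note: action_dic is not referenced below; it documents what each action index
# means (env_step() in the main loop consumes the raw integer index).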
def draw_square(screen, side_length, posx, posy, color, thickness):
    square = pygame.Rect(posx, posy, side_length, side_length)
    pygame.draw.rect(screen, color, square, thickness)
def draw_grid(screen, cell_width, dimensions, color, thickness):
    '''Draw a centered n x n grid, where n is `dimensions`; (locx, locy) is the
    location of the grid's top-left corner. Cell centers are computed
    separately by get_cell_matrix().'''
    grid_length = (cell_width + thickness) * dimensions
    locx, locy = (WIDTH - grid_length) // 2, (HEIGHT - grid_length) // 2
    posx, posy = locx, locy
    for n in range(dimensions):
        for m in range(dimensions):
            draw_square(screen, cell_width, posx + m * cell_width, posy + n * cell_width, color, thickness)
def get_cell_matrix():
    '''Return the coordinates of the center of each grid cell, row-major
    (index = row * DIM + column).'''
    grid_length = (CELL_WIDTH + THICKNESS) * DIM
    locx, locy = (WIDTH - grid_length) // 2, (HEIGHT - grid_length) // 2
    return [(locx + CELL_WIDTH * (x % DIM + 0.5) // 1, locy + CELL_WIDTH * (x // DIM + 0.5) // 1) for x in range(DIM * DIM)]
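# (The "// 1" above floors each half-cell offset to a whole value so the
# circle centers drawn in show_render() land on pixel boundaries.)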
def show_render(render: bool):
    if render:
        pygame.init()
        screen = pygame.display.set_mode((WIDTH, HEIGHT))
        clock = pygame.time.Clock()
        screen.fill(grey)
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                quit()
        draw_grid(screen, CELL_WIDTH, DIM, white, THICKNESS)
        cell_matrix = get_cell_matrix()
        color_table = {"player": gold, "bomb": red, "home": orange}  #! set dynamically later
        for entity in Entity.entities:
            color = color_table[entity.identitiy]  # attribute name as spelled in Environment
            pygame.draw.circle(screen, color, cell_matrix[entity.index], CELL_WIDTH // 3)
        pygame.display.flip()
        clock.tick(FPS)
    else:
        pygame.quit()
# DIM**2 is the number of possible positions; each 2 is a binary flag for
# whether there is a bomb in a particular direction
state_space = (DIM**2, 2, 2, 2)
action_space = (4,)
# q_table = np.random.uniform(low=-20, high=10, size=(state_space + action_space))
# OR load a previously trained q_table
q_table = np.load("models/q_table_98000.npy")
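# Minimal sanity check (assumes the saved table was trained with the same
# state_space and action_space defined above):
assert q_table.shape == state_space + action_space, q_table.shape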
wins = 0
win_record = []
init_state = player.get_state()
ep_score = []
action_book = {}
state_book = {}
rewards_statistic = {"ep": [], "avg": [], "min": [], "max": []}
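# Note: win_record, ep_score, action_book, state_book and rewards_statistic are
# bookkeeping containers that this playback script declares but never fills.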
for episode in range(EPISODES):
    is_over = False
    state = init_state
    Entity.restart()
    # print(f"epsilon: {epsilon}")
    wins_every = 0
    action_list = []
    state_list = [player.get_state()]
    while not is_over:
        show_render(True)
        # greedy playback of the trained policy: no exploration, no table update
        action = np.argmax(q_table[state])  # index of the largest q-value for this state
        action_list.append(action)
        *new_state, reward, is_over = player.env_step(action)
        new_state = new_state[0]
        state_list.append(new_state)
        max_future_q = np.max(q_table[new_state])
        current_q = q_table[state + (action, )]
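        # max_future_q and current_q are computed for inspection only here;
        # during training they would feed the standard Bellman update, e.g.:
        #   q_table[state + (action, )] = (1 - LEARNING_RATE) * current_q \
        #       + LEARNING_RATE * (reward + DISCOUNT * max_future_q)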
        if reward > 10:
            print(f"#{episode} I WON DAD! at {state[0]}")
            wins += 1
            wins_every += 1
        # update old state to new state
        state = new_state
    ###########################
    # print(f"Episode {episode} - score: {Entity.score} at step: {Entity.playerSteps}")
    # print("---------")
    # print(f"#{episode} - score {Entity.score}")
print(f"THE END\n------------\nWe have won {wins} times out of {EPISODES} episodes")