Caulet guillaume Flappy Bird #38

Open
wants to merge 2 commits into base: master
34 changes: 34 additions & 0 deletions Caulet/FlappyAgent.py
@@ -0,0 +1,34 @@
import numpy as np
import _pickle as cPickle

def discrete_state(state):
x = str(int(round(state['next_pipe_dist_to_player']/20)))
y = str(int(round((state['player_y'] - state['next_pipe_bottom_y'])/20)))
v = str(int(round(state['player_vel'])))
return x+"-"+y+"-"+v
flag_dict=False
Q= dict()

def FlappyPolicy(state, screen):
action=None
global flag_dict
global Q

    if not flag_dict:
        Q = cPickle.load(open("Qql", 'rb'))
        flag_dict = True  # load the Q table only once, on the first call
s=discrete_state(state)

if s in Q.keys():
a = np.argmax(Q[s][:])
else:
a = 0

if a==0:
action=0
else:
action=119

return action


5 changes: 5 additions & 0 deletions Caulet/Note.txt
@@ -0,0 +1,5 @@
Implementation of a Q-learning algorithm for the Flappy Bird game.
Training is done with the training.py file and the result is stored as a dictionary.
The game space is discretized into blocks of 20 pixels relative to the next pipe.
This discretization is encoded as a string (x-y-vel), which is used as the dictionary key.
The structure of the Q-learning algorithm is largely inspired by the one seen in RL3.
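For illustration, here is a minimal sketch of how one game state maps to a dictionary key under this scheme; the state values below are invented for the example, while the computation mirrors discrete_state in Caulet/FlappyAgent.py and Caulet/training.py.

    # hypothetical state values, for illustration only
    state = {'next_pipe_dist_to_player': 143.0,
             'player_y': 256.0,
             'next_pipe_bottom_y': 210.0,
             'player_vel': -8.0}
    x = str(int(round(state['next_pipe_dist_to_player'] / 20)))                  # '7'
    y = str(int(round((state['player_y'] - state['next_pipe_bottom_y']) / 20)))  # '2'
    v = str(int(round(state['player_vel'])))                                     # '-8'
    key = x + "-" + y + "-" + v                                                  # '7-2--8'
    # Q[key] then holds the two action values [no-flap, flap]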
Binary file added Caulet/Qql
Binary file not shown.
31 changes: 31 additions & 0 deletions Caulet/run.py
@@ -0,0 +1,31 @@
# You're not allowed to change this file
from ple.games.flappybird import FlappyBird
from ple import PLE
import numpy as np
from FlappyAgent import FlappyPolicy

game = FlappyBird(graphics="fixed") # use "fancy" for full background, random bird color and random pipe color, use "fixed" (default) for black background and constant bird and pipe colors.
p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False)
# Note: if you want to see your agent act in real time, set force_fps to False. But don't use this setting for learning, just for display purposes.

p.init()
reward = 0.0

nb_games = 100
cumulated = np.zeros((nb_games))

for i in range(nb_games):
p.reset_game()

while(not p.game_over()):
state = game.getGameState()
screen = p.getScreenRGB()
action=FlappyPolicy(state, screen) ### Your job is to define this function.

reward = p.act(action)
cumulated[i] = cumulated[i] + reward

average_score = np.mean(cumulated)
max_score = np.max(cumulated)
print(average_score)
print(max_score)
110 changes: 110 additions & 0 deletions Caulet/training.py
@@ -0,0 +1,110 @@
import numpy as np
from ple.games.flappybird import FlappyBird
from ple import PLE
import _pickle as cPickle

# Discretization of the state space into 20-pixel bins relative to the next pipe
def discrete_state(state):
x = str(int(round(state['next_pipe_dist_to_player']/20)))
y = str(int(round((state['player_y'] - state['next_pipe_bottom_y'])/20)))
v = str(int(round(state['player_vel'])))
return x+"-"+y+"-"+v

# GLIE actor; the full game state is also passed in (state = s') to help guide the exploratory choice
def epsilon_greedy(Q, s, epsilon, state):
a = 0

if s in Q.keys():
a = np.argmax(Q[s][:])
random_act=np.random.rand()
if random_act <= epsilon :
if random_act <= 0.5 * epsilon:
if state['next_pipe_bottom_y'] - state['player_y'] < 50 :
a = 1
else:
a = 0
else:
if state['player_y'] - state['next_pipe_top_y'] > 50 :
a = 0
else:
a = 1
return a

# map the discrete action index to the PLE key code: 0 -> no-op, 1 -> 119 (flap)
def call_action(a):
if a==0:
action=0
else:
action=119

return action

# Initialization and hyperparameters
gamma = 0.95
alpha = 0.9
epsilon = 0.1
nb_games = 60000
resolution = 10
Q= dict()
game = FlappyBird(graphics="fixed")
p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False)
score =0
score_100=0
# Q-learning
for i in range(1, nb_games):
    if i % 100 == 0:
        print('average over the last 100 games: %.2f' % (5 + score_100 / 100))  # compensate for the final reward of -5
        if score_100 / 100 > 200:
            break
        score_100 = 0  # reset the 100-game score

    if i % 1000 == 0:
        if alpha > 0.1:
            alpha /= 2
        print('games played: %d, states visited: %d' % (i, len(Q)))
        print('average over the last 1000 games: %.2f' % (5 + score / 1000))  # compensate for the final reward of -5
        if score / 1000 > 100:
            break
        score = 0  # reset the 1000-game score

    if i % 4000 == 0:
        epsilon /= 2
    # Q-learning episode initialization
p.init()
p.reset_game()
state=game.getGameState()
reward = training_reward = 0

    s = discrete_state(state)
    if s not in Q:
        Q[s] = [0.0, 0.0]  # initialize unseen states without overwriting learned values
    action = epsilon_greedy(Q, s, epsilon, state)

while not p.game_over(): # repeat

        reward = p.act(call_action(action))  # returns the reward for the chosen action: 0 for a neutral step, 1 when a pipe is passed, -5 on death
if reward == -5:
            training_reward = -1000  # heavily penalize the action that led to death
else:
training_reward = 1

        state_ = game.getGameState()  # s'
        s_ = discrete_state(state_)  # discretized s'
        action_ = epsilon_greedy(Q, s_, epsilon, state_)  # in s', choose a' (GLIE actor)
        # the full state s' is also passed in to help the action choice in obvious situations

if s_ not in Q.keys():
Q[s_] = [0.0,0.0]

        delta = training_reward + gamma * np.max(Q[s_][:]) - Q[s][action]  # temporal difference: δ = r + γ max_a' Q(s', a') - Q(s, a)
        Q[s][action] = Q[s][action] + alpha * delta  # update: Q(s, a) ← Q(s, a) + αδ

s = s_ #s←s′
action =action_

score +=reward
score_100+=reward

with open('Qql', 'wb') as f:
cPickle.dump(Q,f)
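As an optional sanity check after training (not part of the submission), the saved dictionary can be reloaded and inspected; this is only a sketch using the same file name and pickle module as the script above.

    # reload the trained Q table and report its size
    import _pickle as cPickle

    with open('Qql', 'rb') as f:
        Q = cPickle.load(f)
    print('states stored: %d' % len(Q))            # number of discretized states encountered
    print('sample entries:', list(Q.items())[:3])  # a few (key, [Q_no_flap, Q_flap]) pairs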

33 changes: 29 additions & 4 deletions RandomBird/FlappyAgent.py
@@ -1,9 +1,34 @@
import numpy as np
import _pickle as cPickle

def discrete_state(state):
x = str(int(round(state['next_pipe_dist_to_player']/20)))
y = str(int(round((state['player_y'] - state['next_pipe_bottom_y'])/20)))
v = str(int(round(state['player_vel'])))
return x+"-"+y+"-"+v
flag_dict=False
Q= dict()

def FlappyPolicy(state, screen):
action=None
if(np.random.randint(0,2)<1):
action=119
return action
action=None
global flag_dict
global Q

    if not flag_dict:
        Q = cPickle.load(open("Qql", 'rb'))
        flag_dict = True  # load the Q table only once, on the first call
s=discrete_state(state)

if s in Q.keys():
a = np.argmax(Q[s][:])
else:
a = 0

if a==0:
action=0
else:
action=119

return action


5 changes: 5 additions & 0 deletions RandomBird/Note.txt
@@ -0,0 +1,5 @@
Implementation of a Q-learning algorithm for the Flappy Bird game.
Training is done with the training.py file and the result is stored as a dictionary.
The game space is discretized into blocks of 20 pixels relative to the next pipe.
This discretization is encoded as a string (x-y-vel), which is used as the dictionary key.
The structure of the Q-learning algorithm is largely inspired by the one seen in RL3.
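As a rough illustration of the update rule used in training.py, here is a worked single-step example; the Q values and state keys are invented, while gamma = 0.95 and alpha = 0.9 are the values set in the script.

    # hypothetical Q table with two discretized states, for illustration only
    Q = {'7-2--8': [0.4, 1.2], '6-1--9': [0.9, 0.3]}
    gamma, alpha = 0.95, 0.9
    s, a, s_ = '7-2--8', 1, '6-1--9'                         # flapped in state s, landed in s'
    training_reward = 1                                      # the step did not end the game
    delta = training_reward + gamma * max(Q[s_]) - Q[s][a]   # 1 + 0.95*0.9 - 1.2 = 0.655
    Q[s][a] = Q[s][a] + alpha * delta                        # 1.2 + 0.9*0.655 = 1.7895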
Binary file added RandomBird/Qql
Binary file not shown.
4 changes: 3 additions & 1 deletion RandomBird/run.py
@@ -5,7 +5,7 @@
from FlappyAgent import FlappyPolicy

game = FlappyBird(graphics="fixed") # use "fancy" for full background, random bird color and random pipe color, use "fixed" (default) for black background and constant bird and pipe colors.
p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=False, display_screen=True)
p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False)
# Note: if you want to see your agent act in real time, set force_fps to False. But don't use this setting for learning, just for display purposes.

p.init()
@@ -27,3 +27,5 @@

average_score = np.mean(cumulated)
max_score = np.max(cumulated)
print(average_score)
print(max_score)
110 changes: 110 additions & 0 deletions RandomBird/training.py
@@ -0,0 +1,110 @@
import numpy as np
from ple.games.flappybird import FlappyBird
from ple import PLE
import _pickle as cPickle

# Discretization of the state space into 20-pixel bins relative to the next pipe
def discrete_state(state):
x = str(int(round(state['next_pipe_dist_to_player']/20)))
y = str(int(round((state['player_y'] - state['next_pipe_bottom_y'])/20)))
v = str(int(round(state['player_vel'])))
return x+"-"+y+"-"+v

# GLIE actor; the full game state is also passed in (state = s') to help guide the exploratory choice
def epsilon_greedy(Q, s, epsilon, state):
a = 0

if s in Q.keys():
a = np.argmax(Q[s][:])
random_act=np.random.rand()
if random_act <= epsilon :
if random_act <= 0.5 * epsilon:
if state['next_pipe_bottom_y'] - state['player_y'] < 50 :
a = 1
else:
a = 0
else:
if state['player_y'] - state['next_pipe_top_y'] > 50 :
a = 0
else:
a = 1
return a

# map the discrete action index to the PLE key code: 0 -> no-op, 1 -> 119 (flap)
def call_action(a):
if a==0:
action=0
else:
action=119

return action

# Initialization and hyperparameters
gamma = 0.95
alpha = 0.9
epsilon = 0.1
nb_games = 60000
resolution = 10
Q= dict()
game = FlappyBird(graphics="fixed")
p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False)
score =0
score_100=0
# Q-learning
for i in range(1, nb_games):
    if i % 100 == 0:
        print('average over the last 100 games: %.2f' % (5 + score_100 / 100))  # compensate for the final reward of -5
        if score_100 / 100 > 200:
            break
        score_100 = 0  # reset the 100-game score

    if i % 1000 == 0:
        if alpha > 0.1:
            alpha /= 2
        print('games played: %d, states visited: %d' % (i, len(Q)))
        print('average over the last 1000 games: %.2f' % (5 + score / 1000))  # compensate for the final reward of -5
        if score / 1000 > 100:
            break
        score = 0  # reset the 1000-game score

    if i % 4000 == 0:
        epsilon /= 2
    # Q-learning episode initialization
p.init()
p.reset_game()
state=game.getGameState()
reward = training_reward = 0

    s = discrete_state(state)
    if s not in Q:
        Q[s] = [0.0, 0.0]  # initialize unseen states without overwriting learned values
    action = epsilon_greedy(Q, s, epsilon, state)

while not p.game_over(): # repeat

        reward = p.act(call_action(action))  # returns the reward for the chosen action: 0 for a neutral step, 1 when a pipe is passed, -5 on death
if reward == -5:
            training_reward = -1000  # heavily penalize the action that led to death
else:
training_reward = 1

        state_ = game.getGameState()  # s'
        s_ = discrete_state(state_)  # discretized s'
        action_ = epsilon_greedy(Q, s_, epsilon, state_)  # in s', choose a' (GLIE actor)
        # the full state s' is also passed in to help the action choice in obvious situations

if s_ not in Q.keys():
Q[s_] = [0.0,0.0]

        delta = training_reward + gamma * np.max(Q[s_][:]) - Q[s][action]  # temporal difference: δ = r + γ max_a' Q(s', a') - Q(s, a)
        Q[s][action] = Q[s][action] + alpha * delta  # update: Q(s, a) ← Q(s, a) + αδ

s = s_ #s←s′
action =action_

score +=reward
score_100+=reward

with open('Qql', 'wb') as f:
cPickle.dump(Q,f)