diff --git a/Caulet/FlappyAgent.py b/Caulet/FlappyAgent.py
new file mode 100644
index 0000000..7e52807
--- /dev/null
+++ b/Caulet/FlappyAgent.py
@@ -0,0 +1,34 @@
+import numpy as np
+import _pickle as cPickle
+
+def discrete_state(state):
+    x = str(int(round(state['next_pipe_dist_to_player']/20)))
+    y = str(int(round((state['player_y'] - state['next_pipe_bottom_y'])/20)))
+    v = str(int(round(state['player_vel'])))
+    return x+"-"+y+"-"+v
+
+flag_dict = False
+Q = dict()
+
+def FlappyPolicy(state, screen):
+    action = None
+    global flag_dict
+    global Q
+
+    if not flag_dict:
+        Q = cPickle.load(open("Qql", 'rb'))
+        flag_dict = True  # mark the Q-table as loaded so the pickle is only read once
+    s = discrete_state(state)
+
+    if s in Q.keys():
+        a = np.argmax(Q[s][:])
+    else:
+        a = 0
+
+    if a == 0:
+        action = 0
+    else:
+        action = 119
+
+    return action
diff --git a/Caulet/Note.txt b/Caulet/Note.txt
new file mode 100644
index 0000000..89aad1a
--- /dev/null
+++ b/Caulet/Note.txt
@@ -0,0 +1,5 @@
+Implementation of a Q-learning algorithm for the Flappy Bird game.
+Training is done with the training.py script and the result is stored as a dictionary.
+The game space is discretized into 20-pixel blocks relative to the next pipe.
+This discretization is encoded as a string (x-y-vel), which is used as the dictionary key.
+The structure of the Q-learning algorithm is largely inspired by the one seen in RL3.
diff --git a/Caulet/Qql b/Caulet/Qql
new file mode 100644
index 0000000..6878699
Binary files /dev/null and b/Caulet/Qql differ
diff --git a/Caulet/run.py b/Caulet/run.py
new file mode 100644
index 0000000..20761a8
--- /dev/null
+++ b/Caulet/run.py
@@ -0,0 +1,31 @@
+# You're not allowed to change this file
+from ple.games.flappybird import FlappyBird
+from ple import PLE
+import numpy as np
+from FlappyAgent import FlappyPolicy
+
+game = FlappyBird(graphics="fixed") # use "fancy" for full background, random bird color and random pipe color, use "fixed" (default) for black background and constant bird and pipe colors.
+p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False)
+# Note: if you want to see you agent act in real time, set force_fps to False. But don't use this setting for learning, just for display purposes.
+
+p.init()
+reward = 0.0
+
+nb_games = 100
+cumulated = np.zeros((nb_games))
+
+for i in range(nb_games):
+    p.reset_game()
+
+    while(not p.game_over()):
+        state = game.getGameState()
+        screen = p.getScreenRGB()
+        action = FlappyPolicy(state, screen) ### Your job is to define this function.
+
+        reward = p.act(action)
+        cumulated[i] = cumulated[i] + reward
+
+average_score = np.mean(cumulated)
+max_score = np.max(cumulated)
+print(average_score)
+print(max_score)
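A minimal sketch of the discretization described in Note.txt and of the greedy lookup in FlappyAgent.py (not part of the patch; the state values and the one-entry Q-table below are made up for illustration):

# discretization sketch -- hypothetical state values, toy Q-table
import numpy as np

def discrete_state(state):
    # 20-pixel blocks relative to the next pipe, as in FlappyAgent.py
    x = str(int(round(state['next_pipe_dist_to_player']/20)))
    y = str(int(round((state['player_y'] - state['next_pipe_bottom_y'])/20)))
    v = str(int(round(state['player_vel'])))
    return x+"-"+y+"-"+v

example_state = {'next_pipe_dist_to_player': 143.0,  # hypothetical values
                 'player_y': 210.0,
                 'next_pipe_bottom_y': 244.0,
                 'player_vel': -8.0}

s = discrete_state(example_state)          # "7--2--8" for the values above
Q = {s: [1.2, 0.4]}                        # toy Q-table: [value of "do nothing", value of "flap"]
a = int(np.argmax(Q[s])) if s in Q else 0  # unseen states default to "do nothing"
action = 119 if a == 1 else 0              # same mapping as FlappyAgent.py: 119 = flap, 0 = do nothing
print(s, a, action)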
diff --git a/Caulet/training.py b/Caulet/training.py
new file mode 100644
index 0000000..02da4a8
--- /dev/null
+++ b/Caulet/training.py
@@ -0,0 +1,110 @@
+import numpy as np
+from ple.games.flappybird import FlappyBird
+from ple import PLE
+import numpy as np
+import _pickle as cPickle
+
+# Discretization of the state space
+def discrete_state(state):
+    x = str(int(round(state['next_pipe_dist_to_player']/20)))
+    y = str(int(round((state['player_y'] - state['next_pipe_bottom_y'])/20)))
+    v = str(int(round(state['player_vel'])))
+    return x+"-"+y+"-"+v
+
+# GLIE actor; the raw game state was added as an extra argument to guide exploration
+def epsilon_greedy(Q, s, epsilon, state):
+    a = 0
+
+    if s in Q.keys():
+        a = np.argmax(Q[s][:])
+    random_act = np.random.rand()
+    if random_act <= epsilon:
+        if random_act <= 0.5 * epsilon:
+            if state['next_pipe_bottom_y'] - state['player_y'] < 50:
+                a = 1
+            else:
+                a = 0
+        else:
+            if state['player_y'] - state['next_pipe_top_y'] > 50:
+                a = 0
+            else:
+                a = 1
+    return a
+
+# map action index 1 to the key code 119 expected by PLE
+def call_action(a):
+    if a == 0:
+        action = 0
+    else:
+        action = 119
+
+    return action
+
+# Init
+gamma = 0.95
+alpha = 0.9
+epsilon = 0.1
+nb_games = 60000
+resolution = 10
+Q = dict()
+game = FlappyBird(graphics="fixed")
+p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False)
+score = 0
+score_100 = 0
+# Q-learning
+for i in range(1, nb_games):
+    if i % 100 == 0:
+        print('average over the last 100 games: %.2f' % (5 + score_100/100))  # each game ends with a -5 reward, hence the +5
+        if score_100/100 > 200:
+            break
+        score_100 = 0  # reset score_100
+
+    if i % 1000 == 0:
+        if alpha > 0.1:
+            alpha /= 2
+        print('games played: %d, states visited: %d' % (i, len(Q)))
+        print('average over the last 1000 games: %.2f' % (5 + score/1000))  # each game ends with a -5 reward, hence the +5
+        if score/1000 > 100:
+            break
+        score = 0  # reset score
+
+    if i % 4000 == 0:
+        epsilon /= 2
+
+    # episode initialization
+    p.init()
+    p.reset_game()
+    state = game.getGameState()
+    reward = training_reward = 0
+
+    s = discrete_state(state)
+    action = epsilon_greedy(Q, s, epsilon, state)
+    if s not in Q.keys(): Q[s] = [0.0, 0.0]  # only initialize unseen states, do not overwrite learned values
+
+    while not p.game_over():  # repeat
+
+        reward = p.act(call_action(action))  # returns the reward of the action: 0 if it has no immediate effect, 1 when a pipe is passed, -5 on death
+        if reward == -5:
+            training_reward = -1000  # strongly penalize the action that led to death
+        else:
+            training_reward = 1
+
+        state_ = game.getGameState()  # s'
+        s_ = discrete_state(state_)  # discrete s'
+        action_ = epsilon_greedy(Q, s_, epsilon, state_)  # in s', choose a' (GLIE actor)
+        # the raw state s' is passed to help the action choice in obvious situations
+
+        if s_ not in Q.keys():
+            Q[s_] = [0.0, 0.0]
+
+        delta = (training_reward + gamma * np.max(Q[s_][:]) - Q[s][action])  # temporal difference: δ = r + γ max_a' Q(s',a') − Q(s,a)
+        Q[s][action] = Q[s][action] + alpha * delta  # update: Q(s,a) ← Q(s,a) + αδ
+
+        s = s_  # s ← s'
+        action = action_
+
+        score += reward
+        score_100 += reward
+
+with open('Qql', 'wb') as f:
+    cPickle.dump(Q, f)
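To make the update inside training.py's while loop easier to follow, here is the same temporal-difference step isolated on a single toy transition (not part of the patch; the state keys and Q-values are invented, while gamma and alpha match the script's initial settings):

# one Q-learning step on a made-up transition
import numpy as np

gamma, alpha = 0.95, 0.9            # same discount and initial learning rate as training.py
Q = {"7--2--8": [0.0, 0.0],         # Q[s] = [value of "do nothing", value of "flap"]
     "6--1--7": [2.0, 1.0]}

s, action = "7--2--8", 0            # state and action taken
training_reward = 1                 # reshaped reward: +1 per surviving step, -1000 on death
s_ = "6--1--7"                      # discretized next state

delta = training_reward + gamma * np.max(Q[s_]) - Q[s][action]  # δ = r + γ max_a' Q(s',a') − Q(s,a)
Q[s][action] = Q[s][action] + alpha * delta                     # Q(s,a) ← Q(s,a) + αδ
print(Q[s])                         # roughly [2.61, 0.0]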
diff --git a/RandomBird/FlappyAgent.py b/RandomBird/FlappyAgent.py
index 9f3ec84..7e52807 100644
--- a/RandomBird/FlappyAgent.py
+++ b/RandomBird/FlappyAgent.py
@@ -1,9 +1,34 @@
 import numpy as np
+import _pickle as cPickle
+
+def discrete_state(state):
+    x = str(int(round(state['next_pipe_dist_to_player']/20)))
+    y = str(int(round((state['player_y'] - state['next_pipe_bottom_y'])/20)))
+    v = str(int(round(state['player_vel'])))
+    return x+"-"+y+"-"+v
+
+flag_dict = False
+Q = dict()
 
 def FlappyPolicy(state, screen):
-    action=None
-    if(np.random.randint(0,2)<1):
-        action=119
-    return action
+    action = None
+    global flag_dict
+    global Q
+
+    if not flag_dict:
+        Q = cPickle.load(open("Qql", 'rb'))
+        flag_dict = True  # mark the Q-table as loaded so the pickle is only read once
+    s = discrete_state(state)
+
+    if s in Q.keys():
+        a = np.argmax(Q[s][:])
+    else:
+        a = 0
+
+    if a == 0:
+        action = 0
+    else:
+        action = 119
+
+    return action
diff --git a/RandomBird/Note.txt b/RandomBird/Note.txt
new file mode 100644
index 0000000..89aad1a
--- /dev/null
+++ b/RandomBird/Note.txt
@@ -0,0 +1,5 @@
+Implementation of a Q-learning algorithm for the Flappy Bird game.
+Training is done with the training.py script and the result is stored as a dictionary.
+The game space is discretized into 20-pixel blocks relative to the next pipe.
+This discretization is encoded as a string (x-y-vel), which is used as the dictionary key.
+The structure of the Q-learning algorithm is largely inspired by the one seen in RL3.
diff --git a/RandomBird/Qql b/RandomBird/Qql
new file mode 100644
index 0000000..6878699
Binary files /dev/null and b/RandomBird/Qql differ
diff --git a/RandomBird/run.py b/RandomBird/run.py
index 39b5801..20761a8 100644
--- a/RandomBird/run.py
+++ b/RandomBird/run.py
@@ -5,7 +5,7 @@ from FlappyAgent import FlappyPolicy
 
 game = FlappyBird(graphics="fixed") # use "fancy" for full background, random bird color and random pipe color, use "fixed" (default) for black background and constant bird and pipe colors.
-p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=False, display_screen=True)
+p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False)
 # Note: if you want to see you agent act in real time, set force_fps to False. But don't use this setting for learning, just for display purposes.
 
 p.init()
@@ -27,3 +27,5 @@
 average_score = np.mean(cumulated)
 max_score = np.max(cumulated)
+print(average_score)
+print(max_score)
diff --git a/RandomBird/training.py b/RandomBird/training.py
new file mode 100644
index 0000000..02da4a8
--- /dev/null
+++ b/RandomBird/training.py
@@ -0,0 +1,110 @@
+import numpy as np
+from ple.games.flappybird import FlappyBird
+from ple import PLE
+import numpy as np
+import _pickle as cPickle
+
+# Discretization of the state space
+def discrete_state(state):
+    x = str(int(round(state['next_pipe_dist_to_player']/20)))
+    y = str(int(round((state['player_y'] - state['next_pipe_bottom_y'])/20)))
+    v = str(int(round(state['player_vel'])))
+    return x+"-"+y+"-"+v
+
+# GLIE actor; the raw game state was added as an extra argument to guide exploration
+def epsilon_greedy(Q, s, epsilon, state):
+    a = 0
+
+    if s in Q.keys():
+        a = np.argmax(Q[s][:])
+    random_act = np.random.rand()
+    if random_act <= epsilon:
+        if random_act <= 0.5 * epsilon:
+            if state['next_pipe_bottom_y'] - state['player_y'] < 50:
+                a = 1
+            else:
+                a = 0
+        else:
+            if state['player_y'] - state['next_pipe_top_y'] > 50:
+                a = 0
+            else:
+                a = 1
+    return a
+
+# map action index 1 to the key code 119 expected by PLE
+def call_action(a):
+    if a == 0:
+        action = 0
+    else:
+        action = 119
+
+    return action
+
+# Init
+gamma = 0.95
+alpha = 0.9
+epsilon = 0.1
+nb_games = 60000
+resolution = 10
+Q = dict()
+game = FlappyBird(graphics="fixed")
+p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False)
+score = 0
+score_100 = 0
+# Q-learning
+for i in range(1, nb_games):
+    if i % 100 == 0:
+        print('average over the last 100 games: %.2f' % (5 + score_100/100))  # each game ends with a -5 reward, hence the +5
+        if score_100/100 > 200:
+            break
+        score_100 = 0  # reset score_100
+
+    if i % 1000 == 0:
+        if alpha > 0.1:
+            alpha /= 2
+        print('games played: %d, states visited: %d' % (i, len(Q)))
+        print('average over the last 1000 games: %.2f' % (5 + score/1000))  # each game ends with a -5 reward, hence the +5
+        if score/1000 > 100:
+            break
+        score = 0  # reset score
+
+    if i % 4000 == 0:
+        epsilon /= 2
+
+    # episode initialization
+    p.init()
+    p.reset_game()
+    state = game.getGameState()
+    reward = training_reward = 0
+
+    s = discrete_state(state)
+    action = epsilon_greedy(Q, s, epsilon, state)
+    if s not in Q.keys(): Q[s] = [0.0, 0.0]  # only initialize unseen states, do not overwrite learned values
+
+    while not p.game_over():  # repeat
+
+        reward = p.act(call_action(action))  # returns the reward of the action: 0 if it has no immediate effect, 1 when a pipe is passed, -5 on death
+        if reward == -5:
+            training_reward = -1000  # strongly penalize the action that led to death
+        else:
+            training_reward = 1
+
+        state_ = game.getGameState()  # s'
+        s_ = discrete_state(state_)  # discrete s'
+        action_ = epsilon_greedy(Q, s_, epsilon, state_)  # in s', choose a' (GLIE actor)
+        # the raw state s' is passed to help the action choice in obvious situations
+
+        if s_ not in Q.keys():
+            Q[s_] = [0.0, 0.0]
+
+        delta = (training_reward + gamma * np.max(Q[s_][:]) - Q[s][action])  # temporal difference: δ = r + γ max_a' Q(s',a') − Q(s,a)
+        Q[s][action] = Q[s][action] + alpha * delta  # update: Q(s,a) ← Q(s,a) + αδ
+
+        s = s_  # s ← s'
+        action = action_
+
+        score += reward
+        score_100 += reward
+
+with open('Qql', 'wb') as f:
+    cPickle.dump(Q, f)
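Since Qql ships as an opaque binary, here is a short way to sanity-check it after training (not part of the patch; the printed key and values are illustrative and depend on the training run):

# inspect the pickled Q dictionary written by training.py
import _pickle as cPickle

with open("Qql", "rb") as f:
    Q = cPickle.load(f)

print(len(Q), "discretized states recorded")  # number of "x-y-vel" keys seen during training
some_key = next(iter(Q))
print(some_key, Q[some_key])                  # e.g. '7--2--8' [2.61, 0.0] -- actual values vary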