lesson_1_code.py
import os, sys, numpy
module_path = os.path.abspath(os.path.join('../tools'))
if module_path not in sys.path: sys.path.append(module_path)
from DangerousGridWorld import GridWorld

def random_dangerous_grid_world( environment ):
	"""
	Performs a random trajectory on the given Dangerous Grid World environment

	Args:
		environment: OpenAI Gym environment

	Returns:
		trajectory: an array containing the sequence of states visited by the agent
	"""
	trajectory = []
	#
	# YOUR CODE HERE!
	#
	for step in range(10):
		#
		# YOUR CODE HERE!
		#
		if False: break # <- Hint: check if the state is terminal
	return trajectory
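

# A minimal illustrative sketch of one way the loop above could be filled in; it is NOT
# the official solution of the exercise. The helper names random_initial_state(),
# sample( action, state ) and is_terminal( state ) are assumptions of this sketch and
# have not been verified against the actual DangerousGridWorld API: adapt the calls to
# the methods really exposed by GridWorld.
def example_random_trajectory_sketch( environment, max_steps=10 ):
	trajectory = []
	state = environment.random_initial_state()  # assumed helper: pick a random starting state
	trajectory.append( state )
	for _ in range( max_steps ):
		action = numpy.random.randint( 0, environment.action_space )  # uniformly random action
		state = environment.sample( action, state )  # assumed helper: sample the next state
		trajectory.append( state )
		if environment.is_terminal( state ): break  # stop when a terminal state is reached
	return trajectory
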

class RecyclingRobot():
	"""
	Class that implements the environment Recycling Robot of the book 'Reinforcement
	Learning: An Introduction', Sutton & Barto, Example 3.3, page 52 (second edition).

	Attributes
	----------
	observation_space : int
		the number of possible states of the environment
	action_space : int
		the number of possible actions of the environment
	actions : dict
		a dictionary that translates the 'action code' into human language
	states : dict
		a dictionary that translates the 'state code' into human language

	Methods
	-------
	reset( self )
		resets the environment to an initial state; returns the state
	step( self, action )
		performs the given action and computes the next state and the reward;
		returns next_state and reward
	render( self )
		prints the internal state of the environment
	"""
	def __init__( self ):
		# Loading the default parameters
		self.alfa = 0.7
		self.beta = 0.7
		self.r_search = 0.5
		self.r_wait = 0.2

		# Defining the environment variables
		self.observation_space = None
		self.action_space = None
		self.actions = None
		self.states = None

	def reset( self ):
		#
		# YOUR CODE HERE!
		#
		return self.state

	def step( self, action ):
		reward = 0
		#
		# YOUR CODE HERE!
		#
		return self.state, reward, False, None

	def render( self ):
		#
		# YOUR CODE HERE!
		#
		return True
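

# Illustrative sketch of the Recycling Robot transition dynamics described in Sutton &
# Barto, Example 3.3, kept outside the class on purpose: it is one possible reference
# for filling in step() above, not the intended solution. The integer encoding of states
# {0: 'high', 1: 'low'} and actions {0: 'search', 1: 'wait', 2: 'recharge'} is an
# assumption of this sketch.
def example_recycling_dynamics_sketch( state, action, alfa=0.7, beta=0.7, r_search=0.5, r_wait=0.2 ):
	HIGH, LOW = 0, 1
	SEARCH, WAIT, RECHARGE = 0, 1, 2
	if action == SEARCH:
		if state == HIGH:
			# searching with a high battery: the battery stays high with probability alfa
			next_state = HIGH if numpy.random.random() < alfa else LOW
			return next_state, r_search
		# searching with a low battery: stays low with probability beta, otherwise the
		# battery is depleted, the robot is rescued (reward -3) and recharged to high
		if numpy.random.random() < beta: return LOW, r_search
		return HIGH, -3
	if action == WAIT:
		# waiting never changes the battery level
		return state, r_wait
	# recharge (available only in the low state in the book): back to high, no reward
	return HIGH, 0
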

def main():
	print( "\n************************************************" )
	print( "* Welcome to the first lesson of the RL-Lab! *" )
	print( "* (MDP and Environments) *" )
	print( "************************************************" )

	print( "\nA) Random Policy on Dangerous Grid World:" )
	env = GridWorld()
	env.render()
	random_trajectory = random_dangerous_grid_world( env )
	print( "\nRandom trajectory generated:", random_trajectory )

	print( "\nB) Custom Environment: Recycling Robot" )
	env = RecyclingRobot()
	state = env.reset()
	ep_reward = 0
	for step in range(10):
		a = numpy.random.randint( 0, env.action_space )
		new_state, r, _, _ = env.step( a )
		ep_reward += r
		print( f"\tFrom state '{env.states[state]}' selected action '{env.actions[a]}': \t total reward: {ep_reward:1.1f}" )
		state = new_state


if __name__ == "__main__":
	main()