-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcart_pole_more_details.py
46 lines (41 loc) · 1.29 KB
/
cart_pole_more_details.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import gym
# Roll out 20 episodes of CartPole-v0, choosing each action by sampling the
# action space, and print every transition (observation, action, reward, done).
env = gym.make('CartPole-v0')
try:
    for i_episode in range(20):
        # Start a fresh episode; reset() hands back the first observation.
        observation = env.reset()
        # Each episode is capped at 100 steps by this loop.
        for t in range(100):
            env.render()
            action = env.action_space.sample()
            # NOTE(review): 4-value unpacking matches the older gym step() API;
            # newer gym/gymnasium releases return 5 values -- confirm the
            # installed version.
            observation, reward, done, info = env.step(action)
            print('observation={0}'.format(observation))
            print('action={0}'.format(action))
            print('reward={0}'.format(reward))
            print('done={0}\n'.format(done))
            if done:
                # t is zero-based, so the number of steps taken is t + 1.
                print("Episode finished after {} timesteps".format(t+1))
                break
finally:
    # Always release the environment (and its render window), even if a
    # step or render call raises part-way through.
    env.close()
'''
observation=[ 0.04262979 -0.18194424 -0.13940168 -0.35241294]
action=1
reward=1.0
done=False
observation=[ 0.0389909 -0.3748371 -0.14644994 -0.10673193]
action=0
reward=1.0
done=False
observation=[ 0.03149416 -0.56758993 -0.14858458 0.13639976]
action=0
reward=1.0
done=False
observation=[ 0.02014236 -0.37068671 -0.14585658 -0.19922403]
action=1
reward=1.0
done=False
'''
'''
https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py
check source code for details below
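* observation: the state vector [x, x_dot, theta, theta_dot], i.e. cart position, cart velocity, pole angle, pole angular velocity
* action: 0 or 1; force = self.force_mag if action==1 else -self.force_mag (1 pushes the cart right, 0 pushes it left)
* reward: 1.0 for every step, including the step on which done first becomes True; 0.0 only if step() is called again after done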
the goal is always to increase your total reward
'''