forked from sunghoonhong/AirsimDRL
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrandomly.py
87 lines (70 loc) · 2.14 KB
/
randomly.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
'''
Author: Sunghoon Hong
Title: randomly.py
Description:
Random Agent for Airsim environment
'''
import time
import csv
import math
import argparse
import numpy as np
from airsim_env import Env
class RandomAgentDiscrete(object):
    """Agent that picks a discrete action index at random."""

    def __init__(self, action_size):
        # Number of distinct actions; passed straight to np.random.choice.
        self.action_size = action_size

    def get_action(self):
        """Return one random draw from ``np.random.choice(action_size)``.

        With an integer ``action_size`` the result is an index in
        ``[0, action_size)``.
        """
        return np.random.choice(self.action_size)
class RandomAgentContinuous(object):
    """Agent that samples a continuous action vector uniformly at random.

    Args:
        action_size: Number of components in each sampled action.
        low: Lower bound (inclusive) of every component. Defaults to -2.
        high: Upper bound (exclusive) of every component. Defaults to 2.
    """

    def __init__(self, action_size, low=-2, high=2):
        self.action_size = action_size
        # Bounds used to be hard-coded as (-2, 2); the defaults keep that
        # behavior while letting callers choose a different range.
        self.low = low
        self.high = high

    def get_action(self):
        """Return a NumPy array of ``action_size`` values in ``[low, high)``."""
        return np.random.uniform(self.low, self.high, self.action_size)
def interpret_action(action, scaling_factor=0.5):
    """Translate a discrete action index into a 3-component offset tuple.

    Mapping (``s`` is ``scaling_factor``):
        0 -> (0, 0, 0)
        1 -> (s, 0, 0)    4 -> (-s, 0, 0)
        2 -> (0, s, 0)    5 -> (0, -s, 0)
        3 -> (0, 0, s)    6 -> (0, 0, -s)

    Args:
        action: Action index, expected to be one of 0..6.
        scaling_factor: Magnitude of the non-zero component. Defaults to 0.5.

    Returns:
        A 3-tuple with at most one non-zero component.

    Raises:
        ValueError: If ``action`` is not one of the seven known indices.
    """
    step = scaling_factor
    offsets = {
        0: (0, 0, 0),
        1: (step, 0, 0),
        2: (0, step, 0),
        3: (0, 0, step),
        4: (-step, 0, 0),
        5: (0, -step, 0),
        6: (0, 0, -step),
    }
    quad_offset = offsets.get(action)
    if quad_offset is None:
        # Previously an unknown index fell through every branch and failed
        # with an opaque UnboundLocalError on the return statement.
        raise ValueError(
            'invalid action %r; expected an integer in [0, 6]' % (action,)
        )
    return quad_offset
if __name__ == '__main__':
    # Command-line flags: --verbose logs every step, --continuous switches
    # from the discrete agent to the continuous one.
    parser = argparse.ArgumentParser()
    for flag in ('--verbose', '--continuous'):
        parser.add_argument(flag, action='store_true')
    args = parser.parse_args()

    # 3 components for continuous actions, 7 indices for discrete ones.
    agent = RandomAgentContinuous(3) if args.continuous else RandomAgentDiscrete(7)
    env = Env()

    episode = 0
    # Episodes repeat until the process is stopped from outside.
    while True:
        timestep, score = 0, 0
        env.reset()
        # At least one step is always taken; the loop ends once the
        # environment reports the episode as done.
        while True:
            timestep += 1
            action = agent.get_action()
            if not args.continuous:
                # Discrete indices are mapped to offset tuples before stepping.
                action = interpret_action(action)
            _, reward, done, info = env.step(action)
            score += reward
            if args.verbose:
                print('Step %d Action %s Reward %.2f Info %s:' % (timestep, action, reward, info))
            if done:
                break
        # Per-episode summary.
        print('Ep %d: Step %d Score %.3f' % (episode, timestep, score))
        episode += 1