-
Notifications
You must be signed in to change notification settings - Fork 1
/
quadrotor_wrapper.py
74 lines (64 loc) · 3.14 KB
/
quadrotor_wrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import gym
import numpy as np
'''
Observation wrapper is made according to this paper:
https://arxiv.org/abs/1707.05110
18-element in observation space:
- 3 relative linear position ( Target linear position - Drone's linear position)
- 3 linear velocity
- 9 element of rotation matrix
- 3 angular velocity
'''
class QuadrotorObsWrapper(gym.ObservationWrapper):
def __init__(self,env):
super(QuadrotorObsWrapper,self).__init__(env)
obs_high = np.array([
np.finfo(np.float32).max, np.finfo(np.float32).max, np.finfo(np.float32).max,
np.finfo(np.float32).max, np.finfo(np.float32).max, np.finfo(np.float32).max,
np.finfo(np.float32).max, np.finfo(np.float32).max, np.finfo(np.float32).max,
np.finfo(np.float32).max, np.finfo(np.float32).max, np.finfo(np.float32).max,
np.finfo(np.float32).max, np.finfo(np.float32).max, np.finfo(np.float32).max,
np.finfo(np.float32).max, np.finfo(np.float32).max, np.finfo(np.float32).max])
obs_low = np.array([
np.finfo(np.float32).min, np.finfo(np.float32).min, np.finfo(np.float32).min,
np.finfo(np.float32).min, np.finfo(np.float32).min, np.finfo(np.float32).min,
np.finfo(np.float32).min, np.finfo(np.float32).min, np.finfo(np.float32).min,
np.finfo(np.float32).min, np.finfo(np.float32).min, np.finfo(np.float32).min,
np.finfo(np.float32).min, np.finfo(np.float32).min, np.finfo(np.float32).min,
np.finfo(np.float32).min, np.finfo(np.float32).min, np.finfo(np.float32).min])
self.observation_space = gym.spaces.Box(low=obs_low, high=obs_high, dtype=np.float32)
def observation(self,observation):
obs = np.zeros(18)
# Relative linear postion
obs[0:3] = (self.target - self.state[0:3])
# Linear velocity
obs[3:6] = self.state[3:6]
# 9-element of rotation matrix
ct = np.cos(self.state[6])
cp = np.cos(self.state[7])
cg = np.cos(self.state[8])
st = np.sin(self.state[6])
sp = np.sin(self.state[7])
sg = np.sin(self.state[8])
R_x = np.array([[1, 0, 0], [0, ct, -st], [0, st, ct]])
R_y = np.array([[cp, 0, sp], [0, 1, 0], [-sp, 0, cp]])
R_z = np.array([[cg, -sg, 0], [sg, cg, 0], [0, 0, 1]])
R = np.dot(R_z, np.dot(R_y, R_x))
obs[6:15] = R.ravel()
# Angular velocity
obs[15:18] = self.state[9:12]
return obs
'''
Reward wrapper is made according to this paper:
https://arxiv.org/abs/1707.05110
'''
class QuadrotorRewardWrapper(gym.RewardWrapper):
def __init__(self,env):
super(QuadrotorRewardWrapper, self).__init__(env)
def reward(self,reward):
distance_cost = 4e-3*np.linalg.norm(self.target - self.state[0:3])
velocity_cost = 5e-4*np.linalg.norm(self.state[3:6])
pose_cost = 2e-4*np.linalg.norm(self.state[6:9])
angular_vel_cost = 3e-4*np.linalg.norm(self.state[9:12])
total_cost = distance_cost + velocity_cost + pose_cost + angular_vel_cost
return -total_cost