# Arm2DEnv.py
#%%
import numpy as np
import torch
import scipy.integrate as itg
import gym
from utils import ArmDynamicsFun, Jacobian, Jacobian_dot, Hand2Joint, Joint2Hand, dist_from_straight, rand_targ_circle, fibonacci_samples
from arm_params import *
#%%
# Arm movement constraints. Reference cited by the original author:
#   "The Human Arm Kinematics and Dynamics During Daily Activities –
#    Toward a 7 DOF Upper Limb Powered Exoskeleton"

# Symmetric bound applied to the control input U of both joints.
_torque_thres = 25

# Per-joint limits:
#   UB_U / LB_U : upper / lower bound on the control input U
#   UB_X / LB_X : upper / lower bound on the joint angle X, in radians
arm_cnstr = {
    joint: {
        'UB_U': 1 * _torque_thres,
        'LB_U': -1 * _torque_thres,
        'UB_X': np.deg2rad(upper_deg),
        'LB_X': np.deg2rad(lower_deg),
    }
    for joint, (lower_deg, upper_deg) in (
        ('shoulder', (-60, 135)),
        ('elbow', (0, 175)),
    )
}
# arm environment gym class
class ArmModel(gym.Env):
    """Gym environment for a planar arm with a shoulder and an elbow joint.

    Joint state is ``[q1, q2, q1d, q2d]`` and an action is the pair of joint
    control inputs ``[u1, u2]``.

    Observation layout (13 values), as assembled in ``step`` and ``reset``:
        [0:4]    joint state (q1, q2, q1d, q2d)
        [4:8]    hand state returned by ``Joint2Hand`` (x, y, xd, yd)
        [8:10]   target hand position
        [10:12]  origin hand position
        [12]     1.0 once the target has been flagged as reached, else 0.0
    """

    # Reward returned when a state/action pair violates the arm constraints.
    _INFEASIBLE_REWARD = -5

    def __init__(self):
        """Build the workspace, observation and action spaces and reset bookkeeping."""
        super().__init__()
        # arm biophysical constraints
        self.arm_cnstr = arm_cnstr
        # center of the workspace, initial position of the arm for experiments
        # (attribute name keeps its original spelling so existing callers still work)
        self.wsapce_center = np.array([-0.15, 0.30])
        # workspace: a 0.30 x 0.15 rectangle spanning +/-0.15 in x around the
        # center and 0 to +0.15 in y above it
        _ws_low = self.wsapce_center + np.array([-0.15, -0.0])
        _ws_high = self.wsapce_center + np.array([0.15, 0.15])
        self.wspace = gym.spaces.Box(
            low=_ws_low,
            high=_ws_high
        )
        # sample points covering the workspace, used as random origins/targets
        self.fibo_ws = fibonacci_samples(nb_samples=100, center=self.wsapce_center, ws_high=_ws_high, ws_low=_ws_low)
        self.mode = 'train'  # 'train' or 'eval'

        # Joint-angle bounds: the lower bound comes from LB_X, the upper from UB_X.
        _joint_low = np.array([self.arm_cnstr['shoulder']['LB_X'], self.arm_cnstr['elbow']['LB_X']])
        _joint_high = np.array([self.arm_cnstr['shoulder']['UB_X'], self.arm_cnstr['elbow']['UB_X']])
        _joint_vel_thresh = np.array([10.] * 2)
        _hand_vel_thresh = np.array([1.] * 2)
        # Conservative scalar bounds applied to both hand coordinates: the
        # smallest workspace lower bound and the largest workspace upper bound.
        _loc_max = self.wspace.high.max()
        _loc_min = self.wspace.low.min()
        _loc_high = np.array([_loc_max, _loc_max])
        _loc_low = np.array([_loc_min, _loc_min])
        self.observation_space = gym.spaces.Box(
            low=np.concatenate((_joint_low, -1 * _joint_vel_thresh, _loc_low, -1 * _hand_vel_thresh, _loc_low, _loc_low, [0.])),
            high=np.concatenate((_joint_high, +1 * _joint_vel_thresh, _loc_high, +1 * _hand_vel_thresh, _loc_high, _loc_high, [1.])),
            dtype=np.float32
        )
        self.action_space = gym.spaces.Box(
            low=np.array([self.arm_cnstr['shoulder']['LB_U'], self.arm_cnstr['elbow']['LB_U']]),
            high=np.array([self.arm_cnstr['shoulder']['UB_U'], self.arm_cnstr['elbow']['UB_U']])
        )
        self.dt = dt
        self.metadata = {'render.modes': []}
        self.flag_reached = False
        self.state = None
        self.VISION = None
        self.obs = None
        self.iter = 0

    def set_origin(self, position):
        """Store the start position, given in hand space as ``[x, y]``.

        Sets ``origin_hand`` (``[x, y]``) and ``origin_joint`` (the result of
        ``Hand2Joint`` followed by two zeros).
        """
        self.origin_hand = np.array([position[0], position[1]])
        self.origin_joint = np.concatenate((Hand2Joint(self.origin_hand, 'pos'), 0.0, 0.0), axis=None)

    def set_target(self, position):
        """Store the target position, given in hand space as ``[x, y]``.

        The target velocity is assumed to be zero.
        """
        # [x y xd yd]
        self.target_hand = np.array([position[0], position[1], 0.0, 0.0])
        # [q1 q2 q1d q2d]
        self.target_joint = np.concatenate((Hand2Joint(self.target_hand, 'pos'), 0.0, 0.0), axis=None)

    def is_feasible(self, X, U):
        """Return True when joint angles ``X[0:2]`` and inputs ``U[0:2]`` respect ``arm_cnstr``."""
        shoulder = self.arm_cnstr['shoulder']
        elbow = self.arm_cnstr['elbow']
        q1_feas = shoulder['LB_X'] <= X[0] <= shoulder['UB_X']
        q2_feas = elbow['LB_X'] <= X[1] <= elbow['UB_X']
        u1_feas = shoulder['LB_U'] <= U[0] <= shoulder['UB_U']
        u2_feas = elbow['LB_U'] <= U[1] <= elbow['UB_U']
        return bool(q1_feas and q2_feas and u1_feas and u2_feas)

    def ArmDynamics(self, t, X, U):
        """Right-hand side for ``solve_ivp``: state derivative from ``ArmDynamicsFun``.

        ``t`` is unused; it is present because ``solve_ivp`` calls ``fun(t, y, *args)``.
        """
        dX_dt = np.array(ArmDynamicsFun(X, U)).squeeze()
        return dX_dt

    def cost(self, X_joint, U):
        """Return the reward for joint state ``X_joint`` relative to the current target.

        Side effect: sets ``self.flag_reached = True`` when the target is
        considered reached. ``U`` is accepted but not used.
        """
        # X: [q1, q2, q1d, q2d]
        # convert to hand space
        X_hand = Joint2Hand(X_joint, 'lower', 'pos', 'vel')
        X_t_hand = self.target_hand
        X_t_joint = self.target_joint
        reward = 0
        # shrink the epsilon circle while iterating timesteps
        eps = 0.005 + 0.05 / (self.iter + 1) ** 0.8
        lmbd = 0.5
        dist_p = np.linalg.norm((X_hand[:2] - X_t_hand[:2]), ord=2)  # hand-space distance
        dist_o = np.linalg.norm((X_joint[:2] - X_t_joint[:2]), ord=2)  # joint-space distance
        dist = lmbd * dist_p + (1 - lmbd) * dist_o
        # NOTE(review): the target counts as reached as soon as EITHER distance
        # is within eps; confirm that requiring both was not the intent.
        if dist_p > eps and dist_o > eps:
            reward += -dist
        else:
            reward += 1
            self.flag_reached = True
        return reward

    def step_from_state(self, state, U):
        """Advance ``state`` by one time step ``self.dt`` under input ``U``.

        Returns ``(next_state, reward, done, info)``. If ``state``/``U`` is
        already infeasible the state is returned unchanged with the
        infeasibility reward. The reward is otherwise the cost of the state
        *before* integration, replaced by the infeasibility reward when the
        integrated state violates the constraints.
        """
        done = not self.is_feasible(state, U)
        # cost() is evaluated even for infeasible states because it also
        # updates self.flag_reached.
        c = self.cost(state, U)
        info = {}
        if done:
            c = self._INFEASIBLE_REWARD
            return state, c, done, info
        res = itg.solve_ivp(self.ArmDynamics, (0, self.dt), state, args=(U,))
        state_next = res.y[:, -1]
        done = not self.is_feasible(state_next, U)
        if done:
            c = self._INFEASIBLE_REWARD
        done = done or self.flag_reached
        return state_next, c, done, info

    def step(self, U):
        """Apply input ``U`` for one time step and return ``(obs, reward, done, info)``."""
        # obs: [states(q1, q2, q1d, q2d), (hand_x, hand_y, hand_xd, hand_yd)], goal_x, goal_y, origin_x, origin_y, reached_goal
        self.state = self.obs[0:4]  # q1, q2, q1d, q2d
        state_next, c, done, info = self.step_from_state(self.state, U)
        self.state = np.copy(state_next)
        self.VISION = np.concatenate((self.state, Joint2Hand(state_next, 'lower', 'pos', 'vel')))
        self.obs = np.concatenate((self.VISION, self.target_hand[0:2], self.origin_hand, [1. if self.flag_reached else 0.]))
        obs_next = np.copy(self.obs)
        self.iter += 1
        return obs_next, c, done, info

    def reset(self):
        """Start a new episode and return the initial observation.

        In ``'train'`` mode a distinct random origin and target are drawn from
        ``self.fibo_ws``. In any other mode the origin and target set earlier
        through ``set_origin``/``set_target`` are reused.

        Raises:
            AttributeError: if the mode is not ``'train'`` and no origin or
                target has been set.
        """
        self.flag_reached = False
        self.iter = 0
        if self.mode == 'train':
            # two different sample indices: one for the start, one for the goal
            origin_idx, targ_idx = np.random.choice(self.fibo_ws.shape[0], 2, replace=False)
            self.set_origin(self.fibo_ws[origin_idx, :])
            self.set_target(self.fibo_ws[targ_idx, :])
        elif getattr(self, 'origin_hand', None) is None or getattr(self, 'target_hand', None) is None:
            raise AttributeError(
                "origin/target not set: call set_origin() and set_target() "
                "before reset() when mode is not 'train'"
            )
        self.state = Hand2Joint(np.array([self.origin_hand[0], self.origin_hand[1], 0.0, 0.0]), 'pos', 'vel')
        self.VISION = np.concatenate((self.state, Joint2Hand(self.state, 'lower', 'pos', 'vel')))
        self.obs = np.concatenate((self.VISION, self.target_hand[0:2], self.origin_hand, [1. if self.flag_reached else 0.]))
        return np.copy(self.obs)
#%%