"""
The environment of Sawyer Arm + Baxter Gripper for graping object.
With a bounding box of the arange that the gripper cannot move outside.
"""
from os.path import dirname, join, abspath
from pyrep import PyRep
from pyrep.robots.arms.sawyer import Sawyer
from pyrep.robots.end_effectors.baxter_gripper import BaxterGripper
from pyrep.objects.proximity_sensor import ProximitySensor
from pyrep.objects.vision_sensor import VisionSensor
from pyrep.objects.shape import Shape
from pyrep.objects.dummy import Dummy
from pyrep.const import JointType, JointMode
import numpy as np
import matplotlib.pyplot as plt
import math
POS_MIN, POS_MAX = [0.1, -0.3, 1.], [0.45, 0.3, 1.] # valid position range of target object
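# Note: POS_MIN/POS_MAX define the cuboid used both for sampling a random target in reset() and for the bounding-box check in _move().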
class GraspEnv(object):
''' Sawyer robot grasping a cuboid '''
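# A minimal usage sketch (values are illustrative only, assuming the default scene files are available):
#   env = GraspEnv(headless=True, control_mode='joint_velocity')
#   state = env.reset(random_target=True)
#   state, reward, done, info = env.step(np.random.uniform(-1., 1., env.action_space.shape[0]))
#   env.shutdown()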
def __init__(self, headless, control_mode='joint_velocity'):
'''
parameters:
:headless: bool, if True, no visualization, else with visualization.
:control_mode: str, 'end_position' or 'joint_velocity'.
'''
# set public variables
self.headless = headless # if headless is True, no visualization
self.reward_offset = 10.0 # reward for successfully grasping the object
self.reward_range = self.reward_offset # reward range for registering the gym env when using a vectorized env wrapper
self.penalty_offset = 1. # penalty value for undesired cases
self.fall_down_offset = 0.1 # distance threshold for judging whether the target object has fallen off the table
self.metadata=[] # gym env argument
self.control_mode = control_mode # control mode of the robotic arm: 'end_position' or 'joint_velocity'
# launch and set up the scene, and set proxy variables that represent their counterparts in the scene
self.pr = PyRep() # instantiate PyRep
if control_mode == 'end_position': # a different scene is needed, with all joints in inverse kinematics mode
SCENE_FILE = join(dirname(abspath(__file__)), './scenes/sawyer_reacher_rl_new_ik.ttt') # scene with joints controlled by ik (inverse kinematics)
elif control_mode == 'joint_velocity': # the scene with all joints in force/torque mode for forward kinematics
SCENE_FILE = join(dirname(abspath(__file__)), './scenes/sawyer_reacher_rl_new.ttt') # scene with joints controlled by forward kinematics
self.pr.launch(SCENE_FILE, headless=headless) # launch the scene; headless means no visualization
self.pr.start() # start the scene
self.agent = Sawyer() # get the robot arm in the scene
self.gripper = BaxterGripper() # get the gripper in the scene
self.gripper_left_pad = Shape('BaxterGripper_leftPad') # the left pad on the gripper finger
self.proximity_sensor = ProximitySensor('BaxterGripper_attachProxSensor') # need the name of the sensor here
self.vision_sensor = VisionSensor('Vision_sensor') # need the name of the sensor here
self.table = Shape('diningTable') # the table in the scene for checking collision
if control_mode == 'end_position': # control the robot arm by the position of its end using inverse kinematics
self.agent.set_control_loop_enabled(True) # if false, inverse kinematics won't work
self.action_space = np.zeros(4) # 3 DOF end position control + 1 rotation of gripper
elif control_mode == 'joint_velocity': # control the robot arm by directly setting velocity values on each joint, using forward kinematics
self.agent.set_control_loop_enabled(False)
self.action_space = np.zeros(7) # 7 DOF velocity control, no need for extra control of end rotation, the 7th joint controls it.
else:
raise NotImplementedError
self.observation_space = np.zeros(17) # position and velocity of 7 joints + position of the target
self.agent.set_motor_locked_at_zero_velocity(True)
self.target = Shape('target') # get the target object
self.agent_ee_tip = self.agent.get_tip() # the tip of the robot arm, used as the end of the inverse kinematics chain for control
self.tip_target = Dummy('Sawyer_target') # the target point of the tip (end of the robot arm) to move towards
self.tip_pos = self.agent_ee_tip.get_position() # tip x,y,z position
# set a proper initial robot pose or tip position
if control_mode == 'end_position':
initial_pos = [0.3, 0.1, 0.9]
self.tip_target.set_position(initial_pos)
# one big step is enough for setting the rotation; with reset_dynamics=True the rotation is set instantaneously
self.tip_target.set_orientation([0,np.pi,np.pi/2], reset_dynamics=True) # the first two dimensions along the x and y axes make the gripper face downwards
elif control_mode == 'joint_velocity':
self.initial_joint_positions = [0.001815199851989746, -1.4224984645843506, \
0.704303503036499, 2.54307222366333, 2.972468852996826, -0.4989511966705322, 4.105560302734375] # a proper initial pose
self.agent.set_joint_positions(self.initial_joint_positions)
self.pr.step()
self.initial_tip_positions = self.agent_ee_tip.get_position()
self.initial_target_positions = self.target.get_position()
def _get_state(self):
'''
Return state containing arm joint positions/velocities & target position.
'''
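# 7 joint positions + 7 joint velocities + 3-dim target position = 17-dim observation (matches self.observation_space)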
return np.array(self.agent.get_joint_positions() + # list, dim=7
self.agent.get_joint_velocities() + # list, dim=7
self.target.get_position()) # list, dim=3
def _is_holding(self):
'''
Return whether the gripper is holding the target (bool).
'''
# Note that the collision check is not always accurate:
# for continuous collision frames, often only the first 4-5 frames of the collision can be detected.
pad_collide_object = self.gripper_left_pad.check_collision(self.target)
return pad_collide_object and self.proximity_sensor.is_detected(self.target)
def _move(self, action, bounding_offset=0.15, step_factor=0.2, max_itr=20, max_error=0.05, rotation_norm =5.):
'''
Move the end effector of the robot arm according to the action, using inverse kinematics for the 'end_position' control mode;
inverse kinematics control is achieved by setting the tip target instead of calling .solve_ik(),
because .solve_ik() sometimes does not function correctly.
Mode: closed-loop proportional control, using ik.
parameters:
:bounding_offset: offset of the bounding box beyond the valid target position range, defining the valid and safe range of motion
:step_factor: small step factor multiplied by the difference between the current and desired position, i.e. the proportional factor
:max_itr: maximum number of moving iterations
:max_error: upper bound of the distance error for movement at each call
:rotation_norm: factor for normalizing rotation values, since the actions are of the same scale in each dimension
'''
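# Closed-loop proportional control: each iteration sets the tip target to cur_pos + step_factor * (target_pos - cur_pos)
# and steps the simulation, until the residual error is below max_error or max_itr iterations are reached.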
pos=self.gripper.get_position()
# check if state+action will be within the bounding box; if so, move normally, otherwise the action is not executed.
# i.e. x_min < x < x_max and y_min < y < y_max and z > z_min
if pos[0]+action[0]>POS_MIN[0]-bounding_offset and pos[0]+action[0]<POS_MAX[0]+bounding_offset \
and pos[1]+action[1] > POS_MIN[1]-bounding_offset and pos[1]+action[1] < POS_MAX[1]+2*bounding_offset \
and pos[2]+action[2] > POS_MIN[2]-2*bounding_offset: # larger offset in z axis
# there is a mismatch between the object's set_orientation() and get_orientation():
# the (x,y,z) passed to set_orientation() will be (y,x,-z) in get_orientation().
ori_z=-self.agent_ee_tip.get_orientation()[2] # the minus is because of the mismatch between set_orientation() and get_orientation()
target_pos = np.array(self.agent_ee_tip.get_position())+np.array(action[:3])
diff=1 # initialization
itr=0
while np.sum(np.abs(diff))>max_error and itr<max_itr:
itr+=1
# set pos in small step
cur_pos = self.agent_ee_tip.get_position()
diff=target_pos-cur_pos # difference between the current and target position, closed-loop control
pos = cur_pos+step_factor*diff # take a small step according to the current difference, to prevent the ik from becoming unsolvable
self.tip_target.set_position(pos.tolist())
self.pr.step() # every time the tip target is set, a simulation step is needed to apply it
# one big step for z-rotation is enough, but small error still exists due to the ik solver
ori_z+=rotation_norm*action[3] # normalize the rotation value, as usually the same action range is used in the policy for both rotation and position
self.tip_target.set_orientation([0, np.pi, ori_z]) # make gripper face downwards and rotate ori_z along z axis
self.pr.step()
else:
print("Potential Movement Out of the Bounding Box!")
pass # no action if potentially moving out of the bounding box
def reinit(self):
'''
Reinitialize the environment, e.g. when the gripper is broken during exploration.
'''
self.shutdown() # shutdown the original env first
self.__init__(self.headless, self.control_mode) # initialize with the same headless mode and control mode
def reset(self, random_target=False):
'''
Reset the environment: set the target position (optionally randomized within a cuboid) and restore the initial arm pose.
'''
# set target object
if random_target: # randomize
pos = list(np.random.uniform(POS_MIN, POS_MAX)) # sample from uniform in valid range
self.target.set_position(pos) # random position
else: # non-randomize
self.target.set_position(self.initial_target_positions) # fixed position
self.target.set_orientation([0,0,0])
self.pr.step()
# set end position to be initialized
if self.control_mode == 'end_position': # JointMode.IK
self.agent.set_control_loop_enabled(True) # ik mode
self.tip_target.set_position(self.initial_tip_positions) # cannot set joint positions directly when in ik mode or force/torque mode
self.pr.step()
# prevent stuck cases: since ik is used for moving, getting stuck can make the ik unsolvable and the reset incorrect,
# so take some random actions when the desired position is not reached.
itr=0
max_itr=10
while np.sum(np.abs(np.array(self.agent_ee_tip.get_position()-np.array(self.initial_tip_positions))))>0.1 and itr<max_itr:
itr+=1
self.step(np.random.uniform(-0.2,0.2,4)) # take random actions for preventing the stuck cases
self.pr.step()
elif self.control_mode == 'joint_velocity': # JointMode.FORCE
self.agent.set_joint_positions(self.initial_joint_positions)
self.pr.step()
# set collidable, for collision detection
self.gripper_left_pad.set_collidable(True) # set the pad on the gripper to be collidable, so as to check collision
self.target.set_collidable(True)
# open the gripper if it's not fully open
if np.sum(self.gripper.get_open_amount())<1.5:
self.gripper.actuate(1, velocity=0.5)
self.pr.step()
return self._get_state() # return current state of the environment
def step(self, action):
'''
Move the robot arm according to the action.
If control_mode=='joint_velocity', action is 7 dim of joint velocity values;
if control_mode=='end_position', action is 3 dim of tip (end of robot arm) position values + 1 dim rotation of gripper.
'''
# initialization
done=False # whether the episode is finished
reward=0
hold_flag=False # whether the object is being held
if self.control_mode == 'end_position':
if action is None or action.shape[0]!=4: # check if action is valid
print('No actions or wrong action dimensions!')
action = list(np.random.uniform(-0.1, 0.1, 4)) # random
self._move(action)
elif self.control_mode == 'joint_velocity':
if action is None or action.shape[0]!=7: # check if action is valid
print('No actions or wrong action dimensions!')
action = list(np.random.uniform(-0.1, 0.1, 7)) # random
self.agent.set_joint_target_velocities(action) # Execute action on arm
self.pr.step()
else:
raise NotImplementedError
ax, ay, az = self.gripper.get_position()
if math.isnan(ax): # capture the broken gripper cases during exploration
print('Gripper position is nan.')
self.reinit()
done=True
tx, ty, tz = self.target.get_position()
offset=0.08 # augmented reward: offset of target position above the target object
sqr_distance = (ax - tx) ** 2 + (ay - ty) ** 2 + (az - (tz+offset)) ** 2 # squared distance between the gripper and the target object
''' for visual-based control only, large time consumption! '''
# current_vision = self.vision_sensor.capture_rgb() # capture a screenshot of the view with vision sensor
# plt.imshow(current_vision)
# plt.savefig('./img/vision.png')
# close the gripper if close enough to the object and the object is detected with the proximity sensor
if sqr_distance < 0.1 and self.proximity_sensor.is_detected(self.target):
# make sure the gripper is open before grasping
self.gripper.actuate(1, velocity=0.5)
self.pr.step()
self.gripper.actuate(0, velocity=0.5) # close the hand: 0 for close, 1 for open; velocity 0.5 ensures the gripper closes within one frame
self.pr.step() # Step the physics simulation
if self._is_holding():
reward += self.reward_offset # extra reward for grasping the object
done=True
hold_flag = True
else:
self.gripper.actuate(1, velocity=0.5)
self.pr.step()
elif np.sum(self.gripper.get_open_amount())<1.5: # if the gripper is closed (not fully open) due to collision or else, open it; get_open_amount() returns a list of gripper joint values
self.gripper.actuate(1, velocity=0.5)
self.pr.step()
else:
pass
# the base reward is negative distance to target
reward -= np.sqrt(sqr_distance)
# case when the object falls off the table
if tz < self.initial_target_positions[2]-self.fall_down_offset:
done = True
reward = -self.reward_offset
# Augmented reward for orientation: a better grasping pose is when the gripper is oriented vertically with respect to the target object.
# Note: the gripper frame differs from the target frame by pi/2 in the z orientation.
desired_orientation = np.concatenate(([np.pi, 0], [self.target.get_orientation()[2]])) # gripper vertical to the target in z and facing downwards
rotation_penalty = -np.sum(np.abs(np.array(self.agent_ee_tip.get_orientation())-desired_orientation))
rotation_norm = 0.02
reward += rotation_norm*rotation_penalty
# Penalty for collision with the table
if self.gripper_left_pad.check_collision(self.table):
reward -= self.penalty_offset
#print('Penalize collision with table.')
if math.isnan(reward): # capture cases of numerical problems
reward = 0.
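# In total, the step reward is: -sqrt(sqr_distance) + 0.02 * rotation_penalty (- penalty_offset if colliding with the table),
# plus reward_offset for a successful grasp, or overridden with -reward_offset if the object has fallen off the table.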
return self._get_state(), reward, done, {'finished': hold_flag}
def shutdown(self):
''' Close the simulator '''
self.pr.stop()
self.pr.shutdown()
if __name__ == '__main__':
CONTROL_MODE='joint_velocity' # 'end_position' or 'joint_velocity'
env=GraspEnv(headless=False, control_mode=CONTROL_MODE)
for eps in range(30):
env.reset()
for step in range(30):
if CONTROL_MODE=='end_position':
action=np.random.uniform(-0.2,0.2,4) # 4 dim control for 'end_position': 3 positions and 1 rotation (z-axis)
elif CONTROL_MODE=='joint_velocity':
action=np.random.uniform(-2.,2.,7)
else:
raise NotImplementedError
try:
env.step(action)
except KeyboardInterrupt:
print('Shut Down!')
env.shutdown()
env.shutdown()