Merge pull request #3 from JeanElsner/jean/dev/env
Refactor environment workflow
JeanElsner authored Nov 17, 2023
2 parents 48e3a7b + 17fd026 commit f0f3fb5
Showing 17 changed files with 447 additions and 318 deletions.
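Every example below follows the same migration: the old env_builder.PandaEnvironmentBuilder workflow is replaced by the new environment.PandaEnvironment class. A minimal sketch of the new workflow, assembled only from the changed lines in this commit (robot_ip=None stands in for the parsed --robot-ip argument; treat this as a sketch, not API documentation):

# Old workflow (removed throughout this commit):
#   from dm_robotics.panda import env_builder
#   panda_env_builder = env_builder.PandaEnvironmentBuilder(robot_params)
#   with panda_env_builder.build_task_environment() as env: ...

# New workflow (added throughout this commit):
from dm_robotics.panda import environment
from dm_robotics.panda import parameters as params

robot_params = params.RobotParams(robot_ip=None)  # no IP: simulation only
panda_env = environment.PandaEnvironment(robot_params)
with panda_env.build_task_environment() as env:
    pass  # step an agent against env here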
21 changes: 21 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,21 @@
{
"editor.tabSize": 2,

"pylint.args": [
"--rcfile=.pylintrc"
],

"yapf.args": [
"--style",
"{based_on_style: google, indent_width: 2}"
],

"[python]": {
"editor.formatOnSaveMode": "file",
"editor.formatOnSave": true,
"editor.defaultFormatter": "eeyore.yapf",
"editor.codeActionsOnSave": {
"source.organizeImports": true
},
},
}
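Two notes on the new settings: VS Code parses settings.json as JSONC, so the trailing commas above are accepted, and the yapf style ({based_on_style: google, indent_width: 2}) matches the "editor.tabSize" of 2, so manual edits and format-on-save agree.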
2 changes: 1 addition & 1 deletion .vscode/tasks.json
@@ -21,7 +21,7 @@
{
"label": "doc-api",
"type": "shell",
"command": "touch src/dm_robotics/__init__.py && sphinx-apidoc -d 1 -M -T -f -o doc src/dm_robotics && rm src/dm_robotics/__init__.py && rm doc/dm_robotics.rst"
"command": "touch src/dm_robotics/__init__.py && sphinx-apidoc -e -d 1 -M -T -f -o doc src/dm_robotics && rm src/dm_robotics/__init__.py && rm doc/dm_robotics.rst"
}
]
}
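The only functional change to the doc-api task is the added -e flag, which makes sphinx-apidoc put each module's documentation on its own page instead of folding submodules into the package page.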
1 change: 1 addition & 0 deletions examples/assets/two_arm.xml
@@ -15,6 +15,7 @@
friction='0.4'/>
<light directional='true' diffuse='.7 .7 .7' pos='1 .1 2' dir='0 -.1 -2' specular='.3 .3 .3' castshadow='true'/>
<body pos="0 0 1" euler="0 0 30">
<joint type="hinge" axis="0 0 1" damping="2000" />
<geom type="box" size=".15 .2 .15" />
<geom type="cylinder" size=".1 .3" />
<geom type="sphere" size=".2" pos="0 0 .4" />
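The added hinge joint matters because a MuJoCo body without joints is welded to its parent: the torso at pos="0 0 1" can now rotate about the vertical axis, with damping="2000" keeping that motion slow enough for the two arms to interact with it.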
10 changes: 5 additions & 5 deletions examples/gripper.py
@@ -13,7 +13,7 @@
import numpy as np
from dm_env import specs

-from dm_robotics.panda import env_builder
+from dm_robotics.panda import environment
from dm_robotics.panda import parameters as params
from dm_robotics.panda import run_loop, utils

@@ -48,12 +48,12 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray:
parser = utils.default_arg_parser()
args = parser.parse_args()

-# The Panda model and environment support many customization
-# parameters. Here we use only the defaults and robot IP if provided.
+# Use RobotParams to customize Panda robots added to the environment.
+# We use robot_ip for hardware-in-the-loop operation.
robot_params = params.RobotParams(robot_ip=args.robot_ip)
-panda_env_builder = env_builder.PandaEnvironmentBuilder(robot_params)
+panda_env = environment.PandaEnvironment(robot_params)

-with panda_env_builder.build_task_environment() as env:
+with panda_env.build_task_environment() as env:
# Print the full action, observation and reward specification.
utils.full_spec(env)
# Initialize the agent.
14 changes: 5 additions & 9 deletions examples/haptics.py
@@ -8,10 +8,10 @@

import dm_env
import numpy as np
-from dm_control import mjcf
+from dm_control import composer, mjcf
from dm_env import specs

-from dm_robotics.panda import arm_constants, env_builder
+from dm_robotics.panda import arm_constants, environment
from dm_robotics.panda import parameters as params
from dm_robotics.panda import utils

@@ -41,20 +41,16 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray:

# Load environment from an MJCF file.
XML_PATH = os.path.join(os.path.dirname(__file__), 'assets', 'haptics.xml')
-arena = mjcf.from_file(XML_PATH)
+arena = composer.Arena(xml_path=XML_PATH)

# Robot parameters include the robot's IP for HIL,
# haptic actuation mode and joint damping.
robot_params = params.RobotParams(robot_ip=args.robot_ip,
actuation=arm_constants.Actuation.HAPTIC,
joint_damping=np.zeros(7))
-# Environment parameters consist of the MJCF element defined above
-# as arena and a higher control timestep for smooth interaction.
-env_params = params.EnvirontmentParameters(arena=arena, control_timestep=0.01)
-panda_env_builder = env_builder.PandaEnvironmentBuilder(robot_params,
-env_params=env_params)
+panda_env = environment.PandaEnvironment(robot_params, arena, 0.01)

-with panda_env_builder.build_task_environment() as env:
+with panda_env.build_task_environment() as env:
# Print the full action, observation and reward specification.
utils.full_spec(env)
# Initialize the agent.
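Beyond the builder-to-class rename, haptics.py now wraps the MJCF file in a composer.Arena instead of loading a raw MJCF root, and the arena and the 0.01 s control timestep are passed to PandaEnvironment directly, replacing the deleted EnvirontmentParameters object.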
9 changes: 4 additions & 5 deletions examples/minimal_working_example.py
@@ -3,7 +3,7 @@
import numpy as np
from dm_env import specs

-from dm_robotics.panda import env_builder
+from dm_robotics.panda import environment
from dm_robotics.panda import parameters as params
from dm_robotics.panda import run_loop, utils

@@ -30,12 +30,11 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray:
parser = utils.default_arg_parser()
args = parser.parse_args()

-# The MoMa model and environment support many customization
-# parameters. Here we use only the defaults.
+# Use RobotParams to customize Panda robots added to the environment.
robot_params = params.RobotParams(robot_ip=args.robot_ip)
-panda_env_builder = env_builder.PandaEnvironmentBuilder(robot_params)
+panda_env = environment.PandaEnvironment(robot_params)

-with panda_env_builder.build_task_environment() as env:
+with panda_env.build_task_environment() as env:
# Print the full action, observation and reward specification
utils.full_spec(env)
# Initialize the agent
10 changes: 5 additions & 5 deletions examples/motion_cartesian.py
@@ -7,7 +7,7 @@
import numpy as np
from dm_env import specs

-from dm_robotics.panda import env_builder
+from dm_robotics.panda import environment
from dm_robotics.panda import parameters as params
from dm_robotics.panda import run_loop, utils

@@ -46,12 +46,12 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray:
parser = utils.default_arg_parser()
args = parser.parse_args()

-# The Panda model and environment support many customization
-# parameters. Here we use only the defaults and robot IP if provided.
+# Use RobotParams to customize Panda robots added to the environment.
+# We use robot_ip for hardware-in-the-loop operation.
robot_params = params.RobotParams(robot_ip=args.robot_ip)
-panda_env_builder = env_builder.PandaEnvironmentBuilder(robot_params)
+panda_env = environment.PandaEnvironment(robot_params)

-with panda_env_builder.build_task_environment() as env:
+with panda_env.build_task_environment() as env:
# Print the full action, observation and reward specification.
utils.full_spec(env)
# Initialize the agent.
10 changes: 5 additions & 5 deletions examples/motion_joint.py
@@ -5,7 +5,7 @@
import numpy as np
from dm_env import specs

-from dm_robotics.panda import arm_constants, env_builder
+from dm_robotics.panda import arm_constants, environment
from dm_robotics.panda import parameters as params
from dm_robotics.panda import run_loop, utils

@@ -36,14 +36,14 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray:
parser = utils.default_arg_parser()
args = parser.parse_args()

-# The Panda model and environment support many customization
-# parameters. Here we use only the robot IP if provided and set
+# Use RobotParams to customize Panda robots added to the environment.
+# We use robot_ip for hardware-in-the-loop operation and set
# the actuation mode to joint velocities.
robot_params = params.RobotParams(
robot_ip=args.robot_ip, actuation=arm_constants.Actuation.JOINT_VELOCITY)
-panda_env_builder = env_builder.PandaEnvironmentBuilder(robot_params)
+panda_env = environment.PandaEnvironment(robot_params)

-with panda_env_builder.build_task_environment() as env:
+with panda_env.build_task_environment() as env:
# Print the full action, observation and reward specification.
utils.full_spec(env)
# Initialize the agent.
7 changes: 3 additions & 4 deletions examples/multiple_robots.py
@@ -3,7 +3,7 @@
import numpy as np
from dm_env import specs

-from dm_robotics.panda import env_builder
+from dm_robotics.panda import environment
from dm_robotics.panda import parameters as params
from dm_robotics.panda import run_loop, utils

@@ -38,10 +38,9 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray:
pose=[.5, -.5, 0, 0, 0, np.pi * 3 / 4])
robot_3 = params.RobotParams(name='robot_3',
pose=[.5, .5, 0, 0, 0, np.pi * 5 / 4])
-panda_env_builder = env_builder.PandaEnvironmentBuilder(
-[robot_1, robot_2, robot_3])
+panda_env = environment.PandaEnvironment([robot_1, robot_2, robot_3])

-with panda_env_builder.build_task_environment() as env:
+with panda_env.build_task_environment() as env:
# Print the full action, observation and reward specification
utils.full_spec(env)
# Initialize the agent
92 changes: 92 additions & 0 deletions examples/rewards_and_observations.py
@@ -0,0 +1,92 @@
""" Minimal working example of the dm_robotics Panda model. """
import typing

import dm_env
import numpy as np
from dm_control import composer, mjcf
from dm_env import specs
from dm_robotics.agentflow import spec_utils
from dm_robotics.agentflow.preprocessors import (observation_transforms,
rewards, timestep_preprocessor)
from dm_robotics.geometry import pose_distribution
from dm_robotics.moma import entity_initializer, prop, robot, sensor
from dm_robotics.moma.models.arenas import empty
from dm_robotics.moma.sensors import prop_pose_sensor

from dm_robotics.panda import environment
from dm_robotics.panda import parameters as params
from dm_robotics.panda import run_loop, utils


class Agent:
""" Agents are used to control a robot's actions given
current observations and rewards. This agent does nothing.
"""

def __init__(self, spec: specs.BoundedArray) -> None:
self._spec = spec

def step(self, timestep: dm_env.TimeStep) -> np.ndarray:
""" Provides robot actions every control-timestep. """
del timestep # not used
action = np.zeros(shape=self._spec.shape, dtype=self._spec.dtype)
return action


if __name__ == '__main__':
# We initialize the default configuration for logging
# and argument parsing. These steps are optional.
utils.init_logging()
parser = utils.default_arg_parser()
args = parser.parse_args()

class Ball(prop.Prop):

def _build(self, *args, **kwargs):
mjcf_root = mjcf.RootElement()
body = mjcf_root.worldbody.add('body', name='prop_root')
body.add('geom', type='sphere', size=[0.05], solref=[0.01, 0.5], mass=10)
super()._build('ball', mjcf_root)

def set_initializer_pose(self, physics: mjcf.Physics, pos: np.ndarray,
quat: np.ndarray,
random_state: np.random.RandomState):
del random_state
self.set_pose(physics, pos, quat)

ball = Ball()
ball_pose_dist = pose_distribution.UniformPoseDistribution(
min_pose_bounds=[0.2, -0.5, 0.1, 0, 0, 0],
max_pose_bounds=[0.5, 0.5, 0.5, 0, 0, 0])
ball_initializer = entity_initializer.PoseInitializer(
ball.set_initializer_pose, ball_pose_dist.sample_pose)

def ball_reward(observation: spec_utils.ObservationValue):
ball_distance = np.linalg.norm(observation['ball_pose'][:3] -
observation['panda_tcp_pos'])
return np.clip(1.0 - ball_distance, 0, 1)

reward = rewards.ComputeReward(
ball_reward,
validation_frequency=timestep_preprocessor.ValidationFrequency.ALWAYS)

# Use RobotParams to customize Panda robots added to the environment.
robot_params = params.RobotParams(robot_ip=args.robot_ip)
panda_env = environment.PandaEnvironment(robot_params)

panda_env.add_timestep_preprocessors([reward])
panda_env.add_props([ball])
panda_env.add_entity_initializers([ball_initializer])
panda_env.add_extra_sensors([prop_pose_sensor.PropPoseSensor(ball, 'ball')])

with panda_env.build_task_environment() as env:
# Print the full action, observation and reward specification
utils.full_spec(env)
# Initialize the agent
agent = Agent(env.action_spec())
# Run the environment and agent either in headless mode or inside the GUI.
if args.gui:
app = utils.ApplicationWithPlot()
app.launch(env, policy=agent.step)
else:
run_loop.run(env, agent, [], max_steps=1000, real_time=True)
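This new example doubles as a reference for the extension points PandaEnvironment exposes before build_task_environment() is called: add_timestep_preprocessors, add_props, add_entity_initializers and add_extra_sensors. Note how the observation keys in ball_reward follow from the setup: 'ball_pose' appears to come from the PropPoseSensor registered under the name 'ball', and 'panda_tcp_pos' from the default robot name 'panda', so renaming either should change the keys accordingly.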
19 changes: 9 additions & 10 deletions examples/two_arm_robot.py
@@ -3,10 +3,10 @@

import dm_env
import numpy as np
-from dm_control import mjcf
+from dm_control import composer, mjcf
from dm_env import specs

-from dm_robotics.panda import env_builder
+from dm_robotics.panda import environment
from dm_robotics.panda import parameters as params
from dm_robotics.panda import run_loop, utils

@@ -39,10 +39,10 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray:
# The environment includes a simple robot frame and
# reference frame for our two-arm robot.
XML_PATH = os.path.join(os.path.dirname(__file__), 'assets', 'two_arm.xml')
-arena = mjcf.from_file(XML_PATH)
-left_frame = arena.find('site', 'left')
-right_frame = arena.find('site', 'right')
-control_frame = arena.find('site', 'control')
+arena = composer.Arena(xml_path=XML_PATH)
+left_frame = arena.mjcf_model.find('site', 'left')
+right_frame = arena.mjcf_model.find('site', 'right')
+control_frame = arena.mjcf_model.find('site', 'control')

# We use the sites defined in the MJCF to attach the robot arms
# to the body. The robot's control site is used as a reference frame
@@ -53,11 +53,10 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray:
right = params.RobotParams(attach_site=right_frame,
name='right',
control_frame=control_frame)
-env_params = params.EnvirontmentParameters(arena=arena)
-panda_env_builder = env_builder.PandaEnvironmentBuilder([left, right],
-env_params)
+env_params = params.EnvirontmentParameters(mjcf_root=arena)
+panda_env = environment.PandaEnvironment([left, right], arena)

-with panda_env_builder.build_task_environment() as env:
+with panda_env.build_task_environment() as env:
# Print the full action, observation and reward specification
utils.full_spec(env)
# Initialize the agent
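As in haptics.py, the arena here is now a composer.Arena, so the attachment and control sites are looked up through arena.mjcf_model.find rather than on the MJCF root itself before being passed as attach_site and control_frame in each robot's RobotParams.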