Merge pull request #3 from JeanElsner/jean/dev/env
Refactor environment workflow
JeanElsner authored Nov 17, 2023
2 parents 48e3a7b + 17fd026 commit f0f3fb5
Showing 17 changed files with 447 additions and 318 deletions.
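Every example below follows the same migration: the old env_builder.PandaEnvironmentBuilder workflow is replaced by the new environment.PandaEnvironment class. A minimal sketch of the new workflow, assembled only from the changed lines in this commit (robot_ip=None stands in for the parsed --robot-ip argument; treat this as a sketch, not API documentation):

# Old workflow (removed throughout this commit):
#   from dm_robotics.panda import env_builder
#   panda_env_builder = env_builder.PandaEnvironmentBuilder(robot_params)
#   with panda_env_builder.build_task_environment() as env: ...

# New workflow (added throughout this commit):
from dm_robotics.panda import environment
from dm_robotics.panda import parameters as params

robot_params = params.RobotParams(robot_ip=None)  # no IP: simulation only
panda_env = environment.PandaEnvironment(robot_params)
with panda_env.build_task_environment() as env:
    pass  # step an agent against env here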
21 changes: 21 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,21 @@
{
"editor.tabSize": 2,

"pylint.args": [
"--rcfile=.pylintrc"
],

"yapf.args": [
"--style",
"{based_on_style: google, indent_width: 2}"
],

"[python]": {
"editor.formatOnSaveMode": "file",
"editor.formatOnSave": true,
"editor.defaultFormatter": "eeyore.yapf",
"editor.codeActionsOnSave": {
"source.organizeImports": true
},
},
}
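Two notes on the new settings: VS Code parses settings.json as JSONC, so the trailing commas above are accepted, and the yapf style ({based_on_style: google, indent_width: 2}) matches the "editor.tabSize" of 2, so manual edits and format-on-save agree.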
2 changes: 1 addition & 1 deletion .vscode/tasks.json
@@ -21,7 +21,7 @@
{
"label": "doc-api",
"type": "shell",
"command": "touch src/dm_robotics/__init__.py && sphinx-apidoc -d 1 -M -T -f -o doc src/dm_robotics && rm src/dm_robotics/__init__.py && rm doc/dm_robotics.rst"
"command": "touch src/dm_robotics/__init__.py && sphinx-apidoc -e -d 1 -M -T -f -o doc src/dm_robotics && rm src/dm_robotics/__init__.py && rm doc/dm_robotics.rst"
}
]
}
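The only functional change to the doc-api task is the added -e flag, which makes sphinx-apidoc put each module's documentation on its own page instead of folding submodules into the package page.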
1 change: 1 addition & 0 deletions examples/assets/two_arm.xml
@@ -15,6 +15,7 @@
friction='0.4'/>
<light directional='true' diffuse='.7 .7 .7' pos='1 .1 2' dir='0 -.1 -2' specular='.3 .3 .3' castshadow='true'/>
<body pos="0 0 1" euler="0 0 30">
<joint type="hinge" axis="0 0 1" damping="2000" />
<geom type="box" size=".15 .2 .15" />
<geom type="cylinder" size=".1 .3" />
<geom type="sphere" size=".2" pos="0 0 .4" />
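The added hinge joint matters because a MuJoCo body without joints is welded to its parent: the torso at pos="0 0 1" can now rotate about the vertical axis, with damping="2000" keeping that motion slow enough for the two arms to interact with it.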
10 changes: 5 additions & 5 deletions examples/gripper.py
@@ -13,7 +13,7 @@
import numpy as np
from dm_env import specs

-from dm_robotics.panda import env_builder
+from dm_robotics.panda import environment
from dm_robotics.panda import parameters as params
from dm_robotics.panda import run_loop, utils

@@ -48,12 +48,12 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray:
parser = utils.default_arg_parser()
args = parser.parse_args()

-# The Panda model and environment support many customization
-# parameters. Here we use only the defaults and robot IP if provided.
+# Use RobotParams to customize Panda robots added to the environment.
+# We use robot_ip for hardware-in-the-loop operation.
robot_params = params.RobotParams(robot_ip=args.robot_ip)
-panda_env_builder = env_builder.PandaEnvironmentBuilder(robot_params)
+panda_env = environment.PandaEnvironment(robot_params)

-with panda_env_builder.build_task_environment() as env:
+with panda_env.build_task_environment() as env:
# Print the full action, observation and reward specification.
utils.full_spec(env)
# Initialize the agent.
14 changes: 5 additions & 9 deletions examples/haptics.py
@@ -8,10 +8,10 @@

import dm_env
import numpy as np
-from dm_control import mjcf
+from dm_control import composer, mjcf
from dm_env import specs

-from dm_robotics.panda import arm_constants, env_builder
+from dm_robotics.panda import arm_constants, environment
from dm_robotics.panda import parameters as params
from dm_robotics.panda import utils

@@ -41,20 +41,16 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray:

# Load environment from an MJCF file.
XML_PATH = os.path.join(os.path.dirname(__file__), 'assets', 'haptics.xml')
-arena = mjcf.from_file(XML_PATH)
+arena = composer.Arena(xml_path=XML_PATH)

# Robot parameters include the robot's IP for HIL,
# haptic actuation mode and joint damping.
robot_params = params.RobotParams(robot_ip=args.robot_ip,
actuation=arm_constants.Actuation.HAPTIC,
joint_damping=np.zeros(7))
-# Environment parameters consist of the MJCF element defined above
-# as arena and a higher control timestep for smooth interaction.
-env_params = params.EnvirontmentParameters(arena=arena, control_timestep=0.01)
-panda_env_builder = env_builder.PandaEnvironmentBuilder(robot_params,
-env_params=env_params)
+panda_env = environment.PandaEnvironment(robot_params, arena, 0.01)

-with panda_env_builder.build_task_environment() as env:
+with panda_env.build_task_environment() as env:
# Print the full action, observation and reward specification.
utils.full_spec(env)
# Initialize the agent.
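Beyond the builder-to-class rename, haptics.py now wraps the MJCF file in a composer.Arena instead of loading a raw MJCF root, and the arena and the 0.01 s control timestep are passed to PandaEnvironment directly, replacing the deleted EnvirontmentParameters object.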
9 changes: 4 additions & 5 deletions examples/minimal_working_example.py
@@ -3,7 +3,7 @@
import numpy as np
from dm_env import specs

-from dm_robotics.panda import env_builder
+from dm_robotics.panda import environment
from dm_robotics.panda import parameters as params
from dm_robotics.panda import run_loop, utils

@@ -30,12 +30,11 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray:
parser = utils.default_arg_parser()
args = parser.parse_args()

-# The MoMa model and environment support many customization
-# parameters. Here we use only the defaults.
+# Use RobotParams to customize Panda robots added to the environment.
robot_params = params.RobotParams(robot_ip=args.robot_ip)
-panda_env_builder = env_builder.PandaEnvironmentBuilder(robot_params)
+panda_env = environment.PandaEnvironment(robot_params)

-with panda_env_builder.build_task_environment() as env:
+with panda_env.build_task_environment() as env:
# Print the full action, observation and reward specification
utils.full_spec(env)
# Initialize the agent
10 changes: 5 additions & 5 deletions examples/motion_cartesian.py
@@ -7,7 +7,7 @@
import numpy as np
from dm_env import specs

-from dm_robotics.panda import env_builder
+from dm_robotics.panda import environment
from dm_robotics.panda import parameters as params
from dm_robotics.panda import run_loop, utils

@@ -46,12 +46,12 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray:
parser = utils.default_arg_parser()
args = parser.parse_args()

-# The Panda model and environment support many customization
-# parameters. Here we use only the defaults and robot IP if provided.
+# Use RobotParams to customize Panda robots added to the environment.
+# We use robot_ip for hardware-in-the-loop operation.
robot_params = params.RobotParams(robot_ip=args.robot_ip)
-panda_env_builder = env_builder.PandaEnvironmentBuilder(robot_params)
+panda_env = environment.PandaEnvironment(robot_params)

-with panda_env_builder.build_task_environment() as env:
+with panda_env.build_task_environment() as env:
# Print the full action, observation and reward specification.
utils.full_spec(env)
# Initialize the agent.
10 changes: 5 additions & 5 deletions examples/motion_joint.py
@@ -5,7 +5,7 @@
import numpy as np
from dm_env import specs

-from dm_robotics.panda import arm_constants, env_builder
+from dm_robotics.panda import arm_constants, environment
from dm_robotics.panda import parameters as params
from dm_robotics.panda import run_loop, utils

@@ -36,14 +36,14 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray:
parser = utils.default_arg_parser()
args = parser.parse_args()

-# The Panda model and environment support many customization
-# parameters. Here we use only the robot IP if provided and set
+# Use RobotParams to customize Panda robots added to the environment.
+# We use robot_ip for hardware-in-the-loop operation and set
# the actuation mode to joint velocities.
robot_params = params.RobotParams(
robot_ip=args.robot_ip, actuation=arm_constants.Actuation.JOINT_VELOCITY)
-panda_env_builder = env_builder.PandaEnvironmentBuilder(robot_params)
+panda_env = environment.PandaEnvironment(robot_params)

-with panda_env_builder.build_task_environment() as env:
+with panda_env.build_task_environment() as env:
# Print the full action, observation and reward specification.
utils.full_spec(env)
# Initialize the agent.
7 changes: 3 additions & 4 deletions examples/multiple_robots.py
@@ -3,7 +3,7 @@
import numpy as np
from dm_env import specs

-from dm_robotics.panda import env_builder
+from dm_robotics.panda import environment
from dm_robotics.panda import parameters as params
from dm_robotics.panda import run_loop, utils

@@ -38,10 +38,9 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray:
pose=[.5, -.5, 0, 0, 0, np.pi * 3 / 4])
robot_3 = params.RobotParams(name='robot_3',
pose=[.5, .5, 0, 0, 0, np.pi * 5 / 4])
-panda_env_builder = env_builder.PandaEnvironmentBuilder(
-[robot_1, robot_2, robot_3])
+panda_env = environment.PandaEnvironment([robot_1, robot_2, robot_3])

-with panda_env_builder.build_task_environment() as env:
+with panda_env.build_task_environment() as env:
# Print the full action, observation and reward specification
utils.full_spec(env)
# Initialize the agent
92 changes: 92 additions & 0 deletions examples/rewards_and_observations.py
@@ -0,0 +1,92 @@
""" Minimal working example of the dm_robotics Panda model. """
import typing

import dm_env
import numpy as np
from dm_control import composer, mjcf
from dm_env import specs
from dm_robotics.agentflow import spec_utils
from dm_robotics.agentflow.preprocessors import (observation_transforms,
rewards, timestep_preprocessor)
from dm_robotics.geometry import pose_distribution
from dm_robotics.moma import entity_initializer, prop, robot, sensor
from dm_robotics.moma.models.arenas import empty
from dm_robotics.moma.sensors import prop_pose_sensor

from dm_robotics.panda import environment
from dm_robotics.panda import parameters as params
from dm_robotics.panda import run_loop, utils


class Agent:
""" Agents are used to control a robot's actions given
current observations and rewards. This agent does nothing.
"""

def __init__(self, spec: specs.BoundedArray) -> None:
self._spec = spec

def step(self, timestep: dm_env.TimeStep) -> np.ndarray:
""" Provides robot actions every control-timestep. """
del timestep # not used
action = np.zeros(shape=self._spec.shape, dtype=self._spec.dtype)
return action


if __name__ == '__main__':
# We initialize the default configuration for logging
# and argument parsing. These steps are optional.
utils.init_logging()
parser = utils.default_arg_parser()
args = parser.parse_args()

class Ball(prop.Prop):

def _build(self, *args, **kwargs):
mjcf_root = mjcf.RootElement()
body = mjcf_root.worldbody.add('body', name='prop_root')
body.add('geom', type='sphere', size=[0.05], solref=[0.01, 0.5], mass=10)
super()._build('ball', mjcf_root)

def set_initializer_pose(self, physics: mjcf.Physics, pos: np.ndarray,
quat: np.ndarray,
random_state: np.random.RandomState):
del random_state
self.set_pose(physics, pos, quat)

ball = Ball()
ball_pose_dist = pose_distribution.UniformPoseDistribution(
min_pose_bounds=[0.2, -0.5, 0.1, 0, 0, 0],
max_pose_bounds=[0.5, 0.5, 0.5, 0, 0, 0])
ball_initializer = entity_initializer.PoseInitializer(
ball.set_initializer_pose, ball_pose_dist.sample_pose)

def ball_reward(observation: spec_utils.ObservationValue):
ball_distance = np.linalg.norm(observation['ball_pose'][:3] -
observation['panda_tcp_pos'])
return np.clip(1.0 - ball_distance, 0, 1)

reward = rewards.ComputeReward(
ball_reward,
validation_frequency=timestep_preprocessor.ValidationFrequency.ALWAYS)

# Use RobotParams to customize Panda robots added to the environment.
robot_params = params.RobotParams(robot_ip=args.robot_ip)
panda_env = environment.PandaEnvironment(robot_params)

panda_env.add_timestep_preprocessors([reward])
panda_env.add_props([ball])
panda_env.add_entity_initializers([ball_initializer])
panda_env.add_extra_sensors([prop_pose_sensor.PropPoseSensor(ball, 'ball')])

with panda_env.build_task_environment() as env:
# Print the full action, observation and reward specification
utils.full_spec(env)
# Initialize the agent
agent = Agent(env.action_spec())
# Run the environment and agent either in headless mode or inside the GUI.
if args.gui:
app = utils.ApplicationWithPlot()
app.launch(env, policy=agent.step)
else:
run_loop.run(env, agent, [], max_steps=1000, real_time=True)
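This new example doubles as a reference for the extension points PandaEnvironment exposes before build_task_environment() is called: add_timestep_preprocessors, add_props, add_entity_initializers and add_extra_sensors. Note how the observation keys in ball_reward follow from the setup: 'ball_pose' appears to come from the PropPoseSensor registered under the name 'ball', and 'panda_tcp_pos' from the default robot name 'panda', so renaming either should change the keys accordingly.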
19 changes: 9 additions & 10 deletions examples/two_arm_robot.py
@@ -3,10 +3,10 @@

import dm_env
import numpy as np
-from dm_control import mjcf
+from dm_control import composer, mjcf
from dm_env import specs

-from dm_robotics.panda import env_builder
+from dm_robotics.panda import environment
from dm_robotics.panda import parameters as params
from dm_robotics.panda import run_loop, utils

@@ -39,10 +39,10 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray:
# The environment includes a simple robot frame and
# reference frame for our two-arm robot.
XML_PATH = os.path.join(os.path.dirname(__file__), 'assets', 'two_arm.xml')
-arena = mjcf.from_file(XML_PATH)
-left_frame = arena.find('site', 'left')
-right_frame = arena.find('site', 'right')
-control_frame = arena.find('site', 'control')
+arena = composer.Arena(xml_path=XML_PATH)
+left_frame = arena.mjcf_model.find('site', 'left')
+right_frame = arena.mjcf_model.find('site', 'right')
+control_frame = arena.mjcf_model.find('site', 'control')

# We use the sites defined in the MJCF to attach the robot arms
# to the body. The robot's control site is used as a reference frame
@@ -53,11 +53,10 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray:
right = params.RobotParams(attach_site=right_frame,
name='right',
control_frame=control_frame)
-env_params = params.EnvirontmentParameters(arena=arena)
-panda_env_builder = env_builder.PandaEnvironmentBuilder([left, right],
-env_params)
+env_params = params.EnvirontmentParameters(mjcf_root=arena)
+panda_env = environment.PandaEnvironment([left, right], arena)

-with panda_env_builder.build_task_environment() as env:
+with panda_env.build_task_environment() as env:
# Print the full action, observation and reward specification
utils.full_spec(env)
# Initialize the agent
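As in haptics.py, the arena here is now a composer.Arena, so the attachment and control sites are looked up through arena.mjcf_model.find rather than on the MJCF root itself before being passed as attach_site and control_frame in each robot's RobotParams.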