diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..3a9df2b --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,21 @@ +{ + "editor.tabSize": 2, + + "pylint.args": [ + "--rcfile=.pylintrc" + ], + + "yapf.args": [ + "--style", + "{based_on_style: google, indent_width: 2}" + ], + + "[python]": { + "editor.formatOnSaveMode": "file", + "editor.formatOnSave": true, + "editor.defaultFormatter": "eeyore.yapf", + "editor.codeActionsOnSave": { + "source.organizeImports": true + }, + }, +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 5d524b4..3fe3ddf 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -21,7 +21,7 @@ { "label": "doc-api", "type": "shell", - "command": "touch src/dm_robotics/__init__.py && sphinx-apidoc -d 1 -M -T -f -o doc src/dm_robotics && rm src/dm_robotics/__init__.py && rm doc/dm_robotics.rst" + "command": "touch src/dm_robotics/__init__.py && sphinx-apidoc -e -d 1 -M -T -f -o doc src/dm_robotics && rm src/dm_robotics/__init__.py && rm doc/dm_robotics.rst" } ] } diff --git a/examples/assets/two_arm.xml b/examples/assets/two_arm.xml index a78899a..8efd397 100644 --- a/examples/assets/two_arm.xml +++ b/examples/assets/two_arm.xml @@ -15,6 +15,7 @@ friction='0.4'/> + diff --git a/examples/gripper.py b/examples/gripper.py index 99ca69c..ff71f4f 100644 --- a/examples/gripper.py +++ b/examples/gripper.py @@ -13,7 +13,7 @@ import numpy as np from dm_env import specs -from dm_robotics.panda import env_builder +from dm_robotics.panda import environment from dm_robotics.panda import parameters as params from dm_robotics.panda import run_loop, utils @@ -48,12 +48,12 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray: parser = utils.default_arg_parser() args = parser.parse_args() - # The Panda model and environment support many customization - # parameters. Here we use only the defaults and robot IP if provided. + # Use RobotParams to customize Panda robots added to the environment. + # We use robot_ip for hardware-in-the-loop operation. robot_params = params.RobotParams(robot_ip=args.robot_ip) - panda_env_builder = env_builder.PandaEnvironmentBuilder(robot_params) + panda_env = environment.PandaEnvironment(robot_params) - with panda_env_builder.build_task_environment() as env: + with panda_env.build_task_environment() as env: # Print the full action, observation and reward specification. utils.full_spec(env) # Initialize the agent. diff --git a/examples/haptics.py b/examples/haptics.py index 55a1563..16b8fef 100644 --- a/examples/haptics.py +++ b/examples/haptics.py @@ -8,10 +8,10 @@ import dm_env import numpy as np -from dm_control import mjcf +from dm_control import composer, mjcf from dm_env import specs -from dm_robotics.panda import arm_constants, env_builder +from dm_robotics.panda import arm_constants, environment from dm_robotics.panda import parameters as params from dm_robotics.panda import utils @@ -41,20 +41,16 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray: # Load environment from an MJCF file. XML_PATH = os.path.join(os.path.dirname(__file__), 'assets', 'haptics.xml') - arena = mjcf.from_file(XML_PATH) + arena = composer.Arena(xml_path=XML_PATH) # Robot parameters include the robot's IP for HIL, # haptic actuation mode and joint damping. 
robot_params = params.RobotParams(robot_ip=args.robot_ip, actuation=arm_constants.Actuation.HAPTIC, joint_damping=np.zeros(7)) - # Environment parameters consist of the MJCF element defined above - # as arena and a higher control timestep for smooth interaction. - env_params = params.EnvirontmentParameters(arena=arena, control_timestep=0.01) - panda_env_builder = env_builder.PandaEnvironmentBuilder(robot_params, - env_params=env_params) + panda_env = environment.PandaEnvironment(robot_params, arena, 0.01) - with panda_env_builder.build_task_environment() as env: + with panda_env.build_task_environment() as env: # Print the full action, observation and reward specification. utils.full_spec(env) # Initialize the agent. diff --git a/examples/minimal_working_example.py b/examples/minimal_working_example.py index 3939c02..992fc4f 100644 --- a/examples/minimal_working_example.py +++ b/examples/minimal_working_example.py @@ -3,7 +3,7 @@ import numpy as np from dm_env import specs -from dm_robotics.panda import env_builder +from dm_robotics.panda import environment from dm_robotics.panda import parameters as params from dm_robotics.panda import run_loop, utils @@ -30,12 +30,11 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray: parser = utils.default_arg_parser() args = parser.parse_args() - # The MoMa model and environment support many customization - # parameters. Here we use only the defaults. + # Use RobotParams to customize Panda robots added to the environment. robot_params = params.RobotParams(robot_ip=args.robot_ip) - panda_env_builder = env_builder.PandaEnvironmentBuilder(robot_params) + panda_env = environment.PandaEnvironment(robot_params) - with panda_env_builder.build_task_environment() as env: + with panda_env.build_task_environment() as env: # Print the full action, observation and reward specification utils.full_spec(env) # Initialize the agent diff --git a/examples/motion_cartesian.py b/examples/motion_cartesian.py index 2a598f3..bcbbd03 100644 --- a/examples/motion_cartesian.py +++ b/examples/motion_cartesian.py @@ -7,7 +7,7 @@ import numpy as np from dm_env import specs -from dm_robotics.panda import env_builder +from dm_robotics.panda import environment from dm_robotics.panda import parameters as params from dm_robotics.panda import run_loop, utils @@ -46,12 +46,12 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray: parser = utils.default_arg_parser() args = parser.parse_args() - # The Panda model and environment support many customization - # parameters. Here we use only the defaults and robot IP if provided. + # Use RobotParams to customize Panda robots added to the environment. + # We use robot_ip for hardware-in-the-loop operation. robot_params = params.RobotParams(robot_ip=args.robot_ip) - panda_env_builder = env_builder.PandaEnvironmentBuilder(robot_params) + panda_env = environment.PandaEnvironment(robot_params) - with panda_env_builder.build_task_environment() as env: + with panda_env.build_task_environment() as env: # Print the full action, observation and reward specification. utils.full_spec(env) # Initialize the agent. 
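NOTE: The example diffs above and below all follow the same migration pattern: `env_builder.PandaEnvironmentBuilder` is replaced by `environment.PandaEnvironment`, while `build_task_environment()` remains the entry point. A minimal sketch of the new API, not part of this patch, assuming the default empty arena and a zero-action agent like the ones in the examples:

  import numpy as np
  from dm_robotics.panda import environment, run_loop
  from dm_robotics.panda import parameters as params

  class Agent:
    """Zero-action agent, as in the examples."""

    def __init__(self, spec):
      self._spec = spec

    def step(self, timestep):
      return np.zeros(shape=self._spec.shape, dtype=self._spec.dtype)

  # Set robot_ip=... for hardware-in-the-loop operation.
  robot_params = params.RobotParams()
  panda_env = environment.PandaEnvironment(robot_params)
  with panda_env.build_task_environment() as env:
    agent = Agent(env.action_spec())
    run_loop.run(env, agent, [], max_steps=100, real_time=True)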
diff --git a/examples/motion_joint.py b/examples/motion_joint.py index 5b5ed45..53dde5e 100644 --- a/examples/motion_joint.py +++ b/examples/motion_joint.py
@@ -5,7 +5,7 @@
 import numpy as np
 from dm_env import specs

-from dm_robotics.panda import arm_constants, env_builder
+from dm_robotics.panda import arm_constants, environment
 from dm_robotics.panda import parameters as params
 from dm_robotics.panda import run_loop, utils

@@ -36,14 +36,14 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray:
   parser = utils.default_arg_parser()
   args = parser.parse_args()

-  # The Panda model and environment support many customization
-  # parameters. Here we use only the robot IP if provided and set
+  # Use RobotParams to customize Panda robots added to the environment.
+  # We use robot_ip for hardware-in-the-loop operation and set
   # the actuation mode to joint velocities.
   robot_params = params.RobotParams(
       robot_ip=args.robot_ip, actuation=arm_constants.Actuation.JOINT_VELOCITY)
-  panda_env_builder = env_builder.PandaEnvironmentBuilder(robot_params)
+  panda_env = environment.PandaEnvironment(robot_params)

-  with panda_env_builder.build_task_environment() as env:
+  with panda_env.build_task_environment() as env:
     # Print the full action, observation and reward specification.
     utils.full_spec(env)
     # Initialize the agent.
diff --git a/examples/multiple_robots.py b/examples/multiple_robots.py index 5152a02..988dac7 100644 --- a/examples/multiple_robots.py +++ b/examples/multiple_robots.py
@@ -3,7 +3,7 @@
 import numpy as np
 from dm_env import specs

-from dm_robotics.panda import env_builder
+from dm_robotics.panda import environment
 from dm_robotics.panda import parameters as params
 from dm_robotics.panda import run_loop, utils

@@ -38,10 +38,9 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray:
                                pose=[.5, -.5, 0, 0, 0, np.pi * 3 / 4])
   robot_3 = params.RobotParams(name='robot_3',
                                pose=[.5, .5, 0, 0, 0, np.pi * 5 / 4])
-  panda_env_builder = env_builder.PandaEnvironmentBuilder(
-      [robot_1, robot_2, robot_3])
+  panda_env = environment.PandaEnvironment([robot_1, robot_2, robot_3])

-  with panda_env_builder.build_task_environment() as env:
+  with panda_env.build_task_environment() as env:
     # Print the full action, observation and reward specification
     utils.full_spec(env)
     # Initialize the agent
diff --git a/examples/rewards_and_observations.py b/examples/rewards_and_observations.py new file mode 100644 index 0000000..f120b31 --- /dev/null +++ b/examples/rewards_and_observations.py
@@ -0,0 +1,92 @@
+""" Example demonstrating custom rewards and observations. """
+import typing
+
+import dm_env
+import numpy as np
+from dm_control import composer, mjcf
+from dm_env import specs
+from dm_robotics.agentflow import spec_utils
+from dm_robotics.agentflow.preprocessors import (observation_transforms,
+                                                 rewards, timestep_preprocessor)
+from dm_robotics.geometry import pose_distribution
+from dm_robotics.moma import entity_initializer, prop, robot, sensor
+from dm_robotics.moma.models.arenas import empty
+from dm_robotics.moma.sensors import prop_pose_sensor
+
+from dm_robotics.panda import environment
+from dm_robotics.panda import parameters as params
+from dm_robotics.panda import run_loop, utils
+
+
+class Agent:
+  """ Agents are used to control a robot's actions given
+  current observations and rewards. This agent does nothing.
+ """ + + def __init__(self, spec: specs.BoundedArray) -> None: + self._spec = spec + + def step(self, timestep: dm_env.TimeStep) -> np.ndarray: + """ Provides robot actions every control-timestep. """ + del timestep # not used + action = np.zeros(shape=self._spec.shape, dtype=self._spec.dtype) + return action + + +if __name__ == '__main__': + # We initialize the default configuration for logging + # and argument parsing. These steps are optional. + utils.init_logging() + parser = utils.default_arg_parser() + args = parser.parse_args() + + class Ball(prop.Prop): + + def _build(self, *args, **kwargs): + mjcf_root = mjcf.RootElement() + body = mjcf_root.worldbody.add('body', name='prop_root') + body.add('geom', type='sphere', size=[0.05], solref=[0.01, 0.5], mass=10) + super()._build('ball', mjcf_root) + + def set_initializer_pose(self, physics: mjcf.Physics, pos: np.ndarray, + quat: np.ndarray, + random_state: np.random.RandomState): + del random_state + self.set_pose(physics, pos, quat) + + ball = Ball() + ball_pose_dist = pose_distribution.UniformPoseDistribution( + min_pose_bounds=[0.2, -0.5, 0.1, 0, 0, 0], + max_pose_bounds=[0.5, 0.5, 0.5, 0, 0, 0]) + ball_initializer = entity_initializer.PoseInitializer( + ball.set_initializer_pose, ball_pose_dist.sample_pose) + + def ball_reward(observation: spec_utils.ObservationValue): + ball_distance = np.linalg.norm(observation['ball_pose'][:3] - + observation['panda_tcp_pos']) + return np.clip(1.0 - ball_distance, 0, 1) + + reward = rewards.ComputeReward( + ball_reward, + validation_frequency=timestep_preprocessor.ValidationFrequency.ALWAYS) + + # Use RobotParams to customize Panda robots added to the environment. + robot_params = params.RobotParams(robot_ip=args.robot_ip) + panda_env = environment.PandaEnvironment(robot_params) + + panda_env.add_timestep_preprocessors([reward]) + panda_env.add_props([ball]) + panda_env.add_entity_initializers([ball_initializer]) + panda_env.add_extra_sensors([prop_pose_sensor.PropPoseSensor(ball, 'ball')]) + + with panda_env.build_task_environment() as env: + # Print the full action, observation and reward specification + utils.full_spec(env) + # Initialize the agent + agent = Agent(env.action_spec()) + # Run the environment and agent either in headless mode or inside the GUI. + if args.gui: + app = utils.ApplicationWithPlot() + app.launch(env, policy=agent.step) + else: + run_loop.run(env, agent, [], max_steps=1000, real_time=True) diff --git a/examples/two_arm_robot.py b/examples/two_arm_robot.py index 8ebfb93..d6019b7 100644 --- a/examples/two_arm_robot.py +++ b/examples/two_arm_robot.py @@ -3,10 +3,10 @@ import dm_env import numpy as np -from dm_control import mjcf +from dm_control import composer, mjcf from dm_env import specs -from dm_robotics.panda import env_builder +from dm_robotics.panda import environment from dm_robotics.panda import parameters as params from dm_robotics.panda import run_loop, utils @@ -39,10 +39,10 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray: # The environment includes a simple robot frame and # reference frame for our two-arm robot. 
   XML_PATH = os.path.join(os.path.dirname(__file__), 'assets', 'two_arm.xml')
-  arena = mjcf.from_file(XML_PATH)
-  left_frame = arena.find('site', 'left')
-  right_frame = arena.find('site', 'right')
-  control_frame = arena.find('site', 'control')
+  arena = composer.Arena(xml_path=XML_PATH)
+  left_frame = arena.mjcf_model.find('site', 'left')
+  right_frame = arena.mjcf_model.find('site', 'right')
+  control_frame = arena.mjcf_model.find('site', 'control')

   # We use the sites defined in the MJCF to attach the robot arms
   # to the body. The robot's control site is used as a reference frame
@@ -53,11 +53,9 @@ def step(self, timestep: dm_env.TimeStep) -> np.ndarray:
   right = params.RobotParams(attach_site=right_frame,
                              name='right',
                              control_frame=control_frame)
-  env_params = params.EnvirontmentParameters(arena=arena)
-  panda_env_builder = env_builder.PandaEnvironmentBuilder([left, right],
-                                                          env_params)
+  panda_env = environment.PandaEnvironment([left, right], arena)

-  with panda_env_builder.build_task_environment() as env:
+  with panda_env.build_task_environment() as env:
     # Print the full action, observation and reward specification
     utils.full_spec(env)
     # Initialize the agent
diff --git a/src/dm_robotics/panda/arm.py b/src/dm_robotics/panda/arm.py index 7d3f17c..a8deb35 100644 --- a/src/dm_robotics/panda/arm.py +++ b/src/dm_robotics/panda/arm.py
@@ -56,7 +56,16 @@ class _ActuatorParams:


 class Panda(robot_arm.RobotArm):
-  """A class representing a Panda robot arm."""
+  """A class representing a Panda robot arm.
+
+  Args:
+    name: The name of this robot. Used as a prefix in the MJCF name
+      attributes.
+    actuation: Instance of :py:class:`dm_robotics.panda.arm_constants.Actuation`
+      specifying which actuation mode to use.
+    use_rotated_gripper: If True, mounts the gripper in a rotated position to
+      match the real placement of the gripper on the physical Panda.
+  """

   # Define member variables that are created in the _build function. This is to
   # comply with pytype correctly.
@@ -74,16 +83,6 @@ def _build(self,
              actuation: consts.Actuation = consts.Actuation.CARTESIAN_VELOCITY,
              use_rotated_gripper: bool = True,
              hardware=None) -> None:
-    """Initializes Panda.
-
-    Args:
-      name: The name of this robot. Used as a prefix in the MJCF name
-        attributes.
-      actuation: Instance of `consts.Actuation` specifying which
-        actuation mode to use.
-      use_rotated_gripper: If True, mounts the gripper in a rotated position to
-        match the real placement of the gripper on the physical Panda.
-    """
     self.hardware = hardware
     self._mjcf_root = mjcf.from_path(consts.XML_PATH)
     self._mjcf_root.model = name
@@ -97,14 +96,6 @@ def initialize_episode(self, physics: mjcf.Physics,
     """Function called at the beginning of every episode."""
     del random_state  # Unused.

-    # Apply gravity compensation
-    # body_elements = self.mjcf_model.find_all('body')
-    # gravity = np.hstack([physics.model.opt.gravity, [0, 0, 0]])
-    # physics_bodies = physics.bind(body_elements)
-    # if physics_bodies is None:
-    #   raise ValueError('Calling physics.bind with bodies returns None.')
-    # physics_bodies.xfrc_applied[:] = -gravity * physics_bodies.mass[..., None]
-
   @property
   def joints(self) -> List[types.MjcfElement]:
     """List of joint elements belonging to the arm."""
@@ -184,16 +175,6 @@ def set_joint_angles(self, physics: mjcf.Physics,

   def after_substep(self, physics: mjcf.Physics,
                     random_state: np.random.RandomState) -> None:
-    """A callback which is executed after a simulation step.
-
-    This function is necessary when using the integrated velocity mujoco
-    actuator. Mujoco will limit the incoming velocity but the hidden state of
-    the integrated velocity actuators must be clipped to the actuation range.
-
-    Args:
-      physics: An instance of `mjcf.Physics`.
-      random_state: An instance of `np.random.RandomState`.
-    """
     del random_state  # Unused.

     # Clip the actuator.act with the actuator limits.
@@ -206,7 +187,6 @@ def after_substep(self, physics: mjcf.Physics,
                       a_max=consts.JOINT_LIMITS['max'])

   def _add_mjcf_elements(self, use_rotated_gripper: bool):
-    """Defines the arms MJCF joints and sensors."""
     self._joints = [
         self._mjcf_root.find('joint', j) for j in consts.JOINT_NAMES
     ]
@@ -232,7 +212,6 @@ def _add_mjcf_elements(self, use_rotated_gripper: bool):
     self._attachment_site = self._wrist_site

   def _add_actuators(self):
-    """Adds the Mujoco actuators to the robot arm."""
     if self._actuation not in consts.Actuation:
       raise ValueError((f'Actuation {self._actuation} is not a valid actuation.'
                         'Please specify one of '
@@ -246,14 +225,6 @@ def _add_actuators(self):
       self._add_mjcf_actuators(dyntype='none')

   def _add_mjcf_actuators(self, dyntype: str = 'integrator') -> None:
-    """Adds integrated velocity actuators to the mjcf model.
-
-    This function adds integrated velocity actuators and default class
-    attributes to the mjcf model according to the values in `sawyer_constants`,
-    `_SAWYER_ACTUATOR_PARAMS` and `_INTEGRATED_VELOCITY_DEFAULT_DCLASS`.
-    `self._actuators` is created to contain the list of actuators created.
-    """
-
     # Construct list of ctrlrange tuples from act limits and actuation mode.
     ctrl_ranges = list(
         zip(consts.ACTUATION_LIMITS[self._actuation]['min'],
@@ -392,9 +363,10 @@ def set_control(self, physics: mjcf.Physics, command: np.ndarray) -> None:


 class ArmEffector(arm_effector.ArmEffector):
-  """Robot arm effector for the Panda MoMa model that takes `parameters.RobotParams`
+  """Robot arm effector for the Panda MoMa model that takes
+  :py:class:`dm_robotics.panda.parameters.RobotParams`
   and changes the joint stiffness and damping of the robot arm. Otherwise behaves
-  likes `dm_robotics.moma.effectors.arm_effector.ArmEffector`."""
+  like :py:class:`dm_robotics.moma.effectors.arm_effector.ArmEffector`."""

   def __init__(self, robot_params: params.RobotParams, arm: robot_arm.RobotArm):
     """
@@ -481,6 +453,20 @@ def _project_wrench(self, wrench: np.ndarray,
     return torque.astype(np.float32)


+@enum.unique
+class ControlObservations(enum.Enum):
+  """Observations exposed by this sensor in control frame."""
+  POS = '{}_pos_control'
+  QUAT = '{}_quat_control'
+  POSE = '{}_pose_control'
+  RMAT = '{}_rmat_control'
+  VEL = '{}_vel_control'
+
+  def get_obs_key(self, name: str) -> str:
+    """Returns the key to the observation in the observables dict."""
+    return self.value.format(name)
+
+
 class RobotTCPSensor(site_sensor.SiteSensor):
   """Version of `dm_robotics.moma.sensors.site_sensor.SiteSensor` that
   takes tool center point pose measurements of a gripper and accepts
@@ -497,30 +483,38 @@ def __init__(self, gripper: robot_hand.AnyRobotHand,
                as reference frame.
Falls back to world frame if `None`.""" super().__init__(gripper.tool_center_point, f'{robot_params.name}_tcp') self._frame = robot_params.control_frame + self._observables.update({ + self.get_obs_key(ControlObservations.POS): + observable.Generic(self._pos_control), + self.get_obs_key(ControlObservations.QUAT): + observable.Generic(self._quat_control), + self.get_obs_key(ControlObservations.RMAT): + observable.Generic(self._rmat_control), + self.get_obs_key(ControlObservations.POSE): + observable.Generic(self._pose_control), + self.get_obs_key(ControlObservations.VEL): + observable.Generic(self._vel_control), + }) + for obs in self._observables.values(): + obs.enabled = True - def _site_pos(self, physics: mjcf.Physics) -> np.ndarray: - return self._site_pose(physics)[:3] + def _pos_control(self, physics: mjcf.Physics) -> np.ndarray: + return self._pose_control(physics)[:3] - def _site_quat(self, physics: mjcf.Physics) -> np.ndarray: - return self._site_pose(physics)[3:] + def _quat_control(self, physics: mjcf.Physics) -> np.ndarray: + return self._pose_control(physics)[3:] - def _site_pose(self, physics: mjcf.Physics) -> np.ndarray: + def _pose_control(self, physics: mjcf.Physics) -> np.ndarray: pos = physics.bind(self._site).xpos quat = tr.mat_to_quat(np.reshape(physics.bind(self._site).xmat, [3, 3])) quat = tr.positive_leading_quat(quat) return geometry.PoseStamped(geometry.Pose(pos, quat)).get_relative_pose( self._frame, mujoco_physics.wrap(physics)).to_posquat() - # return geometry.PoseStamped(tr.pos_quat_to_hmat( - # pos, - # quat)).get_relative_pose(self._frame, - # mujoco_physics.wrap(physics)).to_posquat() - # return geometry.PoseStamped(tr.pos_quat_to_hmat(pos, quat)).to_frame( - # self._frame, mujoco_physics.wrap(physics)).pose.to_posquat() - def _site_rmat(self, physics: mjcf.Physics) -> np.ndarray: - return tr.quat_to_mat(self._site_quat(physics))[:3, :3].reshape((-1,)) + def _rmat_control(self, physics: mjcf.Physics) -> np.ndarray: + return tr.quat_to_mat(self._quat_control(physics))[:3, :3].reshape((-1,)) - def _site_vel_world(self, physics: mjcf.Physics) -> np.ndarray: + def _vel_control(self, physics: mjcf.Physics) -> np.ndarray: return geometry.TwistStamped(super()._site_vel_world(physics), None).get_relative_twist( self._frame, diff --git a/src/dm_robotics/panda/arm_constants.py b/src/dm_robotics/panda/arm_constants.py index dbd72fe..b609094 100644 --- a/src/dm_robotics/panda/arm_constants.py +++ b/src/dm_robotics/panda/arm_constants.py @@ -9,15 +9,14 @@ class Actuation(enum.Enum): - """Available actuation methods for the Panda MoMa model. Note that - `POSITION` is supported in simulation only and is used to track - the real robot joint positions in haptic mode to generate realistic - constraint forces. The actuation methods use the impedance as - defined in `panda.parameters.RobotParams` where applicable.""" + """Available actuation methods for the Panda MoMa model. 
+ The actuation methods use the joint stiffness and damping as + defined in :py:class:`dm_robotics.panda.parameters.RobotParams` where applicable.""" CARTESIAN_VELOCITY = 0 + """Cartesian end-effector velocity control.""" JOINT_VELOCITY = 1 """Joint actuators receive a velocity signal that is integrated and - tracked by a position controller.""" + tracked by a position controller.""" HAPTIC = 2 """Enables haptic interaction between a physical Panda robot and the simulation.""" diff --git a/src/dm_robotics/panda/env_builder.py b/src/dm_robotics/panda/env_builder.py deleted file mode 100644 index f31a8f7..0000000 --- a/src/dm_robotics/panda/env_builder.py +++ /dev/null @@ -1,195 +0,0 @@ -"""RL environment builder for the Panda MoMa model. - This module allows you to build a full RL environment, - including fully configured arena, initializers, robot and task. - As such it can be considered the main module for users to - interact with. The default configuration is a ready to run - `dm_control.rl.control.Environment` but all aspects like e.g. - reward, observation and action spec can be fully customized.""" -import dataclasses -from typing import Callable, Optional, Sequence, Union - -from dm_control import composer, mjcf -from dm_control.rl import control -from dm_robotics.agentflow.preprocessors import (observation_transforms, - timestep_preprocessor) -from dm_robotics.geometry import joint_angles_distribution, pose_distribution -from dm_robotics.moma import base_task, entity_initializer, prop -from dm_robotics.moma import robot as robot_module -from dm_robotics.moma import (scene_initializer, subtask_env, - subtask_env_builder) -from dm_robotics.moma.models.arenas import empty - -from dm_robotics import agentflow as af - -from . import arm, hardware -from . import parameters as params -from . import utils - - -class UserArena(empty.Arena): - - def _build(self, root_element: mjcf.RootElement): - self._mjcf_root = root_element - self._ground = self._mjcf_root.find('geom', 'ground') - - -class PandaEnvironmentBuilder: - """Subtask environment builder class for the Panda MoMa model. 
- - Execution order: - - * build_arena - * build_robot - * build_entity_initializer - * build_scene_initializer""" - - def __init__( - self, - robot_params: Union[params.RobotParams, - Sequence[params.RobotParams]] = params.RobotParams(), - env_params: params.EnvirontmentParameters = params.EnvirontmentParameters( - ), - builder_extensions: params.BuilderExtensions = params.BuilderExtensions(), - timestep_preprocessors: Optional[Sequence[ - timestep_preprocessor.TimestepPreprocessor]] = None - ) -> None: - self._robot_params = robot_params if isinstance( - robot_params, Sequence) else [robot_params] - self._env_params = env_params - self._builder_extensions = builder_extensions - self._timestep_preprocessors = timestep_preprocessors - - def build_task_environment(self) -> subtask_env.SubTaskEnvironment: - """Builds an rl environment for the Panda robot.""" - task = self._build_task() - - env_builder = subtask_env_builder.SubtaskEnvBuilder() - env_builder.set_task(task) - - base_env = env_builder.build_base_env() - physics = base_env.physics - - parent_action_spec = task.effectors_action_spec(physics) - full_action_space = af.IdentityActionSpace(parent_action_spec) - - if self._timestep_preprocessors is not None: - for preproc in self._timestep_preprocessors: - env_builder.add_preprocessor(preproc) - - env_builder.set_action_space(full_action_space) - env = env_builder.build() - - return env - - def _bulid_arena(self) -> composer.Arena: - arena = empty.Arena() - if self._env_params.arena is not None: - arena = UserArena(self._env_params.arena) - # arena.mjcf_model.include_copy(self._env_params.arena, - # override_attributes=True) - arena.mjcf_model.compiler.angle = 'degree' - arena.mjcf_model.option.timestep = 0.002 - if self._builder_extensions.build_arena is not None: - self._builder_extensions.build_arena(arena) - return arena - - def _build_scene_initializer( - self, robots: Sequence[robot_module.Robot], - arena: composer.Arena) -> base_task.SceneInitializer: - scene_initializers = [] - if self._builder_extensions.build_scene_initializer is not None: - scene_initializers.append( - self._builder_extensions.build_scene_initializer(robots, arena)) - for robot, robot_params in zip(robots, self._robot_params): - if robot_params.pose is not None: - arm_pose = pose_distribution.ConstantPoseDistribution(robot_params.pose) - scene_initializers.append( - scene_initializer.EntityPoseInitializer(robot.arm_frame, - arm_pose.sample_pose)) - return scene_initializer.CompositeSceneInitializer(scene_initializers) - - def _build_entity_initializer( - self, robots: Sequence[robot_module.Robot], - arena: composer.Arena) -> entity_initializer.base_initializer.Initializer: - entity_initializers = [] - if self._builder_extensions.build_entity_initializer is not None: - entity_initializers.append( - self._builder_extensions.build_entity_initializer(robots, arena)) - for robot, robot_params in zip(robots, self._robot_params): - arm_joint_values = joint_angles_distribution.ConstantPanTiltDistribution( - robot_params.joint_values) - arm_init = entity_initializer.JointsInitializer( - robot.position_arm_joints, arm_joint_values.sample_angles) - entity_initializers.append(arm_init) - if robot_params.has_hand: - gripper_joint_values = joint_angles_distribution.ConstantPanTiltDistribution( - [.04, .04]) - initialize_gripper = entity_initializer.JointsInitializer( - robot.gripper.set_joint_angles, gripper_joint_values.sample_angles) - entity_initializers.append(initialize_gripper) - return 
entity_initializer.TaskEntitiesInitializer(entity_initializers) - - def _build_robot(self) -> Sequence[robot_module.Robot]: - robots = [] - for robot_params in self._robot_params: - if robot_params.robot_ip is not None: - robot = hardware.build_robot(robot_params) - else: - robot = arm.build_robot(robot_params) - robots.append(robot) - if self._builder_extensions.build_robots is not None: - self._builder_extensions.build_robots(robots) - return robots - - def _build_props(self) -> Sequence[prop.Prop]: - return [] - - def _build_task(self) -> base_task.BaseTask: - arena = self._bulid_arena() - robots = self._build_robot() - - for robot, robot_params in zip(robots, self._robot_params): - arena.attach(robot.arm, robot_params.attach_site) - - init_episode = self._build_entity_initializer(robots, arena) - init_scene = self._build_scene_initializer(robots, arena) - - extra_effectors = [] - - if self._builder_extensions.build_extra_effectors is not None: - extra_effectors = self._builder_extensions.build_extra_effectors( - robots, arena) - - # block = prop.Block() - # # frame = arena.attach(block) - # frame = arena.add_free_entity(block) - # block.set_freejoint(frame.freejoint) - - # gripper_pose_dist = pose_distribution.UniformPoseDistribution( - # # Provide 6D min and max bounds for the 3D position and 3D euler angles. - # min_pose_bounds=np.array( - # [0.5, -0.1, 0.1, 0.75 * np.pi, -0.25 * np.pi, -0.5 * np.pi]), - # max_pose_bounds=np.array( - # [0.7, 0.1, 0.2, 1.25 * np.pi, 0.25 * np.pi, 0.5 * np.pi])) - - # initialize_arm = entity_initializer.PoseInitializer( - # moma_robot.position_gripper, gripper_pose_dist.sample_pose) - - # block_pose_dist = pose_distribution.UniformPoseDistribution( - # min_pose_bounds=[0.1, -0.5, 0.02, 0, 0, 0], - # max_pose_bounds=[0.5, 0.5, 0.02, 0, 0, 0]) - # initialize_block = entity_initializer.PoseInitializer( - # block.set_pose, block_pose_dist.sample_pose) - # init_episode = entity_initializer.TaskEntitiesInitializer([init_episode, initialize_block]) - - task = base_task.BaseTask( - task_name='panda', - arena=arena, - robots=robots, - props=[], - extra_sensors=[utils.TimeSensor()], - extra_effectors=extra_effectors, - control_timestep=self._env_params.control_timestep, - scene_initializer=init_scene, - episode_initializer=init_episode) - return task diff --git a/src/dm_robotics/panda/environment.py b/src/dm_robotics/panda/environment.py new file mode 100644 index 0000000..6d4bd2d --- /dev/null +++ b/src/dm_robotics/panda/environment.py @@ -0,0 +1,140 @@ +"""RL environment builder for the Panda MoMa model. + This module allows you to build a full RL environment, + including fully configured arena, initializers, robot and task. + As such it can be considered the main module for users to + interact with. The default configuration is a ready to run + `dm_control.rl.control.Environment` but all aspects like e.g. 
+ reward, observation and action spec can be fully customized."""
+import dataclasses
+from typing import Callable, Optional, Sequence, Union
+
+from dm_control import composer, mjcf
+from dm_control.rl import control
+from dm_robotics.agentflow.preprocessors import (observation_transforms,
+                                                 timestep_preprocessor)
+from dm_robotics.geometry import joint_angles_distribution, pose_distribution
+from dm_robotics.moma import base_task, entity_initializer, prop
+from dm_robotics.moma import robot as robot_module
+from dm_robotics.moma import (scene_initializer, sensor, subtask_env,
+                              subtask_env_builder)
+from dm_robotics.moma.models.arenas import empty
+
+from dm_robotics import agentflow as af
+
+from . import arm, hardware
+from . import parameters as params
+from . import utils
+
+
+class PandaEnvironment:
+  """Adds Panda robots to an arena and builds a subtask environment.
+
+  Args:
+    robot_params: Parameters of the Panda robots to add to the arena.
+    arena: The arena (scene) the robots are added to.
+    control_timestep: Timestep size of the controlling agent.
+    physics_timestep: Timestep size of the physics simulation.
+  """
+
+  def __init__(self,
+               robot_params: Union[
+                   params.RobotParams,
+                   Sequence[params.RobotParams]] = params.RobotParams(),
+               arena: Optional[composer.Arena] = None,
+               control_timestep: float = 0.1,
+               physics_timestep: float = 0.002) -> None:
+    # Avoid a shared mutable default argument: create a fresh empty arena
+    # for every environment unless one is provided.
+    self._arena = arena if arena is not None else empty.Arena()
+    self._robot_params = robot_params if isinstance(
+        robot_params, Sequence) else [robot_params]
+    self._control_timestep = control_timestep
+
+    self._arena.mjcf_model.compiler.angle = 'degree'
+    self._arena.mjcf_model.option.timestep = physics_timestep
+
+    self._robots = []
+    for robot_params in self._robot_params:
+      if robot_params.robot_ip is not None:
+        robot = hardware.build_robot(robot_params)
+      else:
+        robot = arm.build_robot(robot_params)
+      self._robots.append(robot)
+    for robot, robot_params in zip(self._robots, self._robot_params):
+      self._arena.attach(robot.arm, robot_params.attach_site)
+
+    self._scene_initializers = []
+    self._entity_initializers = []
+    self._extra_sensors = []
+    self._extra_effectors = []
+    self._props = []
+    self._timestep_preprocessors = []
+
+  @property
+  def robots(self) -> Sequence[robot_module.Robot]:
+    return self._robots
+
+  def build_task_environment(self) -> subtask_env.SubTaskEnvironment:
+    """Builds an RL environment for the Panda robot."""
+    self._extra_sensors.append(utils.TimeSensor())
+    task = base_task.BaseTask(
+        task_name='panda',
+        arena=self._arena,
+        robots=self._robots,
+        props=self._props,
+        extra_sensors=self._extra_sensors,
+        extra_effectors=self._extra_effectors,
+        control_timestep=self._control_timestep,
+        scene_initializer=self._build_scene_initializer(),
+        episode_initializer=self._build_entity_initializer())
+
+    env_builder = subtask_env_builder.SubtaskEnvBuilder()
+    env_builder.set_task(task)
+
+    base_env = env_builder.build_base_env()
+    physics = base_env.physics
+
+    parent_action_spec = task.effectors_action_spec(physics)
+    full_action_space = af.IdentityActionSpace(parent_action_spec)
+
+    for preproc in self._timestep_preprocessors:
+      env_builder.add_preprocessor(preproc)
+
+    env_builder.set_action_space(full_action_space)
+    env = env_builder.build()
+
+    return env
+
+  def add_props(self, props: Sequence[prop.Prop]):
+    self._props.extend(props)
+    # Only attach the newly added props so that repeated calls do not
+    # re-add previously attached props to the arena.
+    for p in props:
+      self._arena.add_free_entity(p)
+
+  def add_entity_initializers(
+      self,
+      initializers: Sequence[entity_initializer.base_initializer.Initializer]):
+    self._entity_initializers.extend(initializers)
+
+  def add_timestep_preprocessors(
+      self,
+      preprocessors: Sequence[timestep_preprocessor.TimestepPreprocessor]):
+    self._timestep_preprocessors.extend(preprocessors)
+
+  def add_extra_sensors(self, extra_sensors: Sequence[sensor.Sensor]):
+    self._extra_sensors.extend(extra_sensors)
+
+  def
_build_scene_initializer(self) -> base_task.SceneInitializer:
+    for robot, robot_params in zip(self._robots, self._robot_params):
+      if robot_params.pose is not None:
+        arm_pose = pose_distribution.ConstantPoseDistribution(robot_params.pose)
+        self._scene_initializers.insert(
+            0,
+            scene_initializer.EntityPoseInitializer(robot.arm_frame,
+                                                    arm_pose.sample_pose))
+    return scene_initializer.CompositeSceneInitializer(self._scene_initializers)
+
+  def _build_entity_initializer(
+      self) -> entity_initializer.base_initializer.Initializer:
+    for robot, robot_params in zip(self._robots, self._robot_params):
+      arm_joint_values = joint_angles_distribution.ConstantPanTiltDistribution(
+          robot_params.joint_values)
+      arm_init = entity_initializer.JointsInitializer(
+          robot.position_arm_joints, arm_joint_values.sample_angles)
+      self._entity_initializers.insert(0, arm_init)
+      if robot_params.has_hand:
+        gripper_joint_values = joint_angles_distribution.ConstantPanTiltDistribution(
+            [.04, .04])
+        initialize_gripper = entity_initializer.JointsInitializer(
+            robot.gripper.set_joint_angles, gripper_joint_values.sample_angles)
+        self._entity_initializers.insert(0, initialize_gripper)
+    return entity_initializer.TaskEntitiesInitializer(self._entity_initializers)
diff --git a/src/dm_robotics/panda/parameters.py b/src/dm_robotics/panda/parameters.py index 750c18b..72bac91 100644 --- a/src/dm_robotics/panda/parameters.py +++ b/src/dm_robotics/panda/parameters.py
@@ -3,7 +3,8 @@
 from typing import Callable, Optional, Sequence

 from dm_control import composer, mjcf
-from dm_robotics.moma import base_task, effector, entity_initializer, robot
+from dm_robotics.moma import (base_task, effector, entity_initializer, robot,
+                              sensor)
 from dm_robotics.moma.models.arenas import empty

 from . import arm_constants
@@ -41,7 +42,7 @@ class RobotParams:


 @dataclasses.dataclass
-class BuilderExtensions:
+class Extensions:
   """Defines callables to extend the subtask environment builder."""
   build_arena: Optional[Callable[[empty.Arena], None]] = None
   build_robots: Optional[Callable[[Sequence[robot.Robot]], None]] = None
@@ -54,6 +55,8 @@ class BuilderExtensions:
   build_extra_effectors: Optional[
       Callable[[Sequence[robot.Robot], composer.Arena],
                Sequence[effector.Effector]]] = None
+  build_extra_sensors: Optional[Callable[
+      [Sequence[robot.Robot], composer.Arena], Sequence[sensor.Sensor]]] = None


 @dataclasses.dataclass
@@ -61,8 +64,10 @@ class EnvirontmentParameters:
   """Task-level parameters.

   Args:
-    arena: MJCF model describing the environment/arena. A copy
-      of this will be included in `dm_robotics.moma.models.arenas.empty.Arena`.
+    mjcf_root: MJCF model of the scene. The Panda robots will be added
+      by :py:class:`dm_robotics.panda.environment.PandaEnvironment`.
+    arena: Reinforcement learning environment.
     control_timestep: Timestep size of the controlling agent."""
-  arena: Optional[mjcf.RootElement] = None
+  mjcf_root: Optional[mjcf.RootElement] = None
+  arena: Optional[composer.Environment] = None
   control_timestep: float = 0.1
diff --git a/src/dm_robotics/panda/utils.py b/src/dm_robotics/panda/utils.py index 7ca9f5e..6d44afe 100644 --- a/src/dm_robotics/panda/utils.py +++ b/src/dm_robotics/panda/utils.py
@@ -151,15 +151,12 @@ def _time(self, physics: mjcf.Physics) -> np.ndarray:
     return np.array([physics.data.time])


-class PlotComponent(renderer.Component):
-  """ A plotting component for `dm_control.viewer.application.Application`.
""" +class Plot(renderer.Component): def __init__(self, runtime: runtime_module.Runtime, maxlen: int = 500) -> None: self._rt = runtime - self._obs_idx = None - self._obs_keys = None self.maxlen = min(maxlen, mujoco.mjMAXLINEPNT) self.maxlines = 0 self.x = np.linspace(-self.maxlen, 0, self.maxlen) @@ -169,6 +166,27 @@ def __init__(self, self.fig.flg_barplot = 0 self.fig.flg_selection = 0 self.fig.range = [[1, 0], [1, 0]] + self.fig.linewidth = 1.5 + + def reset_data(self): + for i in range(self.maxlines): + for j in range(self.maxlen): + del j + self.y[i].append(0) + + +class ObservationPlot(Plot): + """ + Plotting component for :py:class:`dm_control.viewer.application.Application` + that allows you to browse through the observations. + """ + + def __init__(self, + runtime: runtime_module.Runtime, + maxlen: int = 1000) -> None: + super().__init__(runtime, maxlen) + self._obs_idx = None + self._obs_keys = None def _init_buffer(self): for obs in self._rt._time_step.observation.values(): @@ -180,13 +198,16 @@ def _init_buffer(self): self.reset_data() self._obs_idx = 0 self._obs_keys = list(self._rt._time_step.observation.keys()) + self.update_title() + + def update_title(self): + self.fig.title = f'{self._obs_keys[self._obs_idx]:100s}' def render(self, context, viewport): if self._rt._time_step is None: return if self._obs_idx is None: self._init_buffer() - pos = mujoco.MjrRect(5, viewport.height - 256 - 5, 256, 256) obs = np.atleast_1d( self._rt._time_step.observation[self._obs_keys[self._obs_idx]]) for i in range(self.maxlines): @@ -197,22 +218,78 @@ def render(self, context, viewport): ]).T.reshape((-1,)) else: self.fig.linepnt[i] = 0 - self.fig.title = f'{self._obs_keys[self._obs_idx]:100s}' + pos = mujoco.MjrRect(5, viewport.height - 200 - 5, 300, 200) mujoco.mjr_figure(pos, self.fig, context.ptr) - def reset_data(self): - for i in range(self.maxlines): - for j in range(self.maxlen): - del j - self.y[i].append(0) - def next_obs(self): self._obs_idx = (self._obs_idx + 1) % len(self._obs_keys) self.reset_data() + self.update_title() def prev_obs(self): self._obs_idx = (self._obs_idx - 1) % len(self._obs_keys) self.reset_data() + self.update_title() + + +class ActionPlot(Plot): + """ + A plotting component for :py:class:`dm_control.viewer.application.Application` + that plots the agent's actions. + """ + + def __init__(self, + runtime: runtime_module.Runtime, + maxlen: int = 1000) -> None: + super().__init__(runtime, maxlen) + self._init_buffer() + self.fig.title = 'Actions' + + def _init_buffer(self): + self.maxlines = self._rt._default_action.shape[0] + for _1 in range(self.maxlines): + self.y.append(deque(maxlen=self.maxlen)) + self.reset_data() + + def render(self, context, viewport): + if self._rt._time_step is None: + return + for i, a in enumerate(self._rt.last_action): + self.fig.linepnt[i] = self.maxlen + self.y[i].append(a) + self.fig.linedata[i][:self.maxlen * 2] = np.array([self.x, + self.y[i]]).T.reshape( + (-1,)) + pos = mujoco.MjrRect(300 + 5, viewport.height - 200 - 5, 300, 200) + mujoco.mjr_figure(pos, self.fig, context.ptr) + + +class RewardPlot(Plot): + """ + A plotting component for :py:class:`dm_control.viewer.application.Application` + that plots the environment's reward. 
+ """ + + def __init__(self, + runtime: runtime_module.Runtime, + maxlen: int = 1000) -> None: + super().__init__(runtime, maxlen) + self.fig.title = 'Reward' + self.maxlines = 1 + self.y.append(deque(maxlen=self.maxlen)) + self.reset_data() + + def render(self, context, viewport): + if self._rt._time_step is None: + return + r = self._rt._time_step.reward + self.fig.linepnt[0] = self.maxlen + self.y[0].append(r) + self.fig.linedata[0][:self.maxlen * 2] = np.array([self.x, + self.y[0]]).T.reshape( + (-1,)) + pos = mujoco.MjrRect(2 * 300 + 5, viewport.height - 200 - 5, 300, 200) + mujoco.mjr_figure(pos, self.fig, context.ptr) class PlotHelp(views.ColumnTextModel): @@ -235,7 +312,9 @@ def __init__(self, title='Explorer', width=1024, height=768): def _perform_deferred_reload(self, params): super()._perform_deferred_reload(params) - cmp = PlotComponent(self._runtime) + cmp = ObservationPlot(self._runtime) self._renderer.components += cmp + self._renderer.components += ActionPlot(self._runtime) + self._renderer.components += RewardPlot(self._runtime) self._input_map.bind(cmp.next_obs, user_input.KEY_F4) self._input_map.bind(cmp.prev_obs, user_input.KEY_F3)