feat: add network file and adapt to anymal_d

AuTURBO · mqjinwon · Oct 16, 2024 · Sep 7, 2024 · Sep 7, 2024 · Sep 7, 2024
commit 6311c204f6d04319dc6232a5eec86d98c1f78f37
diff --git a/exts/StrideSim/StrideSim/anymal_articulation.py b/exts/StrideSim/StrideSim/anymal_articulation.py
@@ -1,12 +1,3 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved.
-#
-# NVIDIA CORPORATION and its licensors retain all intellectual property
-# and proprietary rights in and to this software, related documentation
-# and any modifications thereto. Any use, reproduction, disclosure or
-# distribution of this software and related documentation without an express
-# license agreement from NVIDIA CORPORATION is strictly prohibited.
-#
-
 import io
 import numpy as np
 import sys
@@ -20,8 +11,8 @@
 from omni.isaac.core.utils.prims import define_prim, get_prim_at_path
 from omni.isaac.core.utils.rotations import euler_to_rot_matrix, quat_to_euler_angles, quat_to_rot_matrix
 from omni.isaac.core.utils.stage import get_current_stage
+from omni.isaac.core.utils.types import ArticulationAction
 from omni.isaac.nucleus import get_assets_root_path
-from omni.isaac.quadruped.utils import LstmSeaNetwork
 
 from StrideSim.settings import ISAACLAB_LAB, ISAACLAB_LAB_ASSETS, ISAACLAB_LAB_TASKS
 
@@ -35,21 +26,19 @@
 from omni.isaac.lab.utils.assets import ISAACLAB_NUCLEUS_DIR
 
 
-class AnymalD_Atriculation(Articulation):
-    """The ANYmal quadruped"""
+class AnymalD_Atriculation:
+    """The AnymalD quadruped"""
 
     def __init__(
         self,
         prim_path: str,
-        name: str = "anymal",
+        name: str = "anymalD",
         usd_path: Optional[str] = None,
         position: Optional[np.ndarray] = None,
         orientation: Optional[np.ndarray] = None,
     ) -> None:
         """
-        [Summary]
-
-        initialize robot, set up sensors and controller
+        Initialize robot and load RL policy.
 
         Args:
             prim_path {str} -- prim path of the robot on the stage
@@ -74,37 +63,36 @@ def __init__(
 
                 asset_path = f"{ISAACLAB_NUCLEUS_DIR}/Robots/ANYbotics/ANYmal-D/anymal_d.usd"
 
-                carb.log_warn("asset path is: " + asset_path)
                 prim.GetReferences().AddReference(asset_path)
 
-        super().__init__(prim_path=self._prim_path, name=name, position=position, orientation=orientation)
+        self.robot = Articulation(
+            prim_path=self._prim_path,
+            name=name,
+            position=position,
+            orientation=orientation,
+        )
 
         self._dof_control_modes: List[int] = list()
 
+        # print("assets_root_path: ", assets_root_path)
+
         # Policy
-        file_content = omni.client.read_file(assets_root_path + "/Isaac/Samples/Quadruped/Anymal_Policies/policy_1.pt")[
-            2
-        ]
+        file_content = omni.client.read_file(
+            "/home/jin/Documents/StrideSim/exts/StrideSim/StrideSim/network/policy.pt"
+        )[2]
         file = io.BytesIO(memoryview(file_content).tobytes())
 
         self._policy = torch.jit.load(file)
-        self._base_vel_lin_scale = 2.0
-        self._base_vel_ang_scale = 0.25
-        self._joint_pos_scale = 1.0
-        self._joint_vel_scale = 0.05
+        self._base_vel_lin_scale = 1
+        self._base_vel_ang_scale = 1
         self._action_scale = 0.5
-        self._default_joint_pos = np.array([0.0, 0.4, -0.8, 0.0, -0.4, 0.8, -0.0, 0.4, -0.8, -0.0, -0.4, 0.8])
+        self._default_joint_pos = np.array(
+            # [0.0, 0.4, -0.8, 0.0, -0.4, 0.8, -0.0, 0.4, -0.8, -0.0, -0.4, 0.8]
+            [0.0, 0.0, 0.0, 0.0, 0.4, -0.4, 0.4, -0.4, -0.8, 0.8, -0.8, 0.8]
+        )
         self._previous_action = np.zeros(12)
         self._policy_counter = 0
-
-        # Actuator network
-        file_content = omni.client.read_file(
-            assets_root_path + "/Isaac/Samples/Quadruped/Anymal_Policies/sea_net_jit2.pt"
-        )[2]
-        file = io.BytesIO(memoryview(file_content).tobytes())
-        self._actuator_network = LstmSeaNetwork()
-        self._actuator_network.setup(file, self._default_joint_pos)
-        self._actuator_network.reset()
+        self._decimation = 4
 
         # Height scaner
         y = np.arange(-0.5, 0.6, 0.1)
@@ -123,9 +111,8 @@ def _hit_report_callback(self, hit):
         return True
 
     def _compute_observation(self, command):
-        """[summary]
-
-        compute the observation vector for the policy
+        """
+        Compute the observation vector for the policy
 
         Argument:
         command {np.ndarray} -- the robot command (v_x, v_y, w_z)
@@ -134,12 +121,20 @@ def _compute_observation(self, command):
         np.ndarray -- The observation vector.
 
         """
-        lin_vel_I = self.get_linear_velocity()
-        ang_vel_I = self.get_angular_velocity()
-        pos_IB, q_IB = self.get_world_pose()
+        lin_vel_I = self.robot.get_linear_velocity()
+        ang_vel_I = self.robot.get_angular_velocity()
+        pos_IB, q_IB = self.robot.get_world_pose()
 
         R_IB = quat_to_rot_matrix(q_IB)
         R_BI = R_IB.transpose()
+
+        print("lin_vel_I: ", lin_vel_I)
+        print("ang_vel_I: ", ang_vel_I)
+        print("pos_IB: ", pos_IB)
+        print("q_IB: ", q_IB)
+        print("R_IB: ", R_IB)
+        print("R_BI: ", R_BI)
+
         lin_vel_b = np.matmul(R_BI, lin_vel_I)
         ang_vel_b = np.matmul(R_BI, ang_vel_I)
         gravity_b = np.matmul(R_BI, np.array([0.0, 0.0, -1.0]))
@@ -156,24 +151,12 @@ def _compute_observation(self, command):
         obs[10] = self._base_vel_lin_scale * command[1]
         obs[11] = self._base_vel_ang_scale * command[2]
         # Joint states
-        # joint_state from the DC interface now has the order of
-        # 'FL_hip_joint',   'FR_hip_joint',   'RL_hip_joint',   'RR_hip_joint',
-        # 'FL_thigh_joint', 'FR_thigh_joint', 'RL_thigh_joint', 'RR_thigh_joint',
-        # 'FL_calf_joint',  'FR_calf_joint',  'RL_calf_joint',  'RR_calf_joint'
-
-        # while the learning controller uses the order of
-        # FL_hip_joint FL_thigh_joint FL_calf_joint
-        # FR_hip_joint FR_thigh_joint FR_calf_joint
-        # RL_hip_joint RL_thigh_joint RL_calf_joint
-        # RR_hip_joint RR_thigh_joint RR_calf_joint
-        # Convert DC order to controller order for joint info
-        current_joint_pos = self.get_joint_positions()
-        current_joint_vel = self.get_joint_velocities()
-        current_joint_pos = np.array(current_joint_pos.reshape([3, 4]).T.flat)
-        current_joint_vel = np.array(current_joint_vel.reshape([3, 4]).T.flat)
-        obs[12:24] = self._joint_pos_scale * (current_joint_pos - self._default_joint_pos)
-        obs[24:36] = self._joint_vel_scale * current_joint_vel
+        current_joint_pos = self.robot.get_joint_positions()
+        current_joint_vel = self.robot.get_joint_velocities()
+        obs[12:24] = current_joint_pos - self._default_joint_pos
+        obs[24:36] = current_joint_vel
 
+        # Previous Action
         obs[36:48] = self._previous_action
 
         # height_scanner
@@ -186,69 +169,54 @@ def _compute_observation(self, command):
         for i in range(world_scan_points.shape[0]):
             self._query_info.clear()
             self.physx_query_interface.raycast_all(
-                tuple(world_scan_points[i]), (0.0, 0.0, -1.0), 100, self._hit_report_callback
+                tuple(world_scan_points[i]),
+                (0.0, 0.0, -1.0),
+                100,
+                self._hit_report_callback,
             )
             if self._query_info:
                 distance = min(self._query_info)
                 obs[48 + i] = np.clip(distance - 0.5, -1.0, 1.0)
             else:
-                print("No hit")
+                pass
+                # print("No hit")
         return obs
 
     def advance(self, dt, command):
-        """[summary]
-
-        compute the desired torques and apply them to the articulation
+        """
+        Compute the desired torques and apply them to the articulation
 
         Argument:
         dt {float} -- Timestep update in the world.
         command {np.ndarray} -- the robot command (v_x, v_y, w_z)
 
         """
-        if self._policy_counter % 4 == 0:
+        if self._policy_counter % self._decimation == 0:
             obs = self._compute_observation(command)
             with torch.no_grad():
                 obs = torch.from_numpy(obs).view(1, -1).float()
                 self.action = self._policy(obs).detach().view(-1).numpy()
             self._previous_action = self.action.copy()
 
-        # joint_state from the DC interface now has the order of
-        # 'FL_hip_joint',   'FR_hip_joint',   'RL_hip_joint',   'RR_hip_joint',
-        # 'FL_thigh_joint', 'FR_thigh_joint', 'RL_thigh_joint', 'RR_thigh_joint',
-        # 'FL_calf_joint',  'FR_calf_joint',  'RL_calf_joint',  'RR_calf_joint'
-
-        # while the learning controller uses the order of
-        # FL_hip_joint FL_thigh_joint FL_calf_joint
-        # FR_hip_joint FR_thigh_joint FR_calf_joint
-        # RL_hip_joint RL_thigh_joint RL_calf_joint
-        # RR_hip_joint RR_thigh_joint RR_calf_joint
-        # Convert DC order to controller order for joint info
-        current_joint_pos = self.get_joint_positions()
-        current_joint_vel = self.get_joint_velocities()
-        current_joint_pos = np.array(current_joint_pos.reshape([3, 4]).T.flat)
-        current_joint_vel = np.array(current_joint_vel.reshape([3, 4]).T.flat)
-        joint_torques, _ = self._actuator_network.compute_torques(
-            current_joint_pos, current_joint_vel, self._action_scale * self.action
-        )
+        action = ArticulationAction(joint_positions=self._default_joint_pos + (self.action * self._action_scale))
 
-        # finally convert controller order to DC order for command torque
-        torque_reorder = np.array(joint_torques.reshape([4, 3]).T.flat)
-        self.set_joint_efforts(torque_reorder)
+        print("network action: ", self.action)
+
+        self.robot.apply_action(action)
 
         self._policy_counter += 1
 
     def initialize(self, physics_sim_view=None) -> None:
-        """[summary]
-
-        initialize the articulation interface, set up drive mode
         """
-        super().initialize(physics_sim_view=physics_sim_view)
-        self.get_articulation_controller().set_effort_modes("force")
-        self.get_articulation_controller().switch_control_mode("effort")
+        Initialize robot the articulation interface, set up drive mode
+        """
+        self.robot.initialize(physics_sim_view=physics_sim_view)
+        self.robot.get_articulation_controller().set_effort_modes("force")
+        self.robot.get_articulation_controller().switch_control_mode("position")
+        self.robot._articulation_view.set_gains(np.zeros(12) + 200, np.zeros(12) + 5)
 
     def post_reset(self) -> None:
-        """[summary]
-
-        post reset articulation
         """
-        super().post_reset()
+        Post reset articulation
+        """
+        self.robot.post_reset()