add demo of best models
Bonifatius94 committed Aug 15, 2023
1 parent 1be8d59 commit 2116eb0
Showing 9 changed files with 230 additions and 37 deletions.
30 changes: 9 additions & 21 deletions README.md
@@ -65,7 +65,14 @@ python3 -m pytest tests
python3 -m pylint robot_sf
```

-### 5. Run StableBaselines Training (Docker)
+### 5. Run Visual Debugging of Pre-Trained Demo Models
+
+```sh
+python3 demo_offensive.py
+python3 demo_defensive.py
+```
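The pre-trained checkpoints ship with this commit under `model/`. As a minimal sketch (not part of the repo's scripts), either policy can also be loaded directly via stable-baselines3, e.g. for custom rollouts:

```python
# Minimal sketch: load a bundled checkpoint directly.
# Assumes the model/run_*.zip files added by this commit are present;
# SB3 resolves the .zip extension automatically.
from stable_baselines3 import PPO

model = PPO.load("./model/run_043")
```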

+### 6. Run StableBaselines Training (Docker)

```sh
docker-compose build && docker-compose run \
@@ -74,7 +81,7 @@

*Note: See [this setup](./docs/GPU_SETUP.md) to install Docker with GPU support.*

-### 6. Edit Maps
+### 7. Edit Maps

```sh
sudo apt-get update && sudo apt-get install -y python3-tk
@@ -86,25 +93,6 @@ python3 -m map_editor

*Note: See [this documentation](./docs/MAP_EDITOR_USAGE.md) on how to use the map editor.*

-### 7. Visual Debugging
-
-```sh
-cp scripts/debug_random_policy.py .
-python3 debug_random_policy.py
-```
-
-```sh
-mkdir model
-pushd model;
-wget https://megastore.rz.uni-augsburg.de/get/0oxVNm_Boo/ -O ppo_model.zip
-popd
-```
-
-```sh
-cp scripts/debug_trained_policy.py .
-python3 debug_trained_policy.py
-```

### 8. Optimize Training Hyperparams (Docker)

```sh
169 changes: 169 additions & 0 deletions demo_defensive.py
@@ -0,0 +1,169 @@
import numpy as np
from gym import spaces
from stable_baselines3 import PPO

from robot_sf.robot_env import RobotEnv, OBS_DRIVE_STATE, OBS_RAYS
from robot_sf.sim_config import EnvSettings
from robot_sf.sim.sim_config import SimulationSettings
from robot_sf.robot.differential_drive import DifferentialDriveSettings


def training():
    # despite the name, this runs a rollout of the pre-trained defensive policy
    env_config = EnvSettings(
        sim_config=SimulationSettings(stack_steps=1, difficulty=0, ped_density_by_difficulty=[0.06]),
        robot_config=DifferentialDriveSettings(radius=1.0))
    env = RobotEnv(env_config, debug=True)
    # the checkpoint was trained on flat Box spaces, so replace the env's dict spaces
    env.observation_space, env.action_space = prepare_gym_spaces()
    model = PPO.load("./model/run_023", env=env)

    def obs_adapter(orig_obs):
        # flatten the dict observation into the layout the checkpoint expects:
        # drop the last drive-state column, scale the third feature by 10,
        # then concatenate rays and drive state into one flat vector
        drive_state = orig_obs[OBS_DRIVE_STATE]
        ray_state = orig_obs[OBS_RAYS]
        drive_state = drive_state[:, :-1]
        drive_state[:, 2] *= 10
        drive_state = np.squeeze(drive_state)
        ray_state = np.squeeze(ray_state)
        return np.concatenate((ray_state, drive_state), axis=0)

    obs = env.reset()
    for _ in range(10000):
        obs = obs_adapter(obs)
        action, _ = model.predict(obs, deterministic=True)
        obs, _, done, _ = env.step(action)
        env.render()

        if done:
            obs = env.reset()
            env.render()
    env.exit()


def prepare_gym_spaces():
    # bounds of the flattened observation: 272 lidar ray values in [0, 10],
    # followed by 4 drive-state features (same values the original spelled
    # out element by element)
    obs_low = np.concatenate((np.zeros(272), [0.0, -0.5, 0.0, -3.14159265]))
    obs_high = np.concatenate((np.full(272, 10.0), [2.0, 0.5, 1.07480231e+03, 3.14159265]))

    action_low = np.array([-2.0, -0.5])
    action_high = np.array([2.0, 0.5])

    obs_space = spaces.Box(low=obs_low, high=obs_high, dtype=np.float64)
    action_space = spaces.Box(low=action_low, high=action_high, dtype=np.float64)
    return obs_space, action_space


if __name__ == '__main__':
    training()
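A note on the manual spaces above (an inference from the bounds, not documented in the commit): the run_023 checkpoint apparently expects a flattened 276-dimensional observation, i.e. 272 ray readings in [0, 10] followed by 4 drive-state features, which is why the demo swaps in flat Box spaces and funnels every observation through `obs_adapter`. A quick sanity check under that assumption:

```python
# Hypothetical sanity check of the flattened layout (272 rays + 4 drive features)
obs_space, action_space = prepare_gym_spaces()
assert obs_space.shape == (276,)
assert action_space.shape == (2,)
```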
28 changes: 28 additions & 0 deletions demo_offensive.py
@@ -0,0 +1,28 @@
from stable_baselines3 import PPO
from robot_sf.robot_env import RobotEnv
from robot_sf.sim_config import EnvSettings
from robot_sf.sim.sim_config import SimulationSettings
from robot_sf.robot.bicycle_drive import BicycleDriveSettings


def training():
    # despite the name, this runs a rollout of the pre-trained offensive policy
    env_config = EnvSettings(
        sim_config=SimulationSettings(difficulty=0, ped_density_by_difficulty=[0.06]),
        robot_config=BicycleDriveSettings(radius=0.5, max_accel=3.0, allow_backwards=True))
    env = RobotEnv(env_config, debug=True)
    model = PPO.load("./model/run_043", env=env)

    obs = env.reset()
    for _ in range(10000):
        action, _ = model.predict(obs, deterministic=True)
        obs, _, done, _ = env.step(action)
        env.render()

        if done:
            obs = env.reset()
            env.render()
    env.exit()


if __name__ == '__main__':
    training()
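Unlike the defensive demo, this script needs no observation adapter or hand-built gym spaces; the run_043 checkpoint was presumably trained directly on the env's native dict observation space, so the raw `env.reset()` and `env.step()` outputs feed straight into `model.predict`.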
Binary file added model/run_023.zip
Binary file not shown.
Binary file added model/run_043.zip
Binary file not shown.
6 changes: 3 additions & 3 deletions robot_sf/feature_extractor.py
@@ -16,9 +16,9 @@ class DynamicsExtractor(BaseFeaturesExtractor):
    def __init__(
            self, observation_space: spaces.Dict,
            use_ray_conv: bool=True,
-           num_filters: List[int]=[256, 256, 64, 32],
-           kernel_sizes: List[int]=[5, 5, 5, 3],
-           dropout_rates: List[float]=[0.1, 0.1, 0.3, 0.3]):
+           num_filters: List[int]=[64, 16, 16, 16],
+           kernel_sizes: List[int]=[3, 3, 3, 3],
+           dropout_rates: List[float]=[0.3, 0.3, 0.3, 0.3]):
        rays_space: spaces.Box = observation_space.spaces[OBS_RAYS]
        drive_state_space: spaces.Box = observation_space.spaces[OBS_DRIVE_STATE]
        drive_state_features = np.prod(drive_state_space.shape)
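The changed defaults shrink the ray-scan encoder considerably. For orientation, a plausible sketch of how such (num_filters, kernel_sizes, dropout_rates) triples could compose into a 1D conv stack — an illustration under assumptions, not the repo's actual DynamicsExtractor code:

```python
import torch.nn as nn

def make_ray_conv(in_channels: int, num_filters, kernel_sizes, dropout_rates) -> nn.Sequential:
    # one conv block per (filters, kernel, dropout) triple
    layers, channels = [], in_channels
    for filters, kernel, dropout in zip(num_filters, kernel_sizes, dropout_rates):
        layers += [nn.Conv1d(channels, filters, kernel), nn.ReLU(), nn.Dropout(dropout)]
        channels = filters
    return nn.Sequential(*layers)

# new defaults from this commit: a much smaller stack than the old [256, 256, 64, 32]
encoder = make_ray_conv(1, [64, 16, 16, 16], [3, 3, 3, 3], [0.3, 0.3, 0.3, 0.3])
```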
9 changes: 7 additions & 2 deletions robot_sf/nav/navigation.py
@@ -1,5 +1,5 @@
from random import sample, randint
-from math import dist
+from math import dist, atan2
from dataclasses import dataclass, field
from typing import List, Tuple, Optional

@@ -32,6 +32,11 @@ def next_waypoint(self) -> Optional[Vec2D]:
        return self.waypoints[self.waypoint_id + 1] \
            if self.waypoint_id + 1 < len(self.waypoints) else None

+    @property
+    def initial_orientation(self) -> float:
+        # heading from the first waypoint towards the second one
+        return atan2(self.waypoints[1][1] - self.waypoints[0][1],
+                     self.waypoints[1][0] - self.waypoints[0][0])
+
    def update_position(self, pos: Vec2D):
        reached_waypoint = dist(self.current_waypoint, pos) <= self.proximity_threshold
        if reached_waypoint:
@@ -49,7 +54,7 @@ def sample_route(
        spawn_id: Optional[int]=None) -> List[Vec2D]:

    spawn_id = spawn_id if spawn_id is not None else \
-       randint(0, len(map_def.robot_spawn_zones) - 1)
+       randint(0, map_def.num_start_pos - 1)
    routes = map_def.robot_routes_by_spawn_id[spawn_id]
    route = sample(routes, k=1)[0]
    initial_spawn = sample_zone(route.spawn_zone, 1)[0]
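The new `initial_orientation` property computes the heading from the first towards the second waypoint. A tiny standalone check (hypothetical waypoints, not from the repo):

```python
from math import atan2, pi

waypoints = [(0.0, 0.0), (1.0, 1.0)]  # hypothetical two-point route
heading = atan2(waypoints[1][1] - waypoints[0][1],
                waypoints[1][0] - waypoints[0][0])
assert abs(heading - pi / 4) < 1e-9  # 45 degrees, pointing at the next waypoint
```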
15 changes: 10 additions & 5 deletions robot_sf/robot_env.py
@@ -142,15 +142,20 @@ def simple_reward(
    return reward


-def init_simulators(env_config: EnvSettings, map_def: MapDefinition, num_robots: int = 1):
+def init_simulators(
+        env_config: EnvSettings, map_def: MapDefinition,
+        num_robots: int = 1, random_start_pos: bool = True) -> List[Simulator]:
    num_sims = ceil(num_robots / map_def.num_start_pos)
    goal_proximity = env_config.robot_config.radius + env_config.sim_config.goal_radius
    sims: List[Simulator] = []

    for i in range(num_sims):
        # fill each simulator with up to num_start_pos robots; the last takes the remainder
        n = map_def.num_start_pos if i < num_sims - 1 else num_robots % map_def.num_start_pos
        sim_robots = [env_config.robot_factory() for _ in range(n)]
-        sims.append(Simulator(env_config.sim_config, map_def, sim_robots, goal_proximity))
+        sim = Simulator(
+            env_config.sim_config, map_def, sim_robots,
+            goal_proximity, random_start_pos)
+        sims.append(sim)

    return sims
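A quick worked check of the robot-to-simulator packing above (hypothetical numbers; note the modulo means a `num_robots` that is an exact multiple of `num_start_pos` would leave the last simulator empty, so the non-multiple cases below are the intended ones):

```python
from math import ceil

num_start_pos = 3  # hypothetical map with three start positions
for num_robots in (1, 4, 7):
    num_sims = ceil(num_robots / num_start_pos)
    sizes = [num_start_pos] * (num_sims - 1)
    sizes.append(num_robots % num_start_pos)  # mirrors the modulo in init_simulators
    assert sum(sizes) == num_robots  # holds whenever num_robots is not an exact multiple
```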

@@ -207,7 +212,7 @@ def __init__(
        self.action_space, self.observation_space, orig_obs_space = init_spaces(env_config, map_def)

        self.reward_func, self.debug = reward_func, debug
-        self.simulator = init_simulators(env_config, map_def)[0]
+        self.simulator = init_simulators(env_config, map_def, random_start_pos=True)[0]
        d_t = env_config.sim_config.time_per_step_in_secs
        max_ep_time = env_config.sim_config.sim_time_in_secs
@@ -217,7 +222,7 @@
        self.last_action = None
        if debug:
            self.sim_ui = SimulationView(
-                scaling=6,
+                scaling=10,
                obstacles=map_def.obstacles,
                robot_radius=env_config.robot_config.radius,
                ped_radius=env_config.sim_config.ped_radius,
@@ -275,7 +280,7 @@ def __init__(
                dtype=self.single_action_space.low.dtype)

        self.reward_func, self.debug = reward_func, debug
-        self.simulators = init_simulators(env_config, map_def, num_robots)
+        self.simulators = init_simulators(env_config, map_def, num_robots, random_start_pos=False)
        self.states: List[RobotState] = []
        d_t = env_config.sim_config.time_per_step_in_secs
        max_ep_time = env_config.sim_config.sim_time_in_secs
10 changes: 4 additions & 6 deletions robot_sf/sim/simulator.py
@@ -1,6 +1,5 @@
-from math import atan2
from dataclasses import dataclass, field
-from typing import List, Tuple, Union, Callable
+from typing import List, Tuple, Union

from pysocialforce import Simulator as PySFSimulator
from pysocialforce.simulator import make_forces as pysf_make_forces
@@ -31,9 +30,9 @@ class Simulator:
    map_def: MapDefinition
    robots: List[Robot]
    goal_proximity_threshold: float
+    random_start_pos: bool
    robot_navs: List[RouteNavigator] = field(init=False)
    pysf_sim: PySFSimulator = field(init=False)
-    sample_route: Callable[[], List[Vec2D]] = field(init=False)
    pysf_state: PedestrianStates = field(init=False)
    groups: PedestrianGroupings = field(init=False)
    peds_behaviors: List[PedestrianBehavior] = field(init=False)
@@ -90,10 +89,9 @@ def reset_state(self):
        collision = not nav.reached_waypoint
        is_at_final_goal = nav.reached_destination
        if collision or is_at_final_goal:
-            waypoints = sample_route(self.map_def, i)
+            waypoints = sample_route(self.map_def, None if self.random_start_pos else i)
            nav.new_route(waypoints[1:])
-            new_orient = atan2(waypoints[1][1] - waypoints[0][1], waypoints[1][0] - waypoints[0][0])
-            robot.reset_state((waypoints[0], new_orient))
+            robot.reset_state((waypoints[0], nav.initial_orientation))

    def step_once(self, actions: List[RobotAction]):
        for behavior in self.peds_behaviors:
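To make the two reset behaviors concrete, a standalone sketch (hypothetical helper, not repo code) of how the spawn id is chosen per robot on reset: with `random_start_pos=True` (the single-robot demo env) every reset draws a fresh spawn, while the vectorized training env pins robot `i` to spawn `i`.

```python
from random import randint

def choose_spawn(i: int, random_start_pos: bool, num_start_pos: int) -> int:
    # mirrors reset_state + sample_route above: a spawn id of None
    # makes sample_route draw a random start position
    spawn_id = None if random_start_pos else i
    return randint(0, num_start_pos - 1) if spawn_id is None else spawn_id

assert choose_spawn(2, False, 5) == 2        # vectorized env: robot i keeps spawn i
assert 0 <= choose_spawn(0, True, 5) <= 4    # demo env: random spawn on every reset
```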
