add demo of best models
Bonifatius94 committed Aug 15, 2023
1 parent 1be8d59 commit 2116eb0
Showing 9 changed files with 230 additions and 37 deletions.
30 changes: 9 additions & 21 deletions README.md
@@ -65,7 +65,14 @@ python3 -m pytest tests
python3 -m pylint robot_sf
```

-### 5. Run StableBaselines Training (Docker)
+### 5. Run Visual Debugging of Pre-Trained Demo Models
+
+```sh
+python3 demo_offensive.py
+python3 demo_defensive.py
+```
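The pre-trained checkpoints ship with this commit under `model/`. As a minimal sketch (not part of the repo's scripts), either policy can also be loaded directly via stable-baselines3, e.g. for custom rollouts:

```python
# Minimal sketch: load a bundled checkpoint directly.
# Assumes the model/run_*.zip files added by this commit are present;
# SB3 resolves the .zip extension automatically.
from stable_baselines3 import PPO

model = PPO.load("./model/run_043")
```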

+### 6. Run StableBaselines Training (Docker)

```sh
docker-compose build && docker-compose run \
@@ -74,7 +81,7 @@

*Note: See [this setup](./docs/GPU_SETUP.md) to install Docker with GPU support.*

-### 6. Edit Maps
+### 7. Edit Maps

```sh
sudo apt-get update && sudo apt-get install -y python3-tk
@@ -86,25 +93,6 @@ python3 -m map_editor

*Note: See [this documentation](./docs/MAP_EDITOR_USAGE.md) on how to use the map editor.*

-### 7. Visual Debugging
-
-```sh
-cp scripts/debug_random_policy.py .
-python3 debug_random_policy.py
-```
-
-```sh
-mkdir model
-pushd model;
-wget https://megastore.rz.uni-augsburg.de/get/0oxVNm_Boo/ -O ppo_model.zip
-popd
-```
-
-```sh
-cp scripts/debug_trained_policy.py .
-python3 debug_trained_policy.py
-```

### 8. Optimize Training Hyperparams (Docker)

```sh
169 changes: 169 additions & 0 deletions demo_defensive.py
@@ -0,0 +1,169 @@
import numpy as np
from gym import spaces
from stable_baselines3 import PPO

from robot_sf.robot_env import RobotEnv, OBS_DRIVE_STATE, OBS_RAYS
from robot_sf.sim_config import EnvSettings
from robot_sf.sim.sim_config import SimulationSettings
from robot_sf.robot.differential_drive import DifferentialDriveSettings


def training():
    # despite the name, this runs a rollout of the pre-trained defensive policy
    env_config = EnvSettings(
        sim_config=SimulationSettings(stack_steps=1, difficulty=0, ped_density_by_difficulty=[0.06]),
        robot_config=DifferentialDriveSettings(radius=1.0))
    env = RobotEnv(env_config, debug=True)
    # the checkpoint was trained on flat Box spaces, so replace the env's dict spaces
    env.observation_space, env.action_space = prepare_gym_spaces()
    model = PPO.load("./model/run_023", env=env)

    def obs_adapter(orig_obs):
        # flatten the dict observation into the layout the checkpoint expects:
        # drop the last drive-state column, scale the third feature by 10,
        # then concatenate rays and drive state into one flat vector
        drive_state = orig_obs[OBS_DRIVE_STATE]
        ray_state = orig_obs[OBS_RAYS]
        drive_state = drive_state[:, :-1]
        drive_state[:, 2] *= 10
        drive_state = np.squeeze(drive_state)
        ray_state = np.squeeze(ray_state)
        return np.concatenate((ray_state, drive_state), axis=0)

    obs = env.reset()
    for _ in range(10000):
        obs = obs_adapter(obs)
        action, _ = model.predict(obs, deterministic=True)
        obs, _, done, _ = env.step(action)
        env.render()

        if done:
            obs = env.reset()
            env.render()
    env.exit()


def prepare_gym_spaces():
    # bounds of the flattened observation: 272 lidar ray values in [0, 10],
    # followed by 4 drive-state features (same values the original spelled
    # out element by element)
    obs_low = np.concatenate((np.zeros(272), [0.0, -0.5, 0.0, -3.14159265]))
    obs_high = np.concatenate((np.full(272, 10.0), [2.0, 0.5, 1.07480231e+03, 3.14159265]))

    action_low = np.array([-2.0, -0.5])
    action_high = np.array([2.0, 0.5])

    obs_space = spaces.Box(low=obs_low, high=obs_high, dtype=np.float64)
    action_space = spaces.Box(low=action_low, high=action_high, dtype=np.float64)
    return obs_space, action_space


if __name__ == '__main__':
    training()
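A note on the manual spaces above (an inference from the bounds, not documented in the commit): the run_023 checkpoint apparently expects a flattened 276-dimensional observation, i.e. 272 ray readings in [0, 10] followed by 4 drive-state features, which is why the demo swaps in flat Box spaces and funnels every observation through `obs_adapter`. A quick sanity check under that assumption:

```python
# Hypothetical sanity check of the flattened layout (272 rays + 4 drive features)
obs_space, action_space = prepare_gym_spaces()
assert obs_space.shape == (276,)
assert action_space.shape == (2,)
```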
28 changes: 28 additions & 0 deletions demo_offensive.py
@@ -0,0 +1,28 @@
from stable_baselines3 import PPO
from robot_sf.robot_env import RobotEnv
from robot_sf.sim_config import EnvSettings
from robot_sf.sim.sim_config import SimulationSettings
from robot_sf.robot.bicycle_drive import BicycleDriveSettings


def training():
    # despite the name, this runs a rollout of the pre-trained offensive policy
    env_config = EnvSettings(
        sim_config=SimulationSettings(difficulty=0, ped_density_by_difficulty=[0.06]),
        robot_config=BicycleDriveSettings(radius=0.5, max_accel=3.0, allow_backwards=True))
    env = RobotEnv(env_config, debug=True)
    model = PPO.load("./model/run_043", env=env)

    obs = env.reset()
    for _ in range(10000):
        action, _ = model.predict(obs, deterministic=True)
        obs, _, done, _ = env.step(action)
        env.render()

        if done:
            obs = env.reset()
            env.render()
    env.exit()


if __name__ == '__main__':
    training()
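Unlike the defensive demo, this script needs no observation adapter or hand-built gym spaces; the run_043 checkpoint was presumably trained directly on the env's native dict observation space, so the raw `env.reset()` and `env.step()` outputs feed straight into `model.predict`.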
Binary file added model/run_023.zip
Binary file not shown.
Binary file added model/run_043.zip
Binary file not shown.
6 changes: 3 additions & 3 deletions robot_sf/feature_extractor.py
@@ -16,9 +16,9 @@ class DynamicsExtractor(BaseFeaturesExtractor):
    def __init__(
            self, observation_space: spaces.Dict,
            use_ray_conv: bool=True,
-           num_filters: List[int]=[256, 256, 64, 32],
-           kernel_sizes: List[int]=[5, 5, 5, 3],
-           dropout_rates: List[float]=[0.1, 0.1, 0.3, 0.3]):
+           num_filters: List[int]=[64, 16, 16, 16],
+           kernel_sizes: List[int]=[3, 3, 3, 3],
+           dropout_rates: List[float]=[0.3, 0.3, 0.3, 0.3]):
        rays_space: spaces.Box = observation_space.spaces[OBS_RAYS]
        drive_state_space: spaces.Box = observation_space.spaces[OBS_DRIVE_STATE]
        drive_state_features = np.prod(drive_state_space.shape)
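The changed defaults shrink the ray-scan encoder considerably. For orientation, a plausible sketch of how such (num_filters, kernel_sizes, dropout_rates) triples could compose into a 1D conv stack — an illustration under assumptions, not the repo's actual DynamicsExtractor code:

```python
import torch.nn as nn

def make_ray_conv(in_channels: int, num_filters, kernel_sizes, dropout_rates) -> nn.Sequential:
    # one conv block per (filters, kernel, dropout) triple
    layers, channels = [], in_channels
    for filters, kernel, dropout in zip(num_filters, kernel_sizes, dropout_rates):
        layers += [nn.Conv1d(channels, filters, kernel), nn.ReLU(), nn.Dropout(dropout)]
        channels = filters
    return nn.Sequential(*layers)

# new defaults from this commit: a much smaller stack than the old [256, 256, 64, 32]
encoder = make_ray_conv(1, [64, 16, 16, 16], [3, 3, 3, 3], [0.3, 0.3, 0.3, 0.3])
```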
9 changes: 7 additions & 2 deletions robot_sf/nav/navigation.py
@@ -1,5 +1,5 @@
from random import sample, randint
-from math import dist
+from math import dist, atan2
from dataclasses import dataclass, field
from typing import List, Tuple, Optional

@@ -32,6 +32,11 @@ def next_waypoint(self) -> Optional[Vec2D]:
        return self.waypoints[self.waypoint_id + 1] \
            if self.waypoint_id + 1 < len(self.waypoints) else None

+    @property
+    def initial_orientation(self) -> float:
+        # heading from the first waypoint towards the second one
+        return atan2(self.waypoints[1][1] - self.waypoints[0][1],
+                     self.waypoints[1][0] - self.waypoints[0][0])
+
    def update_position(self, pos: Vec2D):
        reached_waypoint = dist(self.current_waypoint, pos) <= self.proximity_threshold
        if reached_waypoint:
@@ -49,7 +54,7 @@ def sample_route(
        spawn_id: Optional[int]=None) -> List[Vec2D]:

    spawn_id = spawn_id if spawn_id is not None else \
-       randint(0, len(map_def.robot_spawn_zones) - 1)
+       randint(0, map_def.num_start_pos - 1)
    routes = map_def.robot_routes_by_spawn_id[spawn_id]
    route = sample(routes, k=1)[0]
    initial_spawn = sample_zone(route.spawn_zone, 1)[0]
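The new `initial_orientation` property computes the heading from the first towards the second waypoint. A tiny standalone check (hypothetical waypoints, not from the repo):

```python
from math import atan2, pi

waypoints = [(0.0, 0.0), (1.0, 1.0)]  # hypothetical two-point route
heading = atan2(waypoints[1][1] - waypoints[0][1],
                waypoints[1][0] - waypoints[0][0])
assert abs(heading - pi / 4) < 1e-9  # 45 degrees, pointing at the next waypoint
```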
15 changes: 10 additions & 5 deletions robot_sf/robot_env.py
@@ -142,15 +142,20 @@ def simple_reward(
    return reward


-def init_simulators(env_config: EnvSettings, map_def: MapDefinition, num_robots: int = 1):
+def init_simulators(
+        env_config: EnvSettings, map_def: MapDefinition,
+        num_robots: int = 1, random_start_pos: bool = True) -> List[Simulator]:
    num_sims = ceil(num_robots / map_def.num_start_pos)
    goal_proximity = env_config.robot_config.radius + env_config.sim_config.goal_radius
    sims: List[Simulator] = []

    for i in range(num_sims):
        # fill each simulator with up to num_start_pos robots; the last takes the remainder
        n = map_def.num_start_pos if i < num_sims - 1 else num_robots % map_def.num_start_pos
        sim_robots = [env_config.robot_factory() for _ in range(n)]
-        sims.append(Simulator(env_config.sim_config, map_def, sim_robots, goal_proximity))
+        sim = Simulator(
+            env_config.sim_config, map_def, sim_robots,
+            goal_proximity, random_start_pos)
+        sims.append(sim)

    return sims
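A quick worked check of the robot-to-simulator packing above (hypothetical numbers; note the modulo means a `num_robots` that is an exact multiple of `num_start_pos` would leave the last simulator empty, so the non-multiple cases below are the intended ones):

```python
from math import ceil

num_start_pos = 3  # hypothetical map with three start positions
for num_robots in (1, 4, 7):
    num_sims = ceil(num_robots / num_start_pos)
    sizes = [num_start_pos] * (num_sims - 1)
    sizes.append(num_robots % num_start_pos)  # mirrors the modulo in init_simulators
    assert sum(sizes) == num_robots  # holds whenever num_robots is not an exact multiple
```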

@@ -207,7 +212,7 @@ def __init__(
        self.action_space, self.observation_space, orig_obs_space = init_spaces(env_config, map_def)

        self.reward_func, self.debug = reward_func, debug
-        self.simulator = init_simulators(env_config, map_def)[0]
+        self.simulator = init_simulators(env_config, map_def, random_start_pos=True)[0]
        d_t = env_config.sim_config.time_per_step_in_secs
        max_ep_time = env_config.sim_config.sim_time_in_secs
@@ -217,7 +222,7 @@
        self.last_action = None
        if debug:
            self.sim_ui = SimulationView(
-                scaling=6,
+                scaling=10,
                obstacles=map_def.obstacles,
                robot_radius=env_config.robot_config.radius,
                ped_radius=env_config.sim_config.ped_radius,
@@ -275,7 +280,7 @@ def __init__(
                dtype=self.single_action_space.low.dtype)

        self.reward_func, self.debug = reward_func, debug
-        self.simulators = init_simulators(env_config, map_def, num_robots)
+        self.simulators = init_simulators(env_config, map_def, num_robots, random_start_pos=False)
        self.states: List[RobotState] = []
        d_t = env_config.sim_config.time_per_step_in_secs
        max_ep_time = env_config.sim_config.sim_time_in_secs
10 changes: 4 additions & 6 deletions robot_sf/sim/simulator.py
@@ -1,6 +1,5 @@
-from math import atan2
from dataclasses import dataclass, field
-from typing import List, Tuple, Union, Callable
+from typing import List, Tuple, Union

from pysocialforce import Simulator as PySFSimulator
from pysocialforce.simulator import make_forces as pysf_make_forces
@@ -31,9 +30,9 @@ class Simulator:
    map_def: MapDefinition
    robots: List[Robot]
    goal_proximity_threshold: float
+    random_start_pos: bool
    robot_navs: List[RouteNavigator] = field(init=False)
    pysf_sim: PySFSimulator = field(init=False)
-    sample_route: Callable[[], List[Vec2D]] = field(init=False)
    pysf_state: PedestrianStates = field(init=False)
    groups: PedestrianGroupings = field(init=False)
    peds_behaviors: List[PedestrianBehavior] = field(init=False)
@@ -90,10 +89,9 @@ def reset_state(self):
        collision = not nav.reached_waypoint
        is_at_final_goal = nav.reached_destination
        if collision or is_at_final_goal:
-            waypoints = sample_route(self.map_def, i)
+            waypoints = sample_route(self.map_def, None if self.random_start_pos else i)
            nav.new_route(waypoints[1:])
-            new_orient = atan2(waypoints[1][1] - waypoints[0][1], waypoints[1][0] - waypoints[0][0])
-            robot.reset_state((waypoints[0], new_orient))
+            robot.reset_state((waypoints[0], nav.initial_orientation))

    def step_once(self, actions: List[RobotAction]):
        for behavior in self.peds_behaviors:
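To make the two reset behaviors concrete, a standalone sketch (hypothetical helper, not repo code) of how the spawn id is chosen per robot on reset: with `random_start_pos=True` (the single-robot demo env) every reset draws a fresh spawn, while the vectorized training env pins robot `i` to spawn `i`.

```python
from random import randint

def choose_spawn(i: int, random_start_pos: bool, num_start_pos: int) -> int:
    # mirrors reset_state + sample_route above: a spawn id of None
    # makes sample_route draw a random start position
    spawn_id = None if random_start_pos else i
    return randint(0, num_start_pos - 1) if spawn_id is None else spawn_id

assert choose_spawn(2, False, 5) == 2        # vectorized env: robot i keeps spawn i
assert 0 <= choose_spawn(0, True, 5) <= 4    # demo env: random spawn on every reset
```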
