
Commit

- update learning rate outside of the gradient steps loop to avoid redundant updates
- add learning rate scheduler to config
kim-mskw committed Nov 20, 2024
1 parent 35bfbfa commit bc2db86
Showing 3 changed files with 22 additions and 11 deletions.
26 changes: 17 additions & 9 deletions assume/reinforcement_learning/algorithms/matd3.py
@@ -398,6 +398,21 @@ def update_policy(self):

logger.debug("Updating Policy")
n_rl_agents = len(self.learning_role.rl_strats.keys())

# Update noise decay and learning rate
updated_noise_decay = self.learning_role.noise_schedule(
self.learning_role.get_progress_remaining()
)

# loop again over all units here, outside the gradient steps, to avoid a redundant update call for every gradient step
for u_id, unit_strategy in self.learning_role.rl_strats.items():
critic = self.learning_role.critics[u_id]
actor = self.learning_role.rl_strats[u_id].actor
unit_strategy.action_noise.update_noise_decay(updated_noise_decay)
# Update learning rate
self.update_learning_rate([critic.optimizer, actor.optimizer])


for _ in range(self.gradient_steps):
self.n_updates += 1
i = 0
@@ -408,9 +423,7 @@ def update_policy(self):
actor = self.learning_role.rl_strats[u_id].actor
actor_target = self.learning_role.rl_strats[u_id].actor_target

# Update learning rate
self.update_learning_rate([critic.optimizer, actor.optimizer])


if i % 100 == 0:
# only update target networks every 100 steps, to have delayed network update
transitions = self.learning_role.buffer.sample(self.batch_size)
@@ -531,9 +544,4 @@ def update_policy(self):
)
i += 1

# Update noise decay
updated_noise_decay = self.learning_role.noise_schedule(
self.learning_role.get_progress_remaining()
)
for unit_strategy in self.learning_role.rl_strats.values():
unit_strategy.action_noise.update_noise_decay(updated_noise_decay)
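Note that the body of update_learning_rate is not part of this diff. A minimal sketch of what the call is assumed to do, following the common PyTorch pattern of evaluating the configured lr_schedule at the remaining training progress and writing the result into every optimizer's parameter groups (the standalone function and its signature below are illustrative, not the repository's actual helper):

def update_learning_rate(optimizers, lr_schedule, progress_remaining):
    """Apply the scheduled learning rate to a list of torch optimizers."""
    new_lr = lr_schedule(progress_remaining)
    for optimizer in optimizers:
        # torch optimizers expose their learning rate via param_groups
        for param_group in optimizer.param_groups:
            param_group["lr"] = new_lr
    return new_lr

Because this now runs in its own loop over the units, each critic/actor optimizer pair is touched exactly once per update_policy call instead of once per gradient step.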

1 change: 1 addition & 0 deletions assume/reinforcement_learning/learning_role.py
@@ -87,6 +87,7 @@ def __init__(
if use_lr_schedule:
self.lr_schedule = get_schedule_fn(linear_schedule(self.learning_rate))
else:
# constant learning rate, as no config item requesting a schedule is present
self.lr_schedule = get_schedule_fn(self.learning_rate)

noise_dt = learning_config.get("noise_dt", 1)
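get_schedule_fn and linear_schedule are used here in the same way as the scheduling utilities known from Stable-Baselines3: a plain float is wrapped into a constant schedule, while linear_schedule(initial_value) returns a function of the remaining training progress (1.0 at the start of training, 0.0 at the end). A rough sketch of those assumed semantics:

from typing import Callable, Union

Schedule = Callable[[float], float]

def linear_schedule(initial_value: float) -> Schedule:
    """Decay linearly from initial_value to 0 as progress_remaining goes 1.0 -> 0.0."""

    def schedule(progress_remaining: float) -> float:
        return progress_remaining * initial_value

    return schedule

def get_schedule_fn(value_schedule: Union[Schedule, float]) -> Schedule:
    """Pass callables through; wrap constants into a schedule that ignores progress."""
    if callable(value_schedule):
        return value_schedule
    return lambda progress_remaining: float(value_schedule)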
6 changes: 4 additions & 2 deletions examples/inputs/example_02a/config.yaml
@@ -17,12 +17,13 @@ tiny:
actor_architecture: mlp
learning_rate: 0.001
training_episodes: 10
episodes_collecting_initial_experience: 3
episodes_collecting_initial_experience: 1
train_freq: 24h
gradient_steps: -1
batch_size: 64
gamma: 0.99
device: cpu
use_lr_schedule: True
noise_sigma: 0.1
noise_scale: 1
noise_dt: 1
@@ -62,11 +63,12 @@ base:
learning_rate: 0.001
training_episodes: 50
episodes_collecting_initial_experience: 5
train_freq: 24h
train_freq: 12h
gradient_steps: -1
batch_size: 256
gamma: 0.99
device: cpu
use_lr_schedule: True
noise_sigma: 0.1
noise_scale: 1
noise_dt: 1
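With use_lr_schedule: True the configured learning_rate of 0.001 is the initial value of the linear schedule, so the effective learning rate shrinks as the remaining training progress falls from 1.0 to 0.0 over the training episodes. A small usage example under that assumption:

initial_lr = 0.001  # learning_rate from the config above

for progress_remaining in (1.0, 0.5, 0.1, 0.0):
    lr = progress_remaining * initial_lr
    print(f"progress_remaining={progress_remaining:.1f} -> lr={lr:.5f}")
# progress_remaining=1.0 -> lr=0.00100  (start of training)
# progress_remaining=0.5 -> lr=0.00050  (halfway through the episodes)
# progress_remaining=0.1 -> lr=0.00010
# progress_remaining=0.0 -> lr=0.00000  (end of training)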
