
Commit

target_entropy to -np.prod
beardyFace committed Nov 5, 2024
1 parent 13f85cf commit 291eaf7
Showing 9 changed files with 9 additions and 9 deletions.
3 changes: 2 additions & 1 deletion cares_reinforcement_learning/algorithm/mbrl/DynaSAC.py
@@ -51,6 +51,8 @@ def __init__(
         self.policy_update_freq = config.policy_update_freq
         self.target_update_freq = config.target_update_freq
 
+        self.target_entropy = -np.prod(self.actor_net.num_actions)
+
         self.actor_net_optimiser = torch.optim.Adam(
             self.actor_net.parameters(), lr=config.actor_lr
         )
@@ -61,7 +63,6 @@ def __init__(
         # Set to initial alpha to 1.0 according to other baselines.
         self.log_alpha = torch.tensor(np.log(1.0)).to(device)
         self.log_alpha.requires_grad = True
-        self.target_entropy = -self.action_num
         self.log_alpha_optimizer = torch.optim.Adam(
             [self.log_alpha], lr=config.alpha_lr
         )
2 changes: 1 addition & 1 deletion cares_reinforcement_learning/algorithm/policy/LA3PSAC.py
@@ -44,7 +44,7 @@ def __init__(
         self.learn_counter = 0
         self.target_update_freq = config.target_update_freq
 
-        self.target_entropy = -self.actor_net.num_actions
+        self.target_entropy = -np.prod(self.actor_net.num_actions)
 
         self.actor_net_optimiser = torch.optim.Adam(
             self.actor_net.parameters(), lr=config.actor_lr
2 changes: 1 addition & 1 deletion cares_reinforcement_learning/algorithm/policy/LAPSAC.py
@@ -43,7 +43,7 @@ def __init__(
         self.policy_update_freq = config.policy_update_freq
         self.target_update_freq = config.target_update_freq
 
-        self.target_entropy = -self.actor_net.num_actions
+        self.target_entropy = -np.prod(self.actor_net.num_actions)
 
         self.actor_net_optimiser = torch.optim.Adam(
             self.actor_net.parameters(), lr=config.actor_lr
2 changes: 1 addition & 1 deletion cares_reinforcement_learning/algorithm/policy/MAPERSAC.py
@@ -46,7 +46,7 @@ def __init__(
         self.policy_update_freq = config.policy_update_freq
         self.target_update_freq = config.target_update_freq
 
-        self.target_entropy = -self.actor_net.num_actions
+        self.target_entropy = -np.prod(self.actor_net.num_actions)
 
         # MAPER-PER parameters
         self.scale_r = 1.0
2 changes: 1 addition & 1 deletion cares_reinforcement_learning/algorithm/policy/PERSAC.py
@@ -44,7 +44,7 @@ def __init__(
         self.policy_update_freq = config.policy_update_freq
         self.target_update_freq = config.target_update_freq
 
-        self.target_entropy = -self.actor_net.num_actions
+        self.target_entropy = -np.prod(self.actor_net.num_actions)
 
         self.actor_net_optimiser = torch.optim.Adam(
             self.actor_net.parameters(), lr=config.actor_lr
2 changes: 1 addition & 1 deletion cares_reinforcement_learning/algorithm/policy/RDSAC.py
@@ -38,7 +38,7 @@ def __init__(
         self.policy_update_freq = config.policy_update_freq
         self.target_update_freq = config.target_update_freq
 
-        self.target_entropy = -self.actor_net.num_actions
+        self.target_entropy = -np.prod(self.actor_net.num_actions)
 
         # RD-PER parameters
         self.scale_r = 1.0
2 changes: 1 addition & 1 deletion cares_reinforcement_learning/algorithm/policy/SAC.py
@@ -45,7 +45,7 @@ def __init__(
         self.policy_update_freq = config.policy_update_freq
         self.target_update_freq = config.target_update_freq
 
-        self.target_entropy = -self.actor_net.num_actions
+        self.target_entropy = -np.prod(self.actor_net.num_actions)
 
         self.actor_net_optimiser = torch.optim.Adam(
             self.actor_net.parameters(), lr=config.actor_lr
1 change: 0 additions & 1 deletion cares_reinforcement_learning/algorithm/policy/SACAE.py
@@ -61,7 +61,6 @@ def __init__(
         critic_beta = 0.9
         alpha_beta = 0.5
 
-        # set target entropy to -|A|
         self.target_entropy = -np.prod(self.actor_net.num_actions)
 
         self.actor_net_optimiser = torch.optim.Adam(
2 changes: 1 addition & 1 deletion cares_reinforcement_learning/algorithm/policy/TQC.py
@@ -49,7 +49,7 @@ def __init__(
 
         self.device = device
 
-        self.target_entropy = -self.actor_net.num_actions
+        self.target_entropy = -np.prod(self.actor_net.num_actions)
 
         self.actor_net_optimiser = torch.optim.Adam(
             self.actor_net.parameters(), lr=config.actor_lr
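
For context, a minimal illustrative sketch of the target-entropy heuristic this commit standardises on: SAC-style automatic temperature tuning commonly sets target_entropy = -|A|, the negative of the action dimensionality, and wrapping the action count in np.prod keeps that expression valid whether num_actions is a plain integer or an action-shape tuple. The snippet below is not taken from the repository; the values are hypothetical.

import numpy as np

# Hypothetical action specifications: a scalar count and a shape tuple.
for num_actions in (6, (2, 3)):
    # SAC heuristic: target entropy = -|A| (negative action dimensionality).
    target_entropy = -np.prod(num_actions)
    print(num_actions, "->", target_entropy)  # both cases print -6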
