
Commit

revert -np.prod
beardyFace committed Nov 5, 2024
1 parent 291eaf7 commit 57dca04
Showing 7 changed files with 7 additions and 7 deletions.
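For context, this revert swaps the -np.prod(...) form of the SAC entropy target for a plain negation of the action dimension. Below is a minimal sketch of why the two are numerically equivalent when the action count is a single integer; the value 6 is hypothetical and not taken from the repository.

import numpy as np

num_actions = 6  # hypothetical action dimension

# np.prod of a scalar just returns that scalar, so the old form was
# redundant and produced a NumPy scalar rather than a plain int.
target_entropy_old = -np.prod(num_actions)  # numpy integer, value -6

# The reverted form negates the integer directly.
target_entropy_new = -num_actions           # plain int, value -6

assert float(target_entropy_old) == float(target_entropy_new)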
2 changes: 1 addition & 1 deletion cares_reinforcement_learning/algorithm/mbrl/DynaSAC.py
@@ -51,7 +51,7 @@ def __init__(
         self.policy_update_freq = config.policy_update_freq
         self.target_update_freq = config.target_update_freq
 
-        self.target_entropy = -np.prod(self.actor_net.num_actions)
+        self.target_entropy = -self.action_num
 
         self.actor_net_optimiser = torch.optim.Adam(
             self.actor_net.parameters(), lr=config.actor_lr
2 changes: 1 addition & 1 deletion cares_reinforcement_learning/algorithm/policy/LAPSAC.py
@@ -43,7 +43,7 @@ def __init__(
         self.policy_update_freq = config.policy_update_freq
         self.target_update_freq = config.target_update_freq
 
-        self.target_entropy = -np.prod(self.actor_net.num_actions)
+        self.target_entropy = -self.actor_net.num_actions
 
         self.actor_net_optimiser = torch.optim.Adam(
             self.actor_net.parameters(), lr=config.actor_lr
2 changes: 1 addition & 1 deletion cares_reinforcement_learning/algorithm/policy/PERSAC.py
@@ -44,7 +44,7 @@ def __init__(
         self.policy_update_freq = config.policy_update_freq
         self.target_update_freq = config.target_update_freq
 
-        self.target_entropy = -np.prod(self.actor_net.num_actions)
+        self.target_entropy = -self.actor_net.num_actions
 
         self.actor_net_optimiser = torch.optim.Adam(
             self.actor_net.parameters(), lr=config.actor_lr
2 changes: 1 addition & 1 deletion cares_reinforcement_learning/algorithm/policy/RDSAC.py
@@ -38,7 +38,7 @@ def __init__(
         self.policy_update_freq = config.policy_update_freq
         self.target_update_freq = config.target_update_freq
 
-        self.target_entropy = -np.prod(self.actor_net.num_actions)
+        self.target_entropy = -self.actor_net.num_actions
 
         # RD-PER parameters
         self.scale_r = 1.0
2 changes: 1 addition & 1 deletion cares_reinforcement_learning/algorithm/policy/SAC.py
@@ -45,7 +45,7 @@ def __init__(
         self.policy_update_freq = config.policy_update_freq
         self.target_update_freq = config.target_update_freq
 
-        self.target_entropy = -np.prod(self.actor_net.num_actions)
+        self.target_entropy = -self.actor_net.num_actions
 
         self.actor_net_optimiser = torch.optim.Adam(
             self.actor_net.parameters(), lr=config.actor_lr
2 changes: 1 addition & 1 deletion cares_reinforcement_learning/algorithm/policy/SACAE.py
@@ -61,7 +61,7 @@ def __init__(
         critic_beta = 0.9
         alpha_beta = 0.5
 
-        self.target_entropy = -np.prod(self.actor_net.num_actions)
+        self.target_entropy = -self.actor_net.num_actions
 
         self.actor_net_optimiser = torch.optim.Adam(
             self.actor_net.parameters(), lr=config.actor_lr, betas=(actor_beta, 0.999)
2 changes: 1 addition & 1 deletion cares_reinforcement_learning/algorithm/policy/TQC.py
@@ -49,7 +49,7 @@ def __init__(
 
         self.device = device
 
-        self.target_entropy = -np.prod(self.actor_net.num_actions)
+        self.target_entropy = -self.actor_net.num_actions
 
         self.actor_net_optimiser = torch.optim.Adam(
             self.actor_net.parameters(), lr=config.actor_lr
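For reference, a hedged sketch of how a target entropy of this form is typically consumed when SAC tunes its temperature (alpha). This mirrors common SAC implementations and is an assumption, not code from this repository; log_pi would come from the actor's sampled actions during a policy update.

import torch

log_alpha = torch.zeros(1, requires_grad=True)
alpha_optimiser = torch.optim.Adam([log_alpha], lr=3e-4)

def temperature_loss(log_pi: torch.Tensor, target_entropy: float) -> torch.Tensor:
    # Pushes the average policy entropy (-log_pi) towards target_entropy;
    # gradients flow only through log_alpha because the term in brackets is detached.
    return -(log_alpha * (log_pi + target_entropy).detach()).mean()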
