-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
148 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
""" | ||
Original Paper: https://openreview.net/pdf?id=xCVJMsPv3RT | ||
Code based on: https://github.com/TakuyaHiraoka/Dropout-Q-Functions-for-Doubly-Efficient-Reinforcement-Learning/blob/main/KUCodebase/code/agent.py | ||
This code runs automatic entropy tuning | ||
""" | ||
|
||
import torch | ||
|
||
from cares_reinforcement_learning.algorithm.policy import SAC | ||
from cares_reinforcement_learning.networks.DroQ import Actor, Critic | ||
from cares_reinforcement_learning.util.configurations import DroQConfig | ||
|
||
|
||
class DroQ(SAC):
    """DroQ agent, expressed as a thin subclass of ``SAC``.

    This class adds no attributes or methods of its own: its constructor
    forwards every argument, unchanged and in the same order, to the
    ``SAC`` initialiser.
    """

    def __init__(
        self,
        actor_network: Actor,
        critic_network: Critic,
        config: DroQConfig,
        device: torch.device,
    ) -> None:
        """Build the agent by delegating to ``SAC.__init__``.

        Args:
            actor_network: Actor network handed to the SAC base class.
            critic_network: Critic network handed to the SAC base class.
            config: DroQ configuration object handed to the SAC base class.
            device: Torch device handed to the SAC base class.
        """
        super().__init__(actor_network, critic_network, config, device)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
from .actor import DefaultActor, Actor | ||
from .critic import DefaultCritic, Critic |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
""" | ||
This is a stub file for the Actor class - reads directly off SAC's Actor class. | ||
""" | ||
|
||
# pylint: disable=unused-import | ||
from cares_reinforcement_learning.networks.SAC import Actor, DefaultActor |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
""" | ||
Critic networks for DroQ: re-exports SAC's Critic class and defines DefaultCritic, a twin Q-network that uses dropout and layer normalisation. | ||
""" | ||
|
||
# pylint: disable=unused-import | ||
from torch import nn | ||
|
||
from cares_reinforcement_learning.networks.common import TwinQNetwork | ||
from cares_reinforcement_learning.networks.SAC import Critic | ||
from cares_reinforcement_learning.util.configurations import DroQConfig, MLPConfig | ||
|
||
|
||
class DefaultCritic(TwinQNetwork):
    """Default twin-Q critic with dropout and layer normalisation.

    Two Q-heads (``Q1`` and ``Q2``) are built with the same layout:
    two hidden blocks of ``Linear -> Dropout -> LayerNorm -> ReLU`` (256 units
    each) followed by a final ``Linear`` that outputs a single value.
    """

    def __init__(
        self,
        observation_size: int,
        num_actions: int,
    ) -> None:
        """Create the critic.

        Args:
            observation_size: Number of observation features.
            num_actions: Number of action dimensions; the network input width
                is ``observation_size + num_actions``.
        """
        input_size = observation_size + num_actions
        hidden_sizes = [256, 256]
        # Single source of truth for the dropout probability, shared by the
        # config below and the explicitly constructed layers.
        dropout_p = 0.005

        critic_config: MLPConfig = MLPConfig(
            hidden_sizes=hidden_sizes,
            dropout_layer="Dropout",
            dropout_layer_args={"p": dropout_p},
            norm_layer="LayerNorm",
            layer_order=["dropout", "layernorm", "activation"],
        )

        super().__init__(
            input_size=input_size,
            output_size=1,
            config=critic_config,
        )

        def build_q_network() -> nn.Sequential:
            """Assemble one Q-head; layers are created in input-to-output order."""
            layers: list[nn.Module] = []
            in_features = input_size
            for width in hidden_sizes:
                layers.extend(
                    (
                        nn.Linear(in_features, width),
                        nn.Dropout(dropout_p),
                        nn.LayerNorm(width),
                        nn.ReLU(),
                    )
                )
                in_features = width
            layers.append(nn.Linear(in_features, 1))
            return nn.Sequential(*layers)

        # NOTE(review): Q1/Q2 are assigned explicitly after the base-class
        # initialiser has run with ``critic_config``. If TwinQNetwork already
        # builds these heads from the config, they are replaced here -
        # confirm that this override is intended.

        # Q1 architecture
        # pylint: disable-next=invalid-name
        self.Q1 = build_q_network()

        # Q2 architecture
        # pylint: disable-next=invalid-name
        self.Q2 = build_q_network()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters