Fixes

yura-hb committed May 4, 2024
1 parent 43b3691 commit 30590bc
Showing 9 changed files with 87 additions and 13 deletions.
2 changes: 2 additions & 0 deletions diploma_thesis/agents/utils/action/__init__.py
@@ -2,6 +2,7 @@
 from .action_selector import ActionSelector
 from .greedy import Greedy
 from .epsilon_greedy import EpsilonGreedy
+from .epsilon_sample import EpsilonSample
 from .sample import Sample
 from .uniform import Uniform
 from .phase_selector import PhaseSelector
@@ -11,6 +12,7 @@

 key_to_cls = {
     'epsilon_greedy': EpsilonGreedy,
+    'epsilon_sample': EpsilonSample,
     'greedy': Greedy,
     'sample': Sample,
     'uniform': Uniform,
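The registry above is what the module-level from_cli helper (imported later in workflow/tournament.py) dispatches on. A minimal sketch of that dispatch, assuming each selector exposes a from_cli constructor as EpsilonSample does below:

from typing import Dict

def from_cli(parameters: Dict):
    # Resolve the selector class from its 'kind' key and let it
    # parse its own parameters.
    cls = key_to_cls[parameters['kind']]

    return cls.from_cli(parameters.get('parameters', {}))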
45 changes: 45 additions & 0 deletions diploma_thesis/agents/utils/action/epsilon_sample.py
@@ -0,0 +1,45 @@
from .action_selector import *


class EpsilonSample(ActionSelector):

    def __init__(self, epsilon: float, min_epsilon: float, decay_factor: float, decay_steps: int):
        super().__init__()
        self.min_epsilon = min_epsilon
        self.decay_factor = decay_factor
        self.decay_steps = decay_steps
        self.epsilon = epsilon
        self.steps = 0

    def __call__(self, distribution: torch.Tensor) -> Tuple[int, torch.Tensor]:
        self.decay()

        distribution = torch.atleast_1d(distribution)

        # Greedy action and the epsilon-greedy policy over all actions
        action = torch.argmax(distribution).item()
        policy = torch.zeros_like(distribution) + self.epsilon / distribution.size(0)
        policy[action] += 1 - self.epsilon

        # With probability epsilon, sample from the softmax of the logits
        # instead of acting greedily
        if torch.rand(1) < self.epsilon:
            categorical = torch.distributions.Categorical(logits=distribution)

            return categorical.sample((1,)).item(), categorical.probs

        return action, policy

    def decay(self):
        self.steps += 1

        # Multiplicative decay every decay_steps calls, clipped at min_epsilon
        if self.steps % self.decay_steps == 0:
            self.epsilon = max(self.epsilon * self.decay_factor, self.min_epsilon)

            self.log_info(f'Set exploration rate to {self.epsilon}. Total steps: {self.steps}')

    @staticmethod
    def from_cli(parameters: Dict):
        return EpsilonSample(
            epsilon=parameters['epsilon'],
            min_epsilon=parameters.get('min_epsilon', parameters['epsilon']),
            decay_factor=parameters.get('decay_factor', 1.0),
            decay_steps=parameters.get('decay_steps', 10000000)
        )
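A quick usage sketch with hypothetical values (normally the selector is built via from_cli from the YAML configuration, and ActionSelector may expect a logger to be attached before log_info fires):

import torch

selector = EpsilonSample(epsilon=0.1, min_epsilon=0.01, decay_factor=0.99, decay_steps=1000)

# Logits for four actions: with probability 0.1 the selector samples from
# softmax(logits), otherwise it returns the argmax with an epsilon-greedy policy.
logits = torch.tensor([0.5, 2.0, -1.0, 0.3])
action, policy = selector(logits)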
@@ -65,9 +65,10 @@ parameters:

 optimizer:
   model:
-    kind: 'rmsprop'
+    kind: 'adam'
     parameters:
       lr: 0.001
+      betas: [ 0.99, 0.99 ]

 return:
   kind: 'no'
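Assuming the optimizer config maps directly onto torch.optim constructor arguments, the new setting corresponds roughly to the following (the Linear model is a stand-in):

import torch

model = torch.nn.Linear(8, 4)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.99, 0.99))

Note that betas of (0.99, 0.99) deviate from Adam's usual (0.9, 0.999) defaults: the first moment adapts more slowly, while the second-moment estimate has a shorter memory.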
@@ -3,7 +3,7 @@ base_model: &base_model 'configuration/experiments/jsp/GRAPH-NN/experiments/3/ma


 default_mods: &default_mods
-  - 'util/infrastructure/cuda.yml'
+  # - 'util/infrastructure/cuda.yml'
   - 'util/train_schedule/on_store_32.yml'

 ###############################################################################################
@@ -68,9 +68,10 @@ parameters:

 optimizer:
   model:
-    kind: 'rmsprop'
+    kind: 'adam'
     parameters:
       lr: 0.001
+      betas: [ 0.99, 0.99 ]

 return:
   kind: 'no'
@@ -30,9 +30,9 @@ ppo_1: &ppo_1
   - [
     '__none__',
   ]
-  - [
-    'util/infrastructure/cuda.yml'
-  ]
+  # - [
+  #   'util/infrastructure/cuda.yml'
+  # ]
   - [ 'util/train_schedule/on_stored_data_exclusively_3.yml']

 ppo_2: &ppo_2
@@ -47,9 +47,9 @@ ppo_2: &ppo_2
     '__none__',
     # 'agent/ppo/p3or.yml'
   ]
-  - [
-    'util/infrastructure/cuda.yml'
-  ]
+  # - [
+  #   'util/infrastructure/cuda.yml'
+  # ]
   - [ 'util/train_schedule/on_stored_data_exclusively_3.yml']

 ###############################################################################################
@@ -15,6 +15,7 @@ dqn_1: &dqn_1
     - 'configuration/mods/machine/mods'
   mods:
     - *default_mods
+    - 'util/agent/multi_agent.yml'
     - 'util/optimizer/sgd.yml'

 dqn_2: &dqn_2
@@ -24,16 +25,17 @@
     - 'configuration/mods/machine/mods'
   mods:
     - *default_mods
+    - 'util/agent/multi_agent.yml'
     - 'util/optimizer/adam.yml'


 dqn_3: &dqn_3
   base_path: *base_model
   template: *template
   mod_dirs:
     - 'configuration/mods/machine/mods'
   mods:
     - *default_mods
+    - 'util/agent/multi_agent.yml'
     - 'util/optimizer/adam_w.yml'


@@ -44,6 +46,7 @@ dqn_4: &dqn_4
     - 'configuration/mods/machine/mods'
   mods:
     - *default_mods
+    - 'util/agent/multi_agent.yml'
     - 'util/optimizer/adamax.yml'

 dqn_5: &dqn_5
@@ -53,6 +56,7 @@
     - 'configuration/mods/machine/mods'
   mods:
     - *default_mods
+    - 'util/agent/multi_agent.yml'
     - 'util/optimizer/rms_prop.yml'

 dqn_6: &dqn_6
@@ -62,12 +66,13 @@
     - 'configuration/mods/machine/mods'
   mods:
     - *default_mods
+    - 'util/agent/multi_agent.yml'
     - 'util/optimizer/radam.yml'

 #############################################################################################

 reward: &reward
-  - kind: 'global_tardiness'
+  - kind: 'global_decomposed_tardiness'
     parameters:
       span: 128

@@ -95,7 +100,7 @@ task:
   n_threads: 6
   debug: False
   store_run_statistics: False
-  output_dir: 'results/jsp/experiments/0 (MARL)/Optimizer/model/'
+  output_dir: 'results/jsp/experiments/MARL/(1) Optimizer/model/'

 tasks:
   - kind: 'multi_value'
7 changes: 6 additions & 1 deletion diploma_thesis/configuration/experiments/jsp/tournament.yml
@@ -35,7 +35,7 @@ task:
     - kind: 'persisted_agents'
       parameters:
         prefix: ''
-        path: 'results/jsp/experiments/0 (MARL)'
+        path: 'results/jsp/experiments/MARL/(1) Optimizer'
         depth: 5

 #   - kind: 'persisted_agents'
@@ -44,6 +44,11 @@
 #     path: 'results/jsp/experiments/2. GRAPH-NN'
 #     depth: 5

+  action_selector:
+    kind: 'epsilon_greedy'
+    parameters:
+      epsilon: 0.05
+
   criteria:
     - kind: 'makespan'
       parameters:
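For reference, this block configures a mildly exploratory selector (epsilon = 0.05) for tournament evaluation; through the from_cli dispatch sketched earlier it corresponds roughly to:

selector = from_cli({'kind': 'epsilon_greedy', 'parameters': {'epsilon': 0.05}})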
15 changes: 15 additions & 0 deletions diploma_thesis/workflow/tournament.py
@@ -12,6 +12,7 @@
 from tabulate import tabulate
 from tqdm import tqdm

+from agents.utils.action import from_cli
 from environment import Statistics
 from simulator import EvaluateConfiguration, EpisodicSimulator, Simulation
 from simulator.graph import GraphModel
@@ -76,6 +77,11 @@ def __evaluate__(tournament: 'Tournament',
         print(f'Error loading candidate {candidate.name}')
         return []

+    action_selector = tournament.action_selector
+
+    if action_selector is not None:
+        # Placeholder: the configured selector is not yet applied to the agents
+        pass
+
     machine.with_logger(logger)
     work_center.with_logger(logger)

@@ -129,6 +135,15 @@ def run_log_file(self, output_dir):
     def store_run_statistics(self):
         return self.parameters.get('store_run_statistics', False)

+    @property
+    def action_selector(self):
+        action_selector = self.parameters.get('action_selector', None)
+
+        if action_selector is not None:
+            action_selector = from_cli(action_selector)
+
+        return action_selector
+
     @property
     def should_update(self):
         return self.parameters.get('update', False)
