Fixes

yura-hb committed May 4, 2024
1 parent 43b3691 commit 30590bc
Showing 9 changed files with 87 additions and 13 deletions.
2 changes: 2 additions & 0 deletions diploma_thesis/agents/utils/action/__init__.py
@@ -2,6 +2,7 @@
 from .action_selector import ActionSelector
 from .greedy import Greedy
 from .epsilon_greedy import EpsilonGreedy
+from .epsilon_sample import EpsilonSample
 from .sample import Sample
 from .uniform import Uniform
 from .phase_selector import PhaseSelector
@@ -11,6 +12,7 @@

 key_to_cls = {
     'epsilon_greedy': EpsilonGreedy,
+    'epsilon_sample': EpsilonSample,
     'greedy': Greedy,
     'sample': Sample,
     'uniform': Uniform,
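The registry above is what the module-level from_cli helper (imported later in workflow/tournament.py) dispatches on. A minimal sketch of that dispatch, assuming each selector exposes a from_cli constructor as EpsilonSample does below:

from typing import Dict

def from_cli(parameters: Dict):
    # Resolve the selector class from its 'kind' key and let it
    # parse its own parameters.
    cls = key_to_cls[parameters['kind']]

    return cls.from_cli(parameters.get('parameters', {}))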
45 changes: 45 additions & 0 deletions diploma_thesis/agents/utils/action/epsilon_sample.py
@@ -0,0 +1,45 @@
from .action_selector import *


class EpsilonSample(ActionSelector):

    def __init__(self, epsilon: float, min_epsilon: float, decay_factor: float, decay_steps: int):
        super().__init__()
        self.min_epsilon = min_epsilon
        self.decay_factor = decay_factor
        self.decay_steps = decay_steps
        self.epsilon = epsilon
        self.steps = 0

    def __call__(self, distribution: torch.Tensor) -> Tuple[int, torch.Tensor]:
        self.decay()

        distribution = torch.atleast_1d(distribution)

        # Greedy action and the epsilon-greedy policy over all actions
        action = torch.argmax(distribution).item()
        policy = torch.zeros_like(distribution) + self.epsilon / distribution.size(0)
        policy[action] += 1 - self.epsilon

        # With probability epsilon, sample from the softmax of the logits
        # instead of acting greedily
        if torch.rand(1) < self.epsilon:
            categorical = torch.distributions.Categorical(logits=distribution)

            return categorical.sample((1,)).item(), categorical.probs

        return action, policy

    def decay(self):
        self.steps += 1

        # Multiplicative decay every decay_steps calls, clipped at min_epsilon
        if self.steps % self.decay_steps == 0:
            self.epsilon = max(self.epsilon * self.decay_factor, self.min_epsilon)

            self.log_info(f'Set exploration rate to {self.epsilon}. Total steps: {self.steps}')

    @staticmethod
    def from_cli(parameters: Dict):
        return EpsilonSample(
            epsilon=parameters['epsilon'],
            min_epsilon=parameters.get('min_epsilon', parameters['epsilon']),
            decay_factor=parameters.get('decay_factor', 1.0),
            decay_steps=parameters.get('decay_steps', 10000000)
        )
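A quick usage sketch with hypothetical values (normally the selector is built via from_cli from the YAML configuration, and ActionSelector may expect a logger to be attached before log_info fires):

import torch

selector = EpsilonSample(epsilon=0.1, min_epsilon=0.01, decay_factor=0.99, decay_steps=1000)

# Logits for four actions: with probability 0.1 the selector samples from
# softmax(logits), otherwise it returns the argmax with an epsilon-greedy policy.
logits = torch.tensor([0.5, 2.0, -1.0, 0.3])
action, policy = selector(logits)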
@@ -65,9 +65,10 @@ parameters:

 optimizer:
   model:
-    kind: 'rmsprop'
+    kind: 'adam'
     parameters:
       lr: 0.001
+      betas: [ 0.99, 0.99 ]

 return:
   kind: 'no'
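Assuming the optimizer config maps directly onto torch.optim constructor arguments, the new setting corresponds roughly to the following (the Linear model is a stand-in):

import torch

model = torch.nn.Linear(8, 4)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.99, 0.99))

Note that betas of (0.99, 0.99) deviate from Adam's usual (0.9, 0.999) defaults: the first moment adapts more slowly, while the second-moment estimate has a shorter memory.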
@@ -3,7 +3,7 @@ base_model: &base_model 'configuration/experiments/jsp/GRAPH-NN/experiments/3/ma


 default_mods: &default_mods
-  - 'util/infrastructure/cuda.yml'
+  # - 'util/infrastructure/cuda.yml'
   - 'util/train_schedule/on_store_32.yml'

 ###############################################################################################
@@ -68,9 +68,10 @@ parameters:

 optimizer:
   model:
-    kind: 'rmsprop'
+    kind: 'adam'
     parameters:
       lr: 0.001
+      betas: [ 0.99, 0.99 ]

 return:
   kind: 'no'
@@ -30,9 +30,9 @@ ppo_1: &ppo_1
   - [
     '__none__',
   ]
-  - [
-    'util/infrastructure/cuda.yml'
-  ]
+  # - [
+  #   'util/infrastructure/cuda.yml'
+  # ]
   - [ 'util/train_schedule/on_stored_data_exclusively_3.yml']

 ppo_2: &ppo_2
@@ -47,9 +47,9 @@ ppo_2: &ppo_2
     '__none__',
     # 'agent/ppo/p3or.yml'
   ]
-  - [
-    'util/infrastructure/cuda.yml'
-  ]
+  # - [
+  #   'util/infrastructure/cuda.yml'
+  # ]
   - [ 'util/train_schedule/on_stored_data_exclusively_3.yml']

 ###############################################################################################
@@ -15,6 +15,7 @@ dqn_1: &dqn_1
     - 'configuration/mods/machine/mods'
   mods:
     - *default_mods
+    - 'util/agent/multi_agent.yml'
     - 'util/optimizer/sgd.yml'

 dqn_2: &dqn_2
@@ -24,16 +25,17 @@
     - 'configuration/mods/machine/mods'
   mods:
     - *default_mods
+    - 'util/agent/multi_agent.yml'
     - 'util/optimizer/adam.yml'


 dqn_3: &dqn_3
   base_path: *base_model
   template: *template
   mod_dirs:
     - 'configuration/mods/machine/mods'
   mods:
     - *default_mods
+    - 'util/agent/multi_agent.yml'
     - 'util/optimizer/adam_w.yml'


@@ -44,6 +46,7 @@ dqn_4: &dqn_4
     - 'configuration/mods/machine/mods'
   mods:
     - *default_mods
+    - 'util/agent/multi_agent.yml'
     - 'util/optimizer/adamax.yml'

 dqn_5: &dqn_5
@@ -53,6 +56,7 @@
     - 'configuration/mods/machine/mods'
   mods:
     - *default_mods
+    - 'util/agent/multi_agent.yml'
     - 'util/optimizer/rms_prop.yml'

 dqn_6: &dqn_6
@@ -62,12 +66,13 @@
     - 'configuration/mods/machine/mods'
   mods:
     - *default_mods
+    - 'util/agent/multi_agent.yml'
     - 'util/optimizer/radam.yml'

 #############################################################################################

 reward: &reward
-  - kind: 'global_tardiness'
+  - kind: 'global_decomposed_tardiness'
     parameters:
       span: 128

@@ -95,7 +100,7 @@ task:
   n_threads: 6
   debug: False
   store_run_statistics: False
-  output_dir: 'results/jsp/experiments/0 (MARL)/Optimizer/model/'
+  output_dir: 'results/jsp/experiments/MARL/(1) Optimizer/model/'

 tasks:
   - kind: 'multi_value'
7 changes: 6 additions & 1 deletion diploma_thesis/configuration/experiments/jsp/tournament.yml
@@ -35,7 +35,7 @@ task:
     - kind: 'persisted_agents'
       parameters:
         prefix: ''
-        path: 'results/jsp/experiments/0 (MARL)'
+        path: 'results/jsp/experiments/MARL/(1) Optimizer'
         depth: 5

 #   - kind: 'persisted_agents'
@@ -44,6 +44,11 @@
 #     path: 'results/jsp/experiments/2. GRAPH-NN'
 #     depth: 5

+  action_selector:
+    kind: 'epsilon_greedy'
+    parameters:
+      epsilon: 0.05
+
   criteria:
     - kind: 'makespan'
       parameters:
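For reference, this block configures a mildly exploratory selector (epsilon = 0.05) for tournament evaluation; through the from_cli dispatch sketched earlier it corresponds roughly to:

selector = from_cli({'kind': 'epsilon_greedy', 'parameters': {'epsilon': 0.05}})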
15 changes: 15 additions & 0 deletions diploma_thesis/workflow/tournament.py
@@ -12,6 +12,7 @@
 from tabulate import tabulate
 from tqdm import tqdm

+from agents.utils.action import from_cli
 from environment import Statistics
 from simulator import EvaluateConfiguration, EpisodicSimulator, Simulation
 from simulator.graph import GraphModel
@@ -76,6 +77,11 @@ def __evaluate__(tournament: 'Tournament',
         print(f'Error loading candidate {candidate.name}')
         return []

+    action_selector = tournament.action_selector
+
+    if action_selector is not None:
+        # Placeholder: the configured selector is not yet applied to the agents
+        pass
+
     machine.with_logger(logger)
     work_center.with_logger(logger)

@@ -129,6 +135,15 @@ def run_log_file(self, output_dir):
     def store_run_statistics(self):
         return self.parameters.get('store_run_statistics', False)

+    @property
+    def action_selector(self):
+        action_selector = self.parameters.get('action_selector', None)
+
+        if action_selector is not None:
+            action_selector = from_cli(action_selector)
+
+        return action_selector
+
     @property
     def should_update(self):
         return self.parameters.get('update', False)
