@@ -1,25 +1,26 @@
-from dataclasses import dataclass
 from typing import Dict
 
 import tensordict
 from torch.optim.swa_utils import AveragedModel, get_ema_avg_fn
 
 from agents.utils.memory import NotReadyException
-from agents.utils.rl.rl import *
 from agents.utils.return_estimator import ValueFetchMethod
+from agents.utils.rl.rl import *
 
 
 class DeepQTrainer(RLTrainer):
     @dataclass
     class Configuration:
         decay: float = 0.99
         update_steps: int = 10
+        epochs: int = 1
 
         @staticmethod
         def from_cli(parameters: Dict):
             return DeepQTrainer.Configuration(
                 decay=parameters.get('decay', 0.99),
-                update_steps=parameters.get('update_steps', 20)
+                update_steps=parameters.get('update_steps', 20),
+                epochs=parameters.get('epochs', 1)
             )
 
     def __init__(self, configuration: Configuration, *args, **kwargs):
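Usage note: `Configuration.from_cli` reads the new `epochs` key from the same parameter dict as the existing fields, falling back to 1 when the key is absent. A minimal sketch (the parameter values are made up for illustration):

```python
# Hypothetical CLI parameter dict; any missing key falls back to the
# defaults in from_cli (decay=0.99, update_steps=20, epochs=1).
parameters = {'decay': 0.95, 'update_steps': 10, 'epochs': 4}

configuration = DeepQTrainer.Configuration.from_cli(parameters)

assert configuration.decay == 0.95
assert configuration.epochs == 4
```

Note that the dataclass default for `update_steps` (10) still differs from the `from_cli` fallback (20), so the two construction paths disagree when the key is omitted.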
@@ -37,34 +38,35 @@ def configure(self, model: Policy):
         self._target_model = AveragedModel(model.clone(), avg_fn=avg_fn).to(self.device)
 
     def __train__(self, model: Policy):
-        try:
-            batch, info = self.storage.sample(device=self.device)
-        except NotReadyException:
-            return
+        for _ in range(self.configuration.epochs):
+            try:
+                batch, info = self.storage.sample(device=self.device)
+            except NotReadyException:
+                return
 
-        with torch.no_grad():
-            q_values = self.estimate_q(model, batch)
+            with torch.no_grad():
+                q_values = self.estimate_q(model, batch)
 
-        def compute_loss():
-            actions = self.__get_action_values__(model, batch.state, batch.action)
+            def compute_loss():
+                actions = self.__get_action_values__(model, batch.state, batch.action)
 
-            loss_ = self.loss(actions, q_values)
-            td_error_ = torch.square(actions - q_values)
+                loss_ = self.loss(actions, q_values)
+                td_error_ = torch.square(actions - q_values)
 
-            entropy = torch.distributions.Categorical(logits=actions).entropy().mean()
+                entropy = torch.distributions.Categorical(logits=actions).entropy().mean()
 
-            return loss_, (td_error_, entropy)
+                return loss_, (td_error_, entropy)
 
-        loss, result = self.step(compute_loss, self.optimizer)
+            loss, result = self.step(compute_loss, self.optimizer)
 
-        td_error, entropy = result
-        td_error_mean = td_error.mean()
+            td_error, entropy = result
+            td_error_mean = td_error.mean()
 
-        self.record_loss(loss)
-        self.record_loss(td_error_mean, key='td_error')
-        self.record_loss(entropy, key='entropy')
+            self.record_loss(loss)
+            self.record_loss(td_error_mean, key='td_error')
+            self.record_loss(entropy, key='entropy')
 
-        print(f'loss: {loss}, td_error: {td_error_mean}, entropy: {entropy}')
+            print(f'loss: {loss}, td_error: {td_error_mean}, entropy: {entropy}')
 
         with torch.no_grad():
             if self.optimizer.step_count % self.configuration.update_steps == 0:
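The tail of this hunk is truncated, but the visible pieces (`get_ema_avg_fn` in the imports, `AveragedModel(model.clone(), avg_fn=avg_fn)` in `configure`, and the `step_count % update_steps` gate above) match the standard `torch.optim.swa_utils` recipe for an EMA target network. A minimal self-contained sketch of that recipe, with a stand-in `nn.Linear` in place of this repo's `Policy`:

```python
import torch
from torch.optim.swa_utils import AveragedModel, get_ema_avg_fn

# Stand-in for the online Q-network; the trainer itself clones a Policy.
model = torch.nn.Linear(4, 2)

# get_ema_avg_fn(decay) averages as: avg = decay * avg + (1 - decay) * current,
# matching the `decay` field in DeepQTrainer.Configuration.
target = AveragedModel(model, avg_fn=get_ema_avg_fn(0.99))

update_steps = 10

for step in range(1, 101):
    # ... one optimizer step on `model` would happen here ...
    if step % update_steps == 0:
        # Fold the online weights into the EMA target, which is
        # presumably what the truncated torch.no_grad() block does.
        target.update_parameters(model)
```

Since `__train__` now runs `epochs` sample/step iterations per call, `step_count` advances roughly `epochs` times as fast, so the target refresh fires correspondingly more often per trainer invocation.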
0 commit comments