Skip to content

Commit 8d4a2d4

Browse files
committed
Change training schedule
1 parent 2c11db2 commit 8d4a2d4

35 files changed

+164
-126
lines changed

diploma_thesis/agents/utils/action/sample.py

+4
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
import dis
2+
13
import torch.distributions
24

35
from .action_selector import *
@@ -19,6 +21,8 @@ def __call__(self, distribution: torch.Tensor) -> Tuple[int, torch.Tensor]:
1921

2022
action = distribution.sample().item()
2123

24+
print("action: ", action, "entropy: ", distribution.entropy().item(), distribution.probs)
25+
2226
return action, distribution.probs
2327

2428
@staticmethod

diploma_thesis/agents/utils/policy/flexible_action.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ def encode(self, state):
3838
prev_count += target_nodes_count
3939

4040
actions = torch.nn.utils.rnn.pad_sequence(result, batch_first=True, padding_value=-float('inf'))
41-
lengths = torch.tensor(lengths)
41+
lengths = torch.tensor(lengths).to(actions.device)
4242

4343
output[Keys.ACTIONS] = (actions, lengths)
4444

diploma_thesis/agents/utils/rl/utils/ppo_mixin.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ def base_parameters_from_cli(parameters: Dict):
2525
sample_count=parameters.get('sample_count', 128),
2626
policy_step_ratio=parameters.get('policy_step_ratio', 1.0),
2727
entropy_regularization=parameters.get('entropy_regularization', 0.0),
28-
rollback_ratio=parameters.get('rollback_ratio', 0.1),
28+
rollback_ratio=parameters.get('rollback_ratio', 0.0),
2929
critic_weight=parameters.get('critic_weight', 1.0),
3030
epochs=parameters.get('epochs', 1),
3131
priority_reduction_ratio=parameters.get('priority_reduction_ratio', 1.05)
@@ -124,7 +124,7 @@ def actor_loss(batch, logits, configuration: PPOConfiguration, device):
124124
advantages = batch.info[Record.ADVANTAGE_KEY]
125125

126126
# Normalize advantages
127-
advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
127+
# advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
128128

129129
action_probs = batch.info[Record.POLICY_KEY][range, batch.action.view(-1)]
130130

@@ -139,6 +139,8 @@ def actor_loss(batch, logits, configuration: PPOConfiguration, device):
139139

140140
advantages = torch.min(weights * advantages, clipped_weights * advantages)
141141

142+
print(advantages)
143+
142144
entropy = distribution.entropy().mean()
143145

144146
return torch.mean(advantages) + entropy_regularization * entropy, entropy

diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/1 (DQN)/experiment.yml

+3-3
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ graph: &graph
2020

2121
default_mods: &default_mods
2222
- 'util/infrastructure/cuda.yml'
23+
- 'util/train_schedule/on_store_64.yml'
2324

2425
###############################################################################################
2526

@@ -135,7 +136,7 @@ long_single_source_run: &long_single_source_run
135136
nested:
136137
parameters:
137138
dispatch:
138-
seed: [ [ 0 ] ]
139+
seed: [ [ 0, 1, 2, 3 ] ]
139140

140141

141142
###############################################################################################
@@ -194,8 +195,7 @@ task:
194195
base_path: 'configuration/experiments/jsp/GRAPH-NN/run.yml'
195196
mod_dirs:
196197
- 'configuration/mods/run/mods'
197-
mods:
198-
- 'n_workers/1.yml'
198+
mods: []
199199
nested:
200200
parameters:
201201
simulations:

diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/1 (PPO)/experiment.yml

+8-5
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ ppo_1: &ppo_1
3737
# 'util/agent/multi_agent.yml'
3838
]
3939
- [ 'util/infrastructure/cuda.yml' ]
40+
- [ 'util/train_schedule/on_store.yml' ]
4041

4142
ppo_2: &ppo_2
4243
base_path: *base_model
@@ -55,6 +56,7 @@ ppo_2: &ppo_2
5556
# 'util/agent/multi_agent.yml'
5657
]
5758
- [ 'util/infrastructure/cuda.yml' ]
59+
- [ 'util/train_schedule/on_store.yml' ]
5860

5961
ppo_3: &ppo_3
6062
base_path: *base_model
@@ -73,6 +75,7 @@ ppo_3: &ppo_3
7375
# 'util/agent/multi_agent.yml'
7476
]
7577
- [ 'util/infrastructure/cuda.yml' ]
78+
- [ 'util/train_schedule/on_store.yml' ]
7679

7780
ppo_4: &ppo_4
7881
base_path: *base_model
@@ -91,6 +94,7 @@ ppo_4: &ppo_4
9194
# 'util/agent/multi_agent.yml'
9295
]
9396
- [ 'util/infrastructure/cuda.yml' ]
97+
- [ 'util/train_schedule/on_store.yml' ]
9498
###############################################################################################
9599

96100
reward: &reward
@@ -112,15 +116,15 @@ long_single_source_run: &long_single_source_run
112116
nested:
113117
parameters:
114118
dispatch:
115-
seed: [ [ 0 ] ]
119+
seed: [ [ 0, 1, 2, 3 ] ]
116120

117121

118122
###############################################################################################
119123

120124

121125
task:
122126
kind: 'multi_task'
123-
n_workers: 4
127+
n_workers: 8
124128
n_threads: 32
125129
debug: False
126130
store_run_statistics: False
@@ -160,7 +164,7 @@ task:
160164
simulator:
161165
kind: 'td'
162166
parameters:
163-
memory: 96
167+
memory: 64
164168
emit_trajectory: True
165169

166170
graph:
@@ -172,8 +176,7 @@ task:
172176
base_path: 'configuration/experiments/jsp/GRAPH-NN/run_ppo.yml'
173177
mod_dirs:
174178
- 'configuration/mods/run/mods'
175-
mods:
176-
- 'n_workers/1.yml'
179+
mods: []
177180
nested:
178181
parameters:
179182
simulations:

diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/2 (DQN)/experiment.yml

+4-4
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,8 @@ graph: &graph
1919
is_work_center_set_in_shop_floor_connected: False
2020

2121
default_mods: &default_mods
22-
['util/infrastructure/cuda.yml']
22+
- 'util/infrastructure/cuda.yml'
23+
- 'util/train_schedule/on_store_64.yml'
2324

2425
###############################################################################################
2526

@@ -231,7 +232,7 @@ long_single_source_run: &long_single_source_run
231232
nested:
232233
parameters:
233234
dispatch:
234-
seed: [ [ 0 ] ]
235+
seed: [ [ 0, 1, 2, 3 ] ]
235236

236237

237238
###############################################################################################
@@ -290,8 +291,7 @@ task:
290291
base_path: 'configuration/experiments/jsp/GRAPH-NN/run.yml'
291292
mod_dirs:
292293
- 'configuration/mods/run/mods'
293-
mods:
294-
- 'n_workers/1.yml'
294+
mods: []
295295
nested:
296296
parameters:
297297
simulations:

diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/2 (PPO)/experiment.yml

+8-4
Original file line number | Diff line number | Diff line change
@@ -30,12 +30,13 @@ ppo_1: &ppo_1
3030
- [ 'util/optimizer/grad_norm.yml' ]
3131
- [
3232
'__none__',
33-
'agent/ppo/p3or.yml'
33+
# 'agent/ppo/p3or.yml'
3434
]
3535
- [
3636
'__none__',
3737
# 'util/agent/multi_agent.yml'
3838
]
39+
- ['util/train_schedule/on_store.yml']
3940
- ['util/infrastructure/cuda.yml']
4041

4142
ppo_2: &ppo_2
@@ -55,6 +56,7 @@ ppo_2: &ppo_2
5556
# 'util/agent/multi_agent.yml'
5657
]
5758
- ['util/infrastructure/cuda.yml']
59+
- ['util/train_schedule/on_store.yml']
5860

5961
ppo_3: &ppo_3
6062
base_path: *base_model
@@ -73,6 +75,7 @@ ppo_3: &ppo_3
7375
# 'util/agent/multi_agent.yml'
7476
]
7577
- ['util/infrastructure/cuda.yml']
78+
- ['util/train_schedule/on_store.yml']
7679

7780
ppo_4: &ppo_4
7881
base_path: *base_model
@@ -91,6 +94,7 @@ ppo_4: &ppo_4
9194
# 'util/agent/multi_agent.yml'
9295
]
9396
- ['util/infrastructure/cuda.yml']
97+
- [ 'util/train_schedule/on_store.yml']
9498
###############################################################################################
9599

96100
reward: &reward
@@ -112,7 +116,7 @@ long_single_source_run: &long_single_source_run
112116
nested:
113117
parameters:
114118
dispatch:
115-
seed: [ [ 0 ] ]
119+
seed: [ [ 0, 1, 2, 3 ] ]
116120

117121

118122
###############################################################################################
@@ -132,6 +136,7 @@ task:
132136
base:
133137
name: 'model'
134138
output_dir: '1'
139+
seed: 0
135140
log_stdout: False
136141

137142
machine_agent:
@@ -172,8 +177,7 @@ task:
172177
base_path: 'configuration/experiments/jsp/GRAPH-NN/run_ppo.yml'
173178
mod_dirs:
174179
- 'configuration/mods/run/mods'
175-
mods:
176-
- 'n_workers/1.yml'
180+
mods: []
177181
nested:
178182
parameters:
179183
simulations:

diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/3 (DQN)/experiment.yml

+3-3
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ graph: &graph
2020

2121
default_mods: &default_mods
2222
- 'util/infrastructure/cuda.yml'
23+
- 'util/train_schedule/on_store_64.yml'
2324

2425
###############################################################################################
2526

@@ -323,7 +324,7 @@ long_single_source_run: &long_single_source_run
323324
nested:
324325
parameters:
325326
dispatch:
326-
seed: [ [ 0 ] ]
327+
seed: [ [ 0, 1, 2, 3 ] ]
327328

328329

329330
###############################################################################################
@@ -382,8 +383,7 @@ task:
382383
base_path: 'configuration/experiments/jsp/GRAPH-NN/run.yml'
383384
mod_dirs:
384385
- 'configuration/mods/run/mods'
385-
mods:
386-
- 'n_workers/1.yml'
386+
mods: []
387387
nested:
388388
parameters:
389389
simulations:

diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/3 (PPO)/experiment.yml

+9-4
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ ppo_1: &ppo_1
3737
# 'util/agent/multi_agent.yml'
3838
]
3939
- ['util/infrastructure/cuda.yml']
40+
- [ 'util/train_schedule/on_store.yml']
4041

4142
ppo_2: &ppo_2
4243
base_path: *base_model
@@ -55,6 +56,7 @@ ppo_2: &ppo_2
5556
# 'util/agent/multi_agent.yml'
5657
]
5758
- ['util/infrastructure/cuda.yml']
59+
- [ 'util/train_schedule/on_store.yml']
5860

5961
ppo_3: &ppo_3
6062
base_path: *base_model
@@ -73,6 +75,7 @@ ppo_3: &ppo_3
7375
# 'util/agent/multi_agent.yml'
7476
]
7577
- ['util/infrastructure/cuda.yml']
78+
- [ 'util/train_schedule/on_store.yml']
7679

7780
ppo_4: &ppo_4
7881
base_path: *base_model
@@ -91,6 +94,7 @@ ppo_4: &ppo_4
9194
# 'util/agent/multi_agent.yml'
9295
]
9396
- ['util/infrastructure/cuda.yml']
97+
- [ 'util/train_schedule/on_store.yml']
9498

9599
ppo_5: &ppo_5
96100
base_path: *base_model
@@ -109,6 +113,7 @@ ppo_5: &ppo_5
109113
# 'util/agent/multi_agent.yml'
110114
]
111115
- ['util/infrastructure/cuda.yml']
116+
- [ 'util/train_schedule/on_store.yml']
112117

113118
ppo_6: &ppo_6
114119
base_path: *base_model
@@ -127,6 +132,7 @@ ppo_6: &ppo_6
127132
# 'util/agent/multi_agent.yml'
128133
]
129134
- ['util/infrastructure/cuda.yml']
135+
- [ 'util/train_schedule/on_store.yml']
130136
###############################################################################################
131137

132138
reward: &reward
@@ -148,7 +154,7 @@ long_single_source_run: &long_single_source_run
148154
nested:
149155
parameters:
150156
dispatch:
151-
seed: [ [ 0 ] ]
157+
seed: [ [ 0, 1, 2, 3 ] ]
152158

153159

154160
###############################################################################################
@@ -196,7 +202,7 @@ task:
196202
simulator:
197203
kind: 'td'
198204
parameters:
199-
memory: 96
205+
memory: 64
200206
emit_trajectory: True
201207

202208
graph:
@@ -208,8 +214,7 @@ task:
208214
base_path: 'configuration/experiments/jsp/GRAPH-NN/run_ppo.yml'
209215
mod_dirs:
210216
- 'configuration/mods/run/mods'
211-
mods:
212-
- 'n_workers/1.yml'
217+
mods: []
213218
nested:
214219
parameters:
215220
simulations:

diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/4 (DQN)/experiment.yml

+3-3
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ graph: &graph
2020

2121
default_mods: &default_mods
2222
- 'util/infrastructure/cuda.yml'
23+
- 'util/train_schedule/on_store_64.yml'
2324
###############################################################################################
2425

2526
dqn_1: &dqn_1
@@ -182,7 +183,7 @@ long_single_source_run: &long_single_source_run
182183
nested:
183184
parameters:
184185
dispatch:
185-
seed: [ [ 0 ] ]
186+
seed: [ [ 0, 1, 2, 3 ] ]
186187

187188

188189
###############################################################################################
@@ -241,8 +242,7 @@ task:
241242
base_path: 'configuration/experiments/jsp/GRAPH-NN/run.yml'
242243
mod_dirs:
243244
- 'configuration/mods/run/mods'
244-
mods:
245-
- 'n_workers/1.yml'
245+
mods: []
246246
nested:
247247
parameters:
248248
simulations:

0 commit comments

Comments
 (0)