Skip to content

Commit d56cc7f

Browse files
committed
Implement final eval
1 parent faed827 commit d56cc7f

File tree

25 files changed

+139
-69284
lines changed

25 files changed

+139
-69284
lines changed

diploma_thesis/agents/utils/rl/dqn.py

+4-5
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import copy
12
from typing import Dict
23

34
import tensordict
@@ -36,7 +37,7 @@ def configure(self, model: Policy):
3637

3738
avg_fn = get_ema_avg_fn(self.configuration.decay)
3839

39-
self._target_model = AveragedModel(model.clone(), avg_fn=avg_fn).to(self.device)
40+
self._target_model = copy.deepcopy(model) #AveragedModel(model.clone(), avg_fn=avg_fn).to(self.device)
4041

4142
def __train__(self, model: Policy):
4243
for _ in range(self.configuration.epochs):
@@ -54,8 +55,6 @@ def compute_loss():
5455
weight = torch.tensor(info['_weight']) if '_weight' in info.keys() else torch.ones_like(q_values)
5556
weight = weight.to(actions.device)
5657

57-
print(weight, info)
58-
5958
loss_ = (self.loss(actions, q_values) * weight).mean()
6059
td_error_ = torch.square(actions - q_values)
6160

@@ -76,7 +75,7 @@ def compute_loss():
7675

7776
with torch.no_grad():
7877
if self.optimizer.step_count % self.configuration.update_steps == 0:
79-
self._target_model.update_parameters(model)
78+
self._target_model = copy.deepcopy(model)
8079

8180
self.storage.update_priority(info['index'], td_error)
8281

@@ -100,7 +99,7 @@ def __get_action_values__(model: Policy, state, actions):
10099

101100
@property
102101
def target_model(self):
103-
return self._target_model.module
102+
return self._target_model
104103

105104
def state_dict(self):
106105
state_dict = super().state_dict()

diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/0/0/experiment.yml

+12-11
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,8 @@ dqn_1: &dqn_1
1212
- 'configuration/mods/machine/mods'
1313
mods:
1414
- 'agent/dqn/ddqn.yml'
15-
- 'agent/dqn/prioritized.yml'
15+
# - 'agent/dqn/prioritized.yml'
16+
- 'util/agent/multi_agent.yml'
1617
- *default_mods
1718

1819
###############################################################################################
@@ -24,7 +25,7 @@ dqn_2: &dqn_2
2425
- 'configuration/mods/machine/mods'
2526
mods:
2627
- 'agent/dqn/ddqn.yml'
27-
- 'agent/dqn/prioritized.yml'
28+
# - 'agent/dqn/prioritized.yml'
2829
- *default_mods
2930

3031
###############################################################################################
@@ -36,7 +37,7 @@ dqn_3: &dqn_3
3637
- 'configuration/mods/machine/mods'
3738
mods:
3839
- 'agent/dqn/ddqn.yml'
39-
- 'agent/dqn/prioritized.yml'
40+
# - 'agent/dqn/prioritized.yml'
4041
- *default_mods
4142

4243
###############################################################################################
@@ -139,14 +140,14 @@ task:
139140
machine_agent:
140141
parameters:
141142
- *dqn_1
142-
- output_dir: 'Relu'
143-
machine_agent:
144-
parameters:
145-
- *dqn_2
146-
- output_dir: 'Tanh'
147-
machine_agent:
148-
parameters:
149-
- *dqn_3
143+
# - output_dir: 'Relu'
144+
# machine_agent:
145+
# parameters:
146+
# - *dqn_2
147+
# - output_dir: 'Tanh'
148+
# machine_agent:
149+
# parameters:
150+
# - *dqn_3
150151

151152
tape:
152153
machine_reward:

diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/0/1/experiment.yml

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11

2-
template: &template 'baseline'
2+
template: &template 'relu'
33
base_model: &base_model 'configuration/experiments/jsp/MARL-DQN/experiment/0/machine.yml'
44

55
default_mods: &default_mods

diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/0/2/experiment.yml

+3-3
Original file line number | Diff line number | Diff line change
@@ -5,21 +5,21 @@ base_model: &base_model 'configuration/experiments/jsp/MARL-DQN/experiment/0/mac
55

66
default_mods: &default_mods
77
- 'agent/dqn/ddqn.yml'
8-
- 'agent/dqn/prioritized.yml'
8+
# - 'agent/dqn/prioritized.yml'
99

1010
###############################################################################################
1111

1212
dqn_1: &dqn_1
1313
base_path: *base_model
14-
template: 'baseline'
14+
template: 'relu'
1515
mod_dirs:
1616
- 'configuration/mods/machine/mods'
1717
mods:
1818
- *default_mods
1919

2020
dqn_2: &dqn_2
2121
base_path: *base_model
22-
template: 'baseline_orthogonal'
22+
template: 'relu_orthogonal'
2323
mod_dirs:
2424
- 'configuration/mods/machine/mods'
2525
mods:

diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/0/3/experiment.yml

+4-4
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
# Evaluate the effectiveness of basic DQNs on the JSP environment
22

3-
template: &template 'baseline'
3+
template: &template 'relu'
44
base_model: &base_model 'configuration/experiments/jsp/MARL-DQN/experiment/0/machine.yml'
55

66
default_mods: &default_mods []
@@ -11,7 +11,7 @@ dqn_1: &dqn_1
1111
template: *template
1212
mod_dirs:
1313
- 'agent/dqn/ddqn.yml'
14-
- 'agent/dqn/prioritized.yml'
14+
# - 'agent/dqn/prioritized.yml'
1515
- 'configuration/mods/machine/mods'
1616
mods:
1717
*default_mods
@@ -23,7 +23,7 @@ dqn_1_on_store: &dqn_1_on_store
2323
- 'configuration/mods/machine/mods'
2424
mods:
2525
- 'agent/dqn/ddqn.yml'
26-
- 'agent/dqn/prioritized.yml'
26+
# - 'agent/dqn/prioritized.yml'
2727
- *default_mods
2828
- 'util/train_schedule/on_store_32.yml'
2929

@@ -34,7 +34,7 @@ dqn_2_on_store: &dqn_2_on_store
3434
- 'configuration/mods/machine/mods'
3535
mods:
3636
- 'agent/dqn/ddqn.yml'
37-
- 'agent/dqn/prioritized.yml'
37+
# - 'agent/dqn/prioritized.yml'
3838
- *default_mods
3939
- 'util/train_schedule/on_store_32.yml'
4040

diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/0/episode_simulation.yml

+1-1
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ parameters:
77
timespan: 5000
88
machines_per_work_center: 1
99
work_center_count: 10
10-
deduce_naive_actions: True
10+
# deduce_naive_actions: False
1111

1212
dispatch:
1313
initial_job_assignment:

diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/0/machine.yml

+3-3
Original file line number | Diff line number | Diff line change
@@ -57,11 +57,11 @@ parameters:
5757
memory:
5858
kind: 'replay'
5959
parameters:
60-
size: 2048
61-
batch_size: 128
60+
size: 1024
61+
batch_size: 64
6262

6363
loss:
64-
kind: 'huber'
64+
kind: 'smooth_l1'
6565
parameters:
6666
reduction: 'none'
6767

Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
2+
encoder:
3+
kind: 'deep_marl_mr'
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,55 @@
1+
2+
3+
layers:
4+
- kind: 'layer_norm'
5+
parameters:
6+
normalized_shape: [5, 5]
7+
signature: 'state -> actions_hidden'
8+
9+
- kind: 'flatten'
10+
parameters:
11+
signature: 'actions_hidden -> actions_hidden'
12+
13+
- kind: 'linear'
14+
parameters:
15+
dim: 256
16+
activation: 'relu'
17+
signature: 'actions_hidden -> actions_hidden'
18+
initialization: 'orthogonal'
19+
- kind: 'linear'
20+
parameters:
21+
dim: 256
22+
activation: 'relu'
23+
signature: 'actions_hidden -> actions_hidden'
24+
initialization: 'orthogonal'
25+
26+
- kind: 'alias'
27+
parameters:
28+
signature: 'actions_hidden -> values_hidden'
29+
30+
- kind: 'linear'
31+
parameters:
32+
dim: 256
33+
activation: 'relu'
34+
signature: 'actions_hidden -> actions_hidden'
35+
initialization: 'orthogonal'
36+
37+
- kind: 'linear'
38+
parameters:
39+
dim: 256
40+
activation: 'relu'
41+
signature: 'values_hidden -> values_hidden'
42+
initialization: 'orthogonal'
43+
44+
- kind: 'linear'
45+
parameters:
46+
dim: 1
47+
activation: 'none'
48+
signature: 'actions_hidden -> actor_value'
49+
initialization: 'orthogonal'
50+
51+
- kind: 'output'
52+
parameters:
53+
value: 'values_hidden'
54+
actor_value: actor_value
55+
actions: 'actions_hidden'
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
2+
rules:
3+
- 'spt'
4+
- 'lwkr'
5+
- 'ms'
6+
- 'winq'

diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/2/machine.yml

+2-2
Original file line number | Diff line number | Diff line change
@@ -51,12 +51,12 @@ parameters:
5151
device: 'cpu'
5252
sample_count: 512
5353
policy_step_ratio: 0.2
54-
entropy_regularization: 0.05
54+
entropy_regularization: 0.1
5555
# entropy_decay: 0.999
5656
rollback_ratio: 0.01
5757
critic_weight: 1.0
5858

59-
epochs: 15
59+
epochs: 10
6060

6161
loss:
6262
kind: 'cross_entropy'

diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/2/templates/baseline/model.yml

+5-5
Original file line number | Diff line number | Diff line change
@@ -15,13 +15,13 @@ layers:
1515
dim: 256
1616
activation: 'relu'
1717
signature: 'actions_hidden -> actions_hidden'
18-
initialization: 'xavier'
18+
initialization: 'orthogonal'
1919
- kind: 'linear'
2020
parameters:
2121
dim: 256
2222
activation: 'relu'
2323
signature: 'actions_hidden -> actions_hidden'
24-
initialization: 'xavier'
24+
initialization: 'orthogonal'
2525

2626
- kind: 'alias'
2727
parameters:
@@ -32,21 +32,21 @@ layers:
3232
dim: 256
3333
activation: 'relu'
3434
signature: 'actions_hidden -> actions_hidden'
35-
initialization: 'xavier'
35+
initialization: 'orthogonal'
3636

3737
- kind: 'linear'
3838
parameters:
3939
dim: 256
4040
activation: 'relu'
4141
signature: 'values_hidden -> values_hidden'
42-
initialization: 'xavier'
42+
initialization: 'orthogonal'
4343

4444
- kind: 'linear'
4545
parameters:
4646
dim: 1
4747
activation: 'none'
4848
signature: 'actions_hidden -> actor_value'
49-
initialization: 'xavier'
49+
initialization: 'orthogonal'
5050

5151
- kind: 'output'
5252
parameters:

diploma_thesis/configuration/experiments/jsp/tournament.yml

+2-2
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ task:
44
n_workers: 4
55
n_threads: 10
66
name: 'rules'
7-
output_dir: 'results/jsp/experiments/tournaments/jsp/2. GRAPH-NN Flexible (Tournament)'
7+
output_dir: 'results/jsp/experiments/tournaments/jsp/0. MARL (Tournament)'
88
store_run_statistics: True
99
log_run: False
1010
update: True
@@ -35,7 +35,7 @@ task:
3535
- kind: 'persisted_agents'
3636
parameters:
3737
prefix: ''
38-
path: 'results/jsp/experiments/0 (Graph)'
38+
path: 'results/jsp/experiments/0 (MARL.)'
3939
depth: 5
4040

4141
# - kind: 'persisted_agents'

0 commit comments

Comments
 (0)