-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.py
executable file
·157 lines (119 loc) · 5.97 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import argparse
import numpy as np
import optuna
import torch
from data import data_helper
from models.optimizer_helper import get_optim_and_scheduler
from models.utils import get_model
from utils.utils import Logger, get_args
class Trainer:
    """Build a model, its data loaders and optimizer, then train and evaluate it.

    The trainer draws one mini-batch from every source loader per optimisation
    step, evaluates on the validation loader (and on the target loader when
    ``args.dg`` is set) after each epoch, and writes the best-scoring weights
    to ``self.save_model_path``.
    """

    def __init__(self, args, device, trial=None):
        """Create the model, loaders, optimizer/scheduler and (optionally) the loss.

        Args:
            args: Parsed options. Attributes read here: ``cuda``, ``network``,
                ``target``, ``load_model``, ``dg``, ``batch_size``, ``verbose``,
                ``epochs``, ``lr``, ``nesterov``, ``lr_scheduler`` and ``meth``.
                ``args.batch_size`` is divided by 3 in place when ``args.dg``
                is truthy, and ``args`` is replaced by whatever ``get_model``
                returns.
            device: Device that batches and the criterion are moved to.
            trial: Optional Optuna trial; when given, per-epoch accuracies are
                reported to it and the run may be pruned.
        """
        self.args = args
        self.device = device
        self.trial = trial

        # Model
        self.model, self.args = get_model(self.args)
        self.model = torch.nn.DataParallel(
            self.model, device_ids=[self.args.cuda], output_device=self.device)
        self.save_model_path = './bin/best_' + self.args.network + "_" + self.args.target + '.pth'
        if self.args.load_model:
            # map_location keeps a checkpoint saved on another device loadable here.
            state_dict = torch.load(self.save_model_path, map_location=self.device)
            self.model.load_state_dict(state_dict)

        # Data
        if self.args.dg:
            # Presumably splits the global batch across three source domains
            # (one mini-batch per source loader) -- TODO confirm.
            self.args.batch_size = self.args.batch_size // 3
            self.target_loader = data_helper.get_test_dataloader(self.args)
        else:
            # Empty list is falsy, so the "test" phase is skipped in _do_epoch.
            self.target_loader = []
        self.source_loader, self.val_loader = data_helper.get_dataset_list(self.args)
        self.test_loaders = {"val": self.val_loader, "test": self.target_loader}
        if self.args.verbose:
            n_val = len(self.val_loader.dataset)
            t_len = len(self.target_loader.dataset) if self.args.dg else 0
            # The train size is estimated as 9x the validation size
            # (presumably a 90/10 split -- TODO confirm).
            print(f"Dataset size: train {n_val*9}, val {n_val}, test {t_len}")

        # Optimizer
        self.optimizer, self.scheduler = get_optim_and_scheduler(
            network=self.model, epochs=self.args.epochs,
            lr=self.args.lr, nesterov=self.args.nesterov,
            sched=self.args.lr_scheduler)

        # Only the plain ('None') method computes the loss outside the model;
        # every other method returns its loss from the model's forward pass.
        if self.args.meth == 'None':
            self.criterion = torch.nn.CrossEntropyLoss(reduction='none')
            self.criterion = self.criterion.to(self.device)

    def _next_minibatches(self, iterators):
        """Return one batch per source loader, restarting exhausted iterators.

        Args:
            iterators: List of live iterators, one per entry of
                ``self.source_loader``; updated in place when one is restarted.
        """
        batches = []
        for idx, loader in enumerate(self.source_loader):
            try:
                batch = next(iterators[idx])
            except StopIteration:
                # This loader ran out before the epoch ended: start a new pass.
                iterators[idx] = iter(loader)
                batch = next(iterators[idx])
            batches.append(batch)
        return batches

    def _do_epoch(self, epoch=None):
        """Run one training epoch, then evaluate every non-empty test loader.

        Args:
            epoch: Step index passed to ``trial.report`` when a trial is set.

        Raises:
            optuna.exceptions.TrialPruned: If the attached trial asks to prune.
        """
        self.model.train()
        # Keep one iterator per loader for the whole epoch instead of building
        # a fresh one (and re-drawing its first batch) on every step.
        iterators = [iter(loader) for loader in self.source_loader]
        n_steps = len(self.val_loader.dataset) * 3 // self.args.batch_size
        for _ in range(n_steps):
            minibatches_device = self._next_minibatches(iterators)
            if self.args.meth == 'None':
                # Use the configured device (not a hard-coded .cuda()) so the
                # CPU fallback works and this matches do_test.
                all_x = torch.cat([x for x, _ in minibatches_device]).to(self.device)
                all_y = torch.cat([y for _, y in minibatches_device]).to(self.device)
                pred_y = self.model(all_x)
                loss = self.criterion(pred_y, all_y)
            else:
                loss = self.model(minibatches_device)

            self.optimizer.zero_grad()
            # The criterion uses reduction='none', so reduce to a scalar here.
            # The previous loss.backward(loss) fed the loss in as its own
            # upstream gradient, scaling every gradient by the loss value.
            loss.mean().backward()
            self.optimizer.step()
            del loss

        self.model.eval()
        with torch.no_grad():
            for phase, loader in self.test_loaders.items():
                if not loader:
                    continue
                total = len(loader.dataset)
                class_correct = self.do_test(loader)
                class_acc = float(class_correct) / total
                self.logger.log_test(phase, {"class": class_acc})
                self.results[phase][self.current_epoch] = class_acc

                # NOTE(review): reporting and checkpointing run for every
                # evaluated phase, so "test" accuracy can also drive pruning
                # and the saved checkpoint -- confirm this is intended.
                if self.trial is not None:
                    self.trial.report(class_acc, epoch)
                    if self.trial.should_prune():
                        raise optuna.exceptions.TrialPruned()

                # Save best model
                if class_acc > self.best_acc:
                    self.best_acc = class_acc
                    torch.save(self.model.state_dict(), self.save_model_path)

    def do_test(self, loader):
        """Count correctly classified samples over ``loader``.

        Each item of ``loader`` is unpacked as ``((data, labels), _)``. Callers
        in this file wrap the call in ``torch.no_grad()``.

        Returns:
            The number of correct predictions: a 0-dim tensor, or the int ``0``
            when ``loader`` yields nothing.
        """
        class_correct = 0
        for (data, class_l), _ in loader:
            data, class_l = data.to(self.device), class_l.to(self.device)
            class_logit = self.model(data)
            cls_pred = class_logit.argmax(dim=1)
            class_correct += torch.sum(cls_pred == class_l)
        return class_correct

    def do_training(self):
        """Train for ``args.epochs`` epochs and report the best results.

        Returns:
            Tuple ``(logger, model, test_acc)`` where ``test_acc`` is the test
            accuracy recorded at the epoch with the highest validation accuracy.
        """
        self.logger = Logger(self.args, update_frequency=30)
        self.results = {"val": torch.zeros(self.args.epochs), "test": torch.zeros(self.args.epochs)}
        self.best_acc = 0
        for self.current_epoch in range(self.args.epochs):
            self.logger.new_epoch(self.scheduler.get_last_lr())
            self._do_epoch(self.current_epoch)
            self.scheduler.step()

        val_res = self.results["val"]
        test_res = self.results["test"]
        # Model selection: pick the epoch with the best validation accuracy.
        idx_best = val_res.argmax()
        if self.args.verbose:
            print("Best val %g, Corresponding test %g - Best test: %g, Best epoch: %g" % (
                val_res.max(), test_res[idx_best], test_res.max(), idx_best))
        else:
            print("Best val %g, Corresponding test %g" % (val_res.max(), test_res[idx_best]))
        self.logger.save_best(test_res[idx_best], test_res.max())
        return self.logger, self.model, test_res[idx_best]
def main():
    """Parse options, build a Trainer, then either train or evaluate a checkpoint.

    When ``args.training`` is truthy the full training loop runs. Otherwise
    the weights at ``trainer.save_model_path`` are loaded and the accuracy on
    ``trainer.target_loader`` is printed.
    """
    args = get_args()
    args = data_helper.get_source_domains(args)
    print(args)

    # torch.cuda.set_device returns None, so its result must not be kept as
    # the device; only select a GPU when one is actually available so the
    # CPU fallback is reachable.
    if torch.cuda.is_available():
        torch.cuda.set_device(args.cuda)
        device = 'cuda'
    else:
        device = 'cpu'

    trainer = Trainer(args, device)
    if args.verbose:
        print(
            f"Training: model={args.network}, bs={args.batch_size}, lr={args.lr}, "
            f"scheduler={args.lr_scheduler},\n"
            f"epochs={args.epochs}, cuda={args.cuda}, target={args.target}, meth={args.meth}"
        )

    if args.training:
        trainer.do_training()
    else:
        # map_location keeps a checkpoint saved on another device loadable here.
        state_dict = torch.load(trainer.save_model_path, map_location=device)
        trainer.model.load_state_dict(state_dict)
        trainer.model.eval()
        with torch.no_grad():
            total = len(trainer.target_loader.dataset)
            class_correct = trainer.do_test(trainer.target_loader)
            class_acc = float(class_correct) / total
            print(class_acc)
# Script entry point: runs only when this file is executed directly.
if __name__ == "__main__":
    # Turn on cuDNN benchmark mode before main() builds the model.
    torch.backends.cudnn.benchmark = True
    main()