-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.py
70 lines (55 loc) · 2.14 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import hydra
import torch
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from hydra.utils import instantiate
from utils.utils import init_run, get_class_weights
from preprocessing.preprocessing import build_vocabulary
@hydra.main(config_path="configs", version_base=None)
def main(config):
    """Train, validate, and test a model driven entirely by the Hydra config.

    Builds the model, loss criterion, optimizer, LR scheduler, vocabulary,
    and the train/val/test dataloaders from ``config``, runs the training
    loop for ``config.general.max_epochs`` epochs, then a final test pass.

    Args:
        config: Hydra/OmegaConf config with ``model``, ``dataset``,
            ``dataloader``, ``trainer``, ``optimizer`` and ``general``
            sections (schema defined by the files under ``configs/``).
    """
    device = init_run(config)
    model = instantiate(config=config.model).to(device)

    # Optionally weight the loss per class (weights computed from the
    # training split) — presumably to counter class imbalance; the exact
    # weighting scheme lives in get_class_weights.
    if config.general.compute_class_weights:
        class_weights = get_class_weights(config.dataset.path, config.dataset.train)
        class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)
        criterion = CrossEntropyLoss(weight=class_weights, reduction="mean")
    else:
        criterion = CrossEntropyLoss()

    optimizer = Adam(lr=config.optimizer.learning_rate, params=model.parameters())
    # mode="max": the scheduler steps on a metric that should increase
    # (passed to trainer.evaluate below); LR is halved after 2 stagnant epochs.
    scheduler = ReduceLROnPlateau(optimizer, mode="max", factor=0.5, patience=2)

    # Vocabulary/tokenizer are built once from the training split and shared
    # by all three dataloaders so token ids stay consistent across splits.
    vocab, tokenizer = build_vocabulary(config.dataset.path, config.dataset.train)

    def _make_dataloader(dataset_name):
        # Single place for the dataloader boilerplate that was previously
        # triplicated for the train/val/test splits.
        return instantiate(
            config=config.dataloader,
            dataset_name=dataset_name,
            vocab=vocab,
            tokenizer=tokenizer,
        ).create_dataloader()

    train_dataloader = _make_dataloader(config.dataset.train)
    val_dataloader = _make_dataloader(config.dataset.val)
    test_dataloader = _make_dataloader(config.dataset.test)

    trainer = instantiate(
        config=config.trainer,
        model=model,
        train_dataloader=train_dataloader,
        val_dataloader=val_dataloader,
        test_dataloader=test_dataloader,
        device=device,
        criterion=criterion,
        optimizer=optimizer,
    )

    # One train + evaluate pass per epoch; evaluate drives the scheduler.
    for epoch in range(config.general.max_epochs):
        trainer.train(current_epoch_nr=epoch)
        trainer.evaluate(current_epoch_nr=epoch, scheduler=scheduler)

    trainer.test()
# Script entry point — Hydra parses the CLI and injects the config into main().
if __name__ == "__main__":
    main()