# search.py (forked from maxjcohen/transformer)
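"""Hyperparameter grid search for the time-series Transformer on the Oze dataset.

Every combination in `search_params` is trained for EPOCHS epochs, and the
loss returned by `fit` is appended to `search_log.csv` through `Logger`.
"""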
import itertools
from collections import OrderedDict

import numpy as np
import torch
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm

from tst import Transformer
from tst.loss import OZELoss
from src.dataset import OzeDataset
from src.utils import fit, Logger
# ===== User-set parameters =====
search_params = OrderedDict({
    "d_model": [32],         # latent dimension, e.g. 2 ** np.arange(5, 10)
    "q": [8],                # query/key dimension, e.g. 2 ** np.arange(3, 7)
    "v": [8],                # value dimension, e.g. 2 ** np.arange(3, 7)
    "h": [4],                # number of attention heads, e.g. np.arange(2, 9, 2)
    "N": [2],                # number of encoder/decoder layers, e.g. np.arange(2, 7, 2)
    "attention_size": [12],  # attention window size, e.g. np.arange(12, 73, 12)
})
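# A fuller sweep can be obtained by swapping in the commented-out ranges
# above. A sketch of that full grid, which enumerates
# 5 * 4 * 4 * 4 * 3 * 6 = 5760 configurations, so expect a long run:
#
#     search_params = OrderedDict({
#         "d_model": 2 ** np.arange(5, 10),
#         "q": 2 ** np.arange(3, 7),
#         "v": 2 ** np.arange(3, 7),
#         "h": np.arange(2, 9, 2),
#         "N": np.arange(2, 7, 2),
#         "attention_size": np.arange(12, 73, 12),
#     })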
# Training parameters
DATASET_PATH = 'datasets/dataset.npz'
BATCH_SIZE = 4
NUM_WORKERS = 4
LR = 2e-4
EPOCHS = 10
# ===== End of user-set parameters =====
# Model parameters
dropout = 0.2 # Dropout rate
pe = None # Positional encoding
chunk_mode = "window"
d_input = 39 # From dataset
d_output = 8 # From dataset
# Config
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device {device}")
# Define loss function
loss_function = OZELoss(alpha=0.3)
# Load dataset
ozeDataset = OzeDataset(DATASET_PATH)
# Split into train and validation sets; sizes must sum to len(ozeDataset)
dataset_train, dataset_val = random_split(ozeDataset, (750, 250))
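# A minimal sketch of a proportional split that adapts to the dataset size
# (hypothetical alternative, assuming a 75/25 split is desired):
#
#     n_train = int(0.75 * len(ozeDataset))
#     dataset_train, dataset_val = random_split(
#         ozeDataset, (n_train, len(ozeDataset) - n_train))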
dataloader_train = DataLoader(dataset_train,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              num_workers=NUM_WORKERS
                              )
dataloader_val = DataLoader(dataset_val,
                            batch_size=BATCH_SIZE,
                            shuffle=True,
                            num_workers=NUM_WORKERS
                            )
# Start search
n_steps = np.prod([len(search_range)
                   for search_range in search_params.values()])
logger = Logger('search_log.csv', search_params)
with tqdm(total=n_steps*EPOCHS) as pbar:
    for params in itertools.product(*search_params.values()):
        params = {key: params[idx]
                  for idx, key in enumerate(search_params.keys())}
        pbar.set_postfix(params)

        # Build a Transformer for this configuration, with an Adam optimizer
        net = Transformer(d_input=d_input,
                          d_output=d_output,
                          dropout=dropout,
                          chunk_mode=chunk_mode,
                          pe=pe,
                          **params).to(device)
        optimizer = optim.Adam(net.parameters(), lr=LR)

        # Fit model
        loss = fit(net, optimizer, loss_function, dataloader_train,
                   dataloader_val, epochs=EPOCHS, pbar=pbar, device=device)

        # Log the loss for this configuration
        logger.log(params, loss=loss)
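# After the search completes, the best configuration can be read back from
# the log. A minimal sketch, assuming Logger writes one CSV row per
# configuration with the search parameters as columns plus a "loss" column:
#
#     import pandas as pd
#
#     results = pd.read_csv('search_log.csv')
#     best = results.loc[results['loss'].idxmin()]
#     print(best)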