GRAD update issue
vikigenius committed Apr 2, 2019
1 parent 5154043 commit 3e39a87
Showing 7 changed files with 86 additions and 70 deletions.
logging_config.yml (6 changes: 3 additions & 3 deletions)
@@ -31,12 +31,12 @@ handlers:
     encoding: utf8
 
 loggers:
-  src.models.ved_varattn:
-    level: ERROR
+  src.utils.training_utils:
+    level: DEBUG
     handlers: [console]
     propagate: no
 
 root:
-  level: INFO
+  level: DEBUG
   handlers: [console, info_file_handler, error_file_handler]
...
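
For reference, a dictConfig-style YAML file like this one is normally loaded along the following lines; the loader itself is not part of this commit and the file path is assumed.

# Hypothetical loader for logging_config.yml (not shown in this diff);
# assumes PyYAML is installed and the file follows the logging.dictConfig schema.
import logging
import logging.config

import yaml

with open('logging_config.yml') as f:
    logging.config.dictConfig(yaml.safe_load(f))

# With the logger level raised to DEBUG, the per-epoch learning-rate messages
# emitted by src.utils.training_utils (see get_lr below) now reach the console.
logging.getLogger('src.utils.training_utils').debug('now visible')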
settings.yml (1 change: 1 addition & 0 deletions)
@@ -62,6 +62,7 @@ hparams:
       decay_schedule: 'logarithmic'
       last_epoch: 30
       weight_decay: 5.0e-4
+      # decay_filters: ['bias', 'bn', 'downsample.1']
   adam:
     params:
       learning_rate: 0.001
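
The commented-out decay_filters key feeds Trainer.get_filter_parameters further down in this commit: any parameter whose name contains one of the listed substrings is excluded from weight decay. A minimal, self-contained illustration of that name-matching rule follows; the toy model is invented for the example and is not part of the repository.

# Stand-alone illustration of how a decay_filters list such as
# ['bias', 'bn', 'downsample.1'] partitions named parameters.
from torch import nn

model = nn.Sequential()
model.add_module('conv1', nn.Conv2d(3, 8, 3, bias=False))
model.add_module('bn1', nn.BatchNorm2d(8))
model.add_module('fc', nn.Linear(8, 2))

no_decay = ['bias', 'bn']
named = list(model.named_parameters())
decayed = [n for n, _ in named if not any(nd in n for nd in no_decay)]
skipped = [n for n, _ in named if any(nd in n for nd in no_decay)]
print(decayed)   # ['conv1.weight', 'fc.weight']
print(skipped)   # ['bn1.weight', 'bn1.bias', 'fc.bias']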
src/data/dataset.py (2 changes: 1 addition & 1 deletion)
@@ -25,7 +25,7 @@ def __init__(self, map_file: str, tdur=None):
         with open(map_file, 'rb') as f:
             self.spec_list = pickle.load(f)
         self.tdur = tdur
-        self.processor = ProcessedRaw(16000.0, preprocess=False)
+        self.processor = ProcessedRaw(16000.0, preprocess=True)
 
     def __getitem__(self, idx):
         sinfo = self.spec_list[idx]
src/models/resnet_base.py (18 changes: 2 additions & 16 deletions)
@@ -110,7 +110,7 @@ def __init__(self, block, layers, num_classes=1000,
         super(ResNet, self).__init__()
         self.inplanes = 64
         # Changed 3 to 1 for single channel
-        self.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                                bias=False)
         self.bn1 = nn.BatchNorm2d(64)
         self.relu = nn.ReLU(inplace=True)
@@ -119,8 +119,7 @@ def __init__(self, block, layers, num_classes=1000,
         self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
         self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
         self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
-        self.fc1 = nn.Linear(9, 1)
-        self.fc2 = nn.Linear(512 * block.expansion, num_classes)
+        self.fc = nn.Linear(512 * block.expansion, num_classes)
 
         for m in self.modules():
             if isinstance(m, nn.Conv2d):
@@ -169,19 +168,6 @@ def forward(self, x):
         x = self.layer3(x)
         x = self.layer4(x)
 
-        x = x.permute(0, 1, 3, 2)
-        x = self.fc1(x)
-        x = x.permute(0, 1, 3, 2)
-
-        width = x.size(3)
-
-        # Now do average pooling
-        x = F.avg_pool2d(x, (1, width))
-        #
-        # x = self.avgpool(x)
-        x = x.view(x.size(0), -1)
-        x = self.fc2(x)
-
         return x


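specnet.py below imports an Identity module from src.models.ops and assigns it to the resnet50 fc and avgpool attributes, so the pretrained backbone can be loaded intact while the new head consumes the raw feature map. src/models/ops.py is not part of this diff; the module is presumably a simple pass-through along these lines (an assumption, not the repository's actual code; torch.nn.Identity did not yet exist in older PyTorch releases).

# Assumed sketch of Identity as it might appear in src/models/ops.py.
from torch import nn


class Identity(nn.Module):
    """Return the input unchanged; used to neutralise resnet50's fc/avgpool."""

    def forward(self, x):
        return x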
src/models/specnet.py (20 changes: 17 additions & 3 deletions)
@@ -1,8 +1,9 @@
 #!/usr/bin/env python
 from torch import nn
 import torch
+import torch.nn.functional as F
 from src.models.resnet_base import resnet50
-from src.models.ops import CELoss
+from src.models.ops import CELoss, Identity
 from src.utils.math_utils import nextpow2
 from src.utils import torch_utils

@@ -12,7 +13,11 @@ def __init__(self, num_classes, sf, win_size, hop_len,
                  window=torch.hamming_window):
         super().__init__()
         self.num_classes = num_classes
-        self.base = resnet50(num_classes=self.num_classes)
+        self.base = resnet50(pretrained=True)
+        self.base.fc = Identity()
+        self.base.avgpool = Identity()
+        self.fc = nn.Linear(9, 1)
+        self.classifier = nn.Linear(2048, num_classes)
         self.criterion = nn.CrossEntropyLoss()
         self.loss_obj = CELoss
         self.sf = sf
@@ -40,7 +45,16 @@ def spectrogram(self, signal: torch.Tensor):
     def forward(self, batch):
         signal = batch['raw']
         spec = self.spectrogram(signal).unsqueeze(1)
-        return self.base(spec)
+        spec = spec.repeat(1, 3, 1, 1)  # Convert to RGB
+        resout = self.base(spec)
+        x = resout.permute(0, 1, 3, 2)
+        x = self.fc(x)
+        x = x.permute(0, 1, 3, 2)
+        width = x.size(3)
+        x = F.avg_pool2d(x, (1, width))
+        x = x.view(x.size(0), -1)
+        y = self.classifier(x)
+        return y
 
     def loss(self, model_outs, batch):
         if self.num_classes == 2:
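The new head only works if the backbone's output has exactly 9 frequency bins, which is what nn.Linear(9, 1) hard-codes. The trace below walks through the tensor shapes; the 257-bin spectrogram (a 512-point FFT) and the batch/time sizes are assumed values rather than taken from this diff, and a random tensor stands in for the resnet50 output.

# Shape walk-through of the new SpecNet.forward (illustrative only).
import torch
import torch.nn.functional as F
from torch import nn

B, T, Tp = 4, 300, 10                    # batch, time frames, frames after the stride-32 backbone
spec = torch.randn(B, 1, 257, T)         # (B, 1, freq, time) after unsqueeze(1)
spec = spec.repeat(1, 3, 1, 1)           # (B, 3, 257, T): fake RGB for the pretrained stem

resout = torch.randn(B, 2048, 9, Tp)     # stand-in for self.base(spec): 257 -> 9, T -> ~T/32

fc = nn.Linear(9, 1)                     # collapses the frequency axis
classifier = nn.Linear(2048, 2)          # num_classes = 2 here

x = resout.permute(0, 1, 3, 2)           # (B, 2048, Tp, 9)
x = fc(x)                                # (B, 2048, Tp, 1)
x = x.permute(0, 1, 3, 2)                # (B, 2048, 1, Tp)
x = F.avg_pool2d(x, (1, x.size(3)))      # (B, 2048, 1, 1): mean over time
x = x.view(x.size(0), -1)                # (B, 2048)
print(classifier(x).shape)               # torch.Size([4, 2])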
src/models/train_model.py (2 changes: 1 addition & 1 deletion)
@@ -133,7 +133,7 @@ def train(ctx, dataset, model_type, resume, progress, gender, ckpt,
             model = SpecNet(hparams.num_classes, hparams.sf,
                             hparams.win_size, hparams.hop_len)
             validator = partial(validate, hparams, val_dataset, model, progress)
-            optimizer_name = 'adam'
+            optimizer_name = 'sgd'
     else:
         dataset = RawSpeech(train_map_file, hparams.duration)
         val_dataset = RawSpeechChunks(test_map_file, hparams.duration,
src/utils/training_utils.py (107 changes: 61 additions & 46 deletions)
@@ -2,28 +2,52 @@
 import logging
 import os
 import torch
-import math
+import numpy as np
 from datetime import datetime
 from torch import nn, optim
 from torch.utils.data import Dataset, DataLoader
-from torch.optim.lr_scheduler import LambdaLR
+from torch.optim.lr_scheduler import _LRScheduler, LambdaLR
 from tqdm import tqdm
 from src.utils import torch_utils
 
 
 logger = logging.getLogger(__name__)
 
 
-class LogarithmicDecay(object):
-    def __init__(self, init_lr, fin_lr, last_epoch):
-        self.init_lr = init_lr
-        self.fin_lr = fin_lr
-        self.last_epoch = last_epoch
-
-    def __call__(self, epoch):
-        fact = (self.fin_lr - self.init_lr)/math.log(self.last_epoch)
-        lr = fact*math.log(epoch, 2) + self.init_lr
-        return lr
+class InterpolatingScheduler(_LRScheduler):
+    def __init__(self, optimizer, steps, lrs, scale='log', last_epoch=-1):
+        """A scheduler that interpolates given values.
+
+        Args:
+        - optimizer: pytorch optimizer
+        - steps: list or array with the x coordinates of the values
+        - lrs: list or array with the learning rates corresponding to the steps
+        - scale: one of ['linear', 'log'], the scale on which to interpolate;
+          log is useful since learning rates operate on a logarithmic scale
+
+        Usage:
+            fc = nn.Linear(1, 1)
+            optimizer = optim.Adam(fc.parameters())
+            lr_scheduler = InterpolatingScheduler(
+                optimizer, steps=[0, 100, 400], lrs=[1e-6, 1e-4, 1e-8], scale='log')
+        """
+        self.scale = scale
+        self.steps = steps
+        self.lrs = lrs
+        super().__init__(optimizer, last_epoch)
+
+    def get_lr(self):
+        x = [self.last_epoch]
+        if self.scale == 'linear':
+            y = np.interp(x, self.steps, self.lrs)
+        elif self.scale == 'log':
+            y = np.interp(x, self.steps, np.log(self.lrs))
+            y = np.exp(y)
+        else:
+            raise ValueError("scale should be one of ['linear', 'log']")
+        logger.debug(f'Epoch = {self.last_epoch}, lr = {y[0]}')
+        return [y[0] for lr in self.base_lrs]


class Trainer(object):
@@ -38,19 +62,12 @@ def __init__(self, hparams, app_config, model: nn.Module):
         val_start = hparams.val_start
         self.val_ofs = self.val_step - val_start
 
-    def _create_save_dir(self, save_path, save_format):
-        self.save_path = save_path.format(type(self.model).__name__)
-        curr_time = datetime.now()
-        ts = curr_time.strftime(save_format)
-        self.save_model_path = os.path.join(self.save_path, ts)
-        os.makedirs(self.save_model_path)
-
-    def _setup_adam(self, params):
-        lr = params['learning_rate']
+    def get_filter_parameters(self, params: dict):
+        no_decay = params.get('decay_filters')
+        if no_decay is None:
+            return self.model.parameters()
         param_optimizer = list(self.model.named_parameters())
 
-        no_decay = ['bias', 'bn', 'downsample.1']
-
         optimizer_grouped_parameters = [
             {
                 'params': [
@@ -62,7 +79,19 @@ def _setup_adam(self, params):
                     nd in n for nd in no_decay)],
                 'weight_decay': 0.0}
         ]
-        self.optimizer = optim.Adam(optimizer_grouped_parameters, lr=lr,
+        return optimizer_grouped_parameters
+
+    def _create_save_dir(self, save_path, save_format):
+        self.save_path = save_path.format(type(self.model).__name__)
+        curr_time = datetime.now()
+        ts = curr_time.strftime(save_format)
+        self.save_model_path = os.path.join(self.save_path, ts)
+        os.makedirs(self.save_model_path)
+
+    def _setup_adam(self, params):
+        lr = params['learning_rate']
+        parameters = self.get_filter_parameters(params)
+        self.optimizer = optim.Adam(parameters, lr=lr,
                                     amsgrad=True)
         self.scheduler = LambdaLR(self.optimizer, lambda x: x)

@@ -76,29 +105,13 @@ def _setup_rmsprop(self, params):
         self.scheduler = LambdaLR(self.optimizer, lambda x: x)
 
     def _setup_sgd(self, params):
-        param_optimizer = list(self.model.named_parameters())
-
-        no_decay = ['bias', 'bn', 'downsample.1']
-
-        optimizer_grouped_parameters = [
-            {
-                'params': [
-                    p for n, p in param_optimizer if not any(
-                        nd in n for nd in no_decay)],
-                'initial_lr': params['init_lr'],
-                'weight_decay': params['weight_decay']},
-            {
-                'params': [p for n, p in param_optimizer if any(
-                    nd in n for nd in no_decay)],
-                'initial_lr': params['init_lr'],
-                'weight_decay': 0.0}
-        ]
+        parameters = self.get_filter_parameters(params)
         self.optimizer = optim.SGD(
-            optimizer_grouped_parameters, lr=params['init_lr'],
-            momentum=0.9)
-        decay = LogarithmicDecay(
-            params['init_lr'], params['fin_lr'], params['last_epoch'])
-        self.scheduler = LambdaLR(self.optimizer, decay, params['last_epoch'])
+            parameters, lr=params['init_lr'], momentum=0.9)
+
+        self.scheduler = InterpolatingScheduler(
+            self.optimizer, [0, params['last_epoch']],
+            [params['init_lr'], params['fin_lr']])
 
     def setup_optimizers(self, optimizer, params, resume: bool):
         if optimizer == 'adam':
@@ -136,7 +149,9 @@ def train(self, dataset: Dataset, num_workers: int,
                 upd, total = self.model.loss(model_outs, batch)
 
                 self.optimizer.zero_grad()
+                assert total < 10.0, f'Step = {step}'
                 total.backward()
+                assert total < 10.0, f'Step = {step}'
 
                 self.optimizer.step()
 
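With these changes, _setup_sgd hands SGD the filtered parameter groups and schedules the learning rate by log-scale interpolation between init_lr and fin_lr over last_epoch epochs; the old LogarithmicDecay object returned an absolute rate to LambdaLR, which treats the lambda's return value as a multiplier on the base rate. A small stand-alone demo of the interpolation, with made-up hyperparameter values rather than the ones in settings.yml:

# Stand-alone demo of the log-scale interpolation used by InterpolatingScheduler.
import numpy as np

init_lr, fin_lr, last_epoch = 0.01, 1e-4, 30   # assumed values
steps, lrs = [0, last_epoch], [init_lr, fin_lr]

for epoch in (0, 10, 20, 30):
    lr = np.exp(np.interp([epoch], steps, np.log(lrs)))[0]
    print(f'epoch {epoch:2d}: lr = {lr:.6f}')
# epoch  0: lr = 0.010000
# epoch 10: lr = 0.002154
# epoch 20: lr = 0.000464
# epoch 30: lr = 0.000100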
