From 3e39a87bfc446f39c04bd0330fca5e3928207d0f Mon Sep 17 00:00:00 2001
From: vikigenius
Date: Tue, 2 Apr 2019 19:48:26 -0400
Subject: [PATCH] GRAD update issue

---
 logging_config.yml          |   6 +-
 settings.yml                |   1 +
 src/data/dataset.py         |   2 +-
 src/models/resnet_base.py   |  18 +-----
 src/models/specnet.py       |  20 ++++++-
 src/models/train_model.py   |   2 +-
 src/utils/training_utils.py | 107 ++++++++++++++++++++----------------
 7 files changed, 86 insertions(+), 70 deletions(-)

diff --git a/logging_config.yml b/logging_config.yml
index e38082e..a7f1522 100644
--- a/logging_config.yml
+++ b/logging_config.yml
@@ -31,12 +31,12 @@ handlers:
     encoding: utf8
 
 loggers:
-  src.models.ved_varattn:
-    level: ERROR
+  src.utils.training_utils:
+    level: DEBUG
     handlers: [console]
     propagate: no
 
 root:
-  level: INFO
+  level: DEBUG
   handlers: [console, info_file_handler, error_file_handler]
 ...
diff --git a/settings.yml b/settings.yml
index c115a16..97bbfcb 100644
--- a/settings.yml
+++ b/settings.yml
@@ -62,6 +62,7 @@ hparams:
       decay_schedule: 'logarithmic'
       last_epoch: 30
       weight_decay: 5.0e-4
+      # decay_filters: ['bias', 'bn', 'downsample.1']
   adam:
     params:
       learning_rate: 0.001
diff --git a/src/data/dataset.py b/src/data/dataset.py
index 8290c98..8b0018e 100644
--- a/src/data/dataset.py
+++ b/src/data/dataset.py
@@ -25,7 +25,7 @@ def __init__(self, map_file: str, tdur=None):
         with open(map_file, 'rb') as f:
             self.spec_list = pickle.load(f)
         self.tdur = tdur
-        self.processor = ProcessedRaw(16000.0, preprocess=False)
+        self.processor = ProcessedRaw(16000.0, preprocess=True)
 
     def __getitem__(self, idx):
         sinfo = self.spec_list[idx]
diff --git a/src/models/resnet_base.py b/src/models/resnet_base.py
index a63a2ed..eb895d1 100644
--- a/src/models/resnet_base.py
+++ b/src/models/resnet_base.py
@@ -110,7 +110,7 @@ def __init__(self, block, layers, num_classes=1000,
         super(ResNet, self).__init__()
         self.inplanes = 64
         # Changed 3 to 1 for single channel
-        self.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                                bias=False)
         self.bn1 = nn.BatchNorm2d(64)
         self.relu = nn.ReLU(inplace=True)
@@ -119,8 +119,7 @@ def __init__(self, block, layers, num_classes=1000,
         self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
         self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
         self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
-        self.fc1 = nn.Linear(9, 1)
-        self.fc2 = nn.Linear(512 * block.expansion, num_classes)
+        self.fc = nn.Linear(512 * block.expansion, num_classes)
 
         for m in self.modules():
             if isinstance(m, nn.Conv2d):
@@ -169,19 +168,6 @@ def forward(self, x):
         x = self.layer3(x)
         x = self.layer4(x)
 
-        x = x.permute(0, 1, 3, 2)
-        x = self.fc1(x)
-        x = x.permute(0, 1, 3, 2)
-
-        width = x.size(3)
-
-        # Now do average pooling
-        x = F.avg_pool2d(x, (1, width))
-        #
-        # x = self.avgpool(x)
-        x = x.view(x.size(0), -1)
-        x = self.fc2(x)
-
         return x
 
 
diff --git a/src/models/specnet.py b/src/models/specnet.py
index 8219181..068eb18 100644
--- a/src/models/specnet.py
+++ b/src/models/specnet.py
@@ -1,8 +1,9 @@
 #!/usr/bin/env python
 from torch import nn
 import torch
+import torch.nn.functional as F
 from src.models.resnet_base import resnet50
-from src.models.ops import CELoss
+from src.models.ops import CELoss, Identity
 from src.utils.math_utils import nextpow2
 from src.utils import torch_utils
 
@@ -12,7 +13,11 @@ def __init__(self, num_classes, sf, win_size, hop_len,
                  window=torch.hamming_window):
         super().__init__()
         self.num_classes = num_classes
-        self.base = resnet50(num_classes=self.num_classes)
+        self.base = resnet50(pretrained=True)
+        self.base.fc = Identity()
+        self.base.avgpool = Identity()
+        self.fc = nn.Linear(9, 1)
+        self.classifier = nn.Linear(2048, num_classes)
         self.criterion = nn.CrossEntropyLoss()
         self.loss_obj = CELoss
         self.sf = sf
@@ -40,7 +45,16 @@ def spectrogram(self, signal: torch.Tensor):
     def forward(self, batch):
         signal = batch['raw']
         spec = self.spectrogram(signal).unsqueeze(1)
-        return self.base(spec)
+        spec = spec.repeat(1, 3, 1, 1)  # Convert to RGB
+        resout = self.base(spec)
+        x = resout.permute(0, 1, 3, 2)
+        x = self.fc(x)
+        x = x.permute(0, 1, 3, 2)
+        width = x.size(3)
+        x = F.avg_pool2d(x, (1, width))
+        x = x.view(x.size(0), -1)
+        y = self.classifier(x)
+        return y
 
     def loss(self, model_outs, batch):
         if self.num_classes == 2:
diff --git a/src/models/train_model.py b/src/models/train_model.py
index f3c6951..546e8c4 100644
--- a/src/models/train_model.py
+++ b/src/models/train_model.py
@@ -133,7 +133,7 @@ def train(ctx, dataset, model_type, resume, progress, gender, ckpt,
         model = SpecNet(hparams.num_classes, hparams.sf, hparams.win_size,
                         hparams.hop_len)
         validator = partial(validate, hparams, val_dataset, model, progress)
-        optimizer_name = 'adam'
+        optimizer_name = 'sgd'
     else:
         dataset = RawSpeech(train_map_file, hparams.duration)
         val_dataset = RawSpeechChunks(test_map_file, hparams.duration,
diff --git a/src/utils/training_utils.py b/src/utils/training_utils.py
index 99adc69..88ab9ef 100644
--- a/src/utils/training_utils.py
+++ b/src/utils/training_utils.py
@@ -2,11 +2,11 @@
 import logging
 import os
 import torch
-import math
+import numpy as np
 from datetime import datetime
 from torch import nn, optim
 from torch.utils.data import Dataset, DataLoader
-from torch.optim.lr_scheduler import LambdaLR
+from torch.optim.lr_scheduler import _LRScheduler, LambdaLR
 from tqdm import tqdm
 from src.utils import torch_utils
 
@@ -14,16 +14,40 @@
 logger = logging.getLogger(__name__)
 
 
-class LogarithmicDecay(object):
-    def __init__(self, init_lr, fin_lr, last_epoch):
-        self.init_lr = init_lr
-        self.fin_lr = fin_lr
-        self.last_epoch = last_epoch
-
-    def __call__(self, epoch):
-        fact = (self.fin_lr - self.init_lr)/math.log(self.last_epoch)
-        lr = fact*math.log(epoch, 2) + self.init_lr
-        return lr
+class InterpolatingScheduler(_LRScheduler):
+    def __init__(self, optimizer, steps, lrs, scale='log', last_epoch=-1):
+        """A scheduler that interpolates given values
+
+        Args:
+        - optimizer: pytorch optimizer
+        - steps: list or array with the x coordinates of the values
+        - lrs: list or array with the learning rates corresponding to the steps
+        - scale: one of ['linear', 'log'] the scale on which to interpolate.
+          Log is useful since learning rates operate on a
+          logarithmic scale.
+
+        Usage:
+            fc = nn.Linear(1,1)
+            optimizer = optim.Adam(fc.parameters())
+            lr_scheduler = InterpolatingScheduler(optimizer,
+                steps=[0, 100, 400], lrs=[1e-6, 1e-4, 1e-8], scale='log')
+        """
+        self.scale = scale
+        self.steps = steps
+        self.lrs = lrs
+        super().__init__(optimizer, last_epoch)
+
+    def get_lr(self):
+        x = [self.last_epoch]
+        if self.scale == 'linear':
+            y = np.interp(x, self.steps, self.lrs)
+        elif self.scale == 'log':
+            y = np.interp(x, self.steps, np.log(self.lrs))
+            y = np.exp(y)
+        else:
+            raise ValueError("scale should be one of ['linear', 'log']")
+        logger.debug(f'Epoch = {self.last_epoch}, lr = {y[0]}')
+        return [y[0] for lr in self.base_lrs]
 
 
 class Trainer(object):
@@ -38,19 +62,12 @@ def __init__(self, hparams, app_config, model: nn.Module):
         val_start = hparams.val_start
         self.val_ofs = self.val_step - val_start
 
-    def _create_save_dir(self, save_path, save_format):
-        self.save_path = save_path.format(type(self.model).__name__)
-        curr_time = datetime.now()
-        ts = curr_time.strftime(save_format)
-        self.save_model_path = os.path.join(self.save_path, ts)
-        os.makedirs(self.save_model_path)
-
-    def _setup_adam(self, params):
-        lr = params['learning_rate']
+    def get_filter_parameters(self, params: dict):
+        no_decay = params.get('decay_filters')
+        if no_decay is None:
+            return self.model.parameters()
         param_optimizer = list(self.model.named_parameters())
 
-        no_decay = ['bias', 'bn', 'downsample.1']
-
         optimizer_grouped_parameters = [
             {
                 'params': [
@@ -62,7 +79,19 @@
                     nd in n for nd in no_decay)],
                 'weight_decay': 0.0}
         ]
-        self.optimizer = optim.Adam(optimizer_grouped_parameters, lr=lr,
+        return optimizer_grouped_parameters
+
+    def _create_save_dir(self, save_path, save_format):
+        self.save_path = save_path.format(type(self.model).__name__)
+        curr_time = datetime.now()
+        ts = curr_time.strftime(save_format)
+        self.save_model_path = os.path.join(self.save_path, ts)
+        os.makedirs(self.save_model_path)
+
+    def _setup_adam(self, params):
+        lr = params['learning_rate']
+        parameters = self.get_filter_parameters(params)
+        self.optimizer = optim.Adam(parameters, lr=lr,
                                     amsgrad=True)
         self.scheduler = LambdaLR(self.optimizer, lambda x: x)
 
@@ -76,29 +105,13 @@ def _setup_rmsprop(self, params):
         self.scheduler = LambdaLR(self.optimizer, lambda x: x)
 
     def _setup_sgd(self, params):
-        param_optimizer = list(self.model.named_parameters())
-
-        no_decay = ['bias', 'bn', 'downsample.1']
-
-        optimizer_grouped_parameters = [
-            {
-                'params': [
-                    p for n, p in param_optimizer if not any(
-                        nd in n for nd in no_decay)],
-                'initial_lr': params['init_lr'],
-                'weight_decay': params['weight_decay']},
-            {
-                'params': [p for n, p in param_optimizer if any(
-                    nd in n for nd in no_decay)],
-                'initial_lr': params['init_lr'],
-                'weight_decay': 0.0}
-        ]
+        parameters = self.get_filter_parameters(params)
         self.optimizer = optim.SGD(
-            optimizer_grouped_parameters, lr=params['init_lr'],
-            momentum=0.9)
-        decay = LogarithmicDecay(
-            params['init_lr'], params['fin_lr'], params['last_epoch'])
-        self.scheduler = LambdaLR(self.optimizer, decay, params['last_epoch'])
+            parameters, lr=params['init_lr'], momentum=0.9)
+
+        self.scheduler = InterpolatingScheduler(
+            self.optimizer, [0, params['last_epoch']],
+            [params['init_lr'], params['fin_lr']])
 
     def setup_optimizers(self, optimizer, params, resume: bool):
         if optimizer == 'adam':
@@ -136,7 +149,9 @@ def train(self, dataset: Dataset, num_workers: int,
                 upd, total = self.model.loss(model_outs, batch)
 
                 self.optimizer.zero_grad()
+                assert total < 10.0, f'Step = {step}'
                 total.backward()
+                assert total < 10.0, f'Step = {step}'
                 self.optimizer.step()
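
The InterpolatingScheduler added to src/utils/training_utils.py above can be exercised on its own. A minimal sketch follows; the toy model, epoch count and learning-rate values are made up for illustration, only the class and its import path come from this patch:

    from torch import nn, optim
    from src.utils.training_utils import InterpolatingScheduler

    net = nn.Linear(10, 2)
    optimizer = optim.SGD(net.parameters(), lr=1e-2, momentum=0.9)
    # Interpolate on a log scale from 1e-2 at epoch 0 to 1e-5 at epoch 30,
    # mirroring how _setup_sgd wires steps=[0, last_epoch] to
    # lrs=[init_lr, fin_lr].
    scheduler = InterpolatingScheduler(optimizer, steps=[0, 30],
                                       lrs=[1e-2, 1e-5], scale='log')

    for epoch in range(30):
        # ... run one training epoch, calling optimizer.step() per batch ...
        scheduler.step()  # bumps last_epoch; get_lr() re-interpolates the lr

With scale='log' the interpolation runs over np.log(lrs), so the decay between the anchor points is geometric rather than linear.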
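
The weight-decay filtering that used to be hard-coded in _setup_adam and _setup_sgd now hangs off the optional decay_filters list, and that key is left commented out in settings.yml, so both optimizers currently fall back to plain model.parameters() (which, as far as this patch shows, also means no weight decay is applied, since the decay values only live on the parameter groups). A rough sketch of the grouped form, assuming the key were uncommented and an existing Trainer instance named trainer; the concrete values here are illustrative:

    from torch import optim

    # Hypothetical optimizer params, as if decay_filters were enabled in settings.yml
    params = {'init_lr': 0.01, 'weight_decay': 5.0e-4,
              'decay_filters': ['bias', 'bn', 'downsample.1']}
    groups = trainer.get_filter_parameters(params)
    # Two groups: parameters whose names match a filter get weight_decay=0.0;
    # the rest keep the weight_decay set in the (unchanged) grouping code.
    optimizer = optim.SGD(groups, lr=params['init_lr'], momentum=0.9)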
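
The paired assert total < 10.0 checks around backward() read as temporary guards for the "GRAD update issue" in the subject: they stop training as soon as the loss diverges instead of letting a bad update go through. If the goal is to keep training alive rather than halt it, clipping the gradient norm between backward() and step() is a common alternative; this is a substitute technique, not something the patch does, and the 5.0 threshold is arbitrary. Inside Trainer.train the sequence would become:

    total.backward()
    torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=5.0)
    self.optimizer.step()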