-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathonecycle.py
102 lines (77 loc) · 3.46 KB
/
onecycle.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import tensorflow as tf
import numpy as np
import logging
logging.getLogger('tensorflow').setLevel(logging.ERROR)
from tensorflow.keras.callbacks import Callback
class CosineAnnealer:
    """Anneals a value from ``start`` to ``end`` over ``steps`` steps
    along one half-period of a cosine curve."""

    def __init__(self, start, end, steps):
        self.start = start
        self.end = end
        self.steps = steps
        self.n = 0  # number of steps taken so far

    def step(self):
        """Advance one step and return the annealed value.

        Progress is clamped at 1.0 so that stepping past ``steps`` keeps
        returning ``end`` — without the clamp, cos(pi * n/steps) for
        n > steps rebounds past -1 and the value climbs back toward
        ``start`` (the cosine is periodic).
        """
        self.n += 1
        progress = min(self.n / self.steps, 1.0)
        cos = np.cos(np.pi * progress) + 1
        # At progress 0 this is end + (start-end) = start; at 1 it is end.
        return self.end + (self.start - self.end) / 2. * cos
class OneCycleScheduler(Callback):
    """Callback that schedules the learning rate on a 1cycle policy as per
    Leslie Smith's paper (https://arxiv.org/pdf/1803.09820.pdf).

    If the model's optimizer supports a momentum parameter, it is adapted by
    the schedule as well. The implementation adopts the improvements from the
    fastai library (https://docs.fast.ai/callbacks.one_cycle.html): only two
    phases are used and the adaptation is done with cosine annealing.

    * Phase 1: LR increases from ``lr_max / div_factor`` to ``lr_max`` while
      momentum decreases from ``mom_max`` to ``mom_min``.
    * Phase 2: LR decreases from ``lr_max`` to ``lr_max / (div_factor * 1e4)``
      while momentum increases back from ``mom_min`` to ``mom_max``.

    By default the phases are not of equal length; phase 1's share of the
    total steps is controlled by ``phase_1_pct``.
    """

    def __init__(self, lr_max, steps, mom_min=0.85, mom_max=0.95, phase_1_pct=0.3, div_factor=25.):
        super(OneCycleScheduler, self).__init__()
        lr_min = lr_max / div_factor
        final_lr = lr_max / (div_factor * 1e4)
        phase_1_steps = steps * phase_1_pct  # NOTE: may be fractional
        phase_2_steps = steps - phase_1_steps

        self.phase_1_steps = phase_1_steps
        self.phase_2_steps = phase_2_steps
        self.phase = 0  # 0 = warm-up phase, 1 = annealing phase
        self.step = 0   # training batches seen so far

        # phases[p] = [lr annealer, momentum annealer] for phase p.
        self.phases = [[CosineAnnealer(lr_min, lr_max, phase_1_steps), CosineAnnealer(mom_max, mom_min, phase_1_steps)],
                       [CosineAnnealer(lr_max, final_lr, phase_2_steps), CosineAnnealer(mom_min, mom_max, phase_2_steps)]]

        self.lrs = []   # LR history, one entry per training batch
        self.moms = []  # momentum history, one entry per training batch

    def on_train_begin(self, logs=None):
        """Reset the schedule and apply the initial LR/momentum."""
        self.phase = 0
        self.step = 0
        # Fix: also rewind each annealer's internal counter so the schedule
        # restarts cleanly when the same callback instance is reused across
        # multiple fit() calls (previously only phase/step were reset).
        for lr_sched, mom_sched in self.phases:
            lr_sched.n = 0
            mom_sched.n = 0
        self.set_lr(self.lr_schedule().start)
        self.set_momentum(self.mom_schedule().start)

    def on_train_batch_begin(self, batch, logs=None):
        # Record the values actually in effect for this batch.
        self.lrs.append(self.get_lr())
        self.moms.append(self.get_momentum())

    def on_train_batch_end(self, batch, logs=None):
        self.step += 1
        if self.step >= self.phase_1_steps:
            self.phase = 1
        self.set_lr(self.lr_schedule().step())
        self.set_momentum(self.mom_schedule().step())

    def get_lr(self):
        """Return the optimizer's current LR, or None if it has no `lr`."""
        try:
            return tf.keras.backend.get_value(self.model.optimizer.lr)
        except AttributeError:
            return None

    def get_momentum(self):
        """Return the optimizer's current momentum, or None if unsupported."""
        try:
            return tf.keras.backend.get_value(self.model.optimizer.momentum)
        except AttributeError:
            return None

    def set_lr(self, lr):
        try:
            tf.keras.backend.set_value(self.model.optimizer.lr, lr)
        except AttributeError:
            pass  # optimizer has no lr attribute; best-effort by design

    def set_momentum(self, mom):
        try:
            tf.keras.backend.set_value(self.model.optimizer.momentum, mom)
        except AttributeError:
            pass  # optimizer has no momentum attribute; best-effort by design

    def lr_schedule(self):
        """The LR annealer for the current phase."""
        return self.phases[self.phase][0]

    def mom_schedule(self):
        """The momentum annealer for the current phase."""
        return self.phases[self.phase][1]