optimizers.py
# coding: utf-8
import torch
from functools import reduce
from torch.optim import AdamW


class MultiOptimizer:
    """Wraps several optimizers and schedulers and dispatches calls by key."""

    def __init__(self, optimizers=None, schedulers=None):
        self.optimizers = optimizers if optimizers is not None else {}
        self.schedulers = schedulers if schedulers is not None else {}
        self.keys = list(self.optimizers.keys())
        # Flatten the param groups of all wrapped optimizers into one list.
        self.param_groups = reduce(
            lambda x, y: x + y,
            [v.param_groups for v in self.optimizers.values()],
            [])

    def state_dict(self):
        # Return a list of (key, state_dict) pairs, one per wrapped optimizer.
        return [(key, self.optimizers[key].state_dict()) for key in self.keys]

    def load_state_dict(self, state_dict):
        for key, val in state_dict:
            try:
                self.optimizers[key].load_state_dict(val)
            except Exception:
                print("Unloaded %s" % key)

    def step(self, key=None):
        # Step a single optimizer by key, or all of them when no key is given.
        if key is not None:
            self.optimizers[key].step()
        else:
            for k in self.keys:
                self.optimizers[k].step()

    def zero_grad(self, key=None):
        # Zero the gradients of one optimizer by key, or of all of them.
        if key is not None:
            self.optimizers[key].zero_grad()
        else:
            for k in self.keys:
                self.optimizers[k].zero_grad()

    def scheduler(self, *args, key=None):
        # Step a single LR scheduler by key, or all of them when no key is given.
        if key is not None:
            self.schedulers[key].step(*args)
        else:
            for k in self.keys:
                self.schedulers[k].step(*args)


def build_optimizer(parameters):
    optimizer, scheduler = _define_optimizer(parameters)
    return optimizer, scheduler


def _define_optimizer(params):
    optimizer_params = params['optimizer_params']
    sch_params = params['scheduler_params']
    optimizer = AdamW(
        params['params'],
        lr=optimizer_params.get('lr', 1e-4),
        weight_decay=optimizer_params.get('weight_decay', 5e-4),
        betas=(0.9, 0.98),
        eps=1e-9)
    scheduler = _define_scheduler(optimizer, sch_params)
    return optimizer, scheduler


def _define_scheduler(optimizer, params):
    print(params)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=params.get('max_lr', 5e-4),
        epochs=params.get('epochs', 200),
        steps_per_epoch=params.get('steps_per_epoch', 1000),
        pct_start=params.get('pct_start', 0.0),
        final_div_factor=params.get('final_div_factor', 10))
    # scheduler = StepLR(optimizer, 100, gamma=5, last_epoch=-1)
    return scheduler


def build_multi_optimizer(parameters_dict, scheduler_params):
    # One AdamW optimizer per parameter group, each with its own OneCycleLR scheduler.
    optim = {key: AdamW(params, lr=1e-4, weight_decay=1e-6,
                        betas=(0.9, 0.98), eps=1e-9)
             for key, params in parameters_dict.items()}
    schedulers = {key: _define_scheduler(opt, scheduler_params)
                  for key, opt in optim.items()}
    multi_optim = MultiOptimizer(optim, schedulers)
    return multi_optim
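
A minimal usage sketch of the two entry points above, assuming the file is importable as optimizers. The generator/discriminator names, the nn.Linear stand-ins, and the dummy loss are illustrative assumptions; the dictionary keys (params, optimizer_params, scheduler_params, max_lr, epochs, steps_per_epoch) are the ones read by _define_optimizer and _define_scheduler.

import torch
import torch.nn as nn

from optimizers import build_optimizer, build_multi_optimizer  # assumes this file is on the path

# Illustrative sub-modules; the names and nn.Linear stand-ins are assumptions.
nets = {'generator': nn.Linear(16, 16), 'discriminator': nn.Linear(16, 1)}

# Single AdamW optimizer plus OneCycleLR scheduler via build_optimizer.
optimizer, scheduler = build_optimizer({
    'params': nets['generator'].parameters(),
    'optimizer_params': {'lr': 1e-4, 'weight_decay': 5e-4},
    'scheduler_params': {'max_lr': 5e-4, 'epochs': 2, 'steps_per_epoch': 10},
})

# One optimizer/scheduler pair per module, dispatched by key through MultiOptimizer.
scheduler_params = {'max_lr': 5e-4, 'epochs': 2, 'steps_per_epoch': 10}
multi_optim = build_multi_optimizer(
    {key: net.parameters() for key, net in nets.items()}, scheduler_params)

x = torch.randn(4, 16)
for _ in range(10):
    multi_optim.zero_grad()                    # zero grads of every wrapped optimizer
    loss = nets['generator'](x).pow(2).mean()  # dummy loss for the sketch
    loss.backward()
    multi_optim.step('generator')              # step only the generator's optimizer
    multi_optim.scheduler(key='generator')     # advance its OneCycleLR schedule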