-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlwf.py
106 lines (89 loc) · 3.85 KB
/
lwf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# Copyright 2022-present, Lorenzo Bonicelli, Pietro Buzzega, Matteo Boschini, Angelo Porrello, Simone Calderara.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from stream_benchmark.datasets.aux_cifar100 import AuxDataset
import torch
from torch.optim import SGD
from torch.utils.data import DataLoader
from stream_benchmark.models.__base_model import BaseModel
def smooth(logits, temp, dim):
    """Temperature-rescale a tensor and renormalise it along ``dim``.

    Every entry is raised to the power ``1 / temp`` and the result is divided
    by its sum along ``dim``.

    Args:
        logits: Tensor to rescale. NOTE(review): despite the name, the call
            sites visible in this file pass softmax outputs, not raw logits.
        temp: Temperature; the exponent applied is ``1 / temp``.
        dim: Dimension along which the rescaled values are normalised.

    Returns:
        A tensor with the same shape as ``logits``.
    """
    powered = logits ** (1 / temp)
    # keepdim=True leaves the reduced axis in place so the division broadcasts
    # along ``dim`` itself, whatever its index (a fixed unsqueeze(1) is only
    # correct when dim == 1).
    return powered / torch.sum(powered, dim, keepdim=True)
def modified_kl_div(old, new):
    """Return the batch-averaged cross-entropy of ``new`` against ``old``.

    For each row, ``old * log(new)`` is summed over dimension 1; the negated
    mean of those row sums is returned.

    Args:
        old: Target tensor (used as weights on the log term).
        new: Tensor whose element-wise logarithm is taken.

    Returns:
        A zero-dimensional tensor holding the loss value.
    """
    per_row = (old * new.log()).sum(1)
    return -per_row.mean()
class Lwf(BaseModel):
    """Learning without Forgetting: distil the previous model's outputs.

    At the start of every task after the first, the classifier is warmed up on
    the new task's classes and the network's outputs on the new data are
    cached. While training, ``observe`` adds a distillation term that compares
    the current outputs on the old classes with those cached outputs.

    NOTE(review): ``self.net``, ``self.loss``, ``self.optimizer`` and
    ``self.device`` are not set in this class; presumably ``BaseModel``
    provides them -- confirm.
    """

    name = "lwf"
    description = "Continual learning via Learning without Forgetting."
    link = "https://arxiv.org/abs/1606.09282"

    def __init__(self, backbone, loss, lr, n_epochs, batch_size, task_start_idx, lwf, **_):
        """Store the hyper-parameters and build the class-mask table.

        Args:
            backbone: Forwarded to ``BaseModel.__init__``.
            loss: Forwarded to ``BaseModel.__init__``.
            lr: Learning rate; forwarded to ``BaseModel.__init__`` and also
                used for the warm-up optimiser in ``begin_task``.
            n_epochs: Number of warm-up epochs run in ``begin_task``.
            batch_size: Batch size of the data loader built in ``begin_task``.
            task_start_idx: Indexable sequence of class offsets; its last
                entry is used as the total number of classes.
                NOTE(review): presumably entry ``t`` is the first class index
                of task ``t`` -- confirm against the caller.
            lwf: Mapping providing the ``'alpha'`` and ``'softmax_temp'``
                entries.
            **_: Extra keyword arguments, ignored.
        """
        super().__init__(backbone, loss, lr)
        self.old_net = None
        self.soft = torch.nn.Softmax(dim=1)
        self.logsoft = torch.nn.LogSoftmax(dim=1)
        self.task_start_idx = task_start_idx
        self.current_task = 0
        nc = self.task_start_idx[-1]
        # Row k of the lower-triangular table is True for columns 0..k, i.e. a
        # boolean mask selecting the first k + 1 classes.
        self.eye = torch.tril(torch.ones((nc, nc))).bool().to(self.device)
        self.n_epochs = n_epochs
        self.lr = lr
        self.batch_size = batch_size
        self.alpha = lwf['alpha']
        self.softmax_temp = lwf['softmax_temp']

    def begin_task(self, train_loader, task_start_idx, *_):
        """Prepare for a new task and cache the distillation targets.

        For the first task nothing is done besides advancing the task counter.
        For later tasks the classifier is warmed up on the new task's classes,
        then the network outputs for every batch of ``train_loader`` are
        recorded.

        Args:
            train_loader: Iterable yielding ``(inputs, labels)`` batches.
            task_start_idx: Offset added to the batch labels during warm-up.
                This parameter shadows ``self.task_start_idx`` (the sequence
                stored by ``__init__``) inside this method.
            *_: Extra positional arguments, ignored.

        Returns:
            ``None`` for the first task; otherwise a shuffling ``DataLoader``
            over an ``AuxDataset`` holding the inputs, labels and recorded
            outputs.
        """
        dl = None
        self.net.eval()
        if self.current_task > 0:
            # warm-up: only the classifier's parameters are optimised.
            opt = SGD(self.net.classifier.parameters(), lr=self.lr)
            # Both values are constant for the whole warm-up, so compute them
            # once. XOR of the two prefix masks keeps only the columns of the
            # task that is about to start.
            task_offset = self.task_start_idx[self.current_task]
            mask = (
                self.eye[self.task_start_idx[self.current_task + 1] - 1]
                ^ self.eye[task_offset - 1]
            )
            for _ in range(self.n_epochs):
                for inputs, labels in train_loader:
                    labels = task_start_idx + labels
                    inputs, labels = inputs.to(self.device), labels.to(self.device)
                    opt.zero_grad()
                    # Features are computed without gradient tracking.
                    with torch.no_grad():
                        feats = self.net(inputs, returnt="features")
                    outputs = self.net.classifier(feats)[:, mask]
                    # Labels are shifted so they index the masked columns.
                    loss = self.loss(outputs, labels - task_offset)
                    loss.backward()
                    opt.step()

            # Record the network's outputs on the new data; they are handed
            # back to the caller through the returned data loader.
            x, y, logits = [], [], []
            with torch.no_grad():
                for inputs, labels in train_loader:
                    out = self.net(inputs.to(self.device)).cpu()
                    x.append(inputs)
                    y.append(labels)
                    logits.append(out)
            x, y, logits = torch.concat(x), torch.concat(y), torch.concat(logits)
            dl = DataLoader(
                AuxDataset(x, y, logits=logits),
                batch_size=self.batch_size,
                shuffle=True,
            )
        self.net.train()
        self.current_task += 1
        return dl

    def end_task(self, *_):
        """Do nothing at the end of a task."""
        pass

    def observe(self, inputs, labels, not_aug_inputs, logits=None):
        """Run one optimisation step and return the loss value.

        Args:
            inputs: Batch fed to ``self.net``.
            labels: Targets for the classification loss.
            not_aug_inputs: Unused.
            logits: Optional recorded outputs for this batch; when given, the
                distillation term weighted by ``self.alpha`` is added.

        Returns:
            The total loss for the batch as a Python number.
        """
        self.optimizer.zero_grad()
        outputs = self.net(inputs)

        # Classification loss over every class up to the end of the current
        # task (``current_task`` was already advanced by ``begin_task``).
        mask = self.eye[self.task_start_idx[self.current_task] - 1]
        loss = self.loss(outputs[:, mask], labels)
        if logits is not None:
            # Distillation is restricted to the classes of the earlier tasks.
            mask = self.eye[self.task_start_idx[self.current_task - 1] - 1]
            # Move the recorded outputs to the model's device before masking:
            # the mask lives on ``self.device``.
            old_probs = self.soft(logits.to(self.device)[:, mask])
            new_probs = self.soft(outputs[:, mask])
            # Use the configured temperature rather than a literal constant so
            # the ``softmax_temp`` setting actually takes effect.
            loss += self.alpha * modified_kl_div(
                smooth(old_probs, self.softmax_temp, 1),
                smooth(new_probs, self.softmax_temp, 1),
            )

        loss.backward()
        self.optimizer.step()
        return loss.item()