forked from qubvel-org/segmentation_models.pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdecoder.py
217 lines (188 loc) · 7.12 KB
/
decoder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
from typing import Any, Dict, List, Optional, Union
import torch
import torch.nn as nn
import torch.nn.functional as F
from segmentation_models_pytorch.base import modules as md
class PABBlock(nn.Module):
    """Position-wise attention block.

    Three convolutions project the input feature map: ``top_conv`` and
    ``center_conv`` (1x1, ``in_channels -> pab_channels``) provide the two
    operands of a position-to-position similarity map, and ``bottom_conv``
    (3x3, channel preserving) provides the features that the map re-weights.
    The re-weighted features are added to the input and passed through a
    final 3x3 convolution (``out_conv``).

    Args:
        in_channels: Number of channels of the incoming feature map; the
            output has the same number of channels.
        pab_channels: Number of channels produced by the two 1x1 projections.
    """

    def __init__(self, in_channels: int, pab_channels: int = 64):
        super().__init__()
        self.pab_channels = pab_channels
        self.in_channels = in_channels

        # 1x1 projections used to build the similarity map.
        self.top_conv = nn.Conv2d(in_channels, pab_channels, kernel_size=1)
        self.center_conv = nn.Conv2d(in_channels, pab_channels, kernel_size=1)
        # 3x3 projection of the features that get re-weighted by the map.
        self.bottom_conv = nn.Conv2d(
            in_channels, in_channels, kernel_size=3, padding=1
        )
        self.map_softmax = nn.Softmax(dim=1)
        self.out_conv = nn.Conv2d(
            in_channels, in_channels, kernel_size=3, padding=1
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the attention block to a 4-D ``(B, C, H, W)`` tensor.

        Returns:
            A tensor with the same shape as ``x``.
        """
        batch_size, _, height, width = x.shape
        num_positions = height * width

        # Project, then merge the two spatial axes into a single one.
        keys = self.top_conv(x).flatten(2)  # (B, pab, HW)
        queries = self.center_conv(x).flatten(2).transpose(1, 2)  # (B, HW, pab)
        values = self.bottom_conv(x).flatten(2).transpose(1, 2)  # (B, HW, C)

        affinity = queries @ keys  # (B, HW, HW)
        # The softmax is taken jointly over all HW*HW entries of each sample,
        # not row by row, hence the temporary flattening.
        affinity = self.map_softmax(affinity.view(batch_size, -1))
        affinity = affinity.view(batch_size, num_positions, num_positions)

        attended = affinity @ values  # (B, HW, C)
        # NOTE(review): the (B, HW, C) result is reshaped straight to
        # (B, C, H, W) without transposing the last two axes first. Kept
        # as-is because changing it would alter the block's numerical output;
        # confirm whether this is intended.
        attended = attended.reshape(batch_size, self.in_channels, height, width)

        return self.out_conv(x + attended)
class MFABBlock(nn.Module):
    """Decoder block that fuses an upsampled feature map with a skip connection.

    The incoming feature map is first mapped to ``skip_channels`` channels
    (``hl_conv``) and upsampled. Two squeeze-and-excitation style branches
    (global average pool -> 1x1 conv -> ReLU -> 1x1 conv -> sigmoid), one fed
    by the upsampled features (``SE_hl``) and one by the skip (``SE_ll``),
    produce per-channel gates whose sum scales the upsampled features. The
    gated features are concatenated with the skip and refined by two 3x3
    ``md.Conv2dReLU`` layers.

    Args:
        in_channels: Channels of the incoming (lower resolution) feature map.
        skip_channels: Channels of the skip connection.
        out_channels: Channels produced by the block.
        interpolation_mode: ``mode`` passed to ``F.interpolate``.
        use_norm: Forwarded unchanged to every ``md.Conv2dReLU``.
        reduction: Divisor for the hidden width of the two gating branches;
            the hidden width is ``max(1, skip_channels // reduction)``.
    """

    def __init__(
        self,
        in_channels: int,
        skip_channels: int,
        out_channels: int,
        interpolation_mode: str = "nearest",
        use_norm: Union[bool, str, Dict[str, Any]] = "batchnorm",
        reduction: int = 16,
    ):
        # MFABBlock is just a modified version of SE-blocks, one for skip, one for input
        super().__init__()
        # Bring the high-level features to the skip's channel count.
        self.hl_conv = nn.Sequential(
            md.Conv2dReLU(
                in_channels,
                in_channels,
                kernel_size=3,
                padding=1,
                use_norm=use_norm,
            ),
            md.Conv2dReLU(
                in_channels,
                skip_channels,
                kernel_size=1,
                use_norm=use_norm,
            ),
        )
        # Never let the bottleneck collapse to zero channels.
        reduced_channels = max(1, skip_channels // reduction)
        self.SE_ll = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(skip_channels, reduced_channels, 1),
            nn.ReLU(inplace=True),
            nn.Conv2d(reduced_channels, skip_channels, 1),
            nn.Sigmoid(),
        )
        self.SE_hl = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(skip_channels, reduced_channels, 1),
            nn.ReLU(inplace=True),
            nn.Conv2d(reduced_channels, skip_channels, 1),
            nn.Sigmoid(),
        )
        # Input is the gated high-level features (already mapped to
        # ``skip_channels``) concatenated with the skip itself.
        self.conv1 = md.Conv2dReLU(
            skip_channels + skip_channels,
            out_channels,
            kernel_size=3,
            padding=1,
            use_norm=use_norm,
        )
        self.conv2 = md.Conv2dReLU(
            out_channels,
            out_channels,
            kernel_size=3,
            padding=1,
            use_norm=use_norm,
        )
        self.interpolation_mode = interpolation_mode

    def forward(
        self, x: torch.Tensor, skip: Optional[torch.Tensor] = None
    ) -> torch.Tensor:
        """Upsample ``x``, gate it, and fuse it with ``skip``.

        Args:
            x: Feature map with ``in_channels`` channels.
            skip: Skip connection with ``skip_channels`` channels. ``conv1``
                is built for ``2 * skip_channels`` input channels, so a block
                constructed through ``__init__`` needs a skip to run.

        Returns:
            The fused feature map produced by ``conv2``.
        """
        x = self.hl_conv(x)
        if skip is None:
            x = F.interpolate(x, scale_factor=2.0, mode=self.interpolation_mode)
        else:
            # Resize to the skip's own spatial size rather than a blind 2x:
            # identical when the skip is exactly twice as large, and it keeps
            # the concatenation below valid when an encoder stage rounded an
            # odd size.
            x = F.interpolate(
                x, size=skip.shape[2:], mode=self.interpolation_mode
            )
        attention_hl = self.SE_hl(x)
        if skip is not None:
            attention_ll = self.SE_ll(skip)
            attention_hl = attention_hl + attention_ll
            x = x * attention_hl
            x = torch.cat([x, skip], dim=1)
        x = self.conv1(x)
        x = self.conv2(x)
        return x
class DecoderBlock(nn.Module):
    """Plain decoder block: upsample, optionally concatenate a skip, refine.

    Args:
        in_channels: Channels of the incoming (lower resolution) feature map.
        skip_channels: Channels of the skip connection (``0`` when the block
            is used without one).
        out_channels: Channels produced by the block.
        interpolation_mode: ``mode`` passed to ``F.interpolate``.
        use_norm: Forwarded unchanged to every ``md.Conv2dReLU``.
    """

    def __init__(
        self,
        in_channels: int,
        skip_channels: int,
        out_channels: int,
        interpolation_mode: str = "nearest",
        use_norm: Union[bool, str, Dict[str, Any]] = "batchnorm",
    ):
        super().__init__()
        # First conv consumes the upsampled input concatenated with the skip.
        self.conv1 = md.Conv2dReLU(
            in_channels + skip_channels,
            out_channels,
            kernel_size=3,
            padding=1,
            use_norm=use_norm,
        )
        self.conv2 = md.Conv2dReLU(
            out_channels,
            out_channels,
            kernel_size=3,
            padding=1,
            use_norm=use_norm,
        )
        self.interpolation_mode = interpolation_mode

    def forward(
        self, x: torch.Tensor, skip: Optional[torch.Tensor] = None
    ) -> torch.Tensor:
        """Upsample ``x`` and fuse it with ``skip`` when one is given.

        Args:
            x: Feature map with ``in_channels`` channels.
            skip: Optional skip connection with ``skip_channels`` channels.

        Returns:
            The feature map produced by ``conv2``.
        """
        if skip is None:
            x = F.interpolate(x, scale_factor=2.0, mode=self.interpolation_mode)
        else:
            # Resize to the skip's own spatial size rather than a blind 2x:
            # identical when the skip is exactly twice as large, and it keeps
            # the concatenation valid when an encoder stage rounded an odd
            # size.
            x = F.interpolate(
                x, size=skip.shape[2:], mode=self.interpolation_mode
            )
            x = torch.cat([x, skip], dim=1)
        x = self.conv1(x)
        x = self.conv2(x)
        return x
class MAnetDecoder(nn.Module):
    """Decoder made of a ``PABBlock`` followed by a stack of upsampling blocks.

    The deepest encoder feature goes through ``center`` (a ``PABBlock``); each
    following stage is an ``MFABBlock`` when it has a skip connection with
    more than zero channels and a plain ``DecoderBlock`` otherwise (the final
    stage).

    Args:
        encoder_channels: Channel count of every encoder output, shallowest
            first. The first entry is discarded.
        decoder_channels: Output channels of each decoder stage.
        n_blocks: Expected number of decoder stages.
        reduction: Forwarded to every ``MFABBlock``.
        use_norm: Forwarded to every decoder stage.
        pab_channels: Forwarded to the ``PABBlock``.
        interpolation_mode: Forwarded to every decoder stage.

    Raises:
        ValueError: If ``n_blocks`` differs from ``len(decoder_channels)``.
    """

    def __init__(
        self,
        encoder_channels: List[int],
        decoder_channels: List[int],
        n_blocks: int = 5,
        reduction: int = 16,
        use_norm: Union[bool, str, Dict[str, Any]] = "batchnorm",
        pab_channels: int = 64,
        interpolation_mode: str = "nearest",
    ):
        super().__init__()

        provided_blocks = len(decoder_channels)
        if n_blocks != provided_blocks:
            raise ValueError(
                "Model depth is {}, but you provide `decoder_channels` for {} blocks.".format(
                    n_blocks, provided_blocks
                )
            )

        # Drop the first entry (skip with the same spatial resolution as the
        # input) and order the remaining ones starting from the encoder head.
        deepest_first = encoder_channels[1:][::-1]
        head_channels = deepest_first[0]

        # Per-stage channel bookkeeping. The last stage has no skip.
        stage_inputs = [head_channels] + list(decoder_channels[:-1])
        stage_skips = list(deepest_first[1:]) + [0]

        self.center = PABBlock(head_channels, pab_channels=pab_channels)

        stages = []
        for in_ch, skip_ch, out_ch in zip(
            stage_inputs, stage_skips, decoder_channels
        ):
            if skip_ch > 0:
                stage = MFABBlock(
                    in_ch,
                    skip_ch,
                    out_ch,
                    reduction=reduction,
                    use_norm=use_norm,
                    interpolation_mode=interpolation_mode,
                )
            else:
                # No skip connection: fall back to the simple decoder block.
                stage = DecoderBlock(
                    in_ch,
                    skip_ch,
                    out_ch,
                    use_norm=use_norm,
                    interpolation_mode=interpolation_mode,
                )
            stages.append(stage)
        self.blocks = nn.ModuleList(stages)

    def forward(self, features: List[torch.Tensor]) -> torch.Tensor:
        """Decode a list of encoder features (shallowest first).

        The first feature is ignored, the last one is fed to ``center``, and
        the remaining ones are consumed as skips from deepest to shallowest.
        Stages beyond the available skips receive ``None``.
        """
        # Discard the first feature, then walk from the encoder head down.
        deepest_first = features[1:][::-1]
        skips = deepest_first[1:]
        available = len(skips)

        x = self.center(deepest_first[0])
        for index, stage in enumerate(self.blocks):
            skip = None if index >= available else skips[index]
            x = stage(x, skip)
        return x