FMM.py
import torch
import torch.nn as nn
import torch.nn.functional as F

# Paper: Spatially-Adaptive Feature Modulation for Efficient Image Super-Resolution (ICCV 2023)
# Paper link: https://openaccess.thecvf.com/content/ICCV2023/papers/Sun_Spatially-Adaptive_Feature_Modulation_for_Efficient_Image_Super-Resolution_ICCV_2023_paper.pdf
# Layer Norm
class LayerNorm(nn.Module):
    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_first"):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.bias = nn.Parameter(torch.zeros(normalized_shape))
        self.eps = eps
        self.data_format = data_format
        if self.data_format not in ["channels_last", "channels_first"]:
            raise NotImplementedError
        self.normalized_shape = (normalized_shape, )

    def forward(self, x):
        if self.data_format == "channels_last":
            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
        elif self.data_format == "channels_first":
            # Normalize over the channel dimension for (N, C, H, W) tensors
            u = x.mean(1, keepdim=True)
            s = (x - u).pow(2).mean(1, keepdim=True)
            x = (x - u) / torch.sqrt(s + self.eps)
            x = self.weight[:, None, None] * x + self.bias[:, None, None]
            return x
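
# Sanity-check sketch (not part of the original file; the helper name is
# hypothetical): the channels_first branch above should agree with
# F.layer_norm applied over the channel axis after permuting to channels_last.
def _check_layernorm_equivalence(dim=8):
    ln = LayerNorm(dim, data_format="channels_first")
    x = torch.randn(2, dim, 4, 4)
    ref = F.layer_norm(x.permute(0, 2, 3, 1), ln.normalized_shape, ln.weight, ln.bias, ln.eps)
    return torch.allclose(ln(x), ref.permute(0, 3, 1, 2), atol=1e-5)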
# Convolutional channel mixer (CCM)
class CCM(nn.Module):
    def __init__(self, dim, growth_rate=2.0):
        super().__init__()
        hidden_dim = int(dim * growth_rate)
        self.ccm = nn.Sequential(
            nn.Conv2d(dim, hidden_dim, 3, 1, 1),
            nn.GELU(),
            nn.Conv2d(hidden_dim, dim, 1, 1, 0)
        )

    def forward(self, x):
        return self.ccm(x)
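
# Shape-preservation sketch (an illustrative assumption, not from the paper):
# CCM expands channels by growth_rate and projects back, so it preserves the
# input shape and can sit directly on a residual branch.
def _ccm_preserves_shape(dim=32):
    x = torch.randn(1, dim, 8, 8)
    return CCM(dim)(x).shape == x.shape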
# Spatially-adaptive feature modulation (SAFM)
class SAFM(nn.Module):
    def __init__(self, dim, n_levels=4):
        super().__init__()
        self.n_levels = n_levels
        chunk_dim = dim // n_levels

        # Spatial weighting: one depth-wise 3x3 conv per level
        self.mfr = nn.ModuleList(
            [nn.Conv2d(chunk_dim, chunk_dim, 3, 1, 1, groups=chunk_dim) for _ in range(self.n_levels)])

        # Feature aggregation
        self.aggr = nn.Conv2d(dim, dim, 1, 1, 0)

        # Activation
        self.act = nn.GELU()

    def forward(self, x):
        h, w = x.size()[-2:]

        xc = x.chunk(self.n_levels, dim=1)
        out = []
        for i in range(self.n_levels):
            if i > 0:
                # Downsample level i by a factor of 2**i, filter, then upsample back
                p_size = (h // 2 ** i, w // 2 ** i)
                s = F.adaptive_max_pool2d(xc[i], p_size)
                s = self.mfr[i](s)
                s = F.interpolate(s, size=(h, w), mode='nearest')
            else:
                s = self.mfr[i](xc[i])
            out.append(s)

        out = self.aggr(torch.cat(out, dim=1))
        out = self.act(out) * x  # Modulate the input with the learned spatial weights
        return out
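
# Pyramid sketch (illustrative helper, not part of the original file): level i
# is pooled to (h // 2**i, w // 2**i) before filtering, so a 32x32 input with
# n_levels=4 produces levels of size 32x32, 16x16, 8x8 and 4x4.
def _safm_level_sizes(h=32, w=32, n_levels=4):
    return [(h // 2 ** i, w // 2 ** i) for i in range(n_levels)]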
# Feature mixing module (FMM)
class FMM(nn.Module):
    def __init__(self, dim, ffn_scale=2.0):
        super().__init__()
        self.norm1 = LayerNorm(dim)
        self.norm2 = LayerNorm(dim)

        # Multiscale block
        self.safm = SAFM(dim)
        # Feed-forward layer
        self.ccm = CCM(dim, ffn_scale)

    def forward(self, x):
        x = self.safm(self.norm1(x)) + x
        x = self.ccm(self.norm2(x)) + x
        return x
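
# Usage sketch (hedged; the helper and defaults are assumptions): in the
# paper's SAFMN network, several FMM blocks are stacked back to back; a
# minimal stand-in:
def _make_fmm_stack(dim=64, n_blocks=2, ffn_scale=2.0):
    return nn.Sequential(*[FMM(dim, ffn_scale) for _ in range(n_blocks)])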
if __name__ == '__main__':
    block = FMM(64)
    input = torch.randn(1, 64, 32, 32)
    output = block(input)

    # Print the input and output shapes
    print(input.size())
    print(output.size())