From 052667740d6a7cf0e8e53b9fddfacf3a75afa569 Mon Sep 17 00:00:00 2001 From: ffiirree Date: Thu, 8 Dec 2022 20:31:15 +0800 Subject: [PATCH] version: 0.1.0 --- README.md | 10 +-- cvm/models/efficientnet.py | 8 +-- cvm/models/efficientnetv2.py | 16 ++--- cvm/models/ghostnet.py | 12 ++-- cvm/models/mnasnet.py | 6 +- cvm/models/mobilenetv3.py | 34 +++++----- cvm/models/ops/blocks/__init__.py | 6 +- cvm/models/ops/blocks/aspp.py | 2 +- cvm/models/ops/blocks/bottleneck.py | 52 +++++++-------- cvm/models/ops/blocks/cbam.py | 8 ++- cvm/models/ops/blocks/channel.py | 65 +++++++++++++++++++ cvm/models/ops/blocks/channel_combine.py | 25 ------- cvm/models/ops/blocks/channel_shuffle.py | 15 ----- cvm/models/ops/blocks/channel_split.py | 28 -------- .../ops/blocks/depthwise_separable_conv2d.py | 10 +-- .../ops/blocks/{norm_act.py => factory.py} | 16 +++++ cvm/models/ops/blocks/gaussian_blur.py | 6 +- cvm/models/ops/blocks/global_context.py | 6 +- .../ops/blocks/inverted_residual_block.py | 32 ++++----- cvm/models/ops/blocks/non_local.py | 8 ++- cvm/models/ops/blocks/selective_kernel.py | 8 ++- cvm/models/ops/blocks/spatial_channel.py | 61 +++++++++++++++++ cvm/models/ops/blocks/squeeze_excite.py | 8 +-- cvm/models/ops/blocks/vanilla_conv2d.py | 10 +-- cvm/models/regnet.py | 22 +++---- cvm/models/resnet.py | 10 +-- cvm/models/rexnet.py | 13 ++-- cvm/models/vae/vae.py | 14 ++-- cvm/models/vgnet.py | 37 ++++++----- cvm/version.py | 2 +- train.py | 58 +++++++++-------- 31 files changed, 351 insertions(+), 257 deletions(-) create mode 100644 cvm/models/ops/blocks/channel.py delete mode 100644 cvm/models/ops/blocks/channel_combine.py delete mode 100644 cvm/models/ops/blocks/channel_shuffle.py delete mode 100644 cvm/models/ops/blocks/channel_split.py rename cvm/models/ops/blocks/{norm_act.py => factory.py} (85%) create mode 100644 cvm/models/ops/blocks/spatial_channel.py diff --git a/README.md b/README.md index 974b308..8a9fe03 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Computer Vision Models -## Classification +## Backbones - [x] `AlexNet` - [ImageNet Classification with Deep Convolutional Neural Networks](https://papers.nips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf), 2012 - [x] `VGGNets` - [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556), 2014 @@ -25,14 +25,12 @@ - [x] `ShuffleNet V2` - [ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design](https://arxiv.org/abs/1807.11164), 2018 - [x] `MnasNet` - [MnasNet: Platform-Aware Neural Architecture Search for Mobile](https://arxiv.org/abs/1807.11626), 2018 - [x] `GhostNet` - [GhostNet: More Features from Cheap Operations](https://arxiv.org/abs/1911.11907), CVPR, 2019 -- [ ] `ResNeSt` - [ResNeSt: Split-Attention Networks](https://arxiv.org/abs/2004.08955), 2020 - [ ] `HRNet` - [Deep High-Resolution Representation Learning for Visual Recognition](https://arxiv.org/abs/1908.07919), 2019 - [ ] `CSPNet` - [CSPNet: A New Backbone that can Enhance Learning Capability of CNN](https://arxiv.org/abs/1911.11929), 2019 - [x] `EfficientNet` - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946), 2019 - [x] `EfficientNetV2` - [EfficientNetV2: Smaller Models and Faster Training](https://arxiv.org/abs/2104.00298), 2021 - [x] `RegNet` - [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678), 2020 - [ ] `GPU-EfficientNets` - [Neural Architecture Design for GPU-Efficient 
Networks](https://arxiv.org/abs/2006.14090), 2020 -- [ ] `HaloNets` - [Scaling Local Self-Attention for Parameter Efficient Visual Backbones](https://arxiv.org/abs/2103.12731), 2021 - [ ] `LambdaNetworks` - [LambdaNetworks: Modeling Long-Range Interactions Without Attention](https://arxiv.org/abs/2102.08602), 2021 - [ ] `RepVGG` - [RepVGG: Making VGG-style ConvNets Great Again](https://arxiv.org/abs/2101.03697), 2021 - [ ] `HardCoRe-NAS` - [HardCoRe-NAS: Hard Constrained diffeRentiable Neural Architecture Search](https://arxiv.org/abs/2102.11646), 2021 @@ -47,9 +45,11 @@ - [x] `Squeeze-and-Excitation` - [Squeeze-and-Excitation Networks](https://arxiv.org/abs/1709.01507), CVPR, 2017 - [x] `Gather-Excite` - [Gather-Excite: Exploiting Feature Context in Convolutional Neural Networks](https://arxiv.org/abs/1810.12348), NeurIPS, 2018 - [x] `CBAM` - [CBAM: Convolutional Block Attention Module](https://arxiv.org/abs/1807.06521), ECCV, 2018 -- [x] `SKNets` - [Selective Kernel Networks](https://arxiv.org/abs/1903.06586), CVPR, 2019 +- [x] `SelectiveKernel` - [Selective Kernel Networks](https://arxiv.org/abs/1903.06586), CVPR, 2019 - [x] `ECA` - [ECA-Net: Efficient Channel Attention for Deep Convolutional Neural Networks](https://arxiv.org/abs/1910.03151), CVPR, 2019 -- [x] `GlobalContextBlick` - [GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond](https://arxiv.org/abs/1904.11492), 2019 +- [x] `GlobalContext` - [GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond](https://arxiv.org/abs/1904.11492), 2019 +- [ ] `ResNeSt` - [ResNeSt: Split-Attention Networks](https://arxiv.org/abs/2004.08955), 2020 +- [ ] `HaloNets` - [Scaling Local Self-Attention for Parameter Efficient Visual Backbones](https://arxiv.org/abs/2103.12731), 2021 ### Transformer diff --git a/cvm/models/efficientnet.py b/cvm/models/efficientnet.py index 33ab43d..8695620 100644 --- a/cvm/models/efficientnet.py +++ b/cvm/models/efficientnet.py @@ -46,7 +46,7 @@ def __init__( num_classes: int = 1000, width_coefficient: float = 1, depth_coefficient: float = 1, - se_ratio: float = 0.25, + rd_ratio: float = 0.25, dropout_rate: float = 0.2, drop_path_rate: float = 0.2, dilations: List[int] = None, @@ -91,7 +91,7 @@ def __init__( self.n[i], self.s[i], self.k[i], - se_ratio, + rd_ratio, dilations[len(self.features) + (stages[i] - 1)] ) @@ -117,7 +117,7 @@ def make_layers( n: int, stride: int, kernel_size: int = 3, - se_ratio: float = None, + rd_ratio: float = None, dilation: int = 1 ): layers = [] @@ -130,7 +130,7 @@ def make_layers( self.block( inp, oup, t, kernel_size=kernel_size, stride=stride if dilation == 1 else 1, - dilation=max(dilation // stride, 1), survival_prob=survival_prob, se_ratio=se_ratio + dilation=max(dilation // stride, 1), survival_prob=survival_prob, rd_ratio=rd_ratio ) ) diff --git a/cvm/models/efficientnetv2.py b/cvm/models/efficientnetv2.py index 4eae54e..2df8f83 100644 --- a/cvm/models/efficientnetv2.py +++ b/cvm/models/efficientnetv2.py @@ -56,7 +56,7 @@ def __init__( filters: List[int] = [24, 24, 48, 64, 128, 160, 256, 1280], layers: List[int] = [2, 4, 5, 6, 9, 15], strides: List[int] = [1, 2, 2, 2, 1, 2], - se_ratio: List[float] = [0, 0, 0, 0.25, 0.25, 0.25], + rd_ratio: List[float] = [0, 0, 0, 0.25, 0.25, 0.25], thumbnail: bool = False, **kwargs: Any ): @@ -76,7 +76,7 @@ def __init__( features.append( self.make_layers( block_type[i], filters[i], expand_ratio[i], filters[i+1], - n=layers[i], stride=strides[i], se_ratio=se_ratio[i] + n=layers[i], stride=strides[i], 
rd_ratio=rd_ratio[i] ) ) @@ -98,7 +98,7 @@ def make_layers( oup: int, n: int, stride: int, - se_ratio: float = None + rd_ratio: float = None ): layers = [] for i in range(n): @@ -111,7 +111,7 @@ def make_layers( block( inp, oup, t, stride=stride, - survival_prob=survival_prob, se_ratio=se_ratio + survival_prob=survival_prob, rd_ratio=rd_ratio ) ) @@ -136,7 +136,7 @@ def efficientnet_v2_s(pretrained: bool = False, pth: str = None, **kwargs: Any): filters=[24, 24, 48, 64, 128, 160, 256, 1280], layers=[2, 4, 5, 6, 9, 15], strides=[1, 2, 2, 2, 1, 2], - se_ratio=[0, 0, 0, 0.25, 0.25, 0.25], + rd_ratio=[0, 0, 0, 0.25, 0.25, 0.25], **kwargs ) @@ -155,7 +155,7 @@ def efficientnet_v2_m(pretrained: bool = False, pth: str = None, **kwargs: Any): filters=[24, 24, 48, 80, 160, 176, 304, 512, 1280], layers=[3, 5, 5, 7, 14, 18, 5], strides=[1, 2, 2, 2, 1, 2, 1], - se_ratio=[0, 0, 0, 0.25, 0.25, 0.25, 0.25], + rd_ratio=[0, 0, 0, 0.25, 0.25, 0.25, 0.25], **kwargs ) @@ -174,7 +174,7 @@ def efficientnet_v2_l(pretrained: bool = False, pth: str = None, **kwargs: Any): filters=[32, 32, 64, 96, 192, 224, 384, 640, 1280], layers=[4, 7, 7, 10, 19, 25, 7], strides=[1, 2, 2, 2, 1, 2, 1], - se_ratio=[0, 0, 0, 0.25, 0.25, 0.25, 0.25], + rd_ratio=[0, 0, 0, 0.25, 0.25, 0.25, 0.25], **kwargs ) @@ -193,7 +193,7 @@ def efficientnet_v2_xl(pretrained: bool = False, pth: str = None, **kwargs: Any) filters=[32, 32, 64, 96, 192, 256, 512, 640, 1280], layers=[4, 8, 8, 16, 24, 32, 8], strides=[1, 2, 2, 2, 1, 2, 1], - se_ratio=[0, 0, 0, 0.25, 0.25, 0.25, 0.25], + rd_ratio=[0, 0, 0, 0.25, 0.25, 0.25, 0.25], **kwargs ) diff --git a/cvm/models/ghostnet.py b/cvm/models/ghostnet.py index f589701..727a131 100644 --- a/cvm/models/ghostnet.py +++ b/cvm/models/ghostnet.py @@ -56,10 +56,10 @@ def __init__( dw_kernel_size: int = 3, stride: int = 1, act_layer: nn.Module = nn.ReLU, - se_ratio: float = 0. + rd_ratio: float = 0. ): super(GhostBottleneck, self).__init__() - has_se = se_ratio is not None and se_ratio > 0. + has_attn = rd_ratio is not None and rd_ratio > 0. 
self.stride = stride # Point-wise expansion @@ -73,8 +73,8 @@ def __init__( self.bn_dw = nn.BatchNorm2d(mid_chs) # Squeeze-and-excitation - if has_se: - self.se = blocks.SEBlock(mid_chs, ratio=se_ratio) + if has_attn: + self.se = blocks.SEBlock(mid_chs, rd_ratio=rd_ratio) else: self.se = None @@ -137,10 +137,10 @@ def __init__( for cfg in cfgs: stage = blocks.Stage() - for k, t, c, se_ratio, s in cfg: + for k, t, c, rd_ratio, s in cfg: oup = make_divisible(c * multiplier, 4) stage.append(GhostBottleneck( - inp, make_divisible(t * multiplier, 4), oup, k, s, se_ratio=se_ratio + inp, make_divisible(t * multiplier, 4), oup, k, s, rd_ratio=rd_ratio )) inp = oup diff --git a/cvm/models/mnasnet.py b/cvm/models/mnasnet.py index 9aedeb4..16134aa 100644 --- a/cvm/models/mnasnet.py +++ b/cvm/models/mnasnet.py @@ -56,12 +56,12 @@ def make_layers( n: int, stride: int, kernel_size: int = 3, - se_ratio: float = None + rd_ratio: float = None ): - layers = [blocks.InvertedResidualBlock(inp, oup, t, kernel_size, stride, se_ratio=se_ratio)] + layers = [blocks.InvertedResidualBlock(inp, oup, t, kernel_size, stride, rd_ratio=rd_ratio)] for _ in range(n - 1): - layers.append(blocks.InvertedResidualBlock(oup, oup, t, kernel_size, se_ratio=se_ratio)) + layers.append(blocks.InvertedResidualBlock(oup, oup, t, kernel_size, rd_ratio=rd_ratio)) return blocks.Stage(layers) diff --git a/cvm/models/mobilenetv3.py b/cvm/models/mobilenetv3.py index fc4d2a2..8dc11b0 100644 --- a/cvm/models/mobilenetv3.py +++ b/cvm/models/mobilenetv3.py @@ -48,23 +48,23 @@ def __init__( blocks.Conv2dBlock(in_channels, 16, 3, stride=FRONT_S, activation_fn=hs) )), ('stage1', blocks.Stage( - blocks.InvertedResidualBlock(16, 16, 1, kernel_size=3, stride=strides[0], se_ratio=0.5, se_ind=True) + blocks.InvertedResidualBlock(16, 16, 1, kernel_size=3, stride=strides[0], rd_ratio=0.5, se_ind=True) )), ('stage2', blocks.Stage( blocks.InvertedResidualBlock(16, 24, 72/16, kernel_size=3, stride=strides[1], dilation=dilations[0]), blocks.InvertedResidualBlock(24, 24, 88/24, kernel_size=3, dilation=dilations[1]) )), ('stage3', blocks.Stage( - blocks.InvertedResidualBlock(24, 40, 4, kernel_size=5, stride=strides[2], dilation=dilations[1], se_ratio=0.25, se_ind=True, activation_fn=hs), - blocks.InvertedResidualBlock(40, 40, 6, kernel_size=5, dilation=dilations[2], se_ratio=0.25, se_ind=True, activation_fn=hs), - blocks.InvertedResidualBlock(40, 40, 6, kernel_size=5, dilation=dilations[2], se_ratio=0.25, se_ind=True, activation_fn=hs), - blocks.InvertedResidualBlock(40, 48, 3, kernel_size=5, dilation=dilations[2], se_ratio=0.25, se_ind=True, activation_fn=hs), - blocks.InvertedResidualBlock(48, 48, 3, kernel_size=5, dilation=dilations[2], se_ratio=0.25, se_ind=True, activation_fn=hs) + blocks.InvertedResidualBlock(24, 40, 4, kernel_size=5, stride=strides[2], dilation=dilations[1], rd_ratio=0.25, se_ind=True, activation_fn=hs), + blocks.InvertedResidualBlock(40, 40, 6, kernel_size=5, dilation=dilations[2], rd_ratio=0.25, se_ind=True, activation_fn=hs), + blocks.InvertedResidualBlock(40, 40, 6, kernel_size=5, dilation=dilations[2], rd_ratio=0.25, se_ind=True, activation_fn=hs), + blocks.InvertedResidualBlock(40, 48, 3, kernel_size=5, dilation=dilations[2], rd_ratio=0.25, se_ind=True, activation_fn=hs), + blocks.InvertedResidualBlock(48, 48, 3, kernel_size=5, dilation=dilations[2], rd_ratio=0.25, se_ind=True, activation_fn=hs) )), ('stage4', blocks.Stage( - blocks.InvertedResidualBlock(48, 96, 6, kernel_size=5, stride=strides[3], dilation=dilations[2], 
se_ratio=0.25, se_ind=True, activation_fn=hs), - blocks.InvertedResidualBlock(96, 96, 6, kernel_size=5, dilation=dilations[3], se_ratio=0.25, se_ind=True, activation_fn=hs), - blocks.InvertedResidualBlock(96, 96, 6, kernel_size=5, dilation=dilations[3], se_ratio=0.25, se_ind=True, activation_fn=hs), + blocks.InvertedResidualBlock(48, 96, 6, kernel_size=5, stride=strides[3], dilation=dilations[2], rd_ratio=0.25, se_ind=True, activation_fn=hs), + blocks.InvertedResidualBlock(96, 96, 6, kernel_size=5, dilation=dilations[3], rd_ratio=0.25, se_ind=True, activation_fn=hs), + blocks.InvertedResidualBlock(96, 96, 6, kernel_size=5, dilation=dilations[3], rd_ratio=0.25, se_ind=True, activation_fn=hs), blocks.Conv2d1x1Block(96, 576, activation_fn=hs) )) ])) @@ -125,22 +125,22 @@ def __init__( blocks.InvertedResidualBlock(24, 24, t=3, kernel_size=3, dilation=dilations[0]) )), ('stage2', blocks.Stage( - blocks.InvertedResidualBlock(24, 40, t=3, kernel_size=5, stride=strides[1], dilation=dilations[0], se_ratio=0.25, se_ind=True), - blocks.InvertedResidualBlock(40, 40, t=3, kernel_size=5, dilation=dilations[1], se_ratio=0.25, se_ind=True), - blocks.InvertedResidualBlock(40, 40, t=3, kernel_size=5, dilation=dilations[1], se_ratio=0.25, se_ind=True) + blocks.InvertedResidualBlock(24, 40, t=3, kernel_size=5, stride=strides[1], dilation=dilations[0], rd_ratio=0.25, se_ind=True), + blocks.InvertedResidualBlock(40, 40, t=3, kernel_size=5, dilation=dilations[1], rd_ratio=0.25, se_ind=True), + blocks.InvertedResidualBlock(40, 40, t=3, kernel_size=5, dilation=dilations[1], rd_ratio=0.25, se_ind=True) )), ('stage3', blocks.Stage( blocks.InvertedResidualBlock(40, 80, t=6, kernel_size=3, stride=strides[2], dilation=dilations[1], activation_fn=hs), blocks.InvertedResidualBlock(80, 80, t=200/80, kernel_size=3, dilation=dilations[2], activation_fn=hs), blocks.InvertedResidualBlock(80, 80, t=184/80, kernel_size=3, dilation=dilations[2], activation_fn=hs), blocks.InvertedResidualBlock(80, 80, t=184/80, kernel_size=3, dilation=dilations[2], activation_fn=hs), - blocks.InvertedResidualBlock(80, 112, t=6, kernel_size=3, dilation=dilations[2], se_ratio=0.25, se_ind=True, activation_fn=hs), - blocks.InvertedResidualBlock(112, 112, t=6, kernel_size=3, dilation=dilations[2], se_ratio=0.25, se_ind=True, activation_fn=hs) + blocks.InvertedResidualBlock(80, 112, t=6, kernel_size=3, dilation=dilations[2], rd_ratio=0.25, se_ind=True, activation_fn=hs), + blocks.InvertedResidualBlock(112, 112, t=6, kernel_size=3, dilation=dilations[2], rd_ratio=0.25, se_ind=True, activation_fn=hs) )), ('stage4', blocks.Stage( - blocks.InvertedResidualBlock(112, 160, t=6, kernel_size=5, stride=strides[3], dilation=dilations[2], se_ratio=0.25, se_ind=True, activation_fn=hs), - blocks.InvertedResidualBlock(160, 160, t=6, kernel_size=5, dilation=dilations[3], se_ratio=0.25, se_ind=True, activation_fn=hs), - blocks.InvertedResidualBlock(160, 160, t=6, kernel_size=5, dilation=dilations[3], se_ratio=0.25, se_ind=True, activation_fn=hs), + blocks.InvertedResidualBlock(112, 160, t=6, kernel_size=5, stride=strides[3], dilation=dilations[2], rd_ratio=0.25, se_ind=True, activation_fn=hs), + blocks.InvertedResidualBlock(160, 160, t=6, kernel_size=5, dilation=dilations[3], rd_ratio=0.25, se_ind=True, activation_fn=hs), + blocks.InvertedResidualBlock(160, 160, t=6, kernel_size=5, dilation=dilations[3], rd_ratio=0.25, se_ind=True, activation_fn=hs), blocks.Conv2d1x1Block(160, 960, activation_fn=hs) )) ])) diff --git a/cvm/models/ops/blocks/__init__.py 
b/cvm/models/ops/blocks/__init__.py index 3a9fc66..5a75bfb 100644 --- a/cvm/models/ops/blocks/__init__.py +++ b/cvm/models/ops/blocks/__init__.py @@ -1,11 +1,9 @@ -from .norm_act import normalizer, activation, normalizer_fn, activation_fn, norm_activation +from .factory import normalizer, activation, normalizer_fn, activation_fn, norm_activation, attention, attention_fn from .stage import Stage from .affine import Affine from .vanilla_conv2d import Conv2d1x1, Conv2d3x3, Conv2d1x1BN, Conv2d3x3BN, Conv2d1x1Block, Conv2dBlock from .bottleneck import ResBasicBlockV1, BottleneckV1, ResBasicBlockV2, BottleneckV2 -from .channel_combine import Combine -from .channel_split import ChannelChunk, ChannelSplit -from .channel_shuffle import ChannelShuffle +from .channel import Combine, ChannelChunk, ChannelSplit, ChannelShuffle from .depthwise_separable_conv2d import DepthwiseConv2d, PointwiseConv2d, DepthwiseConv2dBN, PointwiseConv2dBN, DepthwiseBlock, PointwiseBlock from .inverted_residual_block import InvertedResidualBlock, FusedInvertedResidualBlock from .squeeze_excite import se, SEBlock diff --git a/cvm/models/ops/blocks/aspp.py b/cvm/models/ops/blocks/aspp.py index e2b6f87..6e57bcb 100644 --- a/cvm/models/ops/blocks/aspp.py +++ b/cvm/models/ops/blocks/aspp.py @@ -2,7 +2,7 @@ import torch.nn as nn import torch.nn.functional as F from .vanilla_conv2d import Conv2d1x1, Conv2d1x1Block, Conv2dBlock -from .channel_combine import Combine +from .channel import Combine from typing import List diff --git a/cvm/models/ops/blocks/bottleneck.py b/cvm/models/ops/blocks/bottleneck.py index 90128c6..83355e8 100644 --- a/cvm/models/ops/blocks/bottleneck.py +++ b/cvm/models/ops/blocks/bottleneck.py @@ -1,11 +1,11 @@ import torch import torch.nn as nn -from . import norm_act +from . import factory from .squeeze_excite import SEBlock from .drop import DropPath from .vanilla_conv2d import Conv2d3x3, Conv2d1x1 -from .channel_combine import Combine +from .channel import Combine from typing import OrderedDict @@ -21,7 +21,7 @@ def __init__( dilation: int = 1, groups: int = 1, width_per_group: int = 64, - se_ratio: float = None, + rd_ratio: float = None, drop_path_rate: float = None, use_resnetd_shortcut: bool = False, normalizer_fn: nn.Module = None, @@ -29,10 +29,10 @@ def __init__( ): super().__init__() - normalizer_fn = normalizer_fn or norm_act._NORMALIZER - activation_fn = activation_fn or norm_act._ACTIVATION + normalizer_fn = normalizer_fn or factory._NORMALIZER + activation_fn = activation_fn or factory._ACTIVATION - self.has_se = se_ratio is not None and se_ratio > 0 and se_ratio <= 1 + self.has_attn = rd_ratio is not None and rd_ratio > 0 and rd_ratio <= 1 self.use_shortcut = stride != 1 or inp != oup * self.expansion if width_per_group != 64: @@ -46,8 +46,8 @@ def __init__( ('norm2', normalizer_fn(oup)) ])) - if self.has_se: - self.branch1.add_module('se', SEBlock(oup, se_ratio)) + if self.has_attn: + self.branch1.add_module('se', SEBlock(oup, rd_ratio=rd_ratio)) if drop_path_rate: self.branch1.add_module('drop', DropPath(1. 
- drop_path_rate)) @@ -90,7 +90,7 @@ def __init__( dilation: int = 1, groups: int = 1, width_per_group: int = 64, - se_ratio: float = None, + rd_ratio: float = None, drop_path_rate: float = None, use_resnetd_shortcut: bool = False, normalizer_fn: nn.Module = None, @@ -98,12 +98,12 @@ def __init__( ): super().__init__() - normalizer_fn = normalizer_fn or norm_act._NORMALIZER - activation_fn = activation_fn or norm_act._ACTIVATION + normalizer_fn = normalizer_fn or factory._NORMALIZER + activation_fn = activation_fn or factory._ACTIVATION width = int(oup * (width_per_group / 64)) * groups - self.has_se = se_ratio is not None and se_ratio > 0 and se_ratio <= 1 + self.has_attn = rd_ratio is not None and rd_ratio > 0 and rd_ratio <= 1 self.use_shortcut = stride != 1 or inp != oup * self.expansion self.branch1 = nn.Sequential(OrderedDict([ @@ -117,8 +117,8 @@ def __init__( ('norm3', normalizer_fn(oup * self.expansion,)) ])) - if self.has_se: - self.branch1.add_module('se', SEBlock(oup * self.expansion, se_ratio)) + if self.has_attn: + self.branch1.add_module('se', SEBlock(oup * self.expansion, rd_ratio=rd_ratio)) if drop_path_rate: self.branch1.add_module('drop', DropPath(1. - drop_path_rate)) @@ -161,7 +161,7 @@ def __init__( dilation: int = 1, groups: int = 1, width_per_group: int = 64, - se_ratio: float = None, + rd_ratio: float = None, drop_path_rate: float = None, use_resnetd_shortcut: bool = False, normalizer_fn: nn.Module = None, @@ -169,10 +169,10 @@ def __init__( ): super().__init__() - normalizer_fn = normalizer_fn or norm_act._NORMALIZER - activation_fn = activation_fn or norm_act._ACTIVATION + normalizer_fn = normalizer_fn or factory._NORMALIZER + activation_fn = activation_fn or factory._ACTIVATION - self.has_se = se_ratio is not None and se_ratio > 0 and se_ratio <= 1 + self.has_attn = rd_ratio is not None and rd_ratio > 0 and rd_ratio <= 1 self.use_shortcut = stride != 1 or inp != oup if width_per_group != 64: @@ -187,8 +187,8 @@ def __init__( ('conv2', Conv2d3x3(oup, oup)) ])) - if self.has_se: - self.branch1.add_module('se', SEBlock(oup, se_ratio)) + if self.has_attn: + self.branch1.add_module('se', SEBlock(oup, rd_ratio=rd_ratio)) if drop_path_rate: self.branch1.add_module('drop', DropPath(1. - drop_path_rate)) @@ -226,7 +226,7 @@ def __init__( dilation: int = 1, groups: int = 1, width_per_group: int = 64, - se_ratio: float = None, + rd_ratio: float = None, drop_path_rate: float = None, use_resnetd_shortcut: bool = False, normalizer_fn: nn.Module = None, @@ -234,12 +234,12 @@ def __init__( ): super().__init__() - normalizer_fn = normalizer_fn or norm_act._NORMALIZER - activation_fn = activation_fn or norm_act._ACTIVATION + normalizer_fn = normalizer_fn or factory._NORMALIZER + activation_fn = activation_fn or factory._ACTIVATION width = int(oup * (width_per_group / 64)) * groups - self.has_se = se_ratio is not None and se_ratio > 0 and se_ratio <= 1 + self.has_attn = rd_ratio is not None and rd_ratio > 0 and rd_ratio <= 1 self.use_shortcut = stride != 1 or inp != oup * self.expansion self.branch1 = nn.Sequential(OrderedDict([ @@ -254,9 +254,9 @@ def __init__( ('conv3', Conv2d1x1(width, oup * self.expansion)) ])) - if self.has_se: + if self.has_attn: self.branch1.add_module('se', SEBlock( - oup * self.expansion, se_ratio)) + oup * self.expansion, rd_ratio=rd_ratio)) if drop_path_rate: self.branch1.add_module('drop', DropPath(1. 
- drop_path_rate)) diff --git a/cvm/models/ops/blocks/cbam.py b/cvm/models/ops/blocks/cbam.py index d649055..9b47ad6 100644 --- a/cvm/models/ops/blocks/cbam.py +++ b/cvm/models/ops/blocks/cbam.py @@ -1,19 +1,21 @@ import torch from torch import nn from .vanilla_conv2d import Conv2d1x1 -from .norm_act import normalizer_fn, activation_fn +from .factory import normalizer_fn, activation_fn +from ..functional import make_divisible class ChannelAttention(nn.Module): def __init__( self, in_channels, - rd_ratio, + rd_ratio: float = 1/8, + rd_divisor: int = 8, gate_fn: nn.Module = nn.Sigmoid ) -> None: super().__init__() - rd_channels = int(in_channels * rd_ratio) + rd_channels = make_divisible(in_channels * rd_ratio, rd_divisor) self.max_pool = nn.AdaptiveMaxPool2d((1, 1)) self.avg_pool = nn.AdaptiveAvgPool2d((1, 1)) diff --git a/cvm/models/ops/blocks/channel.py b/cvm/models/ops/blocks/channel.py new file mode 100644 index 0000000..579fe83 --- /dev/null +++ b/cvm/models/ops/blocks/channel.py @@ -0,0 +1,65 @@ +import torch +from torch import nn +from ..functional import channel_shuffle + + +class ChannelChunk(nn.Module): + def __init__(self, groups: int): + super().__init__() + + self.groups = groups + + def forward(self, x: torch.Tensor): + return torch.chunk(x, self.groups, dim=1) + + def extra_repr(self): + return f'groups={self.groups}' + + +class ChannelSplit(nn.Module): + def __init__(self, sections): + super().__init__() + + self.sections = sections + + def forward(self, x: torch.Tensor): + return torch.split(x, self.sections, dim=1) + + def extra_repr(self): + return f'sections={self.sections}' + + +class ChannelShuffle(nn.Module): + def __init__(self, groups: int): + super().__init__() + + self.groups = groups + + def forward(self, x): + return channel_shuffle(x, self.groups) + + def extra_repr(self): + return 'groups={}'.format(self.groups) + + +class Combine(nn.Module): + def __init__(self, method: str = 'ADD', *args, **kwargs): + super().__init__() + assert method in ['ADD', 'CONCAT'], '' + + self.method = method + self._combine = self._add if self.method == 'ADD' else self._cat + + @staticmethod + def _add(x): + return x[0] + x[1] + + @staticmethod + def _cat(x): + return torch.cat(x, dim=1) + + def forward(self, x): + return self._combine(x) + + def extra_repr(self): + return f'method=\'{self.method}\'' diff --git a/cvm/models/ops/blocks/channel_combine.py b/cvm/models/ops/blocks/channel_combine.py deleted file mode 100644 index fb05bee..0000000 --- a/cvm/models/ops/blocks/channel_combine.py +++ /dev/null @@ -1,25 +0,0 @@ -import torch -from torch import nn - - -class Combine(nn.Module): - def __init__(self, method: str = 'ADD', *args, **kwargs): - super().__init__() - assert method in ['ADD', 'CONCAT'], '' - - self.method = method - self._combine = self._add if self.method == 'ADD' else self._cat - - @staticmethod - def _add(x): - return x[0] + x[1] - - @staticmethod - def _cat(x): - return torch.cat(x, dim=1) - - def forward(self, x): - return self._combine(x) - - def extra_repr(self): - return f'method=\'{self.method}\'' diff --git a/cvm/models/ops/blocks/channel_shuffle.py b/cvm/models/ops/blocks/channel_shuffle.py deleted file mode 100644 index e4d3d0b..0000000 --- a/cvm/models/ops/blocks/channel_shuffle.py +++ /dev/null @@ -1,15 +0,0 @@ -from torch import nn -from ..functional import channel_shuffle - - -class ChannelShuffle(nn.Module): - def __init__(self, groups: int): - super().__init__() - - self.groups = groups - - def forward(self, x): - return channel_shuffle(x, 
self.groups) - - def extra_repr(self): - return 'groups={}'.format(self.groups) diff --git a/cvm/models/ops/blocks/channel_split.py b/cvm/models/ops/blocks/channel_split.py deleted file mode 100644 index 298000d..0000000 --- a/cvm/models/ops/blocks/channel_split.py +++ /dev/null @@ -1,28 +0,0 @@ -import torch -from torch import nn - - -class ChannelChunk(nn.Module): - def __init__(self, groups: int): - super().__init__() - - self.groups = groups - - def forward(self, x: torch.Tensor): - return torch.chunk(x, self.groups, dim=1) - - def extra_repr(self): - return f'groups={self.groups}' - - -class ChannelSplit(nn.Module): - def __init__(self, sections): - super().__init__() - - self.sections = sections - - def forward(self, x: torch.Tensor): - return torch.split(x, self.sections, dim=1) - - def extra_repr(self): - return f'sections={self.sections}' diff --git a/cvm/models/ops/blocks/depthwise_separable_conv2d.py b/cvm/models/ops/blocks/depthwise_separable_conv2d.py index 140be82..e924f7b 100644 --- a/cvm/models/ops/blocks/depthwise_separable_conv2d.py +++ b/cvm/models/ops/blocks/depthwise_separable_conv2d.py @@ -1,5 +1,5 @@ from torch import nn -from . import norm_act +from . import factory class DepthwiseConv2d(nn.Conv2d): @@ -45,7 +45,7 @@ def __init__( dilation: int = 1, normalizer_fn: nn.Module = None ): - normalizer_fn = normalizer_fn or norm_act._NORMALIZER + normalizer_fn = normalizer_fn or factory._NORMALIZER super().__init__( DepthwiseConv2d(inp, oup, kernel_size, stride=stride, padding=padding, dilation=dilation) @@ -63,7 +63,7 @@ def __init__( stride: int = 1, normalizer_fn: nn.Module = None ): - normalizer_fn = normalizer_fn or norm_act._NORMALIZER + normalizer_fn = normalizer_fn or factory._NORMALIZER super().__init__( PointwiseConv2d(inp, oup, stride=stride) @@ -88,7 +88,7 @@ def __init__( ): super().__init__( DepthwiseConv2d(inp, oup, kernel_size, stride, padding=padding, dilation=dilation), - *norm_act.norm_activation(oup, normalizer_fn, activation_fn, norm_position) + *factory.norm_activation(oup, normalizer_fn, activation_fn, norm_position) ) @@ -105,5 +105,5 @@ def __init__( ): super().__init__( PointwiseConv2d(inp, oup, stride=stride, groups=groups), - *norm_act.norm_activation(oup, normalizer_fn, activation_fn, norm_position) + *factory.norm_activation(oup, normalizer_fn, activation_fn, norm_position) ) diff --git a/cvm/models/ops/blocks/norm_act.py b/cvm/models/ops/blocks/factory.py similarity index 85% rename from cvm/models/ops/blocks/norm_act.py rename to cvm/models/ops/blocks/factory.py index 3268b8f..a06cc90 100644 --- a/cvm/models/ops/blocks/norm_act.py +++ b/cvm/models/ops/blocks/factory.py @@ -2,10 +2,12 @@ from contextlib import contextmanager from functools import partial import torch.nn as nn +from .squeeze_excite import SEBlock _NORM_POSIITON: str = 'before' _NORMALIZER: nn.Module = nn.BatchNorm2d _ACTIVATION: nn.Module = partial(nn.ReLU, inplace=True) +_ATTENTION: nn.Module = SEBlock class Nil: @@ -46,6 +48,16 @@ def activation(fn: nn.Module): _ACTIVATION = _pre_activation +@contextmanager +def attention(fn: nn.Module): + global _ATTENTION + + _pre_attn = _ATTENTION + _ATTENTION = fn + yield + _ATTENTION = _pre_attn + + def normalizer_fn(channels): return _NORMALIZER(channels) @@ -54,6 +66,10 @@ def activation_fn(): return _ACTIVATION() +def attention_fn(channels, **kwargs): + return _ATTENTION(channels, **kwargs) + + def norm_activation( channels, normalizer_fn: nn.Module = None, diff --git a/cvm/models/ops/blocks/gaussian_blur.py 
b/cvm/models/ops/blocks/gaussian_blur.py index 35eb742..2d9e4bf 100644 --- a/cvm/models/ops/blocks/gaussian_blur.py +++ b/cvm/models/ops/blocks/gaussian_blur.py @@ -1,7 +1,7 @@ import torch from torch import nn import torch.nn.functional as F -from . import norm_act +from . import factory from ..functional import get_gaussian_kernels2d from typing import Tuple @@ -70,7 +70,7 @@ def __init__( dilation: int = 1, normalizer_fn: nn.Module = None ): - normalizer_fn = normalizer_fn or norm_act._NORMALIZER + normalizer_fn = normalizer_fn or factory._NORMALIZER super().__init__( GaussianBlur(channels, kernel_size, sigma_range, normalize, @@ -96,5 +96,5 @@ def __init__( super().__init__( GaussianBlur(channels, kernel_size, sigma_range, normalize, stride=stride, padding=padding, dilation=dilation), - *norm_act.norm_activation(channels, normalizer_fn, activation_fn, norm_position) + *factory.norm_activation(channels, normalizer_fn, activation_fn, norm_position) ) diff --git a/cvm/models/ops/blocks/global_context.py b/cvm/models/ops/blocks/global_context.py index cb4842e..7a50ea8 100644 --- a/cvm/models/ops/blocks/global_context.py +++ b/cvm/models/ops/blocks/global_context.py @@ -2,6 +2,7 @@ from torch import nn from .vanilla_conv2d import Conv2d1x1 from .norm import LayerNorm2d +from ..functional import make_divisible class GlobalContextBlock(nn.Module): @@ -12,11 +13,12 @@ class GlobalContextBlock(nn.Module): def __init__( self, in_channels, - rd_ratio + rd_ratio: float = 1/8, + rd_divisor: int = 8, ) -> None: super().__init__() - channels = int(in_channels * rd_ratio) + channels = make_divisible(in_channels * rd_ratio, rd_divisor) self.conv1x1 = Conv2d1x1(in_channels, 1, bias=True) self.softmax = nn.Softmax(dim=1) diff --git a/cvm/models/ops/blocks/inverted_residual_block.py b/cvm/models/ops/blocks/inverted_residual_block.py index 2738792..84edc5e 100644 --- a/cvm/models/ops/blocks/inverted_residual_block.py +++ b/cvm/models/ops/blocks/inverted_residual_block.py @@ -1,9 +1,9 @@ import torch.nn as nn -from . import norm_act +from . 
import factory from .vanilla_conv2d import Conv2d1x1Block, Conv2d1x1BN, Conv2dBlock from .depthwise_separable_conv2d import DepthwiseBlock, DepthwiseConv2dBN from .squeeze_excite import SEBlock -from .channel_combine import Combine +from .channel import Combine from .drop import DropPath @@ -17,7 +17,7 @@ def __init__( stride: int = 1, padding: int = None, dilation: int = 1, - se_ratio: float = None, + rd_ratio: float = None, se_ind: bool = False, survival_prob: float = None, normalizer_fn: nn.Module = None, @@ -31,11 +31,11 @@ def __init__( self.oup = oup self.stride = stride self.apply_residual = (self.stride == 1) and (self.inp == self.oup) - self.se_ratio = se_ratio if se_ind or se_ratio is None else (se_ratio / t) - self.has_se = (self.se_ratio is not None) and (self.se_ratio > 0) and (self.se_ratio <= 1) + self.rd_ratio = rd_ratio if se_ind or rd_ratio is None else (rd_ratio / t) + self.has_attn = (self.rd_ratio is not None) and (self.rd_ratio > 0) and (self.rd_ratio <= 1) - normalizer_fn = normalizer_fn or norm_act._NORMALIZER - activation_fn = activation_fn or norm_act._ACTIVATION + normalizer_fn = normalizer_fn or factory._NORMALIZER + activation_fn = activation_fn or factory._ACTIVATION layers = [] if t != 1: @@ -48,8 +48,8 @@ def __init__( layers.append(DepthwiseConv2dBN(self.planes, self.planes, kernel_size, stride=self.stride, padding=padding, dilation=dilation, normalizer_fn=normalizer_fn)) - if self.has_se: - layers.append(SEBlock(self.planes, self.se_ratio)) + if self.has_attn: + layers.append(SEBlock(self.planes, rd_ratio=self.rd_ratio)) if dw_se_act: layers.append(dw_se_act()) @@ -79,7 +79,7 @@ def __init__( kernel_size: int = 3, stride: int = 1, padding: int = None, - se_ratio: float = None, + rd_ratio: float = None, se_ind: bool = False, survival_prob: float = None, normalizer_fn: nn.Module = None, @@ -93,19 +93,19 @@ def __init__( self.stride = stride self.padding = padding if padding is not None else (kernel_size // 2) self.apply_residual = (self.stride == 1) and (self.inp == self.oup) - self.se_ratio = se_ratio if se_ind or se_ratio is None else (se_ratio / t) - self.has_se = (self.se_ratio is not None) and (self.se_ratio > 0) and (self.se_ratio <= 1) + self.rd_ratio = rd_ratio if se_ind or rd_ratio is None else (rd_ratio / t) + self.has_attn = (self.rd_ratio is not None) and (self.rd_ratio > 0) and (self.rd_ratio <= 1) - normalizer_fn = normalizer_fn or norm_act._NORMALIZER - activation_fn = activation_fn or norm_act._ACTIVATION + normalizer_fn = normalizer_fn or factory._NORMALIZER + activation_fn = activation_fn or factory._ACTIVATION layers = [ Conv2dBlock(inp, self.planes, kernel_size, stride=self.stride, padding=self.padding, normalizer_fn=normalizer_fn, activation_fn=activation_fn) ] - if self.has_se: - layers.append(SEBlock(self.planes, self.se_ratio)) + if self.has_attn: + layers.append(SEBlock(self.planes, rd_ratio=self.rd_ratio)) layers.append(Conv2d1x1BN( self.planes, oup, normalizer_fn=normalizer_fn)) diff --git a/cvm/models/ops/blocks/non_local.py b/cvm/models/ops/blocks/non_local.py index 4f0002a..ea7f576 100644 --- a/cvm/models/ops/blocks/non_local.py +++ b/cvm/models/ops/blocks/non_local.py @@ -1,6 +1,7 @@ import torch from torch import nn from .vanilla_conv2d import Conv2d1x1 +from ..functional import make_divisible class NonLocalBlock(nn.Module): @@ -12,15 +13,16 @@ class NonLocalBlock(nn.Module): def __init__( self, in_channels, - ratio, + rd_ratio, + rd_divisor: int = 8, use_scale: bool = True, use_norm: bool = True ): super().__init__() - 
channels = int(in_channels * ratio) + channels = make_divisible(in_channels * rd_ratio, rd_divisor) - self.ratio = ratio + self.ratio = rd_ratio self.scale = channels ** -0.5 if use_scale else 1.0 self.use_scale = use_scale diff --git a/cvm/models/ops/blocks/selective_kernel.py b/cvm/models/ops/blocks/selective_kernel.py index 371d23f..fee6bb6 100644 --- a/cvm/models/ops/blocks/selective_kernel.py +++ b/cvm/models/ops/blocks/selective_kernel.py @@ -2,7 +2,8 @@ from torch import nn from .vanilla_conv2d import Conv2d1x1, Conv2d1x1Block from .depthwise_separable_conv2d import DepthwiseBlock -from .channel_combine import Combine +from .channel import Combine +from ..functional import make_divisible class SelectiveKernelBlock(nn.Module): @@ -13,13 +14,14 @@ class SelectiveKernelBlock(nn.Module): def __init__( self, in_channels, - rd_ratio + rd_ratio: float = 1/8, + rd_divisor: int = 8, ) -> None: super().__init__() self.in_channels = in_channels - rd_channels = max(int(in_channels * rd_ratio), 32) + rd_channels = max(make_divisible(in_channels * rd_ratio, rd_divisor), 32) self.conv3x3 = DepthwiseBlock(in_channels, in_channels, kernel_size=3, dilation=1) self.conv5x5 = DepthwiseBlock(in_channels, in_channels, kernel_size=3, dilation=2) diff --git a/cvm/models/ops/blocks/spatial_channel.py b/cvm/models/ops/blocks/spatial_channel.py new file mode 100644 index 0000000..1479b68 --- /dev/null +++ b/cvm/models/ops/blocks/spatial_channel.py @@ -0,0 +1,61 @@ + +import torch +import torch.nn as nn +from .depthwise_separable_conv2d import PointwiseConv2dBN + + +class SCAttention(nn.Module): + def __init__( + self, + in_channels, + rd_ratio + ) -> None: + super().__init__() + + planes = int(in_channels * rd_ratio) + + self.reduce = nn.Sequential( + nn.AvgPool2d((3, 3), (2, 2)), + # blocks.DepthwiseBlock(in_channels, in_channels, 3, stride=2), + PointwiseConv2dBN(in_channels, planes, bias=True) + ) + + self.down1x1 = nn.Sequential( + nn.AdaptiveAvgPool2d((1, 1)), + PointwiseConv2dBN(planes, planes, bias=True) + ) + + self.expand = PointwiseConv2dBN(planes, in_channels, bias=True) + + self.reset_parameters() + + def reset_parameters(self): + for name, m in self.named_modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + if len(list(m.parameters())) > 1: + nn.init.constant_(m.bias, 0.0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 0.0) + nn.init.constant_(m.bias, 0.0) + elif isinstance(m, nn.GroupNorm): + nn.init.constant_(m.weight, 0.0) + nn.init.constant_(m.bias, 0.0) + + def forward(self, x): + N, C, W, H = x.shape + + reduce = self.reduce(x) + + down_1x1 = self.down1x1(reduce) + + spatial = torch.nn.functional.interpolate( + torch.sum(reduce * torch.softmax(down_1x1, dim=1), dim=1, keepdims=True), + x.shape[-2:] + ).view(N, 1, -1) + + channel = self.expand(down_1x1).squeeze(-1) + + score = torch.sigmoid(torch.einsum('nco, nox -> ncx', channel, spatial).view(N, C, W, H)) + + return x * score diff --git a/cvm/models/ops/blocks/squeeze_excite.py b/cvm/models/ops/blocks/squeeze_excite.py index 5e38da1..5bc8d9e 100644 --- a/cvm/models/ops/blocks/squeeze_excite.py +++ b/cvm/models/ops/blocks/squeeze_excite.py @@ -1,7 +1,7 @@ from functools import partial from contextlib import contextmanager from torch import nn -from . import norm_act +from . 
import factory from .vanilla_conv2d import Conv2d1x1 from ..functional import make_divisible from typing import OrderedDict @@ -46,11 +46,11 @@ class SEBlock(nn.Sequential): def __init__( self, channels, - ratio, + rd_ratio, inner_activation_fn: nn.Module = None, gating_fn: nn.Module = None ): - squeezed_channels = make_divisible(int(channels * ratio), _SE_DIVISOR) + squeezed_channels = make_divisible(int(channels * rd_ratio), _SE_DIVISOR) inner_activation_fn = inner_activation_fn or _SE_INNER_NONLINEAR gating_fn = gating_fn or _SE_GATING_FN @@ -59,7 +59,7 @@ def __init__( layers['pool'] = nn.AdaptiveAvgPool2d((1, 1)) layers['reduce'] = Conv2d1x1(channels, squeezed_channels, bias=True) if _SE_USE_NORM: - layers['norm'] = norm_act.normalizer_fn(squeezed_channels) + layers['norm'] = factory.normalizer_fn(squeezed_channels) layers['act'] = inner_activation_fn() layers['expand'] = Conv2d1x1(squeezed_channels, channels, bias=True) layers['gate'] = gating_fn() diff --git a/cvm/models/ops/blocks/vanilla_conv2d.py b/cvm/models/ops/blocks/vanilla_conv2d.py index bbc4133..9b8a048 100644 --- a/cvm/models/ops/blocks/vanilla_conv2d.py +++ b/cvm/models/ops/blocks/vanilla_conv2d.py @@ -1,5 +1,5 @@ from torch import nn -from . import norm_act +from . import factory class Conv2d3x3(nn.Conv2d): @@ -48,7 +48,7 @@ def __init__( groups: int = 1, normalizer_fn: nn.Module = None ): - normalizer_fn = normalizer_fn or norm_act._NORMALIZER + normalizer_fn = normalizer_fn or factory._NORMALIZER padding = padding if padding is not None else dilation super().__init__( @@ -70,7 +70,7 @@ def __init__( groups: int = 1, normalizer_fn: nn.Module = None ): - normalizer_fn = normalizer_fn or norm_act._NORMALIZER + normalizer_fn = normalizer_fn or factory._NORMALIZER super().__init__( Conv2d1x1(in_channels, out_channels, stride=stride, @@ -96,7 +96,7 @@ def __init__( super().__init__( Conv2d1x1(in_channels, out_channels, stride=stride, padding=padding, bias=bias, groups=groups), - *norm_act.norm_activation(out_channels, normalizer_fn, activation_fn, norm_position) + *factory.norm_activation(out_channels, normalizer_fn, activation_fn, norm_position) ) @@ -121,5 +121,5 @@ def __init__( super().__init__( nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias, stride=stride, padding=padding, dilation=dilation, groups=groups), - *norm_act.norm_activation(out_channels, normalizer_fn, activation_fn, norm_position) + *factory.norm_activation(out_channels, normalizer_fn, activation_fn, norm_position) ) diff --git a/cvm/models/regnet.py b/cvm/models/regnet.py index fb233f4..2db1804 100644 --- a/cvm/models/regnet.py +++ b/cvm/models/regnet.py @@ -33,7 +33,7 @@ def __init__( group_width, bottleneck_multiplier, dilation, - se_ratio + rd_ratio ): super().__init__() @@ -42,8 +42,8 @@ def __init__( self.add_module('1x1-1', blocks.Conv2d1x1Block(inp, wb)) self.add_module('3x3', blocks.Conv2dBlock(wb, wb, stride=stride, groups=(wb // group_width), dilation=dilation)) - if se_ratio: - self.add_module('se', blocks.SEBlock(wb, (inp * se_ratio) / wb)) # se <-> inp + if rd_ratio: + self.add_module('se', blocks.SEBlock(wb, rd_ratio=(inp * rd_ratio) / wb)) # se <-> inp self.add_module('1x1-2', blocks.Conv2d1x1BN(wb, oup)) @@ -59,7 +59,7 @@ def __init__( group_width: int = 1, bottleneck_multiplier: float = 1.0, dilation: int = 1, - se_ratio: float = None, + rd_ratio: float = None, ) -> None: super().__init__() @@ -76,7 +76,7 @@ def __init__( group_width, bottleneck_multiplier, dilation, - se_ratio, + rd_ratio, ) self.act = 
blocks.activation_fn() @@ -99,7 +99,7 @@ def __init__( group_widths, bottleneck_multiplier, dilation: int, - se_ratio: float, + rd_ratio: float, stage_index: int ): super().__init__() @@ -114,7 +114,7 @@ def __init__( group_widths, bottleneck_multiplier, max(dilation // (stride if i == 0 else 1), 1), - se_ratio + rd_ratio ) ) @@ -132,7 +132,7 @@ def __init__( wm: float = None, b: float = None, g: int = None, - se_ratio: float = None, + rd_ratio: float = None, dropout_rate: float = 0.0, dilations: List[int] = [1, 1, 1, 1], thumbnail: bool = False, @@ -181,7 +181,7 @@ def __init__( group_widths[i], bottleneck_multipliers[i], dilations[i], - se_ratio, + rd_ratio, i + 1 ) ) @@ -207,13 +207,13 @@ def _regnet( wm: float, b: float = 1.0, g: int = None, - se_ratio: float = None, + rd_ratio: float = None, pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any ): - model = RegNet(d=d, w0=w0, wa=wa, wm=wm, b=b, g=g, se_ratio=se_ratio, **kwargs) + model = RegNet(d=d, w0=w0, wa=wa, wm=wm, b=b, g=g, rd_ratio=rd_ratio, **kwargs) if pretrained: load_from_local_or_url(model, pth, kwargs.get('url', None), progress) diff --git a/cvm/models/resnet.py b/cvm/models/resnet.py index 888341b..1afab54 100644 --- a/cvm/models/resnet.py +++ b/cvm/models/resnet.py @@ -26,7 +26,7 @@ def __init__( layers: List[int] = [2, 2, 2, 2], groups: int = 1, width_per_group: int = 64, - se_ratio: float = None, + rd_ratio: float = None, dropout_rate: float = 0.0, drop_path_rate: float = None, block: nn.Module = blocks.ResBasicBlockV1, @@ -50,7 +50,7 @@ def __init__( self.groups = groups self.width_per_group = width_per_group self.block = block - self.ratio = se_ratio + self.ratio = rd_ratio self.drop_path_rate = drop_path_rate self.use_resnetd_shortcut = use_resnetd_shortcut self.version = 1 @@ -144,7 +144,7 @@ def make_layers(self, inp, oup, stride, n, block_num, dilation): stride=stride if dilation == 1 else 1, groups=self.groups, width_per_group=self.width_per_group, - se_ratio=self.ratio, + rd_ratio=self.ratio, drop_path_rate=self.get_drop_path_rate(block_num), use_resnetd_shortcut=self.use_resnetd_shortcut, dilation=max(1, (dilation//stride)) @@ -158,13 +158,13 @@ def make_layers(self, inp, oup, stride, n, block_num, dilation): def _resnet( layers: List[int], block: nn.Module, - se_ratio: float = None, + rd_ratio: float = None, pretrained: bool = False, pth: str = None, progress: bool = False, **kwargs: Any ): - model = ResNet(layers=layers, block=block, se_ratio=se_ratio, **kwargs) + model = ResNet(layers=layers, block=block, rd_ratio=rd_ratio, **kwargs) if pretrained: load_from_local_or_url(model, pth, kwargs.get('url', None), progress) diff --git a/cvm/models/rexnet.py b/cvm/models/rexnet.py index 2c44e28..f11d9f1 100644 --- a/cvm/models/rexnet.py +++ b/cvm/models/rexnet.py @@ -15,12 +15,12 @@ def __init__( t, kernel_size: int = 3, stride: int = 1, padding: int = 1, - se_ratio: float = None, + rd_ratio: float = None, se_ind: bool = True, dw_se_act: nn.Module = nn.ReLU6 ): super().__init__(inp, oup, t, kernel_size=kernel_size, stride=stride, - padding=padding, se_ratio=se_ratio, se_ind=se_ind, dw_se_act=dw_se_act) + padding=padding, rd_ratio=rd_ratio, se_ind=se_ind, dw_se_act=dw_se_act) self.apply_residual = (stride == 1) and (inp <= oup) self.branch2 = nn.Identity() if self.apply_residual else None @@ -53,7 +53,7 @@ def __init__( n = [2, 2, 3, 3, 5] # repeats s = [FRONT_S, 2, 2, 1, 2] - se = [0, 1/12, 1/12, 1/12, 1/12] + ratios = [0, 1/12, 1/12, 1/12, 1/12] self.depth = (sum(n[:]) + 1) * 3 increase 
= 180 / (self.depth // 3 * 1.0) @@ -67,10 +67,11 @@ def multiplier(x): return int(round(x * width_multiplier)) inplanes, planes = 16, 16 + increase for i, layers in enumerate(n): - features.append(InvertedResidualBlock(multiplier(inplanes), multiplier(planes), 6, stride=s[i], se_ratio=se[i])) + features.append(InvertedResidualBlock(multiplier(inplanes), + multiplier(planes), 6, stride=s[i], rd_ratio=ratios[i])) inplanes, planes = planes, planes + increase for _ in range(layers - 1): - features.append(InvertedResidualBlock(multiplier(inplanes), multiplier(planes), 6, se_ratio=se[i])) + features.append(InvertedResidualBlock(multiplier(inplanes), multiplier(planes), 6, rd_ratio=ratios[i])) inplanes, planes = planes, planes + increase features.append(blocks.Conv2d1x1Block(multiplier(inplanes), multiplier(1280))) @@ -191,7 +192,7 @@ def forward(self, x): @export def rexnet_plain(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): model = ReXNetPlain(**kwargs) - + if pretrained: load_from_local_or_url(model, pth, kwargs.get('url', None), progress) return model diff --git a/cvm/models/vae/vae.py b/cvm/models/vae/vae.py index bf97d3d..a24ac5d 100644 --- a/cvm/models/vae/vae.py +++ b/cvm/models/vae/vae.py @@ -18,6 +18,7 @@ def __init__( self.image_size = image_size self.nz = nz + # Q(z|X) self.encoder = nn.Sequential( nn.Flatten(1), nn.Linear(self.image_size ** 2, 512), @@ -25,10 +26,11 @@ def __init__( nn.Linear(512, 512), nn.LeakyReLU(0.2, inplace=True), nn.Linear(512, 256), - nn.LeakyReLU(0.2, inplace=True), + nn.LeakyReLU(0.2, inplace=True), nn.Linear(256, self.nz * 2) ) + # P(X|z) self.decoder = nn.Sequential( nn.Linear(self.nz, 256), nn.LeakyReLU(0.2, inplace=True), @@ -41,13 +43,15 @@ def __init__( nn.Unflatten(1, (1, image_size, image_size)) ) + def sample_z(self, mu, logvar): + eps = torch.randn_like(logvar) + + return mu + eps * torch.exp(0.5 * logvar) + def forward(self, x): mu, logvar = torch.chunk(self.encoder(x), 2, dim=1) - std = torch.exp(0.5 * logvar) - eps = torch.randn_like(logvar) - - z = mu + eps * std + z = self.sample_z(mu, logvar) x = self.decoder(z) return x, mu, logvar diff --git a/cvm/models/vgnet.py b/cvm/models/vgnet.py index 009f3ad..98792d8 100644 --- a/cvm/models/vgnet.py +++ b/cvm/models/vgnet.py @@ -39,7 +39,7 @@ class HalfIdentityBlock(nn.Module): def __init__( self, inp: int, - se_ratio: float = 0.0 + rd_ratio: float = 0.0 ): super().__init__() @@ -47,10 +47,10 @@ def __init__( self.combine = blocks.Combine('CONCAT') self.conv1x1 = blocks.PointwiseBlock(inp, inp // 2) - if se_ratio > 0.0: + if rd_ratio > 0.0: self.conv1x1 = nn.Sequential( blocks.PointwiseBlock(inp, inp // 2), - blocks.SEBlock(inp // 2, se_ratio) + blocks.attention_fn(inp // 2, rd_ratio=rd_ratio) ) def forward(self, x): @@ -65,7 +65,7 @@ def __init__( oup, stride: int = 2, method: str = 'blur', - se_ratio: float = 0.0 + rd_ratio: float = 0.0 ): assert method in ['blur', 'dwconv', 'maxpool'], f'{method}' @@ -85,10 +85,10 @@ def __init__( self.split = None if inp == split_chs else blocks.ChannelSplit([inp - split_chs, split_chs]) self.conv1x1 = blocks.PointwiseBlock(inp, oup - split_chs) - if se_ratio > 0.0: + if rd_ratio > 0.0: self.conv1x1 = nn.Sequential( blocks.PointwiseBlock(inp, oup - split_chs), - blocks.SEBlock(oup - split_chs, se_ratio) + blocks.attention_fn(oup - split_chs, rd_ratio=rd_ratio) ) self.halve = nn.Identity() @@ -117,7 +117,7 @@ def __init__( channels: List[int] = None, downsamplings: List[str] = None, layers: List[int] = None, - se_ratio: float = 
0.0, + rd_ratio: List[float] = [0.0, 0.0, 0.0, 0.0], dropout_rate: float = 0.2, thumbnail: bool = False, **kwargs: Any @@ -142,7 +142,7 @@ def __init__( strides[i], downsamplings[i], layers[i], - se_ratio + rd_ratio[i] ) ) @@ -158,12 +158,12 @@ def __init__( nn.Linear(channels[-1], num_classes) ) - def make_layers(self, inp, oup, s, m, n, se_ratio): + def make_layers(self, inp, oup, s, m, n, rd_ratio): layers = [ - DownsamplingBlock(inp, oup, stride=s, method=m, se_ratio=se_ratio) + DownsamplingBlock(inp, oup, stride=s, method=m, rd_ratio=rd_ratio) ] for _ in range(n - 1): - layers.append(HalfIdentityBlock(oup, se_ratio)) + layers.append(HalfIdentityBlock(oup, rd_ratio)) layers.append(blocks.Combine('CONCAT')) return blocks.Stage(layers) @@ -208,12 +208,13 @@ def vgnetg_1_0mp(pretrained: bool = False, pth: str = None, progress: bool = Tru @export @blocks.se(partial(nn.SiLU, inplace=True)) +@blocks.attention(blocks.SEBlock) @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.2-vgnets-weights/vgnetg_1_0mp_se-914a9c4a.pth') def vgnetg_1_0mp_se(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): kwargs['channels'] = [28, 56, 112, 224, 368] kwargs['downsamplings'] = ['blur', 'blur', 'blur', 'blur'] kwargs['layers'] = [4, 7, 13, 2] - kwargs['se_ratio'] = 0.25 + kwargs['rd_ratio'] = [0.25, 0.25, 0.25, 0.25] return _vgnet(pretrained, pth, progress, **kwargs) @@ -228,12 +229,13 @@ def vgnetg_1_5mp(pretrained: bool = False, pth: str = None, progress: bool = Tru @export @blocks.se(partial(nn.SiLU, inplace=True)) +@blocks.attention(blocks.SEBlock) @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.2-vgnets-weights/vgnetg_1_5mp_se-6d9ebf3b.pth') def vgnetg_1_5mp_se(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): kwargs['channels'] = [32, 64, 128, 256, 512] kwargs['downsamplings'] = ['blur', 'blur', 'blur', 'blur'] kwargs['layers'] = [3, 7, 14, 2] - kwargs['se_ratio'] = 0.25 + kwargs['rd_ratio'] = [0.25, 0.25, 0.25, 0.25] return _vgnet(pretrained, pth, progress, **kwargs) @@ -248,12 +250,13 @@ def vgnetg_2_0mp(pretrained: bool = False, pth: str = None, progress: bool = Tru @export @blocks.se(partial(nn.SiLU, inplace=True)) +@blocks.attention(blocks.SEBlock) @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.2-vgnets-weights/vgnetg_2_0mp_se-132bc3af.pth') def vgnetg_2_0mp_se(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): kwargs['channels'] = [32, 72, 168, 376, 512] kwargs['downsamplings'] = ['blur', 'blur', 'blur', 'blur'] kwargs['layers'] = [3, 6, 13, 2] - kwargs['se_ratio'] = 0.25 + kwargs['rd_ratio'] = [0.25, 0.25, 0.25, 0.25] return _vgnet(pretrained, pth, progress, **kwargs) @@ -268,12 +271,13 @@ def vgnetg_2_5mp(pretrained: bool = False, pth: str = None, progress: bool = Tru @export @blocks.se(partial(nn.SiLU, inplace=True)) +@blocks.attention(blocks.SEBlock) @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.2-vgnets-weights/vgnetg_2_5mp_se-ed87bdb1.pth') def vgnetg_2_5mp_se(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): kwargs['channels'] = [32, 80, 192, 400, 544] kwargs['downsamplings'] = ['blur', 'blur', 'blur', 'blur'] kwargs['layers'] = [3, 6, 16, 2] - kwargs['se_ratio'] = 0.25 + kwargs['rd_ratio'] = [0.25, 0.25, 0.25, 0.25] return _vgnet(pretrained, pth, progress, **kwargs) @@ -287,9 +291,10 @@ def vgnetg_5_0mp(pretrained: bool = False, pth: str = None, progress: bool = 
Tru @export @blocks.se(partial(nn.SiLU, inplace=True)) +@blocks.attention(blocks.SEBlock) def vgnetg_5_0mp_se(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): kwargs['channels'] = [32, 88, 216, 456, 856] kwargs['downsamplings'] = ['blur', 'blur', 'blur', 'blur'] kwargs['layers'] = [4, 7, 15, 5] - kwargs['se_ratio'] = 0.25 + kwargs['rd_ratio'] = [0.25, 0.25, 0.25, 0.25] return _vgnet(pretrained, pth, progress, **kwargs) diff --git a/cvm/version.py b/cvm/version.py index dc982b5..541f859 100644 --- a/cvm/version.py +++ b/cvm/version.py @@ -1 +1 @@ -__version__ = '0.0.27' \ No newline at end of file +__version__ = '0.1.0' \ No newline at end of file diff --git a/train.py b/train.py index 62893a0..5b112cf 100644 --- a/train.py +++ b/train.py @@ -1,5 +1,6 @@ import json import time +import traceback import datetime import argparse import torch @@ -285,33 +286,36 @@ def validate(val_loader, model, criterion, log_suffix=''): logger.info(f'Steps/Epoch: {len(train_loader)}') benchmark = Benchmark() - for epoch in range(0, args.epochs): - train( - train_loader, - model, - criterion, - optimizer, - scheduler, - scaler, - epoch, - args, - mixupcutmix_fn, - model_ema - ) - - validate(val_loader, model, criterion, log_suffix=' ') - if model_ema is not None: - validate(val_loader, model_ema.module, criterion, log_suffix=' ') - - train_loader.reset() - val_loader.reset() - - if args.rank == 0 and epoch > (args.epochs - 10): - model_path = f'{log_dir}/{model_name}_{epoch:0>3}_{time.time()}.pth' - torch.save(model.module.state_dict(), model_path) - logger.info(f'Saved: {model_path}!') - + try: + for epoch in range(0, args.epochs): + train( + train_loader, + model, + criterion, + optimizer, + scheduler, + scaler, + epoch, + args, + mixupcutmix_fn, + model_ema + ) + + validate(val_loader, model, criterion, log_suffix=' ') if model_ema is not None: - torch.save(model_ema.module.state_dict(), f'{log_dir}/{model_name}_EMA_{epoch:0>3}_{time.time()}.pth') + validate(val_loader, model_ema.module, criterion, log_suffix=' ') + + train_loader.reset() + val_loader.reset() + + if args.rank == 0 and epoch > (args.epochs - 10): + model_path = f'{log_dir}/{model_name}_{epoch:0>3}_{time.time()}.pth' + torch.save(model.module.state_dict(), model_path) + logger.info(f'Saved: {model_path}!') + + if model_ema is not None: + torch.save(model_ema.module.state_dict(), f'{log_dir}/{model_name}_EMA_{epoch:0>3}_{time.time()}.pth') + except: + logger.error(traceback.format_exc()) logger.info(f'Total time: {benchmark.elapsed():>.3f}s')
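
Note (not part of the patch): the most visible API change here is the rename of `norm_act.py` to `factory.py`, which now also registers an attention constructor (`attention` / `attention_fn`) next to the existing `normalizer` / `activation` helpers; VGNet builds its attention through `blocks.attention_fn(...)` and the `vgnetg_*_se` constructors pin `SEBlock` via `@blocks.attention(blocks.SEBlock)`. A minimal usage sketch follows, assuming `GlobalContextBlock` is re-exported from `cvm.models.ops.blocks` (otherwise import it from `cvm.models.ops.blocks.global_context`):

    import torch
    from cvm.models.ops import blocks

    # Default: factory.py registers SEBlock, so this builds an SE layer with the
    # renamed reduction-ratio argument (se_ratio -> rd_ratio throughout the patch).
    se = blocks.attention_fn(64, rd_ratio=0.25)

    # The context manager (also usable as a decorator, as the vgnetg_*_se
    # constructors do) temporarily swaps the registered attention module for
    # every blocks.attention_fn(...) call made while it is active.
    with blocks.attention(blocks.GlobalContextBlock):
        gc = blocks.attention_fn(64, rd_ratio=0.25)

    # Apply both attention modules to a dummy feature map.
    x = torch.randn(2, 64, 32, 32)
    print(se(x).shape, gc(x).shape)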
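
Note (not part of the patch): the three single-purpose channel files (`channel_combine.py`, `channel_shuffle.py`, `channel_split.py`) are folded into `channel.py`, while the names re-exported from `blocks/__init__.py` stay the same, so existing call sites keep working. A short illustrative sketch of that unchanged public surface:

    import torch
    from cvm.models.ops import blocks

    chunk = blocks.ChannelChunk(groups=2)      # torch.chunk along dim=1
    shuffle = blocks.ChannelShuffle(groups=2)  # functional.channel_shuffle
    combine = blocks.Combine('CONCAT')         # torch.cat along dim=1

    x = torch.randn(1, 8, 4, 4)
    a, b = chunk(x)               # two (1, 4, 4, 4) tensors
    y = combine([a, shuffle(b)])  # back to (1, 8, 4, 4)
    print(y.shape)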