Adding UNet Model #210

Merged
merged 30 commits into master on Jan 27, 2023
Changes from 8 commits
Commits
30 commits
ba54cf0
model implemented
shivance Dec 27, 2022
11c50d9
adding documentation
shivance Dec 27, 2022
ca73586
ran juliaformatter
shivance Dec 28, 2022
552a8fd
removed custom forward pass using Parallel
shivance Jan 1, 2023
c577aed
removing _random_normal
shivance Jan 1, 2023
fb642c4
incorporating suggested changes
shivance Jan 2, 2023
7c7b1ee
Revert "ran juliaformatter"
shivance Jan 3, 2023
99f07ad
adapting to fastai's unet impl
shivance Jan 10, 2023
fc756d9
undoing utilities formatting
shivance Jan 10, 2023
60b082c
formatting + documentation + func signature
shivance Jan 10, 2023
2f1cc6d
adding unit tests for unet
shivance Jan 10, 2023
8d2ba2b
configuring CI
shivance Jan 10, 2023
77a3148
configuring CI
shivance Jan 10, 2023
8aebd14
Merge branch 'master' into unet
shivance Jan 10, 2023
429096b
Update convnets.jl
shivance Jan 10, 2023
d761126
Update convnets.jl
shivance Jan 10, 2023
1b5d2b7
updated test
shivance Jan 11, 2023
354e3c4
minor fixes
shivance Jan 12, 2023
6494be7
typing fix
shivance Jan 12, 2023
2d68f61
Update src/utilities.jl
shivance Jan 12, 2023
627480f
fixing ci
shivance Jan 12, 2023
4012fb2
renaming:
shivance Jan 16, 2023
016cef4
fixing test
shivance Jan 22, 2023
6097c57
Update .github/workflows/CI.yml
shivance Jan 22, 2023
98b4c30
Update src/convnets/unet.jl
shivance Jan 22, 2023
54c334f
Update src/convnets/unet.jl
shivance Jan 22, 2023
4fae8d6
incorporating suggestions
shivance Jan 22, 2023
4735dff
minor change
shivance Jan 22, 2023
3bebe5a
minor edit
shivance Jan 22, 2023
65aa5e8
Update src/convnets/unet.jl
shivance Jan 26, 2023
1 change: 1 addition & 0 deletions docs/src/index.md
@@ -36,6 +36,7 @@ julia> ]add Metalhead
| [ViT](https://arxiv.org/abs/2010.11929) | [`ViT`](@ref) | N |
| [ConvNeXt](https://arxiv.org/abs/2201.03545) | [`ConvNeXt`](@ref) | N |
| [ConvMixer](https://arxiv.org/abs/2201.09792) | [`ConvMixer`](@ref) | N |
| [UNet](https://arxiv.org/abs/1505.04597v1) | [`UNet`](@ref) | N |

To contribute new models, see our [contributing docs](@ref Contributing-to-Metalhead.jl).

5 changes: 3 additions & 2 deletions src/Metalhead.jl
@@ -53,6 +53,7 @@ include("convnets/densenet.jl")
include("convnets/squeezenet.jl")
include("convnets/convnext.jl")
include("convnets/convmixer.jl")
include("convnets/unet.jl")

# Mixers
include("mixers/core.jl")
@@ -73,15 +74,15 @@ export AlexNet, VGG, VGG11, VGG13, VGG16, VGG19,
GoogLeNet, Inception3, Inceptionv3, Inceptionv4, InceptionResNetv2, Xception,
SqueezeNet, MobileNetv1, MobileNetv2, MobileNetv3, MNASNet,
EfficientNet, EfficientNetv2, ConvMixer, ConvNeXt,
MLPMixer, ResMLP, gMLP, ViT
MLPMixer, ResMLP, gMLP, ViT, UNet

# use Flux._big_show to pretty print large models
for T in (:AlexNet, :VGG, :SqueezeNet, :ResNet, :WideResNet, :ResNeXt,
:SEResNet, :SEResNeXt, :Res2Net, :Res2NeXt, :GoogLeNet, :DenseNet,
:Inceptionv3, :Inceptionv4, :InceptionResNetv2, :Xception,
:MobileNetv1, :MobileNetv2, :MobileNetv3, :MNASNet,
:EfficientNet, :EfficientNetv2, :ConvMixer, :ConvNeXt,
:MLPMixer, :ResMLP, :gMLP, :ViT)
:MLPMixer, :ResMLP, :gMLP, :ViT, :UNet)
@eval Base.show(io::IO, ::MIME"text/plain", model::$T) = _maybe_big_show(io, model)
end

97 changes: 97 additions & 0 deletions src/convnets/unet.jl
@@ -0,0 +1,97 @@
function PixelShuffleICNR(inplanes, outplanes; r = 2)
return Chain(basic_conv_bn((1, 1), inplanes, outplanes * (r^2)),
Flux.PixelShuffle(r))
end

function UNetCombineLayer(inplanes, outplanes)
return Chain(basic_conv_bn((3, 3), inplanes, outplanes; pad = 1),
basic_conv_bn((3, 3), outplanes, outplanes; pad = 1))
end

function UNetMiddleBlock(inplanes)
return Chain(basic_conv_bn((3, 3), inplanes, 2inplanes; pad = 1),
basic_conv_bn((3, 3), 2inplanes, inplanes; pad = 1))
end

function UNetFinalBlock(inplanes, outplanes)
return Chain(basicblock(inplanes, inplanes; reduction_factor = 1),
basic_conv_bn((1, 1), inplanes, outplanes))
end

function unetlayers(layers, sz; outplanes = nothing, skip_upscale = 0,
m_middle = _ -> (identity,))
isempty(layers) && return m_middle(sz[end - 1])

layer, layers = layers[1], layers[2:end]
outsz = Flux.outputsize(layer, sz)
does_downscale = sz[1] ÷ 2 == outsz[1]

if !does_downscale
return Chain(layer, unetlayers(layers, outsz; outplanes, skip_upscale)...)
elseif does_downscale && skip_upscale > 0
return Chain(layer,
unetlayers(layers, outsz; skip_upscale = skip_upscale - 1,
outplanes)...)
else
childunet = Chain(unetlayers(layers, outsz; skip_upscale)...)
outsz = Flux.outputsize(childunet, outsz)

inplanes = sz[end - 1]
midplanes = outsz[end - 1]
outplanes = isnothing(outplanes) ? inplanes : outplanes

return UNetBlock(Chain(layer, childunet),
inplanes, midplanes, outplanes)
end
end

function UNetBlock(m_child, inplanes, midplanes, outplanes = 2inplanes)
return Chain(;
upsample = SkipConnection(Chain(; child = m_child,
upsample = PixelShuffleICNR(midplanes,
midplanes)),
Parallel(cat_channels, identity,
BatchNorm(inplanes))),
act = xs -> relu.(xs),
combine = UNetCombineLayer(inplanes + midplanes, outplanes))
end

"""
UNet(backbone, inputsize, outplanes, final = UNetFinalBlock, fdownscale = 0)

Creates a UNet model with the specified `backbone`. The backbone of any Metalhead model
can be used as the encoder.

!!! warning

`UNet` does not currently support pretrained weights.

See also [`Metalhead.UNet`](@ref).
"""
struct UNet
layers::Any
end
@functor UNet

function UNet(backbone,
inputsize,
outplanes,
final = UNetFinalBlock,
fdownscale::Integer = 0,
kwargs...)
backbonelayers = collect(iterlayers(backbone))
layers = unetlayers(backbonelayers,
inputsize;
m_middle = UNetMiddleBlock,
skip_upscale = fdownscale,
kwargs...)

outsz = Flux.outputsize(layers, inputsize)
layers = Chain(layers, final(outsz[end - 1], outplanes))

return UNet(layers)
end

(m::UNet)(x::AbstractArray) = m.layers(x)
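As an aside for anyone trying this branch locally, below is a minimal usage sketch of the constructor added above. It is illustrative only and not part of the diff: it assumes `Metalhead.backbone(ResNet(18))` yields a `Chain` encoder that `iterlayers` can flatten, uses a 256×256 RGB input with an explicit batch dimension (since `inputsize` is fed straight to `Flux.outputsize`), and passes only positional arguments because the trailing `kwargs...` sits in the positional list at this revision.

```julia
using Flux, Metalhead

# Illustrative sketch, not part of this PR. Assumes the ResNet feature
# extractor works as a UNet encoder and that the construction above succeeds.
encoder = Metalhead.backbone(ResNet(18))   # convolutional encoder without the classifier head
inputsize = (256, 256, 3, 1)               # width, height, channels, batch (WHCN)

model = UNet(encoder, inputsize, 3)        # 3 output channels, e.g. a 3-class per-pixel map

x = rand(Float32, inputsize...)
y = model(x)
size(y)  # expected to match the input spatial size, i.e. (256, 256, 3, 1)
```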
43 changes: 23 additions & 20 deletions src/utilities.jl
@@ -4,13 +4,13 @@ seconddimmean(x) = dropdims(mean(x; dims = 2); dims = 2)
# utility function for making sure that all layers have a channel size divisible by 8
# used by MobileNet variants
function _round_channels(channels::Number, divisor::Integer = 8, min_value::Integer = 0)
new_channels = max(min_value, floor(Int, channels + divisor / 2) ÷ divisor * divisor)
# Make sure that round down does not go down by more than 10%
return new_channels < 0.9 * channels ? new_channels + divisor : new_channels
new_channels = max(min_value, floor(Int, channels + divisor / 2) ÷ divisor * divisor)
# Make sure that round down does not go down by more than 10%
return new_channels < 0.9 * channels ? new_channels + divisor : new_channels
Member: all the indentation changes in this file are bad

end

"""
addact(activation = relu, xs...)
addact(activation = relu, xs...)

Convenience function for applying an activation function to the output after
summing up the input arrays. Useful as the `connection` argument for the block
@@ -19,7 +19,7 @@ function in [`resnet`](@ref).
addact(activation = relu, xs...) = activation(sum(xs))

"""
actadd(activation = relu, xs...)
actadd(activation = relu, xs...)

Convenience function for adding input arrays after applying an activation
function to them. Useful as the `connection` argument for the block function in
@@ -28,7 +28,7 @@ function to them. Useful as the `connection` argument for the block function in
actadd(activation = relu, xs...) = sum(activation.(x) for x in xs)

"""
cat_channels(x, y, zs...)
cat_channels(x, y, zs...)

Concatenate `x` and `y` (and any `z`s) along the channel dimension (third dimension).
Equivalent to `cat(x, y, zs...; dims=3)`.
@@ -40,7 +40,7 @@ cat_channels(x::Tuple, y::AbstractArray...) = cat_channels(x..., y...)
cat_channels(x::Tuple) = cat_channels(x...)

"""
swapdims(perm)
swapdims(perm)

Convenience function for permuting the dimensions of an array.
`perm` is a vector or tuple specifying a permutation of the input dimensions.
@@ -50,32 +50,35 @@ swapdims(perm) = Base.Fix2(permutedims, perm)

# Utility function for pretty printing large models
function _maybe_big_show(io, model)
if isdefined(Flux, :_big_show)
if isnothing(get(io, :typeinfo, nothing)) # e.g. top level in REPL
Flux._big_show(io, model)
else
show(io, model)
end
else
show(io, model)
end
if isdefined(Flux, :_big_show)
if isnothing(get(io, :typeinfo, nothing)) # e.g. top level in REPL
Flux._big_show(io, model)
else
show(io, model)
end
else
show(io, model)
end
end

"""
linear_scheduler(drop_prob = 0.0; start_value = 0.0, depth)
linear_scheduler(drop_prob::Nothing; depth::Integer)
linear_scheduler(drop_prob = 0.0; start_value = 0.0, depth)
linear_scheduler(drop_prob::Nothing; depth::Integer)

Returns the dropout probabilities for a given depth using the linear scaling rule. Note
that this returns evenly spaced values between `start_value` and `drop_prob`, not including
`drop_prob`. If `drop_prob` is `nothing`, it returns a `Vector` of length `depth` with all
values equal to `nothing`.
"""
function linear_scheduler(drop_prob = 0.0; depth::Integer, start_value = 0.0)
return LinRange(start_value, drop_prob, depth + 1)[1:depth]
return LinRange(start_value, drop_prob, depth + 1)[1:depth]
end
linear_scheduler(drop_prob::Nothing; depth::Integer) = fill(drop_prob, depth)

# Utility function for depth and configuration checks in models
function _checkconfig(config, configs)
@assert config in configs "Invalid configuration. Must be one of $(sort(collect(configs)))."
@assert config in configs "Invalid configuration. Must be one of $(sort(collect(configs)))."
end

# Utility function to return Iterator over layers, adopted from FastAI.jl
iterlayers(m::Chain) = Iterators.flatten(iterlayers(l) for l in m.layers)
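Finally, a short illustrative snippet for the `iterlayers` utility added at the end of src/utilities.jl. This is not part of the diff; it assumes a fallback method for non-`Chain` layers (something like `iterlayers(m) = (m,)`, in the spirit of FastAI.jl, from which the comment above says the helper is adopted) exists outside the hunk shown, since `unetlayers` expects a flat sequence of layers.

```julia
using Flux

# Standalone sketch of the utility. The non-Chain fallback below is an
# assumption; it is not visible in the hunk above.
iterlayers(m::Chain) = Iterators.flatten(iterlayers(l) for l in m.layers)
iterlayers(m) = (m,)

nested = Chain(Chain(Conv((3, 3), 3 => 16), BatchNorm(16)), MaxPool((2, 2)))
collect(iterlayers(nested))
# 3-element Vector{Any}: Conv((3, 3), 3 => 16), BatchNorm(16), MaxPool((2, 2))
```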