From fc3eff4141a62a65933e4d5c4bd45b30a3eca913 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Sat, 19 Feb 2022 13:07:04 -0500 Subject: [PATCH 1/7] rm Flux.Zeros, take N+1 --- src/Flux.jl | 1 - src/deprecations.jl | 8 ++++ src/layers/basic.jl | 6 ++- src/layers/conv.jl | 22 +++++----- src/utils.jl | 6 +-- src/zeros.jl | 52 ---------------------- test/cuda/layers.jl | 4 +- test/layers/basic.jl | 2 +- test/layers/conv.jl | 2 +- test/optimise.jl | 2 +- test/utils.jl | 100 ++++++++++--------------------------------- 11 files changed, 54 insertions(+), 151 deletions(-) delete mode 100644 src/zeros.jl diff --git a/src/Flux.jl b/src/Flux.jl index 2b204567d0..ad7233cfd7 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -36,7 +36,6 @@ using CUDA const use_cuda = Ref{Union{Nothing,Bool}}(nothing) include("utils.jl") -include("zeros.jl") include("onehot.jl") include("functor.jl") diff --git a/src/deprecations.jl b/src/deprecations.jl index e258f41897..c7ca77b6cc 100644 --- a/src/deprecations.jl +++ b/src/deprecations.jl @@ -19,6 +19,14 @@ zeros32(::Type, dims...) = throw(ArgumentError("Flux.zeros32 is always Float32, @deprecate frequencies(xs) group_counts(xs) +struct Zeros + function Zeros() + Base.depwarn("Flux.Zeros is no more, has ceased to be, is bereft of life, is an ex-boondoggle... please use bias=false instead", :Zeros) + false + end +end +Zeros(args...) = Zeros() # was used both Dense(10, 2, initb = Zeros) and Dense(rand(2,10), Zeros()) + # Channel notation: Changed to match Conv, but very softly deprecated! # Perhaps change to @deprecate for v0.14, but there is no plan to remove these. Dense(in::Integer, out::Integer, σ = identity; kw...) = diff --git a/src/layers/basic.jl b/src/layers/basic.jl index 952ff7d444..c61374455a 100644 --- a/src/layers/basic.jl +++ b/src/layers/basic.jl @@ -167,7 +167,7 @@ end function Base.show(io::IO, l::Dense) print(io, "Dense(", size(l.weight, 2), " => ", size(l.weight, 1)) l.σ == identity || print(io, ", ", l.σ) - l.bias == Zeros() && print(io, "; bias=false") + l.bias == false && print(io, "; bias=false") print(io, ")") end @@ -394,7 +394,11 @@ function Base.show(io::IO, l::Bilinear) print(io, "Bilinear((", size(l.weight, 2), ", ", size(l.weight, 3), ") => ", size(l.weight, 1)) end l.σ == identity || print(io, ", ", l.σ) +<<<<<<< HEAD l.bias == Flux.Zeros() && print(io, "; bias=false") +======= + l.bias === false && print(io, ", bias=false") +>>>>>>> 1ef2cd377 (rm Flux.Zeros, take N+1) print(io, ")") end diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 519618e4be..8036780d95 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -6,6 +6,10 @@ _paddims(x::Tuple, y::Tuple) = (x..., y[(end - (length(y) - length(x) - 1)):end] expand(N, i::Tuple) = i expand(N, i::Integer) = ntuple(_ -> i, N) +conv_reshape_bias(c) = c.bias isa AbstractVector ? + reshape(c.bias, map(_->1, c.stride)..., :, 1) : + c.bias + """ SamePad() @@ -61,8 +65,8 @@ Then: Keywords to control initialization of the layer: * `init` - Function used to generate initial weights. Defaults to `glorot_uniform`. -* `bias` - Initial bias is zero by default, this can be disabled entirely by setting it to - `false`, or another vector explicitly as `bias = randn(Float32, out)`. +* `bias` - The initial bias vector is all zero by default. Trainable bias can be disabled entirely + by setting this to `false`, or another vector can be provided such as `bias = randn(Float32, out)`. 
See also [`ConvTranspose`](@ref), [`DepthwiseConv`](@ref), [`CrossCor`](@ref). @@ -159,10 +163,9 @@ end @functor Conv function (c::Conv)(x::AbstractArray) - b = reshape(c.bias, map(_->1, c.stride)..., :, 1) σ = NNlib.fast_act(c.σ, x) cdims = DenseConvDims(x, c.weight; stride = c.stride, padding = c.pad, dilation = c.dilation, groups = c.groups) - σ.(conv(x, c.weight, cdims) .+ b) + σ.(conv(x, c.weight, cdims) .+ conv_reshape_bias(c)) end _channels_in(l ::Conv) = size(l.weight, ndims(l.weight)-1) * l.groups @@ -183,7 +186,7 @@ function _print_conv_opt(io::IO, l) if hasproperty(l, :groups) (l.groups == 1) || print(io, ", groups=", l.groups) end - (l.bias isa Zeros) && print(io, ", bias=false") + (l.bias === false) && print(io, ", bias=false") end """ @@ -277,10 +280,9 @@ end @nograd conv_transpose_dims function (c::ConvTranspose)(x::AbstractArray) - b = reshape(c.bias, map(_->1, c.stride)..., :, 1) σ = NNlib.fast_act(c.σ, x) cdims = conv_transpose_dims(c, x) - σ.(∇conv_data(x, c.weight, cdims) .+ b) + σ.(∇conv_data(x, c.weight, cdims) .+ conv_reshape_bias(c)) end function Base.show(io::IO, l::ConvTranspose) @@ -372,10 +374,9 @@ depthwiseconvfilter(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}; init = glorot_uniform) where N = init(filter..., div(ch[2], ch[1]), ch[1]) function (c::DepthwiseConv)(x) - b = reshape(c.bias, map(_->1, c.stride)..., :, 1) σ = NNlib.fast_act(c.σ, x) cdims = DepthwiseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation) - σ.(depthwiseconv(x, c.weight, cdims) .+ b) + σ.(depthwiseconv(x, c.weight, cdims) .+ conv_reshape_bias(c)) end function Base.show(io::IO, l::DepthwiseConv) @@ -453,10 +454,9 @@ function crosscor(x, w, ddims::DenseConvDims) end function (c::CrossCor)(x::AbstractArray) - b = reshape(c.bias, map(_->1, c.stride)..., :, 1) σ = NNlib.fast_act(c.σ, x) cdims = DenseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation) - σ.(crosscor(x, c.weight, cdims) .+ b) + σ.(crosscor(x, c.weight, cdims) .+ conv_reshape_bias(c)) end function Base.show(io::IO, l::CrossCor) diff --git a/src/utils.jl b/src/utils.jl index b5edbad5e6..5f9a4e98d7 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -441,17 +441,17 @@ rand32(dims...) = Base.rand(Float32, dims...) randn32(dims...) = Base.randn(Float32, dims...) """ - create_bias(weights, bias, length) + create_bias(weights, bias, size...) Return a bias parameter for a layer, based on the value given to the constructor's keyword `bias=bias`. * `bias == true` creates a zero vector, of the same type as weights. -* `bias == false` returns `Zeros()`, a special struct which exists only to encode the absence of bias. +* `bias == false` returns `false` now, which is understood by AD to be non-differentiable. * `bias::AbstractArray` uses the array provided, provided it has the correct size and eltype. If the type is wrong, it will be converted. """ function create_bias(weights::AbstractArray, bias::Bool, dims::Integer...) - bias ? fill!(similar(weights, dims...), 0) : Zeros() + bias ? fill!(similar(weights, dims...), 0) : false end function create_bias(weights::AbstractArray, bias::AbstractArray, dims::Integer...) 
size(bias) == dims || throw(DimensionMismatch("expected bias of size $(dims), got size $(size(bias))")) diff --git a/src/zeros.jl b/src/zeros.jl deleted file mode 100644 index 1281f4c87a..0000000000 --- a/src/zeros.jl +++ /dev/null @@ -1,52 +0,0 @@ -import Base: +, -, *,/, reshape, broadcasted - -""" - Zeros() - -Acts as a stand-in for an array of zeros that can be -used during training which is ignored by the optimisers. - -Useful to turn bias off for a forward pass of a layer. - -## Examples - -```julia-repl -julia> bias_less_conv = Conv((2,2), 1=>3; bias = false) -Conv((2, 2), 1=>3) - -julia> params(bias_less_conv) |> length -1 - -julia> bias_less_conv.bias -Flux.Zeros() -``` -""" -struct Zeros end -# To allow for things like Dense(10, 2, initb = Zeros) -Zeros(args...) = Zeros() - -Base.reshape(x::Zeros, dims...) = x - -+(::Zeros, b::AbstractArray) = b -+(a::AbstractArray, ::Zeros) = a -+(a::Zeros, ::Zeros) = a - --(::Zeros, b::AbstractArray) = -b --(a::AbstractArray, ::Zeros) = a --(a::Zeros, ::Zeros) = a - -# Some opportunities to avoid scalar indexing, intermediaries -# Since it replicates a little of what we expect Base to do, -# it should be possible to remove in the future, but for now, -# these help with performance. -broadcasted(::typeof(+), a::AbstractArray, b::Zeros) = a -broadcasted(::typeof(+), a::Zeros, b::AbstractArray) = b -broadcasted(::typeof(-), a::AbstractArray, b::Zeros) = a -broadcasted(::typeof(-), a::Zeros, b::AbstractArray) = -b -# Need adjoints for these or else the gradient w.r.t to the non-Zeros arg will be nothing as well -@adjoint broadcasted(::typeof(*), a::AbstractArray, b::Zeros) = zero(a), _ -> (nothing, zero(a), nothing) -@adjoint broadcasted(::typeof(*), a::Zeros, b::AbstractArray) = zero(b), _ -> (nothing, nothing, zero(b)) -@adjoint broadcasted(::typeof(/), a::Zeros, b::AbstractArray) = zero(b), _ -> (nothing, nothing, zero(b)) - -# Pass-through for layer constructors -create_bias(weights::AbstractArray, bias::Flux.Zeros, dims::Integer...) 
= bias diff --git a/test/cuda/layers.jl b/test/cuda/layers.jl index 396e6c0ab5..677e50170d 100644 --- a/test/cuda/layers.jl +++ b/test/cuda/layers.jl @@ -155,8 +155,8 @@ end end end -@testset "Dense with Zeros bias" begin - l = Dense(ones(Float32, 4, 3), Flux.Zeros()) |> gpu +@testset "Dense without bias" begin + l = Dense(ones(Float32, 4, 3), false) |> gpu ip = zeros(Float32, 3, 7) |> gpu @test sum(l(ip)) ≈ 0.f0 diff --git a/test/layers/basic.jl b/test/layers/basic.jl index ca8e15a643..5befed57b4 100644 --- a/test/layers/basic.jl +++ b/test/layers/basic.jl @@ -175,7 +175,7 @@ import Flux: activations @test b1.σ == identity b2 = Flux.Bilinear(randn(3,4,5), false) - @test b2.bias == Flux.Zeros() + @test b2.bias === false b3 = Flux.Bilinear(randn(Float16, 3,4,5), true, tanh) @test b3.σ == tanh diff --git a/test/layers/conv.jl b/test/layers/conv.jl index 9ce1a27aa0..eb7d13be1c 100644 --- a/test/layers/conv.jl +++ b/test/layers/conv.jl @@ -273,7 +273,7 @@ end @testset "constructors: $fun" for fun in [Conv, CrossCor, ConvTranspose, DepthwiseConv] @test fun(rand(2,3,4)).bias isa Vector{Float64} - @test fun(rand(2,3,4,5), false).bias isa Flux.Zeros + @test fun(rand(2,3,4,5), false).bias === false if fun == Conv @test fun(rand(2,3,4,5,6), rand(6)).bias isa Vector{Float64} @test_skip fun(rand(2,3,4,5,6), 1:6).bias isa Vector{Float64} diff --git a/test/optimise.jl b/test/optimise.jl index 4c2d70b351..7b20a635db 100644 --- a/test/optimise.jl +++ b/test/optimise.jl @@ -15,7 +15,7 @@ using Random Nesterov(), RMSProp(), Momentum()] Random.seed!(42) w′ = randn(10, 10) - b = Flux.Zeros() + b = false loss(x) = Flux.Losses.mse(w*x, w′*x .+ b) for t = 1: 10^5 θ = params([w′, b]) diff --git a/test/utils.jl b/test/utils.jl index 3f4efe24ba..77d4b81b5f 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -1,8 +1,8 @@ using Flux using Flux: throttle, nfan, glorot_uniform, glorot_normal, kaiming_normal, kaiming_uniform, orthogonal, truncated_normal, - sparse_init, stack, unstack, Zeros, batch, unbatch, - unsqueeze, params + sparse_init, stack, unstack, batch, unbatch, + unsqueeze, params, loadparams! using StatsBase: var, std using Statistics, LinearAlgebra using Random @@ -263,88 +263,36 @@ end @test eltype(f32(f64(m))[1].weight) == Float32 end -@testset "Zeros" begin +@testset "zero bias" begin m = Dense(3,2; bias=false) - @test f64(m).bias === m.bias === Zeros() - @test f32(m).bias === m.bias === Zeros() + @test f64(m).bias === m.bias === false + @test f32(m).bias === m.bias === false @testset "Gradients for broadcasted $op with sizes $s" for op in (+,-,*), s in ((1,), (2,3)) o = ones(s) z = zeros(s) - Z = Zeros() @testset "Explicit" begin gfun(args...) = gradient((x, y) -> sum(op.(x,y)), args...) g = gfun(o, z) - @test gfun(o, Z) == (g[1], nothing) + @test gfun(o, false) == (g[1], nothing) g = gfun(z, o) - @test gfun(Z, o) == (nothing, g[2]) + @test gfun(false, o) == (nothing, g[2]) end @testset "Implicit" begin gfun(args...) = gradient(() -> sum(op.(args...)), params(collect(args))) g = gfun(o, z) - gres = gfun(o, Z) + gres = gfun(o, false) @test gres[o] == g[o] - @test Z ∉ gres.params + @test false ∉ gres.params g = gfun(z, o) - gres = gfun(Z, o) + gres = gfun(false, o) @test gres[o] == g[o] - @test Z ∉ gres.params - end - end - - @testset "Gradients for broadcasted / with sizes $s" for s in ((1,), (2,3)) - o = ones(s) - z = zeros(s) - Z = Zeros() # Only defined for 0-dim - - @testset "Explicit" begin - gfun(args...) = gradient((x, y) -> sum(x ./ y), args...) 
- g = gfun(z, o) - @test gfun(Z, o) == (nothing, g[2]) - end - - @testset "Implicit" begin - gfun(x,y) = gradient(() -> sum(x ./ y), params([x,y])) - - g = gfun(z, o) - gres = gfun(Z, o) - @test gres[o] == g[o] - @test Z ∉ gres.params - end - end - - @testset "Gradients for $op with sizes $s" for op in (+,-), s in (tuple(), (1,), (2,3)) - o = ones(s) - z = zeros(s) - Z = Zeros() - - - @testset "Explicit" begin - gfun(args...) = gradient((x, y) -> sum(op(x,y)), args...) - - g = gfun(o, z) - @test gfun(o, Z) == (g[1], nothing) - - g = gfun(z, o) - @test gfun(Z, o) == (nothing, g[2]) - end - - @testset "Implicit" begin - gfun(args...) = gradient(() -> sum(op(args...)), params(collect(args))) - g = gfun(o, z) - gres = gfun(o, Z) - @test gres[o] == g[o] - @test Z ∉ gres.params - - g = gfun(z, o) - gres = gfun(Z, o) - @test gres[o] == g[o] - @test Z ∉ gres.params + @test false ∉ gres.params end end end @@ -385,7 +333,7 @@ end dl(4, 3, bias) ) - nobias(n) = Zeros() + nobias(n) = false testdense(m, bt) = @testset "Check layer $i" for (i, (l1, l2)) in enumerate(zip(m, dm(bt))) @test l1.weight == l2.weight @test l1.bias == l2.bias @@ -393,11 +341,7 @@ end end @testset "loadparams!" begin - import Flux: loadparams! pars(w, b) = [w, b] - import Flux: loadparams!, Zeros - - pars(w, b::Zeros) = [w, Flux.zeros32(size(w,1))] pars(l) = pars(l.weight, l.bias) pararray(m) = mapreduce(pars, vcat, m) weights(m) = mapreduce(l -> [l.weight], vcat, m) @@ -407,16 +351,16 @@ end testdense(m, bt) end - @testset "$b1 to $b2" for (b1, b2, be) in ( - (Flux.zeros32, Flux.ones32, Flux.ones32), # Load ones as bias to a model with zeros as bias -> model gets ones as bias - (Flux.ones32, nobias, Flux.zeros32), # Load Zeros as bias to a model with ones as bias-> model gets zeros as bias - (nobias, Flux.ones32, nobias), # Load ones as bias to a model with Zeros as bias-> model bias does not change - ) - m1 = dm(b1) - m2 = dm(b2) - loadparams!(m1, b1 == nobias ? weights(m2) : pararray(m2)) - testdense(m1, be) - end + # @testset "$b1 to $b2" for (b1, b2, be) in ( + # (Flux.zeros32, Flux.ones32, Flux.ones32), # Load ones as bias to a model with zeros as bias -> model gets ones as bias + # (Flux.ones32, nobias, Flux.zeros32), # Load Zeros as bias to a model with ones as bias-> model gets zeros as bias + # (nobias, Flux.ones32, nobias), # Load ones as bias to a model with Zeros as bias-> model bias does not change + # ) + # m1 = dm(b1) + # m2 = dm(b2) + # loadparams!(m1, b1 == nobias ? 
weights(m2) : pararray(m2)) + # testdense(m1, be) + # end end @testset "destructure" begin From 160244153e8a38e610fc218a4272806b6bf640fe Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Sat, 19 Feb 2022 15:17:35 -0500 Subject: [PATCH 2/7] human-readable loadparams tests, same results --- test/utils.jl | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/test/utils.jl b/test/utils.jl index 77d4b81b5f..9b4ceacbb1 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -350,17 +350,6 @@ end loadparams!(m, params(m)) testdense(m, bt) end - - # @testset "$b1 to $b2" for (b1, b2, be) in ( - # (Flux.zeros32, Flux.ones32, Flux.ones32), # Load ones as bias to a model with zeros as bias -> model gets ones as bias - # (Flux.ones32, nobias, Flux.zeros32), # Load Zeros as bias to a model with ones as bias-> model gets zeros as bias - # (nobias, Flux.ones32, nobias), # Load ones as bias to a model with Zeros as bias-> model bias does not change - # ) - # m1 = dm(b1) - # m2 = dm(b2) - # loadparams!(m1, b1 == nobias ? weights(m2) : pararray(m2)) - # testdense(m1, be) - # end end @testset "destructure" begin @@ -386,6 +375,26 @@ end end end +@testset "loadparams! & absent bias" begin + m0 = Dense(2,3; bias=false, init = Flux.ones32) + m1 = Dense(2,3; bias = Flux.randn32(3)) + m2 = Dense(Float32[1 2; 3 4; 5 6], Float32[7, 8, 9]) + + Flux.loadparams!(m1, Flux.params(m2)) + @test m1.bias == 7:9 + @test sum(m1.weight) == 21 + + # load from a model without bias: + Flux.loadparams!(m1, Flux.params(m0)) + @test_broken iszero(m1.bias) # should ideally recognise the false but Params doesn't store it. + @test sum(m1.weight) == 6 + + # load into a model without bias: + Flux.loadparams!(m0, Flux.params(m2)) # ignore the parameter which has nowhere to go? Or error? + @test iszero(m0.bias) # obviously unchanged + @test sum(m0.weight) == 21 +end + @testset "Train and test mode" begin mutable struct DummyLayer testing::Bool From 312cad5d2eef080a32c0c9d419eba3d5922c7fa1 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Sat, 19 Feb 2022 19:00:56 -0500 Subject: [PATCH 3/7] fixup --- src/layers/basic.jl | 8 ++------ test/utils.jl | 6 +++--- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/layers/basic.jl b/src/layers/basic.jl index c61374455a..5974c34df7 100644 --- a/src/layers/basic.jl +++ b/src/layers/basic.jl @@ -301,7 +301,7 @@ end Bilinear((in1, in2) => out, σ=identity; bias=true, init=glorot_uniform) Bilinear(W::AbstractArray, [bias, σ]) -Creates a bilinear layer, which operates on two inputs at the same time. +Creates a fully connected layer which operates on two inputs. 
Its output, given vectors `x` & `y`, is another vector `z` with, for all `i ∈ 1:out`: @@ -394,11 +394,7 @@ function Base.show(io::IO, l::Bilinear) print(io, "Bilinear((", size(l.weight, 2), ", ", size(l.weight, 3), ") => ", size(l.weight, 1)) end l.σ == identity || print(io, ", ", l.σ) -<<<<<<< HEAD - l.bias == Flux.Zeros() && print(io, "; bias=false") -======= - l.bias === false && print(io, ", bias=false") ->>>>>>> 1ef2cd377 (rm Flux.Zeros, take N+1) + l.bias === false && print(io, "; bias=false") print(io, ")") end diff --git a/test/utils.jl b/test/utils.jl index 9b4ceacbb1..8e763dbc1d 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -264,7 +264,7 @@ end end @testset "zero bias" begin - m = Dense(3,2; bias=false) + m = Dense(3 => 2; bias=false) @test f64(m).bias === m.bias === false @test f32(m).bias === m.bias === false @@ -376,8 +376,8 @@ end end @testset "loadparams! & absent bias" begin - m0 = Dense(2,3; bias=false, init = Flux.ones32) - m1 = Dense(2,3; bias = Flux.randn32(3)) + m0 = Dense(2 => 3; bias=false, init = Flux.ones32) + m1 = Dense(2 => 3; bias = Flux.randn32(3)) m2 = Dense(Float32[1 2; 3 4; 5 6], Float32[7, 8, 9]) Flux.loadparams!(m1, Flux.params(m2)) From 875043ce213035d8778f9dc639954fffac081ffa Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Sun, 20 Feb 2022 15:44:50 -0500 Subject: [PATCH 4/7] make the words match the code --- src/utils.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index 5f9a4e98d7..3258a57107 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -446,9 +446,10 @@ randn32(dims...) = Base.randn(Float32, dims...) Return a bias parameter for a layer, based on the value given to the constructor's keyword `bias=bias`. -* `bias == true` creates a zero vector, of the same type as weights. +* `bias == true` creates a trainable array of the given size, of the same type as `weights`, initialised to zero. * `bias == false` returns `false` now, which is understood by AD to be non-differentiable. -* `bias::AbstractArray` uses the array provided, provided it has the correct size and eltype. If the type is wrong, it will be converted. +* `bias::AbstractArray` uses the array provided, provided it has the correct size. + It does not at present correct the `eltype` to match that of `weights`. """ function create_bias(weights::AbstractArray, bias::Bool, dims::Integer...) bias ? fill!(similar(weights, dims...), 0) : false From fed18fde686a60c2e453a675729cb6c702d0c93d Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Mon, 21 Feb 2022 09:12:33 -0500 Subject: [PATCH 5/7] upgrade to test Chain, more errors, but same on master --- test/utils.jl | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/test/utils.jl b/test/utils.jl index 8e763dbc1d..f240c56882 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -376,23 +376,23 @@ end end @testset "loadparams! 
& absent bias" begin - m0 = Dense(2 => 3; bias=false, init = Flux.ones32) - m1 = Dense(2 => 3; bias = Flux.randn32(3)) - m2 = Dense(Float32[1 2; 3 4; 5 6], Float32[7, 8, 9]) + m0 = Chain(Dense(2 => 3; bias=false, init = Flux.ones32), Dense(3 => 1)) + m1 = Chain(Dense(2 => 3; bias = Flux.randn32(3)), Dense(3 => 1)) + m2 = Chain(Dense(Float32[1 2; 3 4; 5 6], Float32[7, 8, 9]), Dense(3 => 1)) Flux.loadparams!(m1, Flux.params(m2)) - @test m1.bias == 7:9 - @test sum(m1.weight) == 21 - - # load from a model without bias: - Flux.loadparams!(m1, Flux.params(m0)) - @test_broken iszero(m1.bias) # should ideally recognise the false but Params doesn't store it. - @test sum(m1.weight) == 6 - - # load into a model without bias: - Flux.loadparams!(m0, Flux.params(m2)) # ignore the parameter which has nowhere to go? Or error? - @test iszero(m0.bias) # obviously unchanged - @test sum(m0.weight) == 21 + @test m1[1].bias == 7:9 + @test sum(m1[1].weight) == 21 + + # load from a model without bias -- should ideally recognise the `false` but `Params` doesn't store it + @test_broken Flux.loadparams!(m1, Flux.params(m0)) + @test_broken iszero(m1[1].bias) + @test sum(m1[1].weight) == 6 # written before error + + # load into a model without bias -- should it ignore the parameter which has no home, or error? + @test_broken Flux.loadparams!(m0, Flux.params(m2)) + @test iszero(m0[1].bias) # obviously unchanged + @test sum(m0[1].weight) == 21 end @testset "Train and test mode" begin From d559685a369ff28c008041c744351ce0ee4aab85 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Sat, 5 Mar 2022 08:17:39 -0500 Subject: [PATCH 6/7] Update src/utils.jl Co-authored-by: Carlo Lucibello --- src/utils.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils.jl b/src/utils.jl index 3258a57107..ce715a4892 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -447,7 +447,7 @@ Return a bias parameter for a layer, based on the value given to the constructor's keyword `bias=bias`. * `bias == true` creates a trainable array of the given size, of the same type as `weights`, initialised to zero. -* `bias == false` returns `false` now, which is understood by AD to be non-differentiable. +* `bias == false` returns `false`, which is understood by AD to be non-differentiable. * `bias::AbstractArray` uses the array provided, provided it has the correct size. It does not at present correct the `eltype` to match that of `weights`. """ From b6e3f93f191cedb015b0471a9d83ef4f7a0ede4a Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Sat, 5 Mar 2022 11:38:22 -0500 Subject: [PATCH 7/7] Update src/layers/basic.jl --- src/layers/basic.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layers/basic.jl b/src/layers/basic.jl index 5974c34df7..8aaf4e7df0 100644 --- a/src/layers/basic.jl +++ b/src/layers/basic.jl @@ -301,7 +301,7 @@ end Bilinear((in1, in2) => out, σ=identity; bias=true, init=glorot_uniform) Bilinear(W::AbstractArray, [bias, σ]) -Creates a fully connected layer which operates on two inputs. +Creates a layer which is fully connected between two inputs and the output, and otherwise similar to [`Dense`](@ref). Its output, given vectors `x` & `y`, is another vector `z` with, for all `i ∈ 1:out`: