diff --git a/NEWS.md b/NEWS.md
index f8efaa5f1f..9ef9fc92f8 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,7 +2,8 @@
 
 ## v0.12.0
 
-* The Dense layer now supports inputs with [multiple batch dimensions](https://github.com/FluxML/Flux.jl/pull/1405)
+* The Dense layer now supports inputs with [multiple batch dimensions](https://github.com/FluxML/Flux.jl/pull/1405).
+* Dense and Conv layers no longer perform [implicit type conversion](https://github.com/FluxML/Flux.jl/pull/1394).
 * Excise datasets in favour of other providers in the julia ecosystem.
 * Added option to set `bias` to [false](https://github.com/FluxML/Flux.jl/pull/1379) to eliminating `bias` from being trained.
 * Removed kwarg only constructors for [`convolutional layers`](https://github.com/FluxML/Flux.jl/pull/1379).
diff --git a/docs/src/performance.md b/docs/src/performance.md
index c78ff1a115..36eea32e95 100644
--- a/docs/src/performance.md
+++ b/docs/src/performance.md
@@ -13,7 +13,6 @@ not because the operations are faster, but because the memory usage is halved.
 Which means allocations occur much faster.
 And you use less memory.
 
-
 ## Preserve inputs' types
 
 Not only should your activation and loss functions be [type-stable](https://docs.julialang.org/en/v1/manual/performance-tips/#Write-%22type-stable%22-functions-1),
@@ -21,8 +20,8 @@ they should also preserve the type of their inputs.
 
 A very artificial example using an activation function like
 
-```
- my_tanh(x) = Float64(tanh(x))
+```julia
+my_tanh(x) = Float64(tanh(x))
 ```
 
 will result in performance on `Float32` input orders of magnitude slower than the normal `tanh` would,
@@ -35,20 +34,21 @@ you will see a large slow-down.
 
 This can occur sneakily, because you can cause type-promotion by interacting with a numeric literals.
 E.g. the following will have run into the same problem as above:
 
-```
- leaky_tanh(x) = 0.01*x + tanh(x)
+```julia
+leaky_tanh(x) = 0.01*x + tanh(x)
 ```
 
 While one could change the activation function (e.g. to use `0.01f0*x`), the idiomatic (and safe way) to avoid type casts whenever inputs changes is to use `oftype`:
-```
- leaky_tanh(x) = oftype(x/1, 0.01)*x + tanh(x)
-```
+```julia
+leaky_tanh(x) = oftype(x/1, 0.01)*x + tanh(x)
+```
 
 ## Evaluate batches as Matrices of features
 
 While it can sometimes be tempting to process your observations (feature vectors) one at a time
 e.g.
+
 ```julia
 function loss_total(xs::AbstractVector{<:Vector}, ys::AbstractVector{<:Vector})
   sum(zip(xs, ys)) do (x, y_target)
diff --git a/src/layers/basic.jl b/src/layers/basic.jl
index 6a4be2dcab..c8d905be46 100644
--- a/src/layers/basic.jl
+++ b/src/layers/basic.jl
@@ -121,9 +121,8 @@ end
 
 function (a::Dense)(x::AbstractArray)
   W, b, σ = a.W, a.b, a.σ
-  # reshape to handle dims > 1 as batch dimensions
   sz = size(x)
-  x = reshape(x, sz[1], :)
+  x = reshape(x, sz[1], :) # reshape to handle dims > 1 as batch dimensions
   x = σ.(W*x .+ b)
   return reshape(x, :, sz[2:end]...)
 end
@@ -134,14 +133,6 @@ function Base.show(io::IO, l::Dense)
   print(io, ")")
 end
 
-# Try to avoid hitting generic matmul in some simple cases
-# Base's matmul is so slow that it's worth the extra conversion to hit BLAS
-(a::Dense{<:Any,W})(x::AbstractArray{T}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
-  invoke(a, Tuple{AbstractArray}, x)
-
-(a::Dense{<:Any,W})(x::AbstractArray{<:AbstractFloat}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
-  a(T.(x))
-
 """
     Diagonal(in::Integer)
 
diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index b42f9bdae2..8724810762 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -164,11 +164,6 @@ function Base.show(io::IO, l::Conv)
   print(io, ")")
 end
 
-(a::Conv{<:Any,<:Any,W})(x::AbstractArray{T}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
-  invoke(a, Tuple{AbstractArray}, x)
-
-(a::Conv{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
-  a(T.(x))
 """
     ConvTranspose(filter, in => out, σ=identity; stride=1, pad=0, dilation=1)
 
@@ -265,11 +260,6 @@ function Base.show(io::IO, l::ConvTranspose)
   print(io, ")")
 end
 
-(a::ConvTranspose{<:Any,<:Any,W})(x::AbstractArray{T}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
-  invoke(a, Tuple{AbstractArray}, x)
-
-(a::ConvTranspose{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
-  a(T.(x))
 
 function calc_padding(::Type{ConvTranspose}, pad::SamePad, k::NTuple{N,T}, dilation, stride) where {N,T}
   calc_padding(Conv, pad, k .- stride .+ 1, dilation, stride)
@@ -363,11 +353,6 @@ function Base.show(io::IO, l::DepthwiseConv)
   print(io, ")")
 end
 
-(a::DepthwiseConv{<:Any,<:Any,W})(x::AbstractArray{T}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
-  invoke(a, Tuple{AbstractArray}, x)
-
-(a::DepthwiseConv{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
-  a(T.(x))
 """
     CrossCor(filter, in => out, σ=identity; stride=1, pad=0, dilation=1)
 
@@ -449,12 +434,6 @@ function Base.show(io::IO, l::CrossCor)
   print(io, ")")
 end
 
-(a::CrossCor{<:Any,<:Any,W})(x::AbstractArray{T}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
-  invoke(a, Tuple{AbstractArray}, x)
-
-(a::CrossCor{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
-  a(T.(x))
-
 """
     AdaptiveMaxPool(out::NTuple)
 
diff --git a/test/utils.jl b/test/utils.jl
index 2d4537d871..377dcccc49 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -145,10 +145,13 @@ end
 
 @testset "Precision" begin
   m = Chain(Dense(10, 5, relu), Dense(5, 2))
-  x = rand(10)
+  x64 = rand(Float64, 10)
+  x32 = rand(Float32, 10)
   @test eltype(m[1].W) == Float32
-  @test eltype(m(x)) == Float32
-  @test eltype(f64(m)(x)) == Float64
+  @test eltype(m(x32)) == Float32
+  @test eltype(m(x64)) == Float64
+  @test eltype(f64(m)(x32)) == Float64
+  @test eltype(f64(m)(x64)) == Float64
   @test eltype(f64(m)[1].W) == Float64
   @test eltype(f32(f64(m))[1].W) == Float32
 end
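Illustration (not part of the patch itself): the behaviour change recorded in NEWS.md and exercised by the updated "Precision" testset is that a default `Float32` layer applied to `Float64` input now returns `Float64` output instead of silently converting the input, since the `invoke`/`T.(x)` fallback methods above are removed. A minimal sketch, assuming Flux with this change applied:

```julia
using Flux

# Layers are initialised with Float32 weights by default,
# as asserted by `eltype(m[1].W) == Float32` in the testset.
m = Chain(Dense(10, 5, relu), Dense(5, 2))

x32 = rand(Float32, 10)
x64 = rand(Float64, 10)

eltype(m(x32))  # Float32: matching types, same fast path as before
eltype(m(x64))  # Float64: the input is no longer implicitly converted to Float32
```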