From 5e70e0dbd5b758b1caaf7b6df22c5d6b7dde9d2f Mon Sep 17 00:00:00 2001
From: Tim Besard
Date: Thu, 29 Oct 2020 12:53:50 +0100
Subject: [PATCH 1/2] Clean-up some methods.

---
 lib/cudnn/activation.jl | 9 ++++++---
 lib/cudnn/tensor.jl     | 4 ++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/lib/cudnn/activation.jl b/lib/cudnn/activation.jl
index b4ae812252..1f0439bf51 100644
--- a/lib/cudnn/activation.jl
+++ b/lib/cudnn/activation.jl
@@ -22,16 +22,19 @@ end
 
 function cudnnActivationForward(x::DenseCuArray{T,N}, y::DenseCuArray{T,N}=x;
                                 mode=CUDNN_ACTIVATION_RELU, # CUDNN_ACTIVATION_IDENTITY will not work
-                                coeff=0.0, reluNanOpt=CUDNN_NOT_PROPAGATE_NAN, alpha=1, beta=0) where {T,N}
+                                coeff=false, reluNanOpt=CUDNN_NOT_PROPAGATE_NAN, alpha=true,
+                                beta=false) where {T,N}
     cudnnActivationForward(handle(), ActivationDesc(mode, T(coeff), reluNanOpt),
                            scalingParameter(T, alpha), TensorDesc(x), x,
                            scalingParameter(T, beta ), TensorDesc(y), y)
     return y
 end
 
-function cudnnActivationBackward(x::DenseCuArray{T,N}, dx::DenseCuArray{T,N}, y::DenseCuArray{T,N}, dy::DenseCuArray{T,N}=dx;
+function cudnnActivationBackward(x::DenseCuArray{T,N}, dx::DenseCuArray{T,N},
+                                 y::DenseCuArray{T,N}, dy::DenseCuArray{T,N}=dx;
                                  mode=CUDNN_ACTIVATION_RELU, # CUDNN_ACTIVATION_IDENTITY will not work
-                                 coeff=0.0, reluNanOpt=CUDNN_NOT_PROPAGATE_NAN, alpha=1, beta=0) where {T,N}
+                                 coeff=false, reluNanOpt=CUDNN_NOT_PROPAGATE_NAN, alpha=1,
+                                 beta=false) where {T,N}
     cudnnActivationBackward(handle(), ActivationDesc(mode, T(coeff), reluNanOpt),
                             scalingParameter(T, alpha), TensorDesc( y), y, TensorDesc(dy), dy,
diff --git a/lib/cudnn/tensor.jl b/lib/cudnn/tensor.jl
index 5a67a2c29f..9a2cb9fd17 100644
--- a/lib/cudnn/tensor.jl
+++ b/lib/cudnn/tensor.jl
@@ -62,7 +62,7 @@ OpTensorDesc(op::cudnnOpTensorOp_t, a::DenseCuArray) = OpTensorDesc(op, eltype(a
 
 function cudnnOpTensor(op::cudnnOpTensorOp_t,
                        A::DenseCuArray{T,N}, B::DenseCuArray{T,N}, C::DenseCuArray{T,N};
-                       alpha1=1, alpha2=1, beta=0) where {T,N}
+                       alpha1=true, alpha2=true, beta=false) where {T,N}
     cudnnOpTensor(handle(), OpTensorDesc(op, T),
                   scalingParameter(T, alpha1), TensorDesc(A), A,
                   scalingParameter(T, alpha2), TensorDesc(B), B,
@@ -113,7 +113,7 @@ end
 
 function cudnnReduceTensor(op::cudnnReduceTensorOp_t,
                            A::DenseCuArray{T,N}, C::DenseCuArray{T,N};
-                           alpha=1, beta=0) where {T,N}
+                           alpha=true, beta=false) where {T,N}
     # indices = Array{UInt64, 1}(undef, N)
     indicesSizeInBytes = cudnnGetReductionIndicesSize(op, A, C)
     @workspace size=@argout(

From beffb7dc10572518b6fef9df5574bbf7276ce6d7 Mon Sep 17 00:00:00 2001
From: Tim Besard
Date: Thu, 29 Oct 2020 12:54:12 +0100
Subject: [PATCH 2/2] Fix optimized activation broadcasts.
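
The `Base.broadcasted` overloads removed below computed the activation in
place and returned the input, so even an out-of-place broadcast like
`y = tanh.(x)` silently overwrote `x`. Hook into `Base.materialize` /
`Base.materialize!` instead, so the cuDNN fast path only ever writes into the
actual broadcast destination.

Rough sketch of the intended behaviour (illustrative only, not part of the
diff; assumes CUDA.jl with a cuDNN-capable GPU):

    using CUDA
    x = CUDA.ones(Float32, 4)   # four 1f0 values on the GPU
    y = tanh.(x)                # out of place: allocates y, leaves x untouched
    x .= tanh.(x)               # in place: dispatches to materialize! and overwrites x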
---
 lib/cudnn/nnlib.jl | 53 ++++++++++++++++++++++++++++++----------------
 test/cudnn.jl      | 15 +++++++++++++
 2 files changed, 50 insertions(+), 18 deletions(-)

diff --git a/lib/cudnn/nnlib.jl b/lib/cudnn/nnlib.jl
index 7f2c16098f..8624f33e81 100644
--- a/lib/cudnn/nnlib.jl
+++ b/lib/cudnn/nnlib.jl
@@ -233,24 +233,41 @@ meanpool!(y::DenseCuArray{T}, x::DenseCuArray{T}, pdims::PoolDims) where T<:CUDN
 
 # Activation
 
-# in-place for x
-Base.broadcasted(::typeof(NNlib.σ), x::DenseCuArray{T}) where {T<:CUDNNFloat} =
-    (cudnnActivationForward(reshape4D(x), mode=CUDNN_ACTIVATION_SIGMOID, coeff=0.0); return x)
-
-Base.broadcasted(::typeof(NNlib.relu), x::DenseCuArray{T}) where {T<:CUDNNFloat} =
-    (cudnnActivationForward(reshape4D(x), mode=CUDNN_ACTIVATION_RELU, coeff=0.0); return x)
-
-Base.broadcasted(::typeof(NNlib.tanh), x::DenseCuArray{T}) where {T<:CUDNNFloat} =
-    (cudnnActivationForward(reshape4D(x), mode=CUDNN_ACTIVATION_TANH, coeff=0.0); return x)
-
-Base.broadcasted(::typeof(NNlib.relu6), x::DenseCuArray{T}) where {T<:CUDNNFloat} =
-    (cudnnActivationForward(reshape4D(x), mode=CUDNN_ACTIVATION_CLIPPED_RELU, coeff=6.0); return x)
+using Base.Broadcast
+
+for (f, op) in [
+    CUDA.tanh => (src,dst)->cudnnActivationForward(reshape4D(src), reshape4D(dst),
+                                                   mode=CUDNN_ACTIVATION_TANH),
+    NNlib.σ => (src,dst)->cudnnActivationForward(reshape4D(src), reshape4D(dst),
+                                                 mode=CUDNN_ACTIVATION_SIGMOID),
+    NNlib.elu => (src,dst)->cudnnActivationForward(reshape4D(src), reshape4D(dst),
+                                                   mode=CUDNN_ACTIVATION_ELU),
+    NNlib.relu => (src,dst)->cudnnActivationForward(reshape4D(src), reshape4D(dst),
+                                                    mode=CUDNN_ACTIVATION_RELU),
+    NNlib.relu6 => (src,dst)->cudnnActivationForward(reshape4D(src), reshape4D(dst),
+                                                     mode=CUDNN_ACTIVATION_CLIPPED_RELU,
+                                                     coeff=6.0),
+    NNlib.leakyrelu => (src,dst)->cudnnOpTensor(CUDNN_OP_TENSOR_MAX, reshape4D(src),
+                                                reshape4D(src), reshape4D(dst),
+                                                alpha1=0.01)]
+    @eval begin
+        # in-place
+        function Base.materialize!(dst::DenseCuArray{<:CUDNNFloat},
+                                   bc::Broadcast.Broadcasted{<:Any,<:Any,typeof($f),<:Tuple{DenseCuArray}})
+            $op(bc.args[1], dst)
+            return dst
+        end
 
-Base.broadcasted(::typeof(NNlib.elu), x::DenseCuArray{T}) where {T<:CUDNNFloat} =
-    (cudnnActivationForward(reshape4D(x), mode=CUDNN_ACTIVATION_ELU, coeff=1.0); return x)
+        # out of place
+        function Base.materialize(bc::Broadcast.Broadcasted{<:Any,<:Any,typeof($f),<:Tuple{DenseCuArray}})
+            ElType = Broadcast.combine_eltypes(bc.f, bc.args)
+            dst = similar(bc, ElType)
+            $op(bc.args[1], dst)
+            return dst
+        end
+    end
+end
 
 # CUDNN_ACTIVATION_IDENTITY does not work with cudnnActivationForward
-Base.broadcasted(::typeof(NNlib.identity), x::DenseCuArray{T}) where {T<:CUDNNFloat} = x
-
-Base.broadcasted(::typeof(NNlib.leakyrelu), x::DenseCuArray{T}, a=T(0.01)) where {T<:CUDNNFloat} =
-    (cudnnOpTensor(CUDNN_OP_TENSOR_MAX, reshape4D(x), reshape4D(x), reshape4D(x), alpha1=a); return x)
+# FIXME: put this optimization in GPUArrays' `copyto!` (like Base.Broadcast's `copyto!`)
+Base.broadcasted(::typeof(identity), x::DenseCuArray{T}) where {T<:CUDNNFloat} = x
diff --git a/test/cudnn.jl b/test/cudnn.jl
index 30aec34c39..6e61831d6d 100644
--- a/test/cudnn.jl
+++ b/test/cudnn.jl
@@ -82,9 +82,24 @@ end
             @test testf(x -> f.(x), rand(Float64, dims))
         end
     end
+
     # softplus does not give `Inf` for large arguments
     x = CuArray([1000.])
     @test all(softplus.(x) .== x)
+
+    # optimized activation overwrote inputs
+    let
+        x = CUDA.ones(1)
+        @test Array(x) == [1f0]
+        tanh.(x)
+        @test Array(x) == [1f0]
+        y = tanh.(x)
+        @test Array(x) == [1f0]
+        @test Array(y) == [tanh(1f0)]
+        x .= tanh.(y)
+        @test Array(y) == [tanh(1f0)]
+        @test Array(x) == [tanh(tanh(1f0))]
+    end
 end
 
 @testset "Batchnorm" begin