Fix CUDNN-optimized activation broadcasts #515

Merged — 2 commits, Oct 29, 2020
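
For context: before this change, the optimized activation broadcasts in lib/cudnn/nnlib.jl computed their result in place and returned the input array, so even an out-of-place broadcast silently overwrote its argument. A minimal sketch of the symptom (mirroring the test added in test/cudnn.jl below):

    using CUDA
    x = CUDA.ones(1)        # [1f0]
    y = tanh.(x)            # out-of-place broadcast of an optimized activation
    Array(x) == [1f0]       # used to fail: x had been overwritten with tanh.(x)
    Array(y) == [tanh(1f0)]

After this PR, `tanh.(x)` goes through `Base.materialize` and allocates a fresh array, while only the explicitly in-place form `x .= tanh.(y)` writes into an existing array via `Base.materialize!`.
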
lib/cudnn/activation.jl — 9 changes: 6 additions & 3 deletions

@@ -22,16 +22,19 @@ end

function cudnnActivationForward(x::DenseCuArray{T,N}, y::DenseCuArray{T,N}=x;
                                mode=CUDNN_ACTIVATION_RELU, # CUDNN_ACTIVATION_IDENTITY will not work
-                               coeff=0.0, reluNanOpt=CUDNN_NOT_PROPAGATE_NAN, alpha=1, beta=0) where {T,N}
+                               coeff=false, reluNanOpt=CUDNN_NOT_PROPAGATE_NAN, alpha=true,
+                               beta=false) where {T,N}
    cudnnActivationForward(handle(), ActivationDesc(mode, T(coeff), reluNanOpt),
                           scalingParameter(T, alpha), TensorDesc(x), x,
                           scalingParameter(T, beta ), TensorDesc(y), y)
    return y
end

-function cudnnActivationBackward(x::DenseCuArray{T,N}, dx::DenseCuArray{T,N}, y::DenseCuArray{T,N}, dy::DenseCuArray{T,N}=dx;
+function cudnnActivationBackward(x::DenseCuArray{T,N}, dx::DenseCuArray{T,N},
+                                 y::DenseCuArray{T,N}, dy::DenseCuArray{T,N}=dx;
                                 mode=CUDNN_ACTIVATION_RELU, # CUDNN_ACTIVATION_IDENTITY will not work
-                                coeff=0.0, reluNanOpt=CUDNN_NOT_PROPAGATE_NAN, alpha=1, beta=0) where {T,N}
+                                coeff=false, reluNanOpt=CUDNN_NOT_PROPAGATE_NAN, alpha=1,
+                                beta=false) where {T,N}
    cudnnActivationBackward(handle(), ActivationDesc(mode, T(coeff), reluNanOpt),
                            scalingParameter(T, alpha), TensorDesc( y), y,
                                                        TensorDesc(dy), dy,
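
The keyword-default changes above (and the matching ones in lib/cudnn/tensor.jl below) swap numeric defaults such as `coeff=0.0, alpha=1, beta=0` for `Bool` values. Presumably this is the usual Julia strong-zero/strong-one idiom: `false` and `true` act as neutral elements under multiplication but promote to the surrounding element type instead of forcing `Float64`. A rough illustration in plain Julia (nothing cuDNN-specific):

    true  * 1.5f0   # === 1.5f0, stays Float32
    false * 1.5f0   # === 0.0f0
    0.0   * 1.5f0   # == 0.0, promoted to Float64
    Float32(false)  # 0.0f0 — so T(coeff) and scalingParameter(T, alpha) keep the array's type
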
lib/cudnn/nnlib.jl — 53 changes: 35 additions & 18 deletions

@@ -233,24 +233,41 @@ meanpool!(y::DenseCuArray{T}, x::DenseCuArray{T}, pdims::PoolDims) where T<:CUDNNFloat

# Activation

-# in-place for x
-Base.broadcasted(::typeof(NNlib.σ), x::DenseCuArray{T}) where {T<:CUDNNFloat} =
-  (cudnnActivationForward(reshape4D(x), mode=CUDNN_ACTIVATION_SIGMOID, coeff=0.0); return x)
-
-Base.broadcasted(::typeof(NNlib.relu), x::DenseCuArray{T}) where {T<:CUDNNFloat} =
-  (cudnnActivationForward(reshape4D(x), mode=CUDNN_ACTIVATION_RELU, coeff=0.0); return x)
-
-Base.broadcasted(::typeof(NNlib.tanh), x::DenseCuArray{T}) where {T<:CUDNNFloat} =
-  (cudnnActivationForward(reshape4D(x), mode=CUDNN_ACTIVATION_TANH, coeff=0.0); return x)
-
-Base.broadcasted(::typeof(NNlib.relu6), x::DenseCuArray{T}) where {T<:CUDNNFloat} =
-  (cudnnActivationForward(reshape4D(x), mode=CUDNN_ACTIVATION_CLIPPED_RELU, coeff=6.0); return x)
+using Base.Broadcast
+
+for (f, op) in [
+        CUDA.tanh       => (src,dst)->cudnnActivationForward(reshape4D(src), reshape4D(dst),
+                                                             mode=CUDNN_ACTIVATION_TANH),
+        NNlib.σ         => (src,dst)->cudnnActivationForward(reshape4D(src), reshape4D(dst),
+                                                             mode=CUDNN_ACTIVATION_SIGMOID),
+        NNlib.elu       => (src,dst)->cudnnActivationForward(reshape4D(src), reshape4D(dst),
+                                                             mode=CUDNN_ACTIVATION_ELU),
+        NNlib.relu      => (src,dst)->cudnnActivationForward(reshape4D(src), reshape4D(dst),
+                                                             mode=CUDNN_ACTIVATION_RELU),
+        NNlib.relu6     => (src,dst)->cudnnActivationForward(reshape4D(src), reshape4D(dst),
+                                                             mode=CUDNN_ACTIVATION_CLIPPED_RELU,
+                                                             coeff=6.0),
+        NNlib.leakyrelu => (src,dst)->cudnnOpTensor(CUDNN_OP_TENSOR_MAX, reshape4D(src),
+                                                    reshape4D(src), reshape4D(dst),
+                                                    alpha1=0.01)]
+    @eval begin
+        # in-place
+        function Base.materialize!(dst::DenseCuArray{<:CUDNNFloat},
+                                   bc::Broadcast.Broadcasted{<:Any,<:Any,typeof($f),<:Tuple{DenseCuArray}})
+            $op(bc.args[1], dst)
+            return dst
+        end

-Base.broadcasted(::typeof(NNlib.elu), x::DenseCuArray{T}) where {T<:CUDNNFloat} =
-  (cudnnActivationForward(reshape4D(x), mode=CUDNN_ACTIVATION_ELU, coeff=1.0); return x)
+        # out of place
+        function Base.materialize(bc::Broadcast.Broadcasted{<:Any,<:Any,typeof($f),<:Tuple{DenseCuArray}})
+            ElType = Broadcast.combine_eltypes(bc.f, bc.args)
+            dst = similar(bc, ElType)
+            $op(bc.args[1], dst)
+            return dst
+        end
+    end
+end

-# CUDNN_ACTIVATION_IDENTITY does not work with cudnnActivationForward
-Base.broadcasted(::typeof(NNlib.identity), x::DenseCuArray{T}) where {T<:CUDNNFloat} = x

-Base.broadcasted(::typeof(NNlib.leakyrelu), x::DenseCuArray{T}, a=T(0.01)) where {T<:CUDNNFloat} =
-  (cudnnOpTensor(CUDNN_OP_TENSOR_MAX, reshape4D(x), reshape4D(x), reshape4D(x), alpha1=a); return x)
+# FIXME: put this optimization in GPUArrays' `copyto!` (like Base.Broadcast's `copyto!`)
+Base.broadcasted(::typeof(identity), x::DenseCuArray{T}) where {T<:CUDNNFloat} = x
lib/cudnn/tensor.jl — 4 changes: 2 additions & 2 deletions

@@ -62,7 +62,7 @@ OpTensorDesc(op::cudnnOpTensorOp_t, a::DenseCuArray) = OpTensorDesc(op, eltype(a))

function cudnnOpTensor(op::cudnnOpTensorOp_t,
A::DenseCuArray{T,N}, B::DenseCuArray{T,N}, C::DenseCuArray{T,N};
-                       alpha1=1, alpha2=1, beta=0) where {T,N}
+                       alpha1=true, alpha2=true, beta=false) where {T,N}
cudnnOpTensor(handle(), OpTensorDesc(op, T),
scalingParameter(T, alpha1), TensorDesc(A), A,
scalingParameter(T, alpha2), TensorDesc(B), B,
@@ -113,7 +113,7 @@ end

function cudnnReduceTensor(op::cudnnReduceTensorOp_t,
A::DenseCuArray{T,N}, C::DenseCuArray{T,N};
-                        alpha=1, beta=0) where {T,N}
+                        alpha=true, beta=false) where {T,N}
# indices = Array{UInt64, 1}(undef, N)
indicesSizeInBytes = cudnnGetReductionIndicesSize(op, A, C)
@workspace size=@argout(
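
Side note on the `NNlib.leakyrelu` entry in the nnlib.jl loop above, which routes through `cudnnOpTensor` rather than `cudnnActivationForward`: with `CUDNN_OP_TENSOR_MAX`, both inputs set to `x`, `alpha1=0.01`, and the defaults `alpha2=true, beta=false`, the call computes `max.(0.01 .* x, x)`, which is exactly leakyrelu at the default slope. A quick CPU-side check of that identity (plain Julia, no GPU needed):

    leaky_via_max(x, a=0.01f0) = max(a * x, x)   # what the OP_TENSOR_MAX call computes elementwise
    @assert leaky_via_max( 2f0) ==  2f0          # positive inputs pass through unchanged
    @assert leaky_via_max(-2f0) == -0.02f0       # negative inputs are scaled by the slope
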
test/cudnn.jl — 15 changes: 15 additions & 0 deletions

@@ -82,9 +82,24 @@ end
@test testf(x -> f.(x), rand(Float64, dims))
end
end

# softplus does not give `Inf` for large arguments
x = CuArray([1000.])
@test all(softplus.(x) .== x)

# optimized activation overwrote inputs
let
x = CUDA.ones(1)
@test Array(x) == [1f0]
tanh.(x)
@test Array(x) == [1f0]
y = tanh.(x)
@test Array(x) == [1f0]
@test Array(y) == [tanh(1f0)]
x .= tanh.(y)
@test Array(y) == [tanh(1f0)]
@test Array(x) == [tanh(tanh(1f0))]
end
end

@testset "Batchnorm" begin
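
Regarding the softplus assertion in the activation tests above: a naive `log(1 + exp(x))` overflows to `Inf` at `x = 1000`, so the check relies on a numerically stable formulation. One common way to write it (NNlib's exact definition may differ):

    stable_softplus(x) = max(x, zero(x)) + log1p(exp(-abs(x)))
    stable_softplus(1000.0)   # == 1000.0, no overflow — matching the `softplus.(x) .== x` test
    stable_softplus(0.0)      # == log(2)
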