From 886b34c604192044380a2d73a2a1e07d09e19a38 Mon Sep 17 00:00:00 2001
From: Michael Abbott <32575566+mcabbott@users.noreply.github.com>
Date: Mon, 7 Mar 2022 11:18:05 -0500
Subject: [PATCH] Avoid `Rational` in activation function gradients (#399)

* avoid rational numbers

* move CUDA tests first, add overall testset

* NNLIB_TEST_CUDA: true for v1

* two more rationals
---
 .buildkite/pipeline.yml |   2 +
 Project.toml            |   2 +-
 src/activations.jl      |  16 ++---
 test/runtests.jl        | 125 ++++++++++++++++++++--------------------
 4 files changed, 75 insertions(+), 70 deletions(-)

diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index f805f8c43..d4229b4eb 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -36,6 +36,8 @@ steps:
     agents:
       queue: "juliagpu"
       cuda: "*"
+    env:
+      NNLIB_TEST_CUDA: true
     timeout_in_minutes: 60
 
 #  - label: "GPU julia nightly"
diff --git a/Project.toml b/Project.toml
index 339cd1098..a2b4f6f13 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "NNlib"
 uuid = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
-version = "0.8.3"
+version = "0.8.4"
 
 [deps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
diff --git a/src/activations.jl b/src/activations.jl
index 9829d2386..456dd468a 100644
--- a/src/activations.jl
+++ b/src/activations.jl
@@ -195,14 +195,16 @@ julia> lineplot(x -> leakyrelu(x, 0.5), -2, 2, height=7)
 ⠀-2⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀2⠀
 ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀x⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
 
-julia> leakyrelu(-10f0, 1//5)
+julia> leakyrelu(-10f0, 0.2)
 -2.0f0
 
-julia> leakyrelu(-10f0, 1//20)
+julia> leakyrelu(-10f0, 0.02)
 -0.5f0
 ```
 """
-leakyrelu(x, a=oftf(x, 0.01)) = ifelse(x>0, float(x), oftf(x, a*x)) # max(a*x, x) is 3x slower
+leakyrelu(x, a=oftf(x, leakyrelu_a)) = ifelse(x>0, float(x), oftf(x, a*x)) # max(a*x, x) is 3x slower
+
+const leakyrelu_a = 0.01 # also used in gradient below
 
 """
     relu6(x) = min(max(0, x), 6)
@@ -254,7 +256,7 @@ julia> extrema(rrelu.(fill(-10f0, 1000)))
 (-3.3316886f0, -1.2548422f0)
 ```
 """
-function rrelu(x::T, l=1//8, u=1//3) where T<:Number
+function rrelu(x::T, l=oftf(x,1/8), u=oftf(x,1/3)) where T<:Number
     a = (u - l) * rand(float(T)) + l
     return leakyrelu(x, a)
 end
@@ -402,7 +404,7 @@ julia> hardswish.(-5:5)'
 """
 @inline hardswish(x) = x * hardσ(x)
 
-deriv_hardswish(x) = ifelse(x < -3, oftf(x,0), ifelse(x > 3, oftf(x,1), x/3 + 1//2))
+deriv_hardswish(x) = ifelse(x < -3, oftf(x,0), ifelse(x > 3, oftf(x,1), x/3 + oftf(x,1/2)))
 
 """
     lisht(x) = x * tanh(x)
@@ -844,11 +846,11 @@ this replacement for some array or element types.
 UNARY_ACTS = [ # f, dfdx
     ## In the same order as above!
     (:σ, :(conj(Ω * (1 - Ω)))),
-    (:hardσ, :(ifelse((Ω>0)&(Ω<1), 1//6, 1//1))),
+    (:hardσ, :(ifelse((Ω>0)&(Ω<1), oftf(Ω, 1/6), oftf(Ω, 1)))),
     (:logσ, :(sigmoid_fast(-x))),
     (:hardtanh, :((Ω>-1) & (Ω<1))),
     (:relu, :(Ω > 0)),
-    (:leakyrelu, :(ifelse(Ω > 0, 1//1, 1//100))),
+    (:leakyrelu, :(ifelse(Ω > 0, oftf(Ω, 1), oftf(Ω, leakyrelu_a)))),
     (:relu6, :((Ω>0) & (Ω<6))),
     # rrelu is random, can't write a rule.
     (:elu, :(deriv_elu(Ω))),
diff --git a/test/runtests.jl b/test/runtests.jl
index e99122bef..6a9c9bd6c 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -7,83 +7,84 @@ using Zygote: gradient
 using StableRNGs
 using CUDA
 
-if VERSION < v"1.6"
-    @info "skipping doctests, on Julia $VERSION"
-else
-    using Documenter
-    DocMeta.setdocmeta!(NNlib, :DocTestSetup, :(using NNlib, UnicodePlots); recursive=true)
-    @testset "Doctests" begin
-        doctest(NNlib, manual=false)
-    end
-end
 const rng = StableRNG(123)
-
 include("test_utils.jl")
 
-@testset "Activation Functions" begin
-    include("activations.jl")
-end
+@testset verbose=true "NNlib.jl" begin
+    if CUDA.functional()
+        if get(ENV, "NNLIB_TEST_CUDA", "false") == "true"
+            import Pkg
+            using NNlibCUDA
+            @testset "CUDA" begin
+                Pkg.test("NNlibCUDA")
+            end
+        else
+            @info "Skipping CUDA tests, set NNLIB_TEST_CUDA=true to run them"
+        end
+    else
+        @info "Insufficient version or CUDA not found; Skipping CUDA tests"
+    end
 
-@testset "Batched Multiplication" begin
-    include("batchedmul.jl")
-end
+    if VERSION < v"1.6"
+        @info "skipping doctests, on Julia $VERSION"
+    else
+        using Documenter
+        DocMeta.setdocmeta!(NNlib, :DocTestSetup, :(using NNlib, UnicodePlots); recursive=true)
+        @testset "Doctests" begin
+            doctest(NNlib, manual=false)
+        end
+    end
 
-@testset "Convolution" begin
-    include("conv.jl")
-    include("conv_bias_act.jl")
-end
+    @testset "Activation Functions" begin
+        include("activations.jl")
+    end
 
-@testset "Inference" begin
-    include("inference.jl")
-end
+    @testset "Batched Multiplication" begin
+        include("batchedmul.jl")
+    end
 
-@testset "Pooling" begin
-    include("pooling.jl")
-end
+    @testset "Convolution" begin
+        include("conv.jl")
+        include("conv_bias_act.jl")
+    end
 
-@testset "Padding" begin
-    include("padding.jl")
-end
+    @testset "Inference" begin
+        include("inference.jl")
+    end
 
-@testset "Softmax" begin
-    include("softmax.jl")
-end
+    @testset "Pooling" begin
+        include("pooling.jl")
+    end
 
-@testset "Upsampling" begin
-    include("upsample.jl")
-end
+    @testset "Padding" begin
+        include("padding.jl")
+    end
 
-@testset "Gather" begin
-    include("gather.jl")
-end
+    @testset "Softmax" begin
+        include("softmax.jl")
+    end
 
-@testset "Scatter" begin
-    include("scatter.jl")
-end
+    @testset "Upsampling" begin
+        include("upsample.jl")
+    end
 
-@testset "Utilities" begin
-    include("utils.jl")
-end
+    @testset "Gather" begin
+        include("gather.jl")
+    end
 
-@testset "Grid Sampling" begin
-    include("sampling.jl")
-end
+    @testset "Scatter" begin
+        include("scatter.jl")
+    end
 
-@testset "Functions" begin
-    include("functions.jl")
-end
+    @testset "Utilities" begin
+        include("utils.jl")
+    end
 
-if VERSION >= v"1.6" && CUDA.functional()
-    if get(ENV, "NNLIB_TEST_CUDA", "false") == "true"
-        import Pkg
-        using NNlibCUDA
-        @testset "CUDA" begin
-            Pkg.test("NNlibCUDA")
-        end
-    else
-        @info "Skipping CUDA tests, set NNLIB_TEST_CUDA=true to run them"
+    @testset "Grid Sampling" begin
+        include("sampling.jl")
+    end
+
+    @testset "Functions" begin
+        include("functions.jl")
     end
-else
-    @info "Insufficient version or CUDA not found; Skipping CUDA tests"
 end
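
A note on the recurring pattern in the patch: each change swaps a literal `Rational` constant (`1//5`, `1//6`, `1//100`, ...) for `oftf(x, c)` or a plain float, so the activation functions and their gradient rules keep their constants in the floating-point type of the input instead of carrying `Rational` values into the computation. Below is a minimal sketch of that pattern, not part of the patch: it assumes `oftf` behaves like NNlib's helper (roughly `oftype(float(x), y)`), and `leaky_slope` is a hypothetical name used only for illustration.

```julia
# Assumed to mirror NNlib's oftf: convert the constant `y`
# to the floating-point type of `x`.
oftf(x, y) = oftype(float(x), y)

# A gradient-style rule written with oftf, in the spirit of the patched
# UNARY_ACTS table: both branches of the ifelse share Ω's float type.
leaky_slope(Ω) = ifelse(Ω > 0, oftf(Ω, 1), oftf(Ω, 0.01))

leaky_slope(-3f0)  # 0.01f0 -- Float32 in, Float32 out
leaky_slope(-3.0)  # 0.01   -- Float64 in, Float64 out
```

Writing both branches with `oftf(Ω, ...)` keeps the `ifelse` type-stable, and sharing the new `leakyrelu_a` constant between `leakyrelu` and its gradient rule (the "# also used in gradient below" comment) keeps the forward and backward definitions in sync.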