From 886b34c604192044380a2d73a2a1e07d09e19a38 Mon Sep 17 00:00:00 2001
From: Michael Abbott <32575566+mcabbott@users.noreply.github.com>
Date: Mon, 7 Mar 2022 11:18:05 -0500
Subject: [PATCH] Avoid `Rational` in activation function gradients (#399)

* avoid rational numbers

* move CUDA tests first, add overall testset

* NNLIB_TEST_CUDA: true for v1

* two more rationals
---
 .buildkite/pipeline.yml |   2 +
 Project.toml            |   2 +-
 src/activations.jl      |  16 ++---
 test/runtests.jl        | 125 ++++++++++++++++++++--------------------
 4 files changed, 75 insertions(+), 70 deletions(-)

diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index f805f8c43..d4229b4eb 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -36,6 +36,8 @@ steps:
     agents:
       queue: "juliagpu"
       cuda: "*"
+    env:
+      NNLIB_TEST_CUDA: true
     timeout_in_minutes: 60
 
 #  - label: "GPU julia nightly"
diff --git a/Project.toml b/Project.toml
index 339cd1098..a2b4f6f13 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "NNlib"
 uuid = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
-version = "0.8.3"
+version = "0.8.4"
 
 [deps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
diff --git a/src/activations.jl b/src/activations.jl
index 9829d2386..456dd468a 100644
--- a/src/activations.jl
+++ b/src/activations.jl
@@ -195,14 +195,16 @@ julia> lineplot(x -> leakyrelu(x, 0.5), -2, 2, height=7)
 ⠀-2⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀2⠀
 ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀x⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
 
-julia> leakyrelu(-10f0, 1//5)
+julia> leakyrelu(-10f0, 0.2)
 -2.0f0
 
-julia> leakyrelu(-10f0, 1//20)
+julia> leakyrelu(-10f0, 0.02)
 -0.5f0
 ```
 """
-leakyrelu(x, a=oftf(x, 0.01)) = ifelse(x>0, float(x), oftf(x, a*x)) # max(a*x, x) is 3x slower
+leakyrelu(x, a=oftf(x, leakyrelu_a)) = ifelse(x>0, float(x), oftf(x, a*x)) # max(a*x, x) is 3x slower
+
+const leakyrelu_a = 0.01 # also used in gradient below
 
 """
     relu6(x) = min(max(0, x), 6)
@@ -254,7 +256,7 @@ julia> extrema(rrelu.(fill(-10f0, 1000)))
 (-3.3316886f0, -1.2548422f0)
 ```
 """
-function rrelu(x::T, l=1//8, u=1//3) where T<:Number
+function rrelu(x::T, l=oftf(x,1/8), u=oftf(x,1/3)) where T<:Number
     a = (u - l) * rand(float(T)) + l
     return leakyrelu(x, a)
 end
@@ -402,7 +404,7 @@ julia> hardswish.(-5:5)'
 """
 @inline hardswish(x) = x * hardσ(x)
 
-deriv_hardswish(x) = ifelse(x < -3, oftf(x,0), ifelse(x > 3, oftf(x,1), x/3 + 1//2))
+deriv_hardswish(x) = ifelse(x < -3, oftf(x,0), ifelse(x > 3, oftf(x,1), x/3 + oftf(x,1/2)))
 
 """
     lisht(x) = x * tanh(x)
@@ -844,11 +846,11 @@ this replacement for some array or element types.
 UNARY_ACTS = [ # f, dfdx
     ## In the same order as above!
     (:σ, :(conj(Ω * (1 - Ω)))),
-    (:hardσ, :(ifelse((Ω>0)&(Ω<1), 1//6, 1//1))),
+    (:hardσ, :(ifelse((Ω>0)&(Ω<1), oftf(Ω, 1/6), oftf(Ω, 1)))),
     (:logσ, :(sigmoid_fast(-x))),
     (:hardtanh, :((Ω>-1) & (Ω<1))),
     (:relu, :(Ω > 0)),
-    (:leakyrelu, :(ifelse(Ω > 0, 1//1, 1//100))),
+    (:leakyrelu, :(ifelse(Ω > 0, oftf(Ω, 1), oftf(Ω, leakyrelu_a)))),
     (:relu6, :((Ω>0) & (Ω<6))),
     # rrelu is random, can't write a rule.
     (:elu, :(deriv_elu(Ω))),
diff --git a/test/runtests.jl b/test/runtests.jl
index e99122bef..6a9c9bd6c 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -7,83 +7,84 @@ using Zygote: gradient
 using StableRNGs
 using CUDA
 
-if VERSION < v"1.6"
-    @info "skipping doctests, on Julia $VERSION"
-else
-    using Documenter
-    DocMeta.setdocmeta!(NNlib, :DocTestSetup, :(using NNlib, UnicodePlots); recursive=true)
-    @testset "Doctests" begin
-        doctest(NNlib, manual=false)
-    end
-end
 const rng = StableRNG(123)
-
 include("test_utils.jl")
 
-@testset "Activation Functions" begin
-    include("activations.jl")
-end
+@testset verbose=true "NNlib.jl" begin
+    if CUDA.functional()
+        if get(ENV, "NNLIB_TEST_CUDA", "false") == "true"
+            import Pkg
+            using NNlibCUDA
+            @testset "CUDA" begin
+                Pkg.test("NNlibCUDA")
+            end
+        else
+            @info "Skipping CUDA tests, set NNLIB_TEST_CUDA=true to run them"
+        end
+    else
+        @info "Insufficient version or CUDA not found; Skipping CUDA tests"
+    end
 
-@testset "Batched Multiplication" begin
-    include("batchedmul.jl")
-end
+    if VERSION < v"1.6"
+        @info "skipping doctests, on Julia $VERSION"
+    else
+        using Documenter
+        DocMeta.setdocmeta!(NNlib, :DocTestSetup, :(using NNlib, UnicodePlots); recursive=true)
+        @testset "Doctests" begin
+            doctest(NNlib, manual=false)
+        end
+    end
 
-@testset "Convolution" begin
-    include("conv.jl")
-    include("conv_bias_act.jl")
-end
+    @testset "Activation Functions" begin
+        include("activations.jl")
+    end
 
-@testset "Inference" begin
-    include("inference.jl")
-end
+    @testset "Batched Multiplication" begin
+        include("batchedmul.jl")
+    end
 
-@testset "Pooling" begin
-    include("pooling.jl")
-end
+    @testset "Convolution" begin
+        include("conv.jl")
+        include("conv_bias_act.jl")
+    end
 
-@testset "Padding" begin
-    include("padding.jl")
-end
+    @testset "Inference" begin
+        include("inference.jl")
+    end
 
-@testset "Softmax" begin
-    include("softmax.jl")
-end
+    @testset "Pooling" begin
+        include("pooling.jl")
+    end
 
-@testset "Upsampling" begin
-    include("upsample.jl")
-end
+    @testset "Padding" begin
+        include("padding.jl")
+    end
 
-@testset "Gather" begin
-    include("gather.jl")
-end
+    @testset "Softmax" begin
+        include("softmax.jl")
+    end
 
-@testset "Scatter" begin
-    include("scatter.jl")
-end
+    @testset "Upsampling" begin
+        include("upsample.jl")
+    end
 
-@testset "Utilities" begin
-    include("utils.jl")
-end
+    @testset "Gather" begin
+        include("gather.jl")
+    end
 
-@testset "Grid Sampling" begin
-    include("sampling.jl")
-end
+    @testset "Scatter" begin
+        include("scatter.jl")
+    end
 
-@testset "Functions" begin
-    include("functions.jl")
-end
+    @testset "Utilities" begin
+        include("utils.jl")
+    end
 
-if VERSION >= v"1.6" && CUDA.functional()
-    if get(ENV, "NNLIB_TEST_CUDA", "false") == "true"
-        import Pkg
-        using NNlibCUDA
-        @testset "CUDA" begin
-            Pkg.test("NNlibCUDA")
-        end
-    else
-        @info "Skipping CUDA tests, set NNLIB_TEST_CUDA=true to run them"
+    @testset "Grid Sampling" begin
+        include("sampling.jl")
+    end
+
+    @testset "Functions" begin
+        include("functions.jl")
     end
-else
-    @info "Insufficient version or CUDA not found; Skipping CUDA tests"
 end
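
A note on the recurring pattern in the patch: each change swaps a literal `Rational` constant (`1//5`, `1//6`, `1//100`, ...) for `oftf(x, c)` or a plain float, so the activation functions and their gradient rules keep their constants in the floating-point type of the input instead of carrying `Rational` values into the computation. Below is a minimal sketch of that pattern, not part of the patch: it assumes `oftf` behaves like NNlib's helper (roughly `oftype(float(x), y)`), and `leaky_slope` is a hypothetical name used only for illustration.

```julia
# Assumed to mirror NNlib's oftf: convert the constant `y`
# to the floating-point type of `x`.
oftf(x, y) = oftype(float(x), y)

# A gradient-style rule written with oftf, in the spirit of the patched
# UNARY_ACTS table: both branches of the ifelse share Ω's float type.
leaky_slope(Ω) = ifelse(Ω > 0, oftf(Ω, 1), oftf(Ω, 0.01))

leaky_slope(-3f0)  # 0.01f0 -- Float32 in, Float32 out
leaky_slope(-3.0)  # 0.01   -- Float64 in, Float64 out
```

Writing both branches with `oftf(Ω, ...)` keeps the `ifelse` type-stable, and sharing the new `leakyrelu_a` constant between `leakyrelu` and its gradient rule (the "# also used in gradient below" comment) keeps the forward and backward definitions in sync.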