From fd22bc35fbabeda32b2f516ef29f44b5775a389d Mon Sep 17 00:00:00 2001
From: "jeremie.db"
Date: Fri, 22 Jan 2021 17:11:34 -0500
Subject: [PATCH 1/2] fix broken tests on CUDA RNN

---
 test/cuda/curnn.jl |   9 +-
 test/rnn-demo.jl   | 285 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 289 insertions(+), 5 deletions(-)
 create mode 100644 test/rnn-demo.jl

diff --git a/test/cuda/curnn.jl b/test/cuda/curnn.jl
index 8dba7c0e57..63a5f93ada 100644
--- a/test/cuda/curnn.jl
+++ b/test/cuda/curnn.jl
@@ -1,5 +1,4 @@
 using Flux, CUDA, Test
-using Flux: pullback
 
 @testset for R in [RNN, GRU, LSTM]
   m = R(10, 5) |> gpu
@@ -9,7 +8,7 @@
   θ = gradient(() -> sum(m(x)), params(m))
   @test x isa CuArray
   @test θ[m.cell.Wi] isa CuArray
-  @test_broken collect(m̄[].cell[].Wi) == collect(θ[m.cell.Wi])
+  @test collect(m̄[].cell.Wi) == collect(θ[m.cell.Wi])
 end
 
 @testset "RNN" begin
@@ -34,9 +33,9 @@ end
       cum̄, cux̄ = cuback(gpu(ȳ))
 
       @test x̄ ≈ collect(cux̄)
-      @test_broken m̄[].cell[].Wi ≈ collect(cum̄[].cell[].Wi)
-      @test_broken m̄[].cell[].Wh ≈ collect(cum̄[].cell[].Wh)
-      @test_broken m̄[].cell[].b ≈ collect(cum̄[].cell[].b)
+      @test m̄[].cell.Wi ≈ collect(cum̄[].cell.Wi)
+      @test m̄[].cell.Wh ≈ collect(cum̄[].cell.Wh)
+      @test m̄[].cell.b ≈ collect(cum̄[].cell.b)
       if m̄[].state isa Tuple
         for (x, cx) in zip(m̄[].state, cum̄[].state)
           @test x ≈ collect(cx)
diff --git a/test/rnn-demo.jl b/test/rnn-demo.jl
new file mode 100644
index 0000000000..791d6e0b67
--- /dev/null
+++ b/test/rnn-demo.jl
@@ -0,0 +1,285 @@
+using Revise
+using Flux
+using Flux: @functor
+import Flux: trainable
+using Statistics: mean
+using Random: seed!
+
+
+mutable struct Recur2{T,S}
+  cell::T
+  state::S
+end
+
+# original definition
+# function (m::Recur2)(xs...)
+#   m.state, y = m.cell(m.state, xs...)
+#   return y
+# end
+
+# new def
+function (m::Recur2)(xs...)
+  m.state, y = m.cell(m.state, xs...)
+  return y
+end
+
+@functor Recur2
+trainable(a::Recur2) = (a.cell,)
+
+#####################################
+# Basic test
+#####################################
+seed!(123)
+feat = 3
+h_size = 5
+seq_len = 7
+batch_size = 4
+
+X = [rand(Float32, feat, batch_size) for i in 1:seq_len]
+Y = rand(Float32, batch_size, seq_len) ./ 10
+
+cell = Flux.RNNCell(feat, h_size)
+rnn = Recur2(cell, cell.state0)
+
+rnn(X[1])
+rnn.state
+rnn(X[1])
+
+rnn.(X)
+
+function fold_test_1(x, m)
+  foldl((a, b) -> m(b), x)
+end
+fold_test_1(X, rnn)
+
+rnn.(X)
+
+function rnn2(x)
+  # println((x))
+  println("state: ", rnn.state)
+  rnn(x)
+end
+function fold_test_2(x)
+  foldl((a, b) -> rnn(b), x, init=x[1])
+end
+fold_test_2(X)
+rnn.state
+
+function fold_cell_1(x, c)
+  foldl((a, b) -> cell(a, b)[1], x, init=cell.state0)
+end
+fold_cell_1(X, cell)
+rnn.state
+
+
+f1(x) = begin
+  println(x)
+  x^2
+end
+
+function fold_test_3(x)
+  foldl((a, b) -> f1(b), x, init=5)
+end
+x1 = fold_test_3([2,3])
+
+# rnn = Chain(
+#   RNN(feat, h_size),
+#   Dense(h_size, 1, σ),
+#   x -> reshape(x, :))
+
+
+#### transfer to gpu ####
+rnn_gpu = rnn |> gpu
+X_gpu = gpu(X)
+Y_gpu = gpu(Y)
+
+θ = Flux.params(rnn)
+θ_gpu = Flux.params(rnn_gpu)
+length(θ)
+length(θ_gpu)
+function loss(x, y)
+  Flux.reset!(rnn)
+  l = mean((Flux.stack(map(rnn, x), 2) .- y).^2)
+  return l
+end
+function loss_gpu(x, y)
+  Flux.reset!(rnn_gpu)
+  l = mean((Flux.stack(map(rnn_gpu, x), 2) .- y).^2)
+  return l
+end
+
+opt = ADAM(1e-3)
+opt_gpu = ADAM(1e-3)
+for i in 1:5
+  println("iter: ", i)
+  Flux.train!(loss, θ, [(X, Y)], opt)
+  Flux.train!(loss_gpu, θ_gpu, [(X_gpu, Y_gpu)], opt_gpu)
+  println("loss_cpu: ", loss(X, Y))
+  println("loss_gpu: ", loss_gpu(X_gpu, Y_gpu))
+  # println("θ[3][1:2]: ", θ[3][1:2])
+  # println("θ_gpu[3][1:2]: ", θ_gpu[3][1:2])
+  # println("θ[4][1:2]: ", θ[4][1:2])
+  # println("θ_gpu[4][1:2]: ", θ_gpu[4][1:2])
+  # println("rnn.layers[1].state[1:2]: ", rnn.layers[1].state[1:2])
+  # println("rnn_gpu.layers[1].state[1:2]: ", rnn_gpu.layers[1].state[1:2])
+end
+
+@code_warntype rnn(X[1])
+
+function speed_cpu(n=10)
+  for i in 1:n
+    Flux.train!(loss, θ, [(X, Y)], opt)
+  end
+  return loss(X, Y)
+end
+
+function speed_gpu(n=10)
+  for i in 1:n
+    Flux.train!(loss_gpu, θ_gpu, [(X_gpu, Y_gpu)], opt_gpu)
+  end
+  return loss_gpu(X_gpu, Y_gpu)
+end
+
+@time speed_cpu(100)
+@time speed_gpu(100)
+
+
+#####################################
+# RNN vanilla
+#####################################
+seed!(123)
+feat = 32
+h_size = 64
+seq_len = 50
+batch_size = 256
+
+rnn = Chain(
+  RNN(feat, h_size),
+  Dense(h_size, 1, σ),
+  x -> reshape(x, :))
+
+X = [rand(Float32, feat, batch_size) for i in 1:seq_len]
+Y = rand(Float32, batch_size, seq_len) ./ 10
+
+#### transfer to gpu ####
+rnn_gpu = rnn |> gpu
+X_gpu = gpu(X)
+Y_gpu = gpu(Y)
+
+θ = Flux.params(rnn)
+θ_gpu = Flux.params(rnn_gpu)
+length(θ)
+length(θ_gpu)
+function loss(x, y)
+  Flux.reset!(rnn)
+  l = mean((Flux.stack(map(rnn, x), 2) .- y).^2)
+  return l
+end
+function loss_gpu(x, y)
+  Flux.reset!(rnn_gpu)
+  l = mean((Flux.stack(map(rnn_gpu, x), 2) .- y).^2)
+  return l
+end
+
+opt = ADAM(1e-3)
+opt_gpu = ADAM(1e-3)
+for i in 1:5
+  println("iter: ", i)
+  Flux.train!(loss, θ, [(X, Y)], opt)
+  Flux.train!(loss_gpu, θ_gpu, [(X_gpu, Y_gpu)], opt_gpu)
+  println("loss_cpu: ", loss(X, Y))
+  println("loss_gpu: ", loss_gpu(X_gpu, Y_gpu))
+  # println("θ[3][1:2]: ", θ[3][1:2])
+  # println("θ_gpu[3][1:2]: ", θ_gpu[3][1:2])
+  # println("θ[4][1:2]: ", θ[4][1:2])
+  # println("θ_gpu[4][1:2]: ", θ_gpu[4][1:2])
+  # println("rnn.layers[1].state[1:2]: ", rnn.layers[1].state[1:2])
+  # println("rnn_gpu.layers[1].state[1:2]: ", rnn_gpu.layers[1].state[1:2])
+end
+
+@code_warntype rnn(X[1])
+
+function speed_cpu(n=10)
+  for i in 1:n
+    Flux.train!(loss, θ, [(X, Y)], opt)
+  end
+  return loss(X, Y)
+end
+
+function speed_gpu(n=10)
+  for i in 1:n
+    Flux.train!(loss_gpu, θ_gpu, [(X_gpu, Y_gpu)], opt_gpu)
+  end
+  return loss_gpu(X_gpu, Y_gpu)
+end
+
+@time speed_cpu(100)
+@time speed_gpu(100)
+
+#####################################
+# LSTM
+#####################################
+feat = 32
+h_size = 64
+seq_len = 50
+batch_size = 256
+
+rnn = Chain(LSTM(feat, h_size),
+  LSTM(h_size, h_size),
+  LSTM(h_size, h_size),
+  Dense(h_size, 1, σ),
+  x -> reshape(x, :))
+
+X = [rand(Float32, feat, batch_size) for i in 1:seq_len]
+Y = rand(Float32, batch_size, seq_len) ./ 10
+
+#### transfer to gpu ####
+rnn_gpu = rnn |> gpu
+X_gpu = gpu(X)
+Y_gpu = gpu(Y)
+
+θ = Flux.params(rnn)
+θ_gpu = Flux.params(rnn_gpu)
+function loss(x, y)
+  Flux.reset!(rnn)
+  l = mean((Flux.stack(map(rnn, x), 2) .- y).^2)
+  return l
+end
+function loss_gpu(x, y)
+  Flux.reset!(rnn_gpu)
+  l = mean((Flux.stack(map(rnn_gpu, x), 2) .- y).^2)
+  return l
+end
+
+opt = ADAM(1e-3)
+opt_gpu = ADAM(1e-3)
+
+for i in 1:5
+  println("iter: ", i)
+  Flux.train!(loss, θ, [(X, Y)], opt)
+  Flux.train!(loss_gpu, θ_gpu, [(X_gpu, Y_gpu)], opt_gpu)
+  println("loss_cpu: ", loss(X, Y))
+  println("loss_gpu: ", loss_gpu(X_gpu, Y_gpu))
+end
+
+
+function speed_cpu(n=10)
+  for i in 1:n
+    Flux.train!(loss, θ, [(X, Y)], opt)
+  end
+  return loss(X, Y)
+end
+
+function speed_gpu(n=10)
+  for i in 1:n
+    Flux.train!(loss_gpu, θ_gpu, [(X_gpu, Y_gpu)], opt_gpu)
+  end
+  return loss_gpu(X_gpu, Y_gpu)
+end
+
+@code_warntype rnn(X[1])
+
+using BenchmarkTools
+@time speed_cpu(100)
+@btime speed_gpu(100)
+

From 0b147d8b80bee614c1db1ff3b234f8b3e5c6d056 Mon Sep 17 00:00:00 2001
From: "jeremie.db"
Date: Fri, 22 Jan 2021 17:12:49 -0500
Subject: [PATCH 2/2] remove rnn-demo.jl scratch script

---
 test/rnn-demo.jl | 285 -----------------------------------------------
 1 file changed, 285 deletions(-)
 delete mode 100644 test/rnn-demo.jl

diff --git a/test/rnn-demo.jl b/test/rnn-demo.jl
deleted file mode 100644
index 791d6e0b67..0000000000
--- a/test/rnn-demo.jl
+++ /dev/null
@@ -1,285 +0,0 @@
-using Revise
-using Flux
-using Flux: @functor
-import Flux: trainable
-using Statistics: mean
-using Random: seed!
-
-
-mutable struct Recur2{T,S}
-  cell::T
-  state::S
-end
-
-# original definition
-# function (m::Recur2)(xs...)
-#   m.state, y = m.cell(m.state, xs...)
-#   return y
-# end
-
-# new def
-function (m::Recur2)(xs...)
-  m.state, y = m.cell(m.state, xs...)
-  return y
-end
-
-@functor Recur2
-trainable(a::Recur2) = (a.cell,)
-
-#####################################
-# Basic test
-#####################################
-seed!(123)
-feat = 3
-h_size = 5
-seq_len = 7
-batch_size = 4
-
-X = [rand(Float32, feat, batch_size) for i in 1:seq_len]
-Y = rand(Float32, batch_size, seq_len) ./ 10
-
-cell = Flux.RNNCell(feat, h_size)
-rnn = Recur2(cell, cell.state0)
-
-rnn(X[1])
-rnn.state
-rnn(X[1])
-
-rnn.(X)
-
-function fold_test_1(x, m)
-  foldl((a, b) -> m(b), x)
-end
-fold_test_1(X, rnn)
-
-rnn.(X)
-
-function rnn2(x)
-  # println((x))
-  println("state: ", rnn.state)
-  rnn(x)
-end
-function fold_test_2(x)
-  foldl((a, b) -> rnn(b), x, init=x[1])
-end
-fold_test_2(X)
-rnn.state
-
-function fold_cell_1(x, c)
-  foldl((a, b) -> cell(a, b)[1], x, init=cell.state0)
-end
-fold_cell_1(X, cell)
-rnn.state
-
-
-f1(x) = begin
-  println(x)
-  x^2
-end
-
-function fold_test_3(x)
-  foldl((a, b) -> f1(b), x, init=5)
-end
-x1 = fold_test_3([2,3])
-
-# rnn = Chain(
-#   RNN(feat, h_size),
-#   Dense(h_size, 1, σ),
-#   x -> reshape(x, :))
-
-
-#### transfer to gpu ####
-rnn_gpu = rnn |> gpu
-X_gpu = gpu(X)
-Y_gpu = gpu(Y)
-
-θ = Flux.params(rnn)
-θ_gpu = Flux.params(rnn_gpu)
-length(θ)
-length(θ_gpu)
-function loss(x, y)
-  Flux.reset!(rnn)
-  l = mean((Flux.stack(map(rnn, x), 2) .- y).^2)
-  return l
-end
-function loss_gpu(x, y)
-  Flux.reset!(rnn_gpu)
-  l = mean((Flux.stack(map(rnn_gpu, x), 2) .- y).^2)
-  return l
-end
-
-opt = ADAM(1e-3)
-opt_gpu = ADAM(1e-3)
-for i in 1:5
-  println("iter: ", i)
-  Flux.train!(loss, θ, [(X, Y)], opt)
-  Flux.train!(loss_gpu, θ_gpu, [(X_gpu, Y_gpu)], opt_gpu)
-  println("loss_cpu: ", loss(X, Y))
-  println("loss_gpu: ", loss_gpu(X_gpu, Y_gpu))
-  # println("θ[3][1:2]: ", θ[3][1:2])
-  # println("θ_gpu[3][1:2]: ", θ_gpu[3][1:2])
-  # println("θ[4][1:2]: ", θ[4][1:2])
-  # println("θ_gpu[4][1:2]: ", θ_gpu[4][1:2])
-  # println("rnn.layers[1].state[1:2]: ", rnn.layers[1].state[1:2])
-  # println("rnn_gpu.layers[1].state[1:2]: ", rnn_gpu.layers[1].state[1:2])
-end
-
-@code_warntype rnn(X[1])
-
-function speed_cpu(n=10)
-  for i in 1:n
-    Flux.train!(loss, θ, [(X, Y)], opt)
-  end
-  return loss(X, Y)
-end
-
-function speed_gpu(n=10)
-  for i in 1:n
-    Flux.train!(loss_gpu, θ_gpu, [(X_gpu, Y_gpu)], opt_gpu)
-  end
-  return loss_gpu(X_gpu, Y_gpu)
-end
-
-@time speed_cpu(100)
-@time speed_gpu(100)
-
-
-#####################################
-# RNN vanilla
-#####################################
-seed!(123)
-feat = 32
-h_size = 64
-seq_len = 50
-batch_size = 256
-
-rnn = Chain(
-  RNN(feat, h_size),
-  Dense(h_size, 1, σ),
-  x -> reshape(x, :))
-
-X = [rand(Float32, feat, batch_size) for i in 1:seq_len]
-Y = rand(Float32, batch_size, seq_len) ./ 10
-
-#### transfer to gpu ####
-rnn_gpu = rnn |> gpu
-X_gpu = gpu(X)
-Y_gpu = gpu(Y)
-
-θ = Flux.params(rnn)
-θ_gpu = Flux.params(rnn_gpu)
-length(θ)
-length(θ_gpu)
-function loss(x, y)
-  Flux.reset!(rnn)
-  l = mean((Flux.stack(map(rnn, x), 2) .- y).^2)
-  return l
-end
-function loss_gpu(x, y)
-  Flux.reset!(rnn_gpu)
-  l = mean((Flux.stack(map(rnn_gpu, x), 2) .- y).^2)
-  return l
-end
-
-opt = ADAM(1e-3)
-opt_gpu = ADAM(1e-3)
-for i in 1:5
-  println("iter: ", i)
-  Flux.train!(loss, θ, [(X, Y)], opt)
-  Flux.train!(loss_gpu, θ_gpu, [(X_gpu, Y_gpu)], opt_gpu)
-  println("loss_cpu: ", loss(X, Y))
-  println("loss_gpu: ", loss_gpu(X_gpu, Y_gpu))
-  # println("θ[3][1:2]: ", θ[3][1:2])
-  # println("θ_gpu[3][1:2]: ", θ_gpu[3][1:2])
-  # println("θ[4][1:2]: ", θ[4][1:2])
-  # println("θ_gpu[4][1:2]: ", θ_gpu[4][1:2])
-  # println("rnn.layers[1].state[1:2]: ", rnn.layers[1].state[1:2])
-  # println("rnn_gpu.layers[1].state[1:2]: ", rnn_gpu.layers[1].state[1:2])
-end
-
-@code_warntype rnn(X[1])
-
-function speed_cpu(n=10)
-  for i in 1:n
-    Flux.train!(loss, θ, [(X, Y)], opt)
-  end
-  return loss(X, Y)
-end
-
-function speed_gpu(n=10)
-  for i in 1:n
-    Flux.train!(loss_gpu, θ_gpu, [(X_gpu, Y_gpu)], opt_gpu)
-  end
-  return loss_gpu(X_gpu, Y_gpu)
-end
-
-@time speed_cpu(100)
-@time speed_gpu(100)
-
-#####################################
-# LSTM
-#####################################
-feat = 32
-h_size = 64
-seq_len = 50
-batch_size = 256
-
-rnn = Chain(LSTM(feat, h_size),
-  LSTM(h_size, h_size),
-  LSTM(h_size, h_size),
-  Dense(h_size, 1, σ),
-  x -> reshape(x, :))
-
-X = [rand(Float32, feat, batch_size) for i in 1:seq_len]
-Y = rand(Float32, batch_size, seq_len) ./ 10
-
-#### transfer to gpu ####
-rnn_gpu = rnn |> gpu
-X_gpu = gpu(X)
-Y_gpu = gpu(Y)
-
-θ = Flux.params(rnn)
-θ_gpu = Flux.params(rnn_gpu)
-function loss(x, y)
-  Flux.reset!(rnn)
-  l = mean((Flux.stack(map(rnn, x), 2) .- y).^2)
-  return l
-end
-function loss_gpu(x, y)
-  Flux.reset!(rnn_gpu)
-  l = mean((Flux.stack(map(rnn_gpu, x), 2) .- y).^2)
-  return l
-end
-
-opt = ADAM(1e-3)
-opt_gpu = ADAM(1e-3)
-
-for i in 1:5
-  println("iter: ", i)
-  Flux.train!(loss, θ, [(X, Y)], opt)
-  Flux.train!(loss_gpu, θ_gpu, [(X_gpu, Y_gpu)], opt_gpu)
-  println("loss_cpu: ", loss(X, Y))
-  println("loss_gpu: ", loss_gpu(X_gpu, Y_gpu))
-end
-
-
-function speed_cpu(n=10)
-  for i in 1:n
-    Flux.train!(loss, θ, [(X, Y)], opt)
-  end
-  return loss(X, Y)
-end
-
-function speed_gpu(n=10)
-  for i in 1:n
-    Flux.train!(loss_gpu, θ_gpu, [(X_gpu, Y_gpu)], opt_gpu)
-  end
-  return loss_gpu(X_gpu, Y_gpu)
-end
-
-@code_warntype rnn(X[1])
-
-using BenchmarkTools
-@time speed_cpu(100)
-@btime speed_gpu(100)
-