unbreak some data movement cuda tests #2504

Merged (2 commits) · Oct 26, 2024
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ ChainRulesCore = "1.12"
Compat = "4.10.0"
Enzyme = "0.12, 0.13"
Functors = "0.4"
MLDataDevices = "1.4.0"
MLDataDevices = "1.4.2"
MLUtils = "0.4"
MPI = "0.20.19"
MacroTools = "0.5"
Expand Down
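For context: under Pkg's default caret semantics, `MLDataDevices = "1.4.2"` allows any version in `[1.4.2, 2.0.0)`, so this change only raises the lower bound to pull in a patch release. A minimal sketch of making the same edit programmatically (assumes Julia 1.8+ and that you run it inside the package's own environment):

```julia
using Pkg

# Raise the [compat] lower bound for MLDataDevices; the string "1.4.2"
# is interpreted as the caret range [1.4.2, 2.0.0).
Pkg.compat("MLDataDevices", "1.4.2")
Pkg.resolve()  # check that the tightened bound is still satisfiable
```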
20 changes: 11 additions & 9 deletions test/ext_cuda/cuda.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
using Flux, Test
using Flux, Test, Zygote
using Flux: cpu, gpu
using Statistics: mean
using LinearAlgebra: I, cholesky, Cholesky
using LinearAlgebra: I, cholesky, Cholesky, Adjoint
using SparseArrays: sparse, SparseMatrixCSC, AbstractSparseArray
using CUDA
CUDA.allowscalar(false)

@testset "CUDA" begin
x = randn(5, 5)
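A note on `CUDA.allowscalar(false)`: it turns accidental element-wise indexing of a `CuArray`, which silently serialises work through the CPU, into a hard error, so the tests below fail loudly instead of merely running slowly. A minimal sketch, assuming a working CUDA device:

```julia
using CUDA
CUDA.allowscalar(false)

x = CUDA.rand(5, 5)
sum(x)                      # fine: one fused GPU reduction
# x[1, 1]                   # would throw a scalar-indexing error
CUDA.@allowscalar x[1, 1]   # explicit opt-in for a one-off scalar read
```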
Expand Down Expand Up @@ -48,11 +50,11 @@ end
# construct from CuArray
x = [1, 3, 2]
y = Flux.onehotbatch(x, 0:3)
@test_skip begin # https://github.com/FluxML/OneHotArrays.jl/issues/16

# https://github.com/FluxML/OneHotArrays.jl/issues/16
y2 = Flux.onehotbatch(x |> gpu, 0:3)
@test y2.indices isa CuArray
@test y2 |> cpu == y
end
end

@testset "onecold gpu" begin
Expand Down Expand Up @@ -104,19 +106,19 @@ end
# Trivial functions
@test gradient(x -> sum(abs, gpu(x)), a)[1] isa Matrix
@test gradient(x -> sum(gpu(x)), a)[1] isa Matrix
@test_skip gradient(x -> sum(gpu(x)), a')[1] isa Matrix # sum(::Adjoint{T,CuArray}) makes a Fill
@test_broken gradient(x -> sum(gpu(x)), a')[1] isa Matrix # sum(::Adjoint{T,CuArray}) makes a Fill
@test gradient(x -> sum(abs, cpu(x)), ca)[1] isa CuArray
# This test should really not go through indirections and pull out Fills for efficiency
# but we forcefully materialise. TODO: remove materialising CuArray here
@test gradient(x -> sum(cpu(x)), ca)[1] isa CuArray # This involves FillArray, which should be GPU compatible
@test gradient(x -> sum(cpu(x)), ca')[1] isa Adjoint{Float32, <:CuArray}
@test gradient(x -> sum(cpu(x)), ca')[1] isa CuArray

# Even more trivial: no movement
@test gradient(x -> sum(abs, cpu(x)), a)[1] isa Matrix
@test_broken gradient(x -> sum(abs, cpu(x)), a')[1] isa Matrix
@test gradient(x -> sum(abs, cpu(x)), a')[1] isa Matrix
@test gradient(x -> sum(cpu(x)), a)[1] isa typeof(gradient(sum, a)[1]) # FillArray
@test gradient(x -> sum(abs, gpu(x)), ca)[1] isa CuArray
@test_broken gradient(x -> sum(abs, gpu(x)), ca')[1] isa CuArray
@test gradient(x -> sum(abs, gpu(x)), ca')[1] isa CuArray

# More complicated, Array * CuArray is an error
g0 = gradient(x -> sum(abs, (a * (a * x))), a)[1]
@@ -198,7 +200,7 @@ end
 post2 = Flux.DataLoader((x=X, y=Y); batchsize=7, shuffle=false) |> gpu
 for (p, q) in zip(pre2, post2)
 @test p.x == q.x
-@test_skip p.y == q.y # https://github.com/FluxML/OneHotArrays.jl/issues/28 -- MethodError: getindex(::OneHotArrays.OneHotMatrix{UInt32, CuArray{UInt32, 1, CUDA.Mem.DeviceBuffer}}, ::Int64, ::Int64) is ambiguous
+@test_broken p.y == q.y # https://github.com/FluxML/OneHotArrays.jl/issues/28 -- MethodError: getindex(::OneHotArrays.OneHotMatrix{UInt32, CuArray{UInt32, 1, CUDA.Mem.DeviceBuffer}}, ::Int64, ::Int64) is ambiguous
 end

 @test collect(pre2) isa Vector{<:NamedTuple{(:x, :y)}}