FluxML · CarloLucibello · Oct 20, 2024 · Oct 20, 2024 · Oct 20, 2024
diff --git a/docs/src/tutorials/logistic_regression.md b/docs/src/tutorials/logistic_regression.md
@@ -140,8 +140,8 @@ Note, all the `flux_*` variables in this tutorial would be general, that is, the
 julia> flux_model = Chain(Dense(4 => 3), softmax)
 Chain(
   Dense(4 => 3),                        # 15 parameters
-  NNlib.softmax,
-)
+  softmax,
+) 
 ```
 
 A [`Dense(4 => 3)`](@ref Dense) layer denotes a layer with four inputs (four features in every data point) and three outputs (three classes or labels). This layer is the same as the mathematical model defined by us above. Under the hood, Flux too calculates the output using the same expression, but we don't have to initialize the parameters ourselves this time, instead Flux does it for us.

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -11,9 +11,9 @@ using Functors: fmapstructure_with_path
 
 ## Uncomment below to change the default test settings
 # ENV["FLUX_TEST_AMDGPU"] = "true"
-ENV["FLUX_TEST_CUDA"] = "true"
+# ENV["FLUX_TEST_CUDA"] = "true"
 # ENV["FLUX_TEST_METAL"] = "true"
-ENV["FLUX_TEST_CPU"] = "false"
+# ENV["FLUX_TEST_CPU"] = "false"
 # ENV["FLUX_TEST_DISTRIBUTED_MPI"] = "true"
 # ENV["FLUX_TEST_DISTRIBUTED_NCCL"] = "true"
 ENV["FLUX_TEST_ENZYME"] = "false" # We temporarily disable Enzyme tests since they are failing

diff --git a/test/train.jl b/test/train.jl
@@ -155,18 +155,15 @@ for (trainfn!, name) in ((Flux.train!, "Zygote"), (train_enzyme!, "Enzyme"))
       pen2(x::AbstractArray) = sum(abs2, x)/2
       opt = Flux.setup(Adam(0.1), model)
 
-      @test begin
-        trainfn!(model, data, opt) do m, x, y
-          err = Flux.mse(m(x), y)
-          l2 = sum(pen2, Flux.params(m))
-          err + 0.33 * l2
-        end
-
-        diff2 = model.weight .- init_weight
-        @test diff1 ≈ diff2
-
-        true
-      end broken = VERSION >= v"1.11"
+      trainfn!(model, data, opt) do m, x, y
+        err = Flux.mse(m(x), y)
+        l2 = sum(pen2, Flux.params(m))
+        err + 0.33 * l2
+      end
+
+      diff2 = model.weight .- init_weight
+      @test diff1 ≈ diff2
+
     end
 
     # Take 3: using WeightDecay instead. Need the /2 above, to match exactly.

diff --git a/test/utils.jl b/test/utils.jl
@@ -273,13 +273,11 @@ end
 @testset "params gradient" begin
   m = (x=[1,2.0], y=[3.0]);
 
-  @test begin
-    # Explicit -- was broken by #2054 / then fixed / now broken again on julia v1.11
-    gnew = gradient(m -> (sum(norm, Flux.params(m))), m)[1]
-    @test gnew.x ≈ [0.4472135954999579, 0.8944271909999159]
-    @test gnew.y ≈ [1.0]
-    true
-  end broken = VERSION >= v"1.11"
+  # Explicit -- was broken by #2054 / then fixed / broken again on julia v1.11 / now passing again
+  gnew = gradient(m -> (sum(norm, Flux.params(m))), m)[1]
+  @test gnew.x ≈ [0.4472135954999579, 0.8944271909999159]
+  @test gnew.y ≈ [1.0]
+
 
   # Implicit
   gold = gradient(() -> (sum(norm, Flux.params(m))), Flux.params(m))