docs: keep the ConvMixer default backend as cuda.jl for now

LuxDL · Dec 20, 2024 · 431f3d7 · 431f3d7
1 parent 4528e03
commit 431f3d7
Show file tree

Hide file tree

Showing 4 changed files with 15 additions and 6 deletions.
diff --git a/examples/ConvMixer/Project.toml b/examples/ConvMixer/Project.toml
@@ -25,7 +25,7 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 Comonicon = "1.0.8"
 ConcreteStructs = "0.2.3"
 DataAugmentation = "0.3"
-Enzyme = "0.13.14"
+Enzyme = "0.13.16"
 ImageCore = "0.10.2"
 ImageShow = "0.3.8"
 Interpolations = "0.15.1"
@@ -39,7 +39,7 @@ PreferenceTools = "0.1.2"
 Printf = "1.10"
 ProgressBars = "1.5.1"
 Random = "1.10"
-Reactant = "0.2.5"
+Reactant = "0.2.8"
 StableRNGs = "1.0.2"
 Statistics = "1.10"
 Zygote = "0.6.70"
diff --git a/examples/ConvMixer/README.md b/examples/ConvMixer/README.md
@@ -69,6 +69,7 @@ Options
   --seed <42::Int>
   --epochs <25::Int>
   --lr-max <0.01::Float64>
+  --backend <reactant::String>
 
 Flags
   --clip-norm

diff --git a/examples/ConvMixer/main.jl b/examples/ConvMixer/main.jl
@@ -76,7 +76,7 @@ end
 Comonicon.@main function main(; batchsize::Int=512, hidden_dim::Int=256, depth::Int=8,
         patch_size::Int=2, kernel_size::Int=5, weight_decay::Float64=1e-5,
         clip_norm::Bool=false, seed::Int=42, epochs::Int=25, lr_max::Float64=0.01,
-        backend::String="reactant")
+        backend::String="gpu_if_available")
     rng = StableRNG(seed)
 
     if backend == "gpu_if_available"
@@ -111,13 +111,16 @@ Comonicon.@main function main(; batchsize::Int=512, hidden_dim::Int=256, depth::
 
     if backend == "reactant"
         x_ra = rand(rng, Float32, size(first(trainloader)[1])) |> accelerator_device
-        model_compiled = @compile model(x_ra, ps, st)
+        @printf "[Info] Compiling model with Reactant.jl\n"
+        model_compiled = @compile model(x_ra, ps, Lux.testmode(st))
+        @printf "[Info] Model compiled!\n"
     else
         model_compiled = model
     end
 
     loss = CrossEntropyLoss(; logits=Val(true))
 
+    @printf "[Info] Training model\n"
     for epoch in 1:epochs
         stime = time()
         lr = 0
@@ -127,6 +130,7 @@ Comonicon.@main function main(; batchsize::Int=512, hidden_dim::Int=256, depth::
             (_, _, _, train_state) = Training.single_train_step!(
                 adtype, loss, (x, y), train_state
             )
+            @show i, time() - stime
         end
         ttime = time() - stime
 
@@ -137,7 +141,8 @@ Comonicon.@main function main(; batchsize::Int=512, hidden_dim::Int=256, depth::
             model_compiled, train_state.parameters, train_state.states, testloader
         ) * 100
 
-        @printf "Epoch %2d: Learning Rate %.2e, Train Acc: %.2f%%, Test Acc: %.2f%%, \
-                 Time: %.2f\n" epoch lr train_acc test_acc ttime
+        @printf "[Train] Epoch %2d: Learning Rate %.2e, Train Acc: %.2f%%, Test Acc: \
+                 %.2f%%, Time: %.2f\n" epoch lr train_acc test_acc ttime
     end
+    @printf "[Info] Finished training\n"
 end
diff --git a/ext/LuxReactantExt/patches.jl b/ext/LuxReactantExt/patches.jl
@@ -1 +1,4 @@
 Utils.vec(x::AnyTracedRArray) = Reactant.materialize_traced_array(vec(x))
+
+# XXX: Use PoolDims once EnzymeJAX supports stablehlo.reduce_window adjoint
+Lux.calculate_pool_dims(g::Lux.GlobalPoolMode, ::TracedRArray) = g