LuxDL · avik-pal · Feb 23, 2024 · Feb 15, 2024 · Feb 16, 2024 · Feb 16, 2024
diff --git a/Project.toml b/Project.toml
@@ -66,7 +66,7 @@ LinearAlgebra = "1.9"
 Logging = "1.9"
 LuxAMDGPU = "0.2.2"
 LuxCUDA = "0.3.2"
-LuxCore = "0.1.7"
+LuxCore = "0.1.8"
 LuxDeviceUtils = "0.1.14"
 LuxLib = "0.3.10"
 LuxTestUtils = "0.1.15"

diff --git a/src/Lux.jl b/src/Lux.jl
@@ -13,7 +13,8 @@ PrecompileTools.@recompile_invalidations begin
 
     import LuxCore: AbstractExplicitLayer, AbstractExplicitContainerLayer,
                     initialparameters, initialstates, parameterlength, statelength,
-                    update_state, trainmode, testmode, setup, apply, display_name
+                    inputsize, outputsize, update_state, trainmode, testmode, setup, apply,
+                    display_name
     import LuxDeviceUtils: AbstractLuxDevice, AbstractLuxGPUDevice, AbstractLuxDeviceAdaptor
 end
 

diff --git a/src/layers/basic.jl b/src/layers/basic.jl
@@ -20,6 +20,8 @@
     dims::NTuple{N, Int}
 end
 
+outputsize(r::ReshapeLayer) = r.dims  # forward pass appends x's last dim after these
+
 @inline function (r::ReshapeLayer)(x::AbstractArray, ps, st::NamedTuple)
     return reshape(x, r.dims..., size(x, ndims(x))), st
 end
@@ -197,6 +199,8 @@
 end
 statelength(d::Dense) = 0
 
+outputsize(d::Dense) = (d.out_dims,)  # scalar field wrapped so a tuple is returned
+
 @inline function (d::Dense{false})(x::AbstractVecOrMat, ps, st::NamedTuple)
     return __apply_activation(d.activation, ps.weight * x), st
 end
@@ -299,6 +303,8 @@
 parameterlength(d::Scale{use_bias}) where {use_bias} = (1 + use_bias) * prod(d.dims)
 statelength(d::Scale) = 0
 
+outputsize(d::Scale) = d.dims  # returned as stored; presumably a tuple -- TODO confirm
+
 function (d::Scale{true})(x::AbstractArray, ps, st::NamedTuple)
     return __apply_activation(d.activation, ps.weight .* x .+ ps.bias), st
 end
@@ -400,6 +406,8 @@
 end
 statelength(b::Bilinear) = 0
 
+outputsize(b::Bilinear) = (b.out_dims,)  # scalar field wrapped so a tuple is returned
+
 function (b::Bilinear{use_bias})((x, y)::Tuple{<:AbstractVecOrMat, <:AbstractVecOrMat},
         ps, st::NamedTuple) where {use_bias}
     d_z, d_x, d_y = size(ps.weight)
@@ -500,3 +508,5 @@
 function Base.show(io::IO, e::Embedding)
     return print(io, "Embedding(", e.in_dims, " => ", e.out_dims, ")")
 end
+
+outputsize(e::Embedding) = (e.out_dims,)  # same out_dims printed by `show` above
diff --git a/test/layers/basic_tests.jl b/test/layers/basic_tests.jl
@@ -9,6 +9,7 @@
             x = randn(rng, 6, 3) |> aType
 
             @test size(layer(x, ps, st)[1]) == (2, 3, 3)
+            @test Lux.outputsize(layer) == (2, 3)
 
             @jet layer(x, ps, st)
             __f = x -> sum(first(layer(x, ps, st)))
@@ -103,6 +104,8 @@ end
 
             @test size(first(Lux.apply(layer, randn(10), ps, st))) == (5,)
             @test size(first(Lux.apply(layer, randn(10, 2), ps, st))) == (5, 2)
+
+            @test LuxCore.outputsize(layer) == (5,)
         end
 
         @testset "zeros" begin
@@ -178,6 +181,8 @@ end
             @test size(first(Lux.apply(layer, randn(10) |> aType, ps, st))) == (10, 5)
             @test size(first(Lux.apply(layer, randn(10, 5, 2) |> aType, ps, st))) ==
                   (10, 5, 2)
+
+            @test LuxCore.outputsize(layer) == (10, 5)
         end
 
         @testset "zeros" begin
@@ -274,6 +279,8 @@ end
             @test size(layer((x, y), ps, st)[1]) == (3, 1)
             @test sum(abs2, layer((x, y), ps, st)[1]) == 0.0f0
 
+            @test LuxCore.outputsize(layer) == (3,)
+
             @jet layer((x, y), ps, st)
             __f = (x, y, ps) -> sum(first(layer((x, y), ps, st)))
             @eval @test_gradients $__f $x $y $ps atol=1.0f-3 rtol=1.0f-3 gpu_testing=$ongpu
@@ -316,6 +323,8 @@ end
 
         @test size(ps.weight) == (embed_size, vocab_size)
 
+        @test LuxCore.outputsize(layer) == (embed_size,)
+
         x = rand(1:vocab_size, 1)[1]
         y, st_ = layer(x, ps, st)
         @test size(layer(x, ps, st)[1]) == (embed_size,)