From 18c4606801472686e5705908db3bd9886adeddc8 Mon Sep 17 00:00:00 2001
From: Ram Samarth B B <97288756+achiverram28@users.noreply.github.com>
Date: Sat, 16 Mar 2024 22:45:10 +0530
Subject: [PATCH] Creating dropout functionality in the GATConv and GATv2Conv
 Layers (#411)

* Adding the dropout functionalities to GAT and GATv2

Signed-off-by: achiverram28

* Correcting dropout keyword

Signed-off-by: achiverram28

* Adding the test for dropout for GATConv and GATv2Conv

Signed-off-by: achiverram28

* Fix

Signed-off-by: achiverram28

* Fix in test

Signed-off-by: achiverram28

---------

Signed-off-by: achiverram28
---
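Usage sketch for the new keyword (notes placed after the `---` separator are ignored when the patch is applied). The toy graph and feature sizes below are made up; `rand_graph`, `GATConv`, and `GATv2Conv` are existing GraphNeuralNetworks.jl exports, and `dropout` is the keyword this patch adds to both constructors:

```julia
using GraphNeuralNetworks

g = rand_graph(10, 30)               # toy graph: 10 nodes, 30 edges
x = randn(Float32, 8, g.num_nodes)   # 8 input features per node

# Dropout with probability 0.2 on the normalized attention coefficients.
l = GATConv(8 => 16; heads = 2, concat = true, dropout = 0.2)
y = l(g, x)                          # size (16 * 2, g.num_nodes)

l2 = GATv2Conv(8 => 16; heads = 2, dropout = 0.2)
y2 = l2(g, x)                        # same output shape as above
```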
 src/layers/conv.jl  | 20 +++++++++++++-------
 test/layers/conv.jl | 25 ++++++++-----------------
 2 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index 41386a6c4..a098b2595 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -364,7 +364,7 @@ and the attention coefficients will be calculated as
 - `concat`: Concatenate layer output or not. If not, layer output is averaged over the heads. Default `true`.
 - `negative_slope`: The parameter of LeakyReLU. Default `0.2`.
 - `add_self_loops`: Add self loops to the graph before performing the convolution. Default `true`.
-
+- `dropout`: Dropout probability on the normalized attention coefficient. Default `0.0`.
 
 # Examples
 
@@ -384,7 +384,7 @@ l = GATConv(in_channel => out_channel, add_self_loops = false, bias = false; hea
 y = l(g, x)
 ```
 """
-struct GATConv{DX <: Dense, DE <: Union{Dense, Nothing}, T, A <: AbstractMatrix, F, B} <:
+struct GATConv{DX <: Dense, DE <: Union{Dense, Nothing}, DV, T, A <: AbstractMatrix, F, B} <:
        GNNLayer
     dense_x::DX
     dense_e::DE
@@ -396,6 +396,7 @@ struct GATConv{DX <: Dense, DE <: Union{Dense, Nothing}, T, A <: AbstractMatrix,
     heads::Int
     concat::Bool
     add_self_loops::Bool
+    dropout::DV
 end
 
 @functor GATConv
@@ -405,7 +406,7 @@ GATConv(ch::Pair{Int, Int}, args...; kws...) = GATConv((ch[1], 0) => ch[2], args
 
 function GATConv(ch::Pair{NTuple{2, Int}, Int}, σ = identity;
                  heads::Int = 1, concat::Bool = true, negative_slope = 0.2,
-                 init = glorot_uniform, bias::Bool = true, add_self_loops = true)
+                 init = glorot_uniform, bias::Bool = true, add_self_loops = true, dropout=0.0)
     (in, ein), out = ch
     if add_self_loops
         @assert ein==0 "Using edge features and setting add_self_loops=true at the same time is not yet supported."
@@ -416,7 +417,7 @@ function GATConv(ch::Pair{NTuple{2, Int}, Int}, σ = identity;
     b = bias ? Flux.create_bias(dense_x.weight, true, concat ? out * heads : out) : false
     a = init(ein > 0 ? 3out : 2out, heads)
     negative_slope = convert(Float32, negative_slope)
-    GATConv(dense_x, dense_e, b, a, σ, negative_slope, ch, heads, concat, add_self_loops)
+    GATConv(dense_x, dense_e, b, a, σ, negative_slope, ch, heads, concat, add_self_loops, dropout)
 end
 
 (l::GATConv)(g::GNNGraph) = GNNGraph(g, ndata = l(g, node_features(g), edge_features(g)))
@@ -448,6 +449,7 @@ function (l::GATConv)(g::AbstractGNNGraph, x,
     # a hand-written message passing
     m = apply_edges((xi, xj, e) -> message(l, xi, xj, e), g, Wxi, Wxj, e)
     α = softmax_edge_neighbors(g, m.logα)
+    α = dropout(α, l.dropout)
     β = α .* m.Wxj
     x = aggregate_neighbors(g, +, β)
 
@@ -518,6 +520,7 @@ and the attention coefficients will be calculated as
 - `concat`: Concatenate layer output or not. If not, layer output is averaged over the heads. Default `true`.
 - `negative_slope`: The parameter of LeakyReLU. Default `0.2`.
 - `add_self_loops`: Add self loops to the graph before performing the convolution. Default `true`.
+- `dropout`: Dropout probability on the normalized attention coefficient. Default `0.0`.
 
 # Examples
 ```julia
@@ -540,7 +543,7 @@ e = randn(Float32, ein, length(s))
 y = l(g, x, e)
 ```
 """
-struct GATv2Conv{T, A1, A2, A3, B, C <: AbstractMatrix, F} <: GNNLayer
+struct GATv2Conv{T, A1, A2, A3, DV, B, C <: AbstractMatrix, F} <: GNNLayer
     dense_i::A1
     dense_j::A2
     dense_e::A3
@@ -552,6 +555,7 @@ struct GATv2Conv{T, A1, A2, A3, B, C <: AbstractMatrix, F} <: GNNLayer
     heads::Int
     concat::Bool
     add_self_loops::Bool
+    dropout::DV
 end
 
 @functor GATv2Conv
@@ -568,7 +572,8 @@ function GATv2Conv(ch::Pair{NTuple{2, Int}, Int},
                    negative_slope = 0.2,
                    init = glorot_uniform,
                    bias::Bool = true,
-                   add_self_loops = true)
+                   add_self_loops = true,
+                   dropout=0.0)
     (in, ein), out = ch
 
     if add_self_loops
@@ -586,7 +591,7 @@ function GATv2Conv(ch::Pair{NTuple{2, Int}, Int},
     a = init(out, heads)
     negative_slope = convert(eltype(dense_i.weight), negative_slope)
     GATv2Conv(dense_i, dense_j, dense_e, b, a, σ, negative_slope, ch, heads, concat,
-              add_self_loops)
+              add_self_loops, dropout)
 end
 
 (l::GATv2Conv)(g::GNNGraph) = GNNGraph(g, ndata = l(g, node_features(g), edge_features(g)))
@@ -611,6 +616,7 @@ function (l::GATv2Conv)(g::AbstractGNNGraph, x,
 
     m = apply_edges((xi, xj, e) -> message(l, xi, xj, e), g, Wxi, Wxj, e)
     α = softmax_edge_neighbors(g, m.logα)
+    α = dropout(α, l.dropout)
     β = α .* m.Wxj
     x = aggregate_neighbors(g, +, β)
 
diff --git a/test/layers/conv.jl b/test/layers/conv.jl
index ab4de5c73..60562b048 100644
--- a/test/layers/conv.jl
+++ b/test/layers/conv.jl
@@ -107,10 +107,10 @@ end
 
 @testset "GATConv" begin
     for heads in (1, 2), concat in (true, false)
-        l = GATConv(in_channel => out_channel; heads, concat)
+        l = GATConv(in_channel => out_channel; heads, concat, dropout=0)
         for g in test_graphs
             test_layer(l, g, rtol = RTOL_LOW,
-                       exclude_grad_fields = [:negative_slope],
+                       exclude_grad_fields = [:negative_slope, :dropout],
                        outsize = (concat ? heads * out_channel : out_channel,
                                   g.num_nodes))
         end
@@ -118,10 +118,10 @@ end
 
     @testset "edge features" begin
         ein = 3
-        l = GATConv((in_channel, ein) => out_channel, add_self_loops = false)
+        l = GATConv((in_channel, ein) => out_channel, add_self_loops = false, dropout=0)
         g = GNNGraph(g1, edata = rand(T, ein, g1.num_edges))
         test_layer(l, g, rtol = RTOL_LOW,
-                   exclude_grad_fields = [:negative_slope],
+                   exclude_grad_fields = [:negative_slope, :dropout],
                    outsize = (out_channel, g.num_nodes))
     end
 
@@ -137,10 +137,10 @@ end
 
 @testset "GATv2Conv" begin
     for heads in (1, 2), concat in (true, false)
-        l = GATv2Conv(in_channel => out_channel, tanh; heads, concat)
+        l = GATv2Conv(in_channel => out_channel, tanh; heads, concat, dropout=0)
         for g in test_graphs
             test_layer(l, g, rtol = RTOL_LOW, atol=ATOL_LOW,
-                       exclude_grad_fields = [:negative_slope],
+                       exclude_grad_fields = [:negative_slope, :dropout],
                        outsize = (concat ? heads * out_channel : out_channel,
                                   g.num_nodes))
         end
@@ -148,10 +148,10 @@ end
 
     @testset "edge features" begin
         ein = 3
-        l = GATv2Conv((in_channel, ein) => out_channel, add_self_loops = false)
+        l = GATv2Conv((in_channel, ein) => out_channel, add_self_loops = false, dropout=0)
         g = GNNGraph(g1, edata = rand(T, ein, g1.num_edges))
         test_layer(l, g, rtol = RTOL_LOW, atol=ATOL_LOW,
-                   exclude_grad_fields = [:negative_slope],
+                   exclude_grad_fields = [:negative_slope, :dropout],
                    outsize = (out_channel, g.num_nodes))
     end
 
@@ -163,15 +163,6 @@ end
         l = GATv2Conv((2, 4) => 3, add_self_loops = false, bias = false)
         @test length(Flux.params(l)) == 4
     end
-
-    @testset "edge features" begin
-        ein = 3
-        l = GATv2Conv((in_channel, ein) => out_channel, add_self_loops = false)
-        g = GNNGraph(g1, edata = rand(T, ein, g1.num_edges))
-        test_layer(l, g, rtol = RTOL_LOW, atol=ATOL_LOW,
-                   exclude_grad_fields = [:negative_slope],
-                   outsize = (out_channel, g.num_nodes))
-    end
 end
 
 @testset "GatedGraphConv" begin
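The inserted `α = dropout(α, l.dropout)` line applies inverted dropout to the softmax-normalized attention coefficients: each coefficient is zeroed with probability `l.dropout` and the survivors are rescaled by `1 / (1 - p)`, so a pass with nonzero dropout aggregates each node over a randomly thinned neighborhood. Below is a minimal self-contained sketch of that mechanic; `attention_dropout` and the toy matrix are illustrative only, not library code:

```julia
# Inverted dropout on attention coefficients, mirroring the patch's
# `α = dropout(α, l.dropout)` (illustrative helper, not library code).
function attention_dropout(α::AbstractMatrix, p::Real)
    p == 0 && return α                    # the default `dropout = 0.0` is a no-op
    keep = rand(Float32, size(α)) .>= p   # keep each coefficient with probability 1 - p
    return α .* keep ./ (1 - p)           # rescale so the expected value is unchanged
end

# Toy coefficients: one row per head; each row sums to 1 after softmax.
α = Float32[0.5  0.3  0.2;
            0.25 0.25 0.5]
α_drop = attention_dropout(α, 0.5)        # some entries zeroed, survivors doubled
```

Note why the tests construct the layers with `dropout = 0` and add `:dropout` to `exclude_grad_fields`: with the probability at zero the forward pass stays deterministic, so the numeric gradient checks in `test_layer` remain reproducible, and the stored probability is a hyperparameter rather than a trainable array, so it is excluded from the gradient comparison.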