From 18c4606801472686e5705908db3bd9886adeddc8 Mon Sep 17 00:00:00 2001
From: Ram Samarth B B <97288756+achiverram28@users.noreply.github.com>
Date: Sat, 16 Mar 2024 22:45:10 +0530
Subject: [PATCH] Creating dropout functionality in the GATConv and GATv2Conv
 Layers (#411)

* Adding the dropout functionalities to GAT and GATv2

Signed-off-by: achiverram28

* Correcting dropout keyword

Signed-off-by: achiverram28

* Adding the test for dropout for GATConv and GATv2Conv

Signed-off-by: achiverram28

* Fix

Signed-off-by: achiverram28

* Fix in test

Signed-off-by: achiverram28

---------

Signed-off-by: achiverram28
---
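Usage sketch for the new keyword (notes placed after the `---` separator are ignored when the patch is applied). The toy graph and feature sizes below are made up; `rand_graph`, `GATConv`, and `GATv2Conv` are existing GraphNeuralNetworks.jl exports, and `dropout` is the keyword this patch adds to both constructors:

```julia
using GraphNeuralNetworks

g = rand_graph(10, 30)               # toy graph: 10 nodes, 30 edges
x = randn(Float32, 8, g.num_nodes)   # 8 input features per node

# Dropout with probability 0.2 on the normalized attention coefficients.
l = GATConv(8 => 16; heads = 2, concat = true, dropout = 0.2)
y = l(g, x)                          # size (16 * 2, g.num_nodes)

l2 = GATv2Conv(8 => 16; heads = 2, dropout = 0.2)
y2 = l2(g, x)                        # same output shape as above
```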
 src/layers/conv.jl  | 20 +++++++++++++-------
 test/layers/conv.jl | 25 ++++++++-----------------
 2 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index 41386a6c4..a098b2595 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -364,7 +364,7 @@ and the attention coefficients will be calculated as
 - `concat`: Concatenate layer output or not. If not, layer output is averaged over the heads. Default `true`.
 - `negative_slope`: The parameter of LeakyReLU. Default `0.2`.
 - `add_self_loops`: Add self loops to the graph before performing the convolution. Default `true`.
-
+- `dropout`: Dropout probability on the normalized attention coefficient. Default `0.0`.
 
 # Examples
 
@@ -384,7 +384,7 @@ l = GATConv(in_channel => out_channel, add_self_loops = false, bias = false; hea
 y = l(g, x)
 ```
 """
-struct GATConv{DX <: Dense, DE <: Union{Dense, Nothing}, T, A <: AbstractMatrix, F, B} <:
+struct GATConv{DX <: Dense, DE <: Union{Dense, Nothing}, DV, T, A <: AbstractMatrix, F, B} <:
        GNNLayer
     dense_x::DX
     dense_e::DE
@@ -396,6 +396,7 @@ struct GATConv{DX <: Dense, DE <: Union{Dense, Nothing}, T, A <: AbstractMatrix,
     heads::Int
     concat::Bool
     add_self_loops::Bool
+    dropout::DV
 end
 
 @functor GATConv
@@ -405,7 +406,7 @@ GATConv(ch::Pair{Int, Int}, args...; kws...) = GATConv((ch[1], 0) => ch[2], args
 
 function GATConv(ch::Pair{NTuple{2, Int}, Int}, σ = identity;
                  heads::Int = 1, concat::Bool = true, negative_slope = 0.2,
-                 init = glorot_uniform, bias::Bool = true, add_self_loops = true)
+                 init = glorot_uniform, bias::Bool = true, add_self_loops = true, dropout=0.0)
     (in, ein), out = ch
     if add_self_loops
         @assert ein==0 "Using edge features and setting add_self_loops=true at the same time is not yet supported."
@@ -416,7 +417,7 @@ function GATConv(ch::Pair{NTuple{2, Int}, Int}, σ = identity;
     b = bias ? Flux.create_bias(dense_x.weight, true, concat ? out * heads : out) : false
     a = init(ein > 0 ? 3out : 2out, heads)
     negative_slope = convert(Float32, negative_slope)
-    GATConv(dense_x, dense_e, b, a, σ, negative_slope, ch, heads, concat, add_self_loops)
+    GATConv(dense_x, dense_e, b, a, σ, negative_slope, ch, heads, concat, add_self_loops, dropout)
 end
 
 (l::GATConv)(g::GNNGraph) = GNNGraph(g, ndata = l(g, node_features(g), edge_features(g)))
@@ -448,6 +449,7 @@ function (l::GATConv)(g::AbstractGNNGraph, x,
     # a hand-written message passing
     m = apply_edges((xi, xj, e) -> message(l, xi, xj, e), g, Wxi, Wxj, e)
     α = softmax_edge_neighbors(g, m.logα)
+    α = dropout(α, l.dropout)
     β = α .* m.Wxj
     x = aggregate_neighbors(g, +, β)
 
@@ -518,6 +520,7 @@ and the attention coefficients will be calculated as
 - `concat`: Concatenate layer output or not. If not, layer output is averaged over the heads. Default `true`.
 - `negative_slope`: The parameter of LeakyReLU. Default `0.2`.
 - `add_self_loops`: Add self loops to the graph before performing the convolution. Default `true`.
+- `dropout`: Dropout probability on the normalized attention coefficient. Default `0.0`.
 
 # Examples
 ```julia
@@ -540,7 +543,7 @@ e = randn(Float32, ein, length(s))
 y = l(g, x, e)
 ```
 """
-struct GATv2Conv{T, A1, A2, A3, B, C <: AbstractMatrix, F} <: GNNLayer
+struct GATv2Conv{T, A1, A2, A3, DV, B, C <: AbstractMatrix, F} <: GNNLayer
     dense_i::A1
     dense_j::A2
     dense_e::A3
@@ -552,6 +555,7 @@ struct GATv2Conv{T, A1, A2, A3, B, C <: AbstractMatrix, F} <: GNNLayer
     heads::Int
     concat::Bool
     add_self_loops::Bool
+    dropout::DV
 end
 
 @functor GATv2Conv
@@ -568,7 +572,8 @@ function GATv2Conv(ch::Pair{NTuple{2, Int}, Int},
                    negative_slope = 0.2,
                    init = glorot_uniform,
                    bias::Bool = true,
-                   add_self_loops = true)
+                   add_self_loops = true,
+                   dropout=0.0)
     (in, ein), out = ch
 
     if add_self_loops
@@ -586,7 +591,7 @@ function GATv2Conv(ch::Pair{NTuple{2, Int}, Int},
     a = init(out, heads)
     negative_slope = convert(eltype(dense_i.weight), negative_slope)
     GATv2Conv(dense_i, dense_j, dense_e, b, a, σ, negative_slope, ch, heads, concat,
-              add_self_loops)
+              add_self_loops, dropout)
 end
 
 (l::GATv2Conv)(g::GNNGraph) = GNNGraph(g, ndata = l(g, node_features(g), edge_features(g)))
@@ -611,6 +616,7 @@ function (l::GATv2Conv)(g::AbstractGNNGraph, x,
 
     m = apply_edges((xi, xj, e) -> message(l, xi, xj, e), g, Wxi, Wxj, e)
     α = softmax_edge_neighbors(g, m.logα)
+    α = dropout(α, l.dropout)
     β = α .* m.Wxj
     x = aggregate_neighbors(g, +, β)
 
diff --git a/test/layers/conv.jl b/test/layers/conv.jl
index ab4de5c73..60562b048 100644
--- a/test/layers/conv.jl
+++ b/test/layers/conv.jl
@@ -107,10 +107,10 @@ end
 
 @testset "GATConv" begin
     for heads in (1, 2), concat in (true, false)
-        l = GATConv(in_channel => out_channel; heads, concat)
+        l = GATConv(in_channel => out_channel; heads, concat, dropout=0)
         for g in test_graphs
             test_layer(l, g, rtol = RTOL_LOW,
-                       exclude_grad_fields = [:negative_slope],
+                       exclude_grad_fields = [:negative_slope, :dropout],
                        outsize = (concat ? heads * out_channel : out_channel,
                                   g.num_nodes))
         end
@@ -118,10 +118,10 @@ end
 
     @testset "edge features" begin
         ein = 3
-        l = GATConv((in_channel, ein) => out_channel, add_self_loops = false)
+        l = GATConv((in_channel, ein) => out_channel, add_self_loops = false, dropout=0)
         g = GNNGraph(g1, edata = rand(T, ein, g1.num_edges))
         test_layer(l, g, rtol = RTOL_LOW,
-                   exclude_grad_fields = [:negative_slope],
+                   exclude_grad_fields = [:negative_slope, :dropout],
                    outsize = (out_channel, g.num_nodes))
     end
 
@@ -137,10 +137,10 @@ end
 
 @testset "GATv2Conv" begin
     for heads in (1, 2), concat in (true, false)
-        l = GATv2Conv(in_channel => out_channel, tanh; heads, concat)
+        l = GATv2Conv(in_channel => out_channel, tanh; heads, concat, dropout=0)
         for g in test_graphs
             test_layer(l, g, rtol = RTOL_LOW, atol=ATOL_LOW,
-                       exclude_grad_fields = [:negative_slope],
+                       exclude_grad_fields = [:negative_slope, :dropout],
                        outsize = (concat ? heads * out_channel : out_channel,
                                   g.num_nodes))
         end
@@ -148,10 +148,10 @@ end
 
     @testset "edge features" begin
         ein = 3
-        l = GATv2Conv((in_channel, ein) => out_channel, add_self_loops = false)
+        l = GATv2Conv((in_channel, ein) => out_channel, add_self_loops = false, dropout=0)
         g = GNNGraph(g1, edata = rand(T, ein, g1.num_edges))
         test_layer(l, g, rtol = RTOL_LOW, atol=ATOL_LOW,
-                   exclude_grad_fields = [:negative_slope],
+                   exclude_grad_fields = [:negative_slope, :dropout],
                    outsize = (out_channel, g.num_nodes))
     end
 
@@ -163,15 +163,6 @@ end
         l = GATv2Conv((2, 4) => 3, add_self_loops = false, bias = false)
         @test length(Flux.params(l)) == 4
     end
-
-    @testset "edge features" begin
-        ein = 3
-        l = GATv2Conv((in_channel, ein) => out_channel, add_self_loops = false)
-        g = GNNGraph(g1, edata = rand(T, ein, g1.num_edges))
-        test_layer(l, g, rtol = RTOL_LOW, atol=ATOL_LOW,
-                   exclude_grad_fields = [:negative_slope],
-                   outsize = (out_channel, g.num_nodes))
-    end
 end
 
 @testset "GatedGraphConv" begin
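The inserted `α = dropout(α, l.dropout)` line applies inverted dropout to the softmax-normalized attention coefficients: each coefficient is zeroed with probability `l.dropout` and the survivors are rescaled by `1 / (1 - p)`, so a pass with nonzero dropout aggregates each node over a randomly thinned neighborhood. Below is a minimal self-contained sketch of that mechanic; `attention_dropout` and the toy matrix are illustrative only, not library code:

```julia
# Inverted dropout on attention coefficients, mirroring the patch's
# `α = dropout(α, l.dropout)` (illustrative helper, not library code).
function attention_dropout(α::AbstractMatrix, p::Real)
    p == 0 && return α                    # the default `dropout = 0.0` is a no-op
    keep = rand(Float32, size(α)) .>= p   # keep each coefficient with probability 1 - p
    return α .* keep ./ (1 - p)           # rescale so the expected value is unchanged
end

# Toy coefficients: one row per head; each row sums to 1 after softmax.
α = Float32[0.5  0.3  0.2;
            0.25 0.25 0.5]
α_drop = attention_dropout(α, 0.5)        # some entries zeroed, survivors doubled
```

Note why the tests construct the layers with `dropout = 0` and add `:dropout` to `exclude_grad_fields`: with the probability at zero the forward pass stays deterministic, so the numeric gradient checks in `test_layer` remain reproducible, and the stored probability is a hyperparameter rather than a trainable array, so it is excluded from the gradient comparison.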