From 89797358cd906b6f33ee89f09d3422a653f3d22e Mon Sep 17 00:00:00 2001
From: CarloLucibello <carlo.lucibello@gmail.com>
Date: Sun, 1 Dec 2024 09:51:31 +0100
Subject: [PATCH] move around docs

---
 GNNGraphs/docs/make.jl                        |   6 +-
 GNNGraphs/docs/src/{ => guides}/datasets.md   |   0
 GNNGraphs/docs/src/{ => guides}/gnngraph.md   |   0
 .../docs/src/{ => guides}/heterograph.md      |   0
 .../docs/src/{ => guides}/temporalgraph.md    |   0
 GNNlib/docs/make.jl                           |   2 +-
 .../docs/src/{ => guides}/messagepassing.md   |   0
 GraphNeuralNetworks/docs/make.jl              |  60 ++--
 GraphNeuralNetworks/docs/src/datasets.md      |   5 -
 .../docs/src/guides/datasets.md               |  10 +
 .../docs/src/guides/gnngraph.md               | 257 ++++++++++++++++++
 .../docs/src/guides/heterograph.md            | 140 ++++++++++
 .../docs/src/guides/messagepassing.md         | 143 ++++++++++
 .../docs/src/{ => guides}/models.md           |   0
 .../docs/src/guides/temporalgraph.md          | 145 ++++++++++
 15 files changed, 738 insertions(+), 30 deletions(-)
 rename GNNGraphs/docs/src/{ => guides}/datasets.md (100%)
 rename GNNGraphs/docs/src/{ => guides}/gnngraph.md (100%)
 rename GNNGraphs/docs/src/{ => guides}/heterograph.md (100%)
 rename GNNGraphs/docs/src/{ => guides}/temporalgraph.md (100%)
 rename GNNlib/docs/src/{ => guides}/messagepassing.md (100%)
 delete mode 100644 GraphNeuralNetworks/docs/src/datasets.md
 create mode 100644 GraphNeuralNetworks/docs/src/guides/datasets.md
 create mode 100644 GraphNeuralNetworks/docs/src/guides/gnngraph.md
 create mode 100644 GraphNeuralNetworks/docs/src/guides/heterograph.md
 create mode 100644 GraphNeuralNetworks/docs/src/guides/messagepassing.md
 rename GraphNeuralNetworks/docs/src/{ => guides}/models.md (100%)
 create mode 100644 GraphNeuralNetworks/docs/src/guides/temporalgraph.md

diff --git a/GNNGraphs/docs/make.jl b/GNNGraphs/docs/make.jl
index 2a923ac19..d94b291b1 100644
--- a/GNNGraphs/docs/make.jl
+++ b/GNNGraphs/docs/make.jl
@@ -16,8 +16,10 @@ makedocs(;
          format = Documenter.HTML(; mathengine, prettyurls, assets = assets, size_threshold=nothing),
          sitename = "GNNGraphs.jl",
          pages = ["Home" => "index.md",
-            "Graphs" => ["gnngraph.md", "heterograph.md", "temporalgraph.md"],
-            "Datasets" => "datasets.md",
+            "Guides" => [
+               "Graphs" => ["guides/gnngraph.md", "guides/heterograph.md", "guides/temporalgraph.md"],
+               "Datasets" => "guides/datasets.md",
+            ],
             "API Reference" => [
                  "GNNGraph" => "api/gnngraph.md",
                  "GNNHeteroGraph" => "api/heterograph.md",
diff --git a/GNNGraphs/docs/src/datasets.md b/GNNGraphs/docs/src/guides/datasets.md
similarity index 100%
rename from GNNGraphs/docs/src/datasets.md
rename to GNNGraphs/docs/src/guides/datasets.md
diff --git a/GNNGraphs/docs/src/gnngraph.md b/GNNGraphs/docs/src/guides/gnngraph.md
similarity index 100%
rename from GNNGraphs/docs/src/gnngraph.md
rename to GNNGraphs/docs/src/guides/gnngraph.md
diff --git a/GNNGraphs/docs/src/heterograph.md b/GNNGraphs/docs/src/guides/heterograph.md
similarity index 100%
rename from GNNGraphs/docs/src/heterograph.md
rename to GNNGraphs/docs/src/guides/heterograph.md
diff --git a/GNNGraphs/docs/src/temporalgraph.md b/GNNGraphs/docs/src/guides/temporalgraph.md
similarity index 100%
rename from GNNGraphs/docs/src/temporalgraph.md
rename to GNNGraphs/docs/src/guides/temporalgraph.md
diff --git a/GNNlib/docs/make.jl b/GNNlib/docs/make.jl
index 1e10396c0..dba621165 100644
--- a/GNNlib/docs/make.jl
+++ b/GNNlib/docs/make.jl
@@ -21,7 +21,7 @@ makedocs(;
          format = Documenter.HTML(; mathengine, prettyurls, assets = assets, size_threshold=nothing),
          sitename = "GNNlib.jl",
          pages = ["Home" => "index.md",
-            "Message Passing" => "messagepassing.md",
+            "Message Passing" => "guides/messagepassing.md",
 
             "API Reference" => [
      
diff --git a/GNNlib/docs/src/messagepassing.md b/GNNlib/docs/src/guides/messagepassing.md
similarity index 100%
rename from GNNlib/docs/src/messagepassing.md
rename to GNNlib/docs/src/guides/messagepassing.md
diff --git a/GraphNeuralNetworks/docs/make.jl b/GraphNeuralNetworks/docs/make.jl
index a9f9e1313..338fb979d 100644
--- a/GraphNeuralNetworks/docs/make.jl
+++ b/GraphNeuralNetworks/docs/make.jl
@@ -14,28 +14,44 @@ interlinks = InterLinks(
    
    )
 
+# Copy the guides from GNNGraphs and GNNlib into this package's docs (src/guides).
+# `force = true`: copies of these guides are committed under src/guides, and `cp` throws when the destination exists.
+dest_guides_dir = joinpath(@__DIR__, "src/guides")
+gnngraphs_guides_dir = joinpath(@__DIR__, "../../GNNGraphs/docs/src/guides")
+gnnlib_guides_dir = joinpath(@__DIR__, "../../GNNlib/docs/src/guides")
+for guides_dir in (gnngraphs_guides_dir, gnnlib_guides_dir)
+    for file in readdir(guides_dir)
+        cp(joinpath(guides_dir, file), joinpath(dest_guides_dir, file); force = true)
+    end
+end
+
 makedocs(;
-         modules = [GraphNeuralNetworks],
-         doctest = false,
-         clean = true,
-         plugins = [interlinks],
-         format = Documenter.HTML(; mathengine, prettyurls, assets = assets, size_threshold=nothing),
-         sitename = "GraphNeuralNetworks.jl",
-         pages = [
-            "Home" => "index.md",
-            "Guides" => [
-                "Models" => "models.md",
-            ],
-            "API Reference" => [
-                  "Basic" => "api/basic.md",
-                  "Convolutional layers" => "api/conv.md",
-                  "Pooling layers" => "api/pool.md",
-                  "Temporal Convolutional layers" => "api/temporalconv.md",
-                  "Hetero Convolutional layers" => "api/heteroconv.md",
-              ],
-              "Developer guide" => "dev.md",
-            
-         ],
-         )
+    modules = [GraphNeuralNetworks],
+    doctest = false,
+    clean = true,
+    plugins = [interlinks],
+    format = Documenter.HTML(; mathengine, prettyurls, assets = assets, size_threshold=nothing),
+    sitename = "GraphNeuralNetworks.jl",
+    pages = [
+    # Sidebar layout. Page paths are relative to docs/src.
+    "Home" => "index.md",
+    # Every guide except models.md is copied in from GNNGraphs/GNNlib by the copy step above.
+    "Guides" => [
+        "Graphs" => ["guides/gnngraph.md", "guides/heterograph.md", "guides/temporalgraph.md"],
+        "Message Passing" => "guides/messagepassing.md",
+        "Models" => "guides/models.md",
+        "Datasets" => "guides/datasets.md",
+    ],
+
+    "API Reference" => [
+            "Basic" => "api/basic.md",
+            "Convolutional layers" => "api/conv.md",
+            "Pooling layers" => "api/pool.md",
+            "Temporal Convolutional layers" => "api/temporalconv.md",
+            "Hetero Convolutional layers" => "api/heteroconv.md",
+        ],
+        "Developer guide" => "dev.md",
+    ],
+)
          
 deploydocs(;repo = "github.com/JuliaGraphs/GraphNeuralNetworks.jl.git", devbranch = "master", dirname= "GraphNeuralNetworks")
diff --git a/GraphNeuralNetworks/docs/src/datasets.md b/GraphNeuralNetworks/docs/src/datasets.md
deleted file mode 100644
index 050f27b3c..000000000
--- a/GraphNeuralNetworks/docs/src/datasets.md
+++ /dev/null
@@ -1,5 +0,0 @@
-# Datasets
-
-GraphNeuralNetworks.jl doesn't come with its own datasets, but leverages those available in the Julia (and non-Julia) ecosystem. In particular, the [examples in the GraphNeuralNetworks.jl repository](https://github.com/JuliaGraphs/GraphNeuralNetworks.jl/tree/master/examples) make use of the [MLDatasets.jl](https://github.com/JuliaML/MLDatasets.jl) package. There you will find common graph datasets such as Cora, PubMed, Citeseer, TUDataset and [many others](https://juliaml.github.io/MLDatasets.jl/dev/datasets/graphs/).
-
-GraphNeuralNetworks.jl provides the [`GNNGraphs.mldataset2gnngraph`](@ref) method for interfacing with MLDatasets.jl.
\ No newline at end of file
diff --git a/GraphNeuralNetworks/docs/src/guides/datasets.md b/GraphNeuralNetworks/docs/src/guides/datasets.md
new file mode 100644
index 000000000..60477d95e
--- /dev/null
+++ b/GraphNeuralNetworks/docs/src/guides/datasets.md
@@ -0,0 +1,10 @@
+# Datasets
+
+GNNGraphs.jl doesn't come with its own datasets, but leverages those available in the Julia (and non-Julia) ecosystem. In particular, the [examples in the GraphNeuralNetworks.jl repository](https://github.com/JuliaGraphs/GraphNeuralNetworks.jl/tree/master/examples) make use of the [MLDatasets.jl](https://github.com/JuliaML/MLDatasets.jl) package. There you will find common graph datasets such as Cora, PubMed, Citeseer, TUDataset and [many others](https://juliaml.github.io/MLDatasets.jl/dev/datasets/graphs/).
+For graphs with static structures and temporal features, datasets such as METRLA, PEMSBAY, ChickenPox, and WindMillEnergy are available. For graphs featuring both temporal structures and temporal features, the TemporalBrains dataset is suitable.
+
+GraphNeuralNetworks.jl provides the [`mldataset2gnngraph`](@ref) method for interfacing with MLDatasets.jl.
+
+```@docs
+mldataset2gnngraph
+```
diff --git a/GraphNeuralNetworks/docs/src/guides/gnngraph.md b/GraphNeuralNetworks/docs/src/guides/gnngraph.md
new file mode 100644
index 000000000..c62e279fa
--- /dev/null
+++ b/GraphNeuralNetworks/docs/src/guides/gnngraph.md
@@ -0,0 +1,257 @@
+# Static Graphs 
+
+The fundamental graph type in GNNGraphs.jl is the [`GNNGraph`](@ref).
+A GNNGraph `g` is a directed graph with nodes labeled from 1 to `g.num_nodes`.
+The underlying implementation allows for efficient application of graph neural network
+operators, gpu movement, and storage of node/edge/graph related feature arrays.
+
+`GNNGraph` inherits from [Graphs.jl](https://github.com/JuliaGraphs/Graphs.jl)'s `AbstractGraph`,
+therefore it supports most functionality from that library. 
+
+## Graph Creation
+A GNNGraph can be created from several different data sources encoding the graph topology:
+
+```julia
+using GNNGraphs, Graphs, SparseArrays
+
+
+# Construct a GNNGraph from a Graphs.jl graph
+lg = erdos_renyi(10, 30)
+g = GNNGraph(lg)
+
+# Same as above using convenience method rand_graph
+g = rand_graph(10, 60)
+
+# From an adjacency matrix
+A = sprand(10, 10, 0.3)
+g = GNNGraph(A)
+
+# From an adjacency list
+adjlist = [[2,3], [1,3], [1,2,4], [3]]
+g = GNNGraph(adjlist)
+
+# From COO representation
+source = [1,1,2,2,3,3,3,4]
+target = [2,3,1,3,1,2,4,3]
+g = GNNGraph(source, target)
+```
+
+See also the related methods [`Graphs.adjacency_matrix`](@ref), [`edge_index`](@ref), and [`adjacency_list`](@ref).
+
+## Basic Queries
+
+```julia
+julia> source = [1,1,2,2,3,3,3,4];
+
+julia> target = [2,3,1,3,1,2,4,3];
+
+julia> g = GNNGraph(source, target)
+GNNGraph:
+  num_nodes: 4
+  num_edges: 8
+
+
+julia> @assert g.num_nodes == 4   # number of nodes
+
+julia> @assert g.num_edges == 8   # number of edges
+
+julia> @assert g.num_graphs == 1  # number of subgraphs (a GNNGraph can batch many graphs together)
+
+julia> is_directed(g)      # a GNNGraph is always directed
+true
+
+julia> is_bidirected(g)      # for each edge, also the reverse edge is present
+true
+
+julia> has_self_loops(g)
+false
+
+julia> has_multi_edges(g)      
+false
+```
+
+## Data Features
+
+One or more arrays can be associated to nodes, edges, and (sub)graphs of a `GNNGraph`.
+They will be stored in the fields `g.ndata`, `g.edata`, and `g.gdata` respectively.
+
+The data fields are [`DataStore`](@ref) objects. [`DataStore`](@ref)s conveniently offer an interface similar to both dictionaries and named tuples. Similarly to dictionaries, DataStores support addition of new features after creation time.
+
+The arrays contained in the datastores have their last dimension equal to `num_nodes` (in `ndata`), `num_edges` (in `edata`), or `num_graphs` (in `gdata`) respectively.
+
+```julia
+# Create a graph with a single feature array `x` associated to nodes
+g = rand_graph(10,  60, ndata = (; x = rand(Float32, 32, 10)))
+
+g.ndata.x  # access the features
+
+# Equivalent definition passing directly the array
+g = rand_graph(10,  60, ndata = rand(Float32, 32, 10))
+
+g.ndata.x  # `:x` is the default name for node features
+
+g.ndata.z = rand(Float32, 3, 10)  # add new feature array `z`
+
+# For convenience, we can access the features through the shortcut
+g.x 
+
+# You can have multiple feature arrays
+g = rand_graph(10,  60, ndata = (; x=rand(Float32, 32, 10), y=rand(Float32, 10)))
+
+g.ndata.y, g.ndata.x   # or g.x, g.y
+
+# Attach an array with edge features.
+# Since `GNNGraph`s are directed, the number of edges
+# will be double that of the original Graphs' undirected graph.
+g = GNNGraph(erdos_renyi(10,  30), edata = rand(Float32, 60))
+@assert g.num_edges == 60
+
+g.edata.e  # or g.e
+
+# If we pass only half of the edge features, they will be copied
+# on the reversed edges.
+g = GNNGraph(erdos_renyi(10,  30), edata = rand(Float32, 30))
+
+
+# Create a new graph from previous one, inheriting edge data
+# but replacing node data
+g′ = GNNGraph(g, ndata =(; z = ones(Float32, 16, 10)))
+
+g′.z
+g′.e
+```
+
+## Edge weights
+
+It is common to denote scalar edge features as edge weights. The `GNNGraph` has specific support
+for edge weights: they can be stored as part of internal representations of the graph (COO or adjacency matrix). Some graph convolutional layers, most notably the `GCNConv`, can use the edge weights to perform weighted sums over the nodes' neighborhoods.
+
+```julia
+julia> source = [1, 1, 2, 2, 3, 3];
+
+julia> target = [2, 3, 1, 3, 1, 2];
+
+julia> weight = [1.0, 0.5, 2.1, 2.3, 4, 4.1];
+
+julia> g = GNNGraph(source, target, weight)
+GNNGraph:
+  num_nodes: 3
+  num_edges: 6
+
+julia> get_edge_weight(g)
+6-element Vector{Float64}:
+ 1.0
+ 0.5
+ 2.1
+ 2.3
+ 4.0
+ 4.1
+```
+
+## Batches and Subgraphs
+
+Multiple `GNNGraph`s can be batched together into a single graph
+that contains the total number of the original nodes 
+and where the original graphs are disjoint subgraphs.
+
+```julia
+using Flux
+using Flux: DataLoader
+
+data = [rand_graph(10, 30, ndata=rand(Float32, 3, 10)) for _ in 1:160]
+gall = Flux.batch(data)
+
+# gall is a GNNGraph containing many graphs
+@assert gall.num_graphs == 160 
+@assert gall.num_nodes == 1600   # 10 nodes x 160 graphs
+@assert gall.num_edges == 4800  # 30 edges x 160 graphs
+
+# Let's create a mini-batch from gall
+g23 = getgraph(gall, 2:3)
+@assert g23.num_graphs == 2
+@assert g23.num_nodes == 20   # 10 nodes x 2 graphs
+@assert g23.num_edges == 60  # 30 edges x 2 graphs
+
+# We can pass a GNNGraph to Flux's DataLoader
+train_loader = DataLoader(gall, batchsize=16, shuffle=true)
+
+for g in train_loader
+    @assert g.num_graphs == 16
+    @assert g.num_nodes == 160
+    @assert size(g.ndata.x) == (3, 160)
+    # .....
+end
+
+# Access the nodes' graph memberships 
+graph_indicator(gall)
+```
+
+## DataLoader and mini-batch iteration
+
+While constructing a batched graph and passing it to the `DataLoader` is always 
+an option for mini-batch iteration, the recommended way for better performance is
+to pass an array of graphs directly and set the `collate` option to `true`:
+
+```julia
+using Flux: DataLoader
+
+data = [rand_graph(10, 30, ndata=rand(Float32, 3, 10)) for _ in 1:320]
+
+train_loader = DataLoader(data, batchsize=16, shuffle=true, collate=true)
+
+for g in train_loader
+    @assert g.num_graphs == 16
+    @assert g.num_nodes == 160
+    @assert size(g.ndata.x) == (3, 160)
+    # .....
+end
+```
+
+## Graph Manipulation
+
+```julia
+g′ = add_self_loops(g)
+g′ = remove_self_loops(g)
+g′ = add_edges(g, [1, 2], [2, 3]) # add edges 1->2 and 2->3
+```
+
+## GPU movement
+
+Move a `GNNGraph` to a CUDA device using the `Flux.gpu` method.
+
+```julia
+using CUDA, Flux
+
+g_gpu = g |> Flux.gpu
+```
+
+## Integration with Graphs.jl
+
+Since `GNNGraph <: Graphs.AbstractGraph`, we can use any functionality from [Graphs.jl](https://github.com/JuliaGraphs/Graphs.jl) for querying and analyzing the graph structure. 
+Moreover, a `GNNGraph` can be easily constructed from a `Graphs.Graph` or a `Graphs.DiGraph`:
+
+```julia
+julia> import Graphs
+
+julia> using GNNGraphs
+
+# A Graphs.jl undirected graph
+julia> gu = Graphs.erdos_renyi(10, 20)    
+{10, 20} undirected simple Int64 graph
+
+# Since GNNGraphs are directed, the edges are doubled when converting 
+# to GNNGraph
+julia> GNNGraph(gu)
+GNNGraph:
+  num_nodes: 10
+  num_edges: 40
+
+# A Graphs.jl directed graph
+julia> gd = Graphs.erdos_renyi(10, 20, is_directed=true)
+{10, 20} directed simple Int64 graph
+
+julia> GNNGraph(gd)
+GNNGraph:
+  num_nodes: 10
+  num_edges: 20
+```
diff --git a/GraphNeuralNetworks/docs/src/guides/heterograph.md b/GraphNeuralNetworks/docs/src/guides/heterograph.md
new file mode 100644
index 000000000..2347b5844
--- /dev/null
+++ b/GraphNeuralNetworks/docs/src/guides/heterograph.md
@@ -0,0 +1,140 @@
+# Heterogeneous Graphs
+
+Heterogeneous graphs (also called heterographs), are graphs where each node has a type,
+that we denote with symbols such as `:user` and `:movie`.
+Relations such as `:rate` or `:like` can connect nodes of different types. We call a triplet `(source_node_type, relation_type, target_node_type)` the type of an edge, e.g. `(:user, :rate, :movie)`.
+
+Different node/edge types can store different groups of features,
+and this makes heterographs very flexible modeling tools
+and data containers. In GNNGraphs.jl heterographs are implemented in 
+the type [`GNNHeteroGraph`](@ref).
+
+
+## Creating a Heterograph
+
+A heterograph can be created empty or by passing pairs `edge_type => data` to the constructor.
+```jldoctest
+julia> g = GNNHeteroGraph()
+GNNHeteroGraph:
+  num_nodes: Dict()
+  num_edges: Dict()
+  
+julia> g = GNNHeteroGraph((:user, :like, :actor) => ([1,2,2,3], [1,3,2,9]),
+                          (:user, :rate, :movie) => ([1,1,2,3], [7,13,5,7]))
+GNNHeteroGraph:
+  num_nodes: Dict(:actor => 9, :movie => 13, :user => 3)
+  num_edges: Dict((:user, :like, :actor) => 4, (:user, :rate, :movie) => 4)
+
+julia> g = GNNHeteroGraph((:user, :rate, :movie) => ([1,1,2,3], [7,13,5,7]))
+GNNHeteroGraph:
+  num_nodes: Dict(:movie => 13, :user => 3)
+  num_edges: Dict((:user, :rate, :movie) => 4)
+```
+New relations, possibly with new node types, can be added with the function [`add_edges`](@ref).
+```jldoctest
+julia> g = add_edges(g, (:user, :like, :actor) => ([1,2,3,3,3], [3,5,1,9,4]))
+GNNHeteroGraph:
+  num_nodes: Dict(:actor => 9, :movie => 13, :user => 3)
+  num_edges: Dict((:user, :like, :actor) => 5, (:user, :rate, :movie) => 4)
+```
+See [`rand_heterograph`](@ref), [`rand_bipartite_heterograph`](@ref)
+for generating random heterographs. 
+
+```jldoctest
+julia> g = rand_bipartite_heterograph((10, 15), 20)
+GNNHeteroGraph:
+  num_nodes: Dict(:A => 10, :B => 15)
+  num_edges: Dict((:A, :to, :B) => 20, (:B, :to, :A) => 20)
+```
+
+## Basic Queries
+
+Basic queries are similar to those for homogeneous graphs:
+```jldoctest
+julia> g = GNNHeteroGraph((:user, :rate, :movie) => ([1,1,2,3], [7,13,5,7]))
+GNNHeteroGraph:
+  num_nodes: Dict(:movie => 13, :user => 3)
+  num_edges: Dict((:user, :rate, :movie) => 4)
+
+julia> g.num_nodes
+Dict{Symbol, Int64} with 2 entries:
+  :user  => 3
+  :movie => 13
+
+julia> g.num_edges
+Dict{Tuple{Symbol, Symbol, Symbol}, Int64} with 1 entry:
+  (:user, :rate, :movie) => 4
+
+# source and target node for a given relation
+julia> edge_index(g, (:user, :rate, :movie))
+([1, 1, 2, 3], [7, 13, 5, 7])
+
+# node types
+julia> g.ntypes
+2-element Vector{Symbol}:
+ :user
+ :movie
+
+# edge types
+julia> g.etypes
+1-element Vector{Tuple{Symbol, Symbol, Symbol}}:
+ (:user, :rate, :movie)
+```
+
+## Data Features
+
+Node, edge, and graph features can be added at construction time or later using:
+```jldoctest
+# equivalent to g.ndata[:user][:x] = ...
+julia> g[:user].x = rand(Float32, 64, 3);
+
+julia> g[:movie].z = rand(Float32, 64, 13);
+
+# equivalent to g.edata[(:user, :rate, :movie)][:e] = ...
+julia> g[:user, :rate, :movie].e = rand(Float32, 64, 4);
+
+julia> g
+GNNHeteroGraph:
+  num_nodes: Dict(:movie => 13, :user => 3)
+  num_edges: Dict((:user, :rate, :movie) => 4)
+  ndata:
+        :movie  =>  DataStore(z = [64×13 Matrix{Float32}])
+        :user  =>  DataStore(x = [64×3 Matrix{Float32}])
+  edata:
+        (:user, :rate, :movie)  =>  DataStore(e = [64×4 Matrix{Float32}])
+```
+
+## Batching
+Like homogeneous graphs, heterographs can also be batched together.
+```jldoctest
+julia> gs = [rand_bipartite_heterograph((5, 10), 20) for _ in 1:32];
+
+julia> Flux.batch(gs)
+GNNHeteroGraph:
+  num_nodes: Dict(:A => 160, :B => 320)
+  num_edges: Dict((:A, :to, :B) => 640, (:B, :to, :A) => 640)
+  num_graphs: 32
+```
+Batching is automatically performed by the [`DataLoader`](https://fluxml.ai/Flux.jl/stable/data/mlutils/#MLUtils.DataLoader) iterator
+when the `collate` option is set to `true`.
+
+```julia
+using Flux: DataLoader
+
+data = [rand_bipartite_heterograph((5, 10), 20, 
+            ndata=Dict(:A=>rand(Float32, 3, 5))) 
+        for _ in 1:320];
+
+train_loader = DataLoader(data, batchsize=16, shuffle=true, collate=true)
+
+for g in train_loader
+    @assert g.num_graphs == 16
+    @assert g.num_nodes[:A] == 80
+    @assert size(g.ndata[:A].x) == (3, 80)    
+    # ...
+end
+```
+
+## Graph convolutions on heterographs
+
+See `HeteroGraphConv` for how to perform convolutions on heterogeneous graphs.
diff --git a/GraphNeuralNetworks/docs/src/guides/messagepassing.md b/GraphNeuralNetworks/docs/src/guides/messagepassing.md
new file mode 100644
index 000000000..776cc0200
--- /dev/null
+++ b/GraphNeuralNetworks/docs/src/guides/messagepassing.md
@@ -0,0 +1,143 @@
+# Message Passing
+
+A generic message passing scheme on a graph takes the form
+
+```math
+\begin{aligned}
+\mathbf{m}_{j\to i} &= \phi(\mathbf{x}_i, \mathbf{x}_j, \mathbf{e}_{j\to i}) \\
+\bar{\mathbf{m}}_{i} &= \square_{j\in N(i)}  \mathbf{m}_{j\to i} \\
+\mathbf{x}_{i}' &= \gamma_x(\mathbf{x}_{i}, \bar{\mathbf{m}}_{i})\\
+\mathbf{e}_{j\to i}^\prime &=  \gamma_e(\mathbf{e}_{j \to i},\mathbf{m}_{j \to i})
+\end{aligned}
+```
+
+where we refer to ``\phi`` as the message function, 
+and to ``\gamma_x`` and ``\gamma_e`` as the node update and edge update functions
+respectively. The aggregation ``\square`` is over the neighborhood ``N(i)`` of node ``i``, 
+and it is usually either a ``\sum``, a `max`, or a `mean` operation. 
+
+In GNNlib.jl, the message passing mechanism is exposed by the [`propagate`](@ref) function.
+[`propagate`](@ref) takes care of materializing the node features on each edge, applying the message function, performing the
+aggregation, and returning ``\bar{\mathbf{m}}``. 
+It is then left to the user to perform further node and edge updates,
+manipulating arrays of size ``D_{node} \times num\_nodes`` and   
+``D_{edge} \times num\_edges``.
+
+[`propagate`](@ref) is composed of two steps, also available as two independent methods:
+
+1. [`apply_edges`](@ref) materializes node features on edges and applies the message function. 
+2. [`aggregate_neighbors`](@ref) applies a reduction operator on the messages coming from the neighborhood of each node.
+
+The whole propagation mechanism internally relies on the [`NNlib.gather`](@ref) 
+and [`NNlib.scatter`](@ref) methods.
+
+
+## Examples
+
+### Basic use of apply_edges and propagate
+
+The function [`apply_edges`](@ref) can be used to broadcast node data
+on each edge and produce new edge data.
+```julia
+julia> using GNNlib, Graphs, Statistics
+
+julia> g = rand_graph(10, 20)
+GNNGraph:
+  num_nodes: 10
+  num_edges: 20
+
+julia> x = ones(2,10);
+
+julia> z = 2ones(2,10);
+
+# Returns an array of edge features (D × num_edges)
+julia> apply_edges((xi, xj, e) -> xi .+ xj, g, xi=x, xj=z)
+2×20 Matrix{Float64}:
+ 3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0
+ 3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0  3.0
+
+# now returning a named tuple
+julia> apply_edges((xi, xj, e) -> (a=xi .+ xj, b=xi .- xj), g, xi=x, xj=z)
+(a = [3.0 3.0 … 3.0 3.0; 3.0 3.0 … 3.0 3.0], b = [-1.0 -1.0 … -1.0 -1.0; -1.0 -1.0 … -1.0 -1.0])
+
+# Here we provide a named tuple input
+julia> apply_edges((xi, xj, e) -> xi.a + xi.b .* xj, g, xi=(a=x,b=z), xj=z)
+2×20 Matrix{Float64}:
+ 5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0
+ 5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0  5.0
+```
+
+The function [`propagate`](@ref) instead performs the [`apply_edges`](@ref) operation
+but then also applies a reduction over each node's neighborhood (see [`aggregate_neighbors`](@ref)).
+```julia
+julia> propagate((xi, xj, e) -> xi .+ xj, g, +, xi=x, xj=z)
+2×10 Matrix{Float64}:
+ 3.0  6.0  9.0  9.0  0.0  6.0  6.0  3.0  15.0  3.0
+ 3.0  6.0  9.0  9.0  0.0  6.0  6.0  3.0  15.0  3.0
+
+# Previous output can be understood by looking at the degree
+julia> degree(g)
+10-element Vector{Int64}:
+ 1
+ 2
+ 3
+ 3
+ 0
+ 2
+ 2
+ 1
+ 5
+ 1
+```
+
+### Implementing a custom Graph Convolutional Layer using Flux.jl
+
+Let's implement a simple graph convolutional layer with the message passing framework, using the machine learning library Flux.jl.
+The convolution reads 
+
+```math
+\mathbf{x}'_i = W \cdot \sum_{j \in N(i)}  \mathbf{x}_j
+```
+We will also add a bias and an activation function.
+
+```julia
+using Flux, Graphs, GraphNeuralNetworks
+
+struct GCN{A<:AbstractMatrix, B, F} <: GNNLayer
+    weight::A
+    bias::B
+    σ::F
+end
+
+Flux.@layer GCN # allow gpu movement, select trainable params etc...
+
+function GCN(ch::Pair{Int,Int}, σ=identity)
+    in, out = ch
+    W = Flux.glorot_uniform(out, in)
+    b = zeros(Float32, out)
+    GCN(W, b, σ)
+end
+
+function (l::GCN)(g::GNNGraph, x::AbstractMatrix{T}) where T
+    @assert size(x, 2) == g.num_nodes
+
+    # Computes messages from source/neighbour nodes (j) to target/root nodes (i).
+    # The message function will have to handle matrices of size (*, num_edges).
+    # In this simple case we just let the neighbor features go through.
+    message(xi, xj, e) = xj 
+
+    # The + operator gives the sum aggregation.
+    # `mean`, `max`, `min`, and `*` are other possibilities.
+    x = propagate(message, g, +, xj=x) 
+
+    return l.σ.(l.weight * x .+ l.bias)
+end
+```
+
+See the `GATConv` implementation [here](https://juliagraphs.org/GraphNeuralNetworks.jl/graphneuralnetworks/api/conv/) for a more complex example.
+
+
+## Built-in message functions
+
+In order to exploit optimized specializations of the [`propagate`](@ref), it is recommended 
+to use built-in message functions such as [`copy_xj`](@ref) whenever possible. 
diff --git a/GraphNeuralNetworks/docs/src/models.md b/GraphNeuralNetworks/docs/src/guides/models.md
similarity index 100%
rename from GraphNeuralNetworks/docs/src/models.md
rename to GraphNeuralNetworks/docs/src/guides/models.md
diff --git a/GraphNeuralNetworks/docs/src/guides/temporalgraph.md b/GraphNeuralNetworks/docs/src/guides/temporalgraph.md
new file mode 100644
index 000000000..560cfa8d6
--- /dev/null
+++ b/GraphNeuralNetworks/docs/src/guides/temporalgraph.md
@@ -0,0 +1,145 @@
+# Temporal Graphs
+
+Temporal graphs are graphs with time-varying topologies and features. In GNNGraphs.jl, temporal graphs with a fixed number of nodes over time are supported by the [`TemporalSnapshotsGNNGraph`](@ref) type.
+
+## Creating a TemporalSnapshotsGNNGraph
+
+A temporal graph can be created by passing a list of snapshots to the constructor. Each snapshot is a [`GNNGraph`](@ref). 
+
+```jldoctest
+julia> snapshots = [rand_graph(10,20) for i in 1:5];
+
+julia> tg = TemporalSnapshotsGNNGraph(snapshots)
+TemporalSnapshotsGNNGraph:
+  num_nodes: [10, 10, 10, 10, 10]
+  num_edges: [20, 20, 20, 20, 20]
+  num_snapshots: 5
+```
+
+A new temporal graph can be created by adding or removing snapshots to an existing temporal graph. 
+
+```jldoctest
+julia> new_tg = add_snapshot(tg, 3, rand_graph(10, 16)) # add a new snapshot at time 3
+TemporalSnapshotsGNNGraph:
+  num_nodes: [10, 10, 10, 10, 10, 10]
+  num_edges: [20, 20, 16, 20, 20, 20]
+  num_snapshots: 6
+```
+```jldoctest
+julia> snapshots = [rand_graph(10,20), rand_graph(10,14), rand_graph(10,22)];
+
+julia> tg = TemporalSnapshotsGNNGraph(snapshots)
+TemporalSnapshotsGNNGraph:
+  num_nodes: [10, 10, 10]
+  num_edges: [20, 14, 22]
+  num_snapshots: 3
+
+julia> new_tg = remove_snapshot(tg, 2) # remove snapshot at time 2
+TemporalSnapshotsGNNGraph:
+  num_nodes: [10, 10]
+  num_edges: [20, 22]
+  num_snapshots: 2
+```
+
+See [`rand_temporal_radius_graph`](@ref) and [`rand_temporal_hyperbolic_graph`](@ref) for generating random temporal graphs. 
+
+```jldoctest
+julia> tg = rand_temporal_radius_graph(10, 3, 0.1, 0.5)
+TemporalSnapshotsGNNGraph:
+  num_nodes: [10, 10, 10]
+  num_edges: [32, 30, 34]
+  num_snapshots: 3
+``` 
+
+## Basic Queries
+
+Basic queries are similar to those for [`GNNGraph`](@ref)s:
+```jldoctest
+julia> snapshots = [rand_graph(10,20), rand_graph(10,14), rand_graph(10,22)];
+
+julia> tg = TemporalSnapshotsGNNGraph(snapshots)
+TemporalSnapshotsGNNGraph:
+  num_nodes: [10, 10, 10]
+  num_edges: [20, 14, 22]
+  num_snapshots: 3
+
+julia> tg.num_nodes         # number of nodes in each snapshot
+3-element Vector{Int64}:
+ 10
+ 10
+ 10
+
+julia> tg.num_edges         # number of edges in each snapshot
+3-element Vector{Int64}:
+ 20
+ 14
+ 22
+
+julia> tg.num_snapshots     # number of snapshots
+3
+
+julia> tg.snapshots         # list of snapshots
+3-element Vector{GNNGraph{Tuple{Vector{Int64}, Vector{Int64}, Nothing}}}:
+ GNNGraph(10, 20) with no data
+ GNNGraph(10, 14) with no data
+ GNNGraph(10, 22) with no data
+
+julia> tg.snapshots[1]      # first snapshot, same as tg[1]
+GNNGraph:
+  num_nodes: 10
+  num_edges: 20
+```
+
+## Data Features
+A temporal graph can store global features for the entire time series in the `tgdata` field.
+Also, each snapshot can store node, edge, and graph features in the `ndata`, `edata`, and `gdata` fields, respectively. 
+
+```jldoctest
+julia> snapshots = [rand_graph(10,20; ndata = rand(3,10)), rand_graph(10,14; ndata = rand(4,10)), rand_graph(10,22; ndata = rand(5,10))]; # node features at construction time
+
+julia> tg = TemporalSnapshotsGNNGraph(snapshots);
+
+julia> tg.tgdata.y = rand(3,1); # add global features after construction
+
+julia> tg
+TemporalSnapshotsGNNGraph:
+  num_nodes: [10, 10, 10]
+  num_edges: [20, 14, 22]
+  num_snapshots: 3
+  tgdata:
+        y = 3×1 Matrix{Float64}
+
+julia> tg.ndata # vector of DataStore containing node features for each snapshot
+3-element Vector{DataStore}:
+ DataStore(10) with 1 element:
+  x = 3×10 Matrix{Float64}
+ DataStore(10) with 1 element:
+  x = 4×10 Matrix{Float64}
+ DataStore(10) with 1 element:
+  x = 5×10 Matrix{Float64}
+
+julia> [ds.x for ds in tg.ndata]; # vector containing the x feature of each snapshot
+
+julia> [g.x for g in tg.snapshots]; # same vector as above, now accessing 
+                                   # the x feature directly from the snapshots
+```
+
+## Graph convolutions on TemporalSnapshotsGNNGraph
+
+A graph convolutional layer can be applied to each snapshot independently. In the next example, we apply a `GINConv` layer to each snapshot of a `TemporalSnapshotsGNNGraph`.  
+
+```jldoctest
+julia> using GraphNeuralNetworks, Flux
+
+julia> snapshots = [rand_graph(10, 20; ndata = rand(3, 10)), rand_graph(10, 14; ndata = rand(3, 10))];
+
+julia> tg = TemporalSnapshotsGNNGraph(snapshots);
+
+julia> m = GINConv(Dense(3 => 1), 0.4);
+
+julia> output = m(tg, tg.ndata.x);
+
+julia> size(output[1])
+(1, 10)
+```
+