Skip to content

Commit

Permalink
Implement NGCF (#529)
Browse files Browse the repository at this point in the history
* Generated model base from LightGCN

* wip

* wip example

* add self-connection

* refactor code

* added sanity check

* Changed train batch size in example to 1024

* Updated readme for example folder

* Update Readme

* update docs

* Update block comment

---------

Co-authored-by: tqtg <[email protected]>
  • Loading branch information
darrylong and tqtg authored Oct 13, 2023
1 parent 5747077 commit 1044354
Show file tree
Hide file tree
Showing 9 changed files with 533 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ The recommender models supported by Cornac are listed below. Why don't you join
| | [Hybrid neural recommendation with joint deep representation learning of ratings and reviews (HRDR)](cornac/models/hrdr), [paper](https://www.sciencedirect.com/science/article/abs/pii/S0925231219313207) | [requirements.txt](cornac/models/hrdr/requirements.txt) | [hrdr_example.py](examples/hrdr_example.py)
| | [LightGCN: Simplifying and Powering Graph Convolution Network for Recommendation](cornac/models/lightgcn), [paper](https://arxiv.org/pdf/2002.02126.pdf) | [requirements.txt](cornac/models/lightgcn/requirements.txt) | [lightgcn_example.py](examples/lightgcn_example.py)
| 2019 | [Embarrassingly Shallow Autoencoders for Sparse Data (EASEᴿ)](cornac/models/ease), [paper](https://arxiv.org/pdf/1905.03375.pdf) | N/A | [ease_movielens.py](examples/ease_movielens.py)
| | [Neural Graph Collaborative Filtering](cornac/models/ngcf), [paper](https://arxiv.org/pdf/1905.08108.pdf) | [requirements.txt](cornac/models/ngcf/requirements.txt) | [ngcf_example.py](examples/ngcf_example.py)
| 2018 | [Collaborative Context Poisson Factorization (C2PF)](cornac/models/c2pf), [paper](https://www.ijcai.org/proceedings/2018/0370.pdf) | N/A | [c2pf_exp.py](examples/c2pf_example.py)
| | [Graph Convolutional Matrix Completion (GCMC)](cornac/models/gcmc), [paper](https://www.kdd.org/kdd2018/files/deep-learning-day/DLDay18_paper_32.pdf) | [requirements.txt](cornac/models/gcmc/requirements.txt) | [gcmc_example.py](examples/gcmc_example.py)
| | [Multi-Task Explainable Recommendation (MTER)](cornac/models/mter), [paper](https://arxiv.org/pdf/1806.03568.pdf) | N/A | [mter_exp.py](examples/mter_example.py)
Expand Down
1 change: 1 addition & 0 deletions cornac/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
from .ncf import GMF
from .ncf import MLP
from .ncf import NeuMF
from .ngcf import NGCF
from .nmf import NMF
from .online_ibpr import OnlineIBPR
from .pcrl import PCRL
Expand Down
16 changes: 16 additions & 0 deletions cornac/models/ngcf/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Copyright 2018 The Cornac Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

from .recom_ngcf import NGCF
185 changes: 185 additions & 0 deletions cornac/models/ngcf/ngcf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
# Reference: https://github.com/dmlc/dgl/blob/master/examples/pytorch/NGCF/NGCF/model.py

import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
import dgl.function as fn


# Node type names for the two sides of the user-item heterograph; they are
# used both as DGL node types and as keys of the per-type feature dicts.
USER_KEY = "user"
ITEM_KEY = "item"


def construct_graph(data_set):
    """
    Build the user-item heterograph for a cornac data set.

    Every user and every item node receives a self-loop, and each observed
    (user, item) pair contributes one edge in each direction.

    Parameters
    ----------
    data_set : cornac.data.dataset.Dataset
        The data set as provided by cornac. Its ``uir_tuple``,
        ``total_users`` and ``total_items`` attributes are read.

    Returns
    -------
    The object returned by ``dgl.heterograph`` with node types ``USER_KEY``
    and ``ITEM_KEY`` and the edge types ``user_self``, ``item_self``,
    ``user_item`` and ``item_user``.
    """
    u_idx, i_idx, _ = data_set.uir_tuple
    n_users = data_set.total_users
    n_items = data_set.total_items

    # one self-connection per node, so a node's own embedding takes part
    # in message passing
    user_loop = list(range(n_users))
    item_loop = list(range(n_items))

    edges_by_type = {
        (USER_KEY, "user_self", USER_KEY): (user_loop, user_loop),
        (ITEM_KEY, "item_self", ITEM_KEY): (item_loop, item_loop),
        # interactions from the train data, stored in both directions
        (USER_KEY, "user_item", ITEM_KEY): (u_idx, i_idx),
        (ITEM_KEY, "item_user", USER_KEY): (i_idx, u_idx),
    }
    node_counts = {USER_KEY: n_users, ITEM_KEY: n_items}

    return dgl.heterograph(edges_by_type, num_nodes_dict=node_counts)


class NGCFLayer(nn.Module):
    """
    Single NGCF message-passing layer over a heterograph.

    Self-loop edge types carry ``W1(h_src)``. Every other edge type carries
    ``norm * (W1(h_src) + W2(h_src * h_dst))``. Messages are summed per edge
    type and then across edge types, and the result goes through LeakyReLU,
    dropout and row-wise L2 normalisation.

    Parameters
    ----------
    in_size : int
        Size of the incoming node features.
    out_size : int
        Size of the features produced by this layer.
    norm_dict : dict
        Maps a canonical edge type ``(srctype, etype, dsttype)`` to the
        normalisation factor multiplied into that edge type's messages.
        Only looked up for edge types whose source and destination node
        types differ.
    dropout : float
        Probability handed to ``nn.Dropout``.
    """

    def __init__(self, in_size, out_size, norm_dict, dropout):
        super().__init__()
        self.in_size = in_size
        self.out_size = out_size

        # W1 transforms the source embedding, W2 the element-wise
        # source/destination interaction
        self.W1 = nn.Linear(in_size, out_size, bias=True)
        self.W2 = nn.Linear(in_size, out_size, bias=True)

        self.leaky_relu = nn.LeakyReLU(0.2)
        self.dropout = nn.Dropout(dropout)

        # Xavier weights and zero biases; done after both linears exist so
        # that random numbers are drawn in the same order as layer creation
        for linear in (self.W1, self.W2):
            torch.nn.init.xavier_uniform_(linear.weight)
            torch.nn.init.constant_(linear.bias, 0)

        self.norm_dict = norm_dict

    def forward(self, g, feat_dict):
        """
        Run one round of message passing.

        Parameters
        ----------
        g : graph
            Heterograph exposing ``canonical_etypes``, ``edges``, ``nodes``,
            ``ntypes`` and ``multi_update_all`` (a DGL heterograph such as
            the one built by ``construct_graph``). Its node and edge data
            are written to as a side effect.
        feat_dict : dict
            Maps each node type to its feature tensor, indexed by node id
            along the first dimension.

        Returns
        -------
        dict
            Maps each node type in ``g.ntypes`` to its updated features.
        """
        update_funcs = {}  # canonical edge type -> (message fn, reduce fn)
        for canonical in g.canonical_etypes:
            src_type, rel, dst_type = canonical

            if src_type == dst_type:
                # self-loop: the message is the transformed node feature,
                # kept in node data and copied along the loop edge
                g.nodes[src_type].data[rel] = self.W1(feat_dict[src_type])
                update_funcs[canonical] = (fn.copy_u(rel, "m"), fn.sum("m", "h"))
                continue

            src_ids, dst_ids = g.edges(etype=canonical)
            edge_norm = self.norm_dict[canonical]
            transformed = self.W1(feat_dict[src_type][src_ids])
            interaction = self.W2(
                feat_dict[src_type][src_ids] * feat_dict[dst_type][dst_ids]
            )
            # per-edge message, kept in edge data
            g.edges[canonical].data[rel] = edge_norm * (transformed + interaction)
            update_funcs[canonical] = (fn.copy_e(rel, "m"), fn.sum("m", "h"))

        # sum within each edge type first, then across edge types
        g.multi_update_all(update_funcs, "sum")

        updated = {}
        for ntype in g.ntypes:
            activated = self.leaky_relu(g.nodes[ntype].data["h"])
            dropped = self.dropout(activated)
            updated[ntype] = F.normalize(dropped, dim=1, p=2)  # row-wise L2
        return updated


class Model(nn.Module):
    """
    NGCF network: learnable user/item embeddings plus stacked ``NGCFLayer``s.

    Parameters
    ----------
    g : graph
        Heterograph (as built by ``construct_graph``) used to size the
        embedding tables and to precompute per-edge normalisation factors.
    in_size : int
        Size of the initial node embeddings.
    layer_sizes : sequence of int
        Output size of each propagation layer; must not be empty.
    dropout_rates : sequence of float
        Dropout probability of each layer; same length as ``layer_sizes``.
    lambda_reg : float
        Weight of the regularisation term in ``loss_fn``.
    device : optional
        Stored on the instance; not otherwise used inside this class.
    """

    def __init__(self, g, in_size, layer_sizes, dropout_rates, lambda_reg, device=None):
        super().__init__()
        self.norm_dict = dict()
        self.lambda_reg = lambda_reg
        self.device = device

        # per-edge factor 1 / sqrt(out_degree(src) * in_degree(dst)),
        # computed separately for every canonical edge type
        for canonical in g.canonical_etypes:
            src_ids, dst_ids = g.edges(etype=canonical)
            in_deg = g.in_degrees(dst_ids, etype=canonical).float()
            out_deg = g.out_degrees(src_ids, etype=canonical).float()
            # extra trailing dimension so the factor broadcasts over features
            self.norm_dict[canonical] = torch.pow(out_deg * in_deg, -0.5).unsqueeze(1)

        self.layers = nn.ModuleList()

        # sanity check, just to ensure layer sizes and dropout_rates have the same size
        assert len(layer_sizes) == len(
            dropout_rates
        ), "'layer_sizes' and 'dropout_rates' must be of the same size"

        # the first layer consumes the raw embeddings ...
        self.layers.append(
            NGCFLayer(in_size, layer_sizes[0], self.norm_dict, dropout_rates[0])
        )
        self.num_layers = len(layer_sizes)
        # ... every later layer consumes the output of the previous one
        for prev_size, next_size, rate in zip(
            layer_sizes, layer_sizes[1:], dropout_rates[1:]
        ):
            self.layers.append(
                NGCFLayer(prev_size, next_size, self.norm_dict, rate)
            )
        self.initializer = nn.init.xavier_uniform_

        # one trainable embedding table per node type
        embeddings = {}
        for ntype in g.ntypes:
            table = torch.empty(g.num_nodes(ntype), in_size)
            embeddings[ntype] = nn.Parameter(self.initializer(table))
        self.feature_dict = nn.ParameterDict(embeddings)

    def forward(self, g, users=None, pos_items=None, neg_items=None):
        """
        Compute final user and item embeddings.

        The initial embedding and the output of every layer are concatenated
        along the feature dimension.

        Parameters
        ----------
        g : graph
            Heterograph to propagate over.
        users, pos_items, neg_items : index or None
            Row selectors into the user / item embedding matrices. When an
            argument is ``None`` the full matrix is returned in its place.

        Returns
        -------
        tuple
            ``(user_embeddings, pos_item_embeddings, neg_item_embeddings)``.
        """
        h_dict = {ntype: self.feature_dict[ntype] for ntype in g.ntypes}

        # layer 0 is the raw embedding; later entries are layer outputs
        user_parts = [h_dict[USER_KEY]]
        item_parts = [h_dict[ITEM_KEY]]
        for layer in self.layers:
            h_dict = layer(g, h_dict)
            user_parts.append(h_dict[USER_KEY])
            item_parts.append(h_dict[ITEM_KEY])
        all_users = torch.cat(user_parts, 1)
        all_items = torch.cat(item_parts, 1)

        def select(table, index):
            # no index means "give me every row"
            if index is None:
                return table
            return table[index, :]

        return (
            select(all_users, users),
            select(all_items, pos_items),
            select(all_items, neg_items),
        )

    def loss_fn(self, users, pos_items, neg_items):
        """
        BPR loss with a squared-norm regulariser.

        Parameters
        ----------
        users, pos_items, neg_items : torch.Tensor
            Embedding batches, aligned row by row; scores are row-wise dot
            products between ``users`` and the item batches.

        Returns
        -------
        tuple
            ``(bpr_loss + lambda_reg * reg_loss, bpr_loss, reg_loss)``.
        """
        pos_scores = (users * pos_items).sum(1)
        neg_scores = (users * neg_items).sum(1)
        bpr_loss = F.softplus(neg_scores - pos_scores).mean()

        # half the summed squared norms, averaged over the batch size
        squared_norms = (
            torch.norm(users) ** 2
            + torch.norm(pos_items) ** 2
            + torch.norm(neg_items) ** 2
        )
        reg_loss = (1 / 2) * squared_norms / len(users)

        total_loss = bpr_loss + self.lambda_reg * reg_loss
        return total_loss, bpr_loss, reg_loss
Loading

0 comments on commit 1044354

Please sign in to comment.