diff --git a/.gitignore b/.gitignore index 5ff0a0ffa..270230400 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,9 @@ __pycache__/ # C extensions *.so +cornac/models/*/*.cpp +cornac/models/*/cython/*.cpp +cornac/utils/*.cpp # Distribution / packaging bin/ diff --git a/cornac/data/dataset.py b/cornac/data/dataset.py index 59deb9e09..5021b0335 100644 --- a/cornac/data/dataset.py +++ b/cornac/data/dataset.py @@ -98,8 +98,6 @@ def __init__( self.min_rating = np.min(r_values) self.global_mean = np.mean(r_values) - self.__total_users = None - self.__total_items = None self.__user_ids = None self.__item_ids = None @@ -111,28 +109,6 @@ def __init__( self.__csc_matrix = None self.__dok_matrix = None - @property - def total_users(self): - """Total number of users including test and validation users if exists""" - return self.__total_users if self.__total_users is not None else self.num_users - - @total_users.setter - def total_users(self, input_value): - """Set total number of users for the dataset""" - assert input_value >= self.num_users - self.__total_users = input_value - - @property - def total_items(self): - """Total number of items including test and validation items if exists""" - return self.__total_items if self.__total_items is not None else self.num_items - - @total_items.setter - def total_items(self, input_value): - """Set total number of items for the dataset""" - assert input_value >= self.num_items - self.__total_items = input_value - @property def user_ids(self): """Return the list of raw user ids""" diff --git a/cornac/eval_methods/base_method.py b/cornac/eval_methods/base_method.py index 0bc29f683..e6cd47e47 100644 --- a/cornac/eval_methods/base_method.py +++ b/cornac/eval_methods/base_method.py @@ -540,9 +540,6 @@ def _build_datasets(self, train_data, test_data, val_data=None): print("Total users = {}".format(self.total_users)) print("Total items = {}".format(self.total_items)) - self.train_set.total_users = self.total_users - 
self.train_set.total_items = self.total_items - def _build_modalities(self): for user_modality in [ self.user_feature, diff --git a/cornac/models/causalrec/recom_causalrec.py b/cornac/models/causalrec/recom_causalrec.py index dd89d7ea8..bc47ac425 100644 --- a/cornac/models/causalrec/recom_causalrec.py +++ b/cornac/models/causalrec/recom_causalrec.py @@ -185,8 +185,8 @@ def fit(self, train_set, val_set=None): train_features = train_set.item_image.features[: self.total_items] train_features = train_features.astype(np.float32) self._init( - n_users=train_set.total_users, - n_items=train_set.total_items, + n_users=self.total_users, + n_items=self.total_items, features=train_features, ) diff --git a/cornac/models/fm/recom_fm.pyx b/cornac/models/fm/recom_fm.pyx index 0898a25b3..a6cf50683 100644 --- a/cornac/models/fm/recom_fm.pyx +++ b/cornac/models/fm/recom_fm.pyx @@ -265,7 +265,7 @@ class FM(Recommender): (uid, iid, val) = train_set.uir_tuple cdef Data *train = _prepare_data( uid, - iid + train_set.total_users, + iid + self.total_users, val.astype(np.float32), num_feature, self.method in ["als", "mcmc"], diff --git a/cornac/models/gcmc/gcmc.py b/cornac/models/gcmc/gcmc.py index d7ad73a38..21228d578 100644 --- a/cornac/models/gcmc/gcmc.py +++ b/cornac/models/gcmc/gcmc.py @@ -13,165 +13,14 @@ from .utils import get_optimizer, torch_net_info, torch_total_param_num -def _apply_support(graph, rating_values, data_set, symm=True): - """Adds graph support. 
Returns DGLGraph.""" - - def _calc_norm(val): - val = val.numpy().astype("float32") - val[val == 0.0] = np.inf - val = torch.FloatTensor(1.0 / np.sqrt(val)) - return val.unsqueeze(1) - - n_users, n_items = data_set.total_users, data_set.total_items - - user_ci = [] - user_cj = [] - item_ci = [] - item_cj = [] - - for rating in rating_values: - rating = str(rating).replace(".", "_") - user_ci.append(graph[f"rev-{rating}"].in_degrees()) - item_ci.append(graph[rating].in_degrees()) - - if symm: - user_cj.append(graph[rating].out_degrees()) - item_cj.append(graph[f"rev-{rating}"].out_degrees()) - else: - user_cj.append(torch.zeros((n_users,))) - item_cj.append(torch.zeros((n_items,))) - user_ci = _calc_norm(sum(user_ci)) - item_ci = _calc_norm(sum(item_ci)) - if symm: - user_cj = _calc_norm(sum(user_cj)) - item_cj = _calc_norm(sum(item_cj)) - else: - user_cj = torch.ones( - n_users, - ) - item_cj = torch.ones( - n_items, - ) - graph.nodes["user"].data.update({"ci": user_ci, "cj": user_cj}) - graph.nodes["item"].data.update({"ci": item_ci, "cj": item_cj}) - - return graph - - -def _generate_enc_graph(data_set, add_support=False): - """ - Generates encoding graph given a cornac data set - - Parameters - ---------- - data_set : cornac.data.dataset.Dataset - The data set as provided by cornac - add_support : bool, optional - """ - data_dict = {} - num_nodes_dict = {"user": data_set.total_users, "item": data_set.total_items} - rating_row, rating_col, rating_values = data_set.uir_tuple - for rating in set(rating_values): - ridx = np.where(rating_values == rating) - rrow = rating_row[ridx] - rcol = rating_col[ridx] - rating = str(rating).replace(".", "_") - data_dict.update( - { - ("user", str(rating), "item"): (rrow, rcol), - ("item", f"rev-{str(rating)}", "user"): (rcol, rrow), - } - ) - - graph = dgl.heterograph(data_dict, num_nodes_dict=num_nodes_dict) - - # sanity check - assert ( - len(data_set.uir_tuple[2]) - == sum([graph.num_edges(et) for et in graph.etypes]) // 2 - 
) - - if add_support: - graph = _apply_support( - graph=graph, - rating_values=np.unique(rating_values), - data_set=data_set, - ) - - return graph - - -def _generate_dec_graph(data_set): - """ - Generates decoding graph given a cornac data set - - Parameters - ---------- - data_set : cornac.data.dataset.Dataset - The data set as provided by cornac - - Returns - ------- - graph : dgl.heterograph - Heterograph containing user-item edges and nodes - """ - rating_pairs = data_set.uir_tuple[:2] - ones = np.ones_like(rating_pairs[0]) - user_item_ratings_coo = sp.coo_matrix( - (ones, rating_pairs), - shape=(data_set.total_users, data_set.total_items), - dtype=np.float32, - ) - - graph = dgl.bipartite_from_scipy( - user_item_ratings_coo, utype="_U", etype="_E", vtype="_V" - ) - - return dgl.heterograph( - {("user", "rate", "item"): graph.edges()}, - num_nodes_dict={"user": data_set.total_users, "item": data_set.total_items}, - ) - - -def _generate_test_user_graph(user_idx, total_users, total_items): - """ - Generates decoding graph given a cornac data set - - Parameters - ---------- - data_set : cornac.data.dataset.Dataset - The data set as provided by cornac - - Returns - ------- - graph : dgl.heterograph - Heterograph containing user-item edges and nodes - """ - u_list = np.array([user_idx for _ in range(total_items)]) - i_list = np.array([item_idx for item_idx in range(total_items)]) - - rating_pairs = (u_list, i_list) - ones = np.ones_like(rating_pairs[0]) - user_item_ratings_coo = sp.coo_matrix( - (ones, rating_pairs), - shape=(total_users, total_items), - dtype=np.float32, - ) - - graph = dgl.bipartite_from_scipy( - user_item_ratings_coo, utype="_U", etype="_E", vtype="_V" - ) - - return dgl.heterograph( - {("user", "rate", "item"): graph.edges()}, - num_nodes_dict={"user": total_users, "item": total_items}, - ) - class Model: def __init__( self, - activation_model, + rating_values, + total_users, + total_items, + activation_func, gcn_agg_units, gcn_out_units, 
gcn_dropout, @@ -181,14 +30,9 @@ def __init__( verbose, seed, ): - self.activation_model = activation_model - self.gcn_agg_units = gcn_agg_units - self.gcn_out_units = gcn_out_units - self.gcn_dropout = gcn_dropout - self.gcn_agg_accum = gcn_agg_accum - self.share_param = share_param - self.gen_r_num_basis_func = gen_r_num_basis_func - + self.rating_values = rating_values + self.total_users = total_users + self.total_items = total_items self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.verbose = verbose @@ -201,92 +45,180 @@ def __init__( if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed) - def train( - self, - train_set, - val_set, - max_iter, - learning_rate, - optimizer, - train_grad_clip, - train_valid_interval, - train_early_stopping_patience, - train_min_learning_rate, - train_decay_patience, - train_lr_decay_factor, - ): - # Prepare data for training - ( - rating_values, - nd_positive_rating_values, - train_dec_graph, - valid_enc_graph, - valid_dec_graph, - train_labels, - train_truths, - valid_truths, - ) = self._prepare_data(train_set, val_set) - # Build Net self.net = NeuralNetwork( - self.activation_model, + activation_func, rating_values, - train_set.total_users, - train_set.total_items, - self.gcn_agg_units, - self.gcn_out_units, - self.gcn_dropout, - self.gcn_agg_accum, - self.gen_r_num_basis_func, - self.share_param, + total_users, + total_items, + gcn_agg_units, + gcn_out_units, + gcn_dropout, + gcn_agg_accum, + gen_r_num_basis_func, + share_param, self.device, ).to(self.device) - optimizer = get_optimizer(optimizer)(self.net.parameters(), lr=learning_rate) - rating_loss_net = nn.CrossEntropyLoss() + def _apply_support(self, graph, symm=True): + """Adds graph support. 
Returns DGLGraph.""" + + def _calc_norm(val): + val = val.numpy().astype("float32") + val[val == 0.0] = np.inf + val = torch.FloatTensor(1.0 / np.sqrt(val)) + return val.unsqueeze(1) + + user_ci = [] + user_cj = [] + item_ci = [] + item_cj = [] + + for rating in self.rating_values: + rating = str(rating).replace(".", "_") + user_ci.append(graph[f"rev-{rating}"].in_degrees()) + item_ci.append(graph[rating].in_degrees()) + + if symm: + user_cj.append(graph[rating].out_degrees()) + item_cj.append(graph[f"rev-{rating}"].out_degrees()) + else: + user_cj.append(torch.zeros((self.total_users,))) + item_cj.append(torch.zeros((self.total_items,))) + user_ci = _calc_norm(sum(user_ci)) + item_ci = _calc_norm(sum(item_ci)) + if symm: + user_cj = _calc_norm(sum(user_cj)) + item_cj = _calc_norm(sum(item_cj)) + else: + user_cj = torch.ones(self.total_users,) + item_cj = torch.ones(self.total_items,) + graph.nodes["user"].data.update({"ci": user_ci, "cj": user_cj}) + graph.nodes["item"].data.update({"ci": item_ci, "cj": item_cj}) - self._train_model( - rating_values, - train_dec_graph, - valid_enc_graph, - valid_dec_graph, - train_labels, - train_truths, - valid_truths, - nd_positive_rating_values, - rating_loss_net, - max_iter, - optimizer, - learning_rate, - train_grad_clip, - train_valid_interval, - train_early_stopping_patience, - train_min_learning_rate, - train_decay_patience, - train_lr_decay_factor, + return graph + + + def _generate_enc_graph(self, data_set, add_support=False): + """ + Generates encoding graph given a cornac data set + + Parameters + ---------- + data_set : cornac.data.dataset.Dataset + The data set as provided by cornac + + add_support : bool, optional + """ + data_dict = {} + num_nodes_dict = {"user": self.total_users, "item": self.total_items} + rating_row, rating_col, rating_values = data_set.uir_tuple + for rating in set(rating_values): + ridx = np.where(rating_values == rating) + rrow = rating_row[ridx] + rcol = rating_col[ridx] + rating = 
str(rating).replace(".", "_") + data_dict.update( + { + ("user", str(rating), "item"): (rrow, rcol), + ("item", f"rev-{str(rating)}", "user"): (rcol, rrow), + } + ) + + graph = dgl.heterograph(data_dict, num_nodes_dict=num_nodes_dict) + + # sanity check + assert ( + len(data_set.uir_tuple[2]) + == sum([graph.num_edges(et) for et in graph.etypes]) // 2 + ) + + graph = self._apply_support(graph) if add_support else graph + + return graph + + + def _generate_dec_graph(self, data_set): + """ + Generates decoding graph given a cornac data set + + Parameters + ---------- + data_set : cornac.data.dataset.Dataset + The data set as provided by cornac + + Returns + ------- + graph : dgl.heterograph + Heterograph containing user-item edges and nodes + """ + user_nodes, item_nodes, ratings = data_set.uir_tuple + user_item_ratings_coo = sp.coo_matrix( + (np.ones_like(ratings), (user_nodes, item_nodes)), + shape=(self.total_users, self.total_items), + dtype=np.float32, + ) + + graph = dgl.bipartite_from_scipy( + user_item_ratings_coo, utype="_U", etype="_E", vtype="_V" + ) + + return dgl.heterograph( + {("user", "rate", "item"): graph.edges()}, + num_nodes_dict={"user": self.total_users, "item": self.total_items}, + ) + + + def _generate_test_user_graph(self, user_idx): + """ + Generates decoding graph connecting a single user to every item + + Parameters + ---------- + user_idx : int + Index of the user to pair with all `total_items` items + + Returns + ------- + graph : dgl.heterograph + Heterograph containing user-item edges and nodes + """ + item_nodes = np.arange(self.total_items) + user_nodes = np.full_like(item_nodes, user_idx) + user_item_ratings_coo = sp.coo_matrix( + (np.ones_like(user_nodes), (user_nodes, item_nodes)), + shape=(self.total_users, self.total_items), + dtype=np.float32, + ) + + graph = dgl.bipartite_from_scipy( + user_item_ratings_coo, utype="_U", etype="_E", vtype="_V" + ) + + return dgl.heterograph( + {("user", "rate", "item"): graph.edges()}, +
num_nodes_dict={"user": self.total_users, "item": self.total_items}, ) def _prepare_data(self, train_set, val_set): - rating_values = train_set.uir_tuple[2] # rating list - rating_values = np.unique(rating_values) - nd_positive_rating_values = torch.FloatTensor(rating_values).to(self.device) + nd_positive_rating_values = torch.FloatTensor(self.rating_values).to(self.device) # Prepare Data def _generate_labels(ratings): - labels = torch.LongTensor(np.searchsorted(rating_values, ratings)).to( + labels = torch.LongTensor(np.searchsorted(self.rating_values, ratings)).to( self.device ) return labels - self.train_enc_graph = _generate_enc_graph(train_set, add_support=True) - train_dec_graph = _generate_dec_graph(train_set) + self.train_enc_graph = self._generate_enc_graph(train_set, add_support=True) + train_dec_graph = self._generate_dec_graph(train_set) train_labels = _generate_labels(train_set.uir_tuple[2]) train_truths = torch.FloatTensor(train_set.uir_tuple[2]).to(self.device) def _count_pairs(graph): pair_count = 0 - for r_val in rating_values: + for r_val in self.rating_values: r_val = str(r_val).replace(".", "_") pair_count += graph.num_edges(str(r_val)) return pair_count @@ -307,7 +239,7 @@ def _count_pairs(graph): valid_enc_graph = self.train_enc_graph if val_set: - valid_dec_graph = _generate_dec_graph(val_set) + valid_dec_graph = self._generate_dec_graph(val_set) valid_truths = torch.FloatTensor(val_set.uir_tuple[2]).to(self.device) logging.info( "Valid enc graph: %s users, %s items, %s pairs", @@ -325,7 +257,6 @@ def _count_pairs(graph): valid_dec_graph = None return ( - rating_values, nd_positive_rating_values, train_dec_graph, valid_enc_graph, @@ -476,6 +407,56 @@ def _train_model( # load best model self.net.load_state_dict(best_model_state_dict) + + def train( + self, + train_set, + val_set, + max_iter, + learning_rate, + optimizer, + train_grad_clip, + train_valid_interval, + train_early_stopping_patience, + train_min_learning_rate, + 
train_decay_patience, + train_lr_decay_factor, + ): + # Prepare data for training + ( + nd_positive_rating_values, + train_dec_graph, + valid_enc_graph, + valid_dec_graph, + train_labels, + train_truths, + valid_truths, + ) = self._prepare_data(train_set, val_set) + + optimizer = get_optimizer(optimizer)(self.net.parameters(), lr=learning_rate) + rating_loss_net = nn.CrossEntropyLoss() + + self._train_model( + self.rating_values, + train_dec_graph, + valid_enc_graph, + valid_dec_graph, + train_labels, + train_truths, + valid_truths, + nd_positive_rating_values, + rating_loss_net, + max_iter, + optimizer, + learning_rate, + train_grad_clip, + train_valid_interval, + train_early_stopping_patience, + train_min_learning_rate, + train_decay_patience, + train_lr_decay_factor, + ) + def predict(self, test_set): """ @@ -494,7 +475,7 @@ def predict(self, test_set): Dictionary containing '{user_idx}-{item_idx}' as key and {score} as value. """ - test_dec_graph = _generate_dec_graph(test_set) + test_dec_graph = self._generate_dec_graph(test_set) test_dec_graph = test_dec_graph.int().to(self.device) self.net.eval() @@ -502,10 +483,7 @@ def predict(self, test_set): with torch.no_grad(): pred_ratings = self.net(self.train_enc_graph, test_dec_graph) - test_rating_values = test_set.uir_tuple[2] - test_rating_values = np.unique(test_rating_values) - - nd_positive_rating_values = torch.FloatTensor(test_rating_values).to( + nd_positive_rating_values = torch.FloatTensor(self.rating_values).to( self.device ) @@ -515,26 +493,14 @@ def predict(self, test_set): test_pred_ratings = test_pred_ratings.cpu().numpy() - uid_list = test_set.uir_tuple[0] - uid_list = np.unique(uid_list) - - u_list = np.array( - [user_idx for _ in range(test_set.total_items) for user_idx in uid_list] - ) - i_list = np.array( - [item_idx for item_idx in range(test_set.total_items) for _ in uid_list] - ) - - u_list = u_list.tolist() - i_list = i_list.tolist() - + user_nodes, item_nodes, _ = test_set.uir_tuple 
u_i_rating_dict = { - f"{u_list[idx]}-{i_list[idx]}": rating + f"{user_nodes[idx]}-{item_nodes[idx]}": rating for idx, rating in enumerate(test_pred_ratings) } return u_i_rating_dict - def predict_one(self, user_idx, total_users, total_items, rating_values): + def predict_one(self, user_idx): """ Processes single user_idx from test set and returns numpy list of scores for all items. @@ -549,7 +515,7 @@ def predict_one(self, user_idx, total_users, total_items, rating_values): test_pred_ratings : numpy.array Numpy array containing all ratings for the given user_idx. """ - test_dec_graph = _generate_test_user_graph(user_idx, total_users, total_items) + test_dec_graph = self._generate_test_user_graph(user_idx) test_dec_graph = test_dec_graph.int().to(self.device) self.net.eval() @@ -557,7 +523,7 @@ def predict_one(self, user_idx, total_users, total_items, rating_values): with torch.no_grad(): pred_ratings = self.net(self.train_enc_graph, test_dec_graph) - nd_positive_rating_values = torch.FloatTensor(rating_values).to(self.device) + nd_positive_rating_values = torch.FloatTensor(self.rating_values).to(self.device) test_pred_ratings = ( torch.softmax(pred_ratings, dim=1) * nd_positive_rating_values.view(1, -1) diff --git a/cornac/models/gcmc/nn_modules.py b/cornac/models/gcmc/nn_modules.py index 64faf9bf1..78a26fe77 100644 --- a/cornac/models/gcmc/nn_modules.py +++ b/cornac/models/gcmc/nn_modules.py @@ -16,7 +16,7 @@ class NeuralNetwork(nn.Module): def __init__( self, - activation_model, + activation_func, rating_values, n_users, n_items, @@ -29,7 +29,7 @@ def __init__( device, ): super(NeuralNetwork, self).__init__() - self._act = get_activation(activation_model) + self._act = get_activation(activation_func) self.encoder = GCMCLayer( rating_values, n_users, diff --git a/cornac/models/gcmc/recom_gcmc.py b/cornac/models/gcmc/recom_gcmc.py index a9051543f..855d61877 100644 --- a/cornac/models/gcmc/recom_gcmc.py +++ b/cornac/models/gcmc/recom_gcmc.py @@ -37,7 +37,7 @@ class 
GCMC(Recommender): optimizer: string, default: 'adam'. Supported values: 'adam','sgd'. The optimization method used for SGD - activation_model: string, default: 'leaky' + activation_func: string, default: 'leaky_relu' The activation function used in the GCMC model. Supported values: ['leaky', 'linear','sigmoid','relu', 'tanh'] @@ -98,7 +98,7 @@ def __init__( max_iter=2000, learning_rate=0.01, optimizer="adam", - activation_model="leaky", + activation_func="leaky_relu", gcn_agg_units=500, gcn_out_units=75, gcn_dropout=0.7, @@ -118,7 +118,7 @@ def __init__( super().__init__(name=name, trainable=trainable, verbose=verbose) # architecture params - self.activation_model = activation_model + self.activation_func = activation_func self.gcn_agg_units = gcn_agg_units self.gcn_out_units = gcn_out_units self.gcn_dropout = gcn_dropout @@ -160,8 +160,11 @@ def fit(self, train_set, val_set=None): self.rating_values = np.unique(train_set.uir_tuple[2]) - self.model = Model( - activation_model=self.activation_model, + self.model = Model( + rating_values=self.rating_values, + total_users=self.total_users, + total_items=self.total_items, + activation_func=self.activation_func, gcn_agg_units=self.gcn_agg_units, gcn_out_units=self.gcn_out_units, gcn_dropout=self.gcn_dropout, @@ -218,8 +221,6 @@ def score(self, user_idx, item_idx=None): """ if item_idx is None: # Return scores of all items for a given user - return self.model.predict_one( - user_idx, self.total_users, self.total_items, self.rating_values - ) + return self.model.predict_one(user_idx) # Return score of known user/item return self.u_i_rating_dict.get(f"{user_idx}-{item_idx}", self.default_score()) diff --git a/cornac/models/gcmc/utils.py b/cornac/models/gcmc/utils.py index f16e1496b..29e8ec53a 100644 --- a/cornac/models/gcmc/utils.py +++ b/cornac/models/gcmc/utils.py @@ -17,7 +17,7 @@ def get_activation(act): if act is None: return lambda x: x if isinstance(act, str): - if act == "leaky": + if act == "leaky_relu": return
nn.LeakyReLU(0.1) elif act == "relu": return nn.ReLU() diff --git a/cornac/models/lightgcn/lightgcn.py b/cornac/models/lightgcn/lightgcn.py index eedd72b42..6fcdb7f5c 100644 --- a/cornac/models/lightgcn/lightgcn.py +++ b/cornac/models/lightgcn/lightgcn.py @@ -9,7 +9,7 @@ ITEM_KEY = "item" -def construct_graph(data_set): +def construct_graph(data_set, total_users, total_items): """ Generates graph given a cornac data set @@ -24,7 +24,7 @@ def construct_graph(data_set): (USER_KEY, "user_item", ITEM_KEY): (user_indices, item_indices), (ITEM_KEY, "item_user", USER_KEY): (item_indices, user_indices), } - num_dict = {USER_KEY: data_set.total_users, ITEM_KEY: data_set.total_items} + num_dict = {USER_KEY: total_users, ITEM_KEY: total_items} return dgl.heterograph(data_dict, num_nodes_dict=num_dict) diff --git a/cornac/models/lightgcn/recom_lightgcn.py b/cornac/models/lightgcn/recom_lightgcn.py index 9ca2a3227..01fc26120 100644 --- a/cornac/models/lightgcn/recom_lightgcn.py +++ b/cornac/models/lightgcn/recom_lightgcn.py @@ -129,7 +129,7 @@ def fit(self, train_set, val_set=None): if torch.cuda.is_available(): torch.cuda.manual_seed_all(self.seed) - graph = construct_graph(train_set).to(self.device) + graph = construct_graph(train_set, self.total_users, self.total_items).to(self.device) model = Model( graph, self.emb_size, diff --git a/cornac/models/ngcf/ngcf.py b/cornac/models/ngcf/ngcf.py index 64e473bbc..dd184c55e 100644 --- a/cornac/models/ngcf/ngcf.py +++ b/cornac/models/ngcf/ngcf.py @@ -11,7 +11,7 @@ ITEM_KEY = "item" -def construct_graph(data_set): +def construct_graph(data_set, total_users, total_items): """ Generates graph given a cornac data set @@ -23,8 +23,8 @@ def construct_graph(data_set): user_indices, item_indices, _ = data_set.uir_tuple # construct graph from the train data and add self-loops - user_selfs = [i for i in range(data_set.total_users)] - item_selfs = [i for i in range(data_set.total_items)] + user_selfs = [i for i in range(total_users)] + 
item_selfs = [i for i in range(total_items)] data_dict = { (USER_KEY, "user_self", USER_KEY): (user_selfs, user_selfs), @@ -32,7 +32,7 @@ def construct_graph(data_set): (USER_KEY, "user_item", ITEM_KEY): (user_indices, item_indices), (ITEM_KEY, "item_user", USER_KEY): (item_indices, user_indices), } - num_dict = {USER_KEY: data_set.total_users, ITEM_KEY: data_set.total_items} + num_dict = {USER_KEY: total_users, ITEM_KEY: total_items} return dgl.heterograph(data_dict, num_nodes_dict=num_dict) diff --git a/cornac/models/ngcf/recom_ngcf.py b/cornac/models/ngcf/recom_ngcf.py index d3e00515e..719373c40 100644 --- a/cornac/models/ngcf/recom_ngcf.py +++ b/cornac/models/ngcf/recom_ngcf.py @@ -133,7 +133,7 @@ def fit(self, train_set, val_set=None): if torch.cuda.is_available(): torch.cuda.manual_seed_all(self.seed) - graph = construct_graph(train_set).to(self.device) + graph = construct_graph(train_set, self.total_users, self.total_items).to(self.device) model = Model( graph, self.emb_size, diff --git a/cornac/models/recommender.py b/cornac/models/recommender.py index 948d49b2e..301e9aab4 100644 --- a/cornac/models/recommender.py +++ b/cornac/models/recommender.py @@ -27,7 +27,7 @@ class Recommender: - """Generic class for a recommender model. All recommendation models should inherit from this class + """Generic class for a recommender model. All recommendation models should inherit from this class. Parameters ---------------- @@ -42,12 +42,20 @@ class Recommender: Attributes ---------- - num_users: int - Number of known users in training data. - - num_items: int - Number of known items in training data. - + train_users: int + Number of users in training data. + + train_items: int + Number of items in training data. + + total_users: int + Number of users in training, validation, and test data. + In other words, this includes unknown/unseen users. + + total_items: int + Number of items in training, validation, and test data. 
+ In other words, this includes unknown/unseen items. + uid_map: int Global mapping of user ID-index. @@ -72,8 +80,8 @@ def __init__(self, name, trainable=True, verbose=False): self.ignored_attrs = [] # attributes to be ignored when saving model # useful information getting from train_set for prediction - self.num_users = None - self.num_items = None + self.train_users = None + self.train_items = None self.uid_map = None self.iid_map = None self.max_rating = None diff --git a/cornac/models/vbpr/recom_vbpr.py b/cornac/models/vbpr/recom_vbpr.py index f8c8ea358..26c0718d5 100644 --- a/cornac/models/vbpr/recom_vbpr.py +++ b/cornac/models/vbpr/recom_vbpr.py @@ -154,11 +154,11 @@ def fit(self, train_set, val_set=None): raise CornacException("item_image modality is required but None.") # Item visual feature from CNN - train_features = train_set.item_image.features[: train_set.total_items] + train_features = train_set.item_image.features[: self.total_items] train_features = train_features.astype(np.float32) self._init( - n_users=train_set.total_users, - n_items=train_set.total_items, + n_users=self.total_users, + n_items=self.total_items, features=train_features, )