From b2b9a0e884997e0e58fd2109d9e7ad90cae75d62 Mon Sep 17 00:00:00 2001 From: lthoang Date: Sat, 20 Jan 2024 02:29:55 +0800 Subject: [PATCH 1/4] Add Dynamic REcurrent bAsket Model (DREAM) --- cornac/models/__init__.py | 1 + cornac/models/dream/__init__.py | 16 ++ cornac/models/dream/dream.py | 375 +++++++++++++++++++++++++++ cornac/models/dream/recom_dream.py | 158 +++++++++++ cornac/models/dream/requirements.txt | 1 + examples/README.md | 2 + examples/dream_tafeng.py | 56 ++++ 7 files changed, 609 insertions(+) create mode 100644 cornac/models/dream/__init__.py create mode 100644 cornac/models/dream/dream.py create mode 100644 cornac/models/dream/recom_dream.py create mode 100644 cornac/models/dream/requirements.txt create mode 100644 examples/dream_tafeng.py diff --git a/cornac/models/__init__.py b/cornac/models/__init__.py index e87f4aa4e..b333ed132 100644 --- a/cornac/models/__init__.py +++ b/cornac/models/__init__.py @@ -38,6 +38,7 @@ from .ctr import CTR from .cvae import CVAE from .cvaecf import CVAECF +from .dream import DREAM from .ease import EASE from .efm import EFM from .fm import FM diff --git a/cornac/models/dream/__init__.py b/cornac/models/dream/__init__.py new file mode 100644 index 000000000..c93d3b5fd --- /dev/null +++ b/cornac/models/dream/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2023 The Cornac Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +from .recom_dream import DREAM diff --git a/cornac/models/dream/dream.py b/cornac/models/dream/dream.py new file mode 100644 index 000000000..7777539dd --- /dev/null +++ b/cornac/models/dream/dream.py @@ -0,0 +1,375 @@ +import itertools +import random + +import numpy as np +import torch +import torch.nn as nn + +from torch.autograd import Variable +from torch.nn.init import constant_, xavier_normal_ +from torch.nn.utils.clip_grad import clip_grad_norm_ +from tqdm.auto import trange + + +class Wloss(nn.modules.loss._Loss): + def __init__(self, p, n): + super(Wloss, self).__init__() + self.p = p + self.n = n + if p > n: + self.mode = "positive" + else: + self.mode = "negative" + + def forward(self, pred, tgt, cand): + loss = 0.0 + if self.mode == "positive": + for ind in range(pred.size(0)): + if ind in tgt: + loss += -torch.log(pred[ind]) * self.p + else: + loss += -torch.log(1 - pred[ind]) * self.n + elif self.mode == "negative": + for ind in range(pred.size(0)): + if ind in tgt: + loss += -torch.log(pred[ind]) * self.p + else: + if ind in cand: + loss += -torch.log(1 - pred[ind]) * self.n + else: + loss += -torch.log(1 - pred[ind]) + return loss / pred.size(0) + + +class DREAM(nn.Module): + def __init__( + self, + n_items, + emb_size, + emb_type, + hidden_size, + dropout_prob, + max_len, + loss_mode, + loss_uplift, + attention, + device="cpu", + seed=None, + ): + super(DREAM, self).__init__() + if seed is not None: + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + # device setting + self.device = device + + # dataset features + self.n_items = n_items + + # model parameters + self.emb_size = emb_size + self.emb_type = emb_type + self.hidden_size = hidden_size + self.dropout_prob = dropout_prob + self.max_len = max_len # max sequence length + self.loss_mode = loss_mode + self.loss_uplift = loss_uplift + + self.BasketEmbedding = BasketEmbedding( + hidden_size=self.emb_size, + n_items=self.n_items, + max_len=self.max_len, + type=self.emb_type, + device=self.device, + ) + self.gru = nn.GRU( + input_size=self.emb_size, + hidden_size=self.hidden_size, + batch_first=True, + ) + self.attention = attention + self.decoder = Decoder( + hidden_size=self.hidden_size, + seq_len=self.max_len, + num_item=self.n_items, + dropout_prob=self.dropout_prob, + attention=self.attention, + device=self.device, + ) + + self.loss_fct = nn.BCELoss() + self.p_loss_fct = Wloss(self.loss_uplift, 1) + self.n_loss_fct = Wloss(1, self.loss_uplift) + self.meta_loss_fct = nn.MSELoss() + self.sigmoid = nn.Sigmoid() + self.apply(self._init_weights) + + def _init_weights(self, module): + if isinstance(module, nn.Embedding): + xavier_normal_(module.weight.data) + elif isinstance(module, nn.Linear): + xavier_normal_(module.weight.data) + if module.bias is not None: + constant_(module.bias.data, 0) + + def forward(self, basket_seq): + basket_seq_len = [] + for b in basket_seq: + basket_seq_len.append(len(b)) + basket_seq_len = torch.as_tensor(basket_seq_len).to(self.device) + batch_basket_seq_embed = self.BasketEmbedding(basket_seq) + all_memory, _ = self.gru(batch_basket_seq_embed) + last_memory = self.gather_indexes(all_memory, basket_seq_len - 1) + timeline_mask = get_timeline_mask( + batch_basket_seq_embed, self.device, self.emb_size + ) + pred = self.decoder.forward(all_memory, last_memory, timeline_mask) + return pred + + def get_batch_loss(self, pred, tgt, cand, tag, device): + batch_size = pred.size(0) + tmp_tgt = get_label_tensor(tgt, device, self.n_items) + loss = 0.0 + if self.loss_mode == 0: + for ind in range(batch_size): + pred_ind = torch.clamp(pred[ind], 0.001, 0.999) + loss += self.loss_fct(pred_ind.unsqueeze(0), tmp_tgt[ind].unsqueeze(0)) + if self.loss_mode == 1: + if tag == "negative": + for ind in range(batch_size): + user_pred_ind = torch.clamp(pred[ind], 0.001, 0.999) + user_tgt = torch.tensor(tgt[ind]) + user_cand = torch.tensor(cand[ind]) + loss += self.n_loss_fct(user_pred_ind, user_tgt, user_cand) + if tag == "positive": + for ind in range(batch_size): + user_pred_ind = torch.clamp(pred[ind], 0.001, 0.999) + user_tgt = torch.tensor(tgt[ind]) + user_cand = torch.tensor(cand[ind]) + loss += self.p_loss_fct(user_pred_ind, user_tgt, user_cand) + return loss / batch_size # compute average + + def global_loss(self, basket_seq, tgt_basket, cand_basket): + prediction = self.forward(basket_seq) + cand = [ + l1 + l2 for l1, l2 in zip(cand_basket["repeat"], cand_basket["explore"]) + ] + loss = self.get_batch_loss( + prediction, tgt_basket, cand, "positive", self.device + ) + return loss + + def calculate_loss(self, basket_seq, tgt_basket, cand_basket): + global_loss = self.global_loss(basket_seq, tgt_basket, cand_basket) + return global_loss + + def gather_indexes(self, output, gather_index): + """Gathers the vectors at the specific positions over a minibatch""" + gather_index = gather_index.view(-1, 1, 1).expand(-1, -1, output.shape[-1]) + output_tensor = output.gather(dim=1, index=gather_index) + return output_tensor.squeeze(1) + + +class BasketEmbedding(nn.Module): + def __init__( + self, + hidden_size, + n_items, + max_len, + type, + device, + ): # hidden_size is the emb_size + super(BasketEmbedding, self).__init__() + self.hidden_size = hidden_size + self.n_items = n_items + self.max_len = max_len + self.type = type + self.device = device + self.padding_idx = n_items + self.item_embedding = nn.Embedding(n_items + 1, hidden_size) + self.item_embedding.weight.data[-1] = torch.zeros(hidden_size) + + def forward(self, batch_basket): + # need to padding here + batch_embed_seq = [] # batch * seq_len * hidden size + for basket_seq in batch_basket: + embed_baskets = [] + for basket in basket_seq: + basket = torch.LongTensor(basket).resize_(1, len(basket)) + basket = Variable(basket).to(self.device) + basket = self.item_embedding(basket).squeeze(0) + if self.type == "mean": + embed_baskets.append(torch.mean(basket, 0)) + if self.type == "max": + embed_baskets.append(torch.max(basket, 0)[0]) + if self.type == "sum": + embed_baskets.append(torch.sum(basket, 0)) + # padding the seq + pad_num = self.max_len - len(embed_baskets) + for _ in range(pad_num): + embed_baskets.append( + torch.tile( + torch.tensor([self.padding_idx], device=self.device), + dims=(self.hidden_size,), + ) + ) + embed_seq = torch.stack(embed_baskets, 0) + embed_seq = torch.as_tensor(embed_seq) + batch_embed_seq.append(embed_seq) + batch_embed_output = torch.stack(batch_embed_seq, 0).to(self.device) + return batch_embed_output + + +class Decoder(nn.Module): + def __init__( + self, + hidden_size, + seq_len, + num_item, + dropout_prob, + attention, + device, + ): + super(Decoder, self).__init__() + self.dropout = nn.Dropout(dropout_prob) + self.hidden_size = hidden_size + self.device = device + self.seq_len = seq_len + self.n_items = num_item + self.attention = attention + + if self.attention == "attention": + self.W_repeat = nn.Linear(hidden_size, hidden_size, bias=False) + self.U_repeat = nn.Linear(hidden_size, hidden_size, bias=False) + self.tanh = nn.Tanh() + self.V_repeat = nn.Linear(hidden_size, 1) + self.Repeat = nn.Linear(hidden_size * 2, num_item) + else: + self.Repeat = nn.Linear(hidden_size, num_item) + + def forward(self, all_memory, last_memory, mask=None): + """item_seq is the appared items or candidate items""" + if self.attention == "attention": + all_memory_values, last_memory_values = all_memory, last_memory + all_memory = self.dropout(self.U_repeat(all_memory)) + last_memory = self.dropout(self.W_repeat(last_memory)) + last_memory = last_memory.unsqueeze(1) + last_memory = last_memory.repeat(1, self.seq_len, 1) + + output_er = self.tanh(all_memory + last_memory) + output_er = self.V_repeat(output_er).squeeze(-1) + + if mask is not None: + output_er.masked_fill_(mask, -1e9) + + output_er = output_er.unsqueeze(-1) + + alpha_r = nn.Softmax(dim=1)(output_er) + alpha_r = alpha_r.repeat(1, 1, self.hidden_size) + output_r = (all_memory_values * alpha_r).sum(dim=1) + output_r = torch.cat([output_r, last_memory_values], dim=1) + output_r = self.dropout(self.Repeat(output_r)) + + decoder = torch.sigmoid(output_r) + else: + decoder = torch.sigmoid(self.dropout(self.Repeat(last_memory))) + + return decoder + + +def get_timeline_mask(batch_basket_emb, device, emb_size): + batch_mask = [] + for basket_seq in batch_basket_emb: + mask = [] + for basket_emb in basket_seq: + if torch.equal(basket_emb, torch.zeros(emb_size).to(device)): + mask.append(1) + else: + mask.append(0) + batch_mask.append(torch.as_tensor(mask).bool()) + batch_mask = torch.stack(batch_mask, 0).to(device) + return batch_mask.bool() + + +def get_label_tensor(labels, device, max_index=None): + """Candidates is the output of basic models or repeat or popular + labels is list[]""" + batch_size = len(labels) + if torch.cuda.is_available(): + label_tensor = torch.FloatTensor(batch_size, max_index).fill_(0.0).to(device) + else: + label_tensor = torch.zeros(batch_size, max_index) + for ind in range(batch_size): + if len(labels[ind]) != 0: + label_tensor[ind].scatter_(0, torch.as_tensor(labels[ind]).to(device), 1) + label_tensor.requires_grad = False # because this is not trainable + return label_tensor + + +def transform_data(batch_basket_items, max_len): + batch_history_basket_items = [] + batch_target_items = [] + candidates = {"repeat": [], "explore": []} + for basket_items in batch_basket_items: + history_basket_items = basket_items[-max_len - 1 : -1] + target_items = basket_items[-1] + batch_history_basket_items.append(history_basket_items) + batch_target_items.append(target_items) + history_items = set(itertools.chain.from_iterable(history_basket_items)) + repeat_items = [iid for iid in target_items if iid in history_items] + explore_items = [iid for iid in target_items if iid not in repeat_items] + candidates["repeat"].append(repeat_items) + candidates["explore"].append(explore_items) + return batch_history_basket_items, batch_target_items, candidates + + +def learn( + model, + train_set, + val_set, + max_len, + lr, + n_epochs, + batch_size, + verbose, +): + optimizer = torch.optim.Adam(model.parameters(), lr=lr) + loss_func = model.calculate_loss + last_loss = np.inf + last_val_loss = np.inf + progress_bar = trange(1, n_epochs + 1, disable=not verbose) + for _ in progress_bar: + model.train() + for inc, (_, _, batch_basket_items) in enumerate( + train_set.ubi_iter(batch_size=batch_size, shuffle=True) + ): + batch_history_basket_items, batch_target_items, candidates = transform_data( + batch_basket_items, max_len=max_len + ) + optimizer.zero_grad() + loss = loss_func(batch_history_basket_items, batch_target_items, candidates) + loss.backward() + clip_grad_norm_(model.parameters(), max_norm=20, norm_type=2) + optimizer.step() + last_loss = loss.data.item() + if inc % 10: + progress_bar.set_postfix(loss=last_loss, val_loss=last_val_loss) + + if val_set is not None: + model.eval() + for inc, (_, _, batch_basket_items) in enumerate( + val_set.ubi_iter(batch_size=batch_size, shuffle=True) + ): + ( + batch_history_basket_items, + batch_target_items, + candidates, + ) = transform_data(batch_basket_items, max_len=max_len) + loss = loss_func( + batch_history_basket_items, batch_target_items, candidates + ) + last_val_loss = loss.data.item() + if inc % 10: + progress_bar.set_postfix(loss=last_loss, val_loss=last_val_loss) diff --git a/cornac/models/dream/recom_dream.py b/cornac/models/dream/recom_dream.py new file mode 100644 index 000000000..e52680841 --- /dev/null +++ b/cornac/models/dream/recom_dream.py @@ -0,0 +1,158 @@ +# Copyright 2023 The Cornac Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +from ..recommender import NextBasketRecommender + + +class DREAM(NextBasketRecommender): + """Dynamic REcurrent bAsket Model (DREAM) + + Parameters + ---------- + name: string, default: 'DREAM' + The name of the recommender model. + + emb_size: int, optional, default: 32 + Embedding size + + emb_type: str, default: 'mean' + Embedding type. Including 'mean', 'max', 'sum' + + hidden_size: int, optional, default: 32 + Hidden size + + dropout: float, optional, default: 0.1 + Dropout ratio + + loss_mode: int, optional, default: 0 + Loss mode. Including 0 and 1 + + loss_uplift: int, optional, default: 100 + + attention: int, optional, default: 0 + Attention + + max_len: int, optional, default: None + Max sequence length. + If None, maximum sequence length is the maximum sequence length of training data + + lr: string, optional, default: 0.001 + Learning rate of Adam optimizer + + n_epochs: int, optional, default: 100 + Number of epochs + + batch_size: int, optional, default: 64 + Batch size + + device: string, optional, default: "cpu" + Device for learning and evaluation. Using cpu as default. + Use "cuda:0" for using gpu. + + trainable: boolean, optional, default: True + When False, the model will not be re-trained, and input of pre-trained parameters are required. + + verbose: boolean, optional, default: True + When True, running logs are displayed. + + seed: int, optional, default: None + Random seed + + References + ---------- + Feng Yu, Qiang Liu, Shu Wu, Liang Wang, and Tieniu Tan. 2016. + A Dynamic Recurrent Model for Next Basket Recommendation. + In Proceedings of the 39th International ACM SIGIR conference on Research and Development in Information Retrieval (SIGIR '16). + Association for Computing Machinery, New York, NY, USA, 729–732. + https://doi.org/10.1145/2911451.2914683 + + """ + + def __init__( + self, + name="DREAM", + emb_size=32, + emb_type="mean", + hidden_size=32, + dropout=0.1, + loss_mode=0, + loss_uplift=100, + attention=0, + max_len=None, + lr=0.001, + weight_decay=0, + n_epochs=100, + batch_size=32, + device="cpu", + trainable=True, + verbose=False, + seed=None, + ): + super().__init__(name=name, trainable=trainable, verbose=verbose) + self.emb_size = emb_size + self.emb_type = emb_type + self.hidden_size = hidden_size + self.dropout = dropout + self.loss_mode = loss_mode + self.loss_uplift = loss_uplift + self.attention = attention + self.max_len = max_len + self.lr = lr + self.n_epochs = n_epochs + self.batch_size = batch_size + self.seed = seed + self.device = device + + def fit(self, train_set, val_set=None): + super().fit(train_set=train_set, val_set=val_set) + from .dream import DREAM, learn + + # max sequence length + self.max_len = ( + max([len(bids) for bids in train_set.user_basket_data.values()]) + if self.max_len is None + else self.max_len + ) + self.model = DREAM( + n_items=self.total_items, + emb_size=self.emb_size, + emb_type=self.emb_type, + hidden_size=self.hidden_size, + dropout_prob=self.dropout, + max_len=self.max_len, + loss_mode=self.loss_mode, + loss_uplift=self.loss_uplift, + attention=self.attention, + device=self.device, + seed=self.seed, + ).to(self.device) + + learn( + self.model, + train_set=train_set, + val_set=val_set, + max_len=self.max_len, + lr=self.lr, + n_epochs=self.n_epochs, + batch_size=self.batch_size, + verbose=self.verbose, + ) + + return self + + def score(self, user_idx, history_baskets, **kwargs): + self.model.eval() + preds = self.model([history_baskets[-self.max_len :]]) + return preds.squeeze().cpu().detach().numpy() diff --git a/cornac/models/dream/requirements.txt b/cornac/models/dream/requirements.txt new file mode 100644 index 000000000..757fa182f --- /dev/null +++ b/cornac/models/dream/requirements.txt @@ -0,0 +1 @@ +torch>=2.0.0 diff --git a/examples/README.md b/examples/README.md index f7812f638..361beec8d 100644 --- a/examples/README.md +++ b/examples/README.md @@ -122,6 +122,8 @@ [beacon_tafeng.py](beacon_tafeng.py) - Correlation-Sensitive Next-Basket Recommendation (Beacon). +[dream_tafeng.py](dream_tafeng.py) - Example of Dynamic REcurrent bAsket Model (DREAM). + [tifuknn_tafeng.py](tifuknn_tafeng.py) - Example of Temporal-Item-Frequency-based User-KNN (TIFUKNN). [upcf_tafeng.py](upcf_tafeng.py) - Example of Recency Aware Collaborative Filtering for Next Basket Recommendation (UPCF). diff --git a/examples/dream_tafeng.py b/examples/dream_tafeng.py new file mode 100644 index 000000000..c4c6027a9 --- /dev/null +++ b/examples/dream_tafeng.py @@ -0,0 +1,56 @@ +# Copyright 2023 The Cornac Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Example of Dynamic REcurrent bAsket Model (DREAM)""" + +import cornac +from cornac.eval_methods import NextBasketEvaluation +from cornac.metrics import NDCG, HitRatio, Recall +from cornac.models import DREAM + +data = cornac.datasets.tafeng.load_basket( + reader=cornac.data.Reader( + min_basket_size=3, max_basket_size=50, min_basket_sequence=2 + ) +) + +next_basket_eval = NextBasketEvaluation( + data=data, fmt="UBITJson", test_size=0.2, val_size=0.08, seed=123, verbose=True +) + +models = [ + DREAM( + emb_size=32, + emb_type="mean", + hidden_size=32, + dropout=0.1, + lr=0.001, + n_epochs=10, + batch_size=32, + device="cuda:0", + verbose=True, + seed=123, + ) +] + +metrics = [ + Recall(k=10), + Recall(k=50), + NDCG(k=10), + NDCG(k=50), + HitRatio(k=10), + HitRatio(k=50), +] + +cornac.Experiment(eval_method=next_basket_eval, models=models, metrics=metrics).run() From 838fdca5610e93295c3622e764cb079ac2d3a995 Mon Sep 17 00:00:00 2001 From: lthoang Date: Sat, 20 Jan 2024 23:04:01 +0800 Subject: [PATCH 2/4] Update docs --- README.md | 1 + docs/source/api_ref/models.rst | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/README.md b/README.md index 8eb16e59b..3561d83fc 100644 --- a/README.md +++ b/README.md @@ -177,6 +177,7 @@ The recommender models supported by Cornac are listed below. Why don't you join | 2016 | [Collaborative Deep Ranking (CDR)](cornac/models/cdr), [paper](http://inpluslab.com/chenliang/homepagefiles/paper/hao-pakdd2016.pdf) | [requirements.txt](cornac/models/cdr/requirements.txt) | [cdr_exp.py](examples/cdr_example.py) | | [Collaborative Ordinal Embedding (COE)](cornac/models/coe), [paper](http://www.hadylauw.com/publications/sdm16.pdf) | [requirements.txt](cornac/models/coe/requirements.txt) | | | [Convolutional Matrix Factorization (ConvMF)](cornac/models/conv_mf), [paper](http://uclab.khu.ac.kr/resources/publication/C_351.pdf) | [requirements.txt](cornac/models/conv_mf/requirements.txt) | [convmf_exp.py](examples/conv_mf_example.py) +| | [Dynamic REcurrent bAsket Model (DREAM)](cornac/models/dream), [paper](https://cseweb.ucsd.edu/classes/fa17/cse291-b/reading/A%20Dynamic%20Recurrent%20Model%20for%20Next%20Basket%20Recommendation.pdf) | [requirements.txt](cornac/models/dream/requirements.txt) | [dream_tafeng.py](examples/dream_tafeng.py) | | [Learn to Rank user Preferences based on Phrase-level sentiment analysis across Multiple categories (LRPPM)](cornac/models/lrppm), [paper](https://www.yongfeng.me/attach/sigir16-chen.pdf) | N/A | [lrppm_example.py](examples/lrppm_example.py) | | [Session-based Recommendations With Recurrent Neural Networks (GRU4Rec)](cornac/models/gru4rec), [paper](https://arxiv.org/pdf/1511.06939.pdf) | [requirements.txt](cornac/models/gru4rec/requirements.txt) | [gru4rec_yoochoose.py](examples/gru4rec_yoochoose.py) | | [Spherical K-means (SKM)](cornac/models/skm), [paper](https://www.sciencedirect.com/science/article/pii/S092523121501509X) | N/A | [skm_movielens.py](examples/skm_movielens.py) diff --git a/docs/source/api_ref/models.rst b/docs/source/api_ref/models.rst index e92c8881e..48b50b23f 100644 --- a/docs/source/api_ref/models.rst +++ b/docs/source/api_ref/models.rst @@ -169,6 +169,11 @@ Convolutional Matrix Factorization (ConvMF) .. automodule:: cornac.models.conv_mf.recom_convmf :members: +Dynamic REcurrent bAsket Model (DREAM) +-------------------------------------- +.. automodule:: cornac.models.dream.recom_dream + :members: + Spherical k-means (Skmeans) --------------------------- .. automodule:: cornac.models.skm.recom_skmeans From 34089ec1b3021fe9416268f35fb22fed12f7ecbe Mon Sep 17 00:00:00 2001 From: lthoang Date: Sun, 21 Jan 2024 12:29:31 +0800 Subject: [PATCH 3/4] refactor code --- cornac/models/dream/dream.py | 35 +++++++++++++++--------------- cornac/models/dream/recom_dream.py | 22 ++++++++++--------- 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/cornac/models/dream/dream.py b/cornac/models/dream/dream.py index 7777539dd..f4515c47c 100644 --- a/cornac/models/dream/dream.py +++ b/cornac/models/dream/dream.py @@ -49,7 +49,7 @@ def __init__( emb_type, hidden_size, dropout_prob, - max_len, + max_seq_length, loss_mode, loss_uplift, attention, @@ -73,14 +73,14 @@ def __init__( self.emb_type = emb_type self.hidden_size = hidden_size self.dropout_prob = dropout_prob - self.max_len = max_len # max sequence length + self.max_seq_length = max_seq_length # max sequence length self.loss_mode = loss_mode self.loss_uplift = loss_uplift self.BasketEmbedding = BasketEmbedding( hidden_size=self.emb_size, n_items=self.n_items, - max_len=self.max_len, + max_seq_length=self.max_seq_length, type=self.emb_type, device=self.device, ) @@ -92,7 +92,7 @@ def __init__( self.attention = attention self.decoder = Decoder( hidden_size=self.hidden_size, - seq_len=self.max_len, + max_seq_length=self.max_seq_length, num_item=self.n_items, dropout_prob=self.dropout_prob, attention=self.attention, @@ -177,14 +177,14 @@ def __init__( self, hidden_size, n_items, - max_len, + max_seq_length, type, device, ): # hidden_size is the emb_size super(BasketEmbedding, self).__init__() self.hidden_size = hidden_size self.n_items = n_items - self.max_len = max_len + self.max_seq_length = max_seq_length self.type = type self.device = device self.padding_idx = n_items @@ -193,7 +193,7 @@ def __init__( def forward(self, batch_basket): # need to padding here - batch_embed_seq = [] # batch * seq_len * hidden size + batch_embed_seq = [] # batch * max_seq_length * hidden size for basket_seq in batch_basket: embed_baskets = [] for basket in basket_seq: @@ -207,7 +207,7 @@ def forward(self, batch_basket): if self.type == "sum": embed_baskets.append(torch.sum(basket, 0)) # padding the seq - pad_num = self.max_len - len(embed_baskets) + pad_num = self.max_seq_length - len(embed_baskets) for _ in range(pad_num): embed_baskets.append( torch.tile( @@ -226,7 +226,7 @@ class Decoder(nn.Module): def __init__( self, hidden_size, - seq_len, + max_seq_length, num_item, dropout_prob, attention, @@ -236,7 +236,7 @@ def __init__( self.dropout = nn.Dropout(dropout_prob) self.hidden_size = hidden_size self.device = device - self.seq_len = seq_len + self.max_seq_length = max_seq_length self.n_items = num_item self.attention = attention @@ -256,7 +256,7 @@ def forward(self, all_memory, last_memory, mask=None): all_memory = self.dropout(self.U_repeat(all_memory)) last_memory = self.dropout(self.W_repeat(last_memory)) last_memory = last_memory.unsqueeze(1) - last_memory = last_memory.repeat(1, self.seq_len, 1) + last_memory = last_memory.repeat(1, self.max_seq_length, 1) output_er = self.tanh(all_memory + last_memory) output_er = self.V_repeat(output_er).squeeze(-1) @@ -308,12 +308,12 @@ def get_label_tensor(labels, device, max_index=None): return label_tensor -def transform_data(batch_basket_items, max_len): +def transform_data(batch_basket_items, max_seq_length): batch_history_basket_items = [] batch_target_items = [] candidates = {"repeat": [], "explore": []} for basket_items in batch_basket_items: - history_basket_items = basket_items[-max_len - 1 : -1] + history_basket_items = basket_items[-max_seq_length - 1 : -1] target_items = basket_items[-1] batch_history_basket_items.append(history_basket_items) batch_target_items.append(target_items) @@ -329,13 +329,14 @@ def learn( model, train_set, val_set, - max_len, + max_seq_length, lr, + weight_decay, n_epochs, batch_size, verbose, ): - optimizer = torch.optim.Adam(model.parameters(), lr=lr) + optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay) loss_func = model.calculate_loss last_loss = np.inf last_val_loss = np.inf @@ -346,7 +347,7 @@ def learn( train_set.ubi_iter(batch_size=batch_size, shuffle=True) ): batch_history_basket_items, batch_target_items, candidates = transform_data( - batch_basket_items, max_len=max_len + batch_basket_items, max_seq_length=max_seq_length ) optimizer.zero_grad() loss = loss_func(batch_history_basket_items, batch_target_items, candidates) @@ -366,7 +367,7 @@ def learn( batch_history_basket_items, batch_target_items, candidates, - ) = transform_data(batch_basket_items, max_len=max_len) + ) = transform_data(batch_basket_items, max_seq_length=max_seq_length) loss = loss_func( batch_history_basket_items, batch_target_items, candidates ) diff --git a/cornac/models/dream/recom_dream.py b/cornac/models/dream/recom_dream.py index e52680841..f1541e7cf 100644 --- a/cornac/models/dream/recom_dream.py +++ b/cornac/models/dream/recom_dream.py @@ -44,9 +44,9 @@ class DREAM(NextBasketRecommender): attention: int, optional, default: 0 Attention - max_len: int, optional, default: None + max_seq_length: int, optional, default: None Max sequence length. - If None, maximum sequence length is the maximum sequence length of training data + If None, it is the maximum number of baskets in training sequences lr: string, optional, default: 0.001 Learning rate of Adam optimizer @@ -90,7 +90,7 @@ def __init__( loss_mode=0, loss_uplift=100, attention=0, - max_len=None, + max_seq_length=None, lr=0.001, weight_decay=0, n_epochs=100, @@ -108,8 +108,9 @@ def __init__( self.loss_mode = loss_mode self.loss_uplift = loss_uplift self.attention = attention - self.max_len = max_len + self.max_seq_length = max_seq_length self.lr = lr + self.weight_decay = weight_decay self.n_epochs = n_epochs self.batch_size = batch_size self.seed = seed @@ -120,10 +121,10 @@ def fit(self, train_set, val_set=None): from .dream import DREAM, learn # max sequence length - self.max_len = ( + self.max_seq_length = ( max([len(bids) for bids in train_set.user_basket_data.values()]) - if self.max_len is None - else self.max_len + if self.max_seq_length is None + else self.max_seq_length ) self.model = DREAM( n_items=self.total_items, @@ -131,7 +132,7 @@ def fit(self, train_set, val_set=None): emb_type=self.emb_type, hidden_size=self.hidden_size, dropout_prob=self.dropout, - max_len=self.max_len, + max_seq_length=self.max_seq_length, loss_mode=self.loss_mode, loss_uplift=self.loss_uplift, attention=self.attention, @@ -143,8 +144,9 @@ def fit(self, train_set, val_set=None): self.model, train_set=train_set, val_set=val_set, - max_len=self.max_len, + max_seq_length=self.max_seq_length, lr=self.lr, + weight_decay=self.weight_decay, n_epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, @@ -154,5 +156,5 @@ def fit(self, train_set, val_set=None): def score(self, user_idx, history_baskets, **kwargs): self.model.eval() - preds = self.model([history_baskets[-self.max_len :]]) + preds = self.model([history_baskets[-self.max_seq_length :]]) return preds.squeeze().cpu().detach().numpy() From 5e3be14aae9311a270e191e8c67c83f639231536 Mon Sep 17 00:00:00 2001 From: lthoang Date: Sun, 21 Jan 2024 12:31:14 +0800 Subject: [PATCH 4/4] Rename model for consistency --- README.md | 2 +- cornac/models/dream/recom_dream.py | 2 +- docs/source/api_ref/models.rst | 2 +- examples/README.md | 2 +- examples/dream_tafeng.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 3561d83fc..2a0c4c567 100644 --- a/README.md +++ b/README.md @@ -177,7 +177,7 @@ The recommender models supported by Cornac are listed below. Why don't you join | 2016 | [Collaborative Deep Ranking (CDR)](cornac/models/cdr), [paper](http://inpluslab.com/chenliang/homepagefiles/paper/hao-pakdd2016.pdf) | [requirements.txt](cornac/models/cdr/requirements.txt) | [cdr_exp.py](examples/cdr_example.py) | | [Collaborative Ordinal Embedding (COE)](cornac/models/coe), [paper](http://www.hadylauw.com/publications/sdm16.pdf) | [requirements.txt](cornac/models/coe/requirements.txt) | | | [Convolutional Matrix Factorization (ConvMF)](cornac/models/conv_mf), [paper](http://uclab.khu.ac.kr/resources/publication/C_351.pdf) | [requirements.txt](cornac/models/conv_mf/requirements.txt) | [convmf_exp.py](examples/conv_mf_example.py) -| | [Dynamic REcurrent bAsket Model (DREAM)](cornac/models/dream), [paper](https://cseweb.ucsd.edu/classes/fa17/cse291-b/reading/A%20Dynamic%20Recurrent%20Model%20for%20Next%20Basket%20Recommendation.pdf) | [requirements.txt](cornac/models/dream/requirements.txt) | [dream_tafeng.py](examples/dream_tafeng.py) +| | [Dynamic Recurrent Basket Model (DREAM)](cornac/models/dream), [paper](https://cseweb.ucsd.edu/classes/fa17/cse291-b/reading/A%20Dynamic%20Recurrent%20Model%20for%20Next%20Basket%20Recommendation.pdf) | [requirements.txt](cornac/models/dream/requirements.txt) | [dream_tafeng.py](examples/dream_tafeng.py) | | [Learn to Rank user Preferences based on Phrase-level sentiment analysis across Multiple categories (LRPPM)](cornac/models/lrppm), [paper](https://www.yongfeng.me/attach/sigir16-chen.pdf) | N/A | [lrppm_example.py](examples/lrppm_example.py) | | [Session-based Recommendations With Recurrent Neural Networks (GRU4Rec)](cornac/models/gru4rec), [paper](https://arxiv.org/pdf/1511.06939.pdf) | [requirements.txt](cornac/models/gru4rec/requirements.txt) | [gru4rec_yoochoose.py](examples/gru4rec_yoochoose.py) | | [Spherical K-means (SKM)](cornac/models/skm), [paper](https://www.sciencedirect.com/science/article/pii/S092523121501509X) | N/A | [skm_movielens.py](examples/skm_movielens.py) diff --git a/cornac/models/dream/recom_dream.py b/cornac/models/dream/recom_dream.py index f1541e7cf..ab3a281fa 100644 --- a/cornac/models/dream/recom_dream.py +++ b/cornac/models/dream/recom_dream.py @@ -17,7 +17,7 @@ class DREAM(NextBasketRecommender): - """Dynamic REcurrent bAsket Model (DREAM) + """Dynamic Recurrent Basket Model (DREAM) Parameters ---------- diff --git a/docs/source/api_ref/models.rst b/docs/source/api_ref/models.rst index 48b50b23f..55e456e74 100644 --- a/docs/source/api_ref/models.rst +++ b/docs/source/api_ref/models.rst @@ -169,7 +169,7 @@ Convolutional Matrix Factorization (ConvMF) .. automodule:: cornac.models.conv_mf.recom_convmf :members: -Dynamic REcurrent bAsket Model (DREAM) +Dynamic Recurrent Basket Model (DREAM) -------------------------------------- .. automodule:: cornac.models.dream.recom_dream :members: diff --git a/examples/README.md b/examples/README.md index aca2e4003..b6542d010 100644 --- a/examples/README.md +++ b/examples/README.md @@ -124,7 +124,7 @@ [beacon_tafeng.py](beacon_tafeng.py) - Correlation-Sensitive Next-Basket Recommendation (Beacon). -[dream_tafeng.py](dream_tafeng.py) - Example of Dynamic REcurrent bAsket Model (DREAM). +[dream_tafeng.py](dream_tafeng.py) - Example of Dynamic Recurrent Basket Model (DREAM). [tifuknn_tafeng.py](tifuknn_tafeng.py) - Example of Temporal-Item-Frequency-based User-KNN (TIFUKNN). diff --git a/examples/dream_tafeng.py b/examples/dream_tafeng.py index c4c6027a9..104d1b438 100644 --- a/examples/dream_tafeng.py +++ b/examples/dream_tafeng.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ -"""Example of Dynamic REcurrent bAsket Model (DREAM)""" +"""Example of Dynamic Recurrent Basket Model (DREAM)""" import cornac from cornac.eval_methods import NextBasketEvaluation