From 1b50cbdaf3789d8ab8b7be2fe7ecd75aedbdc4ba Mon Sep 17 00:00:00 2001 From: hieuddo Date: Mon, 22 Apr 2024 08:36:23 +0000 Subject: [PATCH 1/5] fix .rank() method for multiple models --- cornac/models/comparer/recom_comparer_obj.pyx | 20 +++++++++---------- cornac/models/comparer/recom_comparer_sub.pyx | 2 +- cornac/models/efm/recom_efm.pyx | 2 +- cornac/models/lrppm/recom_lrppm.pyx | 2 +- cornac/models/recommender.py | 6 ++++-- 5 files changed, 17 insertions(+), 15 deletions(-) diff --git a/cornac/models/comparer/recom_comparer_obj.pyx b/cornac/models/comparer/recom_comparer_obj.pyx index 1c522b6f9..bd64945c0 100644 --- a/cornac/models/comparer/recom_comparer_obj.pyx +++ b/cornac/models/comparer/recom_comparer_obj.pyx @@ -663,39 +663,39 @@ class ComparERObj(Recommender): item_score = self.U2[item_id, :].dot(self.U1[user_id, :]) + self.H2[item_id, :].dot(self.H1[user_id, :]) return item_score - def rank(self, user_id, item_ids=None): + def rank(self, user_idx, item_indices=None, **kwargs): """Rank all test items for a given user. Parameters ---------- - user_id: int, required + user_idx: int, required The index of the user for whom to perform item raking. - item_ids: 1d array, optional, default: None + item_indices: 1d array, optional, default: None A list of candidate item indices to be ranked by the user. If `None`, list of ranked known item indices and their scores will be returned Returns ------- Tuple of `item_rank`, and `item_scores`. The order of values - in item_scores are corresponding to the order of their ids in item_ids + in item_scores are corresponding to the order of their ids in item_indices """ - X_ = self.U1[user_id, :].dot(self.V.T) + X_ = self.U1[user_idx, :].dot(self.V.T) most_cared_aspects_indices = (-X_).argsort()[:self.num_most_cared_aspects] most_cared_X_ = X_[most_cared_aspects_indices] most_cared_Y_ = self.U2.dot(self.V[most_cared_aspects_indices, :].T) explicit_scores = most_cared_X_.dot(most_cared_Y_.T) / (self.num_most_cared_aspects * self.rating_scale) - item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_id) + item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_idx) - if item_ids is None: + if item_indices is None: item_scores = item_scores item_rank = item_scores.argsort()[::-1] else: - num_items = max(self.num_items, max(item_ids) + 1) + num_items = max(self.num_items, max(item_indices) + 1) item_scores = np.ones(num_items) * np.min(item_scores) item_scores[:self.num_items] = item_scores item_rank = item_scores.argsort()[::-1] - item_rank = intersects(item_rank, item_ids, assume_unique=True) - item_scores = item_scores[item_ids] + item_rank = intersects(item_rank, item_indices, assume_unique=True) + item_scores = item_scores[item_indices] return item_rank, item_scores diff --git a/cornac/models/comparer/recom_comparer_sub.pyx b/cornac/models/comparer/recom_comparer_sub.pyx index e1eec1c77..be6593afe 100644 --- a/cornac/models/comparer/recom_comparer_sub.pyx +++ b/cornac/models/comparer/recom_comparer_sub.pyx @@ -759,7 +759,7 @@ class ComparERSub(MTER): return correct, skipped, loss, bpr_loss - def rank(self, user_idx, item_indices=None): + def rank(self, user_idx, item_indices=None, **kwargs): if self.alpha > 0 and self.n_top_aspects > 0: n_top_aspects = min(self.n_top_aspects, self.num_aspects) ts1 = np.einsum("abc,a->bc", self.G1, self.U[user_idx]) diff --git a/cornac/models/efm/recom_efm.pyx b/cornac/models/efm/recom_efm.pyx index 5d6dd582c..2fcc3406b 100644 --- a/cornac/models/efm/recom_efm.pyx +++ b/cornac/models/efm/recom_efm.pyx @@ -468,7 +468,7 @@ class EFM(Recommender): item_score = self.U2[item_idx, :].dot(self.U1[user_idx, :]) + self.H2[item_idx, :].dot(self.H1[user_idx, :]) return item_score - def rank(self, user_idx, item_indices=None): + def rank(self, user_idx, item_indices=None, **kwargs): """Rank all test items for a given user. Parameters diff --git a/cornac/models/lrppm/recom_lrppm.pyx b/cornac/models/lrppm/recom_lrppm.pyx index 2c8ec5475..71085a90d 100644 --- a/cornac/models/lrppm/recom_lrppm.pyx +++ b/cornac/models/lrppm/recom_lrppm.pyx @@ -516,7 +516,7 @@ class LRPPM(Recommender): item_score = self.I[i_idx].dot(self.U[u_idx]) return item_score - def rank(self, user_idx, item_indices=None): + def rank(self, user_idx, item_indices=None, **kwargs): if self.alpha > 0 and self.num_top_aspects > 0: n_items = self.num_items num_top_aspects = min(self.num_top_aspects, self.num_aspects) diff --git a/cornac/models/recommender.py b/cornac/models/recommender.py index c7080a4b2..ff6c69cd5 100644 --- a/cornac/models/recommender.py +++ b/cornac/models/recommender.py @@ -473,7 +473,7 @@ def rate(self, user_idx, item_idx, clipping=True): return rating_pred - def rank(self, user_idx, item_indices=None, k=-1, **kwargs): + def rank(self, user_idx, item_indices=None, **kwargs): """Rank all test items for a given user. Parameters @@ -485,7 +485,7 @@ def rank(self, user_idx, item_indices=None, k=-1, **kwargs): A list of candidate item indices to be ranked by the user. If `None`, list of ranked known item indices and their scores will be returned. - k: int, required + k: int, optional Cut-off length for recommendations, k=-1 will return ranked list of all items. This is more important for ANN to know the limit to avoid exhaustive ranking. @@ -502,6 +502,8 @@ def rank(self, user_idx, item_indices=None, k=-1, **kwargs): except ScoreException: known_item_scores = np.ones(self.total_items) * self.default_score() + k = kwargs.get("k", -1) + # check if the returned scores also cover unknown items # if not, all unknown items will be given the MIN score if len(known_item_scores) == self.total_items: From 018dff775ab73ad4fb1a595943a62a4600b23fd2 Mon Sep 17 00:00:00 2001 From: hieuddo Date: Mon, 22 Apr 2024 17:01:29 +0800 Subject: [PATCH 2/5] Revert "fix .rank() method for multiple models" This reverts commit 1b50cbdaf3789d8ab8b7be2fe7ecd75aedbdc4ba. --- cornac/models/comparer/recom_comparer_obj.pyx | 20 +++++++++---------- cornac/models/comparer/recom_comparer_sub.pyx | 2 +- cornac/models/efm/recom_efm.pyx | 2 +- cornac/models/lrppm/recom_lrppm.pyx | 2 +- cornac/models/recommender.py | 6 ++---- 5 files changed, 15 insertions(+), 17 deletions(-) diff --git a/cornac/models/comparer/recom_comparer_obj.pyx b/cornac/models/comparer/recom_comparer_obj.pyx index bd64945c0..1c522b6f9 100644 --- a/cornac/models/comparer/recom_comparer_obj.pyx +++ b/cornac/models/comparer/recom_comparer_obj.pyx @@ -663,39 +663,39 @@ class ComparERObj(Recommender): item_score = self.U2[item_id, :].dot(self.U1[user_id, :]) + self.H2[item_id, :].dot(self.H1[user_id, :]) return item_score - def rank(self, user_idx, item_indices=None, **kwargs): + def rank(self, user_id, item_ids=None): """Rank all test items for a given user. Parameters ---------- - user_idx: int, required + user_id: int, required The index of the user for whom to perform item raking. - item_indices: 1d array, optional, default: None + item_ids: 1d array, optional, default: None A list of candidate item indices to be ranked by the user. If `None`, list of ranked known item indices and their scores will be returned Returns ------- Tuple of `item_rank`, and `item_scores`. The order of values - in item_scores are corresponding to the order of their ids in item_indices + in item_scores are corresponding to the order of their ids in item_ids """ - X_ = self.U1[user_idx, :].dot(self.V.T) + X_ = self.U1[user_id, :].dot(self.V.T) most_cared_aspects_indices = (-X_).argsort()[:self.num_most_cared_aspects] most_cared_X_ = X_[most_cared_aspects_indices] most_cared_Y_ = self.U2.dot(self.V[most_cared_aspects_indices, :].T) explicit_scores = most_cared_X_.dot(most_cared_Y_.T) / (self.num_most_cared_aspects * self.rating_scale) - item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_idx) + item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_id) - if item_indices is None: + if item_ids is None: item_scores = item_scores item_rank = item_scores.argsort()[::-1] else: - num_items = max(self.num_items, max(item_indices) + 1) + num_items = max(self.num_items, max(item_ids) + 1) item_scores = np.ones(num_items) * np.min(item_scores) item_scores[:self.num_items] = item_scores item_rank = item_scores.argsort()[::-1] - item_rank = intersects(item_rank, item_indices, assume_unique=True) - item_scores = item_scores[item_indices] + item_rank = intersects(item_rank, item_ids, assume_unique=True) + item_scores = item_scores[item_ids] return item_rank, item_scores diff --git a/cornac/models/comparer/recom_comparer_sub.pyx b/cornac/models/comparer/recom_comparer_sub.pyx index be6593afe..e1eec1c77 100644 --- a/cornac/models/comparer/recom_comparer_sub.pyx +++ b/cornac/models/comparer/recom_comparer_sub.pyx @@ -759,7 +759,7 @@ class ComparERSub(MTER): return correct, skipped, loss, bpr_loss - def rank(self, user_idx, item_indices=None, **kwargs): + def rank(self, user_idx, item_indices=None): if self.alpha > 0 and self.n_top_aspects > 0: n_top_aspects = min(self.n_top_aspects, self.num_aspects) ts1 = np.einsum("abc,a->bc", self.G1, self.U[user_idx]) diff --git a/cornac/models/efm/recom_efm.pyx b/cornac/models/efm/recom_efm.pyx index 2fcc3406b..5d6dd582c 100644 --- a/cornac/models/efm/recom_efm.pyx +++ b/cornac/models/efm/recom_efm.pyx @@ -468,7 +468,7 @@ class EFM(Recommender): item_score = self.U2[item_idx, :].dot(self.U1[user_idx, :]) + self.H2[item_idx, :].dot(self.H1[user_idx, :]) return item_score - def rank(self, user_idx, item_indices=None, **kwargs): + def rank(self, user_idx, item_indices=None): """Rank all test items for a given user. Parameters diff --git a/cornac/models/lrppm/recom_lrppm.pyx b/cornac/models/lrppm/recom_lrppm.pyx index 71085a90d..2c8ec5475 100644 --- a/cornac/models/lrppm/recom_lrppm.pyx +++ b/cornac/models/lrppm/recom_lrppm.pyx @@ -516,7 +516,7 @@ class LRPPM(Recommender): item_score = self.I[i_idx].dot(self.U[u_idx]) return item_score - def rank(self, user_idx, item_indices=None, **kwargs): + def rank(self, user_idx, item_indices=None): if self.alpha > 0 and self.num_top_aspects > 0: n_items = self.num_items num_top_aspects = min(self.num_top_aspects, self.num_aspects) diff --git a/cornac/models/recommender.py b/cornac/models/recommender.py index ff6c69cd5..c7080a4b2 100644 --- a/cornac/models/recommender.py +++ b/cornac/models/recommender.py @@ -473,7 +473,7 @@ def rate(self, user_idx, item_idx, clipping=True): return rating_pred - def rank(self, user_idx, item_indices=None, **kwargs): + def rank(self, user_idx, item_indices=None, k=-1, **kwargs): """Rank all test items for a given user. Parameters @@ -485,7 +485,7 @@ def rank(self, user_idx, item_indices=None, **kwargs): A list of candidate item indices to be ranked by the user. If `None`, list of ranked known item indices and their scores will be returned. - k: int, optional + k: int, required Cut-off length for recommendations, k=-1 will return ranked list of all items. This is more important for ANN to know the limit to avoid exhaustive ranking. @@ -502,8 +502,6 @@ def rank(self, user_idx, item_indices=None, **kwargs): except ScoreException: known_item_scores = np.ones(self.total_items) * self.default_score() - k = kwargs.get("k", -1) - # check if the returned scores also cover unknown items # if not, all unknown items will be given the MIN score if len(known_item_scores) == self.total_items: From 1aa722dc24da6bd35b853667189a261714f06fb4 Mon Sep 17 00:00:00 2001 From: hieuddo Date: Mon, 22 Apr 2024 09:06:53 +0000 Subject: [PATCH 3/5] Add k=None to rank() method in some models --- cornac/models/comparer/recom_comparer_obj.pyx | 2 +- cornac/models/comparer/recom_comparer_sub.pyx | 2 +- cornac/models/efm/recom_efm.pyx | 2 +- cornac/models/lrppm/recom_lrppm.pyx | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cornac/models/comparer/recom_comparer_obj.pyx b/cornac/models/comparer/recom_comparer_obj.pyx index 1c522b6f9..3c2162a2f 100644 --- a/cornac/models/comparer/recom_comparer_obj.pyx +++ b/cornac/models/comparer/recom_comparer_obj.pyx @@ -663,7 +663,7 @@ class ComparERObj(Recommender): item_score = self.U2[item_id, :].dot(self.U1[user_id, :]) + self.H2[item_id, :].dot(self.H1[user_id, :]) return item_score - def rank(self, user_id, item_ids=None): + def rank(self, user_id, item_ids=None, k=None): """Rank all test items for a given user. Parameters diff --git a/cornac/models/comparer/recom_comparer_sub.pyx b/cornac/models/comparer/recom_comparer_sub.pyx index e1eec1c77..69dad58ef 100644 --- a/cornac/models/comparer/recom_comparer_sub.pyx +++ b/cornac/models/comparer/recom_comparer_sub.pyx @@ -759,7 +759,7 @@ class ComparERSub(MTER): return correct, skipped, loss, bpr_loss - def rank(self, user_idx, item_indices=None): + def rank(self, user_idx, item_indices=None, k=None): if self.alpha > 0 and self.n_top_aspects > 0: n_top_aspects = min(self.n_top_aspects, self.num_aspects) ts1 = np.einsum("abc,a->bc", self.G1, self.U[user_idx]) diff --git a/cornac/models/efm/recom_efm.pyx b/cornac/models/efm/recom_efm.pyx index 5d6dd582c..b0dc140e3 100644 --- a/cornac/models/efm/recom_efm.pyx +++ b/cornac/models/efm/recom_efm.pyx @@ -468,7 +468,7 @@ class EFM(Recommender): item_score = self.U2[item_idx, :].dot(self.U1[user_idx, :]) + self.H2[item_idx, :].dot(self.H1[user_idx, :]) return item_score - def rank(self, user_idx, item_indices=None): + def rank(self, user_idx, item_indices=None, k=None): """Rank all test items for a given user. Parameters diff --git a/cornac/models/lrppm/recom_lrppm.pyx b/cornac/models/lrppm/recom_lrppm.pyx index 2c8ec5475..3a2c98840 100644 --- a/cornac/models/lrppm/recom_lrppm.pyx +++ b/cornac/models/lrppm/recom_lrppm.pyx @@ -516,7 +516,7 @@ class LRPPM(Recommender): item_score = self.I[i_idx].dot(self.U[u_idx]) return item_score - def rank(self, user_idx, item_indices=None): + def rank(self, user_idx, item_indices=None, k=None): if self.alpha > 0 and self.num_top_aspects > 0: n_items = self.num_items num_top_aspects = min(self.num_top_aspects, self.num_aspects) From b6df7099569a784e0eccc057e6ab9dbf276367b5 Mon Sep 17 00:00:00 2001 From: hieuddo Date: Mon, 22 Apr 2024 09:12:19 +0000 Subject: [PATCH 4/5] sync parameter name for ComparERObj.rank() with Recommender.rank() --- cornac/models/comparer/recom_comparer_obj.pyx | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/cornac/models/comparer/recom_comparer_obj.pyx b/cornac/models/comparer/recom_comparer_obj.pyx index 3c2162a2f..88ad81ca4 100644 --- a/cornac/models/comparer/recom_comparer_obj.pyx +++ b/cornac/models/comparer/recom_comparer_obj.pyx @@ -663,39 +663,39 @@ class ComparERObj(Recommender): item_score = self.U2[item_id, :].dot(self.U1[user_id, :]) + self.H2[item_id, :].dot(self.H1[user_id, :]) return item_score - def rank(self, user_id, item_ids=None, k=None): + def rank(self, user_idx, item_indices=None, k=None): """Rank all test items for a given user. Parameters ---------- - user_id: int, required + user_idx: int, required The index of the user for whom to perform item raking. - item_ids: 1d array, optional, default: None + item_indices: 1d array, optional, default: None A list of candidate item indices to be ranked by the user. If `None`, list of ranked known item indices and their scores will be returned Returns ------- Tuple of `item_rank`, and `item_scores`. The order of values - in item_scores are corresponding to the order of their ids in item_ids + in item_scores are corresponding to the order of their ids in item_indices """ - X_ = self.U1[user_id, :].dot(self.V.T) + X_ = self.U1[user_idx, :].dot(self.V.T) most_cared_aspects_indices = (-X_).argsort()[:self.num_most_cared_aspects] most_cared_X_ = X_[most_cared_aspects_indices] most_cared_Y_ = self.U2.dot(self.V[most_cared_aspects_indices, :].T) explicit_scores = most_cared_X_.dot(most_cared_Y_.T) / (self.num_most_cared_aspects * self.rating_scale) - item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_id) + item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_idx) - if item_ids is None: + if item_indices is None: item_scores = item_scores item_rank = item_scores.argsort()[::-1] else: - num_items = max(self.num_items, max(item_ids) + 1) + num_items = max(self.num_items, max(item_indices) + 1) item_scores = np.ones(num_items) * np.min(item_scores) item_scores[:self.num_items] = item_scores item_rank = item_scores.argsort()[::-1] - item_rank = intersects(item_rank, item_ids, assume_unique=True) - item_scores = item_scores[item_ids] + item_rank = intersects(item_rank, item_indices, assume_unique=True) + item_scores = item_scores[item_indices] return item_rank, item_scores From 3aafcf1c4df104295971fe54c584881465a533e7 Mon Sep 17 00:00:00 2001 From: hieuddo Date: Mon, 22 Apr 2024 09:45:24 +0000 Subject: [PATCH 5/5] add topK for some models --- cornac/models/comparer/recom_comparer_obj.pyx | 44 ++++++++++++------- cornac/models/comparer/recom_comparer_sub.pyx | 29 +++++++----- cornac/models/efm/recom_efm.pyx | 36 ++++++++++----- cornac/models/lrppm/recom_lrppm.pyx | 29 +++++++----- 4 files changed, 91 insertions(+), 47 deletions(-) diff --git a/cornac/models/comparer/recom_comparer_obj.pyx b/cornac/models/comparer/recom_comparer_obj.pyx index 88ad81ca4..cdd9d18c5 100644 --- a/cornac/models/comparer/recom_comparer_obj.pyx +++ b/cornac/models/comparer/recom_comparer_obj.pyx @@ -663,7 +663,7 @@ class ComparERObj(Recommender): item_score = self.U2[item_id, :].dot(self.U1[user_id, :]) + self.H2[item_id, :].dot(self.H1[user_id, :]) return item_score - def rank(self, user_idx, item_indices=None, k=None): + def rank(self, user_idx, item_indices=None, k=-1): """Rank all test items for a given user. Parameters @@ -675,10 +675,15 @@ class ComparERObj(Recommender): A list of candidate item indices to be ranked by the user. If `None`, list of ranked known item indices and their scores will be returned + k: int, required + Cut-off length for recommendations, k=-1 will return ranked list of all items. + This is more important for ANN to know the limit to avoid exhaustive ranking. + Returns ------- - Tuple of `item_rank`, and `item_scores`. The order of values - in item_scores are corresponding to the order of their ids in item_indices + (ranked_items, item_scores): tuple + `ranked_items` contains item indices being ranked by their scores. + `item_scores` contains scores of items corresponding to index in `item_indices` input. """ X_ = self.U1[user_idx, :].dot(self.V.T) @@ -686,16 +691,23 @@ class ComparERObj(Recommender): most_cared_X_ = X_[most_cared_aspects_indices] most_cared_Y_ = self.U2.dot(self.V[most_cared_aspects_indices, :].T) explicit_scores = most_cared_X_.dot(most_cared_Y_.T) / (self.num_most_cared_aspects * self.rating_scale) - item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_idx) - - if item_indices is None: - item_scores = item_scores - item_rank = item_scores.argsort()[::-1] - else: - num_items = max(self.num_items, max(item_indices) + 1) - item_scores = np.ones(num_items) * np.min(item_scores) - item_scores[:self.num_items] = item_scores - item_rank = item_scores.argsort()[::-1] - item_rank = intersects(item_rank, item_indices, assume_unique=True) - item_scores = item_scores[item_indices] - return item_rank, item_scores + all_item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_idx) + + # rank items based on their scores + item_indices = ( + np.arange(self.num_items) + if item_indices is None + else np.asarray(item_indices) + ) + item_scores = all_item_scores[item_indices] + + if k != -1: # O(n + k log k), faster for small k which is usually the case + partitioned_idx = np.argpartition(item_scores, -k) + top_k_idx = partitioned_idx[-k:] + sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])] + partitioned_idx[-k:] = sorted_top_k_idx + ranked_items = item_indices[partitioned_idx[::-1]] + else: # O(n log n) + ranked_items = item_indices[item_scores.argsort()[::-1]] + + return ranked_items, item_scores \ No newline at end of file diff --git a/cornac/models/comparer/recom_comparer_sub.pyx b/cornac/models/comparer/recom_comparer_sub.pyx index 69dad58ef..ec1173db7 100644 --- a/cornac/models/comparer/recom_comparer_sub.pyx +++ b/cornac/models/comparer/recom_comparer_sub.pyx @@ -759,7 +759,7 @@ class ComparERSub(MTER): return correct, skipped, loss, bpr_loss - def rank(self, user_idx, item_indices=None, k=None): + def rank(self, user_idx, item_indices=None, k=-1): if self.alpha > 0 and self.n_top_aspects > 0: n_top_aspects = min(self.n_top_aspects, self.num_aspects) ts1 = np.einsum("abc,a->bc", self.G1, self.U[user_idx]) @@ -786,12 +786,21 @@ class ComparERSub(MTER): all_item_scores[: self.num_items] = known_item_scores # rank items based on their scores - if item_indices is None: - item_scores = all_item_scores[: self.num_items] - item_rank = item_scores.argsort()[::-1] - else: - item_scores = all_item_scores[item_indices] - item_rank = np.array(item_indices)[item_scores.argsort()[::-1]] - - return item_rank, item_scores - return super().rank(user_idx, item_indices) \ No newline at end of file + item_indices = ( + np.arange(self.num_items) + if item_indices is None + else np.asarray(item_indices) + ) + item_scores = all_item_scores[item_indices] + + if k != -1: # O(n + k log k), faster for small k which is usually the case + partitioned_idx = np.argpartition(item_scores, -k) + top_k_idx = partitioned_idx[-k:] + sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])] + partitioned_idx[-k:] = sorted_top_k_idx + ranked_items = item_indices[partitioned_idx[::-1]] + else: # O(n log n) + ranked_items = item_indices[item_scores.argsort()[::-1]] + + return ranked_items, item_scores + return super().rank(user_idx, item_indices, k) \ No newline at end of file diff --git a/cornac/models/efm/recom_efm.pyx b/cornac/models/efm/recom_efm.pyx index b0dc140e3..81cf9acc0 100644 --- a/cornac/models/efm/recom_efm.pyx +++ b/cornac/models/efm/recom_efm.pyx @@ -468,7 +468,7 @@ class EFM(Recommender): item_score = self.U2[item_idx, :].dot(self.U1[user_idx, :]) + self.H2[item_idx, :].dot(self.H1[user_idx, :]) return item_score - def rank(self, user_idx, item_indices=None, k=None): + def rank(self, user_idx, item_indices=None, k=-1): """Rank all test items for a given user. Parameters @@ -480,10 +480,15 @@ class EFM(Recommender): A list of candidate item indices to be ranked by the user. If `None`, list of ranked known item indices and their scores will be returned + k: int, required + Cut-off length for recommendations, k=-1 will return ranked list of all items. + This is more important for ANN to know the limit to avoid exhaustive ranking. + Returns ------- - Tuple of `item_rank`, and `item_scores`. The order of values - in item_scores are corresponding to the order of their ids in item_ids + (ranked_items, item_scores): tuple + `ranked_items` contains item indices being ranked by their scores. + `item_scores` contains scores of items corresponding to index in `item_indices` input. """ X_ = self.U1[user_idx, :].dot(self.V.T) @@ -504,11 +509,20 @@ class EFM(Recommender): all_item_scores[: self.num_items] = known_item_scores # rank items based on their scores - if item_indices is None: - item_scores = all_item_scores[: self.num_items] - item_rank = item_scores.argsort()[::-1] - else: - item_scores = all_item_scores[item_indices] - item_rank = np.array(item_indices)[item_scores.argsort()[::-1]] - - return item_rank, item_scores + item_indices = ( + np.arange(self.num_items) + if item_indices is None + else np.asarray(item_indices) + ) + item_scores = all_item_scores[item_indices] + + if k != -1: # O(n + k log k), faster for small k which is usually the case + partitioned_idx = np.argpartition(item_scores, -k) + top_k_idx = partitioned_idx[-k:] + sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])] + partitioned_idx[-k:] = sorted_top_k_idx + ranked_items = item_indices[partitioned_idx[::-1]] + else: # O(n log n) + ranked_items = item_indices[item_scores.argsort()[::-1]] + + return ranked_items, item_scores diff --git a/cornac/models/lrppm/recom_lrppm.pyx b/cornac/models/lrppm/recom_lrppm.pyx index 3a2c98840..236da8acd 100644 --- a/cornac/models/lrppm/recom_lrppm.pyx +++ b/cornac/models/lrppm/recom_lrppm.pyx @@ -516,7 +516,7 @@ class LRPPM(Recommender): item_score = self.I[i_idx].dot(self.U[u_idx]) return item_score - def rank(self, user_idx, item_indices=None, k=None): + def rank(self, user_idx, item_indices=None, k=-1): if self.alpha > 0 and self.num_top_aspects > 0: n_items = self.num_items num_top_aspects = min(self.num_top_aspects, self.num_aspects) @@ -540,12 +540,21 @@ class LRPPM(Recommender): all_item_scores[: self.num_items] = known_item_scores # rank items based on their scores - if item_indices is None: - item_scores = all_item_scores[: self.num_items] - item_rank = item_scores.argsort()[::-1] - else: - item_scores = all_item_scores[item_indices] - item_rank = np.array(item_indices)[item_scores.argsort()[::-1]] - - return item_rank, item_scores - return super().rank(user_idx, item_indices) \ No newline at end of file + item_indices = ( + np.arange(self.num_items) + if item_indices is None + else np.asarray(item_indices) + ) + item_scores = all_item_scores[item_indices] + + if k != -1: # O(n + k log k), faster for small k which is usually the case + partitioned_idx = np.argpartition(item_scores, -k) + top_k_idx = partitioned_idx[-k:] + sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])] + partitioned_idx[-k:] = sorted_top_k_idx + ranked_items = item_indices[partitioned_idx[::-1]] + else: # O(n log n) + ranked_items = item_indices[item_scores.argsort()[::-1]] + + return ranked_items, item_scores + return super().rank(user_idx, item_indices, k) \ No newline at end of file