From ae7ba8601aa37041e027422ae802d36d1d1db1c8 Mon Sep 17 00:00:00 2001 From: Jaime Hieu Do Date: Thu, 25 Apr 2024 04:00:06 +0800 Subject: [PATCH] Fix .rank() method for multiple models (#615) The new Recommender.rank() function adds k as required value, which breaks some models that do not use k in ranking evaluation (e.g., ComparER, EFM, LRPPM). This commit updates .rank() for mentioned models with topK option. --- cornac/models/comparer/recom_comparer_obj.pyx | 50 ++++++++++++------- cornac/models/comparer/recom_comparer_sub.pyx | 29 +++++++---- cornac/models/efm/recom_efm.pyx | 36 +++++++++---- cornac/models/lrppm/recom_lrppm.pyx | 29 +++++++---- 4 files changed, 94 insertions(+), 50 deletions(-) diff --git a/cornac/models/comparer/recom_comparer_obj.pyx b/cornac/models/comparer/recom_comparer_obj.pyx index 1c522b6f9..cdd9d18c5 100644 --- a/cornac/models/comparer/recom_comparer_obj.pyx +++ b/cornac/models/comparer/recom_comparer_obj.pyx @@ -663,39 +663,51 @@ class ComparERObj(Recommender): item_score = self.U2[item_id, :].dot(self.U1[user_id, :]) + self.H2[item_id, :].dot(self.H1[user_id, :]) return item_score - def rank(self, user_id, item_ids=None): + def rank(self, user_idx, item_indices=None, k=-1): """Rank all test items for a given user. Parameters ---------- - user_id: int, required + user_idx: int, required The index of the user for whom to perform item raking. - item_ids: 1d array, optional, default: None + item_indices: 1d array, optional, default: None A list of candidate item indices to be ranked by the user. If `None`, list of ranked known item indices and their scores will be returned + k: int, required + Cut-off length for recommendations, k=-1 will return ranked list of all items. + This is more important for ANN to know the limit to avoid exhaustive ranking. + Returns ------- - Tuple of `item_rank`, and `item_scores`. The order of values - in item_scores are corresponding to the order of their ids in item_ids + (ranked_items, item_scores): tuple + `ranked_items` contains item indices being ranked by their scores. + `item_scores` contains scores of items corresponding to index in `item_indices` input. """ - X_ = self.U1[user_id, :].dot(self.V.T) + X_ = self.U1[user_idx, :].dot(self.V.T) most_cared_aspects_indices = (-X_).argsort()[:self.num_most_cared_aspects] most_cared_X_ = X_[most_cared_aspects_indices] most_cared_Y_ = self.U2.dot(self.V[most_cared_aspects_indices, :].T) explicit_scores = most_cared_X_.dot(most_cared_Y_.T) / (self.num_most_cared_aspects * self.rating_scale) - item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_id) - - if item_ids is None: - item_scores = item_scores - item_rank = item_scores.argsort()[::-1] - else: - num_items = max(self.num_items, max(item_ids) + 1) - item_scores = np.ones(num_items) * np.min(item_scores) - item_scores[:self.num_items] = item_scores - item_rank = item_scores.argsort()[::-1] - item_rank = intersects(item_rank, item_ids, assume_unique=True) - item_scores = item_scores[item_ids] - return item_rank, item_scores + all_item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_idx) + + # rank items based on their scores + item_indices = ( + np.arange(self.num_items) + if item_indices is None + else np.asarray(item_indices) + ) + item_scores = all_item_scores[item_indices] + + if k != -1: # O(n + k log k), faster for small k which is usually the case + partitioned_idx = np.argpartition(item_scores, -k) + top_k_idx = partitioned_idx[-k:] + sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])] + partitioned_idx[-k:] = sorted_top_k_idx + ranked_items = item_indices[partitioned_idx[::-1]] + else: # O(n log n) + ranked_items = item_indices[item_scores.argsort()[::-1]] + + return ranked_items, item_scores \ No newline at end of file diff --git a/cornac/models/comparer/recom_comparer_sub.pyx b/cornac/models/comparer/recom_comparer_sub.pyx index e1eec1c77..ec1173db7 100644 --- a/cornac/models/comparer/recom_comparer_sub.pyx +++ b/cornac/models/comparer/recom_comparer_sub.pyx @@ -759,7 +759,7 @@ class ComparERSub(MTER): return correct, skipped, loss, bpr_loss - def rank(self, user_idx, item_indices=None): + def rank(self, user_idx, item_indices=None, k=-1): if self.alpha > 0 and self.n_top_aspects > 0: n_top_aspects = min(self.n_top_aspects, self.num_aspects) ts1 = np.einsum("abc,a->bc", self.G1, self.U[user_idx]) @@ -786,12 +786,21 @@ class ComparERSub(MTER): all_item_scores[: self.num_items] = known_item_scores # rank items based on their scores - if item_indices is None: - item_scores = all_item_scores[: self.num_items] - item_rank = item_scores.argsort()[::-1] - else: - item_scores = all_item_scores[item_indices] - item_rank = np.array(item_indices)[item_scores.argsort()[::-1]] - - return item_rank, item_scores - return super().rank(user_idx, item_indices) \ No newline at end of file + item_indices = ( + np.arange(self.num_items) + if item_indices is None + else np.asarray(item_indices) + ) + item_scores = all_item_scores[item_indices] + + if k != -1: # O(n + k log k), faster for small k which is usually the case + partitioned_idx = np.argpartition(item_scores, -k) + top_k_idx = partitioned_idx[-k:] + sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])] + partitioned_idx[-k:] = sorted_top_k_idx + ranked_items = item_indices[partitioned_idx[::-1]] + else: # O(n log n) + ranked_items = item_indices[item_scores.argsort()[::-1]] + + return ranked_items, item_scores + return super().rank(user_idx, item_indices, k) \ No newline at end of file diff --git a/cornac/models/efm/recom_efm.pyx b/cornac/models/efm/recom_efm.pyx index 5d6dd582c..81cf9acc0 100644 --- a/cornac/models/efm/recom_efm.pyx +++ b/cornac/models/efm/recom_efm.pyx @@ -468,7 +468,7 @@ class EFM(Recommender): item_score = self.U2[item_idx, :].dot(self.U1[user_idx, :]) + self.H2[item_idx, :].dot(self.H1[user_idx, :]) return item_score - def rank(self, user_idx, item_indices=None): + def rank(self, user_idx, item_indices=None, k=-1): """Rank all test items for a given user. Parameters @@ -480,10 +480,15 @@ class EFM(Recommender): A list of candidate item indices to be ranked by the user. If `None`, list of ranked known item indices and their scores will be returned + k: int, required + Cut-off length for recommendations, k=-1 will return ranked list of all items. + This is more important for ANN to know the limit to avoid exhaustive ranking. + Returns ------- - Tuple of `item_rank`, and `item_scores`. The order of values - in item_scores are corresponding to the order of their ids in item_ids + (ranked_items, item_scores): tuple + `ranked_items` contains item indices being ranked by their scores. + `item_scores` contains scores of items corresponding to index in `item_indices` input. """ X_ = self.U1[user_idx, :].dot(self.V.T) @@ -504,11 +509,20 @@ class EFM(Recommender): all_item_scores[: self.num_items] = known_item_scores # rank items based on their scores - if item_indices is None: - item_scores = all_item_scores[: self.num_items] - item_rank = item_scores.argsort()[::-1] - else: - item_scores = all_item_scores[item_indices] - item_rank = np.array(item_indices)[item_scores.argsort()[::-1]] - - return item_rank, item_scores + item_indices = ( + np.arange(self.num_items) + if item_indices is None + else np.asarray(item_indices) + ) + item_scores = all_item_scores[item_indices] + + if k != -1: # O(n + k log k), faster for small k which is usually the case + partitioned_idx = np.argpartition(item_scores, -k) + top_k_idx = partitioned_idx[-k:] + sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])] + partitioned_idx[-k:] = sorted_top_k_idx + ranked_items = item_indices[partitioned_idx[::-1]] + else: # O(n log n) + ranked_items = item_indices[item_scores.argsort()[::-1]] + + return ranked_items, item_scores diff --git a/cornac/models/lrppm/recom_lrppm.pyx b/cornac/models/lrppm/recom_lrppm.pyx index 2c8ec5475..236da8acd 100644 --- a/cornac/models/lrppm/recom_lrppm.pyx +++ b/cornac/models/lrppm/recom_lrppm.pyx @@ -516,7 +516,7 @@ class LRPPM(Recommender): item_score = self.I[i_idx].dot(self.U[u_idx]) return item_score - def rank(self, user_idx, item_indices=None): + def rank(self, user_idx, item_indices=None, k=-1): if self.alpha > 0 and self.num_top_aspects > 0: n_items = self.num_items num_top_aspects = min(self.num_top_aspects, self.num_aspects) @@ -540,12 +540,21 @@ class LRPPM(Recommender): all_item_scores[: self.num_items] = known_item_scores # rank items based on their scores - if item_indices is None: - item_scores = all_item_scores[: self.num_items] - item_rank = item_scores.argsort()[::-1] - else: - item_scores = all_item_scores[item_indices] - item_rank = np.array(item_indices)[item_scores.argsort()[::-1]] - - return item_rank, item_scores - return super().rank(user_idx, item_indices) \ No newline at end of file + item_indices = ( + np.arange(self.num_items) + if item_indices is None + else np.asarray(item_indices) + ) + item_scores = all_item_scores[item_indices] + + if k != -1: # O(n + k log k), faster for small k which is usually the case + partitioned_idx = np.argpartition(item_scores, -k) + top_k_idx = partitioned_idx[-k:] + sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])] + partitioned_idx[-k:] = sorted_top_k_idx + ranked_items = item_indices[partitioned_idx[::-1]] + else: # O(n log n) + ranked_items = item_indices[item_scores.argsort()[::-1]] + + return ranked_items, item_scores + return super().rank(user_idx, item_indices, k) \ No newline at end of file