From 92b7891b3d841223f7fdaab0f02c4d4fc7a7c929 Mon Sep 17 00:00:00 2001
From: lthoang <trhoanglee@gmail.com>
Date: Thu, 7 Dec 2023 09:59:21 +0800
Subject: [PATCH] Add option to scoring function based on quantity provided in
 extra_data

---
 cornac/models/gp_top/recom_gp_top.py | 33 +++++++++++++++++++++++++---
 examples/gp_top_tafeng.py            |  3 ++-
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/cornac/models/gp_top/recom_gp_top.py b/cornac/models/gp_top/recom_gp_top.py
index ede471cf2..c8e271984 100644
--- a/cornac/models/gp_top/recom_gp_top.py
+++ b/cornac/models/gp_top/recom_gp_top.py
@@ -33,6 +33,10 @@ class GPTop(NextBasketRecommender):
     use_personalized_popularity: boolean, optional, default: True
         When False, no item frequency from history baskets are being used.
 
+    use_quantity: boolean, optional, default: False
+        When True, constructing item frequency based on its quantity (getting from extra_data).
+        The data must be in fmt 'UBITJson'.
+
     References
     ----------
     Ming Li, Sami Jullien, Mozhdeh Ariannezhad, and Maarten de Rijke. 2023.
@@ -42,17 +46,29 @@ class GPTop(NextBasketRecommender):
     """
 
     def __init__(
-        self, name="GPTop", use_global_popularity=True, use_personalized_popularity=True
+        self,
+        name="GPTop",
+        use_global_popularity=True,
+        use_personalized_popularity=True,
+        use_quantity=False,
     ):
         super().__init__(name=name, trainable=False)
         self.use_global_popularity = use_global_popularity
         self.use_personalized_popularity = use_personalized_popularity
+        self.use_quantity = use_quantity
         self.item_freq = Counter()
 
     def fit(self, train_set, val_set=None):
         super().fit(train_set=train_set, val_set=val_set)
         if self.use_global_popularity:
-            self.item_freq = Counter(self.train_set.uir_tuple[1])
+            if self.use_quantity:
+                self.item_freq = Counter()
+                for idx, iid in enumerate(self.train_set.uir_tuple[1]):
+                    self.item_freq[iid] += self.train_set.extra_data[idx].get(
+                        "quantity", 0
+                    )
+            else:
+                self.item_freq = Counter(self.train_set.uir_tuple[1])
         return self
 
     def score(self, user_idx, history_baskets, **kwargs):
@@ -65,7 +81,18 @@ def score(self, user_idx, history_baskets, **kwargs):
                 item_scores[iid] = freq / max_item_freq
 
         if self.use_personalized_popularity:
-            p_item_freq = Counter([iid for iids in history_baskets for iid in iids])
+            if self.use_quantity:
+                history_basket_bids = kwargs.get("history_basket_ids")
+                baskets = kwargs.get("baskets")
+                p_item_freq = Counter()
+                (_, item_ids, _) = kwargs.get("uir_tuple")
+                extra_data = kwargs.get("extra_data")
+                for bid in history_basket_bids:
+                    ids = baskets[bid]
+                    for idx in ids:
+                        p_item_freq[item_ids[idx]] += extra_data[idx].get("quantity", 0)
+            else:
+                p_item_freq = Counter([iid for iids in history_baskets for iid in iids])
             for iid, cnt in p_item_freq.most_common():
                 item_scores[iid] += cnt
         return item_scores
diff --git a/examples/gp_top_tafeng.py b/examples/gp_top_tafeng.py
index efaa56859..834acf67a 100644
--- a/examples/gp_top_tafeng.py
+++ b/examples/gp_top_tafeng.py
@@ -30,8 +30,9 @@
 )
 
 models = [
-    GPTop(name="PTop", use_global_popularity=False),
     GPTop(name="GTop", use_personalized_popularity=False),
+    GPTop(name="PTop", use_global_popularity=False),
+    GPTop(name="GPTop-quantity", use_quantity=True),
     GPTop(),
 ]