From 5eb2201fc458a10c5a8efaf8d74b9197c28e197f Mon Sep 17 00:00:00 2001 From: VincentAuriau Date: Thu, 1 Feb 2024 09:25:22 +0100 Subject: [PATCH 01/10] FIX: typo in issue --- choice_learn/models/conditional_mnl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/choice_learn/models/conditional_mnl.py b/choice_learn/models/conditional_mnl.py index da03fe95..00733cf2 100644 --- a/choice_learn/models/conditional_mnl.py +++ b/choice_learn/models/conditional_mnl.py @@ -1102,7 +1102,7 @@ def get_weights_std(self, dataset): jacobian = tape_2.jacobian(loss, w) # Compute the Hessian from the Jacobian hessian = tape_1.batch_jacobian(jacobian, w) - return tf.sqrt([tf.linalg.inv(tf.squeeze(hessian))[i][i] for i in range(13)]) + return tf.sqrt([tf.linalg.inv(tf.squeeze(hessian))[i][i] for i in range(len(w))]) def clone(self): """Returns a clone of the model.""" From bbe7666c157ee2dd0669bf0b589ef24d9896912e Mon Sep 17 00:00:00 2001 From: VincentAuriau Date: Fri, 2 Feb 2024 11:52:45 +0100 Subject: [PATCH 02/10] ENH: renaming, cleaning --- choice_learn/tf_ops.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/choice_learn/tf_ops.py b/choice_learn/tf_ops.py index cf8d617e..243fb8ba 100644 --- a/choice_learn/tf_ops.py +++ b/choice_learn/tf_ops.py @@ -3,8 +3,8 @@ import tensorflow as tf -def custom_softmax( - sessions_items_logits, sessions_items_availabilities, axis=-1, normalize_exit=False, eps=1e-5 +def softmax_with_availabilities( + contexts_items_logits, contexts_items_availabilities, axis=-1, normalize_exit=False, eps=1e-5 ): """Function to compute softmax probabilities from utilities. @@ -14,9 +14,9 @@ def custom_softmax( Parameters ---------- - sessions_items_logits : np.ndarray (n_sessions, n_products) + contexts_items_logits : np.ndarray (n_sessions, n_products) Utilities / Logits on which to compute the softmax - sessions_items_availabilities : np.ndarray (n_sessions, n_products) + contexts_items_availabilities : np.ndarray (n_sessions, n_products) Matrix indicating the availabitily (1) or not (0) of the products axis : int, optional Axis of sessions_logits on which to apply the softmax, by default -1 @@ -34,10 +34,10 @@ def custom_softmax( """ # Substract max utility to avoid overflow numerator = tf.exp( - sessions_items_logits - tf.reduce_max(sessions_items_logits, axis=axis, keepdims=True) + contexts_items_logits - tf.reduce_max(contexts_items_logits, axis=axis, keepdims=True) ) # Set unavailable products utility to 0 - numerator = tf.multiply(numerator, sessions_items_availabilities) + numerator = tf.multiply(numerator, contexts_items_availabilities) # Sum of total available utilities denominator = tf.reduce_sum(numerator, axis=axis, keepdims=True) # Add 1 to the denominator to take into account the exit choice From 41adb38d459aaba54534644a5f0c3f1d33cb1a31 Mon Sep 17 00:00:00 2001 From: VincentAuriau Date: Fri, 2 Feb 2024 11:53:52 +0100 Subject: [PATCH 03/10] ENH: new signature, better names & doc --- choice_learn/models/base_model.py | 357 +++++++++++++++++------------- 1 file changed, 197 insertions(+), 160 deletions(-) diff --git a/choice_learn/models/base_model.py b/choice_learn/models/base_model.py index ca8aae7a..488e32cc 100644 --- a/choice_learn/models/base_model.py +++ b/choice_learn/models/base_model.py @@ -9,10 +9,7 @@ import tensorflow as tf import tqdm -from choice_learn.tf_ops import ( - CustomCategoricalCrossEntropy, - custom_softmax, -) +import choice_learn.tf_ops as tf_ops class ChoiceModel(object): @@ -25,6 +22,8 @@ def __init__( 
optimizer="Adam", callbacks=None, lr=0.001, + epochs=1, + batch_size=32, ): """Instantiates the ChoiceModel. @@ -45,10 +44,9 @@ def __init__( self.label_smoothing = label_smoothing self.stop_training = False - # self.loss = tf.keras.losses.CategoricalCrossentropy( - # from_logits=False, label_smoothing=self.label_smoothing - # ) - self.loss = CustomCategoricalCrossEntropy( + # Loss function wrapping tf.keras.losses.CategoricalCrossEntropy + # with smoothing and normalization options + self.loss = tf_ops.CustomCategoricalCrossEntropy( from_logits=False, label_smoothing=self.label_smoothing ) self.callbacks = tf.keras.callbacks.CallbackList(callbacks, add_history=True, model=None) @@ -69,9 +67,17 @@ def __init__( print(f"Optimizer {optimizer} not implemnted, switching for default Adam") self.optimizer = tf.keras.optimizers.Adam(lr) + self.epochs = epochs + self.batch_size = batch_size + @abstractmethod - def compute_utility( - self, items_batch, sessions_batch, sessions_items_batch, availabilities_batch, choices_batch + def compute_batch_utility( + self, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, ): """Method that defines how the model computes the utility of a product. @@ -80,28 +86,28 @@ def compute_utility( Parameters ---------- - items_batch : tuple of np.ndarray (items_features) + fixed_items_features : tuple of np.ndarray Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the products constant/fixed features. Shape must be (n_items, n_items_features) - sessions_batch : tuple of np.ndarray (sessions_features) - Time-Features - Shape must be (n_sessions, n_sessions_features) - sessions_items_batch : tuple of np.ndarray (sessions_items_features) - Time-Item-Features - Shape must be (n_sessions, n_sessions_items_features) - availabilities_batch : np.ndarray - Availabilities (sessions_items_availabilities) - Shape must be (n_sessions, n_items) + contexts_features : tuple of np.ndarray (contexts_features) + a batch of contexts features + Shape must be (n_contexts, n_contexts_features) + contexts_items_features : tuple of np.ndarray (contexts_items_features) + a batch of contexts items features + Shape must be (n_contexts, n_contexts_items_features) + contexts_items_availabilities : np.ndarray + A batch of contexts items availabilities + Shape must be (n_contexts, n_items) choices_batch : np.ndarray Choices - Shape must be (n_sessions, ) + Shape must be (n_contexts, ) Returns: -------- np.ndarray - Utility of each product for each session. - Shape must be (n_sessions, n_items) + Utility of each product for each context. + Shape must be (n_contexts, n_items) """ # To be implemented in children classes # Can be numpy or tensorflow based @@ -110,28 +116,33 @@ def compute_utility( @tf.function def train_step( self, - items_batch, - sessions_batch, - sessions_items_batch, - availabilities_batch, - choices_batch, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, sample_weight=None, ): """Function that represents one training step (= one gradient descent step) of the model. Parameters ---------- - items_batch : tuple of np.ndarray (items_features) + fixed_items_features : tuple of np.ndarray Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the products constant/fixed features. 
- sessions_batch : tuple of np.ndarray (sessions_features) - Time-Features - sessions_items_batch : tuple of np.ndarray (sessions_items_features) - Time-Item-Features - availabilities_batch : np.ndarray - Availabilities (sessions_items_availabilities) + Shape must be (n_items, n_items_features) + contexts_features : tuple of np.ndarray (contexts_features) + a batch of contexts features + Shape must be (n_contexts, n_contexts_features) + contexts_items_features : tuple of np.ndarray (contexts_items_features) + a batch of contexts items features + Shape must be (n_contexts, n_contexts_items_features) + contexts_items_availabilities : np.ndarray + A batch of contexts items availabilities + Shape must be (n_contexts, n_items) choices_batch : np.ndarray Choices + Shape must be (n_contexts, ) sample_weight : np.ndarray, optional List samples weights to apply during the gradient descent to the batch elements, by default None @@ -142,69 +153,56 @@ def train_step( Value of NegativeLogLikelihood loss for the batch """ with tf.GradientTape() as tape: - all_u = self.compute_utility( - items_batch, - sessions_batch, - sessions_items_batch, - availabilities_batch, - choices_batch, + utilities = self.compute_batch_utility( + fixed_items_features=fixed_items_features, + contexts_features=contexts_features, + contexts_items_features=contexts_items_features, + contexts_items_availabilities=contexts_items_availabilities, + choices=choices, ) - """ - all_u = tf.math.exp(all_u) - - # Assortment(t) Utility - norms = tf.reduce_sum(tf.multiply(all_u, ia_batch), axis=1) - if self.normalize_non_buy: - norms += 1 - # Probabilities - final_utilities = tf.divide( - all_u, - tf.repeat(tf.expand_dims(norms, 1), fif_batch[0].shape[0], axis=1), - ) - # Probabilities of selected product - available_utilities = tf.gather_nd(indices=choices_nd, params=final_utilities) - """ - # probabilities = availability_softmax(all_u, availabilities_batch, axis=-1) - probabilities = custom_softmax( - all_u, availabilities_batch, normalize_exit=self.normalize_non_buy, axis=-1 + + probabilities = tf_ops.softmax_with_availabilities( + contexts_items_logits=utilities, + contexts_items_availabilities=contexts_items_availabilities, + normalize_exit=self.normalize_non_buy, + axis=-1, ) # Negative Log-Likelihood neg_loglikelihood = self.loss( y_pred=probabilities, - y_true=tf.one_hot(choices_batch, depth=probabilities.shape[1]), + y_true=tf.one_hot(choices, depth=probabilities.shape[1]), sample_weight=sample_weight, ) - """ - if sample_weight is not None: - neg_loglikelihood = -tf.reduce_sum( - tf.math.log(available_utilities + 1e-10) * sample_weight - ) - else: - neg_loglikelihood = -tf.reduce_sum(tf.math.log(available_utilities + 1e-10)) - """ + grads = tape.gradient(neg_loglikelihood, self.weights) self.optimizer.apply_gradients(zip(grads, self.weights)) return neg_loglikelihood def fit( - self, choice_dataset, n_epochs, batch_size, sample_weight=None, val_dataset=None, verbose=0 + self, + choice_dataset, + sample_weight=None, + val_dataset=None, + verbose=0, + epochs=None, + batch_size=None, ): """Method to train the model with a ChoiceDataset. 
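 A minimal usage sketch (hypothetical subclass and dataset; assumes tensorflow is
 imported as tf and ChoiceModel is in scope — only compute_batch_utility has to be
 overridden, the training loop below is inherited):

 >>> class LinearUtilityModel(ChoiceModel):
 ...     def __init__(self, n_features, **kwargs):
 ...         super().__init__(**kwargs)
 ...         self.beta = tf.Variable(tf.random.normal((n_features,)))
 ...         self.weights = [self.beta]  # read by train_step's GradientTape
 ...     def compute_batch_utility(self, fixed_items_features, contexts_features,
 ...                               contexts_items_features, contexts_items_availabilities,
 ...                               choices):
 ...         # (n_contexts, n_items, n_features) . (n_features,) -> (n_contexts, n_items)
 ...         return tf.tensordot(
 ...             tf.concat([*contexts_items_features], axis=-1), self.beta, axes=1
 ...         )
 >>> # history = LinearUtilityModel(n_features=4).fit(choice_dataset)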
Parameters ---------- choice_dataset : ChoiceDataset - _description_ - n_epochs : int - Number of epochs - batch_size : int - Batch size + Input data in the form of a ChoiceDataset sample_weight : np.ndarray, optional Sample weights to apply, by default None val_dataset : ChoiceDataset, optional Test ChoiceDataset to evaluate performances on test at each epoch, by default None verbose : int, optional print level, for debugging, by default 0 + epochs : int, optional + Number of epochs, default is None, meaning we use self.epochs + batch_size : int, optional + Batch size, default is None, meaning we use self.batch_size Returns: -------- @@ -214,8 +212,13 @@ def fit( if hasattr(self, "instantiated"): if not self.instantiated: raise ValueError("Model not instantiated. Please call .instantiate() first.") + if epochs is None: + epochs = self.epochs + if batch_size is None: + batch_size = self.batch_size + losses_history = {"train_loss": []} - t_range = tqdm.trange(n_epochs, position=0) + t_range = tqdm.trange(epochs, position=0) self.callbacks.on_train_begin() @@ -245,8 +248,8 @@ def fit( for batch_nb, ( ( items_batch, - sessions_batch, - sessions_items_batch, + contexts_batch, + contexts_items_batch, availabilities_batch, choices_batch, ), @@ -256,8 +259,8 @@ def fit( neg_loglikelihood = self.train_step( items_batch, - sessions_batch, - sessions_items_batch, + contexts_batch, + contexts_items_batch, availabilities_batch, choices_batch, sample_weight=weight_batch, @@ -283,16 +286,16 @@ def fit( inner_range = choice_dataset.iter_batch(shuffle=True, batch_size=batch_size) for batch_nb, ( items_batch, - sessions_batch, - sessions_items_batch, + contexts_batch, + contexts_items_batch, availabilities_batch, choices_batch, ) in enumerate(inner_range): self.callbacks.on_train_batch_begin(batch_nb) neg_loglikelihood = self.train_step( items_batch, - sessions_batch, - sessions_items_batch, + contexts_batch, + contexts_items_batch, availabilities_batch, choices_batch, ) @@ -329,8 +332,8 @@ def fit( test_losses = [] for batch_nb, ( items_batch, - sessions_batch, - sessions_items_batch, + contexts_batch, + contexts_items_batch, availabilities_batch, choices_batch, ) in enumerate(val_dataset.iter_batch(shuffle=False, batch_size=batch_size)): @@ -339,8 +342,8 @@ def fit( test_losses.append( self.batch_predict( items_batch, - sessions_batch, - sessions_items_batch, + contexts_batch, + contexts_items_batch, availabilities_batch, choices_batch, )[0] @@ -373,28 +376,33 @@ def fit( @tf.function def batch_predict( self, - items_batch, - sessions_batch, - sessions_items_batch, - availabilities_batch, - choices_batch, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, sample_weight=None, ): """Function that represents one prediction (Probas + Loss) for one batch of a ChoiceDataset. Parameters ---------- - items_batch : tuple of np.ndarray (items_features) + fixed_items_features : tuple of np.ndarray Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the products - constant features. - sessions_batch : tuple of np.ndarray (sessions_features) - Time-Features - sessions_items_batch : tuple of np.ndarray (sessions_items_features) - Time-Item-Features - availabilities_batch : np.ndarray - Availabilities (sessions_items_availabilities) + constant/fixed features. 
+ Shape must be (n_items, n_items_features) + contexts_features : tuple of np.ndarray (contexts_features) + a batch of contexts features + Shape must be (n_contexts, n_contexts_features) + contexts_items_features : tuple of np.ndarray (contexts_items_features) + a batch of contexts items features + Shape must be (n_contexts, n_contexts_items_features) + contexts_items_availabilities : np.ndarray + A batch of contexts items availabilities + Shape must be (n_contexts, n_items) choices_batch : np.ndarray Choices + Shape must be (n_contexts, ) sample_weight : np.ndarray, optional List samples weights to apply during the gradient descent to the batch elements, by default None @@ -404,23 +412,29 @@ def batch_predict( tf.Tensor (1, ) Value of NegativeLogLikelihood loss for the batch tf.Tensor (batch_size, n_items) - Probabilities for each product to be chosen for each session + Probabilities for each product to be chosen for each context """ # Compute utilities from features utilities = self.compute_utility( - items_batch, sessions_batch, sessions_items_batch, availabilities_batch, choices_batch + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, ) # Compute probabilities from utilities & availabilties - # probabilities = availability_softmax(utilities, availabilities_batch, axis=-1) - probabilities = custom_softmax( - utilities, availabilities_batch, normalize_exit=self.normalize_non_buy, axis=-1 + probabilities = tf_ops.softmax_with_availabilities( + contexts_items_logits=utilities, + contexts_items_availabilities=contexts_items_availabilities, + normalize_exit=self.normalize_non_buy, + axis=-1, ) # Compute loss from probabilities & actual choices # batch_loss = self.loss(probabilities, c_batch, sample_weight=sample_weight) batch_loss = self.loss( y_pred=probabilities, - y_true=tf.one_hot(choices_batch, depth=probabilities.shape[1]), + y_true=tf.one_hot(choices, depth=probabilities.shape[1]), sample_weight=sample_weight, ) return batch_loss, probabilities @@ -477,7 +491,7 @@ def load_model(cls, path): return cls def predict_probas(self, choice_dataset, batch_size=-1): - """Predicts the choice probabilities for each session and each product of a ChoiceDataset. + """Predicts the choice probabilities for each context and each product of a ChoiceDataset. Parameters ---------- @@ -488,30 +502,30 @@ def predict_probas(self, choice_dataset, batch_size=-1): Returns: -------- - np.ndarray (n_sessions, n_items) - Choice probabilties for each session and each product + np.ndarray (n_contexts, n_items) + Choice probabilties for each context and each product """ stacked_probabilities = [] for ( - items_batch, - sessions_batch, - sessions_items_batch, - availabilities_batch, - choices_batch, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, ) in choice_dataset.iter_batch(batch_size=batch_size): _, probabilities = self.batch_predict( - items_batch, - sessions_batch, - sessions_items_batch, - availabilities_batch, - choices_batch, + fixed_items_features=fixed_items_features, + contexts_features=contexts_features, + contexts_items_features=contexts_items_features, + contexts_items_availabilities=contexts_items_availabilities, + choices=choices, ) stacked_probabilities.append(probabilities) return tf.concat(stacked_probabilities, axis=0) def evaluate(self, choice_dataset, batch_size=-1): - """Evaluates the model for each session and each product of a ChoiceDataset. 
+ """Evaluates the model for each context and each product of a ChoiceDataset. Predicts the probabilities according to the model and computes the Negative-Log-Likelihood loss from the actual choices. @@ -523,27 +537,27 @@ def evaluate(self, choice_dataset, batch_size=-1): Returns: -------- - np.ndarray (n_sessions, n_items) - Choice probabilties for each session and each product + np.ndarray (n_contexts, n_items) + Choice probabilties for each context and each product """ batch_losses = [] for ( - items_batch, - sessions_batch, - sessions_items_batch, - availabilities_batch, - choices_batch, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, ) in choice_dataset.iter_batch(batch_size=batch_size): loss, _ = self.batch_predict( - items_batch, - sessions_batch, - sessions_items_batch, - availabilities_batch, - choices_batch, + fixed_items_features=fixed_items_features, + contexts_features=contexts_features, + contexts_items_features=contexts_items_features, + contexts_items_availabilities=contexts_items_availabilities, + choices=choices, ) batch_losses.append(loss) if batch_size != -1: - last_batch_size = availabilities_batch.shape[0] + last_batch_size = contexts_items_availabilities.shape[0] coefficients = tf.concat( [tf.ones(len(batch_losses) - 1) * batch_size, [last_batch_size]], axis=0 ) @@ -700,37 +714,49 @@ def __init__(self, **kwargs): """Initialization of the model.""" super().__init__(**kwargs) - def compute_utility( - self, items_batch, sessions_batch, sessions_items_batch, availabilities_batch, choices_batch + def compute_batch_utility( + self, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, ): - """Computes the random utility for each product of each session. + """Computes the random utility for each product of each context. Parameters ---------- - items_batch : tuple of np.ndarray (items_features) + fixed_items_features : tuple of np.ndarray Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the products constant/fixed features. 
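 A quick sanity check of the batch weighting used by evaluate above: weighting each
 batch's mean loss by its true size recovers the exact dataset mean even when the last
 batch is smaller (toy numbers, not library code):

 >>> import numpy as np
 >>> batch_losses = np.array([0.70, 0.60, 0.90])  # mean NLL of each batch
 >>> sizes = np.array([32, 32, 11])               # two full batches + a smaller last one
 >>> round(float((batch_losses * sizes).sum() / sizes.sum()), 4)  # != batch_losses.mean()
 0.6867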
- sessions_batch : tuple of np.ndarray (sessions_features)
- Time-Features
- sessions_items_batch : tuple of np.ndarray (sessions_items_features)
- Time-Item-Features
- availabilities_batch : np.ndarray
- Availabilities (sessions_items_availabilities)
+ Shape must be (n_items, n_items_features)
+ contexts_features : tuple of np.ndarray (contexts_features)
+ a batch of contexts features
+ Shape must be (n_contexts, n_contexts_features)
+ contexts_items_features : tuple of np.ndarray (contexts_items_features)
+ a batch of contexts items features
+ Shape must be (n_contexts, n_contexts_items_features)
+ contexts_items_availabilities : np.ndarray
+ A batch of contexts items availabilities
+ Shape must be (n_contexts, n_items)
- choices_batch : np.ndarray
+ choices : np.ndarray
 Choices
+ Shape must be (n_contexts, )

 Returns:
 --------
 tf.Tensor
 (n_contexts, n_items) matrix of random utilities
 """
 # In order to avoid unused arguments warnings
- del items_batch, sessions_batch, availabilities_batch, choices_batch
- return np.squeeze(np.random.uniform(shape=(sessions_items_batch.shape), minval=0, maxval=1))
+ _ = fixed_items_features, contexts_features, contexts_items_availabilities, choices
+ return np.squeeze(
+ np.random.uniform(low=0, high=1, size=contexts_items_features.shape)
+ )

- def fit(**kwargs):
+ def fit(self, **kwargs):
 """Make sure that nothing happens during .fit."""
- del kwargs
+ _ = kwargs
 return {}


 def fit(self, choice_dataset, **kwargs):
 """Computes the choice frequency of each product and defines it as choice probabilities."""
- del kwargs
+ _ = kwargs
 choices = choice_dataset.choices
 for i in range(choice_dataset.get_num_items()):
 self.weights.append(tf.reduce_sum(tf.cast(choices == i, tf.float32)))
 self.weights = tf.stack(self.weights) / len(choices)

- def compute_utility(
- self, items_batch, sessions_batch, sessions_items_batch, availabilities_batch, choices_batch
+ def compute_batch_utility(
+ self,
+ fixed_items_features,
+ contexts_features,
+ contexts_items_features,
+ contexts_items_availabilities,
+ choices,
 ):
 """Returns utility that is fixed. U = log(P).

 Parameters
 ----------
- items_batch : tuple of np.ndarray (items_features)
+ fixed_items_features : tuple of np.ndarray
 Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the
 products constant/fixed features.
- sessions_batch : tuple of np.ndarray (sessions_features) - Time-Features - sessions_items_batch : tuple of np.ndarray (sessions_items_features) - Time-Item-Features - availabilities_batch : np.ndarray - Availabilities (sessions_items_availabilities) + Shape must be (n_items, n_items_features) + contexts_features : tuple of np.ndarray (contexts_features) + a batch of contexts features + Shape must be (n_contexts, n_contexts_features) + contexts_items_features : tuple of np.ndarray (contexts_items_features) + a batch of contexts items features + Shape must be (n_contexts, n_contexts_items_features) + contexts_items_availabilities : np.ndarray + A batch of contexts items availabilities + Shape must be (n_contexts, n_items) choices_batch : np.ndarray Choices + Shape must be (n_contexts, ) Returns: -------- - np.ndarray (n_sessions, n_items) + np.ndarray (n_contexts, n_items) Utilities Raises: @@ -783,7 +819,8 @@ def compute_utility( If the model has not been fitted cannot evaluate the utility """ # In order to avoid unused arguments warnings - del items_batch, sessions_batch, sessions_items_batch, availabilities_batch + _ = fixed_items_features, contexts_features, contexts_items_availabilities + _ = contexts_items_features if self.weights is None: raise ValueError("Model not fitted") - return np.stack([np.log(self.weights.numpy())] * len(choices_batch), axis=0) + return np.stack([np.log(self.weights.numpy())] * len(choices), axis=0) From c53222ae0368915a616602f6afe431b6e73705c4 Mon Sep 17 00:00:00 2001 From: VincentAuriau Date: Fri, 2 Feb 2024 16:09:52 +0100 Subject: [PATCH 04/10] ENH: some cleaning in code --- choice_learn/models/rumnet.py | 1436 ++++++++++++++++----------------- 1 file changed, 718 insertions(+), 718 deletions(-) diff --git a/choice_learn/models/rumnet.py b/choice_learn/models/rumnet.py index 6fbe47e8..09f08fa9 100644 --- a/choice_learn/models/rumnet.py +++ b/choice_learn/models/rumnet.py @@ -1,502 +1,388 @@ """Implementation of RUMnet for easy use.""" import tensorflow as tf +import choice_learn.tf_ops as tf_ops from choice_learn.models.base_model import ChoiceModel -from choice_learn.tf_ops import CustomCategoricalCrossEntropy -class PaperRUMnet(ChoiceModel): - """Re-Implementation of the RUMnet model. +def create_ff_network( + input_shape, depth, width, activation="elu", add_last=False, l2_regularization_coeff=0.0 +): + """Base function to create a simple fully connected (Dense) network. - Re-implemented from the paper: - Representing Random Utility Choice Models with Neural Networks from Ali Aouad and Antoine Désir - https://arxiv.org/abs/2207.12877 + Parameters + ---------- + input_shape : tuple of int + shape of the input of the network. Typically (num_features, ) + depth : int + Number of dense/fully-connected of the network to create. + width : int + Neurons number for all dense layers. + add_last : bool, optional + Whether to add a Dense layer with a single output at the end, by default False + Typically to be used when creating the utility network, that outputs a single number: + the utility. + l2_regularization_coeff : float, optional + Regularization coefficient for Dense layers weights during training, by default 0.0 - Inherits from base_model.ChoiceModel - TODO: Verify that all parameters are implemented. + Returns: + -------- + tf.keras.Model + Dense Neural Network with tensorflow backend. 
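+
+ Examples:
+ --------
+ A minimal sketch (shapes are illustrative):
+
+ >>> net = create_ff_network(input_shape=(5,), depth=2, width=8)
+ >>> net(tf.ones((3, 5))).shape
+ TensorShape([3, 8])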
""" + input = tf.keras.layers.Input(shape=input_shape) + regularizer = tf.keras.regularizers.L2(l2_regularization_coeff) + out = input + for _ in range(depth): + out = tf.keras.layers.Dense( + width, activation=activation, kernel_regularizer=regularizer, use_bias=True + )(out) + if add_last: + out = tf.keras.layers.Dense(1, activation="linear", use_bias=False)(out) + return tf.keras.Model(inputs=input, outputs=out) - def __init__( - self, - num_products_features, - num_customer_features, - width_eps_x, - depth_eps_x, - heterogeneity_x, - width_eps_z, - depth_eps_z, - heterogeneity_z, - width_u, - depth_u, - tol, - optimizer, - lr, - normalize_non_buy=True, - logmin=1e-5, - l2_regularization_coef=0.0, - label_smoothing=0.0, - **kwargs, - ): - """Initiation of the RUMnet Model. - - Parameters - ---------- - num_products_features : int - Number of features each product will be described with. - In terms of ChoiceDataset it is the number of - { items_features + sessions_items_features } for one product. - num_customer_features : int - Number of features each customer will be described with. - In terms of ChoiceDataset it is the number of sessions_features. - width_eps_x : int - Number of neurons for each dense layer for the products encoding net. - depth_eps_x : int - Number of dense layers for the products encoding net. - heterogeneity_x : int - Number of nets of products features encoding. - width_eps_z : int - Number of neurons for each dense layer for the customers encoding net. - depth_eps_z : int - Number of dense layers for the customers encoding net. - heterogeneity_z : int - Number of nets of customers features encoding. - width_u : int - Number of neurons for each dense layer for the utility net. - depth_u : int - Number of dense layers for the utility net. - tol : float - # To be Implemented - optimizer : str - String representation of the optimizer to use. By default is Adam if not specified. - Should be within tf.keras.optimizers. - lr : float - Starting learning rate to associate with optimizer. 
- normalize_non_buy : bool, optional - Whether or not to add exit option with utility 1, by default True - logmin : float, optional - Value to be added within log computation to avoid infinity, by default 1e-5 - l2_regularization_coef : float, optional - Value of dense layers weights regulariation to apply during training, by default 0.0 - label_smoothing : float, optional - Value of smoothing to apply in CrossEntropy loss computation, by default 0.0 - """ - super().__init__(normalize_non_buy=normalize_non_buy, **kwargs) - # Number of features - self.num_products_features = num_products_features - self.num_customer_features = num_customer_features - - # Dimension of encoding nets - self.width_eps_x = width_eps_x - self.depth_eps_x = depth_eps_x - self.heterogeneity_x = heterogeneity_x - - self.width_eps_z = width_eps_z - self.depth_eps_z = depth_eps_z - self.heterogeneity_z = heterogeneity_z - - # Dimension of utility net - self.width_u = width_u - self.depth_u = depth_u - - # Optimization parameters - self.logmin = logmin - self.tol = tol - self.lr = lr - self.normalize_non_buy = normalize_non_buy - self.l2_regularization_coef = l2_regularization_coef - self.label_smoothing = label_smoothing - - if optimizer == "Adam": - self.optimizer = tf.keras.optimizers.Adam(lr) - elif optimizer == "SGD": - self.optimizer = tf.keras.optimizers.SGD(lr) - elif optimizer == "Adamax": - self.optimizer = tf.keras.optimizers.Adamax(lr) - else: - print(f"Optimizer {optimizer} not implemnted, switching for default Adam") - self.optimizer = tf.keras.optimizers.Adam(lr) - - self.instantiated = False - def instantiate(self): - """Instatiation of the RUMnet model. +def recreate_official_nets( + num_products_features, + x_width, + x_depth, + x_eps, + num_customer_features, + z_width, + z_depth, + z_eps, + width_u, + depth_u, + l2_regularization_coeff=0.0, +): + """Function to create the three nets used in RUMnet: X_net, Z_net and U_net. - Creation of : - - x_model encoding products features, - - z_model encoding customers features, - - u_model computing utilities from product, customer features and their embeddings - """ - # Instatiation of the different nets - self.x_model, self.z_model, self.u_model = recreate_official_nets( - num_products_features=self.num_products_features, - num_customer_features=self.num_customer_features, - x_width=self.width_eps_x, - x_depth=self.depth_eps_x, - x_eps=self.heterogeneity_x, - z_width=self.width_eps_z, - z_depth=self.depth_eps_z, - z_eps=self.heterogeneity_z, - width_u=self.width_u, - depth_u=self.depth_u, - l2_regularization_coeff=self.l2_regularization_coef, - ) + Parameters + ---------- + num_products_features : int + Number of features each product will be described with. + In terms of ChoiceDataset it is the number of { items_features + contexts_items_features } + for one product. + num_customer_features : int + Number of features each customer will be described with. + In terms of ChoiceDataset it is the number of contexts_features. + width_eps_x : int + Number of neurons for each dense layer for the products encoding net. + depth_eps_x : int + Number of dense layers for the products encoding net. + heterogeneity_x : int + Number of nets of products features encoding. + width_eps_z : int + Number of neurons for each dense layer for the customers encoding net. + depth_eps_z : int + Number of dense layers for the customers encoding net. + heterogeneity_z : int + Number of nets of customers features encoding. 
+ width_u : int + Number of neurons for each dense layer for the utility net. + depth_u : int + Number of dense layers for the utility net. + l2_regularization_coef : float, optional + Value of dense layers weights regulariation to apply during training, by default 0.0 - # Storing weights for back-propagation - self.weights = self.x_model.weights + self.z_model.weights + self.u_model.weights - self.loss = CustomCategoricalCrossEntropy( - from_logits=False, label_smoothing=self.label_smoothing - ) - self.instantiated = True + Returns: + -------- + tf.keras.Model + Product features encoding network + tf.keras.Model + Customer features encoding network + tf.keras.Model + Features and encoding to utility computation network + """ + # Products and Customers embeddings nets, quiet symmetrical + products_input = tf.keras.layers.Input(shape=(num_products_features)) + customer_input = tf.keras.layers.Input(shape=(num_customer_features)) + x_embeddings = [] + z_embeddings = [] - def compute_batch_utility( - self, - fixed_items_features, - contexts_features, - contexts_items_features, - contexts_items_availabilities, - choices, - ): - """Compute utility from a batch of ChoiceDataset. + # Creating independant nets for each heterogeneity + for _ in range(x_eps): + x_embedding = create_ff_network( + input_shape=num_products_features, + depth=x_depth, + width=x_width, + l2_regularization_coeff=l2_regularization_coeff, + )(products_input) + x_embeddings.append(x_embedding) - Here we asssume that: item features = {fixed item features + session item features} - user features = {session features} + # Creating independant nets for each heterogeneity + for _ in range(z_eps): + z_embedding = create_ff_network( + input_shape=num_customer_features, + depth=z_depth, + width=z_width, + l2_regularization_coeff=l2_regularization_coeff, + )(customer_input) - Parameters - ---------- - fixed_items_features : tuple of np.ndarray (n_items, n_features) - Items-Features: formatting from ChoiceDataset: a matrix representing the - products fixed features. - contexts_features : tuple of np.ndarray (n_contexts, n_features) - Contexts-Features: features varying with contexts, shared by all products - contexts_items_features :tuple of np.ndarray (n_contexts, n_items, n_features) - Features varying with contexts and products - contexts_items_availabilities : np.ndarray (n_contexts, n_items) - Availabilities: here for ChoiceModel signature - choices : np.ndarray (n_contexts, ) - Choices: here for ChoiceModel signature + z_embeddings.append(z_embedding) - Returns: - -------- - np.ndarray - Utility of each product for each session. 
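 To make the three-net wiring concrete, a sketch under assumed dimensions (any
 consistent values work; x_net and z_net each return one embedding per heterogeneity):

 >>> x_net, z_net, u_net = recreate_official_nets(
 ...     num_products_features=6, x_width=8, x_depth=2, x_eps=2,
 ...     num_customer_features=3, z_width=8, z_depth=2, z_eps=2,
 ...     width_u=16, depth_u=2,
 ... )
 >>> x_feat, z_feat = tf.ones((4, 6)), tf.ones((4, 3))
 >>> x_embs, z_embs = x_net(x_feat), z_net(z_feat)  # lists of 2 tensors each
 >>> u_in = tf.concat([x_feat, x_embs[0], z_feat, z_embs[0]], axis=-1)  # (4, 6+8+3+8)
 >>> u_net(u_in).shape  # one utility per sample for this (x, z) pair
 TensorShape([4, 1])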
- Shape must be (n_sessions, n_items) - """ - (_, _) = contexts_items_availabilities, choices - ### Restacking of the item features - items_features_batch = tf.concat([*fixed_items_features], axis=-1) - session_features_batch = tf.concat([*contexts_features], axis=-1) - session_items_features_batch = tf.concat([*contexts_items_features], axis=-1) + x_net = tf.keras.Model(inputs=products_input, outputs=x_embeddings, name="X_embedding") + z_net = tf.keras.Model(inputs=customer_input, outputs=z_embeddings, name="Z_embedding") - full_item_features = tf.stack( - [items_features_batch] * session_items_features_batch.shape[0], axis=0 - ) - full_item_features = tf.concat([session_items_features_batch, full_item_features], axis=-1) + # Utility network + u_net = create_ff_network( + input_shape=( + x_width + z_width + num_products_features + num_customer_features + ), # Input shape from previous nets + width=width_u, + depth=depth_u, + add_last=True, # Add last for utility + l2_regularization_coeff=l2_regularization_coeff, + ) - ### Computation of utilities - utilities = [] + return x_net, z_net, u_net - # Computation of the customer features embeddings - z_embeddings = self.z_model(session_features_batch) - # Iterate over items in assortment - for item_i in range(full_item_features.shape[1]): - # Computation of item features embeddings - x_embeddings = self.x_model(full_item_features[:, item_i, :]) +class ParallelDense(tf.keras.layers.Layer): + """Layer that represents several Dense layers in Parallel. - utilities.append([]) + Parallel means that they have the same input, but then are not intricated and + are totally independant from each other. + """ - # Computation of utilites from embeddings, iteration over heterogeneities - # (eps_x * eps_z) - for _x in x_embeddings: - for _z in z_embeddings: - _u = tf.keras.layers.Concatenate()( - [full_item_features[:, item_i, :], _x, session_features_batch, _z] - ) - utilities[-1].append(self.u_model(_u)) + def __init__(self, width, depth, heterogeneity, activation="relu", **kwargs): + """Instantiation of the layer. - ### Reshape utilities: (batch_size, num_items, heterogeneity) - return tf.transpose(tf.squeeze(tf.stack(utilities, axis=0), -1)) + Following tf.keras.Layer API. Note that there will be width * depth * heterogeneity + number of neurons in the layer. - @tf.function - def train_step( - self, - fixed_items_features, - contexts_features, - contexts_items_features, - contexts_items_availabilities, - choices, - sample_weight=None, - ): - """Modified version of train step, as we have to average probabilities over heterogeneities. + Parameters + ---------- + width : int + Number of neurons for each dense layer. + depth : int + Number of neuron layers. + heterogeneity : int + Number of dense layers that are in parallel + activation : str, optional + activation function at the end of each layer, by default "relu" + """ + super().__init__(**kwargs) + self.width = width + self.depth = depth + self.heterogeneity = heterogeneity + self.activation = tf.keras.layers.Activation(activation) - Function that represents one training step (= one gradient descent step) of the model. - Handles a batch of data of size n_contexts = n_choices = batch_size + def build(self, input_shape): + """Lazy build of the layer. Parameters ---------- - fixed_items_features : tuple of np.ndarray (n_items, n_features) - Items-Features: formatting from ChoiceDataset: a matrix representing the - products fixed features. 
- contexts_features : tuple of np.ndarray (n_contexts, n_features) - Contexts-Features: features varying with contexts, shared by all products - contexts_items_features :tuple of np.ndarray (n_contexts, n_items, n_features) - Features varying with contexts and products - contexts_items_availabilities : np.ndarray (n_contexts, n_items) - Availabilities of items - choices : np.ndarray (n_contexts, ) - Choices - sample_weight : np.ndarray, optional - List samples weights to apply during the gradient descent to the batch elements, - by default None - - Returns: - -------- - tf.Tensor - Value of NegativeLogLikelihood loss for the batch + input_shape : tuple + shape of the input of the layer. Typically (batch_size, num_features). + Batch_size (None) is ignored, but num_features is the shape of the input. """ - with tf.GradientTape() as tape: - ### Computation of utilities - all_u = self.compute_batch_utility( - fixed_items_features=fixed_items_features, - contexts_features=contexts_features, - contexts_items_features=contexts_items_features, - contexts_items_availabilities=contexts_items_availabilities, - choices=choices, - ) - probabilities = [] - - # Iterate over heterogeneities - # for i in range(all_u.shape[2]): - # Assortment(t) Utility - # eps_probabilities = availability_softmax(all_u[:, :, i], ia_batch, axis=2) - eps_probabilities = tf.nn.softmax(all_u, axis=1) - # probabilities.append(eps_probabilities) - - # Average probabilities over heterogeneities - probabilities = tf.reduce_mean(eps_probabilities, axis=-1) + super().build(input_shape) - # It is not in the paper, but let's normalize with availabilities - probabilities = tf.multiply(probabilities, contexts_items_availabilities) - probabilities = tf.divide( - probabilities, tf.reduce_sum(probabilities, axis=1, keepdims=True) + 1e-5 + weights = [ + ( + self.add_weight( + shape=(input_shape[-1], self.width, self.heterogeneity), + initializer="glorot_normal", + trainable=True, + ), + self.add_weight( + shape=(self.width, self.heterogeneity), + initializer="glorot_normal", + trainable=True, + ), ) - - # Probabilities of selected products - # chosen_probabilities = tf.gather_nd(indices=choices_nd, params=probabilities) - - # Negative Log-Likelihood - batch_nll = self.loss( - y_pred=probabilities, - y_true=tf.one_hot(choices, depth=probabilities.shape[1]), - sample_weight=sample_weight, + ] + for i in range(self.depth - 1): + weights.append( + ( + self.add_weight( + shape=(self.width, self.width, self.heterogeneity), + initializer="glorot_normal", + trainable=True, + ), + self.add_weight( + shape=(self.width, self.heterogeneity), + initializer="glorot_normal", + trainable=True, + ), + ) ) - # nll = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)( - # y_pred=probabilities, y_true=c_batch - # ) - # nll = -tf.reduce_sum(tf.math.log(chosen_probabilities + self.logmin)) - grads = tape.gradient(batch_nll, self.weights) - self.optimizer.apply_gradients(zip(grads, self.weights)) - return batch_nll + self.w = weights - @tf.function - def batch_predict( - self, - fixed_items_features, - contexts_features, - contexts_items_features, - contexts_items_availabilities, - choices, - sample_weight=None, - ): - """Function that represents one prediction (Probas + Loss) for one batch of a ChoiceDataset. + def call(self, inputs): + """Predict of the layer. - Specific version for RUMnet because it is needed to average probabilities over - heterogeneities. + Follows tf.keras.Layer API. 
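 A shape-level sketch of the parallel computation (toy tensors: batch=4, features=6,
 width=8, heterogeneity=3, so each input feeds 3 independent Dense stacks):

 >>> h = tf.tensordot(tf.ones((4, 6)), tf.ones((6, 8, 3)), axes=1)  # first layer
 >>> h.shape
 TensorShape([4, 8, 3])
 >>> tf.einsum("ijk,jlk->ilk", h, tf.ones((8, 8, 3))).shape  # deeper layers
 TensorShape([4, 8, 3])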
Parameters ---------- - fixed_items_features : tuple of np.ndarray (n_items, n_features) - Items-Features: formatting from ChoiceDataset: a matrix representing the - products fixed features. - contexts_features : tuple of np.ndarray (n_contexts, n_features) - Contexts-Features: features varying with contexts, shared by all products - contexts_items_features :tuple of np.ndarray (n_contexts, n_items, n_features) - Features varying with contexts and products - contexts_items_availabilities : np.ndarray (n_contexts, n_items) - Availabilities of items - choices : np.ndarray (n_contexts, ) - Choices - sample_weight : np.ndarray, optional - List samples weights to apply during the gradient descent to the batch elements, - by default None + inputs : tf.Tensor, np.ndarray + Tensor of shape (batch_size, n_features) as input of the model. Returns: -------- - tf.Tensor (1, ) - Value of NegativeLogLikelihood loss for the batch - tf.Tensor (batch_size, n_items) - Probabilities for each product to be chosen for each session + outputs + tensor of shape (batch_size, width, heterogeneity) """ - utilities = self.compute_batch_utility( - fixed_items_features=fixed_items_features, - contexts_features=contexts_features, - contexts_items_features=contexts_items_features, - contexts_items_availabilities=contexts_items_availabilities, - choices=choices, - ) - probabilities = tf.nn.softmax(utilities, axis=1) - probabilities = tf.reduce_mean(probabilities, axis=-1) + outputs = tf.tensordot(inputs, self.w[0][0], axes=1) + self.w[0][1] + outputs = self.activation(outputs) + # tf.nn.bias_add(y, weights[0][1], data_format="NC...") - # Normalization with availabilties - probabilities = tf.multiply(probabilities, contexts_items_availabilities) - probabilities = tf.divide( - probabilities, tf.reduce_sum(probabilities, axis=1, keepdims=True) + 1e-5 - ) - batch_nll = self.loss( - y_pred=probabilities, - y_true=tf.one_hot(choices, depth=probabilities.shape[1]), - sample_weight=sample_weight, - ) - return batch_nll, probabilities + for w, b in self.w[1:]: + outputs = tf.einsum("ijk,jlk->ilk", outputs, w) + b + outputs = self.activation(outputs) + return outputs -class CPURUMnet(PaperRUMnet): - """CPU-optimized Re-Implementation of the RUMnet model. - This implementation handles in parallel the heterogeneities so that the training is faster. +class AssortmentParallelDense(tf.keras.layers.Layer): + """Several Dense layers in Parallel applied to an Assortment. + + Parallel means that they have the same input, but then are not intricated and + are totally independant from each other. The layer applies the same Dense layers + to an assortment of items. """ - def compute_batch_utility( - self, - fixed_items_features, - contexts_features, - contexts_items_features, - contexts_items_availabilities, - choices, - ): - """Compute utility from a batch of ChoiceDataset. + def __init__(self, width, depth, heterogeneity, activation="relu", **kwargs): + """Inialization of the layer. + + Parameters + ---------- + width : int + Number of neurons of each dense layer. + depth : int + Number of dense layers + heterogeneity : int + Number of dense networks in parallel. + activation : str, optional + activation function of each dense, by default "relu" + """ + super().__init__(**kwargs) + self.width = width + self.depth = depth + self.heterogeneity = heterogeneity + self.activation = tf.keras.layers.Activation(activation) + + def build(self, input_shape): + """Lazy build of the layer. 
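 A shape sketch of the layer once built and called on an assortment (toy dimensions;
 the build below creates one kernel stack per heterogeneity):

 >>> layer = AssortmentParallelDense(width=8, depth=2, heterogeneity=3)
 >>> layer(tf.ones((2, 5, 6))).shape  # (batch, items, width, heterogeneity)
 TensorShape([2, 5, 8, 3])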
- Here we asssume that: item features = {fixed item features + session item features} - user features = {session features} + Follows tf.keras API. Parameters ---------- - fixed_items_features : tuple of np.ndarray (n_items, n_features) - Items-Features: formatting from ChoiceDataset: a matrix representing the - products fixed features. - contexts_features : tuple of np.ndarray (n_contexts, n_features) - Contexts-Features: features varying with contexts, shared by all products - contexts_items_features :tuple of np.ndarray (n_contexts, n_items, n_features) - Features varying with contexts and products - contexts_items_availabilities : np.ndarray (n_contexts, n_items) - Availabilities of items - choices : np.ndarray (n_contexts, ) - Choices + input_shape : tuple + Shape of the input of the layer. + Typically (batch_size, num_items, num_features). + """ + super().build(input_shape) + + weights = [ + ( + self.add_weight( + shape=(input_shape[-1], self.width, self.heterogeneity), + initializer="glorot_normal", + trainable=True, + ), + self.add_weight( + shape=(self.width, self.heterogeneity), + initializer="glorot_normal", + trainable=True, + ), + ) + ] + for i in range(self.depth - 1): + weights.append( + ( + self.add_weight( + shape=(self.width, self.width, self.heterogeneity), + initializer="glorot_normal", + trainable=True, + ), + self.add_weight( + shape=(self.width, self.heterogeneity), + initializer="glorot_normal", + trainable=True, + ), + ) + ) + + self.w = weights + + def call(self, inputs): + """Predict of the layer. + + Follows tf.keras.Layer API. + + Parameters + ---------- + inputs : tf.Tensor, np.ndarray + Tensor of shape (batch_size, n_items, n_features) as input of the model. Returns: -------- - np.ndarray - Utility of each product for each session. 
- Shape must be (n_sessions, n_items)
 """
 (_, _) = contexts_items_availabilities, choices
 ### Restacking of the item features
 stacked_fixed_items_features = tf.concat([*fixed_items_features], axis=-1)
 stacked_contexts_features = tf.concat([*contexts_features], axis=-1)
 stacked_contexts_items_features = tf.concat([*contexts_items_features], axis=-1)

 full_item_features = tf.stack(
 [stacked_fixed_items_features] * stacked_contexts_items_features.shape[0], axis=0
 )
 full_item_features = tf.concat(
 [stacked_contexts_items_features, full_item_features], axis=-1
 )

 ### Computation of utilities
 utilities = []
 batch_size = stacked_contexts_features.shape[0]

 # Computation of the customer features embeddings
 z_embeddings = self.z_model(stacked_contexts_features)

 # Iterate over items in assortment
 for item_i in range(full_item_features.shape[1]):
 # Computation of item features embeddings
 x_embeddings = self.x_model(full_item_features[:, item_i, :])

 stacked_heterogeneities = []
 # Computation of utilities from embeddings, iteration over heterogeneities
 # eps_x * eps_z
 for _x in x_embeddings:
 for _z in z_embeddings:
 full_embedding = tf.keras.layers.Concatenate()(
 [full_item_features[:, item_i, :], _x, stacked_contexts_features, _z]
 )
 stacked_heterogeneities.append(full_embedding)
 item_utilities = self.u_model(tf.concat(stacked_heterogeneities, axis=0))
 item_utilities = tf.stack(
 [
 item_utilities[batch_size * i : batch_size * (i + 1)]
 for i in range(len(x_embeddings) * len(z_embeddings))
 ],
 axis=1,
 )
 utilities.append(item_utilities)
 ### Reshape utilities: (batch_size, num_items, heterogeneity)
 return tf.squeeze(tf.stack(utilities, axis=1), -1)


class AssortmentUtilityDenseNetwork(tf.keras.layers.Layer):
 """Dense Network that is applied to an assortment of items.

 We apply the same network over several items and several heterogeneities.
 """

 def __init__(self, width, depth, activation="relu", add_last=True, **kwargs):
 """Initialization of the layer.

 Parameters
 ----------
 width : int
 Number of neurons of each dense layer.
 depth : int
 Number of dense layers.
activation : str, optional - activation function at the end of each layer, by default "relu" + Activation function for each layer, by default "relu" + add_last : bool, optional + Whether to add a final dense layer with 1 neuron, by default True """ super().__init__(**kwargs) self.width = width self.depth = depth - self.heterogeneity = heterogeneity self.activation = tf.keras.layers.Activation(activation) + self.add_last = add_last def build(self, input_shape): """Lazy build of the layer. + Follows tf.keras.Layer API. + Parameters ---------- input_shape : tuple - shape of the input of the layer. Typically (batch_size, num_features). - Batch_size (None) is ignored, but num_features is the shape of the input. + Shape of the input of the layer. + Typically (batch_size, num_items, width, heterogeneity). """ super().build(input_shape) weights = [ ( self.add_weight( - shape=(input_shape[-1], self.width, self.heterogeneity), + shape=(input_shape[-2], self.width), initializer="glorot_normal", trainable=True, ), self.add_weight( - shape=(self.width, self.heterogeneity), + shape=(self.width, 1), initializer="glorot_normal", trainable=True, ), @@ -506,243 +392,488 @@ def build(self, input_shape): weights.append( ( self.add_weight( - shape=(self.width, self.width, self.heterogeneity), + shape=(self.width, self.width), initializer="glorot_normal", trainable=True, ), self.add_weight( - shape=(self.width, self.heterogeneity), + shape=(self.width, 1), initializer="glorot_normal", trainable=True, ), ) ) + if self.add_last: + self.last = self.add_weight( + shape=(self.width, 1), initializer="glorot_normal", trainable=True + ) self.w = weights def call(self, inputs): """Predict of the layer. - Follows tf.keras.Layer API. - Parameters ---------- inputs : tf.Tensor, np.ndarray - Tensor of shape (batch_size, n_features) as input of the model. + Input Tensor of shape (batch_size, num_items, width, heterogeneity) Returns: -------- - outputs - tensor of shape (batch_size, width, heterogeneity) + tf.Tensor + Utilities of shape (batch_size, num_items, heterogeneity) """ - outputs = tf.tensordot(inputs, self.w[0][0], axes=1) + self.w[0][1] - outputs = self.activation(outputs) - # tf.nn.bias_add(y, weights[0][1], data_format="NC...") + outputs = inputs - for w, b in self.w[1:]: - outputs = tf.einsum("ijk,jlk->ilk", outputs, w) + b + for w, b in self.w: + # bs, items, features, heterogeneities + outputs = tf.einsum("ijlk, lm->ijmk", outputs, w) + b outputs = self.activation(outputs) + if self.add_last: + outputs = tf.einsum("ijlk, lm->ijmk", outputs, self.last) + return outputs -class AssortmentParallelDense(tf.keras.layers.Layer): - """Several Dense layers in Parallel applied to an Assortment. +class PaperRUMnet(ChoiceModel): + """Re-Implementation of the RUMnet model. + + Re-implemented from the paper: + Representing Random Utility Choice Models with Neural Networks from Ali Aouad and Antoine Désir + https://arxiv.org/abs/2207.12877 + + Inherits from base_model.ChoiceModel + TODO: Verify that all parameters are implemented. + """ + + def __init__( + self, + num_products_features, + num_customer_features, + width_eps_x, + depth_eps_x, + heterogeneity_x, + width_eps_z, + depth_eps_z, + heterogeneity_z, + width_u, + depth_u, + tol, + optimizer, + lr, + normalize_non_buy=False, + logmin=1e-5, + l2_regularization_coef=0.0, + label_smoothing=0.0, + **kwargs, + ): + """Initiation of the RUMnet Model. + + Parameters + ---------- + num_products_features : int + Number of features each product will be described with. 
+ In terms of ChoiceDataset it is the number of
+ { items_features + contexts_items_features } for one product.
 num_customer_features : int
 Number of features each customer will be described with.
 In terms of ChoiceDataset it is the number of contexts_features.
 width_eps_x : int
 Number of neurons for each dense layer for the products encoding net.
 depth_eps_x : int
 Number of dense layers for the products encoding net.
 heterogeneity_x : int
 Number of nets of products features encoding.
 width_eps_z : int
 Number of neurons for each dense layer for the customers encoding net.
 depth_eps_z : int
 Number of dense layers for the customers encoding net.
 heterogeneity_z : int
 Number of nets of customers features encoding.
 width_u : int
 Number of neurons for each dense layer for the utility net.
 depth_u : int
 Number of dense layers for the utility net.
 tol : float
 # To be Implemented
 optimizer : str
 String representation of the optimizer to use. Adam is used by default if not specified.
 Should be within tf.keras.optimizers.
 lr : float
 Starting learning rate to associate with optimizer.
 normalize_non_buy : bool, optional
 Whether or not to add exit option with utility 1, by default False
 logmin : float, optional
 Value to be added within log computation to avoid infinity, by default 1e-5
 l2_regularization_coef : float, optional
 Value of dense layers weights regularization to apply during training, by default 0.0
 label_smoothing : float, optional
 Value of smoothing to apply in CrossEntropy loss computation, by default 0.0
 """
 super().__init__(normalize_non_buy=normalize_non_buy, **kwargs)
 # Number of features
 self.num_products_features = num_products_features
 self.num_customer_features = num_customer_features

 # Dimension of encoding nets
 self.width_eps_x = width_eps_x
 self.depth_eps_x = depth_eps_x
 self.heterogeneity_x = heterogeneity_x

 self.width_eps_z = width_eps_z
 self.depth_eps_z = depth_eps_z
 self.heterogeneity_z = heterogeneity_z

 # Dimension of utility net
 self.width_u = width_u
 self.depth_u = depth_u

 # Optimization parameters
 self.logmin = logmin
 self.tol = tol
 self.lr = lr
 self.normalize_non_buy = normalize_non_buy
 self.l2_regularization_coef = l2_regularization_coef
 self.label_smoothing = label_smoothing

 if optimizer == "Adam":
 self.optimizer = tf.keras.optimizers.Adam(lr)
 elif optimizer == "SGD":
 self.optimizer = tf.keras.optimizers.SGD(lr)
 elif optimizer == "Adamax":
 self.optimizer = tf.keras.optimizers.Adamax(lr)
 else:
 print(f"Optimizer {optimizer} not implemented, switching for default Adam")
 self.optimizer = tf.keras.optimizers.Adam(lr)

 self.instantiated = False

 def instantiate(self):
 """Instantiation of the RUMnet model.
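 A usage sketch with hypothetical hyper-parameters (any ChoiceDataset whose feature
 counts match would do):

 >>> model = PaperRUMnet(
 ...     num_products_features=6, num_customer_features=3,
 ...     width_eps_x=8, depth_eps_x=2, heterogeneity_x=2,
 ...     width_eps_z=8, depth_eps_z=2, heterogeneity_z=2,
 ...     width_u=16, depth_u=2, tol=0.0, optimizer="Adam", lr=1e-3,
 ... )
 >>> model.instantiate()  # builds x_model, z_model and u_model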
+ + Creation of : + - x_model encoding products features, + - z_model encoding customers features, + - u_model computing utilities from product, customer features and their embeddings + """ + # Instatiation of the different nets + self.x_model, self.z_model, self.u_model = recreate_official_nets( + num_products_features=self.num_products_features, + num_customer_features=self.num_customer_features, + x_width=self.width_eps_x, + x_depth=self.depth_eps_x, + x_eps=self.heterogeneity_x, + z_width=self.width_eps_z, + z_depth=self.depth_eps_z, + z_eps=self.heterogeneity_z, + width_u=self.width_u, + depth_u=self.depth_u, + l2_regularization_coeff=self.l2_regularization_coef, + ) + + # Storing weights for back-propagation + self.weights = self.x_model.weights + self.z_model.weights + self.u_model.weights + self.loss = tf_ops.CustomCategoricalCrossEntropy( + from_logits=False, + label_smoothing=self.label_smoothing, + epsilon=self.logmin, + ) + self.instantiated = True - Parallel means that they have the same input, but then are not intricated and - are totally independant from each other. The layer applies the same Dense layers - to an assortment of items. - """ + def compute_batch_utility( + self, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, + ): + """Compute utility from a batch of ChoiceDataset. - def __init__(self, width, depth, heterogeneity, activation="relu", **kwargs): - """Inialization of the layer. + Here we asssume that: item features = {fixed item features + contexts item features} + user features = {contexts features} Parameters ---------- - width : int - Number of neurons of each dense layer. - depth : int - Number of dense layers - heterogeneity : int - Number of dense networks in parallel. - activation : str, optional - activation function of each dense, by default "relu" + fixed_items_features : tuple of np.ndarray (n_items, n_features) + Items-Features: formatting from ChoiceDataset: a matrix representing the + products fixed features. + contexts_features : tuple of np.ndarray (n_contexts, n_features) + Contexts-Features: features varying with contexts, shared by all products + contexts_items_features :tuple of np.ndarray (n_contexts, n_items, n_features) + Features varying with contexts and products + contexts_items_availabilities : np.ndarray (n_contexts, n_items) + Availabilities: here for ChoiceModel signature + choices : np.ndarray (n_contexts, ) + Choices: here for ChoiceModel signature + + Returns: + -------- + np.ndarray + Utility of each product for each contexts. + Shape must be (n_contexts, n_items) """ - super().__init__(**kwargs) - self.width = width - self.depth = depth - self.heterogeneity = heterogeneity - self.activation = tf.keras.layers.Activation(activation) + (_, _) = contexts_items_availabilities, choices + ### Restacking of the item features + items_features_batch = tf.concat([*fixed_items_features], axis=-1) + contexts_features_batch = tf.concat([*contexts_features], axis=-1) + contexts_items_features_batch = tf.concat([*contexts_items_features], axis=-1) - def build(self, input_shape): - """Lazy build of the layer. + full_item_features = tf.stack( + [items_features_batch] * contexts_items_features_batch.shape[0], axis=0 + ) + full_item_features = tf.concat([contexts_items_features_batch, full_item_features], axis=-1) - Follows tf.keras API. + ### Computation of utilities + utilities = [] - Parameters - ---------- - input_shape : tuple - Shape of the input of the layer. 
-            Typically (batch_size, num_items, num_features).
-        """
-        super().build(input_shape)
+        # Computation of the customer features embeddings
+        z_embeddings = self.z_model(contexts_features_batch)

-        weights = [
-            (
-                self.add_weight(
-                    shape=(input_shape[-1], self.width, self.heterogeneity),
-                    initializer="glorot_normal",
-                    trainable=True,
-                ),
-                self.add_weight(
-                    shape=(self.width, self.heterogeneity),
-                    initializer="glorot_normal",
-                    trainable=True,
-                ),
-            )
-        ]
-        for i in range(self.depth - 1):
-            weights.append(
-                (
-                    self.add_weight(
-                        shape=(self.width, self.width, self.heterogeneity),
-                        initializer="glorot_normal",
-                        trainable=True,
-                    ),
-                    self.add_weight(
-                        shape=(self.width, self.heterogeneity),
-                        initializer="glorot_normal",
-                        trainable=True,
-                    ),
-                )
-            )
+        # Iterate over items in assortment
+        for item_i in range(full_item_features.shape[1]):
+            # Computation of item features embeddings
+            x_embeddings = self.x_model(full_item_features[:, item_i, :])

-        self.w = weights
+            utilities.append([])

-    def call(self, inputs):
-        """Predict of the layer.
+            # Computation of utilities from embeddings, iteration over heterogeneities
+            # (eps_x * eps_z)
+            for _x in x_embeddings:
+                for _z in z_embeddings:
+                    _u = tf.keras.layers.Concatenate()(
+                        [full_item_features[:, item_i, :], _x, contexts_features_batch, _z]
+                    )
+                    utilities[-1].append(self.u_model(_u))

-        Follows tf.keras.Layer API.
+        ### Reshape utilities: (batch_size, num_items, heterogeneity)
+        return tf.transpose(tf.squeeze(tf.stack(utilities, axis=0), -1))
+
+    @tf.function
+    def train_step(
+        self,
+        fixed_items_features,
+        contexts_features,
+        contexts_items_features,
+        contexts_items_availabilities,
+        choices,
+        sample_weight=None,
+    ):
+        """Modified version of train step, as we have to average probabilities over heterogeneities.
+
+        Function that represents one training step (= one gradient descent step) of the model.
+        Handles a batch of data of size n_contexts = n_choices = batch_size.

         Parameters
         ----------
-        inputs : tf.Tensor, np.ndarray
-            Tensor of shape (batch_size, n_items, n_features) as input of the model.
+        fixed_items_features : tuple of np.ndarray (n_items, n_features)
+            Items-Features: formatting from ChoiceDataset: a matrix representing the
+            products fixed features.
+        contexts_features : tuple of np.ndarray (n_contexts, n_features)
+            Contexts-Features: features varying with contexts, shared by all products
+        contexts_items_features : tuple of np.ndarray (n_contexts, n_items, n_features)
+            Features varying with contexts and products
+        contexts_items_availabilities : np.ndarray (n_contexts, n_items)
+            Availabilities of items
+        choices : np.ndarray (n_contexts, )
+            Choices
+        sample_weight : np.ndarray, optional
+            List of sample weights to apply during the gradient descent to the batch elements,
+            by default None

         Returns:
         --------
         tf.Tensor
-            Embeddings of shape (batch_size, n_items, width, heterogeneity)
+            Value of NegativeLogLikelihood loss for the batch
         """
-        outputs = tf.tensordot(inputs, self.w[0][0], axes=[[2], [0]]) + self.w[0][1]
-        outputs = self.activation(outputs)
+        with tf.GradientTape() as tape:
+            ### Computation of utilities
+            all_u = self.compute_batch_utility(
+                fixed_items_features=fixed_items_features,
+                contexts_features=contexts_features,
+                contexts_items_features=contexts_items_features,
+                contexts_items_availabilities=contexts_items_availabilities,
+                choices=choices,
+            )
+            probabilities = []

-        for w, b in self.w[1:]:
-            outputs = tf.einsum("imjk,jlk->imlk", outputs, w) + b
-            outputs = self.activation(outputs)
+            # Iterate over heterogeneities
+            eps_probabilities = tf.nn.softmax(all_u, axis=1)

-        return outputs
+            # Average probabilities over heterogeneities
+            probabilities = tf.reduce_mean(eps_probabilities, axis=-1)

+            # It is not in the paper, but let's normalize with availabilities
+            probabilities = tf.multiply(probabilities, contexts_items_availabilities)
+            probabilities = tf.divide(
+                probabilities, tf.reduce_sum(probabilities, axis=1, keepdims=True) + 1e-5
+            )
+            if self.tol > 0:
+                probabilities = (1 - self.tol) * probabilities + self.tol * tf.ones_like(
+                    probabilities
+                ) / probabilities.shape[-1]

-class AssortmentUtilityDenseNetwork(tf.keras.layers.Layer):
-    """Dense Network that is applied to an assortment of items.
+            # Probabilities of selected products

-    We apply to the same network over several items and several heterogeneitites.
-    """
+            # Negative Log-Likelihood
+            batch_nll = self.loss(
+                y_pred=probabilities,
+                y_true=tf.one_hot(choices, depth=probabilities.shape[1]),
+                sample_weight=sample_weight,
+            )

-    def __init__(self, width, depth, activation="relu", add_last=True, **kwargs):
-        """Initialization of the layer.
+        grads = tape.gradient(batch_nll, self.weights)
+        self.optimizer.apply_gradients(zip(grads, self.weights))
+        return batch_nll
+
+    @tf.function
+    def batch_predict(
+        self,
+        fixed_items_features,
+        contexts_features,
+        contexts_items_features,
+        contexts_items_availabilities,
+        choices,
+        sample_weight=None,
+    ):
+        """Function that represents one prediction (Probas + Loss) for one batch of a ChoiceDataset.
+
+        Specific version for RUMnet because probabilities need to be averaged over
+        heterogeneities.

         Parameters
         ----------
-        width : int
-            Nnumber of neurons of each dense layer.
-        depth : int
-            Number of dense layers.
-        activation : str, optional
-            Activation function for each layer, by default "relu"
-        add_last : bool, optional
-            Whether to add a final dense layer with 1 neuron, by default True
+        fixed_items_features : tuple of np.ndarray (n_items, n_features)
+            Items-Features: formatting from ChoiceDataset: a matrix representing the
+            products fixed features.
+        contexts_features : tuple of np.ndarray (n_contexts, n_features)
+            Contexts-Features: features varying with contexts, shared by all products
+        contexts_items_features : tuple of np.ndarray (n_contexts, n_items, n_features)
+            Features varying with contexts and products
+        contexts_items_availabilities : np.ndarray (n_contexts, n_items)
+            Availabilities of items
+        choices : np.ndarray (n_contexts, )
+            Choices
+        sample_weight : np.ndarray, optional
+            List of sample weights to apply during the gradient descent to the batch elements,
+            by default None
+
+        Returns:
+        --------
+        tf.Tensor (1, )
+            Value of NegativeLogLikelihood loss for the batch
+        tf.Tensor (batch_size, n_items)
+            Probabilities for each product to be chosen for each context
         """
-        super().__init__(**kwargs)
-        self.width = width
-        self.depth = depth
-        self.activation = tf.keras.layers.Activation(activation)
-        self.add_last = add_last
+        utilities = self.compute_batch_utility(
+            fixed_items_features=fixed_items_features,
+            contexts_features=contexts_features,
+            contexts_items_features=contexts_items_features,
+            contexts_items_availabilities=contexts_items_availabilities,
+            choices=choices,
+        )
+        probabilities = tf.nn.softmax(utilities, axis=1)
+        probabilities = tf.reduce_mean(probabilities, axis=-1)

-    def build(self, input_shape):
-        """Lazy build of the layer.
+        # Normalization with availabilities
+        probabilities = tf.multiply(probabilities, contexts_items_availabilities)
+        probabilities = tf.divide(
+            probabilities, tf.reduce_sum(probabilities, axis=1, keepdims=True) + 1e-5
+        )
+
+        batch_nll = self.loss(
+            y_pred=probabilities,
+            y_true=tf.one_hot(choices, depth=probabilities.shape[1]),
+            sample_weight=sample_weight,
+        )
+        return batch_nll, probabilities

-        Follows tf.keras.Layer API.

-        Parameters
-        ----------
-        input_shape : tuple
-            Shape of the input of the layer.
-            Typically (batch_size, num_items, width, heterogeneity).
-        """
-        super().build(input_shape)

+class CPURUMnet(PaperRUMnet):
+    """CPU-optimized Re-Implementation of the RUMnet model.

-        weights = [
-            (
-                self.add_weight(
-                    shape=(input_shape[-2], self.width),
-                    initializer="glorot_normal",
-                    trainable=True,
-                ),
-                self.add_weight(
-                    shape=(self.width, 1),
-                    initializer="glorot_normal",
-                    trainable=True,
-                ),
-            )
-        ]
+    This implementation handles the heterogeneities in parallel so that training is faster.
+    """
-        for i in range(self.depth - 1):
-            weights.append(
-                (
-                    self.add_weight(
-                        shape=(self.width, self.width),
-                        initializer="glorot_normal",
-                        trainable=True,
-                    ),
-                    self.add_weight(
-                        shape=(self.width, 1),
-                        initializer="glorot_normal",
-                        trainable=True,
-                    ),
-                )
-            )
-        if self.add_last:
-            self.last = self.add_weight(
-                shape=(self.width, 1), initializer="glorot_normal", trainable=True
-            )
+    def compute_batch_utility(
+        self,
+        fixed_items_features,
+        contexts_features,
+        contexts_items_features,
+        contexts_items_availabilities,
+        choices,
+    ):
+        """Compute utility from a batch of ChoiceDataset.

-        self.w = weights
+        Here we assume that: item features = {fixed item features + contexts item features}
+            user features = {contexts features}

-    def call(self, inputs):
-        """Predict of the layer.

         Parameters
         ----------
-        inputs : tf.Tensor, np.ndarray
-            Input Tensor of shape (batch_size, num_items, width, heterogeneity)
+        fixed_items_features : tuple of np.ndarray (n_items, n_features)
+            Items-Features: formatting from ChoiceDataset: a matrix representing the
+            products fixed features.
+        contexts_features : tuple of np.ndarray (n_contexts, n_features)
+            Contexts-Features: features varying with contexts, shared by all products
+        contexts_items_features : tuple of np.ndarray (n_contexts, n_items, n_features)
+            Features varying with contexts and products
+        contexts_items_availabilities : np.ndarray (n_contexts, n_items)
+            Availabilities of items
+        choices : np.ndarray (n_contexts, )
+            Choices

         Returns:
         --------
-        tf.Tensor
-            Utilities of shape (batch_size, num_items, heterogeneity)
+        np.ndarray
+            Utility of each product for each context.
+            Shape must be (n_contexts, n_items)
         """
-        outputs = inputs
+        (_, _) = contexts_items_availabilities, choices
+        ### Restacking of the item features
+        stacked_fixed_items_features = tf.concat([*fixed_items_features], axis=-1)
+        stacked_contexts_features = tf.concat([*contexts_features], axis=-1)
+        stacked_contexts_items_features = tf.concat([*contexts_items_features], axis=-1)

-        for w, b in self.w:
-            # bs, items, features, heterogeneities
-            outputs = tf.einsum("ijlk, lm->ijmk", outputs, w) + b
-            outputs = self.activation(outputs)
+        full_item_features = tf.stack(
+            [stacked_fixed_items_features] * stacked_contexts_items_features.shape[0], axis=0
+        )
+        full_item_features = tf.concat(
+            [stacked_contexts_items_features, full_item_features], axis=-1
+        )

-        if self.add_last:
-            outputs = tf.einsum("ijlk, lm->ijmk", outputs, self.last)
+        ### Computation of utilities
+        utilities = []
+        batch_size = stacked_contexts_features.shape[0]

-        return outputs
+        # Computation of the customer features embeddings
+        z_embeddings = self.z_model(stacked_contexts_features)
+
+        # Iterate over items in assortment
+        for item_i in range(full_item_features.shape[1]):
+            # Computation of item features embeddings
+            x_embeddings = self.x_model(full_item_features[:, item_i, :])
+
+            stacked_heterogeneities = []
+            # Computation of utilities from embeddings, iteration over heterogeneities
+            # eps_x * eps_z
+            for _x in x_embeddings:
+                for _z in z_embeddings:
+                    full_embedding = tf.keras.layers.Concatenate()(
+                        [full_item_features[:, item_i, :], _x, stacked_contexts_features, _z]
+                    )
+                    stacked_heterogeneities.append(full_embedding)
+            item_utilities = self.u_model(tf.concat(stacked_heterogeneities, axis=0))
+            item_utilities = tf.stack(
+                [
+                    item_utilities[batch_size * i : batch_size * (i + 1)]
+                    for i in range(len(x_embeddings) * len(z_embeddings))
+                ],
+                axis=1,
+            )
+            utilities.append(item_utilities)
+        ### Reshape utilities: (batch_size, num_items, heterogeneity)
+        return tf.squeeze(tf.stack(utilities, axis=1), -1)


 class GPURUMnet(PaperRUMnet):
@@ -777,7 +908,7 @@ def instantiate(self):
             + self.z_model.trainable_variables
             + self.u_model.trainable_variables
         )
-        self.loss = CustomCategoricalCrossEntropy(
+        self.loss = tf_ops.CustomCategoricalCrossEntropy(
             from_logits=False, label_smoothing=self.label_smoothing
         )
         self.time_dict = {}
@@ -793,8 +924,8 @@ def compute_batch_utility(
     ):
         """Compute utility from a batch of ChoiceDataset.

-        Here we asssume that: item features = {fixed item features + session item features}
-        user features = {session features}
+        Here we assume that: item features = {fixed item features + contexts item features}
+        user features = {contexts features}

         Parameters
         ----------
@@ -813,8 +944,8 @@ def compute_batch_utility(
         Returns:
         --------
         np.ndarray
-            Utility of each product for each session.
-            Shape must be (n_sessions, n_items)
+            Utility of each product for each context.
+            Shape must be (n_contexts, n_items)
         """
         (_, _) = contexts_items_availabilities, choices

@@ -888,12 +1019,12 @@ def train_step(
         items_batch : tuple of np.ndarray (items_features)
             Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the
             products constant features.
-        sessions_batch : tuple of np.ndarray (sessions_features)
+        contexts_batch : tuple of np.ndarray (contexts_features)
             Time-Features
-        sessions_items_batch : tuple of np.ndarray (sessions_items_features)
+        contexts_items_batch : tuple of np.ndarray (contexts_items_features)
             Time-Item-Features
         availabilities_batch : np.ndarray
-            Availabilities (sessions_items_availabilities)
+            Availabilities (contexts_items_availabilities)
         choices_batch : np.ndarray
             Choices
         sample_weight : np.ndarray, optional
@@ -907,14 +1038,14 @@
         """
         with tf.GradientTape() as tape:
             ### Computation of utilities
-            all_u = self.compute_batch_utility(
+            utilities = self.compute_batch_utility(
                 fixed_items_features=fixed_items_features,
                 contexts_features=contexts_features,
                 contexts_items_features=contexts_items_features,
                 contexts_items_availabilities=contexts_items_availabilities,
                 choices=choices,
             )
-            eps_probabilities = tf.nn.softmax(all_u, axis=2)
+            eps_probabilities = tf.nn.softmax(utilities, axis=2)

             # Average probabilities over heterogeneities
             probabilities = tf.reduce_mean(eps_probabilities, axis=1)
@@ -923,8 +1054,10 @@
             probabilities = tf.divide(
                 probabilities, tf.reduce_sum(probabilities, axis=1, keepdims=True) + 1e-5
             )
-            # Probabilities of selected products
-            # chosen_probabilities = tf.gather_nd(indices=choices_nd, params=probabilities)
+            if self.tol > 0:
+                probabilities = (1 - self.tol) * probabilities + self.tol * tf.ones_like(
+                    probabilities
+                ) / probabilities.shape[-1]

             # Negative Log-Likelihood
             batch_nll = self.loss(
@@ -966,12 +1099,12 @@ def batch_predict(
         items_batch : tuple of np.ndarray (items_features)
             Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the
             products constant features.
-        sessions_batch : tuple of np.ndarray (sessions_features)
+        contexts_batch : tuple of np.ndarray (contexts_features)
             Time-Features
-        sessions_items_batch : tuple of np.ndarray (sessions_items_features)
+        contexts_items_batch : tuple of np.ndarray (contexts_items_features)
             Time-Item-Features
         availabilities_batch : np.ndarray
-            Availabilities (sessions_items_availabilities)
+            Availabilities (contexts_items_availabilities)
         choices_batch : np.ndarray
             Choices
         sample_weight : np.ndarray, optional
@@ -983,7 +1116,7 @@
         tf.Tensor (1, )
             Value of NegativeLogLikelihood loss for the batch
         tf.Tensor (batch_size, n_items)
-            Probabilities for each product to be chosen for each session
+            Probabilities for each product to be chosen for each context
         """
         utilities = self.compute_batch_utility(
             fixed_items_features=fixed_items_features,
@@ -1006,136 +1139,3 @@ def batch_predict(
             sample_weight=sample_weight,
         )
         return batch_loss, probabilities
-
-
-def create_ff_network(
-    input_shape, depth, width, activation="elu", add_last=False, l2_regularization_coeff=0.0
-):
-    """Base function to create a simple fully connected (Dense) network.
-
-    Parameters
-    ----------
-    input_shape : tuple of int
-        shape of the input of the network. Typically (num_features, )
-    depth : int
-        Number of dense/fully-connected of the network to create.
-    width : int
-        Neurons number for all dense layers.
- add_last : bool, optional - Whether to add a Dense layer with a single output at the end, by default False - Typically to be used when creating the utility network, that outputs a single number: - the utility. - l2_regularization_coeff : float, optional - Regularization coefficient for Dense layers weights during training, by default 0.0 - - Returns: - -------- - tf.keras.Model - Dense Neural Network with tensorflow backend. - """ - input = tf.keras.layers.Input(shape=input_shape) - regularizer = tf.keras.regularizers.L2(l2_regularization_coeff) - out = input - for _ in range(depth): - out = tf.keras.layers.Dense( - width, activation=activation, kernel_regularizer=regularizer, use_bias=True - )(out) - if add_last: - out = tf.keras.layers.Dense(1, activation="linear", use_bias=False)(out) - return tf.keras.Model(inputs=input, outputs=out) - - -def recreate_official_nets( - num_products_features, - x_width, - x_depth, - x_eps, - num_customer_features, - z_width, - z_depth, - z_eps, - width_u, - depth_u, - l2_regularization_coeff=0.0, -): - """Function to create the three nets used in RUMnet: X_net, Z_net and U_net. - - Parameters - ---------- - num_products_features : int - Number of features each product will be described with. - In terms of ChoiceDataset it is the number of { items_features + sessions_items_features } - for one product. - num_customer_features : int - Number of features each customer will be described with. - In terms of ChoiceDataset it is the number of sessions_features. - width_eps_x : int - Number of neurons for each dense layer for the products encoding net. - depth_eps_x : int - Number of dense layers for the products encoding net. - heterogeneity_x : int - Number of nets of products features encoding. - width_eps_z : int - Number of neurons for each dense layer for the customers encoding net. - depth_eps_z : int - Number of dense layers for the customers encoding net. - heterogeneity_z : int - Number of nets of customers features encoding. - width_u : int - Number of neurons for each dense layer for the utility net. - depth_u : int - Number of dense layers for the utility net. 
- l2_regularization_coef : float, optional - Value of dense layers weights regulariation to apply during training, by default 0.0 - - Returns: - -------- - tf.keras.Model - Product features encoding network - tf.keras.Model - Customer features encoding network - tf.keras.Model - Features and encoding to utility computation network - """ - # Products and Customers embeddings nets, quiet symmetrical - products_input = tf.keras.layers.Input(shape=(num_products_features)) - customer_input = tf.keras.layers.Input(shape=(num_customer_features)) - x_embeddings = [] - z_embeddings = [] - - # Creating independant nets for each heterogeneity - for _ in range(x_eps): - x_embedding = create_ff_network( - input_shape=num_products_features, - depth=x_depth, - width=x_width, - l2_regularization_coeff=l2_regularization_coeff, - )(products_input) - x_embeddings.append(x_embedding) - - # Creating independant nets for each heterogeneity - for _ in range(z_eps): - z_embedding = create_ff_network( - input_shape=num_customer_features, - depth=z_depth, - width=z_width, - l2_regularization_coeff=l2_regularization_coeff, - )(customer_input) - - z_embeddings.append(z_embedding) - - x_net = tf.keras.Model(inputs=products_input, outputs=x_embeddings, name="X_embedding") - z_net = tf.keras.Model(inputs=customer_input, outputs=z_embeddings, name="Z_embedding") - - # Utility network - u_net = create_ff_network( - input_shape=( - x_width + z_width + num_products_features + num_customer_features - ), # Input shape from previous nets - width=width_u, - depth=depth_u, - add_last=True, # Add last for utility - l2_regularization_coeff=l2_regularization_coeff, - ) - - return x_net, z_net, u_net From a3b83fb69ef10b71b08fe7631c5b53fb99c067f5 Mon Sep 17 00:00:00 2001 From: VincentAuriau Date: Fri, 2 Feb 2024 16:18:02 +0100 Subject: [PATCH 05/10] ENH: moved tfp import inside report function --- choice_learn/models/conditional_mnl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/choice_learn/models/conditional_mnl.py b/choice_learn/models/conditional_mnl.py index 00733cf2..712f38fc 100644 --- a/choice_learn/models/conditional_mnl.py +++ b/choice_learn/models/conditional_mnl.py @@ -2,7 +2,6 @@ import pandas as pd import tensorflow as tf -import tensorflow_probability as tfp from .base_model import ChoiceModel @@ -1038,6 +1037,8 @@ def compute_report(self, dataset): pandas.DataFrame A DF with estimation, Std Err, z_value and p_value for each coefficient. """ + import tensorflow_probability as tfp + weights_std = self.get_weights_std(dataset) dist = tfp.distributions.Normal(loc=0.0, scale=1.0) From 8894dd216e1ab9b9e109db52069c0a7a5a22cd68 Mon Sep 17 00:00:00 2001 From: VincentAuriau Date: Fri, 2 Feb 2024 16:18:14 +0100 Subject: [PATCH 06/10] ADD: requirements.txt --- requirements.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index d23458ee..3c9de3ac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ --e . 
+numpy==1.24.3 pandas==1.5.3 -numpy==1.24.2 +tensorflow==2.13.0 +tensorflow_probability==0.20.1 +tqdm==4.65.0 From 36e2755e5a748421ff184367d58c214789f2982e Mon Sep 17 00:00:00 2001 From: VincentAuriau Date: Fri, 2 Feb 2024 16:22:21 +0100 Subject: [PATCH 07/10] ADD: tfp as requirement in README --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 76d0c248..976714b8 100644 --- a/README.md +++ b/README.md @@ -90,8 +90,11 @@ git clone git@github.com:artefactory/choice-learn-private.git Choice-Learn requires the following: - Python (>=3.8) - NumPy (>=1.24) -- TensorFlow (>=2.13) - pandas (>=1.5) +For modelling you need: +- TensorFlow (>=2.13) +Finally, an optional requirement used for specific functions is: +- tensorflow_probability (>=0.20.1) ## Usage ```python From 5109415b3fee6195f7be5af15b5ccbc294493fcd Mon Sep 17 00:00:00 2001 From: VincentAuriau Date: Fri, 2 Feb 2024 17:07:45 +0100 Subject: [PATCH 08/10] ENH: match latest signature --- choice_learn/models/base_model.py | 8 +- choice_learn/models/conditional_mnl.py | 184 ++++++++++++++----------- 2 files changed, 106 insertions(+), 86 deletions(-) diff --git a/choice_learn/models/base_model.py b/choice_learn/models/base_model.py index 488e32cc..4808324e 100644 --- a/choice_learn/models/base_model.py +++ b/choice_learn/models/base_model.py @@ -415,7 +415,7 @@ def batch_predict( Probabilities for each product to be chosen for each context """ # Compute utilities from features - utilities = self.compute_utility( + utilities = self.compute_batch_utility( fixed_items_features, contexts_features, contexts_items_features, @@ -659,7 +659,7 @@ def f(params_1d): f.history = [] return f - def _fit_with_lbfgs(self, dataset, n_epochs, tolerance=1e-8): + def _fit_with_lbfgs(self, dataset, epochs=None, tolerance=1e-8): """Fit function for L-BFGS optimizer. Replaces the .fit method when the optimizer is set to L-BFGS. @@ -682,6 +682,8 @@ def _fit_with_lbfgs(self, dataset, n_epochs, tolerance=1e-8): # dependency import tensorflow_probability as tfp + if epochs is None: + epochs = self.epochs func = self._lbfgs_train_step(dataset) # convert initial model parameters to a 1D tf.Tensor @@ -691,7 +693,7 @@ def _fit_with_lbfgs(self, dataset, n_epochs, tolerance=1e-8): results = tfp.optimizer.lbfgs_minimize( value_and_gradients_function=func, initial_position=init_params, - max_iterations=n_epochs, + max_iterations=epochs, tolerance=tolerance, f_absolute_tolerance=-1, f_relative_tolerance=-1, diff --git a/choice_learn/models/conditional_mnl.py b/choice_learn/models/conditional_mnl.py index 712f38fc..5a95b60c 100644 --- a/choice_learn/models/conditional_mnl.py +++ b/choice_learn/models/conditional_mnl.py @@ -338,35 +338,35 @@ def _store_dataset_features_names(self, dataset): self._contexts_features_names = dataset.contexts_features_names self._contexts_items_features_names = dataset.contexts_items_features_names - def compute_utility_from_specification( + def compute_batch_utility_from_specification( self, - items_batch, - contexts_batch, - contexts_items_batch, - availabilities_batch, - choices_batch, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, verbose=0, ): """Computes the utility when the model is constructed from a ModelSpecification object. 
Parameters ---------- - tems_batch : tuple of np.ndarray (items_features) + fixed_items_features : tuple of np.ndarray Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the products constant/fixed features. Shape must be (n_items, n_items_features) - contexts_batch : tuple of np.ndarray (contexts_features) - Time-Features - Shape must be (n_choices, n_contexts_features) - contexts_items_batch : tuple of np.ndarray (contexts_items_features) - Time-Item-Features - Shape must be (n_choices, n_contexts_items_features) - availabilities_batch : np.ndarray - Availabilities (contexts_items_availabilities) - Shape must be (n_choices, n_items) + contexts_features : tuple of np.ndarray (contexts_features) + a batch of contexts features + Shape must be (n_contexts, n_contexts_features) + contexts_items_features : tuple of np.ndarray (contexts_items_features) + a batch of contexts items features + Shape must be (n_contexts, n_contexts_items_features) + contexts_items_availabilities : np.ndarray + A batch of contexts items availabilities + Shape must be (n_contexts, n_items) choices_batch : np.ndarray Choices - Shape must be (n_choices, ) + Shape must be (n_contexts, ) verbose : int, optional Parametrization of the logging outputs, by default 0 @@ -375,10 +375,10 @@ def compute_utility_from_specification( tf.Tensor Utilities corresponding of shape (n_choices, n_items) """ - _ = choices_batch + _ = choices - num_items = availabilities_batch.shape[1] - num_choices = availabilities_batch.shape[0] + num_items = contexts_items_availabilities.shape[1] + num_choices = contexts_items_availabilities.shape[0] contexts_items_utilities = [] # Items features if self._items_features_names is not None: @@ -397,7 +397,7 @@ def compute_utility_from_specification( [ s_i_u[:k], tf.multiply( - items_batch[i][k, j], + fixed_items_features[i][k, j], self.weights[weight_index][:, q], ), s_i_u[k + 1 :], @@ -409,7 +409,7 @@ def compute_utility_from_specification( [ s_i_u[:idx], tf.multiply( - items_batch[i][idx, j], + fixed_items_features[i][idx, j], self.weights[weight_index][:, q], ), s_i_u[idx + 1 :], @@ -458,9 +458,9 @@ def compute_utility_from_specification( axis=1, ) """ - contexts_batch[i][:, j] + contexts_features[i][:, j] compute = tf.multiply( - contexts_batch[i][:, j], + contexts_features[i][:, j], self.weights[weight_index][:, q], ) s_i_u[k] += compute @@ -482,7 +482,7 @@ def compute_utility_from_specification( ) """ compute = tf.multiply( - contexts_batch[i][:, j], self.weights[weight_index][:, q] + contexts_features[i][:, j], self.weights[weight_index][:, q] ) s_i_u[idx] += compute @@ -516,7 +516,7 @@ def compute_utility_from_specification( s_i_u[:, :k], tf.expand_dims( tf.multiply( - contexts_items_batch[i][:, k, j], + contexts_items_features[i][:, k, j], self.weights[weight_index][:, q], ), axis=-1, @@ -531,7 +531,7 @@ def compute_utility_from_specification( s_i_u[:, :idx], tf.expand_dims( tf.multiply( - contexts_items_batch[i][:, idx, j], + contexts_items_features[i][:, idx, j], self.weights[weight_index][:, q], ), axis=-1, @@ -785,28 +785,34 @@ def instantiate( raise ValueError("No weights instantiated") return weights - def compute_utility( - self, items_batch, contexts_batch, contexts_items_batch, availabilities_batch, choices_batch + def compute_batch_utility( + self, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, ): """Main method to compute the utility of the model. Selects the right method to compute. 
Parameters ---------- - items_batch : tuple of np.ndarray (items_features) + fixed_items_features : tuple of np.ndarray Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the products constant/fixed features. Shape must be (n_items, n_items_features) - contexts_batch : tuple of np.ndarray (contexts_features) - Time-Features - Shape must be (n_choices, n_contexts_features) - contexts_items_batch : tuple of np.ndarray (contexts_items_features) - Time-Item-Features - Shape must be (n_choices, n_contexts_items_features) - availabilities_batch : np.ndarray - Availabilities (contexts_items_availabilities) - Shape must be (n_choices, n_items) + contexts_features : tuple of np.ndarray (contexts_features) + a batch of contexts features + Shape must be (n_contexts, n_contexts_features) + contexts_items_features : tuple of np.ndarray (contexts_items_features) + a batch of contexts items features + Shape must be (n_contexts, n_contexts_items_features) + contexts_items_availabilities : np.ndarray + A batch of contexts items availabilities + Shape must be (n_contexts, n_items) choices_batch : np.ndarray - Choices Shape must be (n_choices, ) + Choices + Shape must be (n_contexts, ) Returns: -------- @@ -814,44 +820,49 @@ def compute_utility( Computed utilities of shape (n_choices, n_items). """ if isinstance(self.params, ModelSpecification): - return self.compute_utility_from_specification( - items_batch, - contexts_batch, - contexts_items_batch, - availabilities_batch, - choices_batch, + return self.compute_batch_utility_from_specification( + fixed_items_features=fixed_items_features, + contexts_features=contexts_features, + contexts_items_features=contexts_items_features, + contexts_items_availabilities=contexts_items_availabilities, + choices=choices, ) - return self.compute_utility_from_dict( - items_batch, - contexts_batch, - contexts_items_batch, - availabilities_batch, - choices_batch, + return self.compute_batch_utility_from_dict( + fixed_items_features=fixed_items_features, + contexts_features=contexts_features, + contexts_items_features=contexts_items_features, + contexts_items_availabilities=contexts_items_availabilities, + choices=choices, ) - def compute_utility_from_dict( - self, items_batch, contexts_batch, contexts_items_batch, availabilities_batch, choices_batch + def compute_batch_utility_from_dict( + self, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, ): """Computes the utility when the model is constructed from a dictionnary object. Parameters ---------- - items_batch : tuple of np.ndarray (items_features) + fixed_items_features : tuple of np.ndarray Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the products constant/fixed features. 
Shape must be (n_items, n_items_features) - contexts_batch : tuple of np.ndarray (contexts_features) - Time-Features - Shape must be (n_choices, n_contexts_features) - contexts_items_batch : tuple of np.ndarray (contexts_items_features) - Time-Item-Features - Shape must be (n_choices, n_contexts_items_features) - availabilities_batch : np.ndarray - Availabilities (contexts_items_availabilities) - Shape must be (n_choices, n_items) + contexts_features : tuple of np.ndarray (contexts_features) + a batch of contexts features + Shape must be (n_contexts, n_contexts_features) + contexts_items_features : tuple of np.ndarray (contexts_items_features) + a batch of contexts items features + Shape must be (n_contexts, n_contexts_items_features) + contexts_items_availabilities : np.ndarray + A batch of contexts items availabilities + Shape must be (n_contexts, n_items) choices_batch : np.ndarray Choices - Shape must be (n_choices, ) + Shape must be (n_contexts, ) verbose : int, optional Parametrization of the logging outputs, by default 0 @@ -860,14 +871,14 @@ def compute_utility_from_dict( tf.Tensor Utilities corresponding of shape (n_choices, n_items) """ - _, _ = availabilities_batch, choices_batch + _ = choices contexts_items_utilities = [] - if items_batch is not None: - num_items = items_batch[0].shape[0] + if fixed_items_features is not None: + num_items = fixed_items_features[0].shape[0] else: - num_items = contexts_items_batch[0].shape[1] - num_choices = availabilities_batch.shape[0] + num_items = contexts_items_features[0].shape[1] + num_choices = contexts_items_availabilities.shape[0] # Items features for i, feat_tuple in enumerate(self._items_features_names): @@ -876,16 +887,19 @@ def compute_utility_from_dict( weight = self.weights[k] if self.params[feat] == "constant": s_i_u = tf.concat( - [tf.multiply(items_batch[i][:, j], weight)] * num_choices, axis=0 + [tf.multiply(fixed_items_features[i][:, j], weight)] * num_choices, + axis=0, ) elif self.params[feat] == "item": weight = tf.concat([tf.constant([[0.0]]), weight], axis=-1) s_i_u = tf.concat( - [tf.multiply(items_batch[i][:, j], weight)] * num_choices, axis=0 + [tf.multiply(fixed_items_features[i][:, j], weight)] * num_choices, + axis=0, ) elif self.params[feat] == "item-full": s_i_u = tf.concat( - [tf.multiply(items_batch[i][:, j], weight)] * num_choices, axis=0 + [tf.multiply(fixed_items_features[i][:, j], weight)] * num_choices, + axis=0, ) else: raise NotImplementedError(f"Param {self.params[feat]} not implemented") @@ -903,13 +917,13 @@ def compute_utility_from_dict( weight = self.weights[k] if self.params[feat] == "constant": s_i_u = tf.concat( - [tf.multiply(contexts_batch[i][j], weight)] * num_items, axis=-1 + [tf.multiply(contexts_features[i][j], weight)] * num_items, axis=-1 ) elif self.params[feat] == "item": weight = tf.concat([tf.constant([[0.0]]), weight], axis=-1) - s_i_u = tf.tensordot(contexts_batch[i][:, j : j + 1], weight, axes=1) + s_i_u = tf.tensordot(contexts_features[i][:, j : j + 1], weight, axes=1) elif self.params[feat] == "item-full": - s_i_u = tf.tensordot(contexts_batch[i][:, j : j + 1], weight, axes=1) + s_i_u = tf.tensordot(contexts_features[i][:, j : j + 1], weight, axes=1) else: raise NotImplementedError(f"Param {self.params[feat]} not implemented") contexts_items_utilities.append(s_i_u) @@ -925,12 +939,12 @@ def compute_utility_from_dict( if feat in self.params.keys(): weight = self.weights[k] if self.params[feat] == "constant": - s_i_u = tf.multiply(contexts_items_batch[i][:, :, j], weight) + s_i_u 
= tf.multiply(contexts_items_features[i][:, :, j], weight)
                 elif self.params[feat] == "item":
                     weight = tf.concat([tf.constant([[0.0]]), weight], axis=-1)
-                    s_i_u = tf.multiply(contexts_items_batch[i][:, :, j], weight)
+                    s_i_u = tf.multiply(contexts_items_features[i][:, :, j], weight)
                 elif self.params[feat] == "item-full":
-                    s_i_u = tf.multiply(contexts_items_batch[i][:, :, j], weight)
+                    s_i_u = tf.multiply(contexts_items_features[i][:, :, j], weight)
                 else:
                     raise NotImplementedError(f"Param {self.params[feat]} not implemented")
                 contexts_items_utilities.append(s_i_u)
@@ -987,7 +1001,7 @@ def fit(self, choice_dataset, get_report=False, **kwargs):
             self.report = self.compute_report(choice_dataset)
         return fit

-    def _fit_with_lbfgs(self, choice_dataset, n_epochs, tolerance=1e-8, get_report=False):
+    def _fit_with_lbfgs(self, choice_dataset, epochs=None, tolerance=1e-8, get_report=False):
         """Specific fit function to estimate the paramters with LBFGS.

         Parameters
@@ -1018,7 +1032,9 @@ def _fit_with_lbfgs(self, choice_dataset, n_epochs, tolerance=1e-8, get_report=F
             contexts_items_features_names=choice_dataset.contexts_items_features_names,
         )
         self.instantiated = True
-        fit = super()._fit_with_lbfgs(choice_dataset, n_epochs, tolerance)
+        if epochs is None:
+            epochs = self.epochs
+        fit = super()._fit_with_lbfgs(choice_dataset, epochs, tolerance)
         if get_report:
             self.report = self.compute_report(choice_dataset)
         return fit
@@ -1093,7 +1109,7 @@ def get_weights_std(self, dataset):
             index += _w.shape[1]
         model.weights = mw
         for batch in dataset.iter_batch(batch_size=-1):
-            utilities = model.compute_utility(*batch)
+            utilities = model.compute_batch_utility(*batch)
             probabilities = tf.nn.softmax(utilities, axis=-1)
             loss = tf.keras.losses.CategoricalCrossentropy(reduction="sum")(
                 y_pred=probabilities,
@@ -1103,7 +1119,9 @@ def get_weights_std(self, dataset):
             jacobian = tape_2.jacobian(loss, w)
         # Compute the Hessian from the Jacobian
         hessian = tape_1.batch_jacobian(jacobian, w)
-        return tf.sqrt([tf.linalg.inv(tf.squeeze(hessian))[i][i] for i in range(len(w))])
+        return tf.sqrt(
+            [tf.linalg.inv(tf.squeeze(hessian))[i][i] for i in range(len(tf.squeeze(hessian)))]
+        )

     def clone(self):
         """Returns a clone of the model."""
From dadf5f4972d1acea808c2ef881c5de5bce577755 Mon Sep 17 00:00:00 2001
From: VincentAuriau
Date: Fri, 2 Feb 2024 17:08:25 +0100
Subject: [PATCH 09/10] ENH: latest signature in example

---
 README.md                                     |  2 +-
 .../choice_learn_introduction_clogit.ipynb    | 36 +++++++++----------
 notebooks/custom_model.ipynb                  | 16 ++++-----
 3 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/README.md b/README.md
index 976714b8..9481432a 100644
--- a/README.md
+++ b/README.md
@@ -93,7 +93,7 @@ Choice-Learn requires the following:
 - pandas (>=1.5)
 For modelling you need:
 - TensorFlow (>=2.13)
-Finally, an optional requirement used for specific functions is:
+Finally, an optional requirement used for reports and L-BFGS use is:
 - tensorflow_probability (>=0.20.1)

 ## Usage
 ```python
diff --git a/notebooks/choice_learn_introduction_clogit.ipynb b/notebooks/choice_learn_introduction_clogit.ipynb
index 51886a9d..0dcc47a5 100644
--- a/notebooks/choice_learn_introduction_clogit.ipynb
+++ b/notebooks/choice_learn_introduction_clogit.ipynb
@@ -174,7 +174,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "history = model.fit(dataset, n_epochs=1000)"
+    "history = model.fit(dataset, epochs=1000, get_report=True)"
    ]
   },
  {
@@ -447,7 +447,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "history = cmnl.fit(dataset, n_epochs=1000)\n",
+    "history
= cmnl.fit(dataset, epochs=1000)\n", "print(cmnl.weights)" ] }, @@ -469,7 +469,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 1/1 [00:02<00:00, 2.46s/it]" + "100%|██████████| 1/1 [00:02<00:00, 2.41s/it]" ] }, { @@ -499,8 +499,8 @@ " tf.constant([[0.0595089, -0.00678188, -0.00645982, -0.00145029]]),\n", " tf.constant([[0.697311, 1.8437, 3.27381]]),\n", "]\n", - "gt_model = ConditionalMNL(parameters=params, lr=0.01)\n", - "gt_model.fit(dataset, n_epochs=1, batch_size=-1)\n", + "gt_model = ConditionalMNL(parameters=params, lr=0.01, epochs=1, batch_size=-1)\n", + "gt_model.fit(dataset)\n", "\n", "# Here we estimate the negative log-likelihood with these coefficients (also, we obtain same value as in those papers):\n", "gt_model.weights = gt_weights\n", @@ -565,11 +565,11 @@ "outputs": [], "source": [ "cmnl = ConditionalMNL(parameters=params, optimizer=\"Adam\")\n", - "history = cmnl.fit(dataset, n_epochs=2000, batch_size=-1)\n", + "history = cmnl.fit(dataset, epochs=2000, batch_size=-1)\n", "cmnl.optimizer.lr = cmnl.optimizer.lr / 5\n", - "history2 = cmnl.fit(dataset, n_epochs=4000, batch_size=-1)\n", + "history2 = cmnl.fit(dataset, epochs=4000, batch_size=-1)\n", "cmnl.optimizer.lr = cmnl.optimizer.lr / 10\n", - "history3 = cmnl.fit(dataset, n_epochs=20000, batch_size=-1)" + "history3 = cmnl.fit(dataset, epochs=20000, batch_size=-1)" ] }, { @@ -582,14 +582,14 @@ { "data": { "text/plain": [ - "[,\n", - " ,\n", - " ,\n", - " ,\n", + "[,\n", + " ,\n", + " ,\n", + " ,\n", " ,\n", - " ]" + " ]" ] }, "execution_count": null, @@ -611,7 +611,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": null, @@ -653,7 +653,7 @@ " \"intercept\": \"item\"}\n", "\n", "# Instantiation of the model\n", - "cmnl = ConditionalMNL(parameters=params, optimizer=\"lbfgs\")" + "cmnl = ConditionalMNL(parameters=params, optimizer=\"lbfgs\", epochs=1000)" ] }, { @@ -678,7 +678,7 @@ } ], "source": [ - "history = cmnl.fit(dataset, n_epochs=1000)\n", + "history = cmnl.fit(dataset)\n", "print(cmnl.weights)" ] }, @@ -821,7 +821,7 @@ "metadata": {}, "outputs": [], "source": [ - "history = swiss_model.fit(swiss_dataset, n_epochs=10000)" + "history = swiss_model.fit(swiss_dataset, epochs=10000)" ] }, { diff --git a/notebooks/custom_model.ipynb b/notebooks/custom_model.ipynb index 2e929611..9440ef69 100644 --- a/notebooks/custom_model.ipynb +++ b/notebooks/custom_model.ipynb @@ -70,17 +70,17 @@ "transport_df.income = transport_df.income.astype(\"float32\")\n", "\n", "dataset = ChoiceDataset.from_single_df(df=transport_df,\n", - " items_features_columns=[\"oh_air\",\n", + " fixed_items_features_columns=[\"oh_air\",\n", " \"oh_bus\",\n", " \"oh_car\",\n", " \"oh_train\"],\n", - " sessions_features_columns=[\"income\"],\n", - " sessions_items_features_columns=[\"cost\",\n", + " contexts_features_columns=[\"income\"],\n", + " contexts_items_features_columns=[\"cost\",\n", " \"freq\",\n", " \"ovt\",\n", " \"ivt\"],\n", " items_id_column=\"alt\",\n", - " sessions_id_column=\"case\",\n", + " contexts_id_column=\"case\",\n", " choices_column=\"choice\",\n", " choice_mode=\"one_zero\")" ] @@ -93,7 +93,7 @@ "\n", "For our custom model to work, we need to specify:\n", "- Weights initialization in __init__()\n", - "- the utility function in compute_utility()" + "- the utility function in compute_batch_utility()" ] }, { @@ -179,7 +179,7 @@ " self.weights = [beta_inter, beta_freq_cost_ovt, beta_income, beta_ivt]\n", "\n", "\n", - " def compute_utility(self,\n", + " def compute_batch_utility(self,\n", " 
items_batch,\n", " sessions_batch,\n", " sessions_items_batch,\n", @@ -249,7 +249,7 @@ "outputs": [], "source": [ "model = CustomCanadaConditionalMNL(optimizer=\"lbfgs\")\n", - "history = model.fit(dataset, n_epochs=400)" + "history = model.fit(dataset, epochs=400)" ] }, { @@ -427,7 +427,7 @@ " # Easy with TensorFlow.Layer\n", " self.weights = self.dense_1.trainable_variables + self.dense_2.trainable_variables\n", " \n", - " def compute_utility(self,\n", + " def compute_batch_utility(self,\n", " items_batch,\n", " sessions_batch,\n", " sessions_items_batch,\n", From 68a4ebfbf31c4a641f3311f30bee95cd0d7e2203 Mon Sep 17 00:00:00 2001 From: VincentAuriau Date: Fri, 2 Feb 2024 17:16:51 +0100 Subject: [PATCH 10/10] FIX: add local in requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 3c9de3ac..07c7368e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +-e . numpy==1.24.3 pandas==1.5.3 tensorflow==2.13.0
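Across the RUMnet changes in this series, `train_step` and `batch_predict` post-process the heterogeneity-averaged softmax the same way: mask by availabilities, renormalize, and (in `train_step`) blend in a uniform floor controlled by `tol`. Below is a minimal standalone sketch of that logic, using NumPy as a stand-in; the array shapes and values are illustrative assumptions, not data taken from the library.

```python
import numpy as np

# Illustrative batch (assumed values): 2 contexts, 3 items.
probabilities = np.array([[0.5, 0.3, 0.2],
                          [0.4, 0.4, 0.2]])
availabilities = np.array([[1.0, 1.0, 0.0],
                           [1.0, 1.0, 1.0]])
tol = 1e-2

# Zero out unavailable items, then renormalize; the small epsilon mirrors
# the `+ 1e-5` in the patches and guards against division by zero.
probabilities = probabilities * availabilities
probabilities = probabilities / (probabilities.sum(axis=1, keepdims=True) + 1e-5)

# Blend with a uniform distribution so no probability is exactly zero.
if tol > 0:
    n_items = probabilities.shape[-1]
    probabilities = (1 - tol) * probabilities + tol * np.ones_like(probabilities) / n_items

print(probabilities)  # each row sums to ~1; the unavailable item keeps only the tol floor
```

The `tol` blend keeps every probability strictly positive, so the cross-entropy computed in the subsequent loss call stays finite even when the model places (near-)zero mass on the chosen item.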