From 5eb2201fc458a10c5a8efaf8d74b9197c28e197f Mon Sep 17 00:00:00 2001 From: VincentAuriau Date: Thu, 1 Feb 2024 09:25:22 +0100 Subject: [PATCH 01/10] FIX: typo in issue --- choice_learn/models/conditional_mnl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/choice_learn/models/conditional_mnl.py b/choice_learn/models/conditional_mnl.py index da03fe95..00733cf2 100644 --- a/choice_learn/models/conditional_mnl.py +++ b/choice_learn/models/conditional_mnl.py @@ -1102,7 +1102,7 @@ def get_weights_std(self, dataset): jacobian = tape_2.jacobian(loss, w) # Compute the Hessian from the Jacobian hessian = tape_1.batch_jacobian(jacobian, w) - return tf.sqrt([tf.linalg.inv(tf.squeeze(hessian))[i][i] for i in range(13)]) + return tf.sqrt([tf.linalg.inv(tf.squeeze(hessian))[i][i] for i in range(len(w))]) def clone(self): """Returns a clone of the model.""" From bbe7666c157ee2dd0669bf0b589ef24d9896912e Mon Sep 17 00:00:00 2001 From: VincentAuriau Date: Fri, 2 Feb 2024 11:52:45 +0100 Subject: [PATCH 02/10] ENH: renaming, cleaning --- choice_learn/tf_ops.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/choice_learn/tf_ops.py b/choice_learn/tf_ops.py index cf8d617e..243fb8ba 100644 --- a/choice_learn/tf_ops.py +++ b/choice_learn/tf_ops.py @@ -3,8 +3,8 @@ import tensorflow as tf -def custom_softmax( - sessions_items_logits, sessions_items_availabilities, axis=-1, normalize_exit=False, eps=1e-5 +def softmax_with_availabilities( + contexts_items_logits, contexts_items_availabilities, axis=-1, normalize_exit=False, eps=1e-5 ): """Function to compute softmax probabilities from utilities. @@ -14,9 +14,9 @@ def custom_softmax( Parameters ---------- - sessions_items_logits : np.ndarray (n_sessions, n_products) + contexts_items_logits : np.ndarray (n_sessions, n_products) Utilities / Logits on which to compute the softmax - sessions_items_availabilities : np.ndarray (n_sessions, n_products) + contexts_items_availabilities : np.ndarray (n_sessions, n_products) Matrix indicating the availabitily (1) or not (0) of the products axis : int, optional Axis of sessions_logits on which to apply the softmax, by default -1 @@ -34,10 +34,10 @@ def custom_softmax( """ # Substract max utility to avoid overflow numerator = tf.exp( - sessions_items_logits - tf.reduce_max(sessions_items_logits, axis=axis, keepdims=True) + contexts_items_logits - tf.reduce_max(contexts_items_logits, axis=axis, keepdims=True) ) # Set unavailable products utility to 0 - numerator = tf.multiply(numerator, sessions_items_availabilities) + numerator = tf.multiply(numerator, contexts_items_availabilities) # Sum of total available utilities denominator = tf.reduce_sum(numerator, axis=axis, keepdims=True) # Add 1 to the denominator to take into account the exit choice From 41adb38d459aaba54534644a5f0c3f1d33cb1a31 Mon Sep 17 00:00:00 2001 From: VincentAuriau Date: Fri, 2 Feb 2024 11:53:52 +0100 Subject: [PATCH 03/10] ENH: new signature, better names & doc --- choice_learn/models/base_model.py | 357 +++++++++++++++++------------- 1 file changed, 197 insertions(+), 160 deletions(-) diff --git a/choice_learn/models/base_model.py b/choice_learn/models/base_model.py index ca8aae7a..488e32cc 100644 --- a/choice_learn/models/base_model.py +++ b/choice_learn/models/base_model.py @@ -9,10 +9,7 @@ import tensorflow as tf import tqdm -from choice_learn.tf_ops import ( - CustomCategoricalCrossEntropy, - custom_softmax, -) +import choice_learn.tf_ops as tf_ops class ChoiceModel(object): @@ -25,6 +22,8 @@ def __init__( 
optimizer="Adam", callbacks=None, lr=0.001, + epochs=1, + batch_size=32, ): """Instantiates the ChoiceModel. @@ -45,10 +44,9 @@ def __init__( self.label_smoothing = label_smoothing self.stop_training = False - # self.loss = tf.keras.losses.CategoricalCrossentropy( - # from_logits=False, label_smoothing=self.label_smoothing - # ) - self.loss = CustomCategoricalCrossEntropy( + # Loss function wrapping tf.keras.losses.CategoricalCrossEntropy + # with smoothing and normalization options + self.loss = tf_ops.CustomCategoricalCrossEntropy( from_logits=False, label_smoothing=self.label_smoothing ) self.callbacks = tf.keras.callbacks.CallbackList(callbacks, add_history=True, model=None) @@ -69,9 +67,17 @@ def __init__( print(f"Optimizer {optimizer} not implemnted, switching for default Adam") self.optimizer = tf.keras.optimizers.Adam(lr) + self.epochs = epochs + self.batch_size = batch_size + @abstractmethod - def compute_utility( - self, items_batch, sessions_batch, sessions_items_batch, availabilities_batch, choices_batch + def compute_batch_utility( + self, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, ): """Method that defines how the model computes the utility of a product. @@ -80,28 +86,28 @@ def compute_utility( Parameters ---------- - items_batch : tuple of np.ndarray (items_features) + fixed_items_features : tuple of np.ndarray Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the products constant/fixed features. Shape must be (n_items, n_items_features) - sessions_batch : tuple of np.ndarray (sessions_features) - Time-Features - Shape must be (n_sessions, n_sessions_features) - sessions_items_batch : tuple of np.ndarray (sessions_items_features) - Time-Item-Features - Shape must be (n_sessions, n_sessions_items_features) - availabilities_batch : np.ndarray - Availabilities (sessions_items_availabilities) - Shape must be (n_sessions, n_items) + contexts_features : tuple of np.ndarray (contexts_features) + a batch of contexts features + Shape must be (n_contexts, n_contexts_features) + contexts_items_features : tuple of np.ndarray (contexts_items_features) + a batch of contexts items features + Shape must be (n_contexts, n_contexts_items_features) + contexts_items_availabilities : np.ndarray + A batch of contexts items availabilities + Shape must be (n_contexts, n_items) choices_batch : np.ndarray Choices - Shape must be (n_sessions, ) + Shape must be (n_contexts, ) Returns: -------- np.ndarray - Utility of each product for each session. - Shape must be (n_sessions, n_items) + Utility of each product for each context. + Shape must be (n_contexts, n_items) """ # To be implemented in children classes # Can be numpy or tensorflow based @@ -110,28 +116,33 @@ def compute_utility( @tf.function def train_step( self, - items_batch, - sessions_batch, - sessions_items_batch, - availabilities_batch, - choices_batch, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, sample_weight=None, ): """Function that represents one training step (= one gradient descent step) of the model. Parameters ---------- - items_batch : tuple of np.ndarray (items_features) + fixed_items_features : tuple of np.ndarray Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the products constant/fixed features. 
- sessions_batch : tuple of np.ndarray (sessions_features) - Time-Features - sessions_items_batch : tuple of np.ndarray (sessions_items_features) - Time-Item-Features - availabilities_batch : np.ndarray - Availabilities (sessions_items_availabilities) + Shape must be (n_items, n_items_features) + contexts_features : tuple of np.ndarray (contexts_features) + a batch of contexts features + Shape must be (n_contexts, n_contexts_features) + contexts_items_features : tuple of np.ndarray (contexts_items_features) + a batch of contexts items features + Shape must be (n_contexts, n_contexts_items_features) + contexts_items_availabilities : np.ndarray + A batch of contexts items availabilities + Shape must be (n_contexts, n_items) choices_batch : np.ndarray Choices + Shape must be (n_contexts, ) sample_weight : np.ndarray, optional List samples weights to apply during the gradient descent to the batch elements, by default None @@ -142,69 +153,56 @@ def train_step( Value of NegativeLogLikelihood loss for the batch """ with tf.GradientTape() as tape: - all_u = self.compute_utility( - items_batch, - sessions_batch, - sessions_items_batch, - availabilities_batch, - choices_batch, + utilities = self.compute_batch_utility( + fixed_items_features=fixed_items_features, + contexts_features=contexts_features, + contexts_items_features=contexts_items_features, + contexts_items_availabilities=contexts_items_availabilities, + choices=choices, ) - """ - all_u = tf.math.exp(all_u) - - # Assortment(t) Utility - norms = tf.reduce_sum(tf.multiply(all_u, ia_batch), axis=1) - if self.normalize_non_buy: - norms += 1 - # Probabilities - final_utilities = tf.divide( - all_u, - tf.repeat(tf.expand_dims(norms, 1), fif_batch[0].shape[0], axis=1), - ) - # Probabilities of selected product - available_utilities = tf.gather_nd(indices=choices_nd, params=final_utilities) - """ - # probabilities = availability_softmax(all_u, availabilities_batch, axis=-1) - probabilities = custom_softmax( - all_u, availabilities_batch, normalize_exit=self.normalize_non_buy, axis=-1 + + probabilities = tf_ops.softmax_with_availabilities( + contexts_items_logits=utilities, + contexts_items_availabilities=contexts_items_availabilities, + normalize_exit=self.normalize_non_buy, + axis=-1, ) # Negative Log-Likelihood neg_loglikelihood = self.loss( y_pred=probabilities, - y_true=tf.one_hot(choices_batch, depth=probabilities.shape[1]), + y_true=tf.one_hot(choices, depth=probabilities.shape[1]), sample_weight=sample_weight, ) - """ - if sample_weight is not None: - neg_loglikelihood = -tf.reduce_sum( - tf.math.log(available_utilities + 1e-10) * sample_weight - ) - else: - neg_loglikelihood = -tf.reduce_sum(tf.math.log(available_utilities + 1e-10)) - """ + grads = tape.gradient(neg_loglikelihood, self.weights) self.optimizer.apply_gradients(zip(grads, self.weights)) return neg_loglikelihood def fit( - self, choice_dataset, n_epochs, batch_size, sample_weight=None, val_dataset=None, verbose=0 + self, + choice_dataset, + sample_weight=None, + val_dataset=None, + verbose=0, + epochs=None, + batch_size=None, ): """Method to train the model with a ChoiceDataset. 
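 A minimal usage sketch (hypothetical subclass and dataset; assumes tensorflow is
 imported as tf and ChoiceModel is in scope — only compute_batch_utility has to be
 overridden, the training loop below is inherited):

 >>> class LinearUtilityModel(ChoiceModel):
 ...     def __init__(self, n_features, **kwargs):
 ...         super().__init__(**kwargs)
 ...         self.beta = tf.Variable(tf.random.normal((n_features,)))
 ...         self.weights = [self.beta]  # read by train_step's GradientTape
 ...     def compute_batch_utility(self, fixed_items_features, contexts_features,
 ...                               contexts_items_features, contexts_items_availabilities,
 ...                               choices):
 ...         # (n_contexts, n_items, n_features) . (n_features,) -> (n_contexts, n_items)
 ...         return tf.tensordot(
 ...             tf.concat([*contexts_items_features], axis=-1), self.beta, axes=1
 ...         )
 >>> # history = LinearUtilityModel(n_features=4).fit(choice_dataset)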
Parameters ---------- choice_dataset : ChoiceDataset - _description_ - n_epochs : int - Number of epochs - batch_size : int - Batch size + Input data in the form of a ChoiceDataset sample_weight : np.ndarray, optional Sample weights to apply, by default None val_dataset : ChoiceDataset, optional Test ChoiceDataset to evaluate performances on test at each epoch, by default None verbose : int, optional print level, for debugging, by default 0 + epochs : int, optional + Number of epochs, default is None, meaning we use self.epochs + batch_size : int, optional + Batch size, default is None, meaning we use self.batch_size Returns: -------- @@ -214,8 +212,13 @@ def fit( if hasattr(self, "instantiated"): if not self.instantiated: raise ValueError("Model not instantiated. Please call .instantiate() first.") + if epochs is None: + epochs = self.epochs + if batch_size is None: + batch_size = self.batch_size + losses_history = {"train_loss": []} - t_range = tqdm.trange(n_epochs, position=0) + t_range = tqdm.trange(epochs, position=0) self.callbacks.on_train_begin() @@ -245,8 +248,8 @@ def fit( for batch_nb, ( ( items_batch, - sessions_batch, - sessions_items_batch, + contexts_batch, + contexts_items_batch, availabilities_batch, choices_batch, ), @@ -256,8 +259,8 @@ def fit( neg_loglikelihood = self.train_step( items_batch, - sessions_batch, - sessions_items_batch, + contexts_batch, + contexts_items_batch, availabilities_batch, choices_batch, sample_weight=weight_batch, @@ -283,16 +286,16 @@ def fit( inner_range = choice_dataset.iter_batch(shuffle=True, batch_size=batch_size) for batch_nb, ( items_batch, - sessions_batch, - sessions_items_batch, + contexts_batch, + contexts_items_batch, availabilities_batch, choices_batch, ) in enumerate(inner_range): self.callbacks.on_train_batch_begin(batch_nb) neg_loglikelihood = self.train_step( items_batch, - sessions_batch, - sessions_items_batch, + contexts_batch, + contexts_items_batch, availabilities_batch, choices_batch, ) @@ -329,8 +332,8 @@ def fit( test_losses = [] for batch_nb, ( items_batch, - sessions_batch, - sessions_items_batch, + contexts_batch, + contexts_items_batch, availabilities_batch, choices_batch, ) in enumerate(val_dataset.iter_batch(shuffle=False, batch_size=batch_size)): @@ -339,8 +342,8 @@ def fit( test_losses.append( self.batch_predict( items_batch, - sessions_batch, - sessions_items_batch, + contexts_batch, + contexts_items_batch, availabilities_batch, choices_batch, )[0] @@ -373,28 +376,33 @@ def fit( @tf.function def batch_predict( self, - items_batch, - sessions_batch, - sessions_items_batch, - availabilities_batch, - choices_batch, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, sample_weight=None, ): """Function that represents one prediction (Probas + Loss) for one batch of a ChoiceDataset. Parameters ---------- - items_batch : tuple of np.ndarray (items_features) + fixed_items_features : tuple of np.ndarray Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the products - constant features. - sessions_batch : tuple of np.ndarray (sessions_features) - Time-Features - sessions_items_batch : tuple of np.ndarray (sessions_items_features) - Time-Item-Features - availabilities_batch : np.ndarray - Availabilities (sessions_items_availabilities) + constant/fixed features. 
+ Shape must be (n_items, n_items_features) + contexts_features : tuple of np.ndarray (contexts_features) + a batch of contexts features + Shape must be (n_contexts, n_contexts_features) + contexts_items_features : tuple of np.ndarray (contexts_items_features) + a batch of contexts items features + Shape must be (n_contexts, n_contexts_items_features) + contexts_items_availabilities : np.ndarray + A batch of contexts items availabilities + Shape must be (n_contexts, n_items) choices_batch : np.ndarray Choices + Shape must be (n_contexts, ) sample_weight : np.ndarray, optional List samples weights to apply during the gradient descent to the batch elements, by default None @@ -404,23 +412,29 @@ def batch_predict( tf.Tensor (1, ) Value of NegativeLogLikelihood loss for the batch tf.Tensor (batch_size, n_items) - Probabilities for each product to be chosen for each session + Probabilities for each product to be chosen for each context """ # Compute utilities from features utilities = self.compute_utility( - items_batch, sessions_batch, sessions_items_batch, availabilities_batch, choices_batch + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, ) # Compute probabilities from utilities & availabilties - # probabilities = availability_softmax(utilities, availabilities_batch, axis=-1) - probabilities = custom_softmax( - utilities, availabilities_batch, normalize_exit=self.normalize_non_buy, axis=-1 + probabilities = tf_ops.softmax_with_availabilities( + contexts_items_logits=utilities, + contexts_items_availabilities=contexts_items_availabilities, + normalize_exit=self.normalize_non_buy, + axis=-1, ) # Compute loss from probabilities & actual choices # batch_loss = self.loss(probabilities, c_batch, sample_weight=sample_weight) batch_loss = self.loss( y_pred=probabilities, - y_true=tf.one_hot(choices_batch, depth=probabilities.shape[1]), + y_true=tf.one_hot(choices, depth=probabilities.shape[1]), sample_weight=sample_weight, ) return batch_loss, probabilities @@ -477,7 +491,7 @@ def load_model(cls, path): return cls def predict_probas(self, choice_dataset, batch_size=-1): - """Predicts the choice probabilities for each session and each product of a ChoiceDataset. + """Predicts the choice probabilities for each context and each product of a ChoiceDataset. Parameters ---------- @@ -488,30 +502,30 @@ def predict_probas(self, choice_dataset, batch_size=-1): Returns: -------- - np.ndarray (n_sessions, n_items) - Choice probabilties for each session and each product + np.ndarray (n_contexts, n_items) + Choice probabilties for each context and each product """ stacked_probabilities = [] for ( - items_batch, - sessions_batch, - sessions_items_batch, - availabilities_batch, - choices_batch, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, ) in choice_dataset.iter_batch(batch_size=batch_size): _, probabilities = self.batch_predict( - items_batch, - sessions_batch, - sessions_items_batch, - availabilities_batch, - choices_batch, + fixed_items_features=fixed_items_features, + contexts_features=contexts_features, + contexts_items_features=contexts_items_features, + contexts_items_availabilities=contexts_items_availabilities, + choices=choices, ) stacked_probabilities.append(probabilities) return tf.concat(stacked_probabilities, axis=0) def evaluate(self, choice_dataset, batch_size=-1): - """Evaluates the model for each session and each product of a ChoiceDataset. 
+ """Evaluates the model for each context and each product of a ChoiceDataset. Predicts the probabilities according to the model and computes the Negative-Log-Likelihood loss from the actual choices. @@ -523,27 +537,27 @@ def evaluate(self, choice_dataset, batch_size=-1): Returns: -------- - np.ndarray (n_sessions, n_items) - Choice probabilties for each session and each product + np.ndarray (n_contexts, n_items) + Choice probabilties for each context and each product """ batch_losses = [] for ( - items_batch, - sessions_batch, - sessions_items_batch, - availabilities_batch, - choices_batch, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, ) in choice_dataset.iter_batch(batch_size=batch_size): loss, _ = self.batch_predict( - items_batch, - sessions_batch, - sessions_items_batch, - availabilities_batch, - choices_batch, + fixed_items_features=fixed_items_features, + contexts_features=contexts_features, + contexts_items_features=contexts_items_features, + contexts_items_availabilities=contexts_items_availabilities, + choices=choices, ) batch_losses.append(loss) if batch_size != -1: - last_batch_size = availabilities_batch.shape[0] + last_batch_size = contexts_items_availabilities.shape[0] coefficients = tf.concat( [tf.ones(len(batch_losses) - 1) * batch_size, [last_batch_size]], axis=0 ) @@ -700,37 +714,49 @@ def __init__(self, **kwargs): """Initialization of the model.""" super().__init__(**kwargs) - def compute_utility( - self, items_batch, sessions_batch, sessions_items_batch, availabilities_batch, choices_batch + def compute_batch_utility( + self, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, ): - """Computes the random utility for each product of each session. + """Computes the random utility for each product of each context. Parameters ---------- - items_batch : tuple of np.ndarray (items_features) + fixed_items_features : tuple of np.ndarray Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the products constant/fixed features. 
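 A quick sanity check of the batch weighting used by evaluate above: weighting each
 batch's mean loss by its true size recovers the exact dataset mean even when the last
 batch is smaller (toy numbers, not library code):

 >>> import numpy as np
 >>> batch_losses = np.array([0.70, 0.60, 0.90])  # mean NLL of each batch
 >>> sizes = np.array([32, 32, 11])               # two full batches + a smaller last one
 >>> round(float((batch_losses * sizes).sum() / sizes.sum()), 4)  # != batch_losses.mean()
 0.6867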
- sessions_batch : tuple of np.ndarray (sessions_features)
- Time-Features
- sessions_items_batch : tuple of np.ndarray (sessions_items_features)
- Time-Item-Features
- availabilities_batch : np.ndarray
- Availabilities (sessions_items_availabilities)
+ Shape must be (n_items, n_items_features)
+ contexts_features : tuple of np.ndarray (contexts_features)
+ a batch of contexts features
+ Shape must be (n_contexts, n_contexts_features)
+ contexts_items_features : tuple of np.ndarray (contexts_items_features)
+ a batch of contexts items features
+ Shape must be (n_contexts, n_contexts_items_features)
+ contexts_items_availabilities : np.ndarray
+ A batch of contexts items availabilities
+ Shape must be (n_contexts, n_items)
- choices_batch : np.ndarray
+ choices : np.ndarray
 Choices
+ Shape must be (n_contexts, )

 Returns:
 --------
 tf.Tensor
 (n_contexts, n_items) matrix of random utilities
 """
 # In order to avoid unused arguments warnings
- del items_batch, sessions_batch, availabilities_batch, choices_batch
- return np.squeeze(np.random.uniform(shape=(sessions_items_batch.shape), minval=0, maxval=1))
+ _ = fixed_items_features, contexts_features, contexts_items_availabilities, choices
+ return np.squeeze(
+ np.random.uniform(low=0, high=1, size=contexts_items_features.shape)
+ )

- def fit(**kwargs):
+ def fit(self, **kwargs):
 """Make sure that nothing happens during .fit."""
- del kwargs
+ _ = kwargs
 return {}


 def fit(self, choice_dataset, **kwargs):
 """Computes the choice frequency of each product and defines it as choice probabilities."""
- del kwargs
+ _ = kwargs
 choices = choice_dataset.choices
 for i in range(choice_dataset.get_num_items()):
 self.weights.append(tf.reduce_sum(tf.cast(choices == i, tf.float32)))
 self.weights = tf.stack(self.weights) / len(choices)

- def compute_utility(
- self, items_batch, sessions_batch, sessions_items_batch, availabilities_batch, choices_batch
+ def compute_batch_utility(
+ self,
+ fixed_items_features,
+ contexts_features,
+ contexts_items_features,
+ contexts_items_availabilities,
+ choices,
 ):
 """Returns utility that is fixed. U = log(P).

 Parameters
 ----------
- items_batch : tuple of np.ndarray (items_features)
+ fixed_items_features : tuple of np.ndarray
 Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the
 products constant/fixed features.
- sessions_batch : tuple of np.ndarray (sessions_features) - Time-Features - sessions_items_batch : tuple of np.ndarray (sessions_items_features) - Time-Item-Features - availabilities_batch : np.ndarray - Availabilities (sessions_items_availabilities) + Shape must be (n_items, n_items_features) + contexts_features : tuple of np.ndarray (contexts_features) + a batch of contexts features + Shape must be (n_contexts, n_contexts_features) + contexts_items_features : tuple of np.ndarray (contexts_items_features) + a batch of contexts items features + Shape must be (n_contexts, n_contexts_items_features) + contexts_items_availabilities : np.ndarray + A batch of contexts items availabilities + Shape must be (n_contexts, n_items) choices_batch : np.ndarray Choices + Shape must be (n_contexts, ) Returns: -------- - np.ndarray (n_sessions, n_items) + np.ndarray (n_contexts, n_items) Utilities Raises: @@ -783,7 +819,8 @@ def compute_utility( If the model has not been fitted cannot evaluate the utility """ # In order to avoid unused arguments warnings - del items_batch, sessions_batch, sessions_items_batch, availabilities_batch + _ = fixed_items_features, contexts_features, contexts_items_availabilities + _ = contexts_items_features if self.weights is None: raise ValueError("Model not fitted") - return np.stack([np.log(self.weights.numpy())] * len(choices_batch), axis=0) + return np.stack([np.log(self.weights.numpy())] * len(choices), axis=0) From c53222ae0368915a616602f6afe431b6e73705c4 Mon Sep 17 00:00:00 2001 From: VincentAuriau Date: Fri, 2 Feb 2024 16:09:52 +0100 Subject: [PATCH 04/10] ENH: some cleaning in code --- choice_learn/models/rumnet.py | 1436 ++++++++++++++++----------------- 1 file changed, 718 insertions(+), 718 deletions(-) diff --git a/choice_learn/models/rumnet.py b/choice_learn/models/rumnet.py index 6fbe47e8..09f08fa9 100644 --- a/choice_learn/models/rumnet.py +++ b/choice_learn/models/rumnet.py @@ -1,502 +1,388 @@ """Implementation of RUMnet for easy use.""" import tensorflow as tf +import choice_learn.tf_ops as tf_ops from choice_learn.models.base_model import ChoiceModel -from choice_learn.tf_ops import CustomCategoricalCrossEntropy -class PaperRUMnet(ChoiceModel): - """Re-Implementation of the RUMnet model. +def create_ff_network( + input_shape, depth, width, activation="elu", add_last=False, l2_regularization_coeff=0.0 +): + """Base function to create a simple fully connected (Dense) network. - Re-implemented from the paper: - Representing Random Utility Choice Models with Neural Networks from Ali Aouad and Antoine Désir - https://arxiv.org/abs/2207.12877 + Parameters + ---------- + input_shape : tuple of int + shape of the input of the network. Typically (num_features, ) + depth : int + Number of dense/fully-connected of the network to create. + width : int + Neurons number for all dense layers. + add_last : bool, optional + Whether to add a Dense layer with a single output at the end, by default False + Typically to be used when creating the utility network, that outputs a single number: + the utility. + l2_regularization_coeff : float, optional + Regularization coefficient for Dense layers weights during training, by default 0.0 - Inherits from base_model.ChoiceModel - TODO: Verify that all parameters are implemented. + Returns: + -------- + tf.keras.Model + Dense Neural Network with tensorflow backend. 
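+
+ Examples:
+ --------
+ A minimal sketch (shapes are illustrative):
+
+ >>> net = create_ff_network(input_shape=(5,), depth=2, width=8)
+ >>> net(tf.ones((3, 5))).shape
+ TensorShape([3, 8])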
""" + input = tf.keras.layers.Input(shape=input_shape) + regularizer = tf.keras.regularizers.L2(l2_regularization_coeff) + out = input + for _ in range(depth): + out = tf.keras.layers.Dense( + width, activation=activation, kernel_regularizer=regularizer, use_bias=True + )(out) + if add_last: + out = tf.keras.layers.Dense(1, activation="linear", use_bias=False)(out) + return tf.keras.Model(inputs=input, outputs=out) - def __init__( - self, - num_products_features, - num_customer_features, - width_eps_x, - depth_eps_x, - heterogeneity_x, - width_eps_z, - depth_eps_z, - heterogeneity_z, - width_u, - depth_u, - tol, - optimizer, - lr, - normalize_non_buy=True, - logmin=1e-5, - l2_regularization_coef=0.0, - label_smoothing=0.0, - **kwargs, - ): - """Initiation of the RUMnet Model. - - Parameters - ---------- - num_products_features : int - Number of features each product will be described with. - In terms of ChoiceDataset it is the number of - { items_features + sessions_items_features } for one product. - num_customer_features : int - Number of features each customer will be described with. - In terms of ChoiceDataset it is the number of sessions_features. - width_eps_x : int - Number of neurons for each dense layer for the products encoding net. - depth_eps_x : int - Number of dense layers for the products encoding net. - heterogeneity_x : int - Number of nets of products features encoding. - width_eps_z : int - Number of neurons for each dense layer for the customers encoding net. - depth_eps_z : int - Number of dense layers for the customers encoding net. - heterogeneity_z : int - Number of nets of customers features encoding. - width_u : int - Number of neurons for each dense layer for the utility net. - depth_u : int - Number of dense layers for the utility net. - tol : float - # To be Implemented - optimizer : str - String representation of the optimizer to use. By default is Adam if not specified. - Should be within tf.keras.optimizers. - lr : float - Starting learning rate to associate with optimizer. 
- normalize_non_buy : bool, optional - Whether or not to add exit option with utility 1, by default True - logmin : float, optional - Value to be added within log computation to avoid infinity, by default 1e-5 - l2_regularization_coef : float, optional - Value of dense layers weights regulariation to apply during training, by default 0.0 - label_smoothing : float, optional - Value of smoothing to apply in CrossEntropy loss computation, by default 0.0 - """ - super().__init__(normalize_non_buy=normalize_non_buy, **kwargs) - # Number of features - self.num_products_features = num_products_features - self.num_customer_features = num_customer_features - - # Dimension of encoding nets - self.width_eps_x = width_eps_x - self.depth_eps_x = depth_eps_x - self.heterogeneity_x = heterogeneity_x - - self.width_eps_z = width_eps_z - self.depth_eps_z = depth_eps_z - self.heterogeneity_z = heterogeneity_z - - # Dimension of utility net - self.width_u = width_u - self.depth_u = depth_u - - # Optimization parameters - self.logmin = logmin - self.tol = tol - self.lr = lr - self.normalize_non_buy = normalize_non_buy - self.l2_regularization_coef = l2_regularization_coef - self.label_smoothing = label_smoothing - - if optimizer == "Adam": - self.optimizer = tf.keras.optimizers.Adam(lr) - elif optimizer == "SGD": - self.optimizer = tf.keras.optimizers.SGD(lr) - elif optimizer == "Adamax": - self.optimizer = tf.keras.optimizers.Adamax(lr) - else: - print(f"Optimizer {optimizer} not implemnted, switching for default Adam") - self.optimizer = tf.keras.optimizers.Adam(lr) - - self.instantiated = False - def instantiate(self): - """Instatiation of the RUMnet model. +def recreate_official_nets( + num_products_features, + x_width, + x_depth, + x_eps, + num_customer_features, + z_width, + z_depth, + z_eps, + width_u, + depth_u, + l2_regularization_coeff=0.0, +): + """Function to create the three nets used in RUMnet: X_net, Z_net and U_net. - Creation of : - - x_model encoding products features, - - z_model encoding customers features, - - u_model computing utilities from product, customer features and their embeddings - """ - # Instatiation of the different nets - self.x_model, self.z_model, self.u_model = recreate_official_nets( - num_products_features=self.num_products_features, - num_customer_features=self.num_customer_features, - x_width=self.width_eps_x, - x_depth=self.depth_eps_x, - x_eps=self.heterogeneity_x, - z_width=self.width_eps_z, - z_depth=self.depth_eps_z, - z_eps=self.heterogeneity_z, - width_u=self.width_u, - depth_u=self.depth_u, - l2_regularization_coeff=self.l2_regularization_coef, - ) + Parameters + ---------- + num_products_features : int + Number of features each product will be described with. + In terms of ChoiceDataset it is the number of { items_features + contexts_items_features } + for one product. + num_customer_features : int + Number of features each customer will be described with. + In terms of ChoiceDataset it is the number of contexts_features. + width_eps_x : int + Number of neurons for each dense layer for the products encoding net. + depth_eps_x : int + Number of dense layers for the products encoding net. + heterogeneity_x : int + Number of nets of products features encoding. + width_eps_z : int + Number of neurons for each dense layer for the customers encoding net. + depth_eps_z : int + Number of dense layers for the customers encoding net. + heterogeneity_z : int + Number of nets of customers features encoding. 
+ width_u : int + Number of neurons for each dense layer for the utility net. + depth_u : int + Number of dense layers for the utility net. + l2_regularization_coef : float, optional + Value of dense layers weights regulariation to apply during training, by default 0.0 - # Storing weights for back-propagation - self.weights = self.x_model.weights + self.z_model.weights + self.u_model.weights - self.loss = CustomCategoricalCrossEntropy( - from_logits=False, label_smoothing=self.label_smoothing - ) - self.instantiated = True + Returns: + -------- + tf.keras.Model + Product features encoding network + tf.keras.Model + Customer features encoding network + tf.keras.Model + Features and encoding to utility computation network + """ + # Products and Customers embeddings nets, quiet symmetrical + products_input = tf.keras.layers.Input(shape=(num_products_features)) + customer_input = tf.keras.layers.Input(shape=(num_customer_features)) + x_embeddings = [] + z_embeddings = [] - def compute_batch_utility( - self, - fixed_items_features, - contexts_features, - contexts_items_features, - contexts_items_availabilities, - choices, - ): - """Compute utility from a batch of ChoiceDataset. + # Creating independant nets for each heterogeneity + for _ in range(x_eps): + x_embedding = create_ff_network( + input_shape=num_products_features, + depth=x_depth, + width=x_width, + l2_regularization_coeff=l2_regularization_coeff, + )(products_input) + x_embeddings.append(x_embedding) - Here we asssume that: item features = {fixed item features + session item features} - user features = {session features} + # Creating independant nets for each heterogeneity + for _ in range(z_eps): + z_embedding = create_ff_network( + input_shape=num_customer_features, + depth=z_depth, + width=z_width, + l2_regularization_coeff=l2_regularization_coeff, + )(customer_input) - Parameters - ---------- - fixed_items_features : tuple of np.ndarray (n_items, n_features) - Items-Features: formatting from ChoiceDataset: a matrix representing the - products fixed features. - contexts_features : tuple of np.ndarray (n_contexts, n_features) - Contexts-Features: features varying with contexts, shared by all products - contexts_items_features :tuple of np.ndarray (n_contexts, n_items, n_features) - Features varying with contexts and products - contexts_items_availabilities : np.ndarray (n_contexts, n_items) - Availabilities: here for ChoiceModel signature - choices : np.ndarray (n_contexts, ) - Choices: here for ChoiceModel signature + z_embeddings.append(z_embedding) - Returns: - -------- - np.ndarray - Utility of each product for each session. 
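 To make the three-net wiring concrete, a sketch under assumed dimensions (any
 consistent values work; x_net and z_net each return one embedding per heterogeneity):

 >>> x_net, z_net, u_net = recreate_official_nets(
 ...     num_products_features=6, x_width=8, x_depth=2, x_eps=2,
 ...     num_customer_features=3, z_width=8, z_depth=2, z_eps=2,
 ...     width_u=16, depth_u=2,
 ... )
 >>> x_feat, z_feat = tf.ones((4, 6)), tf.ones((4, 3))
 >>> x_embs, z_embs = x_net(x_feat), z_net(z_feat)  # lists of 2 tensors each
 >>> u_in = tf.concat([x_feat, x_embs[0], z_feat, z_embs[0]], axis=-1)  # (4, 6+8+3+8)
 >>> u_net(u_in).shape  # one utility per sample for this (x, z) pair
 TensorShape([4, 1])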
- Shape must be (n_sessions, n_items) - """ - (_, _) = contexts_items_availabilities, choices - ### Restacking of the item features - items_features_batch = tf.concat([*fixed_items_features], axis=-1) - session_features_batch = tf.concat([*contexts_features], axis=-1) - session_items_features_batch = tf.concat([*contexts_items_features], axis=-1) + x_net = tf.keras.Model(inputs=products_input, outputs=x_embeddings, name="X_embedding") + z_net = tf.keras.Model(inputs=customer_input, outputs=z_embeddings, name="Z_embedding") - full_item_features = tf.stack( - [items_features_batch] * session_items_features_batch.shape[0], axis=0 - ) - full_item_features = tf.concat([session_items_features_batch, full_item_features], axis=-1) + # Utility network + u_net = create_ff_network( + input_shape=( + x_width + z_width + num_products_features + num_customer_features + ), # Input shape from previous nets + width=width_u, + depth=depth_u, + add_last=True, # Add last for utility + l2_regularization_coeff=l2_regularization_coeff, + ) - ### Computation of utilities - utilities = [] + return x_net, z_net, u_net - # Computation of the customer features embeddings - z_embeddings = self.z_model(session_features_batch) - # Iterate over items in assortment - for item_i in range(full_item_features.shape[1]): - # Computation of item features embeddings - x_embeddings = self.x_model(full_item_features[:, item_i, :]) +class ParallelDense(tf.keras.layers.Layer): + """Layer that represents several Dense layers in Parallel. - utilities.append([]) + Parallel means that they have the same input, but then are not intricated and + are totally independant from each other. + """ - # Computation of utilites from embeddings, iteration over heterogeneities - # (eps_x * eps_z) - for _x in x_embeddings: - for _z in z_embeddings: - _u = tf.keras.layers.Concatenate()( - [full_item_features[:, item_i, :], _x, session_features_batch, _z] - ) - utilities[-1].append(self.u_model(_u)) + def __init__(self, width, depth, heterogeneity, activation="relu", **kwargs): + """Instantiation of the layer. - ### Reshape utilities: (batch_size, num_items, heterogeneity) - return tf.transpose(tf.squeeze(tf.stack(utilities, axis=0), -1)) + Following tf.keras.Layer API. Note that there will be width * depth * heterogeneity + number of neurons in the layer. - @tf.function - def train_step( - self, - fixed_items_features, - contexts_features, - contexts_items_features, - contexts_items_availabilities, - choices, - sample_weight=None, - ): - """Modified version of train step, as we have to average probabilities over heterogeneities. + Parameters + ---------- + width : int + Number of neurons for each dense layer. + depth : int + Number of neuron layers. + heterogeneity : int + Number of dense layers that are in parallel + activation : str, optional + activation function at the end of each layer, by default "relu" + """ + super().__init__(**kwargs) + self.width = width + self.depth = depth + self.heterogeneity = heterogeneity + self.activation = tf.keras.layers.Activation(activation) - Function that represents one training step (= one gradient descent step) of the model. - Handles a batch of data of size n_contexts = n_choices = batch_size + def build(self, input_shape): + """Lazy build of the layer. Parameters ---------- - fixed_items_features : tuple of np.ndarray (n_items, n_features) - Items-Features: formatting from ChoiceDataset: a matrix representing the - products fixed features. 
- contexts_features : tuple of np.ndarray (n_contexts, n_features) - Contexts-Features: features varying with contexts, shared by all products - contexts_items_features :tuple of np.ndarray (n_contexts, n_items, n_features) - Features varying with contexts and products - contexts_items_availabilities : np.ndarray (n_contexts, n_items) - Availabilities of items - choices : np.ndarray (n_contexts, ) - Choices - sample_weight : np.ndarray, optional - List samples weights to apply during the gradient descent to the batch elements, - by default None - - Returns: - -------- - tf.Tensor - Value of NegativeLogLikelihood loss for the batch + input_shape : tuple + shape of the input of the layer. Typically (batch_size, num_features). + Batch_size (None) is ignored, but num_features is the shape of the input. """ - with tf.GradientTape() as tape: - ### Computation of utilities - all_u = self.compute_batch_utility( - fixed_items_features=fixed_items_features, - contexts_features=contexts_features, - contexts_items_features=contexts_items_features, - contexts_items_availabilities=contexts_items_availabilities, - choices=choices, - ) - probabilities = [] - - # Iterate over heterogeneities - # for i in range(all_u.shape[2]): - # Assortment(t) Utility - # eps_probabilities = availability_softmax(all_u[:, :, i], ia_batch, axis=2) - eps_probabilities = tf.nn.softmax(all_u, axis=1) - # probabilities.append(eps_probabilities) - - # Average probabilities over heterogeneities - probabilities = tf.reduce_mean(eps_probabilities, axis=-1) + super().build(input_shape) - # It is not in the paper, but let's normalize with availabilities - probabilities = tf.multiply(probabilities, contexts_items_availabilities) - probabilities = tf.divide( - probabilities, tf.reduce_sum(probabilities, axis=1, keepdims=True) + 1e-5 + weights = [ + ( + self.add_weight( + shape=(input_shape[-1], self.width, self.heterogeneity), + initializer="glorot_normal", + trainable=True, + ), + self.add_weight( + shape=(self.width, self.heterogeneity), + initializer="glorot_normal", + trainable=True, + ), ) - - # Probabilities of selected products - # chosen_probabilities = tf.gather_nd(indices=choices_nd, params=probabilities) - - # Negative Log-Likelihood - batch_nll = self.loss( - y_pred=probabilities, - y_true=tf.one_hot(choices, depth=probabilities.shape[1]), - sample_weight=sample_weight, + ] + for i in range(self.depth - 1): + weights.append( + ( + self.add_weight( + shape=(self.width, self.width, self.heterogeneity), + initializer="glorot_normal", + trainable=True, + ), + self.add_weight( + shape=(self.width, self.heterogeneity), + initializer="glorot_normal", + trainable=True, + ), + ) ) - # nll = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)( - # y_pred=probabilities, y_true=c_batch - # ) - # nll = -tf.reduce_sum(tf.math.log(chosen_probabilities + self.logmin)) - grads = tape.gradient(batch_nll, self.weights) - self.optimizer.apply_gradients(zip(grads, self.weights)) - return batch_nll + self.w = weights - @tf.function - def batch_predict( - self, - fixed_items_features, - contexts_features, - contexts_items_features, - contexts_items_availabilities, - choices, - sample_weight=None, - ): - """Function that represents one prediction (Probas + Loss) for one batch of a ChoiceDataset. + def call(self, inputs): + """Predict of the layer. - Specific version for RUMnet because it is needed to average probabilities over - heterogeneities. + Follows tf.keras.Layer API. 
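 A shape-level sketch of the parallel computation (toy tensors: batch=4, features=6,
 width=8, heterogeneity=3, so each input feeds 3 independent Dense stacks):

 >>> h = tf.tensordot(tf.ones((4, 6)), tf.ones((6, 8, 3)), axes=1)  # first layer
 >>> h.shape
 TensorShape([4, 8, 3])
 >>> tf.einsum("ijk,jlk->ilk", h, tf.ones((8, 8, 3))).shape  # deeper layers
 TensorShape([4, 8, 3])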
Parameters ---------- - fixed_items_features : tuple of np.ndarray (n_items, n_features) - Items-Features: formatting from ChoiceDataset: a matrix representing the - products fixed features. - contexts_features : tuple of np.ndarray (n_contexts, n_features) - Contexts-Features: features varying with contexts, shared by all products - contexts_items_features :tuple of np.ndarray (n_contexts, n_items, n_features) - Features varying with contexts and products - contexts_items_availabilities : np.ndarray (n_contexts, n_items) - Availabilities of items - choices : np.ndarray (n_contexts, ) - Choices - sample_weight : np.ndarray, optional - List samples weights to apply during the gradient descent to the batch elements, - by default None + inputs : tf.Tensor, np.ndarray + Tensor of shape (batch_size, n_features) as input of the model. Returns: -------- - tf.Tensor (1, ) - Value of NegativeLogLikelihood loss for the batch - tf.Tensor (batch_size, n_items) - Probabilities for each product to be chosen for each session + outputs + tensor of shape (batch_size, width, heterogeneity) """ - utilities = self.compute_batch_utility( - fixed_items_features=fixed_items_features, - contexts_features=contexts_features, - contexts_items_features=contexts_items_features, - contexts_items_availabilities=contexts_items_availabilities, - choices=choices, - ) - probabilities = tf.nn.softmax(utilities, axis=1) - probabilities = tf.reduce_mean(probabilities, axis=-1) + outputs = tf.tensordot(inputs, self.w[0][0], axes=1) + self.w[0][1] + outputs = self.activation(outputs) + # tf.nn.bias_add(y, weights[0][1], data_format="NC...") - # Normalization with availabilties - probabilities = tf.multiply(probabilities, contexts_items_availabilities) - probabilities = tf.divide( - probabilities, tf.reduce_sum(probabilities, axis=1, keepdims=True) + 1e-5 - ) - batch_nll = self.loss( - y_pred=probabilities, - y_true=tf.one_hot(choices, depth=probabilities.shape[1]), - sample_weight=sample_weight, - ) - return batch_nll, probabilities + for w, b in self.w[1:]: + outputs = tf.einsum("ijk,jlk->ilk", outputs, w) + b + outputs = self.activation(outputs) + return outputs -class CPURUMnet(PaperRUMnet): - """CPU-optimized Re-Implementation of the RUMnet model. - This implementation handles in parallel the heterogeneities so that the training is faster. +class AssortmentParallelDense(tf.keras.layers.Layer): + """Several Dense layers in Parallel applied to an Assortment. + + Parallel means that they have the same input, but then are not intricated and + are totally independant from each other. The layer applies the same Dense layers + to an assortment of items. """ - def compute_batch_utility( - self, - fixed_items_features, - contexts_features, - contexts_items_features, - contexts_items_availabilities, - choices, - ): - """Compute utility from a batch of ChoiceDataset. + def __init__(self, width, depth, heterogeneity, activation="relu", **kwargs): + """Inialization of the layer. + + Parameters + ---------- + width : int + Number of neurons of each dense layer. + depth : int + Number of dense layers + heterogeneity : int + Number of dense networks in parallel. + activation : str, optional + activation function of each dense, by default "relu" + """ + super().__init__(**kwargs) + self.width = width + self.depth = depth + self.heterogeneity = heterogeneity + self.activation = tf.keras.layers.Activation(activation) + + def build(self, input_shape): + """Lazy build of the layer. 
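 A shape sketch of the layer once built and called on an assortment (toy dimensions;
 the build below creates one kernel stack per heterogeneity):

 >>> layer = AssortmentParallelDense(width=8, depth=2, heterogeneity=3)
 >>> layer(tf.ones((2, 5, 6))).shape  # (batch, items, width, heterogeneity)
 TensorShape([2, 5, 8, 3])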
- Here we asssume that: item features = {fixed item features + session item features} - user features = {session features} + Follows tf.keras API. Parameters ---------- - fixed_items_features : tuple of np.ndarray (n_items, n_features) - Items-Features: formatting from ChoiceDataset: a matrix representing the - products fixed features. - contexts_features : tuple of np.ndarray (n_contexts, n_features) - Contexts-Features: features varying with contexts, shared by all products - contexts_items_features :tuple of np.ndarray (n_contexts, n_items, n_features) - Features varying with contexts and products - contexts_items_availabilities : np.ndarray (n_contexts, n_items) - Availabilities of items - choices : np.ndarray (n_contexts, ) - Choices + input_shape : tuple + Shape of the input of the layer. + Typically (batch_size, num_items, num_features). + """ + super().build(input_shape) + + weights = [ + ( + self.add_weight( + shape=(input_shape[-1], self.width, self.heterogeneity), + initializer="glorot_normal", + trainable=True, + ), + self.add_weight( + shape=(self.width, self.heterogeneity), + initializer="glorot_normal", + trainable=True, + ), + ) + ] + for i in range(self.depth - 1): + weights.append( + ( + self.add_weight( + shape=(self.width, self.width, self.heterogeneity), + initializer="glorot_normal", + trainable=True, + ), + self.add_weight( + shape=(self.width, self.heterogeneity), + initializer="glorot_normal", + trainable=True, + ), + ) + ) + + self.w = weights + + def call(self, inputs): + """Predict of the layer. + + Follows tf.keras.Layer API. + + Parameters + ---------- + inputs : tf.Tensor, np.ndarray + Tensor of shape (batch_size, n_items, n_features) as input of the model. Returns: -------- - np.ndarray - Utility of each product for each session. 
- Shape must be (n_sessions, n_items)
 """
 (_, _) = contexts_items_availabilities, choices
 ### Restacking of the item features
 stacked_fixed_items_features = tf.concat([*fixed_items_features], axis=-1)
 stacked_contexts_features = tf.concat([*contexts_features], axis=-1)
 stacked_contexts_items_features = tf.concat([*contexts_items_features], axis=-1)

 full_item_features = tf.stack(
 [stacked_fixed_items_features] * stacked_contexts_items_features.shape[0], axis=0
 )
 full_item_features = tf.concat(
 [stacked_contexts_items_features, full_item_features], axis=-1
 )

 ### Computation of utilities
 utilities = []
 batch_size = stacked_contexts_features.shape[0]

 # Computation of the customer features embeddings
 z_embeddings = self.z_model(stacked_contexts_features)

 # Iterate over items in assortment
 for item_i in range(full_item_features.shape[1]):
 # Computation of item features embeddings
 x_embeddings = self.x_model(full_item_features[:, item_i, :])

 stacked_heterogeneities = []
 # Computation of utilities from embeddings, iteration over heterogeneities
 # eps_x * eps_z
 for _x in x_embeddings:
 for _z in z_embeddings:
 full_embedding = tf.keras.layers.Concatenate()(
 [full_item_features[:, item_i, :], _x, stacked_contexts_features, _z]
 )
 stacked_heterogeneities.append(full_embedding)
 item_utilities = self.u_model(tf.concat(stacked_heterogeneities, axis=0))
 item_utilities = tf.stack(
 [
 item_utilities[batch_size * i : batch_size * (i + 1)]
 for i in range(len(x_embeddings) * len(z_embeddings))
 ],
 axis=1,
 )
 utilities.append(item_utilities)
 ### Reshape utilities: (batch_size, num_items, heterogeneity)
 return tf.squeeze(tf.stack(utilities, axis=1), -1)


class AssortmentUtilityDenseNetwork(tf.keras.layers.Layer):
 """Dense Network that is applied to an assortment of items.

 We apply the same network over several items and several heterogeneities.
 """

 def __init__(self, width, depth, activation="relu", add_last=True, **kwargs):
 """Initialization of the layer.

 Parameters
 ----------
 width : int
 Number of neurons of each dense layer.
 depth : int
 Number of dense layers.
activation : str, optional - activation function at the end of each layer, by default "relu" + Activation function for each layer, by default "relu" + add_last : bool, optional + Whether to add a final dense layer with 1 neuron, by default True """ super().__init__(**kwargs) self.width = width self.depth = depth - self.heterogeneity = heterogeneity self.activation = tf.keras.layers.Activation(activation) + self.add_last = add_last def build(self, input_shape): """Lazy build of the layer. + Follows tf.keras.Layer API. + Parameters ---------- input_shape : tuple - shape of the input of the layer. Typically (batch_size, num_features). - Batch_size (None) is ignored, but num_features is the shape of the input. + Shape of the input of the layer. + Typically (batch_size, num_items, width, heterogeneity). """ super().build(input_shape) weights = [ ( self.add_weight( - shape=(input_shape[-1], self.width, self.heterogeneity), + shape=(input_shape[-2], self.width), initializer="glorot_normal", trainable=True, ), self.add_weight( - shape=(self.width, self.heterogeneity), + shape=(self.width, 1), initializer="glorot_normal", trainable=True, ), @@ -506,243 +392,488 @@ def build(self, input_shape): weights.append( ( self.add_weight( - shape=(self.width, self.width, self.heterogeneity), + shape=(self.width, self.width), initializer="glorot_normal", trainable=True, ), self.add_weight( - shape=(self.width, self.heterogeneity), + shape=(self.width, 1), initializer="glorot_normal", trainable=True, ), ) ) + if self.add_last: + self.last = self.add_weight( + shape=(self.width, 1), initializer="glorot_normal", trainable=True + ) self.w = weights def call(self, inputs): """Predict of the layer. - Follows tf.keras.Layer API. - Parameters ---------- inputs : tf.Tensor, np.ndarray - Tensor of shape (batch_size, n_features) as input of the model. + Input Tensor of shape (batch_size, num_items, width, heterogeneity) Returns: -------- - outputs - tensor of shape (batch_size, width, heterogeneity) + tf.Tensor + Utilities of shape (batch_size, num_items, heterogeneity) """ - outputs = tf.tensordot(inputs, self.w[0][0], axes=1) + self.w[0][1] - outputs = self.activation(outputs) - # tf.nn.bias_add(y, weights[0][1], data_format="NC...") + outputs = inputs - for w, b in self.w[1:]: - outputs = tf.einsum("ijk,jlk->ilk", outputs, w) + b + for w, b in self.w: + # bs, items, features, heterogeneities + outputs = tf.einsum("ijlk, lm->ijmk", outputs, w) + b outputs = self.activation(outputs) + if self.add_last: + outputs = tf.einsum("ijlk, lm->ijmk", outputs, self.last) + return outputs -class AssortmentParallelDense(tf.keras.layers.Layer): - """Several Dense layers in Parallel applied to an Assortment. +class PaperRUMnet(ChoiceModel): + """Re-Implementation of the RUMnet model. + + Re-implemented from the paper: + Representing Random Utility Choice Models with Neural Networks from Ali Aouad and Antoine Désir + https://arxiv.org/abs/2207.12877 + + Inherits from base_model.ChoiceModel + TODO: Verify that all parameters are implemented. + """ + + def __init__( + self, + num_products_features, + num_customer_features, + width_eps_x, + depth_eps_x, + heterogeneity_x, + width_eps_z, + depth_eps_z, + heterogeneity_z, + width_u, + depth_u, + tol, + optimizer, + lr, + normalize_non_buy=False, + logmin=1e-5, + l2_regularization_coef=0.0, + label_smoothing=0.0, + **kwargs, + ): + """Initiation of the RUMnet Model. + + Parameters + ---------- + num_products_features : int + Number of features each product will be described with. 
+ In terms of ChoiceDataset it is the number of
+ { items_features + contexts_items_features } for one product.
 num_customer_features : int
 Number of features each customer will be described with.
 In terms of ChoiceDataset it is the number of contexts_features.
 width_eps_x : int
 Number of neurons for each dense layer for the products encoding net.
 depth_eps_x : int
 Number of dense layers for the products encoding net.
 heterogeneity_x : int
 Number of nets of products features encoding.
 width_eps_z : int
 Number of neurons for each dense layer for the customers encoding net.
 depth_eps_z : int
 Number of dense layers for the customers encoding net.
 heterogeneity_z : int
 Number of nets of customers features encoding.
 width_u : int
 Number of neurons for each dense layer for the utility net.
 depth_u : int
 Number of dense layers for the utility net.
 tol : float
 # To be Implemented
 optimizer : str
 String representation of the optimizer to use. Adam is used by default if not specified.
 Should be within tf.keras.optimizers.
 lr : float
 Starting learning rate to associate with optimizer.
 normalize_non_buy : bool, optional
 Whether or not to add exit option with utility 1, by default False
 logmin : float, optional
 Value to be added within log computation to avoid infinity, by default 1e-5
 l2_regularization_coef : float, optional
 Value of dense layers weights regularization to apply during training, by default 0.0
 label_smoothing : float, optional
 Value of smoothing to apply in CrossEntropy loss computation, by default 0.0
 """
 super().__init__(normalize_non_buy=normalize_non_buy, **kwargs)
 # Number of features
 self.num_products_features = num_products_features
 self.num_customer_features = num_customer_features

 # Dimension of encoding nets
 self.width_eps_x = width_eps_x
 self.depth_eps_x = depth_eps_x
 self.heterogeneity_x = heterogeneity_x

 self.width_eps_z = width_eps_z
 self.depth_eps_z = depth_eps_z
 self.heterogeneity_z = heterogeneity_z

 # Dimension of utility net
 self.width_u = width_u
 self.depth_u = depth_u

 # Optimization parameters
 self.logmin = logmin
 self.tol = tol
 self.lr = lr
 self.normalize_non_buy = normalize_non_buy
 self.l2_regularization_coef = l2_regularization_coef
 self.label_smoothing = label_smoothing

 if optimizer == "Adam":
 self.optimizer = tf.keras.optimizers.Adam(lr)
 elif optimizer == "SGD":
 self.optimizer = tf.keras.optimizers.SGD(lr)
 elif optimizer == "Adamax":
 self.optimizer = tf.keras.optimizers.Adamax(lr)
 else:
 print(f"Optimizer {optimizer} not implemented, switching for default Adam")
 self.optimizer = tf.keras.optimizers.Adam(lr)

 self.instantiated = False

 def instantiate(self):
 """Instantiation of the RUMnet model.
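 A usage sketch with hypothetical hyper-parameters (any ChoiceDataset whose feature
 counts match would do):

 >>> model = PaperRUMnet(
 ...     num_products_features=6, num_customer_features=3,
 ...     width_eps_x=8, depth_eps_x=2, heterogeneity_x=2,
 ...     width_eps_z=8, depth_eps_z=2, heterogeneity_z=2,
 ...     width_u=16, depth_u=2, tol=0.0, optimizer="Adam", lr=1e-3,
 ... )
 >>> model.instantiate()  # builds x_model, z_model and u_model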
+ + Creation of : + - x_model encoding products features, + - z_model encoding customers features, + - u_model computing utilities from product, customer features and their embeddings + """ + # Instatiation of the different nets + self.x_model, self.z_model, self.u_model = recreate_official_nets( + num_products_features=self.num_products_features, + num_customer_features=self.num_customer_features, + x_width=self.width_eps_x, + x_depth=self.depth_eps_x, + x_eps=self.heterogeneity_x, + z_width=self.width_eps_z, + z_depth=self.depth_eps_z, + z_eps=self.heterogeneity_z, + width_u=self.width_u, + depth_u=self.depth_u, + l2_regularization_coeff=self.l2_regularization_coef, + ) + + # Storing weights for back-propagation + self.weights = self.x_model.weights + self.z_model.weights + self.u_model.weights + self.loss = tf_ops.CustomCategoricalCrossEntropy( + from_logits=False, + label_smoothing=self.label_smoothing, + epsilon=self.logmin, + ) + self.instantiated = True - Parallel means that they have the same input, but then are not intricated and - are totally independant from each other. The layer applies the same Dense layers - to an assortment of items. - """ + def compute_batch_utility( + self, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, + ): + """Compute utility from a batch of ChoiceDataset. - def __init__(self, width, depth, heterogeneity, activation="relu", **kwargs): - """Inialization of the layer. + Here we asssume that: item features = {fixed item features + contexts item features} + user features = {contexts features} Parameters ---------- - width : int - Number of neurons of each dense layer. - depth : int - Number of dense layers - heterogeneity : int - Number of dense networks in parallel. - activation : str, optional - activation function of each dense, by default "relu" + fixed_items_features : tuple of np.ndarray (n_items, n_features) + Items-Features: formatting from ChoiceDataset: a matrix representing the + products fixed features. + contexts_features : tuple of np.ndarray (n_contexts, n_features) + Contexts-Features: features varying with contexts, shared by all products + contexts_items_features :tuple of np.ndarray (n_contexts, n_items, n_features) + Features varying with contexts and products + contexts_items_availabilities : np.ndarray (n_contexts, n_items) + Availabilities: here for ChoiceModel signature + choices : np.ndarray (n_contexts, ) + Choices: here for ChoiceModel signature + + Returns: + -------- + np.ndarray + Utility of each product for each contexts. + Shape must be (n_contexts, n_items) """ - super().__init__(**kwargs) - self.width = width - self.depth = depth - self.heterogeneity = heterogeneity - self.activation = tf.keras.layers.Activation(activation) + (_, _) = contexts_items_availabilities, choices + ### Restacking of the item features + items_features_batch = tf.concat([*fixed_items_features], axis=-1) + contexts_features_batch = tf.concat([*contexts_features], axis=-1) + contexts_items_features_batch = tf.concat([*contexts_items_features], axis=-1) - def build(self, input_shape): - """Lazy build of the layer. + full_item_features = tf.stack( + [items_features_batch] * contexts_items_features_batch.shape[0], axis=0 + ) + full_item_features = tf.concat([contexts_items_features_batch, full_item_features], axis=-1) - Follows tf.keras API. + ### Computation of utilities + utilities = [] - Parameters - ---------- - input_shape : tuple - Shape of the input of the layer. 
-            Typically (batch_size, num_items, num_features).
-        """
-        super().build(input_shape)
+        # Computation of the customer features embeddings
+        z_embeddings = self.z_model(contexts_features_batch)

-        weights = [
-            (
-                self.add_weight(
-                    shape=(input_shape[-1], self.width, self.heterogeneity),
-                    initializer="glorot_normal",
-                    trainable=True,
-                ),
-                self.add_weight(
-                    shape=(self.width, self.heterogeneity),
-                    initializer="glorot_normal",
-                    trainable=True,
-                ),
-            )
-        ]
-        for i in range(self.depth - 1):
-            weights.append(
-                (
-                    self.add_weight(
-                        shape=(self.width, self.width, self.heterogeneity),
-                        initializer="glorot_normal",
-                        trainable=True,
-                    ),
-                    self.add_weight(
-                        shape=(self.width, self.heterogeneity),
-                        initializer="glorot_normal",
-                        trainable=True,
-                    ),
-                )
-            )
+        # Iterate over items in assortment
+        for item_i in range(full_item_features.shape[1]):
+            # Computation of item features embeddings
+            x_embeddings = self.x_model(full_item_features[:, item_i, :])

-        self.w = weights
+            utilities.append([])

-    def call(self, inputs):
-        """Predict of the layer.
+            # Computation of utilities from embeddings, iteration over heterogeneities
+            # (eps_x * eps_z)
+            for _x in x_embeddings:
+                for _z in z_embeddings:
+                    _u = tf.keras.layers.Concatenate()(
+                        [full_item_features[:, item_i, :], _x, contexts_features_batch, _z]
+                    )
+                    utilities[-1].append(self.u_model(_u))

-        Follows tf.keras.Layer API.
+        ### Reshape utilities: (batch_size, num_items, heterogeneity)
+        return tf.transpose(tf.squeeze(tf.stack(utilities, axis=0), -1))
+
+    @tf.function
+    def train_step(
+        self,
+        fixed_items_features,
+        contexts_features,
+        contexts_items_features,
+        contexts_items_availabilities,
+        choices,
+        sample_weight=None,
+    ):
+        """Modified version of train step, as we have to average probabilities over heterogeneities.
+
+        Function that represents one training step (= one gradient descent step) of the model.
+        Handles a batch of data of size n_contexts = n_choices = batch_size.

         Parameters
         ----------
-        inputs : tf.Tensor, np.ndarray
-            Tensor of shape (batch_size, n_items, n_features) as input of the model.
+        fixed_items_features : tuple of np.ndarray (n_items, n_features)
+            Items-Features: formatting from ChoiceDataset: a matrix representing the
+            products fixed features.
+        contexts_features : tuple of np.ndarray (n_contexts, n_features)
+            Contexts-Features: features varying with contexts, shared by all products
+        contexts_items_features : tuple of np.ndarray (n_contexts, n_items, n_features)
+            Features varying with contexts and products
+        contexts_items_availabilities : np.ndarray (n_contexts, n_items)
+            Availabilities of items
+        choices : np.ndarray (n_contexts, )
+            Choices
+        sample_weight : np.ndarray, optional
+            List of sample weights to apply during the gradient descent to the batch elements,
+            by default None

         Returns:
         --------
         tf.Tensor
-            Embeddings of shape (batch_size, n_items, width, heterogeneity)
+            Value of NegativeLogLikelihood loss for the batch
         """
-        outputs = tf.tensordot(inputs, self.w[0][0], axes=[[2], [0]]) + self.w[0][1]
-        outputs = self.activation(outputs)
+        with tf.GradientTape() as tape:
+            ### Computation of utilities
+            all_u = self.compute_batch_utility(
+                fixed_items_features=fixed_items_features,
+                contexts_features=contexts_features,
+                contexts_items_features=contexts_items_features,
+                contexts_items_availabilities=contexts_items_availabilities,
+                choices=choices,
+            )
+            probabilities = []

-        for w, b in self.w[1:]:
-            outputs = tf.einsum("imjk,jlk->imlk", outputs, w) + b
-            outputs = self.activation(outputs)
+            # Iterate over heterogeneities
+            eps_probabilities = tf.nn.softmax(all_u, axis=1)

-        return outputs
+            # Average probabilities over heterogeneities
+            probabilities = tf.reduce_mean(eps_probabilities, axis=-1)

+            # It is not in the paper, but let's normalize with availabilities
+            probabilities = tf.multiply(probabilities, contexts_items_availabilities)
+            probabilities = tf.divide(
+                probabilities, tf.reduce_sum(probabilities, axis=1, keepdims=True) + 1e-5
+            )
+            if self.tol > 0:
+                probabilities = (1 - self.tol) * probabilities + self.tol * tf.ones_like(
+                    probabilities
+                ) / probabilities.shape[-1]

-class AssortmentUtilityDenseNetwork(tf.keras.layers.Layer):
-    """Dense Network that is applied to an assortment of items.
+            # Probabilities of selected products

-    We apply to the same network over several items and several heterogeneitites.
-    """
+            # Negative Log-Likelihood
+            batch_nll = self.loss(
+                y_pred=probabilities,
+                y_true=tf.one_hot(choices, depth=probabilities.shape[1]),
+                sample_weight=sample_weight,
+            )

-    def __init__(self, width, depth, activation="relu", add_last=True, **kwargs):
-        """Initialization of the layer.
+        grads = tape.gradient(batch_nll, self.weights)
+        self.optimizer.apply_gradients(zip(grads, self.weights))
+        return batch_nll
+
+    @tf.function
+    def batch_predict(
+        self,
+        fixed_items_features,
+        contexts_features,
+        contexts_items_features,
+        contexts_items_availabilities,
+        choices,
+        sample_weight=None,
+    ):
+        """Function that represents one prediction (Probas + Loss) for one batch of a ChoiceDataset.
+
+        Specific version for RUMnet because probabilities need to be averaged over
+        heterogeneities.

         Parameters
         ----------
-        width : int
-            Nnumber of neurons of each dense layer.
-        depth : int
-            Number of dense layers.
-        activation : str, optional
-            Activation function for each layer, by default "relu"
-        add_last : bool, optional
-            Whether to add a final dense layer with 1 neuron, by default True
+        fixed_items_features : tuple of np.ndarray (n_items, n_features)
+            Items-Features: formatting from ChoiceDataset: a matrix representing the
+            products fixed features.
+        contexts_features : tuple of np.ndarray (n_contexts, n_features)
+            Contexts-Features: features varying with contexts, shared by all products
+        contexts_items_features : tuple of np.ndarray (n_contexts, n_items, n_features)
+            Features varying with contexts and products
+        contexts_items_availabilities : np.ndarray (n_contexts, n_items)
+            Availabilities of items
+        choices : np.ndarray (n_contexts, )
+            Choices
+        sample_weight : np.ndarray, optional
+            List of sample weights to apply during the gradient descent to the batch elements,
+            by default None
+
+        Returns:
+        --------
+        tf.Tensor (1, )
+            Value of NegativeLogLikelihood loss for the batch
+        tf.Tensor (batch_size, n_items)
+            Probabilities for each product to be chosen for each context
         """
-        super().__init__(**kwargs)
-        self.width = width
-        self.depth = depth
-        self.activation = tf.keras.layers.Activation(activation)
-        self.add_last = add_last
+        utilities = self.compute_batch_utility(
+            fixed_items_features=fixed_items_features,
+            contexts_features=contexts_features,
+            contexts_items_features=contexts_items_features,
+            contexts_items_availabilities=contexts_items_availabilities,
+            choices=choices,
+        )
+        probabilities = tf.nn.softmax(utilities, axis=1)
+        probabilities = tf.reduce_mean(probabilities, axis=-1)

-    def build(self, input_shape):
-        """Lazy build of the layer.
+        # Normalization with availabilities
+        probabilities = tf.multiply(probabilities, contexts_items_availabilities)
+        probabilities = tf.divide(
+            probabilities, tf.reduce_sum(probabilities, axis=1, keepdims=True) + 1e-5
+        )
+
+        batch_nll = self.loss(
+            y_pred=probabilities,
+            y_true=tf.one_hot(choices, depth=probabilities.shape[1]),
+            sample_weight=sample_weight,
+        )
+        return batch_nll, probabilities

-        Follows tf.keras.Layer API.

-        Parameters
-        ----------
-        input_shape : tuple
-            Shape of the input of the layer.
-            Typically (batch_size, num_items, width, heterogeneity).
-        """
-        super().build(input_shape)

+class CPURUMnet(PaperRUMnet):
+    """CPU-optimized Re-Implementation of the RUMnet model.

-        weights = [
-            (
-                self.add_weight(
-                    shape=(input_shape[-2], self.width),
-                    initializer="glorot_normal",
-                    trainable=True,
-                ),
-                self.add_weight(
-                    shape=(self.width, 1),
-                    initializer="glorot_normal",
-                    trainable=True,
-                ),
-            )
-        ]
+    This implementation handles the heterogeneities in parallel so that training is faster.
+    """
-        for i in range(self.depth - 1):
-            weights.append(
-                (
-                    self.add_weight(
-                        shape=(self.width, self.width),
-                        initializer="glorot_normal",
-                        trainable=True,
-                    ),
-                    self.add_weight(
-                        shape=(self.width, 1),
-                        initializer="glorot_normal",
-                        trainable=True,
-                    ),
-                )
-            )
-        if self.add_last:
-            self.last = self.add_weight(
-                shape=(self.width, 1), initializer="glorot_normal", trainable=True
-            )
+    def compute_batch_utility(
+        self,
+        fixed_items_features,
+        contexts_features,
+        contexts_items_features,
+        contexts_items_availabilities,
+        choices,
+    ):
+        """Compute utility from a batch of ChoiceDataset.

-        self.w = weights
+        Here we assume that: item features = {fixed item features + contexts item features}
+            user features = {contexts features}

-    def call(self, inputs):
-        """Predict of the layer.

         Parameters
         ----------
-        inputs : tf.Tensor, np.ndarray
-            Input Tensor of shape (batch_size, num_items, width, heterogeneity)
+        fixed_items_features : tuple of np.ndarray (n_items, n_features)
+            Items-Features: formatting from ChoiceDataset: a matrix representing the
+            products fixed features.
+        contexts_features : tuple of np.ndarray (n_contexts, n_features)
+            Contexts-Features: features varying with contexts, shared by all products
+        contexts_items_features : tuple of np.ndarray (n_contexts, n_items, n_features)
+            Features varying with contexts and products
+        contexts_items_availabilities : np.ndarray (n_contexts, n_items)
+            Availabilities of items
+        choices : np.ndarray (n_contexts, )
+            Choices

         Returns:
         --------
-        tf.Tensor
-            Utilities of shape (batch_size, num_items, heterogeneity)
+        np.ndarray
+            Utility of each product for each context.
+            Shape must be (n_contexts, n_items)
         """
-        outputs = inputs
+        (_, _) = contexts_items_availabilities, choices
+        ### Restacking of the item features
+        stacked_fixed_items_features = tf.concat([*fixed_items_features], axis=-1)
+        stacked_contexts_features = tf.concat([*contexts_features], axis=-1)
+        stacked_contexts_items_features = tf.concat([*contexts_items_features], axis=-1)

-        for w, b in self.w:
-            # bs, items, features, heterogeneities
-            outputs = tf.einsum("ijlk, lm->ijmk", outputs, w) + b
-            outputs = self.activation(outputs)
+        full_item_features = tf.stack(
+            [stacked_fixed_items_features] * stacked_contexts_items_features.shape[0], axis=0
+        )
+        full_item_features = tf.concat(
+            [stacked_contexts_items_features, full_item_features], axis=-1
+        )

-        if self.add_last:
-            outputs = tf.einsum("ijlk, lm->ijmk", outputs, self.last)
+        ### Computation of utilities
+        utilities = []
+        batch_size = stacked_contexts_features.shape[0]

-        return outputs
+        # Computation of the customer features embeddings
+        z_embeddings = self.z_model(stacked_contexts_features)
+
+        # Iterate over items in assortment
+        for item_i in range(full_item_features.shape[1]):
+            # Computation of item features embeddings
+            x_embeddings = self.x_model(full_item_features[:, item_i, :])
+
+            stacked_heterogeneities = []
+            # Computation of utilities from embeddings, iteration over heterogeneities
+            # eps_x * eps_z
+            for _x in x_embeddings:
+                for _z in z_embeddings:
+                    full_embedding = tf.keras.layers.Concatenate()(
+                        [full_item_features[:, item_i, :], _x, stacked_contexts_features, _z]
+                    )
+                    stacked_heterogeneities.append(full_embedding)
+            item_utilities = self.u_model(tf.concat(stacked_heterogeneities, axis=0))
+            item_utilities = tf.stack(
+                [
+                    item_utilities[batch_size * i : batch_size * (i + 1)]
+                    for i in range(len(x_embeddings) * len(z_embeddings))
+                ],
+                axis=1,
+            )
+            utilities.append(item_utilities)
+        ### Reshape utilities: (batch_size, num_items, heterogeneity)
+        return tf.squeeze(tf.stack(utilities, axis=1), -1)


 class GPURUMnet(PaperRUMnet):
@@ -777,7 +908,7 @@ def instantiate(self):
             + self.z_model.trainable_variables
             + self.u_model.trainable_variables
         )
-        self.loss = CustomCategoricalCrossEntropy(
+        self.loss = tf_ops.CustomCategoricalCrossEntropy(
             from_logits=False, label_smoothing=self.label_smoothing
         )
         self.time_dict = {}
@@ -793,8 +924,8 @@ def compute_batch_utility(
     ):
         """Compute utility from a batch of ChoiceDataset.

-        Here we asssume that: item features = {fixed item features + session item features}
-        user features = {session features}
+        Here we assume that: item features = {fixed item features + contexts item features}
+        user features = {contexts features}

         Parameters
         ----------
@@ -813,8 +944,8 @@ def compute_batch_utility(
         Returns:
         --------
         np.ndarray
-            Utility of each product for each session.
-            Shape must be (n_sessions, n_items)
+            Utility of each product for each context.
+            Shape must be (n_contexts, n_items)
         """
         (_, _) = contexts_items_availabilities, choices

@@ -888,12 +1019,12 @@ def train_step(
         items_batch : tuple of np.ndarray (items_features)
             Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the
             products constant features.
-        sessions_batch : tuple of np.ndarray (sessions_features)
+        contexts_batch : tuple of np.ndarray (contexts_features)
             Time-Features
-        sessions_items_batch : tuple of np.ndarray (sessions_items_features)
+        contexts_items_batch : tuple of np.ndarray (contexts_items_features)
             Time-Item-Features
         availabilities_batch : np.ndarray
-            Availabilities (sessions_items_availabilities)
+            Availabilities (contexts_items_availabilities)
         choices_batch : np.ndarray
             Choices
         sample_weight : np.ndarray, optional
@@ -907,14 +1038,14 @@
         """
         with tf.GradientTape() as tape:
             ### Computation of utilities
-            all_u = self.compute_batch_utility(
+            utilities = self.compute_batch_utility(
                 fixed_items_features=fixed_items_features,
                 contexts_features=contexts_features,
                 contexts_items_features=contexts_items_features,
                 contexts_items_availabilities=contexts_items_availabilities,
                 choices=choices,
             )
-            eps_probabilities = tf.nn.softmax(all_u, axis=2)
+            eps_probabilities = tf.nn.softmax(utilities, axis=2)

             # Average probabilities over heterogeneities
             probabilities = tf.reduce_mean(eps_probabilities, axis=1)
@@ -923,8 +1054,10 @@
             probabilities = tf.divide(
                 probabilities, tf.reduce_sum(probabilities, axis=1, keepdims=True) + 1e-5
             )
-            # Probabilities of selected products
-            # chosen_probabilities = tf.gather_nd(indices=choices_nd, params=probabilities)
+            if self.tol > 0:
+                probabilities = (1 - self.tol) * probabilities + self.tol * tf.ones_like(
+                    probabilities
+                ) / probabilities.shape[-1]

             # Negative Log-Likelihood
             batch_nll = self.loss(
@@ -966,12 +1099,12 @@ def batch_predict(
         items_batch : tuple of np.ndarray (items_features)
             Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the
             products constant features.
-        sessions_batch : tuple of np.ndarray (sessions_features)
+        contexts_batch : tuple of np.ndarray (contexts_features)
             Time-Features
-        sessions_items_batch : tuple of np.ndarray (sessions_items_features)
+        contexts_items_batch : tuple of np.ndarray (contexts_items_features)
             Time-Item-Features
         availabilities_batch : np.ndarray
-            Availabilities (sessions_items_availabilities)
+            Availabilities (contexts_items_availabilities)
         choices_batch : np.ndarray
             Choices
         sample_weight : np.ndarray, optional
@@ -983,7 +1116,7 @@
         tf.Tensor (1, )
             Value of NegativeLogLikelihood loss for the batch
         tf.Tensor (batch_size, n_items)
-            Probabilities for each product to be chosen for each session
+            Probabilities for each product to be chosen for each context
         """
         utilities = self.compute_batch_utility(
             fixed_items_features=fixed_items_features,
@@ -1006,136 +1139,3 @@ def batch_predict(
             sample_weight=sample_weight,
         )
         return batch_loss, probabilities
-
-
-def create_ff_network(
-    input_shape, depth, width, activation="elu", add_last=False, l2_regularization_coeff=0.0
-):
-    """Base function to create a simple fully connected (Dense) network.
-
-    Parameters
-    ----------
-    input_shape : tuple of int
-        shape of the input of the network. Typically (num_features, )
-    depth : int
-        Number of dense/fully-connected of the network to create.
-    width : int
-        Neurons number for all dense layers.
- add_last : bool, optional - Whether to add a Dense layer with a single output at the end, by default False - Typically to be used when creating the utility network, that outputs a single number: - the utility. - l2_regularization_coeff : float, optional - Regularization coefficient for Dense layers weights during training, by default 0.0 - - Returns: - -------- - tf.keras.Model - Dense Neural Network with tensorflow backend. - """ - input = tf.keras.layers.Input(shape=input_shape) - regularizer = tf.keras.regularizers.L2(l2_regularization_coeff) - out = input - for _ in range(depth): - out = tf.keras.layers.Dense( - width, activation=activation, kernel_regularizer=regularizer, use_bias=True - )(out) - if add_last: - out = tf.keras.layers.Dense(1, activation="linear", use_bias=False)(out) - return tf.keras.Model(inputs=input, outputs=out) - - -def recreate_official_nets( - num_products_features, - x_width, - x_depth, - x_eps, - num_customer_features, - z_width, - z_depth, - z_eps, - width_u, - depth_u, - l2_regularization_coeff=0.0, -): - """Function to create the three nets used in RUMnet: X_net, Z_net and U_net. - - Parameters - ---------- - num_products_features : int - Number of features each product will be described with. - In terms of ChoiceDataset it is the number of { items_features + sessions_items_features } - for one product. - num_customer_features : int - Number of features each customer will be described with. - In terms of ChoiceDataset it is the number of sessions_features. - width_eps_x : int - Number of neurons for each dense layer for the products encoding net. - depth_eps_x : int - Number of dense layers for the products encoding net. - heterogeneity_x : int - Number of nets of products features encoding. - width_eps_z : int - Number of neurons for each dense layer for the customers encoding net. - depth_eps_z : int - Number of dense layers for the customers encoding net. - heterogeneity_z : int - Number of nets of customers features encoding. - width_u : int - Number of neurons for each dense layer for the utility net. - depth_u : int - Number of dense layers for the utility net. 
- l2_regularization_coef : float, optional - Value of dense layers weights regulariation to apply during training, by default 0.0 - - Returns: - -------- - tf.keras.Model - Product features encoding network - tf.keras.Model - Customer features encoding network - tf.keras.Model - Features and encoding to utility computation network - """ - # Products and Customers embeddings nets, quiet symmetrical - products_input = tf.keras.layers.Input(shape=(num_products_features)) - customer_input = tf.keras.layers.Input(shape=(num_customer_features)) - x_embeddings = [] - z_embeddings = [] - - # Creating independant nets for each heterogeneity - for _ in range(x_eps): - x_embedding = create_ff_network( - input_shape=num_products_features, - depth=x_depth, - width=x_width, - l2_regularization_coeff=l2_regularization_coeff, - )(products_input) - x_embeddings.append(x_embedding) - - # Creating independant nets for each heterogeneity - for _ in range(z_eps): - z_embedding = create_ff_network( - input_shape=num_customer_features, - depth=z_depth, - width=z_width, - l2_regularization_coeff=l2_regularization_coeff, - )(customer_input) - - z_embeddings.append(z_embedding) - - x_net = tf.keras.Model(inputs=products_input, outputs=x_embeddings, name="X_embedding") - z_net = tf.keras.Model(inputs=customer_input, outputs=z_embeddings, name="Z_embedding") - - # Utility network - u_net = create_ff_network( - input_shape=( - x_width + z_width + num_products_features + num_customer_features - ), # Input shape from previous nets - width=width_u, - depth=depth_u, - add_last=True, # Add last for utility - l2_regularization_coeff=l2_regularization_coeff, - ) - - return x_net, z_net, u_net From a3b83fb69ef10b71b08fe7631c5b53fb99c067f5 Mon Sep 17 00:00:00 2001 From: VincentAuriau Date: Fri, 2 Feb 2024 16:18:02 +0100 Subject: [PATCH 05/10] ENH: moved tfp import inside report function --- choice_learn/models/conditional_mnl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/choice_learn/models/conditional_mnl.py b/choice_learn/models/conditional_mnl.py index 00733cf2..712f38fc 100644 --- a/choice_learn/models/conditional_mnl.py +++ b/choice_learn/models/conditional_mnl.py @@ -2,7 +2,6 @@ import pandas as pd import tensorflow as tf -import tensorflow_probability as tfp from .base_model import ChoiceModel @@ -1038,6 +1037,8 @@ def compute_report(self, dataset): pandas.DataFrame A DF with estimation, Std Err, z_value and p_value for each coefficient. """ + import tensorflow_probability as tfp + weights_std = self.get_weights_std(dataset) dist = tfp.distributions.Normal(loc=0.0, scale=1.0) From 8894dd216e1ab9b9e109db52069c0a7a5a22cd68 Mon Sep 17 00:00:00 2001 From: VincentAuriau Date: Fri, 2 Feb 2024 16:18:14 +0100 Subject: [PATCH 06/10] ADD: requirements.txt --- requirements.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index d23458ee..3c9de3ac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ --e . 
+numpy==1.24.3 pandas==1.5.3 -numpy==1.24.2 +tensorflow==2.13.0 +tensorflow_probability==0.20.1 +tqdm==4.65.0 From 36e2755e5a748421ff184367d58c214789f2982e Mon Sep 17 00:00:00 2001 From: VincentAuriau Date: Fri, 2 Feb 2024 16:22:21 +0100 Subject: [PATCH 07/10] ADD: tfp as requirement in README --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 76d0c248..976714b8 100644 --- a/README.md +++ b/README.md @@ -90,8 +90,11 @@ git clone git@github.com:artefactory/choice-learn-private.git Choice-Learn requires the following: - Python (>=3.8) - NumPy (>=1.24) -- TensorFlow (>=2.13) - pandas (>=1.5) +For modelling you need: +- TensorFlow (>=2.13) +Finally, an optional requirement used for specific functions is: +- tensorflow_probability (>=0.20.1) ## Usage ```python From 5109415b3fee6195f7be5af15b5ccbc294493fcd Mon Sep 17 00:00:00 2001 From: VincentAuriau Date: Fri, 2 Feb 2024 17:07:45 +0100 Subject: [PATCH 08/10] ENH: match latest signature --- choice_learn/models/base_model.py | 8 +- choice_learn/models/conditional_mnl.py | 184 ++++++++++++++----------- 2 files changed, 106 insertions(+), 86 deletions(-) diff --git a/choice_learn/models/base_model.py b/choice_learn/models/base_model.py index 488e32cc..4808324e 100644 --- a/choice_learn/models/base_model.py +++ b/choice_learn/models/base_model.py @@ -415,7 +415,7 @@ def batch_predict( Probabilities for each product to be chosen for each context """ # Compute utilities from features - utilities = self.compute_utility( + utilities = self.compute_batch_utility( fixed_items_features, contexts_features, contexts_items_features, @@ -659,7 +659,7 @@ def f(params_1d): f.history = [] return f - def _fit_with_lbfgs(self, dataset, n_epochs, tolerance=1e-8): + def _fit_with_lbfgs(self, dataset, epochs=None, tolerance=1e-8): """Fit function for L-BFGS optimizer. Replaces the .fit method when the optimizer is set to L-BFGS. @@ -682,6 +682,8 @@ def _fit_with_lbfgs(self, dataset, n_epochs, tolerance=1e-8): # dependency import tensorflow_probability as tfp + if epochs is None: + epochs = self.epochs func = self._lbfgs_train_step(dataset) # convert initial model parameters to a 1D tf.Tensor @@ -691,7 +693,7 @@ def _fit_with_lbfgs(self, dataset, n_epochs, tolerance=1e-8): results = tfp.optimizer.lbfgs_minimize( value_and_gradients_function=func, initial_position=init_params, - max_iterations=n_epochs, + max_iterations=epochs, tolerance=tolerance, f_absolute_tolerance=-1, f_relative_tolerance=-1, diff --git a/choice_learn/models/conditional_mnl.py b/choice_learn/models/conditional_mnl.py index 712f38fc..5a95b60c 100644 --- a/choice_learn/models/conditional_mnl.py +++ b/choice_learn/models/conditional_mnl.py @@ -338,35 +338,35 @@ def _store_dataset_features_names(self, dataset): self._contexts_features_names = dataset.contexts_features_names self._contexts_items_features_names = dataset.contexts_items_features_names - def compute_utility_from_specification( + def compute_batch_utility_from_specification( self, - items_batch, - contexts_batch, - contexts_items_batch, - availabilities_batch, - choices_batch, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, verbose=0, ): """Computes the utility when the model is constructed from a ModelSpecification object. 
Parameters ---------- - tems_batch : tuple of np.ndarray (items_features) + fixed_items_features : tuple of np.ndarray Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the products constant/fixed features. Shape must be (n_items, n_items_features) - contexts_batch : tuple of np.ndarray (contexts_features) - Time-Features - Shape must be (n_choices, n_contexts_features) - contexts_items_batch : tuple of np.ndarray (contexts_items_features) - Time-Item-Features - Shape must be (n_choices, n_contexts_items_features) - availabilities_batch : np.ndarray - Availabilities (contexts_items_availabilities) - Shape must be (n_choices, n_items) + contexts_features : tuple of np.ndarray (contexts_features) + a batch of contexts features + Shape must be (n_contexts, n_contexts_features) + contexts_items_features : tuple of np.ndarray (contexts_items_features) + a batch of contexts items features + Shape must be (n_contexts, n_contexts_items_features) + contexts_items_availabilities : np.ndarray + A batch of contexts items availabilities + Shape must be (n_contexts, n_items) choices_batch : np.ndarray Choices - Shape must be (n_choices, ) + Shape must be (n_contexts, ) verbose : int, optional Parametrization of the logging outputs, by default 0 @@ -375,10 +375,10 @@ def compute_utility_from_specification( tf.Tensor Utilities corresponding of shape (n_choices, n_items) """ - _ = choices_batch + _ = choices - num_items = availabilities_batch.shape[1] - num_choices = availabilities_batch.shape[0] + num_items = contexts_items_availabilities.shape[1] + num_choices = contexts_items_availabilities.shape[0] contexts_items_utilities = [] # Items features if self._items_features_names is not None: @@ -397,7 +397,7 @@ def compute_utility_from_specification( [ s_i_u[:k], tf.multiply( - items_batch[i][k, j], + fixed_items_features[i][k, j], self.weights[weight_index][:, q], ), s_i_u[k + 1 :], @@ -409,7 +409,7 @@ def compute_utility_from_specification( [ s_i_u[:idx], tf.multiply( - items_batch[i][idx, j], + fixed_items_features[i][idx, j], self.weights[weight_index][:, q], ), s_i_u[idx + 1 :], @@ -458,9 +458,9 @@ def compute_utility_from_specification( axis=1, ) """ - contexts_batch[i][:, j] + contexts_features[i][:, j] compute = tf.multiply( - contexts_batch[i][:, j], + contexts_features[i][:, j], self.weights[weight_index][:, q], ) s_i_u[k] += compute @@ -482,7 +482,7 @@ def compute_utility_from_specification( ) """ compute = tf.multiply( - contexts_batch[i][:, j], self.weights[weight_index][:, q] + contexts_features[i][:, j], self.weights[weight_index][:, q] ) s_i_u[idx] += compute @@ -516,7 +516,7 @@ def compute_utility_from_specification( s_i_u[:, :k], tf.expand_dims( tf.multiply( - contexts_items_batch[i][:, k, j], + contexts_items_features[i][:, k, j], self.weights[weight_index][:, q], ), axis=-1, @@ -531,7 +531,7 @@ def compute_utility_from_specification( s_i_u[:, :idx], tf.expand_dims( tf.multiply( - contexts_items_batch[i][:, idx, j], + contexts_items_features[i][:, idx, j], self.weights[weight_index][:, q], ), axis=-1, @@ -785,28 +785,34 @@ def instantiate( raise ValueError("No weights instantiated") return weights - def compute_utility( - self, items_batch, contexts_batch, contexts_items_batch, availabilities_batch, choices_batch + def compute_batch_utility( + self, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, ): """Main method to compute the utility of the model. Selects the right method to compute. 
Parameters ---------- - items_batch : tuple of np.ndarray (items_features) + fixed_items_features : tuple of np.ndarray Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the products constant/fixed features. Shape must be (n_items, n_items_features) - contexts_batch : tuple of np.ndarray (contexts_features) - Time-Features - Shape must be (n_choices, n_contexts_features) - contexts_items_batch : tuple of np.ndarray (contexts_items_features) - Time-Item-Features - Shape must be (n_choices, n_contexts_items_features) - availabilities_batch : np.ndarray - Availabilities (contexts_items_availabilities) - Shape must be (n_choices, n_items) + contexts_features : tuple of np.ndarray (contexts_features) + a batch of contexts features + Shape must be (n_contexts, n_contexts_features) + contexts_items_features : tuple of np.ndarray (contexts_items_features) + a batch of contexts items features + Shape must be (n_contexts, n_contexts_items_features) + contexts_items_availabilities : np.ndarray + A batch of contexts items availabilities + Shape must be (n_contexts, n_items) choices_batch : np.ndarray - Choices Shape must be (n_choices, ) + Choices + Shape must be (n_contexts, ) Returns: -------- @@ -814,44 +820,49 @@ def compute_utility( Computed utilities of shape (n_choices, n_items). """ if isinstance(self.params, ModelSpecification): - return self.compute_utility_from_specification( - items_batch, - contexts_batch, - contexts_items_batch, - availabilities_batch, - choices_batch, + return self.compute_batch_utility_from_specification( + fixed_items_features=fixed_items_features, + contexts_features=contexts_features, + contexts_items_features=contexts_items_features, + contexts_items_availabilities=contexts_items_availabilities, + choices=choices, ) - return self.compute_utility_from_dict( - items_batch, - contexts_batch, - contexts_items_batch, - availabilities_batch, - choices_batch, + return self.compute_batch_utility_from_dict( + fixed_items_features=fixed_items_features, + contexts_features=contexts_features, + contexts_items_features=contexts_items_features, + contexts_items_availabilities=contexts_items_availabilities, + choices=choices, ) - def compute_utility_from_dict( - self, items_batch, contexts_batch, contexts_items_batch, availabilities_batch, choices_batch + def compute_batch_utility_from_dict( + self, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, ): """Computes the utility when the model is constructed from a dictionnary object. Parameters ---------- - items_batch : tuple of np.ndarray (items_features) + fixed_items_features : tuple of np.ndarray Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the products constant/fixed features. 
Shape must be (n_items, n_items_features) - contexts_batch : tuple of np.ndarray (contexts_features) - Time-Features - Shape must be (n_choices, n_contexts_features) - contexts_items_batch : tuple of np.ndarray (contexts_items_features) - Time-Item-Features - Shape must be (n_choices, n_contexts_items_features) - availabilities_batch : np.ndarray - Availabilities (contexts_items_availabilities) - Shape must be (n_choices, n_items) + contexts_features : tuple of np.ndarray (contexts_features) + a batch of contexts features + Shape must be (n_contexts, n_contexts_features) + contexts_items_features : tuple of np.ndarray (contexts_items_features) + a batch of contexts items features + Shape must be (n_contexts, n_contexts_items_features) + contexts_items_availabilities : np.ndarray + A batch of contexts items availabilities + Shape must be (n_contexts, n_items) choices_batch : np.ndarray Choices - Shape must be (n_choices, ) + Shape must be (n_contexts, ) verbose : int, optional Parametrization of the logging outputs, by default 0 @@ -860,14 +871,14 @@ def compute_utility_from_dict( tf.Tensor Utilities corresponding of shape (n_choices, n_items) """ - _, _ = availabilities_batch, choices_batch + _ = choices contexts_items_utilities = [] - if items_batch is not None: - num_items = items_batch[0].shape[0] + if fixed_items_features is not None: + num_items = fixed_items_features[0].shape[0] else: - num_items = contexts_items_batch[0].shape[1] - num_choices = availabilities_batch.shape[0] + num_items = contexts_items_features[0].shape[1] + num_choices = contexts_items_availabilities.shape[0] # Items features for i, feat_tuple in enumerate(self._items_features_names): @@ -876,16 +887,19 @@ def compute_utility_from_dict( weight = self.weights[k] if self.params[feat] == "constant": s_i_u = tf.concat( - [tf.multiply(items_batch[i][:, j], weight)] * num_choices, axis=0 + [tf.multiply(fixed_items_features[i][:, j], weight)] * num_choices, + axis=0, ) elif self.params[feat] == "item": weight = tf.concat([tf.constant([[0.0]]), weight], axis=-1) s_i_u = tf.concat( - [tf.multiply(items_batch[i][:, j], weight)] * num_choices, axis=0 + [tf.multiply(fixed_items_features[i][:, j], weight)] * num_choices, + axis=0, ) elif self.params[feat] == "item-full": s_i_u = tf.concat( - [tf.multiply(items_batch[i][:, j], weight)] * num_choices, axis=0 + [tf.multiply(fixed_items_features[i][:, j], weight)] * num_choices, + axis=0, ) else: raise NotImplementedError(f"Param {self.params[feat]} not implemented") @@ -903,13 +917,13 @@ def compute_utility_from_dict( weight = self.weights[k] if self.params[feat] == "constant": s_i_u = tf.concat( - [tf.multiply(contexts_batch[i][j], weight)] * num_items, axis=-1 + [tf.multiply(contexts_features[i][j], weight)] * num_items, axis=-1 ) elif self.params[feat] == "item": weight = tf.concat([tf.constant([[0.0]]), weight], axis=-1) - s_i_u = tf.tensordot(contexts_batch[i][:, j : j + 1], weight, axes=1) + s_i_u = tf.tensordot(contexts_features[i][:, j : j + 1], weight, axes=1) elif self.params[feat] == "item-full": - s_i_u = tf.tensordot(contexts_batch[i][:, j : j + 1], weight, axes=1) + s_i_u = tf.tensordot(contexts_features[i][:, j : j + 1], weight, axes=1) else: raise NotImplementedError(f"Param {self.params[feat]} not implemented") contexts_items_utilities.append(s_i_u) @@ -925,12 +939,12 @@ def compute_utility_from_dict( if feat in self.params.keys(): weight = self.weights[k] if self.params[feat] == "constant": - s_i_u = tf.multiply(contexts_items_batch[i][:, :, j], weight) + s_i_u 
= tf.multiply(contexts_items_features[i][:, :, j], weight)
                 elif self.params[feat] == "item":
                     weight = tf.concat([tf.constant([[0.0]]), weight], axis=-1)
-                    s_i_u = tf.multiply(contexts_items_batch[i][:, :, j], weight)
+                    s_i_u = tf.multiply(contexts_items_features[i][:, :, j], weight)
                 elif self.params[feat] == "item-full":
-                    s_i_u = tf.multiply(contexts_items_batch[i][:, :, j], weight)
+                    s_i_u = tf.multiply(contexts_items_features[i][:, :, j], weight)
                 else:
                     raise NotImplementedError(f"Param {self.params[feat]} not implemented")
                 contexts_items_utilities.append(s_i_u)
@@ -987,7 +1001,7 @@ def fit(self, choice_dataset, get_report=False, **kwargs):
             self.report = self.compute_report(choice_dataset)
         return fit

-    def _fit_with_lbfgs(self, choice_dataset, n_epochs, tolerance=1e-8, get_report=False):
+    def _fit_with_lbfgs(self, choice_dataset, epochs=None, tolerance=1e-8, get_report=False):
         """Specific fit function to estimate the paramters with LBFGS.

         Parameters
@@ -1018,7 +1032,9 @@ def _fit_with_lbfgs(self, choice_dataset, n_epochs, tolerance=1e-8, get_report=F
             contexts_items_features_names=choice_dataset.contexts_items_features_names,
         )
         self.instantiated = True
-        fit = super()._fit_with_lbfgs(choice_dataset, n_epochs, tolerance)
+        if epochs is None:
+            epochs = self.epochs
+        fit = super()._fit_with_lbfgs(choice_dataset, epochs, tolerance)
         if get_report:
             self.report = self.compute_report(choice_dataset)
         return fit
@@ -1093,7 +1109,7 @@ def get_weights_std(self, dataset):
             index += _w.shape[1]
         model.weights = mw
         for batch in dataset.iter_batch(batch_size=-1):
-            utilities = model.compute_utility(*batch)
+            utilities = model.compute_batch_utility(*batch)
             probabilities = tf.nn.softmax(utilities, axis=-1)
             loss = tf.keras.losses.CategoricalCrossentropy(reduction="sum")(
                 y_pred=probabilities,
@@ -1103,7 +1119,9 @@ def get_weights_std(self, dataset):
             jacobian = tape_2.jacobian(loss, w)
         # Compute the Hessian from the Jacobian
         hessian = tape_1.batch_jacobian(jacobian, w)
-        return tf.sqrt([tf.linalg.inv(tf.squeeze(hessian))[i][i] for i in range(len(w))])
+        return tf.sqrt(
+            [tf.linalg.inv(tf.squeeze(hessian))[i][i] for i in range(len(tf.squeeze(hessian)))]
+        )

     def clone(self):
         """Returns a clone of the model."""
From dadf5f4972d1acea808c2ef881c5de5bce577755 Mon Sep 17 00:00:00 2001
From: VincentAuriau
Date: Fri, 2 Feb 2024 17:08:25 +0100
Subject: [PATCH 09/10] ENH: latest signature in example

---
 README.md                                     |  2 +-
 .../choice_learn_introduction_clogit.ipynb    | 36 +++++++++----------
 notebooks/custom_model.ipynb                  | 16 ++++-----
 3 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/README.md b/README.md
index 976714b8..9481432a 100644
--- a/README.md
+++ b/README.md
@@ -93,7 +93,7 @@ Choice-Learn requires the following:
 - pandas (>=1.5)
 For modelling you need:
 - TensorFlow (>=2.13)
-Finally, an optional requirement used for specific functions is:
+Finally, an optional requirement used for reports and L-BFGS use is:
 - tensorflow_probability (>=0.20.1)

 ## Usage
 ```python
diff --git a/notebooks/choice_learn_introduction_clogit.ipynb b/notebooks/choice_learn_introduction_clogit.ipynb
index 51886a9d..0dcc47a5 100644
--- a/notebooks/choice_learn_introduction_clogit.ipynb
+++ b/notebooks/choice_learn_introduction_clogit.ipynb
@@ -174,7 +174,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "history = model.fit(dataset, n_epochs=1000)"
+    "history = model.fit(dataset, epochs=1000, get_report=True)"
    ]
   },
  {
@@ -447,7 +447,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "history = cmnl.fit(dataset, n_epochs=1000)\n",
+    "history
= cmnl.fit(dataset, epochs=1000)\n", "print(cmnl.weights)" ] }, @@ -469,7 +469,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 1/1 [00:02<00:00, 2.46s/it]" + "100%|██████████| 1/1 [00:02<00:00, 2.41s/it]" ] }, { @@ -499,8 +499,8 @@ " tf.constant([[0.0595089, -0.00678188, -0.00645982, -0.00145029]]),\n", " tf.constant([[0.697311, 1.8437, 3.27381]]),\n", "]\n", - "gt_model = ConditionalMNL(parameters=params, lr=0.01)\n", - "gt_model.fit(dataset, n_epochs=1, batch_size=-1)\n", + "gt_model = ConditionalMNL(parameters=params, lr=0.01, epochs=1, batch_size=-1)\n", + "gt_model.fit(dataset)\n", "\n", "# Here we estimate the negative log-likelihood with these coefficients (also, we obtain same value as in those papers):\n", "gt_model.weights = gt_weights\n", @@ -565,11 +565,11 @@ "outputs": [], "source": [ "cmnl = ConditionalMNL(parameters=params, optimizer=\"Adam\")\n", - "history = cmnl.fit(dataset, n_epochs=2000, batch_size=-1)\n", + "history = cmnl.fit(dataset, epochs=2000, batch_size=-1)\n", "cmnl.optimizer.lr = cmnl.optimizer.lr / 5\n", - "history2 = cmnl.fit(dataset, n_epochs=4000, batch_size=-1)\n", + "history2 = cmnl.fit(dataset, epochs=4000, batch_size=-1)\n", "cmnl.optimizer.lr = cmnl.optimizer.lr / 10\n", - "history3 = cmnl.fit(dataset, n_epochs=20000, batch_size=-1)" + "history3 = cmnl.fit(dataset, epochs=20000, batch_size=-1)" ] }, { @@ -582,14 +582,14 @@ { "data": { "text/plain": [ - "[,\n", - " ,\n", - " ,\n", - " ,\n", + "[,\n", + " ,\n", + " ,\n", + " ,\n", " ,\n", - " ]" + " ]" ] }, "execution_count": null, @@ -611,7 +611,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": null, @@ -653,7 +653,7 @@ " \"intercept\": \"item\"}\n", "\n", "# Instantiation of the model\n", - "cmnl = ConditionalMNL(parameters=params, optimizer=\"lbfgs\")" + "cmnl = ConditionalMNL(parameters=params, optimizer=\"lbfgs\", epochs=1000)" ] }, { @@ -678,7 +678,7 @@ } ], "source": [ - "history = cmnl.fit(dataset, n_epochs=1000)\n", + "history = cmnl.fit(dataset)\n", "print(cmnl.weights)" ] }, @@ -821,7 +821,7 @@ "metadata": {}, "outputs": [], "source": [ - "history = swiss_model.fit(swiss_dataset, n_epochs=10000)" + "history = swiss_model.fit(swiss_dataset, epochs=10000)" ] }, { diff --git a/notebooks/custom_model.ipynb b/notebooks/custom_model.ipynb index 2e929611..9440ef69 100644 --- a/notebooks/custom_model.ipynb +++ b/notebooks/custom_model.ipynb @@ -70,17 +70,17 @@ "transport_df.income = transport_df.income.astype(\"float32\")\n", "\n", "dataset = ChoiceDataset.from_single_df(df=transport_df,\n", - " items_features_columns=[\"oh_air\",\n", + " fixed_items_features_columns=[\"oh_air\",\n", " \"oh_bus\",\n", " \"oh_car\",\n", " \"oh_train\"],\n", - " sessions_features_columns=[\"income\"],\n", - " sessions_items_features_columns=[\"cost\",\n", + " contexts_features_columns=[\"income\"],\n", + " contexts_items_features_columns=[\"cost\",\n", " \"freq\",\n", " \"ovt\",\n", " \"ivt\"],\n", " items_id_column=\"alt\",\n", - " sessions_id_column=\"case\",\n", + " contexts_id_column=\"case\",\n", " choices_column=\"choice\",\n", " choice_mode=\"one_zero\")" ] @@ -93,7 +93,7 @@ "\n", "For our custom model to work, we need to specify:\n", "- Weights initialization in __init__()\n", - "- the utility function in compute_utility()" + "- the utility function in compute_batch_utility()" ] }, { @@ -179,7 +179,7 @@ " self.weights = [beta_inter, beta_freq_cost_ovt, beta_income, beta_ivt]\n", "\n", "\n", - " def compute_utility(self,\n", + " def compute_batch_utility(self,\n", " 
items_batch,\n", " sessions_batch,\n", " sessions_items_batch,\n", @@ -249,7 +249,7 @@ "outputs": [], "source": [ "model = CustomCanadaConditionalMNL(optimizer=\"lbfgs\")\n", - "history = model.fit(dataset, n_epochs=400)" + "history = model.fit(dataset, epochs=400)" ] }, { @@ -427,7 +427,7 @@ " # Easy with TensorFlow.Layer\n", " self.weights = self.dense_1.trainable_variables + self.dense_2.trainable_variables\n", " \n", - " def compute_utility(self,\n", + " def compute_batch_utility(self,\n", " items_batch,\n", " sessions_batch,\n", " sessions_items_batch,\n", From 68a4ebfbf31c4a641f3311f30bee95cd0d7e2203 Mon Sep 17 00:00:00 2001 From: VincentAuriau Date: Fri, 2 Feb 2024 17:16:51 +0100 Subject: [PATCH 10/10] FIX: add local in requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 3c9de3ac..07c7368e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +-e . numpy==1.24.3 pandas==1.5.3 tensorflow==2.13.0
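Across the RUMnet changes in this series, `train_step` and `batch_predict` post-process the heterogeneity-averaged softmax the same way: mask by availabilities, renormalize, and (in `train_step`) blend in a uniform floor controlled by `tol`. Below is a minimal standalone sketch of that logic, using NumPy as a stand-in; the array shapes and values are illustrative assumptions, not data taken from the library.

```python
import numpy as np

# Illustrative batch (assumed values): 2 contexts, 3 items.
probabilities = np.array([[0.5, 0.3, 0.2],
                          [0.4, 0.4, 0.2]])
availabilities = np.array([[1.0, 1.0, 0.0],
                           [1.0, 1.0, 1.0]])
tol = 1e-2

# Zero out unavailable items, then renormalize; the small epsilon mirrors
# the `+ 1e-5` in the patches and guards against division by zero.
probabilities = probabilities * availabilities
probabilities = probabilities / (probabilities.sum(axis=1, keepdims=True) + 1e-5)

# Blend with a uniform distribution so no probability is exactly zero.
if tol > 0:
    n_items = probabilities.shape[-1]
    probabilities = (1 - tol) * probabilities + tol * np.ones_like(probabilities) / n_items

print(probabilities)  # each row sums to ~1; the unavailable item keeps only the tol floor
```

The `tol` blend keeps every probability strictly positive, so the cross-entropy computed in the subsequent loss call stays finite even when the model places (near-)zero mass on the chosen item.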