Skip to content

Commit

Permalink
Add lc endpoint: predict_modelwise_probas (#206)
Browse files Browse the repository at this point in the history
  • Loading branch information
VincentAuriau authored Dec 27, 2024
1 parent cda484a commit b2bcc5e
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 44 deletions.
85 changes: 44 additions & 41 deletions choice_learn/models/latent_class_base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def compute_batch_utility(
utilities.append(model_utilities)
return utilities

def fit(self, choice_dataset, sample_weight=None, verbose=0):
def fit(self, choice_dataset, sample_weight=None, val_dataset=None, verbose=0):
"""Fit the model on a ChoiceDataset.
Parameters
Expand All @@ -245,6 +245,8 @@ def fit(self, choice_dataset, sample_weight=None, verbose=0):
Dataset to be used for coefficients estimations
sample_weight : np.ndarray, optional
sample weights to apply, by default None
val_dataset: ChoiceDataset
Validation dataset for MLE Gradient Descent Optimization
verbose : int, optional
print level, for debugging, by default 0
Expand Down Expand Up @@ -277,7 +279,10 @@ def fit(self, choice_dataset, sample_weight=None, verbose=0):
self.optimizer = tf.keras.optimizers.Adam(self.lr)

return self._fit_with_gd(
choice_dataset=choice_dataset, sample_weight=sample_weight, verbose=verbose
choice_dataset=choice_dataset,
sample_weight=sample_weight,
verbose=verbose,
val_dataset=val_dataset,
)

raise ValueError(f"Fit method not implemented: {self.fit_method}")
Expand Down Expand Up @@ -762,45 +767,6 @@ def _fit_with_gd(
# self.callbacks.on_train_end(logs=temps_logs)
return losses_history

def _nothing(self, inputs):
    """Mix the per-class choice probabilities of one batch with the latent class weights.

    Parameters
    ----------
    inputs : sequence
        Batch data, forwarded unpacked to each model's `compute_batch_utility`.
        The element at index 4 is used as the availability mask.

    Returns
    -------
    tf.Tensor
        Sum over the latent classes of (class weight * class probabilities),
        where the class probabilities are masked by availability and
        re-normalised along axis 1.
    """
    # Class weights: exponentiate the max-shifted, clipped logits, then normalise to sum to one.
    shifted_logits = self.latent_logits - tf.reduce_max(self.latent_logits)
    class_weights = tf.math.exp(tf.clip_by_value(shifted_logits, self.minf, 0))
    class_weights = class_weights / tf.reduce_sum(class_weights)

    weighted_probas = []
    # NOTE(review): availability is read from index 4 of `inputs` -- confirm this
    # matches the argument order expected by `compute_batch_utility`.
    availabilities = inputs[4]
    for class_index in range(self.n_latent_classes):
        utilities = self.models[class_index].compute_batch_utility(*inputs)
        # Shift by the row-wise maximum and clip before the softmax.
        row_max = tf.reduce_max(utilities, axis=1, keepdims=True)
        shifted_utilities = tf.clip_by_value(utilities - row_max, self.minf, 0)
        probas = tf.keras.layers.Activation(activation=tf.nn.softmax)(shifted_utilities)
        # Zero out unavailable items, then re-normalise each row.
        masked = probas * availabilities
        weighted = class_weights[class_index] * tf.math.divide(
            masked, tf.reduce_sum(masked, axis=1, keepdims=True)
        )
        # Add a trailing axis so the classes can be concatenated on axis 2.
        weighted_probas.append(tf.expand_dims(weighted, -1))

    stacked = tf.keras.layers.Concatenate(axis=2)(weighted_probas)
    return tf.math.reduce_sum(stacked, axis=2, keepdims=False)

def _expectation(self, choice_dataset):
predicted_probas = [model.predict_probas(choice_dataset) for model in self.models]
latent_probabilities = self.get_latent_classes_weights()
Expand Down Expand Up @@ -936,6 +902,43 @@ def predict_probas(self, choice_dataset, batch_size=-1):

return tf.concat(stacked_probabilities, axis=0)

def predict_modelwise_probas(self, choice_dataset, batch_size=-1):
    """Predict the choice probabilities of a ChoiceDataset with each latent class model.

    Every model in `self.models` is run over the dataset batch by batch. Its
    batch outputs are concatenated along axis 0, and the per-model results are
    then stacked along a new leading axis. The probabilities are returned as
    each model produces them; no latent class weight is applied here.

    Parameters
    ----------
    choice_dataset : ChoiceDataset
        Dataset on which to apply the prediction
    batch_size : int, optional
        Batch size forwarded to `choice_dataset.iter_batch`, by default -1

    Returns
    -------
    tf.Tensor (n_latent_models, n_choices, n_items)
        Choice probabilities for each model, each choice and each product.
        The leading axis follows the order of `self.models`.
    """
    modelwise_probabilities = []
    for model in self.models:
        stacked_probabilities = []
        for (
            shared_features,
            items_features,
            available_items,
            choices,
        ) in choice_dataset.iter_batch(batch_size=batch_size):
            # batch_predict returns a pair; only the probabilities are kept.
            _, probabilities = model.batch_predict(
                shared_features_by_choice=shared_features,
                items_features_by_choice=items_features,
                available_items_by_choice=available_items,
                choices=choices,
            )
            stacked_probabilities.append(probabilities)
        # Rebuild the full-dataset prediction of this model from its batches.
        modelwise_probabilities.append(tf.concat(stacked_probabilities, axis=0))

    # One slice per model along the new first axis.
    return tf.stack(modelwise_probabilities, axis=0)

def get_latent_classes_weights(self):
"""Return the latent classes weights / probabilities from logits.
Expand Down
26 changes: 25 additions & 1 deletion tests/integration_tests/models/test_latent_class.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Tests basic stuff for the latent class models."""

import numpy as np
import tensorflow as tf

tf.config.run_functions_eagerly(True)
Expand All @@ -23,6 +24,11 @@ def test_latent_simple_mnl():
_, _ = lc_model.fit(elec_dataset)
lc_model.compute_report(elec_dataset)

probas = lc_model.predict_modelwise_probas(elec_dataset)
assert probas.shape == (2, len(elec_dataset), 4)
probas = lc_model.predict_probas(elec_dataset)
assert probas.shape == (len(elec_dataset), 4)

assert lc_model.evaluate(elec_dataset).numpy() < 1.15


Expand Down Expand Up @@ -83,5 +89,23 @@ def test_manual_lc_gd():
)
manual_lc.instantiate(n_items=4, n_shared_features=0, n_items_features=6)
nll_before = manual_lc.evaluate(elec_dataset)
_ = manual_lc.fit(elec_dataset)
_ = manual_lc.fit(
elec_dataset, sample_weight=np.ones(len(elec_dataset)), val_dataset=elec_dataset[-10:]
)
assert manual_lc.evaluate(elec_dataset) < nll_before


def test_em_fit():
    """Check that an EM fit of a latent class model lowers its evaluation on the dataset."""
    model = LatentClassSimpleMNL(
        n_latent_classes=3,
        fit_method="EM",
        optimizer="lbfgs",
        epochs=15,
        lbfgs_tolerance=1e-6,
    )
    # Model dimensions are read from the dataset itself.
    dataset_dims = {
        "n_items": elec_dataset.get_n_items(),
        "n_shared_features": elec_dataset.get_n_shared_features(),
        "n_items_features": elec_dataset.get_n_items_features(),
    }
    model.instantiate(**dataset_dims)

    nll_before = model.evaluate(elec_dataset)
    # The two-name unpacking also checks that fit returns exactly two values.
    _, _ = model.fit(elec_dataset, verbose=0)
    nll_after = model.evaluate(elec_dataset)

    assert nll_after < nll_before
4 changes: 2 additions & 2 deletions tests/unit_tests/models/test_rumnet_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,8 @@ def test_gpu_rumnet():
depth_u=3,
tol=1e-5,
optimizer="adam",
lr=0.01,
epochs=5,
lr=0.001,
epochs=10,
)
model.instantiate()
assert model.batch_predict(
Expand Down

0 comments on commit b2bcc5e

Please sign in to comment.