diff --git a/choice_learn/models/latent_class_base_model.py b/choice_learn/models/latent_class_base_model.py index 0c11645d..339ceec8 100644 --- a/choice_learn/models/latent_class_base_model.py +++ b/choice_learn/models/latent_class_base_model.py @@ -236,7 +236,7 @@ def compute_batch_utility( utilities.append(model_utilities) return utilities - def fit(self, choice_dataset, sample_weight=None, verbose=0): + def fit(self, choice_dataset, sample_weight=None, val_dataset=None, verbose=0): """Fit the model on a ChoiceDataset. Parameters @@ -245,6 +245,8 @@ def fit(self, choice_dataset, sample_weight=None, verbose=0): Dataset to be used for coefficients estimations sample_weight : np.ndarray, optional sample weights to apply, by default None + val_dataset: ChoiceDataset + Validation dataset for MLE Gradient Descent Optimization verbose : int, optional print level, for debugging, by default 0 @@ -277,7 +279,10 @@ def fit(self, choice_dataset, sample_weight=None, verbose=0): self.optimizer = tf.keras.optimizers.Adam(self.lr) return self._fit_with_gd( - choice_dataset=choice_dataset, sample_weight=sample_weight, verbose=verbose + choice_dataset=choice_dataset, + sample_weight=sample_weight, + verbose=verbose, + val_dataset=val_dataset, ) raise ValueError(f"Fit method not implemented: {self.fit_method}") @@ -762,45 +767,6 @@ def _fit_with_gd( # self.callbacks.on_train_end(logs=temps_logs) return losses_history - def _nothing(self, inputs): - """_summary_. 
def predict_modelwise_probas(self, choice_dataset, batch_size=-1):
    """Predict choice probabilities separately for every model of the latent class model.

    Each model in ``self.models`` is run over the whole dataset, batch by batch,
    and the per-model results are stacked without applying latent class weights.

    Parameters
    ----------
    choice_dataset : ChoiceDataset
        Dataset on which to compute the predictions
    batch_size : int, optional
        Batch size passed to ``choice_dataset.iter_batch``, by default -1

    Returns
    -------
    tf.Tensor
        Per-model choice probabilities stacked along a new first axis, one
        slice per model in ``self.models``. Each slice is the concatenation
        (axis 0) of that model's batch probabilities, i.e. the result is
        expected to be of shape (n_models, n_choices, n_items).
    """

    def _probas_for(latent_model):
        # Run one model over every batch and glue the batches back together.
        batch_outputs = []
        for batch in choice_dataset.iter_batch(batch_size=batch_size):
            shared_feats, items_feats, availabilities, chosen = batch
            # batch_predict returns a pair; only the second element
            # (the probabilities) is kept.
            _, batch_probas = latent_model.batch_predict(
                shared_features_by_choice=shared_feats,
                items_features_by_choice=items_feats,
                available_items_by_choice=availabilities,
                choices=chosen,
            )
            batch_outputs.append(batch_probas)
        return tf.concat(batch_outputs, axis=0)

    return tf.stack([_probas_for(latent_model) for latent_model in self.models], axis=0)
def test_em_fit():
    """Check that fitting a latent class model with EM lowers its evaluated loss."""
    em_model = LatentClassSimpleMNL(
        n_latent_classes=3,
        fit_method="EM",
        optimizer="lbfgs",
        epochs=15,
        lbfgs_tolerance=1e-6,
    )
    em_model.instantiate(
        n_items=elec_dataset.get_n_items(),
        n_shared_features=elec_dataset.get_n_shared_features(),
        n_items_features=elec_dataset.get_n_items_features(),
    )

    # Evaluate on the same dataset before and after fitting.
    nll_before = em_model.evaluate(elec_dataset)
    _, _ = em_model.fit(elec_dataset, verbose=0)
    nll_after = em_model.evaluate(elec_dataset)

    assert nll_after < nll_before