ENH: Expectation-Maximization Algorithm (#205)
VincentAuriau authored Dec 27, 2024
1 parent f34eee4 commit cda484a
Showing 4 changed files with 342 additions and 151 deletions.
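This commit wires an Expectation-Maximization estimation path into BaseLatentClassModel, implemented by the `_em_fit`, `_expectation` and `_maximization` methods touched below. As a rough, minimal sketch of the loop those methods implement (the `class_likelihood_fn` callable, the history bookkeeping and all shapes are illustrative assumptions, not the library's exact API):

```python
# Illustrative sketch of EM for a latent-class choice model; not the library's exact code.
import numpy as np


def em_fit(models, latent_logits, choice_dataset, class_likelihood_fn, n_iter=10, clip=1e-6):
    # models: one choice model per latent class, each exposing fit(dataset, sample_weight=...).
    # class_likelihood_fn(model, dataset): placeholder returning the per-choice likelihood
    # of the observed choices under that model, shape (n_choices,).
    n_classes, n_choices = len(models), len(choice_dataset)

    # Initialization: random responsibilities, normalized over classes and clipped away from 0.
    weights = np.random.rand(n_classes, n_choices)            # (n_classes, n_choices)
    weights = np.clip(weights / weights.sum(axis=0, keepdims=True), clip, 1)
    for q, model in enumerate(models):
        model.fit(choice_dataset, sample_weight=weights[q])

    hist_loss, hist_logits = [], []
    for _ in range(n_iter):
        # E-step: posterior probability that each observed choice comes from each class.
        shares = np.exp(latent_logits) / np.exp(latent_logits).sum()
        liks = np.stack([class_likelihood_fn(m, choice_dataset) for m in models], axis=1)
        weights = shares * liks                                # (n_choices, n_classes)
        weights = np.clip(weights / weights.sum(axis=1, keepdims=True), clip, 1)

        # M-step, part 1: responsibility-weighted re-fit of each per-class model.
        for q, model in enumerate(models):
            model.fit(choice_dataset, sample_weight=weights[:, q])
        # M-step, part 2: update the latent class shares from the summed responsibilities.
        latent_probas = weights.sum(axis=0)
        latent_logits = np.log(latent_probas / latent_probas.sum())

        hist_loss.append(float(-np.sum(np.log(np.sum(shares * liks, axis=1)))))
        hist_logits.append(latent_logits.copy())
    return hist_loss, hist_logits
```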
choice_learn/models/base_model.py (3 changes: 1 addition & 2 deletions)
@@ -444,7 +444,7 @@ def fit(
self.callbacks.on_train_end(logs=temps_logs)
return losses_history

- @tf.function
+ @tf.function(reduce_retracing=True)
def batch_predict(
self,
shared_features_by_choice,
@@ -731,7 +731,6 @@ def f(params_1d):
# calculate gradients and convert to 1D tf.Tensor
grads = tape.gradient(loss_value, self.trainable_weights)
grads = tf.dynamic_stitch(idx, grads)
- # print out iteration & loss
f.iter.assign_add(1)

# store loss value so we can retrieve later
choice_learn/models/latent_class_base_model.py (32 changes: 23 additions & 9 deletions)
@@ -104,12 +104,18 @@ def instantiate(self, **kwargs):
name="Latent-Logits",
)
self.latent_logits = init_logit
- self.models = [self.model_class(**mp) for mp in self.model_parameters]
- for model in self.models:
- model.instantiate(**kwargs)

+ self.models = self.instantiate_latent_models(**kwargs)
self.instantiated = True

+ def instantiate_latent_models(self, **kwargs):
+ """Instantiate latent models."""
+ models = [self.model_class(**mp) for mp in self.model_parameters]
+ for model in models:
+ model.instantiate(**kwargs)
+
+ return models

# @tf.function
def batch_predict(
self,
@@ -249,7 +255,6 @@ def fit(self, choice_dataset, sample_weight=None, verbose=0):
"""
if self.fit_method.lower() == "em":
self.minf = np.log(1e-3)
print("Expectation-Maximization estimation algorithm not well implemented yet.")
return self._em_fit(
choice_dataset=choice_dataset, sample_weight=sample_weight, verbose=verbose
)
@@ -824,7 +829,7 @@ def _expectation(self, choice_dataset):
)

return tf.clip_by_value(
- predicted_probas / np.sum(predicted_probas, axis=1, keepdims=True), 1e-10, 1
+ predicted_probas / np.sum(predicted_probas, axis=1, keepdims=True), 1e-6, 1
), loss

def _maximization(self, choice_dataset, verbose=0):
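The hunk above loosens the clipping applied to the posterior responsibilities returned by `_expectation` (from 1e-10 to 1e-6). In standard latent-class notation (symbols are mine, not from the code), the quantity being clipped is, for choice i and class q,

```latex
w_{iq} \;=\; \frac{\pi_q \, P_q(y_i \mid x_i)}{\sum_{k=1}^{Q} \pi_k \, P_k(y_i \mid x_i)},
\qquad \pi_q \;=\; \operatorname{softmax}(\mathrm{latent\_logits})_q
```

where P_q(y_i | x_i) is the probability of the observed choice y_i under the class-q model.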
@@ -842,10 +847,17 @@ def _maximization(self, choice_dataset, verbose=0):
np.ndarray
latent probabilities resulting from the maximization step
"""
- self.models = [self.model_class(**mp) for mp in self.model_parameters]
+ # models = [self.model_class(**mp) for mp in self.model_parameters]
+ # for i in range(len(models)):
+ # for j, var in enumerate(self.models[i].trainable_weights):
+ # models[i]._trainable_weights[j] = var
+ # self.instantiate_latent_models(choice_dataset)

# M-step: MNL estimation
for q in range(self.n_latent_classes):
- self.models[q].fit(choice_dataset, sample_weight=self.weights[:, q], verbose=verbose)
+ self.models[q].fit(
+ choice_dataset, sample_weight=self.weights[:, q].numpy(), verbose=verbose
+ )

# M-step: latent probability estimation
latent_probas = np.sum(self.weights, axis=0)
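`_maximization` refits each class model with these responsibilities as sample weights, then re-estimates the latent class shares from their column sums. With the same notation as above, that share update amounts to

```latex
\pi_q \;=\; \frac{\sum_{i=1}^{N} w_{iq}}{\sum_{k=1}^{Q} \sum_{i=1}^{N} w_{ik}} \;=\; \frac{1}{N} \sum_{i=1}^{N} w_{iq}
```

since each row of responsibilities sums to one over the Q classes.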
@@ -876,7 +888,9 @@ def _em_fit(self, choice_dataset, sample_weight=None, verbose=0):

# Initialization
init_sample_weight = np.random.rand(self.n_latent_classes, len(choice_dataset))
- init_sample_weight = init_sample_weight / np.sum(init_sample_weight, axis=0, keepdims=True)
+ init_sample_weight = np.clip(
+ init_sample_weight / np.sum(init_sample_weight, axis=0, keepdims=True), 1e-6, 1
+ )
for i, model in enumerate(self.models):
# model.instantiate()
model.fit(choice_dataset, sample_weight=init_sample_weight[i], verbose=verbose)
@@ -888,7 +902,7 @@ def _em_fit(self, choice_dataset, sample_weight=None, verbose=0):
if np.sum(np.isnan(self.latent_logits)) > 0:
print("Nan in logits")
break
- return hist_logits, hist_loss
+ return hist_loss, hist_logits

def predict_probas(self, choice_dataset, batch_size=-1):
"""Predicts the choice probabilities for each choice and each product of a ChoiceDataset.
choice_learn/models/latent_class_mnl.py (14 changes: 13 additions & 1 deletion)
@@ -4,6 +4,8 @@

import tensorflow as tf

+ import choice_learn.tf_ops as tf_ops

from .conditional_logit import ConditionalLogit, MNLCoefficients
from .latent_class_base_model import BaseLatentClassModel
from .simple_mnl import SimpleMNL
@@ -23,6 +25,7 @@ def __init__(
intercept=None,
optimizer="Adam",
lr=0.001,
+ epochs_maximization=1000,
**kwargs,
):
"""Initialize model.
@@ -56,7 +59,7 @@ def __init__(
"batch_size": batch_size,
"lbfgs_tolerance": lbfgs_tolerance,
"lr": lr,
"epochs": 1000,
"epochs": epochs_maximization,
}

super().__init__(
@@ -88,6 +91,15 @@ def instantiate_latent_models(self, n_items, n_shared_features, n_items_features
model.indexes, model.weights = model.instantiate(
n_items, n_shared_features, n_items_features
)
+ model.exact_nll = tf_ops.CustomCategoricalCrossEntropy(
+ from_logits=False,
+ label_smoothing=0.0,
+ sparse=False,
+ axis=-1,
+ epsilon=1e-25,
+ name="exact_categorical_crossentropy",
+ reduction="sum_over_batch_size",
+ )
model.instantiated = True

def instantiate(self, n_items, n_shared_features, n_items_features):
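Taken together, a hypothetical usage sketch of the new estimation path. Only `n_latent_classes`, `fit_method="EM"`, `epochs_maximization` and the `(hist_loss, hist_logits)` return order come from this diff; the wrapper class name and the pre-built `choice_dataset` are assumptions:

```python
# Hypothetical usage of the EM path added in this commit (class name assumed).
from choice_learn.models.latent_class_mnl import LatentClassSimpleMNL

# choice_dataset: a choice_learn ChoiceDataset built beforehand.
model = LatentClassSimpleMNL(
    n_latent_classes=3,        # number of latent classes Q
    fit_method="EM",           # routes fit() to _em_fit() in BaseLatentClassModel
    epochs_maximization=1000,  # epochs of each per-class fit in the M-step
)
hist_loss, hist_logits = model.fit(choice_dataset, verbose=0)  # return order fixed by this commit
```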