diff --git a/choice_learn/models/latent_class_base_model.py b/choice_learn/models/latent_class_base_model.py
index 0c11645d..339ceec8 100644
--- a/choice_learn/models/latent_class_base_model.py
+++ b/choice_learn/models/latent_class_base_model.py
@@ -236,7 +236,7 @@ def compute_batch_utility(
             utilities.append(model_utilities)
         return utilities
 
-    def fit(self, choice_dataset, sample_weight=None, verbose=0):
+    def fit(self, choice_dataset, sample_weight=None, val_dataset=None, verbose=0):
         """Fit the model on a ChoiceDataset.
 
         Parameters
@@ -245,6 +245,8 @@ def fit(self, choice_dataset, sample_weight=None, verbose=0):
             Dataset to be used for coefficients estimations
         sample_weight : np.ndarray, optional
             sample weights to apply, by default None
+        val_dataset : ChoiceDataset, optional
+            Validation dataset passed to the gradient descent fit, by default None
         verbose : int, optional
             print level, for debugging, by default 0
 
@@ -277,7 +279,10 @@ def fit(self, choice_dataset, sample_weight=None, verbose=0):
                     self.optimizer = tf.keras.optimizers.Adam(self.lr)
 
             return self._fit_with_gd(
-                choice_dataset=choice_dataset, sample_weight=sample_weight, verbose=verbose
+                choice_dataset=choice_dataset,
+                sample_weight=sample_weight,
+                verbose=verbose,
+                val_dataset=val_dataset,
             )
 
         raise ValueError(f"Fit method not implemented: {self.fit_method}")
@@ -762,45 +767,6 @@ def _fit_with_gd(
         # self.callbacks.on_train_end(logs=temps_logs)
         return losses_history
 
-    def _nothing(self, inputs):
-        """_summary_.
-
-        Parameters
-        ----------
-        inputs : _type_
-            _description_
-
-        Returns
-        -------
-        _type_
-            _description_
-        """
-        latent_probas = tf.clip_by_value(
-            self.latent_logits - tf.reduce_max(self.latent_logits), self.minf, 0
-        )
-        latent_probas = tf.math.exp(latent_probas)
-        # latent_probas = tf.math.abs(self.logit_latent_probas)  # alternative implementation
-        latent_probas = latent_probas / tf.reduce_sum(latent_probas)
-        proba_list = []
-        avail = inputs[4]
-        for q in range(self.n_latent_classes):
-            combined = self.models[q].compute_batch_utility(*inputs)
-            combined = tf.clip_by_value(
-                combined - tf.reduce_max(combined, axis=1, keepdims=True), self.minf, 0
-            )
-            combined = tf.keras.layers.Activation(activation=tf.nn.softmax)(combined)
-            # combined = tf.keras.layers.Softmax()(combined)
-            combined = combined * avail
-            combined = latent_probas[q] * tf.math.divide(
-                combined, tf.reduce_sum(combined, axis=1, keepdims=True)
-            )
-            combined = tf.expand_dims(combined, -1)
-            proba_list.append(combined)
-            # print(combined.get_shape()) # it is useful to print the shape of tensors for debugging
-
-        proba_final = tf.keras.layers.Concatenate(axis=2)(proba_list)
-        return tf.math.reduce_sum(proba_final, axis=2, keepdims=False)
-
     def _expectation(self, choice_dataset):
         predicted_probas = [model.predict_probas(choice_dataset) for model in self.models]
         latent_probabilities = self.get_latent_classes_weights()
@@ -936,6 +902,43 @@ def predict_probas(self, choice_dataset, batch_size=-1):
 
         return tf.concat(stacked_probabilities, axis=0)
 
+    def predict_modelwise_probas(self, choice_dataset, batch_size=-1):
+        """Predicts the choice probabilities for each choice and each product of a ChoiceDataset.
+
+        Stacks each model probability.
+
+        Parameters
+        ----------
+        choice_dataset : ChoiceDataset
+            Dataset on which to apply the prediction
+        batch_size : int, optional
+            Batch size to use for the prediction, by default -1
+
+        Returns
+        -------
+        tf.Tensor (n_models, n_choices, n_items)
+            Choice probabilities of each model, for each choice and each product
+        """
+        modelwise_probabilities = []
+        for model in self.models:
+            stacked_probabilities = []
+            for (
+                shared_features,
+                items_features,
+                available_items,
+                choices,
+            ) in choice_dataset.iter_batch(batch_size=batch_size):
+                _, probabilities = model.batch_predict(
+                    shared_features_by_choice=shared_features,
+                    items_features_by_choice=items_features,
+                    available_items_by_choice=available_items,
+                    choices=choices,
+                )
+                stacked_probabilities.append(probabilities)
+            modelwise_probabilities.append(tf.concat(stacked_probabilities, axis=0))
+
+        return tf.stack(modelwise_probabilities, axis=0)
+
     def get_latent_classes_weights(self):
         """Return the latent classes weights / probabilities from logits.
 
diff --git a/tests/integration_tests/models/test_latent_class.py b/tests/integration_tests/models/test_latent_class.py
index 1c49cf63..854e73f9 100644
--- a/tests/integration_tests/models/test_latent_class.py
+++ b/tests/integration_tests/models/test_latent_class.py
@@ -1,5 +1,6 @@
 """Tests basic stuff for the latent class models."""
 
+import numpy as np
 import tensorflow as tf
 
 tf.config.run_functions_eagerly(True)
@@ -23,6 +24,11 @@ def test_latent_simple_mnl():
     _, _ = lc_model.fit(elec_dataset)
     lc_model.compute_report(elec_dataset)
 
+    probas = lc_model.predict_modelwise_probas(elec_dataset)
+    assert probas.shape == (2, len(elec_dataset), 4)
+    probas = lc_model.predict_probas(elec_dataset)
+    assert probas.shape == (len(elec_dataset), 4)
+
     assert lc_model.evaluate(elec_dataset).numpy() < 1.15
 
 
@@ -83,5 +89,23 @@ def test_manual_lc_gd():
     )
     manual_lc.instantiate(n_items=4, n_shared_features=0, n_items_features=6)
     nll_before = manual_lc.evaluate(elec_dataset)
-    _ = manual_lc.fit(elec_dataset)
+    _ = manual_lc.fit(
+        elec_dataset, sample_weight=np.ones(len(elec_dataset)), val_dataset=elec_dataset[-10:]
+    )
     assert manual_lc.evaluate(elec_dataset) < nll_before
+
+
+def test_em_fit():
+    """Test EM algorithm to estimate Latent Class Model."""
+    lc_model_em = LatentClassSimpleMNL(
+        n_latent_classes=3, fit_method="EM", optimizer="lbfgs", epochs=15, lbfgs_tolerance=1e-6
+    )
+    lc_model_em.instantiate(
+        n_items=elec_dataset.get_n_items(),
+        n_shared_features=elec_dataset.get_n_shared_features(),
+        n_items_features=elec_dataset.get_n_items_features(),
+    )
+    nll_b = lc_model_em.evaluate(elec_dataset)
+    _, _ = lc_model_em.fit(elec_dataset, verbose=0)
+    nll_a = lc_model_em.evaluate(elec_dataset)
+    assert nll_a < nll_b
diff --git a/tests/unit_tests/models/test_rumnet_unit.py b/tests/unit_tests/models/test_rumnet_unit.py
index 94578b62..61993fd8 100644
--- a/tests/unit_tests/models/test_rumnet_unit.py
+++ b/tests/unit_tests/models/test_rumnet_unit.py
@@ -254,8 +254,8 @@ def test_gpu_rumnet():
         depth_u=3,
         tol=1e-5,
         optimizer="adam",
-        lr=0.01,
-        epochs=5,
+        lr=0.001,
+        epochs=10,
     )
     model.instantiate()
     assert model.batch_predict(