diff --git a/configure/setup.py b/configure/setup.py
index 2c523240..c7e0bb6e 100644
--- a/configure/setup.py
+++ b/configure/setup.py
@@ -34,6 +34,7 @@
     "absl_py",
     "wheel",
     "wurlitzer",
+    "tf_keras",
 ]

diff --git a/documentation/known_issues.md b/documentation/known_issues.md
index d08b7a35..fd4e0ef6 100644
--- a/documentation/known_issues.md
+++ b/documentation/known_issues.md
@@ -90,7 +90,7 @@ to the estimator format.

 While abstracted by the Keras API, a model instantiated in Python (e.g., with
 `tfdf.keras.RandomForestModel()`) and a model loaded from disk (e.g., with
-`tf.keras.models.load_model()`) can behave differently. Notably, a Python
+`tf_keras.models.load_model()`) can behave differently. Notably, a Python
 instantiated model automatically applies necessary type conversions. For
 example, if a `float64` feature is fed to a model expecting a `float32`
 feature, this conversion is performed implicitly. However, such a conversion is not

diff --git a/documentation/migration.md b/documentation/migration.md
index 325b4c72..79b5068b 100644
--- a/documentation/migration.md
+++ b/documentation/migration.md
@@ -352,18 +352,18 @@ dataset reads are deterministic as well.

 #### Specify a task (e.g. classification, ranking) instead of a loss (e.g. binary cross-entropy)

 ```diff {.bad}
-- model = tf.keras.Sequential()
+- model = tf_keras.Sequential()
 - model.add(Dense(64, activation=relu))
 - model.add(Dense(1)) # One output for binary classification
-- model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
+- model.compile(loss=tf_keras.losses.BinaryCrossentropy(from_logits=True),
 -               optimizer='adam',
 -               metrics=['accuracy'])
 ```

 ```diff {.good}
 # The loss is automatically determined from the task.
-+ model = tfdf.keras.GradientBoostedTreesModel(task=tf.keras.Task.CLASSIFICATION)
++ model = tfdf.keras.GradientBoostedTreesModel(task=tfdf.keras.Task.CLASSIFICATION)

 # Optional if you want to report the accuracy.
 + model.compile(metrics=['accuracy'])
 ```
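As a reference for the migration.md pattern above, a minimal end-to-end sketch of the task-based API (the CSV path and label name are illustrative; note that the task enum lives in `tfdf.keras.Task`, not in `tf_keras`):

```python
import pandas as pd
import tensorflow_decision_forests as tfdf

# Hypothetical dataframe with a binary "income" label column.
train_df = pd.read_csv("/tmp/train.csv")
train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_df, label="income")

# The task (not a loss) tells TF-DF what to optimize.
model = tfdf.keras.GradientBoostedTreesModel(task=tfdf.keras.Task.CLASSIFICATION)
model.compile(metrics=["accuracy"])  # Optional, for reporting only.
model.fit(train_ds)
```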
diff --git a/documentation/tf_df_in_tf_js.md b/documentation/tf_df_in_tf_js.md
index 0ef67888..98d46488 100644
--- a/documentation/tf_df_in_tf_js.md
+++ b/documentation/tf_df_in_tf_js.md
@@ -45,9 +45,10 @@ import tensorflow as tf
 import tensorflow_decision_forests as tfdf
 import tensorflowjs as tfjs
 from google.colab import files
+import tf_keras

 # Load the model with Keras
-model = tf.keras.models.load_model("/tmp/my_saved_model/")
+model = tf_keras.models.load_model("/tmp/my_saved_model/")

 # Convert the keras model to TensorFlow.js
 tfjs.converters.tf_saved_model_conversion_v2.convert_keras_model_to_graph_model(model, "./tfjs_model")

diff --git a/documentation/tutorials/advanced_colab.ipynb b/documentation/tutorials/advanced_colab.ipynb
index 9ffa076b..6a7d151a 100644
--- a/documentation/tutorials/advanced_colab.ipynb
+++ b/documentation/tutorials/advanced_colab.ipynb
@@ -639,7 +639,7 @@
    },
    "outputs": [],
    "source": [
-    "manual_model = tf.keras.models.load_model(\"/tmp/manual_model\")"
+    "manual_model = tf_keras.models.load_model(\"/tmp/manual_model\")"
    ]
   },
   {

diff --git a/documentation/tutorials/beginner_colab.ipynb b/documentation/tutorials/beginner_colab.ipynb
index a5beb153..c39baa6d 100644
--- a/documentation/tutorials/beginner_colab.ipynb
+++ b/documentation/tutorials/beginner_colab.ipynb
@@ -1078,16 +1078,16 @@
    "source": [
     "%set_cell_height 300\n",
     "\n",
-    "body_mass_g = tf.keras.layers.Input(shape=(1,), name=\"body_mass_g\")\n",
+    "body_mass_g = tf_keras.layers.Input(shape=(1,), name=\"body_mass_g\")\n",
     "body_mass_kg = body_mass_g / 1000.0\n",
     "\n",
-    "bill_length_mm = tf.keras.layers.Input(shape=(1,), name=\"bill_length_mm\")\n",
+    "bill_length_mm = tf_keras.layers.Input(shape=(1,), name=\"bill_length_mm\")\n",
     "\n",
     "raw_inputs = {\"body_mass_g\": body_mass_g, \"bill_length_mm\": bill_length_mm}\n",
     "processed_inputs = {\"body_mass_kg\": body_mass_kg, \"bill_length_mm\": bill_length_mm}\n",
     "\n",
     "# \"preprocessor\" contains the preprocessing logic.\n",
-    "preprocessor = tf.keras.Model(inputs=raw_inputs, outputs=processed_inputs)\n",
+    "preprocessor = tf_keras.Model(inputs=raw_inputs, outputs=processed_inputs)\n",
     "\n",
     "# \"model_4\" contains both the pre-processing logic and the decision forest.\n",
     "model_4 = tfdf.keras.RandomForestModel(preprocessing=preprocessor)\n",
@@ -1122,7 +1122,7 @@
     "  tf.feature_column.numeric_column(\"bill_length_mm\"),\n",
     "]\n",
     "\n",
-    "preprocessing = tf.keras.layers.DenseFeatures(feature_columns)\n",
+    "preprocessing = tf_keras.layers.DenseFeatures(feature_columns)\n",
     "\n",
     "model_5 = tfdf.keras.RandomForestModel(preprocessing=preprocessing)\n",
     "model_5.fit(train_ds)"
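The beginner-notebook hunk above wires a `tf_keras` functional model in front of a TF-DF model. A condensed, self-contained sketch of that pattern (feature names follow the Penguins example used in the notebook):

```python
import tensorflow_decision_forests as tfdf
import tf_keras

# Raw inputs, keyed by feature name.
body_mass_g = tf_keras.layers.Input(shape=(1,), name="body_mass_g")
bill_length_mm = tf_keras.layers.Input(shape=(1,), name="bill_length_mm")

# A functional model that rescales grams to kilograms.
preprocessor = tf_keras.Model(
    inputs={"body_mass_g": body_mass_g, "bill_length_mm": bill_length_mm},
    outputs={
        "body_mass_kg": body_mass_g / 1000.0,
        "bill_length_mm": bill_length_mm,
    },
)

# The forest consumes the preprocessed features; fit as usual afterwards.
model = tfdf.keras.RandomForestModel(preprocessing=preprocessor)
```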
diff --git a/documentation/tutorials/intermediate_colab.ipynb b/documentation/tutorials/intermediate_colab.ipynb
index 30d256fe..f19156ab 100644
--- a/documentation/tutorials/intermediate_colab.ipynb
+++ b/documentation/tutorials/intermediate_colab.ipynb
@@ -421,12 +421,12 @@
     "hub_url = \"https://tfhub.dev/google/universal-sentence-encoder/4\"\n",
     "embedding = hub.KerasLayer(hub_url)\n",
     "\n",
-    "sentence = tf.keras.layers.Input(shape=(), name=\"sentence\", dtype=tf.string)\n",
+    "sentence = tf_keras.layers.Input(shape=(), name=\"sentence\", dtype=tf.string)\n",
     "embedded_sentence = embedding(sentence)\n",
     "\n",
     "raw_inputs = {\"sentence\": sentence}\n",
     "processed_inputs = {\"embedded_sentence\": embedded_sentence}\n",
-    "preprocessor = tf.keras.Model(inputs=raw_inputs, outputs=processed_inputs)\n",
+    "preprocessor = tf_keras.Model(inputs=raw_inputs, outputs=processed_inputs)\n",
     "\n",
     "model_2 = tfdf.keras.RandomForestModel(\n",
     "    preprocessing=preprocessor,\n",
@@ -621,8 +621,8 @@
    },
    "outputs": [],
    "source": [
-    "input_1 = tf.keras.Input(shape=(1,), name=\"bill_length_mm\", dtype=\"float\")\n",
-    "input_2 = tf.keras.Input(shape=(1,), name=\"island\", dtype=\"string\")\n",
+    "input_1 = tf_keras.Input(shape=(1,), name=\"bill_length_mm\", dtype=\"float\")\n",
+    "input_2 = tf_keras.Input(shape=(1,), name=\"island\", dtype=\"string\")\n",
     "\n",
     "nn_raw_inputs = [input_1, input_2]"
    ]
   },
@@ -645,9 +645,9 @@
    "outputs": [],
    "source": [
     "# Normalization.\n",
-    "Normalization = tf.keras.layers.Normalization\n",
-    "CategoryEncoding = tf.keras.layers.CategoryEncoding\n",
-    "StringLookup = tf.keras.layers.StringLookup\n",
+    "Normalization = tf_keras.layers.Normalization\n",
+    "CategoryEncoding = tf_keras.layers.CategoryEncoding\n",
+    "StringLookup = tf_keras.layers.StringLookup\n",
     "\n",
     "values = train_ds_pd[\"bill_length_mm\"].values[:, tf.newaxis]\n",
     "input_1_normalizer = Normalization()\n",
@@ -682,15 +682,15 @@
    },
    "outputs": [],
    "source": [
-    "y = tf.keras.layers.Concatenate()(nn_processed_inputs)\n",
-    "y = tf.keras.layers.Dense(16, activation=tf.nn.relu6)(y)\n",
-    "last_layer = tf.keras.layers.Dense(8, activation=tf.nn.relu, name=\"last\")(y)\n",
+    "y = tf_keras.layers.Concatenate()(nn_processed_inputs)\n",
+    "y = tf_keras.layers.Dense(16, activation=tf.nn.relu6)(y)\n",
+    "last_layer = tf_keras.layers.Dense(8, activation=tf.nn.relu, name=\"last\")(y)\n",
     "\n",
     "# \"3\" for the three label classes. If it were a binary classification, the\n",
     "# output dim would be 1.\n",
-    "classification_output = tf.keras.layers.Dense(3)(y)\n",
+    "classification_output = tf_keras.layers.Dense(3)(y)\n",
     "\n",
-    "nn_model = tf.keras.models.Model(nn_raw_inputs, classification_output)"
+    "nn_model = tf_keras.models.Model(nn_raw_inputs, classification_output)"
    ]
   },
   {
@@ -714,7 +714,7 @@
    "source": [
     "# To reduce the risk of mistakes, group both the decision forest and the\n",
     "# neural network in a single keras model.\n",
-    "nn_without_head = tf.keras.models.Model(inputs=nn_model.inputs, outputs=last_layer)\n",
+    "nn_without_head = tf_keras.models.Model(inputs=nn_model.inputs, outputs=last_layer)\n",
     "df_and_nn_model = tfdf.keras.RandomForestModel(preprocessing=nn_without_head)"
    ]
   },
@@ -740,8 +740,8 @@
     "%set_cell_height 300\n",
     "\n",
     "nn_model.compile(\n",
-    "    optimizer=tf.keras.optimizers.Adam(),\n",
-    "    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n",
+    "    optimizer=tf_keras.optimizers.Adam(),\n",
+    "    loss=tf_keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n",
     "    metrics=[\"accuracy\"])\n",
     "\n",
     "nn_model.fit(x=train_ds, validation_data=test_ds, epochs=10)\n",
diff --git a/documentation/tutorials/model_composition_colab.ipynb b/documentation/tutorials/model_composition_colab.ipynb
index 3e717a98..9ae06bdd 100644
--- a/documentation/tutorials/model_composition_colab.ipynb
+++ b/documentation/tutorials/model_composition_colab.ipynb
@@ -414,25 +414,25 @@
    "outputs": [],
    "source": [
     "# Input features.\n",
-    "raw_features = tf.keras.layers.Input(shape=(num_features,))\n",
+    "raw_features = tf_keras.layers.Input(shape=(num_features,))\n",
     "\n",
     "# Stage 1\n",
     "# =======\n",
     "\n",
     "# Common learnable pre-processing\n",
-    "preprocessor = tf.keras.layers.Dense(10, activation=tf.nn.relu6)\n",
+    "preprocessor = tf_keras.layers.Dense(10, activation=tf.nn.relu6)\n",
     "preprocess_features = preprocessor(raw_features)\n",
     "\n",
     "# Stage 2\n",
     "# =======\n",
     "\n",
     "# Model #1: NN\n",
-    "m1_z1 = tf.keras.layers.Dense(5, activation=tf.nn.relu6)(preprocess_features)\n",
-    "m1_pred = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)(m1_z1)\n",
+    "m1_z1 = tf_keras.layers.Dense(5, activation=tf.nn.relu6)(preprocess_features)\n",
+    "m1_pred = tf_keras.layers.Dense(1, activation=tf.nn.sigmoid)(m1_z1)\n",
     "\n",
     "# Model #2: NN\n",
-    "m2_z1 = tf.keras.layers.Dense(5, activation=tf.nn.relu6)(preprocess_features)\n",
-    "m2_pred = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)(m2_z1)\n",
+    "m2_z1 = tf_keras.layers.Dense(5, activation=tf.nn.relu6)(preprocess_features)\n",
+    "m2_pred = tf_keras.layers.Dense(1, activation=tf.nn.sigmoid)(m2_z1)\n",
     "\n",
     "\n",
     "# Model #3: DF\n",
@@ -460,8 +460,8 @@
     "# Keras Models\n",
     "# ============\n",
     "\n",
-    "ensemble_nn_only = tf.keras.models.Model(raw_features, mean_nn_only)\n",
-    "ensemble_nn_and_df = tf.keras.models.Model(raw_features, mean_nn_and_df)"
+    "ensemble_nn_only = tf_keras.models.Model(raw_features, mean_nn_only)\n",
+    "ensemble_nn_and_df = tf_keras.models.Model(raw_features, mean_nn_and_df)"
    ]
   },
   {
@@ -509,8 +509,8 @@
    "source": [
     "%%time\n",
     "ensemble_nn_only.compile(\n",
-    "    optimizer=tf.keras.optimizers.Adam(),\n",
-    "    loss=tf.keras.losses.BinaryCrossentropy(),\n",
+    "    optimizer=tf_keras.optimizers.Adam(),\n",
+    "    loss=tf_keras.losses.BinaryCrossentropy(),\n",
     "    metrics=[\"accuracy\"])\n",
     "\n",
     "ensemble_nn_only.fit(train_dataset, epochs=20, validation_data=test_dataset)"
    ]
   },
@@ -610,7 +610,7 @@
    "outputs": [],
    "source": [
     "ensemble_nn_and_df.compile(\n",
-    "    loss=tf.keras.losses.BinaryCrossentropy(), metrics=[\"accuracy\"])\n",
+    "    loss=tf_keras.losses.BinaryCrossentropy(), metrics=[\"accuracy\"])\n",
     "\n",
     "evaluation_nn_and_df = ensemble_nn_and_df.evaluate(\n",
     "    test_dataset, return_dict=True)\n",
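The model-composition hunks above average several heads over a shared preprocessing stage. A condensed sketch of the neural-network-only part of that pattern (`num_features` is assumed; the notebook then mixes a TF-DF model into the average the same way):

```python
import tensorflow as tf
import tf_keras

num_features = 10  # Hypothetical feature count.
raw_features = tf_keras.layers.Input(shape=(num_features,))
shared = tf_keras.layers.Dense(10, activation=tf.nn.relu6)(raw_features)

# Two small neural heads over the shared representation.
m1_pred = tf_keras.layers.Dense(1, activation=tf.nn.sigmoid)(
    tf_keras.layers.Dense(5, activation=tf.nn.relu6)(shared))
m2_pred = tf_keras.layers.Dense(1, activation=tf.nn.sigmoid)(
    tf_keras.layers.Dense(5, activation=tf.nn.relu6)(shared))

# Average the heads to form the ensemble prediction.
mean_nn_only = tf_keras.layers.Average()([m1_pred, m2_pred])
ensemble_nn_only = tf_keras.models.Model(raw_features, mean_nn_only)
```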
diff --git a/documentation/tutorials/predict_colab.ipynb b/documentation/tutorials/predict_colab.ipynb
index cd1d7c8f..52958843 100644
--- a/documentation/tutorials/predict_colab.ipynb
+++ b/documentation/tutorials/predict_colab.ipynb
@@ -93,7 +93,7 @@
     "\n",
     "While abstracted by the Keras API, a model instantiated in Python (e.g., with\n",
     "`tfdf.keras.RandomForestModel()`) and a model loaded from disk (e.g., with\n",
-    "`tf.keras.models.load_model()`) can behave differently. Notably, a Python\n",
+    "`tf_keras.models.load_model()`) can behave differently. Notably, a Python\n",
     "instantiated model automatically applies necessary type conversions. For\n",
     "example, if a `float64` feature is fed to a model expecting a `float32` feature,\n",
     "this conversion is performed implicitly. However, such a conversion is not\n",

diff --git a/documentation/tutorials/ranking_colab.ipynb b/documentation/tutorials/ranking_colab.ipynb
index 675154c0..b6c018af 100644
--- a/documentation/tutorials/ranking_colab.ipynb
+++ b/documentation/tutorials/ranking_colab.ipynb
@@ -244,7 +244,7 @@
    },
    "outputs": [],
    "source": [
-    "archive_path = tf.keras.utils.get_file(\"letor.zip\",\n",
+    "archive_path = tf_keras.utils.get_file(\"letor.zip\",\n",
     "  \"https://download.microsoft.com/download/E/7/E/E7EABEF1-4C7B-4E31-ACE5-73927950ED5E/Letor.zip\",\n",
     "  extract=True)\n",
     "\n",

diff --git a/examples/BUILD b/examples/BUILD
index 8de36c81..cf5cbd77 100644
--- a/examples/BUILD
+++ b/examples/BUILD
@@ -14,6 +14,7 @@ py_binary(
         # pandas dep,
         # TensorFlow Python,
         "//tensorflow_decision_forests",
+        # tf_keras dep,
     ],
 )

@@ -28,6 +29,7 @@ py_binary(
         # pandas dep,
         # TensorFlow Python,
         "//tensorflow_decision_forests",
+        # tf_keras dep,
     ],
 )

@@ -43,6 +45,7 @@ py_binary(
         # pandas dep,
         # TensorFlow Python,
         "//tensorflow_decision_forests",
+        # tf_keras dep,
     ],
 )

@@ -56,5 +59,6 @@ py_binary(
         # absl/logging dep,
         # TensorFlow Python,
         "//tensorflow_decision_forests",
+        # tf_keras dep,
     ],
 )

diff --git a/examples/hyperparameter_optimization.py b/examples/hyperparameter_optimization.py
index 23a26028..98318ab0 100644
--- a/examples/hyperparameter_optimization.py
+++ b/examples/hyperparameter_optimization.py
@@ -36,14 +36,14 @@
 import pandas as pd
 import tensorflow as tf
 import tensorflow_decision_forests as tfdf
-
+import tf_keras

 def main(argv):
   if len(argv) > 1:
     raise app.UsageError("Too many command-line arguments.")

   # Download the Adult dataset.
-  dataset_path = tf.keras.utils.get_file(
+  dataset_path = tf_keras.utils.get_file(
       "adult.csv",
       "https://raw.githubusercontent.com/google/yggdrasil-decision-forests/"
       "main/yggdrasil_decision_forests/test_data/dataset/adult.csv")

diff --git a/examples/minimal.py b/examples/minimal.py
index 2f1a1f1e..4c951297 100644
--- a/examples/minimal.py
+++ b/examples/minimal.py
@@ -30,11 +30,11 @@
 """

 from absl import app
-
 import numpy as np
 import pandas as pd
 import tensorflow as tf
 import tensorflow_decision_forests as tfdf
+import tf_keras


 def main(argv):
@@ -42,10 +42,11 @@ def main(argv):
     raise app.UsageError("Too many command-line arguments.")

   # Download the Adult dataset.
-  dataset_path = tf.keras.utils.get_file(
+  dataset_path = tf_keras.utils.get_file(
       "adult.csv",
       "https://raw.githubusercontent.com/google/yggdrasil-decision-forests/"
-      "main/yggdrasil_decision_forests/test_data/dataset/adult.csv")
+      "main/yggdrasil_decision_forests/test_data/dataset/adult.csv",
+  )

   # Load a dataset into a Pandas Dataframe.
   dataset_df = pd.read_csv(dataset_path)  # "df" for Pandas's DataFrame.
@@ -61,8 +62,10 @@ def main(argv):
   test_indices = np.random.rand(len(dataset_df)) < 0.30
   test_ds_pd = dataset_df[test_indices]
   train_ds_pd = dataset_df[~test_indices]
-  print(f"{len(train_ds_pd)} examples in training"
-        f", {len(test_ds_pd)} examples for testing.")
+  print(
+      f"{len(train_ds_pd)} examples in training"
+      f", {len(test_ds_pd)} examples for testing."
+  )

   # Converts datasets from Pandas dataframe to TensorFlow dataset format.
   train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_ds_pd, label="income")
diff --git a/tensorflow_decision_forests/__init__.py b/tensorflow_decision_forests/__init__.py
index c561f2e5..5da90f96 100644
--- a/tensorflow_decision_forests/__init__.py
+++ b/tensorflow_decision_forests/__init__.py
@@ -46,7 +46,7 @@
 # ...

 # Load a model: it loads as a generic keras model.
-loaded_model = tf.keras.models.load_model("/tmp/my_saved_model")
+loaded_model = tf_keras.models.load_model("/tmp/my_saved_model")
 ```
 """

diff --git a/tensorflow_decision_forests/component/builder/builder.py b/tensorflow_decision_forests/component/builder/builder.py
index fb27b755..833e849f 100644
--- a/tensorflow_decision_forests/component/builder/builder.py
+++ b/tensorflow_decision_forests/component/builder/builder.py
@@ -106,7 +106,7 @@
 builder.close()

 # Load and use the model
-model = tf.keras.models.load_model("/path/to/model")
+model = tf_keras.models.load_model("/path/to/model")
 predictions = model.predict(...)
 ```
 """
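The builder docstring patched above ends with the load-and-predict step. For context, a compact round-trip sketch under the `tfdf.builder` / `tfdf.py_tree` aliases that TF-DF exports (label and path are illustrative):

```python
import tensorflow_decision_forests as tfdf
import tf_keras

# Hand-build a one-leaf regression CART and write it as a SavedModel.
builder = tfdf.builder.CARTBuilder(
    path="/tmp/manual_model",
    objective=tfdf.py_tree.objective.RegressionObjective(label="age"),
)
builder.add_tree(
    tfdf.py_tree.tree.Tree(
        tfdf.py_tree.node.LeafNode(
            tfdf.py_tree.value.RegressionValue(value=2.0, num_examples=30)
        )
    )
)
builder.close()

# The result loads back as a generic Keras model.
model = tf_keras.models.load_model("/tmp/manual_model")
```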
diff --git a/tensorflow_decision_forests/component/builder/builder_test.py b/tensorflow_decision_forests/component/builder/builder_test.py
index 33079c96..b24983cd 100644
--- a/tensorflow_decision_forests/component/builder/builder_test.py
+++ b/tensorflow_decision_forests/component/builder/builder_test.py
@@ -25,6 +25,7 @@
 import numpy as np
 import pandas as pd
 import tensorflow as tf
+import tf_keras

 from tensorflow_decision_forests import keras
 from tensorflow_decision_forests.component import py_tree
@@ -48,8 +49,9 @@ def data_root_path() -> str:


 def test_data_path() -> str:
-  return os.path.join(data_root_path(),
-                      "external/ydf/yggdrasil_decision_forests/test_data")
+  return os.path.join(
+      data_root_path(), "external/ydf/yggdrasil_decision_forests/test_data"
+  )


 def tmp_path() -> str:
@@ -94,47 +96,64 @@ def test_classification_random_forest(self, file_prefix, model_name):
           NonLeafNode(
               condition=NumericalHigherThanCondition(
                   feature=SimpleColumnSpec(
-                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL),
+                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL
+                  ),
                   threshold=1.5,
-                  missing_evaluation=False),
+                  missing_evaluation=False,
+              ),
               pos_child=NonLeafNode(
                   condition=CategoricalIsInCondition(
                       feature=SimpleColumnSpec(
                           name="f2",
-                          type=py_tree.dataspec.ColumnType.CATEGORICAL),
+                          type=py_tree.dataspec.ColumnType.CATEGORICAL,
+                      ),
                       mask=["cat", "dog"],
-                      missing_evaluation=False),
+                      missing_evaluation=False,
+                  ),
                   pos_child=LeafNode(
                       value=ProbabilityValue(
-                          probability=[0.8, 0.1, 0.1], num_examples=10)),
+                          probability=[0.8, 0.1, 0.1], num_examples=10
+                      )
+                  ),
                   neg_child=LeafNode(
                       value=ProbabilityValue(
-                          probability=[0.1, 0.8, 0.1], num_examples=20))),
+                          probability=[0.1, 0.8, 0.1], num_examples=20
+                      )
+                  ),
+              ),
               neg_child=LeafNode(
                   value=ProbabilityValue(
-                      probability=[0.1, 0.1, 0.8], num_examples=30)))))
+                      probability=[0.1, 0.1, 0.8], num_examples=30
+                  )
+              ),
+          )
+      )
+  )

   builder.close()

   if file_prefix is not None:
     self.assertEqual(
         inspector_lib.detect_model_file_prefix(
-            os.path.join(model_path, "assets")), file_prefix)
+            os.path.join(model_path, "assets")
+        ),
+        file_prefix,
+    )

   logging.info("Loading model")
-  loaded_model = tf.keras.models.load_model(model_path)
+  loaded_model = tf_keras.models.load_model(model_path)
   expected_model_name = (
       "inference_core_model" if model_name is None else model_name
   )
   self.assertEqual(loaded_model.name, expected_model_name)

   logging.info("Make predictions")
-  tf_dataset = tf.data.Dataset.from_tensor_slices({
-      "f1": [1.0, 2.0, 3.0],
-      "f2": ["cat", "cat", "bird"]
-  }).batch(2)
+  tf_dataset = tf.data.Dataset.from_tensor_slices(
+      {"f1": [1.0, 2.0, 3.0], "f2": ["cat", "cat", "bird"]}
+  ).batch(2)
   predictions = loaded_model.predict(tf_dataset)
-  self.assertAllClose(predictions,
-                      [[0.1, 0.1, 0.8], [0.8, 0.1, 0.1], [0.1, 0.8, 0.1]])
+  self.assertAllClose(
+      predictions, [[0.1, 0.1, 0.8], [0.8, 0.1, 0.1], [0.1, 0.8, 0.1]]
+  )

 @parameterized.parameters((None,), ("",), ("prefix_",))
 def test_classification_cart(self, file_prefix):
@@ -160,44 +179,61 @@ def test_classification_cart(self, file_prefix):
           NonLeafNode(
               condition=NumericalHigherThanCondition(
                   feature=SimpleColumnSpec(
-                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL),
+                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL
+                  ),
                   threshold=1.5,
-                  missing_evaluation=False),
+                  missing_evaluation=False,
+              ),
               pos_child=NonLeafNode(
                   condition=CategoricalIsInCondition(
                       feature=SimpleColumnSpec(
                           name="f2",
-                          type=py_tree.dataspec.ColumnType.CATEGORICAL),
+                          type=py_tree.dataspec.ColumnType.CATEGORICAL,
+                      ),
                       mask=["cat", "dog"],
-                      missing_evaluation=False),
+                      missing_evaluation=False,
+                  ),
                   pos_child=LeafNode(
                       value=ProbabilityValue(
-                          probability=[0.8, 0.1, 0.1], num_examples=10)),
+                          probability=[0.8, 0.1, 0.1], num_examples=10
+                      )
+                  ),
                   neg_child=LeafNode(
                       value=ProbabilityValue(
-                          probability=[0.1, 0.8, 0.1], num_examples=20))),
+                          probability=[0.1, 0.8, 0.1], num_examples=20
+                      )
+                  ),
+              ),
               neg_child=LeafNode(
                   value=ProbabilityValue(
-                      probability=[0.1, 0.1, 0.8], num_examples=30)))))
+                      probability=[0.1, 0.1, 0.8], num_examples=30
+                  )
+              ),
+          )
+      )
+  )

   builder.close()

   if file_prefix is not None:
     self.assertEqual(
         inspector_lib.detect_model_file_prefix(
-            os.path.join(model_path, "assets")), file_prefix)
+            os.path.join(model_path, "assets")
+        ),
+        file_prefix,
+    )

   logging.info("Loading model")
-  loaded_model = tf.keras.models.load_model(model_path)
+  loaded_model = tf_keras.models.load_model(model_path)
   self.assertEqual(loaded_model.name, "classification_cart")

   logging.info("Make predictions")
-  tf_dataset = tf.data.Dataset.from_tensor_slices({
-      "f1": [1.0, 2.0, 3.0],
-      "f2": ["cat", "cat", "bird"]
-  }).batch(2)
+  tf_dataset = tf.data.Dataset.from_tensor_slices(
+      {"f1": [1.0, 2.0, 3.0], "f2": ["cat", "cat", "bird"]}
+  ).batch(2)
   predictions = loaded_model.predict(tf_dataset)
-  self.assertAllClose(predictions,
-                      [[0.1, 0.1, 0.8], [0.8, 0.1, 0.1], [0.1, 0.8, 0.1]])
+  self.assertAllClose(
+      predictions, [[0.1, 0.1, 0.8], [0.8, 0.1, 0.1], [0.1, 0.8, 0.1]]
+  )

 def test_regression_random_forest(self):
   model_path = os.path.join(tmp_path(), "regression_rf")
@@ -205,7 +241,8 @@ def test_regression_random_forest(self):
   builder = builder_lib.RandomForestBuilder(
       path=model_path,
       model_format=builder_lib.ModelFormat.TENSORFLOW_SAVED_MODEL,
-      objective=py_tree.objective.RegressionObjective(label="age"))
+      objective=py_tree.objective.RegressionObjective(label="age"),
+  )

   # f1>=1.5
   # ├─(pos)─ age: 1
   # └─(neg)─ age: 2
   builder.add_tree(
       Tree(
           NonLeafNode(
               condition=NumericalHigherThanCondition(
                   feature=SimpleColumnSpec(
-                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL),
+                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL
+                  ),
                   threshold=1.5,
-                  missing_evaluation=False),
+                  missing_evaluation=False,
+              ),
               pos_child=LeafNode(
-                  value=RegressionValue(value=1, num_examples=30)),
+                  value=RegressionValue(value=1, num_examples=30)
+              ),
               neg_child=LeafNode(
-                  value=RegressionValue(value=2, num_examples=30)))))
+                  value=RegressionValue(value=2, num_examples=30)
+              ),
+          )
+      )
+  )

   builder.close()

   logging.info("Loading model")
-  loaded_model = tf.keras.models.load_model(model_path)
+  loaded_model = tf_keras.models.load_model(model_path)

   logging.info("Make predictions")
   tf_dataset = tf.data.Dataset.from_tensor_slices({
@@ -243,7 +287,9 @@ def test_regression_random_forest_with_categorical_integer(self):
       model_format=builder_lib.ModelFormat.TENSORFLOW_SAVED_MODEL,
       objective=py_tree.objective.RegressionObjective(label="age"),
       advanced_arguments=builder_lib.AdvancedArguments(
-          disable_categorical_integer_offset_correction=True))
+          disable_categorical_integer_offset_correction=True
+      ),
+  )

   # f1 in [2,3]
   # ├─(pos)─ age: 1
   # └─(neg)─ age: 2
@@ -253,19 +299,25 @@
   builder.add_tree(
       Tree(
           NonLeafNode(
               condition=CategoricalIsInCondition(
                   feature=SimpleColumnSpec(
-                      name="f1",
-                      type=py_tree.dataspec.ColumnType.CATEGORICAL),
+                      name="f1", type=py_tree.dataspec.ColumnType.CATEGORICAL
+                  ),
                   mask=[2, 3],
-                  missing_evaluation=False),
+                  missing_evaluation=False,
+              ),
               pos_child=LeafNode(
-                  value=RegressionValue(value=1, num_examples=30)),
+                  value=RegressionValue(value=1, num_examples=30)
+              ),
               neg_child=LeafNode(
-                  value=RegressionValue(value=2, num_examples=30)))))
+                  value=RegressionValue(value=2, num_examples=30)
+              ),
+          )
+      )
+  )

   builder.close()

   logging.info("Loading model")
-  loaded_model = tf.keras.models.load_model(model_path)
+  loaded_model = tf_keras.models.load_model(model_path)

   logging.info("Make predictions")
   tf_dataset = tf.data.Dataset.from_tensor_slices({
@@ -296,18 +348,25 @@ def test_binary_classification_gbt(self):
   builder.add_tree(
       Tree(
           NonLeafNode(
               condition=NumericalHigherThanCondition(
                   feature=SimpleColumnSpec(
-                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL),
+                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL
+                  ),
                   threshold=1.5,
-                  missing_evaluation=False),
+                  missing_evaluation=False,
+              ),
               pos_child=LeafNode(
-                  value=RegressionValue(value=+1, num_examples=30)),
+                  value=RegressionValue(value=+1, num_examples=30)
+              ),
               neg_child=LeafNode(
-                  value=RegressionValue(value=-1, num_examples=30)))))
+                  value=RegressionValue(value=-1, num_examples=30)
+              ),
+          )
+      )
+  )

   builder.close()

   logging.info("Loading model")
-  loaded_model = tf.keras.models.load_model(model_path)
+  loaded_model = tf_keras.models.load_model(model_path)
   self.assertEqual(loaded_model.name, "binary_classification_gbt")

   logging.info("Make predictions")
   tf_dataset = tf.data.Dataset.from_tensor_slices({
@@ -316,7 +375,8 @@
   predictions = loaded_model.predict(tf_dataset)
   self.assertAllClose(
       predictions,
-      [[1.0 / (1.0 + math.exp(0.0))], [1.0 / (1.0 + math.exp(-2.0))]])
+      [[1.0 / (1.0 + math.exp(0.0))], [1.0 / (1.0 + math.exp(-2.0))]],
+  )

 @parameterized.parameters((None,), ("",), ("prefix_",))
 def test_multi_class_classification_gbt(self, file_prefix):
@@ -326,8 +386,10 @@ def test_multi_class_classification_gbt(self, file_prefix):
   builder = builder_lib.GradientBoostedTreeBuilder(
       path=model_path,
       model_format=builder_lib.ModelFormat.TENSORFLOW_SAVED_MODEL,
       objective=py_tree.objective.ClassificationObjective(
-          label="color", classes=["red", "blue", "green"]),
-      file_prefix=file_prefix)
+          label="color", classes=["red", "blue", "green"]
+      ),
+      file_prefix=file_prefix,
+  )

   # f1>=1.5
   # ├─(pos)─ +1.0 (toward "red")
   # └─(neg)─ -1.0 (toward "red")
@@ -345,24 +407,33 @@
       Tree(
           NonLeafNode(
               condition=NumericalHigherThanCondition(
                   feature=SimpleColumnSpec(
-                      name="f1",
-                      type=py_tree.dataspec.ColumnType.NUMERICAL),
+                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL
+                  ),
                   threshold=threshold,
-                  missing_evaluation=False),
+                  missing_evaluation=False,
+              ),
               pos_child=LeafNode(
-                  value=RegressionValue(value=+1, num_examples=30)),
+                  value=RegressionValue(value=+1, num_examples=30)
+              ),
               neg_child=LeafNode(
-                  value=RegressionValue(value=-1, num_examples=30)))))
+                  value=RegressionValue(value=-1, num_examples=30)
+              ),
+          )
+      )
+  )

   builder.close()

   if file_prefix is not None:
     self.assertEqual(
         inspector_lib.detect_model_file_prefix(
-            os.path.join(model_path, "assets")), file_prefix)
+            os.path.join(model_path, "assets")
+        ),
+        file_prefix,
+    )

   logging.info("Loading model")
-  loaded_model = tf.keras.models.load_model(model_path)
+  loaded_model = tf_keras.models.load_model(model_path)

   logging.info("Make predictions")
   tf_dataset = tf.data.Dataset.from_tensor_slices({
@@ -371,12 +442,17 @@
   predictions = loaded_model.predict(tf_dataset)
   soft_max_sum = np.sum(np.exp([+1, -1, -1]))
-  self.assertAllClose(predictions, [[1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0],
-                                    [
-                                        math.exp(+1) / soft_max_sum,
-                                        math.exp(-1) / soft_max_sum,
-                                        math.exp(-1) / soft_max_sum
-                                    ]])
+  self.assertAllClose(
+      predictions,
+      [
+          [1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0],
+          [
+              math.exp(+1) / soft_max_sum,
+              math.exp(-1) / soft_max_sum,
+              math.exp(-1) / soft_max_sum,
+          ],
+      ],
+  )

 def test_regression_gbt(self):
   model_path = os.path.join(tmp_path(), "regression_gbt")
@@ -385,7 +461,8 @@ def test_regression_gbt(self):
   builder = builder_lib.GradientBoostedTreeBuilder(
       path=model_path,
       bias=1.0,
       model_format=builder_lib.ModelFormat.TENSORFLOW_SAVED_MODEL,
-      objective=py_tree.objective.RegressionObjective(label="age"))
+      objective=py_tree.objective.RegressionObjective(label="age"),
+  )

   # bias: 1.0
   # f1>=1.5
@@ -396,18 +473,25 @@
   builder.add_tree(
       Tree(
           NonLeafNode(
               condition=NumericalHigherThanCondition(
                   feature=SimpleColumnSpec(
-                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL),
+                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL
+                  ),
                   threshold=1.5,
-                  missing_evaluation=False),
+                  missing_evaluation=False,
+              ),
               pos_child=LeafNode(
-                  value=RegressionValue(value=+1, num_examples=30)),
+                  value=RegressionValue(value=+1, num_examples=30)
+              ),
               neg_child=LeafNode(
-                  value=RegressionValue(value=-1, num_examples=30)))))
+                  value=RegressionValue(value=-1, num_examples=30)
+              ),
+          )
+      )
+  )

   builder.close()

   logging.info("Loading model")
-  loaded_model = tf.keras.models.load_model(model_path)
+  loaded_model = tf_keras.models.load_model(model_path)

   logging.info("Make predictions")
   tf_dataset = tf.data.Dataset.from_tensor_slices({
@@ -424,7 +508,9 @@ def test_ranking_gbt(self):
       bias=1.0,
       model_format=builder_lib.ModelFormat.TENSORFLOW_SAVED_MODEL,
       objective=py_tree.objective.RankingObjective(
-          label="document", group="query"))
+          label="document", group="query"
+      ),
+  )

   # bias: 1.0
   # f1>=1.5
@@ -435,18 +521,25 @@
   builder.add_tree(
       Tree(
           NonLeafNode(
               condition=NumericalHigherThanCondition(
                   feature=SimpleColumnSpec(
-                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL),
+                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL
+                  ),
                   threshold=1.5,
-                  missing_evaluation=False),
+                  missing_evaluation=False,
+              ),
               pos_child=LeafNode(
-                  value=RegressionValue(value=+1, num_examples=30)),
+                  value=RegressionValue(value=+1, num_examples=30)
+              ),
               neg_child=LeafNode(
-                  value=RegressionValue(value=-1, num_examples=30)))))
+                  value=RegressionValue(value=-1, num_examples=30)
+              ),
+          )
+      )
+  )

   builder.close()

   logging.info("Loading model")
-  loaded_model = tf.keras.models.load_model(model_path)
+  loaded_model = tf_keras.models.load_model(model_path)

   logging.info("Make predictions")
   tf_dataset = tf.data.Dataset.from_tensor_slices({
@@ -457,71 +550,104 @@

 def test_error_empty_path(self):
   self.assertRaises(
-      ValueError, lambda: builder_lib.RandomForestBuilder(
+      ValueError,
+      lambda: builder_lib.RandomForestBuilder(
           path="",
           model_format=builder_lib.ModelFormat.TENSORFLOW_SAVED_MODEL,
-          objective=py_tree.objective.RegressionObjective("label")))
+          objective=py_tree.objective.RegressionObjective("label"),
+      ),
+  )

 def test_error_multi_tree_cart(self):
   builder = builder_lib.CARTBuilder(
       path=os.path.join(tmp_path(), "model"),
-      objective=py_tree.objective.RegressionObjective("label"))
+      objective=py_tree.objective.RegressionObjective("label"),
+  )
   builder.add_tree(Tree(LeafNode(RegressionValue(1, 30))))
   self.assertRaises(
       ValueError,
-      lambda: builder.add_tree(Tree(LeafNode(RegressionValue(1, 30)))))
+      lambda: builder.add_tree(Tree(LeafNode(RegressionValue(1, 30)))),
+  )

 def test_error_reg_cart_with_class_tree(self):
   builder = builder_lib.CARTBuilder(
       path=os.path.join(tmp_path(), "model"),
-      objective=py_tree.objective.RegressionObjective("label"))
+      objective=py_tree.objective.RegressionObjective("label"),
+  )
   self.assertRaises(
-      ValueError, lambda: builder.add_tree(
+      ValueError,
+      lambda: builder.add_tree(
           Tree(
               LeafNode(
                   ProbabilityValue(
-                      probability=[0.8, 0.1, 0.1], num_examples=10)))))
+                      probability=[0.8, 0.1, 0.1], num_examples=10
+                  )
+              )
+          )
+      ),
+  )

 def test_error_class_cart_with_reg_tree(self):
   builder = builder_lib.CARTBuilder(
       path=os.path.join(tmp_path(), "model"),
       objective=py_tree.objective.ClassificationObjective(
-          "label", classes=["red", "blue"]))
+          "label", classes=["red", "blue"]
+      ),
+  )
   self.assertRaises(
       ValueError,
-      lambda: builder.add_tree(Tree(LeafNode(RegressionValue(1, 10)))))
+      lambda: builder.add_tree(Tree(LeafNode(RegressionValue(1, 10)))),
+  )

 def test_error_wrong_class_leaf_dim(self):
   builder = builder_lib.CARTBuilder(
       path=os.path.join(tmp_path(), "model"),
       objective=py_tree.objective.ClassificationObjective(
-          "label", classes=["red", "blue"]))
+          "label", classes=["red", "blue"]
+      ),
+  )
   self.assertRaises(
-      ValueError, lambda: builder.add_tree(
+      ValueError,
+      lambda: builder.add_tree(
          Tree(
              LeafNode(
                  ProbabilityValue(
-                      probability=[0.8, 0.1, 0.1], num_examples=10)))))
+                      probability=[0.8, 0.1, 0.1], num_examples=10
+                  )
+              )
+          )
+      ),
+  )

 def test_error_gbt_with_class_tree(self):
   builder = builder_lib.GradientBoostedTreeBuilder(
       path=os.path.join(tmp_path(), "model"),
       objective=py_tree.objective.ClassificationObjective(
-          "label", classes=["red", "blue", "green"]))
+          "label", classes=["red", "blue", "green"]
+      ),
+  )
   self.assertRaises(
-      ValueError, lambda: builder.add_tree(
+      ValueError,
+      lambda: builder.add_tree(
           Tree(
               LeafNode(
                   ProbabilityValue(
-                      probability=[0.8, 0.1, 0.1], num_examples=10)))))
+                      probability=[0.8, 0.1, 0.1], num_examples=10
+                  )
+              )
+          )
+      ),
+  )

 def test_error_gbt_wrong_number_of_trees(self):
   builder = builder_lib.GradientBoostedTreeBuilder(
       path=os.path.join(tmp_path(), "model"),
       objective=py_tree.objective.ClassificationObjective(
-          "label", classes=["red", "blue", "green"]))
+          "label", classes=["red", "blue", "green"]
+      ),
+  )
   builder.add_tree(Tree(LeafNode(RegressionValue(1, num_examples=10))))
   self.assertRaises(ValueError, builder.close)

@@ -530,23 +656,33 @@ def test_get_set_dictionary(self):
   builder = builder_lib.RandomForestBuilder(
       path=os.path.join(tmp_path(), "model"),
       objective=py_tree.objective.ClassificationObjective(
-          "label", classes=["true", "false"]))
+          "label", classes=["true", "false"]
+      ),
+  )

   builder.add_tree(
       Tree(
           NonLeafNode(
               condition=CategoricalIsInCondition(
                   feature=SimpleColumnSpec(
-                      name="f1",
-                      type=py_tree.dataspec.ColumnType.CATEGORICAL),
+                      name="f1", type=py_tree.dataspec.ColumnType.CATEGORICAL
+                  ),
                   mask=["x", "y"],
-                  missing_evaluation=False),
+                  missing_evaluation=False,
+              ),
               pos_child=LeafNode(
                   value=ProbabilityValue(
-                      probability=[0.8, 0.2], num_examples=10)),
+                      probability=[0.8, 0.2], num_examples=10
+                  )
+              ),
               neg_child=LeafNode(
                   value=ProbabilityValue(
-                      probability=[0.2, 0.8], num_examples=20)))))
+                      probability=[0.2, 0.8], num_examples=20
+                  )
+              ),
+          )
+      )
+  )

   self.assertEqual(builder.get_dictionary("f1"), ["", "x", "y"])
   builder.set_dictionary("f1", ["", "x", "y", "z"])
@@ -565,17 +701,23 @@ def test_extract_random_forest(self):
   dataset = keras.pd_dataframe_to_tf_dataset(dataframe, "income")

   # Load an inspector to an existing model.
-  src_model_path = os.path.join(test_model_directory(),
-                                "adult_binary_class_rf")
+  src_model_path = os.path.join(
+      test_model_directory(), "adult_binary_class_rf"
+  )
   inspector = inspector_lib.make_inspector(src_model_path)

   # Extract a piece of this model
   def custom_model_input_signature(
-      inspector: inspector_lib.AbstractInspector):
+      inspector: inspector_lib.AbstractInspector,
+  ):
     input_spec = keras.build_default_input_model_signature(inspector)
     # Those features are stored as int64 in the dataset.
     for feature_name in [
-        "age", "fnlwgt", "capital_gain", "capital_loss", "hours_per_week"
+        "age",
+        "fnlwgt",
+        "capital_gain",
+        "capital_loss",
+        "hours_per_week",
     ]:
       input_spec[feature_name] = tf.TensorSpec(shape=[None], dtype=tf.int64)
     return input_spec
@@ -587,7 +729,8 @@ def custom_model_input_signature(
       # Make sure the features and feature dictionaries are the same as in the
       # original model.
       import_dataspec=inspector.dataspec,
-      input_signature_example_fn=custom_model_input_signature)
+      input_signature_example_fn=custom_model_input_signature,
+  )

   # Extract the first 5 trees
   for i in range(5):
@@ -596,7 +739,7 @@ def custom_model_input_signature(

   builder.close()

-  truncated_model = tf.keras.models.load_model(dst_model_path)
+  truncated_model = tf_keras.models.load_model(dst_model_path)
   predictions = truncated_model.predict(dataset)
   self.assertEqual(predictions.shape, (9769, 1))

@@ -607,7 +750,8 @@ def test_fast_serving_with_custom_numerical_default_evaluation(self):
   builder = builder_lib.RandomForestBuilder(
       path=model_path,
       bias=0.0,
       model_format=builder_lib.ModelFormat.TENSORFLOW_SAVED_MODEL,
-      objective=py_tree.objective.RegressionObjective(label="label"))
+      objective=py_tree.objective.RegressionObjective(label="label"),
+  )

   # f1>=-1.0 (default: false)
   # │
@@ -627,11 +771,14 @@ def condition(feature, threshold, missing_evaluation, pos, neg):
     return NonLeafNode(
         condition=NumericalHigherThanCondition(
             feature=SimpleColumnSpec(
-                name=feature, type=py_tree.dataspec.ColumnType.NUMERICAL),
+                name=feature, type=py_tree.dataspec.ColumnType.NUMERICAL
+            ),
             threshold=threshold,
-            missing_evaluation=missing_evaluation),
+            missing_evaluation=missing_evaluation,
+        ),
         pos_child=pos,
-        neg_child=neg)
+        neg_child=neg,
+    )

   def leaf(value):
     return LeafNode(RegressionValue(value=value, num_examples=1))
@@ -639,15 +786,20 @@ def leaf(value):
   builder.add_tree(
       Tree(
           condition(
-              "f1", -1.0, False, condition("f1", 2.0, False, leaf(1),
-                                           leaf(2)),
+              "f1",
+              -1.0,
+              False,
+              condition("f1", 2.0, False, leaf(1), leaf(2)),
               condition(
                   "f2",
                   -3.0,
                   True,
                   condition("f2", 4.0, False, leaf(3), leaf(4)),
                   leaf(5),
-              )))
+              ),
+          )
+      )
+  )

   builder.close()

   logging.info("Loading model")
@@ -658,7 +810,7 @@ def leaf(value):
   #
   # TODO:: Add API to check which inference engine is used.
-  loaded_model = tf.keras.models.load_model(model_path)
+  loaded_model = tf_keras.models.load_model(model_path)

   logging.info("Make predictions")
   tf_dataset = tf.data.Dataset.from_tensor_slices({

diff --git a/tensorflow_decision_forests/component/inspector/inspector_test.py b/tensorflow_decision_forests/component/inspector/inspector_test.py
index 8d86f0c2..d03c6452 100644
--- a/tensorflow_decision_forests/component/inspector/inspector_test.py
+++ b/tensorflow_decision_forests/component/inspector/inspector_test.py
@@ -23,6 +23,7 @@
 from absl.testing import parameterized
 import numpy as np
 import tensorflow as tf
+import tf_keras

 from tensorflow_decision_forests import keras
 from tensorflow_decision_forests.component import py_tree
@@ -338,7 +339,7 @@ def test_inspect_combined_model(self):
     features = tf.random.uniform(shape=[100, 2], minval=1, maxval=100)
     target = tf.random.uniform(shape=[100, 1], minval=25, maxval=50)
     dataset = tf.data.Dataset.from_tensor_slices((features, target)).batch(32)
-    inputs = tf.keras.Input(shape=(2,))
+    inputs = tf_keras.Input(shape=(2,))
     model_rf = keras.RandomForestModel(num_trees=10, task=keras.Task.REGRESSION)
     model_gbt = keras.GradientBoostedTreesModel(task=keras.Task.REGRESSION)

@@ -347,7 +348,7 @@ def model_gbt_preprocessing(x):
       return tf.concat([model_rf(x), x], axis=1)

     model_gbt_pred = model_gbt(model_gbt_preprocessing(inputs))
-    combined_model = tf.keras.models.Model(inputs, model_gbt_pred)
+    combined_model = tf_keras.models.Model(inputs, model_gbt_pred)

     # Train first model.
     model_rf.fit(dataset)
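A condensed version of the combined-model pattern the inspector test above exercises — the GBT consumes the random forest's prediction as an extra input feature (both sub-models are still trained with their own `fit` calls, as in the test):

```python
import tensorflow as tf
import tensorflow_decision_forests as tfdf
import tf_keras

inputs = tf_keras.Input(shape=(2,))
model_rf = tfdf.keras.RandomForestModel(num_trees=10, task=tfdf.keras.Task.REGRESSION)
model_gbt = tfdf.keras.GradientBoostedTreesModel(task=tfdf.keras.Task.REGRESSION)

# Feed the RF output, concatenated with the raw features, into the GBT.
gbt_inputs = tf.concat([model_rf(inputs), inputs], axis=1)
combined_model = tf_keras.models.Model(inputs, model_gbt(gbt_inputs))
```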
diff --git a/tensorflow_decision_forests/contrib/scikit_learn_model_converter/scikit_learn_model_converter.py b/tensorflow_decision_forests/contrib/scikit_learn_model_converter/scikit_learn_model_converter.py
index cfed099d..7497736b 100644
--- a/tensorflow_decision_forests/contrib/scikit_learn_model_converter/scikit_learn_model_converter.py
+++ b/tensorflow_decision_forests/contrib/scikit_learn_model_converter/scikit_learn_model_converter.py
@@ -27,10 +27,12 @@
 from sklearn import tree
 import tensorflow as tf
 import tensorflow_decision_forests as tfdf
+import tf_keras


 class TaskType(enum.Enum):
   """The type of task that a scikit-learn model performs."""
+
   UNKNOWN = 1
   SCALAR_REGRESSION = 2
   SINGLE_LABEL_CLASSIFICATION = 3
@@ -43,7 +45,7 @@ class TaskType(enum.Enum):
 def convert(
     sklearn_model: ScikitLearnModel,
     intermediate_write_path: Optional[os.PathLike] = None,
-) -> tf.keras.Model:
+) -> tf_keras.Model:
   """Converts a tree-based scikit-learn model to a tensorflow model.

   Currently supported models are:
@@ -66,8 +68,8 @@
     process, a TFDF model is written to disk. If intermediate_write_path is
     specified, the TFDF model is written to this directory. Otherwise, a
     temporary directory is created that is immediately removed after this
-    function executes. Note that in order to save the converted model and
-    load it again later, this argument must be provided.
+    function executes. Note that in order to save the converted model and load
+    it again later, this argument must be provided.

   Returns:
     a keras Model that emulates the provided scikit-learn model.
@@ -87,30 +89,35 @@
   # The resultant tfdf model only receives the features that are used
   # to split samples in nodes in the trees as input. But we want to pass the
   # full design matrix as an input to match the scikit-learn API, thus we
-  # create another tf.keras.Model with the desired call signature.
-  template_input = tf.keras.Input(shape=(sklearn_model.n_features_in_,))
+  # create another tf_keras.Model with the desired call signature.
+  template_input = tf_keras.Input(shape=(sklearn_model.n_features_in_,))
   # Extracts the indices of the features that are used by the TFDF model.
   # The features have names with the format "feature_<index>".
-  feature_names = tfdf_model.signatures[
-      "serving_default"].structured_input_signature[1].keys()
+  feature_names = (
+      tfdf_model.signatures["serving_default"]
+      .structured_input_signature[1]
+      .keys()
+  )
   template_output = tfdf_model(
-      {i: template_input[:, int(i.split("_")[1])] for i in feature_names})
-  return tf.keras.Model(inputs=template_input, outputs=template_output)
+      {i: template_input[:, int(i.split("_")[1])] for i in feature_names}
+  )
+  return tf_keras.Model(inputs=template_input, outputs=template_output)


 @functools.singledispatch
 def _build_tfdf_model(
     sklearn_model: ScikitLearnModel,
     path: os.PathLike,
-) -> tf.keras.Model:
+) -> tf_keras.Model:
   """Builds a TFDF model from the given scikit-learn model."""
   raise NotImplementedError(
-      f"Can't build a TFDF model for {type(sklearn_model)}")
+      f"Can't build a TFDF model for {type(sklearn_model)}"
+  )


 @_build_tfdf_model.register(tree.DecisionTreeRegressor)
 @_build_tfdf_model.register(tree.ExtraTreeRegressor)
-def _(sklearn_model: ScikitLearnTree, path: os.PathLike) -> tf.keras.Model:
+def _(sklearn_model: ScikitLearnTree, path: os.PathLike) -> tf_keras.Model:
   """Converts a single scikit-learn regression tree to a TFDF model."""
   # The label argument is unused when the model is loaded, so we pass a
   # placeholder.
@@ -119,12 +126,12 @@
   cart_builder = tfdf.builder.CARTBuilder(path=path, objective=objective)
   cart_builder.add_tree(pytree)
   cart_builder.close()
-  return tf.keras.models.load_model(path)
+  return tf_keras.models.load_model(path)


 @_build_tfdf_model.register(tree.DecisionTreeClassifier)
 @_build_tfdf_model.register(tree.ExtraTreeClassifier)
-def _(sklearn_model: ScikitLearnTree, path: os.PathLike) -> tf.keras.Model:
+def _(sklearn_model: ScikitLearnTree, path: os.PathLike) -> tf_keras.Model:
   """Converts a single scikit-learn classification tree to a TFDF model."""
   objective = tfdf.py_tree.objective.ClassificationObjective(
       label="label",
@@ -136,30 +143,34 @@
   cart_builder = tfdf.builder.CARTBuilder(path=path, objective=objective)
   cart_builder.add_tree(pytree)
   cart_builder.close()
-  return tf.keras.models.load_model(path)
+  return tf_keras.models.load_model(path)


 @_build_tfdf_model.register(ensemble.ExtraTreesRegressor)
 @_build_tfdf_model.register(ensemble.RandomForestRegressor)
 def _(
-    sklearn_model: Union[ensemble.ExtraTreesRegressor, ensemble.RandomForestRegressor],
+    sklearn_model: Union[
+        ensemble.ExtraTreesRegressor, ensemble.RandomForestRegressor
+    ],
     path: os.PathLike,
-) -> tf.keras.Model:
+) -> tf_keras.Model:
   """Converts a forest regression model into a TFDF model."""
   objective = tfdf.py_tree.objective.RegressionObjective(label="label")
   rf_builder = tfdf.builder.RandomForestBuilder(path=path, objective=objective)
   for single_tree in sklearn_model.estimators_:
     rf_builder.add_tree(convert_sklearn_tree_to_tfdf_pytree(single_tree))
   rf_builder.close()
-  return tf.keras.models.load_model(path)
+  return tf_keras.models.load_model(path)


 @_build_tfdf_model.register(ensemble.ExtraTreesClassifier)
 @_build_tfdf_model.register(ensemble.RandomForestClassifier)
 def _(
-    sklearn_model: Union[ensemble.ExtraTreesClassifier, ensemble.RandomForestClassifier],
+    sklearn_model: Union[
+        ensemble.ExtraTreesClassifier, ensemble.RandomForestClassifier
+    ],
     path: os.PathLike,
-) -> tf.keras.Model:
+) -> tf_keras.Model:
   """Converts a forest classification model into a TFDF model."""
   objective = tfdf.py_tree.objective.ClassificationObjective(
       label="label",
@@ -169,14 +180,14 @@
   for single_tree in sklearn_model.estimators_:
     rf_builder.add_tree(convert_sklearn_tree_to_tfdf_pytree(single_tree))
   rf_builder.close()
-  return tf.keras.models.load_model(path)
+  return tf_keras.models.load_model(path)


 @_build_tfdf_model.register(ensemble.GradientBoostingRegressor)
 def _(
     sklearn_model: ensemble.GradientBoostingRegressor,
     path: os.PathLike,
-) -> tf.keras.Model:
+) -> tf_keras.Model:
   """Converts a gradient boosting regression model into a TFDF model."""
   if isinstance(sklearn_model.init_, dummy.DummyRegressor):
     # If the initial estimator is a DummyRegressor, then it predicts a constant
@@ -194,9 +205,11 @@
     init_pytree = None
     bias = 0.0
   else:
-    raise ValueError("The initial estimator must be either a DummyRegressor"
-                     "or a DecisionTreeRegressor, but got"
-                     f"{type(sklearn_model.init_)}.")
+    raise ValueError(
+        "The initial estimator must be either a DummyRegressor "
+        "or a DecisionTreeRegressor, but got "
+        f"{type(sklearn_model.init_)}."
+    )

   gbt_builder = tfdf.builder.GradientBoostedTreeBuilder(
       path=path,
@@ -207,12 +220,14 @@
   if init_pytree:
     gbt_builder.add_tree(init_pytree)

   for weak_learner in sklearn_model.estimators_.ravel():
-    gbt_builder.add_tree(convert_sklearn_tree_to_tfdf_pytree(
-        weak_learner,
-        weight=sklearn_model.learning_rate,
-    ))
+    gbt_builder.add_tree(
+        convert_sklearn_tree_to_tfdf_pytree(
+            weak_learner,
+            weight=sklearn_model.learning_rate,
+        )
+    )
   gbt_builder.close()
-  return tf.keras.models.load_model(path)
+  return tf_keras.models.load_model(path)


 def convert_sklearn_tree_to_tfdf_pytree(
@@ -233,7 +248,8 @@
     sklearn_tree_data = sklearn_tree.tree_.__getstate__()
   except AttributeError as e:
     raise ValueError(
-        "Scikit-Learn model must be fit to data before converting.") from e
+        "Scikit-Learn model must be fit to data before converting."
+    ) from e

   field_names = sklearn_tree_data["nodes"].dtype.names
   task_type = _get_sklearn_tree_task_type(sklearn_tree)
@@ -251,8 +267,9 @@
     }
     if task_type is TaskType.SCALAR_REGRESSION:
       scaling_factor = weight if weight else 1.0
-      node["value"] = tfdf.py_tree.value.RegressionValue(target_value[0][0] *
-                                                         scaling_factor)
+      node["value"] = tfdf.py_tree.value.RegressionValue(
+          target_value[0][0] * scaling_factor
+      )
     elif task_type is TaskType.SINGLE_LABEL_CLASSIFICATION:
       # Normalise to probabilities if we have a classification tree.
       probabilities = list(target_value[0] / target_value[0].sum())
@@ -260,7 +277,8 @@
     else:
       raise ValueError(
           "Only scalar regression and single-label classification are "
-          "supported.")
+          "supported."
+      )

   nodes.append(node)

   root_node = _convert_sklearn_node_to_tfdf_node(
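For reference, a short usage sketch of the converter touched above (the dataset is synthetic; pass `intermediate_write_path` if the converted model must be saved and reloaded later):

```python
from sklearn import datasets
from sklearn import tree
import tensorflow as tf
from tensorflow_decision_forests.contrib import scikit_learn_model_converter

# Fit any supported scikit-learn tree model...
features, labels = datasets.make_regression(
    n_samples=100, n_features=4, random_state=42)
sklearn_model = tree.DecisionTreeRegressor(random_state=42).fit(features, labels)

# ...and emulate it as a tf_keras.Model with the same call signature.
tf_model = scikit_learn_model_converter.convert(sklearn_model)
predictions = tf_model.predict(tf.constant(features, dtype=tf.float32))
```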
diff --git a/tensorflow_decision_forests/contrib/scikit_learn_model_converter/scikit_learn_model_converter_test.py b/tensorflow_decision_forests/contrib/scikit_learn_model_converter/scikit_learn_model_converter_test.py
index 890b8c20..b0e21f6c 100644
--- a/tensorflow_decision_forests/contrib/scikit_learn_model_converter/scikit_learn_model_converter_test.py
+++ b/tensorflow_decision_forests/contrib/scikit_learn_model_converter/scikit_learn_model_converter_test.py
@@ -23,6 +23,7 @@
 from sklearn import linear_model
 from sklearn import tree
 import tensorflow as tf
+import tf_keras

 from tensorflow_decision_forests.contrib import scikit_learn_model_converter

@@ -34,12 +35,18 @@ class ScikitLearnModelConverterTest(tf.test.TestCase, parameterized.TestCase):
       (tree.ExtraTreeRegressor(random_state=42),),
       (ensemble.RandomForestRegressor(random_state=42),),
       (ensemble.ExtraTreesRegressor(random_state=42),),
-      (ensemble.GradientBoostingRegressor(random_state=42,),),
+      (
+          ensemble.GradientBoostingRegressor(
+              random_state=42,
+          ),
+      ),
       (ensemble.GradientBoostingRegressor(random_state=42, init="zero"),),
-      (ensemble.GradientBoostingRegressor(
-          random_state=42,
-          init=tree.DecisionTreeRegressor(random_state=42),
-      ),),
+      (
+          ensemble.GradientBoostingRegressor(
+              random_state=42,
+              init=tree.DecisionTreeRegressor(random_state=42),
+          ),
+      ),
   )
   def test_convert_reproduces_regression_model(
       self,
@@ -69,10 +76,12 @@ def test_convert_reproduces_regression_model(
     loaded_tf_tree = tf.saved_model.load(path)
     self.assertAllEqual(tf_tree(tf_features), loaded_tf_tree(tf_features))

-  @parameterized.parameters((tree.DecisionTreeClassifier(random_state=42),),
-                            (tree.ExtraTreeClassifier(random_state=42),),
-                            (ensemble.RandomForestClassifier(random_state=42),),
-                            (ensemble.ExtraTreesClassifier(random_state=42),))
+  @parameterized.parameters(
+      (tree.DecisionTreeClassifier(random_state=42),),
+      (tree.ExtraTreeClassifier(random_state=42),),
+      (ensemble.RandomForestClassifier(random_state=42),),
+      (ensemble.ExtraTreesClassifier(random_state=42),),
+  )
   def test_convert_reproduces_classification_model(
       self,
       sklearn_tree,
@@ -165,11 +174,12 @@ def test_convert_uses_intermediate_model_path_if_provided(self):
         intermediate_write_path=write_path,
     )
     # We should be able to load the intermediate TFDF model from the given path.
-    tfdf_tree = tf.keras.models.load_model(write_path)
-    self.assertIsInstance(tfdf_tree, tf.keras.Model)
+    tfdf_tree = tf_keras.models.load_model(write_path)
+    self.assertIsInstance(tfdf_tree, tf_keras.Model)

   def test_convert_sklearn_tree_to_tfdf_pytree_raises_if_weight_provided_for_classification_tree(
-      self):
+      self,
+  ):
     features, labels = datasets.make_classification(random_state=42)
     sklearn_tree = tree.DecisionTreeClassifier(random_state=42).fit(
         features,
@@ -185,7 +195,8 @@
     )

   def test_convert_raises_when_gbt_initial_estimator_is_not_tree_or_constant(
-      self):
+      self,
+  ):
     features, labels = datasets.make_regression(
         n_samples=100,
         n_features=10,

diff --git a/tensorflow_decision_forests/contrib/training_preprocessing/training_preprocessing_test.py b/tensorflow_decision_forests/contrib/training_preprocessing/training_preprocessing_test.py
index 51f0ca12..318d98bf 100644
--- a/tensorflow_decision_forests/contrib/training_preprocessing/training_preprocessing_test.py
+++ b/tensorflow_decision_forests/contrib/training_preprocessing/training_preprocessing_test.py
@@ -21,6 +21,7 @@
 import numpy as np
 import tensorflow as tf
 import tensorflow_decision_forests as tfdf
+import tf_keras

 from tensorflow_decision_forests.contrib.training_preprocessing import training_preprocessing

@@ -231,7 +232,7 @@ def make_dataset(num_examples):
     model.save(saved_model_path)

     logging.info('Loading model from %s', saved_model_path)
-    loaded_model = tf.keras.models.load_model(saved_model_path)
+    loaded_model = tf_keras.models.load_model(saved_model_path)
     loaded_model.summary()

     # Check exported / imported model predictions
+ "ignore_for_dep=third_party.tensorflow.python.framework.type_spec_registry", + ], deps = [ + ":keras_internal", # TensorFlow Python, + # tf_keras dep, # TensorFlow /distribute:input_lib, "//tensorflow_decision_forests/component/inspector", "//tensorflow_decision_forests/component/tuner", @@ -88,7 +93,9 @@ py_library( srcs_version = "PY3", deps = [ ":core_inference", + ":keras_internal", # TensorFlow Python, + # tf_keras dep, # TensorFlow /data/ops:dataset_ops, "//tensorflow_decision_forests/component/inspector", "//tensorflow_decision_forests/component/tuner", @@ -105,6 +112,21 @@ py_library( ], ) +py_library( + name = "keras_internal", + srcs = ["keras_internal.py"], + srcs_version = "PY3", + deps = [ + # tf_keras dep, + # tf_keras/engine dep, + # tf_keras/engine:data_adapter dep, + # tf_keras/engine:input_layer dep, + # tf_keras/feature_column:dense_features_v2 dep, + ## tf_keras/layers dep, + # tf_keras/utils:dataset_creator dep, + ], +) + # Tests # ===== @@ -123,6 +145,7 @@ py_test( deps = [ ":core", ":keras", + ":keras_internal", "@com_google_protobuf//:python_srcs", # absl/flags dep, # absl/logging dep, @@ -130,6 +153,7 @@ py_test( # numpy dep, # pandas dep, # TensorFlow Python, + # tf_keras dep, "//tensorflow_decision_forests/component/inspector", "//tensorflow_decision_forests/component/model_plotter", "//tensorflow_decision_forests/tensorflow:core", @@ -159,6 +183,7 @@ py_test( # pandas dep, # TensorFlow Python, "//tensorflow_decision_forests", + # tf_keras dep, ], ) @@ -182,6 +207,7 @@ py_test( "manual", ], deps = [ + ":keras_internal", # absl/flags dep, # absl/logging dep, # absl/testing:parameterized dep, @@ -189,6 +215,7 @@ py_test( # pandas dep, # portpicker dep, # TensorFlow Python, + # tf_keras dep, # TensorFlow /distribute:distribute_lib, "//tensorflow_decision_forests", "@ydf//yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees:dgbt_py_proto", @@ -208,6 +235,7 @@ py_binary( # numpy dep, # pandas dep, # TensorFlow Python, + # tf_keras dep, "//tensorflow_decision_forests/tensorflow/ops/inference:op_py", ], ) @@ -229,3 +257,13 @@ tf_cc_binary( "@ydf//yggdrasil_decision_forests/utils/distribute/implementations/grpc:grpc_worker_lib_with_main", ], ) + +py_test( + name = "keras_internal_test", + srcs = ["keras_internal_test.py"], + python_version = "PY3", + deps = [ + ":keras_internal", + # TensorFlow Python, + ], +) diff --git a/tensorflow_decision_forests/keras/__init__.py b/tensorflow_decision_forests/keras/__init__.py index 7da378cc..b348b87a 100644 --- a/tensorflow_decision_forests/keras/__init__.py +++ b/tensorflow_decision_forests/keras/__init__.py @@ -44,7 +44,7 @@ # ... # Load a model: it loads as a generic keras model. 
-loaded_model = tf.keras.models.load_model("/path/to/my/model") +loaded_model = tf_keras.models.load_model("/path/to/my/model") ``` """ diff --git a/tensorflow_decision_forests/keras/core.py b/tensorflow_decision_forests/keras/core.py index f11739bb..c11f3246 100644 --- a/tensorflow_decision_forests/keras/core.py +++ b/tensorflow_decision_forests/keras/core.py @@ -54,6 +54,8 @@ from typing import Optional, List, Dict, Any, Tuple, NamedTuple, Set, Union, Literal import tensorflow as tf +import tf_keras + from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import load_op @@ -70,20 +72,8 @@ from yggdrasil_decision_forests.learner import abstract_learner_pb2 from yggdrasil_decision_forests.model import abstract_model_pb2 # pylint: disable=unused-import from yggdrasil_decision_forests.utils.distribute.implementations.grpc import grpc_pb2 # pylint: disable=unused-import +from tensorflow_decision_forests.keras import keras_internal -try: - # tf>1.12 - import keras.src.engine.data_adapter as data_adapter -except ImportError: - # tf<=1.12 - import keras.engine.data_adapter as data_adapter -get_data_handler = data_adapter.get_data_handler - -layers = tf.keras.layers -models = tf.keras.models -optimizers = tf.keras.optimizers -losses = tf.keras.losses -backend = tf.keras.backend no_automatic_dependency_tracking = ( tf1_compatibility.no_automatic_dependency_tracking @@ -385,7 +375,7 @@ class CoreModel(InferenceCoreModel): # ... # Load a model: it loads as a generic keras model. - model = tf.keras.models.load_model("/tmp/my_saved_model") + model = tf_keras.models.load_model("/tmp/my_saved_model") ``` The training logs (e.g. feature statistics, validation loss, remaining @@ -513,9 +503,9 @@ def __init__( learner_params: Optional[HyperParameters] = None, features: Optional[List[FeatureUsage]] = None, exclude_non_specified_features: Optional[bool] = False, - preprocessing: Optional["models.Functional"] = None, - postprocessing: Optional["models.Functional"] = None, - training_preprocessing: Optional["models.Functional"] = None, + preprocessing: Optional[keras_internal.Functional] = None, + postprocessing: Optional[keras_internal.Functional] = None, + training_preprocessing: Optional[keras_internal.Functional] = None, ranking_group: Optional[str] = None, uplift_treatment: Optional[str] = None, temp_directory: Optional[str] = None, @@ -652,7 +642,7 @@ def load_weights(self, *args, **kwargs): # pylint: disable=useless-super-delega `load_weights` is not supported by TensorFlow Decision Forests models. To save and restore a model, use the SavedModel API i.e. - `model.save(...)` and `tf.keras.models.load_model(...)`. To resume the + `model.save(...)` and `tf_keras.models.load_model(...)`. To resume the training of an existing model, create the model with `try_resume_training=True` (default value) and with a similar `temp_directory` argument. See documentation of `try_resume_training` @@ -1118,7 +1108,7 @@ def fit( steps_per_epoch: Optional[Any] = None, class_weight: Optional[Any] = None, **kwargs, - ) -> tf.keras.callbacks.History: + ) -> tf_keras.callbacks.History: """Trains the model. Local training @@ -1406,7 +1396,7 @@ def _fit_implementation( validation_steps, steps_per_epoch, class_weight, - ) -> tf.keras.callbacks.History: # pylint: disable=g-doc-args,g-doc-return-or-yield + ) -> tf_keras.callbacks.History: # pylint: disable=g-doc-args,g-doc-return-or-yield """Train the model. This method performs operations that resembles as the Keras' fit function. 
@@ -1426,15 +1416,15 @@ def _fit_implementation(
     """

     # Create the callback manager
-    if not isinstance(callbacks, tf.keras.callbacks.CallbackList):
-      callbacks = tf.keras.callbacks.CallbackList(
+    if not isinstance(callbacks, tf_keras.callbacks.CallbackList):
+      callbacks = tf_keras.callbacks.CallbackList(
           callbacks, model=self, add_history=False
       )

     # Manages the history manually.
     # Note: The both the History and CallbackList object will override the
     # "model.history" field.
-    history = tf.keras.callbacks.History()
+    history = tf_keras.callbacks.History()
     history.model = self
     history.on_train_begin()
     history.on_epoch_begin(0)
@@ -1453,7 +1443,7 @@ def _fit_implementation(
     # training).
     validation_data_handler = None
     if validation_data:
-      val_x, val_y, val_sample_weight = tf.keras.utils.unpack_x_y_sample_weight(
+      val_x, val_y, val_sample_weight = keras_internal.unpack_x_y_sample_weight(
           validation_data
       )

@@ -1464,7 +1454,7 @@ def _fit_implementation(
       # seems to cause some issues.

       # Create data_handler for evaluation and cache it.
-      validation_data_handler = get_data_handler(
+      validation_data_handler = keras_internal.get_data_handler(
           x=val_x,
           y=val_y,
           sample_weight=val_sample_weight,
@@ -1484,7 +1474,7 @@ def _fit_implementation(
     # Wraps the input training dataset into a tf.data.Dataset like object.
     # This is a noop if the training dataset is already provided as a
     # tf.data.Dataset.
-    data_handler = get_data_handler(
+    data_handler = keras_internal.get_data_handler(
         x=x,
         y=y,
         sample_weight=sample_weight,
@@ -1990,7 +1980,7 @@ def fit_on_dataset_path(

   def _add_training_logs_to_history(
       self,
-      history: tf.keras.callbacks.History,
+      history: tf_keras.callbacks.History,
       inspector: Optional[inspector_lib.AbstractInspector] = None,
   ) -> Optional[Dict[str, Any]]:
     if inspector is None:
@@ -2010,8 +2000,8 @@ def _add_training_logs_to_history(

   def _training_logs_to_history(
       self, inspector: Optional[inspector_lib.AbstractInspector] = None
-  ) -> tf.keras.callbacks.History:
-    history = tf.keras.callbacks.History()
+  ) -> tf_keras.callbacks.History:
+    history = tf_keras.callbacks.History()
     history.model = self
     history.on_train_begin()
     history.on_epoch_begin(0)
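Note: both helpers that core.py now takes from the `keras_internal` shim have simple contracts. `unpack_x_y_sample_weight` normalizes the `x` / `(x, y)` / `(x, y, sample_weight)` forms accepted by Keras; a rough behavioral sketch (the real implementation lives in tf_keras's `data_adapter` module):

```python
# Behavioral sketch only; not the tf_keras implementation.
def unpack_x_y_sample_weight(data):
  """Returns an (x, y, sample_weight) triple for any accepted data form."""
  if isinstance(data, tuple):
    if len(data) == 1:
      return data[0], None, None
    if len(data) == 2:
      return data[0], data[1], None
    if len(data) == 3:
      return data[0], data[1], data[2]
  return data, None, None
```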
diff --git a/tensorflow_decision_forests/keras/core_inference.py b/tensorflow_decision_forests/keras/core_inference.py
index 5f39a46a..7205d794 100644
--- a/tensorflow_decision_forests/keras/core_inference.py
+++ b/tensorflow_decision_forests/keras/core_inference.py
@@ -22,11 +22,13 @@
 from functools import partial  # pylint: disable=g-importing-member
 import os
 import tempfile
-from typing import Optional, List, Dict, Any, Union, NamedTuple
+from typing import Any, Dict, List, NamedTuple, Optional, Union
 import uuid
 import zipfile

 import tensorflow as tf
+import tf_keras
+from tensorflow_decision_forests.keras import keras_internal

 from tensorflow.python.distribute import input_lib
 from tensorflow_decision_forests.component.inspector import inspector as inspector_lib
@@ -39,11 +41,6 @@
 from yggdrasil_decision_forests.model import abstract_model_pb2  # pylint: disable=unused-import
 from yggdrasil_decision_forests.utils.distribute.implementations.grpc import grpc_pb2  # pylint: disable=unused-import

-layers = tf.keras.layers
-models = tf.keras.models
-optimizers = tf.keras.optimizers
-losses = tf.keras.losses
-backend = tf.keras.backend

 # The length of a model identifier
 MODEL_IDENTIFIER_LENGTH = 16
@@ -225,7 +222,7 @@ def prediction_key(self) -> str:
     return self.output if self.output else self.label


-class InferenceCoreModel(models.Model):
+class InferenceCoreModel(tf_keras.models.Model):
   """Keras Model V2 wrapper around an Yggdrasil Model.

   See "CoreModel" in "core.py" for the definition of the arguments.
@@ -238,8 +235,8 @@ def __init__(
       verbose: int = 1,
       advanced_arguments: Optional[AdvancedArguments] = None,
       name: Optional[str] = None,
-      preprocessing: Optional["models.Functional"] = None,
-      postprocessing: Optional["models.Functional"] = None,
+      preprocessing: Optional[keras_internal.Functional] = None,
+      postprocessing: Optional[keras_internal.Functional] = None,
       uplift_treatment: Optional[str] = None,
       temp_directory: Optional[str] = None,
       multitask: Optional[List[MultiTaskItem]] = None,
@@ -464,9 +461,9 @@ def make_predict_function(self):  # pytype: disable=signature-mismatch  # overri

     @tf.function(reduce_retracing=True)
     def predict_function_not_trained(iterator):
       """Prediction of a non-trained model. Returns "zeros"."""

       data = next(iterator)
-      x, _, _ = tf.keras.utils.unpack_x_y_sample_weight(data)
+      x, _, _ = keras_internal.unpack_x_y_sample_weight(data)
       batch_size = _batch_size(x)
       return tf.zeros([batch_size, 1])
@@ -898,7 +894,7 @@ def _extract_sample(self, x):
       # Extract the example here (instead of inside of "predict") to make
       # sure this operation is done on the chief.
       for row in dataset.take(1):
-        x, _, _ = tf.keras.utils.unpack_x_y_sample_weight(row)
+        x, _, _ = keras_internal.unpack_x_y_sample_weight(row)
         return x
     except Exception:  # pylint: disable=broad-except
       pass
@@ -1282,7 +1278,7 @@ def yggdrasil_model_to_keras_model(
       input_model_signature_fn=input_model_signature_fn,
   )

-  tf.keras.models.save_model(model, dst_path)
+  tf_keras.models.save_model(model, dst_path)

   return
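Note: a usage sketch for the converter updated above; both paths are placeholders:

```python
# Sketch: convert a raw Yggdrasil Decision Forests model directory into a
# Keras SavedModel, then reload it with tf_keras. Paths are illustrative.
import tensorflow_decision_forests as tfdf
import tf_keras

tfdf.keras.yggdrasil_model_to_keras_model("/tmp/ydf_model", "/tmp/keras_model")
loaded_model = tf_keras.models.load_model("/tmp/keras_model")
```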
diff --git a/tensorflow_decision_forests/keras/keras_distributed_test.py b/tensorflow_decision_forests/keras/keras_distributed_test.py
index 3c170c55..e123aaa1 100644
--- a/tensorflow_decision_forests/keras/keras_distributed_test.py
+++ b/tensorflow_decision_forests/keras/keras_distributed_test.py
@@ -32,6 +32,7 @@
 from tensorflow.python.distribute import distribute_lib

 import tensorflow_decision_forests as tfdf
+from tensorflow_decision_forests.keras import keras_internal
 from yggdrasil_decision_forests.learner.distributed_gradient_boosted_trees import distributed_gradient_boosted_trees_pb2

@@ -183,7 +184,7 @@ def dataset_fn(
     with strategy.scope():
       model = tfdf.keras.DistributedGradientBoostedTreesModel(worker_logs=False)
       model.compile(metrics=["accuracy"])
-    # Note: "tf.keras.utils.experimental.DatasetCreator" seems to also work.
+    # Note: "tf_keras.utils.experimental.DatasetCreator" seems to also work.
     train_dataset_creator = strategy.distribute_datasets_from_function(
         lambda context: dataset_fn(context, seed=111)
     )
@@ -249,7 +250,7 @@ def dataset_fn(input_context):
       dataset = dataset.prefetch(2)
       return dataset

-    dataset_creator = tf.keras.utils.experimental.DatasetCreator(dataset_fn)
+    dc = keras_internal.DatasetCreator(dataset_fn)

     cluster_resolver = _create_in_process_tf_ps_cluster(num_workers=2, num_ps=1)

@@ -262,9 +263,7 @@ def dataset_fn(input_context):
     with self.assertRaisesRegex(
         ValueError, "does not support training with a TF Distribution strategy"
     ):
-      model.fit(
-          dataset_creator, steps_per_epoch=num_examples // global_batch_size
-      )
+      model.fit(dc, steps_per_epoch=num_examples // global_batch_size)

   def _shard_dataset(self, path, num_shards=20) -> List[str]:
     """Splits a csv dataset into multiple csv files."""
diff --git a/tensorflow_decision_forests/keras/keras_internal.py b/tensorflow_decision_forests/keras/keras_internal.py
new file mode 100644
index 00000000..e28127f9
--- /dev/null
+++ b/tensorflow_decision_forests/keras/keras_internal.py
@@ -0,0 +1,24 @@
+# Copyright 2021 Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Access to Keras functions with different internal and external paths."""
+
+from tf_keras.src.engine import data_adapter as _data_adapter
+from tf_keras.src.engine.functional import Functional
+from tf_keras.src.feature_column.dense_features_v2 import DenseFeatures
+from tf_keras.src.utils.dataset_creator import DatasetCreator
+
+
+unpack_x_y_sample_weight = _data_adapter.unpack_x_y_sample_weight
+get_data_handler = _data_adapter.get_data_handler
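Note: with the shim in place, call sites depend on `keras_internal` rather than on private Keras module paths. An illustrative call site, mirroring the test updates below (`feature_columns` is assumed to be a list of TF feature columns):

```python
# Illustrative; `feature_columns` is an assumed input.
import tensorflow_decision_forests as tfdf
from tensorflow_decision_forests.keras import keras_internal

preprocessing = keras_internal.DenseFeatures(feature_columns)
model = tfdf.keras.RandomForestModel(preprocessing=preprocessing)
```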
+ +if __name__ == "__main__": + tf.test.main() diff --git a/tensorflow_decision_forests/keras/keras_test.py b/tensorflow_decision_forests/keras/keras_test.py index 721438f4..97728417 100644 --- a/tensorflow_decision_forests/keras/keras_test.py +++ b/tensorflow_decision_forests/keras/keras_test.py @@ -31,6 +31,7 @@ import numpy as np import pandas as pd import tensorflow as tf +import tf_keras from google.protobuf import text_format @@ -38,6 +39,7 @@ from tensorflow_decision_forests.component.inspector import inspector as inspector_lib from tensorflow_decision_forests.component.model_plotter import model_plotter from tensorflow_decision_forests.keras import core +from tensorflow_decision_forests.keras import keras_internal from tensorflow_decision_forests.tensorflow import core as tf_core from yggdrasil_decision_forests.dataset import synthetic_dataset_pb2 from yggdrasil_decision_forests.learner import abstract_learner_pb2 @@ -45,13 +47,10 @@ from yggdrasil_decision_forests.learner.random_forest import random_forest_pb2 from yggdrasil_decision_forests.model import abstract_model_pb2 -layers = tf.keras.layers -models = tf.keras.models -optimizers = tf.keras.optimizers -callbacks = tf.keras.callbacks -Normalization = layers.experimental.preprocessing.Normalization -CategoryEncoding = layers.experimental.preprocessing.CategoryEncoding -StringLookup = layers.experimental.preprocessing.StringLookup + +Normalization = tf_keras.layers.Normalization +CategoryEncoding = tf_keras.layers.CategoryEncoding +StringLookup = tf_keras.layers.StringLookup Dataset = collections.namedtuple( "Dataset", ["train", "test", "semantics", "label", "num_classes"] @@ -264,7 +263,7 @@ def build_preprocessing(dataset: Dataset) -> Tuple[List[Any], List[Any]]: normalizer = Normalization(axis=None) normalizer.adapt(raw_input_values) - raw_input = layers.Input(shape=(1,), name=key) + raw_input = tf_keras.layers.Input(shape=(1,), name=key) processed_input = normalizer(raw_input) raw_inputs.append(raw_input) @@ -273,8 +272,8 @@ def build_preprocessing(dataset: Dataset) -> Tuple[List[Any], List[Any]]: elif semantic == keras.FeatureSemantic.CATEGORICAL: if raw_input_values.dtype in [np.int64]: # Integer - raw_input = layers.Input(shape=(1,), name=key, dtype="int64") - raw_input = layers.minimum([raw_input, 5]) + raw_input = tf_keras.layers.Input(shape=(1,), name=key, dtype="int64") + raw_input = tf_keras.layers.minimum([raw_input, 5]) onehot = CategoryEncoding( num_tokens=np.minimum(raw_input_values, 5), output_mode="binary" ) @@ -282,7 +281,7 @@ def build_preprocessing(dataset: Dataset) -> Tuple[List[Any], List[Any]]: else: # String - raw_input = layers.Input(shape=(1,), name=key, dtype="string") + raw_input = tf_keras.layers.Input(shape=(1,), name=key, dtype="string") lookup = StringLookup(max_tokens=5, output_mode="binary") lookup.adapt(raw_input_values) @@ -363,7 +362,9 @@ class Signature(enum.Enum): ANY_FEATURE_COLUMN = 9 -def build_model(signature: Signature, dataset: Dataset, **args) -> models.Model: +def build_model( + signature: Signature, dataset: Dataset, **args +) -> tf_keras.models.Model: """Builds a model with the different supported signatures. Setting nn_baseline=True creates a NN keras model instead. 
diff --git a/tensorflow_decision_forests/keras/keras_test.py b/tensorflow_decision_forests/keras/keras_test.py
index 721438f4..97728417 100644
--- a/tensorflow_decision_forests/keras/keras_test.py
+++ b/tensorflow_decision_forests/keras/keras_test.py
@@ -31,6 +31,7 @@
 import numpy as np
 import pandas as pd
 import tensorflow as tf
+import tf_keras

 from google.protobuf import text_format

@@ -38,6 +39,7 @@
 from tensorflow_decision_forests.component.inspector import inspector as inspector_lib
 from tensorflow_decision_forests.component.model_plotter import model_plotter
 from tensorflow_decision_forests.keras import core
+from tensorflow_decision_forests.keras import keras_internal
 from tensorflow_decision_forests.tensorflow import core as tf_core
 from yggdrasil_decision_forests.dataset import synthetic_dataset_pb2
 from yggdrasil_decision_forests.learner import abstract_learner_pb2
@@ -45,13 +47,10 @@
 from yggdrasil_decision_forests.learner.random_forest import random_forest_pb2
 from yggdrasil_decision_forests.model import abstract_model_pb2

-layers = tf.keras.layers
-models = tf.keras.models
-optimizers = tf.keras.optimizers
-callbacks = tf.keras.callbacks
-Normalization = layers.experimental.preprocessing.Normalization
-CategoryEncoding = layers.experimental.preprocessing.CategoryEncoding
-StringLookup = layers.experimental.preprocessing.StringLookup
+
+Normalization = tf_keras.layers.Normalization
+CategoryEncoding = tf_keras.layers.CategoryEncoding
+StringLookup = tf_keras.layers.StringLookup

 Dataset = collections.namedtuple(
     "Dataset", ["train", "test", "semantics", "label", "num_classes"]
@@ -264,7 +263,7 @@ def build_preprocessing(dataset: Dataset) -> Tuple[List[Any], List[Any]]:
       normalizer = Normalization(axis=None)
       normalizer.adapt(raw_input_values)

-      raw_input = layers.Input(shape=(1,), name=key)
+      raw_input = tf_keras.layers.Input(shape=(1,), name=key)
       processed_input = normalizer(raw_input)

       raw_inputs.append(raw_input)
@@ -273,8 +272,8 @@ def build_preprocessing(dataset: Dataset) -> Tuple[List[Any], List[Any]]:
     elif semantic == keras.FeatureSemantic.CATEGORICAL:
       if raw_input_values.dtype in [np.int64]:
         # Integer
-        raw_input = layers.Input(shape=(1,), name=key, dtype="int64")
-        raw_input = layers.minimum([raw_input, 5])
+        raw_input = tf_keras.layers.Input(shape=(1,), name=key, dtype="int64")
+        raw_input = tf_keras.layers.minimum([raw_input, 5])
         onehot = CategoryEncoding(
             num_tokens=np.minimum(raw_input_values, 5), output_mode="binary"
         )
@@ -282,7 +281,7 @@ def build_preprocessing(dataset: Dataset) -> Tuple[List[Any], List[Any]]:

       else:
         # String
-        raw_input = layers.Input(shape=(1,), name=key, dtype="string")
+        raw_input = tf_keras.layers.Input(shape=(1,), name=key, dtype="string")
         lookup = StringLookup(max_tokens=5, output_mode="binary")
         lookup.adapt(raw_input_values)

@@ -363,7 +362,9 @@ class Signature(enum.Enum):
   ANY_FEATURE_COLUMN = 9


-def build_model(signature: Signature, dataset: Dataset, **args) -> models.Model:
+def build_model(
+    signature: Signature, dataset: Dataset, **args
+) -> tf_keras.models.Model:
   """Builds a model with the different supported signatures.

   Setting nn_baseline=True creates a NN keras model instead. This is useful to
@@ -391,25 +392,33 @@ def build_model(signature: Signature, dataset: Dataset, **args) -> models.Model:
   elif signature == Signature.DENSE_PREPROCESSING:
     raw_inputs, processed_inputs = build_preprocessing(dataset)
-    processed_inputs = layers.Concatenate()(processed_inputs)
-    preprocessing = models.Model(inputs=raw_inputs, outputs=processed_inputs)
+    processed_inputs = tf_keras.layers.Concatenate()(processed_inputs)
+    preprocessing = tf_keras.models.Model(
+        inputs=raw_inputs, outputs=processed_inputs
+    )
     model = keras.RandomForestModel(preprocessing=preprocessing, **args)

   elif signature == Signature.STRUCTURED_DICTIONARY_PREPROCESSING:
     raw_inputs, processed_inputs = build_preprocessing(dataset)
     processed_inputs = {value.name: value for value in processed_inputs}
-    preprocessing = models.Model(inputs=raw_inputs, outputs=processed_inputs)
+    preprocessing = tf_keras.models.Model(
+        inputs=raw_inputs, outputs=processed_inputs
+    )
     model = keras.RandomForestModel(preprocessing=preprocessing, **args)

   elif signature == Signature.STRUCTURED_LIST_PREPROCESSING:
     raw_inputs, processed_inputs = build_preprocessing(dataset)
-    preprocessing = models.Model(inputs=raw_inputs, outputs=processed_inputs)
+    preprocessing = tf_keras.models.Model(
+        inputs=raw_inputs, outputs=processed_inputs
+    )
     model = keras.RandomForestModel(preprocessing=preprocessing, **args)

   elif signature == Signature.STRUCTURED_PREPROCESSING_WITH_SEMANTIC:
     raw_inputs, processed_inputs = build_preprocessing(dataset)
     processed_inputs = {value.name: value for value in processed_inputs}
-    preprocessing = models.Model(inputs=raw_inputs, outputs=processed_inputs)
+    preprocessing = tf_keras.models.Model(
+        inputs=raw_inputs, outputs=processed_inputs
+    )
     features = []
     for key in processed_inputs.keys():
       features.append(keras.FeatureUsage(key))
@@ -419,12 +428,12 @@ def build_model(signature: Signature, dataset: Dataset, **args) -> models.Model:
   elif signature == Signature.DENSE_FEATURE_COLUMN:
     feature_columns = build_feature_columns(dataset, dense=True)
-    preprocessing = layers.DenseFeatures(feature_columns)
+    preprocessing = keras_internal.DenseFeatures(feature_columns)
     model = keras.RandomForestModel(preprocessing=preprocessing, **args)

   elif signature == Signature.ANY_FEATURE_COLUMN:
     feature_columns = build_feature_columns(dataset, dense=False)
-    preprocessing = layers.DenseFeatures(feature_columns)
+    preprocessing = keras_internal.DenseFeatures(feature_columns)
     model = keras.RandomForestModel(preprocessing=preprocessing, **args)

   else:
@@ -489,7 +498,7 @@ def _check_adult_model(
     logging.info("Predictions: %s", predictions)

     if check_serialization:
-      tf.keras.backend.clear_session()
+      tf_keras.backend.clear_session()

       # Export the trained model.
       saved_model_path = os.path.join(self.get_temp_dir(), "saved_model")
@@ -499,7 +508,7 @@ def _check_adult_model(
       logging.info("Saving model to %s", saved_model_path)
       model.save(saved_model_path)

-      tf.keras.backend.clear_session()
+      tf_keras.backend.clear_session()

       logging.info("Run model in separate binary")
       process = subprocess.Popen(
@@ -529,7 +538,7 @@ def _check_adult_model(

       # Load and evaluate the exported trained model.
       logging.info("Loading model from %s", new_saved_model_path)
-      loaded_model = models.load_model(new_saved_model_path)
+      loaded_model = tf_keras.models.load_model(new_saved_model_path)
       loaded_model.summary()

       evaluation = loaded_model.evaluate(tf_test)
@@ -755,7 +764,7 @@ def test_save_model_without_evaluation(self):

     # Load and evaluate the exported trained model.
logging.info("Loading model from %s", saved_model_path) - loaded_model = models.load_model(saved_model_path) + loaded_model = tf_keras.models.load_model(saved_model_path) loaded_model.summary() loaded_model.compile(metrics=["accuracy"]) @@ -816,7 +825,7 @@ def test_model_adult_dense_feature_columns(self): def test_model_adult_dense_nparray(self): dataset = adult_dataset() feature_columns = build_feature_columns(dataset, dense=True) - dense_features = layers.DenseFeatures(feature_columns) + dense_features = keras_internal.DenseFeatures(feature_columns) train_x = dense_features(dict(dataset.train)).numpy() train_y = dataset.train[dataset.label].values @@ -848,7 +857,7 @@ def test_model_adult_dense_nparray(self): def test_model_adult_dense_tfdataset(self): dataset = adult_dataset() feature_columns = build_feature_columns(dataset, dense=True) - dense_features = layers.DenseFeatures(feature_columns) + dense_features = keras_internal.DenseFeatures(feature_columns) train_x = dense_features(dict(dataset.train)) train_y = dataset.train[dataset.label].values @@ -1168,7 +1177,7 @@ def preprocess(feature_values, label): else: raise ValueError("Non initialized model") - class _TestEvalCallback(tf.keras.callbacks.Callback): + class _TestEvalCallback(tf_keras.callbacks.Callback): def on_train_end(self, logs=None): self.evaluation = model.evaluate(test_dataset) @@ -1305,15 +1314,15 @@ def test_model_adult_df_on_top_of_nn(self): # Note: The following code does not work with the "models.Sequential" API # (Nov.17, 2020). raw_inputs, preprocessed_inputs = build_preprocessing(dataset) - z1 = layers.Concatenate()(preprocessed_inputs) - z2 = layers.Dense(16, activation=tf.nn.relu6)(z1) - z3 = layers.Dense(16, activation=tf.nn.relu, name="last")(z2) - y = layers.Dense(1)(z3) - nn_model = models.Model(raw_inputs, y) + z1 = tf_keras.layers.Concatenate()(preprocessed_inputs) + z2 = tf_keras.layers.Dense(16, activation=tf.nn.relu6)(z1) + z3 = tf_keras.layers.Dense(16, activation=tf.nn.relu, name="last")(z2) + y = tf_keras.layers.Dense(1)(z3) + nn_model = tf_keras.models.Model(raw_inputs, y) nn_model.compile( - optimizer=optimizers.Adam(), - loss=tf.keras.losses.BinaryCrossentropy(), + optimizer=tf_keras.optimizers.Adam(), + loss=tf_keras.losses.BinaryCrossentropy(), metrics=["accuracy"], ) @@ -1322,7 +1331,7 @@ def test_model_adult_df_on_top_of_nn(self): nn_model.summary() # Build a DF on top of the NN - nn_without_head = models.Model( + nn_without_head = tf_keras.models.Model( inputs=nn_model.inputs, outputs=nn_model.get_layer("last").output ) df_model = keras.RandomForestModel(preprocessing=nn_without_head) @@ -1692,7 +1701,7 @@ def test_override_save(self): model_2.fit(keras.pd_dataframe_to_tf_dataset(dataset_2, label="label")) model_2.save(model_path) - model_2_restored = tf.keras.models.load_model(model_path) + model_2_restored = tf_keras.models.load_model(model_path) model_2_restored.predict( keras.pd_dataframe_to_tf_dataset(dataset_2, label="label") ) @@ -1708,7 +1717,7 @@ def test_output_logits(self): self.assertAlmostEqual(np.mean(predictions), -2.2, delta=0.2) self.assertAlmostEqual(np.std(predictions), 2.8, delta=0.25) - model.compile(metrics=[tf.keras.metrics.BinaryAccuracy(threshold=0.0)]) + model.compile(metrics=[tf_keras.metrics.BinaryAccuracy(threshold=0.0)]) evaluation = model.evaluate(tf_test, return_dict=True) logging.info("Evaluation: %s", evaluation) @@ -1734,9 +1743,9 @@ def make_dataset(): model = keras.GradientBoostedTreesModel() - inputs = tf.keras.layers.Input(shape=(num_features,)) + inputs 
     outputs = model(inputs)
-    functional_model = tf.keras.Model(inputs=inputs, outputs=outputs)
+    functional_model = tf_keras.Model(inputs=inputs, outputs=outputs)

     # Generate predictions before training.
     for features, _ in test_dataset.take(1):
@@ -2277,7 +2286,7 @@ def test_properties(self):

   def test_golden_model_gbt(self):
     dataset = adult_dataset()
-    loaded_model = models.load_model(
+    loaded_model = tf_keras.models.load_model(
         os.path.join(tfdf_test_data_path(), "model/saved_model_adult_rf")
     )
     prediction = loaded_model.predict(
@@ -2323,7 +2332,7 @@ def custom_model_input_signature(
         tfdf_model_path,
         input_model_signature_fn=custom_model_input_signature,
     )
-    loaded_model = models.load_model(tfdf_model_path)
+    loaded_model = tf_keras.models.load_model(tfdf_model_path)
     dataset = adult_dataset()
     prediction = loaded_model.predict(
         keras.pd_dataframe_to_tf_dataset(dataset.test, label="income")
@@ -2346,7 +2355,7 @@ def test_ydf_to_keras_model_uplift(self):
     test_df = test_df.drop(treatment_group, axis=1)

     core.yggdrasil_model_to_keras_model(ygg_model_path, tfdf_model_path)
-    loaded_model = models.load_model(tfdf_model_path)
+    loaded_model = tf_keras.models.load_model(tfdf_model_path)
     prediction = loaded_model.predict(
         keras.pd_dataframe_to_tf_dataset(test_df, label=outcome_key)
     )
@@ -2385,7 +2394,7 @@ def test_ydf_to_keras_model_with_source_container(
     core.yggdrasil_model_to_keras_model(src_model_path, dst_model_path)

     # Load/Check the model
-    _ = models.load_model(dst_model_path)
+    _ = tf_keras.models.load_model(dst_model_path)

   def test_load_combined_model(self):
     target = tf.random.uniform(shape=[100, 1], minval=25, maxval=50)
@@ -2393,7 +2402,7 @@ def test_load_combined_model(self):
       "my_feature": tf.random.uniform(shape=[100, 2], minval=1, maxval=100)
     }
     dataset = tf.data.Dataset.from_tensor_slices((features, target)).batch(32)
-    inputs = {"my_feature": tf.keras.Input(shape=(2,))}
+    inputs = {"my_feature": tf_keras.Input(shape=(2,))}

     model_1 = keras.RandomForestModel(num_trees=10, task=keras.Task.REGRESSION)
     model_2 = keras.RandomForestModel(num_trees=20, task=keras.Task.REGRESSION)
@@ -2403,7 +2412,7 @@ def model_2_preprocessing(x):

     model_2_pred = model_2(model_2_preprocessing(inputs))

-    combined_model = models.Model(inputs, model_2_pred)
+    combined_model = tf_keras.models.Model(inputs, model_2_pred)

     # Train first model.
     model_1.fit(dataset)
@@ -2417,7 +2426,7 @@ def mix(x, y):
     combined_model_path = os.path.join(tmp_path(), "combined_model")
     combined_model.save(combined_model_path, overwrite=True)
     combined_model_prediction = combined_model.predict([[1, 1]])
-    loaded_combined_model = models.load_model(combined_model_path)
+    loaded_combined_model = tf_keras.models.load_model(combined_model_path)

     # Check if inference is working on the combined model.
     loaded_combined_model_prediction = loaded_combined_model.predict([[1, 1]])
@@ -2435,7 +2444,7 @@ def mix(x, y):
         loaded_model_1_path,
         file_prefix=model_1.training_model_id,
     )
-    loaded_model_1 = models.load_model(loaded_model_1_path)
+    loaded_model_1 = tf_keras.models.load_model(loaded_model_1_path)
     logging.info(
         "Prediction result 1 is %s", loaded_model_1.predict(examples_1)
     )
@@ -2449,7 +2458,7 @@ def mix(x, y):
         loaded_model_2_path,
         file_prefix=model_2.training_model_id,
     )
-    loaded_model_2 = models.load_model(loaded_model_2_path)
+    loaded_model_2 = tf_keras.models.load_model(loaded_model_2_path)
     logging.info(
         "Prediction result 2 is %s", loaded_model_2.predict(examples_2)
     )
@@ -2642,7 +2651,7 @@ def make_dataset(num_examples):
     model.save(saved_model_path)

     logging.info("Loading model from %s", saved_model_path)
-    loaded_model = models.load_model(saved_model_path)
+    loaded_model = tf_keras.models.load_model(saved_model_path)
     loaded_model.summary()

     # Check exported / imported model predictions
@@ -2728,7 +2737,7 @@ def make_dataset(num_examples):
     model.save(saved_model_path)

     logging.info("Loading model from %s", saved_model_path)
-    loaded_model = models.load_model(saved_model_path)
+    loaded_model = tf_keras.models.load_model(saved_model_path)
     loaded_model.summary()

     # Check exported / imported model predictions
@@ -2901,7 +2910,7 @@ def test_plot_ydf_model(self):
     )
     model_tmp_path_keras = os.path.join(self.get_temp_dir(), "kerasmodel")
     keras.yggdrasil_model_to_keras_model(ygg_model_path, model_tmp_path_keras)
-    model = tf.keras.models.load_model(model_tmp_path_keras)
+    model = tf_keras.models.load_model(model_tmp_path_keras)
     tree_plot = model_plotter.plot_model(model, tree_idx=0, max_depth=2)
     expected_tree_start = (
         'display_tree({"margin": 10, "node_x_size": 160, "node_y_size": 28,'
diff --git a/tensorflow_decision_forests/keras/test_runner.py b/tensorflow_decision_forests/keras/test_runner.py
index b2abbfd4..07bbe1e3 100644
--- a/tensorflow_decision_forests/keras/test_runner.py
+++ b/tensorflow_decision_forests/keras/test_runner.py
@@ -30,6 +30,7 @@
 import numpy as np
 import pandas as pd
 import tensorflow as tf
+import tf_keras

 # Inject the inference ops only.
 from tensorflow_decision_forests.tensorflow.ops.inference import op  # pylint: disable=unused-import
@@ -45,7 +46,7 @@ def main(argv):
   del argv

   logging.info("Load model")
-  model = tf.keras.models.load_model(FLAGS.model_path)
+  model = tf_keras.models.load_model(FLAGS.model_path)

   logging.info("Load dataset")
   pd_dataset = load_dataset()
diff --git a/tensorflow_decision_forests/keras/wrapper/wrapper.cc b/tensorflow_decision_forests/keras/wrapper/wrapper.cc
index 76529ba3..ffec1e5b 100644
--- a/tensorflow_decision_forests/keras/wrapper/wrapper.cc
+++ b/tensorflow_decision_forests/keras/wrapper/wrapper.cc
@@ -254,6 +254,7 @@ documentation (and meta-data) used to generate this file.
 from typing import Optional, List, Set

 import tensorflow as tf
+import tf_keras
 $0
 TaskType = "abstract_model_pb2.Task"  # pylint: disable=invalid-name
 AdvancedArguments = core.AdvancedArguments
@@ -550,9 +551,9 @@ class $0(core.CoreModel):
       task: Optional[TaskType] = core.Task.CLASSIFICATION,
       features: Optional[List[core.FeatureUsage]] = None,
       exclude_non_specified_features: Optional[bool] = False,
-      preprocessing: Optional["tf.keras.models.Functional"] = None,
-      postprocessing: Optional["tf.keras.models.Functional"] = None,
-      training_preprocessing: Optional["tf.keras.models.Functional"] = None,
+      preprocessing: Optional["tf_keras.models.Functional"] = None,
+      postprocessing: Optional["tf_keras.models.Functional"] = None,
+      training_preprocessing: Optional["tf_keras.models.Functional"] = None,
       ranking_group: Optional[str] = None,
       uplift_treatment: Optional[str] = None,
       temp_directory: Optional[str] = None,
diff --git a/tensorflow_decision_forests/keras/wrappers_pre_generated.py b/tensorflow_decision_forests/keras/wrappers_pre_generated.py
index 20527189..35752bd9 100644
--- a/tensorflow_decision_forests/keras/wrappers_pre_generated.py
+++ b/tensorflow_decision_forests/keras/wrappers_pre_generated.py
@@ -28,6 +28,7 @@
 from typing import Optional, List, Set

 import tensorflow as tf
+import tf_keras

 from tensorflow_decision_forests.keras import core
 from tensorflow_decision_forests.component.tuner import tuner as tuner_lib
@@ -341,9 +342,9 @@ def __init__(
       task: Optional[TaskType] = core.Task.CLASSIFICATION,
       features: Optional[List[core.FeatureUsage]] = None,
       exclude_non_specified_features: Optional[bool] = False,
-      preprocessing: Optional["tf.keras.models.Functional"] = None,
-      postprocessing: Optional["tf.keras.models.Functional"] = None,
-      training_preprocessing: Optional["tf.keras.models.Functional"] = None,
+      preprocessing: Optional["tf_keras.models.Functional"] = None,
+      postprocessing: Optional["tf_keras.models.Functional"] = None,
+      training_preprocessing: Optional["tf_keras.models.Functional"] = None,
       ranking_group: Optional[str] = None,
       uplift_treatment: Optional[str] = None,
       temp_directory: Optional[str] = None,
@@ -479,6 +480,7 @@ def capabilities() -> abstract_learner_pb2.LearnerCapabilities:
         support_partial_cache_dataset_format=False
     )

+
 class DistributedGradientBoostedTreesModel(core.CoreModel):
   r"""Distributed Gradient Boosted Trees learning algorithm.

@@ -682,9 +684,9 @@ def __init__(
       task: Optional[TaskType] = core.Task.CLASSIFICATION,
       features: Optional[List[core.FeatureUsage]] = None,
       exclude_non_specified_features: Optional[bool] = False,
-      preprocessing: Optional["tf.keras.models.Functional"] = None,
-      postprocessing: Optional["tf.keras.models.Functional"] = None,
-      training_preprocessing: Optional["tf.keras.models.Functional"] = None,
+      preprocessing: Optional["tf_keras.models.Functional"] = None,
+      postprocessing: Optional["tf_keras.models.Functional"] = None,
+      training_preprocessing: Optional["tf_keras.models.Functional"] = None,
       ranking_group: Optional[str] = None,
       uplift_treatment: Optional[str] = None,
       temp_directory: Optional[str] = None,
@@ -782,6 +784,7 @@ def capabilities() -> abstract_learner_pb2.LearnerCapabilities:
         support_partial_cache_dataset_format=True
     )

+
 class GradientBoostedTreesModel(core.CoreModel):
   r"""Gradient Boosted Trees learning algorithm.
@@ -1199,9 +1202,9 @@ def __init__(
       task: Optional[TaskType] = core.Task.CLASSIFICATION,
       features: Optional[List[core.FeatureUsage]] = None,
       exclude_non_specified_features: Optional[bool] = False,
-      preprocessing: Optional["tf.keras.models.Functional"] = None,
-      postprocessing: Optional["tf.keras.models.Functional"] = None,
-      training_preprocessing: Optional["tf.keras.models.Functional"] = None,
+      preprocessing: Optional["tf_keras.models.Functional"] = None,
+      postprocessing: Optional["tf_keras.models.Functional"] = None,
+      training_preprocessing: Optional["tf_keras.models.Functional"] = None,
       ranking_group: Optional[str] = None,
       uplift_treatment: Optional[str] = None,
       temp_directory: Optional[str] = None,
@@ -1416,6 +1419,7 @@ def capabilities() -> abstract_learner_pb2.LearnerCapabilities:
         support_partial_cache_dataset_format=False
     )

+
 class HyperparameterOptimizerModel(core.CoreModel):
   r"""Hyperparameter Optimizer learning algorithm.

@@ -1572,9 +1576,9 @@ def __init__(
       task: Optional[TaskType] = core.Task.CLASSIFICATION,
       features: Optional[List[core.FeatureUsage]] = None,
       exclude_non_specified_features: Optional[bool] = False,
-      preprocessing: Optional["tf.keras.models.Functional"] = None,
-      postprocessing: Optional["tf.keras.models.Functional"] = None,
-      training_preprocessing: Optional["tf.keras.models.Functional"] = None,
+      preprocessing: Optional["tf_keras.models.Functional"] = None,
+      postprocessing: Optional["tf_keras.models.Functional"] = None,
+      training_preprocessing: Optional["tf_keras.models.Functional"] = None,
       ranking_group: Optional[str] = None,
       uplift_treatment: Optional[str] = None,
       temp_directory: Optional[str] = None,
@@ -1648,6 +1652,7 @@ def capabilities() -> abstract_learner_pb2.LearnerCapabilities:
         support_partial_cache_dataset_format=False
     )

+
 class MultitaskerModel(core.CoreModel):
   r"""Multitasker learning algorithm.

@@ -1804,9 +1809,9 @@ def __init__(
       task: Optional[TaskType] = core.Task.CLASSIFICATION,
       features: Optional[List[core.FeatureUsage]] = None,
       exclude_non_specified_features: Optional[bool] = False,
-      preprocessing: Optional["tf.keras.models.Functional"] = None,
-      postprocessing: Optional["tf.keras.models.Functional"] = None,
-      training_preprocessing: Optional["tf.keras.models.Functional"] = None,
+      preprocessing: Optional["tf_keras.models.Functional"] = None,
+      postprocessing: Optional["tf_keras.models.Functional"] = None,
+      training_preprocessing: Optional["tf_keras.models.Functional"] = None,
       ranking_group: Optional[str] = None,
       uplift_treatment: Optional[str] = None,
       temp_directory: Optional[str] = None,
@@ -1880,6 +1885,7 @@ def capabilities() -> abstract_learner_pb2.LearnerCapabilities:
         support_partial_cache_dataset_format=False
     )

+
 class RandomForestModel(core.CoreModel):
   r"""Random Forest learning algorithm.

@@ -2231,9 +2237,9 @@ def __init__(
       task: Optional[TaskType] = core.Task.CLASSIFICATION,
       features: Optional[List[core.FeatureUsage]] = None,
       exclude_non_specified_features: Optional[bool] = False,
-      preprocessing: Optional["tf.keras.models.Functional"] = None,
-      postprocessing: Optional["tf.keras.models.Functional"] = None,
-      training_preprocessing: Optional["tf.keras.models.Functional"] = None,
+      preprocessing: Optional["tf_keras.models.Functional"] = None,
+      postprocessing: Optional["tf_keras.models.Functional"] = None,
+      training_preprocessing: Optional["tf_keras.models.Functional"] = None,
       ranking_group: Optional[str] = None,
       uplift_treatment: Optional[str] = None,
       temp_directory: Optional[str] = None,
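Note: unlike `core.py`, the generated wrappers keep the `Functional` annotation as a string literal. A string annotation is a lazy forward reference: it is stored unresolved, so importing these wrappers places no requirement on what `tf_keras.models` actually exports. A self-contained demonstration of that Python behavior (not TF-DF code):

```python
# Standard typing behavior: the quoted annotation is never resolved here,
# and tf_keras is never imported.
from typing import Optional


def f(preprocessing: Optional["tf_keras.models.Functional"] = None):
  return preprocessing


print(f.__annotations__["preprocessing"])
# typing.Optional[ForwardRef('tf_keras.models.Functional')]
```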
@@ -2231,9 +2237,9 @@ def __init__( task: Optional[TaskType] = core.Task.CLASSIFICATION, features: Optional[List[core.FeatureUsage]] = None, exclude_non_specified_features: Optional[bool] = False, - preprocessing: Optional["tf.keras.models.Functional"] = None, - postprocessing: Optional["tf.keras.models.Functional"] = None, - training_preprocessing: Optional["tf.keras.models.Functional"] = None, + preprocessing: Optional["tf_keras.models.Functional"] = None, + postprocessing: Optional["tf_keras.models.Functional"] = None, + training_preprocessing: Optional["tf_keras.models.Functional"] = None, ranking_group: Optional[str] = None, uplift_treatment: Optional[str] = None, temp_directory: Optional[str] = None, diff --git a/tensorflow_decision_forests/tensorflow/BUILD b/tensorflow_decision_forests/tensorflow/BUILD index d62ac107..365c7c50 100644 --- a/tensorflow_decision_forests/tensorflow/BUILD +++ b/tensorflow_decision_forests/tensorflow/BUILD @@ -77,6 +77,7 @@ py_library( # numpy dep, # pandas dep, # TensorFlow Python, + # tf_keras dep, "//tensorflow_decision_forests/component/inspector", "@ydf//yggdrasil_decision_forests/dataset:data_spec_py_proto", "@ydf//yggdrasil_decision_forests/model:abstract_model_py_proto", @@ -92,6 +93,7 @@ py_library( # numpy dep, # pandas dep, # TensorFlow Python, + # tf_keras dep, # TensorFlow /distribute:distribute_lib, # TensorFlow /distribute:parameter_server_strategy_v2, # TensorFlow /distribute/coordinator:cluster_coordinator, @@ -143,6 +145,7 @@ py_library( srcs_version = "PY3", deps = [ # TensorFlow Python, + # tf_keras dep, # TensorFlow /trackable, ], ) @@ -160,6 +163,7 @@ py_test( # absl/logging dep, # pandas dep, # TensorFlow Python, + # tf_keras dep, "@ydf//yggdrasil_decision_forests/dataset:data_spec_py_proto", "@ydf//yggdrasil_decision_forests/model:hyperparameter_py_proto", ], diff --git a/tools/test_bazel.sh b/tools/test_bazel.sh index f26afbeb..41857a5c 100755 --- a/tools/test_bazel.sh +++ b/tools/test_bazel.sh @@ -51,7 +51,7 @@ function is_macos() { # Install Pip dependencies ${PYTHON} -m ensurepip --upgrade || true ${PYTHON} -m pip install pip setuptools --upgrade -${PYTHON} -m pip install numpy pandas scikit-learn +${PYTHON} -m pip install numpy pandas scikit-learn tf_keras # Install Tensorflow at the chosen version. if [ ${TF_VERSION} == "nightly" ]; then